aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig12
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7s-warp.dts1
-rw-r--r--arch/arm/mach-omap2/board-n8x0.c23
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi16
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi40
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi16
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi8
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi2
-rw-r--r--arch/arm64/include/asm/tlbflush.h20
-rw-r--r--arch/arm64/kernel/head.S36
-rw-r--r--arch/arm64/kernel/ptrace.c5
-rw-r--r--arch/arm64/kvm/arm.c13
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c3
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c23
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c3
-rw-r--r--arch/arm64/kvm/mmu.c2
-rw-r--r--arch/arm64/mm/hugetlbpage.c5
-rw-r--r--arch/arm64/mm/pageattr.c3
-rw-r--r--arch/loongarch/Kconfig2
-rw-r--r--arch/loongarch/boot/dts/loongson-2k1000.dtsi7
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000-ref.dts33
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000.dtsi24
-rw-r--r--arch/loongarch/include/asm/addrspace.h1
-rw-r--r--arch/loongarch/include/asm/crash_reserve.h (renamed from arch/loongarch/include/asm/crash_core.h)4
-rw-r--r--arch/loongarch/include/asm/io.h20
-rw-r--r--arch/loongarch/include/asm/kfence.h9
-rw-r--r--arch/loongarch/include/asm/page.h26
-rw-r--r--arch/loongarch/include/asm/perf_event.h8
-rw-r--r--arch/loongarch/include/asm/tlb.h2
-rw-r--r--arch/loongarch/kernel/perf_event.c2
-rw-r--r--arch/loongarch/mm/fault.c4
-rw-r--r--arch/loongarch/mm/mmap.c4
-rw-r--r--arch/loongarch/mm/pgtable.c4
-rw-r--r--arch/mips/include/asm/ptrace.h2
-rw-r--r--arch/mips/kernel/asm-offsets.c1
-rw-r--r--arch/mips/kernel/ptrace.c15
-rw-r--r--arch/mips/kernel/scall32-o32.S23
-rw-r--r--arch/mips/kernel/scall64-n32.S3
-rw-r--r--arch/mips/kernel/scall64-n64.S3
-rw-r--r--arch/mips/kernel/scall64-o32.S33
-rw-r--r--arch/nios2/kernel/prom.c6
-rw-r--r--arch/powerpc/crypto/chacha-p10-glue.c8
-rw-r--r--arch/powerpc/include/asm/vdso/gettimeofday.h3
-rw-r--r--arch/powerpc/kernel/iommu.c7
-rw-r--r--arch/riscv/Makefile2
-rw-r--r--arch/riscv/include/asm/pgtable.h6
-rw-r--r--arch/riscv/include/asm/syscall_wrapper.h3
-rw-r--r--arch/riscv/include/asm/uaccess.h4
-rw-r--r--arch/riscv/include/uapi/asm/auxvec.h2
-rw-r--r--arch/riscv/kernel/compat_vdso/Makefile2
-rw-r--r--arch/riscv/kernel/patch.c8
-rw-r--r--arch/riscv/kernel/process.c5
-rw-r--r--arch/riscv/kernel/signal.c15
-rw-r--r--arch/riscv/kernel/traps.c2
-rw-r--r--arch/riscv/kernel/vdso/Makefile1
-rw-r--r--arch/riscv/kvm/aia_aplic.c37
-rw-r--r--arch/riscv/kvm/vcpu_onereg.c2
-rw-r--r--arch/riscv/mm/tlbflush.c4
-rw-r--r--arch/s390/include/asm/atomic.h44
-rw-r--r--arch/s390/include/asm/atomic_ops.h22
-rw-r--r--arch/s390/include/asm/preempt.h36
-rw-r--r--arch/s390/kernel/entry.S4
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c10
-rw-r--r--arch/s390/kernel/perf_pai_ext.c10
-rw-r--r--arch/s390/mm/fault.c2
-rw-r--r--arch/x86/Kconfig10
-rw-r--r--arch/x86/coco/core.c93
-rw-r--r--arch/x86/entry/common.c75
-rw-r--r--arch/x86/entry/entry_64.S61
-rw-r--r--arch/x86/entry/entry_64_compat.S16
-rw-r--r--arch/x86/entry/entry_fred.c10
-rw-r--r--arch/x86/entry/syscall_32.c21
-rw-r--r--arch/x86/entry/syscall_64.c19
-rw-r--r--arch/x86/entry/syscall_x32.c10
-rw-r--r--arch/x86/events/core.c1
-rw-r--r--arch/x86/events/intel/ds.c8
-rw-r--r--arch/x86/events/intel/lbr.c1
-rw-r--r--arch/x86/hyperv/hv_apic.c16
-rw-r--r--arch/x86/hyperv/hv_proc.c22
-rw-r--r--arch/x86/include/asm/alternative.h4
-rw-r--r--arch/x86/include/asm/apic.h3
-rw-r--r--arch/x86/include/asm/barrier.h3
-rw-r--r--arch/x86/include/asm/coco.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h7
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/nospec-branch.h17
-rw-r--r--arch/x86/include/asm/perf_event.h1
-rw-r--r--arch/x86/include/asm/sev.h4
-rw-r--r--arch/x86/include/asm/syscall.h11
-rw-r--r--arch/x86/include/uapi/asm/kvm.h23
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h1
-rw-r--r--arch/x86/kernel/apic/apic.c6
-rw-r--r--arch/x86/kernel/callthunks.c4
-rw-r--r--arch/x86/kernel/cpu/amd.c53
-rw-r--r--arch/x86/kernel/cpu/bugs.c170
-rw-r--r--arch/x86/kernel/cpu/common.c70
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c6
-rw-r--r--arch/x86/kernel/cpu/mce/core.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h3
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/cpu/topology.c7
-rw-r--r--arch/x86/kernel/cpu/topology_amd.c51
-rw-r--r--arch/x86/kernel/kvm.c11
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/sev.c10
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/Makefile5
-rw-r--r--arch/x86/kvm/cpuid.c43
-rw-r--r--arch/x86/kvm/cpuid.h10
-rw-r--r--arch/x86/kvm/lapic.c3
-rw-r--r--arch/x86/kvm/mmu/mmu.c11
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c51
-rw-r--r--arch/x86/kvm/pmu.c16
-rw-r--r--arch/x86/kvm/reverse_cpuid.h5
-rw-r--r--arch/x86/kvm/svm/sev.c62
-rw-r--r--arch/x86/kvm/svm/svm.c17
-rw-r--r--arch/x86/kvm/svm/svm.h3
-rw-r--r--arch/x86/kvm/svm/vmenter.S97
-rw-r--r--arch/x86/kvm/trace.h10
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c2
-rw-r--r--arch/x86/kvm/vmx/vmenter.S2
-rw-r--r--arch/x86/kvm/vmx/vmx.c41
-rw-r--r--arch/x86/kvm/vmx/vmx.h6
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/lib/retpoline.S13
-rw-r--r--arch/x86/mm/numa_32.c1
-rw-r--r--arch/x86/mm/pat/memtype.c49
-rw-r--r--arch/x86/net/bpf_jit_comp.c19
-rw-r--r--arch/x86/virt/svm/sev.c26
135 files changed, 1400 insertions, 660 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 9f066785bb71d9..65afb1de48b36e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1172,12 +1172,12 @@ config PAGE_SIZE_LESS_THAN_256KB
config PAGE_SHIFT
int
- default 12 if PAGE_SIZE_4KB
- default 13 if PAGE_SIZE_8KB
- default 14 if PAGE_SIZE_16KB
- default 15 if PAGE_SIZE_32KB
- default 16 if PAGE_SIZE_64KB
- default 18 if PAGE_SIZE_256KB
+ default 12 if PAGE_SIZE_4KB
+ default 13 if PAGE_SIZE_8KB
+ default 14 if PAGE_SIZE_16KB
+ default 15 if PAGE_SIZE_32KB
+ default 16 if PAGE_SIZE_64KB
+ default 18 if PAGE_SIZE_256KB
# This allows to use a set of generic functions to determine mmap base
# address by giving priority to top-down scheme only if the process
diff --git a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
index 1235a71c6abe96..52869e68f833c4 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
@@ -666,7 +666,7 @@
bus-width = <4>;
no-1-8-v;
no-sdio;
- no-emmc;
+ no-mmc;
status = "okay";
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
index ba7231b364bb8c..7bab113ca6da79 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
@@ -210,6 +210,7 @@
remote-endpoint = <&mipi_from_sensor>;
clock-lanes = <0>;
data-lanes = <1>;
+ link-frequencies = /bits/ 64 <330000000>;
};
};
};
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index 31755a378c7364..ff2a4a4d822047 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -79,10 +79,8 @@ static struct musb_hdrc_platform_data tusb_data = {
static struct gpiod_lookup_table tusb_gpio_table = {
.dev_id = "musb-tusb",
.table = {
- GPIO_LOOKUP("gpio-0-15", 0, "enable",
- GPIO_ACTIVE_HIGH),
- GPIO_LOOKUP("gpio-48-63", 10, "int",
- GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -140,12 +138,11 @@ static int slot1_cover_open;
static int slot2_cover_open;
static struct device *mmc_device;
-static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
+static struct gpiod_lookup_table nokia800_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
/* Slot switch, GPIO 96 */
- GPIO_LOOKUP("gpio-80-111", 16,
- "switch", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -153,12 +150,12 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
static struct gpiod_lookup_table nokia810_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
+ /* Slot switch, GPIO 96 */
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
/* Slot index 1, VSD power, GPIO 23 */
- GPIO_LOOKUP_IDX("gpio-16-31", 7,
- "vsd", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH),
/* Slot index 1, VIO power, GPIO 9 */
- GPIO_LOOKUP_IDX("gpio-0-15", 9,
- "vio", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -415,8 +412,6 @@ static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC];
static void __init n8x0_mmc_init(void)
{
- gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table);
-
if (board_is_n810()) {
mmc1_data.slots[0].name = "external";
@@ -429,6 +424,8 @@ static void __init n8x0_mmc_init(void)
mmc1_data.slots[1].name = "internal";
mmc1_data.slots[1].ban_openended = 1;
gpiod_add_lookup_table(&nokia810_mmc_gpio_table);
+ } else {
+ gpiod_add_lookup_table(&nokia800_mmc_gpio_table);
}
mmc1_data.nr_slots = 2;
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
index 3c42240e78e245..4aaf5a0c1ed8af 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
@@ -41,7 +41,7 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 267 IRQ_TYPE_LEVEL_HIGH>;
fsl,usbphy = <&usbphy1>;
fsl,usbmisc = <&usbmisc1 0>;
- clocks = <&usb2_lpcg 0>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_6>;
ahb-burst-config = <0x0>;
tx-burst-size-dword = <0x10>;
rx-burst-size-dword = <0x10>;
@@ -58,7 +58,7 @@ conn_subsys: bus@5b000000 {
usbphy1: usbphy@5b100000 {
compatible = "fsl,imx7ulp-usbphy";
reg = <0x5b100000 0x1000>;
- clocks = <&usb2_lpcg 1>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_7>;
power-domains = <&pd IMX_SC_R_USB_0_PHY>;
status = "disabled";
};
@@ -67,8 +67,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b010000 0x10000>;
clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>,
- <&sdhc0_lpcg IMX_LPCG_CLK_0>,
- <&sdhc0_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc0_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_0>;
status = "disabled";
@@ -78,8 +78,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b020000 0x10000>;
clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>,
- <&sdhc1_lpcg IMX_LPCG_CLK_0>,
- <&sdhc1_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc1_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc1_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_1>;
fsl,tuning-start-tap = <20>;
@@ -91,8 +91,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b030000 0x10000>;
clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>,
- <&sdhc2_lpcg IMX_LPCG_CLK_0>,
- <&sdhc2_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc2_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc2_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_2>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
index cab3468b1875ee..f7a91d43a0ffe1 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
@@ -28,8 +28,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi0_lpcg 0>,
- <&spi0_lpcg 1>;
+ clocks = <&spi0_lpcg IMX_LPCG_CLK_0>,
+ <&spi0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -44,8 +44,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi1_lpcg 0>,
- <&spi1_lpcg 1>;
+ clocks = <&spi1_lpcg IMX_LPCG_CLK_0>,
+ <&spi1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -60,8 +60,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi2_lpcg 0>,
- <&spi2_lpcg 1>;
+ clocks = <&spi2_lpcg IMX_LPCG_CLK_0>,
+ <&spi2_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -76,8 +76,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi3_lpcg 0>,
- <&spi3_lpcg 1>;
+ clocks = <&spi3_lpcg IMX_LPCG_CLK_0>,
+ <&spi3_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -145,8 +145,8 @@ dma_subsys: bus@5a000000 {
compatible = "fsl,imx8qxp-pwm", "fsl,imx27-pwm";
reg = <0x5a190000 0x1000>;
interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&adma_pwm_lpcg 1>,
- <&adma_pwm_lpcg 0>;
+ clocks = <&adma_pwm_lpcg IMX_LPCG_CLK_4>,
+ <&adma_pwm_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -355,8 +355,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a880000 0x10000>;
interrupts = <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc0_lpcg 0>,
- <&adc0_lpcg 1>;
+ clocks = <&adc0_lpcg IMX_LPCG_CLK_0>,
+ <&adc0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -370,8 +370,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a890000 0x10000>;
interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc1_lpcg 0>,
- <&adc1_lpcg 1>;
+ clocks = <&adc1_lpcg IMX_LPCG_CLK_0>,
+ <&adc1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -384,8 +384,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a8d0000 0x10000>;
interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
@@ -405,8 +405,8 @@ dma_subsys: bus@5a000000 {
* CAN1 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
@@ -426,8 +426,8 @@ dma_subsys: bus@5a000000 {
* CAN2 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
index 7e510b21bbac55..764c1a08e3b118 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
@@ -25,8 +25,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d000000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm0_lpcg 4>,
- <&pwm0_lpcg 1>;
+ clocks = <&pwm0_lpcg IMX_LPCG_CLK_6>,
+ <&pwm0_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -38,8 +38,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d010000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm1_lpcg 4>,
- <&pwm1_lpcg 1>;
+ clocks = <&pwm1_lpcg IMX_LPCG_CLK_6>,
+ <&pwm1_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -51,8 +51,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d020000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm2_lpcg 4>,
- <&pwm2_lpcg 1>;
+ clocks = <&pwm2_lpcg IMX_LPCG_CLK_6>,
+ <&pwm2_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -64,8 +64,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d030000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm3_lpcg 4>,
- <&pwm3_lpcg 1>;
+ clocks = <&pwm3_lpcg IMX_LPCG_CLK_6>,
+ <&pwm3_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
index 41c79d2ebdd620..f24b14744799e1 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
@@ -14,6 +14,7 @@
pinctrl-0 = <&pinctrl_usbcon1>;
type = "micro";
label = "otg";
+ vbus-supply = <&reg_usb1_vbus>;
id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
port {
@@ -183,7 +184,6 @@
};
&usb3_phy0 {
- vbus-supply = <&reg_usb1_vbus>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
index d5c400b355af56..f5491a608b2f37 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
@@ -14,6 +14,7 @@
pinctrl-0 = <&pinctrl_usbcon1>;
type = "micro";
label = "otg";
+ vbus-supply = <&reg_usb1_vbus>;
id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
port {
@@ -202,7 +203,6 @@
};
&usb3_phy0 {
- vbus-supply = <&reg_usb1_vbus>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
index 11626fae5f97f3..aa9f28c4431d02 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
@@ -153,15 +153,15 @@
};
&flexcan2 {
- clocks = <&can1_lpcg 1>,
- <&can1_lpcg 0>;
+ clocks = <&can1_lpcg IMX_LPCG_CLK_4>,
+ <&can1_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_1 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};
&flexcan3 {
- clocks = <&can2_lpcg 1>,
- <&can2_lpcg 0>;
+ clocks = <&can2_lpcg IMX_LPCG_CLK_4>,
+ <&can2_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_2 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};
diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
index f3a6da8b289019..5260c63db0078b 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
@@ -944,6 +944,8 @@ ap_spi_fp: &spi10 {
vddrf-supply = <&pp1300_l2c>;
vddch0-supply = <&pp3300_l10c>;
max-speed = <3200000>;
+
+ qcom,local-bd-address-broken;
};
};
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 3b0e8248e1a41a..a75de2665d8445 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -161,12 +161,18 @@ static inline unsigned long get_trans_granule(void)
#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3)
/*
- * Generate 'num' values from -1 to 30 with -1 rejected by the
- * __flush_tlb_range() loop below.
+ * Generate 'num' values from -1 to 31 with -1 rejected by the
+ * __flush_tlb_range() loop below. Its return value is only
+ * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If
+ * 'pages' is more than that, you must iterate over the overall
+ * range.
*/
-#define TLBI_RANGE_MASK GENMASK_ULL(4, 0)
-#define __TLBI_RANGE_NUM(pages, scale) \
- ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+#define __TLBI_RANGE_NUM(pages, scale) \
+ ({ \
+ int __pages = min((pages), \
+ __TLBI_RANGE_PAGES(31, (scale))); \
+ (__pages >> (5 * (scale) + 1)) - 1; \
+ })
/*
* TLB Invalidation
@@ -379,10 +385,6 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
* 3. If there is 1 page remaining, flush it through non-range operations. Range
* operations can only span an even number of pages. We save this for last to
* ensure 64KB start alignment is maintained for the LPA2 case.
- *
- * Note that certain ranges can be represented by either num = 31 and
- * scale or num = 0 and scale + 1. The loop below favours the latter
- * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
#define __flush_tlb_range_op(op, start, pages, stride, \
asid, tlb_level, tlbi_user, lpa2) \
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index ce08b744aaab22..cb68adcabe0789 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -289,8 +289,28 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
adr_l x1, __hyp_text_end
adr_l x2, dcache_clean_poc
blr x2
+
+ mov_q x0, INIT_SCTLR_EL2_MMU_OFF
+ pre_disable_mmu_workaround
+ msr sctlr_el2, x0
+ isb
0:
mov_q x0, HCR_HOST_NVHE_FLAGS
+
+ /*
+ * Compliant CPUs advertise their VHE-onlyness with
+ * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
+ * RES1 in that case. Publish the E2H bit early so that
+ * it can be picked up by the init_el2_state macro.
+ *
+ * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
+ * don't advertise it (they predate this relaxation).
+ */
+ mrs_s x1, SYS_ID_AA64MMFR4_EL1
+ tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
+
+ orr x0, x0, #HCR_E2H
+1:
msr hcr_el2, x0
isb
@@ -303,30 +323,16 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
mov_q x1, INIT_SCTLR_EL1_MMU_OFF
- /*
- * Compliant CPUs advertise their VHE-onlyness with
- * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
- * RES1 in that case.
- *
- * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but
- * don't advertise it (they predate this relaxation).
- */
- mrs_s x0, SYS_ID_AA64MMFR4_EL1
- ubfx x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
- tbnz x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
-
mrs x0, hcr_el2
and x0, x0, #HCR_E2H
cbz x0, 2f
-1:
+
/* Set a sane SCTLR_EL1, the VHE way */
- pre_disable_mmu_workaround
msr_s SYS_SCTLR_EL12, x1
mov x2, #BOOT_CPU_FLAG_E2H
b 3f
2:
- pre_disable_mmu_workaround
msr sctlr_el1, x1
mov x2, xzr
3:
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 162b030ab9da33..0d022599eb61b3 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -761,7 +761,6 @@ static void sve_init_header_from_task(struct user_sve_header *header,
{
unsigned int vq;
bool active;
- bool fpsimd_only;
enum vec_type task_type;
memset(header, 0, sizeof(*header));
@@ -777,12 +776,10 @@ static void sve_init_header_from_task(struct user_sve_header *header,
case ARM64_VEC_SVE:
if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE);
break;
case ARM64_VEC_SME:
if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = false;
break;
default:
WARN_ON_ONCE(1);
@@ -790,7 +787,7 @@ static void sve_init_header_from_task(struct user_sve_header *header,
}
if (active) {
- if (fpsimd_only) {
+ if (target->thread.fp_type == FP_STATE_FPSIMD) {
header->flags |= SVE_PT_REGS_FPSIMD;
} else {
header->flags |= SVE_PT_REGS_SVE;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 3dee5490eea94d..c4a0a35e02c728 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2597,14 +2597,11 @@ static __init int kvm_arm_init(void)
if (err)
goto out_hyp;
- if (is_protected_kvm_enabled()) {
- kvm_info("Protected nVHE mode initialized successfully\n");
- } else if (in_hyp_mode) {
- kvm_info("VHE mode initialized successfully\n");
- } else {
- char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n';
- kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode);
- }
+ kvm_info("%s%sVHE mode initialized successfully\n",
+ in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
+ "Protected " : "Hyp "),
+ in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
+ "h" : "n"));
/*
* FIXME: Do something reasonable if kvm_init() fails after pKVM
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index a60fb13e21924f..2fc68da4036d90 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -154,7 +154,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt, false);
- __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+ TLBI_TTL_UNKNOWN);
dsb(ish);
__tlbi(vmalle1is);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 3fae5830f8d2c7..5a59ef88b646f0 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -528,7 +528,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
kvm_clear_pte(ctx->ptep);
dsb(ishst);
- __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
+ __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN);
} else {
if (ctx->end - ctx->addr < granule)
return -EINVAL;
@@ -843,12 +843,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
* Perform the appropriate TLB invalidation based on the
* evicted pte value (if any).
*/
- if (kvm_pte_table(ctx->old, ctx->level))
- kvm_tlb_flush_vmid_range(mmu, ctx->addr,
- kvm_granule_size(ctx->level));
- else if (kvm_pte_valid(ctx->old))
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ u64 size = kvm_granule_size(ctx->level);
+ u64 addr = ALIGN_DOWN(ctx->addr, size);
+
+ kvm_tlb_flush_vmid_range(mmu, addr, size);
+ } else if (kvm_pte_valid(ctx->old)) {
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
+ }
}
if (stage2_pte_is_counted(ctx->old))
@@ -896,9 +899,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
if (kvm_pte_valid(ctx->old)) {
kvm_clear_pte(ctx->ptep);
- if (!stage2_unmap_defer_tlb_flush(pgt))
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
- ctx->addr, ctx->level);
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+ TLBI_TTL_UNKNOWN);
+ } else if (!stage2_unmap_defer_tlb_flush(pgt)) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+ ctx->level);
+ }
}
mm_ops->put_page(ctx->ptep);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index b32e2940df7dc8..1a60b95381e8e9 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -171,7 +171,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt);
- __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+ TLBI_TTL_UNKNOWN);
dsb(ish);
__tlbi(vmalle1is);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 18680771cdb0ea..dc04bc7678659a 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1637,7 +1637,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
- if (esr_fsc_is_permission_fault(esr)) {
+ if (esr_fsc_is_translation_fault(esr)) {
/* Beyond sanitised PARange (which is the IPA limit) */
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
kvm_inject_size_fault(vcpu);
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 0f0e10bb0a9540..b872b003a55f3d 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -276,7 +276,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t *ptep = NULL;
pgdp = pgd_offset(mm, addr);
- p4dp = p4d_offset(pgdp, addr);
+ p4dp = p4d_alloc(mm, pgdp, addr);
+ if (!p4dp)
+ return NULL;
+
pudp = pud_alloc(mm, p4dp, addr);
if (!pudp)
return NULL;
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 0c4e3ecf989d43..0e270a1c51e645 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -219,9 +219,6 @@ bool kernel_page_present(struct page *page)
pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
- if (!can_set_direct_map())
- return true;
-
pgdp = pgd_offset_k(addr);
if (pgd_none(READ_ONCE(*pgdp)))
return false;
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index a5f300ec6f2808..54ad04dacdee94 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -595,7 +595,7 @@ config ARCH_SELECTS_CRASH_DUMP
select RELOCATABLE
config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
- def_bool CRASH_CORE
+ def_bool CRASH_RESERVE
config RELOCATABLE
bool "Relocatable kernel"
diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
index 49a70f8c3cab22..b6aeb1f70e2a03 100644
--- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi
+++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
@@ -100,6 +100,13 @@
#size-cells = <2>;
dma-coherent;
+ isa@18000000 {
+ compatible = "isa";
+ #size-cells = <1>;
+ #address-cells = <2>;
+ ranges = <1 0x0 0x0 0x18000000 0x4000>;
+ };
+
liointc0: interrupt-controller@1fe01400 {
compatible = "loongson,liointc-2.0";
reg = <0x0 0x1fe01400 0x0 0x40>,
diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
index dca91caf895e3c..74b99bd234cc38 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
@@ -61,12 +61,45 @@
&gmac0 {
status = "okay";
+
+ phy-mode = "gmii";
+ phy-handle = <&phy0>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <2>;
+ };
+ };
};
&gmac1 {
status = "okay";
+
+ phy-mode = "gmii";
+ phy-handle = <&phy1>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy1: ethernet-phy@1 {
+ reg = <2>;
+ };
+ };
};
&gmac2 {
status = "okay";
+
+ phy-mode = "rgmii";
+ phy-handle = <&phy2>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy2: ethernet-phy@2 {
+ reg = <0>;
+ };
+ };
};
diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
index a231949b5f553a..9eab2d02cbe8bf 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi
+++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
@@ -51,6 +51,13 @@
#address-cells = <2>;
#size-cells = <2>;
+ isa@18400000 {
+ compatible = "isa";
+ #size-cells = <1>;
+ #address-cells = <2>;
+ ranges = <1 0x0 0x0 0x18400000 0x4000>;
+ };
+
pmc: power-management@100d0000 {
compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon";
reg = <0x0 0x100d0000 0x0 0x58>;
@@ -109,6 +116,8 @@
msi: msi-controller@1fe01140 {
compatible = "loongson,pch-msi-1.0";
reg = <0x0 0x1fe01140 0x0 0x8>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
msi-controller;
loongson,msi-base-vec = <64>;
loongson,msi-num-vecs = <192>;
@@ -140,27 +149,34 @@
#address-cells = <3>;
#size-cells = <2>;
device_type = "pci";
+ msi-parent = <&msi>;
bus-range = <0x0 0xff>;
- ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>,
+ ranges = <0x01000000 0x0 0x00008000 0x0 0x18408000 0x0 0x00008000>,
<0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>;
gmac0: ethernet@3,0 {
reg = <0x1800 0x0 0x0 0x0 0x0>;
- interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
+ <13 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
gmac1: ethernet@3,1 {
reg = <0x1900 0x0 0x0 0x0 0x0>;
- interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
+ <15 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
gmac2: ethernet@3,2 {
reg = <0x1a00 0x0 0x0 0x0 0x0>;
- interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <17 IRQ_TYPE_LEVEL_HIGH>,
+ <18 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h
index b24437e28c6eda..7bd47d65bf7a04 100644
--- a/arch/loongarch/include/asm/addrspace.h
+++ b/arch/loongarch/include/asm/addrspace.h
@@ -11,6 +11,7 @@
#define _ASM_ADDRSPACE_H
#include <linux/const.h>
+#include <linux/sizes.h>
#include <asm/loongarch.h>
diff --git a/arch/loongarch/include/asm/crash_core.h b/arch/loongarch/include/asm/crash_reserve.h
index 218bdbfa527ba8..a1d9b84b1c7d51 100644
--- a/arch/loongarch/include/asm/crash_core.h
+++ b/arch/loongarch/include/asm/crash_reserve.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _LOONGARCH_CRASH_CORE_H
-#define _LOONGARCH_CRASH_CORE_H
+#ifndef _LOONGARCH_CRASH_RESERVE_H
+#define _LOONGARCH_CRASH_RESERVE_H
#define CRASH_ALIGN SZ_2M
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 4a8adcca329b81..c2f9979b2979e5 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -14,11 +14,6 @@
#include <asm/pgtable-bits.h>
#include <asm/string.h>
-/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size);
extern void __init early_iounmap(void __iomem *addr, unsigned long size);
@@ -73,6 +68,21 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
#define __io_aw() mmiowb()
+#ifdef CONFIG_KFENCE
+#define virt_to_phys(kaddr) \
+({ \
+ (likely((unsigned long)kaddr < vm_map_base)) ? __pa((unsigned long)kaddr) : \
+ page_to_phys(tlb_virt_to_page((unsigned long)kaddr)) + offset_in_page((unsigned long)kaddr);\
+})
+
+#define phys_to_virt(paddr) \
+({ \
+ extern char *__kfence_pool; \
+ (unlikely(__kfence_pool == NULL)) ? __va((unsigned long)paddr) : \
+ page_address(phys_to_page((unsigned long)paddr)) + offset_in_page((unsigned long)paddr);\
+})
+#endif
+
#include <asm-generic/io.h>
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
index 6c82aea1c99398..a6a5760da3a332 100644
--- a/arch/loongarch/include/asm/kfence.h
+++ b/arch/loongarch/include/asm/kfence.h
@@ -16,6 +16,7 @@
static inline bool arch_kfence_init_pool(void)
{
int err;
+ char *kaddr, *vaddr;
char *kfence_pool = __kfence_pool;
struct vm_struct *area;
@@ -35,6 +36,14 @@ static inline bool arch_kfence_init_pool(void)
return false;
}
+ kaddr = kfence_pool;
+ vaddr = __kfence_pool;
+ while (kaddr < kfence_pool + KFENCE_POOL_SIZE) {
+ set_page_address(virt_to_page(kaddr), vaddr);
+ kaddr += PAGE_SIZE;
+ vaddr += PAGE_SIZE;
+ }
+
return true;
}
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 44027060c54a28..e85df33f11c772 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -78,7 +78,26 @@ typedef struct { unsigned long pgprot; } pgprot_t;
struct page *dmw_virt_to_page(unsigned long kaddr);
struct page *tlb_virt_to_page(unsigned long kaddr);
-#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr))
+#define pfn_to_phys(pfn) __pfn_to_phys(pfn)
+#define phys_to_pfn(paddr) __phys_to_pfn(paddr)
+
+#define page_to_phys(page) pfn_to_phys(page_to_pfn(page))
+#define phys_to_page(paddr) pfn_to_page(phys_to_pfn(paddr))
+
+#ifndef CONFIG_KFENCE
+
+#define page_to_virt(page) __va(page_to_phys(page))
+#define virt_to_page(kaddr) phys_to_page(__pa(kaddr))
+
+#else
+
+#define WANT_PAGE_VIRTUAL
+
+#define page_to_virt(page) \
+({ \
+ extern char *__kfence_pool; \
+ (__kfence_pool == NULL) ? __va(page_to_phys(page)) : page_address(page); \
+})
#define virt_to_page(kaddr) \
({ \
@@ -86,6 +105,11 @@ struct page *tlb_virt_to_page(unsigned long kaddr);
dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
})
+#endif
+
+#define pfn_to_virt(pfn) page_to_virt(pfn_to_page(pfn))
+#define virt_to_pfn(kaddr) page_to_pfn(virt_to_page(kaddr))
+
extern int __virt_addr_valid(volatile void *kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h
index 2a35a0bc2aaabf..52b638059e40b3 100644
--- a/arch/loongarch/include/asm/perf_event.h
+++ b/arch/loongarch/include/asm/perf_event.h
@@ -7,6 +7,14 @@
#ifndef __LOONGARCH_PERF_EVENT_H__
#define __LOONGARCH_PERF_EVENT_H__
+#include <asm/ptrace.h>
+
#define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->csr_era = (__ip); \
+ (regs)->regs[3] = current_stack_pointer; \
+ (regs)->regs[22] = (unsigned long) __builtin_frame_address(0); \
+}
+
#endif /* __LOONGARCH_PERF_EVENT_H__ */
diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h
index da7a3b5b9374ae..e071f5e9e85802 100644
--- a/arch/loongarch/include/asm/tlb.h
+++ b/arch/loongarch/include/asm/tlb.h
@@ -132,8 +132,6 @@ static __always_inline void invtlb_all(u32 op, u32 info, u64 addr)
);
}
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
static void tlb_flush(struct mmu_gather *tlb);
#define tlb_flush tlb_flush
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index 0491bf453cd496..cac7cba81b65f7 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -884,4 +884,4 @@ static int __init init_hw_perf_events(void)
return 0;
}
-early_initcall(init_hw_perf_events);
+pure_initcall(init_hw_perf_events);
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index 1fc2f6813ea027..97b40defde0608 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -202,10 +202,10 @@ good_area:
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
} else {
- if (!(vma->vm_flags & VM_READ) && address != exception_era(regs))
- goto bad_area;
if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs))
goto bad_area;
+ if (!(vma->vm_flags & (VM_READ | VM_WRITE)) && address != exception_era(regs))
+ goto bad_area;
}
/*
diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c
index a9630a81b38abb..89af7c12e8c08d 100644
--- a/arch/loongarch/mm/mmap.c
+++ b/arch/loongarch/mm/mmap.c
@@ -4,6 +4,7 @@
*/
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/kfence.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mman.h>
@@ -111,6 +112,9 @@ int __virt_addr_valid(volatile void *kaddr)
{
unsigned long vaddr = (unsigned long)kaddr;
+ if (is_kfence_address((void *)kaddr))
+ return 1;
+
if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base))
return 0;
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index 2aae72e638713a..bda018150000e6 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -11,13 +11,13 @@
struct page *dmw_virt_to_page(unsigned long kaddr)
{
- return pfn_to_page(virt_to_pfn(kaddr));
+ return phys_to_page(__pa(kaddr));
}
EXPORT_SYMBOL(dmw_virt_to_page);
struct page *tlb_virt_to_page(unsigned long kaddr)
{
- return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+ return phys_to_page(pfn_to_phys(pte_pfn(*virt_to_kpte(kaddr))));
}
EXPORT_SYMBOL(tlb_virt_to_page);
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index d14d0e37ad02dd..4a2b40ce39e091 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -159,7 +159,7 @@ extern unsigned long exception_ip(struct pt_regs *regs);
#define exception_ip(regs) exception_ip(regs)
#define profile_pc(regs) instruction_pointer(regs)
-extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
+extern asmlinkage long syscall_trace_enter(struct pt_regs *regs);
extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
extern void die(const char *, struct pt_regs *) __noreturn;
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index d1b11f66f748f0..cb1045ebab0621 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -101,6 +101,7 @@ void output_thread_info_defines(void)
OFFSET(TI_CPU, thread_info, cpu);
OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
OFFSET(TI_REGS, thread_info, regs);
+ OFFSET(TI_SYSCALL, thread_info, syscall);
DEFINE(_THREAD_SIZE, THREAD_SIZE);
DEFINE(_THREAD_MASK, THREAD_MASK);
DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 59288c13b581b8..61503a36067e9e 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -1317,16 +1317,13 @@ long arch_ptrace(struct task_struct *child, long request,
* Notification of system call entry/exit
* - triggered by current->work.syscall_trace
*/
-asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
+asmlinkage long syscall_trace_enter(struct pt_regs *regs)
{
user_exit();
- current_thread_info()->syscall = syscall;
-
if (test_thread_flag(TIF_SYSCALL_TRACE)) {
if (ptrace_report_syscall_entry(regs))
return -1;
- syscall = current_thread_info()->syscall;
}
#ifdef CONFIG_SECCOMP
@@ -1335,7 +1332,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
struct seccomp_data sd;
unsigned long args[6];
- sd.nr = syscall;
+ sd.nr = current_thread_info()->syscall;
sd.arch = syscall_get_arch(current);
syscall_get_arguments(current, regs, args);
for (i = 0; i < 6; i++)
@@ -1345,23 +1342,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
ret = __secure_computing(&sd);
if (ret == -1)
return ret;
- syscall = current_thread_info()->syscall;
}
#endif
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->regs[2]);
- audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
+ audit_syscall_entry(current_thread_info()->syscall,
+ regs->regs[4], regs->regs[5],
regs->regs[6], regs->regs[7]);
/*
* Negative syscall numbers are mistaken for rejected syscalls, but
* won't have had the return value set appropriately, so we do so now.
*/
- if (syscall < 0)
+ if (current_thread_info()->syscall < 0)
syscall_set_return_value(current, regs, -ENOSYS, 0);
- return syscall;
+ return current_thread_info()->syscall;
}
/*
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 18dc9b34505614..2c604717e63080 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -77,6 +77,18 @@ loads_done:
PTR_WD load_a7, bad_stack_a7
.previous
+ /*
+ * syscall number is in v0 unless we called syscall(__NR_###)
+ * where the real syscall number is in a0
+ */
+ subu t2, v0, __NR_O32_Linux
+ bnez t2, 1f /* __NR_syscall at offset 0 */
+ LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
+ b 2f
+1:
+ LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
+2:
+
lw t0, TI_FLAGS($28) # syscall tracing enabled?
li t1, _TIF_WORK_SYSCALL_ENTRY
and t0, t1
@@ -114,16 +126,7 @@ syscall_trace_entry:
SAVE_STATIC
move a0, sp
- /*
- * syscall number is in v0 unless we called syscall(__NR_###)
- * where the real syscall number is in a0
- */
- move a1, v0
- subu t2, v0, __NR_O32_Linux
- bnez t2, 1f /* __NR_syscall at offset 0 */
- lw a1, PT_R4(sp)
-
-1: jal syscall_trace_enter
+ jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 97456b2ca7dc32..97788859238c34 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
+ LONG_S v0, TI_SYSCALL($28) # Store syscall number
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -72,7 +74,6 @@ syscall_common:
n32_syscall_trace_entry:
SAVE_STATIC
move a0, sp
- move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S
index e6264aa62e457f..be11ea5cc67e04 100644
--- a/arch/mips/kernel/scall64-n64.S
+++ b/arch/mips/kernel/scall64-n64.S
@@ -46,6 +46,8 @@ NESTED(handle_sys64, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
+ LONG_S v0, TI_SYSCALL($28) # Store syscall number
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -82,7 +84,6 @@ n64_syscall_exit:
syscall_trace_entry:
SAVE_STATIC
move a0, sp
- move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index d3c2616cba2269..7a5abb73e53127 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -79,6 +79,22 @@ loads_done:
PTR_WD load_a7, bad_stack_a7
.previous
+ /*
+ * absolute syscall number is in v0 unless we called syscall(__NR_###)
+ * where the real syscall number is in a0
+ * note: NR_syscall is the first O32 syscall but the macro is
+ * only defined when compiling with -mabi=32 (CONFIG_32BIT)
+ * therefore __NR_O32_Linux is used (4000)
+ */
+
+ subu t2, v0, __NR_O32_Linux
+ bnez t2, 1f /* __NR_syscall at offset 0 */
+ LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
+ b 2f
+1:
+ LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
+2:
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -113,22 +129,7 @@ trace_a_syscall:
sd a7, PT_R11(sp) # For indirect syscalls
move a0, sp
- /*
- * absolute syscall number is in v0 unless we called syscall(__NR_###)
- * where the real syscall number is in a0
- * note: NR_syscall is the first O32 syscall but the macro is
- * only defined when compiling with -mabi=32 (CONFIG_32BIT)
- * therefore __NR_O32_Linux is used (4000)
- */
- .set push
- .set reorder
- subu t1, v0, __NR_O32_Linux
- move a1, v0
- bnez t1, 1f /* __NR_syscall at offset 0 */
- ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
- .set pop
-
-1: jal syscall_trace_enter
+ jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c
index 8d98af5c7201bb..9a8393e6b4a85e 100644
--- a/arch/nios2/kernel/prom.c
+++ b/arch/nios2/kernel/prom.c
@@ -21,7 +21,8 @@
void __init early_init_devtree(void *params)
{
- __be32 *dtb = (u32 *)__dtb_start;
+ __be32 __maybe_unused *dtb = (u32 *)__dtb_start;
+
#if defined(CONFIG_NIOS2_DTB_AT_PHYS_ADDR)
if (be32_to_cpup((__be32 *)CONFIG_NIOS2_DTB_PHYS_ADDR) ==
OF_DT_HEADER) {
@@ -30,8 +31,11 @@ void __init early_init_devtree(void *params)
return;
}
#endif
+
+#ifdef CONFIG_NIOS2_DTB_SOURCE_BOOL
if (be32_to_cpu((__be32) *dtb) == OF_DT_HEADER)
params = (void *)__dtb_start;
+#endif
early_init_dt_scan(params);
}
diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c
index 74fb86b0d2097c..7c728755852e1a 100644
--- a/arch/powerpc/crypto/chacha-p10-glue.c
+++ b/arch/powerpc/crypto/chacha-p10-glue.c
@@ -197,6 +197,9 @@ static struct skcipher_alg algs[] = {
static int __init chacha_p10_init(void)
{
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ return 0;
+
static_branch_enable(&have_p10);
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
@@ -204,10 +207,13 @@ static int __init chacha_p10_init(void)
static void __exit chacha_p10_exit(void)
{
+ if (!static_branch_likely(&have_p10))
+ return;
+
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
-module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init);
+module_init(chacha_p10_init);
module_exit(chacha_p10_exit);
MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)");
diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
index f0a4cf01e85c03..78302f6c258006 100644
--- a/arch/powerpc/include/asm/vdso/gettimeofday.h
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -4,7 +4,6 @@
#ifndef __ASSEMBLY__
-#include <asm/page.h>
#include <asm/vdso/timebase.h>
#include <asm/barrier.h>
#include <asm/unistd.h>
@@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void);
static __always_inline
const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
- return (void *)vd + PAGE_SIZE;
+ return (void *)vd + (1U << CONFIG_PAGE_SHIFT);
}
#endif
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 1185efebf032b6..29a8c8e185851b 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1285,15 +1285,14 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain,
struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct iommu_group *grp = iommu_group_get(dev);
struct iommu_table_group *table_group;
+ struct iommu_group *grp;
/* At first attach the ownership is already set */
- if (!domain) {
- iommu_group_put(grp);
+ if (!domain)
return 0;
- }
+ grp = iommu_group_get(dev);
table_group = iommu_group_get_iommudata(grp);
/*
* The domain being set to PLATFORM from earlier
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 252d63942f34eb..5b3115a1985226 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -151,7 +151,7 @@ endif
endif
vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg
-vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg:../compat_vdso/compat_vdso.so
+vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg
ifneq ($(CONFIG_XIP_KERNEL),y)
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy)
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 97fcde30e2477d..9f8ea0e33eb104 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -593,6 +593,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
return ptep_test_and_clear_young(vma, address, ptep);
}
+#define pgprot_nx pgprot_nx
+static inline pgprot_t pgprot_nx(pgprot_t _prot)
+{
+ return __pgprot(pgprot_val(_prot) & ~_PAGE_EXEC);
+}
+
#define pgprot_noncached pgprot_noncached
static inline pgprot_t pgprot_noncached(pgprot_t _prot)
{
diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h
index 980094c2e9761d..ac80216549ffa6 100644
--- a/arch/riscv/include/asm/syscall_wrapper.h
+++ b/arch/riscv/include/asm/syscall_wrapper.h
@@ -36,7 +36,8 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *);
ulong) \
__attribute__((alias(__stringify(___se_##prefix##name)))); \
__diag_pop(); \
- static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
+ __used; \
static long ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__))
#define SC_RISCV_REGS_TO_ARGS(x, ...) \
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index ec0cab9fbddd0d..72ec1d9bd3f312 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
- long __kr_err; \
+ long __kr_err = 0; \
\
__get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
if (unlikely(__kr_err)) \
@@ -328,7 +328,7 @@ do { \
#define __put_kernel_nofault(dst, src, type, err_label) \
do { \
- long __kr_err; \
+ long __kr_err = 0; \
\
__put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
if (unlikely(__kr_err)) \
diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h
index 10aaa83db89ef7..95050ebe9ad00b 100644
--- a/arch/riscv/include/uapi/asm/auxvec.h
+++ b/arch/riscv/include/uapi/asm/auxvec.h
@@ -34,7 +34,7 @@
#define AT_L3_CACHEGEOMETRY 47
/* entries in ARCH_DLINFO */
-#define AT_VECTOR_SIZE_ARCH 9
+#define AT_VECTOR_SIZE_ARCH 10
#define AT_MINSIGSTKSZ 51
#endif /* _UAPI_ASM_RISCV_AUXVEC_H */
diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile
index 62fa393b2eb2ea..3df4cb788c1fa4 100644
--- a/arch/riscv/kernel/compat_vdso/Makefile
+++ b/arch/riscv/kernel/compat_vdso/Makefile
@@ -74,5 +74,5 @@ quiet_cmd_compat_vdsold = VDSOLD $@
rm $@.tmp
# actual build commands
-quiet_cmd_compat_vdsoas = VDSOAS $@
+quiet_cmd_compat_vdsoas = VDSOAS $@
cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $<
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 37e87fdcf6a000..30e12b310cab73 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
*/
lockdep_assert_held(&text_mutex);
+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
@@ -92,6 +94,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);
+ preempt_enable();
+
return 0;
}
NOKPROBE_SYMBOL(__patch_insn_set);
@@ -122,6 +126,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
if (!riscv_patch_in_stop_machine)
lockdep_assert_held(&text_mutex);
+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
@@ -134,6 +140,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);
+ preempt_enable();
+
return ret;
}
NOKPROBE_SYMBOL(__patch_insn_write);
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 92922dbd5b5c1f..e4bc61c4e58af9 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -27,8 +27,6 @@
#include <asm/vector.h>
#include <asm/cpufeature.h>
-register unsigned long gp_in_global __asm__("gp");
-
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
@@ -37,7 +35,7 @@ EXPORT_SYMBOL(__stack_chk_guard);
extern asmlinkage void ret_from_fork(void);
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
{
cpu_do_idle();
}
@@ -207,7 +205,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (unlikely(args->fn)) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->gp = gp_in_global;
/* Supervisor/Machine, irqs on: */
childregs->status = SR_PP | SR_PIE;
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 501e66debf6972..5a2edd7f027e5d 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -119,6 +119,13 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
struct __sc_riscv_v_state __user *state = sc_vec;
void __user *datap;
+ /*
+ * Mark the vstate as clean prior performing the actual copy,
+ * to avoid getting the vstate incorrectly clobbered by the
+ * discarded vector state.
+ */
+ riscv_v_vstate_set_restore(current, regs);
+
/* Copy everything of __sc_riscv_v_state except datap. */
err = __copy_from_user(&current->thread.vstate, &state->v_state,
offsetof(struct __riscv_v_ext_state, datap));
@@ -133,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
* Copy the whole vector content from user space datap. Use
* copy_from_user to prevent information leak.
*/
- err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
- if (unlikely(err))
- return err;
-
- riscv_v_vstate_set_restore(current, regs);
-
- return err;
+ return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
}
#else
#define save_v_state(task, regs) (0)
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 868d6280cf667e..05a16b1f0aee85 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -122,7 +122,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
pr_cont("\n");
__show_regs(regs);
- dump_instr(KERN_EMERG, regs);
+ dump_instr(KERN_INFO, regs);
}
force_sig_fault(signo, code, (void __user *)addr);
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 9b517fe1b8a8ec..272c431ac5b9f8 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -37,6 +37,7 @@ endif
# Disable -pg to prevent insert call site
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
+CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
# Disable profiling and instrumentation for VDSO code
GCOV_PROFILE := n
diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
index 39e72aa016a4cc..b467ba5ed91000 100644
--- a/arch/riscv/kvm/aia_aplic.c
+++ b/arch/riscv/kvm/aia_aplic.c
@@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
raw_spin_lock_irqsave(&irqd->lock, flags);
sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
- if (!pending &&
- ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
- (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
goto skip_write_pending;
+ if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH ||
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW) {
+ if (!pending)
+ goto skip_write_pending;
+ if ((irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW)
+ goto skip_write_pending;
+ if (!(irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_HIGH)
+ goto skip_write_pending;
+ }
+
if (pending)
irqd->state |= APLIC_IRQ_STATE_PENDING;
else
@@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
static bool aplic_read_input(struct aplic *aplic, u32 irq)
{
- bool ret;
- unsigned long flags;
+ u32 sourcecfg, sm, raw_input, irq_inverted;
struct aplic_irq *irqd;
+ unsigned long flags;
+ bool ret = false;
if (!irq || aplic->nr_irqs <= irq)
return false;
irqd = &aplic->irqs[irq];
raw_spin_lock_irqsave(&irqd->lock, flags);
- ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
+
+ sourcecfg = irqd->sourcecfg;
+ if (sourcecfg & APLIC_SOURCECFG_D)
+ goto skip;
+
+ sm = sourcecfg & APLIC_SOURCECFG_SM_MASK;
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
+ goto skip;
+
+ raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0;
+ irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW ||
+ sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0;
+ ret = !!(raw_input ^ irq_inverted);
+
+skip:
raw_spin_unlock_irqrestore(&irqd->lock, flags);
return ret;
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index f4a6124d25c939..994adc26db4b10 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -986,7 +986,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
{
- return copy_isa_ext_reg_indices(vcpu, NULL);;
+ return copy_isa_ext_reg_indices(vcpu, NULL);
}
static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 893566e004b73f..07d743f87b3f69 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -99,7 +99,7 @@ static void __ipi_flush_tlb_range_asid(void *info)
local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
}
-static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid,
+static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid,
unsigned long start, unsigned long size,
unsigned long stride)
{
@@ -200,7 +200,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID,
+ __flush_tlb_range(cpu_online_mask, FLUSH_TLB_NO_ASID,
start, end - start, PAGE_SIZE);
}
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 7138d189cc420a..0c4cad7d5a5b11 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -15,31 +15,31 @@
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
-static inline int arch_atomic_read(const atomic_t *v)
+static __always_inline int arch_atomic_read(const atomic_t *v)
{
return __atomic_read(v);
}
#define arch_atomic_read arch_atomic_read
-static inline void arch_atomic_set(atomic_t *v, int i)
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
__atomic_set(v, i);
}
#define arch_atomic_set arch_atomic_set
-static inline int arch_atomic_add_return(int i, atomic_t *v)
+static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
{
return __atomic_add_barrier(i, &v->counter) + i;
}
#define arch_atomic_add_return arch_atomic_add_return
-static inline int arch_atomic_fetch_add(int i, atomic_t *v)
+static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
return __atomic_add_barrier(i, &v->counter);
}
#define arch_atomic_fetch_add arch_atomic_fetch_add
-static inline void arch_atomic_add(int i, atomic_t *v)
+static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
__atomic_add(i, &v->counter);
}
@@ -50,11 +50,11 @@ static inline void arch_atomic_add(int i, atomic_t *v)
#define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v)
#define ATOMIC_OPS(op) \
-static inline void arch_atomic_##op(int i, atomic_t *v) \
+static __always_inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
__atomic_##op(i, &v->counter); \
} \
-static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
+static __always_inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
{ \
return __atomic_##op##_barrier(i, &v->counter); \
}
@@ -74,7 +74,7 @@ ATOMIC_OPS(xor)
#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
+static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
return __atomic_cmpxchg(&v->counter, old, new);
}
@@ -82,31 +82,31 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
#define ATOMIC64_INIT(i) { (i) }
-static inline s64 arch_atomic64_read(const atomic64_t *v)
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
return __atomic64_read(v);
}
#define arch_atomic64_read arch_atomic64_read
-static inline void arch_atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
__atomic64_set(v, i);
}
#define arch_atomic64_set arch_atomic64_set
-static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
return __atomic64_add_barrier(i, (long *)&v->counter) + i;
}
#define arch_atomic64_add_return arch_atomic64_add_return
-static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
return __atomic64_add_barrier(i, (long *)&v->counter);
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-static inline void arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
__atomic64_add(i, (long *)&v->counter);
}
@@ -114,20 +114,20 @@ static inline void arch_atomic64_add(s64 i, atomic64_t *v)
#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
-static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
return __atomic64_cmpxchg((long *)&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
-#define ATOMIC64_OPS(op) \
-static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
-{ \
- __atomic64_##op(i, (long *)&v->counter); \
-} \
-static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \
-{ \
- return __atomic64_##op##_barrier(i, (long *)&v->counter); \
+#define ATOMIC64_OPS(op) \
+static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
+{ \
+ __atomic64_##op(i, (long *)&v->counter); \
+} \
+static __always_inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \
+{ \
+ return __atomic64_##op##_barrier(i, (long *)&v->counter); \
}
ATOMIC64_OPS(and)
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index 50510e08b893b5..7fa5f96a553a47 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -8,7 +8,7 @@
#ifndef __ARCH_S390_ATOMIC_OPS__
#define __ARCH_S390_ATOMIC_OPS__
-static inline int __atomic_read(const atomic_t *v)
+static __always_inline int __atomic_read(const atomic_t *v)
{
int c;
@@ -18,14 +18,14 @@ static inline int __atomic_read(const atomic_t *v)
return c;
}
-static inline void __atomic_set(atomic_t *v, int i)
+static __always_inline void __atomic_set(atomic_t *v, int i)
{
asm volatile(
" st %1,%0\n"
: "=R" (v->counter) : "d" (i));
}
-static inline s64 __atomic64_read(const atomic64_t *v)
+static __always_inline s64 __atomic64_read(const atomic64_t *v)
{
s64 c;
@@ -35,7 +35,7 @@ static inline s64 __atomic64_read(const atomic64_t *v)
return c;
}
-static inline void __atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void __atomic64_set(atomic64_t *v, s64 i)
{
asm volatile(
" stg %1,%0\n"
@@ -45,7 +45,7 @@ static inline void __atomic64_set(atomic64_t *v, s64 i)
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
#define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \
-static inline op_type op_name(op_type val, op_type *ptr) \
+static __always_inline op_type op_name(op_type val, op_type *ptr) \
{ \
op_type old; \
\
@@ -96,7 +96,7 @@ __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
#define __ATOMIC_OP(op_name, op_string) \
-static inline int op_name(int val, int *ptr) \
+static __always_inline int op_name(int val, int *ptr) \
{ \
int old, new; \
\
@@ -122,7 +122,7 @@ __ATOMIC_OPS(__atomic_xor, "xr")
#undef __ATOMIC_OPS
#define __ATOMIC64_OP(op_name, op_string) \
-static inline long op_name(long val, long *ptr) \
+static __always_inline long op_name(long val, long *ptr) \
{ \
long old, new; \
\
@@ -154,7 +154,7 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-static inline int __atomic_cmpxchg(int *ptr, int old, int new)
+static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new)
{
asm volatile(
" cs %[old],%[new],%[ptr]"
@@ -164,7 +164,7 @@ static inline int __atomic_cmpxchg(int *ptr, int old, int new)
return old;
}
-static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
+static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
{
int old_expected = old;
@@ -176,7 +176,7 @@ static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
return old == old_expected;
}
-static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
+static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new)
{
asm volatile(
" csg %[old],%[new],%[ptr]"
@@ -186,7 +186,7 @@ static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
return old;
}
-static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
+static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
{
long old_expected = old;
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index bf15da0fedbca5..0e3da500e98c19 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -12,12 +12,12 @@
#define PREEMPT_NEED_RESCHED 0x80000000
#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
-static inline int preempt_count(void)
+static __always_inline int preempt_count(void)
{
return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
}
-static inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(int pc)
{
int old, new;
@@ -29,22 +29,22 @@ static inline void preempt_count_set(int pc)
old, new) != old);
}
-static inline void set_preempt_need_resched(void)
+static __always_inline void set_preempt_need_resched(void)
{
__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
}
-static inline void clear_preempt_need_resched(void)
+static __always_inline void clear_preempt_need_resched(void)
{
__atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
}
-static inline bool test_preempt_need_resched(void)
+static __always_inline bool test_preempt_need_resched(void)
{
return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED);
}
-static inline void __preempt_count_add(int val)
+static __always_inline void __preempt_count_add(int val)
{
/*
* With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
@@ -59,17 +59,17 @@ static inline void __preempt_count_add(int val)
__atomic_add(val, &S390_lowcore.preempt_count);
}
-static inline void __preempt_count_sub(int val)
+static __always_inline void __preempt_count_sub(int val)
{
__preempt_count_add(-val);
}
-static inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool __preempt_count_dec_and_test(void)
{
return __atomic_add(-1, &S390_lowcore.preempt_count) == 1;
}
-static inline bool should_resched(int preempt_offset)
+static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(READ_ONCE(S390_lowcore.preempt_count) ==
preempt_offset);
@@ -79,45 +79,45 @@ static inline bool should_resched(int preempt_offset)
#define PREEMPT_ENABLED (0)
-static inline int preempt_count(void)
+static __always_inline int preempt_count(void)
{
return READ_ONCE(S390_lowcore.preempt_count);
}
-static inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(int pc)
{
S390_lowcore.preempt_count = pc;
}
-static inline void set_preempt_need_resched(void)
+static __always_inline void set_preempt_need_resched(void)
{
}
-static inline void clear_preempt_need_resched(void)
+static __always_inline void clear_preempt_need_resched(void)
{
}
-static inline bool test_preempt_need_resched(void)
+static __always_inline bool test_preempt_need_resched(void)
{
return false;
}
-static inline void __preempt_count_add(int val)
+static __always_inline void __preempt_count_add(int val)
{
S390_lowcore.preempt_count += val;
}
-static inline void __preempt_count_sub(int val)
+static __always_inline void __preempt_count_sub(int val)
{
S390_lowcore.preempt_count -= val;
}
-static inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool __preempt_count_dec_and_test(void)
{
return !--S390_lowcore.preempt_count && tif_need_resched();
}
-static inline bool should_resched(int preempt_offset)
+static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(preempt_count() == preempt_offset &&
tif_need_resched());
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 787394978bc0f8..6a1e0fbbaa15b3 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -340,7 +340,8 @@ SYM_CODE_START(pgm_check_handler)
mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
stctg %c1,%c1,__PT_CR1(%r11)
#if IS_ENABLED(CONFIG_KVM)
- lg %r12,__LC_GMAP
+ ltg %r12,__LC_GMAP
+ jz 5f
clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11)
jne 5f
BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
@@ -635,6 +636,7 @@ SYM_DATA_START_LOCAL(daton_psw)
SYM_DATA_END(daton_psw)
.section .rodata, "a"
+ .balign 8
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
SYM_DATA_START(sys_call_table)
#include "asm/syscall_table.h"
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 823d652e3917f8..4ad472d130a3c0 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -90,7 +90,6 @@ static void paicrypt_event_destroy(struct perf_event *event)
event->cpu);
struct paicrypt_map *cpump = mp->mapptr;
- cpump->event = NULL;
static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
@@ -356,10 +355,15 @@ static int paicrypt_add(struct perf_event *event, int flags)
static void paicrypt_stop(struct perf_event *event, int flags)
{
- if (!event->attr.sample_period) /* Counting */
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
paicrypt_read(event);
- else /* Sampling */
+ } else { /* Sampling */
perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index 616a25606cd63d..a6da7e0cc7a66d 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -122,7 +122,6 @@ static void paiext_event_destroy(struct perf_event *event)
free_page(PAI_SAVE_AREA(event));
mutex_lock(&paiext_reserve_mutex);
- cpump->event = NULL;
if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
paiext_free(mp);
paiext_root_free();
@@ -362,10 +361,15 @@ static int paiext_add(struct perf_event *event, int flags)
static void paiext_stop(struct perf_event *event, int flags)
{
- if (!event->attr.sample_period) /* Counting */
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
paiext_read(event);
- else /* Sampling */
+ } else { /* Sampling */
perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index c421dd44ffbe03..0c66b32e0f9f1b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -75,7 +75,7 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
if (!IS_ENABLED(CONFIG_PGSTE))
return KERNEL_FAULT;
gmap = (struct gmap *)S390_lowcore.gmap;
- if (regs->cr1 == gmap->asce)
+ if (gmap && gmap->asce == regs->cr1)
return GMAP_FAULT;
return KERNEL_FAULT;
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4fff6ed46e902c..4474bf32d0a497 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2633,6 +2633,16 @@ config MITIGATION_RFDS
stored in floating point, vector and integer registers.
See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
+config MITIGATION_SPECTRE_BHI
+ bool "Mitigate Spectre-BHB (Branch History Injection)"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
+ where the branch history buffer is poisoned to speculatively steer
+ indirect branches.
+ See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index d07be9d05cd037..b31ef2424d194b 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -3,19 +3,28 @@
* Confidential Computing Platform Capability checks
*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
+ * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
*/
#include <linux/export.h>
#include <linux/cc_platform.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <asm/archrandom.h>
#include <asm/coco.h>
#include <asm/processor.h>
enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE;
u64 cc_mask __ro_after_init;
+static struct cc_attr_flags {
+ __u64 host_sev_snp : 1,
+ __resv : 63;
+} cc_flags;
+
static bool noinstr intel_cc_platform_has(enum cc_attr attr)
{
switch (attr) {
@@ -89,6 +98,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr)
case CC_ATTR_GUEST_SEV_SNP:
return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+ case CC_ATTR_HOST_SEV_SNP:
+ return cc_flags.host_sev_snp;
+
default:
return false;
}
@@ -148,3 +160,84 @@ u64 cc_mkdec(u64 val)
}
}
EXPORT_SYMBOL_GPL(cc_mkdec);
+
+static void amd_cc_platform_clear(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_HOST_SEV_SNP:
+ cc_flags.host_sev_snp = 0;
+ break;
+ default:
+ break;
+ }
+}
+
+void cc_platform_clear(enum cc_attr attr)
+{
+ switch (cc_vendor) {
+ case CC_VENDOR_AMD:
+ amd_cc_platform_clear(attr);
+ break;
+ default:
+ break;
+ }
+}
+
+static void amd_cc_platform_set(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_HOST_SEV_SNP:
+ cc_flags.host_sev_snp = 1;
+ break;
+ default:
+ break;
+ }
+}
+
+void cc_platform_set(enum cc_attr attr)
+{
+ switch (cc_vendor) {
+ case CC_VENDOR_AMD:
+ amd_cc_platform_set(attr);
+ break;
+ default:
+ break;
+ }
+}
+
+__init void cc_random_init(void)
+{
+ /*
+ * The seed is 32 bytes (in units of longs), which is 256 bits, which
+ * is the security level that the RNG is targeting.
+ */
+ unsigned long rng_seed[32 / sizeof(long)];
+ size_t i, longs;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ return;
+
+ /*
+ * Since the CoCo threat model includes the host, the only reliable
+ * source of entropy that can be neither observed nor manipulated is
+ * RDRAND. Usually, RDRAND failure is considered tolerable, but since
+ * CoCo guests have no other unobservable source of entropy, it's
+ * important to at least ensure the RNG gets some initial random seeds.
+ */
+ for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) {
+ longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i);
+
+ /*
+ * A zero return value means that the guest doesn't have RDRAND
+ * or the CPU is physically broken, and in both cases that
+ * means most crypto inside of the CoCo instance will be
+ * broken, defeating the purpose of CoCo in the first place. So
+ * just panic here because it's absolutely unsafe to continue
+ * executing.
+ */
+ if (longs == 0)
+ panic("RDRAND is defective.");
+ }
+ add_device_randomness(rng_seed, sizeof(rng_seed));
+ memzero_explicit(rng_seed, sizeof(rng_seed));
+}
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6356060caaf311..51cc9c7cb9bdc0 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
if (likely(unr < NR_syscalls)) {
unr = array_index_nospec(unr, NR_syscalls);
- regs->ax = sys_call_table[unr](regs);
+ regs->ax = x64_sys_call(regs, unr);
return true;
}
return false;
@@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
xnr = array_index_nospec(xnr, X32_NR_syscalls);
- regs->ax = x32_sys_call_table[xnr](regs);
+ regs->ax = x32_sys_call(regs, xnr);
return true;
}
return false;
@@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
if (likely(unr < IA32_NR_syscalls)) {
unr = array_index_nospec(unr, IA32_NR_syscalls);
- regs->ax = ia32_sys_call_table[unr](regs);
+ regs->ax = ia32_sys_call(regs, unr);
} else if (nr != -1) {
regs->ax = __ia32_sys_ni_syscall(regs);
}
@@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
}
/**
- * int80_emulation - 32-bit legacy syscall entry
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
@@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
* eax: system call number
* ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
*/
-DEFINE_IDTENTRY_RAW(int80_emulation)
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
{
int nr;
@@ -255,6 +255,71 @@ DEFINE_IDTENTRY_RAW(int80_emulation)
instrumentation_end();
syscall_exit_to_user_mode(regs);
}
+
+#ifdef CONFIG_X86_FRED
+/*
+ * A FRED-specific INT80 handler is warranted for the follwing reasons:
+ *
+ * 1) As INT instructions and hardware interrupts are separate event
+ * types, FRED does not preclude the use of vector 0x80 for external
+ * interrupts. As a result, the FRED setup code does not reserve
+ * vector 0x80 and calling int80_is_external() is not merely
+ * suboptimal but actively incorrect: it could cause a system call
+ * to be incorrectly ignored.
+ *
+ * 2) It is called only for handling vector 0x80 of event type
+ * EVENT_TYPE_SWINT and will never be called to handle any external
+ * interrupt (event type EVENT_TYPE_EXTINT).
+ *
+ * 3) FRED has separate entry flows depending on if the event came from
+ * user space or kernel space, and because the kernel does not use
+ * INT insns, the FRED kernel entry handler fred_entry_from_kernel()
+ * falls through to fred_bad_type() if the event type is
+ * EVENT_TYPE_SWINT, i.e., INT insns. So if the kernel is handling
+ * an INT insn, it can only be from a user level.
+ *
+ * 4) int80_emulation() does a CLEAR_BRANCH_HISTORY. While FRED will
+ * likely take a different approach if it is ever needed: it
+ * probably belongs in either fred_intx()/ fred_other() or
+ * asm_fred_entrypoint_user(), depending on if this ought to be done
+ * for all entries from userspace or only system
+ * calls.
+ *
+ * 5) INT $0x80 is the fast path for 32-bit system calls under FRED.
+ */
+DEFINE_FREDENTRY_RAW(int80_emulation)
+{
+ int nr;
+
+ enter_from_user_mode(regs);
+
+ instrumentation_begin();
+ add_random_kstack_offset();
+
+ /*
+ * FRED pushed 0 into regs::orig_ax and regs::ax contains the
+ * syscall number.
+ *
+ * User tracing code (ptrace or signal handlers) might assume
+ * that the regs::orig_ax contains a 32-bit number on invoking
+ * a 32-bit syscall.
+ *
+ * Establish the syscall convention by saving the 32bit truncated
+ * syscall number in regs::orig_ax and by invalidating regs::ax.
+ */
+ regs->orig_ax = regs->ax & GENMASK(31, 0);
+ regs->ax = -ENOSYS;
+
+ nr = syscall_32_enter(regs);
+
+ local_irq_enable();
+ nr = syscall_enter_from_user_mode_work(regs, nr);
+ do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+}
+#endif
#else /* CONFIG_IA32_EMULATION */
/* Handles int $0x80 on a 32bit kernel */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8af2a26b24f6a9..1b5be07f86698a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
/* clobbers %rax, make sure it is after saving the syscall nr */
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
call do_syscall_64 /* returns with IRQs disabled */
@@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
call make_task_dead
SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone. Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+SYM_FUNC_START(clear_bhb_loop)
+ push %rbp
+ mov %rsp, %rbp
+ movl $5, %ecx
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 1f
+ jmp 5f
+ .align 64, 0xcc
+ ANNOTATE_INTRA_FUNCTION_CALL
+1: call 2f
+ RET
+ .align 64, 0xcc
+2: movl $5, %eax
+3: jmp 4f
+ nop
+4: sub $1, %eax
+ jnz 3b
+ sub $1, %ecx
+ jnz 1b
+ RET
+5: lfence
+ pop %rbp
+ RET
+SYM_FUNC_END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(clear_bhb_loop)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index eabf48c4d4b4c3..c779046cc3fe79 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
movq %rsp, %rdi
call do_fast_syscall_32
@@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
int3
SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
+ * point to C routines, however since this is a system call interface the branch
+ * history needs to be scrubbed to protect against BHI attacks, and that
+ * scrubbing needs to take place in assembly code prior to entering any C
+ * routines.
+ */
+SYM_CODE_START(int80_emulation)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ CLEAR_BRANCH_HISTORY
+ jmp do_int80_emulation
+SYM_CODE_END(int80_emulation)
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index ac120cbdaaf2b4..89c1476fcdd9f9 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -28,9 +28,9 @@ static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code
if (regs->fred_cs.sl > 0) {
pr_emerg("PANIC: invalid or fatal FRED event; event type %u "
"vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
- regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax,
+ regs->fred_ss.type, regs->fred_ss.vector, error_code,
fred_event_data(regs), regs->cs, regs->ip);
- die("invalid or fatal FRED event", regs, regs->orig_ax);
+ die("invalid or fatal FRED event", regs, error_code);
panic("invalid or fatal FRED event");
} else {
unsigned long flags = oops_begin();
@@ -38,10 +38,10 @@ static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code
pr_alert("BUG: invalid or fatal FRED event; event type %u "
"vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
- regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax,
+ regs->fred_ss.type, regs->fred_ss.vector, error_code,
fred_event_data(regs), regs->cs, regs->ip);
- if (__die("Invalid or fatal FRED event", regs, regs->orig_ax))
+ if (__die("Invalid or fatal FRED event", regs, error_code))
sig = 0;
oops_end(flags, regs, sig);
@@ -66,7 +66,7 @@ static noinstr void fred_intx(struct pt_regs *regs)
/* INT80 */
case IA32_SYSCALL_VECTOR:
if (ia32_enabled())
- return int80_emulation(regs);
+ return fred_int80_emulation(regs);
fallthrough;
#endif
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8cfc9bc73e7f8b..c2235bae17ef66 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -18,8 +18,25 @@
#include <asm/syscalls_32.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
+#ifdef CONFIG_X86_32
#define __SYSCALL(nr, sym) __ia32_##sym,
-
-__visible const sys_call_ptr_t ia32_sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_32.h>
};
+#undef __SYSCALL
+#endif
+
+#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
+
+long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_32.h>
+ default: return __ia32_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index be120eec1fc9f9..33b3f09e6f151e 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -11,8 +11,23 @@
#include <asm/syscalls_64.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
#define __SYSCALL(nr, sym) __x64_##sym,
-
-asmlinkage const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_64.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index bdd0e03a1265d2..03de4a93213182 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -11,8 +11,12 @@
#include <asm/syscalls_x32.h>
#undef __SYSCALL
-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
-#include <asm/syscalls_x32.h>
+long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_x32.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
};
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 09050641ce5d3c..5b0dd07b1ef19e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ cpuc->assign[i-1] = cpuc->assign[i];
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 2641ba620f12a5..e010bfed841705 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct pmu *pmu = event->pmu;
/*
- * Make sure we get updated with the first PEBS
- * event. It will trigger also during removal, but
- * that does not hurt:
+ * Make sure we get updated with the first PEBS event.
+ * During removal, ->pebs_data_cfg is still valid for
+ * the last PEBS event. Don't clear it.
*/
- if (cpuc->n_pebs == 1)
+ if ((cpuc->n_pebs == 1) && add)
cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 78cd5084104e9c..4367aa77cb8d9f 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1693,6 +1693,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
lbr->from = x86_pmu.lbr_from;
lbr->to = x86_pmu.lbr_to;
lbr->info = x86_pmu.lbr_info;
+ lbr->has_callstack = x86_pmu_has_lbr_callstack();
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 5fc45543e95502..0569f579338b51 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -105,7 +105,7 @@ static bool cpu_is_self(int cpu)
* IPI implementation on Hyper-V.
*/
static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
- bool exclude_self)
+ bool exclude_self)
{
struct hv_send_ipi_ex *ipi_arg;
unsigned long flags;
@@ -132,8 +132,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) {
ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask,
- exclude_self ? cpu_is_self : NULL);
+ nr_bank = cpumask_to_vpset_skip(&ipi_arg->vp_set, mask,
+ exclude_self ? cpu_is_self : NULL);
/*
* 'nr_bank <= 0' means some CPUs in cpumask can't be
@@ -147,7 +147,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
}
status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
- ipi_arg, NULL);
+ ipi_arg, NULL);
ipi_mask_ex_done:
local_irq_restore(flags);
@@ -155,7 +155,7 @@ ipi_mask_ex_done:
}
static bool __send_ipi_mask(const struct cpumask *mask, int vector,
- bool exclude_self)
+ bool exclude_self)
{
int cur_cpu, vcpu, this_cpu = smp_processor_id();
struct hv_send_ipi ipi_arg;
@@ -181,7 +181,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
return false;
}
- if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
return false;
/*
@@ -218,7 +218,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
}
status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
- ipi_arg.cpu_mask);
+ ipi_arg.cpu_mask);
return hv_result_success(status);
do_ex_hypercall:
@@ -241,7 +241,7 @@ static bool __send_ipi_one(int cpu, int vector)
return false;
}
- if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
return false;
if (vp >= 64)
diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c
index 68a0843d4750f7..3fa1f2ee7b0d06 100644
--- a/arch/x86/hyperv/hv_proc.c
+++ b/arch/x86/hyperv/hv_proc.c
@@ -3,7 +3,6 @@
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/clockchips.h>
-#include <linux/acpi.h>
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
@@ -116,12 +115,11 @@ free_buf:
int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
{
- struct hv_add_logical_processor_in *input;
- struct hv_add_logical_processor_out *output;
+ struct hv_input_add_logical_processor *input;
+ struct hv_output_add_logical_processor *output;
u64 status;
unsigned long flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);
/*
* When adding a logical processor, the hypervisor may return
@@ -137,11 +135,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
input->lp_index = lp_index;
input->apic_id = apic_id;
- input->flags = 0;
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
+ input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR,
input, output);
local_irq_restore(flags);
@@ -166,7 +160,6 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
u64 status;
unsigned long irq_flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);
/* Root VPs don't seem to need pages deposited */
if (partition_id != hv_current_partition_id) {
@@ -185,14 +178,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
input->vp_index = vp_index;
input->flags = flags;
input->subnode_type = HvSubnodeAny;
- if (node != NUMA_NO_NODE) {
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
- } else {
- input->proximity_domain_info.as_uint64 = 0;
- }
+ input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
local_irq_restore(irq_flags);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index fcd20c6dc7f90c..67b68d0d17d1ec 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void);
extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
struct module *mod);
extern void *callthunks_translate_call_dest(void *dest);
-extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
#else
static __always_inline void callthunks_patch_builtin_calls(void) {}
static __always_inline void
@@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
return dest;
}
static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
- void *func)
+ void *func, void *ip)
{
return 0;
}
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 94ce0f7c9d3a26..e6ab0cf15ed573 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -13,6 +13,7 @@
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
+#include <asm/io.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -98,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
static inline u32 native_apic_mem_read(u32 reg)
{
- return *((volatile u32 *)(APIC_BASE + reg));
+ return readl((void __iomem *)(APIC_BASE + reg));
}
static inline void native_apic_mem_eoi(void)
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index fe1e7e3cc844a8..63bdc6b8521971 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -79,6 +79,9 @@ do { \
#define __smp_mb__before_atomic() do { } while (0)
#define __smp_mb__after_atomic() do { } while (0)
+/* Writing to CR3 provides a full memory barrier in switch_mm(). */
+#define smp_mb__after_switch_mm() do { } while (0)
+
#include <asm-generic/barrier.h>
#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index fb7388bbc212f9..c086699b0d0c59 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask)
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
+void cc_random_init(void);
#else
#define cc_vendor (CC_VENDOR_NONE)
@@ -34,6 +35,7 @@ static inline u64 cc_mkdec(u64 val)
{
return val;
}
+static inline void cc_random_init(void) { }
#endif
#endif /* _ASM_X86_COCO_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 42157ddcc09d43..686e92d2663eee 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -33,6 +33,8 @@ enum cpuid_leafs
CPUID_7_EDX,
CPUID_8000_001F_EAX,
CPUID_8000_0021_EAX,
+ CPUID_LNX_5,
+ NR_CPUID_WORDS,
};
#define X86_CAP_FMT_NUM "%d:%d"
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a38f8f9ba65729..3c7434329661c6 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -461,11 +461,15 @@
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
- * CPUID levels like 0x80000022, etc.
+ * CPUID levels like 0x80000022, etc and Linux defined features.
*
* Reuse free bits when adding new feature flags!
*/
#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
/*
* BUG word(s)
@@ -515,4 +519,5 @@
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 16e07a2eee195d..6efd1497b02636 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -855,6 +855,7 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
struct kvm_hypervisor_cpuid kvm_cpuid;
+ bool is_amd_compatible;
/*
* FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 05956bd8bacf50..e72c2b87295799 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -61,10 +61,13 @@
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
/* A mask for bits which the kernel toggles when controlling mitigations */
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
- | SPEC_CTRL_RRSBA_DIS_S)
+ | SPEC_CTRL_RRSBA_DIS_S \
+ | SPEC_CTRL_BHI_DIS_S)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -163,6 +166,10 @@
* are restricted to targets in
* kernel.
*/
+#define ARCH_CAP_BHI_NO BIT(20) /*
+ * CPU is not affected by Branch
+ * History Injection.
+ */
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 170c89ed22fcd3..ff5f1ecc7d1e65 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -326,6 +326,19 @@
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@@ -368,6 +381,10 @@ extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void);
extern void entry_ibpb(void);
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
extern void (*x86_return_thunk)(void);
extern void __warn_thunk(void);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 3736b8a46c04de..7f1e17250546bd 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -555,6 +555,7 @@ struct x86_pmu_lbr {
unsigned int from;
unsigned int to;
unsigned int info;
+ bool has_callstack;
};
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 07e125f3252839..7f57382afee417 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -228,7 +228,6 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct sn
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
-void kdump_sev_callback(void);
void sev_show_status(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
@@ -258,7 +257,6 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
-static inline void kdump_sev_callback(void) { }
static inline void sev_show_status(void) { }
#endif
@@ -270,6 +268,7 @@ int psmash(u64 pfn);
int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
int rmp_make_shared(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
+void kdump_sev_callback(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
@@ -282,6 +281,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
}
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
+static inline void kdump_sev_callback(void) { }
#endif
#endif
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index f44e2f9ab65d77..2fc7bc3863ff6f 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,19 +16,17 @@
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
+/* This is used purely for kernel/trace/trace_syscalls.c */
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#else
/*
* These may not exist, but still put the prototypes in so we
* can use IS_ENABLED().
*/
-extern const sys_call_ptr_t ia32_sys_call_table[];
-extern const sys_call_ptr_t x32_sys_call_table[];
-#endif
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
}
bool do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index ad29984d5e398d..ef11aa4cab4253 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -694,6 +694,7 @@ enum sev_cmd_id {
struct kvm_sev_cmd {
__u32 id;
+ __u32 pad0;
__u64 data;
__u32 error;
__u32 sev_fd;
@@ -704,28 +705,35 @@ struct kvm_sev_launch_start {
__u32 policy;
__u64 dh_uaddr;
__u32 dh_len;
+ __u32 pad0;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad1;
};
struct kvm_sev_launch_update_data {
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_launch_secret {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
struct kvm_sev_launch_measure {
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_guest_status {
@@ -738,33 +746,43 @@ struct kvm_sev_dbg {
__u64 src_uaddr;
__u64 dst_uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_attestation_report {
__u8 mnonce[16];
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_send_start {
__u32 policy;
+ __u32 pad0;
__u64 pdh_cert_uaddr;
__u32 pdh_cert_len;
+ __u32 pad1;
__u64 plat_certs_uaddr;
__u32 plat_certs_len;
+ __u32 pad2;
__u64 amd_certs_uaddr;
__u32 amd_certs_len;
+ __u32 pad3;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad4;
};
struct kvm_sev_send_update_data {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
struct kvm_sev_receive_start {
@@ -772,17 +790,22 @@ struct kvm_sev_receive_start {
__u32 policy;
__u64 pdh_uaddr;
__u32 pdh_len;
+ __u32 pad0;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad1;
};
struct kvm_sev_receive_update_data {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 6bc3456a8ebf1d..a1efa7907a0b10 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data {
__u32 token;
__u8 pad[56];
- __u32 enabled;
};
#define KVM_PV_EOI_BIT 0
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a42d8a6f714958..c342c4aa9c6848 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1687,11 +1687,11 @@ static int x2apic_state;
static bool x2apic_hw_locked(void)
{
- u64 ia32_cap;
+ u64 x86_arch_cap_msr;
u64 msr;
- ia32_cap = x86_read_arch_cap_msr();
- if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+ if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
return (msr & LEGACY_XAPIC_DISABLED);
}
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 30335182b6b0ae..e92ff0c11db814 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -314,7 +314,7 @@ static bool is_callthunk(void *addr)
return !bcmp(pad, insn_buff, tmpl_size);
}
-int x86_call_depth_emit_accounting(u8 **pprog, void *func)
+int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
u8 insn_buff[MAX_PATCH_LEN];
@@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
return 0;
memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
- apply_relocation(insn_buff, tmpl_size, *pprog,
+ apply_relocation(insn_buff, tmpl_size, ip,
skl_call_thunk_template, tmpl_size);
memcpy(*pprog, insn_buff, tmpl_size);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 6d8677e80ddbb1..cb9eece55904d0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -345,6 +345,28 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
+static void bsp_determine_snp(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+ cc_vendor = CC_VENDOR_AMD;
+
+ if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
+ /*
+ * RMP table entry format is not architectural and is defined by the
+ * per-processor PPR. Restrict SNP support on the known CPU models
+ * for which the RMP table entry format is currently defined for.
+ */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
+ c->x86 >= 0x19 && snp_probe_rmptable_info()) {
+ cc_platform_set(CC_ATTR_HOST_SEV_SNP);
+ } else {
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
+ }
+ }
+#endif
+}
+
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
@@ -452,21 +474,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
break;
}
- if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
- /*
- * RMP table entry format is not architectural and it can vary by processor
- * and is defined by the per-processor PPR. Restrict SNP support on the
- * known CPU model and family for which the RMP table entry format is
- * currently defined for.
- */
- if (!boot_cpu_has(X86_FEATURE_ZEN3) &&
- !boot_cpu_has(X86_FEATURE_ZEN4) &&
- !boot_cpu_has(X86_FEATURE_ZEN5))
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
- else if (!snp_probe_rmptable_info())
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
- }
-
+ bsp_determine_snp(c);
return;
warn:
@@ -527,7 +535,6 @@ clear_sev:
static void early_init_amd(struct cpuinfo_x86 *c)
{
- u64 value;
u32 dummy;
if (c->x86 >= 0xf)
@@ -595,20 +602,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
early_detect_mem_encrypt(c);
- /* Re-enable TopologyExtensions if switched off by BIOS */
- if (c->x86 == 0x15 &&
- (c->x86_model >= 0x10 && c->x86_model <= 0x6f) &&
- !cpu_has(c, X86_FEATURE_TOPOEXT)) {
-
- if (msr_set_bit(0xc0011005, 54) > 0) {
- rdmsrl(0xc0011005, value);
- if (value & BIT_64(54)) {
- set_cpu_cap(c, X86_FEATURE_TOPOEXT);
- pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
- }
- }
- }
-
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) {
if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB))
setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e7ba936d798b81..ab18185894dfd5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -61,6 +61,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
EXPORT_SYMBOL_GPL(x86_pred_cmd);
+static u64 __ro_after_init x86_arch_cap_msr;
+
static DEFINE_MUTEX(spec_ctrl_mutex);
void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
@@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void)
x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
}
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -301,8 +305,6 @@ static const char * const taa_strings[] = {
static void __init taa_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_TAA)) {
taa_mitigation = TAA_MITIGATION_OFF;
return;
@@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void)
* On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
* update is required.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
- !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+ if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))
taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
/*
@@ -401,8 +402,6 @@ static const char * const mmio_strings[] = {
static void __init mmio_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
cpu_mitigations_off()) {
@@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void)
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return;
- ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable CPU buffer clear mitigation for host and VMM, if also affected
* by MDS or TAA. Otherwise, enable mitigation for VMM only.
@@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void)
* be propagated to uncore buffers, clearing the Fill buffers on idle
* is required irrespective of SMT state.
*/
- if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
static_branch_enable(&mds_idle_clear);
/*
@@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void)
* FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
* affected systems.
*/
- if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+ if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
(boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)))
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)))
mmio_mitigation = MMIO_MITIGATION_VERW;
else
mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
@@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void)
if (rfds_mitigation == RFDS_MITIGATION_OFF)
return;
- if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
@@ -659,8 +656,6 @@ void update_srbds_msr(void)
static void __init srbds_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
return;
@@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void)
* are only exposed to SRBDS when TSX is enabled or when CPU is affected
* by Processor MMIO Stale Data vulnerability.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+ if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
@@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void)
/* Will verify below that mitigation _can_ be disabled */
/* No microcode */
- if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+ if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) {
if (gds_mitigation == GDS_MITIGATION_FORCE) {
/*
* This only needs to be done on the boot CPU so do it
@@ -1544,20 +1538,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
return SPECTRE_V2_RETPOLINE;
}
+static bool __ro_after_init rrsba_disabled;
+
/* Disable in-kernel use of non-RSB RET predictors */
static void __init spec_ctrl_disable_kernel_rrsba(void)
{
- u64 ia32_cap;
+ if (rrsba_disabled)
+ return;
- if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) {
+ rrsba_disabled = true;
return;
+ }
- ia32_cap = x86_read_arch_cap_msr();
+ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ return;
- if (ia32_cap & ARCH_CAP_RRSBA) {
- x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
- update_spec_ctrl(x86_spec_ctrl_base);
- }
+ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ rrsba_disabled = true;
}
static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
@@ -1607,6 +1606,74 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
dump_stack();
}
+/*
+ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
+ * branch history in userspace. Not needed if BHI_NO is set.
+ */
+static bool __init spec_ctrl_bhi_dis(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
+ return false;
+
+ x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
+
+ return true;
+}
+
+enum bhi_mitigations {
+ BHI_MITIGATION_OFF,
+ BHI_MITIGATION_ON,
+};
+
+static enum bhi_mitigations bhi_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF;
+
+static int __init spectre_bhi_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ bhi_mitigation = BHI_MITIGATION_OFF;
+ else if (!strcmp(str, "on"))
+ bhi_mitigation = BHI_MITIGATION_ON;
+ else
+ pr_err("Ignoring unknown spectre_bhi option (%s)", str);
+
+ return 0;
+}
+early_param("spectre_bhi", spectre_bhi_parse_cmdline);
+
+static void __init bhi_select_mitigation(void)
+{
+ if (bhi_mitigation == BHI_MITIGATION_OFF)
+ return;
+
+ /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
+ if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
+ !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) {
+ spec_ctrl_disable_kernel_rrsba();
+ if (rrsba_disabled)
+ return;
+ }
+
+ if (spec_ctrl_bhi_dis())
+ return;
+
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ /* Mitigate KVM by default */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+
+ /* Mitigate syscalls when the mitigation is forced =on */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1718,6 +1785,9 @@ static void __init spectre_v2_select_mitigation(void)
mode == SPECTRE_V2_RETPOLINE)
spec_ctrl_disable_kernel_rrsba();
+ if (boot_cpu_has(X86_BUG_BHI))
+ bhi_select_mitigation();
+
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@@ -1832,8 +1902,6 @@ static void update_indir_branch_cond(void)
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
@@ -1848,7 +1916,7 @@ static void update_mds_branch_idle(void)
if (sched_smt_active()) {
static_branch_enable(&mds_idle_clear);
} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
- (ia32_cap & ARCH_CAP_FBSDP_NO)) {
+ (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
static_branch_disable(&mds_idle_clear);
}
}
@@ -2695,15 +2763,15 @@ static char *stibp_state(void)
switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
- return ", STIBP: disabled";
+ return "; STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
- return ", STIBP: forced";
+ return "; STIBP: forced";
case SPECTRE_V2_USER_STRICT_PREFERRED:
- return ", STIBP: always-on";
+ return "; STIBP: always-on";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
- return ", STIBP: conditional";
+ return "; STIBP: conditional";
}
return "";
}
@@ -2712,10 +2780,10 @@ static char *ibpb_state(void)
{
if (boot_cpu_has(X86_FEATURE_IBPB)) {
if (static_key_enabled(&switch_mm_always_ibpb))
- return ", IBPB: always-on";
+ return "; IBPB: always-on";
if (static_key_enabled(&switch_mm_cond_ibpb))
- return ", IBPB: conditional";
- return ", IBPB: disabled";
+ return "; IBPB: conditional";
+ return "; IBPB: disabled";
}
return "";
}
@@ -2725,14 +2793,32 @@ static char *pbrsb_eibrs_state(void)
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
- return ", PBRSB-eIBRS: SW sequence";
+ return "; PBRSB-eIBRS: SW sequence";
else
- return ", PBRSB-eIBRS: Vulnerable";
+ return "; PBRSB-eIBRS: Vulnerable";
} else {
- return ", PBRSB-eIBRS: Not affected";
+ return "; PBRSB-eIBRS: Not affected";
}
}
+static const char *spectre_bhi_state(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_BHI))
+ return "; BHI: Not affected";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
+ return "; BHI: BHI_DIS_S";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
+ return "; BHI: SW loop, KVM: SW loop";
+ else if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
+ !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) &&
+ rrsba_disabled)
+ return "; BHI: Retpoline";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+ return "; BHI: Vulnerable, KVM: SW loop";
+
+ return "; BHI: Vulnerable";
+}
+
static ssize_t spectre_v2_show_state(char *buf)
{
if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@@ -2745,13 +2831,15 @@ static ssize_t spectre_v2_show_state(char *buf)
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
- return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
+ return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
stibp_state(),
- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
pbrsb_eibrs_state(),
+ spectre_bhi_state(),
+ /* this should always be at the end */
spectre_v2_module_string());
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5c1e6d6be267af..605c26c009c8ac 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_SPECTRE_V2 BIT(8)
#define NO_MMIO BIT(9)
#define NO_EIBRS_PBRSB BIT(10)
+#define NO_BHI BIT(11)
#define VULNWL(vendor, family, model, whitelist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
/* Zhaoxin Family 7 */
- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
+ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
{}
};
@@ -1283,25 +1284,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi
u64 x86_read_arch_cap_msr(void)
{
- u64 ia32_cap = 0;
+ u64 x86_arch_cap_msr = 0;
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
- return ia32_cap;
+ return x86_arch_cap_msr;
}
-static bool arch_cap_mmio_immune(u64 ia32_cap)
+static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr)
{
- return (ia32_cap & ARCH_CAP_FBSDP_NO &&
- ia32_cap & ARCH_CAP_PSDP_NO &&
- ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+ return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_PSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO);
}
-static bool __init vulnerable_to_rfds(u64 ia32_cap)
+static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
{
/* The "immunity" bit trumps everything else: */
- if (ia32_cap & ARCH_CAP_RFDS_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO)
return false;
/*
@@ -1309,7 +1310,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
* indicate that mitigation is needed because guest is running on a
* vulnerable hardware or may migrate to such hardware:
*/
- if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
return true;
/* Only consult the blacklist when there is no enumeration: */
@@ -1318,11 +1319,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
+ u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
- !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
@@ -1334,7 +1335,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
- !(ia32_cap & ARCH_CAP_SSB_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
@@ -1345,17 +1346,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Don't use AutoIBRS when SNP is enabled because it degrades host
* userspace indirect branch performance.
*/
- if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
+ if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) ||
(cpu_has(c, X86_FEATURE_AUTOIBRS) &&
!cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
- !(ia32_cap & ARCH_CAP_PBRSB_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
}
if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)) {
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) {
setup_force_cpu_bug(X86_BUG_MDS);
if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
@@ -1374,9 +1375,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* TSX_CTRL check alone is not sufficient for cases when the microcode
* update is not present or running as guest that don't get TSX_CTRL.
*/
- if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+ if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) &&
(cpu_has(c, X86_FEATURE_RTM) ||
- (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+ (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)))
setup_force_cpu_bug(X86_BUG_TAA);
/*
@@ -1402,7 +1403,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
* nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
- if (!arch_cap_mmio_immune(ia32_cap)) {
+ if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
if (cpu_matches(cpu_vuln_blacklist, MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
@@ -1410,7 +1411,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
}
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
- if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA))
setup_force_cpu_bug(X86_BUG_RETBLEED);
}
@@ -1428,18 +1429,25 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* disabling AVX2. The only way to do this in HW is to clear XCR0[2],
* which means that AVX will be disabled.
*/
- if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+ if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) &&
boot_cpu_has(X86_FEATURE_AVX))
setup_force_cpu_bug(X86_BUG_GDS);
- if (vulnerable_to_rfds(ia32_cap))
+ if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);
+ /* When virtualized, eIBRS could be hidden, assume vulnerable */
+ if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
+ !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
+ boot_cpu_has(X86_FEATURE_HYPERVISOR)))
+ setup_force_cpu_bug(X86_BUG_BHI);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
/* Rogue Data Cache Load? No! */
- if (ia32_cap & ARCH_CAP_RDCL_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO)
return;
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index b7174209d855c6..946813d816bfc2 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -44,7 +44,10 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_F16C, X86_FEATURE_XMM2, },
{ X86_FEATURE_AES, X86_FEATURE_XMM2 },
{ X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
+ { X86_FEATURE_GFNI, X86_FEATURE_XMM2 },
{ X86_FEATURE_FMA, X86_FEATURE_AVX },
+ { X86_FEATURE_VAES, X86_FEATURE_AVX },
+ { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX },
{ X86_FEATURE_AVX2, X86_FEATURE_AVX, },
{ X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
{ X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
@@ -56,9 +59,6 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
- { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
- { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
- { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b5cc557cfc3736..84d41be6d06ba4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -2500,12 +2500,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
return -EINVAL;
b = &per_cpu(mce_banks_array, s->id)[bank];
-
if (!b->init)
return -ENODEV;
b->ctl = new;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
return size;
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 422a4ddc2ab7c9..7b29ebda024f4e 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -108,7 +108,7 @@ static inline void k8_check_syscfg_dram_mod_en(void)
(boot_cpu_data.x86 >= 0x0f)))
return;
- if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return;
rdmsr(MSR_AMD64_SYSCFG, lo, hi);
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index c99f26ebe7a653..1a8687f8073a89 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -78,7 +78,8 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
else
cpu = cpumask_any_but(mask, exclude_cpu);
- if (!IS_ENABLED(CONFIG_NO_HZ_FULL))
+ /* Only continue if tick_nohz_full_mask has been initialized. */
+ if (!tick_nohz_full_enabled())
return cpu;
/* If the CPU picked isn't marked nohz_full nothing more needs doing. */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index a515328d9d7d88..af5aa2c754c222 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
+ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index aaca8d235dc2bb..d17c9b71eb4a25 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
#endif
- set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
@@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
topo_info.nr_disabled_cpus++;
}
- /* Register present and possible CPUs in the domain maps */
+ /*
+ * Register present and possible CPUs in the domain
+ * maps. cpu_possible_map will be updated in
+ * topology_init_possible_cpus() after enumeration is done.
+ */
for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
}
diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
index 1a8b3ad493afef..a7aa6eff4ae5ba 100644
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan)
if (!sft)
sft = get_count_order(ecx.cpu_nthreads + 1);
- topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1);
+ /*
+ * cpu_nthreads describes the number of threads in the package
+ * sft is the number of APIC ID bits per package
+ *
+ * As the number of actual threads per core is not described in
+ * this leaf, just set the CORE domain shift and let the later
+ * parsers set SMT shift. Assume one thread per core by default
+ * which is correct if there are no other CPUID leafs to parse.
+ */
+ topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1);
+ topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1);
return true;
}
-static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id)
+static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id)
{
/*
* Starting with Fam 17h the DIE domain could probably be used to
@@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
tscan->c->topo.initial_apicid = leaf.ext_apic_id;
/*
- * If leaf 0xb is available, then SMT shift is set already. If not
- * take it from ecx.threads_per_core and use topo_update_dom() -
- * topology_set_dom() would propagate and overwrite the already
- * propagated CORE level.
+ * If leaf 0xb is available, then the domain shifts are set
+ * already and nothing to do here.
*/
if (!has_0xb) {
+ /*
+ * Leaf 0x80000008 set the CORE domain shift already.
+ * Update the SMT domain, but do not propagate it.
+ */
unsigned int nthreads = leaf.core_nthreads + 1;
topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads);
@@ -109,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
static bool parse_fam10h_node_id(struct topo_scan *tscan)
{
- struct {
- union {
+ union {
+ struct {
u64 node_id : 3,
nodes_per_pkg : 3,
unused : 58;
- u64 msr;
};
+ u64 msr;
} nid;
if (!boot_cpu_has(X86_FEATURE_NODEID_MSR))
@@ -135,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan)
tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN];
}
+static void topoext_fixup(struct topo_scan *tscan)
+{
+ struct cpuinfo_x86 *c = tscan->c;
+ u64 msrval;
+
+ /* Try to re-enable TopologyExtensions if switched off by BIOS */
+ if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD ||
+ c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f)
+ return;
+
+ if (msr_set_bit(0xc0011005, 54) <= 0)
+ return;
+
+ rdmsrl(0xc0011005, msrval);
+ if (msrval & BIT_64(54)) {
+ set_cpu_cap(c, X86_FEATURE_TOPOEXT);
+ pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
+ }
+}
+
static void parse_topology_amd(struct topo_scan *tscan)
{
bool has_0xb = false;
@@ -164,6 +196,7 @@ static void parse_topology_amd(struct topo_scan *tscan)
void cpu_parse_topology_amd(struct topo_scan *tscan)
{
tscan->amd_nodes_per_pkg = 1;
+ topoext_fixup(tscan);
parse_topology_amd(tscan);
if (tscan->amd_nodes_per_pkg > 1)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4cadfd606e8e6a..7f0732bc0ccd23 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -65,6 +65,7 @@ static int __init parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
+static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled);
static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;
@@ -244,7 +245,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void)
{
u32 flags = 0;
- if (__this_cpu_read(apf_reason.enabled)) {
+ if (__this_cpu_read(async_pf_enabled)) {
flags = __this_cpu_read(apf_reason.flags);
__this_cpu_write(apf_reason.flags, 0);
}
@@ -295,7 +296,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
inc_irq_stat(irq_hv_callback_count);
- if (__this_cpu_read(apf_reason.enabled)) {
+ if (__this_cpu_read(async_pf_enabled)) {
token = __this_cpu_read(apf_reason.token);
kvm_async_pf_task_wake(token);
__this_cpu_write(apf_reason.token, 0);
@@ -362,7 +363,7 @@ static void kvm_guest_cpu_init(void)
wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
- __this_cpu_write(apf_reason.enabled, 1);
+ __this_cpu_write(async_pf_enabled, true);
pr_debug("setup async PF for cpu %d\n", smp_processor_id());
}
@@ -383,11 +384,11 @@ static void kvm_guest_cpu_init(void)
static void kvm_pv_disable_apf(void)
{
- if (!__this_cpu_read(apf_reason.enabled))
+ if (!__this_cpu_read(async_pf_enabled))
return;
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
- __this_cpu_write(apf_reason.enabled, 0);
+ __this_cpu_write(async_pf_enabled, false);
pr_debug("disable async PF for cpu %d\n", smp_processor_id());
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0109e6c510e02f..e125e059e2c45d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -35,6 +35,7 @@
#include <asm/bios_ebda.h>
#include <asm/bugs.h>
#include <asm/cacheinfo.h>
+#include <asm/coco.h>
#include <asm/cpu.h>
#include <asm/efi.h>
#include <asm/gart.h>
@@ -991,6 +992,7 @@ void __init setup_arch(char **cmdline_p)
* memory size.
*/
mem_encrypt_setup_arch();
+ cc_random_init();
efi_fake_memmap();
efi_find_mirror();
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 7e1e63cc48e67d..38ad066179d81f 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2284,16 +2284,6 @@ static int __init snp_init_platform_device(void)
}
device_initcall(snp_init_platform_device);
-void kdump_sev_callback(void)
-{
- /*
- * Do wbinvd() on remote CPUs when SNP is enabled in order to
- * safely do SNP_SHUTDOWN on the local CPU.
- */
- if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
- wbinvd();
-}
-
void sev_show_status(void)
{
int i;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3aaf7e86a859a2..0ebdd088f28b85 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -122,6 +122,7 @@ config KVM_AMD_SEV
default y
depends on KVM_AMD && X86_64
depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
+ select ARCH_HAS_CC_PLATFORM
help
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
with Encrypted State (SEV-ES) on AMD processors.
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index a88bb14266b69f..addc44fc7187d6 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,11 +3,6 @@
ccflags-y += -I $(srctree)/arch/x86/kvm
ccflags-$(CONFIG_KVM_WERROR) += -Werror
-ifeq ($(CONFIG_FRAME_POINTER),y)
-OBJECT_FILES_NON_STANDARD_vmx/vmenter.o := y
-OBJECT_FILES_NON_STANDARD_svm/vmenter.o := y
-endif
-
include $(srctree)/virt/kvm/Makefile.kvm
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index adba49afb5fe63..77352a4abd87f8 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -189,15 +189,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
return 0;
}
-static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
- const char *sig)
+static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries,
+ int nent, const char *sig)
{
struct kvm_hypervisor_cpuid cpuid = {};
struct kvm_cpuid_entry2 *entry;
u32 base;
for_each_possible_hypervisor_cpuid_base(base) {
- entry = kvm_find_cpuid_entry(vcpu, base);
+ entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (entry) {
u32 signature[3];
@@ -217,22 +217,29 @@ static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcp
return cpuid;
}
-static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu,
- struct kvm_cpuid_entry2 *entries, int nent)
+static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
+ const char *sig)
{
- u32 base = vcpu->arch.kvm_cpuid.base;
-
- if (!base)
- return NULL;
+ return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent, sig);
+}
- return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES,
+static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries,
+ int nent, u32 kvm_cpuid_base)
+{
+ return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES,
KVM_CPUID_INDEX_NOT_SIGNIFICANT);
}
static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
{
- return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent);
+ u32 base = vcpu->arch.kvm_cpuid.base;
+
+ if (!base)
+ return NULL;
+
+ return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent, base);
}
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
@@ -266,6 +273,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
int nent)
{
struct kvm_cpuid_entry2 *best;
+ struct kvm_hypervisor_cpuid kvm_cpuid;
best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best) {
@@ -292,10 +300,12 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
- if (kvm_hlt_in_guest(vcpu->kvm) && best &&
- (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
- best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE);
+ if (kvm_cpuid.base) {
+ best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base);
+ if (kvm_hlt_in_guest(vcpu->kvm) && best)
+ best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ }
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
@@ -366,6 +376,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_update_pv_runtime(vcpu);
+ vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 856e3037e74f3f..23dbb9eb277c74 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -120,6 +120,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx);
}
+static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.is_amd_compatible;
+}
+
+static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu)
+{
+ return !guest_cpuid_is_amd_compatible(vcpu);
+}
+
static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cf37586f04668d..ebf41023be3829 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2776,7 +2776,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
- if (r && lvt_type == APIC_LVTPC)
+ if (r && lvt_type == APIC_LVTPC &&
+ guest_cpuid_is_intel_compatible(apic->vcpu))
kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
return r;
}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 992e651540e852..db007a4dffa2e1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4935,7 +4935,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
context->cpu_role.base.level, is_efer_nx(context),
guest_can_use(vcpu, X86_FEATURE_GBPAGES),
is_cr4_pse(context),
- guest_cpuid_is_amd_or_hygon(vcpu));
+ guest_cpuid_is_amd_compatible(vcpu));
}
static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
@@ -5576,9 +5576,9 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
* that problem is swept under the rug; KVM's CPUID API is horrific and
* it's all but impossible to solve it without introducing a new API.
*/
- vcpu->arch.root_mmu.root_role.word = 0;
- vcpu->arch.guest_mmu.root_role.word = 0;
- vcpu->arch.nested_mmu.root_role.word = 0;
+ vcpu->arch.root_mmu.root_role.invalid = 1;
+ vcpu->arch.guest_mmu.root_role.invalid = 1;
+ vcpu->arch.nested_mmu.root_role.invalid = 1;
vcpu->arch.root_mmu.cpu_role.ext.valid = 0;
vcpu->arch.guest_mmu.cpu_role.ext.valid = 0;
vcpu->arch.nested_mmu.cpu_role.ext.valid = 0;
@@ -7399,7 +7399,8 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
* by the memslot, KVM can't use a hugepage due to the
* misaligned address regardless of memory attributes.
*/
- if (gfn >= slot->base_gfn) {
+ if (gfn >= slot->base_gfn &&
+ gfn + nr_pages <= slot->base_gfn + slot->npages) {
if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
hugepage_clear_mixed(slot, gfn, level);
else
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index d078157e62aa40..04c1f0957fea87 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1548,17 +1548,21 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
}
}
-/*
- * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
- * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
- * If AD bits are not enabled, this will require clearing the writable bit on
- * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
- * be flushed.
- */
+static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp)
+{
+ /*
+ * All TDP MMU shadow pages share the same role as their root, aside
+ * from level, so it is valid to key off any shadow page to determine if
+ * write protection is needed for an entire tree.
+ */
+ return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled();
+}
+
static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end)
{
- u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK;
+ const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK :
+ shadow_dirty_mask;
struct tdp_iter iter;
bool spte_set = false;
@@ -1573,7 +1577,7 @@ retry:
if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
continue;
- KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+ KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
spte_ad_need_write_protect(iter.old_spte));
if (!(iter.old_spte & dbit))
@@ -1590,11 +1594,9 @@ retry:
}
/*
- * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
- * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
- * If AD bits are not enabled, this will require clearing the writable bit on
- * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
- * be flushed.
+ * Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the
+ * memslot. Returns true if an SPTE has been changed and the TLBs need to be
+ * flushed.
*/
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
const struct kvm_memory_slot *slot)
@@ -1610,18 +1612,11 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
return spte_set;
}
-/*
- * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
- * set in mask, starting at gfn. The given memslot is expected to contain all
- * the GFNs represented by set bits in the mask. If AD bits are enabled,
- * clearing the dirty status will involve clearing the dirty bit on each SPTE
- * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
- */
static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t gfn, unsigned long mask, bool wrprot)
{
- u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK :
- shadow_dirty_mask;
+ const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK :
+ shadow_dirty_mask;
struct tdp_iter iter;
lockdep_assert_held_write(&kvm->mmu_lock);
@@ -1633,7 +1628,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
if (!mask)
break;
- KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+ KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
spte_ad_need_write_protect(iter.old_spte));
if (iter.level > PG_LEVEL_4K ||
@@ -1659,11 +1654,9 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
}
/*
- * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
- * set in mask, starting at gfn. The given memslot is expected to contain all
- * the GFNs represented by set bits in the mask. If AD bits are enabled,
- * clearing the dirty status will involve clearing the dirty bit on each SPTE
- * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
+ * Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for
+ * which a bit is set in mask, starting at gfn. The given memslot is expected to
+ * contain all the GFNs represented by set bits in the mask.
*/
void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index c397b28e3d1b68..a593b03c9aed67 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -775,8 +775,20 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->pebs_data_cfg_mask = ~0ull;
bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
- if (vcpu->kvm->arch.enable_pmu)
- static_call(kvm_x86_pmu_refresh)(vcpu);
+ if (!vcpu->kvm->arch.enable_pmu)
+ return;
+
+ static_call(kvm_x86_pmu_refresh)(vcpu);
+
+ /*
+ * At RESET, both Intel and AMD CPUs set all enable bits for general
+ * purpose counters in IA32_PERF_GLOBAL_CTRL (so that software that
+ * was written for v1 PMUs don't unknowingly leave GP counters disabled
+ * in the global controls). Emulate that behavior when refreshing the
+ * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL.
+ */
+ if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
+ pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
}
void kvm_pmu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index aadefcaa9561d0..2f4e155080badc 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
-#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
/* CPUID level 0x80000007 (EDX). */
@@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = {
*/
static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
{
+ BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_3);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_4);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_5);
BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
}
@@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index ae0ac12382b927..759581bb2128da 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -84,9 +84,10 @@ struct enc_region {
};
/* Called with the sev_bitmap_lock held, or on shutdown */
-static int sev_flush_asids(int min_asid, int max_asid)
+static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
{
- int ret, asid, error = 0;
+ int ret, error = 0;
+ unsigned int asid;
/* Check if there are any ASIDs to reclaim before performing a flush */
asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
@@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm)
}
/* Must be called with the sev_bitmap_lock held */
-static bool __sev_recycle_asids(int min_asid, int max_asid)
+static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid)
{
if (sev_flush_asids(min_asid, max_asid))
return false;
@@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
static int sev_asid_new(struct kvm_sev_info *sev)
{
- int asid, min_asid, max_asid, ret;
+ /*
+ * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
+ * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
+ * Note: min ASID can end up larger than the max if basic SEV support is
+ * effectively disabled by disallowing use of ASIDs for SEV guests.
+ */
+ unsigned int min_asid = sev->es_active ? 1 : min_sev_asid;
+ unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
+ unsigned int asid;
bool retry = true;
+ int ret;
+
+ if (min_asid > max_asid)
+ return -ENOTTY;
WARN_ON(sev->misc_cg);
sev->misc_cg = get_current_misc_cg();
@@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev)
mutex_lock(&sev_bitmap_lock);
- /*
- * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
- * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
- */
- min_asid = sev->es_active ? 1 : min_sev_asid;
- max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
if (asid > max_asid) {
@@ -179,7 +186,8 @@ again:
mutex_unlock(&sev_bitmap_lock);
- return asid;
+ sev->asid = asid;
+ return 0;
e_uncharge:
sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
@@ -187,7 +195,7 @@ e_uncharge:
return ret;
}
-static int sev_get_asid(struct kvm *kvm)
+static unsigned int sev_get_asid(struct kvm *kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -247,21 +255,19 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_platform_init_args init_args = {0};
- int asid, ret;
+ int ret;
if (kvm->created_vcpus)
return -EINVAL;
- ret = -EBUSY;
if (unlikely(sev->active))
- return ret;
+ return -EINVAL;
sev->active = true;
sev->es_active = argp->id == KVM_SEV_ES_INIT;
- asid = sev_asid_new(sev);
- if (asid < 0)
+ ret = sev_asid_new(sev);
+ if (ret)
goto e_no_asid;
- sev->asid = asid;
init_args.probe = false;
ret = sev_platform_init(&init_args);
@@ -287,8 +293,8 @@ e_no_asid:
static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
+ unsigned int asid = sev_get_asid(kvm);
struct sev_data_activate activate;
- int asid = sev_get_asid(kvm);
int ret;
/* activate ASID on the given handle */
@@ -428,7 +434,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
/* Avoid using vmalloc for smaller buffers. */
size = npages * sizeof(struct page *);
if (size > PAGE_SIZE)
- pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ pages = __vmalloc(size, GFP_KERNEL_ACCOUNT);
else
pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
@@ -2240,8 +2246,10 @@ void __init sev_hardware_setup(void)
goto out;
}
- sev_asid_count = max_sev_asid - min_sev_asid + 1;
- WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+ if (min_sev_asid <= max_sev_asid) {
+ sev_asid_count = max_sev_asid - min_sev_asid + 1;
+ WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+ }
sev_supported = true;
/* SEV-ES support requested? */
@@ -2272,7 +2280,9 @@ void __init sev_hardware_setup(void)
out:
if (boot_cpu_has(X86_FEATURE_SEV))
pr_info("SEV %s (ASIDs %u - %u)\n",
- sev_supported ? "enabled" : "disabled",
+ sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
+ "unusable" :
+ "disabled",
min_sev_asid, max_sev_asid);
if (boot_cpu_has(X86_FEATURE_SEV_ES))
pr_info("SEV-ES %s (ASIDs %u - %u)\n",
@@ -2320,7 +2330,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
*/
static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
{
- int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
+ unsigned int asid = sev_get_asid(vcpu->kvm);
/*
* Note! The address must be a kernel address, as regular page walk
@@ -2638,7 +2648,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
- int asid = sev_get_asid(svm->vcpu.kvm);
+ unsigned int asid = sev_get_asid(svm->vcpu.kvm);
/* Assign the asid allocated with this SEV guest */
svm->asid = asid;
@@ -3174,7 +3184,7 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
unsigned long pfn;
struct page *p;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
/*
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d1a9f995163581..9aaf83c8d57df7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1503,6 +1503,11 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
__free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
}
+static struct sev_es_save_area *sev_es_host_save_area(struct svm_cpu_data *sd)
+{
+ return page_address(sd->save_area) + 0x400;
+}
+
static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1519,12 +1524,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* or subsequent vmload of host save area.
*/
vmsave(sd->save_area_pa);
- if (sev_es_guest(vcpu->kvm)) {
- struct sev_es_save_area *hostsa;
- hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
-
- sev_es_prepare_switch_to_guest(hostsa);
- }
+ if (sev_es_guest(vcpu->kvm))
+ sev_es_prepare_switch_to_guest(sev_es_host_save_area(sd));
if (tsc_scaling)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
@@ -4101,6 +4102,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
{
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
struct vcpu_svm *svm = to_svm(vcpu);
guest_state_enter_irqoff();
@@ -4108,7 +4110,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
amd_clear_divider();
if (sev_es_guest(vcpu->kvm))
- __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
+ __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted,
+ sev_es_host_save_area(sd));
else
__svm_vcpu_run(svm, spec_ctrl_intercepted);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 7f1fbd874c4582..33878efdebc829 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -698,7 +698,8 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
/* vmenter.S */
-void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
+void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted,
+ struct sev_es_save_area *hostsa);
void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
#define DEFINE_KVM_GHCB_ACCESSORS(field) \
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 187018c424bfb4..a0c8eb37d3e1c6 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -3,6 +3,7 @@
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
+#include <asm/frame.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include "kvm-asm-offsets.h"
@@ -67,7 +68,7 @@
"", X86_FEATURE_V_SPEC_CTRL
901:
.endm
-.macro RESTORE_HOST_SPEC_CTRL_BODY
+.macro RESTORE_HOST_SPEC_CTRL_BODY spec_ctrl_intercepted:req
900:
/* Same for after vmexit. */
mov $MSR_IA32_SPEC_CTRL, %ecx
@@ -76,7 +77,7 @@
* Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
* if it was not intercepted during guest execution.
*/
- cmpb $0, (%_ASM_SP)
+ cmpb $0, \spec_ctrl_intercepted
jnz 998f
rdmsr
movl %eax, SVM_spec_ctrl(%_ASM_DI)
@@ -99,6 +100,7 @@
*/
SYM_FUNC_START(__svm_vcpu_run)
push %_ASM_BP
+ mov %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
push %r15
push %r14
@@ -268,7 +270,7 @@ SYM_FUNC_START(__svm_vcpu_run)
RET
RESTORE_GUEST_SPEC_CTRL_BODY
- RESTORE_HOST_SPEC_CTRL_BODY
+ RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP)
10: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 2b
@@ -290,66 +292,68 @@ SYM_FUNC_START(__svm_vcpu_run)
SYM_FUNC_END(__svm_vcpu_run)
+#ifdef CONFIG_KVM_AMD_SEV
+
+
+#ifdef CONFIG_X86_64
+#define SEV_ES_GPRS_BASE 0x300
+#define SEV_ES_RBX (SEV_ES_GPRS_BASE + __VCPU_REGS_RBX * WORD_SIZE)
+#define SEV_ES_RBP (SEV_ES_GPRS_BASE + __VCPU_REGS_RBP * WORD_SIZE)
+#define SEV_ES_RSI (SEV_ES_GPRS_BASE + __VCPU_REGS_RSI * WORD_SIZE)
+#define SEV_ES_RDI (SEV_ES_GPRS_BASE + __VCPU_REGS_RDI * WORD_SIZE)
+#define SEV_ES_R12 (SEV_ES_GPRS_BASE + __VCPU_REGS_R12 * WORD_SIZE)
+#define SEV_ES_R13 (SEV_ES_GPRS_BASE + __VCPU_REGS_R13 * WORD_SIZE)
+#define SEV_ES_R14 (SEV_ES_GPRS_BASE + __VCPU_REGS_R14 * WORD_SIZE)
+#define SEV_ES_R15 (SEV_ES_GPRS_BASE + __VCPU_REGS_R15 * WORD_SIZE)
+#endif
+
/**
* __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
* @svm: struct vcpu_svm *
* @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_sev_es_vcpu_run)
- push %_ASM_BP
-#ifdef CONFIG_X86_64
- push %r15
- push %r14
- push %r13
- push %r12
-#else
- push %edi
- push %esi
-#endif
- push %_ASM_BX
+ FRAME_BEGIN
/*
- * Save variables needed after vmexit on the stack, in inverse
- * order compared to when they are needed.
+ * Save non-volatile (callee-saved) registers to the host save area.
+ * Except for RAX and RSP, all GPRs are restored on #VMEXIT, but not
+ * saved on VMRUN.
*/
+ mov %rbp, SEV_ES_RBP (%rdx)
+ mov %r15, SEV_ES_R15 (%rdx)
+ mov %r14, SEV_ES_R14 (%rdx)
+ mov %r13, SEV_ES_R13 (%rdx)
+ mov %r12, SEV_ES_R12 (%rdx)
+ mov %rbx, SEV_ES_RBX (%rdx)
- /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */
- push %_ASM_ARG2
-
- /* Save @svm. */
- push %_ASM_ARG1
-
-.ifnc _ASM_ARG1, _ASM_DI
/*
- * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX
- * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL.
+ * Save volatile registers that hold arguments that are needed after
+ * #VMEXIT (RDI=@svm and RSI=@spec_ctrl_intercepted).
*/
- mov %_ASM_ARG1, %_ASM_DI
-.endif
+ mov %rdi, SEV_ES_RDI (%rdx)
+ mov %rsi, SEV_ES_RSI (%rdx)
- /* Clobbers RAX, RCX, RDX. */
+ /* Clobbers RAX, RCX, RDX (@hostsa). */
RESTORE_GUEST_SPEC_CTRL
/* Get svm->current_vmcb->pa into RAX. */
- mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
- mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
+ mov SVM_current_vmcb(%rdi), %rax
+ mov KVM_VMCB_pa(%rax), %rax
/* Enter guest mode */
sti
-1: vmrun %_ASM_AX
+1: vmrun %rax
2: cli
- /* Pop @svm to RDI, guest registers have been saved already. */
- pop %_ASM_DI
-
#ifdef CONFIG_MITIGATION_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
- /* Clobbers RAX, RCX, RDX. */
+ /* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */
RESTORE_HOST_SPEC_CTRL
/*
@@ -361,30 +365,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
*/
UNTRAIN_RET_VM
- /* "Pop" @spec_ctrl_intercepted. */
- pop %_ASM_BX
-
- pop %_ASM_BX
-
-#ifdef CONFIG_X86_64
- pop %r12
- pop %r13
- pop %r14
- pop %r15
-#else
- pop %esi
- pop %edi
-#endif
- pop %_ASM_BP
+ FRAME_END
RET
RESTORE_GUEST_SPEC_CTRL_BODY
- RESTORE_HOST_SPEC_CTRL_BODY
+ RESTORE_HOST_SPEC_CTRL_BODY %sil
-3: cmpb $0, _ASM_RIP(kvm_rebooting)
+3: cmpb $0, kvm_rebooting(%rip)
jne 2b
ud2
_ASM_EXTABLE(1b, 3b)
SYM_FUNC_END(__svm_sev_es_vcpu_run)
+#endif /* CONFIG_KVM_AMD_SEV */
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 88659de4d2a714..c6b4b1728006d5 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -735,13 +735,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
* Tracepoint for nested #vmexit because of interrupt pending
*/
TRACE_EVENT(kvm_invlpga,
- TP_PROTO(__u64 rip, int asid, u64 address),
+ TP_PROTO(__u64 rip, unsigned int asid, u64 address),
TP_ARGS(rip, asid, address),
TP_STRUCT__entry(
- __field( __u64, rip )
- __field( int, asid )
- __field( __u64, address )
+ __field( __u64, rip )
+ __field( unsigned int, asid )
+ __field( __u64, address )
),
TP_fast_assign(
@@ -750,7 +750,7 @@ TRACE_EVENT(kvm_invlpga,
__entry->address = address;
),
- TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx",
+ TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx",
__entry->rip, __entry->asid, __entry->address)
);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 12ade343a17ed5..be40474de6e4db 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -535,7 +535,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
perf_capabilities = vcpu_get_perf_capabilities(vcpu);
if (cpuid_model_is_consistent(vcpu) &&
(perf_capabilities & PMU_CAP_LBR_FMT))
- x86_perf_get_lbr(&lbr_desc->records);
+ memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps));
else
lbr_desc->records.nr = 0;
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 2bfbf758d06110..f6986dee6f8c7c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
call vmx_spec_ctrl_restore_host
+ CLEAR_BRANCH_HISTORY_VMEXIT
+
/* Put return value in AX */
mov %_ASM_BX, %_ASM_AX
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c37a89eda90f82..22411f4aff5303 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -218,6 +218,8 @@ module_param(ple_window_max, uint, 0444);
int __read_mostly pt_mode = PT_MODE_SYSTEM;
module_param(pt_mode, int, S_IRUGO);
+struct x86_pmu_lbr __ro_after_init vmx_lbr_caps;
+
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
static DEFINE_MUTEX(vmx_l1d_flush_mutex);
@@ -7862,10 +7864,9 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmx_update_exception_bitmap(vcpu);
}
-static u64 vmx_get_perf_capabilities(void)
+static __init u64 vmx_get_perf_capabilities(void)
{
u64 perf_cap = PMU_CAP_FW_WRITES;
- struct x86_pmu_lbr lbr;
u64 host_perf_cap = 0;
if (!enable_pmu)
@@ -7875,15 +7876,43 @@ static u64 vmx_get_perf_capabilities(void)
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
- x86_perf_get_lbr(&lbr);
- if (lbr.nr)
+ x86_perf_get_lbr(&vmx_lbr_caps);
+
+ /*
+ * KVM requires LBR callstack support, as the overhead due to
+ * context switching LBRs without said support is too high.
+ * See intel_pmu_create_guest_lbr_event() for more info.
+ */
+ if (!vmx_lbr_caps.has_callstack)
+ memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps));
+ else if (vmx_lbr_caps.nr)
perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
}
if (vmx_pebs_supported()) {
perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
- if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4)
- perf_cap &= ~PERF_CAP_PEBS_BASELINE;
+
+ /*
+ * Disallow adaptive PEBS as it is functionally broken, can be
+ * used by the guest to read *host* LBRs, and can be used to
+ * bypass userspace event filters. To correctly and safely
+ * support adaptive PEBS, KVM needs to:
+ *
+ * 1. Account for the ADAPTIVE flag when (re)programming fixed
+ * counters.
+ *
+ * 2. Gain support from perf (or take direct control of counter
+ * programming) to support events without adaptive PEBS
+ * enabled for the hardware counter.
+ *
+ * 3. Ensure LBR MSRs cannot hold host data on VM-Entry with
+ * adaptive PEBS enabled and MSR_PEBS_DATA_CFG.LBRS=1.
+ *
+ * 4. Document which PMU events are effectively exposed to the
+ * guest via adaptive PEBS, and make adaptive PEBS mutually
+ * exclusive with KVM_SET_PMU_EVENT_FILTER if necessary.
+ */
+ perf_cap &= ~PERF_CAP_PEBS_BASELINE;
}
return perf_cap;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 65786dbe7d60bd..90f9e443464645 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -15,6 +15,7 @@
#include "vmx_ops.h"
#include "../cpuid.h"
#include "run_flags.h"
+#include "../mmu.h"
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
@@ -109,6 +110,8 @@ struct lbr_desc {
bool msr_passthrough;
};
+extern struct x86_pmu_lbr vmx_lbr_caps;
+
/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -719,7 +722,8 @@ static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
if (!enable_ept)
return true;
- return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+ return allow_smaller_maxphyaddr &&
+ cpuid_maxphyaddr(vcpu) < kvm_get_shadow_phys_bits();
}
static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 47d9f03b777837..91478b769af089 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
static u64 kvm_get_arch_capabilities(void)
{
@@ -3470,7 +3470,7 @@ static bool is_mci_status_msr(u32 msr)
static bool can_set_mci_status(struct kvm_vcpu *vcpu)
{
/* McStatusWrEn enabled? */
- if (guest_cpuid_is_amd_or_hygon(vcpu))
+ if (guest_cpuid_is_amd_compatible(vcpu))
return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
return false;
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 02cde194a99e69..391059b2c6fbc4 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -228,8 +228,12 @@ SYM_CODE_END(srso_return_thunk)
#else /* !CONFIG_MITIGATION_SRSO */
/* Dummy for the alternative in CALL_UNTRAIN_RET. */
SYM_CODE_START(srso_alias_untrain_ret)
- RET
+ ANNOTATE_UNRET_SAFE
+ ANNOTATE_NOENDBR
+ ret
+ int3
SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
#define JMP_SRSO_UNTRAIN_RET "ud2"
#endif /* CONFIG_MITIGATION_SRSO */
@@ -378,8 +382,15 @@ SYM_FUNC_END(call_depth_return_thunk)
SYM_CODE_START(__x86_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
+#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || \
+ defined(CONFIG_MITIGATION_SRSO) || \
+ defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING)
ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \
"jmp warn_thunk_thunk", X86_FEATURE_ALWAYS
+#else
+ ANNOTATE_UNRET_SAFE
+ ret
+#endif
int3
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 104544359d69cd..025fd7ea5d69f5 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -24,6 +24,7 @@
#include <linux/memblock.h>
#include <linux/init.h>
+#include <asm/pgtable_areas.h>
#include "numa_internal.h"
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 0d72183b5dd028..36b603d0cddefc 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -947,6 +947,38 @@ static void free_pfn_range(u64 paddr, unsigned long size)
memtype_free(paddr, paddr + size);
}
+static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
+ pgprot_t *pgprot)
+{
+ unsigned long prot;
+
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
+
+ /*
+ * We need the starting PFN and cachemode used for track_pfn_remap()
+ * that covered the whole VMA. For most mappings, we can obtain that
+ * information from the page tables. For COW mappings, we might now
+ * suddenly have anon folios mapped and follow_phys() will fail.
+ *
+ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
+ * detect the PFN. If we need the cachemode as well, we're out of luck
+ * for now and have to fail fork().
+ */
+ if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) {
+ if (pgprot)
+ *pgprot = __pgprot(prot);
+ return 0;
+ }
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (pgprot)
+ return -EINVAL;
+ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+ return 0;
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
/*
* track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
@@ -957,20 +989,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
- unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
if (vma->vm_flags & VM_PAT) {
- /*
- * reserve the whole chunk covered by vma. We need the
- * starting address and protection from pte.
- */
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, &pgprot))
return -EINVAL;
- }
- pgprot = __pgprot(prot);
+ /* reserve the whole chunk covered by vma. */
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}
@@ -1045,7 +1070,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size, bool mm_wr_locked)
{
resource_size_t paddr;
- unsigned long prot;
if (vma && !(vma->vm_flags & VM_PAT))
return;
@@ -1053,11 +1077,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
/* free the chunk starting from pfn or the whole chunk */
paddr = (resource_size_t)pfn << PAGE_SHIFT;
if (!paddr && !size) {
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, NULL))
return;
- }
-
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a7ba8e1786452d..df5fac428408fe 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -480,7 +480,7 @@ static int emit_call(u8 **pprog, void *func, void *ip)
static int emit_rsb_call(u8 **pprog, void *func, void *ip)
{
OPTIMIZER_HIDE_VAR(func);
- x86_call_depth_emit_accounting(pprog, func);
+ ip += x86_call_depth_emit_accounting(pprog, func, ip);
return emit_patch(pprog, func, ip, 0xE8);
}
@@ -1972,20 +1972,17 @@ populate_extable:
/* call */
case BPF_JMP | BPF_CALL: {
- int offs;
+ u8 *ip = image + addrs[i - 1];
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
- if (!imm32)
- return -EINVAL;
- offs = 7 + x86_call_depth_emit_accounting(&prog, func);
- } else {
- if (!imm32)
- return -EINVAL;
- offs = x86_call_depth_emit_accounting(&prog, func);
+ ip += 7;
}
- if (emit_call(&prog, func, image + addrs[i - 1] + offs))
+ if (!imm32)
+ return -EINVAL;
+ ip += x86_call_depth_emit_accounting(&prog, func, ip);
+ if (emit_call(&prog, func, ip))
return -EINVAL;
break;
}
@@ -2835,7 +2832,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* Direct-call fentry stub, as such it needs accounting for the
* __fentry__ call.
*/
- x86_call_depth_emit_accounting(&prog, NULL);
+ x86_call_depth_emit_accounting(&prog, NULL, image);
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index cffe1157a90acf..ab0e8448bb6eb2 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -77,7 +77,7 @@ static int __mfd_enable(unsigned int cpu)
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
rdmsrl(MSR_AMD64_SYSCFG, val);
@@ -98,7 +98,7 @@ static int __snp_enable(unsigned int cpu)
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
rdmsrl(MSR_AMD64_SYSCFG, val);
@@ -174,11 +174,11 @@ static int __init snp_rmptable_init(void)
u64 rmptable_size;
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
if (!amd_iommu_snp_en)
- return 0;
+ goto nosnp;
if (!probed_rmp_size)
goto nosnp;
@@ -225,7 +225,7 @@ skip_enable:
return 0;
nosnp:
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
return -ENOSYS;
}
@@ -246,7 +246,7 @@ static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level)
{
struct rmpentry *large_entry, *entry;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return ERR_PTR(-ENODEV);
entry = get_rmpentry(pfn);
@@ -363,7 +363,7 @@ int psmash(u64 pfn)
unsigned long paddr = pfn << PAGE_SHIFT;
int ret;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
if (!pfn_valid(pfn))
@@ -472,7 +472,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state)
unsigned long paddr = pfn << PAGE_SHIFT;
int ret, level;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
level = RMP_TO_PG_LEVEL(state->pagesize);
@@ -558,3 +558,13 @@ void snp_leak_pages(u64 pfn, unsigned int npages)
spin_unlock(&snp_leaked_pages_list_lock);
}
EXPORT_SYMBOL_GPL(snp_leak_pages);
+
+void kdump_sev_callback(void)
+{
+ /*
+ * Do wbinvd() on remote CPUs when SNP is enabled in order to
+ * safely do SNP_SHUTDOWN on the local CPU.
+ */
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ wbinvd();
+}