aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.mailmap28
-rw-r--r--CREDITS4
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-damon6
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage18
-rw-r--r--Documentation/admin-guide/blockdev/zram.rst5
-rw-r--r--Documentation/admin-guide/cgroup-v1/memory.rst8
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst18
-rw-r--r--Documentation/admin-guide/hw-vuln/spectre.rst44
-rw-r--r--Documentation/admin-guide/kdump/kdump.rst8
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt21
-rw-r--r--Documentation/admin-guide/mm/damon/usage.rst26
-rw-r--r--Documentation/admin-guide/mm/hugetlbpage.rst7
-rw-r--r--Documentation/admin-guide/mm/transhuge.rst35
-rw-r--r--Documentation/admin-guide/mm/zswap.rst33
-rw-r--r--Documentation/admin-guide/perf/hisi-pmu.rst1
-rw-r--r--Documentation/admin-guide/perf/hns3-pmu.rst8
-rw-r--r--Documentation/admin-guide/perf/qcom_l2_pmu.rst2
-rw-r--r--Documentation/admin-guide/perf/qcom_l3_pmu.rst2
-rw-r--r--Documentation/admin-guide/perf/thunderx2-pmu.rst2
-rw-r--r--Documentation/admin-guide/perf/xgene-pmu.rst2
-rw-r--r--Documentation/admin-guide/sysctl/vm.rst16
-rw-r--r--Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst597
-rw-r--r--Documentation/arch/x86/resctrl.rst2
-rw-r--r--Documentation/core-api/floating-point.rst78
-rw-r--r--Documentation/core-api/index.rst1
-rw-r--r--Documentation/dev-tools/testing-overview.rst2
-rw-r--r--Documentation/devicetree/bindings/Makefile34
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic.yaml13
-rw-r--r--Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml6
-rw-r--r--Documentation/devicetree/bindings/clock/keystone-gate.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/keystone-pll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/adpll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/apll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/autoidle.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/clockdomain.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/composite.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/divider.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/dpll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/fapll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/gate.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/interface.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/mux.txt2
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml9
-rw-r--r--Documentation/devicetree/bindings/dts-coding-style.rst2
-rw-r--r--Documentation/devicetree/bindings/eeprom/at24.yaml5
-rw-r--r--Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml4
-rw-r--r--Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml3
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt3
-rw-r--r--Documentation/devicetree/bindings/serial/amlogic,meson-uart.yaml4
-rw-r--r--Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/rockchip/grf.yaml1
-rw-r--r--Documentation/devicetree/bindings/sound/rt5645.txt6
-rw-r--r--Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml2
-rw-r--r--Documentation/devicetree/bindings/ufs/qcom,ufs.yaml38
-rw-r--r--Documentation/driver-api/crypto/iaa/iaa-crypto.rst2
-rw-r--r--Documentation/driver-api/virtio/writing_virtio_drivers.rst1
-rw-r--r--Documentation/filesystems/api-summary.rst3
-rw-r--r--Documentation/filesystems/bcachefs/index.rst11
-rw-r--r--Documentation/filesystems/buffer.rst12
-rw-r--r--Documentation/filesystems/index.rst2
-rw-r--r--Documentation/filesystems/proc.rst29
-rw-r--r--Documentation/kbuild/kconfig-language.rst3
-rw-r--r--Documentation/kbuild/llvm.rst2
-rw-r--r--Documentation/mm/allocation-profiling.rst100
-rw-r--r--Documentation/mm/damon/design.rst20
-rw-r--r--Documentation/mm/index.rst1
-rw-r--r--Documentation/mm/page_owner.rst73
-rw-r--r--Documentation/mm/page_table_check.rst9
-rw-r--r--Documentation/mm/transhuge.rst12
-rw-r--r--Documentation/mm/vmemmap_dedup.rst22
-rw-r--r--Documentation/networking/devlink/devlink-eswitch-attr.rst76
-rw-r--r--Documentation/networking/devlink/index.rst1
-rw-r--r--Documentation/networking/representors.rst1
-rw-r--r--Documentation/process/changes.rst9
-rw-r--r--Documentation/process/embargoed-hardware-issues.rst2
-rw-r--r--Documentation/rust/arch-support.rst2
-rw-r--r--Documentation/timers/no_hz.rst7
-rw-r--r--Documentation/translations/zh_CN/core-api/cachetlb.rst2
-rw-r--r--Documentation/userspace-api/index.rst1
-rw-r--r--Documentation/userspace-api/mseal.rst199
-rw-r--r--Documentation/virt/kvm/x86/amd-memory-encryption.rst42
-rw-r--r--Documentation/virt/kvm/x86/msr.rst19
-rw-r--r--MAINTAINERS245
-rw-r--r--Makefile31
-rw-r--r--arch/Kconfig26
-rw-r--r--arch/alpha/kernel/osf_sys.c5
-rw-r--r--arch/alpha/kernel/pci_iommu.c2
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/alpha/lib/checksum.c1
-rw-r--r--arch/alpha/lib/fpreg.c1
-rw-r--r--arch/arc/Kconfig1
-rw-r--r--arch/arc/boot/Makefile4
-rw-r--r--arch/arc/boot/dts/axc003.dtsi4
-rw-r--r--arch/arc/boot/dts/hsdk.dts1
-rw-r--r--arch/arc/boot/dts/vdk_axs10x_mb.dtsi2
-rw-r--r--arch/arc/include/asm/cachetype.h9
-rw-r--r--arch/arc/include/asm/dsp.h2
-rw-r--r--arch/arc/include/asm/entry-compact.h10
-rw-r--r--arch/arc/include/asm/entry.h4
-rw-r--r--arch/arc/include/asm/irq.h2
-rw-r--r--arch/arc/include/asm/irqflags-compact.h2
-rw-r--r--arch/arc/include/asm/mmu-arcv2.h2
-rw-r--r--arch/arc/include/asm/mmu_context.h2
-rw-r--r--arch/arc/include/asm/pgtable-bits-arcv2.h2
-rw-r--r--arch/arc/include/asm/ptrace.h2
-rw-r--r--arch/arc/include/asm/shmparam.h2
-rw-r--r--arch/arc/include/asm/smp.h4
-rw-r--r--arch/arc/include/asm/thread_info.h2
-rw-r--r--arch/arc/include/uapi/asm/swab.h2
-rw-r--r--arch/arc/kernel/entry-arcv2.S8
-rw-r--r--arch/arc/kernel/entry.S4
-rw-r--r--arch/arc/kernel/head.S2
-rw-r--r--arch/arc/kernel/intc-arcv2.c2
-rw-r--r--arch/arc/kernel/kprobes.c7
-rw-r--r--arch/arc/kernel/perf_event.c2
-rw-r--r--arch/arc/kernel/setup.c2
-rw-r--r--arch/arc/kernel/signal.c7
-rw-r--r--arch/arc/kernel/traps.c2
-rw-r--r--arch/arc/kernel/vmlinux.lds.S4
-rw-r--r--arch/arc/mm/mmap.c4
-rw-r--r--arch/arc/mm/tlb.c4
-rw-r--r--arch/arc/mm/tlbex.S8
-rw-r--r--arch/arm/Kconfig3
-rw-r--r--arch/arm/Makefile7
-rw-r--r--arch/arm/boot/dts/aspeed/Makefile8
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-e3c256d4i.dts322
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-spc621d8hm3.dts324
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-x570d4u.dts377
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-asus-x4tf.dts592
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-harma.dts620
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-minerva-cmc.dts265
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-minerva.dts543
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-bmc-ibm-system1.dts1623
-rw-r--r--arch/arm/boot/dts/aspeed/aspeed-g6.dtsi4
-rw-r--r--arch/arm/boot/dts/aspeed/ibm-power10-dual.dtsi2
-rw-r--r--arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts8
-rw-r--r--arch/arm/boot/dts/microchip/at91-sama7g5ek.dts8
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi1
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7s-warp.dts1
-rw-r--r--arch/arm/include/asm/fpu.h15
-rw-r--r--arch/arm/include/asm/hugetlb.h6
-rw-r--r--arch/arm/include/asm/mman.h14
-rw-r--r--arch/arm/include/asm/pgtable-2level.h5
-rw-r--r--arch/arm/include/asm/pgtable-3level-hwdef.h1
-rw-r--r--arch/arm/include/asm/pgtable-3level.h5
-rw-r--r--arch/arm/kernel/irq.c1
-rw-r--r--arch/arm/kernel/traps.c1
-rw-r--r--arch/arm/lib/Makefile3
-rw-r--r--arch/arm/lib/uaccess_with_memcpy.c4
-rw-r--r--arch/arm/mach-omap2/board-n8x0.c23
-rw-r--r--arch/arm/mm/Makefile1
-rw-r--r--arch/arm/mm/fault.c32
-rw-r--r--arch/arm/mm/hugetlbpage.c34
-rw-r--r--arch/arm/mm/mmap.c5
-rw-r--r--arch/arm/net/bpf_jit_32.c56
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/Kconfig3
-rw-r--r--arch/arm64/Makefile20
-rw-r--r--arch/arm64/boot/.gitignore1
-rw-r--r--arch/arm64/boot/Makefile6
-rw-r--r--arch/arm64/boot/dts/amlogic/Makefile7
-rw-r--r--arch/arm64/boot/dts/amlogic/amlogic-a4-a113l2-ba400.dts42
-rw-r--r--arch/arm64/boot/dts/amlogic/amlogic-a4-common.dtsi66
-rw-r--r--arch/arm64/boot/dts/amlogic/amlogic-a4.dtsi40
-rw-r--r--arch/arm64/boot/dts/amlogic/amlogic-a5-a113x2-av400.dts42
-rw-r--r--arch/arm64/boot/dts/amlogic/amlogic-a5.dtsi40
-rw-r--r--arch/arm64/boot/dts/amlogic/amlogic-t7-reset.h197
-rw-r--r--arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi7
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi70
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-g12b-bananapi-cm4-mnt-reform2.dts384
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-khadas-vim3-ts050.dtso108
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-s4.dtsi13
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi16
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi40
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi16
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi8
-rw-r--r--arch/arm64/boot/dts/mediatek/mt2712-evb.dts8
-rw-r--r--arch/arm64/boot/dts/mediatek/mt2712e.dtsi3
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7622.dtsi34
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts6
-rw-r--r--arch/arm64/boot/dts/mediatek/mt7986a.dtsi8
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi1
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8183.dtsi1
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi2
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi6
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8192.dtsi1
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi36
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8195.dtsi5
-rw-r--r--arch/arm64/boot/dts/microchip/sparx5.dtsi4
-rw-r--r--arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi149
-rw-r--r--arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi35
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sc7280.dtsi4
-rw-r--r--arch/arm64/boot/dts/qcom/sc8180x.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sc8280xp.dtsi11
-rw-r--r--arch/arm64/boot/dts/qcom/sm6350.dtsi4
-rw-r--r--arch/arm64/boot/dts/qcom/sm6375.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sm8250.dtsi6
-rw-r--r--arch/arm64/boot/dts/qcom/sm8450.dtsi16
-rw-r--r--arch/arm64/boot/dts/qcom/sm8550.dtsi10
-rw-r--r--arch/arm64/boot/dts/qcom/sm8650.dtsi10
-rw-r--r--arch/arm64/boot/dts/qcom/x1e80100.dtsi4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi53
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts6
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts1
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/include/asm/assembler.h7
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/el2_setup.h9
-rw-r--r--arch/arm64/include/asm/fpu.h15
-rw-r--r--arch/arm64/include/asm/hugetlb.h6
-rw-r--r--arch/arm64/include/asm/irqflags.h1
-rw-r--r--arch/arm64/include/asm/pgtable.h89
-rw-r--r--arch/arm64/include/asm/tlbflush.h53
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/kernel/acpi.c10
-rw-r--r--arch/arm64/kernel/efi.c1
-rw-r--r--arch/arm64/kernel/head.S36
-rw-r--r--arch/arm64/kernel/ptrace.c5
-rw-r--r--arch/arm64/kvm/arm.c13
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c3
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c23
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c3
-rw-r--r--arch/arm64/kvm/mmu.c2
-rw-r--r--arch/arm64/lib/Makefile6
-rw-r--r--arch/arm64/mm/contpte.c29
-rw-r--r--arch/arm64/mm/fault.c56
-rw-r--r--arch/arm64/mm/hugetlbpage.c23
-rw-r--r--arch/arm64/mm/mmu.c101
-rw-r--r--arch/arm64/mm/mteswap.c45
-rw-r--r--arch/arm64/mm/pageattr.c3
-rw-r--r--arch/arm64/net/bpf_jit_comp.c10
-rw-r--r--arch/csky/abiv1/mmap.c12
-rw-r--r--arch/hexagon/kernel/vmlinux.lds.S1
-rw-r--r--arch/loongarch/Kconfig5
-rw-r--r--arch/loongarch/Makefile5
-rw-r--r--arch/loongarch/boot/dts/loongson-2k1000.dtsi7
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000-ref.dts33
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000.dtsi24
-rw-r--r--arch/loongarch/include/asm/addrspace.h1
-rw-r--r--arch/loongarch/include/asm/crash_reserve.h (renamed from arch/loongarch/include/asm/crash_core.h)4
-rw-r--r--arch/loongarch/include/asm/fpu.h1
-rw-r--r--arch/loongarch/include/asm/io.h20
-rw-r--r--arch/loongarch/include/asm/kfence.h10
-rw-r--r--arch/loongarch/include/asm/page.h26
-rw-r--r--arch/loongarch/include/asm/perf_event.h8
-rw-r--r--arch/loongarch/include/asm/tlb.h2
-rw-r--r--arch/loongarch/kernel/perf_event.c2
-rw-r--r--arch/loongarch/mm/fault.c4
-rw-r--r--arch/loongarch/mm/hugetlbpage.c12
-rw-r--r--arch/loongarch/mm/mmap.c7
-rw-r--r--arch/loongarch/mm/pgtable.c4
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/mips/Kconfig20
-rw-r--r--arch/mips/include/asm/pgtable-32.h2
-rw-r--r--arch/mips/include/asm/pgtable-64.h2
-rw-r--r--arch/mips/include/asm/ptrace.h2
-rw-r--r--arch/mips/jazz/jazzdma.c2
-rw-r--r--arch/mips/kernel/asm-offsets.c1
-rw-r--r--arch/mips/kernel/ptrace.c15
-rw-r--r--arch/mips/kernel/scall32-o32.S23
-rw-r--r--arch/mips/kernel/scall64-n32.S3
-rw-r--r--arch/mips/kernel/scall64-n64.S3
-rw-r--r--arch/mips/kernel/scall64-o32.S33
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/mips/mm/hugetlbpage.c10
-rw-r--r--arch/mips/mm/mmap.c3
-rw-r--r--arch/mips/mm/tlb-r4k.c2
-rw-r--r--arch/nios2/kernel/prom.c6
-rw-r--r--arch/parisc/include/asm/mman.h14
-rw-r--r--arch/parisc/kernel/sys_parisc.c6
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/parisc/kernel/vdso32/Makefile7
-rw-r--r--arch/parisc/mm/hugetlbpage.c11
-rw-r--r--arch/powerpc/Kconfig3
-rw-r--r--arch/powerpc/Makefile5
-rw-r--r--arch/powerpc/crypto/chacha-p10-glue.c8
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable-4k.h20
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable-64k.h25
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h27
-rw-r--r--arch/powerpc/include/asm/fpu.h28
-rw-r--r--arch/powerpc/include/asm/mmu.h4
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h10
-rw-r--r--arch/powerpc/include/asm/vdso/gettimeofday.h3
-rw-r--r--arch/powerpc/kernel/dma-iommu.c2
-rw-r--r--arch/powerpc/kernel/fadump.c5
-rw-r--r--arch/powerpc/kernel/iommu.c8
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/mm/book3s64/slice.c20
-rw-r--r--arch/powerpc/mm/fault.c33
-rw-r--r--arch/powerpc/mm/mem.c1
-rw-r--r--arch/powerpc/mm/pgtable_64.c6
-rw-r--r--arch/powerpc/platforms/ps3/system-bus.c4
-rw-r--r--arch/powerpc/platforms/pseries/vio.c2
-rw-r--r--arch/riscv/Kconfig3
-rw-r--r--arch/riscv/Kconfig.errata8
-rw-r--r--arch/riscv/Makefile5
-rw-r--r--arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi99
-rw-r--r--arch/riscv/errata/thead/errata.c24
-rw-r--r--arch/riscv/include/asm/errata_list.h20
-rw-r--r--arch/riscv/include/asm/fpu.h16
-rw-r--r--arch/riscv/include/asm/hugetlb.h6
-rw-r--r--arch/riscv/include/asm/page.h2
-rw-r--r--arch/riscv/include/asm/pgtable.h9
-rw-r--r--arch/riscv/include/asm/syscall_wrapper.h3
-rw-r--r--arch/riscv/include/asm/uaccess.h4
-rw-r--r--arch/riscv/include/uapi/asm/auxvec.h2
-rw-r--r--arch/riscv/include/uapi/asm/hwprobe.h2
-rw-r--r--arch/riscv/kernel/Makefile1
-rw-r--r--arch/riscv/kernel/compat_vdso/Makefile2
-rw-r--r--arch/riscv/kernel/elf_kexec.c1
-rw-r--r--arch/riscv/kernel/kernel_mode_fpu.c28
-rw-r--r--arch/riscv/kernel/patch.c8
-rw-r--r--arch/riscv/kernel/probes/kprobes.c1
-rw-r--r--arch/riscv/kernel/process.c5
-rw-r--r--arch/riscv/kernel/signal.c15
-rw-r--r--arch/riscv/kernel/traps.c2
-rw-r--r--arch/riscv/kernel/vdso/Makefile1
-rw-r--r--arch/riscv/kvm/aia_aplic.c37
-rw-r--r--arch/riscv/kvm/vcpu_onereg.c2
-rw-r--r--arch/riscv/mm/fault.c5
-rw-r--r--arch/riscv/mm/hugetlbpage.c10
-rw-r--r--arch/riscv/mm/init.c2
-rw-r--r--arch/riscv/mm/tlbflush.c4
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c22
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/include/asm/atomic.h44
-rw-r--r--arch/s390/include/asm/atomic_ops.h22
-rw-r--r--arch/s390/include/asm/dwarf.h1
-rw-r--r--arch/s390/include/asm/hugetlb.h6
-rw-r--r--arch/s390/include/asm/pgtable.h1
-rw-r--r--arch/s390/include/asm/preempt.h36
-rw-r--r--arch/s390/kernel/cert_store.c1
-rw-r--r--arch/s390/kernel/entry.S4
-rw-r--r--arch/s390/kernel/ipl.c1
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c10
-rw-r--r--arch/s390/kernel/perf_pai_ext.c10
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kernel/vdso64/vdso_user_wrapper.S2
-rw-r--r--arch/s390/mm/fault.c5
-rw-r--r--arch/s390/mm/gmap.c2
-rw-r--r--arch/s390/mm/hugetlbpage.c21
-rw-r--r--arch/s390/mm/mmap.c9
-rw-r--r--arch/s390/net/bpf_jit_comp.c46
-rw-r--r--arch/s390/pci/pci_mmio.c4
-rw-r--r--arch/sh/Kconfig8
-rw-r--r--arch/sh/configs/apsh4a3a_defconfig1
-rw-r--r--arch/sh/configs/apsh4ad0a_defconfig1
-rw-r--r--arch/sh/configs/edosk7705_defconfig1
-rw-r--r--arch/sh/configs/hp6xx_defconfig1
-rw-r--r--arch/sh/configs/landisk_defconfig1
-rw-r--r--arch/sh/configs/magicpanelr2_defconfig1
-rw-r--r--arch/sh/configs/rsk7264_defconfig1
-rw-r--r--arch/sh/configs/rsk7269_defconfig1
-rw-r--r--arch/sh/configs/se7619_defconfig1
-rw-r--r--arch/sh/configs/se7705_defconfig1
-rw-r--r--arch/sh/configs/se7722_defconfig1
-rw-r--r--arch/sh/configs/se7750_defconfig1
-rw-r--r--arch/sh/configs/secureedge5410_defconfig1
-rw-r--r--arch/sh/configs/sh7710voipgw_defconfig1
-rw-r--r--arch/sh/configs/sh7724_generic_defconfig1
-rw-r--r--arch/sh/configs/sh7770_generic_defconfig1
-rw-r--r--arch/sh/configs/sh7785lcr_32bit_defconfig1
-rw-r--r--arch/sh/configs/sh7785lcr_defconfig1
-rw-r--r--arch/sh/configs/urquell_defconfig1
-rw-r--r--arch/sh/include/asm/hugetlb.h6
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sh/mm/cache-sh4.c5
-rw-r--r--arch/sh/mm/cache.c2
-rw-r--r--arch/sh/mm/hugetlbpage.c10
-rw-r--r--arch/sh/mm/mmap.c5
-rw-r--r--arch/sparc/include/asm/pgtable_64.h3
-rw-r--r--arch/sparc/kernel/sys_sparc_32.c3
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c20
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sparc/mm/hugetlbpage.c21
-rw-r--r--arch/sparc/mm/tlb.c6
-rw-r--r--arch/um/include/shared/um_malloc.h3
-rw-r--r--arch/x86/Kbuild2
-rw-r--r--arch/x86/Kconfig34
-rw-r--r--arch/x86/Makefile22
-rw-r--r--arch/x86/boot/compressed/efi_mixed.S20
-rw-r--r--arch/x86/coco/core.c93
-rw-r--r--arch/x86/entry/common.c75
-rw-r--r--arch/x86/entry/entry_64.S61
-rw-r--r--arch/x86/entry/entry_64_compat.S16
-rw-r--r--arch/x86/entry/entry_fred.c10
-rw-r--r--arch/x86/entry/syscall_32.c21
-rw-r--r--arch/x86/entry/syscall_64.c19
-rw-r--r--arch/x86/entry/syscall_x32.c10
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/entry/vdso/Makefile1
-rw-r--r--arch/x86/events/amd/core.c39
-rw-r--r--arch/x86/events/amd/lbr.c16
-rw-r--r--arch/x86/events/core.c1
-rw-r--r--arch/x86/events/intel/ds.c8
-rw-r--r--arch/x86/events/intel/lbr.c1
-rw-r--r--arch/x86/hyperv/hv_apic.c16
-rw-r--r--arch/x86/hyperv/hv_proc.c22
-rw-r--r--arch/x86/hyperv/ivm.c12
-rw-r--r--arch/x86/include/asm/alternative.h4
-rw-r--r--arch/x86/include/asm/apic.h3
-rw-r--r--arch/x86/include/asm/asm-prototypes.h1
-rw-r--r--arch/x86/include/asm/barrier.h3
-rw-r--r--arch/x86/include/asm/coco.h3
-rw-r--r--arch/x86/include/asm/cpufeature.h8
-rw-r--r--arch/x86/include/asm/cpufeatures.h15
-rw-r--r--arch/x86/include/asm/crash_reserve.h2
-rw-r--r--arch/x86/include/asm/disabled-features.h3
-rw-r--r--arch/x86/include/asm/fpu.h13
-rw-r--r--arch/x86/include/asm/fpu/types.h6
-rw-r--r--arch/x86/include/asm/io.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/nospec-branch.h38
-rw-r--r--arch/x86/include/asm/perf_event.h1
-rw-r--r--arch/x86/include/asm/pgtable.h20
-rw-r--r--arch/x86/include/asm/pgtable_64.h1
-rw-r--r--arch/x86/include/asm/pgtable_types.h3
-rw-r--r--arch/x86/include/asm/required-features.h3
-rw-r--r--arch/x86/include/asm/sev.h8
-rw-r--r--arch/x86/include/asm/syscall.h11
-rw-r--r--arch/x86/include/asm/x86_init.h3
-rw-r--r--arch/x86/include/uapi/asm/kvm.h23
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h1
-rw-r--r--arch/x86/kernel/amd_gart_64.c2
-rw-r--r--arch/x86/kernel/apic/apic.c6
-rw-r--r--arch/x86/kernel/callthunks.c4
-rw-r--r--arch/x86/kernel/cpu/amd.c56
-rw-r--r--arch/x86/kernel/cpu/bugs.c170
-rw-r--r--arch/x86/kernel/cpu/common.c70
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c6
-rw-r--r--arch/x86/kernel/cpu/mce/core.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h3
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/cpu/sgx/driver.c2
-rw-r--r--arch/x86/kernel/cpu/sgx/main.c1
-rw-r--r--arch/x86/kernel/cpu/topology.c7
-rw-r--r--arch/x86/kernel/cpu/topology_amd.c51
-rw-r--r--arch/x86/kernel/eisa.c3
-rw-r--r--arch/x86/kernel/irq_64.c1
-rw-r--r--arch/x86/kernel/kvm.c11
-rw-r--r--arch/x86/kernel/nmi.c24
-rw-r--r--arch/x86/kernel/probe_roms.c10
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/setup.c7
-rw-r--r--arch/x86/kernel/sev-shared.c6
-rw-r--r--arch/x86/kernel/sev.c37
-rw-r--r--arch/x86/kernel/sys_x86_64.c42
-rw-r--r--arch/x86/kernel/x86_init.c2
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/Makefile5
-rw-r--r--arch/x86/kvm/cpuid.c43
-rw-r--r--arch/x86/kvm/cpuid.h10
-rw-r--r--arch/x86/kvm/lapic.c3
-rw-r--r--arch/x86/kvm/mmu/mmu.c11
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c51
-rw-r--r--arch/x86/kvm/pmu.c16
-rw-r--r--arch/x86/kvm/reverse_cpuid.h5
-rw-r--r--arch/x86/kvm/svm/sev.c62
-rw-r--r--arch/x86/kvm/svm/svm.c17
-rw-r--r--arch/x86/kvm/svm/svm.h3
-rw-r--r--arch/x86/kvm/svm/vmenter.S97
-rw-r--r--arch/x86/kvm/trace.h10
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c2
-rw-r--r--arch/x86/kvm/vmx/vmenter.S2
-rw-r--r--arch/x86/kvm/vmx/vmx.c41
-rw-r--r--arch/x86/kvm/vmx/vmx.h6
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/lib/copy_mc.c21
-rw-r--r--arch/x86/lib/retpoline.S22
-rw-r--r--arch/x86/mm/fault.c24
-rw-r--r--arch/x86/mm/hugetlbpage.c35
-rw-r--r--arch/x86/mm/ident_map.c23
-rw-r--r--arch/x86/mm/init.c47
-rw-r--r--arch/x86/mm/mem_encrypt_amd.c18
-rw-r--r--arch/x86/mm/mmap.c4
-rw-r--r--arch/x86/mm/numa_32.c2
-rw-r--r--arch/x86/mm/pat/memtype.c73
-rw-r--r--arch/x86/mm/pgtable.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c82
-rw-r--r--arch/x86/virt/Makefile2
-rw-r--r--arch/x86/virt/svm/sev.c26
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/xtensa/mm/cache.c6
-rw-r--r--arch/xtensa/mm/tlb.c11
-rw-r--r--block/bdev.c115
-rw-r--r--block/blk-cgroup.c9
-rw-r--r--block/blk-cgroup.h2
-rw-r--r--block/blk-core.c3
-rw-r--r--block/blk-iocost.c14
-rw-r--r--block/blk-merge.c2
-rw-r--r--block/blk-mq.c9
-rw-r--r--block/blk-settings.c19
-rw-r--r--block/blk.h1
-rw-r--r--block/ioctl.c8
-rw-r--r--block/partitions/ldm.c6
-rw-r--r--crypto/asymmetric_keys/mscode_parser.c3
-rw-r--r--crypto/asymmetric_keys/pkcs7_parser.c4
-rw-r--r--crypto/asymmetric_keys/public_key.c3
-rw-r--r--crypto/asymmetric_keys/signature.c2
-rw-r--r--crypto/asymmetric_keys/x509_cert_parser.c8
-rw-r--r--crypto/testmgr.h80
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c40
-rw-r--r--drivers/accel/ivpu/ivpu_drv.h3
-rw-r--r--drivers/accel/ivpu/ivpu_hw.h6
-rw-r--r--drivers/accel/ivpu/ivpu_hw_37xx.c11
-rw-r--r--drivers/accel/ivpu/ivpu_hw_40xx.c6
-rw-r--r--drivers/accel/ivpu/ivpu_ipc.c8
-rw-r--r--drivers/accel/ivpu/ivpu_mmu.c8
-rw-r--r--drivers/accel/ivpu/ivpu_mmu_context.c1
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c14
-rw-r--r--drivers/accessibility/speakup/main.c2
-rw-r--r--drivers/acpi/acpica/dbnames.c8
-rw-r--r--drivers/acpi/acpica/tbfadt.c30
-rw-r--r--drivers/acpi/acpica/tbutils.c7
-rw-r--r--drivers/acpi/apei/einj-core.c2
-rw-r--r--drivers/acpi/cppc_acpi.c57
-rw-r--r--drivers/acpi/scan.c3
-rw-r--r--drivers/acpi/thermal.c22
-rw-r--r--drivers/acpi/x86/s2idle.c8
-rw-r--r--drivers/android/binder.c4
-rw-r--r--drivers/ata/ahci.c85
-rw-r--r--drivers/ata/ahci_st.c1
-rw-r--r--drivers/ata/libata-core.c2
-rw-r--r--drivers/ata/libata-eh.c5
-rw-r--r--drivers/ata/libata-scsi.c16
-rw-r--r--drivers/ata/pata_macio.c3
-rw-r--r--drivers/ata/sata_gemini.c5
-rw-r--r--drivers/ata/sata_mv.c63
-rw-r--r--drivers/ata/sata_sx4.c6
-rw-r--r--drivers/base/core.c26
-rw-r--r--drivers/base/regmap/regcache-maple.c6
-rw-r--r--drivers/base/regmap/regmap.c37
-rw-r--r--drivers/block/null_blk/main.c4
-rw-r--r--drivers/block/zram/zram_drv.c31
-rw-r--r--drivers/bluetooth/btmtk.c7
-rw-r--r--drivers/bluetooth/btqca.c46
-rw-r--r--drivers/bluetooth/btusb.c11
-rw-r--r--drivers/bluetooth/hci_qca.c46
-rw-r--r--drivers/cache/sifive_ccache.c74
-rw-r--r--drivers/char/mem.c2
-rw-r--r--drivers/char/random.c10
-rw-r--r--drivers/clk/clk.c173
-rw-r--r--drivers/clk/mediatek/clk-mt7988-infracfg.c2
-rw-r--r--drivers/clk/mediatek/clk-mtk.c15
-rw-r--r--drivers/clk/qcom/clk-smd-rpm.c1
-rw-r--r--drivers/clk/qcom/gdsc.c11
-rw-r--r--drivers/clk/samsung/clk-exynos-clkout.c13
-rw-r--r--drivers/comedi/drivers/vmk80xx.c35
-rw-r--r--drivers/crypto/ccp/sev-dev.c2
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto_main.c10
-rw-r--r--drivers/cxl/Kconfig13
-rw-r--r--drivers/cxl/acpi.c13
-rw-r--r--drivers/cxl/core/cdat.c152
-rw-r--r--drivers/cxl/core/mbox.c39
-rw-r--r--drivers/cxl/core/port.c114
-rw-r--r--drivers/cxl/core/regs.c5
-rw-r--r--drivers/cxl/cxl.h8
-rw-r--r--drivers/cxl/cxlmem.h2
-rw-r--r--drivers/dax/device.c6
-rw-r--r--drivers/dax/kmem.c30
-rw-r--r--drivers/dma-buf/st-dma-fence-chain.c6
-rw-r--r--drivers/dma/idma64.c4
-rw-r--r--drivers/dma/idxd/cdev.c5
-rw-r--r--drivers/dma/idxd/debugfs.c4
-rw-r--r--drivers/dma/idxd/device.c8
-rw-r--r--drivers/dma/idxd/idxd.h2
-rw-r--r--drivers/dma/idxd/init.c2
-rw-r--r--drivers/dma/idxd/irq.c4
-rw-r--r--drivers/dma/idxd/perfmon.c9
-rw-r--r--drivers/dma/owl-dma.c4
-rw-r--r--drivers/dma/pl330.c3
-rw-r--r--drivers/dma/tegra186-gpc-dma.c3
-rw-r--r--drivers/dma/xilinx/xdma-regs.h3
-rw-r--r--drivers/dma/xilinx/xdma.c42
-rw-r--r--drivers/dma/xilinx/xilinx_dpdma.c13
-rw-r--r--drivers/dpll/Kconfig2
-rw-r--r--drivers/dpll/dpll_core.c58
-rw-r--r--drivers/firewire/ohci.c6
-rw-r--r--drivers/firmware/arm_ffa/driver.c2
-rw-r--r--drivers/firmware/arm_scmi/powercap.c2
-rw-r--r--drivers/firmware/arm_scmi/raw_mode.c7
-rw-r--r--drivers/firmware/efi/libstub/randomalloc.c2
-rw-r--r--drivers/firmware/efi/libstub/x86-stub.c1
-rw-r--r--drivers/firmware/efi/unaccepted_memory.c4
-rw-r--r--drivers/firmware/microchip/mpfs-auto-update.c8
-rw-r--r--drivers/firmware/qcom/qcom_qseecom_uefisecapp.c137
-rw-r--r--drivers/firmware/qcom/qcom_scm.c37
-rw-r--r--drivers/fpga/dfl-pci.c3
-rw-r--r--drivers/gpio/gpio-crystalcove.c2
-rw-r--r--drivers/gpio/gpio-lpc32xx.c1
-rw-r--r--drivers/gpio/gpio-tangier.c9
-rw-r--r--drivers/gpio/gpio-tegra186.c20
-rw-r--r--drivers/gpio/gpio-wcove.c2
-rw-r--r--drivers/gpio/gpiolib-cdev.c58
-rw-r--r--drivers/gpio/gpiolib.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c17
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c16
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c2
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c38
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c8
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c6
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c35
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c57
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_state.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce60/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile36
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c103
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/Makefile36
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c11
-rw-r--r--drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c13
-rw-r--r--drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h13
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c7
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c27
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h33
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h55
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h46
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h10
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c8
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c12
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c25
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c52
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c368
-rw-r--r--drivers/gpu/drm/ast/ast_dp.c3
-rw-r--r--drivers/gpu/drm/display/drm_dp_dual_mode_helper.c4
-rw-r--r--drivers/gpu/drm/display/drm_dp_helper.c7
-rw-r--r--drivers/gpu/drm/drm_client_modeset.c3
-rw-r--r--drivers/gpu/drm/drm_gem_atomic_helper.c4
-rw-r--r--drivers/gpu/drm/drm_prime.c7
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.c24
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.h12
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_hwdb.c34
-rw-r--r--drivers/gpu/drm/gma500/Makefile1
-rw-r--r--drivers/gpu/drm/gma500/mmu.c1
-rw-r--r--drivers/gpu/drm/gma500/psb_device.c5
-rw-r--r--drivers/gpu/drm/gma500/psb_drv.h9
-rw-r--r--drivers/gpu/drm/gma500/psb_lid.c80
-rw-r--r--drivers/gpu/drm/i915/Makefile7
-rw-r--r--drivers/gpu/drm/i915/display/g4x_dp.c2
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.c46
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.c42
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.h3
-rw-r--r--drivers/gpu/drm/i915/display/intel_cursor.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_device.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_types.h3
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c29
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_hdcp.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_drrs.c14
-rw-r--r--drivers/gpu/drm/i915/display/intel_drrs.h3
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb.c14
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_pin.c10
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c89
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr.c14
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c1
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c1
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c39
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c31
-rw-r--r--drivers/gpu/drm/i915/gt/shmem_utils.c1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c23
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c4
-rw-r--r--drivers/gpu/drm/i915/gvt/firmware.c1
-rw-r--r--drivers/gpu/drm/i915/gvt/gtt.c1
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c1
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio.c1
-rw-r--r--drivers/gpu/drm/i915/gvt/vgpu.c1
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c2
-rw-r--r--drivers/gpu/drm/i915/i915_hwmon.c37
-rw-r--r--drivers/gpu/drm/i915/i915_memcpy.c2
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h2
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c50
-rw-r--r--drivers/gpu/drm/i915/intel_gvt.c1
-rw-r--r--drivers/gpu/drm/imagination/pvr_vm_mips.c1
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_gem.c1
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c4
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h34
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c10
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c8
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.c6
-rw-r--r--drivers/gpu/drm/msm/msm_fb.c6
-rw-r--r--drivers/gpu/drm/msm/msm_kms.c4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bios.c13
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.c12
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dp.c23
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c7
-rw-r--r--drivers/gpu/drm/omapdrm/omap_gem.c1
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt36672e.c2
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-rm69299.c2
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gpu.c6
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c13
-rw-r--r--drivers/gpu/drm/qxl/qxl_cmd.c2
-rw-r--r--drivers/gpu/drm/qxl/qxl_ioctl.c4
-rw-r--r--drivers/gpu/drm/qxl/qxl_release.c50
-rw-r--r--drivers/gpu/drm/radeon/pptable.h10
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c8
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop2_reg.c2
-rw-r--r--drivers/gpu/drm/scheduler/sched_entity.c12
-rw-r--r--drivers/gpu/drm/ttm/ttm_pool.c38
-rw-r--r--drivers/gpu/drm/ttm/ttm_tt.c2
-rw-r--r--drivers/gpu/drm/v3d/v3d_bo.c1
-rw-r--r--drivers/gpu/drm/v3d/v3d_irq.c4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_binding.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_blit.c35
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c8
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.h2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c28
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h3
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gem.c32
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c11
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.h4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_prime.c15
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c44
-rw-r--r--drivers/gpu/drm/xe/Makefile4
-rw-r--r--drivers/gpu/drm/xe/display/intel_fb_bo.c8
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c5
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h2
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c59
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h19
-rw-r--r--drivers/gpu/drm/xe/xe_device.c11
-rw-r--r--drivers/gpu/drm/xe/xe_device.h4
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c79
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c2
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h5
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c4
-rw-r--r--drivers/gpu/drm/xe/xe_gt_ccs_mode.c19
-rw-r--r--drivers/gpu/drm/xe/xe_gt_ccs_mode.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c3
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h7
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c4
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c2
-rw-r--r--drivers/gpu/drm/xe/xe_huc.c9
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c4
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c25
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c8
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence.c2
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c25
-rw-r--r--drivers/gpu/drm/xe/xe_query.c2
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.c11
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c10
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c131
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h8
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h8
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front_gem.c1
-rw-r--r--drivers/gpu/host1x/bus.c8
-rw-r--r--drivers/hid/hid-logitech-dj.c4
-rw-r--r--drivers/hid/hid-mcp2221.c2
-rw-r--r--drivers/hid/hid-nintendo.c8
-rw-r--r--drivers/hid/i2c-hid/i2c-hid-core.c38
-rw-r--r--drivers/hid/intel-ish-hid/ipc/ipc.c2
-rw-r--r--drivers/hv/channel.c29
-rw-r--r--drivers/hv/connection.c29
-rw-r--r--drivers/hv/vmbus_drv.c100
-rw-r--r--drivers/hwtracing/coresight/coresight-trbe.c1
-rw-r--r--drivers/hwtracing/intel_th/core.c6
-rw-r--r--drivers/i2c/busses/i2c-i801.c7
-rw-r--r--drivers/i2c/busses/i2c-pxa.c2
-rw-r--r--drivers/i2c/i2c-core-base.c12
-rw-r--r--drivers/iio/accel/mxc4005.c92
-rw-r--r--drivers/iio/common/inv_sensors/inv_sensors_timestamp.c33
-rw-r--r--drivers/iio/imu/adis16475.c4
-rw-r--r--drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c20
-rw-r--r--drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c2
-rw-r--r--drivers/iio/pressure/bmp280-core.c1
-rw-r--r--drivers/iio/pressure/bmp280-spi.c13
-rw-r--r--drivers/iio/pressure/bmp280.h1
-rw-r--r--drivers/infiniband/core/cm.c11
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c1
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c2
-rw-r--r--drivers/input/joystick/xpad.c2
-rw-r--r--drivers/interconnect/core.c8
-rw-r--r--drivers/interconnect/qcom/x1e80100.c26
-rw-r--r--drivers/iommu/amd/amd_iommu.h5
-rw-r--r--drivers/iommu/amd/init.c25
-rw-r--r--drivers/iommu/amd/iommu.c11
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c38
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h2
-rw-r--r--drivers/iommu/dma-iommu.c2
-rw-r--r--drivers/iommu/intel/iommu.c11
-rw-r--r--drivers/iommu/intel/perfmon.c2
-rw-r--r--drivers/iommu/intel/svm.c2
-rw-r--r--drivers/iommu/iommu.c11
-rw-r--r--drivers/iommu/iommufd/Kconfig1
-rw-r--r--drivers/iommu/mtk_iommu.c1
-rw-r--r--drivers/iommu/mtk_iommu_v1.c1
-rw-r--r--drivers/irqchip/irq-armada-370-xp.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c17
-rw-r--r--drivers/isdn/mISDN/socket.c10
-rw-r--r--drivers/md/dm-integrity.c2
-rw-r--r--drivers/md/dm-vdo/murmurhash3.c35
-rw-r--r--drivers/md/dm.c10
-rw-r--r--drivers/md/raid1.c2
-rw-r--r--drivers/media/cec/platform/sti/stih-cec.c1
-rw-r--r--drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c8
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c5
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h2
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c11
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c4
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c5
-rw-r--r--drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h2
-rw-r--r--drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c2
-rw-r--r--drivers/media/rc/mtk-cir.c1
-rw-r--r--drivers/media/rc/serial_ir.c1
-rw-r--r--drivers/media/rc/st_rc.c1
-rw-r--r--drivers/media/rc/sunxi-cir.c1
-rw-r--r--drivers/misc/cardreader/rtsx_pcr.c2
-rw-r--r--drivers/misc/eeprom/at24.c18
-rw-r--r--drivers/misc/mei/hw-me-regs.h2
-rw-r--r--drivers/misc/mei/pci-me.c4
-rw-r--r--drivers/misc/mei/platform-vsc.c17
-rw-r--r--drivers/misc/mei/pxp/mei_pxp.c7
-rw-r--r--drivers/misc/mei/vsc-tp.c84
-rw-r--r--drivers/misc/mei/vsc-tp.h3
-rw-r--r--drivers/mmc/core/block.c4
-rw-r--r--drivers/mmc/host/moxart-mmc.c1
-rw-r--r--drivers/mmc/host/omap.c48
-rw-r--r--drivers/mmc/host/sdhci-msm.c16
-rw-r--r--drivers/mmc/host/sdhci-of-dwcmshc.c29
-rw-r--r--drivers/mmc/host/sdhci-omap.c3
-rw-r--r--drivers/mtd/devices/block2mtd.c2
-rw-r--r--drivers/mtd/mtdcore.c2
-rw-r--r--drivers/mtd/nand/raw/brcmnand/brcmnand.c2
-rw-r--r--drivers/mtd/nand/raw/diskonchip.c4
-rw-r--r--drivers/mtd/nand/raw/qcom_nandc.c7
-rw-r--r--drivers/mux/core.c4
-rw-r--r--drivers/net/dsa/mt7530.c267
-rw-r--r--drivers/net/dsa/mt7530.h10
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c62
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.h23
-rw-r--r--drivers/net/dsa/sja1105/sja1105_mdio.c2
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.c2
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c35
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_xdp.c4
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.c13
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.h2
-rw-r--r--drivers/net/ethernet/amd/pds_core/dev.c3
-rw-r--r--drivers/net/ethernet/amd/pds_core/main.c1
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c64
-rw-r--r--drivers/net/ethernet/broadcom/b44.c14
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c84
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c6
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c16
-rw-r--r--drivers/net/ethernet/brocade/bna/bnad_debugfs.c4
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c11
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c19
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c4
-rw-r--r--drivers/net/ethernet/intel/e1000e/hw.h2
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c38
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c18
-rw-r--r--drivers/net/ethernet/intel/e1000e/phy.c182
-rw-r--r--drivers/net/ethernet/intel/e1000e/phy.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c19
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_register.h3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c82
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c45
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c30
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_debugfs.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c18
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c24
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_tc_lib.c15
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.c16
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c18
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c4
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_leds.c38
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c7
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c16
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c1
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c1
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c20
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos.c1
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c82
-rw-r--r--drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c115
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h5
-rw-r--r--drivers/net/ethernet/micrel/ks8851.h3
-rw-r--r--drivers/net/ethernet/micrel/ks8851_common.c16
-rw-r--r--drivers/net/ethernet/micrel/ks8851_par.c11
-rw-r--r--drivers/net/ethernet/micrel/ks8851_spi.c11
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c18
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.h4
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_port.c4
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c61
-rw-r--r--drivers/net/ethernet/microsoft/mana/hw_channel.c1
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.h6
-rw-r--r--drivers/net/ethernet/realtek/r8169_leds.c35
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c49
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c103
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c47
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c56
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc_core.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c29
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c18
-rw-r--r--drivers/net/ethernet/ti/am65-cpts.c5
-rw-r--r--drivers/net/ethernet/ti/icssg/icssg_prueth.c8
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_lib.c2
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c2
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c2
-rw-r--r--drivers/net/geneve.c4
-rw-r--r--drivers/net/gtp.c3
-rw-r--r--drivers/net/hyperv/netvsc.c7
-rw-r--r--drivers/net/macsec.c46
-rw-r--r--drivers/net/phy/dp83869.c3
-rw-r--r--drivers/net/phy/mediatek-ge-soc.c43
-rw-r--r--drivers/net/phy/micrel.c31
-rw-r--r--drivers/net/phy/qcom/at803x.c4
-rw-r--r--drivers/net/tun.c18
-rw-r--r--drivers/net/usb/ax88179_178a.c17
-rw-r--r--drivers/net/usb/qmi_wwan.c4
-rw-r--r--drivers/net/virtio_net.c26
-rw-r--r--drivers/net/vxlan/vxlan_core.c4
-rw-r--r--drivers/net/wireless/ath/ath11k/mac.c4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/bz.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/sc.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.c15
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/d3.c16
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c11
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/link.c61
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rfi.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c20
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/scan.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/time-event.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/queue/tx.c2
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8922a.c2
-rw-r--r--drivers/net/wireless/virtual/mac80211_hwsim.c2
-rw-r--r--drivers/net/wwan/t7xx/t7xx_cldma.c4
-rw-r--r--drivers/net/wwan/t7xx/t7xx_hif_cldma.c9
-rw-r--r--drivers/net/wwan/t7xx/t7xx_pcie_mac.c8
-rw-r--r--drivers/net/xen-netfront.c1
-rw-r--r--drivers/nfc/trf7970a.c42
-rw-r--r--drivers/nvme/host/core.c41
-rw-r--r--drivers/nvme/host/fc.c4
-rw-r--r--drivers/nvme/host/nvme.h12
-rw-r--r--drivers/nvme/host/zns.c33
-rw-r--r--drivers/nvme/target/configfs.c47
-rw-r--r--drivers/nvme/target/core.c7
-rw-r--r--drivers/nvme/target/fc.c17
-rw-r--r--drivers/of/dynamic.c12
-rw-r--r--drivers/of/module.c15
-rw-r--r--drivers/parisc/ccio-dma.c2
-rw-r--r--drivers/parisc/sba_iommu.c2
-rw-r--r--drivers/pci/quirks.c8
-rw-r--r--drivers/perf/alibaba_uncore_drw_pmu.c11
-rw-r--r--drivers/perf/amlogic/meson_ddr_pmu_core.c1
-rw-r--r--drivers/perf/arm-cci.c1
-rw-r--r--drivers/perf/arm-ccn.c1
-rw-r--r--drivers/perf/arm-cmn.c11
-rw-r--r--drivers/perf/arm_cspmu/arm_cspmu.c9
-rw-r--r--drivers/perf/arm_dmc620_pmu.c1
-rw-r--r--drivers/perf/arm_dsu_pmu.c20
-rw-r--r--drivers/perf/arm_pmu_platform.c1
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c1
-rw-r--r--drivers/perf/arm_spe_pmu.c1
-rw-r--r--drivers/perf/dwc_pcie_pmu.c10
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c1
-rw-r--r--drivers/perf/hisilicon/hisi_pcie_pmu.c10
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c7
-rw-r--r--drivers/perf/hisilicon/hns3_pmu.c1
-rw-r--r--drivers/perf/qcom_l2_pmu.c9
-rw-r--r--drivers/perf/qcom_l3_pmu.c1
-rw-r--r--drivers/perf/riscv_pmu.c4
-rw-r--r--drivers/perf/riscv_pmu_legacy.c1
-rw-r--r--drivers/perf/riscv_pmu_sbi.c2
-rw-r--r--drivers/perf/thunderx2_pmu.c30
-rw-r--r--drivers/perf/xgene_pmu.c1
-rw-r--r--drivers/phy/freescale/phy-fsl-imx8m-pcie.c6
-rw-r--r--drivers/phy/marvell/phy-mvebu-a3700-comphy.c9
-rw-r--r--drivers/phy/qualcomm/phy-qcom-m31.c2
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-combo.c12
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h1
-rw-r--r--drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h1
-rw-r--r--drivers/phy/rockchip/Kconfig1
-rw-r--r--drivers/phy/rockchip/phy-rockchip-naneng-combphy.c36
-rw-r--r--drivers/phy/rockchip/phy-rockchip-snps-pcie3.c31
-rw-r--r--drivers/phy/ti/phy-tusb1210.c23
-rw-r--r--drivers/pinctrl/aspeed/Makefile2
-rw-r--r--drivers/pinctrl/intel/pinctrl-baytrail.c78
-rw-r--r--drivers/pinctrl/intel/pinctrl-intel.h4
-rw-r--r--drivers/pinctrl/pinctrl-amd.c2
-rw-r--r--drivers/platform/chrome/cros_ec_uart.c28
-rw-r--r--drivers/platform/x86/acer-wmi.c9
-rw-r--r--drivers/platform/x86/amd/pmc/pmc-quirks.c9
-rw-r--r--drivers/platform/x86/amd/pmf/Makefile2
-rw-r--r--drivers/platform/x86/amd/pmf/acpi.c7
-rw-r--r--drivers/platform/x86/amd/pmf/core.c1
-rw-r--r--drivers/platform/x86/amd/pmf/pmf-quirks.c51
-rw-r--r--drivers/platform/x86/amd/pmf/pmf.h3
-rw-r--r--drivers/platform/x86/intel/hid.c9
-rw-r--r--drivers/platform/x86/intel/speed_select_if/isst_if_common.c1
-rw-r--r--drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c4
-rw-r--r--drivers/platform/x86/intel/vbtn.c11
-rw-r--r--drivers/platform/x86/lg-laptop.c2
-rw-r--r--drivers/platform/x86/toshiba_acpi.c4
-rw-r--r--drivers/platform/x86/uv_sysfs.c1
-rw-r--r--drivers/power/supply/mt6360_charger.c2
-rw-r--r--drivers/power/supply/rt9455_charger.c2
-rw-r--r--drivers/pps/clients/pps_parport.c6
-rw-r--r--drivers/pwm/core.c2
-rw-r--r--drivers/pwm/pwm-dwc-core.c1
-rw-r--r--drivers/pwm/pwm-dwc.c88
-rw-r--r--drivers/pwm/pwm-dwc.h6
-rw-r--r--drivers/pwm/pwm-img.c4
-rw-r--r--drivers/ras/amd/fmpm.c57
-rw-r--r--drivers/ras/debugfs.h4
-rw-r--r--drivers/regulator/irq_helpers.c3
-rw-r--r--drivers/regulator/mt6360-regulator.c32
-rw-r--r--drivers/regulator/qcom-refgen-regulator.c1
-rw-r--r--drivers/regulator/tps65132-regulator.c7
-rw-r--r--drivers/regulator/vqmmc-ipq4019-regulator.c1
-rw-r--r--drivers/s390/char/raw3270.c6
-rw-r--r--drivers/s390/cio/device.c13
-rw-r--r--drivers/s390/cio/device_fsm.c5
-rw-r--r--drivers/s390/cio/device_ops.c8
-rw-r--r--drivers/s390/cio/qdio_main.c28
-rw-r--r--drivers/s390/net/ism_drv.c37
-rw-r--r--drivers/s390/net/qeth_core_main.c38
-rw-r--r--drivers/scsi/bnx2fc/bnx2fc_tgt.c2
-rw-r--r--drivers/scsi/ch.c20
-rw-r--r--drivers/scsi/cxlflash/main.c17
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_main.c2
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v3_hw.c10
-rw-r--r--drivers/scsi/hosts.c7
-rw-r--r--drivers/scsi/libsas/sas_expander.c53
-rw-r--r--drivers/scsi/lpfc/lpfc.h2
-rw-r--r--drivers/scsi/lpfc/lpfc_attr.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_bsg.c40
-rw-r--r--drivers/scsi/lpfc/lpfc_debugfs.c12
-rw-r--r--drivers/scsi/lpfc/lpfc_els.c45
-rw-r--r--drivers/scsi/lpfc/lpfc_hbadisc.c33
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c13
-rw-r--r--drivers/scsi/lpfc/lpfc_mbox.c30
-rw-r--r--drivers/scsi/lpfc/lpfc_nportdisc.c12
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_nvmet.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c23
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c99
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.h30
-rw-r--r--drivers/scsi/lpfc/lpfc_sli4.h7
-rw-r--r--drivers/scsi/lpfc/lpfc_version.h2
-rw-r--r--drivers/scsi/lpfc/lpfc_vport.c10
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_app.c2
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_transport.c2
-rw-r--r--drivers/scsi/myrb.c20
-rw-r--r--drivers/scsi/myrs.c24
-rw-r--r--drivers/scsi/pmcraid.c20
-rw-r--r--drivers/scsi/qla2xxx/qla_attr.c14
-rw-r--r--drivers/scsi/qla2xxx/qla_def.h2
-rw-r--r--drivers/scsi/qla2xxx/qla_edif.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_gbl.h2
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c128
-rw-r--r--drivers/scsi/qla2xxx/qla_iocb.c68
-rw-r--r--drivers/scsi/qla2xxx/qla_mbx.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c3
-rw-r--r--drivers/scsi/qla2xxx/qla_target.c10
-rw-r--r--drivers/scsi/qla2xxx/qla_version.h4
-rw-r--r--drivers/scsi/scsi_lib.c7
-rw-r--r--drivers/scsi/scsi_scan.c34
-rw-r--r--drivers/scsi/sd.c29
-rw-r--r--drivers/scsi/sg.c38
-rw-r--r--drivers/scsi/st.c4
-rw-r--r--drivers/soc/mediatek/Kconfig1
-rw-r--r--drivers/soc/mediatek/mtk-svs.c7
-rw-r--r--drivers/soundwire/amd_manager.c15
-rw-r--r--drivers/soundwire/amd_manager.h3
-rw-r--r--drivers/spi/spi-axi-spi-engine.c2
-rw-r--r--drivers/spi/spi-fsl-lpspi.c14
-rw-r--r--drivers/spi/spi-hisi-kunpeng.c2
-rw-r--r--drivers/spi/spi-pci1xxxx.c2
-rw-r--r--drivers/spi/spi-s3c64xx.c5
-rw-r--r--drivers/staging/media/atomisp/pci/hmm/hmm.c2
-rw-r--r--drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c5
-rw-r--r--drivers/target/iscsi/iscsi_target_erl1.c3
-rw-r--r--drivers/target/target_core_configfs.c12
-rw-r--r--drivers/thermal/devfreq_cooling.c2
-rw-r--r--drivers/thermal/gov_power_allocator.c14
-rw-r--r--drivers/thermal/thermal_debugfs.c1
-rw-r--r--drivers/thermal/thermal_trip.c19
-rw-r--r--drivers/thunderbolt/switch.c48
-rw-r--r--drivers/thunderbolt/tb.c10
-rw-r--r--drivers/thunderbolt/tb.h3
-rw-r--r--drivers/thunderbolt/usb4.c13
-rw-r--r--drivers/tty/serial/8250/8250_dw.c6
-rw-r--r--drivers/tty/serial/8250/8250_lpc18xx.c2
-rw-r--r--drivers/tty/serial/8250/8250_pci.c6
-rw-r--r--drivers/tty/serial/mxs-auart.c8
-rw-r--r--drivers/tty/serial/pmac_zilog.c14
-rw-r--r--drivers/tty/serial/serial_base.h4
-rw-r--r--drivers/tty/serial/serial_core.c23
-rw-r--r--drivers/tty/serial/serial_port.c34
-rw-r--r--drivers/tty/serial/stm32-usart.c13
-rw-r--r--drivers/ufs/core/ufs-mcq.c2
-rw-r--r--drivers/ufs/core/ufshcd.c9
-rw-r--r--drivers/ufs/host/ufs-qcom.c14
-rw-r--r--drivers/uio/uio.c2
-rw-r--r--drivers/uio/uio_dmem_genirq.c4
-rw-r--r--drivers/uio/uio_hv_generic.c12
-rw-r--r--drivers/uio/uio_pruss.c2
-rw-r--r--drivers/usb/core/hub.c23
-rw-r--r--drivers/usb/core/hub.h2
-rw-r--r--drivers/usb/core/port.c50
-rw-r--r--drivers/usb/core/sysfs.c16
-rw-r--r--drivers/usb/dwc2/core.h14
-rw-r--r--drivers/usb/dwc2/core_intr.c72
-rw-r--r--drivers/usb/dwc2/gadget.c10
-rw-r--r--drivers/usb/dwc2/hcd.c49
-rw-r--r--drivers/usb/dwc2/hcd_ddma.c19
-rw-r--r--drivers/usb/dwc2/hw.h2
-rw-r--r--drivers/usb/dwc2/platform.c2
-rw-r--r--drivers/usb/dwc3/core.c92
-rw-r--r--drivers/usb/dwc3/core.h3
-rw-r--r--drivers/usb/dwc3/dwc3-pci.c2
-rw-r--r--drivers/usb/dwc3/ep0.c3
-rw-r--r--drivers/usb/dwc3/gadget.c12
-rw-r--r--drivers/usb/dwc3/host.c38
-rw-r--r--drivers/usb/gadget/composite.c6
-rw-r--r--drivers/usb/gadget/function/f_fs.c38
-rw-r--r--drivers/usb/gadget/function/f_ncm.c4
-rw-r--r--drivers/usb/gadget/function/uvc_configfs.c4
-rw-r--r--drivers/usb/gadget/udc/core.c4
-rw-r--r--drivers/usb/gadget/udc/fsl_udc_core.c5
-rw-r--r--drivers/usb/host/xhci-plat.h4
-rw-r--r--drivers/usb/host/xhci-ring.c9
-rw-r--r--drivers/usb/host/xhci-rzv2m.c1
-rw-r--r--drivers/usb/host/xhci-trace.h12
-rw-r--r--drivers/usb/misc/onboard_usb_hub.c6
-rw-r--r--drivers/usb/misc/usb-ljca.c22
-rw-r--r--drivers/usb/phy/phy-generic.c7
-rw-r--r--drivers/usb/serial/option.c40
-rw-r--r--drivers/usb/storage/uas.c28
-rw-r--r--drivers/usb/typec/class.c7
-rw-r--r--drivers/usb/typec/mux/it5205.c2
-rw-r--r--drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c8
-rw-r--r--drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c11
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c10
-rw-r--r--drivers/usb/typec/ucsi/ucsi.c92
-rw-r--r--drivers/usb/typec/ucsi/ucsi.h5
-rw-r--r--drivers/usb/typec/ucsi/ucsi_acpi.c75
-rw-r--r--drivers/usb/typec/ucsi/ucsi_glink.c14
-rw-r--r--drivers/vdpa/vdpa.c6
-rw-r--r--drivers/vfio/pci/pds/dirty.c1
-rw-r--r--drivers/vfio/vfio_iommu_type1.c4
-rw-r--r--drivers/vhost/vhost.c26
-rw-r--r--drivers/video/fbdev/Kconfig3
-rw-r--r--drivers/video/fbdev/core/fb_defio.c2
-rw-r--r--drivers/virt/acrn/mm.c61
-rw-r--r--drivers/virt/vmgenid.c2
-rw-r--r--drivers/virtio/virtio.c6
-rw-r--r--drivers/virtio/virtio_mem.c1
-rw-r--r--drivers/xen/grant-dma-ops.c2
-rw-r--r--drivers/xen/swiotlb-xen.c2
-rw-r--r--fs/9p/fid.h3
-rw-r--r--fs/9p/v9fs.h11
-rw-r--r--fs/9p/vfs_file.c2
-rw-r--r--fs/9p/vfs_inode.c60
-rw-r--r--fs/9p/vfs_inode_dotl.c34
-rw-r--r--fs/9p/vfs_super.c19
-rw-r--r--fs/aio.c2
-rw-r--r--fs/bcachefs/Makefile3
-rw-r--r--fs/bcachefs/acl.c30
-rw-r--r--fs/bcachefs/alloc_background.c47
-rw-r--r--fs/bcachefs/alloc_foreground.c4
-rw-r--r--fs/bcachefs/alloc_types.h3
-rw-r--r--fs/bcachefs/backpointers.c194
-rw-r--r--fs/bcachefs/backpointers.h41
-rw-r--r--fs/bcachefs/bcachefs.h10
-rw-r--r--fs/bcachefs/bcachefs_format.h29
-rw-r--r--fs/bcachefs/bkey.h6
-rw-r--r--fs/bcachefs/bkey_methods.c8
-rw-r--r--fs/bcachefs/bset.c14
-rw-r--r--fs/bcachefs/bset.h2
-rw-r--r--fs/bcachefs/btree_cache.c78
-rw-r--r--fs/bcachefs/btree_gc.c531
-rw-r--r--fs/bcachefs/btree_io.c37
-rw-r--r--fs/bcachefs/btree_iter.c52
-rw-r--r--fs/bcachefs/btree_iter.h11
-rw-r--r--fs/bcachefs/btree_journal_iter.c109
-rw-r--r--fs/bcachefs/btree_journal_iter.h8
-rw-r--r--fs/bcachefs/btree_key_cache.c23
-rw-r--r--fs/bcachefs/btree_locking.c28
-rw-r--r--fs/bcachefs/btree_node_scan.c521
-rw-r--r--fs/bcachefs/btree_node_scan.h11
-rw-r--r--fs/bcachefs/btree_node_scan_types.h30
-rw-r--r--fs/bcachefs/btree_trans_commit.c39
-rw-r--r--fs/bcachefs/btree_types.h16
-rw-r--r--fs/bcachefs/btree_update.c6
-rw-r--r--fs/bcachefs/btree_update_interior.c409
-rw-r--r--fs/bcachefs/btree_update_interior.h31
-rw-r--r--fs/bcachefs/btree_write_buffer.c28
-rw-r--r--fs/bcachefs/buckets.c12
-rw-r--r--fs/bcachefs/buckets.h9
-rw-r--r--fs/bcachefs/chardev.c104
-rw-r--r--fs/bcachefs/checksum.c23
-rw-r--r--fs/bcachefs/checksum.h5
-rw-r--r--fs/bcachefs/compress.h8
-rw-r--r--fs/bcachefs/data_update.c29
-rw-r--r--fs/bcachefs/debug.c75
-rw-r--r--fs/bcachefs/ec.c54
-rw-r--r--fs/bcachefs/ec.h2
-rw-r--r--fs/bcachefs/errcode.h3
-rw-r--r--fs/bcachefs/error.c6
-rw-r--r--fs/bcachefs/error.h6
-rw-r--r--fs/bcachefs/extents.c70
-rw-r--r--fs/bcachefs/extents.h25
-rw-r--r--fs/bcachefs/eytzinger.c234
-rw-r--r--fs/bcachefs/eytzinger.h81
-rw-r--r--fs/bcachefs/fs-io-direct.c23
-rw-r--r--fs/bcachefs/fs-io.c16
-rw-r--r--fs/bcachefs/fs.c10
-rw-r--r--fs/bcachefs/fsck.c264
-rw-r--r--fs/bcachefs/inode.c2
-rw-r--r--fs/bcachefs/io_misc.c2
-rw-r--r--fs/bcachefs/journal_io.c77
-rw-r--r--fs/bcachefs/journal_reclaim.c2
-rw-r--r--fs/bcachefs/journal_seq_blacklist.c3
-rw-r--r--fs/bcachefs/journal_types.h1
-rw-r--r--fs/bcachefs/logged_ops.c7
-rw-r--r--fs/bcachefs/mean_and_variance_test.c28
-rw-r--r--fs/bcachefs/opts.c33
-rw-r--r--fs/bcachefs/opts.h21
-rw-r--r--fs/bcachefs/recovery.c415
-rw-r--r--fs/bcachefs/recovery.h32
-rw-r--r--fs/bcachefs/recovery_passes.c249
-rw-r--r--fs/bcachefs/recovery_passes.h17
-rw-r--r--fs/bcachefs/recovery_passes_types.h (renamed from fs/bcachefs/recovery_types.h)11
-rw-r--r--fs/bcachefs/reflink.c3
-rw-r--r--fs/bcachefs/replicas.c19
-rw-r--r--fs/bcachefs/sb-clean.c8
-rw-r--r--fs/bcachefs/sb-downgrade.c7
-rw-r--r--fs/bcachefs/sb-errors_types.h11
-rw-r--r--fs/bcachefs/sb-members.c53
-rw-r--r--fs/bcachefs/sb-members.h21
-rw-r--r--fs/bcachefs/snapshot.c227
-rw-r--r--fs/bcachefs/snapshot.h89
-rw-r--r--fs/bcachefs/subvolume.c72
-rw-r--r--fs/bcachefs/subvolume.h3
-rw-r--r--fs/bcachefs/subvolume_types.h2
-rw-r--r--fs/bcachefs/super-io.c20
-rw-r--r--fs/bcachefs/super.c22
-rw-r--r--fs/bcachefs/super_types.h2
-rw-r--r--fs/bcachefs/sysfs.c17
-rw-r--r--fs/bcachefs/tests.c2
-rw-r--r--fs/bcachefs/thread_with_file.c15
-rw-r--r--fs/bcachefs/thread_with_file.h3
-rw-r--r--fs/bcachefs/util.c143
-rw-r--r--fs/bcachefs/util.h22
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/btrfs/backref.c12
-rw-r--r--fs/btrfs/block-group.c3
-rw-r--r--fs/btrfs/delayed-inode.c3
-rw-r--r--fs/btrfs/extent-tree.c8
-rw-r--r--fs/btrfs/extent_io.c34
-rw-r--r--fs/btrfs/extent_map.c18
-rw-r--r--fs/btrfs/inode.c28
-rw-r--r--fs/btrfs/ioctl.c70
-rw-r--r--fs/btrfs/messages.c2
-rw-r--r--fs/btrfs/qgroup.c23
-rw-r--r--fs/btrfs/root-tree.c10
-rw-r--r--fs/btrfs/root-tree.h2
-rw-r--r--fs/btrfs/scrub.c30
-rw-r--r--fs/btrfs/tests/extent-map-tests.c5
-rw-r--r--fs/btrfs/transaction.c19
-rw-r--r--fs/btrfs/volumes.c28
-rw-r--r--fs/btrfs/zoned.c14
-rw-r--r--fs/buffer.c165
-rw-r--r--fs/ceph/addr.c4
-rw-r--r--fs/ceph/caps.c4
-rw-r--r--fs/ceph/mds_client.c9
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--fs/cramfs/inode.c2
-rw-r--r--fs/crypto/inline_crypt.c6
-rw-r--r--fs/dax.c14
-rw-r--r--fs/erofs/fscache.c2
-rw-r--r--fs/erofs/internal.h7
-rw-r--r--fs/erofs/super.c125
-rw-r--r--fs/exec.c12
-rw-r--r--fs/ext4/super.c8
-rw-r--r--fs/f2fs/data.c5
-rw-r--r--fs/f2fs/super.c2
-rw-r--r--fs/fat/dir.c12
-rw-r--r--fs/fuse/cuse.c4
-rw-r--r--fs/fuse/dir.c1
-rw-r--r--fs/fuse/file.c12
-rw-r--r--fs/fuse/fuse_i.h7
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/fuse/iomode.c60
-rw-r--r--fs/gfs2/bmap.c5
-rw-r--r--fs/hugetlbfs/inode.c11
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/jfs/jfs_logmgr.c4
-rw-r--r--fs/kernfs/file.c9
-rw-r--r--fs/namei.c7
-rw-r--r--fs/netfs/buffered_write.c23
-rw-r--r--fs/nfs/iostat.h5
-rw-r--r--fs/nfsd/nfs4callback.c26
-rw-r--r--fs/nfsd/nfs4state.c43
-rw-r--r--fs/nfsd/nfs4xdr.c49
-rw-r--r--fs/nfsd/state.h2
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/btree.c23
-rw-r--r--fs/nilfs2/dir.c2
-rw-r--r--fs/nilfs2/gcinode.c1
-rw-r--r--fs/nilfs2/nilfs.h4
-rw-r--r--fs/nilfs2/recovery.c1
-rw-r--r--fs/nilfs2/super.c388
-rw-r--r--fs/nilfs2/the_nilfs.c5
-rw-r--r--fs/nilfs2/the_nilfs.h6
-rw-r--r--fs/ntfs3/Kconfig9
-rw-r--r--fs/ntfs3/dir.c7
-rw-r--r--fs/ntfs3/file.c8
-rw-r--r--fs/ntfs3/inode.c20
-rw-r--r--fs/ntfs3/ntfs_fs.h4
-rw-r--r--fs/ntfs3/super.c65
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c12
-rw-r--r--fs/ocfs2/export.c12
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/inode.c2
-rw-r--r--fs/ocfs2/ioctl.c1
-rw-r--r--fs/ocfs2/localalloc.c34
-rw-r--r--fs/ocfs2/move_extents.c2
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/ocfs2_fs.h3
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/reservations.c2
-rw-r--r--fs/ocfs2/resize.c8
-rw-r--r--fs/ocfs2/suballoc.c117
-rw-r--r--fs/ocfs2/suballoc.h6
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/bootconfig.c12
-rw-r--r--fs/proc/inode.c10
-rw-r--r--fs/proc/meminfo.c3
-rw-r--r--fs/proc/page.c76
-rw-r--r--fs/proc/task_mmu.c93
-rw-r--r--fs/proc/vmcore.c5
-rw-r--r--fs/ramfs/file-mmu.c2
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/romfs/super.c2
-rw-r--r--fs/smb/client/cached_dir.c6
-rw-r--r--fs/smb/client/cifs_debug.c6
-rw-r--r--fs/smb/client/cifsfs.c14
-rw-r--r--fs/smb/client/cifsglob.h23
-rw-r--r--fs/smb/client/cifspdu.h4
-rw-r--r--fs/smb/client/cifsproto.h29
-rw-r--r--fs/smb/client/cifssmb.c6
-rw-r--r--fs/smb/client/connect.c180
-rw-r--r--fs/smb/client/dfs.c51
-rw-r--r--fs/smb/client/dfs.h33
-rw-r--r--fs/smb/client/dfs_cache.c53
-rw-r--r--fs/smb/client/dir.c22
-rw-r--r--fs/smb/client/file.c111
-rw-r--r--fs/smb/client/fs_context.c39
-rw-r--r--fs/smb/client/fs_context.h16
-rw-r--r--fs/smb/client/fscache.c36
-rw-r--r--fs/smb/client/fscache.h6
-rw-r--r--fs/smb/client/inode.c5
-rw-r--r--fs/smb/client/ioctl.c6
-rw-r--r--fs/smb/client/misc.c20
-rw-r--r--fs/smb/client/smb1ops.c4
-rw-r--r--fs/smb/client/smb2misc.c14
-rw-r--r--fs/smb/client/smb2ops.c114
-rw-r--r--fs/smb/client/smb2pdu.c21
-rw-r--r--fs/smb/client/smb2pdu.h2
-rw-r--r--fs/smb/client/smb2transport.c4
-rw-r--r--fs/smb/client/trace.h96
-rw-r--r--fs/smb/client/transport.c7
-rw-r--r--fs/smb/common/smb2pdu.h2
-rw-r--r--fs/smb/server/ksmbd_netlink.h38
-rw-r--r--fs/smb/server/mgmt/share_config.c7
-rw-r--r--fs/smb/server/server.c13
-rw-r--r--fs/smb/server/smb2ops.c10
-rw-r--r--fs/smb/server/smb2pdu.c18
-rw-r--r--fs/smb/server/transport_ipc.c37
-rw-r--r--fs/smb/server/vfs.c5
-rw-r--r--fs/squashfs/inode.c5
-rw-r--r--fs/squashfs/namei.c14
-rw-r--r--fs/super.c24
-rw-r--r--fs/sysfs/file.c2
-rw-r--r--fs/tracefs/event_inode.c14
-rw-r--r--fs/userfaultfd.c9
-rw-r--r--fs/vboxsf/file.c1
-rw-r--r--fs/vboxsf/super.c9
-rw-r--r--fs/vboxsf/utils.c3
-rw-r--r--fs/xfs/libxfs/xfs_sb.c40
-rw-r--r--fs/xfs/libxfs/xfs_sb.h5
-rw-r--r--fs/xfs/scrub/common.c4
-rw-r--r--fs/xfs/xfs_aops.c7
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_icache.c8
-rw-r--r--fs/xfs/xfs_inode.c15
-rw-r--r--fs/xfs/xfs_super.c6
-rw-r--r--fs/xfs/xfs_trans.h9
-rw-r--r--fs/zonefs/super.c2
-rw-r--r--include/acpi/acpi_bus.h8
-rw-r--r--include/acpi/platform/aclinuxex.h19
-rw-r--r--include/asm-generic/barrier.h8
-rw-r--r--include/asm-generic/bug.h5
-rw-r--r--include/asm-generic/codetag.lds.h14
-rw-r--r--include/asm-generic/export.h11
-rw-r--r--include/asm-generic/hyperv-tlfs.h19
-rw-r--r--include/asm-generic/io.h2
-rw-r--r--include/asm-generic/mshyperv.h14
-rw-r--r--include/asm-generic/pgalloc.h35
-rw-r--r--include/asm-generic/vmlinux.lds.h22
-rw-r--r--include/crypto/hash.h7
-rw-r--r--include/crypto/internal/acompress.h5
-rw-r--r--include/crypto/skcipher.h7
-rw-r--r--include/kvm/arm_pmu.h2
-rw-r--r--include/linux/alloc_tag.h216
-rw-r--r--include/linux/bitops.h12
-rw-r--r--include/linux/blkdev.h13
-rw-r--r--include/linux/bootconfig.h8
-rw-r--r--include/linux/bpf.h49
-rw-r--r--include/linux/bpfptr.h5
-rw-r--r--include/linux/buffer_head.h48
-rw-r--r--include/linux/cc_platform.h12
-rw-r--r--include/linux/clk.h5
-rw-r--r--include/linux/codetag.h81
-rw-r--r--include/linux/compiler.h2
-rw-r--r--include/linux/compiler_types.h11
-rw-r--r--include/linux/cpu.h11
-rw-r--r--include/linux/cpumask.h28
-rw-r--r--include/linux/damon.h2
-rw-r--r--include/linux/device.h1
-rw-r--r--include/linux/dma-fence-chain.h6
-rw-r--r--include/linux/dma-fence.h7
-rw-r--r--include/linux/dma-map-ops.h2
-rw-r--r--include/linux/energy_model.h1
-rw-r--r--include/linux/etherdevice.h25
-rw-r--r--include/linux/filter.h1
-rw-r--r--include/linux/firmware/qcom/qcom_qseecom.h55
-rw-r--r--include/linux/firmware/qcom/qcom_scm.h10
-rw-r--r--include/linux/fortify-string.h5
-rw-r--r--include/linux/fpu.h12
-rw-r--r--include/linux/framer/framer.h4
-rw-r--r--include/linux/fs.h8
-rw-r--r--include/linux/gfp.h126
-rw-r--r--include/linux/gfp_types.h13
-rw-r--r--include/linux/gpio/driver.h17
-rw-r--r--include/linux/gpio/property.h1
-rw-r--r--include/linux/hid_bpf.h6
-rw-r--r--include/linux/huge_mm.h129
-rw-r--r--include/linux/hugetlb.h95
-rw-r--r--include/linux/hyperv.h1
-rw-r--r--include/linux/iio/common/inv_sensors_timestamp.h3
-rw-r--r--include/linux/instrumented.h35
-rw-r--r--include/linux/interrupt.h3
-rw-r--r--include/linux/io.h1
-rw-r--r--include/linux/io_uring_types.h3
-rw-r--r--include/linux/irqflags.h2
-rw-r--r--include/linux/jbd2.h12
-rw-r--r--include/linux/kexec.h6
-rw-r--r--include/linux/kfifo.h9
-rw-r--r--include/linux/kmsan-checks.h15
-rw-r--r--include/linux/ksm.h35
-rw-r--r--include/linux/libata.h1
-rw-r--r--include/linux/memcontrol.h76
-rw-r--r--include/linux/memory-tiers.h13
-rw-r--r--include/linux/mempolicy.h5
-rw-r--r--include/linux/mempool.h73
-rw-r--r--include/linux/mm.h235
-rw-r--r--include/linux/mm_types.h35
-rw-r--r--include/linux/mman.h8
-rw-r--r--include/linux/mmap_lock.h10
-rw-r--r--include/linux/oid_registry.h4
-rw-r--r--include/linux/page-flags.h204
-rw-r--r--include/linux/page-isolation.h5
-rw-r--r--include/linux/page_ext.h5
-rw-r--r--include/linux/page_idle.h68
-rw-r--r--include/linux/page_ref.h11
-rw-r--r--include/linux/pageblock-flags.h8
-rw-r--r--include/linux/pagemap.h21
-rw-r--r--include/linux/pagevec.h4
-rw-r--r--include/linux/pds/pds_common.h2
-rw-r--r--include/linux/peci.h1
-rw-r--r--include/linux/percpu.h30
-rw-r--r--include/linux/pgalloc_tag.h132
-rw-r--r--include/linux/pgtable.h119
-rw-r--r--include/linux/profile.h5
-rw-r--r--include/linux/ptr_ring.h28
-rw-r--r--include/linux/randomize_kstack.h2
-rw-r--r--include/linux/regmap.h8
-rw-r--r--include/linux/regulator/consumer.h4
-rw-r--r--include/linux/rhashtable-types.h11
-rw-r--r--include/linux/rmap.h50
-rw-r--r--include/linux/rwbase_rt.h4
-rw-r--r--include/linux/rwsem.h6
-rw-r--r--include/linux/sched.h24
-rw-r--r--include/linux/sched/coredump.h5
-rw-r--r--include/linux/sched/mm.h22
-rw-r--r--include/linux/secretmem.h21
-rw-r--r--include/linux/shmem_fs.h9
-rw-r--r--include/linux/skb_array.h19
-rw-r--r--include/linux/skbuff.h27
-rw-r--r--include/linux/skmsg.h10
-rw-r--r--include/linux/slab.h184
-rw-r--r--include/linux/sockptr.h35
-rw-r--r--include/linux/stackdepot.h7
-rw-r--r--include/linux/string.h4
-rw-r--r--include/linux/sunrpc/svc_rdma.h13
-rw-r--r--include/linux/swap.h40
-rw-r--r--include/linux/swapops.h65
-rw-r--r--include/linux/syscalls.h1
-rw-r--r--include/linux/timecounter.h11
-rw-r--r--include/linux/timekeeping.h49
-rw-r--r--include/linux/timer.h12
-rw-r--r--include/linux/u64_stats_sync.h9
-rw-r--r--include/linux/udp.h30
-rw-r--r--include/linux/virtio.h7
-rw-r--r--include/linux/vmalloc.h60
-rw-r--r--include/linux/vmstat.h8
-rw-r--r--include/linux/writeback.h1
-rw-r--r--include/linux/xarray.h14
-rw-r--r--include/linux/zpool.h4
-rw-r--r--include/linux/zswap.h2
-rw-r--r--include/net/addrconf.h4
-rw-r--r--include/net/af_unix.h3
-rw-r--r--include/net/bluetooth/bluetooth.h9
-rw-r--r--include/net/bluetooth/hci.h9
-rw-r--r--include/net/bluetooth/hci_core.h8
-rw-r--r--include/net/cfg80211.h2
-rw-r--r--include/net/inet_connection_sock.h1
-rw-r--r--include/net/ip_tunnels.h33
-rw-r--r--include/net/mac80211.h3
-rw-r--r--include/net/macsec.h2
-rw-r--r--include/net/mana/mana.h1
-rw-r--r--include/net/netfilter/nf_flow_table.h12
-rw-r--r--include/net/netfilter/nf_tables.h14
-rw-r--r--include/net/netlabel.h16
-rw-r--r--include/net/netlink.h5
-rw-r--r--include/net/request_sock.h5
-rw-r--r--include/net/sch_generic.h1
-rw-r--r--include/net/sock.h45
-rw-r--r--include/net/tcx.h5
-rw-r--r--include/net/tls.h3
-rw-r--r--include/net/xdp_sock.h2
-rw-r--r--include/rdma/rdmavt_qp.h1
-rw-r--r--include/scsi/scsi_driver.h1
-rw-r--r--include/scsi/scsi_host.h1
-rw-r--r--include/sound/cs35l56.h2
-rw-r--r--include/sound/emu10k1.h7
-rw-r--r--include/sound/hdaudio_ext.h3
-rw-r--r--include/sound/intel-nhlt.h10
-rw-r--r--include/sound/tas2781-tlv.h2
-rw-r--r--include/trace/events/fs_dax.h16
-rw-r--r--include/trace/events/huge_memory.h12
-rw-r--r--include/trace/events/mmflags.h3
-rw-r--r--include/trace/events/page_ref.h4
-rw-r--r--include/trace/events/rpcgss.h4
-rw-r--r--include/uapi/asm-generic/unistd.h5
-rw-r--r--include/uapi/drm/etnaviv_drm.h5
-rw-r--r--include/uapi/linux/kfd_ioctl.h17
-rw-r--r--include/uapi/linux/vdpa.h6
-rw-r--r--include/uapi/linux/vhost.h15
-rw-r--r--include/uapi/scsi/scsi_bsg_mpi3mr.h2
-rw-r--r--include/ufs/ufshcd.h1
-rw-r--r--include/vdso/datapage.h8
-rw-r--r--init/Kconfig6
-rw-r--r--init/do_mounts_initrd.c1
-rw-r--r--init/initramfs.c4
-rw-r--r--init/main.c33
-rw-r--r--io_uring/io_uring.c59
-rw-r--r--io_uring/kbuf.c118
-rw-r--r--io_uring/kbuf.h8
-rw-r--r--io_uring/net.c1
-rw-r--r--io_uring/rw.c9
-rw-r--r--ipc/ipc_sysctl.c1
-rw-r--r--ipc/mq_sysctl.c1
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/arena.c27
-rw-r--r--kernel/bpf/bloom_filter.c13
-rw-r--r--kernel/bpf/core.c9
-rw-r--r--kernel/bpf/helpers.c2
-rw-r--r--kernel/bpf/memalloc.c6
-rw-r--r--kernel/bpf/syscall.c37
-rw-r--r--kernel/bpf/verifier.c63
-rw-r--r--kernel/configs/hardening.config11
-rw-r--r--kernel/cpu.c13
-rw-r--r--kernel/crash_core.c2
-rw-r--r--kernel/crash_reserve.c11
-rw-r--r--kernel/dma/mapping.c4
-rw-r--r--kernel/dma/swiotlb.c91
-rw-r--r--kernel/events/core.c4
-rw-r--r--kernel/fork.c35
-rw-r--r--kernel/irq/manage.c9
-rw-r--r--kernel/kallsyms.c6
-rw-r--r--kernel/kallsyms_internal.h30
-rw-r--r--kernel/kallsyms_selftest.c2
-rw-r--r--kernel/kcov.c3
-rw-r--r--kernel/kprobes.c18
-rw-r--r--kernel/ksysfs.c4
-rw-r--r--kernel/module/Kconfig5
-rw-r--r--kernel/module/main.c29
-rw-r--r--kernel/power/suspend.c6
-rw-r--r--kernel/printk/printk.c6
-rw-r--r--kernel/profile.c43
-rw-r--r--kernel/regset.c6
-rw-r--r--kernel/sched/fair.c34
-rw-r--r--kernel/sched/isolation.c18
-rw-r--r--kernel/sched/sched.h20
-rw-r--r--kernel/sys.c7
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/time/posix-clock.c16
-rw-r--r--kernel/time/tick-common.c17
-rw-r--r--kernel/time/tick-sched.c54
-rw-r--r--kernel/time/tick-sched.h2
-rw-r--r--kernel/time/timer.c22
-rw-r--r--kernel/time/timer_migration.c32
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/blktrace.c3
-rw-r--r--kernel/trace/bpf_trace.c10
-rw-r--r--kernel/trace/ring_buffer.c6
-rw-r--r--kernel/trace/trace_events.c4
-rw-r--r--kernel/trace/trace_probe.c2
-rw-r--r--kernel/vmcore_info.c8
-rw-r--r--lib/Kconfig.debug38
-rw-r--r--lib/Kconfig.kgdb1
-rw-r--r--lib/Makefile29
-rw-r--r--lib/alloc_tag.c246
-rw-r--r--lib/bootconfig.c22
-rwxr-xr-xlib/build_OID_registry5
-rw-r--r--lib/buildid.c4
-rw-r--r--lib/checksum_kunit.c5
-rw-r--r--lib/codetag.c283
-rw-r--r--lib/devres.c26
-rw-r--r--lib/kfifo.c8
-rw-r--r--lib/maple_tree.c16
-rw-r--r--lib/raid6/Makefile33
-rw-r--r--lib/rhashtable.c22
-rw-r--r--lib/scatterlist.c2
-rw-r--r--lib/stackdepot.c8
-rw-r--r--lib/test_fpu.h8
-rw-r--r--lib/test_fpu_glue.c (renamed from lib/test_fpu.c)37
-rw-r--r--lib/test_fpu_impl.c37
-rw-r--r--lib/test_hexdump.c2
-rw-r--r--lib/test_hmm.c8
-rw-r--r--lib/test_ubsan.c2
-rw-r--r--lib/test_xarray.c106
-rw-r--r--lib/ubsan.c18
-rw-r--r--lib/xarray.c52
-rw-r--r--mm/Kconfig8
-rw-r--r--mm/Makefile8
-rw-r--r--mm/backing-dev.c200
-rw-r--r--mm/cma.c4
-rw-r--r--mm/compaction.c8
-rw-r--r--mm/damon/paddr.c64
-rw-r--r--mm/damon/sysfs-schemes.c1
-rw-r--r--mm/debug.c25
-rw-r--r--mm/debug_page_alloc.c12
-rw-r--r--mm/debug_vm_pgtable.c1
-rw-r--r--mm/filemap.c134
-rw-r--r--mm/folio-compat.c12
-rw-r--r--mm/gup.c868
-rw-r--r--mm/hmm.c9
-rw-r--r--mm/huge_memory.c383
-rw-r--r--mm/hugetlb.c454
-rw-r--r--mm/hugetlb_cgroup.c2
-rw-r--r--mm/hugetlb_vmemmap.c1
-rw-r--r--mm/hwpoison-inject.c11
-rw-r--r--mm/internal.h250
-rw-r--r--mm/kasan/hw_tags.c1
-rw-r--r--mm/kfence/core.c14
-rw-r--r--mm/kfence/kfence.h4
-rw-r--r--mm/khugepaged.c335
-rw-r--r--mm/kmemleak.c6
-rw-r--r--mm/kmsan/hooks.c11
-rw-r--r--mm/ksm.c291
-rw-r--r--mm/madvise.c253
-rw-r--r--mm/memcontrol.c276
-rw-r--r--mm/memory-failure.c195
-rw-r--r--mm/memory-tiers.c123
-rw-r--r--mm/memory.c305
-rw-r--r--mm/memory_hotplug.c5
-rw-r--r--mm/mempolicy.c104
-rw-r--r--mm/mempool.c36
-rw-r--r--mm/memremap.c40
-rw-r--r--mm/migrate.c43
-rw-r--r--mm/migrate_device.c41
-rw-r--r--mm/mlock.c2
-rw-r--r--mm/mm_init.c216
-rw-r--r--mm/mmap.c260
-rw-r--r--mm/mprotect.c13
-rw-r--r--mm/mremap.c33
-rw-r--r--mm/mseal.c307
-rw-r--r--mm/nommu.c77
-rw-r--r--mm/oom_kill.c1
-rw-r--r--mm/page-writeback.c80
-rw-r--r--mm/page_alloc.c886
-rw-r--r--mm/page_ext.c15
-rw-r--r--mm/page_io.c3
-rw-r--r--mm/page_isolation.c121
-rw-r--r--mm/page_owner.c225
-rw-r--r--mm/page_table_check.c30
-rw-r--r--mm/page_vma_mapped.c22
-rw-r--r--mm/percpu-internal.h26
-rw-r--r--mm/percpu-vm.c4
-rw-r--r--mm/percpu.c118
-rw-r--r--mm/pgtable-generic.c2
-rw-r--r--mm/readahead.c12
-rw-r--r--mm/rmap.c93
-rw-r--r--mm/shmem.c19
-rw-r--r--mm/shmem_quota.c10
-rw-r--r--mm/show_mem.c26
-rw-r--r--mm/slab.h60
-rw-r--r--mm/slab_common.c6
-rw-r--r--mm/slub.c468
-rw-r--r--mm/sparse.c28
-rw-r--r--mm/swap.c64
-rw-r--r--mm/swap_slots.c8
-rw-r--r--mm/swap_state.c10
-rw-r--r--mm/swapfile.c366
-rw-r--r--mm/truncate.c36
-rw-r--r--mm/userfaultfd.c34
-rw-r--r--mm/util.c40
-rw-r--r--mm/vmalloc.c216
-rw-r--r--mm/vmscan.c77
-rw-r--r--mm/z3fold.c10
-rw-r--r--mm/zbud.c10
-rw-r--r--mm/zpool.c10
-rw-r--r--mm/zsmalloc.c6
-rw-r--r--mm/zswap.c451
-rw-r--r--net/9p/client.c10
-rw-r--r--net/9p/trans_fd.c1
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/ax25/ax25_dev.c2
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/bluetooth/hci_conn.c6
-rw-r--r--net/bluetooth/hci_core.c6
-rw-r--r--net/bluetooth/hci_debugfs.c48
-rw-r--r--net/bluetooth/hci_event.c48
-rw-r--r--net/bluetooth/hci_request.c4
-rw-r--r--net/bluetooth/hci_sock.c21
-rw-r--r--net/bluetooth/hci_sync.c25
-rw-r--r--net/bluetooth/iso.c46
-rw-r--r--net/bluetooth/l2cap_core.c5
-rw-r--r--net/bluetooth/l2cap_sock.c59
-rw-r--r--net/bluetooth/mgmt.c24
-rw-r--r--net/bluetooth/rfcomm/sock.c14
-rw-r--r--net/bluetooth/sco.c30
-rw-r--r--net/bridge/br_input.c15
-rw-r--r--net/bridge/br_netfilter_hooks.c6
-rw-r--r--net/bridge/br_netlink.c2
-rw-r--r--net/bridge/br_private.h1
-rw-r--r--net/bridge/netfilter/ebtables.c6
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c14
-rw-r--r--net/core/dev.c8
-rw-r--r--net/core/filter.c42
-rw-r--r--net/core/gro.c3
-rw-r--r--net/core/skmsg.c5
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/sock_map.c6
-rw-r--r--net/ethernet/eth.c12
-rw-r--r--net/hsr/hsr_device.c13
-rw-r--r--net/hsr/hsr_slave.c3
-rw-r--r--net/ipv4/fib_frontend.c5
-rw-r--r--net/ipv4/icmp.c12
-rw-r--r--net/ipv4/inet_connection_sock.c44
-rw-r--r--net/ipv4/inet_fragment.c70
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_gre.c5
-rw-r--r--net/ipv4/netfilter/Kconfig1
-rw-r--r--net/ipv4/netfilter/arp_tables.c8
-rw-r--r--net/ipv4/netfilter/ip_tables.c8
-rw-r--r--net/ipv4/nexthop.c4
-rw-r--r--net/ipv4/route.c7
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_ao.c3
-rw-r--r--net/ipv4/udp.c12
-rw-r--r--net/ipv4/udp_offload.c23
-rw-r--r--net/ipv6/addrconf.c12
-rw-r--r--net/ipv6/ip6_fib.c21
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c8
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/ipv6/udp.c7
-rw-r--r--net/ipv6/udp_offload.c8
-rw-r--r--net/l2tp/l2tp_eth.c3
-rw-r--r--net/mac80211/cfg.c5
-rw-r--r--net/mac80211/chan.c27
-rw-r--r--net/mac80211/debug.h2
-rw-r--r--net/mac80211/ieee80211_i.h4
-rw-r--r--net/mac80211/mesh.c8
-rw-r--r--net/mac80211/mesh.h36
-rw-r--r--net/mac80211/mesh_pathtbl.c31
-rw-r--r--net/mac80211/mlme.c46
-rw-r--r--net/mac80211/rate.c6
-rw-r--r--net/mac80211/rx.c17
-rw-r--r--net/mac80211/scan.c1
-rw-r--r--net/mac80211/tx.c13
-rw-r--r--net/mptcp/protocol.c2
-rw-r--r--net/mptcp/sockopt.c4
-rw-r--r--net/mptcp/subflow.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c6
-rw-r--r--net/netfilter/nf_flow_table_inet.c3
-rw-r--r--net/netfilter/nf_flow_table_ip.c10
-rw-r--r--net/netfilter/nf_tables_api.c174
-rw-r--r--net/netfilter/nft_chain_filter.c4
-rw-r--r--net/netfilter/nft_lookup.c1
-rw-r--r--net/netfilter/nft_set_bitmap.c4
-rw-r--r--net/netfilter/nft_set_hash.c8
-rw-r--r--net/netfilter/nft_set_pipapo.c25
-rw-r--r--net/netfilter/nft_set_rbtree.c4
-rw-r--r--net/nfc/llcp_sock.c12
-rw-r--r--net/nfc/nci/core.c5
-rw-r--r--net/nsh/nsh.c14
-rw-r--r--net/openvswitch/conntrack.c9
-rw-r--r--net/rds/rdma.c2
-rw-r--r--net/sched/act_skbmod.c10
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sunrpc/auth_gss/auth_gss_internal.h6
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c14
-rw-r--r--net/sunrpc/svcsock.c10
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c86
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c5
-rw-r--r--net/tls/tls.h2
-rw-r--r--net/tls/tls_strp.c6
-rw-r--r--net/tls/tls_sw.c7
-rw-r--r--net/unix/af_unix.c16
-rw-r--r--net/unix/garbage.c18
-rw-r--r--net/vmw_vsock/virtio_transport.c3
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/trace.h6
-rw-r--r--net/wireless/wext-core.c7
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--net/xfrm/xfrm_policy.c2
-rw-r--r--rust/Makefile1
-rw-r--r--rust/helpers.c8
-rw-r--r--rust/kernel/init.rs11
-rw-r--r--rust/kernel/lib.rs2
-rw-r--r--rust/kernel/net/phy.rs4
-rw-r--r--rust/macros/lib.rs12
-rw-r--r--rust/macros/module.rs190
-rw-r--r--samples/kfifo/dma-example.c3
-rw-r--r--scripts/Makefile.build2
-rw-r--r--scripts/Makefile.extrawarn39
-rw-r--r--scripts/Makefile.lib18
-rw-r--r--scripts/Makefile.modfinal2
-rwxr-xr-xscripts/bpf_doc.py4
-rw-r--r--scripts/gcc-plugins/stackleak_plugin.c2
-rw-r--r--scripts/gdb/linux/cpus.py11
-rw-r--r--scripts/gdb/linux/tasks.py2
-rw-r--r--scripts/gdb/linux/utils.py2
-rw-r--r--scripts/kallsyms.c13
-rw-r--r--scripts/kconfig/conf.c10
-rw-r--r--scripts/kconfig/confdata.c35
-rw-r--r--scripts/kconfig/expr.h5
-rw-r--r--scripts/kconfig/gconf.c4
-rw-r--r--scripts/kconfig/lexer.l1
-rw-r--r--scripts/kconfig/lkc.h15
-rw-r--r--scripts/kconfig/lxdialog/checklist.c2
-rw-r--r--scripts/kconfig/lxdialog/dialog.h12
-rw-r--r--scripts/kconfig/lxdialog/inputbox.c2
-rw-r--r--scripts/kconfig/lxdialog/menubox.c2
-rw-r--r--scripts/kconfig/lxdialog/textbox.c2
-rw-r--r--scripts/kconfig/lxdialog/util.c2
-rw-r--r--scripts/kconfig/lxdialog/yesno.c2
-rw-r--r--scripts/kconfig/mconf.c4
-rw-r--r--scripts/kconfig/menu.c55
-rw-r--r--scripts/kconfig/parser.y26
-rw-r--r--scripts/kconfig/symbol.c2
-rw-r--r--scripts/kconfig/tests/choice/Kconfig26
-rw-r--r--scripts/kconfig/tests/choice/__init__.py2
-rw-r--r--scripts/kconfig/tests/choice/allmod_expected_config4
-rw-r--r--scripts/kconfig/tests/choice/allyes_expected_config4
-rw-r--r--scripts/kconfig/tests/choice/oldask0_expected_stdout2
-rw-r--r--scripts/kconfig/tests/choice/oldask1_config1
-rw-r--r--scripts/kconfig/tests/choice/oldask1_expected_stdout6
-rwxr-xr-xscripts/kernel-doc3
-rwxr-xr-xscripts/make_fit.py290
-rw-r--r--scripts/mod/modpost.c7
-rw-r--r--scripts/module.lds.S7
-rwxr-xr-xscripts/package/buildtar24
-rw-r--r--scripts/unifdef.c12
-rw-r--r--security/Kconfig.hardening15
-rw-r--r--security/security.c4
-rw-r--r--security/selinux/selinuxfs.c12
-rw-r--r--sound/aoa/soundbus/i2sbus/core.c2
-rw-r--r--sound/core/seq/seq_ump_convert.c2
-rw-r--r--sound/hda/intel-nhlt.c26
-rw-r--r--sound/oss/dmasound/dmasound_paula.c8
-rw-r--r--sound/pci/emu10k1/emu10k1.c3
-rw-r--r--sound/pci/emu10k1/emu10k1_callback.c7
-rw-r--r--sound/pci/emu10k1/emu10k1_main.c158
-rw-r--r--sound/pci/emu10k1/emumixer.c18
-rw-r--r--sound/pci/emu10k1/emuproc.c9
-rw-r--r--sound/pci/emu10k1/io.c49
-rw-r--r--sound/pci/hda/cs35l41_hda.c1
-rw-r--r--sound/pci/hda/cs35l41_hda_property.c6
-rw-r--r--sound/pci/hda/cs35l56_hda.c12
-rw-r--r--sound/pci/hda/cs35l56_hda_i2c.c13
-rw-r--r--sound/pci/hda/cs35l56_hda_spi.c13
-rw-r--r--sound/pci/hda/patch_realtek.c105
-rw-r--r--sound/pci/hda/tas2781_hda_i2c.c122
-rw-r--r--sound/sh/aica.c17
-rw-r--r--sound/soc/amd/acp/acp-pci.c13
-rw-r--r--sound/soc/amd/yc/acp6x-mach.c7
-rw-r--r--sound/soc/codecs/cs-amp-lib.c5
-rw-r--r--sound/soc/codecs/cs35l41.c26
-rw-r--r--sound/soc/codecs/cs35l56-sdw.c2
-rw-r--r--sound/soc/codecs/cs35l56-shared.c85
-rw-r--r--sound/soc/codecs/cs35l56.c26
-rw-r--r--sound/soc/codecs/cs42l43.c12
-rw-r--r--sound/soc/codecs/es8326.c37
-rw-r--r--sound/soc/codecs/es8326.h2
-rw-r--r--sound/soc/codecs/rt1316-sdw.c8
-rw-r--r--sound/soc/codecs/rt1318-sdw.c8
-rw-r--r--sound/soc/codecs/rt5645.c25
-rw-r--r--sound/soc/codecs/rt5682-sdw.c16
-rw-r--r--sound/soc/codecs/rt700.c16
-rw-r--r--sound/soc/codecs/rt711-sdca-sdw.c6
-rw-r--r--sound/soc/codecs/rt711-sdca.c18
-rw-r--r--sound/soc/codecs/rt711-sdw.c8
-rw-r--r--sound/soc/codecs/rt711.c16
-rw-r--r--sound/soc/codecs/rt712-sdca-dmic.c24
-rw-r--r--sound/soc/codecs/rt712-sdca-sdw.c7
-rw-r--r--sound/soc/codecs/rt712-sdca.c20
-rw-r--r--sound/soc/codecs/rt715-sdca-sdw.c2
-rw-r--r--sound/soc/codecs/rt715-sdca.c54
-rw-r--r--sound/soc/codecs/rt715-sdw.c5
-rw-r--r--sound/soc/codecs/rt715.c24
-rw-r--r--sound/soc/codecs/rt722-sdca-sdw.c4
-rw-r--r--sound/soc/codecs/rt722-sdca.c48
-rw-r--r--sound/soc/codecs/rt722-sdca.h3
-rw-r--r--sound/soc/codecs/wm_adsp.c3
-rw-r--r--sound/soc/codecs/wsa881x.c1
-rw-r--r--sound/soc/intel/avs/boards/da7219.c1
-rw-r--r--sound/soc/intel/avs/boards/dmic.c1
-rw-r--r--sound/soc/intel/avs/boards/es8336.c1
-rw-r--r--sound/soc/intel/avs/boards/i2s_test.c1
-rw-r--r--sound/soc/intel/avs/boards/max98357a.c1
-rw-r--r--sound/soc/intel/avs/boards/max98373.c1
-rw-r--r--sound/soc/intel/avs/boards/max98927.c1
-rw-r--r--sound/soc/intel/avs/boards/nau8825.c1
-rw-r--r--sound/soc/intel/avs/boards/probe.c1
-rw-r--r--sound/soc/intel/avs/boards/rt274.c1
-rw-r--r--sound/soc/intel/avs/boards/rt286.c1
-rw-r--r--sound/soc/intel/avs/boards/rt298.c1
-rw-r--r--sound/soc/intel/avs/boards/rt5514.c1
-rw-r--r--sound/soc/intel/avs/boards/rt5663.c1
-rw-r--r--sound/soc/intel/avs/boards/rt5682.c1
-rw-r--r--sound/soc/intel/avs/boards/ssm4567.c1
-rw-r--r--sound/soc/intel/avs/icl.c2
-rw-r--r--sound/soc/intel/avs/topology.c2
-rw-r--r--sound/soc/intel/boards/bytcr_rt5640.c14
-rw-r--r--sound/soc/soc-ops.c2
-rw-r--r--sound/soc/sof/amd/acp.c8
-rw-r--r--sound/soc/sof/core.c18
-rw-r--r--sound/soc/sof/debug.c18
-rw-r--r--sound/soc/sof/intel/hda-common-ops.c3
-rw-r--r--sound/soc/sof/intel/hda-dai-ops.c11
-rw-r--r--sound/soc/sof/intel/hda-dsp.c20
-rw-r--r--sound/soc/sof/intel/hda-pcm.c29
-rw-r--r--sound/soc/sof/intel/hda-stream.c70
-rw-r--r--sound/soc/sof/intel/hda.h6
-rw-r--r--sound/soc/sof/intel/lnl.c34
-rw-r--r--sound/soc/sof/intel/mtl.c14
-rw-r--r--sound/soc/sof/intel/mtl.h10
-rw-r--r--sound/soc/sof/intel/pci-lnl.c3
-rw-r--r--sound/soc/sof/ipc3-pcm.c1
-rw-r--r--sound/soc/sof/ipc4-mtrace.c11
-rw-r--r--sound/soc/sof/ipc4-pcm.c298
-rw-r--r--sound/soc/sof/ipc4-priv.h14
-rw-r--r--sound/soc/sof/ipc4-topology.c41
-rw-r--r--sound/soc/sof/ops.h24
-rw-r--r--sound/soc/sof/pcm.c21
-rw-r--r--sound/soc/sof/sof-audio.h11
-rw-r--r--sound/soc/sof/sof-priv.h24
-rw-r--r--sound/soc/tegra/tegra186_dspk.c7
-rw-r--r--sound/soc/ti/davinci-mcasp.c12
-rw-r--r--sound/usb/line6/driver.c6
-rw-r--r--tools/Makefile13
-rw-r--r--tools/arch/arm64/include/asm/cputype.h4
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h15
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h45
-rw-r--r--tools/arch/s390/include/uapi/asm/kvm.h315
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h22
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h11
-rw-r--r--tools/arch/x86/include/asm/msr-index.h83
-rw-r--r--tools/arch/x86/include/asm/required-features.h3
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h308
-rw-r--r--tools/bpf/bpftool/gen.c2
-rw-r--r--tools/cgroup/memcg_slabinfo.py5
-rw-r--r--tools/hv/hv_kvp_daemon.c227
-rw-r--r--tools/include/asm-generic/bitops/__fls.h8
-rw-r--r--tools/include/asm-generic/bitops/fls.h8
-rw-r--r--tools/include/linux/bits.h8
-rw-r--r--tools/include/linux/btf_ids.h2
-rw-r--r--tools/include/linux/kernel.h1
-rw-r--r--tools/include/linux/mm.h5
-rw-r--r--tools/include/linux/panic.h19
-rw-r--r--tools/include/linux/rbtree_augmented.h4
-rw-r--r--tools/include/uapi/asm-generic/bitsperlong.h4
-rw-r--r--tools/include/uapi/asm-generic/fcntl.h221
-rw-r--r--tools/include/uapi/drm/i915_drm.h16
-rw-r--r--tools/include/uapi/linux/bits.h15
-rw-r--r--tools/include/uapi/linux/kvm.h689
-rw-r--r--tools/include/uapi/linux/memfd.h39
-rw-r--r--tools/include/uapi/linux/openat2.h43
-rw-r--r--tools/include/uapi/linux/userfaultfd.h386
-rw-r--r--tools/lib/bpf/libbpf.c10
-rw-r--r--tools/lib/perf/cpumap.c33
-rw-r--r--tools/lib/perf/include/perf/cpumap.h16
-rw-r--r--tools/lib/perf/libperf.map4
-rw-r--r--tools/lib/perf/mmap.c2
-rw-r--r--tools/lib/rbtree.c2
-rw-r--r--tools/lib/subcmd/run-command.c70
-rw-r--r--tools/lib/subcmd/run-command.h3
-rw-r--r--tools/net/ynl/lib/ynl.py1
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py7
-rw-r--r--tools/objtool/check.c2
-rw-r--r--tools/perf/Build14
-rw-r--r--tools/perf/Documentation/perf-arm-spe.txt12
-rw-r--r--tools/perf/Documentation/perf-list.txt1
-rw-r--r--tools/perf/Documentation/perf-report.txt9
-rw-r--r--tools/perf/Documentation/perf-script.txt7
-rw-r--r--tools/perf/Documentation/perf-test.txt13
-rw-r--r--tools/perf/Makefile.config25
-rw-r--r--tools/perf/Makefile.perf100
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c114
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c4
-rw-r--r--tools/perf/arch/arm64/util/header.c13
-rw-r--r--tools/perf/arch/riscv/util/header.c2
-rw-r--r--tools/perf/arch/x86/Build14
-rw-r--r--tools/perf/arch/x86/tests/Build14
-rwxr-xr-xtools/perf/arch/x86/tests/gen-insn-x86-dat.sh2
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c4
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c10
-rw-r--r--tools/perf/bench/bench.h2
-rw-r--r--tools/perf/bench/uprobe.c22
-rw-r--r--tools/perf/builtin-annotate.c122
-rw-r--r--tools/perf/builtin-bench.c2
-rw-r--r--tools/perf/builtin-c2c.c8
-rw-r--r--tools/perf/builtin-inject.c27
-rw-r--r--tools/perf/builtin-kmem.c2
-rw-r--r--tools/perf/builtin-list.c24
-rw-r--r--tools/perf/builtin-record.c14
-rw-r--r--tools/perf/builtin-report.c7
-rw-r--r--tools/perf/builtin-sched.c11
-rw-r--r--tools/perf/builtin-script.c66
-rw-r--r--tools/perf/builtin-stat.c36
-rw-r--r--tools/perf/builtin-trace.c28
-rw-r--r--tools/perf/builtin.h4
-rwxr-xr-xtools/perf/check-headers.sh23
-rwxr-xr-xtools/perf/perf-archive.sh2
-rw-r--r--tools/perf/perf-completion.sh23
-rw-r--r--tools/perf/perf.c23
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json4
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json4
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z16/transaction.json28
-rw-r--r--tools/perf/pmu-events/arch/s390/mapfile.csv2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json35
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json85
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json14
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json1
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json3
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json112
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/grandridge/pipeline.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/grandridge/uncore-cache.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json35
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json95
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/memory.json1
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json22
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json64
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-io.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/cache.json24
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/memory.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/other.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json109
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv20
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/cache.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/frontend.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/memory.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/other.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json44
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json22
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json1
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json1
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json19
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json119
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json112
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json36
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/frontend.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/cache.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/frontend.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/other.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/pipeline.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json85
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json14
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-io.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/uncore-cache.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/uncore-interconnect.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/snowridgex/uncore-io.json11
-rwxr-xr-xtools/perf/scripts/python/parallel-perf.py988
-rw-r--r--tools/perf/tests/bitmap.c13
-rw-r--r--tools/perf/tests/builtin-test.c62
-rw-r--r--tools/perf/tests/code-reading.c10
-rw-r--r--tools/perf/tests/config-fragments/config3
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c4
-rw-r--r--tools/perf/tests/parse-events.c45
-rwxr-xr-xtools/perf/tests/shell/annotate.sh83
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_adding_kernel.sh1
-rw-r--r--tools/perf/tests/shell/lib/stat_output.sh2
-rwxr-xr-xtools/perf/tests/shell/script.sh26
-rwxr-xr-xtools/perf/tests/shell/stat+json_output.sh2
-rwxr-xr-xtools/perf/tests/shell/stat_bpf_counters.sh75
-rwxr-xr-xtools/perf/tests/shell/test_arm_callgraph_fp.sh4
-rwxr-xr-xtools/perf/tests/shell/test_arm_coresight.sh2
-rw-r--r--tools/perf/tests/topology.c46
-rw-r--r--tools/perf/tests/workloads/datasym.c16
-rw-r--r--tools/perf/trace/beauty/Build15
-rw-r--r--tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h (renamed from tools/arch/x86/include/asm/irq_vectors.h)2
-rw-r--r--tools/perf/trace/beauty/arch/x86/include/uapi/asm/prctl.h (renamed from tools/arch/x86/include/uapi/asm/prctl.h)0
-rwxr-xr-xtools/perf/trace/beauty/arch_errno_names.sh8
-rw-r--r--tools/perf/trace/beauty/beauty.h7
-rw-r--r--tools/perf/trace/beauty/clone.c46
-rwxr-xr-xtools/perf/trace/beauty/clone.sh17
-rw-r--r--tools/perf/trace/beauty/fcntl.c2
-rw-r--r--tools/perf/trace/beauty/flock.c2
-rw-r--r--tools/perf/trace/beauty/fs_at_flags.c58
-rwxr-xr-xtools/perf/trace/beauty/fs_at_flags.sh21
-rwxr-xr-xtools/perf/trace/beauty/fsconfig.sh6
-rw-r--r--tools/perf/trace/beauty/fsmount.c9
-rwxr-xr-xtools/perf/trace/beauty/fsmount.sh6
-rwxr-xr-xtools/perf/trace/beauty/fspick.sh6
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fcntl.h (renamed from tools/include/uapi/linux/fcntl.h)0
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fs.h (renamed from tools/include/uapi/linux/fs.h)30
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/mount.h (renamed from tools/include/uapi/linux/mount.h)0
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/prctl.h (renamed from tools/include/uapi/linux/prctl.h)0
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/sched.h (renamed from tools/include/uapi/linux/sched.h)0
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/stat.h195
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/usbdevice_fs.h (renamed from tools/include/uapi/linux/usbdevice_fs.h)0
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/vhost.h (renamed from tools/include/uapi/linux/vhost.h)20
-rw-r--r--tools/perf/trace/beauty/include/uapi/sound/asound.h (renamed from tools/include/uapi/sound/asound.h)4
-rwxr-xr-xtools/perf/trace/beauty/mount_flags.sh6
-rwxr-xr-xtools/perf/trace/beauty/move_mount_flags.sh6
-rw-r--r--tools/perf/trace/beauty/prctl.c2
-rwxr-xr-xtools/perf/trace/beauty/prctl_option.sh6
-rwxr-xr-xtools/perf/trace/beauty/rename_flags.sh2
-rwxr-xr-xtools/perf/trace/beauty/sndrv_ctl_ioctl.sh4
-rwxr-xr-xtools/perf/trace/beauty/sndrv_pcm_ioctl.sh4
-rw-r--r--tools/perf/trace/beauty/statx.c67
-rwxr-xr-xtools/perf/trace/beauty/statx_mask.sh23
-rw-r--r--tools/perf/trace/beauty/sync_file_range.c11
-rwxr-xr-xtools/perf/trace/beauty/sync_file_range.sh2
-rwxr-xr-xtools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh6
-rwxr-xr-xtools/perf/trace/beauty/usbdevfs_ioctl.sh6
-rwxr-xr-xtools/perf/trace/beauty/vhost_virtio_ioctl.sh6
-rwxr-xr-xtools/perf/trace/beauty/x86_arch_prctl.sh4
-rw-r--r--tools/perf/ui/browsers/Build1
-rw-r--r--tools/perf/ui/browsers/annotate-data.c312
-rw-r--r--tools/perf/ui/browsers/annotate.c17
-rw-r--r--tools/perf/ui/browsers/hists.c31
-rw-r--r--tools/perf/ui/hist.c92
-rw-r--r--tools/perf/util/Build15
-rw-r--r--tools/perf/util/annotate-data.c1527
-rw-r--r--tools/perf/util/annotate-data.h74
-rw-r--r--tools/perf/util/annotate.c2207
-rw-r--r--tools/perf/util/annotate.h129
-rw-r--r--tools/perf/util/auxtrace.c8
-rw-r--r--tools/perf/util/bpf_kwork.c16
-rw-r--r--tools/perf/util/bpf_kwork_top.c12
-rw-r--r--tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c21
-rw-r--r--tools/perf/util/bpf_skel/bench_uprobe.bpf.c16
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c5
-rw-r--r--tools/perf/util/build-id.c114
-rw-r--r--tools/perf/util/build-id.h2
-rw-r--r--tools/perf/util/cpumap.c12
-rw-r--r--tools/perf/util/debug.c3
-rw-r--r--tools/perf/util/debug.h1
-rw-r--r--tools/perf/util/disasm.c1837
-rw-r--r--tools/perf/util/disasm.h112
-rw-r--r--tools/perf/util/dso.c63
-rw-r--r--tools/perf/util/dso.h100
-rw-r--r--tools/perf/util/dsos.c327
-rw-r--r--tools/perf/util/dsos.h24
-rw-r--r--tools/perf/util/dump-insn.h1
-rw-r--r--tools/perf/util/dwarf-aux.c320
-rw-r--r--tools/perf/util/dwarf-aux.h33
-rw-r--r--tools/perf/util/genelf.h3
-rw-r--r--tools/perf/util/help-unknown-cmd.c51
-rw-r--r--tools/perf/util/hist.c12
-rw-r--r--tools/perf/util/hist.h209
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c2
-rw-r--r--tools/perf/util/intel-pt.c2
-rw-r--r--tools/perf/util/machine.c168
-rw-r--r--tools/perf/util/machine.h4
-rw-r--r--tools/perf/util/map.c21
-rw-r--r--tools/perf/util/map.h3
-rw-r--r--tools/perf/util/metricgroup.c7
-rw-r--r--tools/perf/util/metricgroup.h1
-rw-r--r--tools/perf/util/parse-events.c482
-rw-r--r--tools/perf/util/parse-events.h49
-rw-r--r--tools/perf/util/parse-events.l196
-rw-r--r--tools/perf/util/parse-events.y261
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c26
-rw-r--r--tools/perf/util/pmu.c110
-rw-r--r--tools/perf/util/pmu.h6
-rw-r--r--tools/perf/util/pmus.c94
-rw-r--r--tools/perf/util/pmus.h1
-rw-r--r--tools/perf/util/print-events.c55
-rw-r--r--tools/perf/util/print_insn.c75
-rw-r--r--tools/perf/util/print_insn.h8
-rw-r--r--tools/perf/util/probe-event.c7
-rw-r--r--tools/perf/util/python.c10
-rw-r--r--tools/perf/util/record.c2
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c12
-rw-r--r--tools/perf/util/session.c26
-rw-r--r--tools/perf/util/session.h2
-rw-r--r--tools/perf/util/sort.c28
-rw-r--r--tools/perf/util/sort.h190
-rw-r--r--tools/perf/util/stat.c2
-rw-r--r--tools/perf/util/stat.h1
-rw-r--r--tools/perf/util/svghelper.c20
-rw-r--r--tools/perf/util/values.h1
-rw-r--r--tools/perf/util/vdso.c48
-rw-r--r--tools/power/x86/turbostat/turbostat.818
-rw-r--r--tools/power/x86/turbostat/turbostat.c2017
-rw-r--r--tools/testing/cxl/test/cxl.c10
-rw-r--r--tools/testing/kunit/configs/all_tests.config3
-rw-r--r--tools/testing/radix-tree/linux/kernel.h2
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h2
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_htab.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_list.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c2
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab.c2
-rw-r--r--tools/testing/selftests/bpf/progs/arena_list.c10
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c10
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c68
-rw-r--r--tools/testing/selftests/dmabuf-heaps/config3
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/settings1
-rw-r--r--tools/testing/selftests/exec/Makefile10
-rwxr-xr-xtools/testing/selftests/exec/binfmt_script.py10
-rw-r--r--tools/testing/selftests/exec/execveat.c12
-rw-r--r--tools/testing/selftests/exec/load_address.c34
-rw-r--r--tools/testing/selftests/exec/recursion-depth.c53
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc2
-rw-r--r--tools/testing/selftests/iommu/config2
-rw-r--r--tools/testing/selftests/kselftest.h43
-rw-r--r--tools/testing/selftests/kselftest_harness.h19
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c4
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h11
-rw-r--r--tools/testing/selftests/kvm/max_guest_memory_test.c15
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c2
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c39
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_counters_test.c20
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c60
-rw-r--r--tools/testing/selftests/lib.mk9
-rw-r--r--tools/testing/selftests/mm/.gitignore2
-rw-r--r--tools/testing/selftests/mm/Makefile4
-rw-r--r--tools/testing/selftests/mm/gup_test.c2
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c135
-rw-r--r--tools/testing/selftests/mm/mdwe_test.c1
-rw-r--r--tools/testing/selftests/mm/memfd_secret.c51
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c15
-rw-r--r--tools/testing/selftests/mm/mremap_test.c204
-rw-r--r--tools/testing/selftests/mm/mseal_test.c1893
-rw-r--r--tools/testing/selftests/mm/protection_keys.c34
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh15
-rw-r--r--tools/testing/selftests/mm/seal_elf.c179
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c4
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c4
-rw-r--r--tools/testing/selftests/mm/uffd-common.c3
-rw-r--r--tools/testing/selftests/mm/uffd-common.h2
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c13
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c78
-rw-r--r--tools/testing/selftests/mm/vm_util.h2
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c783
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh9
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh4
-rw-r--r--tools/testing/selftests/net/reuseaddr_conflict.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/proc.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c12
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c23
-rw-r--r--tools/testing/selftests/net/tcp_ao/setsockopt-closed.c2
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_mdb.sh205
-rw-r--r--tools/testing/selftests/net/tls.c34
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh10
-rw-r--r--tools/testing/selftests/net/udpgso.c2
-rw-r--r--tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/cbo.c2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.h10
-rw-r--r--tools/testing/selftests/seccomp/settings2
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/sud_test.c14
-rw-r--r--tools/testing/selftests/timers/posix_timers.c105
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c73
-rwxr-xr-xtools/testing/selftests/turbostat/defcolumns.py60
-rw-r--r--tools/testing/selftests/x86/test_shadow_stack.c67
-rw-r--r--tools/writeback/wb_monitor.py172
-rw-r--r--virt/kvm/kvm_main.c7
-rw-r--r--virt/kvm/kvm_mm.h6
-rw-r--r--virt/kvm/pfncache.c50
2472 files changed, 52738 insertions, 23100 deletions
diff --git a/.mailmap b/.mailmap
index 2216b5d5c84e44..9f41b3906b6637 100644
--- a/.mailmap
+++ b/.mailmap
@@ -20,6 +20,7 @@ Adam Oldham <oldhamca@gmail.com>
Adam Radford <aradford@gmail.com>
Adriana Reus <adi.reus@gmail.com> <adriana.reus@intel.com>
Adrian Bunk <bunk@stusta.de>
+Ajay Kaher <ajay.kaher@broadcom.com> <akaher@vmware.com>
Akhil P Oommen <quic_akhilpo@quicinc.com> <akhilpo@codeaurora.org>
Alan Cox <alan@lxorguk.ukuu.org.uk>
Alan Cox <root@hraefn.swansea.linux.org.uk>
@@ -36,6 +37,17 @@ Alexei Avshalom Lazar <quic_ailizaro@quicinc.com> <ailizaro@codeaurora.org>
Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
+Alexey Makhalov <alexey.amakhalov@broadcom.com> <amakhalov@vmware.com>
+Alex Elder <elder@kernel.org>
+Alex Elder <elder@kernel.org> <aelder@sgi.com>
+Alex Elder <elder@kernel.org> <alex.elder@linaro.org>
+Alex Elder <elder@kernel.org> <alex.elder@linary.org>
+Alex Elder <elder@kernel.org> <elder@dreamhost.com>
+Alex Elder <elder@kernel.org> <elder@dreawmhost.com>
+Alex Elder <elder@kernel.org> <elder@ieee.org>
+Alex Elder <elder@kernel.org> <elder@inktank.com>
+Alex Elder <elder@kernel.org> <elder@linaro.org>
+Alex Elder <elder@kernel.org> <elder@newdream.net>
Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com>
Alex Shi <alexs@kernel.org> <alex.shi@intel.com>
Alex Shi <alexs@kernel.org> <alex.shi@linaro.org>
@@ -96,6 +108,8 @@ Ben Widawsky <bwidawsk@kernel.org> <ben@bwidawsk.net>
Ben Widawsky <bwidawsk@kernel.org> <ben.widawsky@intel.com>
Ben Widawsky <bwidawsk@kernel.org> <benjamin.widawsky@intel.com>
Benjamin Poirier <benjamin.poirier@gmail.com> <bpoirier@suse.de>
+Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@gmail.com>
+Benjamin Tissoires <bentiss@kernel.org> <benjamin.tissoires@redhat.com>
Bjorn Andersson <andersson@kernel.org> <bjorn@kryo.se>
Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@linaro.org>
Bjorn Andersson <andersson@kernel.org> <bjorn.andersson@sonymobile.com>
@@ -110,6 +124,7 @@ Brendan Higgins <brendan.higgins@linux.dev> <brendanhiggins@google.com>
Brian Avery <b.avery@hp.com>
Brian King <brking@us.ibm.com>
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
+Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com>
Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org>
Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org>
@@ -340,7 +355,8 @@ Lee Jones <lee@kernel.org> <joneslee@google.com>
Lee Jones <lee@kernel.org> <lee.jones@canonical.com>
Lee Jones <lee@kernel.org> <lee.jones@linaro.org>
Lee Jones <lee@kernel.org> <lee@ubuntu.com>
-Leonard Crestez <leonard.crestez@nxp.com> Leonard Crestez <cdleonard@gmail.com>
+Leonard Crestez <cdleonard@gmail.com> <leonard.crestez@nxp.com>
+Leonard Crestez <cdleonard@gmail.com> <leonard.crestez@intel.com>
Leonardo Bras <leobras.c@gmail.com> <leonardo@linux.ibm.com>
Leonard Göhrs <l.goehrs@pengutronix.de>
Leonid I Ananiev <leonid.i.ananiev@intel.com>
@@ -442,7 +458,8 @@ Mythri P K <mythripk@ti.com>
Nadav Amit <nadav.amit@gmail.com> <namit@vmware.com>
Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il>
Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
-Naoya Horiguchi <naoya.horiguchi@nec.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <naoya.horiguchi@nec.com>
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
@@ -495,9 +512,11 @@ Praveen BP <praveenbp@ti.com>
Pradeep Kumar Chitrapu <quic_pradeepc@quicinc.com> <pradeepc@codeaurora.org>
Prasad Sodagudi <quic_psodagud@quicinc.com> <psodagud@codeaurora.org>
Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
+Puranjay Mohan <puranjay@kernel.org> <puranjay12@gmail.com>
Qais Yousef <qyousef@layalina.io> <qais.yousef@imgtec.com>
Qais Yousef <qyousef@layalina.io> <qais.yousef@arm.com>
-Quentin Monnet <quentin@isovalent.com> <quentin.monnet@netronome.com>
+Quentin Monnet <qmo@kernel.org> <quentin.monnet@netronome.com>
+Quentin Monnet <qmo@kernel.org> <quentin@isovalent.com>
Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com>
Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl>
Rajeev Nandan <quic_rajeevny@quicinc.com> <rajeevny@codeaurora.org>
@@ -519,6 +538,7 @@ Rémi Denis-Courmont <rdenis@simphalempin.com>
Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com>
+Richard Genoud <richard.genoud@bootlin.com> <richard.genoud@gmail.com>
Richard Leitner <richard.leitner@linux.dev> <dev@g0hl1n.net>
Richard Leitner <richard.leitner@linux.dev> <me@g0hl1n.net>
Richard Leitner <richard.leitner@linux.dev> <richard.leitner@skidata.com>
@@ -527,6 +547,7 @@ Rocky Liao <quic_rjliao@quicinc.com> <rjliao@codeaurora.org>
Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru>
+Ronak Doshi <ronak.doshi@broadcom.com> <doshir@vmware.com>
Muchun Song <muchun.song@linux.dev> <songmuchun@bytedance.com>
Muchun Song <muchun.song@linux.dev> <smuchun@gmail.com>
Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com>
@@ -649,6 +670,7 @@ Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com>
Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.org>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.com>
+Vishnu Dasa <vishnu.dasa@broadcom.com> <vdasa@vmware.com>
Vivek Aknurwar <quic_viveka@quicinc.com> <viveka@codeaurora.org>
Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
diff --git a/CREDITS b/CREDITS
index c55c5a0ee4ff65..0107047f807bfc 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3146,6 +3146,10 @@ S: Triftstra=DFe 55
S: 13353 Berlin
S: Germany
+N: Gustavo Pimental
+E: gustavo.pimentel@synopsys.com
+D: PCI driver for Synopsys DesignWare
+
N: Emanuel Pirker
E: epirker@edu.uni-klu.ac.at
D: AIC5800 IEEE 1394, RAW I/O on 1394
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon
index dad4d5ffd78656..cef6e1d20b1855 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-damon
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon
@@ -314,9 +314,9 @@ Date: Dec 2022
Contact: SeongJae Park <sj@kernel.org>
Description: Writing to and reading from this file sets and gets the type of
the memory of the interest. 'anon' for anonymous pages,
- 'memcg' for specific memory cgroup, 'addr' for address range
- (an open-ended interval), or 'target' for DAMON monitoring
- target can be written and read.
+ 'memcg' for specific memory cgroup, 'young' for young pages,
+ 'addr' for address range (an open-ended interval), or 'target'
+ for DAMON monitoring target can be written and read.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/filters/<F>/memcg_path
Date: Dec 2022
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage b/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage
new file mode 100644
index 00000000000000..7bfbb9cc2c1130
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-transparent-hugepage
@@ -0,0 +1,18 @@
+What: /sys/kernel/mm/transparent_hugepage/
+Date: April 2024
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description:
+ /sys/kernel/mm/transparent_hugepage/ contains a number of files and
+ subdirectories,
+
+ - defrag
+ - enabled
+ - hpage_pmd_size
+ - khugepaged
+ - shmem_enabled
+ - use_zero_page
+ - subdirectories of the form hugepages-<size>kB, where <size>
+ is the page size of the hugepages supported by the kernel/CPU
+ combination.
+
+ See Documentation/admin-guide/mm/transhuge.rst for details.
diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index ee2b0030d4168a..091e8bb388875d 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -466,6 +466,11 @@ of equal or greater size:::
#recompress idle pages larger than 2000 bytes
echo "type=idle threshold=2000" > /sys/block/zramX/recompress
+It is also possible to limit the number of pages zram re-compression will
+attempt to recompress:::
+
+ echo "type=huge_idle max_pages=42" > /sys/block/zramX/recompress
+
Recompression of idle pages requires memory tracking.
During re-compression for every page, that matches re-compression criteria,
diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index ca7d9402f6be16..9cde26d3384355 100644
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -300,14 +300,14 @@ When oom event notifier is registered, event will be delivered.
Lock order is as follows::
- Page lock (PG_locked bit of page->flags)
+ folio_lock
mm->page_table_lock or split pte_lock
folio_memcg_lock (memcg->move_lock)
mapping->i_pages lock
lruvec->lru_lock.
Per-node-per-memcgroup LRU (cgroup's private LRU) is guarded by
-lruvec->lru_lock; PG_lru bit of page->flags is cleared before
+lruvec->lru_lock; the folio LRU flag is cleared before
isolating a page from its LRU under lruvec->lru_lock.
.. _cgroup-v1-memory-kernel-extension:
@@ -802,8 +802,8 @@ a page or a swap can be moved only when it is charged to the task's current
| | anonymous pages, file pages (and swaps) in the range mmapped by the task |
| | will be moved even if the task hasn't done page fault, i.e. they might |
| | not be the task's "RSS", but other task's "RSS" that maps the same file. |
-| | And mapcount of the page is ignored (the page can be moved even if |
-| | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to |
+| | The mapcount of the page is ignored (the page can be moved independent |
+| | of the mapcount). You must enable Swap Extension (see 2.4) to |
| | enable move of swap charges. |
+---+--------------------------------------------------------------------------+
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 17e6e956515640..0270517ade47cf 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1296,17 +1296,10 @@ PAGE_SIZE multiple when read back.
This is a simple interface to trigger memory reclaim in the
target cgroup.
- This file accepts a single key, the number of bytes to reclaim.
- No nested keys are currently supported.
-
Example::
echo "1G" > memory.reclaim
- The interface can be later extended with nested keys to
- configure the reclaim behavior. For example, specify the
- type of memory to reclaim from (anon, file, ..).
-
Please note that the kernel can over or under reclaim from
the target cgroup. If less bytes are reclaimed than the
specified amount, -EAGAIN is returned.
@@ -1318,6 +1311,17 @@ PAGE_SIZE multiple when read back.
This means that the networking layer will not adapt based on
reclaim induced by memory.reclaim.
+The following nested keys are defined.
+
+ ========== ================================
+ swappiness Swappiness value to reclaim with
+ ========== ================================
+
+ Specifying a swappiness value instructs the kernel to perform
+ the reclaim with that swappiness value. Note that this has the
+ same semantics as vm.swappiness applied to memcg reclaim with
+ all the existing limitations and potential future extensions.
+
memory.peak
A read-only single value file which exists on non-root
cgroups.
diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index cce768afec6bed..25a04cda4c2c05 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
the BHB might be shared across privilege levels even in the presence of
Enhanced IBRS.
-Currently the only known real-world BHB attack vector is via
-unprivileged eBPF. Therefore, it's highly recommended to not enable
-unprivileged eBPF, especially when eIBRS is used (without retpolines).
-For a full mitigation against BHB attacks, it's recommended to use
-retpolines (or eIBRS combined with retpolines).
+Previously the only known real-world BHB attack vector was via unprivileged
+eBPF. Further research has found attacks that don't require unprivileged eBPF.
+For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
+use the BHB clearing sequence.
Attack scenarios
----------------
@@ -430,6 +429,23 @@ The possible values in this file are:
'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
=========================== =======================================================
+ - Branch History Injection (BHI) protection status:
+
+.. list-table::
+
+ * - BHI: Not affected
+ - System is not affected
+ * - BHI: Retpoline
+ - System is protected by retpoline
+ * - BHI: BHI_DIS_S
+ - System is protected by BHI_DIS_S
+ * - BHI: SW loop, KVM SW loop
+ - System is protected by software clearing sequence
+ * - BHI: Vulnerable
+ - System is vulnerable to BHI
+ * - BHI: Vulnerable, KVM: SW loop
+ - System is vulnerable; KVM is protected by software clearing sequence
+
Full mitigation might require a microcode update from the CPU
vendor. When the necessary microcode is not available, the kernel will
report vulnerability.
@@ -484,7 +500,11 @@ Spectre variant 2
Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
boot, by setting the IBRS bit, and they're automatically protected against
- Spectre v2 variant attacks.
+ some Spectre v2 variant attacks. The BHB can still influence the choice of
+ indirect branch predictor entry, and although branch predictor entries are
+ isolated between modes when eIBRS is enabled, the BHB itself is not isolated
+ between modes. Systems which support BHI_DIS_S will set it to protect against
+ BHI attacks.
On Intel's enhanced IBRS systems, this includes cross-thread branch target
injections on SMT systems (STIBP). In other words, Intel eIBRS enables
@@ -638,6 +658,18 @@ kernel command line.
spectre_v2=off. Spectre variant 1 mitigations
cannot be disabled.
+ spectre_bhi=
+
+ [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. This setting affects the deployment
+ of the HW BHI control and the SW BHB clearing sequence.
+
+ on
+ (default) Enable the HW or SW mitigation as
+ needed.
+ off
+ Disable the mitigation.
+
For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
Mitigation selection guide
diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
index 0302a93b1d40b7..5376890adbeb07 100644
--- a/Documentation/admin-guide/kdump/kdump.rst
+++ b/Documentation/admin-guide/kdump/kdump.rst
@@ -136,10 +136,6 @@ System kernel config options
CONFIG_KEXEC_CORE=y
- Subsequently, CRASH_CORE is selected by KEXEC_CORE::
-
- CONFIG_CRASH_CORE=y
-
2) Enable "sysfs file system support" in "Filesystem" -> "Pseudo
filesystems." This is usually enabled by default::
@@ -168,6 +164,10 @@ Dump-capture kernel config options (Arch Independent)
CONFIG_CRASH_DUMP=y
+ And this will select VMCORE_INFO and CRASH_RESERVE::
+ CONFIG_VMCORE_INFO=y
+ CONFIG_CRASH_RESERVE=y
+
2) Enable "/proc/vmcore support" under "Filesystems" -> "Pseudo filesystems"::
CONFIG_PROC_VMCORE=y
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bb884c14b2f679..827b51450cc28d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2148,6 +2148,12 @@
Format: 0 | 1
Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON.
+ init_mlocked_on_free= [MM] Fill freed userspace memory with zeroes if
+ it was mlock'ed and not explicitly munlock'ed
+ afterwards.
+ Format: 0 | 1
+ Default set by CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON
+
init_pkru= [X86] Specify the default memory protection keys rights
register contents for all processes. 0x55555554 by
default (disallow access to all but pkey 0). Can
@@ -3423,6 +3429,9 @@
arch-independent options, each of which is an
aggregation of existing arch-specific options.
+ Note, "mitigations" is supported if and only if the
+ kernel was built with CPU_MITIGATIONS=y.
+
off
Disable all optional CPU mitigations. This
improves system performance, but it may also
@@ -3444,6 +3453,7 @@
retbleed=off [X86]
spec_rstack_overflow=off [X86]
spec_store_bypass_disable=off [X86,PPC]
+ spectre_bhi=off [X86]
spectre_v2_user=off [X86]
srbds=off [X86,INTEL]
ssbd=force-off [ARM64]
@@ -6063,6 +6073,15 @@
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/admin-guide/laptops/sonypi.rst
+ spectre_bhi= [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. This setting affects the
+ deployment of the HW BHI control and the SW BHB
+ clearing sequence.
+
+ on - (default) Enable the HW or SW mitigation
+ as needed.
+ off - Disable the mitigation.
+
spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
The default operation protects the kernel from
@@ -6599,7 +6618,7 @@
To turn off having tracepoints sent to printk,
echo 0 > /proc/sys/kernel/tracepoint_printk
Note, echoing 1 into this file without the
- tracepoint_printk kernel cmdline option has no effect.
+ tp_printk kernel cmdline option has no effect.
The tp_printk_stop_on_boot (see below) can also be used
to stop the printing of events to console at
diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst
index 6fce035fdbf5c7..69bc8fabf3781c 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -410,19 +410,19 @@ in the numeric order.
Each filter directory contains six files, namely ``type``, ``matcing``,
``memcg_path``, ``addr_start``, ``addr_end``, and ``target_idx``. To ``type``
-file, you can write one of four special keywords: ``anon`` for anonymous pages,
-``memcg`` for specific memory cgroup, ``addr`` for specific address range (an
-open-ended interval), or ``target`` for specific DAMON monitoring target
-filtering. In case of the memory cgroup filtering, you can specify the memory
-cgroup of the interest by writing the path of the memory cgroup from the
-cgroups mount point to ``memcg_path`` file. In case of the address range
-filtering, you can specify the start and end address of the range to
-``addr_start`` and ``addr_end`` files, respectively. For the DAMON monitoring
-target filtering, you can specify the index of the target between the list of
-the DAMON context's monitoring targets list to ``target_idx`` file. You can
-write ``Y`` or ``N`` to ``matching`` file to filter out pages that does or does
-not match to the type, respectively. Then, the scheme's action will not be
-applied to the pages that specified to be filtered out.
+file, you can write one of five special keywords: ``anon`` for anonymous pages,
+``memcg`` for specific memory cgroup, ``young`` for young pages, ``addr`` for
+specific address range (an open-ended interval), or ``target`` for specific
+DAMON monitoring target filtering. In case of the memory cgroup filtering, you
+can specify the memory cgroup of the interest by writing the path of the memory
+cgroup from the cgroups mount point to ``memcg_path`` file. In case of the
+address range filtering, you can specify the start and end address of the range
+to ``addr_start`` and ``addr_end`` files, respectively. For the DAMON
+monitoring target filtering, you can specify the index of the target between
+the list of the DAMON context's monitoring targets list to ``target_idx`` file.
+You can write ``Y`` or ``N`` to ``matching`` file to filter out pages that does
+or does not match to the type, respectively. Then, the scheme's action will
+not be applied to the pages that specified to be filtered out.
For example, below restricts a DAMOS action to be applied to only non-anonymous
pages of all memory cgroups except ``/having_care_already``.::
diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst
index e4d4b4a8dc9736..f34a0d798d5b53 100644
--- a/Documentation/admin-guide/mm/hugetlbpage.rst
+++ b/Documentation/admin-guide/mm/hugetlbpage.rst
@@ -376,6 +376,13 @@ Note that the number of overcommit and reserve pages remain global quantities,
as we don't know until fault time, when the faulting task's mempolicy is
applied, from which node the huge page allocation will be attempted.
+The hugetlb may be migrated between the per-node hugepages pool in the following
+scenarios: memory offline, memory failure, longterm pinning, syscalls(mbind,
+migrate_pages and move_pages), alloc_contig_range() and alloc_contig_pages().
+Now only memory offline, memory failure and syscalls allow fallbacking to allocate
+a new hugetlb on a different node if the current node is unable to allocate during
+hugetlb migration, that means these 3 cases can break the per-node hugepages pool.
+
.. _using_huge_pages:
Using Huge Pages
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 04eb45a2f94069..076443cc10a6c7 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -278,7 +278,8 @@ collapsed, resulting fewer pages being collapsed into
THPs, and lower memory access performance.
``max_ptes_shared`` specifies how many pages can be shared across multiple
-processes. Exceeding the number would block the collapse::
+processes. khugepaged might treat pages of THPs as shared if any page of
+that THP is shared. Exceeding the number would block the collapse::
/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_shared
@@ -369,7 +370,7 @@ monitor how successfully the system is providing huge pages for use.
thp_fault_alloc
is incremented every time a huge page is successfully
- allocated to handle a page fault.
+ allocated and charged to handle a page fault.
thp_collapse_alloc
is incremented by khugepaged when it has found
@@ -377,7 +378,7 @@ thp_collapse_alloc
successfully allocated a new huge page to store the data.
thp_fault_fallback
- is incremented if a page fault fails to allocate
+ is incremented if a page fault fails to allocate or charge
a huge page and instead falls back to using small pages.
thp_fault_fallback_charge
@@ -447,6 +448,34 @@ thp_swpout_fallback
Usually because failed to allocate some continuous swap space
for the huge page.
+In /sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/stats, There are
+also individual counters for each huge page size, which can be utilized to
+monitor the system's effectiveness in providing huge pages for usage. Each
+counter has its own corresponding file.
+
+anon_fault_alloc
+ is incremented every time a huge page is successfully
+ allocated and charged to handle a page fault.
+
+anon_fault_fallback
+ is incremented if a page fault fails to allocate or charge
+ a huge page and instead falls back to using huge pages with
+ lower orders or small pages.
+
+anon_fault_fallback_charge
+ is incremented if a page fault fails to charge a huge page and
+ instead falls back to using huge pages with lower orders or
+ small pages even though the allocation was successful.
+
+anon_swpout
+ is incremented every time a huge page is swapped out in one
+ piece without splitting.
+
+anon_swpout_fallback
+ is incremented if a huge page has to be split before swapout.
+ Usually because failed to allocate some continuous swap space
+ for the huge page.
+
As the system ages, allocating huge pages may be expensive as the
system uses memory compaction to copy data around memory to free a
huge page for use. There are some counters in ``/proc/vmstat`` to help
diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst
index b42132969e3157..3598dcd7dbe7e1 100644
--- a/Documentation/admin-guide/mm/zswap.rst
+++ b/Documentation/admin-guide/mm/zswap.rst
@@ -111,35 +111,6 @@ checked if it is a same-value filled page before compressing it. If true, the
compressed length of the page is set to zero and the pattern or same-filled
value is stored.
-Same-value filled pages identification feature is enabled by default and can be
-disabled at boot time by setting the ``same_filled_pages_enabled`` attribute
-to 0, e.g. ``zswap.same_filled_pages_enabled=0``. It can also be enabled and
-disabled at runtime using the sysfs ``same_filled_pages_enabled``
-attribute, e.g.::
-
- echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
-
-When zswap same-filled page identification is disabled at runtime, it will stop
-checking for the same-value filled pages during store operation.
-In other words, every page will be then considered non-same-value filled.
-However, the existing pages which are marked as same-value filled pages remain
-stored unchanged in zswap until they are either loaded or invalidated.
-
-In some circumstances it might be advantageous to make use of just the zswap
-ability to efficiently store same-filled pages without enabling the whole
-compressed page storage.
-In this case the handling of non-same-value pages by zswap (enabled by default)
-can be disabled by setting the ``non_same_filled_pages_enabled`` attribute
-to 0, e.g. ``zswap.non_same_filled_pages_enabled=0``.
-It can also be enabled and disabled at runtime using the sysfs
-``non_same_filled_pages_enabled`` attribute, e.g.::
-
- echo 1 > /sys/module/zswap/parameters/non_same_filled_pages_enabled
-
-Disabling both ``zswap.same_filled_pages_enabled`` and
-``zswap.non_same_filled_pages_enabled`` effectively disables accepting any new
-pages by zswap.
-
To prevent zswap from shrinking pool when zswap is full and there's a high
pressure on swap (this will result in flipping pages in and out zswap pool
without any real benefit but with a performance drop for the system), a
@@ -155,7 +126,7 @@ Setting this parameter to 100 will disable the hysteresis.
Some users cannot tolerate the swapping that comes with zswap store failures
and zswap writebacks. Swapping can be disabled entirely (without disabling
-zswap itself) on a cgroup-basis as follows:
+zswap itself) on a cgroup-basis as follows::
echo 0 > /sys/fs/cgroup/<cgroup-name>/memory.zswap.writeback
@@ -166,7 +137,7 @@ writeback (because the same pages might be rejected again and again).
When there is a sizable amount of cold memory residing in the zswap pool, it
can be advantageous to proactively write these cold pages to swap and reclaim
the memory for other use cases. By default, the zswap shrinker is disabled.
-User can enable it as follows:
+User can enable it as follows::
echo Y > /sys/module/zswap/parameters/shrinker_enabled
diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst
index e0174d20809a18..5cc248d18c63a7 100644
--- a/Documentation/admin-guide/perf/hisi-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pmu.rst
@@ -20,7 +20,6 @@ interrupt, and the PMU driver shall register perf PMU drivers like L3C,
HHA and DDRC etc. The available events and configuration options shall
be described in the sysfs, see:
-/sys/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>/, or
/sys/bus/event_source/devices/hisi_sccl{X}_<l3c{Y}/hha{Y}/ddrc{Y}>.
The "perf list" command shall list the available events from sysfs.
diff --git a/Documentation/admin-guide/perf/hns3-pmu.rst b/Documentation/admin-guide/perf/hns3-pmu.rst
index 75a40846d47f5f..1195e570f2d6b5 100644
--- a/Documentation/admin-guide/perf/hns3-pmu.rst
+++ b/Documentation/admin-guide/perf/hns3-pmu.rst
@@ -16,7 +16,7 @@ HNS3 PMU driver
The HNS3 PMU driver registers a perf PMU with the name of its sicl id.::
- /sys/devices/hns3_pmu_sicl_<sicl_id>
+ /sys/bus/event_source/devices/hns3_pmu_sicl_<sicl_id>
PMU driver provides description of available events, filter modes, format,
identifier and cpumask in sysfs.
@@ -40,9 +40,9 @@ device.
Example usage of checking event code and subevent code::
- $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time
+ $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_time
config=0x00204
- $# cat /sys/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num
+ $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/events/dly_tx_normal_to_mac_packet_num
config=0x10204
Each performance statistic has a pair of events to get two values to
@@ -60,7 +60,7 @@ computation to calculate real performance data is:::
Example usage of checking supported filter mode::
- $# cat /sys/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num
+ $# cat /sys/bus/event_source/devices/hns3_pmu_sicl_0/filtermode/bw_ssu_rpu_byte_num
filter mode supported: global/port/port-tc/func/func-queue/
Example usage of perf::
diff --git a/Documentation/admin-guide/perf/qcom_l2_pmu.rst b/Documentation/admin-guide/perf/qcom_l2_pmu.rst
index c130178a4a5559..c37c6be9b8d889 100644
--- a/Documentation/admin-guide/perf/qcom_l2_pmu.rst
+++ b/Documentation/admin-guide/perf/qcom_l2_pmu.rst
@@ -10,7 +10,7 @@ There is one logical L2 PMU exposed, which aggregates the results from
the physical PMUs.
The driver provides a description of its available events and configuration
-options in sysfs, see /sys/devices/l2cache_0.
+options in sysfs, see /sys/bus/event_source/devices/l2cache_0.
The "format" directory describes the format of the events.
diff --git a/Documentation/admin-guide/perf/qcom_l3_pmu.rst b/Documentation/admin-guide/perf/qcom_l3_pmu.rst
index a3d014a46bfdb3..a66556b7e985c1 100644
--- a/Documentation/admin-guide/perf/qcom_l3_pmu.rst
+++ b/Documentation/admin-guide/perf/qcom_l3_pmu.rst
@@ -9,7 +9,7 @@ PMU with device name l3cache_<socket>_<instance>. User space is responsible
for aggregating across slices.
The driver provides a description of its available events and configuration
-options in sysfs, see /sys/devices/l3cache*. Given that these are uncore PMUs
+options in sysfs, see /sys/bus/event_source/devices/l3cache*. Given that these are uncore PMUs
the driver also exposes a "cpumask" sysfs attribute which contains a mask
consisting of one CPU per socket which will be used to handle all the PMU
events on that socket.
diff --git a/Documentation/admin-guide/perf/thunderx2-pmu.rst b/Documentation/admin-guide/perf/thunderx2-pmu.rst
index 01f158238ae1ee..9255f7bf945202 100644
--- a/Documentation/admin-guide/perf/thunderx2-pmu.rst
+++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst
@@ -22,7 +22,7 @@ The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
L3C devices. Each PMU can be used to count up to 4 (DMC/L3C) or up to 8
(CCPI2) events simultaneously. The PMUs provide a description of their
available events and configuration options under sysfs, see
-/sys/devices/uncore_<l3c_S/dmc_S/ccpi2_S/>; S is the socket id.
+/sys/bus/event_source/devices/uncore_<l3c_S/dmc_S/ccpi2_S/>; S is the socket id.
The driver does not support sampling, therefore "perf record" will not
work. Per-task perf sessions are also not supported.
diff --git a/Documentation/admin-guide/perf/xgene-pmu.rst b/Documentation/admin-guide/perf/xgene-pmu.rst
index 644f8ed89152d6..98ccb8e777c4d1 100644
--- a/Documentation/admin-guide/perf/xgene-pmu.rst
+++ b/Documentation/admin-guide/perf/xgene-pmu.rst
@@ -13,7 +13,7 @@ PMU (perf) driver
The xgene-pmu driver registers several perf PMU drivers. Each of the perf
driver provides description of its available events and configuration options
-in sysfs, see /sys/devices/<l3cX/iobX/mcbX/mcX>/.
+in sysfs, see /sys/bus/event_source/devices/<l3cX/iobX/mcbX/mcX>/.
The "format" directory describes format of the config (event ID),
config1 (agent ID) fields of the perf_event_attr structure. The "events"
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index c59889de122b9f..e86c968a7a0ece 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -43,6 +43,7 @@ Currently, these files are in /proc/sys/vm:
- legacy_va_layout
- lowmem_reserve_ratio
- max_map_count
+- mem_profiling (only if CONFIG_MEM_ALLOC_PROFILING=y)
- memory_failure_early_kill
- memory_failure_recovery
- min_free_kbytes
@@ -425,6 +426,21 @@ e.g., up to one or two maps per allocation.
The default value is 65530.
+mem_profiling
+==============
+
+Enable memory profiling (when CONFIG_MEM_ALLOC_PROFILING=y)
+
+1: Enable memory profiling.
+
+0: Disable memory profiling.
+
+Enabling memory profiling introduces a small performance overhead for all
+memory allocations.
+
+The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.
+
+
memory_failure_early_kill:
==========================
diff --git a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
index d3504826f40154..c389d4fd7599df 100644
--- a/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
+++ b/Documentation/admin-guide/verify-bugs-and-bisect-regressions.rst
@@ -29,7 +29,7 @@ The essence of the process (aka 'TL;DR')
========================================
*[If you are new to building or bisecting Linux, ignore this section and head
-over to the* ":ref:`step-by-step guide<introguide_bissbs>`" *below. It utilizes
+over to the* ':ref:`step-by-step guide <introguide_bissbs>`' *below. It utilizes
the same commands as this section while describing them in brief fashion. The
steps are nevertheless easy to follow and together with accompanying entries
in a reference section mention many alternatives, pitfalls, and additional
@@ -38,8 +38,8 @@ aspects, all of which might be essential in your present case.]*
**In case you want to check if a bug is present in code currently supported by
developers**, execute just the *preparations* and *segment 1*; while doing so,
consider the newest Linux kernel you regularly use to be the 'working' kernel.
-In the following example that's assumed to be 6.0.13, which is why the sources
-of 6.0 will be used to prepare the .config file.
+In the following example that's assumed to be 6.0, which is why its sources
+will be used to prepare the .config file.
**In case you face a regression**, follow the steps at least till the end of
*segment 2*. Then you can submit a preliminary report -- or continue with
@@ -61,7 +61,7 @@ will be considered the 'good' release and used to prepare the .config file.
cd ~/linux/
git remote add -t master stable \
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
- git checkout --detach v6.0
+ git switch --detach v6.0
# * Hint: if you used an existing clone, ensure no stale .config is around.
make olddefconfig
# * Ensure the former command picked the .config of the 'working' kernel.
@@ -87,7 +87,7 @@ will be considered the 'good' release and used to prepare the .config file.
a) Checking out latest mainline code::
cd ~/linux/
- git checkout --force --detach mainline/master
+ git switch --discard-changes --detach mainline/master
b) Build, install, and boot a kernel::
@@ -125,7 +125,7 @@ will be considered the 'good' release and used to prepare the .config file.
a) Start by checking out the sources of the 'good' version::
cd ~/linux/
- git checkout --force --detach v6.0
+ git switch --discard-changes --detach v6.0
b) Build, install, and boot a kernel as described earlier in *segment 1,
section b* -- just feel free to skip the 'du' commands, as you have a rough
@@ -136,8 +136,7 @@ will be considered the 'good' release and used to prepare the .config file.
* **Segment 3**: perform and validate the bisection.
- a) In case your 'broken' version is a stable/longterm release, add the Git
- branch holding it::
+ a) Retrieve the sources for your 'bad' version::
git remote set-branches --add stable linux-6.1.y
git fetch stable
@@ -157,11 +156,12 @@ will be considered the 'good' release and used to prepare the .config file.
works with the newly built kernel. If it does, tell Git by executing
``git bisect good``; if it does not, run ``git bisect bad`` instead.
- All three commands will make Git checkout another commit; then re-execute
+ All three commands will make Git check out another commit; then re-execute
this step (e.g. build, install, boot, and test a kernel to then tell Git
the outcome). Do so again and again until Git shows which commit broke
things. If you run short of disk space during this process, check the
- "Supplementary tasks" section below.
+ section 'Complementary tasks: cleanup during and after the process'
+ below.
d) Once your finished the bisection, put a few things away::
@@ -172,14 +172,17 @@ will be considered the 'good' release and used to prepare the .config file.
e) Try to verify the bisection result::
- git checkout --force --detach mainline/master
+ git switch --discard-changes --detach mainline/master
git revert --no-edit cafec0cacaca0
+ cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
This is optional, as some commits are impossible to revert. But if the
second command worked flawlessly, build, install, and boot one more kernel
- kernel, which should not show the regression.
+ kernel; just this time skip the first command copying the base .config file
+ over, as that already has been taken care off.
-* **Supplementary tasks**: cleanup during and after the process.
+* **Complementary tasks**: cleanup during and after the process.
a) To avoid running out of disk space during a bisection, you might need to
remove some kernels you built earlier. You most likely want to keep those
@@ -202,13 +205,25 @@ will be considered the 'good' release and used to prepare the .config file.
the kernels you built earlier and later you might want to keep around for
a week or two.
+* **Optional task**: test a debug patch or a proposed fix later::
+
+ git fetch mainline
+ git switch --discard-changes --detach mainline/master
+ git apply /tmp/foobars-proposed-fix-v1.patch
+ cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1'
+
+ Build, install, and boot a kernel as described in *segment 1, section b* --
+ but this time omit the first command copying the build configuration over,
+ as that has been taken care of already.
+
.. _introguide_bissbs:
Step-by-step guide on how to verify bugs and bisect regressions
===============================================================
This guide describes how to set up your own Linux kernels for investigating bugs
-or regressions you intent to report. How far you want to follow the instructions
+or regressions you intend to report. How far you want to follow the instructions
depends on your issue:
Execute all steps till the end of *segment 1* to **verify if your kernel problem
@@ -221,15 +236,17 @@ report; instead of the latter your could also head straight on and follow
*segment 3* to **perform a bisection** for a full-fledged regression report
developers are obliged to act upon.
- :ref:`Preparations: set up everything to build your own kernels.<introprep_bissbs>`
+ :ref:`Preparations: set up everything to build your own kernels <introprep_bissbs>`.
- :ref:`Segment 1: try to reproduce the problem with the latest codebase.<introlatestcheck_bissbs>`
+ :ref:`Segment 1: try to reproduce the problem with the latest codebase <introlatestcheck_bissbs>`.
- :ref:`Segment 2: check if the kernels you build work fine.<introworkingcheck_bissbs>`
+ :ref:`Segment 2: check if the kernels you build work fine <introworkingcheck_bissbs>`.
- :ref:`Segment 3: perform a bisection and validate the result.<introbisect_bissbs>`
+ :ref:`Segment 3: perform a bisection and validate the result <introbisect_bissbs>`.
- :ref:`Supplementary tasks: cleanup during and after following this guide.<introclosure_bissbs>`
+ :ref:`Complementary tasks: cleanup during and after following this guide <introclosure_bissbs>`.
+
+ :ref:`Optional tasks: test reverts, patches, or later versions <introoptional_bissbs>`.
The steps in each segment illustrate the important aspects of the process, while
a comprehensive reference section holds additional details for almost all of the
@@ -240,24 +257,35 @@ to get things rolling again.
For further details on how to report Linux kernel issues or regressions check
out Documentation/admin-guide/reporting-issues.rst, which works in conjunction
with this document. It among others explains why you need to verify bugs with
-the latest 'mainline' kernel, even if you face a problem with a kernel from a
-'stable/longterm' series; for users facing a regression it also explains that
-sending a preliminary report after finishing segment 2 might be wise, as the
-regression and its culprit might be known already. For further details on
-what actually qualifies as a regression check out
-Documentation/admin-guide/reporting-regressions.rst.
+the latest 'mainline' kernel (e.g. versions like 6.0, 6.1-rc1, or 6.1-rc6),
+even if you face a problem with a kernel from a 'stable/longterm' series
+(say 6.0.13).
+
+For users facing a regression that document also explains why sending a
+preliminary report after segment 2 might be wise, as the regression and its
+culprit might be known already. For further details on what actually qualifies
+as a regression check out Documentation/admin-guide/reporting-regressions.rst.
+
+If you run into any problems while following this guide or have ideas how to
+improve it, :ref:`please let the kernel developers know <submit_improvements>`.
.. _introprep_bissbs:
Preparations: set up everything to build your own kernels
---------------------------------------------------------
+The following steps lay the groundwork for all further tasks.
+
+Note: the instructions assume you are building and testing on the same
+machine; if you want to compile the kernel on another system, check
+:ref:`Build kernels on a different machine <buildhost_bis>` below.
+
.. _backup_bissbs:
* Create a fresh backup and put system repair and restore tools at hand, just
to be prepared for the unlikely case of something going sideways.
- [:ref:`details<backup_bisref>`]
+ [:ref:`details <backup_bisref>`]
.. _vanilla_bissbs:
@@ -265,7 +293,7 @@ Preparations: set up everything to build your own kernels
builds them automatically. That includes but is not limited to DKMS, openZFS,
VirtualBox, and Nvidia's graphics drivers (including the GPLed kernel module).
- [:ref:`details<vanilla_bisref>`]
+ [:ref:`details <vanilla_bisref>`]
.. _secureboot_bissbs:
@@ -276,48 +304,49 @@ Preparations: set up everything to build your own kernels
their restrictions through a process initiated by
``mokutil --disable-validation``.
- [:ref:`details<secureboot_bisref>`]
+ [:ref:`details <secureboot_bisref>`]
.. _rangecheck_bissbs:
* Determine the kernel versions considered 'good' and 'bad' throughout this
- guide.
+ guide:
- Do you follow this guide to verify if a bug is present in the code developers
- care for? Then consider the mainline release your 'working' kernel (the newest
- one you regularly use) is based on to be the 'good' version; if your 'working'
- kernel for example is 6.0.11, then your 'good' kernel is 6.0.
+ * Do you follow this guide to verify if a bug is present in the code the
+ primary developers care for? Then consider the version of the newest kernel
+ you regularly use currently as 'good' (e.g. 6.0, 6.0.13, or 6.1-rc2).
- In case you face a regression, it depends on the version range where the
- regression was introduced:
+ * Do you face a regression, e.g. something broke or works worse after
+ switching to a newer kernel version? In that case it depends on the version
+ range during which the problem appeared:
- * Something which used to work in Linux 6.0 broke when switching to Linux
- 6.1-rc1? Then henceforth regard 6.0 as the last known 'good' version
- and 6.1-rc1 as the first 'bad' one.
+ * Something regressed when updating from a stable/longterm release
+ (say 6.0.13) to a newer mainline series (like 6.1-rc7 or 6.1) or a
+ stable/longterm version based on one (say 6.1.5)? Then consider the
+ mainline release your working kernel is based on to be the 'good'
+ version (e.g. 6.0) and the first version to be broken as the 'bad' one
+ (e.g. 6.1-rc7, 6.1, or 6.1.5). Note, at this point it is merely assumed
+ that 6.0 is fine; this hypothesis will be checked in segment 2.
- * Some function stopped working when updating from 6.0.11 to 6.1.4? Then for
- the time being consider 6.0 as the last 'good' version and 6.1.4 as
- the 'bad' one. Note, at this point it is merely assumed that 6.0 is fine;
- this assumption will be checked in segment 2.
+ * Something regressed when switching from one mainline version (say 6.0) to
+ a later one (like 6.1-rc1) or a stable/longterm release based on it
+ (say 6.1.5)? Then regard the last working version (e.g. 6.0) as 'good' and
+ the first broken (e.g. 6.1-rc1 or 6.1.5) as 'bad'.
- * A feature you used in 6.0.11 does not work at all or worse in 6.1.13? In
- that case you want to bisect within a stable/longterm series: consider
- 6.0.11 as the last known 'good' version and 6.0.13 as the first 'bad'
- one. Note, in this case you still want to compile and test a mainline kernel
- as explained in segment 1: the outcome will determine if you need to report
- your issue to the regular developers or the stable team.
+ * Something regressed when updating within a stable/longterm series (say
+ from 6.0.13 to 6.0.15)? Then consider those versions as 'good' and 'bad'
+ (e.g. 6.0.13 and 6.0.15), as you need to bisect within that series.
*Note, do not confuse 'good' version with 'working' kernel; the latter term
throughout this guide will refer to the last kernel that has been working
fine.*
- [:ref:`details<rangecheck_bisref>`]
+ [:ref:`details <rangecheck_bisref>`]
.. _bootworking_bissbs:
* Boot into the 'working' kernel and briefly use the apparently broken feature.
- [:ref:`details<bootworking_bisref>`]
+ [:ref:`details <bootworking_bisref>`]
.. _diskspace_bissbs:
@@ -327,7 +356,7 @@ Preparations: set up everything to build your own kernels
debug symbols: both explain approaches reducing the amount of space, which
should allow you to master these tasks with about 4 Gigabytes free space.
- [:ref:`details<diskspace_bisref>`]
+ [:ref:`details <diskspace_bisref>`]
.. _buildrequires_bissbs:
@@ -337,7 +366,7 @@ Preparations: set up everything to build your own kernels
reference section shows how to quickly install those on various popular Linux
distributions.
- [:ref:`details<buildrequires_bisref>`]
+ [:ref:`details <buildrequires_bisref>`]
.. _sources_bissbs:
@@ -360,14 +389,23 @@ Preparations: set up everything to build your own kernels
git remote add -t master stable \
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
- [:ref:`details<sources_bisref>`]
+ [:ref:`details <sources_bisref>`]
+
+.. _stablesources_bissbs:
+
+* Is one of the versions you earlier established as 'good' or 'bad' a stable or
+ longterm release (say 6.1.5)? Then download the code for the series it belongs
+ to ('linux-6.1.y' in this example)::
+
+ git remote set-branches --add stable linux-6.1.y
+ git fetch stable
.. _oldconfig_bissbs:
* Start preparing a kernel build configuration (the '.config' file).
Before doing so, ensure you are still running the 'working' kernel an earlier
- step told you to boot; if you are unsure, check the current kernel release
+ step told you to boot; if you are unsure, check the current kernelrelease
identifier using ``uname -r``.
Afterwards check out the source code for the version earlier established as
@@ -375,7 +413,7 @@ Preparations: set up everything to build your own kernels
the version number in this and all later Git commands needs to be prefixed
with a 'v'::
- git checkout --detach v6.0
+ git switch --discard-changes --detach v6.0
Now create a build configuration file::
@@ -398,7 +436,7 @@ Preparations: set up everything to build your own kernels
'make olddefconfig' again and check if it now picked up the right config file
as base.
- [:ref:`details<oldconfig_bisref>`]
+ [:ref:`details <oldconfig_bisref>`]
.. _localmodconfig_bissbs:
@@ -432,7 +470,7 @@ Preparations: set up everything to build your own kernels
spending much effort on, as long as it boots and allows to properly test the
feature that causes trouble.
- [:ref:`details<localmodconfig_bisref>`]
+ [:ref:`details <localmodconfig_bisref>`]
.. _tagging_bissbs:
@@ -442,7 +480,7 @@ Preparations: set up everything to build your own kernels
./scripts/config --set-str CONFIG_LOCALVERSION '-local'
./scripts/config -e CONFIG_LOCALVERSION_AUTO
- [:ref:`details<tagging_bisref>`]
+ [:ref:`details <tagging_bisref>`]
.. _debugsymbols_bissbs:
@@ -461,7 +499,7 @@ Preparations: set up everything to build your own kernels
./scripts/config -d DEBUG_INFO -d DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT \
-d DEBUG_INFO_DWARF4 -d DEBUG_INFO_DWARF5 -e CONFIG_DEBUG_INFO_NONE
- [:ref:`details<debugsymbols_bisref>`]
+ [:ref:`details <debugsymbols_bisref>`]
.. _configmods_bissbs:
@@ -471,14 +509,14 @@ Preparations: set up everything to build your own kernels
* Are you running Debian? Then you want to avoid known problems by performing
additional adjustments explained in the reference section.
- [:ref:`details<configmods_distros_bisref>`].
+ [:ref:`details <configmods_distros_bisref>`].
* If you want to influence other aspects of the configuration, do so now using
your preferred tool. Note, to use make targets like 'menuconfig' or
'nconfig', you will need to install the development files of ncurses; for
'xconfig' you likewise need the Qt5 or Qt6 headers.
- [:ref:`details<configmods_individual_bisref>`].
+ [:ref:`details <configmods_individual_bisref>`].
.. _saveconfig_bissbs:
@@ -488,7 +526,7 @@ Preparations: set up everything to build your own kernels
make olddefconfig
cp .config ~/kernel-config-working
- [:ref:`details<saveconfig_bisref>`]
+ [:ref:`details <saveconfig_bisref>`]
.. _introlatestcheck_bissbs:
@@ -498,16 +536,30 @@ Segment 1: try to reproduce the problem with the latest codebase
The following steps verify if the problem occurs with the code currently
supported by developers. In case you face a regression, it also checks that the
problem is not caused by some .config change, as reporting the issue then would
-be a waste of time. [:ref:`details<introlatestcheck_bisref>`]
+be a waste of time. [:ref:`details <introlatestcheck_bisref>`]
.. _checkoutmaster_bissbs:
-* Check out the latest Linux codebase::
+* Check out the latest Linux codebase.
- cd ~/linux/
- git checkout --force --detach mainline/master
+ * Are your 'good' and 'bad' versions from the same stable or longterm series?
+ Then check the `front page of kernel.org <https://kernel.org/>`_: if it
+ lists a release from that series without an '[EOL]' tag, checkout the series
+ latest version ('linux-6.1.y' in the following example)::
+
+ cd ~/linux/
+ git switch --discard-changes --detach stable/linux-6.1.y
+
+ Your series is unsupported, if is not listed or carrying a 'end of life'
+ tag. In that case you might want to check if a successor series (say
+ linux-6.2.y) or mainline (see next point) fix the bug.
- [:ref:`details<checkoutmaster_bisref>`]
+ * In all other cases, run::
+
+ cd ~/linux/
+ git switch --discard-changes --detach mainline/master
+
+ [:ref:`details <checkoutmaster_bisref>`]
.. _build_bissbs:
@@ -522,7 +574,7 @@ be a waste of time. [:ref:`details<introlatestcheck_bisref>`]
reference section for alternatives, which obviously will require other
steps to install as well.
- [:ref:`details<build_bisref>`]
+ [:ref:`details <build_bisref>`]
.. _install_bissbs:
@@ -555,7 +607,7 @@ be a waste of time. [:ref:`details<introlatestcheck_bisref>`]
down: if you will build more kernels as described in segment 2 and 3, you will
have to perform those again after executing ``command -v installkernel [...]``.
- [:ref:`details<install_bisref>`]
+ [:ref:`details <install_bisref>`]
.. _storagespace_bissbs:
@@ -568,7 +620,7 @@ be a waste of time. [:ref:`details<introlatestcheck_bisref>`]
Write down or remember those two values for later: they enable you to prevent
running out of disk space accidentally during a bisection.
- [:ref:`details<storagespace_bisref>`]
+ [:ref:`details <storagespace_bisref>`]
.. _kernelrelease_bissbs:
@@ -595,7 +647,7 @@ be a waste of time. [:ref:`details<introlatestcheck_bisref>`]
If that command does not return '0', check the reference section, as the cause
for this might interfere with your testing.
- [:ref:`details<tainted_bisref>`]
+ [:ref:`details <tainted_bisref>`]
.. _recheckbroken_bissbs:
@@ -603,21 +655,19 @@ be a waste of time. [:ref:`details<introlatestcheck_bisref>`]
out the instructions in the reference section to ensure nothing went sideways
during your tests.
- [:ref:`details<recheckbroken_bisref>`]
+ [:ref:`details <recheckbroken_bisref>`]
.. _recheckstablebroken_bissbs:
-* Are you facing a problem within a stable/longterm series, but failed to
- reproduce it with the mainline kernel you just built? One that according to
- the `front page of kernel.org <https://kernel.org/>`_ is still supported? Then
- check if the latest codebase for the particular series might already fix the
- problem. To do so, add the stable series Git branch for your 'good' kernel
- (again, this here is assumed to be 6.0) and check out the latest version::
+* Did you just built a stable or longterm kernel? And were you able to reproduce
+ the regression with it? Then you should test the latest mainline codebase as
+ well, because the result determines which developers the bug must be submitted
+ to.
+
+ To prepare that test, check out current mainline::
cd ~/linux/
- git remote set-branches --add stable linux-6.0.y
- git fetch stable
- git checkout --force --detach linux-6.0.y
+ git switch --discard-changes --detach mainline/master
Now use the checked out code to build and install another kernel using the
commands the earlier steps already described in more detail::
@@ -639,14 +689,16 @@ be a waste of time. [:ref:`details<introlatestcheck_bisref>`]
uname -r
cat /proc/sys/kernel/tainted
- Now verify if this kernel is showing the problem.
+ Now verify if this kernel is showing the problem. If it does, then you need
+ to report the bug to the primary developers; if it does not, report it to the
+ stable team. See Documentation/admin-guide/reporting-issues.rst for details.
- [:ref:`details<recheckstablebroken_bisref>`]
+ [:ref:`details <recheckstablebroken_bisref>`]
Do you follow this guide to verify if a problem is present in the code
currently supported by Linux kernel developers? Then you are done at this
point. If you later want to remove the kernel you just built, check out
-:ref:`Supplementary tasks: cleanup during and after following this guide<introclosure_bissbs>`.
+:ref:`Complementary tasks: cleanup during and after following this guide <introclosure_bissbs>`.
In case you face a regression, move on and execute at least the next segment
as well.
@@ -658,7 +710,7 @@ Segment 2: check if the kernels you build work fine
In case of a regression, you now want to ensure the trimmed configuration file
you created earlier works as expected; a bisection with the .config file
-otherwise would be a waste of time. [:ref:`details<introworkingcheck_bisref>`]
+otherwise would be a waste of time. [:ref:`details <introworkingcheck_bisref>`]
.. _recheckworking_bissbs:
@@ -669,7 +721,7 @@ otherwise would be a waste of time. [:ref:`details<introworkingcheck_bisref>`]
'good' (once again assumed to be 6.0 here)::
cd ~/linux/
- git checkout --detach v6.0
+ git switch --discard-changes --detach v6.0
Now use the checked out code to configure, build, and install another kernel
using the commands the previous subsection explained in more detail::
@@ -693,7 +745,7 @@ otherwise would be a waste of time. [:ref:`details<introworkingcheck_bisref>`]
Now check if this kernel works as expected; if not, consult the reference
section for further instructions.
- [:ref:`details<recheckworking_bisref>`]
+ [:ref:`details <recheckworking_bisref>`]
.. _introbisect_bissbs:
@@ -703,18 +755,11 @@ Segment 3: perform the bisection and validate the result
With all the preparations and precaution builds taken care of, you are now ready
to begin the bisection. This will make you build quite a few kernels -- usually
about 15 in case you encountered a regression when updating to a newer series
-(say from 6.0.11 to 6.1.3). But do not worry, due to the trimmed build
+(say from 6.0.13 to 6.1.5). But do not worry, due to the trimmed build
configuration created earlier this works a lot faster than many people assume:
overall on average it will often just take about 10 to 15 minutes to compile
each kernel on commodity x86 machines.
-* In case your 'bad' version is a stable/longterm release (say 6.1.5), add its
- stable branch, unless you already did so earlier::
-
- cd ~/linux/
- git remote set-branches --add stable linux-6.1.y
- git fetch stable
-
.. _bisectstart_bissbs:
* Start the bisection and tell Git about the versions earlier established as
@@ -725,7 +770,7 @@ each kernel on commodity x86 machines.
git bisect good v6.0
git bisect bad v6.1.5
- [:ref:`details<bisectstart_bisref>`]
+ [:ref:`details <bisectstart_bisref>`]
.. _bisectbuild_bissbs:
@@ -745,7 +790,7 @@ each kernel on commodity x86 machines.
If compilation fails for some reason, run ``git bisect skip`` and restart
executing the stack of commands from the beginning.
- In case you skipped the "test latest codebase" step in the guide, check its
+ In case you skipped the 'test latest codebase' step in the guide, check its
description as for why the 'df [...]' and 'make -s kernelrelease [...]'
commands are here.
@@ -754,7 +799,7 @@ each kernel on commodity x86 machines.
totally normal to see release identifiers like '6.0-rc1-local-gcafec0cacaca0'
if you bisect between versions 6.1 and 6.2 for example.
- [:ref:`details<bisectbuild_bisref>`]
+ [:ref:`details <bisectbuild_bisref>`]
.. _bisecttest_bissbs:
@@ -794,7 +839,7 @@ each kernel on commodity x86 machines.
might need to scroll up to see the message mentioning the culprit;
alternatively, run ``git bisect log > ~/bisection-log``.
- [:ref:`details<bisecttest_bisref>`]
+ [:ref:`details <bisecttest_bisref>`]
.. _bisectlog_bissbs:
@@ -806,7 +851,7 @@ each kernel on commodity x86 machines.
cp .config ~/bisection-config-culprit
git bisect reset
- [:ref:`details<bisectlog_bisref>`]
+ [:ref:`details <bisectlog_bisref>`]
.. _revert_bissbs:
@@ -823,16 +868,16 @@ each kernel on commodity x86 machines.
Begin by checking out the latest codebase depending on the range you bisected:
* Did you face a regression within a stable/longterm series (say between
- 6.0.11 and 6.0.13) that does not happen in mainline? Then check out the
+ 6.0.13 and 6.0.15) that does not happen in mainline? Then check out the
latest codebase for the affected series like this::
git fetch stable
- git checkout --force --detach linux-6.0.y
+ git switch --discard-changes --detach linux-6.0.y
* In all other cases check out latest mainline::
git fetch mainline
- git checkout --force --detach mainline/master
+ git switch --discard-changes --detach mainline/master
If you bisected a regression within a stable/longterm series that also
happens in mainline, there is one more thing to do: look up the mainline
@@ -846,27 +891,33 @@ each kernel on commodity x86 machines.
git revert --no-edit cafec0cacaca0
- If that fails, give up trying and move on to the next step. But if it works,
- build a kernel again using the familiar command sequence::
+ If that fails, give up trying and move on to the next step; if it works,
+ adjust the tag to facilitate the identification and prevent accidentally
+ overwriting another kernel::
cp ~/kernel-config-working .config
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
+
+ Build a kernel using the familiar command sequence, just without copying the
+ the base .config over::
+
make olddefconfig &&
- make -j $(nproc --all) &&
+ make -j $(nproc --all)
# * Check if the free space suffices holding another kernel:
df -h /boot/ /lib/modules/
sudo make modules_install
command -v installkernel && sudo make install
- Make -s kernelrelease | tee -a ~/kernels-built
+ make -s kernelrelease | tee -a ~/kernels-built
reboot
- Now check one last time if the feature that made you perform a bisection work
- with that kernel.
+ Now check one last time if the feature that made you perform a bisection works
+ with that kernel: if everything went well, it should not show the regression.
- [:ref:`details<revert_bisref>`]
+ [:ref:`details <revert_bisref>`]
.. _introclosure_bissbs:
-Supplementary tasks: cleanup during and after the bisection
+Complementary tasks: cleanup during and after the bisection
-----------------------------------------------------------
During and after following this guide you might want or need to remove some of
@@ -903,7 +954,7 @@ space might run out.
kernel image and related files behind; in that case remove them as described
in the reference section.
- [:ref:`details<makeroom_bisref>`]
+ [:ref:`details <makeroom_bisref>`]
.. _finishingtouch_bissbs:
@@ -926,18 +977,99 @@ space might run out.
the version considered 'good', and the last three or four you compiled
during the actual bisection process.
- [:ref:`details<finishingtouch_bisref>`]
+ [:ref:`details <finishingtouch_bisref>`]
+
+.. _introoptional_bissbs:
+
+Optional: test reverts, patches, or later versions
+--------------------------------------------------
+
+While or after reporting a bug, you might want or potentially will be asked to
+test reverts, debug patches, proposed fixes, or other versions. In that case
+follow these instructions.
+
+* Update your Git clone and check out the latest code.
+
+ * In case you want to test mainline, fetch its latest changes before checking
+ its code out::
+
+ git fetch mainline
+ git switch --discard-changes --detach mainline/master
+
+ * In case you want to test a stable or longterm kernel, first add the branch
+ holding the series you are interested in (6.2 in the example), unless you
+ already did so earlier::
+
+ git remote set-branches --add stable linux-6.2.y
+
+ Then fetch the latest changes and check out the latest version from the
+ series::
+
+ git fetch stable
+ git switch --discard-changes --detach stable/linux-6.2.y
+
+* Copy your kernel build configuration over::
+
+ cp ~/kernel-config-working .config
+
+* Your next step depends on what you want to do:
+
+ * In case you just want to test the latest codebase, head to the next step,
+ you are already all set.
+
+ * In case you want to test if a revert fixes an issue, revert one or multiple
+ changes by specifying their commit ids::
+
+ git revert --no-edit cafec0cacaca0
+
+ Now give that kernel a special tag to facilitates its identification and
+ prevent accidentally overwriting another kernel::
+
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-cafec0cacaca0-reverted'
+
+ * In case you want to test a patch, store the patch in a file like
+ '/tmp/foobars-proposed-fix-v1.patch' and apply it like this::
+
+ git apply /tmp/foobars-proposed-fix-v1.patch
+
+ In case of multiple patches, repeat this step with the others.
+
+ Now give that kernel a special tag to facilitates its identification and
+ prevent accidentally overwriting another kernel::
+
+ ./scripts/config --set-str CONFIG_LOCALVERSION '-local-foobars-fix-v1'
+
+* Build a kernel using the familiar commands, just without copying the kernel
+ build configuration over, as that has been taken care of already::
+
+ make olddefconfig &&
+ make -j $(nproc --all)
+ # * Check if the free space suffices holding another kernel:
+ df -h /boot/ /lib/modules/
+ sudo make modules_install
+ command -v installkernel && sudo make install
+ make -s kernelrelease | tee -a ~/kernels-built
+ reboot
+
+* Now verify you booted the newly built kernel and check it.
+
+[:ref:`details <introoptional_bisref>`]
.. _submit_improvements:
-This concludes the step-by-step guide.
+Conclusion
+----------
+
+You have reached the end of the step-by-step guide.
Did you run into trouble following any of the above steps not cleared up by the
reference section below? Did you spot errors? Or do you have ideas how to
-improve the guide? Then please take a moment and let the maintainer of this
+improve the guide?
+
+If any of that applies, please take a moment and let the maintainer of this
document know by email (Thorsten Leemhuis <linux@leemhuis.info>), ideally while
CCing the Linux docs mailing list (linux-doc@vger.kernel.org). Such feedback is
-vital to improve this document further, which is in everybody's interest, as it
+vital to improve this text further, which is in everybody's interest, as it
will enable more people to master the task described here -- and hopefully also
improve similar guides inspired by this one.
@@ -948,10 +1080,20 @@ Reference section for the step-by-step guide
This section holds additional information for almost all the items in the above
step-by-step guide.
+Preparations for building your own kernels
+------------------------------------------
+
+ *The steps in this section lay the groundwork for all further tests.*
+ [:ref:`... <introprep_bissbs>`]
+
+The steps in all later sections of this guide depend on those described here.
+
+[:ref:`back to step-by-step guide <introprep_bissbs>`].
+
.. _backup_bisref:
Prepare for emergencies
------------------------
+~~~~~~~~~~~~~~~~~~~~~~~
*Create a fresh backup and put system repair and restore tools at hand.*
[:ref:`... <backup_bissbs>`]
@@ -966,7 +1108,7 @@ for something going sideways, even if that should not happen.
.. _vanilla_bisref:
Remove anything related to externally maintained kernel modules
----------------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Remove all software that depends on externally developed kernel drivers or
builds them automatically.* [:ref:`...<vanilla_bissbs>`]
@@ -984,7 +1126,7 @@ explains in more detail.
.. _secureboot_bisref:
Deal with techniques like Secure Boot
--------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*On platforms with 'Secure Boot' or similar techniques, prepare everything to
ensure the system will permit your self-compiled kernel to boot later.*
@@ -1021,7 +1163,7 @@ Afterwards, permit MokManager to reboot the machine.
.. _bootworking_bisref:
Boot the last kernel that was working
--------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Boot into the last working kernel and briefly recheck if the feature that
regressed really works.* [:ref:`...<bootworking_bissbs>`]
@@ -1034,7 +1176,7 @@ the right thing.
.. _diskspace_bisref:
Space requirements
-------------------
+~~~~~~~~~~~~~~~~~~
*Ensure to have enough free space for building Linux.*
[:ref:`... <diskspace_bissbs>`]
@@ -1052,32 +1194,32 @@ space by quite a few gigabytes.
.. _rangecheck_bisref:
Bisection range
----------------
+~~~~~~~~~~~~~~~
*Determine the kernel versions considered 'good' and 'bad' throughout this
guide.* [:ref:`...<rangecheck_bissbs>`]
Establishing the range of commits to be checked is mostly straightforward,
except when a regression occurred when switching from a release of one stable
-series to a release of a later series (e.g. from 6.0.11 to 6.1.4). In that case
+series to a release of a later series (e.g. from 6.0.13 to 6.1.5). In that case
Git will need some hand holding, as there is no straight line of descent.
That's because with the release of 6.0 mainline carried on to 6.1 while the
stable series 6.0.y branched to the side. It's therefore theoretically possible
-that the issue you face with 6.1.4 only worked in 6.0.11, as it was fixed by a
+that the issue you face with 6.1.5 only worked in 6.0.13, as it was fixed by a
commit that went into one of the 6.0.y releases, but never hit mainline or the
6.1.y series. Thankfully that normally should not happen due to the way the
stable/longterm maintainers maintain the code. It's thus pretty safe to assume
6.0 as a 'good' kernel. That assumption will be tested anyway, as that kernel
will be built and tested in the segment '2' of this guide; Git would force you
-to do this as well, if you tried bisecting between 6.0.11 and 6.1.13.
+to do this as well, if you tried bisecting between 6.0.13 and 6.1.15.
[:ref:`back to step-by-step guide <rangecheck_bissbs>`]
.. _buildrequires_bisref:
Install build requirements
---------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~
*Install all software required to build a Linux kernel.*
[:ref:`...<buildrequires_bissbs>`]
@@ -1117,7 +1259,7 @@ These commands install a few packages that are often, but not always needed. You
for example might want to skip installing the development headers for ncurses,
which you will only need in case you later might want to adjust the kernel build
configuration using make the targets 'menuconfig' or 'nconfig'; likewise omit
-the headers of Qt6 is you do not plan to adjust the .config using 'xconfig'.
+the headers of Qt6 if you do not plan to adjust the .config using 'xconfig'.
You furthermore might need additional libraries and their development headers
for tasks not covered in this guide -- for example when building utilities from
@@ -1128,7 +1270,7 @@ the kernel's tools/ directory.
.. _sources_bisref:
Download the sources using Git
-------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Retrieve the Linux mainline sources.*
[:ref:`...<sources_bissbs>`]
@@ -1148,7 +1290,7 @@ work better for you:
.. _sources_bundle_bisref:
Downloading Linux mainline sources using a bundle
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""""""""""""""""""""""""""""""""""""""""""""""""
Use the following commands to retrieve the Linux mainline sources using a
bundle::
@@ -1184,7 +1326,7 @@ First, execute the following command to retrieve the latest mainline codebase::
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
Now deepen your clone's history to the second predecessor of the mainline
-release of your 'good' version. In case the latter are 6.0 or 6.0.11, 5.19 would
+release of your 'good' version. In case the latter are 6.0 or 6.0.13, 5.19 would
be the first predecessor and 5.18 the second -- hence deepen the history up to
that version::
@@ -1219,7 +1361,7 @@ Note, shallow clones have a few peculiar characteristics:
.. _oldconfig_bisref:
Start defining the build configuration for your kernel
-------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Start preparing a kernel build configuration (the '.config' file).*
[:ref:`... <oldconfig_bissbs>`]
@@ -1279,7 +1421,7 @@ that file to the build machine and store it as ~/linux/.config; afterwards run
.. _localmodconfig_bisref:
Trim the build configuration for your kernel
---------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Disable any kernel modules apparently superfluous for your setup.*
[:ref:`... <localmodconfig_bissbs>`]
@@ -1328,7 +1470,7 @@ step-by-step guide mentions::
.. _tagging_bisref:
Tag the kernels about to be build
----------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Ensure all the kernels you will build are clearly identifiable using a
special tag and a unique version identifier.* [:ref:`... <tagging_bissbs>`]
@@ -1344,7 +1486,7 @@ confusing during the bisection.
.. _debugsymbols_bisref:
Decide to enable or disable debug symbols
------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Decide how to handle debug symbols.* [:ref:`... <debugsymbols_bissbs>`]
@@ -1373,7 +1515,7 @@ explains this process in more detail.
.. _configmods_bisref:
Adjust build configuration
---------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~
*Check if you may want or need to adjust some other kernel configuration
options:*
@@ -1384,7 +1526,7 @@ kernel configuration options.
.. _configmods_distros_bisref:
Distro specific adjustments
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""""""""""""""""""""""""""
*Are you running* [:ref:`... <configmods_bissbs>`]
@@ -1409,7 +1551,7 @@ when following this guide on a few commodity distributions.
.. _configmods_individual_bisref:
Individual adjustments
-~~~~~~~~~~~~~~~~~~~~~~
+""""""""""""""""""""""
*If you want to influence the other aspects of the configuration, do so
now.* [:ref:`... <configmods_bissbs>`]
@@ -1426,13 +1568,13 @@ is missing.
.. _saveconfig_bisref:
Put the .config file aside
---------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~
*Reprocess the .config after the latest changes and store it in a safe place.*
[:ref:`... <saveconfig_bissbs>`]
Put the .config you prepared aside, as you want to copy it back to the build
-directory every time during this guide before you start building another
+directory every time during this guide before you start building another
kernel. That's because going back and forth between different versions can alter
.config files in odd ways; those occasionally cause side effects that could
confuse testing or in some cases render the result of your bisection
@@ -1442,8 +1584,8 @@ meaningless.
.. _introlatestcheck_bisref:
-Try to reproduce the regression
------------------------------------------
+Try to reproduce the problem with the latest codebase
+-----------------------------------------------------
*Verify the regression is not caused by some .config change and check if it
still occurs with the latest codebase.* [:ref:`... <introlatestcheck_bissbs>`]
@@ -1490,28 +1632,28 @@ highly recommended for these reasons:
Your report might be ignored if you send it to the wrong party -- and even
when you get a reply there is a decent chance that developers tell you to
- evaluate which of the two cases it is before they take a closer look.
+ evaluate which of the two cases it is before they take a closer look.
[:ref:`back to step-by-step guide <introlatestcheck_bissbs>`]
.. _checkoutmaster_bisref:
Check out the latest Linux codebase
------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Check out the latest Linux codebase.*
- [:ref:`... <introlatestcheck_bissbs>`]
+ [:ref:`... <checkoutmaster_bissbs>`]
In case you later want to recheck if an ever newer codebase might fix the
problem, remember to run that ``git fetch --shallow-exclude [...]`` command
again mentioned earlier to update your local Git repository.
-[:ref:`back to step-by-step guide <introlatestcheck_bissbs>`]
+[:ref:`back to step-by-step guide <checkoutmaster_bissbs>`]
.. _build_bisref:
Build your kernel
------------------
+~~~~~~~~~~~~~~~~~
*Build the image and the modules of your first kernel using the config file
you prepared.* [:ref:`... <build_bissbs>`]
@@ -1521,7 +1663,7 @@ yourself. Another subsection explains how to directly package your kernel up as
deb, rpm or tar file.
Dealing with build errors
-~~~~~~~~~~~~~~~~~~~~~~~~~
+"""""""""""""""""""""""""
When a build error occurs, it might be caused by some aspect of your machine's
setup that often can be fixed quickly; other times though the problem lies in
@@ -1552,11 +1694,11 @@ by modifying your search terms or using another line from the error messages.
In the end, most issues you run into have likely been encountered and
reported by others already. That includes issues where the cause is not your
-system, but lies in the code. If you run into one of those, you might thus find a
-solution (e.g. a patch) or workaround for your issue, too.
+system, but lies in the code. If you run into one of those, you might thus find
+a solution (e.g. a patch) or workaround for your issue, too.
Package your kernel up
-~~~~~~~~~~~~~~~~~~~~~~
+""""""""""""""""""""""
The step-by-step guide uses the default make targets (e.g. 'bzImage' and
'modules' on x86) to build the image and the modules of your kernel, which later
@@ -1587,7 +1729,7 @@ distribution's kernel packages.
.. _install_bisref:
Put the kernel in place
------------------------
+~~~~~~~~~~~~~~~~~~~~~~~
*Install the kernel you just built.* [:ref:`... <install_bissbs>`]
@@ -1630,7 +1772,7 @@ process. Afterwards add your kernel to your bootloader configuration and reboot.
.. _storagespace_bisref:
Storage requirements per kernel
--------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Check how much storage space the kernel, its modules, and other related files
like the initramfs consume.* [:ref:`... <storagespace_bissbs>`]
@@ -1651,7 +1793,7 @@ need to look in different places.
.. _tainted_bisref:
Check if your newly built kernel considers itself 'tainted'
------------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Check if the kernel marked itself as 'tainted'.*
[:ref:`... <tainted_bissbs>`]
@@ -1670,7 +1812,7 @@ interest, as your testing might be flawed otherwise.
.. _recheckbroken_bisref:
Check the kernel built from a recent mainline codebase
-------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Verify if your bug occurs with the newly built kernel.*
[:ref:`... <recheckbroken_bissbs>`]
@@ -1696,7 +1838,7 @@ the kernel you built from the latest codebase. These are the most frequent:
.. _recheckstablebroken_bisref:
Check the kernel built from the latest stable/longterm codebase
----------------------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Are you facing a regression within a stable/longterm release, but failed to
reproduce it with the kernel you just built using the latest mainline sources?
@@ -1741,7 +1883,7 @@ ensure the kernel version you assumed to be 'good' earlier in the process (e.g.
.. _recheckworking_bisref:
Build your own version of the 'good' kernel
--------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Build your own variant of the working kernel and check if the feature that
regressed works as expected with it.* [:ref:`... <recheckworking_bissbs>`]
@@ -1767,15 +1909,25 @@ multitude of reasons why this might happen. Some ideas where to look:
Note, if you found and fixed problems with the .config file, you want to use it
to build another kernel from the latest codebase, as your earlier tests with
-mainline and the latest version from an affected stable/longterm series were most
-likely flawed.
+mainline and the latest version from an affected stable/longterm series were
+most likely flawed.
[:ref:`back to step-by-step guide <recheckworking_bissbs>`]
+Perform a bisection and validate the result
+-------------------------------------------
+
+ *With all the preparations and precaution builds taken care of, you are now
+ ready to begin the bisection.* [:ref:`... <introbisect_bissbs>`]
+
+The steps in this segment perform and validate the bisection.
+
+[:ref:`back to step-by-step guide <introbisect_bissbs>`].
+
.. _bisectstart_bisref:
Start the bisection
--------------------
+~~~~~~~~~~~~~~~~~~~
*Start the bisection and tell Git about the versions earlier established as
'good' and 'bad'.* [:ref:`... <bisectstart_bissbs>`]
@@ -1789,7 +1941,7 @@ for you to test.
.. _bisectbuild_bisref:
Build a kernel from the bisection point
----------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*Build, install, and boot a kernel from the code Git checked out using the
same commands you used earlier.* [:ref:`... <bisectbuild_bissbs>`]
@@ -1817,7 +1969,7 @@ There are two things worth of note here:
.. _bisecttest_bisref:
Bisection checkpoint
---------------------
+~~~~~~~~~~~~~~~~~~~~
*Check if the feature that regressed works in the kernel you just built.*
[:ref:`... <bisecttest_bissbs>`]
@@ -1831,7 +1983,7 @@ will be for nothing.
.. _bisectlog_bisref:
Put the bisection log away
---------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~
*Store Git's bisection log and the current .config file in a safe place.*
[:ref:`... <bisectlog_bissbs>`]
@@ -1851,7 +2003,7 @@ ask for it after you report the regression.
.. _revert_bisref:
Try reverting the culprit
--------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~
*Try reverting the culprit on top of the latest codebase to see if this fixes
your regression.* [:ref:`... <revert_bissbs>`]
@@ -1869,14 +2021,20 @@ succeeds, test that kernel version instead.
[:ref:`back to step-by-step guide <revert_bissbs>`]
+Cleanup steps during and after following this guide
+---------------------------------------------------
-Supplementary tasks: cleanup during and after the bisection
------------------------------------------------------------
+ *During and after following this guide you might want or need to remove some
+ of the kernels you installed.* [:ref:`... <introclosure_bissbs>`]
+
+The steps in this section describe clean-up procedures.
+
+[:ref:`back to step-by-step guide <introclosure_bissbs>`].
.. _makeroom_bisref:
Cleaning up during the bisection
---------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*To remove one of the kernels you installed, look up its 'kernelrelease'
identifier.* [:ref:`... <makeroom_bissbs>`]
@@ -1911,13 +2069,13 @@ Now remove the boot entry for the kernel from your bootloader's configuration;
the steps to do that vary quite a bit between Linux distributions.
Note, be careful with wildcards like '*' when deleting files or directories
-for kernels manually: you might accidentally remove files of a 6.0.11 kernel
+for kernels manually: you might accidentally remove files of a 6.0.13 kernel
when all you want is to remove 6.0 or 6.0.1.
[:ref:`back to step-by-step guide <makeroom_bissbs>`]
Cleaning up after the bisection
--------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. _finishingtouch_bisref:
@@ -1932,26 +2090,105 @@ build artifacts and the Linux sources, but will leave the Git repository
(~/linux/.git/) behind -- a simple ``git reset --hard`` thus will bring the
sources back.
-Removing the repository as well would likely be unwise at this point: there is a
-decent chance developers will ask you to build another kernel to perform
-additional tests. This is often required to debug an issue or check proposed
-fixes. Before doing so you want to run the ``git fetch mainline`` command again
-followed by ``git checkout mainline/master`` to bring your clone up to date and
-checkout the latest codebase. Then apply the patch using ``git apply
-<filename>`` or ``git am <filename>`` and build yet another kernel using the
-familiar commands.
+Removing the repository as well would likely be unwise at this point: there
+is a decent chance developers will ask you to build another kernel to
+perform additional tests -- like testing a debug patch or a proposed fix.
+Details on how to perform those can be found in the section :ref:`Optional
+tasks: test reverts, patches, or later versions <introoptional_bissbs>`.
Additional tests are also the reason why you want to keep the
~/kernel-config-working file around for a few weeks.
[:ref:`back to step-by-step guide <finishingtouch_bissbs>`]
+.. _introoptional_bisref:
-Additional reading material
-===========================
+Test reverts, patches, or later versions
+----------------------------------------
+
+ *While or after reporting a bug, you might want or potentially will be asked
+ to test reverts, patches, proposed fixes, or other versions.*
+ [:ref:`... <introoptional_bissbs>`]
+
+All the commands used in this section should be pretty straight forward, so
+there is not much to add except one thing: when setting a kernel tag as
+instructed, ensure it is not much longer than the one used in the example, as
+problems will arise if the kernelrelease identifier exceeds 63 characters.
+
+[:ref:`back to step-by-step guide <introoptional_bissbs>`].
+
+
+Additional information
+======================
+
+.. _buildhost_bis:
+
+Build kernels on a different machine
+------------------------------------
+
+To compile kernels on another system, slightly alter the step-by-step guide's
+instructions:
+
+* Start following the guide on the machine where you want to install and test
+ the kernels later.
+
+* After executing ':ref:`Boot into the working kernel and briefly use the
+ apparently broken feature <bootworking_bissbs>`', save the list of loaded
+ modules to a file using ``lsmod > ~/test-machine-lsmod``. Then locate the
+ build configuration for the running kernel (see ':ref:`Start defining the
+ build configuration for your kernel <oldconfig_bisref>`' for hints on where
+ to find it) and store it as '~/test-machine-config-working'. Transfer both
+ files to the home directory of your build host.
+
+* Continue the guide on the build host (e.g. with ':ref:`Ensure to have enough
+ free space for building [...] <diskspace_bissbs>`').
+
+* When you reach ':ref:`Start preparing a kernel build configuration[...]
+ <oldconfig_bissbs>`': before running ``make olddefconfig`` for the first time,
+ execute the following command to base your configuration on the one from the
+ test machine's 'working' kernel::
+
+ cp ~/test-machine-config-working ~/linux/.config
+
+* During the next step to ':ref:`disable any apparently superfluous kernel
+ modules <localmodconfig_bissbs>`' use the following command instead::
-Further sources
----------------
+ yes '' | make localmodconfig LSMOD=~/lsmod_foo-machine localmodconfig
+
+* Continue the guide, but ignore the instructions outlining how to compile,
+ install, and reboot into a kernel every time they come up. Instead build
+ like this::
+
+ cp ~/kernel-config-working .config
+ make olddefconfig &&
+ make -j $(nproc --all) targz-pkg
+
+ This will generate a gzipped tar file whose name is printed in the last
+ line shown; for example, a kernel with the kernelrelease identifier
+ '6.0.0-rc1-local-g928a87efa423' built for x86 machines usually will
+ be stored as '~/linux/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz'.
+
+ Copy that file to your test machine's home directory.
+
+* Switch to the test machine to check if you have enough space to hold another
+ kernel. Then extract the file you transferred::
+
+ sudo tar -xvzf ~/linux-6.0.0-rc1-local-g928a87efa423-x86.tar.gz -C /
+
+ Afterwards :ref:`generate the initramfs and add the kernel to your boot
+ loader's configuration <install_bisref>`; on some distributions the following
+ command will take care of both these tasks::
+
+ sudo /sbin/installkernel 6.0.0-rc1-local-g928a87efa423 /boot/vmlinuz-6.0.0-rc1-local-g928a87efa423
+
+ Now reboot and ensure you started the intended kernel.
+
+This approach even works when building for another architecture: just install
+cross-compilers and add the appropriate parameters to every invocation of make
+(e.g. ``make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- [...]``).
+
+Additional reading material
+---------------------------
* The `man page for 'git bisect' <https://git-scm.com/docs/git-bisect>`_ and
`fighting regressions with 'git bisect' <https://git-scm.com/docs/git-bisect-lk2009.html>`_
diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst
index 3712d81cb50c67..6c245582d8fb16 100644
--- a/Documentation/arch/x86/resctrl.rst
+++ b/Documentation/arch/x86/resctrl.rst
@@ -574,7 +574,7 @@ Memory b/w domain is L3 cache.
MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
Memory bandwidth Allocation specified in MiBps
----------------------------------------------
+----------------------------------------------
Memory bandwidth domain is L3 cache.
::
diff --git a/Documentation/core-api/floating-point.rst b/Documentation/core-api/floating-point.rst
new file mode 100644
index 00000000000000..a8d0d4b050529f
--- /dev/null
+++ b/Documentation/core-api/floating-point.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+Floating-point API
+==================
+
+Kernel code is normally prohibited from using floating-point (FP) registers or
+instructions, including the C float and double data types. This rule reduces
+system call overhead, because the kernel does not need to save and restore the
+userspace floating-point register state.
+
+However, occasionally drivers or library functions may need to include FP code.
+This is supported by isolating the functions containing FP code to a separate
+translation unit (a separate source file), and saving/restoring the FP register
+state around calls to those functions. This creates "critical sections" of
+floating-point usage.
+
+The reason for this isolation is to prevent the compiler from generating code
+touching the FP registers outside these critical sections. Compilers sometimes
+use FP registers to optimize inlined ``memcpy`` or variable assignment, as
+floating-point registers may be wider than general-purpose registers.
+
+Usability of floating-point code within the kernel is architecture-specific.
+Additionally, because a single kernel may be configured to support platforms
+both with and without a floating-point unit, FPU availability must be checked
+both at build time and at run time.
+
+Several architectures implement the generic kernel floating-point API from
+``linux/fpu.h``, as described below. Some other architectures implement their
+own unique APIs, which are documented separately.
+
+Build-time API
+--------------
+
+Floating-point code may be built if the option ``ARCH_HAS_KERNEL_FPU_SUPPORT``
+is enabled. For C code, such code must be placed in a separate file, and that
+file must have its compilation flags adjusted using the following pattern::
+
+ CFLAGS_foo.o += $(CC_FLAGS_FPU)
+ CFLAGS_REMOVE_foo.o += $(CC_FLAGS_NO_FPU)
+
+Architectures are expected to define one or both of these variables in their
+top-level Makefile as needed. For example::
+
+ CC_FLAGS_FPU := -mhard-float
+
+or::
+
+ CC_FLAGS_NO_FPU := -msoft-float
+
+Normal kernel code is assumed to use the equivalent of ``CC_FLAGS_NO_FPU``.
+
+Runtime API
+-----------
+
+The runtime API is provided in ``linux/fpu.h``. This header cannot be included
+from files implementing FP code (those with their compilation flags adjusted as
+above). Instead, it must be included when defining the FP critical sections.
+
+.. c:function:: bool kernel_fpu_available( void )
+
+ This function reports if floating-point code can be used on this CPU or
+ platform. The value returned by this function is not expected to change
+ at runtime, so it only needs to be called once, not before every
+ critical section.
+
+.. c:function:: void kernel_fpu_begin( void )
+ void kernel_fpu_end( void )
+
+ These functions create a floating-point critical section. It is only
+ valid to call ``kernel_fpu_begin()`` after a previous call to
+ ``kernel_fpu_available()`` returned ``true``. These functions are only
+ guaranteed to be callable from (preemptible or non-preemptible) process
+ context.
+
+ Preemption may be disabled inside critical sections, so their size
+ should be minimized. They are *not* required to be reentrant. If the
+ caller expects to nest critical sections, it must implement its own
+ reference counting.
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 7a3a08d81f111c..974beccd671f64 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -48,6 +48,7 @@ Library functionality that is used throughout the kernel.
errseq
wrappers/atomic_t
wrappers/atomic_bitops
+ floating-point
Low level entry and exit
========================
diff --git a/Documentation/dev-tools/testing-overview.rst b/Documentation/dev-tools/testing-overview.rst
index 0aaf6ea53608fc..1619e5e5cc9c4b 100644
--- a/Documentation/dev-tools/testing-overview.rst
+++ b/Documentation/dev-tools/testing-overview.rst
@@ -104,6 +104,8 @@ Some of these tools are listed below:
KASAN and can be used in production. See Documentation/dev-tools/kfence.rst
* lockdep is a locking correctness validator. See
Documentation/locking/lockdep-design.rst
+* Runtime Verification (RV) supports checking specific behaviours for a given
+ subsystem. See Documentation/trace/rv/runtime-verification.rst
* There are several other pieces of debug instrumentation in the kernel, many
of which can be found in lib/Kconfig.debug
diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile
index 5e08e3a6a97b10..8cdda477987fd6 100644
--- a/Documentation/devicetree/bindings/Makefile
+++ b/Documentation/devicetree/bindings/Makefile
@@ -32,16 +32,18 @@ find_cmd = $(find_all_cmd) | \
sed 's|^$(srctree)/||' | \
grep -F -e "$(subst :," -e ",$(DT_SCHEMA_FILES))" | \
sed 's|^|$(srctree)/|'
-CHK_DT_DOCS := $(shell $(find_cmd))
+CHK_DT_EXAMPLES := $(patsubst $(srctree)/%.yaml,%.example.dtb, $(shell $(find_cmd)))
quiet_cmd_yamllint = LINT $(src)
cmd_yamllint = ($(find_cmd) | \
xargs -n200 -P$$(nproc) \
- $(DT_SCHEMA_LINT) -f parsable -c $(srctree)/$(src)/.yamllint >&2) || true
+ $(DT_SCHEMA_LINT) -f parsable -c $(srctree)/$(src)/.yamllint >&2) \
+ && touch $@ || true
-quiet_cmd_chk_bindings = CHKDT $@
+quiet_cmd_chk_bindings = CHKDT $(src)
cmd_chk_bindings = ($(find_cmd) | \
- xargs -n200 -P$$(nproc) $(DT_DOC_CHECKER) -u $(srctree)/$(src)) || true
+ xargs -n200 -P$$(nproc) $(DT_DOC_CHECKER) -u $(srctree)/$(src)) \
+ && touch $@ || true
quiet_cmd_mk_schema = SCHEMA $@
cmd_mk_schema = f=$$(mktemp) ; \
@@ -49,12 +51,6 @@ quiet_cmd_mk_schema = SCHEMA $@
$(DT_MK_SCHEMA) -j $(DT_MK_SCHEMA_FLAGS) @$$f > $@ ; \
rm -f $$f
-define rule_chkdt
- $(if $(DT_SCHEMA_LINT),$(call cmd,yamllint),)
- $(call cmd,chk_bindings)
- $(call cmd,mk_schema)
-endef
-
DT_DOCS = $(patsubst $(srctree)/%,%,$(shell $(find_all_cmd)))
override DTC_FLAGS := \
@@ -64,12 +60,19 @@ override DTC_FLAGS := \
-Wno-unique_unit_address \
-Wunique_unit_address_if_enabled
-$(obj)/processed-schema.json: $(DT_DOCS) $(src)/.yamllint check_dtschema_version FORCE
- $(call if_changed_rule,chkdt)
+$(obj)/processed-schema.json: $(DT_DOCS) check_dtschema_version FORCE
+ $(call if_changed,mk_schema)
+
+targets += .dt-binding.checked .yamllint.checked
+$(obj)/.yamllint.checked: $(DT_DOCS) $(src)/.yamllint FORCE
+ $(if $(DT_SCHEMA_LINT),$(call if_changed,yamllint),)
+
+$(obj)/.dt-binding.checked: $(DT_DOCS) FORCE
+ $(call if_changed,chk_bindings)
always-y += processed-schema.json
-always-$(CHECK_DT_BINDING) += $(patsubst $(srctree)/$(src)/%.yaml,%.example.dts, $(CHK_DT_DOCS))
-always-$(CHECK_DT_BINDING) += $(patsubst $(srctree)/$(src)/%.yaml,%.example.dtb, $(CHK_DT_DOCS))
+targets += $(patsubst $(obj)/%,%, $(CHK_DT_EXAMPLES))
+targets += $(patsubst $(obj)/%.dtb,%.dts, $(CHK_DT_EXAMPLES))
# Hack: avoid 'Argument list too long' error for 'make clean'. Remove most of
# build artifacts here before they are processed by scripts/Makefile.clean
@@ -78,3 +81,6 @@ clean-files = $(shell find $(obj) \( -name '*.example.dts' -o \
dt_compatible_check: $(obj)/processed-schema.json
$(Q)$(srctree)/scripts/dtc/dt-extract-compatibles $(srctree) | xargs dt-check-compatible -v -s $<
+
+PHONY += dt_binding_check
+dt_binding_check: $(obj)/.dt-binding.checked $(obj)/.yamllint.checked $(CHK_DT_EXAMPLES)
diff --git a/Documentation/devicetree/bindings/arm/amlogic.yaml b/Documentation/devicetree/bindings/arm/amlogic.yaml
index 949537cea6be23..a374b98080feac 100644
--- a/Documentation/devicetree/bindings/arm/amlogic.yaml
+++ b/Documentation/devicetree/bindings/arm/amlogic.yaml
@@ -157,6 +157,7 @@ properties:
items:
- enum:
- bananapi,bpi-cm4io
+ - mntre,reform2-cm4
- const: bananapi,bpi-cm4
- const: amlogic,a311d
- const: amlogic,g12b
@@ -201,6 +202,18 @@ properties:
- amlogic,ad402
- const: amlogic,a1
+ - description: Boards with the Amlogic A4 A113L2 SoC
+ items:
+ - enum:
+ - amlogic,ba400
+ - const: amlogic,a4
+
+ - description: Boards with the Amlogic A5 A113X2 SoC
+ items:
+ - enum:
+ - amlogic,av400
+ - const: amlogic,a5
+
- description: Boards with the Amlogic C3 C302X/C308L SoC
items:
- enum:
diff --git a/Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml b/Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml
index 749ee54a3ff83a..9a47c4ed4d17b3 100644
--- a/Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml
+++ b/Documentation/devicetree/bindings/arm/aspeed/aspeed.yaml
@@ -35,7 +35,10 @@ properties:
- ampere,mtjade-bmc
- aspeed,ast2500-evb
- asrock,e3c246d4i-bmc
+ - asrock,e3c256d4i-bmc
- asrock,romed8hm3-bmc
+ - asrock,spc621d8hm3-bmc
+ - asrock,x570d4u-bmc
- bytedance,g220a-bmc
- facebook,cmm-bmc
- facebook,minipack-bmc
@@ -74,15 +77,18 @@ properties:
- ampere,mtmitchell-bmc
- aspeed,ast2600-evb
- aspeed,ast2600-evb-a1
+ - asus,x4tf
- facebook,bletchley-bmc
- facebook,cloudripper-bmc
- facebook,elbert-bmc
- facebook,fuji-bmc
- facebook,greatlakes-bmc
+ - facebook,harma-bmc
- facebook,minerva-cmc
- facebook,yosemite4-bmc
- ibm,everest-bmc
- ibm,rainier-bmc
+ - ibm,system1-bmc
- ibm,tacoma-bmc
- inventec,starscream-bmc
- inventec,transformer-bmc
diff --git a/Documentation/devicetree/bindings/clock/keystone-gate.txt b/Documentation/devicetree/bindings/clock/keystone-gate.txt
index c5aa187026e3a5..43f6fb6c939276 100644
--- a/Documentation/devicetree/bindings/clock/keystone-gate.txt
+++ b/Documentation/devicetree/bindings/clock/keystone-gate.txt
@@ -1,5 +1,3 @@
-Status: Unstable - ABI compatibility may be broken in the future
-
Binding for Keystone gate control driver which uses PSC controller IP.
This binding uses the common clock binding[1].
diff --git a/Documentation/devicetree/bindings/clock/keystone-pll.txt b/Documentation/devicetree/bindings/clock/keystone-pll.txt
index 9a3fbc66560652..69b0eb7c03c9e6 100644
--- a/Documentation/devicetree/bindings/clock/keystone-pll.txt
+++ b/Documentation/devicetree/bindings/clock/keystone-pll.txt
@@ -1,5 +1,3 @@
-Status: Unstable - ABI compatibility may be broken in the future
-
Binding for keystone PLLs. The main PLL IP typically has a multiplier,
a divider and a post divider. The additional PLL IPs like ARMPLL, DDRPLL
and PAPLL are controlled by the memory mapped register where as the Main
diff --git a/Documentation/devicetree/bindings/clock/ti/adpll.txt b/Documentation/devicetree/bindings/clock/ti/adpll.txt
index 4c8a2ce2cd7018..3122360adcf3c0 100644
--- a/Documentation/devicetree/bindings/clock/ti/adpll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/adpll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments ADPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped ADPLL with two to three selectable input clocks
and three to four children.
diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt
index ade4dd4c30f0e1..bbd505c1199df5 100644
--- a/Documentation/devicetree/bindings/clock/ti/apll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/apll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments APLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped APLL with usually two selectable input clocks
(reference clock and bypass clock), with analog phase locked
diff --git a/Documentation/devicetree/bindings/clock/ti/autoidle.txt b/Documentation/devicetree/bindings/clock/ti/autoidle.txt
index 7c735dde9fe971..05645a10a9e33c 100644
--- a/Documentation/devicetree/bindings/clock/ti/autoidle.txt
+++ b/Documentation/devicetree/bindings/clock/ti/autoidle.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments autoidle clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a register mapped
clock which can be put to idle automatically by hardware based on the usage
and a configuration bit setting. Autoidle clock is never an individual
diff --git a/Documentation/devicetree/bindings/clock/ti/clockdomain.txt b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt
index 9c6199249ce596..edf0b5d4276824 100644
--- a/Documentation/devicetree/bindings/clock/ti/clockdomain.txt
+++ b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments clockdomain.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1] in consumer role.
Every clock on TI SoC belongs to one clockdomain, but software
only needs this information for specific clocks which require
diff --git a/Documentation/devicetree/bindings/clock/ti/composite.txt b/Documentation/devicetree/bindings/clock/ti/composite.txt
index 33ac7c9ad053c7..6f7e1331b5466c 100644
--- a/Documentation/devicetree/bindings/clock/ti/composite.txt
+++ b/Documentation/devicetree/bindings/clock/ti/composite.txt
@@ -1,7 +1,5 @@
Binding for TI composite clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped composite clock with multiple different sub-types;
diff --git a/Documentation/devicetree/bindings/clock/ti/divider.txt b/Documentation/devicetree/bindings/clock/ti/divider.txt
index 9b13b32974f992..4d7c76f0b35695 100644
--- a/Documentation/devicetree/bindings/clock/ti/divider.txt
+++ b/Documentation/devicetree/bindings/clock/ti/divider.txt
@@ -1,7 +1,5 @@
Binding for TI divider clock
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped adjustable clock rate divider that does not gate and has
only one input clock or parent. By default the value programmed into
diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt
index 37a7cb6ad07d87..14a1b72c2e7120 100644
--- a/Documentation/devicetree/bindings/clock/ti/dpll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments DPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped DPLL with usually two selectable input clocks
(reference clock and bypass clock), with digital phase locked
diff --git a/Documentation/devicetree/bindings/clock/ti/fapll.txt b/Documentation/devicetree/bindings/clock/ti/fapll.txt
index c19b3f253b8cf7..88986ef39ddd24 100644
--- a/Documentation/devicetree/bindings/clock/ti/fapll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/fapll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments FAPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped FAPLL with usually two selectable input clocks
(reference clock and bypass clock), and one or more child
diff --git a/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt b/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt
index 518e3c1422762c..dc69477b6e98eb 100644
--- a/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt
+++ b/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt
@@ -1,7 +1,5 @@
Binding for TI fixed factor rate clock sources.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1], and also uses the autoidle
support from TI autoidle clock [2].
diff --git a/Documentation/devicetree/bindings/clock/ti/gate.txt b/Documentation/devicetree/bindings/clock/ti/gate.txt
index 4982615c01b9cb..a8e0335b006a07 100644
--- a/Documentation/devicetree/bindings/clock/ti/gate.txt
+++ b/Documentation/devicetree/bindings/clock/ti/gate.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments gate clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. This clock is
quite much similar to the basic gate-clock [2], however,
it supports a number of additional features. If no register
diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt
index d3eb5ca92a7fe6..85fb1f2d2d286b 100644
--- a/Documentation/devicetree/bindings/clock/ti/interface.txt
+++ b/Documentation/devicetree/bindings/clock/ti/interface.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments interface clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. This clock is
quite much similar to the basic gate-clock [2], however,
it supports a number of additional features, including
diff --git a/Documentation/devicetree/bindings/clock/ti/mux.txt b/Documentation/devicetree/bindings/clock/ti/mux.txt
index b33f641f104321..cd56d3c1c09f3b 100644
--- a/Documentation/devicetree/bindings/clock/ti/mux.txt
+++ b/Documentation/devicetree/bindings/clock/ti/mux.txt
@@ -1,7 +1,5 @@
Binding for TI mux clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped multiplexer with multiple input clock signals or
parents, one of which can be selected as output. This clock does not
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
index c0d6a4fdff97e3..e6dc5494baee29 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
@@ -53,6 +53,15 @@ patternProperties:
compatible:
const: qcom,sm8150-dpu
+ "^displayport-controller@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ contains:
+ const: qcom,sm8150-dp
+
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
diff --git a/Documentation/devicetree/bindings/dts-coding-style.rst b/Documentation/devicetree/bindings/dts-coding-style.rst
index a9bdd2b59dcab6..8a68331075a098 100644
--- a/Documentation/devicetree/bindings/dts-coding-style.rst
+++ b/Documentation/devicetree/bindings/dts-coding-style.rst
@@ -144,6 +144,8 @@ Example::
#dma-cells = <1>;
clocks = <&clock_controller 0>, <&clock_controller 1>;
clock-names = "bus", "host";
+ #address-cells = <1>;
+ #size-cells = <1>;
vendor,custom-property = <2>;
status = "disabled";
diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml
index 1812ef31d5f1e9..3c36cd0510de83 100644
--- a/Documentation/devicetree/bindings/eeprom/at24.yaml
+++ b/Documentation/devicetree/bindings/eeprom/at24.yaml
@@ -69,14 +69,10 @@ properties:
- items:
pattern: c32$
- items:
- pattern: c32d-wl$
- - items:
pattern: cs32$
- items:
pattern: c64$
- items:
- pattern: c64d-wl$
- - items:
pattern: cs64$
- items:
pattern: c128$
@@ -136,6 +132,7 @@ properties:
- renesas,r1ex24128
- samsung,s524ad0xd1
- const: atmel,24c128
+ - pattern: '^atmel,24c(32|64)d-wl$' # Actual vendor is st
label:
description: Descriptive name of the EEPROM.
diff --git a/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml b/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml
index c13c10c8d65da2..eed0df9d3a2322 100644
--- a/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml
+++ b/Documentation/devicetree/bindings/iio/health/maxim,max30102.yaml
@@ -42,7 +42,7 @@ allOf:
properties:
compatible:
contains:
- const: maxim,max30100
+ const: maxim,max30102
then:
properties:
maxim,green-led-current-microamp: false
diff --git a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
index 528ef3572b621e..055a3351880bc1 100644
--- a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
@@ -94,6 +94,10 @@ properties:
local-bd-address: true
+ qcom,local-bd-address-broken:
+ type: boolean
+ description:
+ boot firmware is incorrectly passing the address in big-endian order
required:
- compatible
diff --git a/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml b/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml
index afcdeed4e88af6..bc813fe74faba5 100644
--- a/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml
+++ b/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml
@@ -52,6 +52,9 @@ properties:
- const: main
- const: mm
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt b/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt
index 25f8658e216ff0..48a49c516b62cb 100644
--- a/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt
+++ b/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt
@@ -1,9 +1,6 @@
TI Davinci DSP devices
=======================
-Binding status: Unstable - Subject to changes for DT representation of clocks
- and resets
-
The TI Davinci family of SoCs usually contains a TI DSP Core sub-system that
is used to offload some of the processor-intensive tasks or algorithms, for
achieving various system level goals.
diff --git a/Documentation/devicetree/bindings/serial/amlogic,meson-uart.yaml b/Documentation/devicetree/bindings/serial/amlogic,meson-uart.yaml
index 2e189e548327da..0565fb7649c5b3 100644
--- a/Documentation/devicetree/bindings/serial/amlogic,meson-uart.yaml
+++ b/Documentation/devicetree/bindings/serial/amlogic,meson-uart.yaml
@@ -54,7 +54,9 @@ properties:
- const: amlogic,meson-gx-uart
- description: UART controller on S4 compatible SoCs
items:
- - const: amlogic,t7-uart
+ - enum:
+ - amlogic,a4-uart
+ - amlogic,t7-uart
- const: amlogic,meson-s4-uart
reg:
diff --git a/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml b/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
index 65cb2e5c5eee08..eb2992a447d79c 100644
--- a/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
+++ b/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Atmel Universal Synchronous Asynchronous Receiver/Transmitter (USART)
maintainers:
- - Richard Genoud <richard.genoud@gmail.com>
+ - Richard Genoud <richard.genoud@bootlin.com>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml
index 397f75909b2058..ce1a6505eb5149 100644
--- a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml
+++ b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml
@@ -51,7 +51,7 @@ properties:
ranges: true
patternProperties:
- "^clock-controller@[0-9a-z]+$":
+ "^clock-controller@[0-9a-f]+$":
$ref: /schemas/clock/fsl,flexspi-clock.yaml#
required:
diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml
index 8d088b5fe8236b..a6a511b00a1281 100644
--- a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml
+++ b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml
@@ -41,7 +41,7 @@ properties:
ranges: true
patternProperties:
- "^interrupt-controller@[a-z0-9]+$":
+ "^interrupt-controller@[a-f0-9]+$":
$ref: /schemas/interrupt-controller/fsl,ls-extirq.yaml#
required:
diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
index 0b87c266760c6e..79798c7474768a 100644
--- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
+++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml
@@ -171,6 +171,7 @@ allOf:
unevaluatedProperties: false
pcie-phy:
+ type: object
description:
Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt
diff --git a/Documentation/devicetree/bindings/sound/rt5645.txt b/Documentation/devicetree/bindings/sound/rt5645.txt
index 41a62fd2ae1ffb..c1fa379f5f3ea1 100644
--- a/Documentation/devicetree/bindings/sound/rt5645.txt
+++ b/Documentation/devicetree/bindings/sound/rt5645.txt
@@ -20,6 +20,11 @@ Optional properties:
a GPIO spec for the external headphone detect pin. If jd-mode = 0,
we will get the JD status by getting the value of hp-detect-gpios.
+- cbj-sleeve-gpios:
+ a GPIO spec to control the external combo jack circuit to tie the sleeve/ring2
+ contacts to the ground or floating. It could avoid some electric noise from the
+ active speaker jacks.
+
- realtek,in2-differential
Boolean. Indicate MIC2 input are differential, rather than single-ended.
@@ -68,6 +73,7 @@ codec: rt5650@1a {
compatible = "realtek,rt5650";
reg = <0x1a>;
hp-detect-gpios = <&gpio 19 0>;
+ cbj-sleeve-gpios = <&gpio 20 0>;
interrupt-parent = <&gpio>;
interrupts = <7 IRQ_TYPE_EDGE_FALLING>;
realtek,dmic-en = "true";
diff --git a/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml b/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml
index 7a4a6ab85970d6..ab8f28993139e5 100644
--- a/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml
+++ b/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml
@@ -60,7 +60,7 @@ properties:
be implemented in an always-on power domain."
patternProperties:
- '^frame@[0-9a-z]*$':
+ '^frame@[0-9a-f]+$':
type: object
additionalProperties: false
description: A timer node has up to 8 frame sub-nodes, each with the following properties.
diff --git a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
index 10c146424baa1e..cd3680dc002f96 100644
--- a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
+++ b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
@@ -27,10 +27,13 @@ properties:
- qcom,msm8996-ufshc
- qcom,msm8998-ufshc
- qcom,sa8775p-ufshc
+ - qcom,sc7180-ufshc
- qcom,sc7280-ufshc
+ - qcom,sc8180x-ufshc
- qcom,sc8280xp-ufshc
- qcom,sdm845-ufshc
- qcom,sm6115-ufshc
+ - qcom,sm6125-ufshc
- qcom,sm6350-ufshc
- qcom,sm8150-ufshc
- qcom,sm8250-ufshc
@@ -42,11 +45,11 @@ properties:
- const: jedec,ufs-2.0
clocks:
- minItems: 8
+ minItems: 7
maxItems: 11
clock-names:
- minItems: 8
+ minItems: 7
maxItems: 11
dma-coherent: true
@@ -117,9 +120,35 @@ allOf:
compatible:
contains:
enum:
+ - qcom,sc7180-ufshc
+ then:
+ properties:
+ clocks:
+ minItems: 7
+ maxItems: 7
+ clock-names:
+ items:
+ - const: core_clk
+ - const: bus_aggr_clk
+ - const: iface_clk
+ - const: core_clk_unipro
+ - const: ref_clk
+ - const: tx_lane0_sync_clk
+ - const: rx_lane0_sync_clk
+ reg:
+ maxItems: 1
+ reg-names:
+ maxItems: 1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
- qcom,msm8998-ufshc
- qcom,sa8775p-ufshc
- qcom,sc7280-ufshc
+ - qcom,sc8180x-ufshc
- qcom,sc8280xp-ufshc
- qcom,sm8250-ufshc
- qcom,sm8350-ufshc
@@ -215,6 +244,7 @@ allOf:
contains:
enum:
- qcom,sm6115-ufshc
+ - qcom,sm6125-ufshc
then:
properties:
clocks:
@@ -248,7 +278,7 @@ allOf:
reg:
maxItems: 1
clocks:
- minItems: 8
+ minItems: 7
maxItems: 8
else:
properties:
@@ -256,7 +286,7 @@ allOf:
minItems: 1
maxItems: 2
clocks:
- minItems: 8
+ minItems: 7
maxItems: 11
unevaluatedProperties: false
diff --git a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst
index de587cf9cbed45..4cb1d52ea6dc4d 100644
--- a/Documentation/driver-api/crypto/iaa/iaa-crypto.rst
+++ b/Documentation/driver-api/crypto/iaa/iaa-crypto.rst
@@ -457,7 +457,6 @@ Use the following commands to enable zswap::
# echo deflate-iaa > /sys/module/zswap/parameters/compressor
# echo zsmalloc > /sys/module/zswap/parameters/zpool
# echo 1 > /sys/module/zswap/parameters/enabled
- # echo 0 > /sys/module/zswap/parameters/same_filled_pages_enabled
# echo 100 > /proc/sys/vm/swappiness
# echo never > /sys/kernel/mm/transparent_hugepage/enabled
# echo 1 > /proc/sys/vm/overcommit_memory
@@ -599,7 +598,6 @@ the 'fixed' compression mode::
echo deflate-iaa > /sys/module/zswap/parameters/compressor
echo zsmalloc > /sys/module/zswap/parameters/zpool
echo 1 > /sys/module/zswap/parameters/enabled
- echo 0 > /sys/module/zswap/parameters/same_filled_pages_enabled
echo 100 > /proc/sys/vm/swappiness
echo never > /sys/kernel/mm/transparent_hugepage/enabled
diff --git a/Documentation/driver-api/virtio/writing_virtio_drivers.rst b/Documentation/driver-api/virtio/writing_virtio_drivers.rst
index e14c58796d2501..e5de6f5d061a7c 100644
--- a/Documentation/driver-api/virtio/writing_virtio_drivers.rst
+++ b/Documentation/driver-api/virtio/writing_virtio_drivers.rst
@@ -97,7 +97,6 @@ like this::
static struct virtio_driver virtio_dummy_driver = {
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtio_dummy_probe,
.remove = virtio_dummy_remove,
diff --git a/Documentation/filesystems/api-summary.rst b/Documentation/filesystems/api-summary.rst
index 98db2ea5fa12c6..cc5cc7f3fbd8e9 100644
--- a/Documentation/filesystems/api-summary.rst
+++ b/Documentation/filesystems/api-summary.rst
@@ -56,9 +56,6 @@ Other Functions
.. kernel-doc:: fs/namei.c
:export:
-.. kernel-doc:: fs/buffer.c
- :export:
-
.. kernel-doc:: block/bio.c
:export:
diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst
new file mode 100644
index 00000000000000..e2bd61ccd96fff
--- /dev/null
+++ b/Documentation/filesystems/bcachefs/index.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+bcachefs Documentation
+======================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ errorcodes
diff --git a/Documentation/filesystems/buffer.rst b/Documentation/filesystems/buffer.rst
new file mode 100644
index 00000000000000..ae24faf68efaba
--- /dev/null
+++ b/Documentation/filesystems/buffer.rst
@@ -0,0 +1,12 @@
+Buffer Heads
+============
+
+Linux uses buffer heads to maintain state about individual filesystem blocks.
+Buffer heads are deprecated and new filesystems should use iomap instead.
+
+Functions
+---------
+
+.. kernel-doc:: include/linux/buffer_head.h
+.. kernel-doc:: fs/buffer.c
+ :export:
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 0ea1e44fa02823..8f5c1ee02e2f94 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -50,6 +50,7 @@ filesystem implementations.
.. toctree::
:maxdepth: 2
+ buffer
journalling
fscrypt
fsverity
@@ -69,6 +70,7 @@ Documentation for filesystem implementations.
afs
autofs
autofs-mount-control
+ bcachefs/index
befs
bfs
btrfs
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index c6a6b9df210497..245269dd6e0289 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -688,6 +688,7 @@ files are there, and which are missing.
============ ===============================================================
File Content
============ ===============================================================
+ allocinfo Memory allocations profiling information
apm Advanced power management info
bootconfig Kernel command line obtained from boot config,
and, if there were kernel parameters from the
@@ -953,6 +954,34 @@ also be allocatable although a lot of filesystem metadata may have to be
reclaimed to achieve this.
+allocinfo
+~~~~~~~~~
+
+Provides information about memory allocations at all locations in the code
+base. Each allocation in the code is identified by its source file, line
+number, module (if originates from a loadable module) and the function calling
+the allocation. The number of bytes allocated and number of calls at each
+location are reported.
+
+Example output.
+
+::
+
+ > sort -rn /proc/allocinfo
+ 127664128 31168 mm/page_ext.c:270 func:alloc_page_ext
+ 56373248 4737 mm/slub.c:2259 func:alloc_slab_page
+ 14880768 3633 mm/readahead.c:247 func:page_cache_ra_unbounded
+ 14417920 3520 mm/mm_init.c:2530 func:alloc_large_system_hash
+ 13377536 234 block/blk-mq.c:3421 func:blk_mq_alloc_rqs
+ 11718656 2861 mm/filemap.c:1919 func:__filemap_get_folio
+ 9192960 2800 kernel/fork.c:307 func:alloc_thread_stack_node
+ 4206592 4 net/netfilter/nf_conntrack_core.c:2567 func:nf_ct_alloc_hashtable
+ 4136960 1010 drivers/staging/ctagmod/ctagmod.c:20 [ctagmod] func:ctagmod_start
+ 3940352 962 mm/memory.c:4214 func:alloc_anon_folio
+ 2894464 22613 fs/kernfs/dir.c:615 func:__kernfs_new_node
+ ...
+
+
meminfo
~~~~~~~
diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst
index 79ac2e8184f6f0..555c2f8399697c 100644
--- a/Documentation/kbuild/kconfig-language.rst
+++ b/Documentation/kbuild/kconfig-language.rst
@@ -410,9 +410,6 @@ to be set to 'm'. This can be used if multiple drivers for a single
hardware exists and only a single driver can be compiled/loaded into
the kernel, but all drivers can be compiled as modules.
-A choice accepts another option "optional", which allows to set the
-choice to 'n' and no entry needs to be selected.
-
comment::
"comment" <prompt>
diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst
index b1d97fafddcfc9..bb5c44f8bd1c49 100644
--- a/Documentation/kbuild/llvm.rst
+++ b/Documentation/kbuild/llvm.rst
@@ -178,7 +178,7 @@ yet. Bug reports are always welcome at the issue tracker below!
- ``LLVM=1``
* - s390
- Maintained
- - ``CC=clang``
+ - ``LLVM=1`` (LLVM >= 18.1.0), ``CC=clang`` (LLVM < 18.1.0)
* - um (User Mode)
- Maintained
- ``LLVM=1``
diff --git a/Documentation/mm/allocation-profiling.rst b/Documentation/mm/allocation-profiling.rst
new file mode 100644
index 00000000000000..d3b733b41ae658
--- /dev/null
+++ b/Documentation/mm/allocation-profiling.rst
@@ -0,0 +1,100 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+MEMORY ALLOCATION PROFILING
+===========================
+
+Low overhead (suitable for production) accounting of all memory allocations,
+tracked by file and line number.
+
+Usage:
+kconfig options:
+- CONFIG_MEM_ALLOC_PROFILING
+
+- CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
+
+- CONFIG_MEM_ALLOC_PROFILING_DEBUG
+ adds warnings for allocations that weren't accounted because of a
+ missing annotation
+
+Boot parameter:
+ sysctl.vm.mem_profiling=0|1|never
+
+ When set to "never", memory allocation profiling overhead is minimized and it
+ cannot be enabled at runtime (sysctl becomes read-only).
+ When CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT=y, default value is "1".
+ When CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT=n, default value is "never".
+
+sysctl:
+ /proc/sys/vm/mem_profiling
+
+Runtime info:
+ /proc/allocinfo
+
+Example output::
+
+ root@moria-kvm:~# sort -g /proc/allocinfo|tail|numfmt --to=iec
+ 2.8M 22648 fs/kernfs/dir.c:615 func:__kernfs_new_node
+ 3.8M 953 mm/memory.c:4214 func:alloc_anon_folio
+ 4.0M 1010 drivers/staging/ctagmod/ctagmod.c:20 [ctagmod] func:ctagmod_start
+ 4.1M 4 net/netfilter/nf_conntrack_core.c:2567 func:nf_ct_alloc_hashtable
+ 6.0M 1532 mm/filemap.c:1919 func:__filemap_get_folio
+ 8.8M 2785 kernel/fork.c:307 func:alloc_thread_stack_node
+ 13M 234 block/blk-mq.c:3421 func:blk_mq_alloc_rqs
+ 14M 3520 mm/mm_init.c:2530 func:alloc_large_system_hash
+ 15M 3656 mm/readahead.c:247 func:page_cache_ra_unbounded
+ 55M 4887 mm/slub.c:2259 func:alloc_slab_page
+ 122M 31168 mm/page_ext.c:270 func:alloc_page_ext
+
+===================
+Theory of operation
+===================
+
+Memory allocation profiling builds off of code tagging, which is a library for
+declaring static structs (that typically describe a file and line number in
+some way, hence code tagging) and then finding and operating on them at runtime,
+- i.e. iterating over them to print them in debugfs/procfs.
+
+To add accounting for an allocation call, we replace it with a macro
+invocation, alloc_hooks(), that
+- declares a code tag
+- stashes a pointer to it in task_struct
+- calls the real allocation function
+- and finally, restores the task_struct alloc tag pointer to its previous value.
+
+This allows for alloc_hooks() calls to be nested, with the most recent one
+taking effect. This is important for allocations internal to the mm/ code that
+do not properly belong to the outer allocation context and should be counted
+separately: for example, slab object extension vectors, or when the slab
+allocates pages from the page allocator.
+
+Thus, proper usage requires determining which function in an allocation call
+stack should be tagged. There are many helper functions that essentially wrap
+e.g. kmalloc() and do a little more work, then are called in multiple places;
+we'll generally want the accounting to happen in the callers of these helpers,
+not in the helpers themselves.
+
+To fix up a given helper, for example foo(), do the following:
+- switch its allocation call to the _noprof() version, e.g. kmalloc_noprof()
+
+- rename it to foo_noprof()
+
+- define a macro version of foo() like so:
+
+ #define foo(...) alloc_hooks(foo_noprof(__VA_ARGS__))
+
+It's also possible to stash a pointer to an alloc tag in your own data structures.
+
+Do this when you're implementing a generic data structure that does allocations
+"on behalf of" some other code - for example, the rhashtable code. This way,
+instead of seeing a large line in /proc/allocinfo for rhashtable.c, we can
+break it out by rhashtable type.
+
+To do so:
+- Hook your data structure's init function, like any other allocation function.
+
+- Within your init function, use the convenience macro alloc_tag_record() to
+ record alloc tag in your data structure.
+
+- Then, use the following form for your allocations:
+ alloc_hooks_tag(ht->your_saved_tag, kmalloc_noprof(...))
diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst
index 5620aab9b38507..f2baf617184d0a 100644
--- a/Documentation/mm/damon/design.rst
+++ b/Documentation/mm/damon/design.rst
@@ -461,15 +461,17 @@ number of filters for each scheme. Each filter specifies the type of target
memory, and whether it should exclude the memory of the type (filter-out), or
all except the memory of the type (filter-in).
-Currently, anonymous page, memory cgroup, address range, and DAMON monitoring
-target type filters are supported by the feature. Some filter target types
-require additional arguments. The memory cgroup filter type asks users to
-specify the file path of the memory cgroup for the filter. The address range
-type asks the start and end addresses of the range. The DAMON monitoring
-target type asks the index of the target from the context's monitoring targets
-list. Hence, users can apply specific schemes to only anonymous pages,
-non-anonymous pages, pages of specific cgroups, all pages excluding those of
-specific cgroups, pages in specific address range, pages in specific DAMON
+Currently, anonymous page, memory cgroup, young page, address range, and DAMON
+monitoring target type filters are supported by the feature. Some filter
+target types require additional arguments. The memory cgroup filter type asks
+users to specify the file path of the memory cgroup for the filter. The
+address range type asks the start and end addresses of the range. The DAMON
+monitoring target type asks the index of the target from the context's
+monitoring targets list. Hence, users can apply specific schemes to only
+anonymous pages, non-anonymous pages, pages of specific cgroups, all pages
+excluding those of specific cgroups, pages that not accessed after the last
+access check from the scheme, pages that accessed after the last access check
+from the scheme, pages in specific address range, pages in specific DAMON
monitoring targets, and any combination of those.
To handle filters efficiently, the address range and DAMON monitoring target
diff --git a/Documentation/mm/index.rst b/Documentation/mm/index.rst
index 31d2ac3064387b..48b9b559ca7b45 100644
--- a/Documentation/mm/index.rst
+++ b/Documentation/mm/index.rst
@@ -26,6 +26,7 @@ see the :doc:`admin guide <../admin-guide/mm/index>`.
page_cache
shmfs
oom
+ allocation-profiling
Legacy Documentation
====================
diff --git a/Documentation/mm/page_owner.rst b/Documentation/mm/page_owner.rst
index 0d0334cd51798b..3a45a20fc05a1f 100644
--- a/Documentation/mm/page_owner.rst
+++ b/Documentation/mm/page_owner.rst
@@ -24,10 +24,10 @@ fragmentation statistics can be obtained through gfp flag information of
each page. It is already implemented and activated if page owner is
enabled. Other usages are more than welcome.
-It can also be used to show all the stacks and their outstanding
-allocations, which gives us a quick overview of where the memory is going
-without the need to screen through all the pages and match the allocation
-and free operation.
+It can also be used to show all the stacks and their current number of
+allocated base pages, which gives us a quick overview of where the memory
+is going without the need to screen through all the pages and match the
+allocation and free operation.
page owner is disabled by default. So, if you'd like to use it, you need
to add "page_owner=on" to your boot cmdline. If the kernel is built
@@ -75,42 +75,45 @@ Usage
cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt
cat stacks.txt
- prep_new_page+0xa9/0x120
- get_page_from_freelist+0x7e6/0x2140
- __alloc_pages+0x18a/0x370
- new_slab+0xc8/0x580
- ___slab_alloc+0x1f2/0xaf0
- __slab_alloc.isra.86+0x22/0x40
- kmem_cache_alloc+0x31b/0x350
- __khugepaged_enter+0x39/0x100
- dup_mmap+0x1c7/0x5ce
- copy_process+0x1afe/0x1c90
- kernel_clone+0x9a/0x3c0
- __do_sys_clone+0x66/0x90
- do_syscall_64+0x7f/0x160
- entry_SYSCALL_64_after_hwframe+0x6c/0x74
- stack_count: 234
+ post_alloc_hook+0x177/0x1a0
+ get_page_from_freelist+0xd01/0xd80
+ __alloc_pages+0x39e/0x7e0
+ allocate_slab+0xbc/0x3f0
+ ___slab_alloc+0x528/0x8a0
+ kmem_cache_alloc+0x224/0x3b0
+ sk_prot_alloc+0x58/0x1a0
+ sk_alloc+0x32/0x4f0
+ inet_create+0x427/0xb50
+ __sock_create+0x2e4/0x650
+ inet_ctl_sock_create+0x30/0x180
+ igmp_net_init+0xc1/0x130
+ ops_init+0x167/0x410
+ setup_net+0x304/0xa60
+ copy_net_ns+0x29b/0x4a0
+ create_new_namespaces+0x4a1/0x820
+ nr_base_pages: 16
...
...
echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold
cat /sys/kernel/debug/page_owner_stacks/show_stacks> stacks_7000.txt
cat stacks_7000.txt
- prep_new_page+0xa9/0x120
- get_page_from_freelist+0x7e6/0x2140
- __alloc_pages+0x18a/0x370
- alloc_pages_mpol+0xdf/0x1e0
- folio_alloc+0x14/0x50
- filemap_alloc_folio+0xb0/0x100
- page_cache_ra_unbounded+0x97/0x180
- filemap_fault+0x4b4/0x1200
- __do_fault+0x2d/0x110
- do_pte_missing+0x4b0/0xa30
- __handle_mm_fault+0x7fa/0xb70
- handle_mm_fault+0x125/0x300
- do_user_addr_fault+0x3c9/0x840
- exc_page_fault+0x68/0x150
- asm_exc_page_fault+0x22/0x30
- stack_count: 8248
+ post_alloc_hook+0x177/0x1a0
+ get_page_from_freelist+0xd01/0xd80
+ __alloc_pages+0x39e/0x7e0
+ alloc_pages_mpol+0x22e/0x490
+ folio_alloc+0xd5/0x110
+ filemap_alloc_folio+0x78/0x230
+ page_cache_ra_order+0x287/0x6f0
+ filemap_get_pages+0x517/0x1160
+ filemap_read+0x304/0x9f0
+ xfs_file_buffered_read+0xe6/0x1d0 [xfs]
+ xfs_file_read_iter+0x1f0/0x380 [xfs]
+ __kernel_read+0x3b9/0x730
+ kernel_read_file+0x309/0x4d0
+ __do_sys_finit_module+0x381/0x730
+ do_syscall_64+0x8d/0x150
+ entry_SYSCALL_64_after_hwframe+0x62/0x6a
+ nr_base_pages: 20824
...
cat /sys/kernel/debug/page_owner > page_owner_full.txt
diff --git a/Documentation/mm/page_table_check.rst b/Documentation/mm/page_table_check.rst
index c12838ce6b8de2..c59f22eb6a0f9a 100644
--- a/Documentation/mm/page_table_check.rst
+++ b/Documentation/mm/page_table_check.rst
@@ -14,7 +14,7 @@ Page table check performs extra verifications at the time when new pages become
accessible from the userspace by getting their page table entries (PTEs PMDs
etc.) added into the table.
-In case of detected corruption, the kernel is crashed. There is a small
+In case of most detected corruption, the kernel is crashed. There is a small
performance and memory overhead associated with the page table check. Therefore,
it is disabled by default, but can be optionally enabled on systems where the
extra hardening outweighs the performance costs. Also, because page table check
@@ -22,6 +22,13 @@ is synchronous, it can help with debugging double map memory corruption issues,
by crashing kernel at the time wrong mapping occurs instead of later which is
often the case with memory corruptions bugs.
+It can also be used to do page table entry checks over various flags, dump
+warnings when illegal combinations of entry flags are detected. Currently,
+userfaultfd is the only user of such to sanity check wr-protect bit against
+any writable flags. Illegal flag combinations will not directly cause data
+corruption in this case immediately, but that will cause read-only data to
+be writable, leading to corrupt when the page content is later modified.
+
Double mapping detection logic
==============================
diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst
index 93c9239b9ebe23..1ba0ad63246c53 100644
--- a/Documentation/mm/transhuge.rst
+++ b/Documentation/mm/transhuge.rst
@@ -116,14 +116,14 @@ pages:
succeeds on tail pages.
- map/unmap of a PMD entry for the whole THP increment/decrement
- folio->_entire_mapcount and also increment/decrement
- folio->_nr_pages_mapped by ENTIRELY_MAPPED when _entire_mapcount
- goes from -1 to 0 or 0 to -1.
+ folio->_entire_mapcount, increment/decrement folio->_large_mapcount
+ and also increment/decrement folio->_nr_pages_mapped by ENTIRELY_MAPPED
+ when _entire_mapcount goes from -1 to 0 or 0 to -1.
- map/unmap of individual pages with PTE entry increment/decrement
- page->_mapcount and also increment/decrement folio->_nr_pages_mapped
- when page->_mapcount goes from -1 to 0 or 0 to -1 as this counts
- the number of pages mapped by PTE.
+ page->_mapcount, increment/decrement folio->_large_mapcount and also
+ increment/decrement folio->_nr_pages_mapped when page->_mapcount goes
+ from -1 to 0 or 0 to -1 as this counts the number of pages mapped by PTE.
split_huge_page internally has to distribute the refcounts in the head
page to the tail pages before clearing all PG_head/tail bits from the page
diff --git a/Documentation/mm/vmemmap_dedup.rst b/Documentation/mm/vmemmap_dedup.rst
index 593ede6d314ba6..b4a55b6569fa54 100644
--- a/Documentation/mm/vmemmap_dedup.rst
+++ b/Documentation/mm/vmemmap_dedup.rst
@@ -180,27 +180,7 @@ this correctly. There is only **one** head ``struct page``, the tail
``struct page`` with ``PG_head`` are fake head ``struct page``. We need an
approach to distinguish between those two different types of ``struct page`` so
that ``compound_head()`` can return the real head ``struct page`` when the
-parameter is the tail ``struct page`` but with ``PG_head``. The following code
-snippet describes how to distinguish between real and fake head ``struct page``.
-
-.. code-block:: c
-
- if (test_bit(PG_head, &page->flags)) {
- unsigned long head = READ_ONCE(page[1].compound_head);
-
- if (head & 1) {
- if (head == (unsigned long)page + 1)
- /* head struct page */
- else
- /* tail struct page */
- } else {
- /* head struct page */
- }
- }
-
-We can safely access the field of the **page[1]** with ``PG_head`` because the
-page is a compound page composed with at least two contiguous pages.
-The implementation refers to ``page_fixed_fake_head()``.
+parameter is the tail ``struct page`` but with ``PG_head``.
Device DAX
==========
diff --git a/Documentation/networking/devlink/devlink-eswitch-attr.rst b/Documentation/networking/devlink/devlink-eswitch-attr.rst
new file mode 100644
index 00000000000000..08bb39ab152864
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-eswitch-attr.rst
@@ -0,0 +1,76 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Devlink E-Switch Attribute
+==========================
+
+Devlink E-Switch supports two modes of operation: legacy and switchdev.
+Legacy mode operates based on traditional MAC/VLAN steering rules. Switching
+decisions are made based on MAC addresses, VLANs, etc. There is limited ability
+to offload switching rules to hardware.
+
+On the other hand, switchdev mode allows for more advanced offloading
+capabilities of the E-Switch to hardware. In switchdev mode, more switching
+rules and logic can be offloaded to the hardware switch ASIC. It enables
+representor netdevices that represent the slow path of virtual functions (VFs)
+or scalable-functions (SFs) of the device. See more information about
+:ref:`Documentation/networking/switchdev.rst <switchdev>` and
+:ref:`Documentation/networking/representors.rst <representors>`.
+
+In addition, the devlink E-Switch also comes with other attributes listed
+in the following section.
+
+Attributes Description
+======================
+
+The following is a list of E-Switch attributes.
+
+.. list-table:: E-Switch attributes
+ :widths: 8 5 45
+
+ * - Name
+ - Type
+ - Description
+ * - ``mode``
+ - enum
+ - The mode of the device. The mode can be one of the following:
+
+ * ``legacy`` operates based on traditional MAC/VLAN steering
+ rules.
+ * ``switchdev`` allows for more advanced offloading capabilities of
+ the E-Switch to hardware.
+ * - ``inline-mode``
+ - enum
+ - Some HWs need the VF driver to put part of the packet
+ headers on the TX descriptor so the e-switch can do proper
+ matching and steering. Support for both switchdev mode and legacy mode.
+
+ * ``none`` none.
+ * ``link`` L2 mode.
+ * ``network`` L3 mode.
+ * ``transport`` L4 mode.
+ * - ``encap-mode``
+ - enum
+ - The encapsulation mode of the device. Support for both switchdev mode
+ and legacy mode. The mode can be one of the following:
+
+ * ``none`` Disable encapsulation support.
+ * ``basic`` Enable encapsulation support.
+
+Example Usage
+=============
+
+.. code:: shell
+
+ # enable switchdev mode
+ $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
+
+ # set inline-mode and encap-mode
+ $ devlink dev eswitch set pci/0000:08:00.0 inline-mode none encap-mode basic
+
+ # display devlink device eswitch attributes
+ $ devlink dev eswitch show pci/0000:08:00.0
+ pci/0000:08:00.0: mode switchdev inline-mode none encap-mode basic
+
+ # enable encap-mode with legacy mode
+ $ devlink dev eswitch set pci/0000:08:00.0 mode legacy inline-mode none encap-mode basic
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
index e14d7a701b72bc..948c8c44e233f6 100644
--- a/Documentation/networking/devlink/index.rst
+++ b/Documentation/networking/devlink/index.rst
@@ -67,6 +67,7 @@ general.
devlink-selftests
devlink-trap
devlink-linecard
+ devlink-eswitch-attr
Driver-specific documentation
-----------------------------
diff --git a/Documentation/networking/representors.rst b/Documentation/networking/representors.rst
index decb39c19b9ed2..5e23386f69687f 100644
--- a/Documentation/networking/representors.rst
+++ b/Documentation/networking/representors.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0
+.. _representors:
=============================
Network Function Representors
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 7ef8de58f7f892..3a39395bd9d3b2 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -62,6 +62,7 @@ Sphinx\ [#f1]_ 2.4.4 sphinx-build --version
cpio any cpio --version
GNU tar 1.28 tar --version
gtags (optional) 6.6.5 gtags --version
+mkimage (optional) 2017.01 mkimage --version
====================== =============== ========================================
.. [#f1] Sphinx is needed only to build the Kernel documentation
@@ -189,6 +190,14 @@ The kernel build requires GNU GLOBAL version 6.6.5 or later to generate
tag files through ``make gtags``. This is due to its use of the gtags
``-C (--directory)`` flag.
+mkimage
+-------
+
+This tool is used when building a Flat Image Tree (FIT), commonly used on ARM
+platforms. The tool is available via the ``u-boot-tools`` package or can be
+built from the U-Boot source code. See the instructions at
+https://docs.u-boot.org/en/latest/build/tools.html#building-tools-for-linux
+
System utilities
****************
diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst
index bb2100228cc7b6..6e9a4597bf2cbb 100644
--- a/Documentation/process/embargoed-hardware-issues.rst
+++ b/Documentation/process/embargoed-hardware-issues.rst
@@ -252,7 +252,7 @@ an involved disclosed party. The current ambassadors list:
AMD Tom Lendacky <thomas.lendacky@amd.com>
Ampere Darren Hart <darren@os.amperecomputing.com>
ARM Catalin Marinas <catalin.marinas@arm.com>
- IBM Power Anton Blanchard <anton@linux.ibm.com>
+ IBM Power Michael Ellerman <ellerman@au.ibm.com>
IBM Z Christian Borntraeger <borntraeger@de.ibm.com>
Intel Tony Luck <tony.luck@intel.com>
Qualcomm Trilok Soni <quic_tsoni@quicinc.com>
diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst
index 5c4fa9f5d1cdb7..c9137710633a37 100644
--- a/Documentation/rust/arch-support.rst
+++ b/Documentation/rust/arch-support.rst
@@ -16,7 +16,7 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file.
Architecture Level of support Constraints
============= ================ ==============================================
``arm64`` Maintained Little Endian only.
-``loongarch`` Maintained -
+``loongarch`` Maintained \-
``um`` Maintained ``x86_64`` only.
``x86`` Maintained ``x86_64`` only.
============= ================ ==============================================
diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst
index f8786be15183c1..7fe8ef9718d8e3 100644
--- a/Documentation/timers/no_hz.rst
+++ b/Documentation/timers/no_hz.rst
@@ -129,11 +129,8 @@ adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain
online to handle timekeeping tasks in order to ensure that system
calls like gettimeofday() returns accurate values on adaptive-tick CPUs.
(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running
-user processes to observe slight drifts in clock rate.) Therefore, the
-boot CPU is prohibited from entering adaptive-ticks mode. Specifying a
-"nohz_full=" mask that includes the boot CPU will result in a boot-time
-error message, and the boot CPU will be removed from the mask. Note that
-this means that your system must have at least two CPUs in order for
+user processes to observe slight drifts in clock rate.) Note that this
+means that your system must have at least two CPUs in order for
CONFIG_NO_HZ_FULL=y to do anything for you.
Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
diff --git a/Documentation/translations/zh_CN/core-api/cachetlb.rst b/Documentation/translations/zh_CN/core-api/cachetlb.rst
index b4a76ec75daa57..64295c61d1c136 100644
--- a/Documentation/translations/zh_CN/core-api/cachetlb.rst
+++ b/Documentation/translations/zh_CN/core-api/cachetlb.rst
@@ -260,7 +260,7 @@ HyperSparc cpu就是这样一个具有这种属性的cpu。
如果D-cache别名不是一个问题,这个程序可以简单地定义为该架构上
的nop。
- 在page->flags (PG_arch_1)中有一个位是“架构私有”。内核保证,
+ 在folio->flags (PG_arch_1)中有一个位是“架构私有”。内核保证,
对于分页缓存的页面,当这样的页面第一次进入分页缓存时,它将清除
这个位。
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index afecfe3cc4a86b..5926115ec0ed86 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -20,6 +20,7 @@ System calls
futex2
ebpf/index
ioctl/index
+ mseal
Security-related interfaces
===========================
diff --git a/Documentation/userspace-api/mseal.rst b/Documentation/userspace-api/mseal.rst
new file mode 100644
index 00000000000000..4132eec995a399
--- /dev/null
+++ b/Documentation/userspace-api/mseal.rst
@@ -0,0 +1,199 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Introduction of mseal
+=====================
+
+:Author: Jeff Xu <jeffxu@chromium.org>
+
+Modern CPUs support memory permissions such as RW and NX bits. The memory
+permission feature improves security stance on memory corruption bugs, i.e.
+the attacker can’t just write to arbitrary memory and point the code to it,
+the memory has to be marked with X bit, or else an exception will happen.
+
+Memory sealing additionally protects the mapping itself against
+modifications. This is useful to mitigate memory corruption issues where a
+corrupted pointer is passed to a memory management system. For example,
+such an attacker primitive can break control-flow integrity guarantees
+since read-only memory that is supposed to be trusted can become writable
+or .text pages can get remapped. Memory sealing can automatically be
+applied by the runtime loader to seal .text and .rodata pages and
+applications can additionally seal security critical data at runtime.
+
+A similar feature already exists in the XNU kernel with the
+VM_FLAGS_PERMANENT flag [1] and on OpenBSD with the mimmutable syscall [2].
+
+User API
+========
+mseal()
+-----------
+The mseal() syscall has the following signature:
+
+``int mseal(void addr, size_t len, unsigned long flags)``
+
+**addr/len**: virtual memory address range.
+
+The address range set by ``addr``/``len`` must meet:
+ - The start address must be in an allocated VMA.
+ - The start address must be page aligned.
+ - The end address (``addr`` + ``len``) must be in an allocated VMA.
+ - no gap (unallocated memory) between start and end address.
+
+The ``len`` will be paged aligned implicitly by the kernel.
+
+**flags**: reserved for future use.
+
+**return values**:
+
+- ``0``: Success.
+
+- ``-EINVAL``:
+ - Invalid input ``flags``.
+ - The start address (``addr``) is not page aligned.
+ - Address range (``addr`` + ``len``) overflow.
+
+- ``-ENOMEM``:
+ - The start address (``addr``) is not allocated.
+ - The end address (``addr`` + ``len``) is not allocated.
+ - A gap (unallocated memory) between start and end address.
+
+- ``-EPERM``:
+ - sealing is supported only on 64-bit CPUs, 32-bit is not supported.
+
+- For above error cases, users can expect the given memory range is
+ unmodified, i.e. no partial update.
+
+- There might be other internal errors/cases not listed here, e.g.
+ error during merging/splitting VMAs, or the process reaching the max
+ number of supported VMAs. In those cases, partial updates to the given
+ memory range could happen. However, those cases should be rare.
+
+**Blocked operations after sealing**:
+ Unmapping, moving to another location, and shrinking the size,
+ via munmap() and mremap(), can leave an empty space, therefore
+ can be replaced with a VMA with a new set of attributes.
+
+ Moving or expanding a different VMA into the current location,
+ via mremap().
+
+ Modifying a VMA via mmap(MAP_FIXED).
+
+ Size expansion, via mremap(), does not appear to pose any
+ specific risks to sealed VMAs. It is included anyway because
+ the use case is unclear. In any case, users can rely on
+ merging to expand a sealed VMA.
+
+ mprotect() and pkey_mprotect().
+
+ Some destructive madvice() behaviors (e.g. MADV_DONTNEED)
+ for anonymous memory, when users don't have write permission to the
+ memory. Those behaviors can alter region contents by discarding pages,
+ effectively a memset(0) for anonymous memory.
+
+ Kernel will return -EPERM for blocked operations.
+
+ For blocked operations, one can expect the given address is unmodified,
+ i.e. no partial update. Note, this is different from existing mm
+ system call behaviors, where partial updates are made till an error is
+ found and returned to userspace. To give an example:
+
+ Assume following code sequence:
+
+ - ptr = mmap(null, 8192, PROT_NONE);
+ - munmap(ptr + 4096, 4096);
+ - ret1 = mprotect(ptr, 8192, PROT_READ);
+ - mseal(ptr, 4096);
+ - ret2 = mprotect(ptr, 8192, PROT_NONE);
+
+ ret1 will be -ENOMEM, the page from ptr is updated to PROT_READ.
+
+ ret2 will be -EPERM, the page remains to be PROT_READ.
+
+**Note**:
+
+- mseal() only works on 64-bit CPUs, not 32-bit CPU.
+
+- users can call mseal() multiple times, mseal() on an already sealed memory
+ is a no-action (not error).
+
+- munseal() is not supported.
+
+Use cases:
+==========
+- glibc:
+ The dynamic linker, during loading ELF executables, can apply sealing to
+ non-writable memory segments.
+
+- Chrome browser: protect some security sensitive data-structures.
+
+Notes on which memory to seal:
+==============================
+
+It might be important to note that sealing changes the lifetime of a mapping,
+i.e. the sealed mapping won’t be unmapped till the process terminates or the
+exec system call is invoked. Applications can apply sealing to any virtual
+memory region from userspace, but it is crucial to thoroughly analyze the
+mapping's lifetime prior to apply the sealing.
+
+For example:
+
+- aio/shm
+
+ aio/shm can call mmap()/munmap() on behalf of userspace, e.g. ksys_shmdt() in
+ shm.c. The lifetime of those mapping are not tied to the lifetime of the
+ process. If those memories are sealed from userspace, then munmap() will fail,
+ causing leaks in VMA address space during the lifetime of the process.
+
+- Brk (heap)
+
+ Currently, userspace applications can seal parts of the heap by calling
+ malloc() and mseal().
+ let's assume following calls from user space:
+
+ - ptr = malloc(size);
+ - mprotect(ptr, size, RO);
+ - mseal(ptr, size);
+ - free(ptr);
+
+ Technically, before mseal() is added, the user can change the protection of
+ the heap by calling mprotect(RO). As long as the user changes the protection
+ back to RW before free(), the memory range can be reused.
+
+ Adding mseal() into the picture, however, the heap is then sealed partially,
+ the user can still free it, but the memory remains to be RO. If the address
+ is re-used by the heap manager for another malloc, the process might crash
+ soon after. Therefore, it is important not to apply sealing to any memory
+ that might get recycled.
+
+ Furthermore, even if the application never calls the free() for the ptr,
+ the heap manager may invoke the brk system call to shrink the size of the
+ heap. In the kernel, the brk-shrink will call munmap(). Consequently,
+ depending on the location of the ptr, the outcome of brk-shrink is
+ nondeterministic.
+
+
+Additional notes:
+=================
+As Jann Horn pointed out in [3], there are still a few ways to write
+to RO memory, which is, in a way, by design. Those cases are not covered
+by mseal(). If applications want to block such cases, sandbox tools (such as
+seccomp, LSM, etc) might be considered.
+
+Those cases are:
+
+- Write to read-only memory through /proc/self/mem interface.
+- Write to read-only memory through ptrace (such as PTRACE_POKETEXT).
+- userfaultfd.
+
+The idea that inspired this patch comes from Stephen Röttger’s work in V8
+CFI [4]. Chrome browser in ChromeOS will be the first user of this API.
+
+Reference:
+==========
+[1] https://github.com/apple-oss-distributions/xnu/blob/1031c584a5e37aff177559b9f69dbd3c8c3fd30a/osfmk/mach/vm_statistics.h#L274
+
+[2] https://man.openbsd.org/mimmutable.2
+
+[3] https://lore.kernel.org/lkml/CAG48ez3ShUYey+ZAFsU2i1RpQn0a5eOs2hzQ426FkcgnfUGLvA@mail.gmail.com
+
+[4] https://docs.google.com/document/d/1O2jwK4dxI3nRcOJuPYkonhTkNQfbmwdvxQMyXgeaRHo/edit#heading=h.bvaojj9fu6hc
diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
index 995780088eb231..84335d119ff136 100644
--- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
@@ -46,21 +46,16 @@ SEV hardware uses ASIDs to associate a memory encryption key with a VM.
Hence, the ASID for the SEV-enabled guests must be from 1 to a maximum value
defined in the CPUID 0x8000001f[ecx] field.
-SEV Key Management
-==================
+The KVM_MEMORY_ENCRYPT_OP ioctl
+===============================
-The SEV guest key management is handled by a separate processor called the AMD
-Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
-key management interface to perform common hypervisor activities such as
-encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
-information, see the SEV Key Management spec [api-spec]_
-
-The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP. If the argument
-to KVM_MEMORY_ENCRYPT_OP is NULL, the ioctl returns 0 if SEV is enabled
-and ``ENOTTY`` if it is disabled (on some older versions of Linux,
-the ioctl runs normally even with a NULL argument, and therefore will
-likely return ``EFAULT``). If non-NULL, the argument to KVM_MEMORY_ENCRYPT_OP
-must be a struct kvm_sev_cmd::
+The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP, which operates on
+the VM file descriptor. If the argument to KVM_MEMORY_ENCRYPT_OP is NULL,
+the ioctl returns 0 if SEV is enabled and ``ENOTTY`` if it is disabled
+(on some older versions of Linux, the ioctl tries to run normally even
+with a NULL argument, and therefore will likely return ``EFAULT`` instead
+of zero if SEV is enabled). If non-NULL, the argument to
+KVM_MEMORY_ENCRYPT_OP must be a struct kvm_sev_cmd::
struct kvm_sev_cmd {
__u32 id;
@@ -87,10 +82,6 @@ guests, such as launching, running, snapshotting, migrating and decommissioning.
The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform
context. In a typical workflow, this command should be the first command issued.
-The firmware can be initialized either by using its own non-volatile storage or
-the OS can manage the NV storage for the firmware using the module parameter
-``init_ex_path``. If the file specified by ``init_ex_path`` does not exist or
-is invalid, the OS will create or override the file with output from PSP.
Returns: 0 on success, -negative on error
@@ -434,6 +425,21 @@ issued by the hypervisor to make the guest ready for execution.
Returns: 0 on success, -negative on error
+Firmware Management
+===================
+
+The SEV guest key management is handled by a separate processor called the AMD
+Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
+key management interface to perform common hypervisor activities such as
+encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
+information, see the SEV Key Management spec [api-spec]_
+
+The AMD-SP firmware can be initialized either by using its own non-volatile
+storage or the OS can manage the NV storage for the firmware using
+parameter ``init_ex_path`` of the ``ccp`` module. If the file specified
+by ``init_ex_path`` does not exist or is invalid, the OS will create or
+override the file with PSP non-volatile storage.
+
References
==========
diff --git a/Documentation/virt/kvm/x86/msr.rst b/Documentation/virt/kvm/x86/msr.rst
index 9315fc385fb0be..3aecf2a70e7b43 100644
--- a/Documentation/virt/kvm/x86/msr.rst
+++ b/Documentation/virt/kvm/x86/msr.rst
@@ -193,8 +193,8 @@ data:
Asynchronous page fault (APF) control MSR.
Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area
- which must be in guest RAM and must be zeroed. This memory is expected
- to hold a copy of the following structure::
+ which must be in guest RAM. This memory is expected to hold the
+ following structure::
struct kvm_vcpu_pv_apf_data {
/* Used for 'page not present' events delivered via #PF */
@@ -204,7 +204,6 @@ data:
__u32 token;
__u8 pad[56];
- __u32 enabled;
};
Bits 5-4 of the MSR are reserved and should be zero. Bit 0 is set to 1
@@ -232,14 +231,14 @@ data:
as regular page fault, guest must reset 'flags' to '0' before it does
something that can generate normal page fault.
- Bytes 5-7 of 64 byte memory location ('token') will be written to by the
+ Bytes 4-7 of 64 byte memory location ('token') will be written to by the
hypervisor at the time of APF 'page ready' event injection. The content
- of these bytes is a token which was previously delivered as 'page not
- present' event. The event indicates the page in now available. Guest is
- supposed to write '0' to 'token' when it is done handling 'page ready'
- event and to write 1' to MSR_KVM_ASYNC_PF_ACK after clearing the location;
- writing to the MSR forces KVM to re-scan its queue and deliver the next
- pending notification.
+ of these bytes is a token which was previously delivered in CR2 as
+ 'page not present' event. The event indicates the page is now available.
+ Guest is supposed to write '0' to 'token' when it is done handling
+ 'page ready' event and to write '1' to MSR_KVM_ASYNC_PF_ACK after
+ clearing the location; writing to the MSR forces KVM to re-scan its
+ queue and deliver the next pending notification.
Note, MSR_KVM_ASYNC_PF_INT MSR specifying the interrupt vector for 'page
ready' APF delivery needs to be written to before enabling APF mechanism
diff --git a/MAINTAINERS b/MAINTAINERS
index aa3b947fb0801d..873299a39d1955 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -553,7 +553,7 @@ F: Documentation/devicetree/bindings/iio/accel/adi,adxl345.yaml
F: drivers/input/misc/adxl34x.c
ADXL355 THREE-AXIS DIGITAL ACCELEROMETER DRIVER
-M: Puranjay Mohan <puranjay12@gmail.com>
+M: Puranjay Mohan <puranjay@kernel.org>
L: linux-iio@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml
@@ -2191,7 +2191,6 @@ N: mxs
ARM/FREESCALE LAYERSCAPE ARM ARCHITECTURE
M: Shawn Guo <shawnguo@kernel.org>
-M: Li Yang <leoyang.li@nxp.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
@@ -2708,7 +2707,7 @@ F: sound/soc/rockchip/
N: rockchip
ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-samsung-soc@vger.kernel.org
@@ -3051,6 +3050,13 @@ F: drivers/mmc/host/sdhci-of-arasan.c
N: zynq
N: xilinx
+ARM64 FIT SUPPORT
+M: Simon Glass <sjg@chromium.org>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S: Maintained
+F: arch/arm64/boot/Makefile
+F: scripts/make_fit.py
+
ARM64 PORT (AARCH64 ARCHITECTURE)
M: Catalin Marinas <catalin.marinas@arm.com>
M: Will Deacon <will@kernel.org>
@@ -3573,6 +3579,7 @@ S: Supported
C: irc://irc.oftc.net/bcache
T: git https://evilpiepirate.org/git/bcachefs.git
F: fs/bcachefs/
+F: Documentation/filesystems/bcachefs/
BDISP ST MEDIA DRIVER
M: Fabien Dessenne <fabien.dessenne@foss.st.com>
@@ -3714,7 +3721,7 @@ F: drivers/iio/imu/bmi323/
BPF JIT for ARM
M: Russell King <linux@armlinux.org.uk>
-M: Puranjay Mohan <puranjay12@gmail.com>
+M: Puranjay Mohan <puranjay@kernel.org>
L: bpf@vger.kernel.org
S: Maintained
F: arch/arm/net/
@@ -3764,6 +3771,8 @@ X: arch/riscv/net/bpf_jit_comp64.c
BPF JIT for RISC-V (64-bit)
M: Björn Töpel <bjorn@kernel.org>
+R: Pu Lehui <pulehui@huawei.com>
+R: Puranjay Mohan <puranjay@kernel.org>
L: bpf@vger.kernel.org
S: Maintained
F: arch/riscv/net/
@@ -3942,8 +3951,7 @@ F: kernel/bpf/ringbuf.c
BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF)
M: KP Singh <kpsingh@kernel.org>
-R: Florent Revest <revest@chromium.org>
-R: Brendan Jackman <jackmanb@chromium.org>
+R: Matt Bobrowski <mattbobrowski@google.com>
L: bpf@vger.kernel.org
S: Maintained
F: Documentation/bpf/prog_lsm.rst
@@ -3968,7 +3976,7 @@ F: kernel/bpf/bpf_lru*
F: kernel/bpf/cgroup.c
BPF [TOOLING] (bpftool)
-M: Quentin Monnet <quentin@isovalent.com>
+M: Quentin Monnet <qmo@kernel.org>
L: bpf@vger.kernel.org
S: Maintained
F: kernel/bpf/disasm.*
@@ -4870,7 +4878,6 @@ F: drivers/power/supply/cw2015_battery.c
CEPH COMMON CODE (LIBCEPH)
M: Ilya Dryomov <idryomov@gmail.com>
M: Xiubo Li <xiubli@redhat.com>
-R: Jeff Layton <jlayton@kernel.org>
L: ceph-devel@vger.kernel.org
S: Supported
W: http://ceph.com/
@@ -4882,7 +4889,6 @@ F: net/ceph/
CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
M: Xiubo Li <xiubli@redhat.com>
M: Ilya Dryomov <idryomov@gmail.com>
-R: Jeff Layton <jlayton@kernel.org>
L: ceph-devel@vger.kernel.org
S: Supported
W: http://ceph.com/
@@ -5255,6 +5261,14 @@ S: Supported
F: Documentation/process/code-of-conduct-interpretation.rst
F: Documentation/process/code-of-conduct.rst
+CODE TAGGING
+M: Suren Baghdasaryan <surenb@google.com>
+M: Kent Overstreet <kent.overstreet@linux.dev>
+S: Maintained
+F: include/asm-generic/codetag.lds.h
+F: include/linux/codetag.h
+F: lib/codetag.c
+
COMEDI DRIVERS
M: Ian Abbott <abbotti@mev.co.uk>
M: H Hartley Sweeten <hsweeten@visionengravers.com>
@@ -5558,7 +5572,7 @@ F: drivers/cpuidle/cpuidle-big_little.c
CPUIDLE DRIVER - ARM EXYNOS
M: Daniel Lezcano <daniel.lezcano@linaro.org>
M: Kukjin Kim <kgene@kernel.org>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -6157,7 +6171,6 @@ DEVICE-MAPPER (LVM)
M: Alasdair Kergon <agk@redhat.com>
M: Mike Snitzer <snitzer@kernel.org>
M: Mikulas Patocka <mpatocka@redhat.com>
-M: dm-devel@lists.linux.dev
L: dm-devel@lists.linux.dev
S: Maintained
Q: http://patchwork.kernel.org/project/dm-devel/list/
@@ -6173,7 +6186,6 @@ F: include/uapi/linux/dm-*.h
DEVICE-MAPPER VDO TARGET
M: Matthew Sakai <msakai@redhat.com>
-M: dm-devel@lists.linux.dev
L: dm-devel@lists.linux.dev
S: Maintained
F: Documentation/admin-guide/device-mapper/vdo*.rst
@@ -7834,9 +7846,8 @@ W: http://aeschi.ch.eu.org/efs/
F: fs/efs/
EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER
-M: Douglas Miller <dougmill@linux.ibm.com>
L: netdev@vger.kernel.org
-S: Maintained
+S: Orphan
F: drivers/net/ethernet/ibm/ehea/
ELM327 CAN NETWORK DRIVER
@@ -7941,6 +7952,7 @@ M: Gao Xiang <xiang@kernel.org>
M: Chao Yu <chao@kernel.org>
R: Yue Hu <huyue2@coolpad.com>
R: Jeffle Xu <jefflexu@linux.alibaba.com>
+R: Sandeep Dhavale <dhavale@google.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
W: https://erofs.docs.kernel.org
@@ -8525,7 +8537,6 @@ S: Maintained
F: drivers/video/fbdev/fsl-diu-fb.*
FREESCALE DMA DRIVER
-M: Li Yang <leoyang.li@nxp.com>
M: Zhang Wei <zw@zh-kernel.org>
L: linuxppc-dev@lists.ozlabs.org
S: Maintained
@@ -8690,10 +8701,9 @@ F: drivers/soc/fsl/qe/tsa.h
F: include/dt-bindings/soc/cpm1-fsl,tsa.h
FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
-M: Li Yang <leoyang.li@nxp.com>
L: netdev@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
-S: Maintained
+S: Orphan
F: drivers/net/ethernet/freescale/ucc_geth*
FREESCALE QUICC ENGINE UCC HDLC DRIVER
@@ -8710,10 +8720,9 @@ S: Maintained
F: drivers/tty/serial/ucc_uart.c
FREESCALE SOC DRIVERS
-M: Li Yang <leoyang.li@nxp.com>
L: linuxppc-dev@lists.ozlabs.org
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S: Maintained
+S: Orphan
F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
F: Documentation/devicetree/bindings/soc/fsl/
F: drivers/soc/fsl/
@@ -8747,17 +8756,15 @@ F: Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml
F: sound/soc/fsl/fsl_qmc_audio.c
FREESCALE USB PERIPHERAL DRIVERS
-M: Li Yang <leoyang.li@nxp.com>
L: linux-usb@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
-S: Maintained
+S: Orphan
F: drivers/usb/gadget/udc/fsl*
FREESCALE USB PHY DRIVER
-M: Ran Wang <ran.wang_1@nxp.com>
L: linux-usb@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
-S: Maintained
+S: Orphan
F: drivers/usb/phy/phy-fsl-usb*
FREEVXFS FILESYSTEM
@@ -9002,7 +9009,7 @@ F: drivers/i2c/muxes/i2c-mux-gpio.c
F: include/linux/platform_data/i2c-mux-gpio.h
GENERIC GPIO RESET DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: drivers/reset/reset-gpio.c
@@ -9585,7 +9592,7 @@ F: kernel/power/
HID CORE LAYER
M: Jiri Kosina <jikos@kernel.org>
-M: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+M: Benjamin Tissoires <bentiss@kernel.org>
L: linux-input@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
@@ -9653,7 +9660,9 @@ L: linux-input@vger.kernel.org
S: Maintained
F: drivers/hid/hid-logitech-hidpp.c
-HIGH-RESOLUTION TIMERS, CLOCKEVENTS
+HIGH-RESOLUTION TIMERS, TIMER WHEEL, CLOCKEVENTS
+M: Anna-Maria Behnsen <anna-maria@linutronix.de>
+M: Frederic Weisbecker <frederic@kernel.org>
M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
S: Maintained
@@ -9661,9 +9670,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
F: Documentation/timers/
F: include/linux/clockchips.h
F: include/linux/hrtimer.h
+F: include/linux/timer.h
F: kernel/time/clockevents.c
F: kernel/time/hrtimer.c
-F: kernel/time/timer_*.c
+F: kernel/time/timer.c
+F: kernel/time/timer_list.c
+F: kernel/time/timer_migration.*
+F: tools/testing/selftests/timers/
HIGH-SPEED SCC DRIVER FOR AX.25
L: linux-hams@vger.kernel.org
@@ -10026,7 +10039,7 @@ F: drivers/media/platform/st/sti/hva
HWPOISON MEMORY FAILURE HANDLING
M: Miaohe Lin <linmiaohe@huawei.com>
-R: Naoya Horiguchi <naoya.horiguchi@nec.com>
+R: Naoya Horiguchi <nao.horiguchi@gmail.com>
L: linux-mm@kvack.org
S: Maintained
F: mm/hwpoison-inject.c
@@ -11997,7 +12010,7 @@ F: include/keys/encrypted-type.h
F: security/keys/encrypted-keys/
KEYS-TRUSTED
-M: James Bottomley <jejb@linux.ibm.com>
+M: James Bottomley <James.Bottomley@HansenPartnership.com>
M: Jarkko Sakkinen <jarkko@kernel.org>
M: Mimi Zohar <zohar@linux.ibm.com>
L: linux-integrity@vger.kernel.org
@@ -12044,6 +12057,7 @@ M: Mimi Zohar <zohar@linux.ibm.com>
L: linux-integrity@vger.kernel.org
L: keyrings@vger.kernel.org
S: Supported
+W: https://kernsec.org/wiki/index.php/Linux_Kernel_Integrity
F: security/integrity/platform_certs
KFENCE
@@ -13134,6 +13148,7 @@ F: drivers/net/ethernet/marvell/mvpp2/
MARVELL MWIFIEX WIRELESS DRIVER
M: Brian Norris <briannorris@chromium.org>
+R: Francesco Dolcini <francesco@dolcini.it>
L: linux-wireless@vger.kernel.org
S: Odd Fixes
F: drivers/net/wireless/marvell/mwifiex/
@@ -13290,7 +13305,7 @@ F: drivers/iio/adc/max11205.c
MAXIM MAX17040 FAMILY FUEL GAUGE DRIVERS
R: Iskren Chernev <iskren.chernev@gmail.com>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
R: Marek Szyprowski <m.szyprowski@samsung.com>
R: Matheus Castello <matheus@castello.eng.br>
L: linux-pm@vger.kernel.org
@@ -13300,7 +13315,7 @@ F: drivers/power/supply/max17040_battery.c
MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS
R: Hans de Goede <hdegoede@redhat.com>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
R: Marek Szyprowski <m.szyprowski@samsung.com>
R: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
R: Purism Kernel Team <kernel@puri.sm>
@@ -13358,7 +13373,7 @@ F: Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml
F: drivers/power/supply/max77976_charger.c
MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
S: Maintained
B: mailto:linux-samsung-soc@vger.kernel.org
@@ -13369,7 +13384,7 @@ F: drivers/power/supply/max77693_charger.c
MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
M: Chanwoo Choi <cw00.choi@samsung.com>
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
B: mailto:linux-samsung-soc@vger.kernel.org
@@ -14014,6 +14029,7 @@ F: drivers/net/ethernet/mellanox/mlx4/en_*
MELLANOX ETHERNET DRIVER (mlx5e)
M: Saeed Mahameed <saeedm@nvidia.com>
+M: Tariq Toukan <tariqt@nvidia.com>
L: netdev@vger.kernel.org
S: Supported
W: http://www.mellanox.com
@@ -14081,6 +14097,7 @@ F: include/uapi/rdma/mlx4-abi.h
MELLANOX MLX5 core VPI driver
M: Saeed Mahameed <saeedm@nvidia.com>
M: Leon Romanovsky <leonro@nvidia.com>
+M: Tariq Toukan <tariqt@nvidia.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
S: Supported
@@ -14150,8 +14167,18 @@ F: mm/memblock.c
F: mm/mm_init.c
F: tools/testing/memblock/
+MEMORY ALLOCATION PROFILING
+M: Suren Baghdasaryan <surenb@google.com>
+M: Kent Overstreet <kent.overstreet@linux.dev>
+L: linux-mm@kvack.org
+S: Maintained
+F: Documentation/mm/allocation-profiling.rst
+F: include/linux/alloc_tag.h
+F: include/linux/pgalloc_tag.h
+F: lib/alloc_tag.c
+
MEMORY CONTROLLER DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
B: mailto:krzysztof.kozlowski@linaro.org
@@ -14356,7 +14383,7 @@ F: drivers/dma/at_xdmac.c
F: include/dt-bindings/dma/at91.h
MICROCHIP AT91 SERIAL DRIVER
-M: Richard Genoud <richard.genoud@gmail.com>
+M: Richard Genoud <richard.genoud@bootlin.com>
S: Maintained
F: Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
F: drivers/tty/serial/atmel_serial.c
@@ -15532,7 +15559,7 @@ F: include/uapi/linux/nexthop.h
F: net/ipv4/nexthop.c
NFC SUBSYSTEM
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/nfc/
@@ -15627,9 +15654,10 @@ F: drivers/misc/nsm.c
F: include/uapi/linux/nsm.h
NOHZ, DYNTICKS SUPPORT
+M: Anna-Maria Behnsen <anna-maria@linutronix.de>
M: Frederic Weisbecker <frederic@kernel.org>
-M: Thomas Gleixner <tglx@linutronix.de>
M: Ingo Molnar <mingo@kernel.org>
+M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/nohz
@@ -15908,7 +15936,7 @@ F: Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml
F: drivers/regulator/pf8x00-regulator.c
NXP PTN5150A CC LOGIC AND EXTCON DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml
@@ -16519,7 +16547,7 @@ K: of_overlay_remove
OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
M: Rob Herring <robh@kernel.org>
-M: Krzysztof Kozlowski <krzysztof.kozlowski+dt@linaro.org>
+M: Krzysztof Kozlowski <krzk+dt@kernel.org>
M: Conor Dooley <conor+dt@kernel.org>
L: devicetree@vger.kernel.org
S: Maintained
@@ -16725,9 +16753,9 @@ F: include/uapi/linux/ppdev.h
PARAVIRT_OPS INTERFACE
M: Juergen Gross <jgross@suse.com>
-R: Ajay Kaher <akaher@vmware.com>
-R: Alexey Makhalov <amakhalov@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+R: Ajay Kaher <ajay.kaher@broadcom.com>
+R: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: virtualization@lists.linux.dev
L: x86@kernel.org
S: Supported
@@ -16798,12 +16826,6 @@ S: Maintained
F: drivers/leds/leds-pca9532.c
F: include/linux/leds-pca9532.h
-PCA9541 I2C BUS MASTER SELECTOR DRIVER
-M: Guenter Roeck <linux@roeck-us.net>
-L: linux-i2c@vger.kernel.org
-S: Maintained
-F: drivers/i2c/muxes/i2c-mux-pca9541.c
-
PCI DRIVER FOR AARDVARK (Marvell Armada 3700)
M: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
M: Pali Rohár <pali@kernel.org>
@@ -16966,7 +16988,6 @@ F: drivers/pci/controller/dwc/pci-exynos.c
PCI DRIVER FOR SYNOPSYS DESIGNWARE
M: Jingoo Han <jingoohan1@gmail.com>
-M: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
L: linux-pci@vger.kernel.org
S: Maintained
@@ -17296,6 +17317,7 @@ R: Alexander Shishkin <alexander.shishkin@linux.intel.com>
R: Jiri Olsa <jolsa@kernel.org>
R: Ian Rogers <irogers@google.com>
R: Adrian Hunter <adrian.hunter@intel.com>
+R: "Liang, Kan" <kan.liang@linux.intel.com>
L: linux-perf-users@vger.kernel.org
L: linux-kernel@vger.kernel.org
S: Supported
@@ -17477,7 +17499,7 @@ F: Documentation/devicetree/bindings/pinctrl/renesas,*
F: drivers/pinctrl/renesas/
PIN CONTROLLER - SAMSUNG
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -17590,15 +17612,20 @@ F: drivers/pnp/
F: include/linux/pnp.h
POSIX CLOCKS and TIMERS
+M: Anna-Maria Behnsen <anna-maria@linutronix.de>
+M: Frederic Weisbecker <frederic@kernel.org>
M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
F: fs/timerfd.c
F: include/linux/time_namespace.h
-F: include/linux/timer*
+F: include/linux/timerfd.h
+F: include/uapi/linux/time.h
+F: include/uapi/linux/timerfd.h
F: include/trace/events/timer*
-F: kernel/time/*timer*
+F: kernel/time/itimer.c
+F: kernel/time/posix-*
F: kernel/time/namespace.c
POWER MANAGEMENT CORE
@@ -17868,7 +17895,7 @@ F: Documentation/devicetree/bindings/leds/irled/pwm-ir-tx.yaml
F: drivers/media/rc/pwm-ir-tx.c
PWM SUBSYSTEM
-M: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+M: Uwe Kleine-König <ukleinek@kernel.org>
L: linux-pwm@vger.kernel.org
S: Maintained
Q: https://patchwork.ozlabs.org/project/linux-pwm/list/
@@ -18645,18 +18672,21 @@ REALTEK WIRELESS DRIVER (rtlwifi family)
M: Ping-Ke Shih <pkshih@realtek.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtlwifi/
REALTEK WIRELESS DRIVER (rtw88)
M: Ping-Ke Shih <pkshih@realtek.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtw88/
REALTEK WIRELESS DRIVER (rtw89)
M: Ping-Ke Shih <pkshih@realtek.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtw89/
REDPINE WIRELESS DRIVER
@@ -18727,13 +18757,24 @@ S: Supported
F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml
F: drivers/i2c/busses/i2c-emev2.c
-RENESAS ETHERNET DRIVERS
+RENESAS ETHERNET AVB DRIVER
R: Sergey Shtylyov <s.shtylyov@omp.ru>
L: netdev@vger.kernel.org
L: linux-renesas-soc@vger.kernel.org
-F: Documentation/devicetree/bindings/net/renesas,*.yaml
-F: drivers/net/ethernet/renesas/
-F: include/linux/sh_eth.h
+F: Documentation/devicetree/bindings/net/renesas,etheravb.yaml
+F: drivers/net/ethernet/renesas/Kconfig
+F: drivers/net/ethernet/renesas/Makefile
+F: drivers/net/ethernet/renesas/ravb*
+
+RENESAS ETHERNET SWITCH DRIVER
+R: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+L: netdev@vger.kernel.org
+L: linux-renesas-soc@vger.kernel.org
+F: Documentation/devicetree/bindings/net/renesas,*ether-switch.yaml
+F: drivers/net/ethernet/renesas/Kconfig
+F: drivers/net/ethernet/renesas/Makefile
+F: drivers/net/ethernet/renesas/rcar_gen4*
+F: drivers/net/ethernet/renesas/rswitch*
RENESAS IDT821034 ASoC CODEC
M: Herve Codina <herve.codina@bootlin.com>
@@ -18843,6 +18884,16 @@ S: Supported
F: Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml
F: drivers/i2c/busses/i2c-rzv2m.c
+RENESAS SUPERH ETHERNET DRIVER
+R: Sergey Shtylyov <s.shtylyov@omp.ru>
+L: netdev@vger.kernel.org
+L: linux-renesas-soc@vger.kernel.org
+F: Documentation/devicetree/bindings/net/renesas,ether.yaml
+F: drivers/net/ethernet/renesas/Kconfig
+F: drivers/net/ethernet/renesas/Makefile
+F: drivers/net/ethernet/renesas/sh_eth*
+F: include/linux/sh_eth.h
+
RENESAS USB PHY DRIVER
M: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
L: linux-renesas-soc@vger.kernel.org
@@ -19179,12 +19230,14 @@ M: Hin-Tak Leung <hintak.leung@gmail.com>
M: Larry Finger <Larry.Finger@lwfinger.net>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtl818x/rtl8187/
RTL8XXXU WIRELESS DRIVER (rtl8xxxu)
M: Jes Sorensen <Jes.Sorensen@gmail.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtl8xxxu/
RTRS TRANSPORT DRIVERS
@@ -19414,7 +19467,7 @@ F: Documentation/devicetree/bindings/sound/samsung*
F: sound/soc/samsung/
SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-crypto@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -19449,7 +19502,7 @@ S: Maintained
F: drivers/platform/x86/samsung-laptop.c
SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -19475,7 +19528,7 @@ F: drivers/media/platform/samsung/s3c-camif/
F: include/media/drv-intf/s3c_camif.h
SAMSUNG S3FWRN5 NFC DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
F: drivers/nfc/s3fwrn5
@@ -19496,7 +19549,7 @@ S: Supported
F: drivers/media/i2c/s5k5baf.c
SAMSUNG S5P Security SubSystem (SSS) DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Vladimir Zapolskiy <vz@mleia.com>
L: linux-crypto@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
@@ -19518,7 +19571,7 @@ F: Documentation/devicetree/bindings/media/samsung,fimc.yaml
F: drivers/media/platform/samsung/exynos4-is/
SAMSUNG SOC CLOCK DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
M: Chanwoo Choi <cw00.choi@samsung.com>
R: Alim Akhtar <alim.akhtar@samsung.com>
@@ -19550,7 +19603,7 @@ F: drivers/net/ethernet/samsung/sxgbe/
SAMSUNG THERMAL DRIVER
M: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -19637,7 +19690,7 @@ F: drivers/scsi/sg.c
F: include/scsi/sg.h
SCSI SUBSYSTEM
-M: "James E.J. Bottomley" <jejb@linux.ibm.com>
+M: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
M: "Martin K. Petersen" <martin.petersen@oracle.com>
L: linux-scsi@vger.kernel.org
S: Maintained
@@ -20146,7 +20199,6 @@ F: include/linux/platform_data/simplefb.h
SIOX
M: Thorsten Scherer <t.scherer@eckelmann.de>
-M: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
R: Pengutronix Kernel Team <kernel@pengutronix.de>
S: Supported
F: drivers/gpio/gpio-siox.c
@@ -21895,7 +21947,7 @@ F: include/linux/soc/ti/ti_sci_inta_msi.h
F: include/linux/soc/ti/ti_sci_protocol.h
TEXAS INSTRUMENTS' TMP117 TEMPERATURE SENSOR DRIVER
-M: Puranjay Mohan <puranjay12@gmail.com>
+M: Puranjay Mohan <puranjay@kernel.org>
L: linux-iio@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/iio/temperature/ti,tmp117.yaml
@@ -22254,13 +22306,20 @@ S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
F: include/linux/clocksource.h
F: include/linux/time.h
+F: include/linux/timekeeper_internal.h
+F: include/linux/timekeeping.h
F: include/linux/timex.h
F: include/uapi/linux/time.h
F: include/uapi/linux/timex.h
F: kernel/time/alarmtimer.c
-F: kernel/time/clocksource.c
-F: kernel/time/ntp.c
-F: kernel/time/time*.c
+F: kernel/time/clocksource*
+F: kernel/time/ntp*
+F: kernel/time/time.c
+F: kernel/time/timeconst.bc
+F: kernel/time/timeconv.c
+F: kernel/time/timecounter.c
+F: kernel/time/timekeeping*
+F: kernel/time/time_test.c
F: tools/testing/selftests/timers/
TIPC NETWORK LAYER
@@ -22381,9 +22440,10 @@ M: Jarkko Sakkinen <jarkko@kernel.org>
R: Jason Gunthorpe <jgg@ziepe.ca>
L: linux-integrity@vger.kernel.org
S: Maintained
-W: https://kernsec.org/wiki/index.php/Linux_Kernel_Integrity
+W: https://gitlab.com/jarkkojs/linux-tpmdd-test
Q: https://patchwork.kernel.org/project/linux-integrity/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git
+F: Documentation/devicetree/bindings/tpm/
F: drivers/char/tpm/
TPS546D24 DRIVER
@@ -22530,6 +22590,7 @@ Q: https://patchwork.kernel.org/project/linux-pm/list/
B: https://bugzilla.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat
F: tools/power/x86/turbostat/
+F: tools/testing/selftests/turbostat/
TW5864 VIDEO4LINUX DRIVER
M: Bluecherry Maintainers <maintainers@bluecherrydvr.com>
@@ -22799,7 +22860,7 @@ F: drivers/usb/host/ehci*
USB HID/HIDBP DRIVERS (USB KEYBOARDS, MICE, REMOTE CONTROLS, ...)
M: Jiri Kosina <jikos@kernel.org>
-M: Benjamin Tissoires <benjamin.tissoires@redhat.com>
+M: Benjamin Tissoires <bentiss@kernel.org>
L: linux-usb@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
@@ -23608,9 +23669,9 @@ S: Supported
F: drivers/misc/vmw_balloon.c
VMWARE HYPERVISOR INTERFACE
-M: Ajay Kaher <akaher@vmware.com>
-M: Alexey Makhalov <amakhalov@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Ajay Kaher <ajay.kaher@broadcom.com>
+M: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: virtualization@lists.linux.dev
L: x86@kernel.org
S: Supported
@@ -23619,34 +23680,34 @@ F: arch/x86/include/asm/vmware.h
F: arch/x86/kernel/cpu/vmware.c
VMWARE PVRDMA DRIVER
-M: Bryan Tan <bryantan@vmware.com>
-M: Vishnu Dasa <vdasa@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Bryan Tan <bryan-bt.tan@broadcom.com>
+M: Vishnu Dasa <vishnu.dasa@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/vmw_pvrdma/
VMWARE PVSCSI DRIVER
-M: Vishal Bhakta <vbhakta@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Vishal Bhakta <vishal.bhakta@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-scsi@vger.kernel.org
S: Supported
F: drivers/scsi/vmw_pvscsi.c
F: drivers/scsi/vmw_pvscsi.h
VMWARE VIRTUAL PTP CLOCK DRIVER
-M: Jeff Sipek <jsipek@vmware.com>
-R: Ajay Kaher <akaher@vmware.com>
-R: Alexey Makhalov <amakhalov@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Nick Shi <nick.shi@broadcom.com>
+R: Ajay Kaher <ajay.kaher@broadcom.com>
+R: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/ptp/ptp_vmw.c
VMWARE VMCI DRIVER
-M: Bryan Tan <bryantan@vmware.com>
-M: Vishnu Dasa <vdasa@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Bryan Tan <bryan-bt.tan@broadcom.com>
+M: Vishnu Dasa <vishnu.dasa@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-kernel@vger.kernel.org
S: Supported
F: drivers/misc/vmw_vmci/
@@ -23661,16 +23722,16 @@ F: drivers/input/mouse/vmmouse.c
F: drivers/input/mouse/vmmouse.h
VMWARE VMXNET3 ETHERNET DRIVER
-M: Ronak Doshi <doshir@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Ronak Doshi <ronak.doshi@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/vmxnet3/
VMWARE VSOCK VMCI TRANSPORT DRIVER
-M: Bryan Tan <bryantan@vmware.com>
-M: Vishnu Dasa <vdasa@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Bryan Tan <bryan-bt.tan@broadcom.com>
+M: Vishnu Dasa <vishnu.dasa@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-kernel@vger.kernel.org
S: Supported
F: net/vmw_vsock/vmci_transport*
@@ -23738,7 +23799,7 @@ S: Orphan
F: drivers/mmc/host/vub300.c
W1 DALLAS'S 1-WIRE BUS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/w1/
F: Documentation/w1/
diff --git a/Makefile b/Makefile
index 763b6792d3d513..f4fe5b0ea931b4 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 9
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc6
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
@@ -964,6 +964,11 @@ KBUILD_CFLAGS += $(CC_FLAGS_CFI)
export CC_FLAGS_CFI
endif
+# Architectures can define flags to add/remove for floating-point support
+CC_FLAGS_FPU += -D_LINUX_FPU_COMPILATION_UNIT
+export CC_FLAGS_FPU
+export CC_FLAGS_NO_FPU
+
ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0)
# Set the minimal function alignment. Use the newer GCC option
# -fmin-function-alignment if it is available, or fall back to -falign-funtions.
@@ -1398,12 +1403,12 @@ dtbs: dtbs_prepare
# dtbs_install depend on it as dtbs_install may run as root.
dtbs_prepare: include/config/kernel.release scripts_dtc
-ifneq ($(filter dtbs_check, $(MAKECMDGOALS)),)
+ifneq ($(filter dt_binding_check dtbs_check, $(MAKECMDGOALS)),)
export CHECK_DTBS=y
endif
ifneq ($(CHECK_DTBS),)
-dtbs_prepare: dt_binding_check
+dtbs_prepare: dt_binding_schemas
endif
dtbs_check: dtbs
@@ -1421,16 +1426,15 @@ PHONY += scripts_dtc
scripts_dtc: scripts_basic
$(Q)$(MAKE) $(build)=scripts/dtc
-ifneq ($(filter dt_binding_check, $(MAKECMDGOALS)),)
-export CHECK_DT_BINDING=y
-endif
+PHONY += dt_binding_check dt_binding_schemas
+dt_binding_check: dt_binding_schemas scripts_dtc
+ $(Q)$(MAKE) $(build)=Documentation/devicetree/bindings $@
-PHONY += dt_binding_check
-dt_binding_check: scripts_dtc
+dt_binding_schemas:
$(Q)$(MAKE) $(build)=Documentation/devicetree/bindings
PHONY += dt_compatible_check
-dt_compatible_check: dt_binding_check
+dt_compatible_check: dt_binding_schemas
$(Q)$(MAKE) $(build)=Documentation/devicetree/bindings $@
# ---------------------------------------------------------------------------
@@ -1626,10 +1630,11 @@ help:
@echo ''
@$(if $(dtstree), \
echo 'Devicetree:'; \
- echo '* dtbs - Build device tree blobs for enabled boards'; \
- echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)'; \
- echo ' dt_binding_check - Validate device tree binding documents'; \
- echo ' dtbs_check - Validate device tree source files';\
+ echo '* dtbs - Build device tree blobs for enabled boards'; \
+ echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)'; \
+ echo ' dt_binding_check - Validate device tree binding documents and examples'; \
+ echo ' dt_binding_schema - Build processed device tree binding schemas'; \
+ echo ' dtbs_check - Validate device tree source files';\
echo '')
@echo 'Userspace tools targets:'
diff --git a/arch/Kconfig b/arch/Kconfig
index 9f066785bb71d9..c5962cd8356109 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -9,6 +9,14 @@
#
source "arch/$(SRCARCH)/Kconfig"
+config ARCH_CONFIGURES_CPU_MITIGATIONS
+ bool
+
+if !ARCH_CONFIGURES_CPU_MITIGATIONS
+config CPU_MITIGATIONS
+ def_bool y
+endif
+
menu "General architecture-dependent options"
config ARCH_HAS_SUBPAGE_FAULTS
@@ -1172,12 +1180,12 @@ config PAGE_SIZE_LESS_THAN_256KB
config PAGE_SHIFT
int
- default 12 if PAGE_SIZE_4KB
- default 13 if PAGE_SIZE_8KB
- default 14 if PAGE_SIZE_16KB
- default 15 if PAGE_SIZE_32KB
- default 16 if PAGE_SIZE_64KB
- default 18 if PAGE_SIZE_256KB
+ default 12 if PAGE_SIZE_4KB
+ default 13 if PAGE_SIZE_8KB
+ default 14 if PAGE_SIZE_16KB
+ default 15 if PAGE_SIZE_32KB
+ default 16 if PAGE_SIZE_64KB
+ default 18 if PAGE_SIZE_256KB
# This allows to use a set of generic functions to determine mmap base
# address by giving priority to top-down scheme only if the process
@@ -1569,6 +1577,12 @@ config ARCH_HAS_NONLEAF_PMD_YOUNG
address translations. Page table walkers that clear the accessed bit
may use this capability to reduce their search space.
+config ARCH_HAS_KERNEL_FPU_SUPPORT
+ bool
+ help
+ Architectures that select this option can run floating-point code in
+ the kernel, as described in Documentation/core-api/floating-point.rst.
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 5db88b6274396d..e5f881bc82881c 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1218,14 +1218,11 @@ static unsigned long
arch_get_unmapped_area_1(unsigned long addr, unsigned long len,
unsigned long limit)
{
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
- info.flags = 0;
info.length = len;
info.low_limit = addr;
info.high_limit = limit;
- info.align_mask = 0;
- info.align_offset = 0;
return vm_unmapped_area(&info);
}
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index c81183935e9707..7fcf3e9b710305 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -929,7 +929,7 @@ const struct dma_map_ops alpha_pci_ops = {
.dma_supported = alpha_pci_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
+ .alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
};
EXPORT_SYMBOL(alpha_pci_ops);
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 8ff110826ce21d..d8f96362e9f83e 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -501,3 +501,4 @@
569 common lsm_get_self_attr sys_lsm_get_self_attr
570 common lsm_set_self_attr sys_lsm_set_self_attr
571 common lsm_list_modules sys_lsm_list_modules
+572 common mseal sys_mseal
diff --git a/arch/alpha/lib/checksum.c b/arch/alpha/lib/checksum.c
index 3f35c3ed694886..c29b98ef9c8267 100644
--- a/arch/alpha/lib/checksum.c
+++ b/arch/alpha/lib/checksum.c
@@ -14,6 +14,7 @@
#include <linux/string.h>
#include <asm/byteorder.h>
+#include <asm/checksum.h>
static inline unsigned short from64to16(unsigned long x)
{
diff --git a/arch/alpha/lib/fpreg.c b/arch/alpha/lib/fpreg.c
index 7c08b225261c4a..3d32165043f8b1 100644
--- a/arch/alpha/lib/fpreg.c
+++ b/arch/alpha/lib/fpreg.c
@@ -8,6 +8,7 @@
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/preempt.h>
+#include <asm/fpu.h>
#include <asm/thread_info.h>
#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67)
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 99d2845f3feb95..4092bec198beca 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -6,7 +6,6 @@
config ARC
def_bool y
select ARC_TIMERS
- select ARCH_HAS_CPU_CACHE_ALIASING
select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DMA_PREP_COHERENT
diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile
index 5648748c285f52..5a8550124b73ec 100644
--- a/arch/arc/boot/Makefile
+++ b/arch/arc/boot/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
-# uImage build relies on mkimage being availble on your host for ARC target
+# uImage build relies on mkimage being available on your host for ARC target
# You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage
-# and make sure it's reacable from your PATH
+# and make sure it's reachable from your PATH
OBJCOPYFLAGS= -O binary -R .note -R .note.gnu.build-id -R .comment -S
diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index 3434c8131ecd54..c0a812674ce9ed 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -119,9 +119,9 @@
/*
* The DW APB ICTL intc on MB is connected to CPU intc via a
* DT "invisible" DW APB GPIO block, configured to simply pass thru
- * interrupts - setup accordinly in platform init (plat-axs10x/ax10x.c)
+ * interrupts - setup accordingly in platform init (plat-axs10x/ax10x.c)
*
- * So here we mimic a direct connection betwen them, ignoring the
+ * So here we mimic a direct connection between them, ignoring the
* ABPG GPIO. Thus set "interrupts = <24>" (DW APB GPIO to core)
* instead of "interrupts = <12>" (DW APB ICTL to DW APB GPIO)
*
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index 6691f425507788..41b980df862b14 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -205,7 +205,6 @@
};
gmac: ethernet@8000 {
- #interrupt-cells = <1>;
compatible = "snps,dwmac";
reg = <0x8000 0x2000>;
interrupts = <10>;
diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
index 90a412026e6433..0e0e2d337bf871 100644
--- a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
@@ -113,7 +113,7 @@
/*
* Embedded Vision subsystem UIO mappings; only relevant for EV VDK
*
- * This node is intentionally put outside of MB above becase
+ * This node is intentionally put outside of MB above because
* it maps areas outside of MB's 0xez-0xfz.
*/
uio_ev: uio@d0000000 {
diff --git a/arch/arc/include/asm/cachetype.h b/arch/arc/include/asm/cachetype.h
deleted file mode 100644
index 05fc7ed5971258..00000000000000
--- a/arch/arc/include/asm/cachetype.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_ARC_CACHETYPE_H
-#define __ASM_ARC_CACHETYPE_H
-
-#include <linux/types.h>
-
-#define cpu_dcache_is_aliasing() true
-
-#endif
diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h
index 202c78e567045b..f496dbc4640b24 100644
--- a/arch/arc/include/asm/dsp.h
+++ b/arch/arc/include/asm/dsp.h
@@ -12,7 +12,7 @@
/*
* DSP-related saved registers - need to be saved only when you are
* scheduled out.
- * structure fields name must correspond to aux register defenitions for
+ * structure fields name must correspond to aux register definitions for
* automatic offset calculation in DSP_AUX_SAVE_RESTORE macros
*/
struct dsp_callee_regs {
diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h
index 92c3e9f1325219..00946fe04c9b26 100644
--- a/arch/arc/include/asm/entry-compact.h
+++ b/arch/arc/include/asm/entry-compact.h
@@ -7,7 +7,7 @@
* Stack switching code can no longer reliably rely on the fact that
* if we are NOT in user mode, stack is switched to kernel mode.
* e.g. L2 IRQ interrupted a L1 ISR which had not yet completed
- * it's prologue including stack switching from user mode
+ * its prologue including stack switching from user mode
*
* Vineetg: Aug 28th 2008: Bug #94984
* -Zero Overhead Loop Context shd be cleared when entering IRQ/EXcp/Trap
@@ -143,7 +143,7 @@
* 2. L1 IRQ taken, ISR starts (CPU auto-switched to KERNEL mode)
* 3. But before it could switch SP from USER to KERNEL stack
* a L2 IRQ "Interrupts" L1
- * Thay way although L2 IRQ happened in Kernel mode, stack is still
+ * That way although L2 IRQ happened in Kernel mode, stack is still
* not switched.
* To handle this, we may need to switch stack even if in kernel mode
* provided SP has values in range of USER mode stack ( < 0x7000_0000 )
@@ -173,7 +173,7 @@
GET_CURR_TASK_ON_CPU r9
- /* With current tsk in r9, get it's kernel mode stack base */
+ /* With current tsk in r9, get its kernel mode stack base */
GET_TSK_STACK_BASE r9, r9
/* save U mode SP @ pt_regs->sp */
@@ -282,7 +282,7 @@
* NOTE:
*
* It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
- * for memory load operations. If used in that way interrupts are deffered
+ * for memory load operations. If used in that way interrupts are deferred
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro EXCEPTION_EPILOGUE
@@ -350,7 +350,7 @@
* NOTE:
*
* It is recommended that lp_count/ilink1/ilink2 not be used as a dest reg
- * for memory load operations. If used in that way interrupts are deffered
+ * for memory load operations. If used in that way interrupts are deferred
* by hardware and that is not good.
*-------------------------------------------------------------*/
.macro INTERRUPT_EPILOGUE LVL
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index cf1ba376e992c6..38c35722cebf03 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -7,7 +7,7 @@
#ifndef __ASM_ARC_ENTRY_H
#define __ASM_ARC_ENTRY_H
-#include <asm/unistd.h> /* For NR_syscalls defination */
+#include <asm/unistd.h> /* For NR_syscalls definition */
#include <asm/arcregs.h>
#include <asm/ptrace.h>
#include <asm/processor.h> /* For VMALLOC_START */
@@ -56,7 +56,7 @@
.endm
/*-------------------------------------------------------------
- * given a tsk struct, get to the base of it's kernel mode stack
+ * given a tsk struct, get to the base of its kernel mode stack
* tsk->thread_info is really a PAGE, whose bottom hoists stack
* which grows upwards towards thread_info
*------------------------------------------------------------*/
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index c574712ad86589..9cd79263acba82 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -10,7 +10,7 @@
* ARCv2 can support 240 interrupts in the core interrupts controllers and
* 128 interrupts in IDU. Thus 512 virtual IRQs must be enough for most
* configurations of boards.
- * This doesnt affect ARCompact, but we change it to same value
+ * This doesn't affect ARCompact, but we change it to same value
*/
#define NR_IRQS 512
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index 0d63e568d64cb5..936a2f21f315ed 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -46,7 +46,7 @@
* IRQ Control Macros
*
* All of them have "memory" clobber (compiler barrier) which is needed to
- * ensure that LD/ST requiring irq safetly (R-M-W when LLSC is not available)
+ * ensure that LD/ST requiring irq safety (R-M-W when LLSC is not available)
* are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
*
* Noted at the time of Abilis Timer List corruption
diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h
index ed9036d4ede310..d85dc07219071f 100644
--- a/arch/arc/include/asm/mmu-arcv2.h
+++ b/arch/arc/include/asm/mmu-arcv2.h
@@ -9,6 +9,8 @@
#ifndef _ASM_ARC_MMU_ARCV2_H
#define _ASM_ARC_MMU_ARCV2_H
+#include <soc/arc/aux.h>
+
/*
* TLB Management regs
*/
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index dda471f5f05bbf..9963bb1a5733fb 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -165,7 +165,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* for retiring-mm. However destroy_context( ) still needs to do that because
* between mm_release( ) = >deactive_mm( ) and
* mmput => .. => __mmdrop( ) => destroy_context( )
- * there is a good chance that task gets sched-out/in, making it's ASID valid
+ * there is a good chance that task gets sched-out/in, making its ASID valid
* again (this teased me for a whole day).
*/
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
index f3eea3f30b2e29..8ebec1b21d246e 100644
--- a/arch/arc/include/asm/pgtable-bits-arcv2.h
+++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
@@ -66,7 +66,7 @@
* Other rules which cause the divergence from 1:1 mapping
*
* 1. Although ARC700 can do exclusive execute/write protection (meaning R
- * can be tracked independet of X/W unlike some other CPUs), still to
+ * can be tracked independently of X/W unlike some other CPUs), still to
* keep things consistent with other archs:
* -Write implies Read: W => R
* -Execute implies Read: X => R
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index 00b9318e551e7e..cf79df0b257053 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -169,7 +169,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
return *(unsigned long *)((unsigned long)regs + offset);
}
-extern int syscall_trace_entry(struct pt_regs *);
+extern int syscall_trace_enter(struct pt_regs *);
extern void syscall_trace_exit(struct pt_regs *);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arc/include/asm/shmparam.h b/arch/arc/include/asm/shmparam.h
index 8b0251464ffd42..719112af0f41ae 100644
--- a/arch/arc/include/asm/shmparam.h
+++ b/arch/arc/include/asm/shmparam.h
@@ -6,7 +6,7 @@
#ifndef __ARC_ASM_SHMPARAM_H
#define __ARC_ASM_SHMPARAM_H
-/* Handle upto 2 cache bins */
+/* Handle up to 2 cache bins */
#define SHMLBA (2 * PAGE_SIZE)
/* Enforce SHMLBA in shmat */
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index e0913f52c2cdda..990f834909f088 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -77,7 +77,7 @@ static inline const char *arc_platform_smp_cpuinfo(void)
/*
* ARC700 doesn't support atomic Read-Modify-Write ops.
- * Originally Interrupts had to be disabled around code to gaurantee atomicity.
+ * Originally Interrupts had to be disabled around code to guarantee atomicity.
* The LLOCK/SCOND insns allow writing interrupt-hassle-free based atomic ops
* based on retry-if-irq-in-atomic (with hardware assist).
* However despite these, we provide the IRQ disabling variant
@@ -86,7 +86,7 @@ static inline const char *arc_platform_smp_cpuinfo(void)
* support needed.
*
* (2) In a SMP setup, the LLOCK/SCOND atomicity across CPUs needs to be
- * gaurantted by the platform (not something which core handles).
+ * guaranteed by the platform (not something which core handles).
* Assuming a platform won't, SMP Linux needs to use spinlocks + local IRQ
* disabling for atomicity.
*
diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h
index 4c530cf131f339..12daaf3a61eaf6 100644
--- a/arch/arc/include/asm/thread_info.h
+++ b/arch/arc/include/asm/thread_info.h
@@ -38,7 +38,7 @@
struct thread_info {
unsigned long flags; /* low level flags */
unsigned long ksp; /* kernel mode stack top in __switch_to */
- int preempt_count; /* 0 => preemptable, <0 => BUG */
+ int preempt_count; /* 0 => preemptible, <0 => BUG */
int cpu; /* current CPU */
unsigned long thr_ptr; /* TLS ptr */
struct task_struct *task; /* main task structure */
diff --git a/arch/arc/include/uapi/asm/swab.h b/arch/arc/include/uapi/asm/swab.h
index 02109cd48ee129..8d1f1ef44ba750 100644
--- a/arch/arc/include/uapi/asm/swab.h
+++ b/arch/arc/include/uapi/asm/swab.h
@@ -62,7 +62,7 @@
* 8051fdc4: st r2,[r1,20] ; Mem op : save result back to mem
*
* Joern suggested a better "C" algorithm which is great since
- * (1) It is portable to any architecure
+ * (1) It is portable to any architecture
* (2) At the same time it takes advantage of ARC ISA (rotate intrns)
*/
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
index 2e49c81c8086b1..e238b5fd3c8cff 100644
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -5,7 +5,7 @@
* Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
*/
-#include <linux/linkage.h> /* ARC_{EXTRY,EXIT} */
+#include <linux/linkage.h> /* ARC_{ENTRY,EXIT} */
#include <asm/entry.h> /* SAVE_ALL_{INT1,INT2,TRAP...} */
#include <asm/errno.h>
#include <asm/arcregs.h>
@@ -31,7 +31,7 @@ VECTOR res_service ; Reset Vector
VECTOR mem_service ; Mem exception
VECTOR instr_service ; Instrn Error
VECTOR EV_MachineCheck ; Fatal Machine check
-VECTOR EV_TLBMissI ; Intruction TLB miss
+VECTOR EV_TLBMissI ; Instruction TLB miss
VECTOR EV_TLBMissD ; Data TLB miss
VECTOR EV_TLBProtV ; Protection Violation
VECTOR EV_PrivilegeV ; Privilege Violation
@@ -76,11 +76,11 @@ ENTRY(handle_interrupt)
# query in hard ISR path would return false (since .IE is set) which would
# trips genirq interrupt handling asserts.
#
- # So do a "soft" disable of interrutps here.
+ # So do a "soft" disable of interrupts here.
#
# Note this disable is only for consistent book-keeping as further interrupts
# will be disabled anyways even w/o this. Hardware tracks active interrupts
- # seperately in AUX_IRQ_ACT.active and will not take new interrupts
+ # separately in AUX_IRQ_ACT.active and will not take new interrupts
# unless this one returns (or higher prio becomes pending in 2-prio scheme)
IRQ_DISABLE
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 089f6680518f82..3c7e74aba6794e 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -95,7 +95,7 @@ ENTRY(EV_MachineCheck)
lr r0, [efa]
mov r1, sp
- ; MC excpetions disable MMU
+ ; MC exceptions disable MMU
ARC_MMU_REENABLE r3
lsr r3, r10, 8
@@ -209,7 +209,7 @@ trap_with_param:
; ---------------------------------------------
; syscall TRAP
-; ABI: (r0-r7) upto 8 args, (r8) syscall number
+; ABI: (r0-r7) up to 8 args, (r8) syscall number
; ---------------------------------------------
ENTRY(EV_Trap)
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 9152782444b557..8d541f53fae3ec 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -165,7 +165,7 @@ ENTRY(first_lines_of_secondary)
; setup stack (fp, sp)
mov fp, 0
- ; set it's stack base to tsk->thread_info bottom
+ ; set its stack base to tsk->thread_info bottom
GET_TSK_STACK_BASE r0, sp
j start_kernel_secondary
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index 678898757e4739..f324f0e3341a39 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -56,7 +56,7 @@ void arc_init_IRQ(void)
WRITE_AUX(AUX_IRQ_CTRL, ictrl);
/*
- * ARCv2 core intc provides multiple interrupt priorities (upto 16).
+ * ARCv2 core intc provides multiple interrupt priorities (up to 16).
* Typical builds though have only two levels (0-high, 1-low)
* Linux by default uses lower prio 1 for most irqs, reserving 0 for
* NMI style interrupts in future (say perf)
diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index e71d64119d71ac..f8e2960832d943 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -190,7 +190,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs)
}
}
-int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs)
+static int
+__kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs)
{
struct kprobe *p;
struct kprobe_ctlblk *kcb;
@@ -241,8 +242,8 @@ int __kprobes arc_kprobe_handler(unsigned long addr, struct pt_regs *regs)
return 0;
}
-static int __kprobes arc_post_kprobe_handler(unsigned long addr,
- struct pt_regs *regs)
+static int
+__kprobes arc_post_kprobe_handler(unsigned long addr, struct pt_regs *regs)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index adff957962da88..6e5a651cd75cf5 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -38,7 +38,7 @@
* (based on a specific RTL build)
* Below is the static map between perf generic/arc specific event_id and
* h/w condition names.
- * At the time of probe, we loop thru each index and find it's name to
+ * At the time of probe, we loop thru each index and find its name to
* complete the mapping of perf event_id to h/w index as latter is needed
* to program the counter really
*/
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index d08a5092c2b4d4..7b6a9beba9db6d 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -390,7 +390,7 @@ static void arc_chk_core_config(struct cpuinfo_arc *info)
#ifdef CONFIG_ARC_HAS_DCCM
/*
* DCCM can be arbit placed in hardware.
- * Make sure it's placement/sz matches what Linux is built with
+ * Make sure its placement/sz matches what Linux is built with
*/
if ((unsigned int)__arc_dccm_base != info->dccm.base)
panic("Linux built with incorrect DCCM Base address\n");
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index 8f6f4a5429646f..fefa705a863850 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -8,15 +8,16 @@
*
* vineetg: Nov 2009 (Everything needed for TIF_RESTORE_SIGMASK)
* -do_signal() supports TIF_RESTORE_SIGMASK
- * -do_signal() no loner needs oldset, required by OLD sys_sigsuspend
- * -sys_rt_sigsuspend() now comes from generic code, so discard arch implemen
+ * -do_signal() no longer needs oldset, required by OLD sys_sigsuspend
+ * -sys_rt_sigsuspend() now comes from generic code, so discard arch
+ * implementation
* -sys_sigsuspend() no longer needs to fudge ptregs, hence that arg removed
* -sys_sigsuspend() no longer loops for do_signal(), sets TIF_xxx and leaves
* the job to do_signal()
*
* vineetg: July 2009
* -Modified Code to support the uClibc provided userland sigreturn stub
- * to avoid kernel synthesing it on user stack at runtime, costing TLB
+ * to avoid kernel synthesizing it on user stack at runtime, costing TLB
* probes and Cache line flushes.
*
* vineetg: July 2009
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c
index 9b9570b79362ee..a19751e824fb4c 100644
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -89,7 +89,7 @@ int do_misaligned_access(unsigned long address, struct pt_regs *regs,
/*
* Entry point for miscll errors such as Nested Exceptions
- * -Duplicate TLB entry is handled seperately though
+ * -Duplicate TLB entry is handled separately though
*/
void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
{
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index 549c3f40791869..61a1b2b96e1d81 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -41,8 +41,8 @@ SECTIONS
#endif
/*
- * The reason for having a seperate subsection .init.ramfs is to
- * prevent objump from including it in kernel dumps
+ * The reason for having a separate subsection .init.ramfs is to
+ * prevent objdump from including it in kernel dumps
*
* Reason for having .init.ramfs above .init is to make sure that the
* binary blob is tucked away to one side, reducing the displacement
diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c
index 3c1c7ae732925c..69a91529715599 100644
--- a/arch/arc/mm/mmap.c
+++ b/arch/arc/mm/mmap.c
@@ -27,7 +27,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
/*
* We enforce the MAP_FIXED case.
@@ -51,11 +51,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
- info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
- info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index ad702b49aeb3b8..cae4a7aae0ed4e 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -212,7 +212,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long flags;
/* If range @start to @end is more than 32 TLB entries deep,
- * its better to move to a new ASID rather than searching for
+ * it's better to move to a new ASID rather than searching for
* individual entries and then shooting them down
*
* The calc above is rough, doesn't account for unaligned parts,
@@ -408,7 +408,7 @@ static void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *p
* -More importantly it makes this handler inconsistent with fast-path
* TLB Refill handler which always deals with "current"
*
- * Lets see the use cases when current->mm != vma->mm and we land here
+ * Let's see the use cases when current->mm != vma->mm and we land here
* 1. execve->copy_strings()->__get_user_pages->handle_mm_fault
* Here VM wants to pre-install a TLB entry for user stack while
* current->mm still points to pre-execve mm (hence the condition).
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index e054780a8fe0c9..dc65e87a531fdb 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -5,19 +5,19 @@
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* Vineetg: April 2011 :
- * -MMU v1: moved out legacy code into a seperate file
+ * -MMU v1: moved out legacy code into a separate file
* -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
* helps avoid a shift when preparing PD0 from PTE
*
* Vineetg: July 2009
- * -For MMU V2, we need not do heuristics at the time of commiting a D-TLB
- * entry, so that it doesn't knock out it's I-TLB entry
+ * -For MMU V2, we need not do heuristics at the time of committing a D-TLB
+ * entry, so that it doesn't knock out its I-TLB entry
* -Some more fine tuning:
* bmsk instead of add, asl.cc instead of branch, delay slot utilise etc
*
* Vineetg: July 2009
* -Practically rewrote the I/D TLB Miss handlers
- * Now 40 and 135 instructions a peice as compared to 131 and 449 resp.
+ * Now 40 and 135 instructions apiece as compared to 131 and 449 resp.
* Hence Leaner by 1.5 K
* Used Conditional arithmetic to replace excessive branching
* Also used short instructions wherever possible
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b14aed3a17abb6..35fc382635354a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -15,6 +15,7 @@ config ARM
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if KERNEL_MODE_NEON
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
@@ -99,7 +100,7 @@ config ARM
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
select HAVE_EXIT_THREAD
- select HAVE_FAST_GUP if ARM_LPAE
+ select HAVE_GUP_FAST if ARM_LPAE
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index d82908b1b1bb44..71afdd98ddf27f 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -130,6 +130,13 @@ endif
# Accept old syntax despite ".syntax unified"
AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
+# The GCC option -ffreestanding is required in order to compile code containing
+# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel)
+CC_FLAGS_FPU := -ffreestanding
+# Enable <arm_neon.h>
+CC_FLAGS_FPU += -isystem $(shell $(CC) -print-file-name=include)
+CC_FLAGS_FPU += -march=armv7-a -mfloat-abi=softfp -mfpu=neon
+
ifeq ($(CONFIG_THUMB2_KERNEL),y)
CFLAGS_ISA :=-Wa,-mimplicit-it=always $(AFLAGS_NOWARN)
AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb
diff --git a/arch/arm/boot/dts/aspeed/Makefile b/arch/arm/boot/dts/aspeed/Makefile
index d3ac20e316d01e..715106b3baa166 100644
--- a/arch/arm/boot/dts/aspeed/Makefile
+++ b/arch/arm/boot/dts/aspeed/Makefile
@@ -9,7 +9,11 @@ dtb-$(CONFIG_ARCH_ASPEED) += \
aspeed-bmc-ampere-mtmitchell.dtb \
aspeed-bmc-arm-stardragon4800-rep2.dtb \
aspeed-bmc-asrock-e3c246d4i.dtb \
+ aspeed-bmc-asrock-e3c256d4i.dtb \
aspeed-bmc-asrock-romed8hm3.dtb \
+ aspeed-bmc-asrock-spc621d8hm3.dtb \
+ aspeed-bmc-asrock-x570d4u.dtb \
+ aspeed-bmc-asus-x4tf.dtb \
aspeed-bmc-bytedance-g220a.dtb \
aspeed-bmc-delta-ahe50dc.dtb \
aspeed-bmc-facebook-bletchley.dtb \
@@ -19,7 +23,8 @@ dtb-$(CONFIG_ARCH_ASPEED) += \
aspeed-bmc-facebook-fuji.dtb \
aspeed-bmc-facebook-galaxy100.dtb \
aspeed-bmc-facebook-greatlakes.dtb \
- aspeed-bmc-facebook-minerva-cmc.dtb \
+ aspeed-bmc-facebook-harma.dtb \
+ aspeed-bmc-facebook-minerva.dtb \
aspeed-bmc-facebook-minipack.dtb \
aspeed-bmc-facebook-tiogapass.dtb \
aspeed-bmc-facebook-wedge40.dtb \
@@ -33,6 +38,7 @@ dtb-$(CONFIG_ARCH_ASPEED) += \
aspeed-bmc-ibm-rainier.dtb \
aspeed-bmc-ibm-rainier-1s4u.dtb \
aspeed-bmc-ibm-rainier-4u.dtb \
+ aspeed-bmc-ibm-system1.dtb \
aspeed-bmc-intel-s2600wf.dtb \
aspeed-bmc-inspur-fp5280g2.dtb \
aspeed-bmc-inspur-nf5280m6.dtb \
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-e3c256d4i.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-e3c256d4i.dts
new file mode 100644
index 00000000000000..263fcc8106ffaa
--- /dev/null
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-e3c256d4i.dts
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0+
+/dts-v1/;
+
+#include "aspeed-g5.dtsi"
+#include <dt-bindings/gpio/aspeed-gpio.h>
+#include <dt-bindings/i2c/i2c.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/watchdog/aspeed-wdt.h>
+
+/{
+ model = "ASRock E3C256D4I BMC";
+ compatible = "asrock,e3c256d4i-bmc", "aspeed,ast2500";
+
+ aliases {
+ serial4 = &uart5;
+
+ i2c20 = &i2c2mux0ch0;
+ i2c21 = &i2c2mux0ch1;
+ i2c22 = &i2c2mux0ch2;
+ i2c23 = &i2c2mux0ch3;
+ };
+
+ chosen {
+ stdout-path = &uart5;
+ };
+
+ memory@80000000 {
+ reg = <0x80000000 0x20000000>;
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ /* BMC heartbeat */
+ led-0 {
+ gpios = <&gpio ASPEED_GPIO(H, 6) GPIO_ACTIVE_LOW>;
+ function = LED_FUNCTION_HEARTBEAT;
+ color = <LED_COLOR_ID_GREEN>;
+ linux,default-trigger = "timer";
+ };
+
+ /* system fault */
+ led-1 {
+ gpios = <&gpio ASPEED_GPIO(Z, 2) GPIO_ACTIVE_LOW>;
+ function = LED_FUNCTION_FAULT;
+ color = <LED_COLOR_ID_RED>;
+ panic-indicator;
+ };
+ };
+
+ iio-hwmon {
+ compatible = "iio-hwmon";
+ io-channels = <&adc 0>, <&adc 1>, <&adc 2>, <&adc 3>,
+ <&adc 4>, <&adc 5>, <&adc 6>, <&adc 7>,
+ <&adc 8>, <&adc 9>, <&adc 10>, <&adc 11>,
+ <&adc 12>, <&adc 13>, <&adc 14>, <&adc 15>;
+ };
+};
+
+&fmc {
+ status = "okay";
+ flash@0 {
+ status = "okay";
+ m25p,fast-read;
+ label = "bmc";
+ spi-max-frequency = <100000000>; /* 100 MHz */
+#include "openbmc-flash-layout-64.dtsi"
+ };
+};
+
+&uart1 {
+ status = "okay";
+};
+
+&uart2 {
+ status = "okay";
+};
+
+&uart3 {
+ status = "okay";
+};
+
+&uart4 {
+ status = "okay";
+};
+
+&uart5 {
+ status = "okay";
+};
+
+&uart_routing {
+ status = "okay";
+};
+
+&mac0 {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rgmii1_default &pinctrl_mdio1_default>;
+
+ nvmem-cells = <&eth0_macaddress>;
+ nvmem-cell-names = "mac-address";
+};
+
+&i2c0 {
+ status = "okay";
+};
+
+&i2c1 {
+ status = "okay";
+};
+
+&i2c2 {
+ status = "okay";
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9545";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ i2c2mux0ch0: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ i2c2mux0ch1: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ i2c2mux0ch2: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ i2c2mux0ch3: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+ };
+};
+
+&i2c3 {
+ status = "okay";
+};
+
+&i2c4 {
+ status = "okay";
+};
+
+&i2c5 {
+ status = "okay";
+};
+
+&i2c6 {
+ status = "okay";
+};
+
+&i2c7 {
+ status = "okay";
+};
+
+&i2c9 {
+ status = "okay";
+};
+
+&i2c10 {
+ status = "okay";
+};
+
+&i2c11 {
+ status = "okay";
+
+ vrm@60 {
+ compatible = "renesas,isl69269", "isl69269";
+ reg = <0x60>;
+ };
+};
+
+&i2c12 {
+ status = "okay";
+
+ /* FRU eeprom */
+ eeprom@57 {
+ compatible = "st,24c128", "atmel,24c128";
+ reg = <0x57>;
+ pagesize = <16>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ eth0_macaddress: macaddress@3f80 {
+ reg = <0x3f80 6>;
+ };
+ };
+};
+
+&video {
+ status = "okay";
+};
+
+&vhub {
+ status = "okay";
+};
+
+&lpc_ctrl {
+ status = "okay";
+};
+
+&lpc_snoop {
+ status = "okay";
+ snoop-ports = <0x80>;
+};
+
+&kcs3 {
+ status = "okay";
+ aspeed,lpc-io-reg = <0xca2>;
+};
+
+&peci0 {
+ status = "okay";
+};
+
+&wdt1 {
+ aspeed,reset-mask = <(AST2500_WDT_RESET_DEFAULT & ~AST2500_WDT_RESET_LPC)>;
+};
+
+&wdt2 {
+ aspeed,reset-mask = <(AST2500_WDT_RESET_DEFAULT & ~AST2500_WDT_RESET_LPC)>;
+};
+
+&pwm_tacho {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pwm0_default /* CPU */
+ &pinctrl_pwm2_default /* rear */
+ &pinctrl_pwm4_default>; /* front */
+
+ /* CPU */
+ fan@0 {
+ reg = <0x00>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x00>;
+ };
+
+ /* rear */
+ fan@2 {
+ reg = <0x02>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x02>;
+ };
+
+ /* front */
+ fan@4 {
+ reg = <0x04>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x04>;
+ };
+};
+
+&gpio {
+ status = "okay";
+ gpio-line-names =
+ /* A */ "", "", "NMI_BTN_N", "BMC_NMI", "", "", "", "",
+ /* B */ "", "", "", "", "", "", "", "",
+ /* C */ "", "", "", "", "", "", "", "",
+ /* D */ "BMC_PSIN", "BMC_PSOUT", "BMC_RESETCON", "RESETCON",
+ "", "", "", "",
+ /* E */ "", "", "", "", "", "", "", "",
+ /* F */ "LOCATORLED_STATUS_N", "LOCATORBTN", "", "",
+ "", "", "BMC_PCH_SCI_LPC", "BMC_NCSI_MUX_CTL",
+ /* G */ "HWM_BAT_EN", "CHASSIS_ID0", "CHASSIS_ID1", "CHASSIS_ID2",
+ "", "", "", "",
+ /* H */ "FM_ME_RCVR_N", "O_PWROK", "", "D4_DIMM_EVENT_3V_N",
+ "MFG_MODE_N", "BMC_RTCRST", "BMC_HB_LED_N", "BMC_CASEOPEN",
+ /* I */ "", "", "", "", "", "", "", "",
+ /* J */ "BMC_READY", "BMC_PCH_BIOS_CS_N", "BMC_SMI", "", "", "", "", "",
+ /* K */ "", "", "", "", "", "", "", "",
+ /* L */ "", "", "", "", "", "", "", "",
+ /* M */ "", "", "", "", "", "", "", "",
+ /* N */ "", "", "", "", "", "", "", "",
+ /* O */ "", "", "", "", "", "", "", "",
+ /* P */ "", "", "", "", "", "", "", "",
+ /* Q */ "", "", "", "", "", "", "", "",
+ /* R */ "", "", "", "", "", "", "", "",
+ /* S */ "PCHHOT_BMC_N", "", "RSMRST", "", "", "", "", "",
+ /* T */ "", "", "", "", "", "", "", "",
+ /* U */ "", "", "", "", "", "", "", "",
+ /* V */ "", "", "", "", "", "", "", "",
+ /* W */ "", "", "", "", "", "", "", "",
+ /* X */ "", "", "", "", "", "", "", "",
+ /* Y */ "SLP_S3", "SLP_S5", "", "", "", "", "", "",
+ /* Z */ "CPU_CATERR_BMC_N", "", "SYSTEM_FAULT_LED_N", "BMC_THROTTLE_N",
+ "", "", "", "",
+ /* AA */ "CPU1_THERMTRIP_LATCH_N", "", "CPU1_PROCHOT_N", "",
+ "", "", "IRQ_SMI_ACTIVE_N", "FM_BIOS_POST_CMPLT_N",
+ /* AB */ "", "", "ME_OVERRIDE", "BMC_DMI_MODIFY", "", "", "", "",
+ /* AC */ "", "", "", "", "", "", "", "";
+};
+
+&adc {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_adc0_default /* 3VSB */
+ &pinctrl_adc1_default /* 5VSB */
+ &pinctrl_adc2_default /* CPU1 */
+ &pinctrl_adc3_default /* VCCSA */
+ &pinctrl_adc4_default /* VCCM */
+ &pinctrl_adc5_default /* V10M */
+ &pinctrl_adc6_default /* VCCIO */
+ &pinctrl_adc7_default /* VCCGT */
+ &pinctrl_adc8_default /* VPPM */
+ &pinctrl_adc9_default /* BAT */
+ &pinctrl_adc10_default /* 3V */
+ &pinctrl_adc11_default /* 5V */
+ &pinctrl_adc12_default /* 12V */
+ &pinctrl_adc13_default /* GND */
+ &pinctrl_adc14_default /* GND */
+ &pinctrl_adc15_default>; /* GND */
+};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-spc621d8hm3.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-spc621d8hm3.dts
new file mode 100644
index 00000000000000..555485871e7a7d
--- /dev/null
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-spc621d8hm3.dts
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0+
+/dts-v1/;
+
+#include "aspeed-g5.dtsi"
+#include <dt-bindings/gpio/aspeed-gpio.h>
+#include <dt-bindings/i2c/i2c.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/leds/common.h>
+
+/{
+ model = "ASRock SPC621D8HM3 BMC";
+ compatible = "asrock,spc621d8hm3-bmc", "aspeed,ast2500";
+
+ aliases {
+ serial4 = &uart5;
+
+ i2c20 = &i2c1mux0ch0;
+ i2c21 = &i2c1mux0ch1;
+ };
+
+ chosen {
+ stdout-path = &uart5;
+ };
+
+ memory@80000000 {
+ reg = <0x80000000 0x20000000>;
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ /* BMC heartbeat */
+ led-0 {
+ gpios = <&gpio ASPEED_GPIO(H, 6) GPIO_ACTIVE_LOW>;
+ function = LED_FUNCTION_HEARTBEAT;
+ color = <LED_COLOR_ID_GREEN>;
+ linux,default-trigger = "timer";
+ };
+
+ /* system fault */
+ led-1 {
+ gpios = <&gpio ASPEED_GPIO(Z, 2) GPIO_ACTIVE_LOW>;
+ function = LED_FUNCTION_FAULT;
+ color = <LED_COLOR_ID_RED>;
+ panic-indicator;
+ };
+ };
+
+ iio-hwmon {
+ compatible = "iio-hwmon";
+ io-channels = <&adc 0>, <&adc 1>, <&adc 2>, <&adc 3>,
+ <&adc 4>, <&adc 5>, <&adc 6>, <&adc 7>,
+ <&adc 8>, <&adc 9>, <&adc 10>, <&adc 11>,
+ <&adc 12>, <&adc 13>, <&adc 14>, <&adc 15>;
+ };
+};
+
+&fmc {
+ status = "okay";
+ flash@0 {
+ status = "okay";
+ m25p,fast-read;
+ label = "bmc";
+ spi-max-frequency = <50000000>; /* 50 MHz */
+#include "openbmc-flash-layout-64.dtsi"
+ };
+};
+
+&uart5 {
+ status = "okay";
+};
+
+&vuart {
+ status = "okay";
+ aspeed,lpc-io-reg = <0x2f8>;
+ aspeed,lpc-interrupts = <3 IRQ_TYPE_LEVEL_HIGH>;
+};
+
+&mac0 {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rgmii1_default &pinctrl_mdio1_default>;
+
+ nvmem-cells = <&eth0_macaddress>;
+ nvmem-cell-names = "mac-address";
+};
+
+&i2c0 {
+ status = "okay";
+};
+
+&i2c1 {
+ status = "okay";
+
+ /* hardware monitor/thermal sensor */
+ temperature-sensor@29 {
+ compatible = "nuvoton,nct7802";
+ reg = <0x29>;
+ };
+
+ /* motherboard temp sensor (TMP1, near BMC) */
+ temperature-sensor@4c {
+ compatible = "nuvoton,w83773g";
+ reg = <0x4c>;
+ };
+
+ /* motherboard FRU eeprom */
+ eeprom@50 {
+ compatible = "st,24c128", "atmel,24c128";
+ reg = <0x50>;
+ pagesize = <16>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ eth0_macaddress: macaddress@3f80 {
+ reg = <0x3f80 6>;
+ };
+ };
+
+ /* M.2 slot smbus mux */
+ i2c-mux@71 {
+ compatible = "nxp,pca9545";
+ reg = <0x71>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ i2c1mux0ch0: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ i2c1mux0ch1: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+ };
+};
+
+&i2c2 {
+ status = "okay";
+};
+
+&i2c3 {
+ status = "okay";
+};
+
+&i2c4 {
+ status = "okay";
+};
+
+&i2c5 {
+ status = "okay";
+};
+
+&i2c6 {
+ status = "okay";
+};
+
+&i2c7 {
+ status = "okay";
+};
+
+&i2c8 {
+ status = "okay";
+};
+
+&i2c9 {
+ status = "okay";
+};
+
+&i2c10 {
+ status = "okay";
+};
+
+&i2c11 {
+ status = "okay";
+};
+
+&i2c12 {
+ status = "okay";
+};
+
+&i2c13 {
+ status = "okay";
+};
+
+&video {
+ status = "okay";
+};
+
+&vhub {
+ status = "okay";
+};
+
+&lpc_ctrl {
+ status = "okay";
+};
+
+&lpc_snoop {
+ status = "okay";
+ snoop-ports = <0x80>;
+};
+
+&kcs3 {
+ status = "okay";
+ aspeed,lpc-io-reg = <0xca2>;
+};
+
+&peci0 {
+ status = "okay";
+};
+
+&pwm_tacho {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pwm0_default
+ &pinctrl_pwm2_default
+ &pinctrl_pwm3_default
+ &pinctrl_pwm4_default>;
+
+ fan@0 {
+ reg = <0x00>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x00>;
+ };
+
+ fan@2 {
+ reg = <0x02>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x02>;
+ };
+
+ fan@3 {
+ reg = <0x03>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x03>;
+ };
+
+ fan@4 {
+ reg = <0x04>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x04>;
+ };
+};
+
+&gpio {
+ status = "okay";
+ gpio-line-names =
+ /* A */ "LOCATORLED_STATUS_N", "LOCATORBTN_N",
+ "BMC_READY_N", "FM_SPD_DDRCPU_LVLSHFT_EN",
+ "", "", "", "",
+ /* B */ "NODE_ID_1", "NODE_ID_2", "PSU_FAN_FAIL_N", "",
+ "", "", "", "GPIO_RST",
+ /* C */ "", "", "", "", "", "", "", "",
+ /* D */ "FP_PWR_BTN_MUX_N", "FM_BMC_PWRBTN_OUT_N",
+ "FP_RST_BTN_N", "RST_BMC_RSTBTN_OUT_N",
+ "NMI_BTN_N", "BMC_NMI",
+ "", "",
+ /* E */ "", "", "", "FM_ME_RCVR_N", "", "", "", "",
+ /* F */ "BMC_SMB_SEL_N", "FM_CPU2_DISABLE_COD_N",
+ "FM_REMOTE_DEBUG_BMC_EN", "FM_CPU_ERR0_LVT3_EN",
+ "FM_CPU_ERR1_LVT3_EN", "FM_CPU_ERR2_LVT3_EN",
+ "FM_MEM_THERM_EVENT_CPU1_LVT3_N", "FM_MEM_THERM_EVENT_CPU2_LVT3_N",
+ /* G */ "HWM_BAT_EN", "", "BMC_PHYRST_N", "FM_BIOS_SPI_BMC_CTRL",
+ "BMC_ALERT1_N", "BMC_ALERT2_N", "BMC_ALERT3_N", "IRQ_SML0_ALERT_N",
+ /* H */ "BMC_SMB_PRESENT_1_N", "FM_PCH_CORE_VID_0", "FM_PCH_CORE_VID_1", "",
+ "FM_MFG_MODE", "BMC_RTCRST", "BMC_HB_LED_N", "BMC_CASEOPEN",
+ /* I */ "IRQ_PVDDQ_ABCD_CPU1_VRHOT_LVC3_N", "IRQ_PVDDQ_ABCD_CPU2_VRHOT_LVC3_N",
+ "IRQ_PVDDQ_EFGH_CPU1_VRHOT_LVC3_N", "IRQ_PVDDQ_EFGH_CPU2_VRHOT_LVC3_N",
+ "", "", "", "",
+ /* J */ "", "", "", "", "", "", "", "",
+ /* K */ "", "", "", "", "", "", "", "",
+ /* L */ "", "", "", "", "", "", "", "",
+ /* M */ "FM_PVCCIN_CPU1_PWR_IN_ALERT_N", "FM_PVCCIN_CPU2_PWR_IN_ALERT_N",
+ "IRQ_PVCCIN_CPU1_VRHOT_LVC3_N", "IRQ_PVCCIN_CPU2_VRHOT_LVC3_N",
+ "FM_CPU1_PROCHOT_BMC_LVC3_N", "",
+ "FM_CPU1_MEMHOT_OUT_N", "FM_CPU2_MEMHOT_OUT_N",
+ /* N */ "", "", "", "", "", "", "", "",
+ /* O */ "", "", "", "", "", "", "", "",
+ /* P */ "", "", "", "", "", "", "", "",
+ /* Q */ "", "", "", "", "", "", "RST_GLB_RST_WARN_N", "PCIE_WAKE_N",
+ /* R */ "", "", "FM_BMC_SUSACK_N", "FM_BMC_EUP_LOT6_N",
+ "", "FM_BMC_PCH_SCI_LPC_N", "", "",
+ /* S */ "FM_DBP_PRESENT_N", "FM_CPU2_SKTOCC_LCT3_N",
+ "FM_CPU1_FIVR_FAULT_LVT3", "FM_CPU2_FIVR_FAULT_LVT3",
+ "", "", "", "",
+ /* T */ "", "", "", "", "", "", "", "",
+ /* U */ "", "", "", "", "", "", "", "",
+ /* V */ "", "", "", "", "", "", "", "",
+ /* W */ "", "", "", "", "", "", "", "",
+ /* X */ "", "", "", "", "", "", "", "",
+ /* Y */ "FM_SLPS3_N", "FM_SLPS4_N", "", "FM_BMC_ONCTL_N_PLD",
+ "", "", "", "",
+ /* Z */ "FM_CPU_MSMI_CATERR_LVT3_N", "", "SYSTEM_FAULT_LED_N", "BMC_THROTTLE_N",
+ "", "", "", "",
+ /* AA */ "FM_CPU1_THERMTRIP_LATCH_LVT3_N", "FM_CPU2_THERMTRIP_LATCH_LVT3_N",
+ "FM_BIOS_POST_COMPLT_N", "DBP_BMC_SYSPWROK",
+ "", "IRQ_SML0_ALERT_MUX_N",
+ "IRQ_SMI_ACTIVE_N", "IRQ_NMI_EVENT_N",
+ /* AB */ "FM_PCH_BMC_THERMTRIP_N", "PWRGD_SYS_PWROK",
+ "ME_OVERRIDE", "IRQ_BMC_PCH_SMI_LPC_N",
+ "", "", "", "",
+ /* AC */ "", "", "", "", "", "", "", "";
+};
+
+&adc {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_adc0_default /* 3VSB */
+ &pinctrl_adc1_default /* 5VSB */
+ &pinctrl_adc2_default /* CPU1 */
+ &pinctrl_adc3_default /* NC */
+ &pinctrl_adc4_default /* VCCMABCD */
+ &pinctrl_adc5_default /* VCCMEFGH */
+ &pinctrl_adc6_default /* NC */
+ &pinctrl_adc7_default /* NC */
+ &pinctrl_adc8_default /* PVNN_PCH */
+ &pinctrl_adc9_default /* 1P05PCH */
+ &pinctrl_adc10_default /* 1P8PCH */
+ &pinctrl_adc11_default /* BAT */
+ &pinctrl_adc12_default /* 3V */
+ &pinctrl_adc13_default /* 5V */
+ &pinctrl_adc14_default /* 12V */
+ &pinctrl_adc15_default>; /* GND */
+};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-x570d4u.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-x570d4u.dts
new file mode 100644
index 00000000000000..3c975bc41ae7de
--- /dev/null
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-asrock-x570d4u.dts
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0+
+/dts-v1/;
+#include "aspeed-g5.dtsi"
+#include <dt-bindings/gpio/aspeed-gpio.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+ model = "Asrock Rack X570D4U BMC";
+ compatible = "asrock,x570d4u-bmc", "aspeed,ast2500";
+
+ aliases {
+ i2c40 = &i2c4mux0ch0;
+ i2c41 = &i2c4mux0ch1;
+ i2c42 = &i2c4mux0ch2;
+ i2c43 = &i2c4mux0ch3;
+ };
+
+ chosen {
+ stdout-path = &uart5;
+ };
+
+ memory@80000000 {
+ reg = <0x80000000 0x20000000>;
+ };
+
+ reserved-memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ pci_memory: region@9a000000 {
+ no-map;
+ reg = <0x9a000000 0x00010000>; /* 64K */
+ };
+
+ video_engine_memory: jpegbuffer {
+ size = <0x02800000>; /* 40M */
+ alignment = <0x01000000>;
+ compatible = "shared-dma-pool";
+ reusable;
+ };
+
+ gfx_memory: framebuffer {
+ size = <0x01000000>;
+ alignment = <0x01000000>;
+ compatible = "shared-dma-pool";
+ reusable;
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-0 {
+ /* led-heartbeat-n */
+ gpios = <&gpio ASPEED_GPIO(H, 6) GPIO_ACTIVE_LOW>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_HEARTBEAT;
+ linux,default-trigger = "timer";
+ };
+
+ led-1 {
+ /* led-fault-n */
+ gpios = <&gpio ASPEED_GPIO(Z, 2) GPIO_ACTIVE_LOW>;
+ color = <LED_COLOR_ID_AMBER>;
+ function = LED_FUNCTION_FAULT;
+ panic-indicator;
+ };
+ };
+
+ iio-hwmon {
+ compatible = "iio-hwmon";
+ io-channels = <&adc 0>, <&adc 1>, <&adc 2>, <&adc 3>, <&adc 4>,
+ <&adc 5>, <&adc 6>, <&adc 7>, <&adc 8>, <&adc 9>,
+ <&adc 10>, <&adc 11>, <&adc 12>;
+ };
+};
+
+&gpio {
+ status = "okay";
+ gpio-line-names =
+ /*A0-A3*/ "status-locatorled-n", "", "button-nmi-n", "",
+ /*A4-A7*/ "", "", "", "",
+ /*B0-B3*/ "input-bios-post-cmplt-n", "", "", "",
+ /*B4-B7*/ "", "", "", "",
+ /*C0-C3*/ "", "", "", "",
+ /*C4-C7*/ "", "", "control-locatorbutton", "",
+ /*D0-D3*/ "button-power", "control-power", "button-reset", "control-reset",
+ /*D4-D7*/ "", "", "", "",
+ /*E0-E3*/ "", "", "", "",
+ /*E4-E7*/ "", "", "", "",
+ /*F0-F3*/ "", "", "", "",
+ /*F4-F7*/ "", "", "", "",
+ /*G0-G3*/ "output-rtc-battery-voltage-read-enable", "input-id0", "input-id1", "input-id2",
+ /*G4-G7*/ "input-alert1-n", "input-alert2-n", "input-alert3-n", "",
+ /*H0-H3*/ "", "", "", "",
+ /*H4-H7*/ "input-mfg", "", "led-heartbeat-n", "input-caseopen",
+ /*I0-I3*/ "", "", "", "",
+ /*I4-I7*/ "", "", "", "",
+ /*J0-J3*/ "output-bmc-ready", "", "", "",
+ /*J4-J7*/ "", "", "", "",
+ /*K0-K3*/ "", "", "", "",
+ /*K4-K7*/ "", "", "", "",
+ /*L0-L3*/ "", "", "", "",
+ /*L4-L7*/ "", "", "", "",
+ /*M0-M3*/ "", "", "", "",
+ /*M4-M7*/ "", "", "", "",
+ /*N0-N3*/ "", "", "", "",
+ /*N4-N7*/ "", "", "", "",
+ /*O0-O3*/ "", "", "", "",
+ /*O4-O7*/ "", "", "", "",
+ /*P0-P3*/ "", "", "", "",
+ /*P4-P7*/ "", "", "", "",
+ /*Q0-Q3*/ "", "", "", "",
+ /*Q4-Q7*/ "", "", "", "",
+ /*R0-R3*/ "", "", "", "",
+ /*R4-R7*/ "", "", "", "",
+ /*S0-S3*/ "input-bmc-pchhot-n", "", "", "",
+ /*S4-S7*/ "", "", "", "",
+ /*T0-T3*/ "", "", "", "",
+ /*T4-T7*/ "", "", "", "",
+ /*U0-U3*/ "", "", "", "",
+ /*U4-U7*/ "", "", "", "",
+ /*V0-V3*/ "", "", "", "",
+ /*V4-V7*/ "", "", "", "",
+ /*W0-W3*/ "", "", "", "",
+ /*W4-W7*/ "", "", "", "",
+ /*X0-X3*/ "", "", "", "",
+ /*X4-X7*/ "", "", "", "",
+ /*Y0-Y3*/ "", "", "", "",
+ /*Y4-Y7*/ "", "", "", "",
+ /*Z0-Z3*/ "", "", "led-fault-n", "output-bmc-throttle-n",
+ /*Z4-Z7*/ "", "", "", "",
+ /*AA0-AA3*/ "input-cpu1-thermtrip-latch-n", "", "input-cpu1-prochot-n", "",
+ /*AA4-AC7*/ "", "", "", "",
+ /*AB0-AB3*/ "", "", "", "",
+ /*AB4-AC7*/ "", "", "", "",
+ /*AC0-AC3*/ "", "", "", "",
+ /*AC4-AC7*/ "", "", "", "";
+};
+
+&fmc {
+ status = "okay";
+ flash@0 {
+ status = "okay";
+ label = "bmc";
+ m25p,fast-read;
+ spi-max-frequency = <10000000>;
+#include "openbmc-flash-layout-64.dtsi"
+ };
+};
+
+&uart5 {
+ status = "okay";
+};
+
+&vuart {
+ status = "okay";
+};
+
+&mac0 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rgmii1_default &pinctrl_mdio1_default>;
+
+ nvmem-cells = <&eth0_macaddress>;
+ nvmem-cell-names = "mac-address";
+};
+
+&mac1 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rmii2_default &pinctrl_mdio2_default>;
+ use-ncsi;
+
+ nvmem-cells = <&eth1_macaddress>;
+ nvmem-cell-names = "mac-address";
+};
+
+&i2c0 {
+ /* SMBus on auxiliary panel header (AUX_PANEL1) */
+ status = "okay";
+};
+
+&i2c1 {
+ status = "okay";
+
+ w83773g@4c {
+ compatible = "nuvoton,w83773g";
+ reg = <0x4c>;
+ };
+};
+
+&i2c2 {
+ /* PSU SMBus (PSU_SMB1) */
+ status = "okay";
+};
+
+&i2c3 {
+ status = "okay";
+};
+
+&i2c4 {
+ status = "okay";
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9545";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ i2c4mux0ch0: i2c@0 {
+ /* SMBus on PCI express 16x slot */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ i2c4mux0ch1: i2c@1 {
+ /* SMBus on PCI express 8x slot */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ i2c4mux0ch2: i2c@2 {
+ /* Unknown */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ i2c4mux0ch3: i2c@3 {
+ /* SMBus on PCI express 1x slot */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+ };
+};
+
+&i2c5 {
+ status = "okay";
+};
+
+&i2c7 {
+ /* FRU and SPD EEPROM SMBus */
+ status = "okay";
+
+ eeprom@57 {
+ compatible = "st,24c128", "atmel,24c128";
+ reg = <0x57>;
+ pagesize = <16>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ eth0_macaddress: macaddress@3f80 {
+ reg = <0x3f80 6>;
+ };
+
+ eth1_macaddress: macaddress@3f88 {
+ reg = <0x3f88 6>;
+ };
+ };
+};
+
+&gfx {
+ status = "okay";
+};
+
+&pinctrl {
+ aspeed,external-nodes = <&gfx &lhc>;
+};
+
+&vhub {
+ status = "okay";
+};
+
+&ehci1 {
+ status = "okay";
+};
+
+&uhci {
+ status = "okay";
+};
+
+&kcs3 {
+ aspeed,lpc-io-reg = <0xca2>;
+ status = "okay";
+};
+
+&lpc_ctrl {
+ status = "okay";
+};
+
+&lpc_snoop {
+ status = "okay";
+ snoop-ports = <0x80>;
+};
+
+&p2a {
+ status = "okay";
+ memory-region = <&pci_memory>;
+};
+
+&video {
+ status = "okay";
+ memory-region = <&video_engine_memory>;
+};
+
+&pwm_tacho {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pwm0_default
+ &pinctrl_pwm1_default
+ &pinctrl_pwm2_default
+ &pinctrl_pwm3_default
+ &pinctrl_pwm4_default
+ &pinctrl_pwm5_default>;
+
+ fan@0 {
+ /* FAN1 (4-pin) */
+ reg = <0x00>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x00>;
+ };
+
+ fan@1 {
+ /* FAN2 (4-pin) */
+ reg = <0x01>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x01>;
+ };
+
+ fan@2 {
+ /* FAN3 (4-pin) */
+ reg = <0x02>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x02>;
+ };
+
+ fan@3 {
+ /* FAN4 (6-pin) */
+ reg = <0x03>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x04 0x0b>;
+ };
+
+ fan@4 {
+ /* FAN6 (6-pin) */
+ reg = <0x04>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x06 0x0d>;
+ };
+
+ fan@5 {
+ /* FAN5 (6-pin) */
+ reg = <0x05>;
+ aspeed,fan-tach-ch = /bits/ 8 <0x05 0x0c>;
+ };
+};
+
+&adc {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_adc0_default
+ &pinctrl_adc1_default
+ &pinctrl_adc2_default
+ &pinctrl_adc3_default
+ &pinctrl_adc4_default
+ &pinctrl_adc5_default
+ &pinctrl_adc6_default
+ &pinctrl_adc7_default
+ &pinctrl_adc8_default
+ &pinctrl_adc9_default
+ &pinctrl_adc10_default
+ &pinctrl_adc11_default
+ &pinctrl_adc12_default
+ &pinctrl_adc13_default
+ &pinctrl_adc14_default
+ &pinctrl_adc15_default>;
+};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-asus-x4tf.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-asus-x4tf.dts
new file mode 100644
index 00000000000000..1bda14a6604247
--- /dev/null
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-asus-x4tf.dts
@@ -0,0 +1,592 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright 2024 ASUS Corp.
+
+/dts-v1/;
+
+#include "aspeed-g6.dtsi"
+#include "aspeed-g6-pinctrl.dtsi"
+#include <dt-bindings/i2c/i2c.h>
+#include <dt-bindings/gpio/aspeed-gpio.h>
+
+/ {
+ model = "ASUS-X4TF";
+ compatible = "asus,x4tf", "aspeed,ast2600";
+
+ aliases {
+ serial4 = &uart5;
+ };
+
+ chosen {
+ stdout-path = "serial4:115200n8";
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x80000000 0x40000000>;
+ };
+
+ reserved-memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ video_engine_memory: video {
+ size = <0x04000000>;
+ alignment = <0x01000000>;
+ compatible = "shared-dma-pool";
+ reusable;
+ };
+ };
+
+ iio-hwmon {
+ compatible = "iio-hwmon";
+ io-channels = <&adc0 0>, <&adc0 1>, <&adc0 2>, <&adc0 3>,
+ <&adc0 4>, <&adc0 5>, <&adc0 6>, <&adc0 7>,
+ <&adc1 0>, <&adc1 1>, <&adc1 2>, <&adc1 3>,
+ <&adc1 4>, <&adc1 5>, <&adc1 6>, <&adc1 7>;
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-heartbeat {
+ gpios = <&gpio0 ASPEED_GPIO(P, 7) GPIO_ACTIVE_LOW>;
+ linux,default-trigger = "heartbeat";
+ };
+
+ led-uid {
+ gpios = <&gpio0 ASPEED_GPIO(P, 1) (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>;
+ default-state = "off";
+ };
+
+ led-status_Y {
+ gpios = <&gpio1 ASPEED_GPIO(B, 1) GPIO_ACTIVE_LOW>;
+ default-state = "off";
+ };
+
+ led-sys_boot_status {
+ gpios = <&gpio1 ASPEED_GPIO(B, 0) GPIO_ACTIVE_LOW>;
+ default-state = "off";
+ };
+ };
+};
+
+&adc0 {
+ vref = <2500>;
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_adc0_default &pinctrl_adc1_default
+ &pinctrl_adc2_default &pinctrl_adc3_default
+ &pinctrl_adc4_default &pinctrl_adc5_default
+ &pinctrl_adc6_default &pinctrl_adc7_default>;
+};
+
+&adc1 {
+ vref = <2500>;
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_adc8_default &pinctrl_adc9_default
+ &pinctrl_adc10_default &pinctrl_adc11_default
+ &pinctrl_adc12_default &pinctrl_adc13_default
+ &pinctrl_adc14_default &pinctrl_adc15_default>;
+};
+
+&peci0 {
+ status = "okay";
+};
+
+&lpc_snoop {
+ snoop-ports = <0x80>;
+ status = "okay";
+};
+
+&mac2 {
+ status = "okay";
+ phy-mode = "rmii";
+ use-ncsi;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rmii3_default>;
+};
+
+&mac3 {
+ status = "okay";
+ phy-mode = "rmii";
+ use-ncsi;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rmii4_default>;
+};
+
+&fmc {
+ status = "okay";
+
+ flash@0 {
+ status = "okay";
+ m25p,fast-read;
+ label = "bmc-spi";
+ spi-max-frequency = <50000000>;
+#include "openbmc-flash-layout-128.dtsi"
+ };
+};
+
+&spi1 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_spi1_default>;
+
+ flash@0 {
+ status = "okay";
+ label = "bios-spi";
+ spi-max-frequency = <50000000>;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ biosfullimg@0 {
+ reg = <0x0 0x2000000>; //32768 *1024 = 32 MB
+ label = "biosfullimg";
+ };
+ };
+ };
+};
+
+&i2c0 {
+ status = "okay";
+};
+
+&i2c1 {
+ status = "okay";
+};
+
+&i2c2 {
+ status = "okay";
+};
+
+&i2c3 {
+ status = "okay";
+};
+
+&i2c4 {
+ status = "okay";
+
+ temperature-sensor@48 {
+ compatible = "ti,tmp75";
+ reg = <0x48>;
+ };
+
+ temperature-sensor@49 {
+ compatible = "ti,tmp75";
+ reg = <0x49>;
+ };
+
+ pca9555_4_20: gpio@20 {
+ compatible = "nxp,pca9555";
+ reg = <0x20>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ pca9555_4_22: gpio@22 {
+ compatible = "nxp,pca9555";
+ reg = <0x22>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ pca9555_4_24: gpio@24 {
+ compatible = "nxp,pca9555";
+ reg = <0x24>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-line-names =
+ /*A0 - A3 0*/ "", "STRAP_BMC_BATTERY_GPIO1", "", "",
+ /*A4 - A7 4*/ "", "", "", "",
+ /*B0 - B7 8*/ "", "", "", "", "", "", "", "";
+ };
+
+ pca9555_4_26: gpio@26 {
+ compatible = "nxp,pca9555";
+ reg = <0x26>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9546";
+ status = "okay";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ channel_1: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ channel_2: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ channel_3: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ channel_4: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+ };
+};
+
+&i2c5 {
+ status = "okay";
+
+ pca9555_5_24: gpio@24 {
+ compatible = "nxp,pca9555";
+ reg = <0x24>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9546";
+ status = "okay";
+ reg = <0x70 >;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ channel_5: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ pca9555_5_5_20: gpio@20 {
+ compatible = "nxp,pca9555";
+ reg = <0x20>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-line-names =
+ "", "", "", "", "", "", "", "",
+ "", "", "SYS_FAN6", "SYS_FAN5",
+ "SYS_FAN4", "SYS_FAN3",
+ "SYS_FAN2", "SYS_FAN1";
+ };
+
+ pca9555_5_5_21: gpio@21 {
+ compatible = "nxp,pca9555";
+ reg = <0x21>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ power-monitor@44 {
+ compatible = "ti,ina219";
+ reg = <0x44>;
+ shunt-resistor = <2>;
+ };
+ };
+
+ channel_6: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ channel_7: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ channel_8: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+ };
+};
+
+&i2c6 {
+ status = "okay";
+
+ pca9555_6_27: gpio@27 {
+ compatible = "nxp,pca9555";
+ reg = <0x27>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ pca9555_6_20: gpio@20 {
+ compatible = "nxp,pca9555";
+ reg = <0x20>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ gpio-line-names =
+ /*A0 0*/ "", "", "", "", "", "", "", "",
+ /*B0 8*/ "Drive_NVMe1", "Drive_NVMe2", "", "",
+ /*B4 12*/ "", "", "", "";
+ };
+
+ pca9555_6_21: gpio@21 {
+ compatible = "nxp,pca9555";
+ reg = <0x21>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+};
+
+&i2c7 {
+ status = "okay";
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9546";
+ status = "okay";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ idle-state = <1>;
+
+ channel_9: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ temperature-sensor@48 {
+ compatible = "ti,tmp75";
+ reg = <0x48>;
+ };
+
+ temperature-sensor@49 {
+ compatible = "ti,tmp75";
+ reg = <0x49>;
+ };
+
+ power-monitor@40 {
+ compatible = "ti,ina219";
+ reg = <0x40>;
+ shunt-resistor = <2>;
+ };
+
+ power-monitor@41 {
+ compatible = "ti,ina219";
+ reg = <0x41>;
+ shunt-resistor = <5>;
+ };
+ };
+
+ channel_10: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ channel_11: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ channel_12: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+ };
+
+ i2c-mux@71 {
+ compatible = "nxp,pca9546";
+ status = "okay";
+ reg = <0x71>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ channel_13: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ channel_14: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ channel_15: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ channel_16: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+ };
+};
+
+&i2c8 {
+ status = "okay";
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9546";
+ status = "okay";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ channel_17: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ channel_18: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ temperature-sensor@48 {
+ compatible = "ti,tmp75";
+ reg = <0x48>;
+ };
+
+ power-monitor@41 {
+ compatible = "ti,ina219";
+ reg = <0x41>;
+ shunt-resistor = <5>;
+ };
+ };
+
+ channel_19: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ channel_20: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+ };
+};
+
+&i2c9 {
+ status = "okay";
+};
+
+&i2c10 {
+ status = "okay";
+};
+
+&i2c11 {
+ status = "okay";
+};
+
+&i2c14 {
+ status = "okay";
+ multi-master;
+
+ eeprom@50 {
+ compatible = "atmel,24c08";
+ reg = <0x50>;
+ };
+
+ eeprom@51 {
+ compatible = "atmel,24c08";
+ reg = <0x51>;
+ };
+};
+
+&sgpiom0 {
+ status = "okay";
+ ngpios = <128>;
+};
+
+&video {
+ status = "okay";
+ memory-region = <&video_engine_memory>;
+};
+
+&sdc {
+ status = "okay";
+};
+
+&lpc_snoop {
+ status = "okay";
+ snoop-ports = <0x80>;
+};
+
+&kcs1 {
+ aspeed,lpc-io-reg = <0xca0>;
+ status = "okay";
+};
+
+&kcs2 {
+ aspeed,lpc-io-reg = <0xca8>;
+ status = "okay";
+};
+
+&kcs3 {
+ aspeed,lpc-io-reg = <0xca2>;
+ status = "okay";
+};
+
+&uart3 {
+ status = "okay";
+};
+
+&uart5 {
+ status = "okay";
+};
+
+&uart_routing {
+ status = "okay";
+};
+
+&vhub {
+ status = "okay";
+};
+
+&gpio0 {
+ gpio-line-names =
+ /*A0 0*/ "", "", "", "", "", "", "", "",
+ /*B0 8*/ "", "", "", "", "", "", "PS_PWROK", "",
+ /*C0 16*/ "", "", "", "", "", "", "", "",
+ /*D0 24*/ "", "", "", "", "", "", "", "",
+ /*E0 32*/ "", "", "", "", "", "", "", "",
+ /*F0 40*/ "", "", "", "", "", "", "", "",
+ /*G0 48*/ "", "", "", "", "", "", "", "",
+ /*H0 56*/ "", "", "", "", "", "", "", "",
+ /*I0 64*/ "", "", "", "", "", "", "", "",
+ /*J0 72*/ "", "", "", "", "", "", "", "",
+ /*K0 80*/ "", "", "", "", "", "", "", "",
+ /*L0 88*/ "", "", "", "", "", "", "", "",
+ /*M0 96*/ "", "", "", "", "", "", "", "",
+ /*N0 104*/ "", "", "", "",
+ /*N4 108*/ "POST_COMPLETE", "ESR1_GPIO_AST_SPISEL", "", "",
+ /*O0 112*/ "", "", "", "", "", "", "", "",
+ /*P0 120*/ "ID_BUTTON", "ID_OUT", "POWER_BUTTON", "POWER_OUT",
+ /*P4 124*/ "RESET_BUTTON", "RESET_OUT", "", "HEARTBEAT",
+ /*Q0 128*/ "", "", "", "", "", "", "", "",
+ /*R0 136*/ "", "", "", "", "", "", "", "",
+ /*S0 144*/ "", "", "", "", "", "", "", "",
+ /*T0 152*/ "", "", "", "", "", "", "", "",
+ /*U0 160*/ "", "", "", "", "", "", "", "",
+ /*V0 168*/ "", "", "", "", "", "", "", "",
+ /*W0 176*/ "", "", "", "", "", "", "", "",
+ /*X0 184*/ "", "", "", "", "", "", "", "",
+ /*Y0 192*/ "", "", "", "", "", "", "", "",
+ /*Z0 200*/ "", "", "", "", "", "", "", "";
+};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-harma.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-harma.dts
new file mode 100644
index 00000000000000..bbbab8023cd66c
--- /dev/null
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-harma.dts
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright 2023 Facebook Inc.
+
+/dts-v1/;
+#include "aspeed-g6.dtsi"
+#include <dt-bindings/gpio/aspeed-gpio.h>
+#include <dt-bindings/i2c/i2c.h>
+
+/ {
+ model = "Facebook Harma";
+ compatible = "facebook,harma-bmc", "aspeed,ast2600";
+
+ aliases {
+ serial0 = &uart1;
+ serial1 = &uart2;
+ serial2 = &uart4;
+ serial4 = &uart5;
+
+ i2c20 = &imux20;
+ i2c21 = &imux21;
+ i2c22 = &imux22;
+ i2c23 = &imux23;
+ i2c24 = &imux24;
+ i2c25 = &imux25;
+ i2c26 = &imux26;
+ i2c27 = &imux27;
+ i2c28 = &imux28;
+ i2c29 = &imux29;
+ i2c30 = &imux30;
+ i2c31 = &imux31;
+
+ spi1 = &spi_gpio;
+ };
+
+ chosen {
+ stdout-path = &uart5;
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x80000000 0x80000000>;
+ };
+
+ iio-hwmon {
+ compatible = "iio-hwmon";
+ io-channels = <&adc0 0>, <&adc0 1>, <&adc0 2>, <&adc0 3>,
+ <&adc0 4>, <&adc0 5>, <&adc0 6>, <&adc0 7>,
+ <&adc1 2>;
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-0 {
+ label = "bmc_heartbeat_amber";
+ gpios = <&gpio0 ASPEED_GPIO(P, 7) GPIO_ACTIVE_LOW>;
+ linux,default-trigger = "heartbeat";
+ };
+
+ led-1 {
+ label = "fp_id_amber";
+ default-state = "off";
+ gpios = <&gpio0 13 GPIO_ACTIVE_HIGH>;
+ };
+
+ led-2 {
+ label = "power_blue";
+ default-state = "off";
+ gpios = <&gpio0 124 GPIO_ACTIVE_HIGH>;
+ };
+ };
+
+ spi_gpio: spi-gpio {
+ status = "okay";
+ compatible = "spi-gpio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpio-sck = <&gpio0 ASPEED_GPIO(Z, 3) GPIO_ACTIVE_HIGH>;
+ gpio-mosi = <&gpio0 ASPEED_GPIO(Z, 4) GPIO_ACTIVE_HIGH>;
+ gpio-miso = <&gpio0 ASPEED_GPIO(Z, 5) GPIO_ACTIVE_HIGH>;
+ num-chipselects = <1>;
+ cs-gpios = <&gpio0 ASPEED_GPIO(Z, 0) GPIO_ACTIVE_LOW>;
+
+ tpmdev@0 {
+ compatible = "infineon,slb9670", "tcg,tpm_tis-spi";
+ spi-max-frequency = <33000000>;
+ reg = <0>;
+ };
+ };
+};
+
+// HOST BIOS Debug
+&uart1 {
+ status = "okay";
+};
+
+// SOL Host Console
+&uart2 {
+ status = "okay";
+ pinctrl-0 = <>;
+};
+
+// SOL BMC Console
+&uart4 {
+ status = "okay";
+ pinctrl-0 = <>;
+};
+
+// BMC Debug Console
+&uart5 {
+ status = "okay";
+};
+
+// MTIA
+&uart6 {
+ status = "okay";
+};
+
+&uart_routing {
+ status = "okay";
+};
+
+&wdt1 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_wdtrst1_default>;
+ aspeed,reset-type = "soc";
+ aspeed,external-signal;
+ aspeed,ext-push-pull;
+ aspeed,ext-active-high;
+ aspeed,ext-pulse-duration = <256>;
+};
+
+&mac3 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rmii4_default>;
+ use-ncsi;
+ mlx,multi-host;
+};
+
+&rtc {
+ status = "okay";
+};
+
+&fmc {
+ status = "okay";
+
+ flash@0 {
+ status = "okay";
+ m25p,fast-read;
+ label = "bmc";
+ spi-max-frequency = <50000000>;
+#include "openbmc-flash-layout-128.dtsi"
+ };
+
+ flash@1 {
+ status = "okay";
+ m25p,fast-read;
+ label = "alt-bmc";
+ spi-max-frequency = <50000000>;
+ };
+};
+
+// BIOS Flash
+&spi2 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_spi2_default>;
+
+ flash@0 {
+ status = "okay";
+ m25p,fast-read;
+ label = "pnor";
+ spi-max-frequency = <12000000>;
+ spi-tx-bus-width = <2>;
+ spi-rx-bus-width = <2>;
+ };
+};
+
+&kcs2 {
+ status = "okay";
+ aspeed,lpc-io-reg = <0xca8>;
+};
+
+&kcs3 {
+ status = "okay";
+ aspeed,lpc-io-reg = <0xca2>;
+};
+
+&i2c0 {
+ status = "okay";
+
+ max31790@5e{
+ compatible = "max31790";
+ reg = <0x5e>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+};
+
+&i2c1 {
+ status = "okay";
+
+ tmp75@4b {
+ compatible = "ti,tmp75";
+ reg = <0x4b>;
+ };
+
+ // MB NIC FRU
+ eeprom@50 {
+ compatible = "atmel,24c64";
+ reg = <0x50>;
+ };
+};
+
+&i2c2 {
+ status = "okay";
+
+ max31790@5e{
+ compatible = "max31790";
+ reg = <0x5e>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+};
+
+&i2c3 {
+ status = "okay";
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9543";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ imux20: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ //Retimer Flash
+ eeprom@50 {
+ compatible = "atmel,24c2048";
+ reg = <0x50>;
+ pagesize = <128>;
+ };
+ };
+ imux21: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+ };
+};
+
+&i2c4 {
+ status = "okay";
+ // PDB FRU
+ eeprom@52 {
+ compatible = "atmel,24c64";
+ reg = <0x52>;
+ };
+
+ delta_brick@69 {
+ compatible = "pmbus";
+ reg = <0x69>;
+ };
+
+ tmp75@49 {
+ compatible = "ti,tmp75";
+ reg = <0x49>;
+ };
+
+ power-monitor@22 {
+ compatible = "lltc,ltc4286";
+ reg = <0x22>;
+ adi,vrange-low-enable;
+ shunt-resistor-micro-ohms = <500>;
+ };
+};
+
+&i2c5 {
+ status = "okay";
+};
+
+&i2c6 {
+ status = "okay";
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9543";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ imux22: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+ imux23: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+ };
+};
+
+&i2c7 {
+ status = "okay";
+};
+
+&i2c8 {
+ status = "okay";
+};
+
+&i2c9 {
+ status = "okay";
+
+ gpio@30 {
+ compatible = "nxp,pca9555";
+ reg = <0x30>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+ gpio@31 {
+ compatible = "nxp,pca9555";
+ reg = <0x31>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ i2c-mux@71 {
+ compatible = "nxp,pca9546";
+ reg = <0x71>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ imux24: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+ imux25: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+ imux26: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+ imux27: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+ };
+ // PTTV FRU
+ eeprom@52 {
+ compatible = "atmel,24c64";
+ reg = <0x52>;
+ };
+};
+
+&i2c11 {
+ status = "okay";
+};
+
+&i2c12 {
+ status = "okay";
+};
+
+&i2c13 {
+ status = "okay";
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9545";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ imux28: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+ imux29: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ //MB FRU
+ eeprom@54 {
+ compatible = "atmel,24c64";
+ reg = <0x54>;
+ };
+ };
+ imux30: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+ imux31: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+ };
+};
+
+// To Debug card
+&i2c14 {
+ status = "okay";
+ multi-master;
+
+ ipmb@10 {
+ compatible = "ipmb-dev";
+ reg = <(0x10 | I2C_OWN_SLAVE_ADDRESS)>;
+ i2c-protocol;
+ };
+};
+
+&i2c15 {
+ status = "okay";
+
+ // SCM FRU
+ eeprom@50 {
+ compatible = "atmel,24c64";
+ reg = <0x50>;
+ };
+
+ // BSM FRU
+ eeprom@56 {
+ compatible = "atmel,24c64";
+ reg = <0x56>;
+ };
+};
+
+&adc0 {
+ aspeed,int-vref-microvolt = <2500000>;
+ status = "okay";
+ pinctrl-0 = <&pinctrl_adc0_default &pinctrl_adc1_default
+ &pinctrl_adc2_default &pinctrl_adc3_default
+ &pinctrl_adc4_default &pinctrl_adc5_default
+ &pinctrl_adc6_default &pinctrl_adc7_default>;
+};
+
+&adc1 {
+ aspeed,int-vref-microvolt = <2500000>;
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_adc10_default>;
+};
+
+&ehci0 {
+ status = "okay";
+};
+
+&gpio0 {
+ pinctrl-names = "default";
+ gpio-line-names =
+ /*A0-A7*/ "","","","","","","","",
+ /*B0-B7*/ "","","","",
+ "bmc-spi-mux-select-0","led-identify","","",
+ /*C0-C7*/ "","","","","","","","",
+ /*D0-D7*/ "","","sol-uart-select","","","","","",
+ /*E0-E7*/ "","","","","","","","",
+ /*F0-F7*/ "","","","","","","","",
+ /*G0-G7*/ "","","","","","","","",
+ /*H0-H7*/ "","","","","","","","",
+ /*I0-I7*/ "","","","","","","","",
+ /*J0-J7*/ "","","","","","","","",
+ /*K0-K7*/ "","","","","","","","",
+ /*L0-L7*/ "","","","","","","","",
+ /*M0-M7*/ "","","","","","","","",
+ /*N0-N7*/ "led-postcode-0","led-postcode-1",
+ "led-postcode-2","led-postcode-3",
+ "led-postcode-4","led-postcode-5",
+ "led-postcode-6","led-postcode-7",
+ /*O0-O7*/ "","","","","","","","",
+ /*P0-P7*/ "power-button","power-host-control",
+ "reset-button","","led-power","","","",
+ /*Q0-Q7*/ "","","","","","","","",
+ /*R0-R7*/ "","","","","","","","",
+ /*S0-S7*/ "","","","","","","","",
+ /*T0-T7*/ "","","","","","","","",
+ /*U0-U7*/ "","","","","","","led-identify-gate","",
+ /*V0-V7*/ "","","","",
+ "rtc-battery-voltage-read-enable","","","",
+ /*W0-W7*/ "","","","","","","","",
+ /*X0-X7*/ "","","","","","","","",
+ /*Y0-Y7*/ "","","","","","","","",
+ /*Z0-Z7*/ "","","","","","","","";
+};
+
+&sgpiom0 {
+ status = "okay";
+ max-ngpios = <128>;
+ ngpios = <128>;
+ bus-frequency = <2000000>;
+ gpio-line-names =
+ /*in - out - in - out */
+ /*A0-A3 line 0-7*/
+ "presence-scm-cable","power-config-disable-e1s-0",
+ "","",
+ "","power-config-disable-e1s-1",
+ "","",
+ /*A4-A7 line 8-15*/
+ "","power-config-asic-module-enable",
+ "","power-config-asic-power-good",
+ "","power-config-pdb-power-good",
+ "presence-cpu","smi-control-n",
+ /*B0-B3 line 16-23*/
+ "","nmi-control-n",
+ "","nmi-control-sync-flood-n",
+ "","",
+ "","",
+ /*B4-B7 line 24-31*/
+ "","FM_CPU_SP5R1",
+ "reset-cause-rsmrst","FM_CPU_SP5R2",
+ "","FM_CPU_SP5R3",
+ "","FM_CPU_SP5R4",
+ /*C0-C3 line 32-39*/
+ "","FM_CPU0_SA0",
+ "","FM_CPU0_SA1",
+ "","rt-cpu0-p0-enable",
+ "","rt-cpu0-p1-enable",
+ /*C4-C7 line 40-47*/
+ "","smb-rt-rom-p0-select",
+ "","smb-rt-rom-p1-select",
+ "","i3c-cpu-mux0-oe-n",
+ "","i3c-cpu-mux0-select",
+ /*D0-D3 line 48-55*/
+ "","i3c-cpu-mux1-oe-n",
+ "","i3c-cpu-mux1-select",
+ "","reset-control-bmc",
+ "","reset-control-cpu0-p0-mux",
+ /*D4-D7 line 56-63*/
+ "","reset-control-cpu0-p1-mux",
+ "","reset-control-e1s-mux",
+ "power-host-good","reset-control-mb-mux",
+ "power-cpu-good","reset-control-smb-e1s-0",
+ /*E0-E3 line 64-71*/
+ "","reset-control-smb-e1s-1",
+ "host-ready-n","reset-control-srst",
+ "presence-e1s-0","reset-control-usb-hub",
+ "","reset-control",
+ /*E4-E7 line 72-79*/
+ "presence-e1s-1","reset-control-cpu-kbrst",
+ "","reset-control-platrst",
+ "","bmc-jtag-mux-select-0",
+ "","bmc-jtag-mux-select-1",
+ /*F0-F3 line 80-87*/
+ "","bmc-jtag-select",
+ "","bmc-ready-n",
+ "","bmc-ready-sgpio",
+ "","rt-cpu0-p0-force-enable",
+ /*F4-F7 line 88-95*/
+ "presence-asic-modules-0","rt-cpu0-p1-force-enable",
+ "presence-asic-modules-1","bios-debug-msg-disable",
+ "","uart-control-buffer-select",
+ "","ac-control-n",
+ /*G0-G3 line 96-103*/
+ "FM_CPU_CORETYPE2","",
+ "FM_CPU_CORETYPE1","",
+ "FM_CPU_CORETYPE0","",
+ "FM_BOARD_REV_ID5","",
+ /*G4-G7 line 104-111*/
+ "FM_BOARD_REV_ID4","",
+ "FM_BOARD_REV_ID3","",
+ "FM_BOARD_REV_ID2","",
+ "FM_BOARD_REV_ID1","",
+ /*H0-H3 line 112-119*/
+ "FM_BOARD_REV_ID0","",
+ "","","","","","",
+ /*H4-H7 line 120-127*/
+ "","",
+ "reset-control-pcie-expansion-3","",
+ "reset-control-pcie-expansion-2","",
+ "reset-control-pcie-expansion-1","",
+ /*I0-I3 line 128-135*/
+ "reset-control-pcie-expansion-0","",
+ "FM_EXP_SLOT_ID1","",
+ "FM_EXP_SLOT_ID0","",
+ "","",
+ /*I4-I7 line 136-143*/
+ "","","","","","","","",
+ /*J0-J3 line 144-151*/
+ "","","","","","","","",
+ /*J4-J7 line 152-159*/
+ "SLOT_ID_BCB_0","",
+ "SLOT_ID_BCB_1","",
+ "SLOT_ID_BCB_2","",
+ "SLOT_ID_BCB_3","",
+ /*K0-K3 line 160-167*/
+ "","","","","","","","",
+ /*K4-K7 line 168-175*/
+ "","","","","","","","",
+ /*L0-L3 line 176-183*/
+ "","","","","","","","",
+ /*L4-L7 line 184-191*/
+ "","","","","","","","",
+ /*M0-M3 line 192-199*/
+ "","","","","","","","",
+ /*M4-M7 line 200-207*/
+ "","","","","","","","",
+ /*N0-N3 line 208-215*/
+ "","","","","","","","",
+ /*N4-N7 line 216-223*/
+ "","","","","","","","",
+ /*O0-O3 line 224-231*/
+ "","","","","","","","",
+ /*O4-O7 line 232-239*/
+ "","","","","","","","",
+ /*P0-P3 line 240-247*/
+ "","","","","","","","",
+ /*P4-P7 line 248-255*/
+ "","","","","","","","";
+};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-minerva-cmc.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-minerva-cmc.dts
deleted file mode 100644
index f04ef906352080..00000000000000
--- a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-minerva-cmc.dts
+++ /dev/null
@@ -1,265 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-// Copyright (c) 2023 Facebook Inc.
-/dts-v1/;
-
-#include "aspeed-g6.dtsi"
-#include <dt-bindings/gpio/aspeed-gpio.h>
-#include <dt-bindings/i2c/i2c.h>
-
-/ {
- model = "Facebook Minerva CMC";
- compatible = "facebook,minerva-cmc", "aspeed,ast2600";
-
- aliases {
- serial5 = &uart5;
- };
-
- chosen {
- stdout-path = "serial5:57600n8";
- };
-
- memory@80000000 {
- device_type = "memory";
- reg = <0x80000000 0x80000000>;
- };
-
- iio-hwmon {
- compatible = "iio-hwmon";
- io-channels = <&adc0 0>, <&adc0 1>, <&adc0 2>, <&adc0 3>,
- <&adc0 4>, <&adc0 5>, <&adc0 6>, <&adc0 7>,
- <&adc1 2>;
- };
-};
-
-&uart6 {
- status = "okay";
-};
-
-&wdt1 {
- status = "okay";
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_wdtrst1_default>;
- aspeed,reset-type = "soc";
- aspeed,external-signal;
- aspeed,ext-push-pull;
- aspeed,ext-active-high;
- aspeed,ext-pulse-duration = <256>;
-};
-
-&mac3 {
- status = "okay";
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_rmii4_default>;
- use-ncsi;
- mlx,multi-host;
-};
-
-&fmc {
- status = "okay";
- flash@0 {
- status = "okay";
- m25p,fast-read;
- label = "bmc";
- spi-max-frequency = <50000000>;
-#include "openbmc-flash-layout-128.dtsi"
- };
- flash@1 {
- status = "okay";
- m25p,fast-read;
- label = "alt-bmc";
- spi-max-frequency = <50000000>;
- };
-};
-
-&rtc {
- status = "okay";
-};
-
-&sgpiom1 {
- status = "okay";
- ngpios = <128>;
- bus-frequency = <2000000>;
-};
-
-&i2c0 {
- status = "okay";
-};
-
-&i2c1 {
- status = "okay";
-
- temperature-sensor@4b {
- compatible = "ti,tmp75";
- reg = <0x4B>;
- };
-
- eeprom@51 {
- compatible = "atmel,24c128";
- reg = <0x51>;
- };
-};
-
-&i2c2 {
- status = "okay";
-
- i2c-mux@77 {
- compatible = "nxp,pca9548";
- reg = <0x77>;
- #address-cells = <1>;
- #size-cells = <0>;
- i2c-mux-idle-disconnect;
-
- i2c@0 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0>;
-
- eeprom@50 {
- compatible = "atmel,24c128";
- reg = <0x50>;
- };
- };
-
- i2c@1 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <1>;
-
- eeprom@50 {
- compatible = "atmel,24c128";
- reg = <0x50>;
- };
- };
-
- i2c@2 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <2>;
-
- eeprom@50 {
- compatible = "atmel,24c128";
- reg = <0x50>;
- };
- };
-
- i2c@3 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <3>;
-
- eeprom@50 {
- compatible = "atmel,24c128";
- reg = <0x50>;
- };
- };
-
- i2c@4 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <4>;
-
- eeprom@50 {
- compatible = "atmel,24c128";
- reg = <0x50>;
- };
- };
-
- i2c@5 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <5>;
-
- eeprom@50 {
- compatible = "atmel,24c128";
- reg = <0x50>;
- };
- };
- };
-};
-
-&i2c3 {
- status = "okay";
-};
-
-&i2c4 {
- status = "okay";
-};
-
-&i2c5 {
- status = "okay";
-};
-
-&i2c6 {
- status = "okay";
-};
-
-&i2c7 {
- status = "okay";
-};
-
-&i2c8 {
- status = "okay";
-};
-
-&i2c9 {
- status = "okay";
-};
-
-&i2c10 {
- status = "okay";
-};
-
-&i2c11 {
- status = "okay";
-};
-
-&i2c12 {
- status = "okay";
-};
-
-&i2c13 {
- status = "okay";
-};
-
-&i2c14 {
- status = "okay";
- multi-master;
-
- ipmb@10 {
- compatible = "ipmb-dev";
- reg = <(0x10 | I2C_OWN_SLAVE_ADDRESS)>;
- i2c-protocol;
- };
-};
-
-&i2c15 {
- status = "okay";
-
- eeprom@50 {
- compatible = "atmel,24c128";
- reg = <0x50>;
- };
-};
-
-&adc0 {
- aspeed,int-vref-microvolt = <2500000>;
- status = "okay";
- pinctrl-0 = <&pinctrl_adc0_default &pinctrl_adc1_default
- &pinctrl_adc2_default &pinctrl_adc3_default
- &pinctrl_adc4_default &pinctrl_adc5_default
- &pinctrl_adc6_default &pinctrl_adc7_default>;
-};
-
-&adc1 {
- aspeed,int-vref-microvolt = <2500000>;
- status = "okay";
- pinctrl-0 = <&pinctrl_adc10_default>;
-};
-
-&ehci1 {
- status = "okay";
-};
-
-&uhci {
- status = "okay";
-};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-minerva.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-minerva.dts
new file mode 100644
index 00000000000000..942e53d5c71409
--- /dev/null
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-facebook-minerva.dts
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2023 Facebook Inc.
+/dts-v1/;
+
+#include "aspeed-g6.dtsi"
+#include <dt-bindings/gpio/aspeed-gpio.h>
+#include <dt-bindings/i2c/i2c.h>
+
+/ {
+ model = "Facebook Minerva CMM";
+ compatible = "facebook,minerva-cmc", "aspeed,ast2600";
+
+ aliases {
+ serial5 = &uart5;
+ /*
+ * PCA9548 (2-0077) provides 8 channels connecting to
+ * 6 pcs of FCB (Fan Controller Board).
+ */
+ i2c16 = &imux16;
+ i2c17 = &imux17;
+ i2c18 = &imux18;
+ i2c19 = &imux19;
+ i2c20 = &imux20;
+ i2c21 = &imux21;
+ };
+
+ chosen {
+ stdout-path = "serial5:57600n8";
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x80000000 0x80000000>;
+ };
+
+ iio-hwmon {
+ compatible = "iio-hwmon";
+ io-channels = <&adc0 0>, <&adc0 1>, <&adc0 2>, <&adc0 3>,
+ <&adc0 4>, <&adc0 5>, <&adc0 6>, <&adc0 7>,
+ <&adc1 2>;
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-fan-fault {
+ label = "led-fan-fault";
+ gpios = <&leds_gpio 9 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+ };
+};
+
+&uart6 {
+ status = "okay";
+};
+
+&wdt1 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_wdtrst1_default>;
+ aspeed,reset-type = "soc";
+ aspeed,external-signal;
+ aspeed,ext-push-pull;
+ aspeed,ext-active-high;
+ aspeed,ext-pulse-duration = <256>;
+};
+
+&mac3 {
+ status = "okay";
+ phy-mode = "rmii";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rmii4_default>;
+ fixed-link {
+ speed = <100>;
+ full-duplex;
+ };
+};
+
+&fmc {
+ status = "okay";
+ flash@0 {
+ status = "okay";
+ m25p,fast-read;
+ label = "bmc";
+ spi-max-frequency = <50000000>;
+#include "openbmc-flash-layout-128.dtsi"
+ };
+ flash@1 {
+ status = "okay";
+ m25p,fast-read;
+ label = "alt-bmc";
+ spi-max-frequency = <50000000>;
+ };
+};
+
+&rtc {
+ status = "okay";
+};
+
+&sgpiom0 {
+ status = "okay";
+ ngpios = <128>;
+ bus-frequency = <2000000>;
+};
+
+&i2c0 {
+ status = "okay";
+
+ power-monitor@40 {
+ compatible = "ti,ina230";
+ reg = <0x40>;
+ shunt-resistor = <1000>;
+ };
+
+ power-monitor@41 {
+ compatible = "ti,ina230";
+ reg = <0x41>;
+ shunt-resistor = <1000>;
+ };
+
+ power-monitor@67 {
+ compatible = "adi,ltc2945";
+ reg = <0x67>;
+ };
+
+ power-monitor@68 {
+ compatible = "adi,ltc2945";
+ reg = <0x68>;
+ };
+
+ leds_gpio: gpio@19 {
+ compatible = "nxp,pca9555";
+ reg = <0x19>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+};
+
+&i2c1 {
+ status = "okay";
+
+ temperature-sensor@4b {
+ compatible = "ti,tmp75";
+ reg = <0x4b>;
+ };
+
+ temperature-sensor@48 {
+ compatible = "ti,tmp75";
+ reg = <0x48>;
+ };
+
+ eeprom@54 {
+ compatible = "atmel,24c128";
+ reg = <0x54>;
+ };
+};
+
+&i2c2 {
+ status = "okay";
+
+ i2c-mux@77 {
+ compatible = "nxp,pca9548";
+ reg = <0x77>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ imux16: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ eeprom@50 {
+ compatible = "atmel,24c128";
+ reg = <0x50>;
+ };
+
+ pwm@5e{
+ compatible = "max31790";
+ reg = <0x5e>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ imux17: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ eeprom@50 {
+ compatible = "atmel,24c128";
+ reg = <0x50>;
+ };
+
+ pwm@5e{
+ compatible = "max31790";
+ reg = <0x5e>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ imux18: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+
+ eeprom@50 {
+ compatible = "atmel,24c128";
+ reg = <0x50>;
+ };
+
+ pwm@5e{
+ compatible = "max31790";
+ reg = <0x5e>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ imux19: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+
+ eeprom@50 {
+ compatible = "atmel,24c128";
+ reg = <0x50>;
+ };
+
+ pwm@5e{
+ compatible = "max31790";
+ reg = <0x5e>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ imux20: i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+
+ eeprom@50 {
+ compatible = "atmel,24c128";
+ reg = <0x50>;
+ };
+
+ pwm@5e{
+ compatible = "max31790";
+ reg = <0x5e>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ imux21: i2c@5 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <5>;
+
+ eeprom@50 {
+ compatible = "atmel,24c128";
+ reg = <0x50>;
+ };
+
+ pwm@5e{
+ compatible = "max31790";
+ reg = <0x5e>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+ };
+};
+
+&i2c3 {
+ status = "okay";
+};
+
+&i2c4 {
+ status = "okay";
+};
+
+&i2c5 {
+ status = "okay";
+};
+
+&i2c6 {
+ status = "okay";
+};
+
+&i2c7 {
+ status = "okay";
+};
+
+&i2c8 {
+ status = "okay";
+};
+
+&i2c9 {
+ status = "okay";
+};
+
+&i2c10 {
+ status = "okay";
+};
+
+&i2c11 {
+ status = "okay";
+};
+
+&i2c12 {
+ status = "okay";
+};
+
+&i2c13 {
+ status = "okay";
+};
+
+&i2c14 {
+ status = "okay";
+ multi-master;
+
+ ipmb@10 {
+ compatible = "ipmb-dev";
+ reg = <(0x10 | I2C_OWN_SLAVE_ADDRESS)>;
+ i2c-protocol;
+ };
+};
+
+&i2c15 {
+ status = "okay";
+
+ eeprom@50 {
+ compatible = "atmel,24c128";
+ reg = <0x50>;
+ };
+};
+
+&adc0 {
+ aspeed,int-vref-microvolt = <2500000>;
+ status = "okay";
+ pinctrl-0 = <&pinctrl_adc0_default &pinctrl_adc1_default
+ &pinctrl_adc2_default &pinctrl_adc3_default
+ &pinctrl_adc4_default &pinctrl_adc5_default
+ &pinctrl_adc6_default &pinctrl_adc7_default>;
+};
+
+&adc1 {
+ aspeed,int-vref-microvolt = <2500000>;
+ status = "okay";
+ pinctrl-0 = <&pinctrl_adc10_default>;
+};
+
+&ehci1 {
+ status = "okay";
+};
+
+&uhci {
+ status = "okay";
+};
+
+&gpio0 {
+ gpio-line-names =
+ /*A0-A7*/ "","","","","","","","",
+ /*B0-B7*/ "","","","","","","","",
+ /*C0-C7*/ "","","","","BLADE_UART_SEL2","","","",
+ /*D0-D7*/ "","","","","","","","",
+ /*E0-E7*/ "","","","","","","","",
+ /*F0-F7*/ "","","","","","","","",
+ /*G0-G7*/ "","","","","","","","",
+ /*H0-H7*/ "","","","","","","","",
+ /*I0-I7*/ "","","","","","","","",
+ /*J0-J7*/ "","","","","","","","",
+ /*K0-K7*/ "","","","","","","","",
+ /*L0-L7*/ "","","","","BLADE_UART_SEL0","","","",
+ /*M0-M7*/ "","","","","","BLADE_UART_SEL1","","",
+ /*N0-N7*/ "","","","","","","","",
+ /*O0-O7*/ "","","","","","","","",
+ /*P0-P7*/ "","","","","","","","",
+ /*Q0-Q7*/ "","","","","","","","",
+ /*R0-R7*/ "","","","","","","","",
+ /*S0-S7*/ "","","","","","","","",
+ /*T0-T7*/ "","","","","","","","",
+ /*U0-U7*/ "","","","","","","","",
+ /*V0-V7*/ "","","","","BAT_DETECT","","","",
+ /*W0-W7*/ "","","","","","","","",
+ /*X0-X7*/ "","","BLADE_UART_SEL3","","","","","",
+ /*Y0-Y7*/ "","","","","","","","",
+ /*Z0-Z7*/ "","","","","","","","";
+};
+
+&sgpiom0 {
+ gpio-line-names =
+ /*"input pin","output pin"*/
+ /*A0 - A7*/
+ "PRSNT_MTIA_BLADE0_N","PWREN_MTIA_BLADE0_EN",
+ "PRSNT_MTIA_BLADE1_N","PWREN_MTIA_BLADE1_EN",
+ "PRSNT_MTIA_BLADE2_N","PWREN_MTIA_BLADE2_EN",
+ "PRSNT_MTIA_BLADE3_N","PWREN_MTIA_BLADE3_EN",
+ "PRSNT_MTIA_BLADE4_N","PWREN_MTIA_BLADE4_EN",
+ "PRSNT_MTIA_BLADE5_N","PWREN_MTIA_BLADE5_EN",
+ "PRSNT_MTIA_BLADE6_N","PWREN_MTIA_BLADE6_EN",
+ "PRSNT_MTIA_BLADE7_N","PWREN_MTIA_BLADE7_EN",
+ /*B0 - B7*/
+ "PRSNT_MTIA_BLADE8_N","PWREN_MTIA_BLADE8_EN",
+ "PRSNT_MTIA_BLADE9_N","PWREN_MTIA_BLADE9_EN",
+ "PRSNT_MTIA_BLADE10_N","PWREN_MTIA_BLADE10_EN",
+ "PRSNT_MTIA_BLADE11_N","PWREN_MTIA_BLADE11_EN",
+ "PRSNT_MTIA_BLADE12_N","PWREN_MTIA_BLADE12_EN",
+ "PRSNT_MTIA_BLADE13_N","PWREN_MTIA_BLADE13_EN",
+ "PRSNT_MTIA_BLADE14_N","PWREN_MTIA_BLADE14_EN",
+ "PRSNT_MTIA_BLADE15_N","PWREN_MTIA_BLADE15_EN",
+ /*C0 - C7*/
+ "PRSNT_NW_BLADE0_N","PWREN_NW_BLADE0_EN",
+ "PRSNT_NW_BLADE1_N","PWREN_NW_BLADE1_EN",
+ "PRSNT_NW_BLADE2_N","PWREN_NW_BLADE2_EN",
+ "PRSNT_NW_BLADE3_N","PWREN_NW_BLADE3_EN",
+ "PRSNT_NW_BLADE4_N","PWREN_NW_BLADE4_EN",
+ "PRSNT_NW_BLADE5_N","PWREN_NW_BLADE5_EN",
+ "PRSNT_FCB_TOP_0_N","PWREN_MTIA_BLADE0_HSC_EN",
+ "PRSNT_FCB_TOP_1_N","PWREN_MTIA_BLADE1_HSC_EN",
+ /*D0 - D7*/
+ "PRSNT_FCB_MIDDLE_0_N","PWREN_MTIA_BLADE2_HSC_EN",
+ "PRSNT_FCB_MIDDLE_1_N","PWREN_MTIA_BLADE3_HSC_EN",
+ "PRSNT_FCB_BOTTOM_0_N","PWREN_MTIA_BLADE4_HSC_EN",
+ "PRSNT_FCB_BOTTOM_1_N","PWREN_MTIA_BLADE5_HSC_EN",
+ "PWRGD_MTIA_BLADE0_PWROK_L_BUF","PWREN_MTIA_BLADE6_HSC_EN",
+ "PWRGD_MTIA_BLADE1_PWROK_L_BUF","PWREN_MTIA_BLADE7_HSC_EN",
+ "PWRGD_MTIA_BLADE2_PWROK_L_BUF","PWREN_MTIA_BLADE8_HSC_EN",
+ "PWRGD_MTIA_BLADE3_PWROK_L_BUF","PWREN_MTIA_BLADE9_HSC_EN",
+ /*E0 - E7*/
+ "PWRGD_MTIA_BLADE4_PWROK_L_BUF","PWREN_MTIA_BLADE10_HSC_EN",
+ "PWRGD_MTIA_BLADE5_PWROK_L_BUF","PWREN_MTIA_BLADE11_HSC_EN",
+ "PWRGD_MTIA_BLADE6_PWROK_L_BUF","PWREN_MTIA_BLADE12_HSC_EN",
+ "PWRGD_MTIA_BLADE7_PWROK_L_BUF","PWREN_MTIA_BLADE13_HSC_EN",
+ "PWRGD_MTIA_BLADE8_PWROK_L_BUF","PWREN_MTIA_BLADE14_HSC_EN",
+ "PWRGD_MTIA_BLADE9_PWROK_L_BUF","PWREN_MTIA_BLADE15_HSC_EN",
+ "PWRGD_MTIA_BLADE10_PWROK_L_BUF","PWREN_NW_BLADE0_HSC_EN",
+ "PWRGD_MTIA_BLADE11_PWROK_L_BUF","PWREN_NW_BLADE1_HSC_EN",
+ /*F0 - F7*/
+ "PWRGD_MTIA_BLADE12_PWROK_L_BUF","PWREN_NW_BLADE2_HSC_EN",
+ "PWRGD_MTIA_BLADE13_PWROK_L_BUF","PWREN_NW_BLADE3_HSC_EN",
+ "PWRGD_MTIA_BLADE14_PWROK_L_BUF","PWREN_NW_BLADE4_HSC_EN",
+ "PWRGD_MTIA_BLADE15_PWROK_L_BUF","PWREN_NW_BLADE5_HSC_EN",
+ "PWRGD_NW_BLADE0_PWROK_L_BUF","PWREN_FCB_TOP_L_EN",
+ "PWRGD_NW_BLADE1_PWROK_L_BUF","PWREN_FCB_TOP_R_EN",
+ "PWRGD_NW_BLADE2_PWROK_L_BUF","PWREN_FCB_MIDDLE_L_EN",
+ "PWRGD_NW_BLADE3_PWROK_L_BUF","PWREN_FCB_MIDDLE_R_EN",
+ /*G0 - G7*/
+ "PWRGD_NW_BLADE4_PWROK_L_BUF","PWREN_FCB_BOTTOM_L_EN",
+ "PWRGD_NW_BLADE5_PWROK_L_BUF","PWREN_FCB_BOTTOM_R_EN",
+ "PWRGD_FCB_TOP_0_PWROK_L_BUF","FM_CMM_AC_CYCLE_N",
+ "PWRGD_FCB_TOP_1_PWROK_L_BUF","MGMT_SFP_TX_DIS",
+ "PWRGD_FCB_MIDDLE_0_PWROK_L_BUF","",
+ "PWRGD_FCB_MIDDLE_1_PWROK_L_BUF","RST_I2CRST_MTIA_BLADE0_1_N",
+ "PWRGD_FCB_BOTTOM_0_PWROK_L_BUF","RST_I2CRST_MTIA_BLADE2_3_N",
+ "PWRGD_FCB_BOTTOM_1_PWROK_L_BUF","RST_I2CRST_MTIA_BLADE4_5_N",
+ /*H0 - H7*/
+ "LEAK_DETECT_MTIA_BLADE0_N_BUF","RST_I2CRST_MTIA_BLADE6_7_N",
+ "LEAK_DETECT_MTIA_BLADE1_N_BUF","RST_I2CRST_MTIA_BLADE8_9_N",
+ "LEAK_DETECT_MTIA_BLADE2_N_BUF","RST_I2CRST_MTIA_BLADE10_11_N",
+ "LEAK_DETECT_MTIA_BLADE3_N_BUF","RST_I2CRST_MTIA_BLADE12_13_N",
+ "LEAK_DETECT_MTIA_BLADE4_N_BUF","RST_I2CRST_MTIA_BLADE14_15_N",
+ "LEAK_DETECT_MTIA_BLADE5_N_BUF","RST_I2CRST_NW_BLADE0_1_2_N",
+ "LEAK_DETECT_MTIA_BLADE6_N_BUF","RST_I2CRST_NW_BLADE3_4_5_N",
+ "LEAK_DETECT_MTIA_BLADE7_N_BUF","RST_I2CRST_FCB_N",
+ /*I0 - I7*/
+ "LEAK_DETECT_MTIA_BLADE8_N_BUF","RST_I2CRST_FCB_B_L_N",
+ "LEAK_DETECT_MTIA_BLADE9_N_BUF","RST_I2CRST_FCB_B_R_N",
+ "LEAK_DETECT_MTIA_BLADE10_N_BUF","RST_I2CRST_FCB_M_L_N",
+ "LEAK_DETECT_MTIA_BLADE11_N_BUF","RST_I2CRST_FCB_M_R_N",
+ "LEAK_DETECT_MTIA_BLADE12_N_BUF","RST_I2CRST_FCB_T_L_N",
+ "LEAK_DETECT_MTIA_BLADE13_N_BUF","RST_I2CRST_FCB_T_R_N",
+ "LEAK_DETECT_MTIA_BLADE14_N_BUF","BMC_READY",
+ "LEAK_DETECT_MTIA_BLADE15_N_BUF","wFM_88E6393X_BIN_UPDATE_EN_N",
+ /*J0 - J7*/
+ "LEAK_DETECT_NW_BLADE0_N_BUF","WATER_VALVE_CLOSED_N",
+ "LEAK_DETECT_NW_BLADE1_N_BUF","",
+ "LEAK_DETECT_NW_BLADE2_N_BUF","",
+ "LEAK_DETECT_NW_BLADE3_N_BUF","",
+ "LEAK_DETECT_NW_BLADE4_N_BUF","",
+ "LEAK_DETECT_NW_BLADE5_N_BUF","",
+ "MTIA_BLADE0_STATUS_LED","",
+ "MTIA_BLADE1_STATUS_LED","",
+ /*K0 - K7*/
+ "MTIA_BLADE2_STATUS_LED","",
+ "MTIA_BLADE3_STATUS_LED","",
+ "MTIA_BLADE4_STATUS_LED","",
+ "MTIA_BLADE5_STATUS_LED","",
+ "MTIA_BLADE6_STATUS_LED","",
+ "MTIA_BLADE7_STATUS_LED","",
+ "MTIA_BLADE8_STATUS_LED","",
+ "MTIA_BLADE9_STATUS_LED","",
+ /*L0 - L7*/
+ "MTIA_BLADE10_STATUS_LED","",
+ "MTIA_BLADE11_STATUS_LED","",
+ "MTIA_BLADE12_STATUS_LED","",
+ "MTIA_BLADE13_STATUS_LED","",
+ "MTIA_BLADE14_STATUS_LED","",
+ "MTIA_BLADE15_STATUS_LED","",
+ "NW_BLADE0_STATUS_LED","",
+ "NW_BLADE1_STATUS_LED","",
+ /*M0 - M7*/
+ "NW_BLADE2_STATUS_LED","",
+ "NW_BLADE3_STATUS_LED","",
+ "NW_BLADE4_STATUS_LED","",
+ "NW_BLADE5_STATUS_LED","",
+ "RPU_READY","",
+ "IT_GEAR_RPU_LINK_N","",
+ "IT_GEAR_LEAK","",
+ "WATER_VALVE_CLOSED_N","",
+ /*N0 - N7*/
+ "VALVE_STS0","",
+ "VALVE_STS1","",
+ "VALVE_STS2","",
+ "VALVE_STS3","",
+ "CR_TOGGLE_BOOT_BUF_N","",
+ "CMM_LC_RDY_LED_N","",
+ "CMM_LC_UNRDY_LED_N","",
+ "CMM_CABLE_CARTRIDGE_PRSNT_BOT_N","",
+ /*O0 - O7*/
+ "CMM_CABLE_CARTRIDGE_PRSNT_TOP_N","",
+ "BOT_BCB_CABLE_PRSNT_N","",
+ "TOP_BCB_CABLE_PRSNT_N","",
+ "CHASSIS0_LEAK_Q_N","",
+ "CHASSIS1_LEAK_Q_N","",
+ "LEAK0_DETECT","",
+ "LEAK1_DETECT","",
+ "MGMT_SFP_PRSNT_N","",
+ /*P0 - P7*/
+ "MGMT_SFP_TX_FAULT","",
+ "MGMT_SFP_RX_LOS","",
+ "","",
+ "","",
+ "","",
+ "","",
+ "","",
+ "","";
+};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-bmc-ibm-system1.dts b/arch/arm/boot/dts/aspeed/aspeed-bmc-ibm-system1.dts
new file mode 100644
index 00000000000000..dcbc16308ab50b
--- /dev/null
+++ b/arch/arm/boot/dts/aspeed/aspeed-bmc-ibm-system1.dts
@@ -0,0 +1,1623 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright 2023 IBM Corp.
+/dts-v1/;
+
+#include "aspeed-g6.dtsi"
+#include <dt-bindings/gpio/aspeed-gpio.h>
+#include <dt-bindings/i2c/i2c.h>
+#include <dt-bindings/leds/leds-pca955x.h>
+
+/ {
+ model = "System1";
+ compatible = "ibm,system1-bmc", "aspeed,ast2600";
+
+ aliases {
+ i2c16 = &i2c8mux1chn0;
+ i2c17 = &i2c8mux1chn1;
+ i2c18 = &i2c8mux1chn2;
+ i2c19 = &i2c8mux1chn3;
+ i2c20 = &i2c8mux1chn4;
+ i2c21 = &i2c8mux1chn5;
+ i2c22 = &i2c8mux1chn6;
+ i2c23 = &i2c8mux1chn7;
+ i2c24 = &i2c3mux0chn0;
+ i2c25 = &i2c3mux0chn1;
+ i2c26 = &i2c3mux0chn2;
+ i2c27 = &i2c3mux0chn3;
+ i2c28 = &i2c3mux0chn4;
+ i2c29 = &i2c3mux0chn5;
+ i2c30 = &i2c3mux0chn6;
+ i2c31 = &i2c3mux0chn7;
+ i2c32 = &i2c6mux0chn0;
+ i2c33 = &i2c6mux0chn1;
+ i2c34 = &i2c6mux0chn2;
+ i2c35 = &i2c6mux0chn3;
+ i2c36 = &i2c6mux0chn4;
+ i2c37 = &i2c6mux0chn5;
+ i2c38 = &i2c6mux0chn6;
+ i2c39 = &i2c6mux0chn7;
+ i2c40 = &i2c7mux0chn0;
+ i2c41 = &i2c7mux0chn1;
+ i2c42 = &i2c7mux0chn2;
+ i2c43 = &i2c7mux0chn3;
+ i2c44 = &i2c7mux0chn4;
+ i2c45 = &i2c7mux0chn5;
+ i2c46 = &i2c7mux0chn6;
+ i2c47 = &i2c7mux0chn7;
+ i2c48 = &i2c8mux0chn0;
+ i2c49 = &i2c8mux0chn1;
+ i2c50 = &i2c8mux0chn2;
+ i2c51 = &i2c8mux0chn3;
+ i2c52 = &i2c8mux0chn4;
+ i2c53 = &i2c8mux0chn5;
+ i2c54 = &i2c8mux0chn6;
+ i2c55 = &i2c8mux0chn7;
+ i2c56 = &i2c14mux0chn0;
+ i2c57 = &i2c14mux0chn1;
+ i2c58 = &i2c14mux0chn2;
+ i2c59 = &i2c14mux0chn3;
+ i2c60 = &i2c14mux0chn4;
+ i2c61 = &i2c14mux0chn5;
+ i2c62 = &i2c14mux0chn6;
+ i2c63 = &i2c14mux0chn7;
+ i2c64 = &i2c15mux0chn0;
+ i2c65 = &i2c15mux0chn1;
+ i2c66 = &i2c15mux0chn2;
+ i2c67 = &i2c15mux0chn3;
+ i2c68 = &i2c15mux0chn4;
+ i2c69 = &i2c15mux0chn5;
+ i2c70 = &i2c15mux0chn6;
+ i2c71 = &i2c15mux0chn7;
+ };
+
+ chosen {
+ stdout-path = "uart5:115200n8";
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x80000000 0x40000000>;
+ };
+
+ reserved-memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ eventlog: tcg-event-log@b3d00000 {
+ no-map;
+ reg = <0xb3d00000 0x100000>;
+ };
+
+ ramoops@b3e00000 {
+ compatible = "ramoops";
+ reg = <0xb3e00000 0x200000>; /* 16 * (4 * 0x8000) */
+ record-size = <0x8000>;
+ console-size = <0x8000>;
+ ftrace-size = <0x8000>;
+ pmsg-size = <0x8000>;
+ max-reason = <3>; /* KMSG_DUMP_EMERG */
+ };
+
+ /* LPC FW cycle bridge region requires natural alignment */
+ flash_memory: region@b4000000 {
+ no-map;
+ reg = <0xb4000000 0x04000000>; /* 64M */
+ };
+
+ /* VGA region is dictated by hardware strapping */
+ vga_memory: region@bf000000 {
+ no-map;
+ compatible = "shared-dma-pool";
+ reg = <0xbf000000 0x01000000>; /* 16M */
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-0 {
+ gpios = <&gpio0 ASPEED_GPIO(L, 7) GPIO_ACTIVE_HIGH>;
+ };
+
+ led-1 {
+ gpios = <&gpio0 ASPEED_GPIO(P, 7) GPIO_ACTIVE_HIGH>;
+ };
+
+ led-2 {
+ gpios = <&gpio0 ASPEED_GPIO(S, 6) GPIO_ACTIVE_HIGH>;
+ };
+
+ led-3 {
+ gpios = <&gpio0 ASPEED_GPIO(S, 7) GPIO_ACTIVE_HIGH>;
+ };
+
+ led-4 {
+ gpios = <&pca3 5 GPIO_ACTIVE_LOW>;
+ };
+
+ led-5 {
+ gpios = <&pca3 6 GPIO_ACTIVE_LOW>;
+ };
+
+ led-6 {
+ gpios = <&pca3 7 GPIO_ACTIVE_LOW>;
+ };
+
+ led-7 {
+ gpios = <&pca3 8 GPIO_ACTIVE_LOW>;
+ };
+
+ led-8 {
+ gpios = <&pca3 9 GPIO_ACTIVE_LOW>;
+ };
+
+ led-9 {
+ gpios = <&pca3 10 GPIO_ACTIVE_LOW>;
+ };
+
+ led-a {
+ gpios = <&pca3 11 GPIO_ACTIVE_LOW>;
+ };
+
+ led-b {
+ gpios = <&pca4 4 GPIO_ACTIVE_HIGH>;
+ };
+
+ led-c {
+ gpios = <&pca4 5 GPIO_ACTIVE_HIGH>;
+ };
+
+ led-d {
+ gpios = <&pca4 6 GPIO_ACTIVE_HIGH>;
+ };
+
+ led-e {
+ gpios = <&pca4 7 GPIO_ACTIVE_HIGH>;
+ };
+ };
+
+ gpio-keys-polled {
+ compatible = "gpio-keys-polled";
+ poll-interval = <1000>;
+
+ event-nvme0-presence {
+ label = "nvme0-presence";
+ gpios = <&pca4 0 GPIO_ACTIVE_LOW>;
+ linux,code = <0>;
+ };
+
+ event-nvme1-presence {
+ label = "nvme1-presence";
+ gpios = <&pca4 1 GPIO_ACTIVE_LOW>;
+ linux,code = <1>;
+ };
+
+ event-nvme2-presence {
+ label = "nvme2-presence";
+ gpios = <&pca4 2 GPIO_ACTIVE_LOW>;
+ linux,code = <2>;
+ };
+
+ event-nvme3-presence {
+ label = "nvme3-presence";
+ gpios = <&pca4 3 GPIO_ACTIVE_LOW>;
+ linux,code = <3>;
+ };
+ };
+
+ iio-hwmon {
+ compatible = "iio-hwmon";
+ io-channels = <&p12v_vd 0>, <&p5v_aux_vd 0>,
+ <&p5v_bmc_aux_vd 0>, <&p3v3_aux_vd 0>,
+ <&p3v3_bmc_aux_vd 0>, <&p1v8_bmc_aux_vd 0>,
+ <&adc1 4>, <&adc0 2>, <&adc1 0>,
+ <&p2v5_aux_vd 0>, <&adc1 7>;
+ };
+
+ p12v_vd: voltage-divider1 {
+ compatible = "voltage-divider";
+ io-channels = <&adc1 3>;
+ #io-channel-cells = <1>;
+
+ /*
+ * Scale the system voltage by 1127/127 to fit the ADC range.
+ * Use small nominator to prevent integer overflow.
+ */
+ output-ohms = <15>;
+ full-ohms = <133>;
+ };
+
+ p5v_aux_vd: voltage-divider2 {
+ compatible = "voltage-divider";
+ io-channels = <&adc1 5>;
+ #io-channel-cells = <1>;
+
+ /*
+ * Scale the system voltage by 1365/365 to fit the ADC range.
+ * Use small nominator to prevent integer overflow.
+ */
+ output-ohms = <50>;
+ full-ohms = <187>;
+ };
+
+ p5v_bmc_aux_vd: voltage-divider3 {
+ compatible = "voltage-divider";
+ io-channels = <&adc0 3>;
+ #io-channel-cells = <1>;
+
+ /*
+ * Scale the system voltage by 1365/365 to fit the ADC range.
+ * Use small nominator to prevent integer overflow.
+ */
+ output-ohms = <50>;
+ full-ohms = <187>;
+ };
+
+ p3v3_aux_vd: voltage-divider4 {
+ compatible = "voltage-divider";
+ io-channels = <&adc1 2>;
+ #io-channel-cells = <1>;
+
+ /*
+ * Scale the system voltage by 1698/698 to fit the ADC range.
+ * Use small nominator to prevent integer overflow.
+ */
+ output-ohms = <14>;
+ full-ohms = <34>;
+ };
+
+ p3v3_bmc_aux_vd: voltage-divider5 {
+ compatible = "voltage-divider";
+ io-channels = <&adc0 7>;
+ #io-channel-cells = <1>;
+
+ /*
+ * Scale the system voltage by 1698/698 to fit the ADC range.
+ * Use small nominator to prevent integer overflow.
+ */
+ output-ohms = <14>;
+ full-ohms = <34>;
+ };
+
+ p1v8_bmc_aux_vd: voltage-divider6 {
+ compatible = "voltage-divider";
+ io-channels = <&adc0 6>;
+ #io-channel-cells = <1>;
+
+ /*
+ * Scale the system voltage by 4000/3000 to fit the ADC range.
+ * Use small nominator to prevent integer overflow.
+ */
+ output-ohms = <3>;
+ full-ohms = <4>;
+ };
+
+ p2v5_aux_vd: voltage-divider7 {
+ compatible = "voltage-divider";
+ io-channels = <&adc1 1>;
+ #io-channel-cells = <1>;
+
+ /*
+ * Scale the system voltage by 2100/1100 to fit the ADC range.
+ * Use small nominator to prevent integer overflow.
+ */
+ output-ohms = <11>;
+ full-ohms = <21>;
+ };
+
+ p1v8_bmc_aux: fixedregulator-p1v8-bmc-aux {
+ compatible = "regulator-fixed";
+ regulator-name = "p1v8_bmc_aux";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ };
+};
+
+&adc0 {
+ status = "okay";
+ vref-supply = <&p1v8_bmc_aux>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_adc0_default
+ &pinctrl_adc1_default
+ &pinctrl_adc2_default
+ &pinctrl_adc3_default
+ &pinctrl_adc4_default
+ &pinctrl_adc5_default
+ &pinctrl_adc6_default
+ &pinctrl_adc7_default>;
+};
+
+&adc1 {
+ status = "okay";
+ vref-supply = <&p1v8_bmc_aux>;
+ aspeed,battery-sensing;
+
+ aspeed,int-vref-microvolt = <2500000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_adc8_default
+ &pinctrl_adc9_default
+ &pinctrl_adc10_default
+ &pinctrl_adc11_default
+ &pinctrl_adc12_default
+ &pinctrl_adc13_default
+ &pinctrl_adc14_default
+ &pinctrl_adc15_default>;
+};
+
+&ehci1 {
+ status = "okay";
+};
+
+&uhci {
+ status = "okay";
+};
+
+&gpio0 {
+ gpio-line-names =
+ /*A0-A7*/ "","","","","","","","",
+ /*B0-B7*/ "","","","","bmc-tpm-reset","","","",
+ /*C0-C7*/ "","","","","","","","",
+ /*D0-D7*/ "","","","","","","","",
+ /*E0-E7*/ "","","","","","","","",
+ /*F0-F7*/ "","","","","","","","",
+ /*G0-G7*/ "","","","","","","","",
+ /*H0-H7*/ "","","","","","","","",
+ /*I0-I7*/ "","","","","","","","",
+ /*J0-J7*/ "","","","","","","","",
+ /*K0-K7*/ "","","","","","","","",
+ /*L0-L7*/ "","","","","","","","bmc-ready",
+ /*M0-M7*/ "","","","","","","","",
+ /*N0-N7*/ "","","","","","","","",
+ /*O0-O7*/ "","","","","","","","",
+ /*P0-P7*/ "","","","","","","","bmc-hb",
+ /*Q0-Q7*/ "","","","","","","","",
+ /*R0-R7*/ "","","","","","","","",
+ /*S0-S7*/ "","","","","","","rear-enc-fault0","rear-enc-id0",
+ /*T0-T7*/ "","","","","","","","",
+ /*U0-U7*/ "","","","","","","","",
+ /*V0-V7*/ "","rtc-battery-voltage-read-enable","","power-chassis-control","","","","",
+ /*W0-W7*/ "","","","","","","","",
+ /*X0-X7*/ "","power-chassis-good","","","","","","",
+ /*Y0-Y7*/ "","","","","","","","",
+ /*Z0-Z7*/ "","","","","","","","";
+};
+
+&emmc_controller {
+ status = "okay";
+};
+
+&pinctrl_emmc_default {
+ bias-disable;
+};
+
+&emmc {
+ status = "okay";
+ clk-phase-mmc-hs200 = <180>, <180>;
+};
+
+&ibt {
+ status = "okay";
+};
+
+&uart2 {
+ status = "okay";
+};
+
+&vuart1 {
+ status = "okay";
+};
+
+&vuart2 {
+ status = "okay";
+};
+
+&lpc_ctrl {
+ status = "okay";
+ memory-region = <&flash_memory>;
+};
+
+&mac2 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rmii3_default>;
+ clocks = <&syscon ASPEED_CLK_GATE_MAC3CLK>,
+ <&syscon ASPEED_CLK_MAC3RCLK>;
+ clock-names = "MACCLK", "RCLK";
+ use-ncsi;
+};
+
+&mac3 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rmii4_default>;
+ clocks = <&syscon ASPEED_CLK_GATE_MAC4CLK>,
+ <&syscon ASPEED_CLK_MAC4RCLK>;
+ clock-names = "MACCLK", "RCLK";
+ use-ncsi;
+};
+
+&wdt1 {
+ aspeed,reset-type = "none";
+ aspeed,external-signal;
+ aspeed,ext-push-pull;
+ aspeed,ext-active-high;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_wdtrst1_default>;
+};
+
+&wdt2 {
+ status = "okay";
+};
+
+&kcs2 {
+ status = "okay";
+ aspeed,lpc-io-reg = <0xca8 0xcac>;
+};
+
+&kcs3 {
+ status = "okay";
+ aspeed,lpc-io-reg = <0xca2>;
+ aspeed,lpc-interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
+};
+
+&i2c0 {
+ status = "okay";
+
+ eeprom@50 {
+ compatible = "atmel,24c64";
+ reg = <0x50>;
+ };
+
+ regulator@60 {
+ compatible = "maxim,max8952";
+ reg = <0x60>;
+
+ max8952,default-mode = <0>;
+ max8952,dvs-mode-microvolt = <1250000>, <1200000>,
+ <1050000>, <950000>;
+ max8952,sync-freq = <0>;
+ max8952,ramp-speed = <0>;
+
+ regulator-name = "VR_v77_1v4";
+ regulator-min-microvolt = <770000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+};
+
+&i2c1 {
+ status = "okay";
+
+ regulator@42 {
+ compatible = "infineon,ir38263";
+ reg = <0x42>;
+ };
+
+ led-controller@60 {
+ compatible = "nxp,pca9552";
+ reg = <0x60>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ led@0 {
+ label = "nic1-perst";
+ reg = <0>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@1 {
+ label = "bmc-perst";
+ reg = <1>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@2 {
+ label = "reset-M2-SSD1-2-perst";
+ reg = <2>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@3 {
+ label = "pcie-perst1";
+ reg = <3>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@4 {
+ label = "pcie-perst2";
+ reg = <4>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@5 {
+ label = "pcie-perst3";
+ reg = <5>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@6 {
+ label = "pcie-perst4";
+ reg = <6>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@7 {
+ label = "pcie-perst5";
+ reg = <7>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@8 {
+ label = "pcie-perst6";
+ reg = <8>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@9 {
+ label = "pcie-perst7";
+ reg = <9>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@10 {
+ label = "pcie-perst8";
+ reg = <10>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@11 {
+ label = "PV-cp0-sw1stk4-perst";
+ reg = <11>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@12 {
+ label = "PV-cp0-sw1stk5-perst";
+ reg = <12>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@13 {
+ label = "pe-cp-drv0-perst";
+ reg = <13>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@14 {
+ label = "pe-cp-drv1-perst";
+ reg = <14>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@15 {
+ label = "lom-perst";
+ reg = <15>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+ };
+
+ gpio@74 {
+ compatible = "nxp,pca9539";
+ reg = <0x74>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ gpio-line-names =
+ "PLUG_DETECT_PCIE_J101_N",
+ "PLUG_DETECT_PCIE_J102_N",
+ "PLUG_DETECT_PCIE_J103_N",
+ "PLUG_DETECT_PCIE_J104_N",
+ "PLUG_DETECT_PCIE_J105_N",
+ "PLUG_DETECT_PCIE_J106_N",
+ "PLUG_DETECT_PCIE_J107_N",
+ "PLUG_DETECT_PCIE_J108_N",
+ "PLUG_DETECT_M2_SSD1_N",
+ "PLUG_DETECT_NIC1_N",
+ "SEL_SMB_DIMM_CPU0",
+ "presence-ps2",
+ "presence-ps3",
+ "", "",
+ "PWRBRD_PLUG_DETECT2_N";
+ };
+};
+
+&i2c2 {
+ status = "okay";
+
+ power-supply@58 {
+ compatible = "ibm,cffps";
+ reg = <0x58>;
+ };
+
+ power-supply@59 {
+ compatible = "ibm,cffps";
+ reg = <0x59>;
+ };
+
+ power-supply@5a {
+ compatible = "ibm,cffps";
+ reg = <0x5a>;
+ };
+
+ power-supply@5b {
+ compatible = "ibm,cffps";
+ reg = <0x5b>;
+ };
+};
+
+&i2c3 {
+ status = "okay";
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9548";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ i2c3mux0chn0: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ i2c3mux0chn1: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ i2c3mux0chn2: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ i2c3mux0chn3: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+
+ i2c3mux0chn4: i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+ };
+
+ i2c3mux0chn5: i2c@5 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <5>;
+ };
+
+ i2c3mux0chn6: i2c@6 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <6>;
+ };
+
+ i2c3mux0chn7: i2c@7 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <7>;
+ };
+ };
+};
+
+&i2c4 {
+ status = "okay";
+};
+
+&i2c5 {
+ status = "okay";
+
+ regulator@42 {
+ compatible = "infineon,ir38263";
+ reg = <0x42>;
+ };
+
+ regulator@43 {
+ compatible = "infineon,ir38060";
+ reg = <0x43>;
+ };
+};
+
+&i2c6 {
+ status = "okay";
+
+ fan-controller@52 {
+ compatible = "maxim,max31785a";
+ reg = <0x52>;
+ };
+
+ fan-controller@54 {
+ compatible = "maxim,max31785a";
+ reg = <0x54>;
+ };
+
+ eeprom@55 {
+ compatible = "atmel,24c64";
+ reg = <0x55>;
+ };
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9548";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ i2c6mux0chn0: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ i2c6mux0chn1: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ i2c6mux0chn2: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ i2c6mux0chn3: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+
+ i2c6mux0chn4: i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+
+ humidity-sensor@40 {
+ compatible = "ti,hdc1080";
+ reg = <0x40>;
+ };
+
+ temperature-sensor@48 {
+ compatible = "ti,tmp275";
+ reg = <0x48>;
+ };
+
+ eeprom@50 {
+ compatible = "atmel,24c32";
+ reg = <0x50>;
+ };
+
+ led-controller@60 {
+ compatible = "nxp,pca9551";
+ reg = <0x60>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ led@0 {
+ label = "enclosure-id-led";
+ reg = <0>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@1 {
+ label = "attention-led";
+ reg = <1>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@2 {
+ label = "enclosure-fault-rollup-led";
+ reg = <2>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@3 {
+ label = "power-on-led";
+ reg = <3>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+ };
+
+ temperature-sensor@76 {
+ compatible = "infineon,dps310";
+ reg = <0x76>;
+ };
+ };
+
+ i2c6mux0chn5: i2c@5 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <5>;
+ };
+
+ i2c6mux0chn6: i2c@6 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <6>;
+ };
+
+ i2c6mux0chn7: i2c@7 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <7>;
+ };
+ };
+
+ pca3: gpio@74 {
+ compatible = "nxp,pca9539";
+ reg = <0x74>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ pca4: gpio@77 {
+ compatible = "nxp,pca9539";
+ reg = <0x77>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ gpio-line-names =
+ "PE_NVMED0_EXP_PRSNT_N",
+ "PE_NVMED1_EXP_PRSNT_N",
+ "PE_NVMED2_EXP_PRSNT_N",
+ "PE_NVMED3_EXP_PRSNT_N",
+ "LED_FAULT_NVMED0",
+ "LED_FAULT_NVMED1",
+ "LED_FAULT_NVMED2",
+ "LED_FAULT_NVMED3",
+ "FAN0_PRESENCE_R_N",
+ "FAN1_PRESENCE_R_N",
+ "FAN2_PRESENCE_R_N",
+ "FAN3_PRESENCE_R_N",
+ "FAN4_PRESENCE_R_N",
+ "FAN5_PRESENCE_N",
+ "FAN6_PRESENCE_N",
+ "";
+ };
+};
+
+&i2c7 {
+ status = "okay";
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9548";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ i2c7mux0chn0: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ i2c7mux0chn1: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ i2c7mux0chn2: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ i2c7mux0chn3: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+
+ regulator@58 {
+ compatible = "mps,mp2973";
+ reg = <0x58>;
+ };
+ };
+
+ i2c7mux0chn4: i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+ };
+
+ i2c7mux0chn5: i2c@5 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <5>;
+
+ regulator@40 {
+ compatible = "infineon,tda38640";
+ reg = <0x40>;
+ };
+ };
+
+ i2c7mux0chn6: i2c@6 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <6>;
+ };
+
+ i2c7mux0chn7: i2c@7 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <7>;
+ };
+ };
+};
+
+&i2c8 {
+ status = "okay";
+
+ i2c-mux@71 {
+ compatible = "nxp,pca9548";
+ reg = <0x71>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ i2c8mux0chn0: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ regulator@58 {
+ compatible = "mps,mp2971";
+ reg = <0x58>;
+ };
+ };
+
+ i2c8mux0chn1: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ regulator@40 {
+ compatible = "infineon,tda38640";
+ reg = <0x40>;
+ };
+
+ regulator@41 {
+ compatible = "infineon,tda38640";
+ reg = <0x41>;
+ };
+
+ regulator@58 {
+ compatible = "mps,mp2971";
+ reg = <0x58>;
+ };
+
+ regulator@5b {
+ compatible = "mps,mp2971";
+ reg = <0x5b>;
+ };
+ };
+
+ i2c8mux0chn2: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ i2c8mux0chn3: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+
+ i2c8mux0chn4: i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9548";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ i2c8mux1chn0: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ i2c8mux1chn1: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ i2c8mux1chn2: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ i2c8mux1chn3: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+
+ i2c8mux1chn4: i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+ };
+
+ i2c8mux1chn5: i2c@5 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <5>;
+ };
+
+ i2c8mux1chn6: i2c@6 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <6>;
+ };
+
+ i2c8mux1chn7: i2c@7 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <7>;
+ };
+ };
+ };
+
+ i2c8mux0chn5: i2c@5 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <5>;
+ };
+
+ i2c8mux0chn6: i2c@6 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <6>;
+
+ temperature-sensor@4c {
+ compatible = "ti,tmp423";
+ reg = <0x4c>;
+ };
+ };
+
+ i2c8mux0chn7: i2c@7 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <7>;
+
+ regulator@40 {
+ compatible = "infineon,ir38060";
+ reg = <0x40>;
+ };
+ };
+ };
+};
+
+&i2c9 {
+ status = "okay";
+
+ regulator@40 {
+ compatible = "infineon,ir38263";
+ reg = <0x40>;
+ };
+
+ regulator@41 {
+ compatible = "infineon,ir38263";
+ reg = <0x41>;
+ };
+
+ eeprom@50 {
+ compatible = "atmel,24c64";
+ reg = <0x50>;
+ };
+
+ regulator@60 {
+ compatible = "maxim,max8952";
+ reg = <0x60>;
+
+ max8952,default-mode = <0>;
+ max8952,dvs-mode-microvolt = <1250000>, <1200000>,
+ <1050000>, <950000>;
+ max8952,sync-freq = <0>;
+ max8952,ramp-speed = <0>;
+
+ regulator-name = "VR_v77_1v4";
+ regulator-min-microvolt = <770000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+};
+
+&i2c11 {
+ status = "okay";
+
+ tpm@2e {
+ compatible = "tcg,tpm-tis-i2c";
+ reg = <0x2e>;
+ memory-region = <&eventlog>;
+ };
+};
+
+&i2c12 {
+ status = "okay";
+};
+
+&i2c13 {
+ status = "okay";
+
+ regulator@41 {
+ compatible = "infineon,ir38263";
+ reg = <0x41>;
+ };
+
+ led-controller@61 {
+ compatible = "nxp,pca9552";
+ reg = <0x61>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ led@0 {
+ label = "efuse-12v-slots";
+ reg = <0>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@1 {
+ label = "efuse-3p3v-slot";
+ reg = <1>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@3 {
+ label = "nic2-pert";
+ reg = <3>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@4 {
+ label = "pcie-perst9";
+ reg = <4>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@5 {
+ label = "pcie-perst10";
+ reg = <5>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@6 {
+ label = "pcie-perst11";
+ reg = <6>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@7 {
+ label = "pcie-perst12";
+ reg = <7>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@8 {
+ label = "pcie-perst13";
+ reg = <8>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@9 {
+ label = "pcie-perst14";
+ reg = <9>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@10 {
+ label = "pcie-perst15";
+ reg = <10>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@11 {
+ label = "pcie-perst16";
+ reg = <11>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@12 {
+ label = "PV-cp1-sw1stk4-perst";
+ reg = <12>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@13 {
+ label = "PV-cp1-sw1stk5-perst";
+ reg = <13>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@14 {
+ label = "pe-cp-drv2-perst";
+ reg = <14>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+
+ led@15 {
+ label = "pe-cp-drv3-perst";
+ reg = <15>;
+ retain-state-shutdown;
+ default-state = "keep";
+ type = <PCA955X_TYPE_LED>;
+ };
+ };
+
+ gpio@75 {
+ compatible = "nxp,pca9539";
+ reg = <0x75>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ gpio-line-names =
+ "PLUG_DETECT_PCIE_J109_N",
+ "PLUG_DETECT_PCIE_J110_N",
+ "PLUG_DETECT_PCIE_J111_N",
+ "PLUG_DETECT_PCIE_J112_N",
+ "PLUG_DETECT_PCIE_J113_N",
+ "PLUG_DETECT_PCIE_J114_N",
+ "PLUG_DETECT_PCIE_J115_N",
+ "PLUG_DETECT_PCIE_J116_N",
+ "PLUG_DETECT_M2_SSD2_N",
+ "PLUG_DETECT_NIC2_N",
+ "SEL_SMB_DIMM_CPU1",
+ "presence-ps0",
+ "presence-ps1",
+ "", "",
+ "PWRBRD_PLUG_DETECT1_N";
+ };
+
+ gpio@76 {
+ compatible = "nxp,pca9539";
+ reg = <0x76>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ gpio-line-names =
+ "SW1_BOOTRCVRYB1_N",
+ "SW1_BOOTRCVRYB0_N",
+ "SW2_BOOTRCVRYB1_N",
+ "SW2_BOOTRCVRYB0_N",
+ "SW3_4_BOOTRCVRYB1_N",
+ "SW3_4_BOOTRCVRYB0_N",
+ "SW5_BOOTRCVRYB1_N",
+ "SW5_BOOTRCVRYB0_N",
+ "SW6_BOOTRCVRYB1_N",
+ "SW6_BOOTRCVRYB0_N",
+ "SW1_RESET_N",
+ "SW3_RESET_N",
+ "SW4_RESET_N",
+ "SW2_RESET_N",
+ "SW5_RESET_N",
+ "SW6_RESET_N";
+ };
+};
+
+&i2c14 {
+ status = "okay";
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9548";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ i2c14mux0chn0: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ i2c14mux0chn1: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ i2c14mux0chn2: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ i2c14mux0chn3: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+
+ regulator@58 {
+ compatible = "mps,mp2973";
+ reg = <0x58>;
+ };
+ };
+
+ i2c14mux0chn4: i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+ };
+
+ i2c14mux0chn5: i2c@5 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <5>;
+
+ regulator@40 {
+ compatible = "infineon,tda38640";
+ reg = <0x40>;
+ };
+ };
+
+ i2c14mux0chn6: i2c@6 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <6>;
+ };
+
+ i2c14mux0chn7: i2c@7 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <7>;
+ };
+ };
+};
+
+&i2c15 {
+ status = "okay";
+
+ i2c-mux@71 {
+ compatible = "nxp,pca9548";
+ reg = <0x71>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ i2c15mux0chn0: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ regulator@58 {
+ compatible = "mps,mp2971";
+ reg = <0x58>;
+ };
+ };
+
+ i2c15mux0chn1: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ regulator@40 {
+ compatible = "infineon,tda38640";
+ reg = <0x40>;
+ };
+
+ regulator@41 {
+ compatible = "infineon,tda38640";
+ reg = <0x41>;
+ };
+
+ regulator@58 {
+ compatible = "mps,mp2971";
+ reg = <0x58>;
+ };
+
+ regulator@5b {
+ compatible = "mps,mp2971";
+ reg = <0x5b>;
+ };
+ };
+
+ i2c15mux0chn2: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ i2c15mux0chn3: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+
+ i2c15mux0chn4: i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+
+ i2c-mux@70 {
+ compatible = "nxp,pca9548";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ i2c15mux1chn0: i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ };
+
+ i2c15mux1chn1: i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ };
+
+ i2c15mux1chn2: i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ };
+
+ i2c15mux1chn3: i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+ };
+
+ i2c15mux1chn4: i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+ };
+
+ i2c15mux1chn5: i2c@5 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <5>;
+ };
+
+ i2c15mux1chn6: i2c@6 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <6>;
+ };
+
+ i2c15mux1chn7: i2c@7 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <7>;
+ };
+ };
+ };
+
+ i2c15mux0chn5: i2c@5 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <5>;
+ };
+
+ i2c15mux0chn6: i2c@6 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <6>;
+
+ temperature-sensor@4c {
+ compatible = "ti,tmp423";
+ reg = <0x4c>;
+ };
+ };
+
+ i2c15mux0chn7: i2c@7 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <7>;
+
+ regulator@40 {
+ compatible = "infineon,ir38060";
+ reg = <0x40>;
+ };
+
+ temperature-sensor@4c {
+ compatible = "ti,tmp423";
+ reg = <0x4c>;
+ };
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi
index 29f94696d8b189..7fb421153596b7 100644
--- a/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi
+++ b/arch/arm/boot/dts/aspeed/aspeed-g6.dtsi
@@ -867,22 +867,26 @@
};
fsim0: fsi@1e79b000 {
+ #interrupt-cells = <1>;
compatible = "aspeed,ast2600-fsi-master", "fsi-master";
reg = <0x1e79b000 0x94>;
interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_fsi1_default>;
clocks = <&syscon ASPEED_CLK_GATE_FSICLK>;
+ interrupt-controller;
status = "disabled";
};
fsim1: fsi@1e79b100 {
+ #interrupt-cells = <1>;
compatible = "aspeed,ast2600-fsi-master", "fsi-master";
reg = <0x1e79b100 0x94>;
interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_fsi2_default>;
clocks = <&syscon ASPEED_CLK_GATE_FSICLK>;
+ interrupt-controller;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/aspeed/ibm-power10-dual.dtsi b/arch/arm/boot/dts/aspeed/ibm-power10-dual.dtsi
index cc466910bb52f9..07ce3b2bc62a3b 100644
--- a/arch/arm/boot/dts/aspeed/ibm-power10-dual.dtsi
+++ b/arch/arm/boot/dts/aspeed/ibm-power10-dual.dtsi
@@ -165,10 +165,12 @@
};
fsi_hub0: hub@3400 {
+ #interrupt-cells = <1>;
compatible = "fsi-master-hub";
reg = <0x3400 0x400>;
#address-cells = <2>;
#size-cells = <0>;
+ interrupt-controller;
};
};
};
diff --git a/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts b/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts
index 4f609e9e510ef6..009d2c83242102 100644
--- a/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts
+++ b/arch/arm/boot/dts/microchip/at91-sama7g54_curiosity.dts
@@ -242,7 +242,7 @@
regulator-state-standby {
regulator-on-in-suspend;
- regulator-suspend-voltage = <1150000>;
+ regulator-suspend-microvolt = <1150000>;
regulator-mode = <4>;
};
@@ -263,7 +263,7 @@
regulator-state-standby {
regulator-on-in-suspend;
- regulator-suspend-voltage = <1050000>;
+ regulator-suspend-microvolt = <1050000>;
regulator-mode = <4>;
};
@@ -280,7 +280,7 @@
regulator-always-on;
regulator-state-standby {
- regulator-suspend-voltage = <1800000>;
+ regulator-suspend-microvolt = <1800000>;
regulator-on-in-suspend;
};
@@ -296,7 +296,7 @@
regulator-always-on;
regulator-state-standby {
- regulator-suspend-voltage = <3300000>;
+ regulator-suspend-microvolt = <3300000>;
regulator-on-in-suspend;
};
diff --git a/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts b/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts
index 217e9b96c61e5d..20b2497657ae48 100644
--- a/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts
+++ b/arch/arm/boot/dts/microchip/at91-sama7g5ek.dts
@@ -293,7 +293,7 @@
regulator-state-standby {
regulator-on-in-suspend;
- regulator-suspend-voltage = <1150000>;
+ regulator-suspend-microvolt = <1150000>;
regulator-mode = <4>;
};
@@ -314,7 +314,7 @@
regulator-state-standby {
regulator-on-in-suspend;
- regulator-suspend-voltage = <1050000>;
+ regulator-suspend-microvolt = <1050000>;
regulator-mode = <4>;
};
@@ -331,7 +331,7 @@
regulator-always-on;
regulator-state-standby {
- regulator-suspend-voltage = <1800000>;
+ regulator-suspend-microvolt = <1800000>;
regulator-on-in-suspend;
};
@@ -346,7 +346,7 @@
regulator-max-microvolt = <3700000>;
regulator-state-standby {
- regulator-suspend-voltage = <1800000>;
+ regulator-suspend-microvolt = <1800000>;
regulator-on-in-suspend;
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi
index 3fdece5bd31f9d..5248a058230c86 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6ull-tarragon-common.dtsi
@@ -805,6 +805,7 @@
&pinctrl_usb_pwr>;
dr_mode = "host";
power-active-high;
+ over-current-active-low;
disable-over-current;
status = "okay";
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
index 1235a71c6abe96..52869e68f833c4 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
@@ -666,7 +666,7 @@
bus-width = <4>;
no-1-8-v;
no-sdio;
- no-emmc;
+ no-mmc;
status = "okay";
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
index ba7231b364bb8c..7bab113ca6da79 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
@@ -210,6 +210,7 @@
remote-endpoint = <&mipi_from_sensor>;
clock-lanes = <0>;
data-lanes = <1>;
+ link-frequencies = /bits/ 64 <330000000>;
};
};
};
diff --git a/arch/arm/include/asm/fpu.h b/arch/arm/include/asm/fpu.h
new file mode 100644
index 00000000000000..2ae50bdce59b43
--- /dev/null
+++ b/arch/arm/include/asm/fpu.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef __ASM_FPU_H
+#define __ASM_FPU_H
+
+#include <asm/neon.h>
+
+#define kernel_fpu_available() cpu_has_neon()
+#define kernel_fpu_begin() kernel_neon_begin()
+#define kernel_fpu_end() kernel_neon_end()
+
+#endif /* ! __ASM_FPU_H */
diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h
index a3a82b7158d4cb..b766c4b373f647 100644
--- a/arch/arm/include/asm/hugetlb.h
+++ b/arch/arm/include/asm/hugetlb.h
@@ -15,10 +15,10 @@
#include <asm/hugetlb-3level.h>
#include <asm-generic/hugetlb.h>
-static inline void arch_clear_hugepage_flags(struct page *page)
+static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
- clear_bit(PG_dcache_clean, &page->flags);
+ clear_bit(PG_dcache_clean, &folio->flags);
}
-#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
#endif /* _ASM_ARM_HUGETLB_H */
diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h
new file mode 100644
index 00000000000000..2189e507c8e08b
--- /dev/null
+++ b/arch/arm/include/asm/mman.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#include <asm/system_info.h>
+#include <uapi/asm/mman.h>
+
+static inline bool arch_memory_deny_write_exec_supported(void)
+{
+ return cpu_architecture() >= CPU_ARCH_ARMv6;
+}
+#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
+
+#endif /* __ASM_MMAN_H__ */
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index b0a262566eb95a..6b5392e20f4113 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -213,8 +213,8 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
#define pmd_pfn(pmd) (__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
-#define pmd_leaf(pmd) (pmd_val(pmd) & 2)
-#define pmd_bad(pmd) (pmd_val(pmd) & 2)
+#define pmd_leaf(pmd) (pmd_val(pmd) & PMD_TYPE_SECT)
+#define pmd_bad(pmd) pmd_leaf(pmd)
#define pmd_present(pmd) (pmd_val(pmd))
#define copy_pmd(pmdpd,pmdps) \
@@ -241,7 +241,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
* define empty stubs for use by pin_page_for_write.
*/
#define pmd_hugewillfault(pmd) (0)
-#define pmd_thp_or_huge(pmd) (0)
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index 2f35b4eddaa809..e7b666cf006031 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -14,6 +14,7 @@
* + Level 1/2 descriptor
* - common
*/
+#define PUD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 4b1d9eb3908a22..fa5939eb9864ef 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -112,7 +112,7 @@
#ifndef __ASSEMBLY__
#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!(pud_val(pud) & 2))
+#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT))
#define pud_present(pud) (pud_val(pud))
#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_TABLE)
@@ -137,7 +137,7 @@ static inline pmd_t *pud_pgtable(pud_t pud)
return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
}
-#define pmd_bad(pmd) (!(pmd_val(pmd) & 2))
+#define pmd_bad(pmd) (!(pmd_val(pmd) & PMD_TABLE_BIT))
#define copy_pmd(pmdpd,pmdps) \
do { \
@@ -190,7 +190,6 @@ static inline pte_t pte_mkspecial(pte_t pte)
#define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY))
#define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd))
-#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd))
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index fe28fc1f759d93..dab42d066d068b 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -32,6 +32,7 @@
#include <linux/kallsyms.h>
#include <linux/proc_fs.h>
#include <linux/export.h>
+#include <linux/vmalloc.h>
#include <asm/hardware/cache-l2x0.h>
#include <asm/hardware/cache-uniphier.h>
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 72c82a4d63ac20..480e307501bb4b 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -26,6 +26,7 @@
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/irq.h>
+#include <linux/vmalloc.h>
#include <linux/atomic.h>
#include <asm/cacheflush.h>
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 650404be6768a0..0ca5aae1bcc3e3 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -40,8 +40,7 @@ $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S
$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S
ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
- NEON_FLAGS := -march=armv7-a -mfloat-abi=softfp -mfpu=neon
- CFLAGS_xor-neon.o += $(NEON_FLAGS)
+ CFLAGS_xor-neon.o += $(CC_FLAGS_FPU)
obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
endif
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index 2f6163f05e9357..c0ac7796d77508 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -56,10 +56,10 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
* to see that it's still huge and whether or not we will
* need to fault on write.
*/
- if (unlikely(pmd_thp_or_huge(*pmd))) {
+ if (unlikely(pmd_leaf(*pmd))) {
ptl = &current->mm->page_table_lock;
spin_lock(ptl);
- if (unlikely(!pmd_thp_or_huge(*pmd)
+ if (unlikely(!pmd_leaf(*pmd)
|| pmd_hugewillfault(*pmd))) {
spin_unlock(ptl);
return 0;
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index 31755a378c7364..ff2a4a4d822047 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -79,10 +79,8 @@ static struct musb_hdrc_platform_data tusb_data = {
static struct gpiod_lookup_table tusb_gpio_table = {
.dev_id = "musb-tusb",
.table = {
- GPIO_LOOKUP("gpio-0-15", 0, "enable",
- GPIO_ACTIVE_HIGH),
- GPIO_LOOKUP("gpio-48-63", 10, "int",
- GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -140,12 +138,11 @@ static int slot1_cover_open;
static int slot2_cover_open;
static struct device *mmc_device;
-static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
+static struct gpiod_lookup_table nokia800_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
/* Slot switch, GPIO 96 */
- GPIO_LOOKUP("gpio-80-111", 16,
- "switch", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -153,12 +150,12 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
static struct gpiod_lookup_table nokia810_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
+ /* Slot switch, GPIO 96 */
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
/* Slot index 1, VSD power, GPIO 23 */
- GPIO_LOOKUP_IDX("gpio-16-31", 7,
- "vsd", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH),
/* Slot index 1, VIO power, GPIO 9 */
- GPIO_LOOKUP_IDX("gpio-0-15", 9,
- "vio", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -415,8 +412,6 @@ static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC];
static void __init n8x0_mmc_init(void)
{
- gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table);
-
if (board_is_n810()) {
mmc1_data.slots[0].name = "external";
@@ -429,6 +424,8 @@ static void __init n8x0_mmc_init(void)
mmc1_data.slots[1].name = "internal";
mmc1_data.slots[1].ban_openended = 1;
gpiod_add_lookup_table(&nokia810_mmc_gpio_table);
+ } else {
+ gpiod_add_lookup_table(&nokia800_mmc_gpio_table);
}
mmc1_data.nr_slots = 2;
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 71b858c9b10c47..1779e12db0850d 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -21,7 +21,6 @@ KASAN_SANITIZE_physaddr.o := n
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_ARM_PV_FIXUP) += pv-fixup-asm.o
obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 439dc6a26bb982..bf07afdb2dd9e1 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -226,9 +226,6 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
}
#ifdef CONFIG_MMU
-#define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000)
-#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000)
-
static inline bool is_permission_fault(unsigned int fsr)
{
int fs = fsr_fs(fsr);
@@ -294,7 +291,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (!(vma->vm_flags & vm_flags)) {
vma_end_read(vma);
- goto lock_mmap;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ fault = 0;
+ code = SEGV_ACCERR;
+ goto bad_area;
}
fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
@@ -319,7 +319,8 @@ lock_mmap:
retry:
vma = lock_mm_and_find_vma(mm, addr, regs);
if (unlikely(!vma)) {
- fault = VM_FAULT_BADMAP;
+ fault = 0;
+ code = SEGV_MAPERR;
goto bad_area;
}
@@ -327,10 +328,14 @@ retry:
* ok, we have a good vm_area for this memory access, check the
* permissions on the VMA allow for the fault which occurred.
*/
- if (!(vma->vm_flags & vm_flags))
- fault = VM_FAULT_BADACCESS;
- else
- fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
+ if (!(vma->vm_flags & vm_flags)) {
+ mmap_read_unlock(mm);
+ fault = 0;
+ code = SEGV_ACCERR;
+ goto bad_area;
+ }
+
+ fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
/* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_lock because
@@ -356,12 +361,11 @@ retry:
mmap_read_unlock(mm);
done:
- /*
- * Handle the "normal" case first - VM_FAULT_MAJOR
- */
- if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
+ /* Handle the "normal" case first */
+ if (likely(!(fault & VM_FAULT_ERROR)))
return 0;
+ code = SEGV_MAPERR;
bad_area:
/*
* If we are in kernel mode at this point, we
@@ -393,8 +397,6 @@ bad_area:
* isn't in our memory map..
*/
sig = SIGSEGV;
- code = fault == VM_FAULT_BADACCESS ?
- SEGV_ACCERR : SEGV_MAPERR;
}
__do_user_fault(addr, fsr, sig, code, regs);
diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c
deleted file mode 100644
index dd7a0277c5c094..00000000000000
--- a/arch/arm/mm/hugetlbpage.c
+++ /dev/null
@@ -1,34 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch/arm/mm/hugetlbpage.c
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches
- */
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <linux/pagemap.h>
-#include <linux/err.h>
-#include <linux/sysctl.h>
-#include <asm/mman.h>
-#include <asm/tlb.h>
-#include <asm/tlbflush.h>
-
-/*
- * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot
- * of type casting from pmd_t * to pte_t *.
- */
-
-int pud_huge(pud_t pud)
-{
- return 0;
-}
-
-int pmd_huge(pmd_t pmd)
-{
- return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
-}
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index a0f8a0ca0788ad..d65d0e6ed10ab4 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -34,7 +34,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct vm_area_struct *vma;
int do_align = 0;
int aliasing = cache_is_vipt_aliasing();
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
/*
* We only need to do colour alignment if either the I or D
@@ -68,7 +68,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
- info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
@@ -87,7 +86,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
unsigned long addr = addr0;
int do_align = 0;
int aliasing = cache_is_vipt_aliasing();
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
/*
* We only need to do colour alignment if either the I or D
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 1d672457d02ff3..72b5cd697f5d94 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -871,16 +871,11 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
}
/* dst = src (4 bytes)*/
-static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
- struct jit_ctx *ctx) {
+static inline void emit_a32_mov_r(const s8 dst, const s8 src, struct jit_ctx *ctx) {
const s8 *tmp = bpf2a32[TMP_REG_1];
s8 rt;
rt = arm_bpf_get_reg32(src, tmp[0], ctx);
- if (off && off != 32) {
- emit(ARM_LSL_I(rt, rt, 32 - off), ctx);
- emit(ARM_ASR_I(rt, rt, 32 - off), ctx);
- }
arm_bpf_put_reg32(dst, rt, ctx);
}
@@ -889,15 +884,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
const s8 src[],
struct jit_ctx *ctx) {
if (!is64) {
- emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
+ emit_a32_mov_r(dst_lo, src_lo, ctx);
if (!ctx->prog->aux->verifier_zext)
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else if (__LINUX_ARM_ARCH__ < 6 &&
ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
/* complete 8 byte move */
- emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
- emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
+ emit_a32_mov_r(dst_lo, src_lo, ctx);
+ emit_a32_mov_r(dst_hi, src_hi, ctx);
} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
const u8 *tmp = bpf2a32[TMP_REG_1];
@@ -917,17 +912,52 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
struct jit_ctx *ctx) {
const s8 *tmp = bpf2a32[TMP_REG_1];
- const s8 *rt;
+ s8 rs;
+ s8 rd;
- rt = arm_bpf_get_reg64(dst, tmp, ctx);
+ if (is_stacked(dst_lo))
+ rd = tmp[1];
+ else
+ rd = dst_lo;
+ rs = arm_bpf_get_reg32(src_lo, rd, ctx);
+ /* rs may be one of src[1], dst[1], or tmp[1] */
+
+ /* Sign extend rs if needed. If off == 32, lower 32-bits of src are moved to dst and sign
+ * extension only happens in the upper 64 bits.
+ */
+ if (off != 32) {
+ /* Sign extend rs into rd */
+ emit(ARM_LSL_I(rd, rs, 32 - off), ctx);
+ emit(ARM_ASR_I(rd, rd, 32 - off), ctx);
+ } else {
+ rd = rs;
+ }
+
+ /* Write rd to dst_lo
+ *
+ * Optimization:
+ * Assume:
+ * 1. dst == src and stacked.
+ * 2. off == 32
+ *
+ * In this case src_lo was loaded into rd(tmp[1]) but rd was not sign extended as off==32.
+ * So, we don't need to write rd back to dst_lo as they have the same value.
+ * This saves us one str instruction.
+ */
+ if (dst_lo != src_lo || off != 32)
+ arm_bpf_put_reg32(dst_lo, rd, ctx);
- emit_a32_mov_r(dst_lo, src_lo, off, ctx);
if (!is64) {
if (!ctx->prog->aux->verifier_zext)
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else {
- emit(ARM_ASR_I(rt[0], rt[1], 31), ctx);
+ if (is_stacked(dst_hi)) {
+ emit(ARM_ASR_I(tmp[0], rd, 31), ctx);
+ arm_bpf_put_reg32(dst_hi, tmp[0], ctx);
+ } else {
+ emit(ARM_ASR_I(dst_hi, rd, 31), ctx);
+ }
}
}
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index b6c9e01e14f559..2ed7d229c8f96d 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -475,3 +475,4 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b11c98b3e84bf..1bc3d2470ad1e5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -30,6 +30,7 @@ config ARM64
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if KERNEL_MODE_NEON
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
@@ -205,7 +206,7 @@ config ARM64
select HAVE_SAMPLE_FTRACE_DIRECT
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
select HAVE_EFFICIENT_UNALIGNED_ACCESS
- select HAVE_FAST_GUP
+ select HAVE_GUP_FAST
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 0e075d3c546b48..3f0f35fd5bb7b1 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -36,7 +36,14 @@ ifeq ($(CONFIG_BROKEN_GAS_INST),y)
$(warning Detected assembler with broken .inst; disassembly will be unreliable)
endif
-KBUILD_CFLAGS += -mgeneral-regs-only \
+# The GCC option -ffreestanding is required in order to compile code containing
+# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel)
+CC_FLAGS_FPU := -ffreestanding
+# Enable <arm_neon.h>
+CC_FLAGS_FPU += -isystem $(shell $(CC) -print-file-name=include)
+CC_FLAGS_NO_FPU := -mgeneral-regs-only
+
+KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) \
$(compat_vdso) $(cc_has_k_constraint)
KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
KBUILD_AFLAGS += $(compat_vdso)
@@ -154,6 +161,10 @@ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
# Default target when executing plain make
boot := arch/arm64/boot
+BOOT_TARGETS := Image vmlinuz.efi image.fit
+
+PHONY += $(BOOT_TARGETS)
+
ifeq ($(CONFIG_EFI_ZBOOT),)
KBUILD_IMAGE := $(boot)/Image.gz
else
@@ -162,8 +173,10 @@ endif
all: $(notdir $(KBUILD_IMAGE))
-vmlinuz.efi: Image
-Image vmlinuz.efi: vmlinux
+image.fit: dtbs
+
+vmlinuz.efi image.fit: Image
+$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
Image.%: Image
@@ -215,6 +228,7 @@ virtconfig:
define archhelp
echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)'
+ echo ' image.fit - Flat Image Tree (arch/$(ARCH)/boot/image.fit)'
echo ' install - Install uncompressed kernel'
echo ' zinstall - Install compressed kernel'
echo ' Install using (your) ~/bin/installkernel or'
diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore
index af5dc61f8b4388..abaae9de1bdda2 100644
--- a/arch/arm64/boot/.gitignore
+++ b/arch/arm64/boot/.gitignore
@@ -2,3 +2,4 @@
Image
Image.gz
vmlinuz*
+image.fit
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile
index a5a7873711173b..607a67a649c463 100644
--- a/arch/arm64/boot/Makefile
+++ b/arch/arm64/boot/Makefile
@@ -16,7 +16,8 @@
OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
-targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo Image.zst
+targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo \
+ Image.zst image.fit
$(obj)/Image: vmlinux FORCE
$(call if_changed,objcopy)
@@ -39,6 +40,9 @@ $(obj)/Image.lzo: $(obj)/Image FORCE
$(obj)/Image.zst: $(obj)/Image FORCE
$(call if_changed,zstd)
+$(obj)/image.fit: $(obj)/Image $(obj)/dts/dtbs-list FORCE
+ $(call if_changed,fit)
+
EFI_ZBOOT_PAYLOAD := Image
EFI_ZBOOT_BFD_TARGET := elf64-littleaarch64
EFI_ZBOOT_MACH_TYPE := ARM64
diff --git a/arch/arm64/boot/dts/amlogic/Makefile b/arch/arm64/boot/dts/amlogic/Makefile
index 1ab160bf928ae4..0f29517da5ec15 100644
--- a/arch/arm64/boot/dts/amlogic/Makefile
+++ b/arch/arm64/boot/dts/amlogic/Makefile
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_ARCH_MESON) += amlogic-a4-a113l2-ba400.dtb
+dtb-$(CONFIG_ARCH_MESON) += amlogic-a5-a113x2-av400.dtb
dtb-$(CONFIG_ARCH_MESON) += amlogic-c3-c302x-aw409.dtb
dtb-$(CONFIG_ARCH_MESON) += amlogic-t7-a311d2-an400.dtb
dtb-$(CONFIG_ARCH_MESON) += amlogic-t7-a311d2-khadas-vim4.dtb
@@ -16,7 +18,9 @@ dtb-$(CONFIG_ARCH_MESON) += meson-g12a-u200.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12a-x96-max.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-a311d-bananapi-m2s.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-a311d-khadas-vim3.dtb
+dtb-$(CONFIG_ARCH_MESON) += meson-g12b-a311d-khadas-vim3-ts050.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-bananapi-cm4-cm4io.dtb
+dtb-$(CONFIG_ARCH_MESON) += meson-g12b-bananapi-cm4-mnt-reform2.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-gsking-x.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-gtking-pro.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-g12b-gtking.dtb
@@ -76,6 +80,7 @@ dtb-$(CONFIG_ARCH_MESON) += meson-sm1-bananapi-m2-pro.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-bananapi-m5.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-h96-max.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-khadas-vim3l.dtb
+dtb-$(CONFIG_ARCH_MESON) += meson-sm1-khadas-vim3l-ts050.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-s905d3-libretech-cc.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-odroid-c4.dtb
dtb-$(CONFIG_ARCH_MESON) += meson-sm1-odroid-hc4.dtb
@@ -86,3 +91,5 @@ dtb-$(CONFIG_ARCH_MESON) += meson-sm1-x96-air.dtb
# Overlays
meson-g12a-fbx8am-brcm-dtbs := meson-g12a-fbx8am.dtb meson-g12a-fbx8am-brcm.dtbo
meson-g12a-fbx8am-realtek-dtbs := meson-g12a-fbx8am.dtb meson-g12a-fbx8am-realtek.dtbo
+meson-g12b-a311d-khadas-vim3-ts050-dtbs := meson-g12b-a311d-khadas-vim3.dtb meson-khadas-vim3-ts050.dtbo
+meson-sm1-khadas-vim3l-ts050-dtbs := meson-sm1-khadas-vim3l.dtb meson-khadas-vim3-ts050.dtbo
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-a4-a113l2-ba400.dts b/arch/arm64/boot/dts/amlogic/amlogic-a4-a113l2-ba400.dts
new file mode 100644
index 00000000000000..ad3127e695d98c
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/amlogic-a4-a113l2-ba400.dts
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2024 Amlogic, Inc. All rights reserved.
+ */
+
+/dts-v1/;
+
+#include "amlogic-a4.dtsi"
+
+/ {
+ model = "Amlogic A113L2 ba400 Development Board";
+ compatible = "amlogic,ba400", "amlogic,a4";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ aliases {
+ serial0 = &uart_b;
+ };
+
+ memory@0 {
+ device_type = "memory";
+ reg = <0x0 0x0 0x0 0x40000000>;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ /* 10 MiB reserved for ARM Trusted Firmware */
+ secmon_reserved: secmon@5000000 {
+ compatible = "shared-dma-pool";
+ reg = <0x0 0x05000000 0x0 0xa00000>;
+ no-map;
+ };
+ };
+};
+
+&uart_b {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-a4-common.dtsi b/arch/arm64/boot/dts/amlogic/amlogic-a4-common.dtsi
new file mode 100644
index 00000000000000..b6106ad4a07261
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/amlogic-a4-common.dtsi
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2024 Amlogic, Inc. All rights reserved.
+ */
+
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/gpio/gpio.h>
+/ {
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+ };
+
+ psci {
+ compatible = "arm,psci-1.0";
+ method = "smc";
+ };
+
+ xtal: xtal-clk {
+ compatible = "fixed-clock";
+ clock-frequency = <24000000>;
+ clock-output-names = "xtal";
+ #clock-cells = <0>;
+ };
+
+ soc {
+ compatible = "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ gic: interrupt-controller@fff01000 {
+ compatible = "arm,gic-400";
+ reg = <0x0 0xfff01000 0 0x1000>,
+ <0x0 0xfff02000 0 0x2000>,
+ <0x0 0xfff04000 0 0x2000>,
+ <0x0 0xfff06000 0 0x2000>;
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+ };
+
+ apb: bus@fe000000 {
+ compatible = "simple-bus";
+ reg = <0x0 0xfe000000 0x0 0x480000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges = <0x0 0x0 0x0 0xfe000000 0x0 0x480000>;
+
+ uart_b: serial@7a000 {
+ compatible = "amlogic,a4-uart",
+ "amlogic,meson-s4-uart";
+ reg = <0x0 0x7a000 0x0 0x18>;
+ interrupts = <GIC_SPI 169 IRQ_TYPE_EDGE_RISING>;
+ clocks = <&xtal>, <&xtal>, <&xtal>;
+ clock-names = "xtal", "pclk", "baud";
+ status = "disabled";
+ };
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-a4.dtsi b/arch/arm64/boot/dts/amlogic/amlogic-a4.dtsi
new file mode 100644
index 00000000000000..73ca1d7eed81d5
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/amlogic-a4.dtsi
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2024 Amlogic, Inc. All rights reserved.
+ */
+
+#include "amlogic-a4-common.dtsi"
+/ {
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ };
+
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ };
+
+ cpu2: cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x0 0x2>;
+ enable-method = "psci";
+ };
+
+ cpu3: cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x0 0x3>;
+ enable-method = "psci";
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-a5-a113x2-av400.dts b/arch/arm64/boot/dts/amlogic/amlogic-a5-a113x2-av400.dts
new file mode 100644
index 00000000000000..11d8b88c1ce511
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/amlogic-a5-a113x2-av400.dts
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2024 Amlogic, Inc. All rights reserved.
+ */
+
+/dts-v1/;
+
+#include "amlogic-a5.dtsi"
+
+/ {
+ model = "Amlogic A113X2 av400 Development Board";
+ compatible = "amlogic,av400", "amlogic,a5";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ aliases {
+ serial0 = &uart_b;
+ };
+
+ memory@0 {
+ device_type = "memory";
+ reg = <0x0 0x0 0x0 0x40000000>;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ /* 10 MiB reserved for ARM Trusted Firmware */
+ secmon_reserved: secmon@5000000 {
+ compatible = "shared-dma-pool";
+ reg = <0x0 0x05000000 0x0 0xa00000>;
+ no-map;
+ };
+ };
+};
+
+&uart_b {
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-a5.dtsi b/arch/arm64/boot/dts/amlogic/amlogic-a5.dtsi
new file mode 100644
index 00000000000000..43f68a7da2f742
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/amlogic-a5.dtsi
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2024 Amlogic, Inc. All rights reserved.
+ */
+
+#include "amlogic-a4-common.dtsi"
+/ {
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a55";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ };
+
+ cpu1: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a55";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ };
+
+ cpu2: cpu@200 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a55";
+ reg = <0x0 0x200>;
+ enable-method = "psci";
+ };
+
+ cpu3: cpu@300 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a55";
+ reg = <0x0 0x300>;
+ enable-method = "psci";
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-t7-reset.h b/arch/arm64/boot/dts/amlogic/amlogic-t7-reset.h
new file mode 100644
index 00000000000000..ec90a11df508f0
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/amlogic-t7-reset.h
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */
+/*
+ * Copyright (c) 2024 Amlogic, Inc. All rights reserved.
+ */
+
+#ifndef __DTS_AMLOGIC_T7_RESET_H
+#define __DTS_AMLOGIC_T7_RESET_H
+
+/* RESET0 */
+/* 0-3 */
+#define RESET_USB 4
+#define RESET_U2DRD 5
+#define RESET_U3DRD 6
+#define RESET_U3DRD_PIPE0 7
+#define RESET_U2PHY20 8
+#define RESET_U2PHY21 9
+#define RESET_GDC 10
+#define RESET_HDMI20_AES 11
+#define RESET_HDMIRX 12
+#define RESET_HDMIRX_APB 13
+#define RESET_DEWARP 14
+/* 15 */
+#define RESET_HDMITX_CAPB3 16
+#define RESET_BRG_VCBUG_DEC 17
+#define RESET_VCBUS 18
+#define RESET_VID_PLL_DIV 19
+#define RESET_VDI6 20
+#define RESET_GE2D 21
+#define RESET_HDMITXPHY 22
+#define RESET_VID_LOCK 23
+#define RESET_VENC0 24
+#define RESET_VDAC 25
+#define RESET_VENC2 26
+#define RESET_VENC1 27
+#define RESET_RDMA 28
+#define RESET_HDMITX 29
+#define RESET_VIU 30
+#define RESET_VENC 31
+
+/* RESET1 */
+#define RESET_AUDIO 32
+#define RESET_MALI_CAPB3 33
+#define RESET_MALI 34
+#define RESET_DDR_APB 35
+#define RESET_DDR 36
+#define RESET_DOS_CAPB3 37
+#define RESET_DOS 38
+#define RESET_COMBO_DPHY_CHAN2 39
+#define RESET_DEBUG_B 40
+#define RESET_DEBUG_A 41
+#define RESET_DSP_B 42
+#define RESET_DSP_A 43
+#define RESET_PCIE_A 44
+#define RESET_PCIE_PHY 45
+#define RESET_PCIE_APB 46
+#define RESET_ANAKIN 47
+#define RESET_ETH 48
+#define RESET_EDP0_CTRL 49
+#define RESET_EDP1_CTRL 50
+#define RESET_COMBO_DPHY_CHAN0 51
+#define RESET_COMBO_DPHY_CHAN1 52
+#define RESET_DSI_LVDS_EDP_TOP 53
+#define RESET_PCIE1_PHY 54
+#define RESET_PCIE1_APB 55
+#define RESET_DDR_1 56
+/* 57 */
+#define RESET_EDP1_PIPELINE 58
+#define RESET_EDP0_PIPELINE 59
+#define RESET_MIPI_DSI1_PHY 60
+#define RESET_MIPI_DSI0_PHY 61
+#define RESET_MIPI_DSI_A_HOST 62
+#define RESET_MIPI_DSI_B_HOST 63
+
+/* RESET2 */
+#define RESET_DEVICE_MMC_ARB 64
+#define RESET_IR_CTRL 65
+#define RESET_TS_A73 66
+#define RESET_TS_A53 67
+#define RESET_SPICC_2 68
+#define RESET_SPICC_3 69
+#define RESET_SPICC_4 70
+#define RESET_SPICC_5 71
+#define RESET_SMART_CARD 72
+#define RESET_SPICC_0 73
+#define RESET_SPICC_1 74
+#define RESET_RSA 75
+/* 76-79 */
+#define RESET_MSR_CLK 80
+#define RESET_SPIFC 81
+#define RESET_SAR_ADC 82
+#define RESET_BT 83
+/* 84-87 */
+#define RESET_ACODEC 88
+#define RESET_CEC 89
+#define RESET_AFIFO 90
+#define RESET_WATCHDOG 91
+/* 92-95 */
+
+/* RESET3 */
+#define RESET_BRG_NIC1_GPV 96
+#define RESET_BRG_NIC2_GPV 97
+#define RESET_BRG_NIC3_GPV 98
+#define RESET_BRG_NIC4_GPV 99
+#define RESET_BRG_NIC5_GPV 100
+/* 101-121 */
+#define RESET_MIPI_ISP 122
+#define RESET_BRG_ADB_MALI_1 123
+#define RESET_BRG_ADB_MALI_0 124
+#define RESET_BRG_ADB_A73 125
+#define RESET_BRG_ADB_A53 126
+#define RESET_BRG_CCI 127
+
+/* RESET4 */
+#define RESET_PWM_AO_AB 128
+#define RESET_PWM_AO_CD 129
+#define RESET_PWM_AO_EF 130
+#define RESET_PWM_AO_GH 131
+#define RESET_PWM_AB 132
+#define RESET_PWM_CD 133
+#define RESET_PWM_EF 134
+/* 135-137 */
+#define RESET_UART_A 138
+#define RESET_UART_B 139
+#define RESET_UART_C 140
+#define RESET_UART_D 141
+#define RESET_UART_E 142
+#define RESET_UART_F 143
+#define RESET_I2C_S_A 144
+#define RESET_I2C_M_A 145
+#define RESET_I2C_M_B 146
+#define RESET_I2C_M_C 147
+#define RESET_I2C_M_D 148
+#define RESET_I2C_M_E 149
+#define RESET_I2C_M_F 150
+#define RESET_I2C_M_AO_A 151
+#define RESET_SD_EMMC_A 152
+#define RESET_SD_EMMC_B 153
+#define RESET_SD_EMMC_C 154
+#define RESET_I2C_M_AO_B 155
+#define RESET_TS_GPU 156
+#define RESET_TS_NNA 157
+#define RESET_TS_VPN 158
+#define RESET_TS_HEVC 159
+
+/* RESET5 */
+#define RESET_BRG_NOC_DDR_1 160
+#define RESET_BRG_NOC_DDR_0 161
+#define RESET_BRG_NOC_MAIN 162
+#define RESET_BRG_NOC_ALL 163
+/* 164-167 */
+#define RESET_BRG_NIC2_SYS 168
+#define RESET_BRG_NIC2_MAIN 169
+#define RESET_BRG_NIC2_HDMI 170
+#define RESET_BRG_NIC2_ALL 171
+#define RESET_BRG_NIC3_WAVE 172
+#define RESET_BRG_NIC3_VDEC 173
+#define RESET_BRG_NIC3_HEVCF 174
+#define RESET_BRG_NIC3_HEVCB 175
+#define RESET_BRG_NIC3_HCODEC 176
+#define RESET_BRG_NIC3_GE2D 177
+#define RESET_BRG_NIC3_GDC 178
+#define RESET_BRG_NIC3_AMLOGIC 179
+#define RESET_BRG_NIC3_MAIN 180
+#define RESET_BRG_NIC3_ALL 181
+#define RESET_BRG_NIC5_VPU 182
+/* 183-185 */
+#define RESET_BRG_NIC4_DSPB 186
+#define RESET_BRG_NIC4_DSPA 187
+#define RESET_BRG_NIC4_VAPB 188
+#define RESET_BRG_NIC4_CLK81 189
+#define RESET_BRG_NIC4_MAIN 190
+#define RESET_BRG_NIC4_ALL 191
+
+/* RESET6 */
+#define RESET_BRG_VDEC_PIPEL 192
+#define RESET_BRG_HEVCF_DMC_PIPEL 193
+#define RESET_BRG_NIC2TONIC4_PIPEL 194
+#define RESET_BRG_HDMIRXTONIC2_PIPEL 195
+#define RESET_BRG_SECTONIC4_PIPEL 196
+#define RESET_BRG_VPUTONOC_PIPEL 197
+#define RESET_BRG_NIC4TONOC_PIPEL 198
+#define RESET_BRG_NIC3TONOC_PIPEL 199
+#define RESET_BRG_NIC2TONOC_PIPEL 200
+#define RESET_BRG_NNATONOC_PIPEL 201
+#define RESET_BRG_FRISP3_PIPEL 202
+#define RESET_BRG_FRISP2_PIPEL 203
+#define RESET_BRG_FRISP1_PIPEL 204
+#define RESET_BRG_FRISP0_PIPEL 205
+/* 206-217 */
+#define RESET_BRG_AMPIPE_NAND 218
+#define RESET_BRG_AMPIPE_ETH 219
+/* 220 */
+#define RESET_BRG_AM2AXI0 221
+#define RESET_BRG_AM2AXI1 222
+#define RESET_BRG_AM2AXI2 223
+
+#endif /* ___DTS_AMLOGIC_T7_RESET_H */
diff --git a/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi b/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
index 5248bdf824ea60..c23efc6c7ac027 100644
--- a/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
+++ b/arch/arm64/boot/dts/amlogic/amlogic-t7.dtsi
@@ -5,6 +5,7 @@
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/power/amlogic,t7-pwrc.h>
+#include "amlogic-t7-reset.h"
/ {
interrupt-parent = <&gic>;
@@ -149,6 +150,12 @@
#size-cells = <2>;
ranges = <0x0 0x0 0x0 0xfe000000 0x0 0x480000>;
+ reset: reset-controller@2000 {
+ compatible = "amlogic,t7-reset";
+ reg = <0x0 0x2000 0x0 0x98>;
+ #reset-cells = <1>;
+ };
+
watchdog@2100 {
compatible = "amlogic,t7-wdt";
reg = <0x0 0x2100 0x0 0x10>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
index 9d5eab6595d022..b058ed78faf006 100644
--- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi
@@ -1663,9 +1663,28 @@
<250000000>,
<0>; /* Do Nothing */
};
+
+ mipi_analog_dphy: phy {
+ compatible = "amlogic,g12a-mipi-dphy-analog";
+ #phy-cells = <0>;
+ status = "disabled";
+ };
};
};
+ mipi_dphy: phy@44000 {
+ compatible = "amlogic,axg-mipi-dphy";
+ reg = <0x0 0x44000 0x0 0x2000>;
+ clocks = <&clkc CLKID_MIPI_DSI_PHY>;
+ clock-names = "pclk";
+ resets = <&reset RESET_MIPI_DSI_PHY>;
+ reset-names = "phy";
+ phys = <&mipi_analog_dphy>;
+ phy-names = "analog";
+ #phy-cells = <0>;
+ status = "disabled";
+ };
+
usb3_pcie_phy: phy@46000 {
compatible = "amlogic,g12a-usb3-pcie-phy";
reg = <0x0 0x46000 0x0 0x2000>;
@@ -2152,6 +2171,15 @@
remote-endpoint = <&hdmi_tx_in>;
};
};
+
+ /* DPI output port */
+ dpi_port: port@2 {
+ reg = <2>;
+
+ dpi_out: endpoint {
+ remote-endpoint = <&mipi_dsi_in>;
+ };
+ };
};
gic: interrupt-controller@ffc01000 {
@@ -2189,6 +2217,48 @@
amlogic,channel-interrupts = <64 65 66 67 68 69 70 71>;
};
+ mipi_dsi: dsi@7000 {
+ compatible = "amlogic,meson-g12a-dw-mipi-dsi";
+ reg = <0x0 0x7000 0x0 0x1000>;
+ resets = <&reset RESET_MIPI_DSI_HOST>;
+ reset-names = "top";
+ clocks = <&clkc CLKID_MIPI_DSI_HOST>,
+ <&clkc CLKID_MIPI_DSI_PXCLK>,
+ <&clkc CLKID_CTS_ENCL>;
+ clock-names = "pclk", "bit", "px";
+ phys = <&mipi_dphy>;
+ phy-names = "dphy";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+
+ assigned-clocks = <&clkc CLKID_MIPI_DSI_PXCLK_SEL>,
+ <&clkc CLKID_CTS_ENCL_SEL>,
+ <&clkc CLKID_VCLK2_SEL>;
+ assigned-clock-parents = <&clkc CLKID_GP0_PLL>,
+ <&clkc CLKID_VCLK2_DIV1>,
+ <&clkc CLKID_GP0_PLL>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* VPU VENC Input */
+ mipi_dsi_venc_port: port@0 {
+ reg = <0>;
+
+ mipi_dsi_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+
+ /* DSI Output */
+ mipi_dsi_panel_port: port@1 {
+ reg = <1>;
+ };
+ };
+ };
+
watchdog: watchdog@f0d0 {
compatible = "amlogic,meson-gxbb-wdt";
reg = <0x0 0xf0d0 0x0 0x10>;
diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-bananapi-cm4-mnt-reform2.dts b/arch/arm64/boot/dts/amlogic/meson-g12b-bananapi-cm4-mnt-reform2.dts
new file mode 100644
index 00000000000000..003efed529ba3b
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/meson-g12b-bananapi-cm4-mnt-reform2.dts
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2023 Neil Armstrong <neil.armstrong@linaro.org>
+ * Copyright 2023 MNT Research GmbH
+ */
+
+/dts-v1/;
+
+#include "meson-g12b-bananapi-cm4.dtsi"
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/sound/meson-g12a-tohdmitx.h>
+
+/ {
+ model = "MNT Reform 2 with BPI-CM4 Module";
+ compatible = "mntre,reform2-cm4", "bananapi,bpi-cm4", "amlogic,a311d", "amlogic,g12b";
+ chassis-type = "laptop";
+
+ aliases {
+ ethernet0 = &ethmac;
+ i2c0 = &i2c1;
+ i2c1 = &i2c3;
+ };
+
+ hdmi_connector: hdmi-connector {
+ compatible = "hdmi-connector";
+ type = "a";
+
+ port {
+ hdmi_connector_in: endpoint {
+ remote-endpoint = <&hdmi_tx_tmds_out>;
+ };
+ };
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-blue {
+ color = <LED_COLOR_ID_BLUE>;
+ function = LED_FUNCTION_STATUS;
+ gpios = <&gpio_ao GPIOAO_7 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "heartbeat";
+ };
+
+ led-green {
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_STATUS;
+ gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_HIGH>;
+ };
+ };
+
+ sound {
+ compatible = "amlogic,axg-sound-card";
+ model = "MNT-REFORM2-BPI-CM4";
+ audio-widgets = "Headphone", "Headphone Jack",
+ "Speaker", "External Speaker",
+ "Microphone", "Mic Jack";
+ audio-aux-devs = <&tdmout_a>, <&tdmout_b>, <&tdmin_b>;
+ audio-routing = "TDMOUT_A IN 0", "FRDDR_A OUT 0",
+ "TDMOUT_A IN 1", "FRDDR_B OUT 0",
+ "TDMOUT_A IN 2", "FRDDR_C OUT 0",
+ "TDM_A Playback", "TDMOUT_A OUT",
+ "TDMOUT_B IN 0", "FRDDR_A OUT 1",
+ "TDMOUT_B IN 1", "FRDDR_B OUT 1",
+ "TDMOUT_B IN 2", "FRDDR_C OUT 1",
+ "TDM_B Playback", "TDMOUT_B OUT",
+ "TDMIN_B IN 1", "TDM_B Capture",
+ "TDMIN_B IN 4", "TDM_B Loopback",
+ "TODDR_A IN 1", "TDMIN_B OUT",
+ "TODDR_B IN 1", "TDMIN_B OUT",
+ "TODDR_C IN 1", "TDMIN_B OUT",
+ "Headphone Jack", "HP_L",
+ "Headphone Jack", "HP_R",
+ "External Speaker", "SPK_LP",
+ "External Speaker", "SPK_LN",
+ "External Speaker", "SPK_RP",
+ "External Speaker", "SPK_RN",
+ "LINPUT1", "Mic Jack",
+ "Mic Jack", "MICB";
+
+ assigned-clocks = <&clkc CLKID_MPLL2>,
+ <&clkc CLKID_MPLL0>,
+ <&clkc CLKID_MPLL1>;
+ assigned-clock-parents = <0>, <0>, <0>;
+ assigned-clock-rates = <294912000>,
+ <270950400>,
+ <393216000>;
+
+ dai-link-0 {
+ sound-dai = <&frddr_a>;
+ };
+
+ dai-link-1 {
+ sound-dai = <&frddr_b>;
+ };
+
+ dai-link-2 {
+ sound-dai = <&frddr_c>;
+ };
+
+ dai-link-3 {
+ sound-dai = <&toddr_a>;
+ };
+
+ dai-link-4 {
+ sound-dai = <&toddr_b>;
+ };
+
+ dai-link-5 {
+ sound-dai = <&toddr_c>;
+ };
+
+ /* 8ch hdmi interface */
+ dai-link-6 {
+ sound-dai = <&tdmif_a>;
+ dai-format = "i2s";
+ dai-tdm-slot-tx-mask-0 = <1 1>;
+ dai-tdm-slot-tx-mask-1 = <1 1>;
+ dai-tdm-slot-tx-mask-2 = <1 1>;
+ dai-tdm-slot-tx-mask-3 = <1 1>;
+ mclk-fs = <256>;
+
+ codec {
+ sound-dai = <&tohdmitx TOHDMITX_I2S_IN_A>;
+ };
+ };
+
+ /* Analog Audio */
+ dai-link-7 {
+ sound-dai = <&tdmif_b>;
+ dai-format = "i2s";
+ dai-tdm-slot-tx-mask-0 = <1 1>;
+ mclk-fs = <256>;
+
+ codec {
+ sound-dai = <&wm8960>;
+ };
+ };
+
+ /* hdmi glue */
+ dai-link-8 {
+ sound-dai = <&tohdmitx TOHDMITX_I2S_OUT>;
+
+ codec {
+ sound-dai = <&hdmi_tx>;
+ };
+ };
+ };
+
+ reg_main_1v8: regulator-main-1v8 {
+ compatible = "regulator-fixed";
+ regulator-name = "1V8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ vin-supply = <&reg_main_3v3>;
+ };
+
+ reg_main_1v2: regulator-main-1v2 {
+ compatible = "regulator-fixed";
+ regulator-name = "1V2";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ vin-supply = <&reg_main_5v>;
+ };
+
+ reg_main_3v3: regulator-main-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ reg_main_5v: regulator-main-5v {
+ compatible = "regulator-fixed";
+ regulator-name = "5V";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ };
+
+ reg_main_usb: regulator-main-usb {
+ compatible = "regulator-fixed";
+ regulator-name = "USB_PWR";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&reg_main_5v>;
+ };
+
+ backlight: backlight {
+ compatible = "pwm-backlight";
+ pwms = <&pwm_AO_ab 0 10000 0>;
+ power-supply = <&reg_main_usb>;
+ enable-gpios = <&gpio 58 GPIO_ACTIVE_HIGH>;
+ brightness-levels = <0 32 64 128 160 200 255>;
+ default-brightness-level = <6>;
+ };
+
+ panel {
+ compatible = "innolux,n125hce-gn1";
+ power-supply = <&reg_main_3v3>;
+ backlight = <&backlight>;
+ no-hpd;
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&edp_bridge_out>;
+ };
+ };
+ };
+
+ clock_12288: clock_12288 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <12288000>;
+ };
+};
+
+&mipi_analog_dphy {
+ status = "okay";
+};
+
+&mipi_dphy {
+ status = "okay";
+};
+
+&mipi_dsi {
+ status = "okay";
+
+ assigned-clocks = <&clkc CLKID_GP0_PLL>,
+ <&clkc CLKID_MIPI_DSI_PXCLK_SEL>,
+ <&clkc CLKID_MIPI_DSI_PXCLK>,
+ <&clkc CLKID_CTS_ENCL_SEL>,
+ <&clkc CLKID_VCLK2_SEL>;
+ assigned-clock-parents = <0>,
+ <&clkc CLKID_GP0_PLL>,
+ <0>,
+ <&clkc CLKID_VCLK2_DIV1>,
+ <&clkc CLKID_GP0_PLL>;
+ assigned-clock-rates = <936000000>,
+ <0>,
+ <936000000>,
+ <0>,
+ <0>;
+};
+
+&mipi_dsi_panel_port {
+ mipi_dsi_out: endpoint {
+ remote-endpoint = <&edp_bridge_in>;
+ };
+};
+
+&cecb_AO {
+ status = "okay";
+};
+
+&ethmac {
+ status = "okay";
+};
+
+&hdmi_tx {
+ status = "okay";
+};
+
+&hdmi_tx_tmds_port {
+ hdmi_tx_tmds_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+};
+
+&pwm_AO_ab {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pwm_ao_a_pins>;
+ status = "okay";
+};
+
+&i2c0 {
+ status = "okay";
+};
+
+&i2c3 {
+ status = "okay";
+
+ edp_bridge: bridge@2c {
+ compatible = "ti,sn65dsi86";
+ reg = <0x2c>;
+ enable-gpios = <&gpio GPIOX_10 GPIO_ACTIVE_HIGH>; // PIN_24 / GPIO8
+ vccio-supply = <&reg_main_1v8>;
+ vpll-supply = <&reg_main_1v8>;
+ vcca-supply = <&reg_main_1v2>;
+ vcc-supply = <&reg_main_1v2>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ edp_bridge_in: endpoint {
+ remote-endpoint = <&mipi_dsi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ edp_bridge_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
+};
+
+&i2c2 {
+ status = "okay";
+
+ wm8960: codec@1a {
+ compatible = "wlf,wm8960";
+ reg = <0x1a>;
+ clocks = <&clock_12288>;
+ clock-names = "mclk";
+ #sound-dai-cells = <0>;
+ wlf,shared-lrclk;
+ };
+
+ rtc@68 {
+ compatible = "nxp,pcf8523";
+ reg = <0x68>;
+ };
+};
+
+&pcie {
+ status = "okay";
+};
+
+&sd_emmc_b {
+ status = "okay";
+};
+
+&tdmif_a {
+ status = "okay";
+};
+
+&tdmout_a {
+ status = "okay";
+};
+
+&tdmif_b {
+ pinctrl-0 = <&tdm_b_dout0_pins>, <&tdm_b_fs_pins>, <&tdm_b_sclk_pins>, <&tdm_b_din1_pins>;
+ pinctrl-names = "default";
+
+ assigned-clocks = <&clkc_audio AUD_CLKID_TDM_SCLK_PAD1>,
+ <&clkc_audio AUD_CLKID_TDM_LRCLK_PAD1>;
+ assigned-clock-parents = <&clkc_audio AUD_CLKID_MST_B_SCLK>,
+ <&clkc_audio AUD_CLKID_MST_B_LRCLK>;
+ assigned-clock-rates = <0>, <0>;
+};
+
+&tdmin_b {
+ status = "okay";
+};
+
+&toddr_a {
+ status = "okay";
+};
+
+&toddr_b {
+ status = "okay";
+};
+
+&toddr_c {
+ status = "okay";
+};
+
+&tohdmitx {
+ status = "okay";
+};
+
+&usb {
+ dr_mode = "host";
+
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-khadas-vim3-ts050.dtso b/arch/arm64/boot/dts/amlogic/meson-khadas-vim3-ts050.dtso
new file mode 100644
index 00000000000000..a41b4e619580f7
--- /dev/null
+++ b/arch/arm64/boot/dts/amlogic/meson-khadas-vim3-ts050.dtso
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (c) 2019 BayLibre, SAS
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/clock/g12a-clkc.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/interrupt-controller/amlogic,meson-g12a-gpio-intc.h>
+
+/dts-v1/;
+/plugin/;
+
+/*
+ * Enable Khadas TS050 DSI Panel + Touch Controller
+ * on Khadas VIM3 (A311D) and VIM3L (S905D3)
+ */
+
+&{/} {
+ panel_backlight: backlight {
+ compatible = "pwm-backlight";
+ pwms = <&pwm_AO_cd 0 25000 0>;
+ brightness-levels = <0 255>;
+ num-interpolated-steps = <255>;
+ default-brightness-level = <200>;
+ };
+};
+
+&i2c3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ pinctrl-0 = <&i2c3_sda_a_pins>, <&i2c3_sck_a_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ touch-controller@38 {
+ compatible = "edt,edt-ft5206";
+ reg = <0x38>;
+ interrupt-parent = <&gpio_intc>;
+ interrupts = <IRQID_GPIOA_5 IRQ_TYPE_EDGE_FALLING>;
+ reset-gpios = <&gpio_expander 6 GPIO_ACTIVE_LOW>;
+ touchscreen-size-x = <1080>;
+ touchscreen-size-y = <1920>;
+ status = "okay";
+ };
+};
+
+&mipi_dsi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "okay";
+
+ assigned-clocks = <&clkc CLKID_GP0_PLL>,
+ <&clkc CLKID_MIPI_DSI_PXCLK_SEL>,
+ <&clkc CLKID_MIPI_DSI_PXCLK>,
+ <&clkc CLKID_CTS_ENCL_SEL>,
+ <&clkc CLKID_VCLK2_SEL>;
+ assigned-clock-parents = <0>,
+ <&clkc CLKID_GP0_PLL>,
+ <0>,
+ <&clkc CLKID_VCLK2_DIV1>,
+ <&clkc CLKID_GP0_PLL>;
+ assigned-clock-rates = <960000000>,
+ <0>,
+ <960000000>,
+ <0>,
+ <0>;
+
+ panel@0 {
+ compatible = "khadas,ts050";
+ reset-gpios = <&gpio_expander 0 GPIO_ACTIVE_LOW>;
+ enable-gpios = <&gpio_expander 1 GPIO_ACTIVE_HIGH>;
+ power-supply = <&vcc_3v3>;
+ backlight = <&panel_backlight>;
+ reg = <0>;
+
+ port {
+ mipi_in_panel: endpoint {
+ remote-endpoint = <&mipi_out_panel>;
+ };
+ };
+ };
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@1 {
+ mipi_out_panel: endpoint {
+ remote-endpoint = <&mipi_in_panel>;
+ };
+ };
+ };
+};
+
+&mipi_analog_dphy {
+ status = "okay";
+};
+
+&mipi_dphy {
+ status = "okay";
+};
+
+&pwm_AO_cd {
+ pinctrl-0 = <&pwm_ao_c_6_pins>, <&pwm_ao_d_e_pins>;
+};
diff --git a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
index ce90b35686a212..10896f9df682d8 100644
--- a/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-s4.dtsi
@@ -65,10 +65,15 @@
#clock-cells = <0>;
};
- pwrc: power-controller {
- compatible = "amlogic,meson-s4-pwrc";
- #power-domain-cells = <1>;
- status = "okay";
+ firmware {
+ sm: secure-monitor {
+ compatible = "amlogic,meson-gxbb-sm";
+
+ pwrc: power-controller {
+ compatible = "amlogic,meson-s4-pwrc";
+ #power-domain-cells = <1>;
+ };
+ };
};
soc {
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
index 3c42240e78e245..4aaf5a0c1ed8af 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
@@ -41,7 +41,7 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 267 IRQ_TYPE_LEVEL_HIGH>;
fsl,usbphy = <&usbphy1>;
fsl,usbmisc = <&usbmisc1 0>;
- clocks = <&usb2_lpcg 0>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_6>;
ahb-burst-config = <0x0>;
tx-burst-size-dword = <0x10>;
rx-burst-size-dword = <0x10>;
@@ -58,7 +58,7 @@ conn_subsys: bus@5b000000 {
usbphy1: usbphy@5b100000 {
compatible = "fsl,imx7ulp-usbphy";
reg = <0x5b100000 0x1000>;
- clocks = <&usb2_lpcg 1>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_7>;
power-domains = <&pd IMX_SC_R_USB_0_PHY>;
status = "disabled";
};
@@ -67,8 +67,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b010000 0x10000>;
clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>,
- <&sdhc0_lpcg IMX_LPCG_CLK_0>,
- <&sdhc0_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc0_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_0>;
status = "disabled";
@@ -78,8 +78,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b020000 0x10000>;
clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>,
- <&sdhc1_lpcg IMX_LPCG_CLK_0>,
- <&sdhc1_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc1_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc1_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_1>;
fsl,tuning-start-tap = <20>;
@@ -91,8 +91,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b030000 0x10000>;
clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>,
- <&sdhc2_lpcg IMX_LPCG_CLK_0>,
- <&sdhc2_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc2_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc2_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_2>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
index cab3468b1875ee..f7a91d43a0ffe1 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
@@ -28,8 +28,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi0_lpcg 0>,
- <&spi0_lpcg 1>;
+ clocks = <&spi0_lpcg IMX_LPCG_CLK_0>,
+ <&spi0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -44,8 +44,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi1_lpcg 0>,
- <&spi1_lpcg 1>;
+ clocks = <&spi1_lpcg IMX_LPCG_CLK_0>,
+ <&spi1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -60,8 +60,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi2_lpcg 0>,
- <&spi2_lpcg 1>;
+ clocks = <&spi2_lpcg IMX_LPCG_CLK_0>,
+ <&spi2_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -76,8 +76,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi3_lpcg 0>,
- <&spi3_lpcg 1>;
+ clocks = <&spi3_lpcg IMX_LPCG_CLK_0>,
+ <&spi3_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -145,8 +145,8 @@ dma_subsys: bus@5a000000 {
compatible = "fsl,imx8qxp-pwm", "fsl,imx27-pwm";
reg = <0x5a190000 0x1000>;
interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&adma_pwm_lpcg 1>,
- <&adma_pwm_lpcg 0>;
+ clocks = <&adma_pwm_lpcg IMX_LPCG_CLK_4>,
+ <&adma_pwm_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -355,8 +355,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a880000 0x10000>;
interrupts = <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc0_lpcg 0>,
- <&adc0_lpcg 1>;
+ clocks = <&adc0_lpcg IMX_LPCG_CLK_0>,
+ <&adc0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -370,8 +370,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a890000 0x10000>;
interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc1_lpcg 0>,
- <&adc1_lpcg 1>;
+ clocks = <&adc1_lpcg IMX_LPCG_CLK_0>,
+ <&adc1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -384,8 +384,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a8d0000 0x10000>;
interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
@@ -405,8 +405,8 @@ dma_subsys: bus@5a000000 {
* CAN1 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
@@ -426,8 +426,8 @@ dma_subsys: bus@5a000000 {
* CAN2 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
index 7e510b21bbac55..764c1a08e3b118 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
@@ -25,8 +25,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d000000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm0_lpcg 4>,
- <&pwm0_lpcg 1>;
+ clocks = <&pwm0_lpcg IMX_LPCG_CLK_6>,
+ <&pwm0_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -38,8 +38,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d010000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm1_lpcg 4>,
- <&pwm1_lpcg 1>;
+ clocks = <&pwm1_lpcg IMX_LPCG_CLK_6>,
+ <&pwm1_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -51,8 +51,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d020000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm2_lpcg 4>,
- <&pwm2_lpcg 1>;
+ clocks = <&pwm2_lpcg IMX_LPCG_CLK_6>,
+ <&pwm2_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -64,8 +64,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d030000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm3_lpcg 4>,
- <&pwm3_lpcg 1>;
+ clocks = <&pwm3_lpcg IMX_LPCG_CLK_6>,
+ <&pwm3_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
index 41c79d2ebdd620..f24b14744799e1 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
@@ -14,6 +14,7 @@
pinctrl-0 = <&pinctrl_usbcon1>;
type = "micro";
label = "otg";
+ vbus-supply = <&reg_usb1_vbus>;
id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
port {
@@ -183,7 +184,6 @@
};
&usb3_phy0 {
- vbus-supply = <&reg_usb1_vbus>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
index d5c400b355af56..f5491a608b2f37 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
@@ -14,6 +14,7 @@
pinctrl-0 = <&pinctrl_usbcon1>;
type = "micro";
label = "otg";
+ vbus-supply = <&reg_usb1_vbus>;
id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
port {
@@ -202,7 +203,6 @@
};
&usb3_phy0 {
- vbus-supply = <&reg_usb1_vbus>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index bfc5c81a5bd4eb..8141926e4ef142 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -1672,7 +1672,7 @@
<&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF_ROOT>,
<&clk IMX8MP_CLK_MEDIA_AXI_ROOT>;
clock-names = "pclk", "wrap", "phy", "axi";
- assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM1_PIX>,
+ assigned-clocks = <&clk IMX8MP_CLK_MEDIA_CAM2_PIX>,
<&clk IMX8MP_CLK_MEDIA_MIPI_PHY1_REF>;
assigned-clock-parents = <&clk IMX8MP_SYS_PLL2_1000M>,
<&clk IMX8MP_CLK_24M>;
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
index 11626fae5f97f3..aa9f28c4431d02 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
@@ -153,15 +153,15 @@
};
&flexcan2 {
- clocks = <&can1_lpcg 1>,
- <&can1_lpcg 0>;
+ clocks = <&can1_lpcg IMX_LPCG_CLK_4>,
+ <&can1_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_1 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};
&flexcan3 {
- clocks = <&can2_lpcg 1>,
- <&can2_lpcg 0>;
+ clocks = <&can2_lpcg IMX_LPCG_CLK_4>,
+ <&can2_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_2 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};
diff --git a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts
index 0c38f7b5176377..234e3b23d7a8d3 100644
--- a/arch/arm64/boot/dts/mediatek/mt2712-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt2712-evb.dts
@@ -129,7 +129,7 @@
};
&pio {
- eth_default: eth_default {
+ eth_default: eth-default-pins {
tx_pins {
pinmux = <MT2712_PIN_71_GBE_TXD3__FUNC_GBE_TXD3>,
<MT2712_PIN_72_GBE_TXD2__FUNC_GBE_TXD2>,
@@ -156,7 +156,7 @@
};
};
- eth_sleep: eth_sleep {
+ eth_sleep: eth-sleep-pins {
tx_pins {
pinmux = <MT2712_PIN_71_GBE_TXD3__FUNC_GPIO71>,
<MT2712_PIN_72_GBE_TXD2__FUNC_GPIO72>,
@@ -182,14 +182,14 @@
};
};
- usb0_id_pins_float: usb0_iddig {
+ usb0_id_pins_float: usb0-iddig-pins {
pins_iddig {
pinmux = <MT2712_PIN_12_IDDIG_P0__FUNC_IDDIG_A>;
bias-pull-up;
};
};
- usb1_id_pins_float: usb1_iddig {
+ usb1_id_pins_float: usb1-iddig-pins {
pins_iddig {
pinmux = <MT2712_PIN_14_IDDIG_P1__FUNC_IDDIG_B>;
bias-pull-up;
diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
index 6d218caa198cfd..082672efba0a34 100644
--- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi
@@ -249,10 +249,11 @@
#clock-cells = <1>;
};
- infracfg: syscon@10001000 {
+ infracfg: clock-controller@10001000 {
compatible = "mediatek,mt2712-infracfg", "syscon";
reg = <0 0x10001000 0 0x1000>;
#clock-cells = <1>;
+ #reset-cells = <1>;
};
pericfg: syscon@10003000 {
diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
index 3ee9266fa8e985..917fa39a74f8d7 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
@@ -252,7 +252,7 @@
clock-names = "hif_sel";
};
- cir: cir@10009000 {
+ cir: ir-receiver@10009000 {
compatible = "mediatek,mt7622-cir";
reg = <0 0x10009000 0 0x1000>;
interrupts = <GIC_SPI 175 IRQ_TYPE_LEVEL_LOW>;
@@ -283,16 +283,14 @@
};
};
- apmixedsys: apmixedsys@10209000 {
- compatible = "mediatek,mt7622-apmixedsys",
- "syscon";
+ apmixedsys: clock-controller@10209000 {
+ compatible = "mediatek,mt7622-apmixedsys";
reg = <0 0x10209000 0 0x1000>;
#clock-cells = <1>;
};
- topckgen: topckgen@10210000 {
- compatible = "mediatek,mt7622-topckgen",
- "syscon";
+ topckgen: clock-controller@10210000 {
+ compatible = "mediatek,mt7622-topckgen";
reg = <0 0x10210000 0 0x1000>;
#clock-cells = <1>;
};
@@ -515,7 +513,6 @@
<&pericfg CLK_PERI_AUXADC_PD>;
clock-names = "therm", "auxadc";
resets = <&pericfg MT7622_PERI_THERM_SW_RST>;
- reset-names = "therm";
mediatek,auxadc = <&auxadc>;
mediatek,apmixedsys = <&apmixedsys>;
nvmem-cells = <&thermal_calibration>;
@@ -734,9 +731,8 @@
power-domains = <&scpsys MT7622_POWER_DOMAIN_WB>;
};
- ssusbsys: ssusbsys@1a000000 {
- compatible = "mediatek,mt7622-ssusbsys",
- "syscon";
+ ssusbsys: clock-controller@1a000000 {
+ compatible = "mediatek,mt7622-ssusbsys";
reg = <0 0x1a000000 0 0x1000>;
#clock-cells = <1>;
#reset-cells = <1>;
@@ -793,9 +789,8 @@
};
};
- pciesys: pciesys@1a100800 {
- compatible = "mediatek,mt7622-pciesys",
- "syscon";
+ pciesys: clock-controller@1a100800 {
+ compatible = "mediatek,mt7622-pciesys";
reg = <0 0x1a100800 0 0x1000>;
#clock-cells = <1>;
#reset-cells = <1>;
@@ -921,12 +916,13 @@
};
};
- hifsys: syscon@1af00000 {
- compatible = "mediatek,mt7622-hifsys", "syscon";
+ hifsys: clock-controller@1af00000 {
+ compatible = "mediatek,mt7622-hifsys";
reg = <0 0x1af00000 0 0x70>;
+ #clock-cells = <1>;
};
- ethsys: syscon@1b000000 {
+ ethsys: clock-controller@1b000000 {
compatible = "mediatek,mt7622-ethsys",
"syscon";
reg = <0 0x1b000000 0 0x1000>;
@@ -966,9 +962,7 @@
};
eth: ethernet@1b100000 {
- compatible = "mediatek,mt7622-eth",
- "mediatek,mt2701-eth",
- "syscon";
+ compatible = "mediatek,mt7622-eth";
reg = <0 0x1b100000 0 0x20000>;
interrupts = <GIC_SPI 223 IRQ_TYPE_LEVEL_LOW>,
<GIC_SPI 224 IRQ_TYPE_LEVEL_LOW>,
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
index e04b1c0c0ebbfb..ed79ad1ae8716e 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts
@@ -146,19 +146,19 @@
&cpu_thermal {
cooling-maps {
- cpu-active-high {
+ map-cpu-active-high {
/* active: set fan to cooling level 2 */
cooling-device = <&fan 2 2>;
trip = <&cpu_trip_active_high>;
};
- cpu-active-med {
+ map-cpu-active-med {
/* active: set fan to cooling level 1 */
cooling-device = <&fan 1 1>;
trip = <&cpu_trip_active_med>;
};
- cpu-active-low {
+ map-cpu-active-low {
/* active: set fan to cooling level 0 */
cooling-device = <&fan 0 0>;
trip = <&cpu_trip_active_low>;
diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
index b3f416b9a7a4da..559990dcd1d179 100644
--- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
@@ -332,9 +332,8 @@
reg = <0 0x1100c800 0 0x800>;
interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&infracfg CLK_INFRA_THERM_CK>,
- <&infracfg CLK_INFRA_ADC_26M_CK>,
- <&infracfg CLK_INFRA_ADC_FRC_CK>;
- clock-names = "therm", "auxadc", "adc_32k";
+ <&infracfg CLK_INFRA_ADC_26M_CK>;
+ clock-names = "therm", "auxadc";
nvmem-cells = <&thermal_calibration>;
nvmem-cell-names = "calibration-data";
#thermal-sensor-cells = <1>;
@@ -492,8 +491,6 @@
compatible = "mediatek,mt7986-ethsys",
"syscon";
reg = <0 0x15000000 0 0x1000>;
- #address-cells = <1>;
- #size-cells = <1>;
#clock-cells = <1>;
#reset-cells = <1>;
};
@@ -556,7 +553,6 @@
<&topckgen CLK_TOP_SGM_325M_SEL>;
assigned-clock-parents = <&apmixedsys CLK_APMIXED_NET2PLL>,
<&apmixedsys CLK_APMIXED_SGMPLL>;
- #reset-cells = <1>;
#address-cells = <1>;
#size-cells = <0>;
mediatek,ethsys = <&ethsys>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
index 6bd7424ef66c53..100191c6453ba3 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi
@@ -433,7 +433,6 @@
};
&mt6358_vgpu_reg {
- regulator-min-microvolt = <625000>;
regulator-max-microvolt = <900000>;
regulator-coupled-with = <&mt6358_vsram_gpu_reg>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
index 93dfbf1302315d..774ae5d9143f1e 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
@@ -1637,6 +1637,7 @@
compatible = "mediatek,mt8183-mfgcfg", "syscon";
reg = <0 0x13000000 0 0x1000>;
#clock-cells = <1>;
+ power-domains = <&spm MT8183_POWER_DOMAIN_MFG_ASYNC>;
};
gpu: gpu@13040000 {
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi b/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi
index 3dea28f1d80612..1807e9d6cb0e41 100644
--- a/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi
@@ -1296,7 +1296,7 @@
* regulator coupling requirements.
*/
regulator-name = "ppvar_dvdd_vgpu";
- regulator-min-microvolt = <600000>;
+ regulator-min-microvolt = <500000>;
regulator-max-microvolt = <950000>;
regulator-ramp-delay = <6250>;
regulator-enable-ramp-delay = <200>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi
index 9b738f6a5d213a..7a704246678f03 100644
--- a/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8192-asurada.dtsi
@@ -1421,7 +1421,7 @@
mt6315_6_vbuck1: vbuck1 {
regulator-compatible = "vbuck1";
regulator-name = "Vbcpu";
- regulator-min-microvolt = <300000>;
+ regulator-min-microvolt = <400000>;
regulator-max-microvolt = <1193750>;
regulator-enable-ramp-delay = <256>;
regulator-allowed-modes = <0 1 2>;
@@ -1431,7 +1431,7 @@
mt6315_6_vbuck3: vbuck3 {
regulator-compatible = "vbuck3";
regulator-name = "Vlcpu";
- regulator-min-microvolt = <300000>;
+ regulator-min-microvolt = <400000>;
regulator-max-microvolt = <1193750>;
regulator-enable-ramp-delay = <256>;
regulator-allowed-modes = <0 1 2>;
@@ -1448,7 +1448,7 @@
mt6315_7_vbuck1: vbuck1 {
regulator-compatible = "vbuck1";
regulator-name = "Vgpu";
- regulator-min-microvolt = <606250>;
+ regulator-min-microvolt = <400000>;
regulator-max-microvolt = <800000>;
regulator-enable-ramp-delay = <256>;
regulator-allowed-modes = <0 1 2>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi
index 05e401670bced3..84cbdf6e9eb0ca 100644
--- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi
@@ -1464,6 +1464,7 @@
reg = <0 0x14001000 0 0x1000>;
interrupts = <GIC_SPI 252 IRQ_TYPE_LEVEL_HIGH 0>;
clocks = <&mmsys CLK_MM_DISP_MUTEX0>;
+ mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0x1000 0x1000>;
mediatek,gce-events = <CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_0>,
<CMDQ_EVENT_DISP_STREAM_DONE_ENG_EVENT_1>;
power-domains = <&spm MT8192_POWER_DOMAIN_DISP>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
index f94c07f8b9334e..4a11918da37048 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi
@@ -264,6 +264,38 @@
status = "okay";
};
+&cpu0 {
+ cpu-supply = <&mt6359_vcore_buck_reg>;
+};
+
+&cpu1 {
+ cpu-supply = <&mt6359_vcore_buck_reg>;
+};
+
+&cpu2 {
+ cpu-supply = <&mt6359_vcore_buck_reg>;
+};
+
+&cpu3 {
+ cpu-supply = <&mt6359_vcore_buck_reg>;
+};
+
+&cpu4 {
+ cpu-supply = <&mt6315_6_vbuck1>;
+};
+
+&cpu5 {
+ cpu-supply = <&mt6315_6_vbuck1>;
+};
+
+&cpu6 {
+ cpu-supply = <&mt6315_6_vbuck1>;
+};
+
+&cpu7 {
+ cpu-supply = <&mt6315_6_vbuck1>;
+};
+
&dp_intf0 {
status = "okay";
@@ -1214,7 +1246,7 @@
mt6315_6_vbuck1: vbuck1 {
regulator-compatible = "vbuck1";
regulator-name = "Vbcpu";
- regulator-min-microvolt = <300000>;
+ regulator-min-microvolt = <400000>;
regulator-max-microvolt = <1193750>;
regulator-enable-ramp-delay = <256>;
regulator-ramp-delay = <6250>;
@@ -1232,7 +1264,7 @@
mt6315_7_vbuck1: vbuck1 {
regulator-compatible = "vbuck1";
regulator-name = "Vgpu";
- regulator-min-microvolt = <625000>;
+ regulator-min-microvolt = <400000>;
regulator-max-microvolt = <1193750>;
regulator-enable-ramp-delay = <256>;
regulator-ramp-delay = <6250>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
index ea6dc220e1cce2..5d8b68f86ce446 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
@@ -2028,6 +2028,7 @@
compatible = "mediatek,mt8195-vppsys0", "syscon";
reg = <0 0x14000000 0 0x1000>;
#clock-cells = <1>;
+ mediatek,gce-client-reg = <&gce1 SUBSYS_1400XXXX 0 0x1000>;
};
dma-controller@14001000 {
@@ -2251,6 +2252,7 @@
compatible = "mediatek,mt8195-vppsys1", "syscon";
reg = <0 0x14f00000 0 0x1000>;
#clock-cells = <1>;
+ mediatek,gce-client-reg = <&gce1 SUBSYS_14f0XXXX 0 0x1000>;
};
mutex@14f01000 {
@@ -3080,6 +3082,7 @@
reg = <0 0x1c01a000 0 0x1000>;
mboxes = <&gce0 0 CMDQ_THR_PRIO_4>;
#clock-cells = <1>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0xa000 0x1000>;
};
@@ -3261,6 +3264,7 @@
interrupts = <GIC_SPI 658 IRQ_TYPE_LEVEL_HIGH 0>;
power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS0>;
clocks = <&vdosys0 CLK_VDO0_DISP_MUTEX0>;
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c01XXXX 0x6000 0x1000>;
mediatek,gce-events = <CMDQ_EVENT_VDO0_DISP_STREAM_DONE_0>;
};
@@ -3331,6 +3335,7 @@
power-domains = <&spm MT8195_POWER_DOMAIN_VDOSYS1>;
clocks = <&vdosys1 CLK_VDO1_DISP_MUTEX>;
clock-names = "vdo1_mutex";
+ mediatek,gce-client-reg = <&gce0 SUBSYS_1c10XXXX 0x1000 0x1000>;
mediatek,gce-events = <CMDQ_EVENT_VDO1_STREAM_DONE_ENG_0>;
};
diff --git a/arch/arm64/boot/dts/microchip/sparx5.dtsi b/arch/arm64/boot/dts/microchip/sparx5.dtsi
index 24075cd9142050..c3029e0abacc05 100644
--- a/arch/arm64/boot/dts/microchip/sparx5.dtsi
+++ b/arch/arm64/boot/dts/microchip/sparx5.dtsi
@@ -447,7 +447,7 @@
pinctrl-names = "default";
#address-cells = <1>;
#size-cells = <0>;
- reg = <0x6 0x110102d4 0x24>;
+ reg = <0x6 0x110102f8 0x24>;
};
mdio3: mdio@61101031c {
@@ -460,7 +460,7 @@
reg = <0x6 0x1101031c 0x24>;
};
- serdes: serdes@10808000 {
+ serdes: serdes@610808000 {
compatible = "microchip,sparx5-serdes";
#phy-cells = <1>;
clocks = <&sys_clk>;
diff --git a/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi b/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi
index f3e226de5e5e91..2c5574734c9e32 100644
--- a/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi
+++ b/arch/arm64/boot/dts/microchip/sparx5_pcb134_board.dtsi
@@ -15,234 +15,234 @@
leds {
compatible = "gpio-leds";
- led@0 {
+ led-0 {
label = "twr0:green";
gpios = <&sgpio_out0 8 0 GPIO_ACTIVE_LOW>;
};
- led@1 {
+ led-1 {
label = "twr0:yellow";
gpios = <&sgpio_out0 8 1 GPIO_ACTIVE_LOW>;
};
- led@2 {
+ led-2 {
label = "twr1:green";
gpios = <&sgpio_out0 9 0 GPIO_ACTIVE_LOW>;
};
- led@3 {
+ led-3 {
label = "twr1:yellow";
gpios = <&sgpio_out0 9 1 GPIO_ACTIVE_LOW>;
};
- led@4 {
+ led-4 {
label = "twr2:green";
gpios = <&sgpio_out0 10 0 GPIO_ACTIVE_LOW>;
};
- led@5 {
+ led-5 {
label = "twr2:yellow";
gpios = <&sgpio_out0 10 1 GPIO_ACTIVE_LOW>;
};
- led@6 {
+ led-6 {
label = "twr3:green";
gpios = <&sgpio_out0 11 0 GPIO_ACTIVE_LOW>;
};
- led@7 {
+ led-7 {
label = "twr3:yellow";
gpios = <&sgpio_out0 11 1 GPIO_ACTIVE_LOW>;
};
- led@8 {
+ led-8 {
label = "eth12:green";
gpios = <&sgpio_out0 12 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@9 {
+ led-9 {
label = "eth12:yellow";
gpios = <&sgpio_out0 12 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@10 {
+ led-10 {
label = "eth13:green";
gpios = <&sgpio_out0 13 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@11 {
+ led-11 {
label = "eth13:yellow";
gpios = <&sgpio_out0 13 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@12 {
+ led-12 {
label = "eth14:green";
gpios = <&sgpio_out0 14 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@13 {
+ led-13 {
label = "eth14:yellow";
gpios = <&sgpio_out0 14 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@14 {
+ led-14 {
label = "eth15:green";
gpios = <&sgpio_out0 15 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@15 {
+ led-15 {
label = "eth15:yellow";
gpios = <&sgpio_out0 15 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@16 {
+ led-16 {
label = "eth48:green";
gpios = <&sgpio_out1 16 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@17 {
+ led-17 {
label = "eth48:yellow";
gpios = <&sgpio_out1 16 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@18 {
+ led-18 {
label = "eth49:green";
gpios = <&sgpio_out1 17 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@19 {
+ led-19 {
label = "eth49:yellow";
gpios = <&sgpio_out1 17 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@20 {
+ led-20 {
label = "eth50:green";
gpios = <&sgpio_out1 18 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@21 {
+ led-21 {
label = "eth50:yellow";
gpios = <&sgpio_out1 18 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@22 {
+ led-22 {
label = "eth51:green";
gpios = <&sgpio_out1 19 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@23 {
+ led-23 {
label = "eth51:yellow";
gpios = <&sgpio_out1 19 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@24 {
+ led-24 {
label = "eth52:green";
gpios = <&sgpio_out1 20 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@25 {
+ led-25 {
label = "eth52:yellow";
gpios = <&sgpio_out1 20 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@26 {
+ led-26 {
label = "eth53:green";
gpios = <&sgpio_out1 21 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@27 {
+ led-27 {
label = "eth53:yellow";
gpios = <&sgpio_out1 21 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@28 {
+ led-28 {
label = "eth54:green";
gpios = <&sgpio_out1 22 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@29 {
+ led-29 {
label = "eth54:yellow";
gpios = <&sgpio_out1 22 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@30 {
+ led-30 {
label = "eth55:green";
gpios = <&sgpio_out1 23 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@31 {
+ led-31 {
label = "eth55:yellow";
gpios = <&sgpio_out1 23 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@32 {
+ led-32 {
label = "eth56:green";
gpios = <&sgpio_out1 24 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@33 {
+ led-33 {
label = "eth56:yellow";
gpios = <&sgpio_out1 24 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@34 {
+ led-34 {
label = "eth57:green";
gpios = <&sgpio_out1 25 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@35 {
+ led-35 {
label = "eth57:yellow";
gpios = <&sgpio_out1 25 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@36 {
+ led-36 {
label = "eth58:green";
gpios = <&sgpio_out1 26 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@37 {
+ led-37 {
label = "eth58:yellow";
gpios = <&sgpio_out1 26 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@38 {
+ led-38 {
label = "eth59:green";
gpios = <&sgpio_out1 27 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@39 {
+ led-39 {
label = "eth59:yellow";
gpios = <&sgpio_out1 27 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@40 {
+ led-40 {
label = "eth60:green";
gpios = <&sgpio_out1 28 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@41 {
+ led-41 {
label = "eth60:yellow";
gpios = <&sgpio_out1 28 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@42 {
+ led-42 {
label = "eth61:green";
gpios = <&sgpio_out1 29 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@43 {
+ led-43 {
label = "eth61:yellow";
gpios = <&sgpio_out1 29 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@44 {
+ led-44 {
label = "eth62:green";
gpios = <&sgpio_out1 30 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@45 {
+ led-45 {
label = "eth62:yellow";
gpios = <&sgpio_out1 30 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@46 {
+ led-46 {
label = "eth63:green";
gpios = <&sgpio_out1 31 0 GPIO_ACTIVE_HIGH>;
default-state = "off";
};
- led@47 {
+ led-47 {
label = "eth63:yellow";
gpios = <&sgpio_out1 31 1 GPIO_ACTIVE_HIGH>;
default-state = "off";
@@ -274,15 +274,6 @@
&spi0 {
status = "okay";
- flash@0 {
- compatible = "jedec,spi-nor";
- spi-max-frequency = <8000000>;
- reg = <0>;
- };
-};
-
-&spi0 {
- status = "okay";
spi@0 {
compatible = "spi-mux";
mux-controls = <&mux>;
@@ -395,13 +386,13 @@
};
&axi {
- i2c0_imux: i2c0-imux@0 {
+ i2c0_imux: i2c-mux-0 {
compatible = "i2c-mux-pinctrl";
#address-cells = <1>;
#size-cells = <0>;
i2c-parent = <&i2c0>;
};
- i2c0_emux: i2c0-emux@0 {
+ i2c0_emux: i2c-mux-1 {
compatible = "i2c-mux-gpio";
#address-cells = <1>;
#size-cells = <0>;
@@ -427,62 +418,62 @@
pinctrl-10 = <&i2cmux_10>;
pinctrl-11 = <&i2cmux_11>;
pinctrl-12 = <&i2cmux_pins_i>;
- i2c_sfp1: i2c_sfp1 {
+ i2c_sfp1: i2c@0 {
reg = <0x0>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp2: i2c_sfp2 {
+ i2c_sfp2: i2c@1 {
reg = <0x1>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp3: i2c_sfp3 {
+ i2c_sfp3: i2c@2 {
reg = <0x2>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp4: i2c_sfp4 {
+ i2c_sfp4: i2c@3 {
reg = <0x3>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp5: i2c_sfp5 {
+ i2c_sfp5: i2c@4 {
reg = <0x4>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp6: i2c_sfp6 {
+ i2c_sfp6: i2c@5 {
reg = <0x5>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp7: i2c_sfp7 {
+ i2c_sfp7: i2c@6 {
reg = <0x6>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp8: i2c_sfp8 {
+ i2c_sfp8: i2c@7 {
reg = <0x7>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp9: i2c_sfp9 {
+ i2c_sfp9: i2c@8 {
reg = <0x8>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp10: i2c_sfp10 {
+ i2c_sfp10: i2c@9 {
reg = <0x9>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp11: i2c_sfp11 {
+ i2c_sfp11: i2c@a {
reg = <0xa>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp12: i2c_sfp12 {
+ i2c_sfp12: i2c@b {
reg = <0xb>;
#address-cells = <1>;
#size-cells = <0>;
@@ -495,42 +486,42 @@
&gpio 61 GPIO_ACTIVE_HIGH
&gpio 54 GPIO_ACTIVE_HIGH>;
idle-state = <0x8>;
- i2c_sfp13: i2c_sfp13 {
+ i2c_sfp13: i2c@0 {
reg = <0x0>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp14: i2c_sfp14 {
+ i2c_sfp14: i2c@1 {
reg = <0x1>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp15: i2c_sfp15 {
+ i2c_sfp15: i2c@2 {
reg = <0x2>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp16: i2c_sfp16 {
+ i2c_sfp16: i2c@3 {
reg = <0x3>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp17: i2c_sfp17 {
+ i2c_sfp17: i2c@4 {
reg = <0x4>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp18: i2c_sfp18 {
+ i2c_sfp18: i2c@5 {
reg = <0x5>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp19: i2c_sfp19 {
+ i2c_sfp19: i2c@6 {
reg = <0x6>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp20: i2c_sfp20 {
+ i2c_sfp20: i2c@7 {
reg = <0x7>;
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi b/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi
index 82ce007d99592c..af2f1831f07f89 100644
--- a/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi
+++ b/arch/arm64/boot/dts/microchip/sparx5_pcb135_board.dtsi
@@ -15,42 +15,42 @@
leds {
compatible = "gpio-leds";
- led@0 {
+ led-0 {
label = "eth60:yellow";
gpios = <&sgpio_out1 28 0 GPIO_ACTIVE_LOW>;
default-state = "off";
};
- led@1 {
+ led-1 {
label = "eth60:green";
gpios = <&sgpio_out1 28 1 GPIO_ACTIVE_LOW>;
default-state = "off";
};
- led@2 {
+ led-2 {
label = "eth61:yellow";
gpios = <&sgpio_out1 29 0 GPIO_ACTIVE_LOW>;
default-state = "off";
};
- led@3 {
+ led-3 {
label = "eth61:green";
gpios = <&sgpio_out1 29 1 GPIO_ACTIVE_LOW>;
default-state = "off";
};
- led@4 {
+ led-4 {
label = "eth62:yellow";
gpios = <&sgpio_out1 30 0 GPIO_ACTIVE_LOW>;
default-state = "off";
};
- led@5 {
+ led-5 {
label = "eth62:green";
gpios = <&sgpio_out1 30 1 GPIO_ACTIVE_LOW>;
default-state = "off";
};
- led@6 {
+ led-6 {
label = "eth63:yellow";
gpios = <&sgpio_out1 31 0 GPIO_ACTIVE_LOW>;
default-state = "off";
};
- led@7 {
+ led-7 {
label = "eth63:green";
gpios = <&sgpio_out1 31 1 GPIO_ACTIVE_LOW>;
default-state = "off";
@@ -89,15 +89,6 @@
&spi0 {
status = "okay";
- flash@0 {
- compatible = "jedec,spi-nor";
- spi-max-frequency = <8000000>;
- reg = <0>;
- };
-};
-
-&spi0 {
- status = "okay";
spi@0 {
compatible = "spi-mux";
mux-controls = <&mux>;
@@ -129,7 +120,7 @@
};
&axi {
- i2c0_imux: i2c0-imux@0 {
+ i2c0_imux: i2c-mux {
compatible = "i2c-mux-pinctrl";
#address-cells = <1>;
#size-cells = <0>;
@@ -146,22 +137,22 @@
pinctrl-2 = <&i2cmux_s31>;
pinctrl-3 = <&i2cmux_s32>;
pinctrl-4 = <&i2cmux_pins_i>;
- i2c_sfp1: i2c_sfp1 {
+ i2c_sfp1: i2c@0 {
reg = <0x0>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp2: i2c_sfp2 {
+ i2c_sfp2: i2c@1 {
reg = <0x1>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp3: i2c_sfp3 {
+ i2c_sfp3: i2c@2 {
reg = <0x2>;
#address-cells = <1>;
#size-cells = <0>;
};
- i2c_sfp4: i2c_sfp4 {
+ i2c_sfp4: i2c@3 {
reg = <0x3>;
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
index f3a6da8b289019..5260c63db0078b 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
@@ -944,6 +944,8 @@ ap_spi_fp: &spi10 {
vddrf-supply = <&pp1300_l2c>;
vddch0-supply = <&pp3300_l10c>;
max-speed = <3200000>;
+
+ qcom,local-bd-address-broken;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi
index 7e7f0f0fb41ba0..41f51d32611107 100644
--- a/arch/arm64/boot/dts/qcom/sc7280.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi
@@ -3707,7 +3707,7 @@
compatible = "qcom,sc7280-adsp-pas";
reg = <0 0x03700000 0 0x100>;
- interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>,
<&adsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
<&adsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>,
<&adsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>,
@@ -3944,7 +3944,7 @@
compatible = "qcom,sc7280-cdsp-pas";
reg = <0 0x0a300000 0 0x10000>;
- interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>,
<&cdsp_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
<&cdsp_smp2p_in 1 IRQ_TYPE_EDGE_RISING>,
<&cdsp_smp2p_in 2 IRQ_TYPE_EDGE_RISING>,
diff --git a/arch/arm64/boot/dts/qcom/sc8180x.dtsi b/arch/arm64/boot/dts/qcom/sc8180x.dtsi
index 32afc78d5b769d..053f7861c3cece 100644
--- a/arch/arm64/boot/dts/qcom/sc8180x.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8180x.dtsi
@@ -2701,7 +2701,7 @@
resets = <&gcc GCC_USB30_SEC_BCR>;
power-domains = <&gcc USB30_SEC_GDSC>;
interrupts-extended = <&intc GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
- <&pdc 7 IRQ_TYPE_LEVEL_HIGH>,
+ <&pdc 40 IRQ_TYPE_LEVEL_HIGH>,
<&pdc 10 IRQ_TYPE_EDGE_BOTH>,
<&pdc 11 IRQ_TYPE_EDGE_BOTH>;
interrupt-names = "hs_phy_irq", "ss_phy_irq",
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
index a5b194813079e9..d0f82e12289e1b 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi
@@ -1774,6 +1774,7 @@
reset-names = "pci";
power-domains = <&gcc PCIE_4_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie4_phy>;
phy-names = "pciephy";
@@ -1872,6 +1873,7 @@
reset-names = "pci";
power-domains = <&gcc PCIE_3B_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie3b_phy>;
phy-names = "pciephy";
@@ -1970,6 +1972,7 @@
reset-names = "pci";
power-domains = <&gcc PCIE_3A_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie3a_phy>;
phy-names = "pciephy";
@@ -2071,6 +2074,7 @@
reset-names = "pci";
power-domains = <&gcc PCIE_2B_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie2b_phy>;
phy-names = "pciephy";
@@ -2169,6 +2173,7 @@
reset-names = "pci";
power-domains = <&gcc PCIE_2A_GDSC>;
+ required-opps = <&rpmhpd_opp_nom>;
phys = <&pcie2a_phy>;
phy-names = "pciephy";
@@ -2641,7 +2646,7 @@
compatible = "qcom,sc8280xp-adsp-pas";
reg = <0 0x03000000 0 0x100>;
- interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 162 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>,
@@ -4977,7 +4982,7 @@
compatible = "qcom,sc8280xp-nsp0-pas";
reg = <0 0x1b300000 0 0x100>;
- interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp0_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp0_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp0_in 2 IRQ_TYPE_EDGE_RISING>,
@@ -5108,7 +5113,7 @@
compatible = "qcom,sc8280xp-nsp1-pas";
reg = <0 0x21300000 0 0x100>;
- interrupts-extended = <&intc GIC_SPI 887 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 887 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp1_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp1_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_nsp1_in 2 IRQ_TYPE_EDGE_RISING>,
diff --git a/arch/arm64/boot/dts/qcom/sm6350.dtsi b/arch/arm64/boot/dts/qcom/sm6350.dtsi
index 24bcec3366efd5..0be053555602c0 100644
--- a/arch/arm64/boot/dts/qcom/sm6350.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6350.dtsi
@@ -1252,7 +1252,7 @@
compatible = "qcom,sm6350-adsp-pas";
reg = <0 0x03000000 0 0x100>;
- interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>,
@@ -1511,7 +1511,7 @@
compatible = "qcom,sm6350-cdsp-pas";
reg = <0 0x08300000 0 0x10000>;
- interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 2 IRQ_TYPE_EDGE_RISING>,
diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi
index 4386f8a9c636c1..f40509d91bbda8 100644
--- a/arch/arm64/boot/dts/qcom/sm6375.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi
@@ -1561,7 +1561,7 @@
compatible = "qcom,sm6375-adsp-pas";
reg = <0 0x0a400000 0 0x100>;
- interrupts-extended = <&intc GIC_SPI 282 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 282 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>,
diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi
index 39bd8f0eba1e65..7f2333c9d17d6d 100644
--- a/arch/arm64/boot/dts/qcom/sm8250.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi
@@ -3062,7 +3062,7 @@
compatible = "qcom,sm8250-slpi-pas";
reg = <0 0x05c00000 0 0x4000>;
- interrupts-extended = <&pdc 9 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&pdc 9 IRQ_TYPE_EDGE_RISING>,
<&smp2p_slpi_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_slpi_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_slpi_in 2 IRQ_TYPE_EDGE_RISING>,
@@ -3766,7 +3766,7 @@
compatible = "qcom,sm8250-cdsp-pas";
reg = <0 0x08300000 0 0x10000>;
- interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&intc GIC_SPI 578 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_cdsp_in 2 IRQ_TYPE_EDGE_RISING>,
@@ -5928,7 +5928,7 @@
compatible = "qcom,sm8250-adsp-pas";
reg = <0 0x17300000 0 0x100>;
- interrupts-extended = <&pdc 6 IRQ_TYPE_LEVEL_HIGH>,
+ interrupts-extended = <&pdc 6 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 0 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 1 IRQ_TYPE_EDGE_RISING>,
<&smp2p_adsp_in 2 IRQ_TYPE_EDGE_RISING>,
diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi
index b86be34a912b94..024d2653cc3075 100644
--- a/arch/arm64/boot/dts/qcom/sm8450.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi
@@ -1777,12 +1777,8 @@
ranges = <0x01000000 0x0 0x00000000 0x0 0x60200000 0x0 0x100000>,
<0x02000000 0x0 0x60300000 0x0 0x60300000 0x0 0x3d00000>;
- /*
- * MSIs for BDF (1:0.0) only works with Device ID 0x5980.
- * Hence, the IDs are swapped.
- */
- msi-map = <0x0 &gic_its 0x5981 0x1>,
- <0x100 &gic_its 0x5980 0x1>;
+ msi-map = <0x0 &gic_its 0x5980 0x1>,
+ <0x100 &gic_its 0x5981 0x1>;
msi-map-mask = <0xff00>;
interrupts = <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
@@ -1900,12 +1896,8 @@
ranges = <0x01000000 0x0 0x00000000 0x0 0x40200000 0x0 0x100000>,
<0x02000000 0x0 0x40300000 0x0 0x40300000 0x0 0x1fd00000>;
- /*
- * MSIs for BDF (1:0.0) only works with Device ID 0x5a00.
- * Hence, the IDs are swapped.
- */
- msi-map = <0x0 &gic_its 0x5a01 0x1>,
- <0x100 &gic_its 0x5a00 0x1>;
+ msi-map = <0x0 &gic_its 0x5a00 0x1>,
+ <0x100 &gic_its 0x5a01 0x1>;
msi-map-mask = <0xff00>;
interrupts = <GIC_SPI 307 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 308 IRQ_TYPE_LEVEL_HIGH>,
diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi
index 3904348075f679..3348bc06db488a 100644
--- a/arch/arm64/boot/dts/qcom/sm8550.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi
@@ -1755,9 +1755,8 @@
<&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_0 0>;
interconnect-names = "pcie-mem", "cpu-pcie";
- /* Entries are reversed due to the unusual ITS DeviceID encoding */
- msi-map = <0x0 &gic_its 0x1401 0x1>,
- <0x100 &gic_its 0x1400 0x1>;
+ msi-map = <0x0 &gic_its 0x1400 0x1>,
+ <0x100 &gic_its 0x1401 0x1>;
iommu-map = <0x0 &apps_smmu 0x1400 0x1>,
<0x100 &apps_smmu 0x1401 0x1>;
@@ -1867,9 +1866,8 @@
<&gem_noc MASTER_APPSS_PROC 0 &cnoc_main SLAVE_PCIE_1 0>;
interconnect-names = "pcie-mem", "cpu-pcie";
- /* Entries are reversed due to the unusual ITS DeviceID encoding */
- msi-map = <0x0 &gic_its 0x1481 0x1>,
- <0x100 &gic_its 0x1480 0x1>;
+ msi-map = <0x0 &gic_its 0x1480 0x1>,
+ <0x100 &gic_its 0x1481 0x1>;
iommu-map = <0x0 &apps_smmu 0x1480 0x1>,
<0x100 &apps_smmu 0x1481 0x1>;
diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi
index ba72d8f3842073..eb117866e59ff8 100644
--- a/arch/arm64/boot/dts/qcom/sm8650.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi
@@ -2274,9 +2274,8 @@
interrupt-map-mask = <0 0 0 0x7>;
#interrupt-cells = <1>;
- /* Entries are reversed due to the unusual ITS DeviceID encoding */
- msi-map = <0x0 &gic_its 0x1401 0x1>,
- <0x100 &gic_its 0x1400 0x1>;
+ msi-map = <0x0 &gic_its 0x1400 0x1>,
+ <0x100 &gic_its 0x1401 0x1>;
msi-map-mask = <0xff00>;
linux,pci-domain = <0>;
@@ -2402,9 +2401,8 @@
interrupt-map-mask = <0 0 0 0x7>;
#interrupt-cells = <1>;
- /* Entries are reversed due to the unusual ITS DeviceID encoding */
- msi-map = <0x0 &gic_its 0x1481 0x1>,
- <0x100 &gic_its 0x1480 0x1>;
+ msi-map = <0x0 &gic_its 0x1480 0x1>,
+ <0x100 &gic_its 0x1481 0x1>;
msi-map-mask = <0xff00>;
linux,pci-domain = <1>;
diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
index 8e517f76189e19..6b40082bac68ce 100644
--- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi
+++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
@@ -284,7 +284,7 @@
domain-idle-states {
CLUSTER_CL4: cluster-sleep-0 {
- compatible = "arm,idle-state";
+ compatible = "domain-idle-state";
idle-state-name = "l2-ret";
arm,psci-suspend-param = <0x01000044>;
entry-latency-us = <350>;
@@ -293,7 +293,7 @@
};
CLUSTER_CL5: cluster-sleep-1 {
- compatible = "arm,idle-state";
+ compatible = "domain-idle-state";
idle-state-name = "ret-pll-off";
arm,psci-suspend-param = <0x01000054>;
entry-latency-us = <2200>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
index 5846a11f0e848f..d5e035823eb5e4 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi
@@ -663,7 +663,7 @@ camera: &i2c7 {
port@1 {
reg = <1>;
- mipi1_in_panel: endpoint@1 {
+ mipi1_in_panel: endpoint {
remote-endpoint = <&mipi1_out_panel>;
};
};
@@ -689,7 +689,6 @@ camera: &i2c7 {
ep-gpios = <&gpio0 3 GPIO_ACTIVE_HIGH>;
/* PERST# asserted in S3 */
- pcie-reset-suspend = <1>;
vpcie3v3-supply = <&wlan_3v3>;
vpcie1v8-supply = <&pp1800_pcie>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts
index dfb2a0bdea5b73..9586bb12a5d8f5 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts
@@ -611,7 +611,7 @@
#size-cells = <0>;
interface@0 { /* interface 0 of configuration 1 */
- compatible = "usbbda,8156.config1.0";
+ compatible = "usbifbda,8156.config1.0";
reg = <0 1>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts
index 054c6a4d1a45f7..294eb2de263deb 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts
@@ -779,7 +779,6 @@
};
&pcie0 {
- bus-scan-delay-ms = <1000>;
ep-gpios = <&gpio2 RK_PD4 GPIO_ACTIVE_HIGH>;
num-lanes = <4>;
pinctrl-names = "default";
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
index 2c3984a880af64..f6f15946579ebf 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
@@ -194,6 +194,8 @@
num-lanes = <4>;
pinctrl-names = "default";
pinctrl-0 = <&pcie_clkreqn_cpm>;
+ vpcie3v3-supply = <&vcc3v3_baseboard>;
+ vpcie12v-supply = <&dc_12v>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
index c08e69391c0154..ccbe3a7a1d2c2f 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
@@ -79,6 +79,26 @@
regulator-max-microvolt = <5000000>;
};
+ vcca_0v9: vcca-0v9-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcca_0v9";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <900000>;
+ vin-supply = <&vcc_1v8>;
+ };
+
+ vcca_1v8: vcca-1v8-regulator {
+ compatible = "regulator-fixed";
+ regulator-name = "vcca_1v8";
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ vin-supply = <&vcc3v3_sys>;
+ };
+
vdd_log: vdd-log {
compatible = "pwm-regulator";
pwms = <&pwm2 0 25000 1>;
@@ -416,16 +436,28 @@
gpio1830-supply = <&vcc_1v8>;
};
-&pmu_io_domains {
- status = "okay";
- pmu1830-supply = <&vcc_1v8>;
+&pcie0 {
+ /* PCIe PHY supplies */
+ vpcie0v9-supply = <&vcca_0v9>;
+ vpcie1v8-supply = <&vcca_1v8>;
};
-&pwm2 {
- status = "okay";
+&pcie_clkreqn_cpm {
+ rockchip,pins =
+ <2 RK_PD2 RK_FUNC_GPIO &pcfg_pull_up>;
};
&pinctrl {
+ pinctrl-names = "default";
+ pinctrl-0 = <&q7_thermal_pin>;
+
+ gpios {
+ q7_thermal_pin: q7-thermal-pin {
+ rockchip,pins =
+ <0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_up>;
+ };
+ };
+
i2c8 {
i2c8_xfer_a: i2c8-xfer {
rockchip,pins =
@@ -458,11 +490,20 @@
usb3 {
usb3_id: usb3-id {
rockchip,pins =
- <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>;
+ <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
};
+&pmu_io_domains {
+ status = "okay";
+ pmu1830-supply = <&vcc_1v8>;
+};
+
+&pwm2 {
+ status = "okay";
+};
+
&sdhci {
/*
* Signal integrity isn't great at 200MHz but 100MHz has proven stable
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts
index 6ecdf5d283390a..c1194d1e438d0d 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts
@@ -447,7 +447,6 @@
&pcie2x1 {
reset-gpios = <&gpio0 RK_PB6 GPIO_ACTIVE_HIGH>;
- disable-gpios = <&gpio0 RK_PA6 GPIO_ACTIVE_HIGH>;
vpcie3v3-supply = <&vcc3v3_pcie>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
index 7b5f3904ef6104..c87fad2c34cba3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
@@ -416,6 +416,8 @@
vccio_sd: LDO_REG5 {
regulator-name = "vccio_sd";
+ regulator-always-on;
+ regulator-boot-on;
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
@@ -525,9 +527,9 @@
#address-cells = <1>;
#size-cells = <0>;
- switch@0 {
+ switch@1f {
compatible = "mediatek,mt7531";
- reg = <0>;
+ reg = <0x1f>;
ports {
#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts
index a8a4cc190eb32e..a3112d5df2008d 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts
@@ -523,7 +523,6 @@
&pcie2x1 {
reset-gpios = <&gpio3 RK_PC1 GPIO_ACTIVE_HIGH>;
- disable-gpios = <&gpio3 RK_PC2 GPIO_ACTIVE_HIGH>;
vpcie3v3-supply = <&vcc3v3_mini_pcie>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
index cce1c8e835877c..94ecb9b4f98f88 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5.dtsi
@@ -216,9 +216,9 @@
pinctrl-0 = <&i2c7m0_xfer>;
status = "okay";
- es8316: audio-codec@11 {
+ es8316: audio-codec@10 {
compatible = "everest,es8316";
- reg = <0x11>;
+ reg = <0x10>;
assigned-clocks = <&cru I2S0_8CH_MCLKOUT>;
assigned-clock-rates = <12288000>;
clocks = <&cru I2S0_8CH_MCLKOUT>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
index 1b606ea5b6cf2b..1a604429fb266e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-orangepi-5-plus.dts
@@ -485,6 +485,7 @@
pinctrl-0 = <&pmic_pins>, <&rk806_dvs1_null>,
<&rk806_dvs2_null>, <&rk806_dvs3_null>;
spi-max-frequency = <1000000>;
+ system-power-controller;
vcc1-supply = <&vcc5v0_sys>;
vcc2-supply = <&vcc5v0_sys>;
@@ -506,7 +507,7 @@
#gpio-cells = <2>;
rk806_dvs1_null: dvs1-null-pins {
- pins = "gpio_pwrctrl2";
+ pins = "gpio_pwrctrl1";
function = "pin_fun0";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
index 67414d72e2b6ef..22bbfbe729c11b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
@@ -456,6 +456,7 @@
<&rk806_dvs2_null>, <&rk806_dvs3_null>;
pinctrl-names = "default";
spi-max-frequency = <1000000>;
+ system-power-controller;
vcc1-supply = <&vcc4v0_sys>;
vcc2-supply = <&vcc4v0_sys>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index db51ec37dd22d1..8fb9768b0d8afb 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -865,6 +865,7 @@ CONFIG_DRM_PANEL_LVDS=m
CONFIG_DRM_PANEL_SIMPLE=m
CONFIG_DRM_PANEL_EDP=m
CONFIG_DRM_PANEL_ILITEK_ILI9882T=m
+CONFIG_DRM_PANEL_KHADAS_TS050=m
CONFIG_DRM_PANEL_MANTIX_MLAF057WE51=m
CONFIG_DRM_PANEL_RAYDIUM_RM67191=m
CONFIG_DRM_PANEL_SITRONIX_ST7703=m
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index ab8b396428da8a..9ecd076ba08fc6 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -480,9 +480,10 @@ alternative_endif
*/
.macro reset_pmuserenr_el0, tmpreg
mrs \tmpreg, id_aa64dfr0_el1
- sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
- cmp \tmpreg, #1 // Skip if no PMU present
- b.lt 9000f
+ ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
+ cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI
+ ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne
+ b.eq 9000f // Skip if no PMU present or IMP_DEF
msr pmuserenr_el0, xzr // Disable PMU access from EL0
9000:
.endm
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 52f076afeb9600..936389e9aecbc7 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -86,6 +86,7 @@
#define ARM_CPU_PART_CORTEX_X2 0xD48
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define ARM_CPU_PART_CORTEX_A78C 0xD4B
+#define ARM_CPU_PART_NEOVERSE_V2 0xD4F
#define APM_CPU_PART_XGENE 0x000
#define APM_CPU_VAR_POTENZA 0x00
@@ -159,6 +160,7 @@
#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
+#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index b7afaa026842b7..e4546b29dd0cbf 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -59,13 +59,14 @@
.macro __init_el2_debug
mrs x1, id_aa64dfr0_el1
- sbfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
- cmp x0, #1
- b.lt .Lskip_pmu_\@ // Skip if no PMU present
+ ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
+ cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI
+ ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne
+ b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
.Lskip_pmu_\@:
- csel x2, xzr, x0, lt // all PMU counters from EL1
+ csel x2, xzr, x0, eq // all PMU counters from EL1
/* Statistical profiling */
ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
new file mode 100644
index 00000000000000..2ae50bdce59b43
--- /dev/null
+++ b/arch/arm64/include/asm/fpu.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef __ASM_FPU_H
+#define __ASM_FPU_H
+
+#include <asm/neon.h>
+
+#define kernel_fpu_available() cpu_has_neon()
+#define kernel_fpu_begin() kernel_neon_begin()
+#define kernel_fpu_end() kernel_neon_end()
+
+#endif /* ! __ASM_FPU_H */
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 2ddc33d93b13b2..3954cbd2ff56bc 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -18,11 +18,11 @@
extern bool arch_hugetlb_migration_supported(struct hstate *h);
#endif
-static inline void arch_clear_hugepage_flags(struct page *page)
+static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
- clear_bit(PG_dcache_clean, &page->flags);
+ clear_bit(PG_dcache_clean, &folio->flags);
}
-#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
#define arch_make_huge_pte arch_make_huge_pte
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 0a7186a93882da..d4d7451c2c129f 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -5,7 +5,6 @@
#ifndef __ASM_IRQFLAGS_H
#define __ASM_IRQFLAGS_H
-#include <asm/alternative.h>
#include <asm/barrier.h>
#include <asm/ptrace.h>
#include <asm/sysreg.h>
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index afdd56d26ad700..9df6f6b0f8afd0 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -49,12 +49,6 @@
__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline bool arch_thp_swp_supported(void)
-{
- return !system_supports_mte();
-}
-#define arch_thp_swp_supported arch_thp_swp_supported
-
/*
* Outside of a few very special situations (e.g. hibernation), we always
* use broadcast TLB invalidation instructions, therefore a spurious page
@@ -271,9 +265,14 @@ static inline pte_t pte_mkdevmap(pte_t pte)
return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
}
-static inline void __set_pte(pte_t *ptep, pte_t pte)
+static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
{
WRITE_ONCE(*ptep, pte);
+}
+
+static inline void __set_pte(pte_t *ptep, pte_t pte)
+{
+ __set_pte_nosync(ptep, pte);
/*
* Only if the new pte is valid and kernel, otherwise TLB maintenance
@@ -517,8 +516,6 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
return pmd;
}
-#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
-
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
@@ -709,7 +706,11 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!pud_table(pud))
#define pud_present(pud) pte_present(pud_pte(pud))
+#ifndef __PAGETABLE_PMD_FOLDED
#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
+#else
+#define pud_leaf(pud) false
+#endif
#define pud_valid(pud) pte_valid(pud_pte(pud))
#define pud_user(pud) pte_user(pud_pte(pud))
#define pud_user_exec(pud) pte_user_exec(pud_pte(pud))
@@ -1005,6 +1006,8 @@ static inline p4d_t *p4d_offset_kimg(pgd_t *pgdp, u64 addr)
static inline bool pgtable_l5_enabled(void) { return false; }
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
/* Match p4d_offset folding in <asm/generic/pgtable-nop4d.h> */
#define p4d_set_fixmap(addr) NULL
#define p4d_set_fixmap_offset(p4dp, addr) ((p4d_t *)p4dp)
@@ -1227,6 +1230,46 @@ static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address,
__ptep_set_wrprotect(mm, address, ptep);
}
+static inline void __clear_young_dirty_pte(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, cydp_t flags)
+{
+ pte_t old_pte;
+
+ do {
+ old_pte = pte;
+
+ if (flags & CYDP_CLEAR_YOUNG)
+ pte = pte_mkold(pte);
+ if (flags & CYDP_CLEAR_DIRTY)
+ pte = pte_mkclean(pte);
+
+ pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
+ pte_val(old_pte), pte_val(pte));
+ } while (pte_val(pte) != pte_val(old_pte));
+}
+
+static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, cydp_t flags)
+{
+ pte_t pte;
+
+ for (;;) {
+ pte = __ptep_get(ptep);
+
+ if (flags == (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY))
+ __set_pte(ptep, pte_mkclean(pte_mkold(pte)));
+ else
+ __clear_young_dirty_pte(vma, addr, ptep, pte, flags);
+
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
@@ -1280,12 +1323,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
#ifdef CONFIG_ARM64_MTE
#define __HAVE_ARCH_PREPARE_TO_SWAP
-static inline int arch_prepare_to_swap(struct page *page)
-{
- if (system_supports_mte())
- return mte_save_tags(page);
- return 0;
-}
+extern int arch_prepare_to_swap(struct folio *folio);
#define __HAVE_ARCH_SWAP_INVALIDATE
static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
@@ -1301,11 +1339,7 @@ static inline void arch_swap_invalidate_area(int type)
}
#define __HAVE_ARCH_SWAP_RESTORE
-static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
-{
- if (system_supports_mte())
- mte_restore_tags(entry, &folio->page);
-}
+extern void arch_swap_restore(swp_entry_t entry, struct folio *folio);
#endif /* CONFIG_ARM64_MTE */
@@ -1392,6 +1426,9 @@ extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t entry, int dirty);
+extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, cydp_t flags);
static __always_inline void contpte_try_fold(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pte)
@@ -1616,6 +1653,17 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty);
}
+#define clear_young_dirty_ptes clear_young_dirty_ptes
+static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, cydp_t flags)
+{
+ if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
+ __clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
+ else
+ contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
+}
+
#else /* CONFIG_ARM64_CONTPTE */
#define ptep_get __ptep_get
@@ -1635,6 +1683,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
#define wrprotect_ptes __wrprotect_ptes
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
#define ptep_set_access_flags __ptep_set_access_flags
+#define clear_young_dirty_ptes __clear_young_dirty_ptes
#endif /* CONFIG_ARM64_CONTPTE */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 3b0e8248e1a41a..95fbc8c0560798 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -142,17 +142,24 @@ static inline unsigned long get_trans_granule(void)
* EL1, Inner Shareable".
*
*/
-#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \
- ({ \
- unsigned long __ta = (baddr); \
- unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \
- __ta &= GENMASK_ULL(36, 0); \
- __ta |= __ttl << 37; \
- __ta |= (unsigned long)(num) << 39; \
- __ta |= (unsigned long)(scale) << 44; \
- __ta |= get_trans_granule() << 46; \
- __ta |= (unsigned long)(asid) << 48; \
- __ta; \
+#define TLBIR_ASID_MASK GENMASK_ULL(63, 48)
+#define TLBIR_TG_MASK GENMASK_ULL(47, 46)
+#define TLBIR_SCALE_MASK GENMASK_ULL(45, 44)
+#define TLBIR_NUM_MASK GENMASK_ULL(43, 39)
+#define TLBIR_TTL_MASK GENMASK_ULL(38, 37)
+#define TLBIR_BADDR_MASK GENMASK_ULL(36, 0)
+
+#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \
+ ({ \
+ unsigned long __ta = 0; \
+ unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \
+ __ta |= FIELD_PREP(TLBIR_BADDR_MASK, baddr); \
+ __ta |= FIELD_PREP(TLBIR_TTL_MASK, __ttl); \
+ __ta |= FIELD_PREP(TLBIR_NUM_MASK, num); \
+ __ta |= FIELD_PREP(TLBIR_SCALE_MASK, scale); \
+ __ta |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule()); \
+ __ta |= FIELD_PREP(TLBIR_ASID_MASK, asid); \
+ __ta; \
})
/* These macros are used by the TLBI RANGE feature. */
@@ -161,12 +168,18 @@ static inline unsigned long get_trans_granule(void)
#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3)
/*
- * Generate 'num' values from -1 to 30 with -1 rejected by the
- * __flush_tlb_range() loop below.
+ * Generate 'num' values from -1 to 31 with -1 rejected by the
+ * __flush_tlb_range() loop below. Its return value is only
+ * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If
+ * 'pages' is more than that, you must iterate over the overall
+ * range.
*/
-#define TLBI_RANGE_MASK GENMASK_ULL(4, 0)
-#define __TLBI_RANGE_NUM(pages, scale) \
- ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+#define __TLBI_RANGE_NUM(pages, scale) \
+ ({ \
+ int __pages = min((pages), \
+ __TLBI_RANGE_PAGES(31, (scale))); \
+ (__pages >> (5 * (scale) + 1)) - 1; \
+ })
/*
* TLB Invalidation
@@ -379,10 +392,6 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
* 3. If there is 1 page remaining, flush it through non-range operations. Range
* operations can only span an even number of pages. We save this for last to
* ensure 64KB start alignment is maintained for the LPA2 case.
- *
- * Note that certain ranges can be represented by either num = 31 and
- * scale or num = 0 and scale + 1. The loop below favours the latter
- * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
#define __flush_tlb_range_op(op, start, pages, stride, \
asid, tlb_level, tlbi_user, lpa2) \
@@ -437,11 +446,11 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
* When not uses TLB range ops, we can handle up to
* (MAX_DVM_OPS - 1) pages;
* When uses TLB range ops, we can handle up to
- * (MAX_TLBI_RANGE_PAGES - 1) pages.
+ * MAX_TLBI_RANGE_PAGES pages.
*/
if ((!system_supports_tlb_range() &&
(end - start) >= (MAX_DVM_OPS * stride)) ||
- pages >= MAX_TLBI_RANGE_PAGES) {
+ pages > MAX_TLBI_RANGE_PAGES) {
flush_tlb_mm(vma->vm_mm);
return;
}
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 491b2b9bd5536f..1346579f802f7f 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -39,7 +39,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 462
+#define __NR_compat_syscalls 463
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 7118282d1c7976..266b96acc01439 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -929,6 +929,8 @@ __SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr)
__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
#define __NR_lsm_list_modules 461
__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
+#define __NR_mseal 462
+__SYSCALL(__NR_mseal, sys_mseal)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index dba8fcec7f33d6..e0e7b93c16cc43 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -26,6 +26,7 @@
#include <linux/libfdt.h>
#include <linux/smp.h>
#include <linux/serial_core.h>
+#include <linux/suspend.h>
#include <linux/pgtable.h>
#include <acpi/ghes.h>
@@ -227,6 +228,15 @@ done:
if (earlycon_acpi_spcr_enable)
early_init_dt_scan_chosen_stdout();
} else {
+#ifdef CONFIG_HIBERNATION
+ struct acpi_table_header *facs = NULL;
+ acpi_get_table(ACPI_SIG_FACS, 1, &facs);
+ if (facs) {
+ swsusp_hardware_signature =
+ ((struct acpi_table_facs *)facs)->hardware_signature;
+ acpi_put_table(facs);
+ }
+#endif
acpi_parse_spcr(earlycon_acpi_spcr_enable, true);
if (IS_ENABLED(CONFIG_ACPI_BGRT))
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 9afcc690fe73c2..4a92096db34e0b 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -10,6 +10,7 @@
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/screen_info.h>
+#include <linux/vmalloc.h>
#include <asm/efi.h>
#include <asm/stacktrace.h>
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index ce08b744aaab22..cb68adcabe0789 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -289,8 +289,28 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
adr_l x1, __hyp_text_end
adr_l x2, dcache_clean_poc
blr x2
+
+ mov_q x0, INIT_SCTLR_EL2_MMU_OFF
+ pre_disable_mmu_workaround
+ msr sctlr_el2, x0
+ isb
0:
mov_q x0, HCR_HOST_NVHE_FLAGS
+
+ /*
+ * Compliant CPUs advertise their VHE-onlyness with
+ * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
+ * RES1 in that case. Publish the E2H bit early so that
+ * it can be picked up by the init_el2_state macro.
+ *
+ * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
+ * don't advertise it (they predate this relaxation).
+ */
+ mrs_s x1, SYS_ID_AA64MMFR4_EL1
+ tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
+
+ orr x0, x0, #HCR_E2H
+1:
msr hcr_el2, x0
isb
@@ -303,30 +323,16 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
mov_q x1, INIT_SCTLR_EL1_MMU_OFF
- /*
- * Compliant CPUs advertise their VHE-onlyness with
- * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
- * RES1 in that case.
- *
- * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but
- * don't advertise it (they predate this relaxation).
- */
- mrs_s x0, SYS_ID_AA64MMFR4_EL1
- ubfx x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
- tbnz x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
-
mrs x0, hcr_el2
and x0, x0, #HCR_E2H
cbz x0, 2f
-1:
+
/* Set a sane SCTLR_EL1, the VHE way */
- pre_disable_mmu_workaround
msr_s SYS_SCTLR_EL12, x1
mov x2, #BOOT_CPU_FLAG_E2H
b 3f
2:
- pre_disable_mmu_workaround
msr sctlr_el1, x1
mov x2, xzr
3:
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 162b030ab9da33..0d022599eb61b3 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -761,7 +761,6 @@ static void sve_init_header_from_task(struct user_sve_header *header,
{
unsigned int vq;
bool active;
- bool fpsimd_only;
enum vec_type task_type;
memset(header, 0, sizeof(*header));
@@ -777,12 +776,10 @@ static void sve_init_header_from_task(struct user_sve_header *header,
case ARM64_VEC_SVE:
if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE);
break;
case ARM64_VEC_SME:
if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = false;
break;
default:
WARN_ON_ONCE(1);
@@ -790,7 +787,7 @@ static void sve_init_header_from_task(struct user_sve_header *header,
}
if (active) {
- if (fpsimd_only) {
+ if (target->thread.fp_type == FP_STATE_FPSIMD) {
header->flags |= SVE_PT_REGS_FPSIMD;
} else {
header->flags |= SVE_PT_REGS_SVE;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 3dee5490eea94d..c4a0a35e02c728 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2597,14 +2597,11 @@ static __init int kvm_arm_init(void)
if (err)
goto out_hyp;
- if (is_protected_kvm_enabled()) {
- kvm_info("Protected nVHE mode initialized successfully\n");
- } else if (in_hyp_mode) {
- kvm_info("VHE mode initialized successfully\n");
- } else {
- char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n';
- kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode);
- }
+ kvm_info("%s%sVHE mode initialized successfully\n",
+ in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
+ "Protected " : "Hyp "),
+ in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
+ "h" : "n"));
/*
* FIXME: Do something reasonable if kvm_init() fails after pKVM
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index a60fb13e21924f..2fc68da4036d90 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -154,7 +154,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt, false);
- __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+ TLBI_TTL_UNKNOWN);
dsb(ish);
__tlbi(vmalle1is);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 3fae5830f8d2c7..5a59ef88b646f0 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -528,7 +528,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
kvm_clear_pte(ctx->ptep);
dsb(ishst);
- __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
+ __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN);
} else {
if (ctx->end - ctx->addr < granule)
return -EINVAL;
@@ -843,12 +843,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
* Perform the appropriate TLB invalidation based on the
* evicted pte value (if any).
*/
- if (kvm_pte_table(ctx->old, ctx->level))
- kvm_tlb_flush_vmid_range(mmu, ctx->addr,
- kvm_granule_size(ctx->level));
- else if (kvm_pte_valid(ctx->old))
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ u64 size = kvm_granule_size(ctx->level);
+ u64 addr = ALIGN_DOWN(ctx->addr, size);
+
+ kvm_tlb_flush_vmid_range(mmu, addr, size);
+ } else if (kvm_pte_valid(ctx->old)) {
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
+ }
}
if (stage2_pte_is_counted(ctx->old))
@@ -896,9 +899,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
if (kvm_pte_valid(ctx->old)) {
kvm_clear_pte(ctx->ptep);
- if (!stage2_unmap_defer_tlb_flush(pgt))
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
- ctx->addr, ctx->level);
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+ TLBI_TTL_UNKNOWN);
+ } else if (!stage2_unmap_defer_tlb_flush(pgt)) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+ ctx->level);
+ }
}
mm_ops->put_page(ctx->ptep);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index b32e2940df7dc8..1a60b95381e8e9 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -171,7 +171,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt);
- __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+ TLBI_TTL_UNKNOWN);
dsb(ish);
__tlbi(vmalle1is);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 18680771cdb0ea..dc04bc7678659a 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1637,7 +1637,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
- if (esr_fsc_is_permission_fault(esr)) {
+ if (esr_fsc_is_translation_fault(esr)) {
/* Beyond sanitised PARange (which is the IPA limit) */
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
kvm_inject_size_fault(vcpu);
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 29490be2546bfc..13e6a2829116ca 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -7,10 +7,8 @@ lib-y := clear_user.o delay.o copy_from_user.o \
ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
-CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only
-CFLAGS_xor-neon.o += -ffreestanding
-# Enable <arm_neon.h>
-CFLAGS_xor-neon.o += -isystem $(shell $(CC) -print-file-name=include)
+CFLAGS_xor-neon.o += $(CC_FLAGS_FPU)
+CFLAGS_REMOVE_xor-neon.o += $(CC_FLAGS_NO_FPU)
endif
lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index 1b64b4c3f8bf8a..9f9486de0004dd 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -361,6 +361,35 @@ void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL_GPL(contpte_wrprotect_ptes);
+void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, cydp_t flags)
+{
+ /*
+ * We can safely clear access/dirty without needing to unfold from
+ * the architectures perspective, even when contpte is set. If the
+ * range starts or ends midway through a contpte block, we can just
+ * expand to include the full contpte block. While this is not
+ * exactly what the core-mm asked for, it tracks access/dirty per
+ * folio, not per page. And since we only create a contpte block
+ * when it is covered by a single folio, we can get away with
+ * clearing access/dirty for the whole block.
+ */
+ unsigned long start = addr;
+ unsigned long end = start + nr;
+
+ if (pte_cont(__ptep_get(ptep + nr - 1)))
+ end = ALIGN(end, CONT_PTE_SIZE);
+
+ if (pte_cont(__ptep_get(ptep))) {
+ start = ALIGN_DOWN(start, CONT_PTE_SIZE);
+ ptep = contpte_align_down(ptep);
+ }
+
+ __clear_young_dirty_ptes(vma, start, ptep, end - start, flags);
+}
+EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes);
+
int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t entry, int dirty)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 8251e2fea9c757..451ba7cbd5adb0 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -486,25 +486,6 @@ static void do_bad_area(unsigned long far, unsigned long esr,
}
}
-#define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000)
-#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000)
-
-static vm_fault_t __do_page_fault(struct mm_struct *mm,
- struct vm_area_struct *vma, unsigned long addr,
- unsigned int mm_flags, unsigned long vm_flags,
- struct pt_regs *regs)
-{
- /*
- * Ok, we have a good vm_area for this memory access, so we can handle
- * it.
- * Check that the permissions on the VMA allow for the fault which
- * occurred.
- */
- if (!(vma->vm_flags & vm_flags))
- return VM_FAULT_BADACCESS;
- return handle_mm_fault(vma, addr, mm_flags, regs);
-}
-
static bool is_el0_instruction_abort(unsigned long esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
@@ -529,6 +510,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
unsigned int mm_flags = FAULT_FLAG_DEFAULT;
unsigned long addr = untagged_addr(far);
struct vm_area_struct *vma;
+ int si_code;
if (kprobe_page_fault(regs, esr))
return 0;
@@ -588,7 +570,10 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
if (!(vma->vm_flags & vm_flags)) {
vma_end_read(vma);
- goto lock_mmap;
+ fault = 0;
+ si_code = SEGV_ACCERR;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto bad_area;
}
fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
@@ -613,12 +598,19 @@ lock_mmap:
retry:
vma = lock_mm_and_find_vma(mm, addr, regs);
if (unlikely(!vma)) {
- fault = VM_FAULT_BADMAP;
- goto done;
+ fault = 0;
+ si_code = SEGV_MAPERR;
+ goto bad_area;
}
- fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs);
+ if (!(vma->vm_flags & vm_flags)) {
+ mmap_read_unlock(mm);
+ fault = 0;
+ si_code = SEGV_ACCERR;
+ goto bad_area;
+ }
+ fault = handle_mm_fault(vma, addr, mm_flags, regs);
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
@@ -637,13 +629,12 @@ retry:
mmap_read_unlock(mm);
done:
- /*
- * Handle the "normal" (no error) case first.
- */
- if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
- VM_FAULT_BADACCESS))))
+ /* Handle the "normal" (no error) case first. */
+ if (likely(!(fault & VM_FAULT_ERROR)))
return 0;
+ si_code = SEGV_MAPERR;
+bad_area:
/*
* If we are in kernel mode at this point, we have no context to
* handle this fault with.
@@ -678,13 +669,8 @@ done:
arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
} else {
- /*
- * Something tried to access memory that isn't in our memory
- * map.
- */
- arm64_force_sig_fault(SIGSEGV,
- fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
- far, inf->name);
+ /* Something tried to access memory that out of memory map */
+ arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
}
return 0;
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 0f0e10bb0a9540..3f09ac73cce3b2 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -79,20 +79,6 @@ bool arch_hugetlb_migration_supported(struct hstate *h)
}
#endif
-int pmd_huge(pmd_t pmd)
-{
- return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
-}
-
-int pud_huge(pud_t pud)
-{
-#ifndef __PAGETABLE_PMD_FOLDED
- return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
-#else
- return 0;
-#endif
-}
-
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, size_t *pgsize)
{
@@ -276,7 +262,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t *ptep = NULL;
pgdp = pgd_offset(mm, addr);
- p4dp = p4d_offset(pgdp, addr);
+ p4dp = p4d_alloc(mm, pgdp, addr);
+ if (!p4dp)
+ return NULL;
+
pudp = pud_alloc(mm, p4dp, addr);
if (!pudp)
return NULL;
@@ -325,7 +314,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
if (sz != PUD_SIZE && pud_none(pud))
return NULL;
/* hugepage or swap? */
- if (pud_huge(pud) || !pud_present(pud))
+ if (pud_leaf(pud) || !pud_present(pud))
return (pte_t *)pudp;
/* table; check the next level */
@@ -337,7 +326,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
pmd_none(pmd))
return NULL;
- if (pmd_huge(pmd) || !pmd_present(pmd))
+ if (pmd_leaf(pmd) || !pmd_present(pmd))
return (pte_t *)pmdp;
if (sz == CONT_PTE_SIZE)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 495b732d5af36f..c927e9312f102e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -109,28 +109,12 @@ EXPORT_SYMBOL(phys_mem_access_prot);
static phys_addr_t __init early_pgtable_alloc(int shift)
{
phys_addr_t phys;
- void *ptr;
phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0,
MEMBLOCK_ALLOC_NOLEAKTRACE);
if (!phys)
panic("Failed to allocate page table page\n");
- /*
- * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
- * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
- * any level of table.
- */
- ptr = pte_set_fixmap(phys);
-
- memset(ptr, 0, PAGE_SIZE);
-
- /*
- * Implicit barriers also ensure the zeroed page is visible to the page
- * table walker
- */
- pte_clear_fixmap();
-
return phys;
}
@@ -172,16 +156,25 @@ bool pgattr_change_is_safe(u64 old, u64 new)
return ((old ^ new) & ~mask) == 0;
}
-static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
- phys_addr_t phys, pgprot_t prot)
+static void init_clear_pgtable(void *table)
{
- pte_t *ptep;
+ clear_page(table);
- ptep = pte_set_fixmap_offset(pmdp, addr);
+ /* Ensure the zeroing is observed by page table walks. */
+ dsb(ishst);
+}
+
+static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot)
+{
do {
pte_t old_pte = __ptep_get(ptep);
- __set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
+ /*
+ * Required barriers to make this visible to the table walker
+ * are deferred to the end of alloc_init_cont_pte().
+ */
+ __set_pte_nosync(ptep, pfn_pte(__phys_to_pfn(phys), prot));
/*
* After the PTE entry has been populated once, we
@@ -192,8 +185,6 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
phys += PAGE_SIZE;
} while (ptep++, addr += PAGE_SIZE, addr != end);
-
- pte_clear_fixmap();
}
static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
@@ -204,6 +195,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
{
unsigned long next;
pmd_t pmd = READ_ONCE(*pmdp);
+ pte_t *ptep;
BUG_ON(pmd_sect(pmd));
if (pmd_none(pmd)) {
@@ -214,10 +206,14 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
pmdval |= PMD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
pte_phys = pgtable_alloc(PAGE_SHIFT);
+ ptep = pte_set_fixmap(pte_phys);
+ init_clear_pgtable(ptep);
+ ptep += pte_index(addr);
__pmd_populate(pmdp, pte_phys, pmdval);
- pmd = READ_ONCE(*pmdp);
+ } else {
+ BUG_ON(pmd_bad(pmd));
+ ptep = pte_set_fixmap_offset(pmdp, addr);
}
- BUG_ON(pmd_bad(pmd));
do {
pgprot_t __prot = prot;
@@ -229,20 +225,26 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pte(pmdp, addr, next, phys, __prot);
+ init_pte(ptep, addr, next, phys, __prot);
+ ptep += pte_index(next) - pte_index(addr);
phys += next - addr;
} while (addr = next, addr != end);
+
+ /*
+ * Note: barriers and maintenance necessary to clear the fixmap slot
+ * ensure that all previous pgtable writes are visible to the table
+ * walker.
+ */
+ pte_clear_fixmap();
}
-static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
+static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(int), int flags)
{
unsigned long next;
- pmd_t *pmdp;
- pmdp = pmd_set_fixmap_offset(pudp, addr);
do {
pmd_t old_pmd = READ_ONCE(*pmdp);
@@ -268,8 +270,6 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
}
phys += next - addr;
} while (pmdp++, addr = next, addr != end);
-
- pmd_clear_fixmap();
}
static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
@@ -279,6 +279,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
{
unsigned long next;
pud_t pud = READ_ONCE(*pudp);
+ pmd_t *pmdp;
/*
* Check for initial section mappings in the pgd/pud.
@@ -292,10 +293,14 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
pudval |= PUD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
pmd_phys = pgtable_alloc(PMD_SHIFT);
+ pmdp = pmd_set_fixmap(pmd_phys);
+ init_clear_pgtable(pmdp);
+ pmdp += pmd_index(addr);
__pud_populate(pudp, pmd_phys, pudval);
- pud = READ_ONCE(*pudp);
+ } else {
+ BUG_ON(pud_bad(pud));
+ pmdp = pmd_set_fixmap_offset(pudp, addr);
}
- BUG_ON(pud_bad(pud));
do {
pgprot_t __prot = prot;
@@ -307,10 +312,13 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
+ init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags);
+ pmdp += pmd_index(next) - pmd_index(addr);
phys += next - addr;
} while (addr = next, addr != end);
+
+ pmd_clear_fixmap();
}
static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
@@ -330,12 +338,15 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
p4dval |= P4D_TABLE_PXN;
BUG_ON(!pgtable_alloc);
pud_phys = pgtable_alloc(PUD_SHIFT);
+ pudp = pud_set_fixmap(pud_phys);
+ init_clear_pgtable(pudp);
+ pudp += pud_index(addr);
__p4d_populate(p4dp, pud_phys, p4dval);
- p4d = READ_ONCE(*p4dp);
+ } else {
+ BUG_ON(p4d_bad(p4d));
+ pudp = pud_set_fixmap_offset(p4dp, addr);
}
- BUG_ON(p4d_bad(p4d));
- pudp = pud_set_fixmap_offset(p4dp, addr);
do {
pud_t old_pud = READ_ONCE(*pudp);
@@ -385,12 +396,15 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end,
pgdval |= PGD_TABLE_PXN;
BUG_ON(!pgtable_alloc);
p4d_phys = pgtable_alloc(P4D_SHIFT);
+ p4dp = p4d_set_fixmap(p4d_phys);
+ init_clear_pgtable(p4dp);
+ p4dp += p4d_index(addr);
__pgd_populate(pgdp, p4d_phys, pgdval);
- pgd = READ_ONCE(*pgdp);
+ } else {
+ BUG_ON(pgd_bad(pgd));
+ p4dp = p4d_set_fixmap_offset(pgdp, addr);
}
- BUG_ON(pgd_bad(pgd));
- p4dp = p4d_set_fixmap_offset(pgdp, addr);
do {
p4d_t old_p4d = READ_ONCE(*p4dp);
@@ -457,11 +471,10 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
static phys_addr_t __pgd_pgtable_alloc(int shift)
{
- void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
- BUG_ON(!ptr);
+ /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
+ void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO);
- /* Ensure the zeroed page is visible to the page table walker */
- dsb(ishst);
+ BUG_ON(!ptr);
return __pa(ptr);
}
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index a31833e3ddc544..63e8d72f202a3b 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -68,6 +68,13 @@ void mte_invalidate_tags(int type, pgoff_t offset)
mte_free_tag_storage(tags);
}
+static inline void __mte_invalidate_tags(struct page *page)
+{
+ swp_entry_t entry = page_swap_entry(page);
+
+ mte_invalidate_tags(swp_type(entry), swp_offset(entry));
+}
+
void mte_invalidate_tags_area(int type)
{
swp_entry_t entry = swp_entry(type, 0);
@@ -83,3 +90,41 @@ void mte_invalidate_tags_area(int type)
}
xa_unlock(&mte_pages);
}
+
+int arch_prepare_to_swap(struct folio *folio)
+{
+ long i, nr;
+ int err;
+
+ if (!system_supports_mte())
+ return 0;
+
+ nr = folio_nr_pages(folio);
+
+ for (i = 0; i < nr; i++) {
+ err = mte_save_tags(folio_page(folio, i));
+ if (err)
+ goto out;
+ }
+ return 0;
+
+out:
+ while (i--)
+ __mte_invalidate_tags(folio_page(folio, i));
+ return err;
+}
+
+void arch_swap_restore(swp_entry_t entry, struct folio *folio)
+{
+ long i, nr;
+
+ if (!system_supports_mte())
+ return;
+
+ nr = folio_nr_pages(folio);
+
+ for (i = 0; i < nr; i++) {
+ mte_restore_tags(entry, folio_page(folio, i));
+ entry.val++;
+ }
+}
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 0c4e3ecf989d43..0e270a1c51e645 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -219,9 +219,6 @@ bool kernel_page_present(struct page *page)
pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
- if (!can_set_direct_map())
- return true;
-
pgdp = pgd_offset_k(addr);
if (pgd_none(READ_ONCE(*pgdp)))
return false;
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c5b461dda43859..13eac43c632dfe 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -943,7 +943,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit(A64_UXTH(is64, dst, dst), ctx);
break;
case 32:
- emit(A64_REV32(is64, dst, dst), ctx);
+ emit(A64_REV32(0, dst, dst), ctx);
/* upper 32 bits already cleared */
break;
case 64:
@@ -1256,7 +1256,7 @@ emit_cond_jmp:
} else {
emit_a64_mov_i(1, tmp, off, ctx);
if (sign_extend)
- emit(A64_LDRSW(dst, src_adj, off_adj), ctx);
+ emit(A64_LDRSW(dst, src, tmp), ctx);
else
emit(A64_LDR32(dst, src, tmp), ctx);
}
@@ -1844,15 +1844,15 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
emit_call(enter_prog, ctx);
+ /* save return value to callee saved register x20 */
+ emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
+
/* if (__bpf_prog_enter(prog) == 0)
* goto skip_exec_of_prog;
*/
branch = ctx->image + ctx->idx;
emit(A64_NOP, ctx);
- /* save return value to callee saved register x20 */
- emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
-
emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
if (!p->jited)
emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
diff --git a/arch/csky/abiv1/mmap.c b/arch/csky/abiv1/mmap.c
index 6792aca4999917..7f826331d409b2 100644
--- a/arch/csky/abiv1/mmap.c
+++ b/arch/csky/abiv1/mmap.c
@@ -28,7 +28,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int do_align = 0;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {
+ .length = len,
+ .low_limit = mm->mmap_base,
+ .high_limit = TASK_SIZE,
+ .align_offset = pgoff << PAGE_SHIFT
+ };
/*
* We only need to do colour alignment if either the I or D
@@ -61,11 +66,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
- info.flags = 0;
- info.length = len;
- info.low_limit = mm->mmap_base;
- info.high_limit = TASK_SIZE;
info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
- info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S
index 1140051a0c455d..1150b77fa281ce 100644
--- a/arch/hexagon/kernel/vmlinux.lds.S
+++ b/arch/hexagon/kernel/vmlinux.lds.S
@@ -63,6 +63,7 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
ELF_DETAILS
+ .hexagon.attributes 0 : { *(.hexagon.attributes) }
DISCARDS
}
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index a5f300ec6f2808..13077142ae8251 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -18,6 +18,7 @@ config LOONGARCH
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if CPU_HAS_FPU
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_SPECIAL
@@ -119,7 +120,7 @@ config LOONGARCH
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
select HAVE_EXIT_THREAD
- select HAVE_FAST_GUP
+ select HAVE_GUP_FAST
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
@@ -595,7 +596,7 @@ config ARCH_SELECTS_CRASH_DUMP
select RELOCATABLE
config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
- def_bool CRASH_CORE
+ def_bool CRASH_RESERVE
config RELOCATABLE
bool "Relocatable kernel"
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index df6caf79537a9d..efb5440a43ec4c 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -26,6 +26,9 @@ endif
32bit-emul = elf32loongarch
64bit-emul = elf64loongarch
+CC_FLAGS_FPU := -mfpu=64
+CC_FLAGS_NO_FPU := -msoft-float
+
ifdef CONFIG_UNWINDER_ORC
orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h
orc_hash_sh := $(srctree)/scripts/orc_hash.sh
@@ -59,7 +62,7 @@ ld-emul = $(64bit-emul)
cflags-y += -mabi=lp64s
endif
-cflags-y += -pipe -msoft-float
+cflags-y += -pipe $(CC_FLAGS_NO_FPU)
LDFLAGS_vmlinux += -static -n -nostdlib
# When the assembler supports explicit relocation hint, we must use it.
diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
index 49a70f8c3cab22..b6aeb1f70e2a03 100644
--- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi
+++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
@@ -100,6 +100,13 @@
#size-cells = <2>;
dma-coherent;
+ isa@18000000 {
+ compatible = "isa";
+ #size-cells = <1>;
+ #address-cells = <2>;
+ ranges = <1 0x0 0x0 0x18000000 0x4000>;
+ };
+
liointc0: interrupt-controller@1fe01400 {
compatible = "loongson,liointc-2.0";
reg = <0x0 0x1fe01400 0x0 0x40>,
diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
index dca91caf895e3c..74b99bd234cc38 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
@@ -61,12 +61,45 @@
&gmac0 {
status = "okay";
+
+ phy-mode = "gmii";
+ phy-handle = <&phy0>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <2>;
+ };
+ };
};
&gmac1 {
status = "okay";
+
+ phy-mode = "gmii";
+ phy-handle = <&phy1>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy1: ethernet-phy@1 {
+ reg = <2>;
+ };
+ };
};
&gmac2 {
status = "okay";
+
+ phy-mode = "rgmii";
+ phy-handle = <&phy2>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy2: ethernet-phy@2 {
+ reg = <0>;
+ };
+ };
};
diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
index a231949b5f553a..9eab2d02cbe8bf 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi
+++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
@@ -51,6 +51,13 @@
#address-cells = <2>;
#size-cells = <2>;
+ isa@18400000 {
+ compatible = "isa";
+ #size-cells = <1>;
+ #address-cells = <2>;
+ ranges = <1 0x0 0x0 0x18400000 0x4000>;
+ };
+
pmc: power-management@100d0000 {
compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon";
reg = <0x0 0x100d0000 0x0 0x58>;
@@ -109,6 +116,8 @@
msi: msi-controller@1fe01140 {
compatible = "loongson,pch-msi-1.0";
reg = <0x0 0x1fe01140 0x0 0x8>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
msi-controller;
loongson,msi-base-vec = <64>;
loongson,msi-num-vecs = <192>;
@@ -140,27 +149,34 @@
#address-cells = <3>;
#size-cells = <2>;
device_type = "pci";
+ msi-parent = <&msi>;
bus-range = <0x0 0xff>;
- ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>,
+ ranges = <0x01000000 0x0 0x00008000 0x0 0x18408000 0x0 0x00008000>,
<0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>;
gmac0: ethernet@3,0 {
reg = <0x1800 0x0 0x0 0x0 0x0>;
- interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
+ <13 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
gmac1: ethernet@3,1 {
reg = <0x1900 0x0 0x0 0x0 0x0>;
- interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
+ <15 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
gmac2: ethernet@3,2 {
reg = <0x1a00 0x0 0x0 0x0 0x0>;
- interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <17 IRQ_TYPE_LEVEL_HIGH>,
+ <18 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h
index b24437e28c6eda..7bd47d65bf7a04 100644
--- a/arch/loongarch/include/asm/addrspace.h
+++ b/arch/loongarch/include/asm/addrspace.h
@@ -11,6 +11,7 @@
#define _ASM_ADDRSPACE_H
#include <linux/const.h>
+#include <linux/sizes.h>
#include <asm/loongarch.h>
diff --git a/arch/loongarch/include/asm/crash_core.h b/arch/loongarch/include/asm/crash_reserve.h
index 218bdbfa527ba8..a1d9b84b1c7d51 100644
--- a/arch/loongarch/include/asm/crash_core.h
+++ b/arch/loongarch/include/asm/crash_reserve.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _LOONGARCH_CRASH_CORE_H
-#define _LOONGARCH_CRASH_CORE_H
+#ifndef _LOONGARCH_CRASH_RESERVE_H
+#define _LOONGARCH_CRASH_RESERVE_H
#define CRASH_ALIGN SZ_2M
diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
index c2d8962fda00be..3177674228f896 100644
--- a/arch/loongarch/include/asm/fpu.h
+++ b/arch/loongarch/include/asm/fpu.h
@@ -21,6 +21,7 @@
struct sigcontext;
+#define kernel_fpu_available() cpu_has_fpu
extern void kernel_fpu_begin(void);
extern void kernel_fpu_end(void);
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 4a8adcca329b81..c2f9979b2979e5 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -14,11 +14,6 @@
#include <asm/pgtable-bits.h>
#include <asm/string.h>
-/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size);
extern void __init early_iounmap(void __iomem *addr, unsigned long size);
@@ -73,6 +68,21 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
#define __io_aw() mmiowb()
+#ifdef CONFIG_KFENCE
+#define virt_to_phys(kaddr) \
+({ \
+ (likely((unsigned long)kaddr < vm_map_base)) ? __pa((unsigned long)kaddr) : \
+ page_to_phys(tlb_virt_to_page((unsigned long)kaddr)) + offset_in_page((unsigned long)kaddr);\
+})
+
+#define phys_to_virt(paddr) \
+({ \
+ extern char *__kfence_pool; \
+ (unlikely(__kfence_pool == NULL)) ? __va((unsigned long)paddr) : \
+ page_address(phys_to_page((unsigned long)paddr)) + offset_in_page((unsigned long)paddr);\
+})
+#endif
+
#include <asm-generic/io.h>
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
index 6c82aea1c99398..92636e82957c7e 100644
--- a/arch/loongarch/include/asm/kfence.h
+++ b/arch/loongarch/include/asm/kfence.h
@@ -10,12 +10,14 @@
#define _ASM_LOONGARCH_KFENCE_H
#include <linux/kfence.h>
+#include <linux/vmalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
static inline bool arch_kfence_init_pool(void)
{
int err;
+ char *kaddr, *vaddr;
char *kfence_pool = __kfence_pool;
struct vm_struct *area;
@@ -35,6 +37,14 @@ static inline bool arch_kfence_init_pool(void)
return false;
}
+ kaddr = kfence_pool;
+ vaddr = __kfence_pool;
+ while (kaddr < kfence_pool + KFENCE_POOL_SIZE) {
+ set_page_address(virt_to_page(kaddr), vaddr);
+ kaddr += PAGE_SIZE;
+ vaddr += PAGE_SIZE;
+ }
+
return true;
}
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 44027060c54a28..e85df33f11c772 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -78,7 +78,26 @@ typedef struct { unsigned long pgprot; } pgprot_t;
struct page *dmw_virt_to_page(unsigned long kaddr);
struct page *tlb_virt_to_page(unsigned long kaddr);
-#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr))
+#define pfn_to_phys(pfn) __pfn_to_phys(pfn)
+#define phys_to_pfn(paddr) __phys_to_pfn(paddr)
+
+#define page_to_phys(page) pfn_to_phys(page_to_pfn(page))
+#define phys_to_page(paddr) pfn_to_page(phys_to_pfn(paddr))
+
+#ifndef CONFIG_KFENCE
+
+#define page_to_virt(page) __va(page_to_phys(page))
+#define virt_to_page(kaddr) phys_to_page(__pa(kaddr))
+
+#else
+
+#define WANT_PAGE_VIRTUAL
+
+#define page_to_virt(page) \
+({ \
+ extern char *__kfence_pool; \
+ (__kfence_pool == NULL) ? __va(page_to_phys(page)) : page_address(page); \
+})
#define virt_to_page(kaddr) \
({ \
@@ -86,6 +105,11 @@ struct page *tlb_virt_to_page(unsigned long kaddr);
dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
})
+#endif
+
+#define pfn_to_virt(pfn) page_to_virt(pfn_to_page(pfn))
+#define virt_to_pfn(kaddr) page_to_pfn(virt_to_page(kaddr))
+
extern int __virt_addr_valid(volatile void *kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h
index 2a35a0bc2aaabf..52b638059e40b3 100644
--- a/arch/loongarch/include/asm/perf_event.h
+++ b/arch/loongarch/include/asm/perf_event.h
@@ -7,6 +7,14 @@
#ifndef __LOONGARCH_PERF_EVENT_H__
#define __LOONGARCH_PERF_EVENT_H__
+#include <asm/ptrace.h>
+
#define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->csr_era = (__ip); \
+ (regs)->regs[3] = current_stack_pointer; \
+ (regs)->regs[22] = (unsigned long) __builtin_frame_address(0); \
+}
+
#endif /* __LOONGARCH_PERF_EVENT_H__ */
diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h
index da7a3b5b9374ae..e071f5e9e85802 100644
--- a/arch/loongarch/include/asm/tlb.h
+++ b/arch/loongarch/include/asm/tlb.h
@@ -132,8 +132,6 @@ static __always_inline void invtlb_all(u32 op, u32 info, u64 addr)
);
}
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
static void tlb_flush(struct mmu_gather *tlb);
#define tlb_flush tlb_flush
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index 0491bf453cd496..cac7cba81b65f7 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -884,4 +884,4 @@ static int __init init_hw_perf_events(void)
return 0;
}
-early_initcall(init_hw_perf_events);
+pure_initcall(init_hw_perf_events);
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index 1fc2f6813ea027..97b40defde0608 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -202,10 +202,10 @@ good_area:
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
} else {
- if (!(vma->vm_flags & VM_READ) && address != exception_era(regs))
- goto bad_area;
if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs))
goto bad_area;
+ if (!(vma->vm_flags & (VM_READ | VM_WRITE)) && address != exception_era(regs))
+ goto bad_area;
}
/*
diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c
index 1e76fcb83093dd..12222c56cb5948 100644
--- a/arch/loongarch/mm/hugetlbpage.c
+++ b/arch/loongarch/mm/hugetlbpage.c
@@ -50,21 +50,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
return (pte_t *) pmd;
}
-int pmd_huge(pmd_t pmd)
-{
- return (pmd_val(pmd) & _PAGE_HUGE) != 0;
-}
-
-int pud_huge(pud_t pud)
-{
- return (pud_val(pud) & _PAGE_HUGE) != 0;
-}
-
uint64_t pmd_to_entrylo(unsigned long pmd_val)
{
uint64_t val;
/* PMD as PTE. Must be huge page */
- if (!pmd_huge(__pmd(pmd_val)))
+ if (!pmd_leaf(__pmd(pmd_val)))
panic("%s", __func__);
val = pmd_val ^ _PAGE_HUGE;
diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c
index a9630a81b38abb..8890309851351c 100644
--- a/arch/loongarch/mm/mmap.c
+++ b/arch/loongarch/mm/mmap.c
@@ -4,6 +4,7 @@
*/
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/kfence.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mman.h>
@@ -24,7 +25,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
struct vm_area_struct *vma;
unsigned long addr = addr0;
int do_color_align;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
if (unlikely(len > TASK_SIZE))
return -ENOMEM;
@@ -82,7 +83,6 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
*/
}
- info.flags = 0;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
return vm_unmapped_area(&info);
@@ -111,6 +111,9 @@ int __virt_addr_valid(volatile void *kaddr)
{
unsigned long vaddr = (unsigned long)kaddr;
+ if (is_kfence_address((void *)kaddr))
+ return 1;
+
if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base))
return 0;
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index 2aae72e638713a..bda018150000e6 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -11,13 +11,13 @@
struct page *dmw_virt_to_page(unsigned long kaddr)
{
- return pfn_to_page(virt_to_pfn(kaddr));
+ return phys_to_page(__pa(kaddr));
}
EXPORT_SYMBOL(dmw_virt_to_page);
struct page *tlb_virt_to_page(unsigned long kaddr)
{
- return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+ return phys_to_page(pfn_to_phys(pte_pfn(*virt_to_kpte(kaddr))));
}
EXPORT_SYMBOL(tlb_virt_to_page);
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 7fd43fd4c9f2e2..22a3cbd4c60298 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -461,3 +461,4 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index b00ab2cabab92a..2b81a6bd78b292 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -467,3 +467,4 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 06ef440d16ce71..f1aa1bf11166bc 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -68,7 +68,7 @@ config MIPS
select HAVE_DYNAMIC_FTRACE
select HAVE_EBPF_JIT if !CPU_MICROMIPS
select HAVE_EXIT_THREAD
- select HAVE_FAST_GUP
+ select HAVE_GUP_FAST
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
@@ -619,15 +619,6 @@ config MACH_EYEQ5
bool
-config FIT_IMAGE_FDT_EPM5
- bool "Include FDT for Mobileye EyeQ5 development platforms"
- depends on MACH_EYEQ5
- default n
- help
- Enable this to include the FDT for the EyeQ5 development platforms
- from Mobileye in the FIT kernel image.
- This requires u-boot on the platform.
-
config MACH_NINTENDO64
bool "Nintendo 64 console"
select CEVT_R4K
@@ -1011,6 +1002,15 @@ config CAVIUM_OCTEON_SOC
endchoice
+config FIT_IMAGE_FDT_EPM5
+ bool "Include FDT for Mobileye EyeQ5 development platforms"
+ depends on MACH_EYEQ5
+ default n
+ help
+ Enable this to include the FDT for the EyeQ5 development platforms
+ from Mobileye in the FIT kernel image.
+ This requires u-boot on the platform.
+
source "arch/mips/alchemy/Kconfig"
source "arch/mips/ath25/Kconfig"
source "arch/mips/ath79/Kconfig"
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index 0e196650f4f488..92b7591aac2acd 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -129,7 +129,7 @@ static inline int pmd_none(pmd_t pmd)
static inline int pmd_bad(pmd_t pmd)
{
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
- /* pmd_huge(pmd) but inline */
+ /* pmd_leaf(pmd) but inline */
if (unlikely(pmd_val(pmd) & _PAGE_HUGE))
return 0;
#endif
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 20ca48c1b6063e..7c28510b3768f0 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -245,7 +245,7 @@ static inline int pmd_none(pmd_t pmd)
static inline int pmd_bad(pmd_t pmd)
{
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
- /* pmd_huge(pmd) but inline */
+ /* pmd_leaf(pmd) but inline */
if (unlikely(pmd_val(pmd) & _PAGE_HUGE))
return 0;
#endif
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index d14d0e37ad02dd..4a2b40ce39e091 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -159,7 +159,7 @@ extern unsigned long exception_ip(struct pt_regs *regs);
#define exception_ip(regs) exception_ip(regs)
#define profile_pc(regs) instruction_pointer(regs)
-extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
+extern asmlinkage long syscall_trace_enter(struct pt_regs *regs);
extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
extern void die(const char *, struct pt_regs *) __noreturn;
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index eabddb89d221fe..c97b089b99029d 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -617,7 +617,7 @@ const struct dma_map_ops jazz_dma_ops = {
.sync_sg_for_device = jazz_dma_sync_sg_for_device,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
+ .alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
};
EXPORT_SYMBOL(jazz_dma_ops);
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index d1b11f66f748f0..cb1045ebab0621 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -101,6 +101,7 @@ void output_thread_info_defines(void)
OFFSET(TI_CPU, thread_info, cpu);
OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
OFFSET(TI_REGS, thread_info, regs);
+ OFFSET(TI_SYSCALL, thread_info, syscall);
DEFINE(_THREAD_SIZE, THREAD_SIZE);
DEFINE(_THREAD_MASK, THREAD_MASK);
DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 59288c13b581b8..61503a36067e9e 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -1317,16 +1317,13 @@ long arch_ptrace(struct task_struct *child, long request,
* Notification of system call entry/exit
* - triggered by current->work.syscall_trace
*/
-asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
+asmlinkage long syscall_trace_enter(struct pt_regs *regs)
{
user_exit();
- current_thread_info()->syscall = syscall;
-
if (test_thread_flag(TIF_SYSCALL_TRACE)) {
if (ptrace_report_syscall_entry(regs))
return -1;
- syscall = current_thread_info()->syscall;
}
#ifdef CONFIG_SECCOMP
@@ -1335,7 +1332,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
struct seccomp_data sd;
unsigned long args[6];
- sd.nr = syscall;
+ sd.nr = current_thread_info()->syscall;
sd.arch = syscall_get_arch(current);
syscall_get_arguments(current, regs, args);
for (i = 0; i < 6; i++)
@@ -1345,23 +1342,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
ret = __secure_computing(&sd);
if (ret == -1)
return ret;
- syscall = current_thread_info()->syscall;
}
#endif
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->regs[2]);
- audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
+ audit_syscall_entry(current_thread_info()->syscall,
+ regs->regs[4], regs->regs[5],
regs->regs[6], regs->regs[7]);
/*
* Negative syscall numbers are mistaken for rejected syscalls, but
* won't have had the return value set appropriately, so we do so now.
*/
- if (syscall < 0)
+ if (current_thread_info()->syscall < 0)
syscall_set_return_value(current, regs, -ENOSYS, 0);
- return syscall;
+ return current_thread_info()->syscall;
}
/*
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 18dc9b34505614..2c604717e63080 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -77,6 +77,18 @@ loads_done:
PTR_WD load_a7, bad_stack_a7
.previous
+ /*
+ * syscall number is in v0 unless we called syscall(__NR_###)
+ * where the real syscall number is in a0
+ */
+ subu t2, v0, __NR_O32_Linux
+ bnez t2, 1f /* __NR_syscall at offset 0 */
+ LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
+ b 2f
+1:
+ LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
+2:
+
lw t0, TI_FLAGS($28) # syscall tracing enabled?
li t1, _TIF_WORK_SYSCALL_ENTRY
and t0, t1
@@ -114,16 +126,7 @@ syscall_trace_entry:
SAVE_STATIC
move a0, sp
- /*
- * syscall number is in v0 unless we called syscall(__NR_###)
- * where the real syscall number is in a0
- */
- move a1, v0
- subu t2, v0, __NR_O32_Linux
- bnez t2, 1f /* __NR_syscall at offset 0 */
- lw a1, PT_R4(sp)
-
-1: jal syscall_trace_enter
+ jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 97456b2ca7dc32..97788859238c34 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
+ LONG_S v0, TI_SYSCALL($28) # Store syscall number
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -72,7 +74,6 @@ syscall_common:
n32_syscall_trace_entry:
SAVE_STATIC
move a0, sp
- move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S
index e6264aa62e457f..be11ea5cc67e04 100644
--- a/arch/mips/kernel/scall64-n64.S
+++ b/arch/mips/kernel/scall64-n64.S
@@ -46,6 +46,8 @@ NESTED(handle_sys64, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
+ LONG_S v0, TI_SYSCALL($28) # Store syscall number
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -82,7 +84,6 @@ n64_syscall_exit:
syscall_trace_entry:
SAVE_STATIC
move a0, sp
- move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index d3c2616cba2269..7a5abb73e53127 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -79,6 +79,22 @@ loads_done:
PTR_WD load_a7, bad_stack_a7
.previous
+ /*
+ * absolute syscall number is in v0 unless we called syscall(__NR_###)
+ * where the real syscall number is in a0
+ * note: NR_syscall is the first O32 syscall but the macro is
+ * only defined when compiling with -mabi=32 (CONFIG_32BIT)
+ * therefore __NR_O32_Linux is used (4000)
+ */
+
+ subu t2, v0, __NR_O32_Linux
+ bnez t2, 1f /* __NR_syscall at offset 0 */
+ LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
+ b 2f
+1:
+ LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
+2:
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -113,22 +129,7 @@ trace_a_syscall:
sd a7, PT_R11(sp) # For indirect syscalls
move a0, sp
- /*
- * absolute syscall number is in v0 unless we called syscall(__NR_###)
- * where the real syscall number is in a0
- * note: NR_syscall is the first O32 syscall but the macro is
- * only defined when compiling with -mabi=32 (CONFIG_32BIT)
- * therefore __NR_O32_Linux is used (4000)
- */
- .set push
- .set reorder
- subu t1, v0, __NR_O32_Linux
- move a1, v0
- bnez t1, 1f /* __NR_syscall at offset 0 */
- ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
- .set pop
-
-1: jal syscall_trace_enter
+ jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 83cfc9eb6b88e6..cc869f5d569318 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -400,3 +400,4 @@
459 n32 lsm_get_self_attr sys_lsm_get_self_attr
460 n32 lsm_set_self_attr sys_lsm_set_self_attr
461 n32 lsm_list_modules sys_lsm_list_modules
+462 n32 mseal sys_mseal
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 532b855df58957..1464c6be6eb3c7 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -376,3 +376,4 @@
459 n64 lsm_get_self_attr sys_lsm_get_self_attr
460 n64 lsm_set_self_attr sys_lsm_set_self_attr
461 n64 lsm_list_modules sys_lsm_list_modules
+462 n64 mseal sys_mseal
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index f45c9530ea93a1..008ebe60263e37 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -449,3 +449,4 @@
459 o32 lsm_get_self_attr sys_lsm_get_self_attr
460 o32 lsm_set_self_attr sys_lsm_set_self_attr
461 o32 lsm_list_modules sys_lsm_list_modules
+462 o32 mseal sys_mseal
diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c
index 7eaff5b078739b..0b9e15555b59bb 100644
--- a/arch/mips/mm/hugetlbpage.c
+++ b/arch/mips/mm/hugetlbpage.c
@@ -57,13 +57,3 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
}
return (pte_t *) pmd;
}
-
-int pmd_huge(pmd_t pmd)
-{
- return (pmd_val(pmd) & _PAGE_HUGE) != 0;
-}
-
-int pud_huge(pud_t pud)
-{
- return (pud_val(pud) & _PAGE_HUGE) != 0;
-}
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 00fe90c6db3e80..7e11d7b5876107 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -34,7 +34,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
struct vm_area_struct *vma;
unsigned long addr = addr0;
int do_color_align;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
if (unlikely(len > TASK_SIZE))
return -ENOMEM;
@@ -92,7 +92,6 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
*/
}
- info.flags = 0;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
return vm_unmapped_area(&info);
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 4106084e57d728..76f3b9c0a9f0ce 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -326,7 +326,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
idx = read_c0_index();
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
/* this could be a huge page */
- if (pmd_huge(*pmdp)) {
+ if (pmd_leaf(*pmdp)) {
unsigned long lo;
write_c0_pagemask(PM_HUGE_MASK);
ptep = (pte_t *)pmdp;
diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c
index 8d98af5c7201bb..9a8393e6b4a85e 100644
--- a/arch/nios2/kernel/prom.c
+++ b/arch/nios2/kernel/prom.c
@@ -21,7 +21,8 @@
void __init early_init_devtree(void *params)
{
- __be32 *dtb = (u32 *)__dtb_start;
+ __be32 __maybe_unused *dtb = (u32 *)__dtb_start;
+
#if defined(CONFIG_NIOS2_DTB_AT_PHYS_ADDR)
if (be32_to_cpup((__be32 *)CONFIG_NIOS2_DTB_PHYS_ADDR) ==
OF_DT_HEADER) {
@@ -30,8 +31,11 @@ void __init early_init_devtree(void *params)
return;
}
#endif
+
+#ifdef CONFIG_NIOS2_DTB_SOURCE_BOOL
if (be32_to_cpu((__be32) *dtb) == OF_DT_HEADER)
params = (void *)__dtb_start;
+#endif
early_init_dt_scan(params);
}
diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h
new file mode 100644
index 00000000000000..47c5a1991d1034
--- /dev/null
+++ b/arch/parisc/include/asm/mman.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#include <uapi/asm/mman.h>
+
+/* PARISC cannot allow mdwe as it needs writable stacks */
+static inline bool arch_memory_deny_write_exec_supported(void)
+{
+ return false;
+}
+#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
+
+#endif /* __ASM_MMAN_H__ */
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 98af719d5f85b2..f7722451276ef4 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -104,7 +104,9 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
struct vm_area_struct *vma, *prev;
unsigned long filp_pgoff;
int do_color_align;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {
+ .length = len
+ };
if (unlikely(len > TASK_SIZE))
return -ENOMEM;
@@ -139,7 +141,6 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
return addr;
}
- info.length = len;
info.align_mask = do_color_align ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
info.align_offset = shared_align_offset(filp_pgoff, pgoff);
@@ -160,7 +161,6 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
*/
}
- info.flags = 0;
info.low_limit = mm->mmap_base;
info.high_limit = mmap_upper_limit(NULL);
return vm_unmapped_area(&info);
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index b236a84c4e127e..b13c21373974c8 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -460,3 +460,4 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile
index 4459a48d230335..e45d46bf46a233 100644
--- a/arch/parisc/kernel/vdso32/Makefile
+++ b/arch/parisc/kernel/vdso32/Makefile
@@ -26,23 +26,18 @@ $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so FORCE
# Force dependency (incbin is bad)
# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC) FORCE
+$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(VDSO_LIBGCC) FORCE
$(call if_changed,vdso32ld)
# assembly rules for the .S files
$(obj-vdso32): %.o: %.S FORCE
$(call if_changed_dep,vdso32as)
-$(obj-cvdso32): %.o: %.c FORCE
- $(call if_changed_dep,vdso32cc)
-
# actual build commands
quiet_cmd_vdso32ld = VDSO32L $@
cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@
quiet_cmd_vdso32as = VDSO32A $@
cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
-quiet_cmd_vdso32cc = VDSO32C $@
- cmd_vdso32cc = $(CROSS32CC) $(c_flags) -c -fPIC -mno-fast-indirect-calls -o $@ $<
# Generate VDSO offsets using helper script
gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index a9f7e21f66567a..0356199bd9e7e7 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -180,14 +180,3 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
}
return changed;
}
-
-
-int pmd_huge(pmd_t pmd)
-{
- return 0;
-}
-
-int pud_huge(pud_t pud)
-{
- return 0;
-}
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1c4be337368604..45addf6848ad15 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,6 +137,7 @@ config PPC
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC_FPU
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU
@@ -236,7 +237,7 @@ config PPC
select HAVE_DYNAMIC_FTRACE_WITH_REGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
- select HAVE_FAST_GUP
+ select HAVE_GUP_FAST
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_DESCRIPTORS if PPC64_ELF_ABI_V1
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 65261cbe5bfdbf..93d89f055b70b7 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -153,6 +153,9 @@ CFLAGS-$(CONFIG_PPC32) += $(call cc-option, $(MULTIPLEWORD))
CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata)
+CC_FLAGS_FPU := $(call cc-option,-mhard-float)
+CC_FLAGS_NO_FPU := $(call cc-option,-msoft-float)
+
ifdef CONFIG_FUNCTION_TRACER
ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
@@ -174,7 +177,7 @@ asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
KBUILD_CPPFLAGS += -I $(srctree)/arch/powerpc $(asinstr)
KBUILD_AFLAGS += $(AFLAGS-y)
-KBUILD_CFLAGS += $(call cc-option,-msoft-float)
+KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU)
KBUILD_CFLAGS += $(CFLAGS-y)
CPP = $(CC) -E $(KBUILD_CFLAGS)
diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c
index 74fb86b0d2097c..7c728755852e1a 100644
--- a/arch/powerpc/crypto/chacha-p10-glue.c
+++ b/arch/powerpc/crypto/chacha-p10-glue.c
@@ -197,6 +197,9 @@ static struct skcipher_alg algs[] = {
static int __init chacha_p10_init(void)
{
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ return 0;
+
static_branch_enable(&have_p10);
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
@@ -204,10 +207,13 @@ static int __init chacha_p10_init(void)
static void __exit chacha_p10_exit(void)
{
+ if (!static_branch_likely(&have_p10))
+ return;
+
crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
-module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init);
+module_init(chacha_p10_init);
module_exit(chacha_p10_exit);
MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)");
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
index 48f21820afe209..baf934578c3a5e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
@@ -6,26 +6,6 @@
*/
#ifndef __ASSEMBLY__
#ifdef CONFIG_HUGETLB_PAGE
-static inline int pmd_huge(pmd_t pmd)
-{
- /*
- * leaf pte for huge page
- */
- if (radix_enabled())
- return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
- return 0;
-}
-
-static inline int pud_huge(pud_t pud)
-{
- /*
- * leaf pte for huge page
- */
- if (radix_enabled())
- return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
- return 0;
-}
-
/*
* With radix , we have hugepage ptes in the pud and pmd entries. We don't
* need to setup hugepage directory for them. Our pte and page directory format
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index ced7ee8b42fc35..6ac73da7b80e3c 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -4,31 +4,6 @@
#ifndef __ASSEMBLY__
#ifdef CONFIG_HUGETLB_PAGE
-/*
- * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
- * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
- *
- * Defined in such a way that we can optimize away code block at build time
- * if CONFIG_HUGETLB_PAGE=n.
- *
- * returns true for pmd migration entries, THP, devmap, hugetlb
- * But compile time dependent on CONFIG_HUGETLB_PAGE
- */
-static inline int pmd_huge(pmd_t pmd)
-{
- /*
- * leaf pte for huge page
- */
- return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
-}
-
-static inline int pud_huge(pud_t pud)
-{
- /*
- * leaf pte for huge page
- */
- return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
-}
/*
* With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index fac5615e6bc575..8f9432e3855ae5 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -262,6 +262,18 @@ extern unsigned long __kernel_io_end;
extern struct page *vmemmap;
extern unsigned long pci_io_base;
+
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
+{
+ return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
+}
+
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
+{
+ return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
+}
#endif /* __ASSEMBLY__ */
#include <asm/book3s/64/hash.h>
@@ -1426,20 +1438,5 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va
return false;
}
-/*
- * Like pmd_huge(), but works regardless of config options
- */
-#define pmd_leaf pmd_leaf
-static inline bool pmd_leaf(pmd_t pmd)
-{
- return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
-}
-
-#define pud_leaf pud_leaf
-static inline bool pud_leaf(pud_t pud)
-{
- return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
-}
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/fpu.h b/arch/powerpc/include/asm/fpu.h
new file mode 100644
index 00000000000000..ca584e4bc40f84
--- /dev/null
+++ b/arch/powerpc/include/asm/fpu.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_POWERPC_FPU_H
+#define _ASM_POWERPC_FPU_H
+
+#include <linux/preempt.h>
+
+#include <asm/cpu_has_feature.h>
+#include <asm/switch_to.h>
+
+#define kernel_fpu_available() (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+
+static inline void kernel_fpu_begin(void)
+{
+ preempt_disable();
+ enable_kernel_fp();
+}
+
+static inline void kernel_fpu_end(void)
+{
+ disable_kernel_fp();
+ preempt_enable();
+}
+
+#endif /* ! _ASM_POWERPC_FPU_H */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 3b72c7ed24cfde..aa5c0fd5edb1c9 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -406,9 +406,5 @@ extern void *abatron_pteptrs[2];
#include <asm/nohash/mmu.h>
#endif
-#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
-#define __HAVE_ARCH_RESERVED_KERNEL_PAGES
-#endif
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 427db14292c9ec..f5f39d4f03c821 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -351,16 +351,6 @@ static inline int hugepd_ok(hugepd_t hpd)
#endif
}
-static inline int pmd_huge(pmd_t pmd)
-{
- return 0;
-}
-
-static inline int pud_huge(pud_t pud)
-{
- return 0;
-}
-
#define is_hugepd(hpd) (hugepd_ok(hpd))
#endif
diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
index f0a4cf01e85c03..78302f6c258006 100644
--- a/arch/powerpc/include/asm/vdso/gettimeofday.h
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -4,7 +4,6 @@
#ifndef __ASSEMBLY__
-#include <asm/page.h>
#include <asm/vdso/timebase.h>
#include <asm/barrier.h>
#include <asm/unistd.h>
@@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void);
static __always_inline
const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
- return (void *)vd + PAGE_SIZE;
+ return (void *)vd + (1U << CONFIG_PAGE_SHIFT);
}
#endif
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 8920862ffd7916..f0ae39e77e3741 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -216,6 +216,6 @@ const struct dma_map_ops dma_iommu_ops = {
.get_required_mask = dma_iommu_get_required_mask,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
+ .alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
};
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index d14eda1e858968..ae8c7619e597d9 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1735,8 +1735,3 @@ static void __init fadump_reserve_crash_area(u64 base)
memblock_reserve(mstart, msize);
}
}
-
-unsigned long __init arch_reserved_kernel_pages(void)
-{
- return memblock_reserved_size() / PAGE_SIZE;
-}
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 1185efebf032b6..b70b4f93561fcc 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -26,6 +26,7 @@
#include <linux/iommu.h>
#include <linux/sched.h>
#include <linux/debugfs.h>
+#include <linux/vmalloc.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
@@ -1285,15 +1286,14 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain,
struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct iommu_group *grp = iommu_group_get(dev);
struct iommu_table_group *table_group;
+ struct iommu_group *grp;
/* At first attach the ownership is already set */
- if (!domain) {
- iommu_group_put(grp);
+ if (!domain)
return 0;
- }
+ grp = iommu_group_get(dev);
table_group = iommu_group_get_iommudata(grp);
/*
* The domain being set to PLATFORM from earlier
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 17173b82ca21dc..3656f1ca7a21c6 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -548,3 +548,4 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index c0b58afb9a4762..ef3ce37f1bb3ec 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -282,12 +282,10 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
unsigned long found, next_end;
- struct vm_unmapped_area_info info;
-
- info.flags = 0;
- info.length = len;
- info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
- info.align_offset = 0;
+ struct vm_unmapped_area_info info = {
+ .length = len,
+ .align_mask = PAGE_MASK & ((1ul << pshift) - 1),
+ };
/*
* Check till the allow max value for this mmap request
*/
@@ -326,13 +324,13 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
unsigned long found, prev;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {
+ .flags = VM_UNMAPPED_AREA_TOPDOWN,
+ .length = len,
+ .align_mask = PAGE_MASK & ((1ul << pshift) - 1),
+ };
unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr);
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
- info.align_offset = 0;
/*
* If we are trying to allocate above DEFAULT_MAP_WINDOW
* Add the different to the mmap_base.
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 53335ae21a40ae..21569045249594 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -71,23 +71,26 @@ static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long add
return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
}
-static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code,
+ struct mm_struct *mm, struct vm_area_struct *vma)
{
- struct mm_struct *mm = current->mm;
/*
* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
- mmap_read_unlock(mm);
+ if (mm)
+ mmap_read_unlock(mm);
+ else
+ vma_end_read(vma);
return __bad_area_nosemaphore(regs, address, si_code);
}
static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
+ struct mm_struct *mm,
struct vm_area_struct *vma)
{
- struct mm_struct *mm = current->mm;
int pkey;
/*
@@ -109,7 +112,10 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
*/
pkey = vma_pkey(vma);
- mmap_read_unlock(mm);
+ if (mm)
+ mmap_read_unlock(mm);
+ else
+ vma_end_read(vma);
/*
* If we are in kernel mode, bail out with a SEGV, this will
@@ -124,9 +130,10 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
return 0;
}
-static noinline int bad_access(struct pt_regs *regs, unsigned long address)
+static noinline int bad_access(struct pt_regs *regs, unsigned long address,
+ struct mm_struct *mm, struct vm_area_struct *vma)
{
- return __bad_area(regs, address, SEGV_ACCERR);
+ return __bad_area(regs, address, SEGV_ACCERR, mm, vma);
}
static int do_sigbus(struct pt_regs *regs, unsigned long address,
@@ -479,13 +486,13 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
if (unlikely(access_pkey_error(is_write, is_exec,
(error_code & DSISR_KEYFAULT), vma))) {
- vma_end_read(vma);
- goto lock_mmap;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ return bad_access_pkey(regs, address, NULL, vma);
}
if (unlikely(access_error(is_write, is_exec, vma))) {
- vma_end_read(vma);
- goto lock_mmap;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ return bad_access(regs, address, NULL, vma);
}
fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
@@ -521,10 +528,10 @@ retry:
if (unlikely(access_pkey_error(is_write, is_exec,
(error_code & DSISR_KEYFAULT), vma)))
- return bad_access_pkey(regs, address, vma);
+ return bad_access_pkey(regs, address, mm, vma);
if (unlikely(access_error(is_write, is_exec, vma)))
- return bad_access(regs, address);
+ return bad_access(regs, address, mm, vma);
/*
* If for any reason at all we couldn't handle the fault,
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 3a440004b97d2b..a197d4c2244b32 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -16,6 +16,7 @@
#include <linux/highmem.h>
#include <linux/suspend.h>
#include <linux/dma-direct.h>
+#include <linux/vmalloc.h>
#include <asm/swiotlb.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 9b99113cb51a8f..6621cfc3baf86c 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -102,7 +102,7 @@ struct page *p4d_page(p4d_t p4d)
{
if (p4d_leaf(p4d)) {
if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
- VM_WARN_ON(!p4d_huge(p4d));
+ VM_WARN_ON(!p4d_leaf(p4d));
return pte_page(p4d_pte(p4d));
}
return virt_to_page(p4d_pgtable(p4d));
@@ -113,7 +113,7 @@ struct page *pud_page(pud_t pud)
{
if (pud_leaf(pud)) {
if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
- VM_WARN_ON(!pud_huge(pud));
+ VM_WARN_ON(!pud_leaf(pud));
return pte_page(pud_pte(pud));
}
return virt_to_page(pud_pgtable(pud));
@@ -132,7 +132,7 @@ struct page *pmd_page(pmd_t pmd)
* enabled so these checks can't be used.
*/
if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
- VM_WARN_ON(!(pmd_leaf(pmd) || pmd_huge(pmd)));
+ VM_WARN_ON(!pmd_leaf(pmd));
return pte_page(pmd_pte(pmd));
}
return virt_to_page(pmd_page_vaddr(pmd));
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index d6b5f5ecd5152b..56dc6b29a3e769 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -695,7 +695,7 @@ static const struct dma_map_ops ps3_sb_dma_ops = {
.unmap_page = ps3_unmap_page,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
+ .alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
};
@@ -709,7 +709,7 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = {
.unmap_page = ps3_unmap_page,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
+ .alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
};
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 90ff85c879bfe9..477c1d5e1737ba 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -611,7 +611,7 @@ static const struct dma_map_ops vio_dma_mapping_ops = {
.get_required_mask = dma_iommu_get_required_mask,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
+ .alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
};
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index be09c8836d56be..bf1a83c09ea059 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -27,6 +27,7 @@ config RISCV
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if 64BIT && FPU
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MMIOWB
@@ -132,7 +133,7 @@ config RISCV
select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
select HAVE_EBPF_JIT if MMU
- select HAVE_FAST_GUP if MMU
+ select HAVE_GUP_FAST if MMU
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_GCC_PLUGINS
diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index 910ba8837add86..2acc7d876e1fb6 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -82,14 +82,14 @@ config ERRATA_THEAD
Otherwise, please say "N" here to avoid unnecessary overhead.
-config ERRATA_THEAD_PBMT
- bool "Apply T-Head memory type errata"
+config ERRATA_THEAD_MAE
+ bool "Apply T-Head's memory attribute extension (XTheadMae) errata"
depends on ERRATA_THEAD && 64BIT && MMU
select RISCV_ALTERNATIVE_EARLY
default y
help
- This will apply the memory type errata to handle the non-standard
- memory type bits in page-table-entries on T-Head SoCs.
+ This will apply the memory attribute extension errata to handle the
+ non-standard PTE utilization on T-Head SoCs (XTheadMae).
If you don't know what to do here, say "Y".
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 252d63942f34eb..032b9dc5afbf3f 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -84,6 +84,9 @@ KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64i
KBUILD_AFLAGS += -march=$(riscv-march-y)
+# For C code built with floating-point support, exclude V but keep F and D.
+CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)([^v_]*)v?/\1\2/')
+
KBUILD_CFLAGS += -mno-save-restore
KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET)
@@ -151,7 +154,7 @@ endif
endif
vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg
-vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg:../compat_vdso/compat_vdso.so
+vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg
ifneq ($(CONFIG_XIP_KERNEL),y)
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy)
diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
index 45b58b6f3df88e..ccd0ce55aa5301 100644
--- a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
@@ -238,7 +238,6 @@
axp15060: pmic@36 {
compatible = "x-powers,axp15060";
reg = <0x36>;
- interrupts = <0>;
interrupt-controller;
#interrupt-cells = <1>;
@@ -279,24 +278,6 @@
status = "okay";
};
-&i2srx {
- pinctrl-names = "default";
- pinctrl-0 = <&i2srx_pins>;
- status = "okay";
-};
-
-&i2stx0 {
- pinctrl-names = "default";
- pinctrl-0 = <&mclk_ext_pins>;
- status = "okay";
-};
-
-&i2stx1 {
- pinctrl-names = "default";
- pinctrl-0 = <&i2stx1_pins>;
- status = "okay";
-};
-
&mmc0 {
max-frequency = <100000000>;
assigned-clocks = <&syscrg JH7110_SYSCLK_SDIO0_SDCARD>;
@@ -447,46 +428,6 @@
};
};
- i2srx_pins: i2srx-0 {
- clk-sd-pins {
- pinmux = <GPIOMUX(38, GPOUT_LOW,
- GPOEN_DISABLE,
- GPI_SYS_I2SRX_BCLK)>,
- <GPIOMUX(63, GPOUT_LOW,
- GPOEN_DISABLE,
- GPI_SYS_I2SRX_LRCK)>,
- <GPIOMUX(38, GPOUT_LOW,
- GPOEN_DISABLE,
- GPI_SYS_I2STX1_BCLK)>,
- <GPIOMUX(63, GPOUT_LOW,
- GPOEN_DISABLE,
- GPI_SYS_I2STX1_LRCK)>,
- <GPIOMUX(61, GPOUT_LOW,
- GPOEN_DISABLE,
- GPI_SYS_I2SRX_SDIN0)>;
- input-enable;
- };
- };
-
- i2stx1_pins: i2stx1-0 {
- sd-pins {
- pinmux = <GPIOMUX(44, GPOUT_SYS_I2STX1_SDO0,
- GPOEN_ENABLE,
- GPI_NONE)>;
- bias-disable;
- input-disable;
- };
- };
-
- mclk_ext_pins: mclk-ext-0 {
- mclk-ext-pins {
- pinmux = <GPIOMUX(4, GPOUT_LOW,
- GPOEN_DISABLE,
- GPI_SYS_MCLK_EXT)>;
- input-enable;
- };
- };
-
mmc0_pins: mmc0-0 {
rst-pins {
pinmux = <GPIOMUX(62, GPOUT_SYS_SDIO0_RST,
@@ -622,40 +563,6 @@
};
};
- tdm_pins: tdm-0 {
- tx-pins {
- pinmux = <GPIOMUX(44, GPOUT_SYS_TDM_TXD,
- GPOEN_ENABLE,
- GPI_NONE)>;
- bias-pull-up;
- drive-strength = <2>;
- input-disable;
- input-schmitt-disable;
- slew-rate = <0>;
- };
-
- rx-pins {
- pinmux = <GPIOMUX(61, GPOUT_HIGH,
- GPOEN_DISABLE,
- GPI_SYS_TDM_RXD)>;
- input-enable;
- };
-
- sync-pins {
- pinmux = <GPIOMUX(63, GPOUT_HIGH,
- GPOEN_DISABLE,
- GPI_SYS_TDM_SYNC)>;
- input-enable;
- };
-
- pcmclk-pins {
- pinmux = <GPIOMUX(38, GPOUT_HIGH,
- GPOEN_DISABLE,
- GPI_SYS_TDM_CLK)>;
- input-enable;
- };
- };
-
uart0_pins: uart0-0 {
tx-pins {
pinmux = <GPIOMUX(5, GPOUT_SYS_UART0_TX,
@@ -681,12 +588,6 @@
};
};
-&tdm {
- pinctrl-names = "default";
- pinctrl-0 = <&tdm_pins>;
- status = "okay";
-};
-
&uart0 {
pinctrl-names = "default";
pinctrl-0 = <&uart0_pins>;
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index b1c410bbc1aece..bf6a0a6318ee30 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -19,20 +19,26 @@
#include <asm/patch.h>
#include <asm/vendorid_list.h>
-static bool errata_probe_pbmt(unsigned int stage,
- unsigned long arch_id, unsigned long impid)
+#define CSR_TH_SXSTATUS 0x5c0
+#define SXSTATUS_MAEE _AC(0x200000, UL)
+
+static bool errata_probe_mae(unsigned int stage,
+ unsigned long arch_id, unsigned long impid)
{
- if (!IS_ENABLED(CONFIG_ERRATA_THEAD_PBMT))
+ if (!IS_ENABLED(CONFIG_ERRATA_THEAD_MAE))
return false;
if (arch_id != 0 || impid != 0)
return false;
- if (stage == RISCV_ALTERNATIVES_EARLY_BOOT ||
- stage == RISCV_ALTERNATIVES_MODULE)
- return true;
+ if (stage != RISCV_ALTERNATIVES_EARLY_BOOT &&
+ stage != RISCV_ALTERNATIVES_MODULE)
+ return false;
+
+ if (!(csr_read(CSR_TH_SXSTATUS) & SXSTATUS_MAEE))
+ return false;
- return false;
+ return true;
}
/*
@@ -140,8 +146,8 @@ static u32 thead_errata_probe(unsigned int stage,
{
u32 cpu_req_errata = 0;
- if (errata_probe_pbmt(stage, archid, impid))
- cpu_req_errata |= BIT(ERRATA_THEAD_PBMT);
+ if (errata_probe_mae(stage, archid, impid))
+ cpu_req_errata |= BIT(ERRATA_THEAD_MAE);
errata_probe_cmo(stage, archid, impid);
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 1f2dbfb8a8bfc8..efd851e1b48321 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -23,7 +23,7 @@
#endif
#ifdef CONFIG_ERRATA_THEAD
-#define ERRATA_THEAD_PBMT 0
+#define ERRATA_THEAD_MAE 0
#define ERRATA_THEAD_PMU 1
#define ERRATA_THEAD_NUMBER 2
#endif
@@ -53,20 +53,20 @@ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \
* in the default case.
*/
#define ALT_SVPBMT_SHIFT 61
-#define ALT_THEAD_PBMT_SHIFT 59
+#define ALT_THEAD_MAE_SHIFT 59
#define ALT_SVPBMT(_val, prot) \
asm(ALTERNATIVE_2("li %0, 0\t\nnop", \
"li %0, %1\t\nslli %0,%0,%3", 0, \
RISCV_ISA_EXT_SVPBMT, CONFIG_RISCV_ISA_SVPBMT, \
"li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \
- ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \
+ ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \
: "=r"(_val) \
: "I"(prot##_SVPBMT >> ALT_SVPBMT_SHIFT), \
- "I"(prot##_THEAD >> ALT_THEAD_PBMT_SHIFT), \
+ "I"(prot##_THEAD >> ALT_THEAD_MAE_SHIFT), \
"I"(ALT_SVPBMT_SHIFT), \
- "I"(ALT_THEAD_PBMT_SHIFT))
+ "I"(ALT_THEAD_MAE_SHIFT))
-#ifdef CONFIG_ERRATA_THEAD_PBMT
+#ifdef CONFIG_ERRATA_THEAD_MAE
/*
* IO/NOCACHE memory types are handled together with svpbmt,
* so on T-Head chips, check if no other memory type is set,
@@ -83,11 +83,11 @@ asm volatile(ALTERNATIVE( \
"slli t3, t3, %3\n\t" \
"or %0, %0, t3\n\t" \
"2:", THEAD_VENDOR_ID, \
- ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \
+ ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \
: "+r"(_val) \
- : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \
- "I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \
- "I"(ALT_THEAD_PBMT_SHIFT) \
+ : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_MAE_SHIFT), \
+ "I"(_PAGE_PMA_THEAD >> ALT_THEAD_MAE_SHIFT), \
+ "I"(ALT_THEAD_MAE_SHIFT) \
: "t3")
#else
#define ALT_THEAD_PMA(_val)
diff --git a/arch/riscv/include/asm/fpu.h b/arch/riscv/include/asm/fpu.h
new file mode 100644
index 00000000000000..91c04c244e12d3
--- /dev/null
+++ b/arch/riscv/include/asm/fpu.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_RISCV_FPU_H
+#define _ASM_RISCV_FPU_H
+
+#include <asm/switch_to.h>
+
+#define kernel_fpu_available() has_fpu()
+
+void kernel_fpu_begin(void);
+void kernel_fpu_end(void);
+
+#endif /* ! _ASM_RISCV_FPU_H */
diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
index 22deb7a2a6ec4e..b1ce97a9dbfce7 100644
--- a/arch/riscv/include/asm/hugetlb.h
+++ b/arch/riscv/include/asm/hugetlb.h
@@ -5,11 +5,11 @@
#include <asm/cacheflush.h>
#include <asm/page.h>
-static inline void arch_clear_hugepage_flags(struct page *page)
+static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
- clear_bit(PG_dcache_clean, &page->flags);
+ clear_bit(PG_dcache_clean, &folio->flags);
}
-#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
bool arch_hugetlb_migration_supported(struct hstate *h);
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 2947423b5082e9..115ac98b8d729d 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -89,7 +89,7 @@ typedef struct page *pgtable_t;
#define PTE_FMT "%08lx"
#endif
-#ifdef CONFIG_64BIT
+#if defined(CONFIG_64BIT) && defined(CONFIG_MMU)
/*
* We override this value as its generic definition uses __pa too early in
* the boot process (before kernel_map.va_pa_offset is set).
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 97fcde30e2477d..f2d5973a011bdd 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -593,6 +593,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
return ptep_test_and_clear_young(vma, address, ptep);
}
+#define pgprot_nx pgprot_nx
+static inline pgprot_t pgprot_nx(pgprot_t _prot)
+{
+ return __pgprot(pgprot_val(_prot) & ~_PAGE_EXEC);
+}
+
#define pgprot_noncached pgprot_noncached
static inline pgprot_t pgprot_noncached(pgprot_t _prot)
{
@@ -642,6 +648,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
#define __pud_to_phys(pud) (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT)
+#define pud_pfn pud_pfn
static inline unsigned long pud_pfn(pud_t pud)
{
return ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT);
@@ -890,7 +897,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
#define PAGE_SHARED __pgprot(0)
#define PAGE_KERNEL __pgprot(0)
#define swapper_pg_dir NULL
-#define TASK_SIZE 0xffffffffUL
+#define TASK_SIZE _AC(-1, UL)
#define VMALLOC_START _AC(0, UL)
#define VMALLOC_END TASK_SIZE
diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h
index 980094c2e9761d..ac80216549ffa6 100644
--- a/arch/riscv/include/asm/syscall_wrapper.h
+++ b/arch/riscv/include/asm/syscall_wrapper.h
@@ -36,7 +36,8 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *);
ulong) \
__attribute__((alias(__stringify(___se_##prefix##name)))); \
__diag_pop(); \
- static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
+ __used; \
static long ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__))
#define SC_RISCV_REGS_TO_ARGS(x, ...) \
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index ec0cab9fbddd0d..72ec1d9bd3f312 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
- long __kr_err; \
+ long __kr_err = 0; \
\
__get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
if (unlikely(__kr_err)) \
@@ -328,7 +328,7 @@ do { \
#define __put_kernel_nofault(dst, src, type, err_label) \
do { \
- long __kr_err; \
+ long __kr_err = 0; \
\
__put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
if (unlikely(__kr_err)) \
diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h
index 10aaa83db89ef7..95050ebe9ad00b 100644
--- a/arch/riscv/include/uapi/asm/auxvec.h
+++ b/arch/riscv/include/uapi/asm/auxvec.h
@@ -34,7 +34,7 @@
#define AT_L3_CACHEGEOMETRY 47
/* entries in ARCH_DLINFO */
-#define AT_VECTOR_SIZE_ARCH 9
+#define AT_VECTOR_SIZE_ARCH 10
#define AT_MINSIGSTKSZ 51
#endif /* _UAPI_ASM_RISCV_AUXVEC_H */
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 9f2a8e3ff2048e..2902f68dc913aa 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -54,7 +54,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_EXT_ZFHMIN (1 << 28)
#define RISCV_HWPROBE_EXT_ZIHINTNTL (1 << 29)
#define RISCV_HWPROBE_EXT_ZVFH (1 << 30)
-#define RISCV_HWPROBE_EXT_ZVFHMIN (1 << 31)
+#define RISCV_HWPROBE_EXT_ZVFHMIN (1ULL << 31)
#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32)
#define RISCV_HWPROBE_EXT_ZTSO (1ULL << 33)
#define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34)
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 81d94a8ee10f29..5b243d46f4b174 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o
obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o
obj-$(CONFIG_FPU) += fpu.o
+obj-$(CONFIG_FPU) += kernel_mode_fpu.o
obj-$(CONFIG_RISCV_ISA_V) += vector.o
obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o
obj-$(CONFIG_SMP) += smpboot.o
diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile
index 62fa393b2eb2ea..3df4cb788c1fa4 100644
--- a/arch/riscv/kernel/compat_vdso/Makefile
+++ b/arch/riscv/kernel/compat_vdso/Makefile
@@ -74,5 +74,5 @@ quiet_cmd_compat_vdsold = VDSOLD $@
rm $@.tmp
# actual build commands
-quiet_cmd_compat_vdsoas = VDSOAS $@
+quiet_cmd_compat_vdsoas = VDSOAS $@
cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $<
diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c
index 54260c16f9912a..11c0d2e0becfe9 100644
--- a/arch/riscv/kernel/elf_kexec.c
+++ b/arch/riscv/kernel/elf_kexec.c
@@ -19,6 +19,7 @@
#include <linux/libfdt.h>
#include <linux/types.h>
#include <linux/memblock.h>
+#include <linux/vmalloc.h>
#include <asm/setup.h>
int arch_kimage_file_post_load_cleanup(struct kimage *image)
diff --git a/arch/riscv/kernel/kernel_mode_fpu.c b/arch/riscv/kernel/kernel_mode_fpu.c
new file mode 100644
index 00000000000000..0ac8348876c499
--- /dev/null
+++ b/arch/riscv/kernel/kernel_mode_fpu.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#include <linux/export.h>
+#include <linux/preempt.h>
+
+#include <asm/csr.h>
+#include <asm/fpu.h>
+#include <asm/processor.h>
+#include <asm/switch_to.h>
+
+void kernel_fpu_begin(void)
+{
+ preempt_disable();
+ fstate_save(current, task_pt_regs(current));
+ csr_set(CSR_SSTATUS, SR_FS);
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+ csr_clear(CSR_SSTATUS, SR_FS);
+ fstate_restore(current, task_pt_regs(current));
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_end);
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 37e87fdcf6a000..30e12b310cab73 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
*/
lockdep_assert_held(&text_mutex);
+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
@@ -92,6 +94,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);
+ preempt_enable();
+
return 0;
}
NOKPROBE_SYMBOL(__patch_insn_set);
@@ -122,6 +126,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
if (!riscv_patch_in_stop_machine)
lockdep_assert_held(&text_mutex);
+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
@@ -134,6 +140,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);
+ preempt_enable();
+
return ret;
}
NOKPROBE_SYMBOL(__patch_insn_write);
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index 2f08c14a933d05..71a8b8945b26fd 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -6,6 +6,7 @@
#include <linux/extable.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>
+#include <linux/vmalloc.h>
#include <asm/ptrace.h>
#include <linux/uaccess.h>
#include <asm/sections.h>
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 92922dbd5b5c1f..e4bc61c4e58af9 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -27,8 +27,6 @@
#include <asm/vector.h>
#include <asm/cpufeature.h>
-register unsigned long gp_in_global __asm__("gp");
-
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
@@ -37,7 +35,7 @@ EXPORT_SYMBOL(__stack_chk_guard);
extern asmlinkage void ret_from_fork(void);
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
{
cpu_do_idle();
}
@@ -207,7 +205,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (unlikely(args->fn)) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->gp = gp_in_global;
/* Supervisor/Machine, irqs on: */
childregs->status = SR_PP | SR_PIE;
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 501e66debf6972..5a2edd7f027e5d 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -119,6 +119,13 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
struct __sc_riscv_v_state __user *state = sc_vec;
void __user *datap;
+ /*
+ * Mark the vstate as clean prior performing the actual copy,
+ * to avoid getting the vstate incorrectly clobbered by the
+ * discarded vector state.
+ */
+ riscv_v_vstate_set_restore(current, regs);
+
/* Copy everything of __sc_riscv_v_state except datap. */
err = __copy_from_user(&current->thread.vstate, &state->v_state,
offsetof(struct __riscv_v_ext_state, datap));
@@ -133,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
* Copy the whole vector content from user space datap. Use
* copy_from_user to prevent information leak.
*/
- err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
- if (unlikely(err))
- return err;
-
- riscv_v_vstate_set_restore(current, regs);
-
- return err;
+ return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
}
#else
#define save_v_state(task, regs) (0)
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 868d6280cf667e..05a16b1f0aee85 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -122,7 +122,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
pr_cont("\n");
__show_regs(regs);
- dump_instr(KERN_EMERG, regs);
+ dump_instr(KERN_INFO, regs);
}
force_sig_fault(signo, code, (void __user *)addr);
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 9b517fe1b8a8ec..272c431ac5b9f8 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -37,6 +37,7 @@ endif
# Disable -pg to prevent insert call site
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
+CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
# Disable profiling and instrumentation for VDSO code
GCOV_PROFILE := n
diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
index 39e72aa016a4cc..b467ba5ed91000 100644
--- a/arch/riscv/kvm/aia_aplic.c
+++ b/arch/riscv/kvm/aia_aplic.c
@@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
raw_spin_lock_irqsave(&irqd->lock, flags);
sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
- if (!pending &&
- ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
- (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
goto skip_write_pending;
+ if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH ||
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW) {
+ if (!pending)
+ goto skip_write_pending;
+ if ((irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW)
+ goto skip_write_pending;
+ if (!(irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_HIGH)
+ goto skip_write_pending;
+ }
+
if (pending)
irqd->state |= APLIC_IRQ_STATE_PENDING;
else
@@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
static bool aplic_read_input(struct aplic *aplic, u32 irq)
{
- bool ret;
- unsigned long flags;
+ u32 sourcecfg, sm, raw_input, irq_inverted;
struct aplic_irq *irqd;
+ unsigned long flags;
+ bool ret = false;
if (!irq || aplic->nr_irqs <= irq)
return false;
irqd = &aplic->irqs[irq];
raw_spin_lock_irqsave(&irqd->lock, flags);
- ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
+
+ sourcecfg = irqd->sourcecfg;
+ if (sourcecfg & APLIC_SOURCECFG_D)
+ goto skip;
+
+ sm = sourcecfg & APLIC_SOURCECFG_SM_MASK;
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
+ goto skip;
+
+ raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0;
+ irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW ||
+ sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0;
+ ret = !!(raw_input ^ irq_inverted);
+
+skip:
raw_spin_unlock_irqrestore(&irqd->lock, flags);
return ret;
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index f4a6124d25c939..994adc26db4b10 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -986,7 +986,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
{
- return copy_isa_ext_reg_indices(vcpu, NULL);;
+ return copy_isa_ext_reg_indices(vcpu, NULL);
}
static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 3ba1d4dde5dd1a..5224f373380225 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -292,7 +292,10 @@ void handle_page_fault(struct pt_regs *regs)
if (unlikely(access_error(cause, vma))) {
vma_end_read(vma);
- goto lock_mmap;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ tsk->thread.bad_cause = cause;
+ bad_area_nosemaphore(regs, SEGV_ACCERR, addr);
+ return;
}
fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index 5ef2a6891158a6..0ebd968b33c997 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -399,16 +399,6 @@ static bool is_napot_size(unsigned long size)
#endif /*CONFIG_RISCV_ISA_SVNAPOT*/
-int pud_huge(pud_t pud)
-{
- return pud_leaf(pud);
-}
-
-int pmd_huge(pmd_t pmd)
-{
- return pmd_leaf(pmd);
-}
-
static bool __hugetlb_valid_size(unsigned long size)
{
if (size == HPAGE_SIZE)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index fe8e159394d8ee..9687618432031f 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -231,7 +231,7 @@ static void __init setup_bootmem(void)
* In 64-bit, any use of __va/__pa before this point is wrong as we
* did not know the start of DRAM before.
*/
- if (IS_ENABLED(CONFIG_64BIT))
+ if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU))
kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
/*
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 893566e004b73f..07d743f87b3f69 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -99,7 +99,7 @@ static void __ipi_flush_tlb_range_asid(void *info)
local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
}
-static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid,
+static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid,
unsigned long start, unsigned long size,
unsigned long stride)
{
@@ -200,7 +200,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID,
+ __flush_tlb_range(cpu_online_mask, FLUSH_TLB_NO_ASID,
start, end - start, PAGE_SIZE);
}
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index aac19008547241..ec9d692838fca5 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -722,6 +722,9 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of
if (ret)
return ret;
+ /* store prog start time */
+ emit_mv(RV_REG_S1, RV_REG_A0, ctx);
+
/* if (__bpf_prog_enter(prog) == 0)
* goto skip_exec_of_prog;
*/
@@ -729,9 +732,6 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of
/* nop reserved for conditional jump */
emit(rv_nop(), ctx);
- /* store prog start time */
- emit_mv(RV_REG_S1, RV_REG_A0, ctx);
-
/* arg1: &args_off */
emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx);
if (!p->jited)
@@ -1463,6 +1463,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
if (ret < 0)
return ret;
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ const struct btf_func_model *fm;
+ int idx;
+
+ fm = bpf_jit_find_kfunc_model(ctx->prog, insn);
+ if (!fm)
+ return -EINVAL;
+
+ for (idx = 0; idx < fm->nr_args; idx++) {
+ u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx);
+
+ if (fm->arg_size[idx] == sizeof(int))
+ emit_sextw(reg, reg, ctx);
+ }
+ }
+
ret = emit_call(addr, fixed_addr, ctx);
if (ret)
return ret;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8f01ada6845e36..d9aed4c93ee671 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -174,7 +174,7 @@ config S390
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
select HAVE_EFFICIENT_UNALIGNED_ACCESS
- select HAVE_FAST_GUP
+ select HAVE_GUP_FAST
select HAVE_FENTRY
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ARG_ACCESS_API
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 7138d189cc420a..0c4cad7d5a5b11 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -15,31 +15,31 @@
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
-static inline int arch_atomic_read(const atomic_t *v)
+static __always_inline int arch_atomic_read(const atomic_t *v)
{
return __atomic_read(v);
}
#define arch_atomic_read arch_atomic_read
-static inline void arch_atomic_set(atomic_t *v, int i)
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
__atomic_set(v, i);
}
#define arch_atomic_set arch_atomic_set
-static inline int arch_atomic_add_return(int i, atomic_t *v)
+static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
{
return __atomic_add_barrier(i, &v->counter) + i;
}
#define arch_atomic_add_return arch_atomic_add_return
-static inline int arch_atomic_fetch_add(int i, atomic_t *v)
+static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
return __atomic_add_barrier(i, &v->counter);
}
#define arch_atomic_fetch_add arch_atomic_fetch_add
-static inline void arch_atomic_add(int i, atomic_t *v)
+static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
__atomic_add(i, &v->counter);
}
@@ -50,11 +50,11 @@ static inline void arch_atomic_add(int i, atomic_t *v)
#define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v)
#define ATOMIC_OPS(op) \
-static inline void arch_atomic_##op(int i, atomic_t *v) \
+static __always_inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
__atomic_##op(i, &v->counter); \
} \
-static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
+static __always_inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
{ \
return __atomic_##op##_barrier(i, &v->counter); \
}
@@ -74,7 +74,7 @@ ATOMIC_OPS(xor)
#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
+static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
return __atomic_cmpxchg(&v->counter, old, new);
}
@@ -82,31 +82,31 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
#define ATOMIC64_INIT(i) { (i) }
-static inline s64 arch_atomic64_read(const atomic64_t *v)
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
return __atomic64_read(v);
}
#define arch_atomic64_read arch_atomic64_read
-static inline void arch_atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
__atomic64_set(v, i);
}
#define arch_atomic64_set arch_atomic64_set
-static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
return __atomic64_add_barrier(i, (long *)&v->counter) + i;
}
#define arch_atomic64_add_return arch_atomic64_add_return
-static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
return __atomic64_add_barrier(i, (long *)&v->counter);
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-static inline void arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
__atomic64_add(i, (long *)&v->counter);
}
@@ -114,20 +114,20 @@ static inline void arch_atomic64_add(s64 i, atomic64_t *v)
#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
-static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
return __atomic64_cmpxchg((long *)&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
-#define ATOMIC64_OPS(op) \
-static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
-{ \
- __atomic64_##op(i, (long *)&v->counter); \
-} \
-static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \
-{ \
- return __atomic64_##op##_barrier(i, (long *)&v->counter); \
+#define ATOMIC64_OPS(op) \
+static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
+{ \
+ __atomic64_##op(i, (long *)&v->counter); \
+} \
+static __always_inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \
+{ \
+ return __atomic64_##op##_barrier(i, (long *)&v->counter); \
}
ATOMIC64_OPS(and)
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index 50510e08b893b5..7fa5f96a553a47 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -8,7 +8,7 @@
#ifndef __ARCH_S390_ATOMIC_OPS__
#define __ARCH_S390_ATOMIC_OPS__
-static inline int __atomic_read(const atomic_t *v)
+static __always_inline int __atomic_read(const atomic_t *v)
{
int c;
@@ -18,14 +18,14 @@ static inline int __atomic_read(const atomic_t *v)
return c;
}
-static inline void __atomic_set(atomic_t *v, int i)
+static __always_inline void __atomic_set(atomic_t *v, int i)
{
asm volatile(
" st %1,%0\n"
: "=R" (v->counter) : "d" (i));
}
-static inline s64 __atomic64_read(const atomic64_t *v)
+static __always_inline s64 __atomic64_read(const atomic64_t *v)
{
s64 c;
@@ -35,7 +35,7 @@ static inline s64 __atomic64_read(const atomic64_t *v)
return c;
}
-static inline void __atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void __atomic64_set(atomic64_t *v, s64 i)
{
asm volatile(
" stg %1,%0\n"
@@ -45,7 +45,7 @@ static inline void __atomic64_set(atomic64_t *v, s64 i)
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
#define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \
-static inline op_type op_name(op_type val, op_type *ptr) \
+static __always_inline op_type op_name(op_type val, op_type *ptr) \
{ \
op_type old; \
\
@@ -96,7 +96,7 @@ __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
#define __ATOMIC_OP(op_name, op_string) \
-static inline int op_name(int val, int *ptr) \
+static __always_inline int op_name(int val, int *ptr) \
{ \
int old, new; \
\
@@ -122,7 +122,7 @@ __ATOMIC_OPS(__atomic_xor, "xr")
#undef __ATOMIC_OPS
#define __ATOMIC64_OP(op_name, op_string) \
-static inline long op_name(long val, long *ptr) \
+static __always_inline long op_name(long val, long *ptr) \
{ \
long old, new; \
\
@@ -154,7 +154,7 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-static inline int __atomic_cmpxchg(int *ptr, int old, int new)
+static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new)
{
asm volatile(
" cs %[old],%[new],%[ptr]"
@@ -164,7 +164,7 @@ static inline int __atomic_cmpxchg(int *ptr, int old, int new)
return old;
}
-static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
+static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
{
int old_expected = old;
@@ -176,7 +176,7 @@ static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
return old == old_expected;
}
-static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
+static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new)
{
asm volatile(
" csg %[old],%[new],%[ptr]"
@@ -186,7 +186,7 @@ static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
return old;
}
-static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
+static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
{
long old_expected = old;
diff --git a/arch/s390/include/asm/dwarf.h b/arch/s390/include/asm/dwarf.h
index 4f21ae561e4ddc..390906b8e386e6 100644
--- a/arch/s390/include/asm/dwarf.h
+++ b/arch/s390/include/asm/dwarf.h
@@ -9,6 +9,7 @@
#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
#define CFI_RESTORE .cfi_restore
+#define CFI_REL_OFFSET .cfi_rel_offset
#ifdef CONFIG_AS_CFI_VAL_OFFSET
#define CFI_VAL_OFFSET .cfi_val_offset
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index deb198a610395b..ce5f4fe8be4dce 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -39,11 +39,11 @@ static inline int prepare_hugepage_range(struct file *file,
return 0;
}
-static inline void arch_clear_hugepage_flags(struct page *page)
+static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
- clear_bit(PG_arch_1, &page->flags);
+ clear_bit(PG_arch_1, &folio->flags);
}
-#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 60950e7a25f585..2cb2a2e7b34b83 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1405,6 +1405,7 @@ static inline unsigned long pud_deref(pud_t pud)
return (unsigned long)__va(pud_val(pud) & origin_mask);
}
+#define pud_pfn pud_pfn
static inline unsigned long pud_pfn(pud_t pud)
{
return __pa(pud_deref(pud)) >> PAGE_SHIFT;
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index bf15da0fedbca5..0e3da500e98c19 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -12,12 +12,12 @@
#define PREEMPT_NEED_RESCHED 0x80000000
#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
-static inline int preempt_count(void)
+static __always_inline int preempt_count(void)
{
return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
}
-static inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(int pc)
{
int old, new;
@@ -29,22 +29,22 @@ static inline void preempt_count_set(int pc)
old, new) != old);
}
-static inline void set_preempt_need_resched(void)
+static __always_inline void set_preempt_need_resched(void)
{
__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
}
-static inline void clear_preempt_need_resched(void)
+static __always_inline void clear_preempt_need_resched(void)
{
__atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
}
-static inline bool test_preempt_need_resched(void)
+static __always_inline bool test_preempt_need_resched(void)
{
return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED);
}
-static inline void __preempt_count_add(int val)
+static __always_inline void __preempt_count_add(int val)
{
/*
* With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
@@ -59,17 +59,17 @@ static inline void __preempt_count_add(int val)
__atomic_add(val, &S390_lowcore.preempt_count);
}
-static inline void __preempt_count_sub(int val)
+static __always_inline void __preempt_count_sub(int val)
{
__preempt_count_add(-val);
}
-static inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool __preempt_count_dec_and_test(void)
{
return __atomic_add(-1, &S390_lowcore.preempt_count) == 1;
}
-static inline bool should_resched(int preempt_offset)
+static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(READ_ONCE(S390_lowcore.preempt_count) ==
preempt_offset);
@@ -79,45 +79,45 @@ static inline bool should_resched(int preempt_offset)
#define PREEMPT_ENABLED (0)
-static inline int preempt_count(void)
+static __always_inline int preempt_count(void)
{
return READ_ONCE(S390_lowcore.preempt_count);
}
-static inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(int pc)
{
S390_lowcore.preempt_count = pc;
}
-static inline void set_preempt_need_resched(void)
+static __always_inline void set_preempt_need_resched(void)
{
}
-static inline void clear_preempt_need_resched(void)
+static __always_inline void clear_preempt_need_resched(void)
{
}
-static inline bool test_preempt_need_resched(void)
+static __always_inline bool test_preempt_need_resched(void)
{
return false;
}
-static inline void __preempt_count_add(int val)
+static __always_inline void __preempt_count_add(int val)
{
S390_lowcore.preempt_count += val;
}
-static inline void __preempt_count_sub(int val)
+static __always_inline void __preempt_count_sub(int val)
{
S390_lowcore.preempt_count -= val;
}
-static inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool __preempt_count_dec_and_test(void)
{
return !--S390_lowcore.preempt_count && tif_need_resched();
}
-static inline bool should_resched(int preempt_offset)
+static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(preempt_count() == preempt_offset &&
tif_need_resched());
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
index 554447768bddc7..bf983513dd333b 100644
--- a/arch/s390/kernel/cert_store.c
+++ b/arch/s390/kernel/cert_store.c
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
#include <crypto/sha2.h>
#include <keys/user-type.h>
#include <asm/debug.h>
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 787394978bc0f8..6a1e0fbbaa15b3 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -340,7 +340,8 @@ SYM_CODE_START(pgm_check_handler)
mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
stctg %c1,%c1,__PT_CR1(%r11)
#if IS_ENABLED(CONFIG_KVM)
- lg %r12,__LC_GMAP
+ ltg %r12,__LC_GMAP
+ jz 5f
clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11)
jne 5f
BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
@@ -635,6 +636,7 @@ SYM_DATA_START_LOCAL(daton_psw)
SYM_DATA_END(daton_psw)
.section .rodata, "a"
+ .balign 8
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
SYM_DATA_START(sys_call_table)
#include "asm/syscall_table.h"
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 1486350a417751..5d6d381aa0be4a 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -20,6 +20,7 @@
#include <linux/gfp.h>
#include <linux/crash_dump.h>
#include <linux/debug_locks.h>
+#include <linux/vmalloc.h>
#include <asm/asm-extable.h>
#include <asm/diag.h>
#include <asm/ipl.h>
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 823d652e3917f8..4ad472d130a3c0 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -90,7 +90,6 @@ static void paicrypt_event_destroy(struct perf_event *event)
event->cpu);
struct paicrypt_map *cpump = mp->mapptr;
- cpump->event = NULL;
static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
@@ -356,10 +355,15 @@ static int paicrypt_add(struct perf_event *event, int flags)
static void paicrypt_stop(struct perf_event *event, int flags)
{
- if (!event->attr.sample_period) /* Counting */
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
paicrypt_read(event);
- else /* Sampling */
+ } else { /* Sampling */
perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index 616a25606cd63d..a6da7e0cc7a66d 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -122,7 +122,6 @@ static void paiext_event_destroy(struct perf_event *event)
free_page(PAI_SAVE_AREA(event));
mutex_lock(&paiext_reserve_mutex);
- cpump->event = NULL;
if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
paiext_free(mp);
paiext_root_free();
@@ -362,10 +361,15 @@ static int paiext_add(struct perf_event *event, int flags)
static void paiext_stop(struct perf_event *event, int flags)
{
- if (!event->attr.sample_period) /* Counting */
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
paiext_read(event);
- else /* Sampling */
+ } else { /* Sampling */
perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 095bb86339a7d3..bd0fee24ad10a3 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -464,3 +464,4 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal sys_mseal
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
index 57f62596e53b95..85247ef5a41b89 100644
--- a/arch/s390/kernel/vdso64/vdso_user_wrapper.S
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -24,8 +24,10 @@ __kernel_\func:
CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
stg %r14,STACK_FRAME_OVERHEAD(%r15)
+ CFI_REL_OFFSET 14, STACK_FRAME_OVERHEAD
brasl %r14,__s390_vdso_\func
lg %r14,STACK_FRAME_OVERHEAD(%r15)
+ CFI_RESTORE 14
aghi %r15,WRAPPER_FRAME_SIZE
CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
CFI_RESTORE 15
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index c421dd44ffbe03..65747f15dbec4b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -75,7 +75,7 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
if (!IS_ENABLED(CONFIG_PGSTE))
return KERNEL_FAULT;
gmap = (struct gmap *)S390_lowcore.gmap;
- if (regs->cr1 == gmap->asce)
+ if (gmap && gmap->asce == regs->cr1)
return GMAP_FAULT;
return KERNEL_FAULT;
}
@@ -325,7 +325,8 @@ static void do_exception(struct pt_regs *regs, int access)
goto lock_mmap;
if (!(vma->vm_flags & access)) {
vma_end_read(vma);
- goto lock_mmap;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ return handle_fault_error_nolock(regs, SEGV_ACCERR);
}
fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 094b43b121cd5d..12d22a7fa32fd2 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2661,7 +2661,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
return 0;
start = pmd_val(*pmd) & HPAGE_MASK;
- end = start + HPAGE_SIZE - 1;
+ end = start + HPAGE_SIZE;
__storage_key_init_range(start, end);
set_bit(PG_arch_1, &page->flags);
cond_resched();
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index c2e8242bd15dd0..2675aab4acc700 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -139,7 +139,7 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
}
if (!test_and_set_bit(PG_arch_1, &page->flags))
- __storage_key_init_range(paddr, paddr + size - 1);
+ __storage_key_init_range(paddr, paddr + size);
}
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -233,16 +233,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return (pte_t *) pmdp;
}
-int pmd_huge(pmd_t pmd)
-{
- return pmd_leaf(pmd);
-}
-
-int pud_huge(pud_t pud)
-{
- return pud_leaf(pud);
-}
-
bool __init arch_hugetlb_valid_size(unsigned long size)
{
if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
@@ -258,14 +248,12 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
- info.flags = 0;
info.length = len;
info.low_limit = current->mm->mmap_base;
info.high_limit = TASK_SIZE;
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
return vm_unmapped_area(&info);
}
@@ -274,7 +262,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
unsigned long addr;
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
@@ -282,7 +270,6 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
info.low_limit = PAGE_SIZE;
info.high_limit = current->mm->mmap_base;
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
addr = vm_unmapped_area(&info);
/*
@@ -328,7 +315,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
goto check_asce_limit;
}
- if (mm->get_unmapped_area == arch_get_unmapped_area)
+ if (!test_bit(MMF_TOPDOWN, &mm->flags))
addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
else
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index b14fc0887654d2..2067569465897b 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -86,7 +86,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
@@ -102,7 +102,6 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
goto check_asce_limit;
}
- info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
@@ -122,7 +121,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long ad
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
@@ -185,10 +184,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
*/
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = mmap_base_legacy(random_factor);
- mm->get_unmapped_area = arch_get_unmapped_area;
+ clear_bit(MMF_TOPDOWN, &mm->flags);
} else {
mm->mmap_base = mmap_base(random_factor, rlim_stack);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ set_bit(MMF_TOPDOWN, &mm->flags);
}
}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index b418333bb08635..5af0402e94b88c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size)
* PLT for hotpatchable calls. The calling convention is the same as for the
* ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
*/
-extern const char bpf_plt[];
-extern const char bpf_plt_ret[];
-extern const char bpf_plt_target[];
-extern const char bpf_plt_end[];
-#define BPF_PLT_SIZE 32
+struct bpf_plt {
+ char code[16];
+ void *ret;
+ void *target;
+} __packed;
+extern const struct bpf_plt bpf_plt;
asm(
".pushsection .rodata\n"
" .balign 8\n"
@@ -531,15 +532,14 @@ asm(
" .balign 8\n"
"bpf_plt_ret: .quad 0\n"
"bpf_plt_target: .quad 0\n"
- "bpf_plt_end:\n"
" .popsection\n"
);
-static void bpf_jit_plt(void *plt, void *ret, void *target)
+static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
- memcpy(plt, bpf_plt, BPF_PLT_SIZE);
- *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
- *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
+ memcpy(plt, &bpf_plt, sizeof(*plt));
+ plt->ret = ret;
+ plt->target = target;
}
/*
@@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
jit->prg = ALIGN(jit->prg, 8);
jit->prologue_plt = jit->prg;
if (jit->prg_buf)
- bpf_jit_plt(jit->prg_buf + jit->prg,
+ bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
jit->prg_buf + jit->prologue_plt_ret, NULL);
- jit->prg += BPF_PLT_SIZE;
+ jit->prg += sizeof(struct bpf_plt);
}
static int get_probe_mem_regno(const u8 *insn)
@@ -2040,9 +2040,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_jit jit;
int pass;
- if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
- return orig_fp;
-
if (!fp->jit_requested)
return orig_fp;
@@ -2148,14 +2145,11 @@ bool bpf_jit_supports_far_kfunc_call(void)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *old_addr, void *new_addr)
{
+ struct bpf_plt expected_plt, current_plt, new_plt, *plt;
struct {
u16 opc;
s32 disp;
} __packed insn;
- char expected_plt[BPF_PLT_SIZE];
- char current_plt[BPF_PLT_SIZE];
- char new_plt[BPF_PLT_SIZE];
- char *plt;
char *ret;
int err;
@@ -2174,18 +2168,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
*/
} else {
/* Verify the PLT. */
- plt = (char *)ip + (insn.disp << 1);
- err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
+ plt = ip + (insn.disp << 1);
+ err = copy_from_kernel_nofault(&current_plt, plt,
+ sizeof(current_plt));
if (err < 0)
return err;
ret = (char *)ip + 6;
- bpf_jit_plt(expected_plt, ret, old_addr);
- if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
+ bpf_jit_plt(&expected_plt, ret, old_addr);
+ if (memcmp(&current_plt, &expected_plt, sizeof(current_plt)))
return -EINVAL;
/* Adjust the call address. */
- bpf_jit_plt(new_plt, ret, new_addr);
- s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
- new_plt + (bpf_plt_target - bpf_plt),
+ bpf_jit_plt(&new_plt, ret, new_addr);
+ s390_kernel_write(&plt->target, &new_plt.target,
sizeof(void *));
}
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index a90499c087f0c5..5398729bfe1b77 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -169,7 +169,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
if (!(vma->vm_flags & VM_WRITE))
goto out_unlock_mmap;
- ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
+ ret = follow_pte(vma, mmio_addr, &ptep, &ptl);
if (ret)
goto out_unlock_mmap;
@@ -308,7 +308,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
if (!(vma->vm_flags & VM_WRITE))
goto out_unlock_mmap;
- ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
+ ret = follow_pte(vma, mmio_addr, &ptep, &ptl);
if (ret)
goto out_unlock_mmap;
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 2ad3e29f0ebec4..94895a387ed523 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -38,7 +38,7 @@ config SUPERH
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
select HAVE_DYNAMIC_FTRACE
- select HAVE_FAST_GUP if MMU
+ select HAVE_GUP_FAST if MMU
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
@@ -709,7 +709,6 @@ config ROMIMAGE_MMCIF
choice
prompt "Kernel command line"
- optional
default CMDLINE_OVERWRITE
help
Setting this option allows the kernel command line arguments
@@ -727,6 +726,11 @@ config CMDLINE_EXTEND
Given string will be concatenated with arguments passed in
by a bootloader.
+config CMDLINE_FROM_BOOTLOADER
+ bool "Use bootloader kernel arguments"
+ help
+ Uses the command-line options passed by the boot loader.
+
endchoice
config CMDLINE
diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig
index cc909f34787769..9c2644443c4d52 100644
--- a/arch/sh/configs/apsh4a3a_defconfig
+++ b/arch/sh/configs/apsh4a3a_defconfig
@@ -15,6 +15,7 @@ CONFIG_MEMORY_START=0x0C000000
CONFIG_FLATMEM_MANUAL=y
CONFIG_SH_STORE_QUEUES=y
CONFIG_SH_APSH4A3A=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_KEXEC=y
CONFIG_PREEMPT=y
diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index 64558bf60e106a..05d21d91f41d6b 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig
@@ -42,6 +42,7 @@ CONFIG_SECCOMP=y
CONFIG_PREEMPT=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_PM=y
CONFIG_PM_DEBUG=y
CONFIG_PM=y
diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig
index 9ee35269bee260..57c79da1ff8ed3 100644
--- a/arch/sh/configs/edosk7705_defconfig
+++ b/arch/sh/configs/edosk7705_defconfig
@@ -19,6 +19,7 @@
CONFIG_CPU_SUBTYPE_SH7705=y
CONFIG_SH_EDOSK7705=y
CONFIG_SH_PCLK_FREQ=31250000
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig
index 0c45f2a0f9bd93..77e3185f63e47a 100644
--- a/arch/sh/configs/hp6xx_defconfig
+++ b/arch/sh/configs/hp6xx_defconfig
@@ -15,6 +15,7 @@ CONFIG_SH_DMA_API=y
CONFIG_HD64461_ENABLER=y
CONFIG_PCCARD=y
CONFIG_PM=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_APM_EMULATION=y
# CONFIG_STANDALONE is not set
CONFIG_BLK_DEV_SD=y
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index 54108209091874..0311380160f42b 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -15,6 +15,7 @@ CONFIG_KEXEC=y
CONFIG_PCI=y
CONFIG_PCCARD=y
CONFIG_YENTA=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig
index 52937f9cc2ab31..8d443749550ec0 100644
--- a/arch/sh/configs/magicpanelr2_defconfig
+++ b/arch/sh/configs/magicpanelr2_defconfig
@@ -22,6 +22,7 @@ CONFIG_SH_PCLK_FREQ=24000000
CONFIG_SH_DMA=y
CONFIG_SH_DMA_API=y
CONFIG_HEARTBEAT=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig
index a88cb3b7795778..e4ef259425c42a 100644
--- a/arch/sh/configs/rsk7264_defconfig
+++ b/arch/sh/configs/rsk7264_defconfig
@@ -21,6 +21,7 @@ CONFIG_MEMORY_START=0x0c000000
CONFIG_FLATMEM_MANUAL=y
CONFIG_CPU_BIG_ENDIAN=y
CONFIG_SH_RSK=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
# CONFIG_SH_TIMER_MTU2 is not set
CONFIG_BINFMT_FLAT=y
CONFIG_NET=y
diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig
index d9a7ce783c9bc2..e0d1560b2bfd73 100644
--- a/arch/sh/configs/rsk7269_defconfig
+++ b/arch/sh/configs/rsk7269_defconfig
@@ -10,6 +10,7 @@ CONFIG_MEMORY_SIZE=0x02000000
CONFIG_FLATMEM_MANUAL=y
CONFIG_CPU_BIG_ENDIAN=y
CONFIG_SH_RSK=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
# CONFIG_SH_TIMER_MTU2 is not set
CONFIG_SH_PCLK_FREQ=66700000
CONFIG_BINFMT_FLAT=y
diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig
index 14d0f5ead502f4..6b25e9713e7774 100644
--- a/arch/sh/configs/se7619_defconfig
+++ b/arch/sh/configs/se7619_defconfig
@@ -14,6 +14,7 @@ CONFIG_FLATMEM_MANUAL=y
CONFIG_CPU_BIG_ENDIAN=y
CONFIG_SH_7619_SOLUTION_ENGINE=y
CONFIG_HZ_100=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_BINFMT_FLAT=y
CONFIG_BINFMT_ZFLAT=y
# CONFIG_STANDALONE is not set
diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig
index 16a0f72f082260..1752ddc2694a43 100644
--- a/arch/sh/configs/se7705_defconfig
+++ b/arch/sh/configs/se7705_defconfig
@@ -13,6 +13,7 @@ CONFIG_FLATMEM_MANUAL=y
# CONFIG_SH_ADC is not set
CONFIG_SH_SOLUTION_ENGINE=y
CONFIG_HEARTBEAT=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_PREEMPT=y
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig
index 09e45581744743..5327a2f7098054 100644
--- a/arch/sh/configs/se7722_defconfig
+++ b/arch/sh/configs/se7722_defconfig
@@ -17,6 +17,7 @@ CONFIG_SH_7722_SOLUTION_ENGINE=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_HEARTBEAT=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_KEXEC=y
CONFIG_PREEMPT=y
CONFIG_NET=y
diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig
index 5fa6239ae4eae4..a1e25d7de8a608 100644
--- a/arch/sh/configs/se7750_defconfig
+++ b/arch/sh/configs/se7750_defconfig
@@ -15,6 +15,7 @@ CONFIG_FLATMEM_MANUAL=y
CONFIG_SH_SOLUTION_ENGINE=y
CONFIG_SH_PCLK_FREQ=33333333
CONFIG_HEARTBEAT=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/sh/configs/secureedge5410_defconfig b/arch/sh/configs/secureedge5410_defconfig
index 120176afe3f6f5..2f77b60e9540b4 100644
--- a/arch/sh/configs/secureedge5410_defconfig
+++ b/arch/sh/configs/secureedge5410_defconfig
@@ -10,6 +10,7 @@ CONFIG_SH_SECUREEDGE5410=y
CONFIG_SH_DMA=y
CONFIG_SH_DMA_API=y
CONFIG_PCI=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_NET=y
CONFIG_INET=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig
index 7f742729df697d..99a5d07605326f 100644
--- a/arch/sh/configs/sh7710voipgw_defconfig
+++ b/arch/sh/configs/sh7710voipgw_defconfig
@@ -15,6 +15,7 @@ CONFIG_MEMORY_SIZE=0x00800000
CONFIG_FLATMEM_MANUAL=y
# CONFIG_SH_ADC is not set
CONFIG_SH_PCLK_FREQ=32768000
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig
index cbc9389a89a8c0..5440bd0ca4ed5b 100644
--- a/arch/sh/configs/sh7724_generic_defconfig
+++ b/arch/sh/configs/sh7724_generic_defconfig
@@ -12,6 +12,7 @@ CONFIG_CPU_FREQ=y
CONFIG_SH_CPU_FREQ=y
CONFIG_KEXEC=y
CONFIG_KEXEC_JUMP=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_HIBERNATION=y
CONFIG_CPU_IDLE=y
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
diff --git a/arch/sh/configs/sh7770_generic_defconfig b/arch/sh/configs/sh7770_generic_defconfig
index ee2357deba0f1c..4338af8d02d036 100644
--- a/arch/sh/configs/sh7770_generic_defconfig
+++ b/arch/sh/configs/sh7770_generic_defconfig
@@ -14,6 +14,7 @@ CONFIG_SH_CPU_FREQ=y
CONFIG_KEXEC=y
CONFIG_KEXEC_JUMP=y
CONFIG_PM=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_HIBERNATION=y
CONFIG_CPU_IDLE=y
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index 59262f42abe6f8..44f9b2317f090f 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -32,6 +32,7 @@ CONFIG_PREEMPT=y
CONFIG_INTC_USERIMASK=y
CONFIG_PCI=y
CONFIG_PCI_DEBUG=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_PM=y
CONFIG_CPU_IDLE=y
CONFIG_NET=y
diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig
index 94381f8268fffc..aec74b0e700352 100644
--- a/arch/sh/configs/sh7785lcr_defconfig
+++ b/arch/sh/configs/sh7785lcr_defconfig
@@ -17,6 +17,7 @@ CONFIG_HEARTBEAT=y
CONFIG_KEXEC=y
CONFIG_PREEMPT=y
CONFIG_PCI=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig
index 445bb451a5ec80..00ef62133b04de 100644
--- a/arch/sh/configs/urquell_defconfig
+++ b/arch/sh/configs/urquell_defconfig
@@ -34,6 +34,7 @@ CONFIG_PCIEPORTBUS=y
CONFIG_PCIEASPM_DEBUG=y
CONFIG_PCI_DEBUG=y
CONFIG_BINFMT_MISC=y
+CONFIG_CMDLINE_FROM_BOOTLOADER=y
CONFIG_PM=y
CONFIG_CPU_IDLE=y
CONFIG_NET=y
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index 4d3ba39e681c4f..75028bd568ba5e 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -27,11 +27,11 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
return *ptep;
}
-static inline void arch_clear_hugepage_flags(struct page *page)
+static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
- clear_bit(PG_dcache_clean, &page->flags);
+ clear_bit(PG_dcache_clean, &folio->flags);
}
-#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
#include <asm-generic/hugetlb.h>
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 86fe269f02203b..bbf83a2db9868d 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -464,3 +464,4 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index 862046f26981b6..9a1e581cd192d1 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -241,13 +241,14 @@ static void sh4_flush_cache_page(void *args)
if ((vma->vm_mm == current->active_mm))
vaddr = NULL;
else {
+ struct folio *folio = page_folio(page);
/*
* Use kmap_coherent or kmap_atomic to do flushes for
* another ASID than the current one.
*/
map_coherent = (current_cpu_data.dcache.n_aliases &&
- test_bit(PG_dcache_clean, &page->flags) &&
- page_mapcount(page));
+ test_bit(PG_dcache_clean, folio_flags(folio, 0)) &&
+ page_mapped(page));
if (map_coherent)
vaddr = kmap_coherent(page, address);
else
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index 9bcaa5619eabd1..d8be352e14d20d 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -84,7 +84,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
{
struct folio *folio = page_folio(page);
- if (boot_cpu_data.dcache.n_aliases && page_mapcount(page) &&
+ if (boot_cpu_data.dcache.n_aliases && folio_mapped(folio) &&
test_bit(PG_dcache_clean, &folio->flags)) {
void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
memcpy(dst, vfrom, len);
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 6cb0ad73dbb9e3..ff209b55285ad2 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -70,13 +70,3 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
return pte;
}
-
-int pmd_huge(pmd_t pmd)
-{
- return 0;
-}
-
-int pud_huge(pud_t pud)
-{
- return 0;
-}
diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c
index b82199878b4594..bee329d4149aa7 100644
--- a/arch/sh/mm/mmap.c
+++ b/arch/sh/mm/mmap.c
@@ -57,7 +57,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int do_colour_align;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
if (flags & MAP_FIXED) {
/* We do not accept a shared mapping if it would violate
@@ -88,7 +88,6 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
- info.flags = 0;
info.length = len;
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = TASK_SIZE;
@@ -106,7 +105,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
int do_colour_align;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
if (flags & MAP_FIXED) {
/* We do not accept a shared mapping if it would violate
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 4d1bafaba942d2..3fe429d73a65b0 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -875,6 +875,7 @@ static inline bool pud_leaf(pud_t pud)
return pte_val(pte) & _PAGE_PMD_HUGE;
}
+#define pud_pfn pud_pfn
static inline unsigned long pud_pfn(pud_t pud)
{
pte_t pte = __pte(pud_val(pud));
@@ -956,7 +957,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
#ifdef DCACHE_ALIASING_POSSIBLE
#define __HAVE_ARCH_MOVE_PTE
-#define move_pte(pte, prot, old_addr, new_addr) \
+#define move_pte(pte, old_addr, new_addr) \
({ \
pte_t newpte = (pte); \
if (tlb_type != hypervisor && pte_present(pte)) { \
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 082a551897ed8f..08a19727795ca4 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -41,7 +41,7 @@ SYSCALL_DEFINE0(getpagesize)
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
if (flags & MAP_FIXED) {
/* We do not accept a shared mapping if it would violate
@@ -59,7 +59,6 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
if (!addr)
addr = TASK_UNMAPPED_BASE;
- info.flags = 0;
info.length = len;
info.low_limit = addr;
info.high_limit = TASK_SIZE;
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 1e9a9e016237bf..d9c3b34ca74470 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -93,7 +93,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
struct vm_area_struct * vma;
unsigned long task_size = TASK_SIZE;
int do_color_align;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
if (flags & MAP_FIXED) {
/* We do not accept a shared mapping if it would violate
@@ -126,7 +126,6 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
return addr;
}
- info.flags = 0;
info.length = len;
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = min(task_size, VA_EXCLUDE_START);
@@ -154,7 +153,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
unsigned long task_size = STACK_TOP32;
unsigned long addr = addr0;
int do_color_align;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
/* This should only ever run for 32-bit processes. */
BUG_ON(!test_thread_flag(TIF_32BIT));
@@ -218,14 +217,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
{
unsigned long align_goal, addr = -ENOMEM;
- unsigned long (*get_area)(struct file *, unsigned long,
- unsigned long, unsigned long, unsigned long);
-
- get_area = current->mm->get_unmapped_area;
if (flags & MAP_FIXED) {
/* Ok, don't mess with it. */
- return get_area(NULL, orig_addr, len, pgoff, flags);
+ return mm_get_unmapped_area(current->mm, NULL, orig_addr, len, pgoff, flags);
}
flags &= ~MAP_SHARED;
@@ -238,7 +233,8 @@ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, u
align_goal = (64UL * 1024);
do {
- addr = get_area(NULL, orig_addr, len + (align_goal - PAGE_SIZE), pgoff, flags);
+ addr = mm_get_unmapped_area(current->mm, NULL, orig_addr,
+ len + (align_goal - PAGE_SIZE), pgoff, flags);
if (!(addr & ~PAGE_MASK)) {
addr = (addr + (align_goal - 1UL)) & ~(align_goal - 1UL);
break;
@@ -256,7 +252,7 @@ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, u
* be obtained.
*/
if (addr & ~PAGE_MASK)
- addr = get_area(NULL, orig_addr, len, pgoff, flags);
+ addr = mm_get_unmapped_area(current->mm, NULL, orig_addr, len, pgoff, flags);
return addr;
}
@@ -292,7 +288,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
gap == RLIM_INFINITY ||
sysctl_legacy_va_layout) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
- mm->get_unmapped_area = arch_get_unmapped_area;
+ clear_bit(MMF_TOPDOWN, &mm->flags);
} else {
/* We know it's 32-bit */
unsigned long task_size = STACK_TOP32;
@@ -303,7 +299,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
gap = (task_size / 6 * 5);
mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ set_bit(MMF_TOPDOWN, &mm->flags);
}
}
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index b23d59313589aa..ac6c281ccfe029 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -507,3 +507,4 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index b432500c13a5d8..cc91ca7a1e182c 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -31,17 +31,15 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
{
struct hstate *h = hstate_file(filp);
unsigned long task_size = TASK_SIZE;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
if (test_thread_flag(TIF_32BIT))
task_size = STACK_TOP32;
- info.flags = 0;
info.length = len;
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = min(task_size, VA_EXCLUDE_START);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
addr = vm_unmapped_area(&info);
if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
@@ -63,7 +61,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
struct hstate *h = hstate_file(filp);
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
/* This should only ever run for 32-bit processes. */
BUG_ON(!test_thread_flag(TIF_32BIT));
@@ -73,7 +71,6 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.low_limit = PAGE_SIZE;
info.high_limit = mm->mmap_base;
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
addr = vm_unmapped_area(&info);
/*
@@ -123,7 +120,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
- if (mm->get_unmapped_area == arch_get_unmapped_area)
+ if (!test_bit(MMF_TOPDOWN, &mm->flags))
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
else
@@ -407,18 +404,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return entry;
}
-int pmd_huge(pmd_t pmd)
-{
- return !pmd_none(pmd) &&
- (pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
-}
-
-int pud_huge(pud_t pud)
-{
- return !pud_none(pud) &&
- (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
-}
-
static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long addr)
{
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index b44d79d778c718..19642f7ffb52a5 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -183,12 +183,12 @@ static void __set_pmd_acct(struct mm_struct *mm, unsigned long addr,
* hugetlb_pte_count.
*/
if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
- if (is_huge_zero_page(pmd_page(pmd)))
+ if (is_huge_zero_pmd(pmd))
mm->context.hugetlb_pte_count++;
else
mm->context.thp_pte_count++;
} else {
- if (is_huge_zero_page(pmd_page(orig)))
+ if (is_huge_zero_pmd(orig))
mm->context.hugetlb_pte_count--;
else
mm->context.thp_pte_count--;
@@ -259,7 +259,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
* Sanity check pmd before doing the actual decrement.
*/
if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
- !is_huge_zero_page(pmd_page(entry)))
+ !is_huge_zero_pmd(entry))
(vma->vm_mm)->context.thp_pte_count--;
return old;
diff --git a/arch/um/include/shared/um_malloc.h b/arch/um/include/shared/um_malloc.h
index 13da93284c2c7f..bf503658f08eb3 100644
--- a/arch/um/include/shared/um_malloc.h
+++ b/arch/um/include/shared/um_malloc.h
@@ -11,7 +11,8 @@
extern void *uml_kmalloc(int size, int flags);
extern void kfree(const void *ptr);
-extern void *vmalloc(unsigned long size);
+extern void *vmalloc_noprof(unsigned long size);
+#define vmalloc(...) vmalloc_noprof(__VA_ARGS__)
extern void vfree(void *ptr);
#endif /* __UM_MALLOC_H__ */
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 6a1f36df6a1817..cf0ad89f5639da 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -28,7 +28,7 @@ obj-y += net/
obj-$(CONFIG_KEXEC_FILE) += purgatory/
-obj-y += virt/svm/
+obj-y += virt/
# for cleaning
subdir- += boot tools
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 39886bab943a88..2e2a69647e5ec7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -62,6 +62,7 @@ config X86
select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU
select ARCH_32BIT_OFF_T if X86_32
select ARCH_CLOCKSOURCE_INIT
+ select ARCH_CONFIGURES_CPU_MITIGATIONS
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION
select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64
@@ -83,6 +84,7 @@ config X86
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
+ select ARCH_HAS_KERNEL_FPU_SUPPORT
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
@@ -221,7 +223,7 @@ config X86
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EISA
select HAVE_EXIT_THREAD
- select HAVE_FAST_GUP
+ select HAVE_GUP_FAST
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
@@ -2439,6 +2441,8 @@ config USE_X86_SEG_SUPPORT
# with named address spaces - see GCC PR sanitizer/111736.
#
depends on !KASAN
+ # -fsanitize=thread (KCSAN) is also incompatible.
+ depends on !KCSAN
config CC_HAS_SLS
def_bool $(cc-option,-mharden-sls=all)
@@ -2486,17 +2490,21 @@ config PREFIX_SYMBOLS
def_bool y
depends on CALL_PADDING && !CFI_CLANG
-menuconfig SPECULATION_MITIGATIONS
- bool "Mitigations for speculative execution vulnerabilities"
+menuconfig CPU_MITIGATIONS
+ bool "Mitigations for CPU vulnerabilities"
default y
help
- Say Y here to enable options which enable mitigations for
- speculative execution hardware vulnerabilities.
+ Say Y here to enable options which enable mitigations for hardware
+ vulnerabilities (usually related to speculative execution).
+ Mitigations can be disabled or restricted to SMT systems at runtime
+ via the "mitigations" kernel parameter.
- If you say N, all mitigations will be disabled. You really
- should know what you are doing to say so.
+ If you say N, all mitigations will be disabled. This CANNOT be
+ overridden at runtime.
-if SPECULATION_MITIGATIONS
+ Say 'Y', unless you really know what you are doing.
+
+if CPU_MITIGATIONS
config MITIGATION_PAGE_TABLE_ISOLATION
bool "Remove the kernel mapping in user mode"
@@ -2631,6 +2639,16 @@ config MITIGATION_RFDS
stored in floating point, vector and integer registers.
See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
+config MITIGATION_SPECTRE_BHI
+ bool "Mitigate Spectre-BHB (Branch History Injection)"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
+ where the branch history buffer is poisoned to speculatively steer
+ indirect branches.
+ See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 662d9d4033e6b8..164498b3382ae2 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -74,6 +74,26 @@ KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
+#
+# CFLAGS for compiling floating point code inside the kernel.
+#
+CC_FLAGS_FPU := -msse -msse2
+ifdef CONFIG_CC_IS_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
+#
+# The "-msse" in the first argument is there so that the
+# -mpreferred-stack-boundary=3 build error:
+#
+# -mpreferred-stack-boundary=3 is not between 4 and 12
+#
+# can be triggered. Otherwise gcc doesn't complain.
+CC_FLAGS_FPU += -mhard-float
+CC_FLAGS_FPU += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4)
+endif
+
ifeq ($(CONFIG_X86_KERNEL_IBT),y)
#
# Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate
@@ -251,8 +271,6 @@ archheaders:
libs-y += arch/x86/lib/
-core-y += arch/x86/virt/
-
# drivers-y are linked after core-y
drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
drivers-$(CONFIG_PCI) += arch/x86/pci/
diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
index 719e939050cbfa..876fc6d46a1318 100644
--- a/arch/x86/boot/compressed/efi_mixed.S
+++ b/arch/x86/boot/compressed/efi_mixed.S
@@ -15,10 +15,12 @@
*/
#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/page_types.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
+#include <asm/setup.h>
.code64
.text
@@ -149,6 +151,7 @@ SYM_FUNC_END(__efi64_thunk)
SYM_FUNC_START(efi32_stub_entry)
call 1f
1: popl %ecx
+ leal (efi32_boot_args - 1b)(%ecx), %ebx
/* Clear BSS */
xorl %eax, %eax
@@ -163,6 +166,7 @@ SYM_FUNC_START(efi32_stub_entry)
popl %ecx
popl %edx
popl %esi
+ movl %esi, 8(%ebx)
jmp efi32_entry
SYM_FUNC_END(efi32_stub_entry)
#endif
@@ -239,8 +243,6 @@ SYM_FUNC_END(efi_enter32)
*
* Arguments: %ecx image handle
* %edx EFI system table pointer
- * %esi struct bootparams pointer (or NULL when not using
- * the EFI handover protocol)
*
* Since this is the point of no return for ordinary execution, no registers
* are considered live except for the function parameters. [Note that the EFI
@@ -266,9 +268,18 @@ SYM_FUNC_START_LOCAL(efi32_entry)
leal (efi32_boot_args - 1b)(%ebx), %ebx
movl %ecx, 0(%ebx)
movl %edx, 4(%ebx)
- movl %esi, 8(%ebx)
movb $0x0, 12(%ebx) // efi_is64
+ /*
+ * Allocate some memory for a temporary struct boot_params, which only
+ * needs the minimal pieces that startup_32() relies on.
+ */
+ subl $PARAM_SIZE, %esp
+ movl %esp, %esi
+ movl $PAGE_SIZE, BP_kernel_alignment(%esi)
+ movl $_end - 1b, BP_init_size(%esi)
+ subl $startup_32 - 1b, BP_init_size(%esi)
+
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
@@ -294,8 +305,7 @@ SYM_FUNC_START(efi32_pe_entry)
movl 8(%ebp), %ecx // image_handle
movl 12(%ebp), %edx // sys_table
- xorl %esi, %esi
- jmp efi32_entry // pass %ecx, %edx, %esi
+ jmp efi32_entry // pass %ecx, %edx
// no other registers remain live
2: popl %edi // restore callee-save registers
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index d07be9d05cd037..b31ef2424d194b 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -3,19 +3,28 @@
* Confidential Computing Platform Capability checks
*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
+ * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
*/
#include <linux/export.h>
#include <linux/cc_platform.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <asm/archrandom.h>
#include <asm/coco.h>
#include <asm/processor.h>
enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE;
u64 cc_mask __ro_after_init;
+static struct cc_attr_flags {
+ __u64 host_sev_snp : 1,
+ __resv : 63;
+} cc_flags;
+
static bool noinstr intel_cc_platform_has(enum cc_attr attr)
{
switch (attr) {
@@ -89,6 +98,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr)
case CC_ATTR_GUEST_SEV_SNP:
return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+ case CC_ATTR_HOST_SEV_SNP:
+ return cc_flags.host_sev_snp;
+
default:
return false;
}
@@ -148,3 +160,84 @@ u64 cc_mkdec(u64 val)
}
}
EXPORT_SYMBOL_GPL(cc_mkdec);
+
+static void amd_cc_platform_clear(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_HOST_SEV_SNP:
+ cc_flags.host_sev_snp = 0;
+ break;
+ default:
+ break;
+ }
+}
+
+void cc_platform_clear(enum cc_attr attr)
+{
+ switch (cc_vendor) {
+ case CC_VENDOR_AMD:
+ amd_cc_platform_clear(attr);
+ break;
+ default:
+ break;
+ }
+}
+
+static void amd_cc_platform_set(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_HOST_SEV_SNP:
+ cc_flags.host_sev_snp = 1;
+ break;
+ default:
+ break;
+ }
+}
+
+void cc_platform_set(enum cc_attr attr)
+{
+ switch (cc_vendor) {
+ case CC_VENDOR_AMD:
+ amd_cc_platform_set(attr);
+ break;
+ default:
+ break;
+ }
+}
+
+__init void cc_random_init(void)
+{
+ /*
+ * The seed is 32 bytes (in units of longs), which is 256 bits, which
+ * is the security level that the RNG is targeting.
+ */
+ unsigned long rng_seed[32 / sizeof(long)];
+ size_t i, longs;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ return;
+
+ /*
+ * Since the CoCo threat model includes the host, the only reliable
+ * source of entropy that can be neither observed nor manipulated is
+ * RDRAND. Usually, RDRAND failure is considered tolerable, but since
+ * CoCo guests have no other unobservable source of entropy, it's
+ * important to at least ensure the RNG gets some initial random seeds.
+ */
+ for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) {
+ longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i);
+
+ /*
+ * A zero return value means that the guest doesn't have RDRAND
+ * or the CPU is physically broken, and in both cases that
+ * means most crypto inside of the CoCo instance will be
+ * broken, defeating the purpose of CoCo in the first place. So
+ * just panic here because it's absolutely unsafe to continue
+ * executing.
+ */
+ if (longs == 0)
+ panic("RDRAND is defective.");
+ }
+ add_device_randomness(rng_seed, sizeof(rng_seed));
+ memzero_explicit(rng_seed, sizeof(rng_seed));
+}
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6356060caaf311..51cc9c7cb9bdc0 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
if (likely(unr < NR_syscalls)) {
unr = array_index_nospec(unr, NR_syscalls);
- regs->ax = sys_call_table[unr](regs);
+ regs->ax = x64_sys_call(regs, unr);
return true;
}
return false;
@@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
xnr = array_index_nospec(xnr, X32_NR_syscalls);
- regs->ax = x32_sys_call_table[xnr](regs);
+ regs->ax = x32_sys_call(regs, xnr);
return true;
}
return false;
@@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
if (likely(unr < IA32_NR_syscalls)) {
unr = array_index_nospec(unr, IA32_NR_syscalls);
- regs->ax = ia32_sys_call_table[unr](regs);
+ regs->ax = ia32_sys_call(regs, unr);
} else if (nr != -1) {
regs->ax = __ia32_sys_ni_syscall(regs);
}
@@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
}
/**
- * int80_emulation - 32-bit legacy syscall entry
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
@@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
* eax: system call number
* ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
*/
-DEFINE_IDTENTRY_RAW(int80_emulation)
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
{
int nr;
@@ -255,6 +255,71 @@ DEFINE_IDTENTRY_RAW(int80_emulation)
instrumentation_end();
syscall_exit_to_user_mode(regs);
}
+
+#ifdef CONFIG_X86_FRED
+/*
+ * A FRED-specific INT80 handler is warranted for the follwing reasons:
+ *
+ * 1) As INT instructions and hardware interrupts are separate event
+ * types, FRED does not preclude the use of vector 0x80 for external
+ * interrupts. As a result, the FRED setup code does not reserve
+ * vector 0x80 and calling int80_is_external() is not merely
+ * suboptimal but actively incorrect: it could cause a system call
+ * to be incorrectly ignored.
+ *
+ * 2) It is called only for handling vector 0x80 of event type
+ * EVENT_TYPE_SWINT and will never be called to handle any external
+ * interrupt (event type EVENT_TYPE_EXTINT).
+ *
+ * 3) FRED has separate entry flows depending on if the event came from
+ * user space or kernel space, and because the kernel does not use
+ * INT insns, the FRED kernel entry handler fred_entry_from_kernel()
+ * falls through to fred_bad_type() if the event type is
+ * EVENT_TYPE_SWINT, i.e., INT insns. So if the kernel is handling
+ * an INT insn, it can only be from a user level.
+ *
+ * 4) int80_emulation() does a CLEAR_BRANCH_HISTORY. While FRED will
+ * likely take a different approach if it is ever needed: it
+ * probably belongs in either fred_intx()/ fred_other() or
+ * asm_fred_entrypoint_user(), depending on if this ought to be done
+ * for all entries from userspace or only system
+ * calls.
+ *
+ * 5) INT $0x80 is the fast path for 32-bit system calls under FRED.
+ */
+DEFINE_FREDENTRY_RAW(int80_emulation)
+{
+ int nr;
+
+ enter_from_user_mode(regs);
+
+ instrumentation_begin();
+ add_random_kstack_offset();
+
+ /*
+ * FRED pushed 0 into regs::orig_ax and regs::ax contains the
+ * syscall number.
+ *
+ * User tracing code (ptrace or signal handlers) might assume
+ * that the regs::orig_ax contains a 32-bit number on invoking
+ * a 32-bit syscall.
+ *
+ * Establish the syscall convention by saving the 32bit truncated
+ * syscall number in regs::orig_ax and by invalidating regs::ax.
+ */
+ regs->orig_ax = regs->ax & GENMASK(31, 0);
+ regs->ax = -ENOSYS;
+
+ nr = syscall_32_enter(regs);
+
+ local_irq_enable();
+ nr = syscall_enter_from_user_mode_work(regs, nr);
+ do_syscall_32_irqs_on(regs, nr);
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+}
+#endif
#else /* CONFIG_IA32_EMULATION */
/* Handles int $0x80 on a 32bit kernel */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8af2a26b24f6a9..1b5be07f86698a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
/* clobbers %rax, make sure it is after saving the syscall nr */
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
call do_syscall_64 /* returns with IRQs disabled */
@@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
call make_task_dead
SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone. Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+SYM_FUNC_START(clear_bhb_loop)
+ push %rbp
+ mov %rsp, %rbp
+ movl $5, %ecx
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 1f
+ jmp 5f
+ .align 64, 0xcc
+ ANNOTATE_INTRA_FUNCTION_CALL
+1: call 2f
+ RET
+ .align 64, 0xcc
+2: movl $5, %eax
+3: jmp 4f
+ nop
+4: sub $1, %eax
+ jnz 3b
+ sub $1, %ecx
+ jnz 1b
+ RET
+5: lfence
+ pop %rbp
+ RET
+SYM_FUNC_END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(clear_bhb_loop)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index eabf48c4d4b4c3..c779046cc3fe79 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
movq %rsp, %rdi
call do_fast_syscall_32
@@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
int3
SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
+ * point to C routines, however since this is a system call interface the branch
+ * history needs to be scrubbed to protect against BHI attacks, and that
+ * scrubbing needs to take place in assembly code prior to entering any C
+ * routines.
+ */
+SYM_CODE_START(int80_emulation)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ CLEAR_BRANCH_HISTORY
+ jmp do_int80_emulation
+SYM_CODE_END(int80_emulation)
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index ac120cbdaaf2b4..89c1476fcdd9f9 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -28,9 +28,9 @@ static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code
if (regs->fred_cs.sl > 0) {
pr_emerg("PANIC: invalid or fatal FRED event; event type %u "
"vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
- regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax,
+ regs->fred_ss.type, regs->fred_ss.vector, error_code,
fred_event_data(regs), regs->cs, regs->ip);
- die("invalid or fatal FRED event", regs, regs->orig_ax);
+ die("invalid or fatal FRED event", regs, error_code);
panic("invalid or fatal FRED event");
} else {
unsigned long flags = oops_begin();
@@ -38,10 +38,10 @@ static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code
pr_alert("BUG: invalid or fatal FRED event; event type %u "
"vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
- regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax,
+ regs->fred_ss.type, regs->fred_ss.vector, error_code,
fred_event_data(regs), regs->cs, regs->ip);
- if (__die("Invalid or fatal FRED event", regs, regs->orig_ax))
+ if (__die("Invalid or fatal FRED event", regs, error_code))
sig = 0;
oops_end(flags, regs, sig);
@@ -66,7 +66,7 @@ static noinstr void fred_intx(struct pt_regs *regs)
/* INT80 */
case IA32_SYSCALL_VECTOR:
if (ia32_enabled())
- return int80_emulation(regs);
+ return fred_int80_emulation(regs);
fallthrough;
#endif
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8cfc9bc73e7f8b..c2235bae17ef66 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -18,8 +18,25 @@
#include <asm/syscalls_32.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
+#ifdef CONFIG_X86_32
#define __SYSCALL(nr, sym) __ia32_##sym,
-
-__visible const sys_call_ptr_t ia32_sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_32.h>
};
+#undef __SYSCALL
+#endif
+
+#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
+
+long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_32.h>
+ default: return __ia32_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index be120eec1fc9f9..33b3f09e6f151e 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -11,8 +11,23 @@
#include <asm/syscalls_64.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
#define __SYSCALL(nr, sym) __x64_##sym,
-
-asmlinkage const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_64.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index bdd0e03a1265d2..03de4a93213182 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -11,8 +11,12 @@
#include <asm/syscalls_x32.h>
#undef __SYSCALL
-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
-#include <asm/syscalls_x32.h>
+long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_x32.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
};
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 5f8591ce7f25e7..7fd1f57ad3d30c 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -466,3 +466,4 @@
459 i386 lsm_get_self_attr sys_lsm_get_self_attr
460 i386 lsm_set_self_attr sys_lsm_set_self_attr
461 i386 lsm_list_modules sys_lsm_list_modules
+462 i386 mseal sys_mseal
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 7e8d46f4147f57..52df0dec70da6c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -383,6 +383,7 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index fd63051bbbbb82..3d64bcc403cfbe 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o
obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o
OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n
+OBJECT_FILES_NON_STANDARD_vdso-image-x32.o := n
OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n
OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index aec16e581f5b2a..985ef3b479191f 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,39 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
+static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x100000120,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a)
+ return amd_zen4_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
@@ -904,8 +933,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 4a1e600314d5df..5149830c7c4fa6 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
@@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void)
return;
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
}
__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 09050641ce5d3c..5b0dd07b1ef19e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ cpuc->assign[i-1] = cpuc->assign[i];
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 2641ba620f12a5..e010bfed841705 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct pmu *pmu = event->pmu;
/*
- * Make sure we get updated with the first PEBS
- * event. It will trigger also during removal, but
- * that does not hurt:
+ * Make sure we get updated with the first PEBS event.
+ * During removal, ->pebs_data_cfg is still valid for
+ * the last PEBS event. Don't clear it.
*/
- if (cpuc->n_pebs == 1)
+ if ((cpuc->n_pebs == 1) && add)
cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 78cd5084104e9c..4367aa77cb8d9f 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1693,6 +1693,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
lbr->from = x86_pmu.lbr_from;
lbr->to = x86_pmu.lbr_to;
lbr->info = x86_pmu.lbr_info;
+ lbr->has_callstack = x86_pmu_has_lbr_callstack();
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 5fc45543e95502..0569f579338b51 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -105,7 +105,7 @@ static bool cpu_is_self(int cpu)
* IPI implementation on Hyper-V.
*/
static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
- bool exclude_self)
+ bool exclude_self)
{
struct hv_send_ipi_ex *ipi_arg;
unsigned long flags;
@@ -132,8 +132,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) {
ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask,
- exclude_self ? cpu_is_self : NULL);
+ nr_bank = cpumask_to_vpset_skip(&ipi_arg->vp_set, mask,
+ exclude_self ? cpu_is_self : NULL);
/*
* 'nr_bank <= 0' means some CPUs in cpumask can't be
@@ -147,7 +147,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
}
status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
- ipi_arg, NULL);
+ ipi_arg, NULL);
ipi_mask_ex_done:
local_irq_restore(flags);
@@ -155,7 +155,7 @@ ipi_mask_ex_done:
}
static bool __send_ipi_mask(const struct cpumask *mask, int vector,
- bool exclude_self)
+ bool exclude_self)
{
int cur_cpu, vcpu, this_cpu = smp_processor_id();
struct hv_send_ipi ipi_arg;
@@ -181,7 +181,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
return false;
}
- if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
return false;
/*
@@ -218,7 +218,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
}
status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
- ipi_arg.cpu_mask);
+ ipi_arg.cpu_mask);
return hv_result_success(status);
do_ex_hypercall:
@@ -241,7 +241,7 @@ static bool __send_ipi_one(int cpu, int vector)
return false;
}
- if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
return false;
if (vp >= 64)
diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c
index 68a0843d4750f7..3fa1f2ee7b0d06 100644
--- a/arch/x86/hyperv/hv_proc.c
+++ b/arch/x86/hyperv/hv_proc.c
@@ -3,7 +3,6 @@
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/clockchips.h>
-#include <linux/acpi.h>
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
@@ -116,12 +115,11 @@ free_buf:
int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
{
- struct hv_add_logical_processor_in *input;
- struct hv_add_logical_processor_out *output;
+ struct hv_input_add_logical_processor *input;
+ struct hv_output_add_logical_processor *output;
u64 status;
unsigned long flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);
/*
* When adding a logical processor, the hypervisor may return
@@ -137,11 +135,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
input->lp_index = lp_index;
input->apic_id = apic_id;
- input->flags = 0;
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
+ input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR,
input, output);
local_irq_restore(flags);
@@ -166,7 +160,6 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
u64 status;
unsigned long irq_flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);
/* Root VPs don't seem to need pages deposited */
if (partition_id != hv_current_partition_id) {
@@ -185,14 +178,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
input->vp_index = vp_index;
input->flags = flags;
input->subnode_type = HvSubnodeAny;
- if (node != NUMA_NO_NODE) {
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
- } else {
- input->proximity_domain_info.as_uint64 = 0;
- }
+ input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
local_irq_restore(irq_flags);
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 768d73de0d098a..5bc2430e1f3417 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -291,16 +291,18 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
int hv_snp_boot_ap(u32 cpu, unsigned long start_ip)
{
- struct sev_es_save_area *vmsa = (struct sev_es_save_area *)
- __get_free_page(GFP_KERNEL | __GFP_ZERO);
+ struct hv_enable_vp_vtl *start_vp_input;
struct sev_es_save_area *cur_vmsa;
+ struct sev_es_save_area *vmsa;
struct desc_ptr gdtr;
- u64 ret, retry = 5;
- struct hv_enable_vp_vtl *start_vp_input;
unsigned long flags;
+ u64 ret, retry = 5;
+ struct page *p;
- if (!vmsa)
+ p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_ZERO, 0);
+ if (!p)
return -ENOMEM;
+ vmsa = (struct sev_es_save_area *)page_address(p);
native_store_gdt(&gdtr);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index fcd20c6dc7f90c..67b68d0d17d1ec 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void);
extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
struct module *mod);
extern void *callthunks_translate_call_dest(void *dest);
-extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
#else
static __always_inline void callthunks_patch_builtin_calls(void) {}
static __always_inline void
@@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
return dest;
}
static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
- void *func)
+ void *func, void *ip)
{
return 0;
}
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 94ce0f7c9d3a26..e6ab0cf15ed573 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -13,6 +13,7 @@
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
+#include <asm/io.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -98,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
static inline u32 native_apic_mem_read(u32 reg)
{
- return *((volatile u32 *)(APIC_BASE + reg));
+ return readl((void __iomem *)(APIC_BASE + reg));
}
static inline void native_apic_mem_eoi(void)
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 076bf8dee70264..25466c4d213481 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -14,6 +14,7 @@
#include <asm/asm.h>
#include <asm/fred.h>
#include <asm/gsseg.h>
+#include <asm/nospec-branch.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index fe1e7e3cc844a8..63bdc6b8521971 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -79,6 +79,9 @@ do { \
#define __smp_mb__before_atomic() do { } while (0)
#define __smp_mb__after_atomic() do { } while (0)
+/* Writing to CR3 provides a full memory barrier in switch_mm(). */
+#define smp_mb__after_switch_mm() do { } while (0)
+
#include <asm-generic/barrier.h>
#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index fb7388bbc212f9..aa6c8f8ca9588e 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -22,8 +22,10 @@ static inline void cc_set_mask(u64 mask)
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
+void cc_random_init(void);
#else
#define cc_vendor (CC_VENDOR_NONE)
+static const u64 cc_mask = 0;
static inline u64 cc_mkenc(u64 val)
{
@@ -34,6 +36,7 @@ static inline u64 cc_mkdec(u64 val)
{
return val;
}
+static inline void cc_random_init(void) { }
#endif
#endif /* _ASM_X86_COCO_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a1273698fc430b..686e92d2663eee 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -33,6 +33,8 @@ enum cpuid_leafs
CPUID_7_EDX,
CPUID_8000_001F_EAX,
CPUID_8000_0021_EAX,
+ CPUID_LNX_5,
+ NR_CPUID_WORDS,
};
#define X86_CAP_FMT_NUM "%d:%d"
@@ -91,8 +93,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -116,8 +119,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f0337f7bcf1625..3c7434329661c6 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 21 /* N 32-bit words worth of info */
+#define NCAPINTS 22 /* N 32-bit words worth of info */
#define NBUGINTS 2 /* N 32-bit bug flags */
/*
@@ -460,6 +460,18 @@
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc and Linux defined features.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
+
+/*
* BUG word(s)
*/
#define X86_BUG(x) (NCAPINTS*32 + (x))
@@ -507,4 +519,5 @@
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/crash_reserve.h b/arch/x86/include/asm/crash_reserve.h
index 152239f9554195..7835b2cdff04a7 100644
--- a/arch/x86/include/asm/crash_reserve.h
+++ b/arch/x86/include/asm/crash_reserve.h
@@ -39,4 +39,6 @@ static inline unsigned long crash_low_size_default(void)
#endif
}
+#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
+
#endif /* _X86_CRASH_RESERVE_H */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index da4054fbf533e9..c492bdc97b0595 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -155,6 +155,7 @@
#define DISABLED_MASK18 (DISABLE_IBT)
#define DISABLED_MASK19 (DISABLE_SEV_SNP)
#define DISABLED_MASK20 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define DISABLED_MASK21 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/fpu.h b/arch/x86/include/asm/fpu.h
new file mode 100644
index 00000000000000..b2743fe19339a6
--- /dev/null
+++ b/arch/x86/include/asm/fpu.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_X86_FPU_H
+#define _ASM_X86_FPU_H
+
+#include <asm/fpu/api.h>
+
+#define kernel_fpu_available() true
+
+#endif /* ! _ASM_X86_FPU_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index ace9aa3b78a305..eb17f31b06d25c 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -2,8 +2,8 @@
/*
* FPU data structures:
*/
-#ifndef _ASM_X86_FPU_H
-#define _ASM_X86_FPU_H
+#ifndef _ASM_X86_FPU_TYPES_H
+#define _ASM_X86_FPU_TYPES_H
#include <asm/page_types.h>
@@ -596,4 +596,4 @@ struct fpu_state_config {
/* FPU state configuration information */
extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg;
-#endif /* _ASM_X86_FPU_H */
+#endif /* _ASM_X86_FPU_TYPES_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 294cd2a4081812..7452fc193b4f9e 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -42,6 +42,7 @@
#include <asm/early_ioremap.h>
#include <asm/pgtable_types.h>
#include <asm/shared/io.h>
+#include <asm/special_insns.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 16e07a2eee195d..6efd1497b02636 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -855,6 +855,7 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
struct kvm_hypervisor_cpuid kvm_cpuid;
+ bool is_amd_compatible;
/*
* FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 05956bd8bacf50..e72c2b87295799 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -61,10 +61,13 @@
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
/* A mask for bits which the kernel toggles when controlling mitigations */
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
- | SPEC_CTRL_RRSBA_DIS_S)
+ | SPEC_CTRL_RRSBA_DIS_S \
+ | SPEC_CTRL_BHI_DIS_S)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -163,6 +166,10 @@
* are restricted to targets in
* kernel.
*/
+#define ARCH_CAP_BHI_NO BIT(20) /*
+ * CPU is not affected by Branch
+ * History Injection.
+ */
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index fc3a8a3c7ffeec..ff5f1ecc7d1e65 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -262,11 +262,20 @@
.Lskip_rsb_\@:
.endm
+/*
+ * The CALL to srso_alias_untrain_ret() must be patched in directly at
+ * the spot where untraining must be done, ie., srso_alias_untrain_ret()
+ * must be the target of a CALL instruction instead of indirectly
+ * jumping to a wrapper which then calls it. Therefore, this macro is
+ * called outside of __UNTRAIN_RET below, for the time being, before the
+ * kernel can support nested alternatives with arbitrary nesting.
+ */
+.macro CALL_UNTRAIN_RET
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
-#define CALL_UNTRAIN_RET "call entry_untrain_ret"
-#else
-#define CALL_UNTRAIN_RET ""
+ ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
+ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
#endif
+.endm
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
@@ -282,8 +291,8 @@
.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY)
VALIDATE_UNRET_END
- ALTERNATIVE_3 "", \
- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ CALL_UNTRAIN_RET
+ ALTERNATIVE_2 "", \
"call entry_ibpb", \ibpb_feature, \
__stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
#endif
@@ -317,6 +326,19 @@
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@@ -342,6 +364,8 @@ extern void retbleed_return_thunk(void);
static inline void retbleed_return_thunk(void) {}
#endif
+extern void srso_alias_untrain_ret(void);
+
#ifdef CONFIG_MITIGATION_SRSO
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
@@ -357,6 +381,10 @@ extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void);
extern void entry_ibpb(void);
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
extern void (*x86_return_thunk)(void);
extern void __warn_thunk(void);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 3736b8a46c04de..7f1e17250546bd 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -555,6 +555,7 @@ struct x86_pmu_lbr {
unsigned int from;
unsigned int to;
unsigned int info;
+ bool has_callstack;
};
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 315535ffb2582c..65b8e5bb902cc3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -234,6 +234,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
}
+#define pud_pfn pud_pfn
static inline unsigned long pud_pfn(pud_t pud)
{
phys_addr_t pfn = pud_val(pud);
@@ -387,23 +388,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{
- bool wp = pte_flags(pte) & _PAGE_UFFD_WP;
-
-#ifdef CONFIG_DEBUG_VM
- /*
- * Having write bit for wr-protect-marked present ptes is fatal,
- * because it means the uffd-wp bit will be ignored and write will
- * just go through.
- *
- * Use any chance of pgtable walking to verify this (e.g., when
- * page swapped out or being migrated for all purposes). It means
- * something is already wrong. Tell the admin even before the
- * process crashes. We also nail it with wrong pgtable setup.
- */
- WARN_ON_ONCE(wp && pte_write(pte));
-#endif
-
- return wp;
+ return pte_flags(pte) & _PAGE_UFFD_WP;
}
static inline pte_t pte_mkuffd_wp(pte_t pte)
@@ -1200,7 +1185,6 @@ static inline int pgd_none(pgd_t pgd)
extern int direct_gbpages;
void init_mem_mapping(void);
void early_alloc_pgt_buf(void);
-extern void memblock_find_dma_reserve(void);
void __init poking_init(void);
unsigned long init_memory_mapping(unsigned long start,
unsigned long end, pgprot_t prot);
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 7e9db77231ac7f..3c4407271d0838 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -245,6 +245,7 @@ extern void cleanup_highmap(void);
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+#define HAVE_ARCH_UNMAPPED_AREA_VMFLAGS
#define PAGE_AGP PAGE_KERNEL_NOCACHE
#define HAVE_PAGE_AGP 1
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0b748ee16b3d94..9abb8cc4cd4747 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -148,7 +148,7 @@
#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | \
_PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \
- _PAGE_DEVMAP | _PAGE_ENC | _PAGE_UFFD_WP)
+ _PAGE_DEVMAP | _PAGE_CC | _PAGE_UFFD_WP)
#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
@@ -173,6 +173,7 @@ enum page_cache_mode {
};
#endif
+#define _PAGE_CC (_AT(pteval_t, cc_mask))
#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 7ba1726b71c7b8..e9187ddd3d1fdc 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -99,6 +99,7 @@
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
#define REQUIRED_MASK20 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define REQUIRED_MASK21 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 9477b4053bce2c..7f57382afee417 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -218,17 +218,16 @@ void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __noreturn snp_abort(void);
+void snp_dmi_setup(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
-void kdump_sev_callback(void);
void sev_show_status(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
@@ -244,12 +243,12 @@ static inline void __init
early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
static inline void __init
early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
-static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
static inline void snp_abort(void) { }
+static inline void snp_dmi_setup(void) { }
static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
{
return -ENOTTY;
@@ -258,7 +257,6 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
-static inline void kdump_sev_callback(void) { }
static inline void sev_show_status(void) { }
#endif
@@ -270,6 +268,7 @@ int psmash(u64 pfn);
int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
int rmp_make_shared(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
+void kdump_sev_callback(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
@@ -282,6 +281,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
}
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
+static inline void kdump_sev_callback(void) { }
#endif
#endif
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index f44e2f9ab65d77..2fc7bc3863ff6f 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,19 +16,17 @@
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
+/* This is used purely for kernel/trace/trace_syscalls.c */
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#else
/*
* These may not exist, but still put the prototypes in so we
* can use IS_ENABLED().
*/
-extern const sys_call_ptr_t ia32_sys_call_table[];
-extern const sys_call_ptr_t x32_sys_call_table[];
-#endif
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
}
bool do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index b89b40f250e6f5..6149eabe200f5b 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -30,12 +30,13 @@ struct x86_init_mpparse {
* @reserve_resources: reserve the standard resources for the
* platform
* @memory_setup: platform specific memory setup
- *
+ * @dmi_setup: platform specific DMI setup
*/
struct x86_init_resources {
void (*probe_roms)(void);
void (*reserve_resources)(void);
char *(*memory_setup)(void);
+ void (*dmi_setup)(void);
};
/**
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index ad29984d5e398d..ef11aa4cab4253 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -694,6 +694,7 @@ enum sev_cmd_id {
struct kvm_sev_cmd {
__u32 id;
+ __u32 pad0;
__u64 data;
__u32 error;
__u32 sev_fd;
@@ -704,28 +705,35 @@ struct kvm_sev_launch_start {
__u32 policy;
__u64 dh_uaddr;
__u32 dh_len;
+ __u32 pad0;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad1;
};
struct kvm_sev_launch_update_data {
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_launch_secret {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
struct kvm_sev_launch_measure {
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_guest_status {
@@ -738,33 +746,43 @@ struct kvm_sev_dbg {
__u64 src_uaddr;
__u64 dst_uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_attestation_report {
__u8 mnonce[16];
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_send_start {
__u32 policy;
+ __u32 pad0;
__u64 pdh_cert_uaddr;
__u32 pdh_cert_len;
+ __u32 pad1;
__u64 plat_certs_uaddr;
__u32 plat_certs_len;
+ __u32 pad2;
__u64 amd_certs_uaddr;
__u32 amd_certs_len;
+ __u32 pad3;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad4;
};
struct kvm_sev_send_update_data {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
struct kvm_sev_receive_start {
@@ -772,17 +790,22 @@ struct kvm_sev_receive_start {
__u32 policy;
__u64 pdh_uaddr;
__u32 pdh_len;
+ __u32 pad0;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad1;
};
struct kvm_sev_receive_update_data {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 6bc3456a8ebf1d..a1efa7907a0b10 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data {
__u32 token;
__u8 pad[56];
- __u32 enabled;
};
#define KVM_PV_EOI_BIT 0
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 2ae98f754e5918..c884deca839b2e 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -676,7 +676,7 @@ static const struct dma_map_ops gart_dma_ops = {
.get_sgtable = dma_common_get_sgtable,
.dma_supported = dma_direct_supported,
.get_required_mask = dma_direct_get_required_mask,
- .alloc_pages = dma_direct_alloc_pages,
+ .alloc_pages_op = dma_direct_alloc_pages,
.free_pages = dma_direct_free_pages,
};
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a42d8a6f714958..c342c4aa9c6848 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1687,11 +1687,11 @@ static int x2apic_state;
static bool x2apic_hw_locked(void)
{
- u64 ia32_cap;
+ u64 x86_arch_cap_msr;
u64 msr;
- ia32_cap = x86_read_arch_cap_msr();
- if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+ if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
return (msr & LEGACY_XAPIC_DISABLED);
}
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 30335182b6b0ae..e92ff0c11db814 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -314,7 +314,7 @@ static bool is_callthunk(void *addr)
return !bcmp(pad, insn_buff, tmpl_size);
}
-int x86_call_depth_emit_accounting(u8 **pprog, void *func)
+int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
u8 insn_buff[MAX_PATCH_LEN];
@@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
return 0;
memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
- apply_relocation(insn_buff, tmpl_size, *pprog,
+ apply_relocation(insn_buff, tmpl_size, ip,
skl_call_thunk_template, tmpl_size);
memcpy(*pprog, insn_buff, tmpl_size);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 6d8677e80ddbb1..307302af0aeee2 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -345,6 +345,28 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
+static void bsp_determine_snp(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+ cc_vendor = CC_VENDOR_AMD;
+
+ if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
+ /*
+ * RMP table entry format is not architectural and is defined by the
+ * per-processor PPR. Restrict SNP support on the known CPU models
+ * for which the RMP table entry format is currently defined for.
+ */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
+ c->x86 >= 0x19 && snp_probe_rmptable_info()) {
+ cc_platform_set(CC_ATTR_HOST_SEV_SNP);
+ } else {
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
+ }
+ }
+#endif
+}
+
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
@@ -437,8 +459,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
case 0x1a:
switch (c->x86_model) {
- case 0x00 ... 0x0f:
- case 0x20 ... 0x2f:
+ case 0x00 ... 0x2f:
case 0x40 ... 0x4f:
case 0x70 ... 0x7f:
setup_force_cpu_cap(X86_FEATURE_ZEN5);
@@ -452,21 +473,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
break;
}
- if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
- /*
- * RMP table entry format is not architectural and it can vary by processor
- * and is defined by the per-processor PPR. Restrict SNP support on the
- * known CPU model and family for which the RMP table entry format is
- * currently defined for.
- */
- if (!boot_cpu_has(X86_FEATURE_ZEN3) &&
- !boot_cpu_has(X86_FEATURE_ZEN4) &&
- !boot_cpu_has(X86_FEATURE_ZEN5))
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
- else if (!snp_probe_rmptable_info())
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
- }
-
+ bsp_determine_snp(c);
return;
warn:
@@ -527,7 +534,6 @@ clear_sev:
static void early_init_amd(struct cpuinfo_x86 *c)
{
- u64 value;
u32 dummy;
if (c->x86 >= 0xf)
@@ -595,20 +601,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
early_detect_mem_encrypt(c);
- /* Re-enable TopologyExtensions if switched off by BIOS */
- if (c->x86 == 0x15 &&
- (c->x86_model >= 0x10 && c->x86_model <= 0x6f) &&
- !cpu_has(c, X86_FEATURE_TOPOEXT)) {
-
- if (msr_set_bit(0xc0011005, 54) > 0) {
- rdmsrl(0xc0011005, value);
- if (value & BIT_64(54)) {
- set_cpu_cap(c, X86_FEATURE_TOPOEXT);
- pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
- }
- }
- }
-
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) {
if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB))
setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e7ba936d798b81..ab18185894dfd5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -61,6 +61,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
EXPORT_SYMBOL_GPL(x86_pred_cmd);
+static u64 __ro_after_init x86_arch_cap_msr;
+
static DEFINE_MUTEX(spec_ctrl_mutex);
void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
@@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void)
x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
}
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -301,8 +305,6 @@ static const char * const taa_strings[] = {
static void __init taa_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_TAA)) {
taa_mitigation = TAA_MITIGATION_OFF;
return;
@@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void)
* On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
* update is required.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
- !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+ if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))
taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
/*
@@ -401,8 +402,6 @@ static const char * const mmio_strings[] = {
static void __init mmio_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
cpu_mitigations_off()) {
@@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void)
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return;
- ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable CPU buffer clear mitigation for host and VMM, if also affected
* by MDS or TAA. Otherwise, enable mitigation for VMM only.
@@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void)
* be propagated to uncore buffers, clearing the Fill buffers on idle
* is required irrespective of SMT state.
*/
- if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
static_branch_enable(&mds_idle_clear);
/*
@@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void)
* FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
* affected systems.
*/
- if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+ if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
(boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)))
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)))
mmio_mitigation = MMIO_MITIGATION_VERW;
else
mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
@@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void)
if (rfds_mitigation == RFDS_MITIGATION_OFF)
return;
- if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
@@ -659,8 +656,6 @@ void update_srbds_msr(void)
static void __init srbds_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
return;
@@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void)
* are only exposed to SRBDS when TSX is enabled or when CPU is affected
* by Processor MMIO Stale Data vulnerability.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+ if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
@@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void)
/* Will verify below that mitigation _can_ be disabled */
/* No microcode */
- if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+ if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) {
if (gds_mitigation == GDS_MITIGATION_FORCE) {
/*
* This only needs to be done on the boot CPU so do it
@@ -1544,20 +1538,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
return SPECTRE_V2_RETPOLINE;
}
+static bool __ro_after_init rrsba_disabled;
+
/* Disable in-kernel use of non-RSB RET predictors */
static void __init spec_ctrl_disable_kernel_rrsba(void)
{
- u64 ia32_cap;
+ if (rrsba_disabled)
+ return;
- if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) {
+ rrsba_disabled = true;
return;
+ }
- ia32_cap = x86_read_arch_cap_msr();
+ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ return;
- if (ia32_cap & ARCH_CAP_RRSBA) {
- x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
- update_spec_ctrl(x86_spec_ctrl_base);
- }
+ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ rrsba_disabled = true;
}
static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
@@ -1607,6 +1606,74 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
dump_stack();
}
+/*
+ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
+ * branch history in userspace. Not needed if BHI_NO is set.
+ */
+static bool __init spec_ctrl_bhi_dis(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
+ return false;
+
+ x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
+
+ return true;
+}
+
+enum bhi_mitigations {
+ BHI_MITIGATION_OFF,
+ BHI_MITIGATION_ON,
+};
+
+static enum bhi_mitigations bhi_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF;
+
+static int __init spectre_bhi_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ bhi_mitigation = BHI_MITIGATION_OFF;
+ else if (!strcmp(str, "on"))
+ bhi_mitigation = BHI_MITIGATION_ON;
+ else
+ pr_err("Ignoring unknown spectre_bhi option (%s)", str);
+
+ return 0;
+}
+early_param("spectre_bhi", spectre_bhi_parse_cmdline);
+
+static void __init bhi_select_mitigation(void)
+{
+ if (bhi_mitigation == BHI_MITIGATION_OFF)
+ return;
+
+ /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
+ if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
+ !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) {
+ spec_ctrl_disable_kernel_rrsba();
+ if (rrsba_disabled)
+ return;
+ }
+
+ if (spec_ctrl_bhi_dis())
+ return;
+
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ /* Mitigate KVM by default */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+
+ /* Mitigate syscalls when the mitigation is forced =on */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1718,6 +1785,9 @@ static void __init spectre_v2_select_mitigation(void)
mode == SPECTRE_V2_RETPOLINE)
spec_ctrl_disable_kernel_rrsba();
+ if (boot_cpu_has(X86_BUG_BHI))
+ bhi_select_mitigation();
+
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@@ -1832,8 +1902,6 @@ static void update_indir_branch_cond(void)
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
@@ -1848,7 +1916,7 @@ static void update_mds_branch_idle(void)
if (sched_smt_active()) {
static_branch_enable(&mds_idle_clear);
} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
- (ia32_cap & ARCH_CAP_FBSDP_NO)) {
+ (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
static_branch_disable(&mds_idle_clear);
}
}
@@ -2695,15 +2763,15 @@ static char *stibp_state(void)
switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
- return ", STIBP: disabled";
+ return "; STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
- return ", STIBP: forced";
+ return "; STIBP: forced";
case SPECTRE_V2_USER_STRICT_PREFERRED:
- return ", STIBP: always-on";
+ return "; STIBP: always-on";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
- return ", STIBP: conditional";
+ return "; STIBP: conditional";
}
return "";
}
@@ -2712,10 +2780,10 @@ static char *ibpb_state(void)
{
if (boot_cpu_has(X86_FEATURE_IBPB)) {
if (static_key_enabled(&switch_mm_always_ibpb))
- return ", IBPB: always-on";
+ return "; IBPB: always-on";
if (static_key_enabled(&switch_mm_cond_ibpb))
- return ", IBPB: conditional";
- return ", IBPB: disabled";
+ return "; IBPB: conditional";
+ return "; IBPB: disabled";
}
return "";
}
@@ -2725,14 +2793,32 @@ static char *pbrsb_eibrs_state(void)
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
- return ", PBRSB-eIBRS: SW sequence";
+ return "; PBRSB-eIBRS: SW sequence";
else
- return ", PBRSB-eIBRS: Vulnerable";
+ return "; PBRSB-eIBRS: Vulnerable";
} else {
- return ", PBRSB-eIBRS: Not affected";
+ return "; PBRSB-eIBRS: Not affected";
}
}
+static const char *spectre_bhi_state(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_BHI))
+ return "; BHI: Not affected";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
+ return "; BHI: BHI_DIS_S";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
+ return "; BHI: SW loop, KVM: SW loop";
+ else if (boot_cpu_has(X86_FEATURE_RETPOLINE) &&
+ !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) &&
+ rrsba_disabled)
+ return "; BHI: Retpoline";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+ return "; BHI: Vulnerable, KVM: SW loop";
+
+ return "; BHI: Vulnerable";
+}
+
static ssize_t spectre_v2_show_state(char *buf)
{
if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@@ -2745,13 +2831,15 @@ static ssize_t spectre_v2_show_state(char *buf)
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
- return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
+ return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
stibp_state(),
- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
pbrsb_eibrs_state(),
+ spectre_bhi_state(),
+ /* this should always be at the end */
spectre_v2_module_string());
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5c1e6d6be267af..605c26c009c8ac 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_SPECTRE_V2 BIT(8)
#define NO_MMIO BIT(9)
#define NO_EIBRS_PBRSB BIT(10)
+#define NO_BHI BIT(11)
#define VULNWL(vendor, family, model, whitelist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
/* Zhaoxin Family 7 */
- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
+ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
{}
};
@@ -1283,25 +1284,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi
u64 x86_read_arch_cap_msr(void)
{
- u64 ia32_cap = 0;
+ u64 x86_arch_cap_msr = 0;
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
- return ia32_cap;
+ return x86_arch_cap_msr;
}
-static bool arch_cap_mmio_immune(u64 ia32_cap)
+static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr)
{
- return (ia32_cap & ARCH_CAP_FBSDP_NO &&
- ia32_cap & ARCH_CAP_PSDP_NO &&
- ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+ return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_PSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO);
}
-static bool __init vulnerable_to_rfds(u64 ia32_cap)
+static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
{
/* The "immunity" bit trumps everything else: */
- if (ia32_cap & ARCH_CAP_RFDS_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO)
return false;
/*
@@ -1309,7 +1310,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
* indicate that mitigation is needed because guest is running on a
* vulnerable hardware or may migrate to such hardware:
*/
- if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
return true;
/* Only consult the blacklist when there is no enumeration: */
@@ -1318,11 +1319,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
+ u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
- !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
@@ -1334,7 +1335,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
- !(ia32_cap & ARCH_CAP_SSB_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
@@ -1345,17 +1346,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Don't use AutoIBRS when SNP is enabled because it degrades host
* userspace indirect branch performance.
*/
- if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
+ if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) ||
(cpu_has(c, X86_FEATURE_AUTOIBRS) &&
!cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
- !(ia32_cap & ARCH_CAP_PBRSB_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
}
if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)) {
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) {
setup_force_cpu_bug(X86_BUG_MDS);
if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
@@ -1374,9 +1375,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* TSX_CTRL check alone is not sufficient for cases when the microcode
* update is not present or running as guest that don't get TSX_CTRL.
*/
- if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+ if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) &&
(cpu_has(c, X86_FEATURE_RTM) ||
- (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+ (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)))
setup_force_cpu_bug(X86_BUG_TAA);
/*
@@ -1402,7 +1403,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
* nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
- if (!arch_cap_mmio_immune(ia32_cap)) {
+ if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
if (cpu_matches(cpu_vuln_blacklist, MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
@@ -1410,7 +1411,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
}
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
- if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA))
setup_force_cpu_bug(X86_BUG_RETBLEED);
}
@@ -1428,18 +1429,25 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* disabling AVX2. The only way to do this in HW is to clear XCR0[2],
* which means that AVX will be disabled.
*/
- if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+ if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) &&
boot_cpu_has(X86_FEATURE_AVX))
setup_force_cpu_bug(X86_BUG_GDS);
- if (vulnerable_to_rfds(ia32_cap))
+ if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);
+ /* When virtualized, eIBRS could be hidden, assume vulnerable */
+ if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
+ !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
+ boot_cpu_has(X86_FEATURE_HYPERVISOR)))
+ setup_force_cpu_bug(X86_BUG_BHI);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
/* Rogue Data Cache Load? No! */
- if (ia32_cap & ARCH_CAP_RDCL_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO)
return;
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index b7174209d855c6..946813d816bfc2 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -44,7 +44,10 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_F16C, X86_FEATURE_XMM2, },
{ X86_FEATURE_AES, X86_FEATURE_XMM2 },
{ X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
+ { X86_FEATURE_GFNI, X86_FEATURE_XMM2 },
{ X86_FEATURE_FMA, X86_FEATURE_AVX },
+ { X86_FEATURE_VAES, X86_FEATURE_AVX },
+ { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX },
{ X86_FEATURE_AVX2, X86_FEATURE_AVX, },
{ X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
{ X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
@@ -56,9 +59,6 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
{ X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
- { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
- { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
- { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b5cc557cfc3736..84d41be6d06ba4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -2500,12 +2500,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
return -EINVAL;
b = &per_cpu(mce_banks_array, s->id)[bank];
-
if (!b->init)
return -ENODEV;
b->ctl = new;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
return size;
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 422a4ddc2ab7c9..7b29ebda024f4e 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -108,7 +108,7 @@ static inline void k8_check_syscfg_dram_mod_en(void)
(boot_cpu_data.x86 >= 0x0f)))
return;
- if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return;
rdmsr(MSR_AMD64_SYSCFG, lo, hi);
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index c99f26ebe7a653..1a8687f8073a89 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -78,7 +78,8 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
else
cpu = cpumask_any_but(mask, exclude_cpu);
- if (!IS_ENABLED(CONFIG_NO_HZ_FULL))
+ /* Only continue if tick_nohz_full_mask has been initialized. */
+ if (!tick_nohz_full_enabled())
return cpu;
/* If the CPU picked isn't marked nohz_full nothing more needs doing. */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0dad49a09b7a9e..af5aa2c754c222 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
+ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
@@ -49,6 +50,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index 262f5fb18d74d3..22b65a5f5ec6c4 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -113,7 +113,7 @@ static unsigned long sgx_get_unmapped_area(struct file *file,
if (flags & MAP_FIXED)
return addr;
- return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
}
#ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 166692f2d50111..27892e57c4ef93 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -13,6 +13,7 @@
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
#include <asm/sgx.h>
#include "driver.h"
#include "encl.h"
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index aaca8d235dc2bb..d17c9b71eb4a25 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
#endif
- set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
@@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
topo_info.nr_disabled_cpus++;
}
- /* Register present and possible CPUs in the domain maps */
+ /*
+ * Register present and possible CPUs in the domain
+ * maps. cpu_possible_map will be updated in
+ * topology_init_possible_cpus() after enumeration is done.
+ */
for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
}
diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
index 1a8b3ad493afef..a7aa6eff4ae5ba 100644
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan)
if (!sft)
sft = get_count_order(ecx.cpu_nthreads + 1);
- topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1);
+ /*
+ * cpu_nthreads describes the number of threads in the package
+ * sft is the number of APIC ID bits per package
+ *
+ * As the number of actual threads per core is not described in
+ * this leaf, just set the CORE domain shift and let the later
+ * parsers set SMT shift. Assume one thread per core by default
+ * which is correct if there are no other CPUID leafs to parse.
+ */
+ topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1);
+ topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1);
return true;
}
-static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id)
+static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id)
{
/*
* Starting with Fam 17h the DIE domain could probably be used to
@@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
tscan->c->topo.initial_apicid = leaf.ext_apic_id;
/*
- * If leaf 0xb is available, then SMT shift is set already. If not
- * take it from ecx.threads_per_core and use topo_update_dom() -
- * topology_set_dom() would propagate and overwrite the already
- * propagated CORE level.
+ * If leaf 0xb is available, then the domain shifts are set
+ * already and nothing to do here.
*/
if (!has_0xb) {
+ /*
+ * Leaf 0x80000008 set the CORE domain shift already.
+ * Update the SMT domain, but do not propagate it.
+ */
unsigned int nthreads = leaf.core_nthreads + 1;
topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads);
@@ -109,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
static bool parse_fam10h_node_id(struct topo_scan *tscan)
{
- struct {
- union {
+ union {
+ struct {
u64 node_id : 3,
nodes_per_pkg : 3,
unused : 58;
- u64 msr;
};
+ u64 msr;
} nid;
if (!boot_cpu_has(X86_FEATURE_NODEID_MSR))
@@ -135,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan)
tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN];
}
+static void topoext_fixup(struct topo_scan *tscan)
+{
+ struct cpuinfo_x86 *c = tscan->c;
+ u64 msrval;
+
+ /* Try to re-enable TopologyExtensions if switched off by BIOS */
+ if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD ||
+ c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f)
+ return;
+
+ if (msr_set_bit(0xc0011005, 54) <= 0)
+ return;
+
+ rdmsrl(0xc0011005, msrval);
+ if (msrval & BIT_64(54)) {
+ set_cpu_cap(c, X86_FEATURE_TOPOEXT);
+ pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
+ }
+}
+
static void parse_topology_amd(struct topo_scan *tscan)
{
bool has_0xb = false;
@@ -164,6 +196,7 @@ static void parse_topology_amd(struct topo_scan *tscan)
void cpu_parse_topology_amd(struct topo_scan *tscan)
{
tscan->amd_nodes_per_pkg = 1;
+ topoext_fixup(tscan);
parse_topology_amd(tscan);
if (tscan->amd_nodes_per_pkg > 1)
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index e963344b044902..53935b4d62e305 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -2,6 +2,7 @@
/*
* EISA specific code
*/
+#include <linux/cc_platform.h>
#include <linux/ioport.h>
#include <linux/eisa.h>
#include <linux/io.h>
@@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void)
{
void __iomem *p;
- if (xen_pv_domain() && !xen_initial_domain())
+ if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return 0;
p = ioremap(0x0FFFD9, 4);
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index fe0c859873d17e..ade0043ce56efe 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,6 +18,7 @@
#include <linux/uaccess.h>
#include <linux/smp.h>
#include <linux/sched/task_stack.h>
+#include <linux/vmalloc.h>
#include <asm/cpu_entry_area.h>
#include <asm/softirq_stack.h>
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4cadfd606e8e6a..7f0732bc0ccd23 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -65,6 +65,7 @@ static int __init parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
+static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled);
static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;
@@ -244,7 +245,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void)
{
u32 flags = 0;
- if (__this_cpu_read(apf_reason.enabled)) {
+ if (__this_cpu_read(async_pf_enabled)) {
flags = __this_cpu_read(apf_reason.flags);
__this_cpu_write(apf_reason.flags, 0);
}
@@ -295,7 +296,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
inc_irq_stat(irq_hv_callback_count);
- if (__this_cpu_read(apf_reason.enabled)) {
+ if (__this_cpu_read(async_pf_enabled)) {
token = __this_cpu_read(apf_reason.token);
kvm_async_pf_task_wake(token);
__this_cpu_write(apf_reason.token, 0);
@@ -362,7 +363,7 @@ static void kvm_guest_cpu_init(void)
wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
- __this_cpu_write(apf_reason.enabled, 1);
+ __this_cpu_write(async_pf_enabled, true);
pr_debug("setup async PF for cpu %d\n", smp_processor_id());
}
@@ -383,11 +384,11 @@ static void kvm_guest_cpu_init(void)
static void kvm_pv_disable_apf(void)
{
- if (!__this_cpu_read(apf_reason.enabled))
+ if (!__this_cpu_read(async_pf_enabled))
return;
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
- __this_cpu_write(apf_reason.enabled, 0);
+ __this_cpu_write(async_pf_enabled, false);
pr_debug("disable async PF for cpu %d\n", smp_processor_id());
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 9a5b372c706fcc..ed163c8c8604e3 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -580,7 +580,7 @@ EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx);
static char *nmi_check_stall_msg[] = {
/* */
-/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */
+/* +--------- nmi_seq & 0x1: CPU is currently in NMI handler. */
/* | +------ cpu_is_offline(cpu) */
/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */
/* | | | NMI handler has been invoked. */
@@ -628,22 +628,26 @@ void nmi_backtrace_stall_check(const struct cpumask *btp)
nmi_seq = READ_ONCE(nsp->idt_nmi_seq);
if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
msgp = "CPU entered NMI handler function, but has not exited";
- } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) {
- msgp = "CPU is handling NMIs";
- } else {
- idx = ((nsp->idt_seq_snap & 0x1) << 2) |
+ } else if (nsp->idt_nmi_seq_snap == nmi_seq ||
+ nsp->idt_nmi_seq_snap + 1 == nmi_seq) {
+ idx = ((nmi_seq & 0x1) << 2) |
(cpu_is_offline(cpu) << 1) |
(nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
msgp = nmi_check_stall_msg[idx];
if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
modp = ", but OK because ignore_nmis was set";
- if (nmi_seq & 0x1)
- msghp = " (CPU currently in NMI handler function)";
- else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
+ if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
msghp = " (CPU exited one NMI handler function)";
+ else if (nmi_seq & 0x1)
+ msghp = " (CPU currently in NMI handler function)";
+ else
+ msghp = " (CPU was never in an NMI handler function)";
+ } else {
+ msgp = "CPU is handling NMIs";
}
- pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n",
- __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies));
+ pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp);
+ pr_alert("%s: last activity: %lu jiffies ago.\n",
+ __func__, j - READ_ONCE(nsp->recv_jiffies));
}
}
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 319fef37d9dce4..cc2c34ba7228ac 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -203,16 +203,6 @@ void __init probe_roms(void)
unsigned char c;
int i;
- /*
- * The ROM memory range is not part of the e820 table and is therefore not
- * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted
- * memory, and SNP requires encrypted memory to be validated before access.
- * Do that here.
- */
- snp_prep_memory(video_rom_resource.start,
- ((system_rom_resource.end + 1) - video_rom_resource.start),
- SNP_PAGE_STATE_PRIVATE);
-
/* video rom */
upper = adapter_rom_resources[0].start;
for (start = video_rom_resource.start; start < upper; start += 2048) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7062b84dd467d6..6d3d20e3e43a9b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -139,7 +139,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
log_lvl, d3, d6, d7);
}
- if (cpu_feature_enabled(X86_FEATURE_OSPKE))
+ if (cr4 & X86_CR4_PKE)
printk("%sPKRU: %08x\n", log_lvl, read_pkru());
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ef206500ed6f22..47e7fcdbdacd5b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -9,7 +9,6 @@
#include <linux/console.h>
#include <linux/crash_dump.h>
#include <linux/dma-map-ops.h>
-#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/ima.h>
#include <linux/init_ohci1394_dma.h>
@@ -36,6 +35,7 @@
#include <asm/bios_ebda.h>
#include <asm/bugs.h>
#include <asm/cacheinfo.h>
+#include <asm/coco.h>
#include <asm/cpu.h>
#include <asm/efi.h>
#include <asm/gart.h>
@@ -902,7 +902,7 @@ void __init setup_arch(char **cmdline_p)
efi_init();
reserve_ibft_region();
- dmi_setup();
+ x86_init.resources.dmi_setup();
/*
* VMware detection requires dmi to be available, so this
@@ -992,6 +992,7 @@ void __init setup_arch(char **cmdline_p)
* memory size.
*/
mem_encrypt_setup_arch();
+ cc_random_init();
efi_fake_memmap();
efi_find_mirror();
@@ -1106,8 +1107,6 @@ void __init setup_arch(char **cmdline_p)
*/
arch_reserve_crashkernel();
- memblock_find_dma_reserve();
-
if (!early_xdbc_setup_hardware())
early_xdbc_register_console();
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 8b04958da5e7d6..b4f8fa0f722cd6 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -1203,12 +1203,14 @@ static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt,
break;
case SVM_EXIT_MONITOR:
- if (opcode == 0x010f && modrm == 0xc8)
+ /* MONITOR and MONITORX instructions generate the same error code */
+ if (opcode == 0x010f && (modrm == 0xc8 || modrm == 0xfa))
return ES_OK;
break;
case SVM_EXIT_MWAIT:
- if (opcode == 0x010f && modrm == 0xc9)
+ /* MWAIT and MWAITX instructions generate the same error code */
+ if (opcode == 0x010f && (modrm == 0xc9 || modrm == 0xfb))
return ES_OK;
break;
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index b59b09c2f28406..38ad066179d81f 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -23,6 +23,7 @@
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
+#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>
#include <asm/init.h>
@@ -795,21 +796,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
}
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
-{
- unsigned long vaddr, npages;
-
- vaddr = (unsigned long)__va(paddr);
- npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
-
- if (op == SNP_PAGE_STATE_PRIVATE)
- early_snp_set_memory_private(vaddr, paddr, npages);
- else if (op == SNP_PAGE_STATE_SHARED)
- early_snp_set_memory_shared(vaddr, paddr, npages);
- else
- WARN(1, "invalid memory op %d\n", op);
-}
-
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
unsigned long vaddr_end, int op)
{
@@ -2136,6 +2122,17 @@ void __head __noreturn snp_abort(void)
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
+/*
+ * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
+ * enabled, as the alternative (fallback) logic for DMI probing in the legacy
+ * ROM region can cause a crash since this region is not pre-validated.
+ */
+void __init snp_dmi_setup(void)
+{
+ if (efi_enabled(EFI_CONFIG_TABLES))
+ dmi_setup();
+}
+
static void dump_cpuid_table(void)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -2287,16 +2284,6 @@ static int __init snp_init_platform_device(void)
}
device_initcall(snp_init_platform_device);
-void kdump_sev_callback(void)
-{
- /*
- * Do wbinvd() on remote CPUs when SNP is enabled in order to
- * safely do SNP_SHUTDOWN on the local CPU.
- */
- if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
- wbinvd();
-}
-
void sev_show_status(void)
{
int i;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index cb9fa1d5c66f73..01d7cd85ef9789 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -112,13 +112,21 @@ static void find_start_end(unsigned long addr, unsigned long flags,
*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
}
+static inline unsigned long stack_guard_placement(vm_flags_t vm_flags)
+{
+ if (vm_flags & VM_SHADOW_STACK)
+ return PAGE_SIZE;
+
+ return 0;
+}
+
unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
unsigned long begin, end;
if (flags & MAP_FIXED)
@@ -137,12 +145,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
- info.flags = 0;
info.length = len;
info.low_limit = begin;
info.high_limit = end;
- info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
+ info.start_gap = stack_guard_placement(vm_flags);
if (filp) {
info.align_mask = get_align_mask();
info.align_offset += get_align_bits();
@@ -151,14 +158,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
}
unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
- const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
+arch_get_unmapped_area_topdown_vmflags(struct file *filp, unsigned long addr0,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags, vm_flags_t vm_flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
/* requested length too big for entire address space */
if (len > TASK_SIZE)
@@ -192,6 +199,7 @@ get_unmapped_area:
info.low_limit = PAGE_SIZE;
info.high_limit = get_mmap_base(0);
+ info.start_gap = stack_guard_placement(vm_flags);
/*
* If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
@@ -203,7 +211,6 @@ get_unmapped_area:
if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
- info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
if (filp) {
info.align_mask = get_align_mask();
@@ -223,3 +230,18 @@ bottomup:
*/
return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
}
+
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ return arch_get_unmapped_area_vmflags(filp, addr, len, pgoff, flags, 0);
+}
+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+{
+ return arch_get_unmapped_area_topdown_vmflags(filp, addr, len, pgoff, flags, 0);
+}
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a42830dc151bc4..d5dc5a92635a82 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -3,6 +3,7 @@
*
* For licencing details see kernel-base/COPYING
*/
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/export.h>
@@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = {
.probe_roms = probe_roms,
.reserve_resources = reserve_standard_io_resources,
.memory_setup = e820__memory_setup_default,
+ .dmi_setup = dmi_setup,
},
.mpparse = {
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3aaf7e86a859a2..0ebdd088f28b85 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -122,6 +122,7 @@ config KVM_AMD_SEV
default y
depends on KVM_AMD && X86_64
depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
+ select ARCH_HAS_CC_PLATFORM
help
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
with Encrypted State (SEV-ES) on AMD processors.
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index a88bb14266b69f..addc44fc7187d6 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,11 +3,6 @@
ccflags-y += -I $(srctree)/arch/x86/kvm
ccflags-$(CONFIG_KVM_WERROR) += -Werror
-ifeq ($(CONFIG_FRAME_POINTER),y)
-OBJECT_FILES_NON_STANDARD_vmx/vmenter.o := y
-OBJECT_FILES_NON_STANDARD_svm/vmenter.o := y
-endif
-
include $(srctree)/virt/kvm/Makefile.kvm
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index adba49afb5fe63..77352a4abd87f8 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -189,15 +189,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
return 0;
}
-static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
- const char *sig)
+static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries,
+ int nent, const char *sig)
{
struct kvm_hypervisor_cpuid cpuid = {};
struct kvm_cpuid_entry2 *entry;
u32 base;
for_each_possible_hypervisor_cpuid_base(base) {
- entry = kvm_find_cpuid_entry(vcpu, base);
+ entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (entry) {
u32 signature[3];
@@ -217,22 +217,29 @@ static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcp
return cpuid;
}
-static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu,
- struct kvm_cpuid_entry2 *entries, int nent)
+static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
+ const char *sig)
{
- u32 base = vcpu->arch.kvm_cpuid.base;
-
- if (!base)
- return NULL;
+ return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent, sig);
+}
- return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES,
+static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries,
+ int nent, u32 kvm_cpuid_base)
+{
+ return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES,
KVM_CPUID_INDEX_NOT_SIGNIFICANT);
}
static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
{
- return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent);
+ u32 base = vcpu->arch.kvm_cpuid.base;
+
+ if (!base)
+ return NULL;
+
+ return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent, base);
}
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
@@ -266,6 +273,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
int nent)
{
struct kvm_cpuid_entry2 *best;
+ struct kvm_hypervisor_cpuid kvm_cpuid;
best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best) {
@@ -292,10 +300,12 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
- if (kvm_hlt_in_guest(vcpu->kvm) && best &&
- (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
- best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE);
+ if (kvm_cpuid.base) {
+ best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base);
+ if (kvm_hlt_in_guest(vcpu->kvm) && best)
+ best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ }
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
@@ -366,6 +376,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_update_pv_runtime(vcpu);
+ vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 856e3037e74f3f..23dbb9eb277c74 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -120,6 +120,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx);
}
+static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.is_amd_compatible;
+}
+
+static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu)
+{
+ return !guest_cpuid_is_amd_compatible(vcpu);
+}
+
static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cf37586f04668d..ebf41023be3829 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2776,7 +2776,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
- if (r && lvt_type == APIC_LVTPC)
+ if (r && lvt_type == APIC_LVTPC &&
+ guest_cpuid_is_intel_compatible(apic->vcpu))
kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
return r;
}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 992e651540e852..db007a4dffa2e1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4935,7 +4935,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
context->cpu_role.base.level, is_efer_nx(context),
guest_can_use(vcpu, X86_FEATURE_GBPAGES),
is_cr4_pse(context),
- guest_cpuid_is_amd_or_hygon(vcpu));
+ guest_cpuid_is_amd_compatible(vcpu));
}
static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
@@ -5576,9 +5576,9 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
* that problem is swept under the rug; KVM's CPUID API is horrific and
* it's all but impossible to solve it without introducing a new API.
*/
- vcpu->arch.root_mmu.root_role.word = 0;
- vcpu->arch.guest_mmu.root_role.word = 0;
- vcpu->arch.nested_mmu.root_role.word = 0;
+ vcpu->arch.root_mmu.root_role.invalid = 1;
+ vcpu->arch.guest_mmu.root_role.invalid = 1;
+ vcpu->arch.nested_mmu.root_role.invalid = 1;
vcpu->arch.root_mmu.cpu_role.ext.valid = 0;
vcpu->arch.guest_mmu.cpu_role.ext.valid = 0;
vcpu->arch.nested_mmu.cpu_role.ext.valid = 0;
@@ -7399,7 +7399,8 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
* by the memslot, KVM can't use a hugepage due to the
* misaligned address regardless of memory attributes.
*/
- if (gfn >= slot->base_gfn) {
+ if (gfn >= slot->base_gfn &&
+ gfn + nr_pages <= slot->base_gfn + slot->npages) {
if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
hugepage_clear_mixed(slot, gfn, level);
else
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index d078157e62aa40..04c1f0957fea87 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1548,17 +1548,21 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
}
}
-/*
- * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
- * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
- * If AD bits are not enabled, this will require clearing the writable bit on
- * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
- * be flushed.
- */
+static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp)
+{
+ /*
+ * All TDP MMU shadow pages share the same role as their root, aside
+ * from level, so it is valid to key off any shadow page to determine if
+ * write protection is needed for an entire tree.
+ */
+ return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled();
+}
+
static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end)
{
- u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK;
+ const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK :
+ shadow_dirty_mask;
struct tdp_iter iter;
bool spte_set = false;
@@ -1573,7 +1577,7 @@ retry:
if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
continue;
- KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+ KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
spte_ad_need_write_protect(iter.old_spte));
if (!(iter.old_spte & dbit))
@@ -1590,11 +1594,9 @@ retry:
}
/*
- * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
- * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
- * If AD bits are not enabled, this will require clearing the writable bit on
- * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
- * be flushed.
+ * Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the
+ * memslot. Returns true if an SPTE has been changed and the TLBs need to be
+ * flushed.
*/
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
const struct kvm_memory_slot *slot)
@@ -1610,18 +1612,11 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
return spte_set;
}
-/*
- * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
- * set in mask, starting at gfn. The given memslot is expected to contain all
- * the GFNs represented by set bits in the mask. If AD bits are enabled,
- * clearing the dirty status will involve clearing the dirty bit on each SPTE
- * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
- */
static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t gfn, unsigned long mask, bool wrprot)
{
- u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK :
- shadow_dirty_mask;
+ const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK :
+ shadow_dirty_mask;
struct tdp_iter iter;
lockdep_assert_held_write(&kvm->mmu_lock);
@@ -1633,7 +1628,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
if (!mask)
break;
- KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+ KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
spte_ad_need_write_protect(iter.old_spte));
if (iter.level > PG_LEVEL_4K ||
@@ -1659,11 +1654,9 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
}
/*
- * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
- * set in mask, starting at gfn. The given memslot is expected to contain all
- * the GFNs represented by set bits in the mask. If AD bits are enabled,
- * clearing the dirty status will involve clearing the dirty bit on each SPTE
- * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
+ * Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for
+ * which a bit is set in mask, starting at gfn. The given memslot is expected to
+ * contain all the GFNs represented by set bits in the mask.
*/
void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index c397b28e3d1b68..a593b03c9aed67 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -775,8 +775,20 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->pebs_data_cfg_mask = ~0ull;
bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
- if (vcpu->kvm->arch.enable_pmu)
- static_call(kvm_x86_pmu_refresh)(vcpu);
+ if (!vcpu->kvm->arch.enable_pmu)
+ return;
+
+ static_call(kvm_x86_pmu_refresh)(vcpu);
+
+ /*
+ * At RESET, both Intel and AMD CPUs set all enable bits for general
+ * purpose counters in IA32_PERF_GLOBAL_CTRL (so that software that
+ * was written for v1 PMUs don't unknowingly leave GP counters disabled
+ * in the global controls). Emulate that behavior when refreshing the
+ * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL.
+ */
+ if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
+ pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
}
void kvm_pmu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index aadefcaa9561d0..2f4e155080badc 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
-#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
/* CPUID level 0x80000007 (EDX). */
@@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = {
*/
static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
{
+ BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_3);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_4);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_5);
BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
}
@@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index ae0ac12382b927..759581bb2128da 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -84,9 +84,10 @@ struct enc_region {
};
/* Called with the sev_bitmap_lock held, or on shutdown */
-static int sev_flush_asids(int min_asid, int max_asid)
+static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
{
- int ret, asid, error = 0;
+ int ret, error = 0;
+ unsigned int asid;
/* Check if there are any ASIDs to reclaim before performing a flush */
asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
@@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm)
}
/* Must be called with the sev_bitmap_lock held */
-static bool __sev_recycle_asids(int min_asid, int max_asid)
+static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid)
{
if (sev_flush_asids(min_asid, max_asid))
return false;
@@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
static int sev_asid_new(struct kvm_sev_info *sev)
{
- int asid, min_asid, max_asid, ret;
+ /*
+ * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
+ * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
+ * Note: min ASID can end up larger than the max if basic SEV support is
+ * effectively disabled by disallowing use of ASIDs for SEV guests.
+ */
+ unsigned int min_asid = sev->es_active ? 1 : min_sev_asid;
+ unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
+ unsigned int asid;
bool retry = true;
+ int ret;
+
+ if (min_asid > max_asid)
+ return -ENOTTY;
WARN_ON(sev->misc_cg);
sev->misc_cg = get_current_misc_cg();
@@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev)
mutex_lock(&sev_bitmap_lock);
- /*
- * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
- * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
- */
- min_asid = sev->es_active ? 1 : min_sev_asid;
- max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
if (asid > max_asid) {
@@ -179,7 +186,8 @@ again:
mutex_unlock(&sev_bitmap_lock);
- return asid;
+ sev->asid = asid;
+ return 0;
e_uncharge:
sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
@@ -187,7 +195,7 @@ e_uncharge:
return ret;
}
-static int sev_get_asid(struct kvm *kvm)
+static unsigned int sev_get_asid(struct kvm *kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -247,21 +255,19 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_platform_init_args init_args = {0};
- int asid, ret;
+ int ret;
if (kvm->created_vcpus)
return -EINVAL;
- ret = -EBUSY;
if (unlikely(sev->active))
- return ret;
+ return -EINVAL;
sev->active = true;
sev->es_active = argp->id == KVM_SEV_ES_INIT;
- asid = sev_asid_new(sev);
- if (asid < 0)
+ ret = sev_asid_new(sev);
+ if (ret)
goto e_no_asid;
- sev->asid = asid;
init_args.probe = false;
ret = sev_platform_init(&init_args);
@@ -287,8 +293,8 @@ e_no_asid:
static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
+ unsigned int asid = sev_get_asid(kvm);
struct sev_data_activate activate;
- int asid = sev_get_asid(kvm);
int ret;
/* activate ASID on the given handle */
@@ -428,7 +434,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
/* Avoid using vmalloc for smaller buffers. */
size = npages * sizeof(struct page *);
if (size > PAGE_SIZE)
- pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ pages = __vmalloc(size, GFP_KERNEL_ACCOUNT);
else
pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
@@ -2240,8 +2246,10 @@ void __init sev_hardware_setup(void)
goto out;
}
- sev_asid_count = max_sev_asid - min_sev_asid + 1;
- WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+ if (min_sev_asid <= max_sev_asid) {
+ sev_asid_count = max_sev_asid - min_sev_asid + 1;
+ WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+ }
sev_supported = true;
/* SEV-ES support requested? */
@@ -2272,7 +2280,9 @@ void __init sev_hardware_setup(void)
out:
if (boot_cpu_has(X86_FEATURE_SEV))
pr_info("SEV %s (ASIDs %u - %u)\n",
- sev_supported ? "enabled" : "disabled",
+ sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
+ "unusable" :
+ "disabled",
min_sev_asid, max_sev_asid);
if (boot_cpu_has(X86_FEATURE_SEV_ES))
pr_info("SEV-ES %s (ASIDs %u - %u)\n",
@@ -2320,7 +2330,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
*/
static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
{
- int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
+ unsigned int asid = sev_get_asid(vcpu->kvm);
/*
* Note! The address must be a kernel address, as regular page walk
@@ -2638,7 +2648,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
- int asid = sev_get_asid(svm->vcpu.kvm);
+ unsigned int asid = sev_get_asid(svm->vcpu.kvm);
/* Assign the asid allocated with this SEV guest */
svm->asid = asid;
@@ -3174,7 +3184,7 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
unsigned long pfn;
struct page *p;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
/*
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d1a9f995163581..9aaf83c8d57df7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1503,6 +1503,11 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
__free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
}
+static struct sev_es_save_area *sev_es_host_save_area(struct svm_cpu_data *sd)
+{
+ return page_address(sd->save_area) + 0x400;
+}
+
static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1519,12 +1524,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* or subsequent vmload of host save area.
*/
vmsave(sd->save_area_pa);
- if (sev_es_guest(vcpu->kvm)) {
- struct sev_es_save_area *hostsa;
- hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
-
- sev_es_prepare_switch_to_guest(hostsa);
- }
+ if (sev_es_guest(vcpu->kvm))
+ sev_es_prepare_switch_to_guest(sev_es_host_save_area(sd));
if (tsc_scaling)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
@@ -4101,6 +4102,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
{
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
struct vcpu_svm *svm = to_svm(vcpu);
guest_state_enter_irqoff();
@@ -4108,7 +4110,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
amd_clear_divider();
if (sev_es_guest(vcpu->kvm))
- __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
+ __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted,
+ sev_es_host_save_area(sd));
else
__svm_vcpu_run(svm, spec_ctrl_intercepted);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 7f1fbd874c4582..33878efdebc829 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -698,7 +698,8 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
/* vmenter.S */
-void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
+void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted,
+ struct sev_es_save_area *hostsa);
void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
#define DEFINE_KVM_GHCB_ACCESSORS(field) \
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 187018c424bfb4..a0c8eb37d3e1c6 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -3,6 +3,7 @@
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
+#include <asm/frame.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include "kvm-asm-offsets.h"
@@ -67,7 +68,7 @@
"", X86_FEATURE_V_SPEC_CTRL
901:
.endm
-.macro RESTORE_HOST_SPEC_CTRL_BODY
+.macro RESTORE_HOST_SPEC_CTRL_BODY spec_ctrl_intercepted:req
900:
/* Same for after vmexit. */
mov $MSR_IA32_SPEC_CTRL, %ecx
@@ -76,7 +77,7 @@
* Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
* if it was not intercepted during guest execution.
*/
- cmpb $0, (%_ASM_SP)
+ cmpb $0, \spec_ctrl_intercepted
jnz 998f
rdmsr
movl %eax, SVM_spec_ctrl(%_ASM_DI)
@@ -99,6 +100,7 @@
*/
SYM_FUNC_START(__svm_vcpu_run)
push %_ASM_BP
+ mov %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
push %r15
push %r14
@@ -268,7 +270,7 @@ SYM_FUNC_START(__svm_vcpu_run)
RET
RESTORE_GUEST_SPEC_CTRL_BODY
- RESTORE_HOST_SPEC_CTRL_BODY
+ RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP)
10: cmpb $0, _ASM_RIP(kvm_rebooting)
jne 2b
@@ -290,66 +292,68 @@ SYM_FUNC_START(__svm_vcpu_run)
SYM_FUNC_END(__svm_vcpu_run)
+#ifdef CONFIG_KVM_AMD_SEV
+
+
+#ifdef CONFIG_X86_64
+#define SEV_ES_GPRS_BASE 0x300
+#define SEV_ES_RBX (SEV_ES_GPRS_BASE + __VCPU_REGS_RBX * WORD_SIZE)
+#define SEV_ES_RBP (SEV_ES_GPRS_BASE + __VCPU_REGS_RBP * WORD_SIZE)
+#define SEV_ES_RSI (SEV_ES_GPRS_BASE + __VCPU_REGS_RSI * WORD_SIZE)
+#define SEV_ES_RDI (SEV_ES_GPRS_BASE + __VCPU_REGS_RDI * WORD_SIZE)
+#define SEV_ES_R12 (SEV_ES_GPRS_BASE + __VCPU_REGS_R12 * WORD_SIZE)
+#define SEV_ES_R13 (SEV_ES_GPRS_BASE + __VCPU_REGS_R13 * WORD_SIZE)
+#define SEV_ES_R14 (SEV_ES_GPRS_BASE + __VCPU_REGS_R14 * WORD_SIZE)
+#define SEV_ES_R15 (SEV_ES_GPRS_BASE + __VCPU_REGS_R15 * WORD_SIZE)
+#endif
+
/**
* __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
* @svm: struct vcpu_svm *
* @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_sev_es_vcpu_run)
- push %_ASM_BP
-#ifdef CONFIG_X86_64
- push %r15
- push %r14
- push %r13
- push %r12
-#else
- push %edi
- push %esi
-#endif
- push %_ASM_BX
+ FRAME_BEGIN
/*
- * Save variables needed after vmexit on the stack, in inverse
- * order compared to when they are needed.
+ * Save non-volatile (callee-saved) registers to the host save area.
+ * Except for RAX and RSP, all GPRs are restored on #VMEXIT, but not
+ * saved on VMRUN.
*/
+ mov %rbp, SEV_ES_RBP (%rdx)
+ mov %r15, SEV_ES_R15 (%rdx)
+ mov %r14, SEV_ES_R14 (%rdx)
+ mov %r13, SEV_ES_R13 (%rdx)
+ mov %r12, SEV_ES_R12 (%rdx)
+ mov %rbx, SEV_ES_RBX (%rdx)
- /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */
- push %_ASM_ARG2
-
- /* Save @svm. */
- push %_ASM_ARG1
-
-.ifnc _ASM_ARG1, _ASM_DI
/*
- * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX
- * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL.
+ * Save volatile registers that hold arguments that are needed after
+ * #VMEXIT (RDI=@svm and RSI=@spec_ctrl_intercepted).
*/
- mov %_ASM_ARG1, %_ASM_DI
-.endif
+ mov %rdi, SEV_ES_RDI (%rdx)
+ mov %rsi, SEV_ES_RSI (%rdx)
- /* Clobbers RAX, RCX, RDX. */
+ /* Clobbers RAX, RCX, RDX (@hostsa). */
RESTORE_GUEST_SPEC_CTRL
/* Get svm->current_vmcb->pa into RAX. */
- mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
- mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
+ mov SVM_current_vmcb(%rdi), %rax
+ mov KVM_VMCB_pa(%rax), %rax
/* Enter guest mode */
sti
-1: vmrun %_ASM_AX
+1: vmrun %rax
2: cli
- /* Pop @svm to RDI, guest registers have been saved already. */
- pop %_ASM_DI
-
#ifdef CONFIG_MITIGATION_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+ FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
- /* Clobbers RAX, RCX, RDX. */
+ /* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */
RESTORE_HOST_SPEC_CTRL
/*
@@ -361,30 +365,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
*/
UNTRAIN_RET_VM
- /* "Pop" @spec_ctrl_intercepted. */
- pop %_ASM_BX
-
- pop %_ASM_BX
-
-#ifdef CONFIG_X86_64
- pop %r12
- pop %r13
- pop %r14
- pop %r15
-#else
- pop %esi
- pop %edi
-#endif
- pop %_ASM_BP
+ FRAME_END
RET
RESTORE_GUEST_SPEC_CTRL_BODY
- RESTORE_HOST_SPEC_CTRL_BODY
+ RESTORE_HOST_SPEC_CTRL_BODY %sil
-3: cmpb $0, _ASM_RIP(kvm_rebooting)
+3: cmpb $0, kvm_rebooting(%rip)
jne 2b
ud2
_ASM_EXTABLE(1b, 3b)
SYM_FUNC_END(__svm_sev_es_vcpu_run)
+#endif /* CONFIG_KVM_AMD_SEV */
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 88659de4d2a714..c6b4b1728006d5 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -735,13 +735,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
* Tracepoint for nested #vmexit because of interrupt pending
*/
TRACE_EVENT(kvm_invlpga,
- TP_PROTO(__u64 rip, int asid, u64 address),
+ TP_PROTO(__u64 rip, unsigned int asid, u64 address),
TP_ARGS(rip, asid, address),
TP_STRUCT__entry(
- __field( __u64, rip )
- __field( int, asid )
- __field( __u64, address )
+ __field( __u64, rip )
+ __field( unsigned int, asid )
+ __field( __u64, address )
),
TP_fast_assign(
@@ -750,7 +750,7 @@ TRACE_EVENT(kvm_invlpga,
__entry->address = address;
),
- TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx",
+ TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx",
__entry->rip, __entry->asid, __entry->address)
);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 12ade343a17ed5..be40474de6e4db 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -535,7 +535,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
perf_capabilities = vcpu_get_perf_capabilities(vcpu);
if (cpuid_model_is_consistent(vcpu) &&
(perf_capabilities & PMU_CAP_LBR_FMT))
- x86_perf_get_lbr(&lbr_desc->records);
+ memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps));
else
lbr_desc->records.nr = 0;
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 2bfbf758d06110..f6986dee6f8c7c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
call vmx_spec_ctrl_restore_host
+ CLEAR_BRANCH_HISTORY_VMEXIT
+
/* Put return value in AX */
mov %_ASM_BX, %_ASM_AX
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c37a89eda90f82..22411f4aff5303 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -218,6 +218,8 @@ module_param(ple_window_max, uint, 0444);
int __read_mostly pt_mode = PT_MODE_SYSTEM;
module_param(pt_mode, int, S_IRUGO);
+struct x86_pmu_lbr __ro_after_init vmx_lbr_caps;
+
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
static DEFINE_MUTEX(vmx_l1d_flush_mutex);
@@ -7862,10 +7864,9 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmx_update_exception_bitmap(vcpu);
}
-static u64 vmx_get_perf_capabilities(void)
+static __init u64 vmx_get_perf_capabilities(void)
{
u64 perf_cap = PMU_CAP_FW_WRITES;
- struct x86_pmu_lbr lbr;
u64 host_perf_cap = 0;
if (!enable_pmu)
@@ -7875,15 +7876,43 @@ static u64 vmx_get_perf_capabilities(void)
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
- x86_perf_get_lbr(&lbr);
- if (lbr.nr)
+ x86_perf_get_lbr(&vmx_lbr_caps);
+
+ /*
+ * KVM requires LBR callstack support, as the overhead due to
+ * context switching LBRs without said support is too high.
+ * See intel_pmu_create_guest_lbr_event() for more info.
+ */
+ if (!vmx_lbr_caps.has_callstack)
+ memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps));
+ else if (vmx_lbr_caps.nr)
perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
}
if (vmx_pebs_supported()) {
perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
- if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4)
- perf_cap &= ~PERF_CAP_PEBS_BASELINE;
+
+ /*
+ * Disallow adaptive PEBS as it is functionally broken, can be
+ * used by the guest to read *host* LBRs, and can be used to
+ * bypass userspace event filters. To correctly and safely
+ * support adaptive PEBS, KVM needs to:
+ *
+ * 1. Account for the ADAPTIVE flag when (re)programming fixed
+ * counters.
+ *
+ * 2. Gain support from perf (or take direct control of counter
+ * programming) to support events without adaptive PEBS
+ * enabled for the hardware counter.
+ *
+ * 3. Ensure LBR MSRs cannot hold host data on VM-Entry with
+ * adaptive PEBS enabled and MSR_PEBS_DATA_CFG.LBRS=1.
+ *
+ * 4. Document which PMU events are effectively exposed to the
+ * guest via adaptive PEBS, and make adaptive PEBS mutually
+ * exclusive with KVM_SET_PMU_EVENT_FILTER if necessary.
+ */
+ perf_cap &= ~PERF_CAP_PEBS_BASELINE;
}
return perf_cap;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 65786dbe7d60bd..90f9e443464645 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -15,6 +15,7 @@
#include "vmx_ops.h"
#include "../cpuid.h"
#include "run_flags.h"
+#include "../mmu.h"
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
@@ -109,6 +110,8 @@ struct lbr_desc {
bool msr_passthrough;
};
+extern struct x86_pmu_lbr vmx_lbr_caps;
+
/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -719,7 +722,8 @@ static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
if (!enable_ept)
return true;
- return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+ return allow_smaller_maxphyaddr &&
+ cpuid_maxphyaddr(vcpu) < kvm_get_shadow_phys_bits();
}
static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 47d9f03b777837..91478b769af089 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
static u64 kvm_get_arch_capabilities(void)
{
@@ -3470,7 +3470,7 @@ static bool is_mci_status_msr(u32 msr)
static bool can_set_mci_status(struct kvm_vcpu *vcpu)
{
/* McStatusWrEn enabled? */
- if (guest_cpuid_is_amd_or_hygon(vcpu))
+ if (guest_cpuid_is_amd_compatible(vcpu))
return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
return false;
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
index 6e8b7e600def57..97e88e58567bf8 100644
--- a/arch/x86/lib/copy_mc.c
+++ b/arch/x86/lib/copy_mc.c
@@ -4,6 +4,7 @@
#include <linux/jump_label.h>
#include <linux/uaccess.h>
#include <linux/export.h>
+#include <linux/instrumented.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -61,10 +62,20 @@ unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned
*/
unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
{
- if (copy_mc_fragile_enabled)
- return copy_mc_fragile(dst, src, len);
- if (static_cpu_has(X86_FEATURE_ERMS))
- return copy_mc_enhanced_fast_string(dst, src, len);
+ unsigned long ret;
+
+ if (copy_mc_fragile_enabled) {
+ instrument_memcpy_before(dst, src, len);
+ ret = copy_mc_fragile(dst, src, len);
+ instrument_memcpy_after(dst, src, len, ret);
+ return ret;
+ }
+ if (static_cpu_has(X86_FEATURE_ERMS)) {
+ instrument_memcpy_before(dst, src, len);
+ ret = copy_mc_enhanced_fast_string(dst, src, len);
+ instrument_memcpy_after(dst, src, len, ret);
+ return ret;
+ }
memcpy(dst, src, len);
return 0;
}
@@ -75,6 +86,7 @@ unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, un
unsigned long ret;
if (copy_mc_fragile_enabled) {
+ instrument_copy_to_user(dst, src, len);
__uaccess_begin();
ret = copy_mc_fragile((__force void *)dst, src, len);
__uaccess_end();
@@ -82,6 +94,7 @@ unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, un
}
if (static_cpu_has(X86_FEATURE_ERMS)) {
+ instrument_copy_to_user(dst, src, len);
__uaccess_begin();
ret = copy_mc_enhanced_fast_string((__force void *)dst, src, len);
__uaccess_end();
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 721b528da9acee..391059b2c6fbc4 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -163,6 +163,7 @@ SYM_CODE_START_NOALIGN(srso_alias_untrain_ret)
lfence
jmp srso_alias_return_thunk
SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
.popsection
.pushsection .text..__x86.rethunk_safe
@@ -224,10 +225,16 @@ SYM_CODE_START(srso_return_thunk)
SYM_CODE_END(srso_return_thunk)
#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret"
-#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret"
#else /* !CONFIG_MITIGATION_SRSO */
+/* Dummy for the alternative in CALL_UNTRAIN_RET. */
+SYM_CODE_START(srso_alias_untrain_ret)
+ ANNOTATE_UNRET_SAFE
+ ANNOTATE_NOENDBR
+ ret
+ int3
+SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
#define JMP_SRSO_UNTRAIN_RET "ud2"
-#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2"
#endif /* CONFIG_MITIGATION_SRSO */
#ifdef CONFIG_MITIGATION_UNRET_ENTRY
@@ -319,9 +326,7 @@ SYM_FUNC_END(retbleed_untrain_ret)
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
SYM_FUNC_START(entry_untrain_ret)
- ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \
- JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO, \
- JMP_SRSO_ALIAS_UNTRAIN_RET, X86_FEATURE_SRSO_ALIAS
+ ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)
@@ -377,8 +382,15 @@ SYM_FUNC_END(call_depth_return_thunk)
SYM_CODE_START(__x86_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
+#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || \
+ defined(CONFIG_MITIGATION_SRSO) || \
+ defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING)
ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \
"jmp warn_thunk_thunk", X86_FEATURE_ALWAYS
+#else
+ ANNOTATE_UNRET_SAFE
+ ret
+#endif
int3
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 622d12ec7f0851..67b18adc75ddc1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -20,6 +20,7 @@
#include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/
#include <linux/mm_types.h>
#include <linux/mm.h> /* find_and_lock_vma() */
+#include <linux/vmalloc.h>
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/traps.h> /* dotraplinkage, ... */
@@ -865,14 +866,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
static void
__bad_area(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, u32 pkey, int si_code)
+ unsigned long address, struct mm_struct *mm,
+ struct vm_area_struct *vma, u32 pkey, int si_code)
{
- struct mm_struct *mm = current->mm;
/*
* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
- mmap_read_unlock(mm);
+ if (mm)
+ mmap_read_unlock(mm);
+ else
+ vma_end_read(vma);
__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
}
@@ -896,7 +900,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
static noinline void
bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma)
+ unsigned long address, struct mm_struct *mm,
+ struct vm_area_struct *vma)
{
/*
* This OSPKE check is not strictly necessary at runtime.
@@ -926,9 +931,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
*/
u32 pkey = vma_pkey(vma);
- __bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
+ __bad_area(regs, error_code, address, mm, vma, pkey, SEGV_PKUERR);
} else {
- __bad_area(regs, error_code, address, 0, SEGV_ACCERR);
+ __bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR);
}
}
@@ -1356,8 +1361,9 @@ void do_user_addr_fault(struct pt_regs *regs,
goto lock_mmap;
if (unlikely(access_error(error_code, vma))) {
- vma_end_read(vma);
- goto lock_mmap;
+ bad_area_access_error(regs, error_code, address, NULL, vma);
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ return;
}
fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
@@ -1393,7 +1399,7 @@ retry:
* we can handle it..
*/
if (unlikely(access_error(error_code, vma))) {
- bad_area_access_error(regs, error_code, address, vma);
+ bad_area_access_error(regs, error_code, address, mm, vma);
return;
}
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 5804bbae4f0125..807a5859a3c4b3 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -19,41 +19,14 @@
#include <asm/tlbflush.h>
#include <asm/elf.h>
-/*
- * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal
- * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.
- * Otherwise, returns 0.
- */
-int pmd_huge(pmd_t pmd)
-{
- return !pmd_none(pmd) &&
- (pmd_val(pmd) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
-}
-
-/*
- * pud_huge() returns 1 if @pud is hugetlb related entry, that is normal
- * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.
- * Otherwise, returns 0.
- */
-int pud_huge(pud_t pud)
-{
-#if CONFIG_PGTABLE_LEVELS > 2
- return !pud_none(pud) &&
- (pud_val(pud) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT;
-#else
- return 0;
-#endif
-}
-
#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
- info.flags = 0;
info.length = len;
info.low_limit = get_mmap_base(1);
@@ -65,7 +38,6 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
return vm_unmapped_area(&info);
}
@@ -74,7 +46,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
@@ -89,7 +61,6 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
addr = vm_unmapped_area(&info);
/*
@@ -141,7 +112,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
}
get_unmapped_area:
- if (mm->get_unmapped_area == arch_get_unmapped_area)
+ if (!test_bit(MMF_TOPDOWN, &mm->flags))
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
else
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index a204a332c71fc5..968d7005f4a724 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
- bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- /* if this is already a gbpage, this portion is already mapped */
- if (pud_leaf(*pud))
- continue;
-
- /* Is using a gbpage allowed? */
- use_gbpage = info->direct_gbpages;
-
- /* Don't use gbpage if it maps more than the requested region. */
- /* at the begining: */
- use_gbpage &= ((addr & ~PUD_MASK) == 0);
- /* ... or at the end: */
- use_gbpage &= ((next & ~PUD_MASK) == 0);
-
- /* Never overwrite existing mappings */
- use_gbpage &= !pud_present(*pud);
-
- if (use_gbpage) {
+ if (info->direct_gbpages) {
pud_t pudval;
+ if (pud_present(*pud))
+ continue;
+
+ addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 679893ea5e6873..615f0bf4bda61a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -990,53 +990,6 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
}
#endif
-/*
- * Calculate the precise size of the DMA zone (first 16 MB of RAM),
- * and pass it to the MM layer - to help it set zone watermarks more
- * accurately.
- *
- * Done on 64-bit systems only for the time being, although 32-bit systems
- * might benefit from this as well.
- */
-void __init memblock_find_dma_reserve(void)
-{
-#ifdef CONFIG_X86_64
- u64 nr_pages = 0, nr_free_pages = 0;
- unsigned long start_pfn, end_pfn;
- phys_addr_t start_addr, end_addr;
- int i;
- u64 u;
-
- /*
- * Iterate over all memory ranges (free and reserved ones alike),
- * to calculate the total number of pages in the first 16 MB of RAM:
- */
- nr_pages = 0;
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
- start_pfn = min(start_pfn, MAX_DMA_PFN);
- end_pfn = min(end_pfn, MAX_DMA_PFN);
-
- nr_pages += end_pfn - start_pfn;
- }
-
- /*
- * Iterate over free memory ranges to calculate the number of free
- * pages in the DMA zone, while not counting potential partial
- * pages at the beginning or the end of the range:
- */
- nr_free_pages = 0;
- for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start_addr, &end_addr, NULL) {
- start_pfn = min_t(unsigned long, PFN_UP(start_addr), MAX_DMA_PFN);
- end_pfn = min_t(unsigned long, PFN_DOWN(end_addr), MAX_DMA_PFN);
-
- if (start_pfn < end_pfn)
- nr_free_pages += end_pfn - start_pfn;
- }
-
- set_dma_reserve(nr_pages - nr_free_pages);
-#endif
-}
-
void __init zone_sizes_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 70b91de2e053ab..422602f6039b82 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -492,6 +492,24 @@ void __init sme_early_init(void)
*/
if (sev_status & MSR_AMD64_SEV_ENABLED)
ia32_disable();
+
+ /*
+ * Override init functions that scan the ROM region in SEV-SNP guests,
+ * as this memory is not pre-validated and would thus cause a crash.
+ */
+ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.pci.init_irq = x86_init_noop;
+ x86_init.resources.probe_roms = x86_init_noop;
+
+ /*
+ * DMI setup behavior for SEV-SNP guests depends on
+ * efi_enabled(EFI_CONFIG_TABLES), which hasn't been
+ * parsed yet. snp_dmi_setup() will run after that
+ * parsing has happened.
+ */
+ x86_init.resources.dmi_setup = snp_dmi_setup;
+ }
}
void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index c90c20904a6075..a2cabb1c81e1ae 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -129,9 +129,9 @@ static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
if (mmap_is_legacy())
- mm->get_unmapped_area = arch_get_unmapped_area;
+ clear_bit(MMF_TOPDOWN, &mm->flags);
else
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ set_bit(MMF_TOPDOWN, &mm->flags);
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
arch_rnd(mmap64_rnd_bits), task_size_64bit(0),
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 104544359d69cd..65fda406e6f265 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -24,6 +24,8 @@
#include <linux/memblock.h>
#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <asm/pgtable_areas.h>
#include "numa_internal.h"
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 0d72183b5dd028..bdc2a240c2aae6 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -39,6 +39,7 @@
#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/highmem.h>
#include <linux/fs.h>
#include <linux/rbtree.h>
@@ -947,6 +948,61 @@ static void free_pfn_range(u64 paddr, unsigned long size)
memtype_free(paddr, paddr + size);
}
+static int follow_phys(struct vm_area_struct *vma, unsigned long *prot,
+ resource_size_t *phys)
+{
+ pte_t *ptep, pte;
+ spinlock_t *ptl;
+
+ if (follow_pte(vma, vma->vm_start, &ptep, &ptl))
+ return -EINVAL;
+
+ pte = ptep_get(ptep);
+
+ /* Never return PFNs of anon folios in COW mappings. */
+ if (vm_normal_folio(vma, vma->vm_start, pte)) {
+ pte_unmap_unlock(ptep, ptl);
+ return -EINVAL;
+ }
+
+ *prot = pgprot_val(pte_pgprot(pte));
+ *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+
+static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
+ pgprot_t *pgprot)
+{
+ unsigned long prot;
+
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
+
+ /*
+ * We need the starting PFN and cachemode used for track_pfn_remap()
+ * that covered the whole VMA. For most mappings, we can obtain that
+ * information from the page tables. For COW mappings, we might now
+ * suddenly have anon folios mapped and follow_phys() will fail.
+ *
+ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
+ * detect the PFN. If we need the cachemode as well, we're out of luck
+ * for now and have to fail fork().
+ */
+ if (!follow_phys(vma, &prot, paddr)) {
+ if (pgprot)
+ *pgprot = __pgprot(prot);
+ return 0;
+ }
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (pgprot)
+ return -EINVAL;
+ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+ return 0;
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
/*
* track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
@@ -957,20 +1013,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
- unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
if (vma->vm_flags & VM_PAT) {
- /*
- * reserve the whole chunk covered by vma. We need the
- * starting address and protection from pte.
- */
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, &pgprot))
return -EINVAL;
- }
- pgprot = __pgprot(prot);
+ /* reserve the whole chunk covered by vma. */
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}
@@ -1045,7 +1094,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size, bool mm_wr_locked)
{
resource_size_t paddr;
- unsigned long prot;
if (vma && !(vma->vm_flags & VM_PAT))
return;
@@ -1053,11 +1101,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
/* free the chunk starting from pfn or the whole chunk */
paddr = (resource_size_t)pfn << PAGE_SHIFT;
if (!paddr && !size) {
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, NULL))
return;
- }
-
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index d007591b80597a..94767c82fc0d72 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -731,7 +731,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
return 0;
/* Bail out if we are we on a populated non-leaf entry: */
- if (pud_present(*pud) && !pud_huge(*pud))
+ if (pud_present(*pud) && !pud_leaf(*pud))
return 0;
set_pte((pte_t *)pud, pfn_pte(
@@ -760,7 +760,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
}
/* Bail out if we are we on a populated non-leaf entry: */
- if (pmd_present(*pmd) && !pmd_huge(*pmd))
+ if (pmd_present(*pmd) && !pmd_leaf(*pmd))
return 0;
set_pte((pte_t *)pmd, pfn_pte(
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a7ba8e1786452d..59cbc94b6e6903 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -480,7 +480,7 @@ static int emit_call(u8 **pprog, void *func, void *ip)
static int emit_rsb_call(u8 **pprog, void *func, void *ip)
{
OPTIMIZER_HIDE_VAR(func);
- x86_call_depth_emit_accounting(pprog, func);
+ ip += x86_call_depth_emit_accounting(pprog, func, ip);
return emit_patch(pprog, func, ip, 0xE8);
}
@@ -1807,36 +1807,41 @@ populate_extable:
if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
/* Conservatively check that src_reg + insn->off is a kernel address:
- * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
- * src_reg is used as scratch for src_reg += insn->off and restored
- * after emit_ldx if necessary
+ * src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE
+ * and
+ * src_reg + insn->off < VSYSCALL_ADDR
*/
- u64 limit = TASK_SIZE_MAX + PAGE_SIZE;
+ u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;
u8 *end_of_jmp;
- /* At end of these emitted checks, insn->off will have been added
- * to src_reg, so no need to do relative load with insn->off offset
- */
- insn_off = 0;
+ /* movabsq r10, VSYSCALL_ADDR */
+ emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32,
+ (u32)(long)VSYSCALL_ADDR);
- /* movabsq r11, limit */
- EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
- EMIT((u32)limit, 4);
- EMIT(limit >> 32, 4);
+ /* mov src_reg, r11 */
+ EMIT_mov(AUX_REG, src_reg);
if (insn->off) {
- /* add src_reg, insn->off */
- maybe_emit_1mod(&prog, src_reg, true);
- EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off);
+ /* add r11, insn->off */
+ maybe_emit_1mod(&prog, AUX_REG, true);
+ EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
}
- /* cmp src_reg, r11 */
- maybe_emit_mod(&prog, src_reg, AUX_REG, true);
- EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
+ /* sub r11, r10 */
+ maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+ EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
+
+ /* movabsq r10, limit */
+ emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32,
+ (u32)(long)limit);
+
+ /* cmp r10, r11 */
+ maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+ EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
- /* if unsigned '>=', goto load */
- EMIT2(X86_JAE, 0);
+ /* if unsigned '>', goto load */
+ EMIT2(X86_JA, 0);
end_of_jmp = prog;
/* xor dst_reg, dst_reg */
@@ -1862,18 +1867,6 @@ populate_extable:
/* populate jmp_offset for JMP above */
start_of_ldx[-1] = prog - start_of_ldx;
- if (insn->off && src_reg != dst_reg) {
- /* sub src_reg, insn->off
- * Restore src_reg after "add src_reg, insn->off" in prev
- * if statement. But if src_reg == dst_reg, emit_ldx
- * above already clobbered src_reg, so no need to restore.
- * If add src_reg, insn->off was unnecessary, no need to
- * restore either.
- */
- maybe_emit_1mod(&prog, src_reg, true);
- EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
- }
-
if (!bpf_prog->aux->extable)
break;
@@ -1972,20 +1965,17 @@ populate_extable:
/* call */
case BPF_JMP | BPF_CALL: {
- int offs;
+ u8 *ip = image + addrs[i - 1];
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
- if (!imm32)
- return -EINVAL;
- offs = 7 + x86_call_depth_emit_accounting(&prog, func);
- } else {
- if (!imm32)
- return -EINVAL;
- offs = x86_call_depth_emit_accounting(&prog, func);
+ ip += 7;
}
- if (emit_call(&prog, func, image + addrs[i - 1] + offs))
+ if (!imm32)
+ return -EINVAL;
+ ip += x86_call_depth_emit_accounting(&prog, func, ip);
+ if (emit_call(&prog, func, ip))
return -EINVAL;
break;
}
@@ -2835,7 +2825,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* Direct-call fentry stub, as such it needs accounting for the
* __fentry__ call.
*/
- x86_call_depth_emit_accounting(&prog, NULL);
+ x86_call_depth_emit_accounting(&prog, NULL, image);
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
@@ -3476,3 +3466,9 @@ bool bpf_jit_supports_ptr_xchg(void)
{
return true;
}
+
+/* x86-64 JIT emits its own code to filter user addresses so return 0 here */
+u64 bpf_arch_uaddress_limit(void)
+{
+ return 0;
+}
diff --git a/arch/x86/virt/Makefile b/arch/x86/virt/Makefile
index 1e36502cd7383a..ea343fc392dcc8 100644
--- a/arch/x86/virt/Makefile
+++ b/arch/x86/virt/Makefile
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += vmx/
+obj-y += svm/ vmx/
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index cffe1157a90acf..ab0e8448bb6eb2 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -77,7 +77,7 @@ static int __mfd_enable(unsigned int cpu)
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
rdmsrl(MSR_AMD64_SYSCFG, val);
@@ -98,7 +98,7 @@ static int __snp_enable(unsigned int cpu)
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
rdmsrl(MSR_AMD64_SYSCFG, val);
@@ -174,11 +174,11 @@ static int __init snp_rmptable_init(void)
u64 rmptable_size;
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
if (!amd_iommu_snp_en)
- return 0;
+ goto nosnp;
if (!probed_rmp_size)
goto nosnp;
@@ -225,7 +225,7 @@ skip_enable:
return 0;
nosnp:
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
return -ENOSYS;
}
@@ -246,7 +246,7 @@ static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level)
{
struct rmpentry *large_entry, *entry;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return ERR_PTR(-ENODEV);
entry = get_rmpentry(pfn);
@@ -363,7 +363,7 @@ int psmash(u64 pfn)
unsigned long paddr = pfn << PAGE_SHIFT;
int ret;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
if (!pfn_valid(pfn))
@@ -472,7 +472,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state)
unsigned long paddr = pfn << PAGE_SHIFT;
int ret, level;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
level = RMP_TO_PG_LEVEL(state->pagesize);
@@ -558,3 +558,13 @@ void snp_leak_pages(u64 pfn, unsigned int npages)
spin_unlock(&snp_leaked_pages_list_lock);
}
EXPORT_SYMBOL_GPL(snp_leak_pages);
+
+void kdump_sev_callback(void)
+{
+ /*
+ * Do wbinvd() on remote CPUs when SNP is enabled in order to
+ * safely do SNP_SHUTDOWN on the local CPU.
+ */
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ wbinvd();
+}
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index dd116598fb2517..67083fc1b2f563 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -432,3 +432,4 @@
459 common lsm_get_self_attr sys_lsm_get_self_attr
460 common lsm_set_self_attr sys_lsm_set_self_attr
461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 7ec66a79f4723c..23be0e7516cede 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -87,12 +87,13 @@ static inline void *coherent_kvaddr(struct page *page, unsigned long base,
void clear_user_highpage(struct page *page, unsigned long vaddr)
{
+ struct folio *folio = page_folio(page);
unsigned long paddr;
void *kvaddr = coherent_kvaddr(page, TLBTEMP_BASE_1, vaddr, &paddr);
preempt_disable();
kmap_invalidate_coherent(page, vaddr);
- set_bit(PG_arch_1, &page->flags);
+ set_bit(PG_arch_1, folio_flags(folio, 0));
clear_page_alias(kvaddr, paddr);
preempt_enable();
}
@@ -101,6 +102,7 @@ EXPORT_SYMBOL(clear_user_highpage);
void copy_user_highpage(struct page *dst, struct page *src,
unsigned long vaddr, struct vm_area_struct *vma)
{
+ struct folio *folio = page_folio(dst);
unsigned long dst_paddr, src_paddr;
void *dst_vaddr = coherent_kvaddr(dst, TLBTEMP_BASE_1, vaddr,
&dst_paddr);
@@ -109,7 +111,7 @@ void copy_user_highpage(struct page *dst, struct page *src,
preempt_disable();
kmap_invalidate_coherent(dst, vaddr);
- set_bit(PG_arch_1, &dst->flags);
+ set_bit(PG_arch_1, folio_flags(folio, 0));
copy_page_alias(dst_vaddr, src_vaddr, dst_paddr, src_paddr);
preempt_enable();
}
diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c
index 4f974b74883cae..d8b60d6e50a8e7 100644
--- a/arch/xtensa/mm/tlb.c
+++ b/arch/xtensa/mm/tlb.c
@@ -256,12 +256,13 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb)
dtlb ? 'D' : 'I', w, e, r0, r1, pte);
if (pte == 0 || !pte_present(__pte(pte))) {
struct page *p = pfn_to_page(r1 >> PAGE_SHIFT);
- pr_err("page refcount: %d, mapcount: %d\n",
- page_count(p),
- page_mapcount(p));
- if (!page_count(p))
+ struct folio *f = page_folio(p);
+
+ pr_err("folio refcount: %d, mapcount: %d\n",
+ folio_ref_count(f), folio_mapcount(f));
+ if (!folio_ref_count(f))
rc |= TLB_INSANE;
- else if (page_mapcount(p))
+ else if (folio_mapped(f))
rc |= TLB_SUSPICIOUS;
} else {
rc |= TLB_INSANE;
diff --git a/block/bdev.c b/block/bdev.c
index 7a5f611c3d2e3e..da2a167a4d08b6 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -583,9 +583,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder,
mutex_unlock(&bdev->bd_holder_lock);
bd_clear_claiming(whole, holder);
mutex_unlock(&bdev_lock);
-
- if (hops && hops->get_holder)
- hops->get_holder(holder);
}
/**
@@ -608,7 +605,6 @@ EXPORT_SYMBOL(bd_abort_claiming);
static void bd_end_claim(struct block_device *bdev, void *holder)
{
struct block_device *whole = bdev_whole(bdev);
- const struct blk_holder_ops *hops = bdev->bd_holder_ops;
bool unblock = false;
/*
@@ -631,9 +627,6 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
whole->bd_holder = NULL;
mutex_unlock(&bdev_lock);
- if (hops && hops->put_holder)
- hops->put_holder(holder);
-
/*
* If this was the last claim, remove holder link and unblock evpoll if
* it was a write holder.
@@ -652,6 +645,14 @@ static void blkdev_flush_mapping(struct block_device *bdev)
bdev_write_inode(bdev);
}
+static void blkdev_put_whole(struct block_device *bdev)
+{
+ if (atomic_dec_and_test(&bdev->bd_openers))
+ blkdev_flush_mapping(bdev);
+ if (bdev->bd_disk->fops->release)
+ bdev->bd_disk->fops->release(bdev->bd_disk);
+}
+
static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
@@ -670,20 +671,21 @@ static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
if (!atomic_read(&bdev->bd_openers))
set_init_blocksize(bdev);
- if (test_bit(GD_NEED_PART_SCAN, &disk->state))
- bdev_disk_changed(disk, false);
atomic_inc(&bdev->bd_openers);
+ if (test_bit(GD_NEED_PART_SCAN, &disk->state)) {
+ /*
+ * Only return scanning errors if we are called from contexts
+ * that explicitly want them, e.g. the BLKRRPART ioctl.
+ */
+ ret = bdev_disk_changed(disk, false);
+ if (ret && (mode & BLK_OPEN_STRICT_SCAN)) {
+ blkdev_put_whole(bdev);
+ return ret;
+ }
+ }
return 0;
}
-static void blkdev_put_whole(struct block_device *bdev)
-{
- if (atomic_dec_and_test(&bdev->bd_openers))
- blkdev_flush_mapping(bdev);
- if (bdev->bd_disk->fops->release)
- bdev->bd_disk->fops->release(bdev->bd_disk);
-}
-
static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
{
struct gendisk *disk = part->bd_disk;
@@ -776,17 +778,17 @@ void blkdev_put_no_open(struct block_device *bdev)
static bool bdev_writes_blocked(struct block_device *bdev)
{
- return bdev->bd_writers == -1;
+ return bdev->bd_writers < 0;
}
static void bdev_block_writes(struct block_device *bdev)
{
- bdev->bd_writers = -1;
+ bdev->bd_writers--;
}
static void bdev_unblock_writes(struct block_device *bdev)
{
- bdev->bd_writers = 0;
+ bdev->bd_writers++;
}
static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode)
@@ -813,6 +815,11 @@ static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode)
bdev->bd_writers++;
}
+static inline bool bdev_unclaimed(const struct file *bdev_file)
+{
+ return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host);
+}
+
static void bdev_yield_write_access(struct file *bdev_file)
{
struct block_device *bdev;
@@ -820,14 +827,15 @@ static void bdev_yield_write_access(struct file *bdev_file)
if (bdev_allow_write_mounted)
return;
+ if (bdev_unclaimed(bdev_file))
+ return;
+
bdev = file_bdev(bdev_file);
- /* Yield exclusive or shared write access. */
- if (bdev_file->f_mode & FMODE_WRITE) {
- if (bdev_writes_blocked(bdev))
- bdev_unblock_writes(bdev);
- else
- bdev->bd_writers--;
- }
+
+ if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED)
+ bdev_unblock_writes(bdev);
+ else if (bdev_file->f_mode & FMODE_WRITE)
+ bdev->bd_writers--;
}
/**
@@ -874,7 +882,7 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
goto abort_claiming;
ret = -EBUSY;
if (!bdev_may_open(bdev, mode))
- goto abort_claiming;
+ goto put_module;
if (bdev_is_partition(bdev))
ret = blkdev_get_part(bdev, mode);
else
@@ -907,6 +915,8 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
if (bdev_nowait(bdev))
bdev_file->f_mode |= FMODE_NOWAIT;
+ if (mode & BLK_OPEN_RESTRICT_WRITES)
+ bdev_file->f_mode |= FMODE_WRITE_RESTRICTED;
bdev_file->f_mapping = bdev->bd_inode->i_mapping;
bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
bdev_file->private_data = holder;
@@ -1012,6 +1022,20 @@ struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
}
EXPORT_SYMBOL(bdev_file_open_by_path);
+static inline void bd_yield_claim(struct file *bdev_file)
+{
+ struct block_device *bdev = file_bdev(bdev_file);
+ void *holder = bdev_file->private_data;
+
+ lockdep_assert_held(&bdev->bd_disk->open_mutex);
+
+ if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder)))
+ return;
+
+ if (!bdev_unclaimed(bdev_file))
+ bd_end_claim(bdev, holder);
+}
+
void bdev_release(struct file *bdev_file)
{
struct block_device *bdev = file_bdev(bdev_file);
@@ -1036,7 +1060,7 @@ void bdev_release(struct file *bdev_file)
bdev_yield_write_access(bdev_file);
if (holder)
- bd_end_claim(bdev, holder);
+ bd_yield_claim(bdev_file);
/*
* Trigger event checking and tell drivers to flush MEDIA_CHANGE
@@ -1057,6 +1081,39 @@ put_no_open:
}
/**
+ * bdev_fput - yield claim to the block device and put the file
+ * @bdev_file: open block device
+ *
+ * Yield claim on the block device and put the file. Ensure that the
+ * block device can be reclaimed before the file is closed which is a
+ * deferred operation.
+ */
+void bdev_fput(struct file *bdev_file)
+{
+ if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops))
+ return;
+
+ if (bdev_file->private_data) {
+ struct block_device *bdev = file_bdev(bdev_file);
+ struct gendisk *disk = bdev->bd_disk;
+
+ mutex_lock(&disk->open_mutex);
+ bdev_yield_write_access(bdev_file);
+ bd_yield_claim(bdev_file);
+ /*
+ * Tell release we already gave up our hold on the
+ * device and if write restrictions are available that
+ * we already gave up write access to the device.
+ */
+ bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host);
+ mutex_unlock(&disk->open_mutex);
+ }
+
+ fput(bdev_file);
+}
+EXPORT_SYMBOL(bdev_fput);
+
+/**
* lookup_bdev() - Look up a struct block_device by name.
* @pathname: Name of the block device in the filesystem.
* @dev: Pointer to the block device's dev_t, if found.
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bdbb557feb5a0e..059467086b1312 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1409,6 +1409,12 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
return 0;
}
+void blkg_init_queue(struct request_queue *q)
+{
+ INIT_LIST_HEAD(&q->blkg_list);
+ mutex_init(&q->blkcg_mutex);
+}
+
int blkcg_init_disk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
@@ -1416,9 +1422,6 @@ int blkcg_init_disk(struct gendisk *disk)
bool preloaded;
int ret;
- INIT_LIST_HEAD(&q->blkg_list);
- mutex_init(&q->blkcg_mutex);
-
new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 78b74106bf10c5..90b3959d88cfa4 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -189,6 +189,7 @@ struct blkcg_policy {
extern struct blkcg blkcg_root;
extern bool blkcg_debug_stats;
+void blkg_init_queue(struct request_queue *q);
int blkcg_init_disk(struct gendisk *disk);
void blkcg_exit_disk(struct gendisk *disk);
@@ -482,6 +483,7 @@ struct blkcg {
};
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline void blkg_init_queue(struct request_queue *q) { }
static inline int blkcg_init_disk(struct gendisk *disk) { return 0; }
static inline void blkcg_exit_disk(struct gendisk *disk) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
diff --git a/block/blk-core.c b/block/blk-core.c
index a16b5abdbbf56f..b795ac177281ad 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -442,6 +442,8 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
init_waitqueue_head(&q->mq_freeze_wq);
mutex_init(&q->mq_freeze_lock);
+ blkg_init_queue(q);
+
/*
* Init percpu_ref in atomic mode so that it's faster to shutdown.
* See blk_register_queue() for details.
@@ -1195,6 +1197,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
if (unlikely(!rq_list_empty(plug->cached_rq)))
blk_mq_free_plug_rqs(plug);
+ plug->cur_ktime = 0;
current->flags &= ~PF_BLOCK_TS;
}
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 9a85bfbbc45a01..690ca99dfaca67 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1347,7 +1347,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
{
struct ioc *ioc = iocg->ioc;
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
- u64 tdelta, delay, new_delay;
+ u64 tdelta, delay, new_delay, shift;
s64 vover, vover_pct;
u32 hwa;
@@ -1362,8 +1362,9 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
/* calculate the current delay in effect - 1/2 every second */
tdelta = now->now - iocg->delay_at;
- if (iocg->delay)
- delay = iocg->delay >> div64_u64(tdelta, USEC_PER_SEC);
+ shift = div64_u64(tdelta, USEC_PER_SEC);
+ if (iocg->delay && shift < BITS_PER_LONG)
+ delay = iocg->delay >> shift;
else
delay = 0;
@@ -1438,8 +1439,11 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay,
lockdep_assert_held(&iocg->ioc->lock);
lockdep_assert_held(&iocg->waitq.lock);
- /* make sure that nobody messed with @iocg */
- WARN_ON_ONCE(list_empty(&iocg->active_list));
+ /*
+ * make sure that nobody messed with @iocg. Check iocg->pd.online
+ * to avoid warn when removing blkcg or disk.
+ */
+ WARN_ON_ONCE(list_empty(&iocg->active_list) && iocg->pd.online);
WARN_ON_ONCE(iocg->inuse > 1);
iocg->abs_vdebt -= min(abs_vpay, iocg->abs_vdebt);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 2a06fd33039da6..4e3483a16b7575 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -726,7 +726,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
* which can be mixed are set in each bio and mark @rq as mixed
* merged.
*/
-void blk_rq_set_mixed_merge(struct request *rq)
+static void blk_rq_set_mixed_merge(struct request *rq)
{
blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
struct bio *bio;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 555ada922cf060..32afb87efbd0ef 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -770,16 +770,11 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
/*
* Partial zone append completions cannot be supported as the
* BIO fragments may end up not being written sequentially.
- * For such case, force the completed nbytes to be equal to
- * the BIO size so that bio_advance() sets the BIO remaining
- * size to 0 and we end up calling bio_endio() before returning.
*/
- if (bio->bi_iter.bi_size != nbytes) {
+ if (bio->bi_iter.bi_size != nbytes)
bio->bi_status = BLK_STS_IOERR;
- nbytes = bio->bi_iter.bi_size;
- } else {
+ else
bio->bi_iter.bi_sector = rq->__sector;
- }
}
bio_advance(bio, nbytes);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 3c7d8d638ab59d..d2731843f2fccb 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -146,8 +146,7 @@ static int blk_validate_limits(struct queue_limits *lim)
max_hw_sectors = min_not_zero(lim->max_hw_sectors,
lim->max_dev_sectors);
if (lim->max_user_sectors) {
- if (lim->max_user_sectors > max_hw_sectors ||
- lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
+ if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
return -EINVAL;
lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
} else {
@@ -183,17 +182,13 @@ static int blk_validate_limits(struct queue_limits *lim)
return -EINVAL;
/*
- * Devices that require a virtual boundary do not support scatter/gather
- * I/O natively, but instead require a descriptor list entry for each
- * page (which might not be identical to the Linux PAGE_SIZE). Because
- * of that they are not limited by our notion of "segment size".
+ * Stacking device may have both virtual boundary and max segment
+ * size limit, so allow this setting now, and long-term the two
+ * might need to move out of stacking limits since we have immutable
+ * bvec and lower layer bio splitting is supposed to handle the two
+ * correctly.
*/
- if (lim->virt_boundary_mask) {
- if (WARN_ON_ONCE(lim->max_segment_size &&
- lim->max_segment_size != UINT_MAX))
- return -EINVAL;
- lim->max_segment_size = UINT_MAX;
- } else {
+ if (!lim->virt_boundary_mask) {
/*
* The maximum segment size has an odd historic 64k default that
* drivers probably should override. Just like the I/O size we
diff --git a/block/blk.h b/block/blk.h
index 5cac4e29ae1744..d9f584984bc44b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -339,7 +339,6 @@ int ll_back_merge_fn(struct request *req, struct bio *bio,
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next);
unsigned int blk_recalc_rq_segments(struct request *rq);
-void blk_rq_set_mixed_merge(struct request *rq);
bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
diff --git a/block/ioctl.c b/block/ioctl.c
index 0c76137adcaaa5..f505f9c341eb08 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -96,7 +96,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
unsigned long arg)
{
uint64_t range[2];
- uint64_t start, len;
+ uint64_t start, len, end;
struct inode *inode = bdev->bd_inode;
int err;
@@ -117,7 +117,8 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
if (len & 511)
return -EINVAL;
- if (start + len > bdev_nr_bytes(bdev))
+ if (check_add_overflow(start, len, &end) ||
+ end > bdev_nr_bytes(bdev))
return -EINVAL;
filemap_invalidate_lock(inode->i_mapping);
@@ -562,7 +563,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
return -EACCES;
if (bdev_is_partition(bdev))
return -EINVAL;
- return disk_scan_partitions(bdev->bd_disk, mode);
+ return disk_scan_partitions(bdev->bd_disk,
+ mode | BLK_OPEN_STRICT_SCAN);
case BLKTRACESTART:
case BLKTRACESTOP:
case BLKTRACETEARDOWN:
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index 38e58960ae036a..2bd42fedb907a9 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -131,8 +131,7 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
return false;
}
- strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
- toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
+ strscpy_pad(toc->bitmap1_name, data + 0x24, sizeof(toc->bitmap1_name));
toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
toc->bitmap1_size = get_unaligned_be64(data + 0x36);
@@ -142,8 +141,7 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
TOC_BITMAP1, toc->bitmap1_name);
return false;
}
- strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
- toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
+ strscpy_pad(toc->bitmap2_name, data + 0x46, sizeof(toc->bitmap2_name));
toc->bitmap2_start = get_unaligned_be64(data + 0x50);
toc->bitmap2_size = get_unaligned_be64(data + 0x58);
if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
diff --git a/crypto/asymmetric_keys/mscode_parser.c b/crypto/asymmetric_keys/mscode_parser.c
index 05402ef8964ed4..8aecbe4637f36e 100644
--- a/crypto/asymmetric_keys/mscode_parser.c
+++ b/crypto/asymmetric_keys/mscode_parser.c
@@ -75,6 +75,9 @@ int mscode_note_digest_algo(void *context, size_t hdrlen,
oid = look_up_OID(value, vlen);
switch (oid) {
+ case OID_sha1:
+ ctx->digest_algo = "sha1";
+ break;
case OID_sha256:
ctx->digest_algo = "sha256";
break;
diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c
index 5b08c50722d0f5..231ad7b3789d5e 100644
--- a/crypto/asymmetric_keys/pkcs7_parser.c
+++ b/crypto/asymmetric_keys/pkcs7_parser.c
@@ -227,6 +227,9 @@ int pkcs7_sig_note_digest_algo(void *context, size_t hdrlen,
struct pkcs7_parse_context *ctx = context;
switch (ctx->last_oid) {
+ case OID_sha1:
+ ctx->sinfo->sig->hash_algo = "sha1";
+ break;
case OID_sha256:
ctx->sinfo->sig->hash_algo = "sha256";
break;
@@ -278,6 +281,7 @@ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen,
ctx->sinfo->sig->pkey_algo = "rsa";
ctx->sinfo->sig->encoding = "pkcs1";
break;
+ case OID_id_ecdsa_with_sha1:
case OID_id_ecdsa_with_sha224:
case OID_id_ecdsa_with_sha256:
case OID_id_ecdsa_with_sha384:
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index e5f22691febd59..e314fd57e6f88a 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -115,7 +115,8 @@ software_key_determine_akcipher(const struct public_key *pkey,
*/
if (!hash_algo)
return -EINVAL;
- if (strcmp(hash_algo, "sha224") != 0 &&
+ if (strcmp(hash_algo, "sha1") != 0 &&
+ strcmp(hash_algo, "sha224") != 0 &&
strcmp(hash_algo, "sha256") != 0 &&
strcmp(hash_algo, "sha384") != 0 &&
strcmp(hash_algo, "sha512") != 0 &&
diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c
index 398983be77e8bc..2deff81f8af50b 100644
--- a/crypto/asymmetric_keys/signature.c
+++ b/crypto/asymmetric_keys/signature.c
@@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(decrypt_blob);
* Sign the specified data blob using the private key specified by params->key.
* The signature is wrapped in an encoding if params->encoding is specified
* (eg. "pkcs1"). If the encoding needs to know the digest type, this can be
- * passed through params->hash_algo (eg. "sha512").
+ * passed through params->hash_algo (eg. "sha1").
*
* Returns the length of the data placed in the signature buffer or an error.
*/
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index 487204d394266e..bb0bffa271b53c 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -198,6 +198,10 @@ int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag,
default:
return -ENOPKG; /* Unsupported combination */
+ case OID_sha1WithRSAEncryption:
+ ctx->cert->sig->hash_algo = "sha1";
+ goto rsa_pkcs1;
+
case OID_sha256WithRSAEncryption:
ctx->cert->sig->hash_algo = "sha256";
goto rsa_pkcs1;
@@ -214,6 +218,10 @@ int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag,
ctx->cert->sig->hash_algo = "sha224";
goto rsa_pkcs1;
+ case OID_id_ecdsa_with_sha1:
+ ctx->cert->sig->hash_algo = "sha1";
+ goto ecdsa;
+
case OID_id_rsassa_pkcs1_v1_5_with_sha3_256:
ctx->cert->sig->hash_algo = "sha3-256";
goto rsa_pkcs1;
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 986f331a5fc247..12e1c892f36661 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -653,6 +653,30 @@ static const struct akcipher_testvec rsa_tv_template[] = {
static const struct akcipher_testvec ecdsa_nist_p192_tv_template[] = {
{
.key =
+ "\x04\xf7\x46\xf8\x2f\x15\xf6\x22\x8e\xd7\x57\x4f\xcc\xe7\xbb\xc1"
+ "\xd4\x09\x73\xcf\xea\xd0\x15\x07\x3d\xa5\x8a\x8a\x95\x43\xe4\x68"
+ "\xea\xc6\x25\xc1\xc1\x01\x25\x4c\x7e\xc3\x3c\xa6\x04\x0a\xe7\x08"
+ "\x98",
+ .key_len = 49,
+ .params =
+ "\x30\x13\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x08\x2a\x86\x48"
+ "\xce\x3d\x03\x01\x01",
+ .param_len = 21,
+ .m =
+ "\xcd\xb9\xd2\x1c\xb7\x6f\xcd\x44\xb3\xfd\x63\xea\xa3\x66\x7f\xae"
+ "\x63\x85\xe7\x82",
+ .m_size = 20,
+ .algo = OID_id_ecdsa_with_sha1,
+ .c =
+ "\x30\x35\x02\x19\x00\xba\xe5\x93\x83\x6e\xb6\x3b\x63\xa0\x27\x91"
+ "\xc6\xf6\x7f\xc3\x09\xad\x59\xad\x88\x27\xd6\x92\x6b\x02\x18\x10"
+ "\x68\x01\x9d\xba\xce\x83\x08\xef\x95\x52\x7b\xa0\x0f\xe4\x18\x86"
+ "\x80\x6f\xa5\x79\x77\xda\xd0",
+ .c_size = 55,
+ .public_key_vec = true,
+ .siggen_sigver_test = true,
+ }, {
+ .key =
"\x04\xb6\x4b\xb1\xd1\xac\xba\x24\x8f\x65\xb2\x60\x00\x90\xbf\xbd"
"\x78\x05\x73\xe9\x79\x1d\x6f\x7c\x0b\xd2\xc3\x93\xa7\x28\xe1\x75"
"\xf7\xd5\x95\x1d\x28\x10\xc0\x75\x50\x5c\x1a\x4f\x3f\x8f\xa5\xee"
@@ -756,6 +780,32 @@ static const struct akcipher_testvec ecdsa_nist_p192_tv_template[] = {
static const struct akcipher_testvec ecdsa_nist_p256_tv_template[] = {
{
.key =
+ "\x04\xb9\x7b\xbb\xd7\x17\x64\xd2\x7e\xfc\x81\x5d\x87\x06\x83\x41"
+ "\x22\xd6\x9a\xaa\x87\x17\xec\x4f\x63\x55\x2f\x94\xba\xdd\x83\xe9"
+ "\x34\x4b\xf3\xe9\x91\x13\x50\xb6\xcb\xca\x62\x08\xe7\x3b\x09\xdc"
+ "\xc3\x63\x4b\x2d\xb9\x73\x53\xe4\x45\xe6\x7c\xad\xe7\x6b\xb0\xe8"
+ "\xaf",
+ .key_len = 65,
+ .params =
+ "\x30\x13\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x08\x2a\x86\x48"
+ "\xce\x3d\x03\x01\x07",
+ .param_len = 21,
+ .m =
+ "\xc2\x2b\x5f\x91\x78\x34\x26\x09\x42\x8d\x6f\x51\xb2\xc5\xaf\x4c"
+ "\x0b\xde\x6a\x42",
+ .m_size = 20,
+ .algo = OID_id_ecdsa_with_sha1,
+ .c =
+ "\x30\x46\x02\x21\x00\xf9\x25\xce\x9f\x3a\xa6\x35\x81\xcf\xd4\xe7"
+ "\xb7\xf0\x82\x56\x41\xf7\xd4\xad\x8d\x94\x5a\x69\x89\xee\xca\x6a"
+ "\x52\x0e\x48\x4d\xcc\x02\x21\x00\xd7\xe4\xef\x52\x66\xd3\x5b\x9d"
+ "\x8a\xfa\x54\x93\x29\xa7\x70\x86\xf1\x03\x03\xf3\x3b\xe2\x73\xf7"
+ "\xfb\x9d\x8b\xde\xd4\x8d\x6f\xad",
+ .c_size = 72,
+ .public_key_vec = true,
+ .siggen_sigver_test = true,
+ }, {
+ .key =
"\x04\x8b\x6d\xc0\x33\x8e\x2d\x8b\x67\xf5\xeb\xc4\x7f\xa0\xf5\xd9"
"\x7b\x03\xa5\x78\x9a\xb5\xea\x14\xe4\x23\xd0\xaf\xd7\x0e\x2e\xa0"
"\xc9\x8b\xdb\x95\xf8\xb3\xaf\xac\x00\x2c\x2c\x1f\x7a\xfd\x95\x88"
@@ -866,6 +916,36 @@ static const struct akcipher_testvec ecdsa_nist_p256_tv_template[] = {
static const struct akcipher_testvec ecdsa_nist_p384_tv_template[] = {
{
+ .key = /* secp384r1(sha1) */
+ "\x04\x89\x25\xf3\x97\x88\xcb\xb0\x78\xc5\x72\x9a\x14\x6e\x7a\xb1"
+ "\x5a\xa5\x24\xf1\x95\x06\x9e\x28\xfb\xc4\xb9\xbe\x5a\x0d\xd9\x9f"
+ "\xf3\xd1\x4d\x2d\x07\x99\xbd\xda\xa7\x66\xec\xbb\xea\xba\x79\x42"
+ "\xc9\x34\x89\x6a\xe7\x0b\xc3\xf2\xfe\x32\x30\xbe\xba\xf9\xdf\x7e"
+ "\x4b\x6a\x07\x8e\x26\x66\x3f\x1d\xec\xa2\x57\x91\x51\xdd\x17\x0e"
+ "\x0b\x25\xd6\x80\x5c\x3b\xe6\x1a\x98\x48\x91\x45\x7a\x73\xb0\xc3"
+ "\xf1",
+ .key_len = 97,
+ .params =
+ "\x30\x10\x06\x07\x2a\x86\x48\xce\x3d\x02\x01\x06\x05\x2b\x81\x04"
+ "\x00\x22",
+ .param_len = 18,
+ .m =
+ "\x12\x55\x28\xf0\x77\xd5\xb6\x21\x71\x32\x48\xcd\x28\xa8\x25\x22"
+ "\x3a\x69\xc1\x93",
+ .m_size = 20,
+ .algo = OID_id_ecdsa_with_sha1,
+ .c =
+ "\x30\x66\x02\x31\x00\xf5\x0f\x24\x4c\x07\x93\x6f\x21\x57\x55\x07"
+ "\x20\x43\x30\xde\xa0\x8d\x26\x8e\xae\x63\x3f\xbc\x20\x3a\xc6\xf1"
+ "\x32\x3c\xce\x70\x2b\x78\xf1\x4c\x26\xe6\x5b\x86\xcf\xec\x7c\x7e"
+ "\xd0\x87\xd7\xd7\x6e\x02\x31\x00\xcd\xbb\x7e\x81\x5d\x8f\x63\xc0"
+ "\x5f\x63\xb1\xbe\x5e\x4c\x0e\xa1\xdf\x28\x8c\x1b\xfa\xf9\x95\x88"
+ "\x74\xa0\x0f\xbf\xaf\xc3\x36\x76\x4a\xa1\x59\xf1\x1c\xa4\x58\x26"
+ "\x79\x12\x2a\xb7\xc5\x15\x92\xc5",
+ .c_size = 104,
+ .public_key_vec = true,
+ .siggen_sigver_test = true,
+ }, {
.key = /* secp384r1(sha224) */
"\x04\x69\x6c\xcf\x62\xee\xd0\x0d\xe5\xb5\x2f\x70\x54\xcf\x26\xa0"
"\xd9\x98\x8d\x92\x2a\xab\x9b\x11\xcb\x48\x18\xa1\xa9\x0d\xd5\x18"
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 39f6d1b98fd6a5..51d3f1a55d024c 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/firmware.h>
@@ -131,22 +131,6 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
return 0;
}
-static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
-{
- int ret;
-
- ret = ivpu_rpm_get_if_active(vdev);
- if (ret < 0)
- return ret;
-
- *clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0;
-
- if (ret)
- ivpu_rpm_put(vdev);
-
- return 0;
-}
-
static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
@@ -170,7 +154,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
- ret = ivpu_get_core_clock_rate(vdev, &args->value);
+ args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
@@ -387,12 +371,15 @@ int ivpu_shutdown(struct ivpu_device *vdev)
{
int ret;
- ivpu_prepare_for_reset(vdev);
+ /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
+ pci_save_state(to_pci_dev(vdev->drm.dev));
ret = ivpu_hw_power_down(vdev);
if (ret)
ivpu_warn(vdev, "Failed to power down HW: %d\n", ret);
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+
return ret;
}
@@ -530,7 +517,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
atomic64_set(&vdev->unique_id_counter, 0);
- xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
+ xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
@@ -560,11 +547,11 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
/* Power up early so the rest of init code can access VPU registers */
ret = ivpu_hw_power_up(vdev);
if (ret)
- goto err_power_down;
+ goto err_shutdown;
ret = ivpu_mmu_global_context_init(vdev);
if (ret)
- goto err_power_down;
+ goto err_shutdown;
ret = ivpu_mmu_init(vdev);
if (ret)
@@ -601,10 +588,8 @@ err_mmu_rctx_fini:
ivpu_mmu_reserved_context_fini(vdev);
err_mmu_gctx_fini:
ivpu_mmu_global_context_fini(vdev);
-err_power_down:
- ivpu_hw_power_down(vdev);
- if (IVPU_WA(d3hot_after_power_off))
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+err_shutdown:
+ ivpu_shutdown(vdev);
err_xa_destroy:
xa_destroy(&vdev->db_xa);
xa_destroy(&vdev->submitted_jobs_xa);
@@ -628,9 +613,8 @@ static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev)
static void ivpu_dev_fini(struct ivpu_device *vdev)
{
ivpu_pm_disable(vdev);
+ ivpu_prepare_for_reset(vdev);
ivpu_shutdown(vdev);
- if (IVPU_WA(d3hot_after_power_off))
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
ivpu_jobs_abort_all(vdev);
ivpu_job_done_consumer_fini(vdev);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 7be0500d9bb891..bb4374d0eaecc9 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_DRV_H__
@@ -90,7 +90,6 @@
struct ivpu_wa_table {
bool punit_disabled;
bool clear_runtime_mem;
- bool d3hot_after_power_off;
bool interrupt_clear_with_0;
bool disable_clock_relinquish;
bool disable_d0i3_msg;
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index b2909168a0a690..094c659d2800b1 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -21,6 +21,7 @@ struct ivpu_hw_ops {
u32 (*profiling_freq_get)(struct ivpu_device *vdev);
void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
+ u32 (*ratio_to_freq)(struct ivpu_device *vdev, u32 ratio);
u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev);
@@ -130,6 +131,11 @@ static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
return vdev->hw->ops->reg_pll_freq_get(vdev);
};
+static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ return vdev->hw->ops->ratio_to_freq(vdev, ratio);
+}
+
static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev)
{
return vdev->hw->ops->reg_telemetry_offset_get(vdev);
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index 9a0c9498baba29..bd25e2d9fb0f45 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include "ivpu_drv.h"
@@ -75,7 +75,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
{
vdev->wa.punit_disabled = false;
vdev->wa.clear_runtime_mem = false;
- vdev->wa.d3hot_after_power_off = true;
REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK);
if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) {
@@ -86,7 +85,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
IVPU_PRINT_WA(punit_disabled);
IVPU_PRINT_WA(clear_runtime_mem);
- IVPU_PRINT_WA(d3hot_after_power_off);
IVPU_PRINT_WA(interrupt_clear_with_0);
}
@@ -805,12 +803,12 @@ static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool ena
/* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */
}
-static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
+static u32 ivpu_hw_37xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
{
u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
u32 cpu_clock;
- if ((config & 0xff) == PLL_RATIO_4_3)
+ if ((vdev->hw->config & 0xff) == PLL_RATIO_4_3)
cpu_clock = pll_clock * 2 / 4;
else
cpu_clock = pll_clock * 2 / 5;
@@ -829,7 +827,7 @@ static u32 ivpu_hw_37xx_reg_pll_freq_get(struct ivpu_device *vdev)
if (!ivpu_is_silicon(vdev))
return PLL_SIMULATION_FREQ;
- return ivpu_hw_37xx_pll_to_freq(pll_curr_ratio, vdev->hw->config);
+ return ivpu_hw_37xx_ratio_to_freq(vdev, pll_curr_ratio);
}
static u32 ivpu_hw_37xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
@@ -1052,6 +1050,7 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
.profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
.profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
+ .ratio_to_freq = ivpu_hw_37xx_ratio_to_freq,
.reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,
.reg_telemetry_enable_get = ivpu_hw_37xx_reg_telemetry_enable_get,
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index e4eddbf5d11c25..b0b88d4c89264a 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -980,6 +980,11 @@ static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
return PLL_RATIO_TO_FREQ(pll_curr_ratio);
}
+static u32 ivpu_hw_40xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ return PLL_RATIO_TO_FREQ(ratio);
+}
+
static u32 ivpu_hw_40xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
{
return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET);
@@ -1230,6 +1235,7 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
.profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
.profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
+ .ratio_to_freq = ivpu_hw_40xx_ratio_to_freq,
.reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,
.reg_telemetry_enable_get = ivpu_hw_40xx_reg_telemetry_enable_get,
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 04ac4b9840fbe5..56ff067f63e295 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/genalloc.h>
@@ -501,7 +501,11 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
spin_lock_init(&ipc->cons_lock);
INIT_LIST_HEAD(&ipc->cons_list);
INIT_LIST_HEAD(&ipc->cb_msg_list);
- drmm_mutex_init(&vdev->drm, &ipc->lock);
+ ret = drmm_mutex_init(&vdev->drm, &ipc->lock);
+ if (ret) {
+ ivpu_err(vdev, "Failed to initialize ipc->lock, ret %d\n", ret);
+ goto err_free_rx;
+ }
ivpu_ipc_reset(vdev);
return 0;
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 91bd640655ab36..2e46b322c4505e 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -278,7 +278,7 @@ static const char *ivpu_mmu_event_to_str(u32 cmd)
case IVPU_MMU_EVT_F_VMS_FETCH:
return "Fetch of VMS caused external abort";
default:
- return "Unknown CMDQ command";
+ return "Unknown event";
}
}
@@ -286,15 +286,15 @@ static const char *ivpu_mmu_cmdq_err_to_str(u32 err)
{
switch (err) {
case IVPU_MMU_CERROR_NONE:
- return "No CMDQ Error";
+ return "No error";
case IVPU_MMU_CERROR_ILL:
return "Illegal command";
case IVPU_MMU_CERROR_ABT:
- return "External abort on CMDQ read";
+ return "External abort on command queue read";
case IVPU_MMU_CERROR_ATC_INV_SYNC:
return "Sync failed to complete ATS invalidation";
default:
- return "Unknown CMDQ Error";
+ return "Unknown error";
}
}
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index fe61612992364c..128aef8e5a1999 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -6,6 +6,7 @@
#include <linux/bitfield.h>
#include <linux/highmem.h>
#include <linux/set_memory.h>
+#include <linux/vmalloc.h>
#include <drm/drm_cache.h>
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 7cce1c928a7f4e..4f5ea466731ffe 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/highmem.h>
@@ -58,14 +58,11 @@ static int ivpu_suspend(struct ivpu_device *vdev)
{
int ret;
- /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
- pci_save_state(to_pci_dev(vdev->drm.dev));
+ ivpu_prepare_for_reset(vdev);
ret = ivpu_shutdown(vdev);
if (ret)
- ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
-
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+ ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret);
return ret;
}
@@ -74,10 +71,10 @@ static int ivpu_resume(struct ivpu_device *vdev)
{
int ret;
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
+retry:
pci_restore_state(to_pci_dev(vdev->drm.dev));
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
-retry:
ret = ivpu_hw_power_up(vdev);
if (ret) {
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
@@ -100,6 +97,7 @@ err_mmu_disable:
ivpu_mmu_disable(vdev);
err_power_down:
ivpu_hw_power_down(vdev);
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
if (!ivpu_fw_is_cold_boot(vdev)) {
ivpu_pm_prepare_cold_boot(vdev);
diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c
index 1fbc9b921c4fcc..736c2eb8c0f37d 100644
--- a/drivers/accessibility/speakup/main.c
+++ b/drivers/accessibility/speakup/main.c
@@ -574,7 +574,7 @@ static u_long get_word(struct vc_data *vc)
}
attr_ch = get_char(vc, (u_short *)tmp_pos, &spk_attr);
buf[cnt++] = attr_ch;
- while (tmpx < vc->vc_cols - 1) {
+ while (tmpx < vc->vc_cols - 1 && cnt < sizeof(buf) - 1) {
tmp_pos += 2;
tmpx++;
ch = get_char(vc, (u_short *)tmp_pos, &temp);
diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c
index b91155ea9c343c..c9131259f717b0 100644
--- a/drivers/acpi/acpica/dbnames.c
+++ b/drivers/acpi/acpica/dbnames.c
@@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle,
ACPI_FREE(buffer.pointer);
buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
- acpi_evaluate_object(obj_handle, NULL, NULL, &buffer);
-
+ status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer);
+ if (ACPI_FAILURE(status)) {
+ acpi_os_printf("Could Not evaluate object %p\n",
+ obj_handle);
+ return (AE_OK);
+ }
/*
* Since this is a field unit, surround the output in braces
*/
diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c
index 44267a92bce5e2..3c126c6d306b06 100644
--- a/drivers/acpi/acpica/tbfadt.c
+++ b/drivers/acpi/acpica/tbfadt.c
@@ -315,23 +315,19 @@ void acpi_tb_parse_fadt(void)
ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL,
NULL, FALSE, TRUE, &acpi_gbl_dsdt_index);
- /* If Hardware Reduced flag is set, there is no FACS */
-
- if (!acpi_gbl_reduced_hardware) {
- if (acpi_gbl_FADT.facs) {
- acpi_tb_install_standard_table((acpi_physical_address)
- acpi_gbl_FADT.facs,
- ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL,
- NULL, FALSE, TRUE,
- &acpi_gbl_facs_index);
- }
- if (acpi_gbl_FADT.Xfacs) {
- acpi_tb_install_standard_table((acpi_physical_address)
- acpi_gbl_FADT.Xfacs,
- ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL,
- NULL, FALSE, TRUE,
- &acpi_gbl_xfacs_index);
- }
+ if (acpi_gbl_FADT.facs) {
+ acpi_tb_install_standard_table((acpi_physical_address)
+ acpi_gbl_FADT.facs,
+ ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL,
+ NULL, FALSE, TRUE,
+ &acpi_gbl_facs_index);
+ }
+ if (acpi_gbl_FADT.Xfacs) {
+ acpi_tb_install_standard_table((acpi_physical_address)
+ acpi_gbl_FADT.Xfacs,
+ ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL,
+ NULL, FALSE, TRUE,
+ &acpi_gbl_xfacs_index);
}
}
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index bb4a56e5673a5f..15fa68a5ea6e25 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -36,12 +36,7 @@ acpi_status acpi_tb_initialize_facs(void)
{
struct acpi_table_facs *facs;
- /* If Hardware Reduced flag is set, there is no FACS */
-
- if (acpi_gbl_reduced_hardware) {
- acpi_gbl_FACS = NULL;
- return (AE_OK);
- } else if (acpi_gbl_FADT.Xfacs &&
+ if (acpi_gbl_FADT.Xfacs &&
(!acpi_gbl_FADT.facs
|| !acpi_gbl_use32_bit_facs_addresses)) {
(void)acpi_get_table_by_index(acpi_gbl_xfacs_index,
diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
index 66e7f529e92fc2..01faca3a238a3a 100644
--- a/drivers/acpi/apei/einj-core.c
+++ b/drivers/acpi/apei/einj-core.c
@@ -851,7 +851,7 @@ err_put_table:
return rc;
}
-static void __exit einj_remove(struct platform_device *pdev)
+static void einj_remove(struct platform_device *pdev)
{
struct apei_exec_context ctx;
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 4bfbe55553f410..a40b6f3946efeb 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -170,8 +170,8 @@ show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time);
#define GET_BIT_WIDTH(reg) ((reg)->access_width ? (8 << ((reg)->access_width - 1)) : (reg)->bit_width)
/* Shift and apply the mask for CPC reads/writes */
-#define MASK_VAL(reg, val) ((val) >> ((reg)->bit_offset & \
- GENMASK(((reg)->bit_width), 0)))
+#define MASK_VAL(reg, val) (((val) >> (reg)->bit_offset) & \
+ GENMASK(((reg)->bit_width) - 1, 0))
static ssize_t show_feedback_ctrs(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -1002,14 +1002,14 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val)
}
*val = 0;
+ size = GET_BIT_WIDTH(reg);
if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
- u32 width = GET_BIT_WIDTH(reg);
u32 val_u32;
acpi_status status;
status = acpi_os_read_port((acpi_io_address)reg->address,
- &val_u32, width);
+ &val_u32, size);
if (ACPI_FAILURE(status)) {
pr_debug("Error: Failed to read SystemIO port %llx\n",
reg->address);
@@ -1018,17 +1018,22 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val)
*val = val_u32;
return 0;
- } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
+ } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) {
+ /*
+ * For registers in PCC space, the register size is determined
+ * by the bit width field; the access size is used to indicate
+ * the PCC subspace id.
+ */
+ size = reg->bit_width;
vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id);
+ }
else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
vaddr = reg_res->sys_mem_vaddr;
else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE)
return cpc_read_ffh(cpu, reg, val);
else
return acpi_os_read_memory((acpi_physical_address)reg->address,
- val, reg->bit_width);
-
- size = GET_BIT_WIDTH(reg);
+ val, size);
switch (size) {
case 8:
@@ -1044,8 +1049,13 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val)
*val = readq_relaxed(vaddr);
break;
default:
- pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n",
- reg->bit_width, pcc_ss_id);
+ if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+ pr_debug("Error: Cannot read %u bit width from system memory: 0x%llx\n",
+ size, reg->address);
+ } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
+ pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n",
+ size, pcc_ss_id);
+ }
return -EFAULT;
}
@@ -1063,12 +1073,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
struct cpc_reg *reg = &reg_res->cpc_entry.reg;
+ size = GET_BIT_WIDTH(reg);
+
if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
- u32 width = GET_BIT_WIDTH(reg);
acpi_status status;
status = acpi_os_write_port((acpi_io_address)reg->address,
- (u32)val, width);
+ (u32)val, size);
if (ACPI_FAILURE(status)) {
pr_debug("Error: Failed to write SystemIO port %llx\n",
reg->address);
@@ -1076,17 +1087,22 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
}
return 0;
- } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0)
+ } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) {
+ /*
+ * For registers in PCC space, the register size is determined
+ * by the bit width field; the access size is used to indicate
+ * the PCC subspace id.
+ */
+ size = reg->bit_width;
vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id);
+ }
else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
vaddr = reg_res->sys_mem_vaddr;
else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE)
return cpc_write_ffh(cpu, reg, val);
else
return acpi_os_write_memory((acpi_physical_address)reg->address,
- val, reg->bit_width);
-
- size = GET_BIT_WIDTH(reg);
+ val, size);
if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
val = MASK_VAL(reg, val);
@@ -1105,8 +1121,13 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
writeq_relaxed(val, vaddr);
break;
default:
- pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n",
- reg->bit_width, pcc_ss_id);
+ if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+ pr_debug("Error: Cannot write %u bit width to system memory: 0x%llx\n",
+ size, reg->address);
+ } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
+ pr_debug("Error: Cannot write %u bit width to PCC for ss: %d\n",
+ size, pcc_ss_id);
+ }
ret_val = -EFAULT;
break;
}
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 7c157bf926956b..d1464324de9519 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1843,7 +1843,8 @@ static void acpi_scan_dep_init(struct acpi_device *adev)
if (dep->honor_dep)
adev->flags.honor_deps = 1;
- adev->dep_unmet++;
+ if (!dep->met)
+ adev->dep_unmet++;
}
}
}
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 302dce0b2b5044..d67881b50bca28 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -662,14 +662,15 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz,
{
int result;
- tz->thermal_zone = thermal_zone_device_register_with_trips("acpitz",
- trip_table,
- trip_count,
- tz,
- &acpi_thermal_zone_ops,
- NULL,
- passive_delay,
- tz->polling_frequency * 100);
+ if (trip_count)
+ tz->thermal_zone = thermal_zone_device_register_with_trips(
+ "acpitz", trip_table, trip_count, tz,
+ &acpi_thermal_zone_ops, NULL, passive_delay,
+ tz->polling_frequency * 100);
+ else
+ tz->thermal_zone = thermal_tripless_zone_device_register(
+ "acpitz", tz, &acpi_thermal_zone_ops, NULL);
+
if (IS_ERR(tz->thermal_zone))
return PTR_ERR(tz->thermal_zone);
@@ -901,11 +902,8 @@ static int acpi_thermal_add(struct acpi_device *device)
trip++;
}
- if (trip == trip_table) {
+ if (trip == trip_table)
pr_warn(FW_BUG "No valid trip points!\n");
- result = -ENODEV;
- goto free_memory;
- }
result = acpi_thermal_register_thermal_zone(tz, trip_table,
trip - trip_table,
diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index cd84af23f7eac8..dd0b40b9bbe8be 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -492,16 +492,14 @@ static int lps0_device_attach(struct acpi_device *adev,
unsigned int func_mask;
/*
- * Avoid evaluating the same _DSM function for two
- * different UUIDs and prioritize the MSFT one.
+ * Log a message if the _DSM function sets for two
+ * different UUIDs overlap.
*/
func_mask = lps0_dsm_func_mask & lps0_dsm_func_mask_microsoft;
- if (func_mask) {
+ if (func_mask)
acpi_handle_info(adev->handle,
"Duplicate LPS0 _DSM functions (mask: 0x%x)\n",
func_mask);
- lps0_dsm_func_mask &= ~func_mask;
- }
}
}
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index bad28cf4201041..dd6923d37931f9 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1708,8 +1708,10 @@ static size_t binder_get_object(struct binder_proc *proc,
size_t object_size = 0;
read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset);
- if (offset > buffer->data_size || read_size < sizeof(*hdr))
+ if (offset > buffer->data_size || read_size < sizeof(*hdr) ||
+ !IS_ALIGNED(offset, sizeof(u32)))
return 0;
+
if (u) {
if (copy_from_user(object, u + offset, read_size))
return 0;
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 562302e2e57ce5..6548f10e61d9c7 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -666,6 +666,87 @@ static int mobile_lpm_policy = -1;
module_param(mobile_lpm_policy, int, 0644);
MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets");
+static char *ahci_mask_port_map;
+module_param_named(mask_port_map, ahci_mask_port_map, charp, 0444);
+MODULE_PARM_DESC(mask_port_map,
+ "32-bits port map masks to ignore controllers ports. "
+ "Valid values are: "
+ "\"<mask>\" to apply the same mask to all AHCI controller "
+ "devices, and \"<pci_dev>=<mask>,<pci_dev>=<mask>,...\" to "
+ "specify different masks for the controllers specified, "
+ "where <pci_dev> is the PCI ID of an AHCI controller in the "
+ "form \"domain:bus:dev.func\"");
+
+static void ahci_apply_port_map_mask(struct device *dev,
+ struct ahci_host_priv *hpriv, char *mask_s)
+{
+ unsigned int mask;
+
+ if (kstrtouint(mask_s, 0, &mask)) {
+ dev_err(dev, "Invalid port map mask\n");
+ return;
+ }
+
+ hpriv->mask_port_map = mask;
+}
+
+static void ahci_get_port_map_mask(struct device *dev,
+ struct ahci_host_priv *hpriv)
+{
+ char *param, *end, *str, *mask_s;
+ char *name;
+
+ if (!strlen(ahci_mask_port_map))
+ return;
+
+ str = kstrdup(ahci_mask_port_map, GFP_KERNEL);
+ if (!str)
+ return;
+
+ /* Handle single mask case */
+ if (!strchr(str, '=')) {
+ ahci_apply_port_map_mask(dev, hpriv, str);
+ goto free;
+ }
+
+ /*
+ * Mask list case: parse the parameter to apply the mask only if
+ * the device name matches.
+ */
+ param = str;
+ end = param + strlen(param);
+ while (param && param < end && *param) {
+ name = param;
+ param = strchr(name, '=');
+ if (!param)
+ break;
+
+ *param = '\0';
+ param++;
+ if (param >= end)
+ break;
+
+ if (strcmp(dev_name(dev), name) != 0) {
+ param = strchr(param, ',');
+ if (param)
+ param++;
+ continue;
+ }
+
+ mask_s = param;
+ param = strchr(mask_s, ',');
+ if (param) {
+ *param = '\0';
+ param++;
+ }
+
+ ahci_apply_port_map_mask(dev, hpriv, mask_s);
+ }
+
+free:
+ kfree(str);
+}
+
static void ahci_pci_save_initial_config(struct pci_dev *pdev,
struct ahci_host_priv *hpriv)
{
@@ -688,6 +769,10 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev,
"Disabling your PATA port. Use the boot option 'ahci.marvell_enable=0' to avoid this.\n");
}
+ /* Handle port map masks passed as module parameter. */
+ if (ahci_mask_port_map)
+ ahci_get_port_map_mask(&pdev->dev, hpriv);
+
ahci_save_initial_config(&pdev->dev, hpriv);
}
diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c
index d4a626f87963ba..79a8b0aa37bf37 100644
--- a/drivers/ata/ahci_st.c
+++ b/drivers/ata/ahci_st.c
@@ -30,7 +30,6 @@
#define ST_AHCI_OOBR_CIMAX_SHIFT 0
struct st_ahci_drv_data {
- struct platform_device *ahci;
struct reset_control *pwr;
struct reset_control *sw_rst;
struct reset_control *pwr_rst;
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index be3412cdb22e78..c449d60d9bb962 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2539,7 +2539,7 @@ static void ata_dev_config_cdl(struct ata_device *dev)
bool cdl_enabled;
u64 val;
- if (ata_id_major_version(dev->id) < 12)
+ if (ata_id_major_version(dev->id) < 11)
goto not_supported;
if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE) ||
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index b0d6e69c4a5b2e..214b935c2ced79 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -712,8 +712,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
ehc->saved_ncq_enabled |= 1 << devno;
/* If we are resuming, wake up the device */
- if (ap->pflags & ATA_PFLAG_RESUMING)
+ if (ap->pflags & ATA_PFLAG_RESUMING) {
+ dev->flags |= ATA_DFLAG_RESUMING;
ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
+ }
}
}
@@ -3169,6 +3171,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
return 0;
err:
+ dev->flags &= ~ATA_DFLAG_RESUMING;
*r_failed_dev = dev;
return rc;
}
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 0a0f483124c3a5..e954976891a9f5 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4730,6 +4730,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
struct ata_link *link;
struct ata_device *dev;
unsigned long flags;
+ bool do_resume;
int ret = 0;
mutex_lock(&ap->scsi_scan_mutex);
@@ -4744,25 +4745,34 @@ void ata_scsi_dev_rescan(struct work_struct *work)
* bail out.
*/
if (ap->pflags & ATA_PFLAG_SUSPENDED)
- goto unlock;
+ goto unlock_ap;
if (!sdev)
continue;
if (scsi_device_get(sdev))
continue;
+ do_resume = dev->flags & ATA_DFLAG_RESUMING;
+
spin_unlock_irqrestore(ap->lock, flags);
+ if (do_resume) {
+ ret = scsi_resume_device(sdev);
+ if (ret == -EWOULDBLOCK)
+ goto unlock_scan;
+ dev->flags &= ~ATA_DFLAG_RESUMING;
+ }
ret = scsi_rescan_device(sdev);
scsi_device_put(sdev);
spin_lock_irqsave(ap->lock, flags);
if (ret)
- goto unlock;
+ goto unlock_ap;
}
}
-unlock:
+unlock_ap:
spin_unlock_irqrestore(ap->lock, flags);
+unlock_scan:
mutex_unlock(&ap->scsi_scan_mutex);
/* Reschedule with a delay if scsi_rescan_device() returned an error */
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index 4ac854f6b05777..88b2e9817f49df 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -1371,9 +1371,6 @@ static struct pci_driver pata_macio_pci_driver = {
.suspend = pata_macio_pci_suspend,
.resume = pata_macio_pci_resume,
#endif
- .driver = {
- .owner = THIS_MODULE,
- },
};
MODULE_DEVICE_TABLE(pci, pata_macio_pci_match);
diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c
index 400b22ee99c33a..4c270999ba3ccd 100644
--- a/drivers/ata/sata_gemini.c
+++ b/drivers/ata/sata_gemini.c
@@ -200,7 +200,10 @@ int gemini_sata_start_bridge(struct sata_gemini *sg, unsigned int bridge)
pclk = sg->sata0_pclk;
else
pclk = sg->sata1_pclk;
- clk_enable(pclk);
+ ret = clk_enable(pclk);
+ if (ret)
+ return ret;
+
msleep(10);
/* Do not keep clocking a bridge that is not online */
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index e82786c63fbd73..9bec0aee92e04c 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = {
},
};
-static const struct pci_device_id mv_pci_tbl[] = {
- { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
- { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
- { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
- { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
- /* RocketRAID 1720/174x have different identifiers */
- { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
- { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
- { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
-
- { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
- { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
- { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
- { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
- { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
-
- { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
-
- /* Adaptec 1430SA */
- { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
-
- /* Marvell 7042 support */
- { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
-
- /* Highpoint RocketRAID PCIe series */
- { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
- { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
-
- { } /* terminate list */
-};
-
static const struct mv_hw_ops mv5xxx_ops = {
.phy_errata = mv5_phy_errata,
.enable_leds = mv5_enable_leds,
@@ -4303,6 +4272,36 @@ static int mv_pci_init_one(struct pci_dev *pdev,
static int mv_pci_device_resume(struct pci_dev *pdev);
#endif
+static const struct pci_device_id mv_pci_tbl[] = {
+ { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
+ { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
+ { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
+ { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
+ /* RocketRAID 1720/174x have different identifiers */
+ { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
+ { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
+ { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
+
+ { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
+ { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
+ { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
+ { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
+ { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
+
+ { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
+
+ /* Adaptec 1430SA */
+ { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
+
+ /* Marvell 7042 support */
+ { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
+
+ /* Highpoint RocketRAID PCIe series */
+ { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
+ { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
+
+ { } /* terminate list */
+};
static struct pci_driver mv_pci_driver = {
.name = DRV_NAME,
@@ -4315,6 +4314,7 @@ static struct pci_driver mv_pci_driver = {
#endif
};
+MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
/**
* mv_print_info - Dump key info to kernel log for perusal.
@@ -4487,7 +4487,6 @@ static void __exit mv_exit(void)
MODULE_AUTHOR("Brett Russ");
MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers");
MODULE_LICENSE("GPL v2");
-MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
MODULE_VERSION(DRV_VERSION);
MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index b51d7a9d0d90ce..a482741eb181ff 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -957,8 +957,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource,
offset -= (idx * window_size);
idx++;
- dist = ((long) (window_size - (offset + size))) >= 0 ? size :
- (long) (window_size - offset);
+ dist = min(size, window_size - offset);
memcpy_fromio(psource, dimm_mmio + offset / 4, dist);
psource += dist;
@@ -1005,8 +1004,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource,
readl(mmio + PDC_DIMM_WINDOW_CTLR);
offset -= (idx * window_size);
idx++;
- dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size :
- (long) (window_size - offset);
+ dist = min(size, window_size - offset);
memcpy_toio(dimm_mmio + offset / 4, psource, dist);
writel(0x01, mmio + PDC_GENERAL_CTLR);
readl(mmio + PDC_GENERAL_CTLR);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index b93f3c5716aeea..5f4e03336e68ef 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void);
static void __fw_devlink_link_to_consumers(struct device *dev);
static bool fw_devlink_drv_reg_done;
static bool fw_devlink_best_effort;
+static struct workqueue_struct *device_link_wq;
/**
* __fwnode_link_add - Create a link between two fwnode_handles.
@@ -533,12 +534,26 @@ static void devlink_dev_release(struct device *dev)
/*
* It may take a while to complete this work because of the SRCU
* synchronization in device_link_release_fn() and if the consumer or
- * supplier devices get deleted when it runs, so put it into the "long"
- * workqueue.
+ * supplier devices get deleted when it runs, so put it into the
+ * dedicated workqueue.
*/
- queue_work(system_long_wq, &link->rm_work);
+ queue_work(device_link_wq, &link->rm_work);
}
+/**
+ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate
+ */
+void device_link_wait_removal(void)
+{
+ /*
+ * devlink removal jobs are queued in the dedicated work queue.
+ * To be sure that all removal jobs are terminated, ensure that any
+ * scheduled work has run to completion.
+ */
+ flush_workqueue(device_link_wq);
+}
+EXPORT_SYMBOL_GPL(device_link_wait_removal);
+
static struct class devlink_class = {
.name = "devlink",
.dev_groups = devlink_groups,
@@ -4164,9 +4179,14 @@ int __init devices_init(void)
sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
if (!sysfs_dev_char_kobj)
goto char_kobj_err;
+ device_link_wq = alloc_workqueue("device_link_wq", 0, 0);
+ if (!device_link_wq)
+ goto wq_err;
return 0;
+ wq_err:
+ kobject_put(sysfs_dev_char_kobj);
char_kobj_err:
kobject_put(sysfs_dev_block_kobj);
block_kobj_err:
diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c
index 41edd6a430eb45..55999a50ccc0b8 100644
--- a/drivers/base/regmap/regcache-maple.c
+++ b/drivers/base/regmap/regcache-maple.c
@@ -112,7 +112,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
unsigned long *entry, *lower, *upper;
unsigned long lower_index, lower_last;
unsigned long upper_index, upper_last;
- int ret;
+ int ret = 0;
lower = NULL;
upper = NULL;
@@ -145,7 +145,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
upper_index = max + 1;
upper_last = mas.last;
- upper = kmemdup(&entry[max + 1],
+ upper = kmemdup(&entry[max - mas.index + 1],
((mas.last - max) *
sizeof(unsigned long)),
map->alloc_flags);
@@ -244,7 +244,7 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min,
unsigned long lmin = min;
unsigned long lmax = max;
unsigned int r, v, sync_start;
- int ret;
+ int ret = 0;
bool sync_needed = false;
map->cache_bypass = true;
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 5cb425f6f02d4b..0a34dd3c4f38d2 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -2839,6 +2839,43 @@ int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val)
EXPORT_SYMBOL_GPL(regmap_read);
/**
+ * regmap_read_bypassed() - Read a value from a single register direct
+ * from the device, bypassing the cache
+ *
+ * @map: Register map to read from
+ * @reg: Register to be read from
+ * @val: Pointer to store read value
+ *
+ * A value of zero will be returned on success, a negative errno will
+ * be returned in error cases.
+ */
+int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val)
+{
+ int ret;
+ bool bypass, cache_only;
+
+ if (!IS_ALIGNED(reg, map->reg_stride))
+ return -EINVAL;
+
+ map->lock(map->lock_arg);
+
+ bypass = map->cache_bypass;
+ cache_only = map->cache_only;
+ map->cache_bypass = true;
+ map->cache_only = false;
+
+ ret = _regmap_read(map, reg, val);
+
+ map->cache_bypass = bypass;
+ map->cache_only = cache_only;
+
+ map->unlock(map->lock_arg);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_read_bypassed);
+
+/**
* regmap_raw_read() - Read raw data from the device
*
* @map: Register map to read from
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 71c39bcd872c7e..ed33cf7192d216 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1965,10 +1965,10 @@ static int null_add_dev(struct nullb_device *dev)
out_ida_free:
ida_free(&nullb_indexes, nullb->index);
-out_cleanup_zone:
- null_free_zoned_dev(dev);
out_cleanup_disk:
put_disk(nullb->disk);
+out_cleanup_zone:
+ null_free_zoned_dev(dev);
out_cleanup_tags:
if (nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index f0639df6cd184a..4cf38f7d3e0a3d 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1568,7 +1568,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
* Corresponding ZRAM slot should be locked.
*/
static int zram_recompress(struct zram *zram, u32 index, struct page *page,
- u32 threshold, u32 prio, u32 prio_max)
+ u64 *num_recomp_pages, u32 threshold, u32 prio,
+ u32 prio_max)
{
struct zcomp_strm *zstrm = NULL;
unsigned long handle_old;
@@ -1645,6 +1646,15 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
if (!zstrm)
return 0;
+ /*
+ * Decrement the limit (if set) on pages we can recompress, even
+ * when current recompression was unsuccessful or did not compress
+ * the page below the threshold, because we still spent resources
+ * on it.
+ */
+ if (*num_recomp_pages)
+ *num_recomp_pages -= 1;
+
if (class_index_new >= class_index_old) {
/*
* Secondary algorithms failed to re-compress the page
@@ -1710,6 +1720,7 @@ static ssize_t recompress_store(struct device *dev,
struct zram *zram = dev_to_zram(dev);
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
char *args, *param, *val, *algo = NULL;
+ u64 num_recomp_pages = ULLONG_MAX;
u32 mode = 0, threshold = 0;
unsigned long index;
struct page *page;
@@ -1732,6 +1743,17 @@ static ssize_t recompress_store(struct device *dev,
continue;
}
+ if (!strcmp(param, "max_pages")) {
+ /*
+ * Limit the number of entries (pages) we attempt to
+ * recompress.
+ */
+ ret = kstrtoull(val, 10, &num_recomp_pages);
+ if (ret)
+ return ret;
+ continue;
+ }
+
if (!strcmp(param, "threshold")) {
/*
* We will re-compress only idle objects equal or
@@ -1788,6 +1810,9 @@ static ssize_t recompress_store(struct device *dev,
for (index = 0; index < nr_pages; index++) {
int err = 0;
+ if (!num_recomp_pages)
+ break;
+
zram_slot_lock(zram, index);
if (!zram_allocated(zram, index))
@@ -1807,8 +1832,8 @@ static ssize_t recompress_store(struct device *dev,
zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
goto next;
- err = zram_recompress(zram, index, page, threshold,
- prio, prio_max);
+ err = zram_recompress(zram, index, page, &num_recomp_pages,
+ threshold, prio, prio_max);
next:
zram_slot_unlock(zram, index);
if (err) {
diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c
index ac8ebccd350756..812fd2a8f853e1 100644
--- a/drivers/bluetooth/btmtk.c
+++ b/drivers/bluetooth/btmtk.c
@@ -380,8 +380,10 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb)
switch (data->cd_info.state) {
case HCI_DEVCOREDUMP_IDLE:
err = hci_devcd_init(hdev, MTK_COREDUMP_SIZE);
- if (err < 0)
+ if (err < 0) {
+ kfree_skb(skb);
break;
+ }
data->cd_info.cnt = 0;
/* It is supposed coredump can be done within 5 seconds */
@@ -407,9 +409,6 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb)
break;
}
- if (err < 0)
- kfree_skb(skb);
-
return err;
}
EXPORT_SYMBOL_GPL(btmtk_process_coredump);
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index b40b32fa7f1c38..216826c31ee34f 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -15,6 +15,8 @@
#define VERSION "0.1"
+#define QCA_BDADDR_DEFAULT (&(bdaddr_t) {{ 0xad, 0x5a, 0x00, 0x00, 0x00, 0x00 }})
+
int qca_read_soc_version(struct hci_dev *hdev, struct qca_btsoc_version *ver,
enum qca_btsoc_type soc_type)
{
@@ -612,6 +614,38 @@ int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr)
}
EXPORT_SYMBOL_GPL(qca_set_bdaddr_rome);
+static int qca_check_bdaddr(struct hci_dev *hdev)
+{
+ struct hci_rp_read_bd_addr *bda;
+ struct sk_buff *skb;
+ int err;
+
+ if (bacmp(&hdev->public_addr, BDADDR_ANY))
+ return 0;
+
+ skb = __hci_cmd_sync(hdev, HCI_OP_READ_BD_ADDR, 0, NULL,
+ HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ bt_dev_err(hdev, "Failed to read device address (%d)", err);
+ return err;
+ }
+
+ if (skb->len != sizeof(*bda)) {
+ bt_dev_err(hdev, "Device address length mismatch");
+ kfree_skb(skb);
+ return -EIO;
+ }
+
+ bda = (struct hci_rp_read_bd_addr *)skb->data;
+ if (!bacmp(&bda->bdaddr, QCA_BDADDR_DEFAULT))
+ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
+
+ kfree_skb(skb);
+
+ return 0;
+}
+
static void qca_generate_hsp_nvm_name(char *fwname, size_t max_size,
struct qca_btsoc_version ver, u8 rom_ver, u16 bid)
{
@@ -818,6 +852,10 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
break;
}
+ err = qca_check_bdaddr(hdev);
+ if (err)
+ return err;
+
bt_dev_info(hdev, "QCA setup on UART is completed");
return 0;
@@ -826,11 +864,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup);
int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
{
+ bdaddr_t bdaddr_swapped;
struct sk_buff *skb;
int err;
- skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr,
- HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
+ baswap(&bdaddr_swapped, bdaddr);
+
+ skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6,
+ &bdaddr_swapped, HCI_EV_VENDOR,
+ HCI_INIT_TIMEOUT);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err);
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 06e915b57283f8..e3946f7b736e3c 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -542,6 +542,8 @@ static const struct usb_device_id quirks_table[] = {
/* Realtek 8852BE Bluetooth devices */
{ USB_DEVICE(0x0cb8, 0xc559), .driver_info = BTUSB_REALTEK |
BTUSB_WIDEBAND_SPEECH },
+ { USB_DEVICE(0x0bda, 0x4853), .driver_info = BTUSB_REALTEK |
+ BTUSB_WIDEBAND_SPEECH },
{ USB_DEVICE(0x0bda, 0x887b), .driver_info = BTUSB_REALTEK |
BTUSB_WIDEBAND_SPEECH },
{ USB_DEVICE(0x0bda, 0xb85b), .driver_info = BTUSB_REALTEK |
@@ -3480,13 +3482,12 @@ static void btusb_dump_hdr_qca(struct hci_dev *hdev, struct sk_buff *skb)
static void btusb_coredump_qca(struct hci_dev *hdev)
{
+ int err;
static const u8 param[] = { 0x26 };
- struct sk_buff *skb;
- skb = __hci_cmd_sync(hdev, 0xfc0c, 1, param, HCI_CMD_TIMEOUT);
- if (IS_ERR(skb))
- bt_dev_err(hdev, "%s: triggle crash failed (%ld)", __func__, PTR_ERR(skb));
- kfree_skb(skb);
+ err = __hci_cmd_send(hdev, 0xfc0c, 1, param);
+ if (err < 0)
+ bt_dev_err(hdev, "%s: triggle crash failed (%d)", __func__, err);
}
/*
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 8a60ad7acd7052..0c9c9ee56592dc 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -7,7 +7,6 @@
*
* Copyright (C) 2007 Texas Instruments, Inc.
* Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved.
- * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* Acknowledgements:
* This file is based on hci_ll.c, which was...
@@ -226,6 +225,7 @@ struct qca_serdev {
struct qca_power *bt_power;
u32 init_speed;
u32 oper_speed;
+ bool bdaddr_property_broken;
const char *firmware_name;
};
@@ -1672,6 +1672,9 @@ static bool qca_wakeup(struct hci_dev *hdev)
struct hci_uart *hu = hci_get_drvdata(hdev);
bool wakeup;
+ if (!hu->serdev)
+ return true;
+
/* BT SoC attached through the serial bus is handled by the serdev driver.
* So we need to use the device handle of the serdev driver to get the
* status of device may wakeup.
@@ -1843,6 +1846,7 @@ static int qca_setup(struct hci_uart *hu)
const char *firmware_name = qca_get_firmware_name(hu);
int ret;
struct qca_btsoc_version ver;
+ struct qca_serdev *qcadev;
const char *soc_name;
ret = qca_check_speeds(hu);
@@ -1904,16 +1908,9 @@ retry:
case QCA_WCN6750:
case QCA_WCN6855:
case QCA_WCN7850:
-
- /* Set BDA quirk bit for reading BDA value from fwnode property
- * only if that property exist in DT.
- */
- if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) {
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
- bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later");
- } else {
- bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA");
- }
+ qcadev = serdev_device_get_drvdata(hu->serdev);
+ if (qcadev->bdaddr_property_broken)
+ set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks);
hci_set_aosp_capable(hdev);
@@ -1961,8 +1958,10 @@ retry:
qca_debugfs_init(hdev);
hu->hdev->hw_error = qca_hw_error;
hu->hdev->cmd_timeout = qca_cmd_timeout;
- if (device_can_wakeup(hu->serdev->ctrl->dev.parent))
- hu->hdev->wakeup = qca_wakeup;
+ if (hu->serdev) {
+ if (device_can_wakeup(hu->serdev->ctrl->dev.parent))
+ hu->hdev->wakeup = qca_wakeup;
+ }
} else if (ret == -ENOENT) {
/* No patch/nvm-config found, run with original fw/config */
set_bit(QCA_ROM_FW, &qca->flags);
@@ -2295,6 +2294,9 @@ static int qca_serdev_probe(struct serdev_device *serdev)
if (!qcadev->oper_speed)
BT_DBG("UART will pick default operating speed");
+ qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev,
+ "qcom,local-bd-address-broken");
+
if (data)
qcadev->btsoc_type = data->soc_type;
else
@@ -2330,16 +2332,21 @@ static int qca_serdev_probe(struct serdev_device *serdev)
(data->soc_type == QCA_WCN6750 ||
data->soc_type == QCA_WCN6855)) {
dev_err(&serdev->dev, "failed to acquire BT_EN gpio\n");
- power_ctrl_enabled = false;
+ return PTR_ERR(qcadev->bt_en);
}
+ if (!qcadev->bt_en)
+ power_ctrl_enabled = false;
+
qcadev->sw_ctrl = devm_gpiod_get_optional(&serdev->dev, "swctrl",
GPIOD_IN);
if (IS_ERR(qcadev->sw_ctrl) &&
(data->soc_type == QCA_WCN6750 ||
data->soc_type == QCA_WCN6855 ||
- data->soc_type == QCA_WCN7850))
- dev_warn(&serdev->dev, "failed to acquire SW_CTRL gpio\n");
+ data->soc_type == QCA_WCN7850)) {
+ dev_err(&serdev->dev, "failed to acquire SW_CTRL gpio\n");
+ return PTR_ERR(qcadev->sw_ctrl);
+ }
qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL);
if (IS_ERR(qcadev->susclk)) {
@@ -2358,10 +2365,13 @@ static int qca_serdev_probe(struct serdev_device *serdev)
qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
GPIOD_OUT_LOW);
if (IS_ERR(qcadev->bt_en)) {
- dev_warn(&serdev->dev, "failed to acquire enable gpio\n");
- power_ctrl_enabled = false;
+ dev_err(&serdev->dev, "failed to acquire enable gpio\n");
+ return PTR_ERR(qcadev->bt_en);
}
+ if (!qcadev->bt_en)
+ power_ctrl_enabled = false;
+
qcadev->susclk = devm_clk_get_optional(&serdev->dev, NULL);
if (IS_ERR(qcadev->susclk)) {
dev_warn(&serdev->dev, "failed to acquire clk\n");
diff --git a/drivers/cache/sifive_ccache.c b/drivers/cache/sifive_ccache.c
index 89ed6cd6b059eb..6874b72ec59d8d 100644
--- a/drivers/cache/sifive_ccache.c
+++ b/drivers/cache/sifive_ccache.c
@@ -15,6 +15,8 @@
#include <linux/of_address.h>
#include <linux/device.h>
#include <linux/bitfield.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
#include <asm/cacheflush.h>
#include <asm/cacheinfo.h>
#include <asm/dma-noncoherent.h>
@@ -247,13 +249,49 @@ static irqreturn_t ccache_int_handler(int irq, void *device)
return IRQ_HANDLED;
}
+static int sifive_ccache_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ unsigned long quirks;
+ int intr_num, rc;
+
+ quirks = (unsigned long)device_get_match_data(dev);
+
+ intr_num = platform_irq_count(pdev);
+ if (!intr_num)
+ return dev_err_probe(dev, -ENODEV, "No interrupts property\n");
+
+ for (int i = 0; i < intr_num; i++) {
+ if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
+ continue;
+
+ g_irq[i] = platform_get_irq(pdev, i);
+ if (g_irq[i] < 0)
+ return g_irq[i];
+
+ rc = devm_request_irq(dev, g_irq[i], ccache_int_handler, 0, "ccache_ecc", NULL);
+ if (rc)
+ return dev_err_probe(dev, rc, "Could not request IRQ %d\n", g_irq[i]);
+ }
+
+ return 0;
+}
+
+static struct platform_driver sifive_ccache_driver = {
+ .probe = sifive_ccache_probe,
+ .driver = {
+ .name = "sifive_ccache",
+ .of_match_table = sifive_ccache_ids,
+ },
+};
+
static int __init sifive_ccache_init(void)
{
struct device_node *np;
struct resource res;
- int i, rc, intr_num;
const struct of_device_id *match;
- unsigned long quirks;
+ unsigned long quirks __maybe_unused;
+ int rc;
np = of_find_matching_node_and_match(NULL, sifive_ccache_ids, &match);
if (!np)
@@ -277,28 +315,6 @@ static int __init sifive_ccache_init(void)
goto err_unmap;
}
- intr_num = of_property_count_u32_elems(np, "interrupts");
- if (!intr_num) {
- pr_err("No interrupts property\n");
- rc = -ENODEV;
- goto err_unmap;
- }
-
- for (i = 0; i < intr_num; i++) {
- g_irq[i] = irq_of_parse_and_map(np, i);
-
- if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
- continue;
-
- rc = request_irq(g_irq[i], ccache_int_handler, 0, "ccache_ecc",
- NULL);
- if (rc) {
- pr_err("Could not request IRQ %d\n", g_irq[i]);
- goto err_free_irq;
- }
- }
- of_node_put(np);
-
#ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
if (quirks & QUIRK_NONSTANDARD_CACHE_OPS) {
riscv_cbom_block_size = SIFIVE_CCACHE_LINE_SIZE;
@@ -315,11 +331,15 @@ static int __init sifive_ccache_init(void)
#ifdef CONFIG_DEBUG_FS
setup_sifive_debug();
#endif
+
+ rc = platform_driver_register(&sifive_ccache_driver);
+ if (rc)
+ goto err_unmap;
+
+ of_node_put(np);
+
return 0;
-err_free_irq:
- while (--i >= 0)
- free_irq(g_irq[i], NULL);
err_unmap:
iounmap(ccache_base);
err_node_put:
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 3c6670cf905f11..9b80e622ae80a8 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -544,7 +544,7 @@ static unsigned long get_unmapped_area_zero(struct file *file,
}
/* Otherwise flags & MAP_PRIVATE: with no shmem object beneath it */
- return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
#else
return -ENOSYS;
#endif
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 456be28ba67cb4..2597cb43f43871 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -702,7 +702,7 @@ static void extract_entropy(void *buf, size_t len)
static void __cold _credit_init_bits(size_t bits)
{
- static struct execute_work set_ready;
+ static DECLARE_WORK(set_ready, crng_set_ready);
unsigned int new, orig, add;
unsigned long flags;
@@ -718,8 +718,8 @@ static void __cold _credit_init_bits(size_t bits)
if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {
crng_reseed(NULL); /* Sets crng_init to CRNG_READY under base_crng.lock. */
- if (static_key_initialized)
- execute_in_process_context(crng_set_ready, &set_ready);
+ if (static_key_initialized && system_unbound_wq)
+ queue_work(system_unbound_wq, &set_ready);
atomic_notifier_call_chain(&random_ready_notifier, 0, NULL);
wake_up_interruptible(&crng_init_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
@@ -890,8 +890,8 @@ void __init random_init(void)
/*
* If we were initialized by the cpu or bootloader before jump labels
- * are initialized, then we should enable the static branch here, where
- * it's guaranteed that jump labels have been initialized.
+ * or workqueues are initialized, then we should enable the static
+ * branch here, where it's guaranteed that these have been initialized.
*/
if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY)
crng_set_ready(NULL);
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 25371c91a58fe7..8cca52be993f4c 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -37,6 +37,10 @@ static HLIST_HEAD(clk_root_list);
static HLIST_HEAD(clk_orphan_list);
static LIST_HEAD(clk_notifier_list);
+/* List of registered clks that use runtime PM */
+static HLIST_HEAD(clk_rpm_list);
+static DEFINE_MUTEX(clk_rpm_list_lock);
+
static const struct hlist_head *all_lists[] = {
&clk_root_list,
&clk_orphan_list,
@@ -59,6 +63,7 @@ struct clk_core {
struct clk_hw *hw;
struct module *owner;
struct device *dev;
+ struct hlist_node rpm_node;
struct device_node *of_node;
struct clk_core *parent;
struct clk_parent_map *parents;
@@ -122,6 +127,89 @@ static void clk_pm_runtime_put(struct clk_core *core)
pm_runtime_put_sync(core->dev);
}
+/**
+ * clk_pm_runtime_get_all() - Runtime "get" all clk provider devices
+ *
+ * Call clk_pm_runtime_get() on all runtime PM enabled clks in the clk tree so
+ * that disabling unused clks avoids a deadlock where a device is runtime PM
+ * resuming/suspending and the runtime PM callback is trying to grab the
+ * prepare_lock for something like clk_prepare_enable() while
+ * clk_disable_unused_subtree() holds the prepare_lock and is trying to runtime
+ * PM resume/suspend the device as well.
+ *
+ * Context: Acquires the 'clk_rpm_list_lock' and returns with the lock held on
+ * success. Otherwise the lock is released on failure.
+ *
+ * Return: 0 on success, negative errno otherwise.
+ */
+static int clk_pm_runtime_get_all(void)
+{
+ int ret;
+ struct clk_core *core, *failed;
+
+ /*
+ * Grab the list lock to prevent any new clks from being registered
+ * or unregistered until clk_pm_runtime_put_all().
+ */
+ mutex_lock(&clk_rpm_list_lock);
+
+ /*
+ * Runtime PM "get" all the devices that are needed for the clks
+ * currently registered. Do this without holding the prepare_lock, to
+ * avoid the deadlock.
+ */
+ hlist_for_each_entry(core, &clk_rpm_list, rpm_node) {
+ ret = clk_pm_runtime_get(core);
+ if (ret) {
+ failed = core;
+ pr_err("clk: Failed to runtime PM get '%s' for clk '%s'\n",
+ dev_name(failed->dev), failed->name);
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ hlist_for_each_entry(core, &clk_rpm_list, rpm_node) {
+ if (core == failed)
+ break;
+
+ clk_pm_runtime_put(core);
+ }
+ mutex_unlock(&clk_rpm_list_lock);
+
+ return ret;
+}
+
+/**
+ * clk_pm_runtime_put_all() - Runtime "put" all clk provider devices
+ *
+ * Put the runtime PM references taken in clk_pm_runtime_get_all() and release
+ * the 'clk_rpm_list_lock'.
+ */
+static void clk_pm_runtime_put_all(void)
+{
+ struct clk_core *core;
+
+ hlist_for_each_entry(core, &clk_rpm_list, rpm_node)
+ clk_pm_runtime_put(core);
+ mutex_unlock(&clk_rpm_list_lock);
+}
+
+static void clk_pm_runtime_init(struct clk_core *core)
+{
+ struct device *dev = core->dev;
+
+ if (dev && pm_runtime_enabled(dev)) {
+ core->rpm_enabled = true;
+
+ mutex_lock(&clk_rpm_list_lock);
+ hlist_add_head(&core->rpm_node, &clk_rpm_list);
+ mutex_unlock(&clk_rpm_list_lock);
+ }
+}
+
/*** locking ***/
static void clk_prepare_lock(void)
{
@@ -1381,9 +1469,6 @@ static void __init clk_unprepare_unused_subtree(struct clk_core *core)
if (core->flags & CLK_IGNORE_UNUSED)
return;
- if (clk_pm_runtime_get(core))
- return;
-
if (clk_core_is_prepared(core)) {
trace_clk_unprepare(core);
if (core->ops->unprepare_unused)
@@ -1392,8 +1477,6 @@ static void __init clk_unprepare_unused_subtree(struct clk_core *core)
core->ops->unprepare(core->hw);
trace_clk_unprepare_complete(core);
}
-
- clk_pm_runtime_put(core);
}
static void __init clk_disable_unused_subtree(struct clk_core *core)
@@ -1409,9 +1492,6 @@ static void __init clk_disable_unused_subtree(struct clk_core *core)
if (core->flags & CLK_OPS_PARENT_ENABLE)
clk_core_prepare_enable(core->parent);
- if (clk_pm_runtime_get(core))
- goto unprepare_out;
-
flags = clk_enable_lock();
if (core->enable_count)
@@ -1436,8 +1516,6 @@ static void __init clk_disable_unused_subtree(struct clk_core *core)
unlock_out:
clk_enable_unlock(flags);
- clk_pm_runtime_put(core);
-unprepare_out:
if (core->flags & CLK_OPS_PARENT_ENABLE)
clk_core_disable_unprepare(core->parent);
}
@@ -1453,6 +1531,7 @@ __setup("clk_ignore_unused", clk_ignore_unused_setup);
static int __init clk_disable_unused(void)
{
struct clk_core *core;
+ int ret;
if (clk_ignore_unused) {
pr_warn("clk: Not disabling unused clocks\n");
@@ -1461,6 +1540,13 @@ static int __init clk_disable_unused(void)
pr_info("clk: Disabling unused clocks\n");
+ ret = clk_pm_runtime_get_all();
+ if (ret)
+ return ret;
+ /*
+ * Grab the prepare lock to keep the clk topology stable while iterating
+ * over clks.
+ */
clk_prepare_lock();
hlist_for_each_entry(core, &clk_root_list, child_node)
@@ -1477,6 +1563,8 @@ static int __init clk_disable_unused(void)
clk_prepare_unlock();
+ clk_pm_runtime_put_all();
+
return 0;
}
late_initcall_sync(clk_disable_unused);
@@ -3252,9 +3340,7 @@ static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c,
{
struct clk_core *child;
- clk_pm_runtime_get(c);
clk_summary_show_one(s, c, level);
- clk_pm_runtime_put(c);
hlist_for_each_entry(child, &c->children, child_node)
clk_summary_show_subtree(s, child, level + 1);
@@ -3264,11 +3350,15 @@ static int clk_summary_show(struct seq_file *s, void *data)
{
struct clk_core *c;
struct hlist_head **lists = s->private;
+ int ret;
seq_puts(s, " enable prepare protect duty hardware connection\n");
seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n");
seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
+ ret = clk_pm_runtime_get_all();
+ if (ret)
+ return ret;
clk_prepare_lock();
@@ -3277,6 +3367,7 @@ static int clk_summary_show(struct seq_file *s, void *data)
clk_summary_show_subtree(s, c, 0);
clk_prepare_unlock();
+ clk_pm_runtime_put_all();
return 0;
}
@@ -3324,8 +3415,14 @@ static int clk_dump_show(struct seq_file *s, void *data)
struct clk_core *c;
bool first_node = true;
struct hlist_head **lists = s->private;
+ int ret;
+
+ ret = clk_pm_runtime_get_all();
+ if (ret)
+ return ret;
seq_putc(s, '{');
+
clk_prepare_lock();
for (; *lists; lists++) {
@@ -3338,6 +3435,7 @@ static int clk_dump_show(struct seq_file *s, void *data)
}
clk_prepare_unlock();
+ clk_pm_runtime_put_all();
seq_puts(s, "}\n");
return 0;
@@ -3981,8 +4079,6 @@ static int __clk_core_init(struct clk_core *core)
}
clk_core_reparent_orphans_nolock();
-
- kref_init(&core->ref);
out:
clk_pm_runtime_put(core);
unlock:
@@ -4211,6 +4307,22 @@ static void clk_core_free_parent_map(struct clk_core *core)
kfree(core->parents);
}
+/* Free memory allocated for a struct clk_core */
+static void __clk_release(struct kref *ref)
+{
+ struct clk_core *core = container_of(ref, struct clk_core, ref);
+
+ if (core->rpm_enabled) {
+ mutex_lock(&clk_rpm_list_lock);
+ hlist_del(&core->rpm_node);
+ mutex_unlock(&clk_rpm_list_lock);
+ }
+
+ clk_core_free_parent_map(core);
+ kfree_const(core->name);
+ kfree(core);
+}
+
static struct clk *
__clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
{
@@ -4231,6 +4343,8 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
goto fail_out;
}
+ kref_init(&core->ref);
+
core->name = kstrdup_const(init->name, GFP_KERNEL);
if (!core->name) {
ret = -ENOMEM;
@@ -4243,9 +4357,8 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
}
core->ops = init->ops;
- if (dev && pm_runtime_enabled(dev))
- core->rpm_enabled = true;
core->dev = dev;
+ clk_pm_runtime_init(core);
core->of_node = np;
if (dev && dev->driver)
core->owner = dev->driver->owner;
@@ -4285,12 +4398,10 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
hw->clk = NULL;
fail_create_clk:
- clk_core_free_parent_map(core);
fail_parents:
fail_ops:
- kfree_const(core->name);
fail_name:
- kfree(core);
+ kref_put(&core->ref, __clk_release);
fail_out:
return ERR_PTR(ret);
}
@@ -4370,18 +4481,6 @@ int of_clk_hw_register(struct device_node *node, struct clk_hw *hw)
}
EXPORT_SYMBOL_GPL(of_clk_hw_register);
-/* Free memory allocated for a clock. */
-static void __clk_release(struct kref *ref)
-{
- struct clk_core *core = container_of(ref, struct clk_core, ref);
-
- lockdep_assert_held(&prepare_lock);
-
- clk_core_free_parent_map(core);
- kfree_const(core->name);
- kfree(core);
-}
-
/*
* Empty clk_ops for unregistered clocks. These are used temporarily
* after clk_unregister() was called on a clock and until last clock
@@ -4472,7 +4571,8 @@ void clk_unregister(struct clk *clk)
if (ops == &clk_nodrv_ops) {
pr_err("%s: unregistered clock: %s\n", __func__,
clk->core->name);
- goto unlock;
+ clk_prepare_unlock();
+ return;
}
/*
* Assign empty clock ops for consumers that might still hold
@@ -4506,11 +4606,10 @@ void clk_unregister(struct clk *clk)
if (clk->core->protect_count)
pr_warn("%s: unregistering protected clock: %s\n",
__func__, clk->core->name);
+ clk_prepare_unlock();
kref_put(&clk->core->ref, __clk_release);
free_clk(clk);
-unlock:
- clk_prepare_unlock();
}
EXPORT_SYMBOL_GPL(clk_unregister);
@@ -4669,13 +4768,11 @@ void __clk_put(struct clk *clk)
if (clk->min_rate > 0 || clk->max_rate < ULONG_MAX)
clk_set_rate_range_nolock(clk, 0, ULONG_MAX);
- owner = clk->core->owner;
- kref_put(&clk->core->ref, __clk_release);
-
clk_prepare_unlock();
+ owner = clk->core->owner;
+ kref_put(&clk->core->ref, __clk_release);
module_put(owner);
-
free_clk(clk);
}
diff --git a/drivers/clk/mediatek/clk-mt7988-infracfg.c b/drivers/clk/mediatek/clk-mt7988-infracfg.c
index 449041f8abbc9a..c8c023afe3e5ad 100644
--- a/drivers/clk/mediatek/clk-mt7988-infracfg.c
+++ b/drivers/clk/mediatek/clk-mt7988-infracfg.c
@@ -156,7 +156,7 @@ static const struct mtk_gate infra_clks[] = {
GATE_INFRA0(CLK_INFRA_PCIE_PERI_26M_CK_P1, "infra_pcie_peri_ck_26m_ck_p1",
"csw_infra_f26m_sel", 8),
GATE_INFRA0(CLK_INFRA_PCIE_PERI_26M_CK_P2, "infra_pcie_peri_ck_26m_ck_p2",
- "csw_infra_f26m_sel", 9),
+ "infra_pcie_peri_ck_26m_ck_p3", 9),
GATE_INFRA0(CLK_INFRA_PCIE_PERI_26M_CK_P3, "infra_pcie_peri_ck_26m_ck_p3",
"csw_infra_f26m_sel", 10),
/* INFRA1 */
diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c
index 2e55368dc4d820..bd37ab4d1a9bb3 100644
--- a/drivers/clk/mediatek/clk-mtk.c
+++ b/drivers/clk/mediatek/clk-mtk.c
@@ -13,6 +13,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include "clk-mtk.h"
@@ -494,6 +495,16 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev,
return IS_ERR(base) ? PTR_ERR(base) : -ENOMEM;
}
+
+ devm_pm_runtime_enable(&pdev->dev);
+ /*
+ * Do a pm_runtime_resume_and_get() to workaround a possible
+ * deadlock between clk_register() and the genpd framework.
+ */
+ r = pm_runtime_resume_and_get(&pdev->dev);
+ if (r)
+ return r;
+
/* Calculate how many clk_hw_onecell_data entries to allocate */
num_clks = mcd->num_clks + mcd->num_composite_clks;
num_clks += mcd->num_fixed_clks + mcd->num_factor_clks;
@@ -574,6 +585,8 @@ static int __mtk_clk_simple_probe(struct platform_device *pdev,
goto unregister_clks;
}
+ pm_runtime_put(&pdev->dev);
+
return r;
unregister_clks:
@@ -604,6 +617,8 @@ free_data:
free_base:
if (mcd->shared_io && base)
iounmap(base);
+
+ pm_runtime_put(&pdev->dev);
return r;
}
diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c
index 8602c02047d048..45c5255bcd11ba 100644
--- a/drivers/clk/qcom/clk-smd-rpm.c
+++ b/drivers/clk/qcom/clk-smd-rpm.c
@@ -768,6 +768,7 @@ static struct clk_smd_rpm *msm8976_clks[] = {
static const struct rpm_smd_clk_desc rpm_clk_msm8976 = {
.clks = msm8976_clks,
+ .num_clks = ARRAY_SIZE(msm8976_clks),
.icc_clks = bimc_pcnoc_snoc_smmnoc_icc_clks,
.num_icc_clks = ARRAY_SIZE(bimc_pcnoc_snoc_smmnoc_icc_clks),
};
diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c
index e7a4068b9f3906..df9618ab7eea1f 100644
--- a/drivers/clk/qcom/gdsc.c
+++ b/drivers/clk/qcom/gdsc.c
@@ -487,9 +487,14 @@ int gdsc_register(struct gdsc_desc *desc,
if (!scs[i] || !scs[i]->supply)
continue;
- scs[i]->rsupply = devm_regulator_get(dev, scs[i]->supply);
- if (IS_ERR(scs[i]->rsupply))
- return PTR_ERR(scs[i]->rsupply);
+ scs[i]->rsupply = devm_regulator_get_optional(dev, scs[i]->supply);
+ if (IS_ERR(scs[i]->rsupply)) {
+ ret = PTR_ERR(scs[i]->rsupply);
+ if (ret != -ENODEV)
+ return ret;
+
+ scs[i]->rsupply = NULL;
+ }
}
data->num_domains = num;
diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c
index 3484e6cc80ad10..503c6f5b20d5c5 100644
--- a/drivers/clk/samsung/clk-exynos-clkout.c
+++ b/drivers/clk/samsung/clk-exynos-clkout.c
@@ -13,9 +13,9 @@
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
-#include <linux/property.h>
#define EXYNOS_CLKOUT_NR_CLKS 1
#define EXYNOS_CLKOUT_PARENTS 32
@@ -84,17 +84,24 @@ MODULE_DEVICE_TABLE(of, exynos_clkout_ids);
static int exynos_clkout_match_parent_dev(struct device *dev, u32 *mux_mask)
{
const struct exynos_clkout_variant *variant;
+ const struct of_device_id *match;
if (!dev->parent) {
dev_err(dev, "not instantiated from MFD\n");
return -EINVAL;
}
- variant = device_get_match_data(dev->parent);
- if (!variant) {
+ /*
+ * 'exynos_clkout_ids' arrays is not the ids array matched by
+ * the dev->parent driver, so of_device_get_match_data() or
+ * device_get_match_data() cannot be used here.
+ */
+ match = of_match_device(exynos_clkout_ids, dev->parent);
+ if (!match) {
dev_err(dev, "cannot match parent device\n");
return -EINVAL;
}
+ variant = match->data;
*mux_mask = variant->mux_mask;
diff --git a/drivers/comedi/drivers/vmk80xx.c b/drivers/comedi/drivers/vmk80xx.c
index 4536ed43f65b27..84dce5184a77ae 100644
--- a/drivers/comedi/drivers/vmk80xx.c
+++ b/drivers/comedi/drivers/vmk80xx.c
@@ -641,33 +641,22 @@ static int vmk80xx_find_usb_endpoints(struct comedi_device *dev)
struct vmk80xx_private *devpriv = dev->private;
struct usb_interface *intf = comedi_to_usb_interface(dev);
struct usb_host_interface *iface_desc = intf->cur_altsetting;
- struct usb_endpoint_descriptor *ep_desc;
- int i;
-
- if (iface_desc->desc.bNumEndpoints != 2)
- return -ENODEV;
-
- for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) {
- ep_desc = &iface_desc->endpoint[i].desc;
-
- if (usb_endpoint_is_int_in(ep_desc) ||
- usb_endpoint_is_bulk_in(ep_desc)) {
- if (!devpriv->ep_rx)
- devpriv->ep_rx = ep_desc;
- continue;
- }
+ struct usb_endpoint_descriptor *ep_rx_desc, *ep_tx_desc;
+ int ret;
- if (usb_endpoint_is_int_out(ep_desc) ||
- usb_endpoint_is_bulk_out(ep_desc)) {
- if (!devpriv->ep_tx)
- devpriv->ep_tx = ep_desc;
- continue;
- }
- }
+ if (devpriv->model == VMK8061_MODEL)
+ ret = usb_find_common_endpoints(iface_desc, &ep_rx_desc,
+ &ep_tx_desc, NULL, NULL);
+ else
+ ret = usb_find_common_endpoints(iface_desc, NULL, NULL,
+ &ep_rx_desc, &ep_tx_desc);
- if (!devpriv->ep_rx || !devpriv->ep_tx)
+ if (ret)
return -ENODEV;
+ devpriv->ep_rx = ep_rx_desc;
+ devpriv->ep_tx = ep_tx_desc;
+
if (!usb_endpoint_maxp(devpriv->ep_rx) || !usb_endpoint_maxp(devpriv->ep_tx))
return -EINVAL;
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index f44efbb89c346a..2102377f727b1e 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -1090,7 +1090,7 @@ static int __sev_snp_init_locked(int *error)
void *arg = &data;
int cmd, rc = 0;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
sev = psp->sev_data;
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index 1cd304de538815..b2191ade9011c6 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -806,6 +806,8 @@ static int save_iaa_wq(struct idxd_wq *wq)
return -EINVAL;
cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
+ if (!cpus_per_iaa)
+ cpus_per_iaa = 1;
out:
return 0;
}
@@ -821,10 +823,12 @@ static void remove_iaa_wq(struct idxd_wq *wq)
}
}
- if (nr_iaa)
+ if (nr_iaa) {
cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
- else
- cpus_per_iaa = 0;
+ if (!cpus_per_iaa)
+ cpus_per_iaa = 1;
+ } else
+ cpus_per_iaa = 1;
}
static int wq_table_add_wqs(int iaa, int cpu)
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 67998dbd1d46b4..5f3c9c5529b960 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -144,17 +144,4 @@ config CXL_REGION_INVALIDATION_TEST
If unsure, or if this kernel is meant for production environments,
say N.
-config CXL_PMU
- tristate "CXL Performance Monitoring Unit"
- default CXL_BUS
- depends on PERF_EVENTS
- help
- Support performance monitoring as defined in CXL rev 3.0
- section 13.2: Performance Monitoring. CXL components may have
- one or more CXL Performance Monitoring Units (CPMUs).
-
- Say 'y/m' to enable a driver that will attach to performance
- monitoring units and provide standard perf based interfaces.
-
- If unsure say 'm'.
endif
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index af5cb818f84d6b..cb8c155a2c9b3d 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -525,22 +525,11 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
{
struct acpi_device *hb = to_cxl_host_bridge(NULL, dev);
u32 uid;
- int rc;
if (kstrtou32(acpi_device_uid(hb), 0, &uid))
return -EINVAL;
- rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
- if (rc < 0)
- return rc;
-
- /* Adjust back to picoseconds from nanoseconds */
- for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
- dport->hb_coord[i].read_latency *= 1000;
- dport->hb_coord[i].write_latency *= 1000;
- }
-
- return 0;
+ return acpi_get_genport_coordinates(uid, dport->coord);
}
static int add_host_bridge_dport(struct device *match, void *arg)
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index eddbbe21450ca9..bb83867d9fec98 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -14,12 +14,42 @@
struct dsmas_entry {
struct range dpa_range;
u8 handle;
- struct access_coordinate coord;
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
int entries;
int qos_class;
};
+static u32 cdat_normalize(u16 entry, u64 base, u8 type)
+{
+ u32 value;
+
+ /*
+ * Check for invalid and overflow values
+ */
+ if (entry == 0xffff || !entry)
+ return 0;
+ else if (base > (UINT_MAX / (entry)))
+ return 0;
+
+ /*
+ * CDAT fields follow the format of HMAT fields. See table 5 Device
+ * Scoped Latency and Bandwidth Information Structure in Coherent Device
+ * Attribute Table (CDAT) Specification v1.01.
+ */
+ value = entry * base;
+ switch (type) {
+ case ACPI_HMAT_ACCESS_LATENCY:
+ case ACPI_HMAT_READ_LATENCY:
+ case ACPI_HMAT_WRITE_LATENCY:
+ value = DIV_ROUND_UP(value, 1000);
+ break;
+ default:
+ break;
+ }
+ return value;
+}
+
static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
@@ -58,8 +88,8 @@ static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
return 0;
}
-static void cxl_access_coordinate_set(struct access_coordinate *coord,
- int access, unsigned int val)
+static void __cxl_access_coordinate_set(struct access_coordinate *coord,
+ int access, unsigned int val)
{
switch (access) {
case ACPI_HMAT_ACCESS_LATENCY:
@@ -85,6 +115,13 @@ static void cxl_access_coordinate_set(struct access_coordinate *coord,
}
}
+static void cxl_access_coordinate_set(struct access_coordinate *coord,
+ int access, unsigned int val)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ __cxl_access_coordinate_set(&coord[i], access, val);
+}
+
static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
@@ -97,7 +134,6 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
__le16 le_val;
u64 val;
u16 len;
- int rc;
len = le16_to_cpu((__force __le16)hdr->length);
if (len != size || (unsigned long)hdr + len > end) {
@@ -124,12 +160,10 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
le_base = (__force __le64)dslbis->entry_base_unit;
le_val = (__force __le16)dslbis->entry[0];
- rc = check_mul_overflow(le64_to_cpu(le_base),
- le16_to_cpu(le_val), &val);
- if (rc)
- pr_warn("DSLBIS value overflowed.\n");
+ val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+ dslbis->data_type);
- cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val);
+ cxl_access_coordinate_set(dent->coord, dslbis->data_type, val);
return 0;
}
@@ -163,25 +197,18 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port,
static int cxl_port_perf_data_calculate(struct cxl_port *port,
struct xarray *dsmas_xa)
{
- struct access_coordinate ep_c;
- struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+ struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
struct dsmas_entry *dent;
int valid_entries = 0;
unsigned long index;
int rc;
- rc = cxl_endpoint_get_perf_coordinates(port, &ep_c);
+ rc = cxl_endpoint_get_perf_coordinates(port, ep_c);
if (rc) {
dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n");
return rc;
}
- rc = cxl_hb_get_perf_coordinates(port, coord);
- if (rc) {
- dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
- return rc;
- }
-
struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
if (!cxl_root)
@@ -193,18 +220,10 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
xa_for_each(dsmas_xa, index, dent) {
int qos_class;
- cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c);
- /*
- * Keeping the host bridge coordinates separate from the dsmas
- * coordinates in order to allow calculation of access class
- * 0 and 1 for region later.
- */
- cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU],
- &coord[ACCESS_COORDINATE_CPU],
- &dent->coord);
+ cxl_coordinates_combine(dent->coord, dent->coord, ep_c);
dent->entries = 1;
rc = cxl_root->ops->qos_class(cxl_root,
- &coord[ACCESS_COORDINATE_CPU],
+ &dent->coord[ACCESS_COORDINATE_CPU],
1, &qos_class);
if (rc != 1)
continue;
@@ -222,14 +241,17 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
struct cxl_dpa_perf *dpa_perf)
{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ dpa_perf->coord[i] = dent->coord[i];
dpa_perf->dpa_range = dent->dpa_range;
- dpa_perf->coord = dent->coord;
dpa_perf->qos_class = dent->qos_class;
dev_dbg(dev,
"DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
dent->dpa_range.start, dpa_perf->qos_class,
- dent->coord.read_bandwidth, dent->coord.write_bandwidth,
- dent->coord.read_latency, dent->coord.write_latency);
+ dent->coord[ACCESS_COORDINATE_CPU].read_bandwidth,
+ dent->coord[ACCESS_COORDINATE_CPU].write_bandwidth,
+ dent->coord[ACCESS_COORDINATE_CPU].read_latency,
+ dent->coord[ACCESS_COORDINATE_CPU].write_latency);
}
static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
@@ -461,17 +483,16 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;
-
- if (check_mul_overflow(le64_to_cpu(le_base),
- le16_to_cpu(le_val), &val))
- dev_warn(dev, "SSLBIS value overflowed!\n");
+ val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+ sslbis->data_type);
xa_for_each(&port->dports, index, dport) {
if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
- dsp_id == dport->port_id)
- cxl_access_coordinate_set(&dport->sw_coord,
+ dsp_id == dport->port_id) {
+ cxl_access_coordinate_set(dport->coord,
sslbis->data_type,
val);
+ }
}
}
@@ -493,6 +514,21 @@ void cxl_switch_parse_cdat(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL);
+static void __cxl_coordinates_combine(struct access_coordinate *out,
+ struct access_coordinate *c1,
+ struct access_coordinate *c2)
+{
+ if (c1->write_bandwidth && c2->write_bandwidth)
+ out->write_bandwidth = min(c1->write_bandwidth,
+ c2->write_bandwidth);
+ out->write_latency = c1->write_latency + c2->write_latency;
+
+ if (c1->read_bandwidth && c2->read_bandwidth)
+ out->read_bandwidth = min(c1->read_bandwidth,
+ c2->read_bandwidth);
+ out->read_latency = c1->read_latency + c2->read_latency;
+}
+
/**
* cxl_coordinates_combine - Combine the two input coordinates
*
@@ -504,15 +540,8 @@ void cxl_coordinates_combine(struct access_coordinate *out,
struct access_coordinate *c1,
struct access_coordinate *c2)
{
- if (c1->write_bandwidth && c2->write_bandwidth)
- out->write_bandwidth = min(c1->write_bandwidth,
- c2->write_bandwidth);
- out->write_latency = c1->write_latency + c2->write_latency;
-
- if (c1->read_bandwidth && c2->read_bandwidth)
- out->read_bandwidth = min(c1->read_bandwidth,
- c2->read_bandwidth);
- out->read_latency = c1->read_latency + c2->read_latency;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ __cxl_coordinates_combine(&out[i], &c1[i], &c2[i]);
}
MODULE_IMPORT_NS(CXL);
@@ -521,17 +550,13 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_port *port = cxlmd->endpoint;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
- struct access_coordinate coord;
struct range dpa = {
.start = cxled->dpa_res->start,
.end = cxled->dpa_res->end,
};
struct cxl_dpa_perf *perf;
- int rc;
switch (cxlr->mode) {
case CXL_DECODER_RAM:
@@ -549,35 +574,16 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
if (!range_contains(&perf->dpa_range, &dpa))
return;
- rc = cxl_hb_get_perf_coordinates(port, hb_coord);
- if (rc) {
- dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
- return;
- }
-
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
- /* Pickup the host bridge coords */
- cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord);
-
/* Get total bandwidth and the worst latency for the cxl region */
cxlr->coord[i].read_latency = max_t(unsigned int,
cxlr->coord[i].read_latency,
- coord.read_latency);
+ perf->coord[i].read_latency);
cxlr->coord[i].write_latency = max_t(unsigned int,
cxlr->coord[i].write_latency,
- coord.write_latency);
- cxlr->coord[i].read_bandwidth += coord.read_bandwidth;
- cxlr->coord[i].write_bandwidth += coord.write_bandwidth;
-
- /*
- * Convert latency to nanosec from picosec to be consistent
- * with the resulting latency coordinates computed by the
- * HMAT_REPORTING code.
- */
- cxlr->coord[i].read_latency =
- DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000);
- cxlr->coord[i].write_latency =
- DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000);
+ perf->coord[i].write_latency);
+ cxlr->coord[i].read_bandwidth += perf->coord[i].read_bandwidth;
+ cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth;
}
}
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 9adda4795eb786..65185c9fa00134 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -915,7 +915,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
payload->handles[i++] = gen->hdr.handle;
dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
- le16_to_cpu(payload->handles[i]));
+ le16_to_cpu(payload->handles[i - 1]));
if (i == max_handles) {
payload->nr_recs = i;
@@ -946,24 +946,22 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
struct device *dev = mds->cxlds.dev;
struct cxl_get_event_payload *payload;
- struct cxl_mbox_cmd mbox_cmd;
u8 log_type = type;
u16 nr_rec;
mutex_lock(&mds->event.log_lock);
payload = mds->event.buf;
- mbox_cmd = (struct cxl_mbox_cmd) {
- .opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
- .payload_in = &log_type,
- .size_in = sizeof(log_type),
- .payload_out = payload,
- .size_out = mds->payload_size,
- .min_out = struct_size(payload, records, 0),
- };
-
do {
int rc, i;
+ struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
+ .payload_in = &log_type,
+ .size_in = sizeof(log_type),
+ .payload_out = payload,
+ .size_out = mds->payload_size,
+ .min_out = struct_size(payload, records, 0),
+ };
rc = cxl_internal_send_cmd(mds, &mbox_cmd);
if (rc) {
@@ -1296,7 +1294,6 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
struct cxl_mbox_poison_out *po;
struct cxl_mbox_poison_in pi;
- struct cxl_mbox_cmd mbox_cmd;
int nr_records = 0;
int rc;
@@ -1308,16 +1305,16 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
pi.offset = cpu_to_le64(offset);
pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT);
- mbox_cmd = (struct cxl_mbox_cmd) {
- .opcode = CXL_MBOX_OP_GET_POISON,
- .size_in = sizeof(pi),
- .payload_in = &pi,
- .size_out = mds->payload_size,
- .payload_out = po,
- .min_out = struct_size(po, record, 0),
- };
-
do {
+ struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd){
+ .opcode = CXL_MBOX_OP_GET_POISON,
+ .size_in = sizeof(pi),
+ .payload_in = &pi,
+ .size_out = mds->payload_size,
+ .payload_out = po,
+ .min_out = struct_size(po, record, 0),
+ };
+
rc = cxl_internal_send_cmd(mds, &mbox_cmd);
if (rc)
break;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 2b0cab556072f5..762783bb091afc 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -2133,36 +2133,44 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
}
EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
-/**
- * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator
- * and host bridge
- *
- * @port: endpoint cxl_port
- * @coord: output access coordinates
- *
- * Return: errno on failure, 0 on success.
- */
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
- struct access_coordinate *coord)
+static void add_latency(struct access_coordinate *c, long latency)
{
- struct cxl_port *iter = port;
- struct cxl_dport *dport;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ c[i].write_latency += latency;
+ c[i].read_latency += latency;
+ }
+}
- if (!is_cxl_endpoint(port))
- return -EINVAL;
+static bool coordinates_valid(struct access_coordinate *c)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ if (c[i].read_bandwidth && c[i].write_bandwidth &&
+ c[i].read_latency && c[i].write_latency)
+ continue;
+ return false;
+ }
- dport = iter->parent_dport;
- while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
- iter = to_cxl_port(iter->dev.parent);
- dport = iter->parent_dport;
+ return true;
+}
+
+static void set_min_bandwidth(struct access_coordinate *c, unsigned int bw)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ c[i].write_bandwidth = min(c[i].write_bandwidth, bw);
+ c[i].read_bandwidth = min(c[i].read_bandwidth, bw);
}
+}
- coord[ACCESS_COORDINATE_LOCAL] =
- dport->hb_coord[ACCESS_COORDINATE_LOCAL];
- coord[ACCESS_COORDINATE_CPU] =
- dport->hb_coord[ACCESS_COORDINATE_CPU];
+static void set_access_coordinates(struct access_coordinate *out,
+ struct access_coordinate *in)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ out[i] = in[i];
+}
- return 0;
+static bool parent_port_is_cxl_root(struct cxl_port *port)
+{
+ return is_cxl_root(to_cxl_port(port->dev.parent));
}
/**
@@ -2176,35 +2184,53 @@ int cxl_hb_get_perf_coordinates(struct cxl_port *port,
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
struct access_coordinate *coord)
{
- struct access_coordinate c = {
- .read_bandwidth = UINT_MAX,
- .write_bandwidth = UINT_MAX,
+ struct access_coordinate c[] = {
+ {
+ .read_bandwidth = UINT_MAX,
+ .write_bandwidth = UINT_MAX,
+ },
+ {
+ .read_bandwidth = UINT_MAX,
+ .write_bandwidth = UINT_MAX,
+ },
};
struct cxl_port *iter = port;
struct cxl_dport *dport;
struct pci_dev *pdev;
unsigned int bw;
+ bool is_cxl_root;
if (!is_cxl_endpoint(port))
return -EINVAL;
- dport = iter->parent_dport;
-
/*
- * Exit the loop when the parent port of the current port is cxl root.
- * The iterative loop starts at the endpoint and gathers the
- * latency of the CXL link from the current iter to the next downstream
- * port each iteration. If the parent is cxl root then there is
- * nothing to gather.
+ * Exit the loop when the parent port of the current iter port is cxl
+ * root. The iterative loop starts at the endpoint and gathers the
+ * latency of the CXL link from the current device/port to the connected
+ * downstream port each iteration.
*/
- while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
- cxl_coordinates_combine(&c, &c, &dport->sw_coord);
- c.write_latency += dport->link_latency;
- c.read_latency += dport->link_latency;
-
- iter = to_cxl_port(iter->dev.parent);
+ do {
dport = iter->parent_dport;
- }
+ iter = to_cxl_port(iter->dev.parent);
+ is_cxl_root = parent_port_is_cxl_root(iter);
+
+ /*
+ * There's no valid access_coordinate for a root port since RPs do not
+ * have CDAT and therefore needs to be skipped.
+ */
+ if (!is_cxl_root) {
+ if (!coordinates_valid(dport->coord))
+ return -EINVAL;
+ cxl_coordinates_combine(c, c, dport->coord);
+ }
+ add_latency(c, dport->link_latency);
+ } while (!is_cxl_root);
+
+ dport = iter->parent_dport;
+ /* Retrieve HB coords */
+ if (!coordinates_valid(dport->coord))
+ return -EINVAL;
+ cxl_coordinates_combine(c, c, dport->coord);
/* Get the calculated PCI paths bandwidth */
pdev = to_pci_dev(port->uport_dev->parent);
@@ -2213,10 +2239,8 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
return -ENXIO;
bw /= BITS_PER_BYTE;
- c.write_bandwidth = min(c.write_bandwidth, bw);
- c.read_bandwidth = min(c.read_bandwidth, bw);
-
- *coord = c;
+ set_min_bandwidth(c, bw);
+ set_access_coordinates(coord, c);
return 0;
}
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 372786f809555f..3c42f984eeafaa 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -271,6 +271,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
struct cxl_register_map *map)
{
+ u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
u64 offset = ((u64)reg_hi << 32) |
(reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
@@ -278,11 +279,11 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
if (offset > pci_resource_len(pdev, bar)) {
dev_warn(&pdev->dev,
"BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar,
- &pdev->resource[bar], &offset, map->reg_type);
+ &pdev->resource[bar], &offset, reg_type);
return false;
}
- map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
+ map->reg_type = reg_type;
map->resource = pci_resource_start(pdev, bar) + offset;
map->max_size = pci_resource_len(pdev, bar) - offset;
return true;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 534e25e2f0a481..036d17db68e006 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -663,8 +663,7 @@ struct cxl_rcrb_info {
* @rch: Indicate whether this dport was enumerated in RCH or VH mode
* @port: reference to cxl_port that contains this downstream port
* @regs: Dport parsed register blocks
- * @sw_coord: access coordinates (performance) for switch from CDAT
- * @hb_coord: access coordinates (performance) from ACPI generic port (host bridge)
+ * @coord: access coordinates (bandwidth and latency performance attributes)
* @link_latency: calculated PCIe downstream latency
*/
struct cxl_dport {
@@ -675,8 +674,7 @@ struct cxl_dport {
bool rch;
struct cxl_port *port;
struct cxl_regs regs;
- struct access_coordinate sw_coord;
- struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
long link_latency;
};
@@ -884,8 +882,6 @@ void cxl_switch_parse_cdat(struct cxl_port *port);
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
struct access_coordinate *coord);
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
- struct access_coordinate *coord);
void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 20fb3b35e89e04..36cee9c30cebd2 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -401,7 +401,7 @@ enum cxl_devtype {
*/
struct cxl_dpa_perf {
struct range dpa_range;
- struct access_coordinate coord;
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
int qos_class;
};
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 93ebedc5ec8ca3..47c126d37b59ab 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -329,14 +329,14 @@ static unsigned long dax_get_unmapped_area(struct file *filp,
if ((off + len_align) < off)
goto out;
- addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
- pgoff, flags);
+ addr_align = mm_get_unmapped_area(current->mm, filp, addr, len_align,
+ pgoff, flags);
if (!IS_ERR_VALUE(addr_align)) {
addr_align += (off - addr_align) & (align - 1);
return addr_align;
}
out:
- return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
}
static const struct address_space_operations dev_dax_aops = {
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index 42ee360cf4e3db..4fe9d040e37545 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -55,36 +55,14 @@ static LIST_HEAD(kmem_memory_types);
static struct memory_dev_type *kmem_find_alloc_memory_type(int adist)
{
- bool found = false;
- struct memory_dev_type *mtype;
-
- mutex_lock(&kmem_memory_type_lock);
- list_for_each_entry(mtype, &kmem_memory_types, list) {
- if (mtype->adistance == adist) {
- found = true;
- break;
- }
- }
- if (!found) {
- mtype = alloc_memory_type(adist);
- if (!IS_ERR(mtype))
- list_add(&mtype->list, &kmem_memory_types);
- }
- mutex_unlock(&kmem_memory_type_lock);
-
- return mtype;
+ guard(mutex)(&kmem_memory_type_lock);
+ return mt_find_alloc_memory_type(adist, &kmem_memory_types);
}
static void kmem_put_memory_types(void)
{
- struct memory_dev_type *mtype, *mtn;
-
- mutex_lock(&kmem_memory_type_lock);
- list_for_each_entry_safe(mtype, mtn, &kmem_memory_types, list) {
- list_del(&mtype->list);
- put_memory_type(mtype);
- }
- mutex_unlock(&kmem_memory_type_lock);
+ guard(mutex)(&kmem_memory_type_lock);
+ mt_put_memory_types(&kmem_memory_types);
}
static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
index 9c2a0c082a768f..ed4b323886e430 100644
--- a/drivers/dma-buf/st-dma-fence-chain.c
+++ b/drivers/dma-buf/st-dma-fence-chain.c
@@ -84,11 +84,11 @@ static int sanitycheck(void *arg)
return -ENOMEM;
chain = mock_chain(NULL, f, 1);
- if (!chain)
+ if (chain)
+ dma_fence_enable_sw_signaling(chain);
+ else
err = -ENOMEM;
- dma_fence_enable_sw_signaling(chain);
-
dma_fence_signal(f);
dma_fence_put(f);
diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c
index 78a938969d7d76..1398814d8fbb63 100644
--- a/drivers/dma/idma64.c
+++ b/drivers/dma/idma64.c
@@ -171,6 +171,10 @@ static irqreturn_t idma64_irq(int irq, void *dev)
u32 status_err;
unsigned short i;
+ /* Since IRQ may be shared, check if DMA controller is powered on */
+ if (status == GENMASK(31, 0))
+ return IRQ_NONE;
+
dev_vdbg(idma64->dma.dev, "%s: status=%#x\n", __func__, status);
/* Check if we have any interrupt from the DMA controller */
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 8078ab9acfbc37..c095a2c8f65956 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -342,7 +342,7 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid)
if (!evl)
return;
- spin_lock(&evl->lock);
+ mutex_lock(&evl->lock);
status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
t = status.tail;
h = status.head;
@@ -354,9 +354,8 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid)
set_bit(h, evl->bmap);
h = (h + 1) % size;
}
- spin_unlock(&evl->lock);
-
drain_workqueue(wq->wq);
+ mutex_unlock(&evl->lock);
}
static int idxd_cdev_release(struct inode *node, struct file *filep)
diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c
index f3f25ee676f30e..ad4245cb301d50 100644
--- a/drivers/dma/idxd/debugfs.c
+++ b/drivers/dma/idxd/debugfs.c
@@ -66,7 +66,7 @@ static int debugfs_evl_show(struct seq_file *s, void *d)
if (!evl || !evl->log)
return 0;
- spin_lock(&evl->lock);
+ mutex_lock(&evl->lock);
evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
t = evl_status.tail;
@@ -87,7 +87,7 @@ static int debugfs_evl_show(struct seq_file *s, void *d)
dump_event_entry(idxd, s, i, &count, processed);
}
- spin_unlock(&evl->lock);
+ mutex_unlock(&evl->lock);
return 0;
}
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index ecfdf4a8f1f838..c41ef195eeb9f2 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -775,7 +775,7 @@ static int idxd_device_evl_setup(struct idxd_device *idxd)
goto err_alloc;
}
- spin_lock(&evl->lock);
+ mutex_lock(&evl->lock);
evl->log = addr;
evl->dma = dma_addr;
evl->log_size = size;
@@ -796,7 +796,7 @@ static int idxd_device_evl_setup(struct idxd_device *idxd)
gencfg.evl_en = 1;
iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
- spin_unlock(&evl->lock);
+ mutex_unlock(&evl->lock);
return 0;
err_alloc:
@@ -819,7 +819,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd)
if (!gencfg.evl_en)
return;
- spin_lock(&evl->lock);
+ mutex_lock(&evl->lock);
gencfg.evl_en = 0;
iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
@@ -836,7 +836,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd)
evl_dma = evl->dma;
evl->log = NULL;
evl->size = IDXD_EVL_SIZE_MIN;
- spin_unlock(&evl->lock);
+ mutex_unlock(&evl->lock);
dma_free_coherent(dev, evl_log_size, evl_log, evl_dma);
}
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index a4099a1e2340fd..7b98944135eb44 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -293,7 +293,7 @@ struct idxd_driver_data {
struct idxd_evl {
/* Lock to protect event log access. */
- spinlock_t lock;
+ struct mutex lock;
void *log;
dma_addr_t dma;
/* Total size of event log = number of entries * entry size. */
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 4954adc6bb609e..264c4e47d7cca5 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -354,7 +354,7 @@ static int idxd_init_evl(struct idxd_device *idxd)
if (!evl)
return -ENOMEM;
- spin_lock_init(&evl->lock);
+ mutex_init(&evl->lock);
evl->size = IDXD_EVL_SIZE_MIN;
idxd_name = dev_name(idxd_confdev(idxd));
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 348aa21389a9fc..8dc029c8655151 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -363,7 +363,7 @@ static void process_evl_entries(struct idxd_device *idxd)
evl_status.bits = 0;
evl_status.int_pending = 1;
- spin_lock(&evl->lock);
+ mutex_lock(&evl->lock);
/* Clear interrupt pending bit */
iowrite32(evl_status.bits_upper32,
idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32));
@@ -380,7 +380,7 @@ static void process_evl_entries(struct idxd_device *idxd)
evl_status.head = h;
iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
- spin_unlock(&evl->lock);
+ mutex_unlock(&evl->lock);
}
irqreturn_t idxd_misc_thread(int vec, void *data)
diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c
index fdda6d60426295..5e94247e1ea703 100644
--- a/drivers/dma/idxd/perfmon.c
+++ b/drivers/dma/idxd/perfmon.c
@@ -528,14 +528,11 @@ static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node)
return 0;
target = cpumask_any_but(cpu_online_mask, cpu);
-
/* migrate events if there is a valid target */
- if (target < nr_cpu_ids)
+ if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &perfmon_dsa_cpu_mask);
- else
- target = -1;
-
- perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target);
+ perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target);
+ }
return 0;
}
diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c
index 4e76c4ec2d3966..e001f4f7aa640f 100644
--- a/drivers/dma/owl-dma.c
+++ b/drivers/dma/owl-dma.c
@@ -250,7 +250,7 @@ static void pchan_update(struct owl_dma_pchan *pchan, u32 reg,
else
regval &= ~val;
- writel(val, pchan->base + reg);
+ writel(regval, pchan->base + reg);
}
static void pchan_writel(struct owl_dma_pchan *pchan, u32 reg, u32 data)
@@ -274,7 +274,7 @@ static void dma_update(struct owl_dma *od, u32 reg, u32 val, bool state)
else
regval &= ~val;
- writel(val, od->base + reg);
+ writel(regval, od->base + reg);
}
static void dma_writel(struct owl_dma *od, u32 reg, u32 data)
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 5f6d7f1e095f90..ad8e3da1b2cd22 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1053,9 +1053,6 @@ static bool _trigger(struct pl330_thread *thrd)
thrd->req_running = idx;
- if (desc->rqtype == DMA_MEM_TO_DEV || desc->rqtype == DMA_DEV_TO_MEM)
- UNTIL(thrd, PL330_STATE_WFP);
-
return true;
}
diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c
index 88547a23825b18..3642508e88bb22 100644
--- a/drivers/dma/tegra186-gpc-dma.c
+++ b/drivers/dma/tegra186-gpc-dma.c
@@ -746,6 +746,9 @@ static int tegra_dma_get_residual(struct tegra_dma_channel *tdc)
bytes_xfer = dma_desc->bytes_xfer +
sg_req[dma_desc->sg_idx].len - (wcount * 4);
+ if (dma_desc->bytes_req == bytes_xfer)
+ return 0;
+
residual = dma_desc->bytes_req - (bytes_xfer % dma_desc->bytes_req);
return residual;
diff --git a/drivers/dma/xilinx/xdma-regs.h b/drivers/dma/xilinx/xdma-regs.h
index 98f5f6fb9ff9c7..6ad08878e93862 100644
--- a/drivers/dma/xilinx/xdma-regs.h
+++ b/drivers/dma/xilinx/xdma-regs.h
@@ -117,6 +117,9 @@ struct xdma_hw_desc {
CHAN_CTRL_IE_WRITE_ERROR | \
CHAN_CTRL_IE_DESC_ERROR)
+/* bits of the channel status register */
+#define XDMA_CHAN_STATUS_BUSY BIT(0)
+
#define XDMA_CHAN_STATUS_MASK CHAN_CTRL_START
#define XDMA_CHAN_ERROR_MASK (CHAN_CTRL_IE_DESC_ALIGN_MISMATCH | \
diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c
index 170017ff2aad6e..313b217388fe95 100644
--- a/drivers/dma/xilinx/xdma.c
+++ b/drivers/dma/xilinx/xdma.c
@@ -71,6 +71,8 @@ struct xdma_chan {
enum dma_transfer_direction dir;
struct dma_slave_config cfg;
u32 irq;
+ struct completion last_interrupt;
+ bool stop_requested;
};
/**
@@ -376,6 +378,8 @@ static int xdma_xfer_start(struct xdma_chan *xchan)
return ret;
xchan->busy = true;
+ xchan->stop_requested = false;
+ reinit_completion(&xchan->last_interrupt);
return 0;
}
@@ -387,7 +391,6 @@ static int xdma_xfer_start(struct xdma_chan *xchan)
static int xdma_xfer_stop(struct xdma_chan *xchan)
{
int ret;
- u32 val;
struct xdma_device *xdev = xchan->xdev_hdl;
/* clear run stop bit to prevent any further auto-triggering */
@@ -395,13 +398,7 @@ static int xdma_xfer_stop(struct xdma_chan *xchan)
CHAN_CTRL_RUN_STOP);
if (ret)
return ret;
-
- /* Clear the channel status register */
- ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_STATUS_RC, &val);
- if (ret)
- return ret;
-
- return 0;
+ return ret;
}
/**
@@ -474,6 +471,8 @@ static int xdma_alloc_channels(struct xdma_device *xdev,
xchan->xdev_hdl = xdev;
xchan->base = base + i * XDMA_CHAN_STRIDE;
xchan->dir = dir;
+ xchan->stop_requested = false;
+ init_completion(&xchan->last_interrupt);
ret = xdma_channel_init(xchan);
if (ret)
@@ -521,6 +520,7 @@ static int xdma_terminate_all(struct dma_chan *chan)
spin_lock_irqsave(&xdma_chan->vchan.lock, flags);
xdma_chan->busy = false;
+ xdma_chan->stop_requested = true;
vd = vchan_next_desc(&xdma_chan->vchan);
if (vd) {
list_del(&vd->node);
@@ -542,17 +542,26 @@ static int xdma_terminate_all(struct dma_chan *chan)
static void xdma_synchronize(struct dma_chan *chan)
{
struct xdma_chan *xdma_chan = to_xdma_chan(chan);
+ struct xdma_device *xdev = xdma_chan->xdev_hdl;
+ int st = 0;
+
+ /* If the engine continues running, wait for the last interrupt */
+ regmap_read(xdev->rmap, xdma_chan->base + XDMA_CHAN_STATUS, &st);
+ if (st & XDMA_CHAN_STATUS_BUSY)
+ wait_for_completion_timeout(&xdma_chan->last_interrupt, msecs_to_jiffies(1000));
vchan_synchronize(&xdma_chan->vchan);
}
/**
- * xdma_fill_descs - Fill hardware descriptors with contiguous memory block addresses
- * @sw_desc: tx descriptor state container
- * @src_addr: Value for a ->src_addr field of a first descriptor
- * @dst_addr: Value for a ->dst_addr field of a first descriptor
- * @size: Total size of a contiguous memory block
- * @filled_descs_num: Number of filled hardware descriptors for corresponding sw_desc
+ * xdma_fill_descs() - Fill hardware descriptors for one contiguous memory chunk.
+ * More than one descriptor will be used if the size is bigger
+ * than XDMA_DESC_BLEN_MAX.
+ * @sw_desc: Descriptor container
+ * @src_addr: First value for the ->src_addr field
+ * @dst_addr: First value for the ->dst_addr field
+ * @size: Size of the contiguous memory block
+ * @filled_descs_num: Index of the first descriptor to take care of in @sw_desc
*/
static inline u32 xdma_fill_descs(struct xdma_desc *sw_desc, u64 src_addr,
u64 dst_addr, u32 size, u32 filled_descs_num)
@@ -704,7 +713,7 @@ xdma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t address,
desc_num = 0;
for (i = 0; i < periods; i++) {
desc_num += xdma_fill_descs(sw_desc, *src, *dst, period_size, desc_num);
- addr += i * period_size;
+ addr += period_size;
}
tx_desc = vchan_tx_prep(&xdma_chan->vchan, &sw_desc->vdesc, flags);
@@ -876,6 +885,9 @@ static irqreturn_t xdma_channel_isr(int irq, void *dev_id)
u32 st;
bool repeat_tx;
+ if (xchan->stop_requested)
+ complete(&xchan->last_interrupt);
+
spin_lock(&xchan->vchan.lock);
/* get submitted request */
diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
index b82815e64d24e8..eb0637d90342a6 100644
--- a/drivers/dma/xilinx/xilinx_dpdma.c
+++ b/drivers/dma/xilinx/xilinx_dpdma.c
@@ -214,7 +214,8 @@ struct xilinx_dpdma_tx_desc {
* @running: true if the channel is running
* @first_frame: flag for the first frame of stream
* @video_group: flag if multi-channel operation is needed for video channels
- * @lock: lock to access struct xilinx_dpdma_chan
+ * @lock: lock to access struct xilinx_dpdma_chan. Must be taken before
+ * @vchan.lock, if both are to be held.
* @desc_pool: descriptor allocation pool
* @err_task: error IRQ bottom half handler
* @desc: References to descriptors being processed
@@ -1097,12 +1098,14 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan)
* Complete the active descriptor, if any, promote the pending
* descriptor to active, and queue the next transfer, if any.
*/
+ spin_lock(&chan->vchan.lock);
if (chan->desc.active)
vchan_cookie_complete(&chan->desc.active->vdesc);
chan->desc.active = pending;
chan->desc.pending = NULL;
xilinx_dpdma_chan_queue_transfer(chan);
+ spin_unlock(&chan->vchan.lock);
out:
spin_unlock_irqrestore(&chan->lock, flags);
@@ -1264,10 +1267,12 @@ static void xilinx_dpdma_issue_pending(struct dma_chan *dchan)
struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
unsigned long flags;
- spin_lock_irqsave(&chan->vchan.lock, flags);
+ spin_lock_irqsave(&chan->lock, flags);
+ spin_lock(&chan->vchan.lock);
if (vchan_issue_pending(&chan->vchan))
xilinx_dpdma_chan_queue_transfer(chan);
- spin_unlock_irqrestore(&chan->vchan.lock, flags);
+ spin_unlock(&chan->vchan.lock);
+ spin_unlock_irqrestore(&chan->lock, flags);
}
static int xilinx_dpdma_config(struct dma_chan *dchan,
@@ -1495,7 +1500,9 @@ static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t)
XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id);
spin_lock_irqsave(&chan->lock, flags);
+ spin_lock(&chan->vchan.lock);
xilinx_dpdma_chan_queue_transfer(chan);
+ spin_unlock(&chan->vchan.lock);
spin_unlock_irqrestore(&chan->lock, flags);
}
diff --git a/drivers/dpll/Kconfig b/drivers/dpll/Kconfig
index a4cae73f20d3d0..20607ed5424358 100644
--- a/drivers/dpll/Kconfig
+++ b/drivers/dpll/Kconfig
@@ -4,4 +4,4 @@
#
config DPLL
- bool
+ bool
diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
index 64eaca80d736c5..d0f6693ca14262 100644
--- a/drivers/dpll/dpll_core.c
+++ b/drivers/dpll/dpll_core.c
@@ -42,6 +42,7 @@ struct dpll_pin_registration {
struct list_head list;
const struct dpll_pin_ops *ops;
void *priv;
+ void *cookie;
};
struct dpll_device *dpll_device_get_by_id(int id)
@@ -54,12 +55,14 @@ struct dpll_device *dpll_device_get_by_id(int id)
static struct dpll_pin_registration *
dpll_pin_registration_find(struct dpll_pin_ref *ref,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv,
+ void *cookie)
{
struct dpll_pin_registration *reg;
list_for_each_entry(reg, &ref->registration_list, list) {
- if (reg->ops == ops && reg->priv == priv)
+ if (reg->ops == ops && reg->priv == priv &&
+ reg->cookie == cookie)
return reg;
}
return NULL;
@@ -67,7 +70,8 @@ dpll_pin_registration_find(struct dpll_pin_ref *ref,
static int
dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv,
+ void *cookie)
{
struct dpll_pin_registration *reg;
struct dpll_pin_ref *ref;
@@ -78,7 +82,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin,
xa_for_each(xa_pins, i, ref) {
if (ref->pin != pin)
continue;
- reg = dpll_pin_registration_find(ref, ops, priv);
+ reg = dpll_pin_registration_find(ref, ops, priv, cookie);
if (reg) {
refcount_inc(&ref->refcount);
return 0;
@@ -111,6 +115,7 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin,
}
reg->ops = ops;
reg->priv = priv;
+ reg->cookie = cookie;
if (ref_exists)
refcount_inc(&ref->refcount);
list_add_tail(&reg->list, &ref->registration_list);
@@ -119,7 +124,8 @@ dpll_xa_ref_pin_add(struct xarray *xa_pins, struct dpll_pin *pin,
}
static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv,
+ void *cookie)
{
struct dpll_pin_registration *reg;
struct dpll_pin_ref *ref;
@@ -128,7 +134,7 @@ static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin,
xa_for_each(xa_pins, i, ref) {
if (ref->pin != pin)
continue;
- reg = dpll_pin_registration_find(ref, ops, priv);
+ reg = dpll_pin_registration_find(ref, ops, priv, cookie);
if (WARN_ON(!reg))
return -EINVAL;
list_del(&reg->list);
@@ -146,7 +152,7 @@ static int dpll_xa_ref_pin_del(struct xarray *xa_pins, struct dpll_pin *pin,
static int
dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv, void *cookie)
{
struct dpll_pin_registration *reg;
struct dpll_pin_ref *ref;
@@ -157,7 +163,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll,
xa_for_each(xa_dplls, i, ref) {
if (ref->dpll != dpll)
continue;
- reg = dpll_pin_registration_find(ref, ops, priv);
+ reg = dpll_pin_registration_find(ref, ops, priv, cookie);
if (reg) {
refcount_inc(&ref->refcount);
return 0;
@@ -190,6 +196,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll,
}
reg->ops = ops;
reg->priv = priv;
+ reg->cookie = cookie;
if (ref_exists)
refcount_inc(&ref->refcount);
list_add_tail(&reg->list, &ref->registration_list);
@@ -199,7 +206,7 @@ dpll_xa_ref_dpll_add(struct xarray *xa_dplls, struct dpll_device *dpll,
static void
dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv, void *cookie)
{
struct dpll_pin_registration *reg;
struct dpll_pin_ref *ref;
@@ -208,7 +215,7 @@ dpll_xa_ref_dpll_del(struct xarray *xa_dplls, struct dpll_device *dpll,
xa_for_each(xa_dplls, i, ref) {
if (ref->dpll != dpll)
continue;
- reg = dpll_pin_registration_find(ref, ops, priv);
+ reg = dpll_pin_registration_find(ref, ops, priv, cookie);
if (WARN_ON(!reg))
return;
list_del(&reg->list);
@@ -594,14 +601,14 @@ EXPORT_SYMBOL_GPL(dpll_pin_put);
static int
__dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv, void *cookie)
{
int ret;
- ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv);
+ ret = dpll_xa_ref_pin_add(&dpll->pin_refs, pin, ops, priv, cookie);
if (ret)
return ret;
- ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv);
+ ret = dpll_xa_ref_dpll_add(&pin->dpll_refs, dpll, ops, priv, cookie);
if (ret)
goto ref_pin_del;
xa_set_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED);
@@ -610,7 +617,7 @@ __dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
return ret;
ref_pin_del:
- dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv);
+ dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv, cookie);
return ret;
}
@@ -642,7 +649,7 @@ dpll_pin_register(struct dpll_device *dpll, struct dpll_pin *pin,
dpll->clock_id == pin->clock_id)))
ret = -EINVAL;
else
- ret = __dpll_pin_register(dpll, pin, ops, priv);
+ ret = __dpll_pin_register(dpll, pin, ops, priv, NULL);
mutex_unlock(&dpll_lock);
return ret;
@@ -651,11 +658,11 @@ EXPORT_SYMBOL_GPL(dpll_pin_register);
static void
__dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
- const struct dpll_pin_ops *ops, void *priv)
+ const struct dpll_pin_ops *ops, void *priv, void *cookie)
{
ASSERT_DPLL_PIN_REGISTERED(pin);
- dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv);
- dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv);
+ dpll_xa_ref_pin_del(&dpll->pin_refs, pin, ops, priv, cookie);
+ dpll_xa_ref_dpll_del(&pin->dpll_refs, dpll, ops, priv, cookie);
if (xa_empty(&pin->dpll_refs))
xa_clear_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED);
}
@@ -680,7 +687,7 @@ void dpll_pin_unregister(struct dpll_device *dpll, struct dpll_pin *pin,
mutex_lock(&dpll_lock);
dpll_pin_delete_ntf(pin);
- __dpll_pin_unregister(dpll, pin, ops, priv);
+ __dpll_pin_unregister(dpll, pin, ops, priv, NULL);
mutex_unlock(&dpll_lock);
}
EXPORT_SYMBOL_GPL(dpll_pin_unregister);
@@ -716,12 +723,12 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
return -EINVAL;
mutex_lock(&dpll_lock);
- ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv);
+ ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv, pin);
if (ret)
goto unlock;
refcount_inc(&pin->refcount);
xa_for_each(&parent->dpll_refs, i, ref) {
- ret = __dpll_pin_register(ref->dpll, pin, ops, priv);
+ ret = __dpll_pin_register(ref->dpll, pin, ops, priv, parent);
if (ret) {
stop = i;
goto dpll_unregister;
@@ -735,11 +742,12 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
dpll_unregister:
xa_for_each(&parent->dpll_refs, i, ref)
if (i < stop) {
- __dpll_pin_unregister(ref->dpll, pin, ops, priv);
+ __dpll_pin_unregister(ref->dpll, pin, ops, priv,
+ parent);
dpll_pin_delete_ntf(pin);
}
refcount_dec(&pin->refcount);
- dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv);
+ dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv, pin);
unlock:
mutex_unlock(&dpll_lock);
return ret;
@@ -764,10 +772,10 @@ void dpll_pin_on_pin_unregister(struct dpll_pin *parent, struct dpll_pin *pin,
mutex_lock(&dpll_lock);
dpll_pin_delete_ntf(pin);
- dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv);
+ dpll_xa_ref_pin_del(&pin->parent_refs, parent, ops, priv, pin);
refcount_dec(&pin->refcount);
xa_for_each(&pin->dpll_refs, i, ref)
- __dpll_pin_unregister(ref->dpll, pin, ops, priv);
+ __dpll_pin_unregister(ref->dpll, pin, ops, priv, parent);
mutex_unlock(&dpll_lock);
}
EXPORT_SYMBOL_GPL(dpll_pin_on_pin_unregister);
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 7bc71f4be64a07..38d19410a2be68 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -2060,6 +2060,8 @@ static void bus_reset_work(struct work_struct *work)
ohci->generation = generation;
reg_write(ohci, OHCI1394_IntEventClear, OHCI1394_busReset);
+ if (param_debug & OHCI_PARAM_DEBUG_BUSRESETS)
+ reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset);
if (ohci->quirks & QUIRK_RESET_PACKET)
ohci->request_generation = generation;
@@ -2125,12 +2127,14 @@ static irqreturn_t irq_handler(int irq, void *data)
return IRQ_NONE;
/*
- * busReset and postedWriteErr must not be cleared yet
+ * busReset and postedWriteErr events must not be cleared yet
* (OHCI 1.1 clauses 7.2.3.2 and 13.2.8.1)
*/
reg_write(ohci, OHCI1394_IntEventClear,
event & ~(OHCI1394_busReset | OHCI1394_postedWriteErr));
log_irqs(ohci, event);
+ if (event & OHCI1394_busReset)
+ reg_write(ohci, OHCI1394_IntMaskClear, OHCI1394_busReset);
if (event & OHCI1394_selfIDComplete)
queue_work(selfid_workqueue, &ohci->bus_reset_work);
diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index f2556a8e940156..9bc2e10381afd9 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -790,7 +790,7 @@ static void ffa_notification_info_get(void)
part_id = packed_id_list[ids_processed++];
- if (!ids_count[list]) { /* Global Notification */
+ if (ids_count[list] == 1) { /* Global Notification */
__do_sched_recv_cb(part_id, 0, false);
continue;
}
diff --git a/drivers/firmware/arm_scmi/powercap.c b/drivers/firmware/arm_scmi/powercap.c
index ea9201e7044cbd..1fa79bba492e88 100644
--- a/drivers/firmware/arm_scmi/powercap.c
+++ b/drivers/firmware/arm_scmi/powercap.c
@@ -736,7 +736,7 @@ static void scmi_powercap_domain_init_fc(const struct scmi_protocol_handle *ph,
ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL,
POWERCAP_PAI_GET, 4, domain,
&fc[POWERCAP_FC_PAI].get_addr, NULL,
- &fc[POWERCAP_PAI_GET].rate_limit);
+ &fc[POWERCAP_FC_PAI].rate_limit);
*p_fc = fc;
}
diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c
index 35057351850335..130d13e9cd6beb 100644
--- a/drivers/firmware/arm_scmi/raw_mode.c
+++ b/drivers/firmware/arm_scmi/raw_mode.c
@@ -921,7 +921,7 @@ static int scmi_dbg_raw_mode_open(struct inode *inode, struct file *filp)
rd->raw = raw;
filp->private_data = rd;
- return 0;
+ return nonseekable_open(inode, filp);
}
static int scmi_dbg_raw_mode_release(struct inode *inode, struct file *filp)
@@ -950,6 +950,7 @@ static const struct file_operations scmi_dbg_raw_mode_reset_fops = {
.open = scmi_dbg_raw_mode_open,
.release = scmi_dbg_raw_mode_release,
.write = scmi_dbg_raw_mode_reset_write,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -959,6 +960,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_fops = {
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_write,
.poll = scmi_dbg_raw_mode_message_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -975,6 +977,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_async_fops = {
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_async_write,
.poll = scmi_dbg_raw_mode_message_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -998,6 +1001,7 @@ static const struct file_operations scmi_dbg_raw_mode_notification_fops = {
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_notif_read,
.poll = scmi_test_dbg_raw_mode_notif_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -1021,6 +1025,7 @@ static const struct file_operations scmi_dbg_raw_mode_errors_fops = {
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_errors_read,
.poll = scmi_test_dbg_raw_mode_errors_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c
index 7e185285955021..c41e7b2091cdd1 100644
--- a/drivers/firmware/efi/libstub/randomalloc.c
+++ b/drivers/firmware/efi/libstub/randomalloc.c
@@ -120,7 +120,7 @@ efi_status_t efi_random_alloc(unsigned long size,
continue;
}
- target = round_up(max(md->phys_addr, alloc_min), align) + target_slot * align;
+ target = round_up(max_t(u64, md->phys_addr, alloc_min), align) + target_slot * align;
pages = size / EFI_PAGE_SIZE;
status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 6a6ffc6707bd0e..d5a8182cf2e1cc 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -496,6 +496,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
hdr->vid_mode = 0xffff;
hdr->type_of_loader = 0x21;
+ hdr->initrd_addr_max = INT_MAX;
/* Convert unicode cmdline to ascii */
cmdline_ptr = efi_convert_cmdline(image, &options_size);
diff --git a/drivers/firmware/efi/unaccepted_memory.c b/drivers/firmware/efi/unaccepted_memory.c
index 5b439d04079c84..50f6503fe49f5e 100644
--- a/drivers/firmware/efi/unaccepted_memory.c
+++ b/drivers/firmware/efi/unaccepted_memory.c
@@ -4,6 +4,7 @@
#include <linux/memblock.h>
#include <linux/spinlock.h>
#include <linux/crash_dump.h>
+#include <linux/nmi.h>
#include <asm/unaccepted_memory.h>
/* Protects unaccepted memory bitmap and accepting_list */
@@ -149,6 +150,9 @@ retry:
}
list_del(&range.list);
+
+ touch_softlockup_watchdog();
+
spin_unlock_irqrestore(&unaccepted_memory_lock, flags);
}
diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c
index fbeeaee4ac8560..835a19a7a3a09e 100644
--- a/drivers/firmware/microchip/mpfs-auto-update.c
+++ b/drivers/firmware/microchip/mpfs-auto-update.c
@@ -206,10 +206,12 @@ static int mpfs_auto_update_verify_image(struct fw_upload *fw_uploader)
if (ret | response->resp_status) {
dev_warn(priv->dev, "Verification of Upgrade Image failed!\n");
ret = ret ? ret : -EBADMSG;
+ goto free_message;
}
dev_info(priv->dev, "Verification of Upgrade Image passed!\n");
+free_message:
devm_kfree(priv->dev, message);
free_response:
devm_kfree(priv->dev, response);
@@ -265,7 +267,7 @@ static int mpfs_auto_update_set_image_address(struct mpfs_auto_update_priv *priv
AUTO_UPDATE_DIRECTORY_WIDTH);
memset(buffer + AUTO_UPDATE_BLANK_DIRECTORY, 0x0, AUTO_UPDATE_DIRECTORY_WIDTH);
- dev_info(priv->dev, "Writing the image address (%x) to the flash directory (%llx)\n",
+ dev_info(priv->dev, "Writing the image address (0x%x) to the flash directory (0x%llx)\n",
image_address, directory_address);
ret = mtd_write(priv->flash, 0x0, erase_size, &bytes_written, (u_char *)buffer);
@@ -313,7 +315,7 @@ static int mpfs_auto_update_write_bitstream(struct fw_upload *fw_uploader, const
erase.len = round_up(size, (size_t)priv->flash->erasesize);
erase.addr = image_address;
- dev_info(priv->dev, "Erasing the flash at address (%x)\n", image_address);
+ dev_info(priv->dev, "Erasing the flash at address (0x%x)\n", image_address);
ret = mtd_erase(priv->flash, &erase);
if (ret)
goto out;
@@ -323,7 +325,7 @@ static int mpfs_auto_update_write_bitstream(struct fw_upload *fw_uploader, const
* will do all of that itself - including verifying that the bitstream
* is valid.
*/
- dev_info(priv->dev, "Writing the image to the flash at address (%x)\n", image_address);
+ dev_info(priv->dev, "Writing the image to the flash at address (0x%x)\n", image_address);
ret = mtd_write(priv->flash, (loff_t)image_address, size, &bytes_written, data);
if (ret)
goto out;
diff --git a/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c b/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c
index 32188f098ef349..bc550ad0dbe0c7 100644
--- a/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c
+++ b/drivers/firmware/qcom/qcom_qseecom_uefisecapp.c
@@ -221,6 +221,19 @@ struct qsee_rsp_uefi_query_variable_info {
* alignment of 8 bytes (64 bits) for GUIDs. Our definition of efi_guid_t,
* however, has an alignment of 4 byte (32 bits). So far, this seems to work
* fine here. See also the comment on the typedef of efi_guid_t.
+ *
+ * Note: It looks like uefisecapp is quite picky about how the memory passed to
+ * it is structured and aligned. In particular the request/response setup used
+ * for QSEE_CMD_UEFI_GET_VARIABLE. While qcom_qseecom_app_send(), in theory,
+ * accepts separate buffers/addresses for the request and response parts, in
+ * practice, however, it seems to expect them to be both part of a larger
+ * contiguous block. We initially allocated separate buffers for the request
+ * and response but this caused the QSEE_CMD_UEFI_GET_VARIABLE command to
+ * either not write any response to the response buffer or outright crash the
+ * device. Therefore, we now allocate a single contiguous block of DMA memory
+ * for both and properly align the data using the macros below. In particular,
+ * request and response structs are aligned at 8 byte (via __reqdata_offs()),
+ * following the driver that this has been reverse-engineered from.
*/
#define qcuefi_buf_align_fields(fields...) \
({ \
@@ -244,6 +257,12 @@ struct qsee_rsp_uefi_query_variable_info {
#define __array_offs(type, count, offset) \
__field_impl(sizeof(type) * (count), __alignof__(type), offset)
+#define __array_offs_aligned(type, count, align, offset) \
+ __field_impl(sizeof(type) * (count), align, offset)
+
+#define __reqdata_offs(size, offset) \
+ __array_offs_aligned(u8, size, 8, offset)
+
#define __array(type, count) __array_offs(type, count, NULL)
#define __field_offs(type, offset) __array_offs(type, 1, offset)
#define __field(type) __array_offs(type, 1, NULL)
@@ -277,10 +296,15 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e
unsigned long buffer_size = *data_size;
efi_status_t efi_status = EFI_SUCCESS;
unsigned long name_length;
+ dma_addr_t cmd_buf_dma;
+ size_t cmd_buf_size;
+ void *cmd_buf;
size_t guid_offs;
size_t name_offs;
size_t req_size;
size_t rsp_size;
+ size_t req_offs;
+ size_t rsp_offs;
ssize_t status;
if (!name || !guid)
@@ -304,17 +328,19 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e
__array(u8, buffer_size)
);
- req_data = kzalloc(req_size, GFP_KERNEL);
- if (!req_data) {
+ cmd_buf_size = qcuefi_buf_align_fields(
+ __reqdata_offs(req_size, &req_offs)
+ __reqdata_offs(rsp_size, &rsp_offs)
+ );
+
+ cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL);
+ if (!cmd_buf) {
efi_status = EFI_OUT_OF_RESOURCES;
goto out;
}
- rsp_data = kzalloc(rsp_size, GFP_KERNEL);
- if (!rsp_data) {
- efi_status = EFI_OUT_OF_RESOURCES;
- goto out_free_req;
- }
+ req_data = cmd_buf + req_offs;
+ rsp_data = cmd_buf + rsp_offs;
req_data->command_id = QSEE_CMD_UEFI_GET_VARIABLE;
req_data->data_size = buffer_size;
@@ -332,7 +358,9 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e
memcpy(((void *)req_data) + req_data->guid_offset, guid, req_data->guid_size);
- status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, rsp_size);
+ status = qcom_qseecom_app_send(qcuefi->client,
+ cmd_buf_dma + req_offs, req_size,
+ cmd_buf_dma + rsp_offs, rsp_size);
if (status) {
efi_status = EFI_DEVICE_ERROR;
goto out_free;
@@ -407,9 +435,7 @@ static efi_status_t qsee_uefi_get_variable(struct qcuefi_client *qcuefi, const e
memcpy(data, ((void *)rsp_data) + rsp_data->data_offset, rsp_data->data_size);
out_free:
- kfree(rsp_data);
-out_free_req:
- kfree(req_data);
+ qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma);
out:
return efi_status;
}
@@ -422,10 +448,15 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e
struct qsee_rsp_uefi_set_variable *rsp_data;
efi_status_t efi_status = EFI_SUCCESS;
unsigned long name_length;
+ dma_addr_t cmd_buf_dma;
+ size_t cmd_buf_size;
+ void *cmd_buf;
size_t name_offs;
size_t guid_offs;
size_t data_offs;
size_t req_size;
+ size_t req_offs;
+ size_t rsp_offs;
ssize_t status;
if (!name || !guid)
@@ -450,17 +481,19 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e
__array_offs(u8, data_size, &data_offs)
);
- req_data = kzalloc(req_size, GFP_KERNEL);
- if (!req_data) {
+ cmd_buf_size = qcuefi_buf_align_fields(
+ __reqdata_offs(req_size, &req_offs)
+ __reqdata_offs(sizeof(*rsp_data), &rsp_offs)
+ );
+
+ cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL);
+ if (!cmd_buf) {
efi_status = EFI_OUT_OF_RESOURCES;
goto out;
}
- rsp_data = kzalloc(sizeof(*rsp_data), GFP_KERNEL);
- if (!rsp_data) {
- efi_status = EFI_OUT_OF_RESOURCES;
- goto out_free_req;
- }
+ req_data = cmd_buf + req_offs;
+ rsp_data = cmd_buf + rsp_offs;
req_data->command_id = QSEE_CMD_UEFI_SET_VARIABLE;
req_data->attributes = attributes;
@@ -483,8 +516,9 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e
if (data_size)
memcpy(((void *)req_data) + req_data->data_offset, data, req_data->data_size);
- status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data,
- sizeof(*rsp_data));
+ status = qcom_qseecom_app_send(qcuefi->client,
+ cmd_buf_dma + req_offs, req_size,
+ cmd_buf_dma + rsp_offs, sizeof(*rsp_data));
if (status) {
efi_status = EFI_DEVICE_ERROR;
goto out_free;
@@ -507,9 +541,7 @@ static efi_status_t qsee_uefi_set_variable(struct qcuefi_client *qcuefi, const e
}
out_free:
- kfree(rsp_data);
-out_free_req:
- kfree(req_data);
+ qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma);
out:
return efi_status;
}
@@ -521,10 +553,15 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi,
struct qsee_req_uefi_get_next_variable *req_data;
struct qsee_rsp_uefi_get_next_variable *rsp_data;
efi_status_t efi_status = EFI_SUCCESS;
+ dma_addr_t cmd_buf_dma;
+ size_t cmd_buf_size;
+ void *cmd_buf;
size_t guid_offs;
size_t name_offs;
size_t req_size;
size_t rsp_size;
+ size_t req_offs;
+ size_t rsp_offs;
ssize_t status;
if (!name_size || !name || !guid)
@@ -545,17 +582,19 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi,
__array(*name, *name_size / sizeof(*name))
);
- req_data = kzalloc(req_size, GFP_KERNEL);
- if (!req_data) {
+ cmd_buf_size = qcuefi_buf_align_fields(
+ __reqdata_offs(req_size, &req_offs)
+ __reqdata_offs(rsp_size, &rsp_offs)
+ );
+
+ cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL);
+ if (!cmd_buf) {
efi_status = EFI_OUT_OF_RESOURCES;
goto out;
}
- rsp_data = kzalloc(rsp_size, GFP_KERNEL);
- if (!rsp_data) {
- efi_status = EFI_OUT_OF_RESOURCES;
- goto out_free_req;
- }
+ req_data = cmd_buf + req_offs;
+ rsp_data = cmd_buf + rsp_offs;
req_data->command_id = QSEE_CMD_UEFI_GET_NEXT_VARIABLE;
req_data->guid_offset = guid_offs;
@@ -572,7 +611,9 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi,
goto out_free;
}
- status = qcom_qseecom_app_send(qcuefi->client, req_data, req_size, rsp_data, rsp_size);
+ status = qcom_qseecom_app_send(qcuefi->client,
+ cmd_buf_dma + req_offs, req_size,
+ cmd_buf_dma + rsp_offs, rsp_size);
if (status) {
efi_status = EFI_DEVICE_ERROR;
goto out_free;
@@ -645,9 +686,7 @@ static efi_status_t qsee_uefi_get_next_variable(struct qcuefi_client *qcuefi,
}
out_free:
- kfree(rsp_data);
-out_free_req:
- kfree(req_data);
+ qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma);
out:
return efi_status;
}
@@ -659,26 +698,34 @@ static efi_status_t qsee_uefi_query_variable_info(struct qcuefi_client *qcuefi,
struct qsee_req_uefi_query_variable_info *req_data;
struct qsee_rsp_uefi_query_variable_info *rsp_data;
efi_status_t efi_status = EFI_SUCCESS;
+ dma_addr_t cmd_buf_dma;
+ size_t cmd_buf_size;
+ void *cmd_buf;
+ size_t req_offs;
+ size_t rsp_offs;
int status;
- req_data = kzalloc(sizeof(*req_data), GFP_KERNEL);
- if (!req_data) {
+ cmd_buf_size = qcuefi_buf_align_fields(
+ __reqdata_offs(sizeof(*req_data), &req_offs)
+ __reqdata_offs(sizeof(*rsp_data), &rsp_offs)
+ );
+
+ cmd_buf = qseecom_dma_alloc(qcuefi->client, cmd_buf_size, &cmd_buf_dma, GFP_KERNEL);
+ if (!cmd_buf) {
efi_status = EFI_OUT_OF_RESOURCES;
goto out;
}
- rsp_data = kzalloc(sizeof(*rsp_data), GFP_KERNEL);
- if (!rsp_data) {
- efi_status = EFI_OUT_OF_RESOURCES;
- goto out_free_req;
- }
+ req_data = cmd_buf + req_offs;
+ rsp_data = cmd_buf + rsp_offs;
req_data->command_id = QSEE_CMD_UEFI_QUERY_VARIABLE_INFO;
req_data->attributes = attr;
req_data->length = sizeof(*req_data);
- status = qcom_qseecom_app_send(qcuefi->client, req_data, sizeof(*req_data), rsp_data,
- sizeof(*rsp_data));
+ status = qcom_qseecom_app_send(qcuefi->client,
+ cmd_buf_dma + req_offs, sizeof(*req_data),
+ cmd_buf_dma + rsp_offs, sizeof(*rsp_data));
if (status) {
efi_status = EFI_DEVICE_ERROR;
goto out_free;
@@ -711,9 +758,7 @@ static efi_status_t qsee_uefi_query_variable_info(struct qcuefi_client *qcuefi,
*max_variable_size = rsp_data->max_variable_size;
out_free:
- kfree(rsp_data);
-out_free_req:
- kfree(req_data);
+ qseecom_dma_free(qcuefi->client, cmd_buf_size, cmd_buf, cmd_buf_dma);
out:
return efi_status;
}
diff --git a/drivers/firmware/qcom/qcom_scm.c b/drivers/firmware/qcom/qcom_scm.c
index 520de9b5633abc..90283f160a2286 100644
--- a/drivers/firmware/qcom/qcom_scm.c
+++ b/drivers/firmware/qcom/qcom_scm.c
@@ -1576,9 +1576,9 @@ EXPORT_SYMBOL_GPL(qcom_scm_qseecom_app_get_id);
/**
* qcom_scm_qseecom_app_send() - Send to and receive data from a given QSEE app.
* @app_id: The ID of the target app.
- * @req: Request buffer sent to the app (must be DMA-mappable).
+ * @req: DMA address of the request buffer sent to the app.
* @req_size: Size of the request buffer.
- * @rsp: Response buffer, written to by the app (must be DMA-mappable).
+ * @rsp: DMA address of the response buffer, written to by the app.
* @rsp_size: Size of the response buffer.
*
* Sends a request to the QSEE app associated with the given ID and read back
@@ -1589,33 +1589,13 @@ EXPORT_SYMBOL_GPL(qcom_scm_qseecom_app_get_id);
*
* Return: Zero on success, nonzero on failure.
*/
-int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp,
- size_t rsp_size)
+int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size,
+ dma_addr_t rsp, size_t rsp_size)
{
struct qcom_scm_qseecom_resp res = {};
struct qcom_scm_desc desc = {};
- dma_addr_t req_phys;
- dma_addr_t rsp_phys;
int status;
- /* Map request buffer */
- req_phys = dma_map_single(__scm->dev, req, req_size, DMA_TO_DEVICE);
- status = dma_mapping_error(__scm->dev, req_phys);
- if (status) {
- dev_err(__scm->dev, "qseecom: failed to map request buffer\n");
- return status;
- }
-
- /* Map response buffer */
- rsp_phys = dma_map_single(__scm->dev, rsp, rsp_size, DMA_FROM_DEVICE);
- status = dma_mapping_error(__scm->dev, rsp_phys);
- if (status) {
- dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE);
- dev_err(__scm->dev, "qseecom: failed to map response buffer\n");
- return status;
- }
-
- /* Set up SCM call data */
desc.owner = QSEECOM_TZ_OWNER_TZ_APPS;
desc.svc = QSEECOM_TZ_SVC_APP_ID_PLACEHOLDER;
desc.cmd = QSEECOM_TZ_CMD_APP_SEND;
@@ -1623,18 +1603,13 @@ int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp,
QCOM_SCM_RW, QCOM_SCM_VAL,
QCOM_SCM_RW, QCOM_SCM_VAL);
desc.args[0] = app_id;
- desc.args[1] = req_phys;
+ desc.args[1] = req;
desc.args[2] = req_size;
- desc.args[3] = rsp_phys;
+ desc.args[3] = rsp;
desc.args[4] = rsp_size;
- /* Perform call */
status = qcom_scm_qseecom_call(&desc, &res);
- /* Unmap buffers */
- dma_unmap_single(__scm->dev, rsp_phys, rsp_size, DMA_FROM_DEVICE);
- dma_unmap_single(__scm->dev, req_phys, req_size, DMA_TO_DEVICE);
-
if (status)
return status;
diff --git a/drivers/fpga/dfl-pci.c b/drivers/fpga/dfl-pci.c
index 98b8fd16183e41..80cac3a5f97678 100644
--- a/drivers/fpga/dfl-pci.c
+++ b/drivers/fpga/dfl-pci.c
@@ -78,6 +78,7 @@ static void cci_pci_free_irq(struct pci_dev *pcidev)
#define PCIE_DEVICE_ID_SILICOM_PAC_N5011 0x1001
#define PCIE_DEVICE_ID_INTEL_DFL 0xbcce
/* PCI Subdevice ID for PCIE_DEVICE_ID_INTEL_DFL */
+#define PCIE_SUBDEVICE_ID_INTEL_D5005 0x138d
#define PCIE_SUBDEVICE_ID_INTEL_N6000 0x1770
#define PCIE_SUBDEVICE_ID_INTEL_N6001 0x1771
#define PCIE_SUBDEVICE_ID_INTEL_C6100 0x17d4
@@ -102,6 +103,8 @@ static struct pci_device_id cci_pcie_id_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_SILICOM_DENMARK, PCIE_DEVICE_ID_SILICOM_PAC_N5010),},
{PCI_DEVICE(PCI_VENDOR_ID_SILICOM_DENMARK, PCIE_DEVICE_ID_SILICOM_PAC_N5011),},
{PCI_DEVICE_SUB(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_DFL,
+ PCI_VENDOR_ID_INTEL, PCIE_SUBDEVICE_ID_INTEL_D5005),},
+ {PCI_DEVICE_SUB(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_DFL,
PCI_VENDOR_ID_INTEL, PCIE_SUBDEVICE_ID_INTEL_N6000),},
{PCI_DEVICE_SUB(PCI_VENDOR_ID_INTEL, PCIE_DEVICE_ID_INTEL_DFL_VF,
PCI_VENDOR_ID_INTEL, PCIE_SUBDEVICE_ID_INTEL_N6000),},
diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c
index 1ee62cd58582b6..25db014494a4de 100644
--- a/drivers/gpio/gpio-crystalcove.c
+++ b/drivers/gpio/gpio-crystalcove.c
@@ -92,7 +92,7 @@ static inline int to_reg(int gpio, enum ctrl_register reg_type)
case 0x5e:
return GPIOPANELCTL;
default:
- return -EOPNOTSUPP;
+ return -ENOTSUPP;
}
}
diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c
index 5ef8af8249806a..c097e310c9e841 100644
--- a/drivers/gpio/gpio-lpc32xx.c
+++ b/drivers/gpio/gpio-lpc32xx.c
@@ -529,6 +529,7 @@ static const struct of_device_id lpc32xx_gpio_of_match[] = {
{ .compatible = "nxp,lpc3220-gpio", },
{ },
};
+MODULE_DEVICE_TABLE(of, lpc32xx_gpio_of_match);
static struct platform_driver lpc32xx_gpio_driver = {
.driver = {
diff --git a/drivers/gpio/gpio-tangier.c b/drivers/gpio/gpio-tangier.c
index b75e0b12087ac7..4b29abafecf6a4 100644
--- a/drivers/gpio/gpio-tangier.c
+++ b/drivers/gpio/gpio-tangier.c
@@ -195,7 +195,8 @@ static int tng_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
static void tng_irq_ack(struct irq_data *d)
{
- struct tng_gpio *priv = irq_data_get_irq_chip_data(d);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct tng_gpio *priv = gpiochip_get_data(gc);
irq_hw_number_t gpio = irqd_to_hwirq(d);
void __iomem *gisr;
u8 shift;
@@ -227,7 +228,8 @@ static void tng_irq_unmask_mask(struct tng_gpio *priv, u32 gpio, bool unmask)
static void tng_irq_mask(struct irq_data *d)
{
- struct tng_gpio *priv = irq_data_get_irq_chip_data(d);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct tng_gpio *priv = gpiochip_get_data(gc);
irq_hw_number_t gpio = irqd_to_hwirq(d);
tng_irq_unmask_mask(priv, gpio, false);
@@ -236,7 +238,8 @@ static void tng_irq_mask(struct irq_data *d)
static void tng_irq_unmask(struct irq_data *d)
{
- struct tng_gpio *priv = irq_data_get_irq_chip_data(d);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct tng_gpio *priv = gpiochip_get_data(gc);
irq_hw_number_t gpio = irqd_to_hwirq(d);
gpiochip_enable_irq(&priv->chip, gpio);
diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index d87dd06db40d07..9130c691a2dd32 100644
--- a/drivers/gpio/gpio-tegra186.c
+++ b/drivers/gpio/gpio-tegra186.c
@@ -36,12 +36,6 @@
#define TEGRA186_GPIO_SCR_SEC_REN BIT(27)
#define TEGRA186_GPIO_SCR_SEC_G1W BIT(9)
#define TEGRA186_GPIO_SCR_SEC_G1R BIT(1)
-#define TEGRA186_GPIO_FULL_ACCESS (TEGRA186_GPIO_SCR_SEC_WEN | \
- TEGRA186_GPIO_SCR_SEC_REN | \
- TEGRA186_GPIO_SCR_SEC_G1R | \
- TEGRA186_GPIO_SCR_SEC_G1W)
-#define TEGRA186_GPIO_SCR_SEC_ENABLE (TEGRA186_GPIO_SCR_SEC_WEN | \
- TEGRA186_GPIO_SCR_SEC_REN)
/* control registers */
#define TEGRA186_GPIO_ENABLE_CONFIG 0x00
@@ -177,10 +171,18 @@ static inline bool tegra186_gpio_is_accessible(struct tegra_gpio *gpio, unsigned
value = __raw_readl(secure + TEGRA186_GPIO_SCR);
- if ((value & TEGRA186_GPIO_SCR_SEC_ENABLE) == 0)
- return true;
+ /*
+ * When SCR_SEC_[R|W]EN is unset, then we have full read/write access to all the
+ * registers for given GPIO pin.
+ * When SCR_SEC[R|W]EN is set, then there is need to further check the accompanying
+ * SCR_SEC_G1[R|W] bit to determine read/write access to all the registers for given
+ * GPIO pin.
+ */
- if ((value & TEGRA186_GPIO_FULL_ACCESS) == TEGRA186_GPIO_FULL_ACCESS)
+ if (((value & TEGRA186_GPIO_SCR_SEC_REN) == 0 ||
+ ((value & TEGRA186_GPIO_SCR_SEC_REN) && (value & TEGRA186_GPIO_SCR_SEC_G1R))) &&
+ ((value & TEGRA186_GPIO_SCR_SEC_WEN) == 0 ||
+ ((value & TEGRA186_GPIO_SCR_SEC_WEN) && (value & TEGRA186_GPIO_SCR_SEC_G1W))))
return true;
return false;
diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c
index c18b6b47384f1b..94ca9d03c09494 100644
--- a/drivers/gpio/gpio-wcove.c
+++ b/drivers/gpio/gpio-wcove.c
@@ -104,7 +104,7 @@ static inline int to_reg(int gpio, enum ctrl_register type)
unsigned int reg = type == CTRL_IN ? GPIO_IN_CTRL_BASE : GPIO_OUT_CTRL_BASE;
if (gpio >= WCOVE_GPIO_NUM)
- return -EOPNOTSUPP;
+ return -ENOTSUPP;
return reg + gpio;
}
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index f384fa2787648e..d09c7d72836551 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -728,6 +728,25 @@ static u32 line_event_id(int level)
GPIO_V2_LINE_EVENT_FALLING_EDGE;
}
+static inline char *make_irq_label(const char *orig)
+{
+ char *new;
+
+ if (!orig)
+ return NULL;
+
+ new = kstrdup_and_replace(orig, '/', ':', GFP_KERNEL);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ return new;
+}
+
+static inline void free_irq_label(const char *label)
+{
+ kfree(label);
+}
+
#ifdef CONFIG_HTE
static enum hte_return process_hw_ts_thread(void *p)
@@ -1015,6 +1034,7 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us)
{
unsigned long irqflags;
int ret, level, irq;
+ char *label;
/* try hardware */
ret = gpiod_set_debounce(line->desc, debounce_period_us);
@@ -1037,11 +1057,17 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us)
if (irq < 0)
return -ENXIO;
+ label = make_irq_label(line->req->label);
+ if (IS_ERR(label))
+ return -ENOMEM;
+
irqflags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING;
ret = request_irq(irq, debounce_irq_handler, irqflags,
- line->req->label, line);
- if (ret)
+ label, line);
+ if (ret) {
+ free_irq_label(label);
return ret;
+ }
line->irq = irq;
} else {
ret = hte_edge_setup(line, GPIO_V2_LINE_FLAG_EDGE_BOTH);
@@ -1086,7 +1112,7 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc,
static void edge_detector_stop(struct line *line)
{
if (line->irq) {
- free_irq(line->irq, line);
+ free_irq_label(free_irq(line->irq, line));
line->irq = 0;
}
@@ -1110,6 +1136,7 @@ static int edge_detector_setup(struct line *line,
unsigned long irqflags = 0;
u64 eflags;
int irq, ret;
+ char *label;
eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS;
if (eflags && !kfifo_initialized(&line->req->events)) {
@@ -1146,11 +1173,17 @@ static int edge_detector_setup(struct line *line,
IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING;
irqflags |= IRQF_ONESHOT;
+ label = make_irq_label(line->req->label);
+ if (IS_ERR(label))
+ return PTR_ERR(label);
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread,
- irqflags, line->req->label, line);
- if (ret)
+ irqflags, label, line);
+ if (ret) {
+ free_irq_label(label);
return ret;
+ }
line->irq = irq;
return 0;
@@ -1973,7 +2006,7 @@ static void lineevent_free(struct lineevent_state *le)
blocking_notifier_chain_unregister(&le->gdev->device_notifier,
&le->device_unregistered_nb);
if (le->irq)
- free_irq(le->irq, le);
+ free_irq_label(free_irq(le->irq, le));
if (le->desc)
gpiod_free(le->desc);
kfree(le->label);
@@ -2114,6 +2147,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
int fd;
int ret;
int irq, irqflags = 0;
+ char *label;
if (copy_from_user(&eventreq, ip, sizeof(eventreq)))
return -EFAULT;
@@ -2198,15 +2232,23 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
if (ret)
goto out_free_le;
+ label = make_irq_label(le->label);
+ if (IS_ERR(label)) {
+ ret = PTR_ERR(label);
+ goto out_free_le;
+ }
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq,
lineevent_irq_handler,
lineevent_irq_thread,
irqflags,
- le->label,
+ label,
le);
- if (ret)
+ if (ret) {
+ free_irq_label(label);
goto out_free_le;
+ }
le->irq = irq;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index ce94e37bcbee79..94903fc1c1459f 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1175,6 +1175,9 @@ struct gpio_device *gpio_device_find(const void *data,
list_for_each_entry_srcu(gdev, &gpio_devices, list,
srcu_read_lock_held(&gpio_devices_srcu)) {
+ if (!device_is_registered(&gdev->dev))
+ continue;
+
guard(srcu)(&gdev->srcu);
gc = srcu_dereference(gdev->chip, &gdev->srcu);
@@ -2397,6 +2400,11 @@ char *gpiochip_dup_line_label(struct gpio_chip *gc, unsigned int offset)
}
EXPORT_SYMBOL_GPL(gpiochip_dup_line_label);
+static inline const char *function_name_or_default(const char *con_id)
+{
+ return con_id ?: "(default)";
+}
+
/**
* gpiochip_request_own_desc - Allow GPIO chip to request its own descriptor
* @gc: GPIO chip
@@ -2425,10 +2433,11 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc,
enum gpiod_flags dflags)
{
struct gpio_desc *desc = gpiochip_get_desc(gc, hwnum);
+ const char *name = function_name_or_default(label);
int ret;
if (IS_ERR(desc)) {
- chip_err(gc, "failed to get GPIO descriptor\n");
+ chip_err(gc, "failed to get GPIO %s descriptor\n", name);
return desc;
}
@@ -2438,8 +2447,8 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc,
ret = gpiod_configure_flags(desc, label, lflags, dflags);
if (ret) {
- chip_err(gc, "setup of own GPIO %s failed\n", label);
gpiod_free_commit(desc);
+ chip_err(gc, "setup of own GPIO %s failed\n", name);
return ERR_PTR(ret);
}
@@ -4153,19 +4162,17 @@ static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode,
enum gpiod_flags *flags,
unsigned long *lookupflags)
{
+ const char *name = function_name_or_default(con_id);
struct gpio_desc *desc = ERR_PTR(-ENOENT);
if (is_of_node(fwnode)) {
- dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n",
- fwnode, con_id);
+ dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n", fwnode, name);
desc = of_find_gpio(to_of_node(fwnode), con_id, idx, lookupflags);
} else if (is_acpi_node(fwnode)) {
- dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n",
- fwnode, con_id);
+ dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", fwnode, name);
desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags);
} else if (is_software_node(fwnode)) {
- dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n",
- fwnode, con_id);
+ dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n", fwnode, name);
desc = swnode_find_gpio(fwnode, con_id, idx, lookupflags);
}
@@ -4181,6 +4188,7 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer,
bool platform_lookup_allowed)
{
unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT;
+ const char *name = function_name_or_default(con_id);
/*
* scoped_guard() is implemented as a for loop, meaning static
* analyzers will complain about these two not being initialized.
@@ -4203,8 +4211,7 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer,
}
if (IS_ERR(desc)) {
- dev_dbg(consumer, "No GPIO consumer %s found\n",
- con_id);
+ dev_dbg(consumer, "No GPIO consumer %s found\n", name);
return desc;
}
@@ -4226,15 +4233,14 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer,
*
* FIXME: Make this more sane and safe.
*/
- dev_info(consumer,
- "nonexclusive access to GPIO for %s\n", con_id);
+ dev_info(consumer, "nonexclusive access to GPIO for %s\n", name);
return desc;
}
ret = gpiod_configure_flags(desc, con_id, lookupflags, flags);
if (ret < 0) {
- dev_dbg(consumer, "setup of GPIO %s failed\n", con_id);
gpiod_put(desc);
+ dev_dbg(consumer, "setup of GPIO %s failed\n", name);
return ERR_PTR(ret);
}
@@ -4350,6 +4356,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_optional);
int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
unsigned long lflags, enum gpiod_flags dflags)
{
+ const char *name = function_name_or_default(con_id);
int ret;
if (lflags & GPIO_ACTIVE_LOW)
@@ -4393,7 +4400,7 @@ int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
/* No particular flag request, return here... */
if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) {
- gpiod_dbg(desc, "no flags found for %s\n", con_id);
+ gpiod_dbg(desc, "no flags found for GPIO %s\n", name);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec344e..b3b84647207ed4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -210,6 +210,7 @@ extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp;
extern int amdgpu_discovery;
extern int amdgpu_mes;
+extern int amdgpu_mes_log_enable;
extern int amdgpu_mes_kiq;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index df58a6a1a67ec5..2131de36e3dac0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1854,6 +1854,7 @@ err_node_allow:
err_bo_create:
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
err_reserve_limit:
+ amdgpu_sync_free(&(*mem)->sync);
mutex_destroy(&(*mem)->lock);
if (gobj)
drm_gem_object_put(gobj);
@@ -2900,13 +2901,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
amdgpu_sync_create(&sync_obj);
- /* Validate BOs and map them to GPUVM (update VM page tables). */
+ /* Validate BOs managed by KFD */
list_for_each_entry(mem, &process_info->kfd_bo_list,
validate_list) {
struct amdgpu_bo *bo = mem->bo;
uint32_t domain = mem->domain;
- struct kfd_mem_attachment *attachment;
struct dma_resv_iter cursor;
struct dma_fence *fence;
@@ -2931,6 +2931,25 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
goto validate_map_fail;
}
}
+ }
+
+ if (failed_size)
+ pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
+
+ /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
+ * validations above would invalidate DMABuf imports again.
+ */
+ ret = process_validate_vms(process_info, &exec.ticket);
+ if (ret) {
+ pr_debug("Validating VMs failed, ret: %d\n", ret);
+ goto validate_map_fail;
+ }
+
+ /* Update mappings managed by KFD. */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list) {
+ struct kfd_mem_attachment *attachment;
+
list_for_each_entry(attachment, &mem->attachments, list) {
if (!attachment->is_mapped)
continue;
@@ -2947,18 +2966,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
}
}
- if (failed_size)
- pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
-
- /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO
- * validations above would invalidate DMABuf imports again.
- */
- ret = process_validate_vms(process_info, &exec.ticket);
- if (ret) {
- pr_debug("Validating VMs failed, ret: %d\n", ret);
- goto validate_map_fail;
- }
-
/* Update mappings not managed by KFD */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 0a4b09709cfb14..ec888fc6ead8df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -819,7 +819,7 @@ retry:
p->bytes_moved += ctx.bytes_moved;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- amdgpu_bo_in_cpu_visible_vram(bo))
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5dc24c971b41f0..7753a2e64d4114 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4135,18 +4135,22 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->ip_blocks[i].status.hw = true;
}
}
+ } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev)) {
+ r = psp_gpu_reset(adev);
} else {
- tmp = amdgpu_reset_method;
- /* It should do a default reset when loading or reloading the driver,
- * regardless of the module parameter reset_method.
- */
- amdgpu_reset_method = AMD_RESET_METHOD_NONE;
- r = amdgpu_asic_reset(adev);
- amdgpu_reset_method = tmp;
- if (r) {
- dev_err(adev->dev, "asic reset on init failed\n");
- goto failed;
- }
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
+ }
+
+ if (r) {
+ dev_err(adev->dev, "asic reset on init failed\n");
+ goto failed;
}
}
@@ -4539,6 +4543,8 @@ int amdgpu_device_prepare(struct drm_device *dev)
if (r)
goto unprepare;
+ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index a07e4b87d4cae0..ac5bf01fe8d2a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1896,6 +1896,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
break;
case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
break;
default:
@@ -2237,6 +2238,7 @@ static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
if (amdgpu_umsch_mm & 0x1) {
amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block);
adev->enable_umsch_mm = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 80b9642f2bc4f2..e4277298cf1aad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -195,6 +195,7 @@ int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp = -1;
int amdgpu_discovery = -1;
int amdgpu_mes;
+int amdgpu_mes_log_enable = 0;
int amdgpu_mes_kiq;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
@@ -668,6 +669,15 @@ MODULE_PARM_DESC(mes,
module_param_named(mes, amdgpu_mes, int, 0444);
/**
+ * DOC: mes_log_enable (int)
+ * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_log_enable,
+ "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444);
+
+/**
* DOC: mes_kiq (int)
* Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
* (0 = disabled (default), 1 = enabled)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4b3000c21ef2c5..e4742b65032d1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -304,12 +304,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
dma_fence_set_error(finished, -ECANCELED);
if (finished->error < 0) {
- DRM_INFO("Skip scheduling IBs!\n");
+ dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
+ ring->name);
} else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
&fence);
if (r)
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ dev_err(adev->dev,
+ "Error scheduling IBs (%d) in ring(%s)", r,
+ ring->name);
}
job->job_run_counter++;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index a98e03e0a51f1f..1569bef030eac1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -102,7 +102,10 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
{
int r;
- r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ if (!amdgpu_mes_log_enable)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->mes.event_log_gpu_obj,
&adev->mes.event_log_gpu_addr,
@@ -1129,6 +1132,7 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
return;
amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id);
+ del_timer_sync(&ring->fence_drv.fallback_timer);
amdgpu_ring_fini(ring);
kfree(ring);
}
@@ -1549,12 +1553,11 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
- mem, PAGE_SIZE, false);
+ mem, AMDGPU_MES_LOG_BUFFER_SIZE, false);
return 0;
}
-
DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
#endif
@@ -1565,7 +1568,7 @@ void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
#if defined(CONFIG_DEBUG_FS)
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *root = minor->debugfs_root;
- if (adev->enable_mes)
+ if (adev->enable_mes && amdgpu_mes_log_enable)
debugfs_create_file("amdgpu_mes_event_log", 0444, root,
adev, &amdgpu_debugfs_mes_event_log_fops);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 7d4f93fea937ae..4c8fc3117ef894 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -52,6 +52,7 @@ enum amdgpu_mes_priority_level {
#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
+#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */
struct amdgpu_mes_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 010b0cb7693c9c..ce733e3cb35d05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -605,6 +605,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
else
amdgpu_bo_placement_from_domain(bo, bp->domain);
if (bp->type == ttm_bo_type_kernel)
+ bo->tbo.priority = 2;
+ else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE))
bo->tbo.priority = 1;
if (!bp->destroy)
@@ -617,8 +619,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
return r;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- bo->tbo.resource->mem_type == TTM_PL_VRAM &&
- amdgpu_bo_in_cpu_visible_vram(bo))
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
ctx.bytes_moved);
else
@@ -1272,23 +1273,25 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
struct amdgpu_mem_stats *stats)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_resource *res = bo->tbo.resource;
uint64_t size = amdgpu_bo_size(bo);
struct drm_gem_object *obj;
unsigned int domain;
bool shared;
/* Abort if the BO doesn't currently have a backing store */
- if (!bo->tbo.resource)
+ if (!res)
return;
obj = &bo->tbo.base;
shared = drm_gem_object_is_shared_for_memory_stats(obj);
- domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
+ domain = amdgpu_mem_type_to_domain(res->mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
stats->vram += size;
- if (amdgpu_bo_in_cpu_visible_vram(bo))
+ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
stats->visible_vram += size;
if (shared)
stats->vram_shared += size;
@@ -1389,10 +1392,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* Remember that this BO was accessed by the CPU */
abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- if (bo->resource->mem_type != TTM_PL_VRAM)
- return 0;
-
- if (amdgpu_bo_in_cpu_visible_vram(abo))
+ if (amdgpu_res_cpu_visible(adev, bo->resource))
return 0;
/* Can't move a pinned BO to visible VRAM */
@@ -1415,7 +1415,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* this should never happen */
if (bo->resource->mem_type == TTM_PL_VRAM &&
- !amdgpu_bo_in_cpu_visible_vram(abo))
+ !amdgpu_res_cpu_visible(adev, bo->resource))
return VM_FAULT_SIGBUS;
ttm_bo_move_to_lru_tail_unlocked(bo);
@@ -1579,6 +1579,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
*/
u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct dma_buf_attachment *attachment;
struct dma_buf *dma_buf;
const char *placement;
@@ -1587,10 +1588,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
if (dma_resv_trylock(bo->tbo.base.resv)) {
unsigned int domain;
+
domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
- if (amdgpu_bo_in_cpu_visible_vram(bo))
+ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
placement = "VRAM VISIBLE";
else
placement = "VRAM";
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index be679c42b0b8cb..fa03d9e4874cc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -251,28 +251,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
}
/**
- * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
- */
-static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_res_cursor cursor;
-
- if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM)
- return false;
-
- amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
- while (cursor.remaining) {
- if (cursor.start < adev->gmc.visible_vram_size)
- return true;
-
- amdgpu_res_next(&cursor, cursor.size);
- }
-
- return false;
-}
-
-/**
* amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
*/
static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 5505d646f43aa8..06f0a6534a94f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -524,46 +524,58 @@ static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
volatile u32 *mqd;
- int r;
+ u32 *kbuf;
+ int r, i;
uint32_t value, result;
if (*pos & 3 || size & 3)
return -EINVAL;
- result = 0;
+ kbuf = kmalloc(ring->mqd_size, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
- return r;
+ goto err_free;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
- if (r) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
+ if (r)
+ goto err_unreserve;
+ /*
+ * Copy to local buffer to avoid put_user(), which might fault
+ * and acquire mmap_sem, under reservation_ww_class_mutex.
+ */
+ for (i = 0; i < ring->mqd_size/sizeof(u32); i++)
+ kbuf[i] = mqd[i];
+
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ amdgpu_bo_unreserve(ring->mqd_obj);
+
+ result = 0;
while (size) {
if (*pos >= ring->mqd_size)
- goto done;
+ break;
- value = mqd[*pos/4];
+ value = kbuf[*pos/4];
r = put_user(value, (uint32_t *)buf);
if (r)
- goto done;
+ goto err_free;
buf += 4;
result += 4;
size -= 4;
*pos += 4;
}
-done:
- amdgpu_bo_kunmap(ring->mqd_obj);
- mqd = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- if (r)
- return r;
-
+ kfree(kbuf);
return result;
+
+err_unreserve:
+ amdgpu_bo_unreserve(ring->mqd_obj);
+err_free:
+ kfree(kbuf);
+ return r;
}
static const struct file_operations amdgpu_debugfs_mqd_fops = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index fc418e670fdae2..1d71729e3f6bce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -133,7 +133,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
- amdgpu_bo_in_cpu_visible_vram(abo)) {
+ amdgpu_res_cpu_visible(adev, bo->resource)) {
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
@@ -403,40 +403,55 @@ error:
return r;
}
-/*
- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+/**
+ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
*
- * Called by amdgpu_bo_move()
+ * Returns: true if the full resource is CPU visible, false otherwise.
*/
-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
- struct ttm_resource *mem)
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res)
{
- u64 mem_size = (u64)mem->size;
struct amdgpu_res_cursor cursor;
- u64 end;
- if (mem->mem_type == TTM_PL_SYSTEM ||
- mem->mem_type == TTM_PL_TT)
+ if (!res)
+ return false;
+
+ if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+ res->mem_type == AMDGPU_PL_PREEMPT)
return true;
- if (mem->mem_type != TTM_PL_VRAM)
+
+ if (res->mem_type != TTM_PL_VRAM)
return false;
- amdgpu_res_first(mem, 0, mem_size, &cursor);
- end = cursor.start + cursor.size;
+ amdgpu_res_first(res, 0, res->size, &cursor);
while (cursor.remaining) {
+ if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size)
+ return false;
amdgpu_res_next(&cursor, cursor.size);
+ }
- if (!cursor.remaining)
- break;
+ return true;
+}
- /* ttm_resource_ioremap only supports contiguous memory */
- if (end != cursor.start)
- return false;
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+ struct ttm_resource *mem)
+{
+ if (!amdgpu_res_cpu_visible(adev, mem))
+ return false;
- end = cursor.start + cursor.size;
- }
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (mem->mem_type == TTM_PL_VRAM &&
+ !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
+ return false;
- return end <= adev->gmc.visible_vram_size;
+ return true;
}
/*
@@ -529,8 +544,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
if (r) {
/* Check that all memory is CPU accessible */
- if (!amdgpu_mem_visible(adev, old_mem) ||
- !amdgpu_mem_visible(adev, new_mem)) {
+ if (!amdgpu_res_copyable(adev, old_mem) ||
+ !amdgpu_res_copyable(adev, new_mem)) {
pr_err("Move buffer fallback to memcpy unavailable\n");
return r;
}
@@ -557,7 +572,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
struct ttm_resource *mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- size_t bus_size = (size_t)mem->size;
switch (mem->mem_type) {
case TTM_PL_SYSTEM:
@@ -568,9 +582,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
break;
case TTM_PL_VRAM:
mem->bus.offset = mem->start << PAGE_SHIFT;
- /* check if it's visible */
- if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
- return -EINVAL;
if (adev->mman.aper_base_kaddr &&
mem->placement & TTM_PL_FLAG_CONTIGUOUS)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 65ec82141a8e01..32cf6b6f6efd96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -139,6 +139,9 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start);
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res);
+
int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
index ab820cf526683b..f7c73533e336fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -189,10 +189,13 @@ static void setup_vpe_queue(struct amdgpu_device *adev,
mqd->rptr_val = 0;
mqd->unmapped = 1;
+ if (adev->vpe.collaborate_mode)
+ memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO));
+
qinfo->mqd_addr = test->mqd_data_gpu_addr;
qinfo->csa_addr = test->ctx_data_gpu_addr +
offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa);
- qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1;
+ qinfo->doorbell_offset_0 = 0;
qinfo->doorbell_offset_1 = 0;
}
@@ -287,7 +290,10 @@ static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *te
ring[5] = 0;
mqd->wptr_val = (6 << 2);
- // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val);
+ if (adev->vpe.collaborate_mode)
+ (++mqd)->wptr_val = (6 << 2);
+
+ WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val);
for (i = 0; i < adev->usec_timeout; i++) {
if (*fence == test_pattern)
@@ -571,6 +577,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
fw_name = "amdgpu/umsch_mm_4_0_0.bin";
break;
default:
@@ -750,6 +757,7 @@ static int umsch_mm_early_init(void *handle)
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
umsch_mm_v4_0_set_funcs(&adev->umsch_mm);
break;
default:
@@ -766,6 +774,9 @@ static int umsch_mm_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend)
+ return 0;
+
return umsch_mm_test(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
index 8258a43a6236c0..5014b5af95fd97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
@@ -33,13 +33,6 @@ enum UMSCH_SWIP_ENGINE_TYPE {
UMSCH_SWIP_ENGINE_TYPE_MAX
};
-enum UMSCH_SWIP_AFFINITY_TYPE {
- UMSCH_SWIP_AFFINITY_TYPE_ANY = 0,
- UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1,
- UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2,
- UMSCH_SWIP_AFFINITY_TYPE_MAX
-};
-
enum UMSCH_CONTEXT_PRIORITY_LEVEL {
CONTEXT_PRIORITY_LEVEL_IDLE = 0,
CONTEXT_PRIORITY_LEVEL_NORMAL = 1,
@@ -51,13 +44,15 @@ enum UMSCH_CONTEXT_PRIORITY_LEVEL {
struct umsch_mm_set_resource_input {
uint32_t vmid_mask_mm_vcn;
uint32_t vmid_mask_mm_vpe;
+ uint32_t collaboration_mask_vpe;
uint32_t logging_vmid;
uint32_t engine_mask;
union {
struct {
uint32_t disable_reset : 1;
uint32_t disable_umsch_mm_log : 1;
- uint32_t reserved : 30;
+ uint32_t use_rs64mem_for_proc_ctx_csa : 1;
+ uint32_t reserved : 29;
};
uint32_t uint32_all;
};
@@ -78,15 +73,18 @@ struct umsch_mm_add_queue_input {
uint32_t doorbell_offset_1;
enum UMSCH_SWIP_ENGINE_TYPE engine_type;
uint32_t affinity;
- enum UMSCH_SWIP_AFFINITY_TYPE affinity_type;
uint64_t mqd_addr;
uint64_t h_context;
uint64_t h_queue;
uint32_t vm_context_cntl;
+ uint32_t process_csa_array_index;
+ uint32_t context_csa_array_index;
+
struct {
uint32_t is_context_suspended : 1;
- uint32_t reserved : 31;
+ uint32_t collaboration_mode : 1;
+ uint32_t reserved : 30;
};
};
@@ -94,6 +92,7 @@ struct umsch_mm_remove_queue_input {
uint32_t doorbell_offset_0;
uint32_t doorbell_offset_1;
uint64_t context_csa_addr;
+ uint32_t context_csa_array_index;
};
struct MQD_INFO {
@@ -103,6 +102,7 @@ struct MQD_INFO {
uint32_t wptr_val;
uint32_t rptr_val;
uint32_t unmapped;
+ uint32_t vmid;
};
struct amdgpu_umsch_mm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4299ce386322e7..94089069c9ada6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1613,6 +1613,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
+/* Validate operation parameters to prevent potential abuse */
+static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo,
+ uint64_t saddr,
+ uint64_t offset,
+ uint64_t size)
+{
+ uint64_t tmp, lpfn;
+
+ if (saddr & AMDGPU_GPU_PAGE_MASK
+ || offset & AMDGPU_GPU_PAGE_MASK
+ || size & AMDGPU_GPU_PAGE_MASK)
+ return -EINVAL;
+
+ if (check_add_overflow(saddr, size, &tmp)
+ || check_add_overflow(offset, size, &tmp)
+ || size == 0 /* which also leads to end < begin */)
+ return -EINVAL;
+
+ /* make sure object fit at this offset */
+ if (bo && offset + size > amdgpu_bo_size(bo))
+ return -EINVAL;
+
+ /* Ensure last pfn not exceed max_pfn */
+ lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
+ if (lpfn >= adev->vm_manager.max_pfn)
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* amdgpu_vm_bo_map - map bo inside a vm
*
@@ -1639,21 +1670,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
uint64_t eaddr;
+ int r;
- /* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
- return -EINVAL;
- if (saddr + size <= saddr || offset + size <= offset)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if ((bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
if (tmp) {
@@ -1706,17 +1730,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
uint64_t eaddr;
int r;
- /* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
- return -EINVAL;
- if (saddr + size <= saddr || offset + size <= offset)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if ((bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
/* Allocate all the needed memory */
mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
@@ -1730,7 +1746,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
}
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
mapping->start = saddr;
mapping->last = eaddr;
@@ -1817,10 +1833,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
LIST_HEAD(removed);
uint64_t eaddr;
+ int r;
+
+ r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
+ if (r)
+ return r;
- eaddr = saddr + size - 1;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
/* Allocate all the needed memory */
before = kzalloc(sizeof(*before), GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index 7a65a2b128ec43..c23d97d34b7ec5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -205,7 +205,7 @@ disable_dpm:
dpm_ctl &= 0xfffffffe; /* Disable DPM */
WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl);
dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__);
- return 0;
+ return -EINVAL;
}
int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
@@ -396,6 +396,12 @@ static int vpe_hw_init(void *handle)
struct amdgpu_vpe *vpe = &adev->vpe;
int ret;
+ /* Power on VPE */
+ ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+ AMD_PG_STATE_UNGATE);
+ if (ret)
+ return ret;
+
ret = vpe_load_microcode(vpe);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index d6f808acfb17b7..fbb43ae7624f44 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -62,6 +62,11 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
}
+static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev)
+{
+ return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst);
+}
+
static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
uint32_t inst_idx, struct amdgpu_ring *ring)
{
@@ -87,7 +92,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
case AMDGPU_RING_TYPE_VCN_ENC:
case AMDGPU_RING_TYPE_VCN_JPEG:
ip_blk = AMDGPU_XCP_VCN;
- if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ if (aqua_vanjaram_xcp_vcn_shared(adev))
inst_mask = 1 << (inst_idx * 2);
break;
default:
@@ -140,10 +145,12 @@ static int aqua_vanjaram_xcp_sched_list_update(
aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
- /* VCN is shared by two partitions under CPX MODE */
+ /* VCN may be shared by two partitions under CPX MODE in certain
+ * configs.
+ */
if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
- ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
- adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
+ aqua_vanjaram_xcp_vcn_shared(adev))
aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index f90905ef32c76d..701146d649c353 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -9186,7 +9186,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* VM_FLUSH */
+ 4 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
@@ -9276,7 +9276,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
7 + /* gfx_v10_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v10_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */
.emit_ib = gfx_v10_0_ring_emit_ib_compute,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 1770e496c1b7ce..f00e05aba46a4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1635,7 +1635,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
}
- active_rb_bitmap |= global_active_rb_bitmap;
+ active_rb_bitmap &= global_active_rb_bitmap;
adev->gfx.config.backend_enable_mask = active_rb_bitmap;
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
}
@@ -5465,6 +5465,7 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
/* Make sure that we can't skip the SET_Q_MODE packets when the VM
* changed in any way.
*/
+ ring->set_q_mode_offs = 0;
ring->set_q_mode_ptr = NULL;
}
@@ -6191,7 +6192,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* VM_FLUSH */
+ 4 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
5 + /* COND_EXEC */
@@ -6277,7 +6278,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
7 + /* gfx_v11_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v11_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
.emit_ib = gfx_v11_0_ring_emit_ib_compute,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6f97a6d0e6d052..99dbd2341120db 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -6981,7 +6981,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7 + /* gfx_v9_0_emit_mem_sync */
5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
@@ -7019,7 +7018,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
- 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 072c478665ade1..63f281a9984d98 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -411,8 +411,11 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;
- mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
- mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
+ if (amdgpu_mes_log_enable) {
+ mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+ mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
+ mes->event_log_gpu_addr;
+ }
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 34237a1b1f2e45..e708468ac54dd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -368,7 +368,8 @@ static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0
+ << (ring->me % adev->sdma.num_inst_per_aid);
sdma_v4_4_2_wait_reg_mem(ring, 0, 1,
adev->nbio.funcs->get_hdp_flush_done_offset(adev),
@@ -1602,19 +1603,9 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
u32 sdma_cntl;
sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL,
- DRAM_ECC_INT_ENABLE, 0);
- WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
- break;
- /* sdma ecc interrupt is enabled by default
- * driver doesn't need to do anything to
- * enable the interrupt */
- case AMDGPU_IRQ_STATE_ENABLE:
- default:
- break;
- }
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 42f4bd250def62..da01b524b9f2a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -280,17 +280,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
- ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
- SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
- SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
- amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
- amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
- amdgpu_ring_write(ring, ref_and_mask); /* reference */
- amdgpu_ring_write(ring, ref_and_mask); /* mask */
- amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
- SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ if (ring->me > 1) {
+ amdgpu_asic_flush_hdp(adev, ring);
+ } else {
+ ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
+
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
+ amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
+ amdgpu_ring_write(ring, ref_and_mask); /* reference */
+ amdgpu_ring_write(ring, ref_and_mask); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+ }
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 581a3bd11481cc..43ca63fe85ac3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -457,10 +457,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
- return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
- return false;
default:
return true;
}
@@ -722,7 +720,10 @@ static int soc21_common_early_init(void *handle)
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_GFX_PG;
- adev->external_rev_id = adev->rev_id + 0x1;
+ if (adev->rev_id == 0)
+ adev->external_rev_id = 0x1;
+ else
+ adev->external_rev_id = adev->rev_id + 0x10;
break;
case IP_VERSION(11, 5, 1):
adev->cg_flags =
@@ -869,10 +870,35 @@ static int soc21_common_suspend(void *handle)
return soc21_common_hw_fini(adev);
}
+static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
+{
+ u32 sol_reg1, sol_reg2;
+
+ /* Will reset for the following suspend abort cases.
+ * 1) Only reset dGPU side.
+ * 2) S3 suspend got aborted and TOS is active.
+ */
+ if (!(adev->flags & AMD_IS_APU) && adev->in_s3 &&
+ !adev->suspend_complete) {
+ sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+ msleep(100);
+ sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return (sol_reg1 != sol_reg2);
+ }
+
+ return false;
+}
+
static int soc21_common_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (soc21_need_reset_on_resume(adev)) {
+ dev_info(adev->dev, "S3 suspend aborted, resetting...");
+ soc21_asic_reset(adev);
+ }
+
return soc21_common_hw_init(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
index 8e7b763cfdb7ef..bd57896ab85d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
@@ -60,7 +60,7 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch)
umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr;
- if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) {
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) {
WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT);
SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
@@ -225,6 +225,8 @@ static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch)
WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size);
+ ring->wptr = 0;
+
data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK);
WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
@@ -248,7 +250,7 @@ static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch)
data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0);
WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data);
- if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) {
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) {
WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT);
SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
@@ -271,6 +273,8 @@ static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch)
set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn;
set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe;
+ set_hw_resources.collaboration_mask_vpe =
+ adev->vpe.collaborate_mode ? 0x3 : 0x0;
set_hw_resources.engine_mask = umsch->engine_mask;
set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask;
@@ -346,6 +350,7 @@ static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch,
add_queue.h_queue = input_ptr->h_queue;
add_queue.vm_context_cntl = input_ptr->vm_context_cntl;
add_queue.is_context_suspended = input_ptr->is_context_suspended;
+ add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0;
add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq;
diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
index 769eb8f7bb3c57..09315dd5a1ec95 100644
--- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c
@@ -144,6 +144,12 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe)
WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret);
}
+ /* setup collaborate mode */
+ vpe_v6_1_set_collaborate_mode(vpe, true);
+ /* setup DPM */
+ if (amdgpu_vpe_configure_dpm(vpe))
+ dev_warn(adev->dev, "VPE failed to enable DPM\n");
+
/*
* For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well.
* Here use instance 0 as master.
@@ -159,11 +165,7 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe)
adev->vpe.cmdbuf_cpu_addr[0] = f32_offset;
adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl;
- amdgpu_vpe_psp_update_sram(adev);
- vpe_v6_1_set_collaborate_mode(vpe, true);
- amdgpu_vpe_configure_dpm(vpe);
-
- return 0;
+ return amdgpu_vpe_psp_update_sram(adev);
}
vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
@@ -196,8 +198,6 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe)
}
vpe_v6_1_halt(vpe, false);
- vpe_v6_1_set_collaborate_mode(vpe, true);
- amdgpu_vpe_configure_dpm(vpe);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index dfa8c69532d470..55aa74cbc5325e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -779,8 +779,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
* nodes, but not more than args->num_of_nodes as that is
* the amount of memory allocated by user
*/
- pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
- args->num_of_nodes), GFP_KERNEL);
+ pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
+ GFP_KERNEL);
if (!pa)
return -ENOMEM;
@@ -1523,7 +1523,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
/* Find a KFD GPU device that supports the get_dmabuf_info query */
for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
- if (dev)
+ if (dev && !kfd_devcgroup_check_permission(dev))
break;
if (!dev)
return -EINVAL;
@@ -1545,7 +1545,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
if (xcp_id >= 0)
args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;
else
- args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id;
+ args->gpu_id = dev->id;
args->flags = flags;
/* Copy metadata buffer to user mode */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 041ec3de55e72f..719d6d365e1501 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -960,7 +960,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
struct kfd_node *node;
int i;
- int count;
if (!kfd->init_complete)
return;
@@ -968,12 +967,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
/* for runtime suspend, skip locking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = ++kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
/* For first KFD device suspend all the KFD processes */
- if (count == 1)
+ if (++kfd_locked == 1)
kfd_suspend_all_processes();
+ mutex_unlock(&kfd_processes_mutex);
}
for (i = 0; i < kfd->num_nodes; i++) {
@@ -984,7 +981,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
- int ret, count, i;
+ int ret, i;
if (!kfd->init_complete)
return 0;
@@ -998,12 +995,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
/* for runtime resume, skip unlocking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = --kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
- WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
- if (count == 0)
+ if (--kfd_locked == 0)
ret = kfd_resume_all_processes();
+ WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+ mutex_unlock(&kfd_processes_mutex);
}
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f4d395e38683db..0b655555e16786 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2001,6 +2001,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
while (halt_if_hws_hang)
schedule();
+ kfd_hws_hang(dqm);
return -ETIME;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
index 9a06c6fb660585..40a21be6c07c9b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -339,7 +339,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
break;
}
kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23);
- } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+ } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
index 7e2859736a558f..fe2ad0c0de9543 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
@@ -328,7 +328,8 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
/* CP */
if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
kfd_signal_event_interrupt(pasid, context_id0, 32);
- else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
+ else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)))
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_CTXID0_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 91dd5e045b511d..c4c6a29052ac8f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -388,7 +388,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
break;
}
kfd_signal_event_interrupt(pasid, sq_int_data, 24);
- } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+ } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index bdc01ca9609a7e..5c8d81bfce7ab1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -509,10 +509,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
start = start_mgr << PAGE_SHIFT;
end = (last_mgr + 1) << PAGE_SHIFT;
+ r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
+ prange->npages * PAGE_SIZE,
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+ node->xcp ? node->xcp->id : 0);
+ if (r) {
+ dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r);
+ return -ENOSPC;
+ }
+
r = svm_range_vram_node_new(node, prange, true);
if (r) {
dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
- return r;
+ goto out;
}
ttm_res_offset = (start_mgr - prange->start + prange->offset) << PAGE_SHIFT;
@@ -545,6 +554,11 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
svm_range_vram_node_free(prange);
}
+out:
+ amdgpu_amdkfd_unreserve_mem_limit(node->adev,
+ prange->npages * PAGE_SIZE,
+ KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
+ node->xcp ? node->xcp->id : 0);
return r < 0 ? r : 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 42d40560cd30d7..a81ef232fdef96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1473,7 +1473,7 @@ static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
{
- return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) ||
+ return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) ||
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 717a60d7a4ea95..58c1fe5421934d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -819,9 +819,9 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
mutex_lock(&kfd_processes_mutex);
if (kfd_is_locked()) {
- mutex_unlock(&kfd_processes_mutex);
pr_debug("KFD is locked! Cannot create process");
- return ERR_PTR(-EINVAL);
+ process = ERR_PTR(-EINVAL);
+ goto out;
}
/* A prior open of /dev/kfd could have already created the process. */
@@ -1922,6 +1922,8 @@ static int signal_eviction_fence(struct kfd_process *p)
rcu_read_lock();
ef = dma_fence_get_rcu_safe(&p->ef);
rcu_read_unlock();
+ if (!ef)
+ return -EINVAL;
ret = dma_fence_signal(ef);
dma_fence_put(ef);
@@ -1949,10 +1951,9 @@ static void evict_process_worker(struct work_struct *work)
* they are responsible stopping the queues and scheduling
* the restore work.
*/
- if (!signal_eviction_fence(p))
- queue_delayed_work(kfd_restore_wq, &p->restore_work,
- msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
- else
+ if (signal_eviction_fence(p) ||
+ mod_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
kfd_process_restore_queues(p);
pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
@@ -2011,9 +2012,9 @@ static void restore_process_worker(struct work_struct *work)
if (ret) {
pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
p->pasid, PROCESS_BACK_OFF_TIME_MS);
- ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
- msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
- WARN(!ret, "reschedule restore work failed\n");
+ if (mod_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_RESTORE_TIME_MS)))
+ kfd_process_restore_queues(p);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f0f7f48af4137a..386875e6eb96ba 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -3426,7 +3426,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
- return r;
+ return 0;
}
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 901d1961b73927..5fcd4f778dc3dd 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -8,7 +8,7 @@ config DRM_AMD_DC
depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64
select SND_HDA_COMPONENT if SND_HDA_CORE
# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
- select DRM_AMD_DC_FP if (X86 || LOONGARCH || (PPC64 && ALTIVEC) || (ARM64 && KERNEL_MODE_NEON && !CC_IS_CLANG))
+ select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && (!ARM64 || !CC_IS_CLANG)
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 2851719d712161..f3f94d109726d3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -148,6 +148,9 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
#define FIRMWARE_DCN_35_DMUB "amdgpu/dcn_3_5_dmcub.bin"
MODULE_FIRMWARE(FIRMWARE_DCN_35_DMUB);
+#define FIRMWARE_DCN_351_DMUB "amdgpu/dcn_3_5_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_351_DMUB);
+
/* Number of bytes in PSP header for firmware. */
#define PSP_HEADER_BYTES 0x100
@@ -3026,6 +3029,7 @@ static int dm_resume(void *handle)
dc_stream_release(dm_new_crtc_state->stream);
dm_new_crtc_state->stream = NULL;
}
+ dm_new_crtc_state->base.color_mgmt_changed = true;
}
for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) {
@@ -3044,6 +3048,10 @@ static int dm_resume(void *handle)
/* Do mst topology probing after resuming cached state*/
drm_connector_list_iter_begin(ddev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type != dc_connection_mst_branch ||
aconnector->mst_root)
@@ -4820,9 +4828,11 @@ static int dm_init_microcode(struct amdgpu_device *adev)
fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
break;
case IP_VERSION(3, 5, 0):
- case IP_VERSION(3, 5, 1):
fw_name_dmub = FIRMWARE_DCN_35_DMUB;
break;
+ case IP_VERSION(3, 5, 1):
+ fw_name_dmub = FIRMWARE_DCN_351_DMUB;
+ break;
default:
/* ASIC doesn't support DMUB. */
return 0;
@@ -5921,6 +5931,9 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
&aconnector->base.probed_modes :
&aconnector->base.modes;
+ if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ return NULL;
+
if (aconnector->freesync_vid_base.clock != 0)
return &aconnector->freesync_vid_base;
@@ -6305,27 +6318,22 @@ create_stream_for_sink(struct drm_connector *connector,
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);
- else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
- stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
- stream->signal == SIGNAL_TYPE_EDP) {
+
+ if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+ stream->signal == SIGNAL_TYPE_EDP) {
//
// should decide stream support vsc sdp colorimetry capability
// before building vsc info packet
//
- stream->use_vsc_sdp_for_colorimetry = false;
- if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- stream->use_vsc_sdp_for_colorimetry =
- aconnector->dc_sink->is_vsc_sdp_colorimetry_supported;
- } else {
- if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
- stream->use_vsc_sdp_for_colorimetry = true;
- }
+ stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
+ stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED;
+
if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
tf = TRANSFER_FUNC_GAMMA_22;
mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
+ aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
- if (stream->link->psr_settings.psr_feature_enabled)
- aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
}
finish:
dc_sink_release(sink);
@@ -8764,10 +8772,10 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
continue;
+notify:
if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
continue;
-notify:
aconnector = to_amdgpu_dm_connector(connector);
mutex_lock(&adev->dm.audio_lock);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
index 1f08c6564c3bfe..286ecd28cc6e66 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
@@ -141,9 +141,8 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
* amdgpu_dm_psr_enable() - enable psr f/w
* @stream: stream state
*
- * Return: true if success
*/
-bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
+void amdgpu_dm_psr_enable(struct dc_stream_state *stream)
{
struct dc_link *link = stream->link;
unsigned int vsync_rate_hz = 0;
@@ -190,7 +189,10 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1)
power_opt |= psr_power_opt_z10_static_screen;
- return dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt);
+ dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt);
+
+ if (link->ctx->dc->caps.ips_support)
+ dc_allow_idle_optimizations(link->ctx->dc, true);
}
/*
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
index 6806b3c9c84ba0..1fdfd183c0d91a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
@@ -32,7 +32,7 @@
#define AMDGPU_DM_PSR_ENTRY_DELAY 5
void amdgpu_dm_set_psr_caps(struct dc_link *link);
-bool amdgpu_dm_psr_enable(struct dc_stream_state *stream);
+void amdgpu_dm_psr_enable(struct dc_stream_state *stream);
bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream);
bool amdgpu_dm_psr_disable(struct dc_stream_state *stream);
bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
index 16e72d623630ca..08c494a7a21bad 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
@@ -76,10 +76,8 @@ static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder,
static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector)
{
- struct drm_device *dev = connector->dev;
-
- return drm_add_modes_noedid(connector, dev->mode_config.max_width,
- dev->mode_config.max_height);
+ /* Maximum resolution supported by DWB */
+ return drm_add_modes_noedid(connector, 3840, 2160);
}
static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index 4ae4720535a56d..e46f8ce41d871c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -26,16 +26,7 @@
#include "dc_trace.h"
-#if defined(CONFIG_X86)
-#include <asm/fpu/api.h>
-#elif defined(CONFIG_PPC64)
-#include <asm/switch_to.h>
-#include <asm/cputable.h>
-#elif defined(CONFIG_ARM64)
-#include <asm/neon.h>
-#elif defined(CONFIG_LOONGARCH)
-#include <asm/fpu.h>
-#endif
+#include <linux/fpu.h>
/**
* DOC: DC FPU manipulation overview
@@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line)
WARN_ON_ONCE(!in_task());
preempt_disable();
depth = __this_cpu_inc_return(fpu_recursion_depth);
-
if (depth == 1) {
-#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
+ BUG_ON(!kernel_fpu_available());
kernel_fpu_begin();
-#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- enable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- enable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
- enable_kernel_fp();
-#elif defined(CONFIG_ARM64)
- kernel_neon_begin();
-#endif
}
TRACE_DCN_FPU(true, function_name, line, depth);
@@ -122,18 +102,7 @@ void dc_fpu_end(const char *function_name, const int line)
depth = __this_cpu_dec_return(fpu_recursion_depth);
if (depth == 0) {
-#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
kernel_fpu_end();
-#elif defined(CONFIG_PPC64)
- if (cpu_has_feature(CPU_FTR_VSX_COMP))
- disable_kernel_vsx();
- else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP))
- disable_kernel_altivec();
- else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
- disable_kernel_fp();
-#elif defined(CONFIG_ARM64)
- kernel_neon_end();
-#endif
} else {
WARN_ON_ONCE(depth < 0);
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index 12f3e8aa46d8df..6ad4f4efec5dd3 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -99,20 +99,25 @@ static int dcn316_get_active_display_cnt_wa(
return display_count;
}
-static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+ bool safe_to_lower, bool disable)
{
struct dc *dc = clk_mgr_base->ctx->dc;
int i;
for (i = 0; i < dc->res_pool->pipe_count; ++i) {
- struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe = safe_to_lower
+ ? &context->res_ctx.pipe_ctx[i]
+ : &dc->current_state->res_ctx.pipe_ctx[i];
if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
- if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
- dc_is_virtual_signal(pipe->stream->signal))) {
+ if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
+ !pipe->stream->link_enc)) {
if (disable) {
- pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+ if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+ pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
reset_sync_context_for_pipe(dc, context, i);
} else
pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
@@ -207,11 +212,11 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
}
if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
- dcn316_disable_otg_wa(clk_mgr_base, context, true);
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
- dcn316_disable_otg_wa(clk_mgr_base, context, false);
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
update_dispclk = true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index c378b879c76d8c..d9c5692c86c21a 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -73,6 +73,14 @@
#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
+#define regCLK5_0_CLK5_spll_field_8 0x464b
+#define regCLK5_0_CLK5_spll_field_8_BASE_IDX 0
+
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT 0xd
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
+
+#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
+
#define REG(reg_name) \
(ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
@@ -409,11 +417,25 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs
{
}
+static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc_context *ctx = clk_mgr->base.ctx;
+ uint32_t ssc_enable;
+
+ REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable);
+
+ return ssc_enable == 1;
+}
+
static void init_clk_states(struct clk_mgr *clk_mgr)
{
+ struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+ if (clk_mgr_int->smu_ver >= SMU_VER_THRESHOLD)
+ clk_mgr->clks.dtbclk_en = true; // request DTBCLK disable on first commit
clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk
clk_mgr->clks.p_state_change_support = true;
clk_mgr->clks.prev_p_state_change_support = true;
@@ -423,7 +445,16 @@ static void init_clk_states(struct clk_mgr *clk_mgr)
void dcn35_init_clocks(struct clk_mgr *clk_mgr)
{
+ struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
init_clk_states(clk_mgr);
+
+ // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk
+ if (dcn35_is_spll_ssc_enabled(clk_mgr))
+ clk_mgr->dp_dto_source_clock_in_khz =
+ dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
+ else
+ clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+
}
static struct clk_bw_params dcn35_bw_params = {
.vram_type = Ddr4MemType,
@@ -512,6 +543,28 @@ static DpmClocks_t_dcn35 dummy_clocks;
static struct dcn35_watermarks dummy_wms = { 0 };
+static struct dcn35_ss_info_table ss_info_table = {
+ .ss_divider = 1000,
+ .ss_percentage = {0, 0, 375, 375, 375}
+};
+
+static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+ struct dc_context *ctx = clk_mgr->base.ctx;
+ uint32_t clock_source;
+
+ REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
+ // If it's DFS mode, clock_source is 0.
+ if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) {
+ clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
+
+ if (clk_mgr->dprefclk_ss_percentage != 0) {
+ clk_mgr->ss_on_dprefclk = true;
+ clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+ }
+ }
+}
+
static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table)
{
int i, num_valid_sets;
@@ -709,7 +762,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
clock_table->NumFclkLevelsEnabled;
max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk);
- num_dcfclk = (clock_table->NumFclkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS :
+ num_dcfclk = (clock_table->NumDcfClkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS :
clock_table->NumDcfClkLevelsEnabled;
for (i = 0; i < num_dcfclk; i++) {
int j;
@@ -1056,6 +1109,8 @@ void dcn35_clk_mgr_construct(
dce_clock_read_ss_info(&clk_mgr->base);
/*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
+ dcn35_read_ss_info_from_lut(&clk_mgr->base);
+
clk_mgr->base.base.bw_params = &dcn35_bw_params;
if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index e7dc128f6284b4..03b554e912a20d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3024,7 +3024,8 @@ static void backup_planes_and_stream_state(
scratch->blend_tf[i] = *status->plane_states[i]->blend_tf;
}
scratch->stream_state = *stream;
- scratch->out_transfer_func = *stream->out_transfer_func;
+ if (stream->out_transfer_func)
+ scratch->out_transfer_func = *stream->out_transfer_func;
}
static void restore_planes_and_stream_state(
@@ -3046,7 +3047,8 @@ static void restore_planes_and_stream_state(
*status->plane_states[i]->blend_tf = scratch->blend_tf[i];
}
*stream = scratch->stream_state;
- *stream->out_transfer_func = scratch->out_transfer_func;
+ if (stream->out_transfer_func)
+ *stream->out_transfer_func = scratch->out_transfer_func;
}
static bool update_planes_and_stream_state(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index 5cc7f8da209c59..61986e5cb49196 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -436,6 +436,15 @@ bool dc_state_add_plane(
goto out;
}
+ if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
+ /* ODM combine could prevent us from supporting more planes
+ * we will reset ODM slice count back to 1 when all planes have
+ * been removed to maximize the amount of planes supported when
+ * new planes are added.
+ */
+ resource_update_pipes_for_stream_with_slice_count(
+ state, dc->current_state, dc->res_pool, stream, 1);
+
otg_master_pipe = resource_get_otg_master_for_stream(
&state->res_ctx, stream);
if (otg_master_pipe)
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 970644b695cd4f..b5e0289d2fe82a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -976,7 +976,10 @@ static bool dcn31_program_pix_clk(
struct bp_pixel_clock_parameters bp_pc_params = {0};
enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
- if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+ // Apply ssed(spread spectrum) dpref clock for edp only.
+ if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0
+ && pix_clk_params->signal_type == SIGNAL_TYPE_EDP
+ && encoding == DP_8b_10b_ENCODING)
dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
// For these signal types Driver to program DP_DTO without calling VBIOS Command table
if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) {
@@ -1093,9 +1096,6 @@ static bool get_pixel_clk_frequency_100hz(
unsigned int modulo_hz = 0;
unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
- if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
- dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
-
if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) {
clock_hz = REG_READ(PHASE[inst]);
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
index f0777d61c2cbb6..c307f040e48fc6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
@@ -23,7 +23,7 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = -Wno-override-init
DCE110 = dce110_timing_generator.o \
dce110_compressor.o dce110_opp_regamma_v.o \
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
index 7e92effec89447..683866797709ba 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
@@ -23,7 +23,7 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = -Wno-override-init
DCE112 = dce112_compressor.o
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
index 1e3ef68a452a56..8f508e66274805 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
@@ -24,7 +24,7 @@
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = -Wno-override-init
DCE120 = dce120_timing_generator.o
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/Makefile b/drivers/gpu/drm/amd/display/dc/dce60/Makefile
index fee331accc0e7c..eede83ad91fa0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce60/Makefile
@@ -23,7 +23,7 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = -Wno-override-init
DCE60 = dce60_timing_generator.o dce60_hw_sequencer.o \
dce60_resource.o
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
index 7eefffbdc9253f..fba189d26652d6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
@@ -23,7 +23,7 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = -Wno-override-init
DCE80 = dce80_timing_generator.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
index bf3386cd444d62..5ebb5730313048 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
@@ -44,6 +44,36 @@
#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
+void mpc3_mpc_init(struct mpc *mpc)
+{
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+ int opp_id;
+
+ mpc1_mpc_init(mpc);
+
+ for (opp_id = 0; opp_id < MAX_OPP; opp_id++) {
+ if (REG(MUX[opp_id]))
+ /* disable mpc out rate and flow control */
+ REG_UPDATE_2(MUX[opp_id], MPC_OUT_RATE_CONTROL_DISABLE,
+ 1, MPC_OUT_FLOW_CONTROL_COUNT, 0);
+ }
+}
+
+void mpc3_mpc_init_single_inst(struct mpc *mpc, unsigned int mpcc_id)
+{
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+
+ mpc1_mpc_init_single_inst(mpc, mpcc_id);
+
+ /* assuming mpc out mux is connected to opp with the same index at this
+ * point in time (e.g. transitioning from vbios to driver)
+ */
+ if (mpcc_id < MAX_OPP && REG(MUX[mpcc_id]))
+ /* disable mpc out rate and flow control */
+ REG_UPDATE_2(MUX[mpcc_id], MPC_OUT_RATE_CONTROL_DISABLE,
+ 1, MPC_OUT_FLOW_CONTROL_COUNT, 0);
+}
+
bool mpc3_is_dwb_idle(
struct mpc *mpc,
int dwb_id)
@@ -80,25 +110,6 @@ void mpc3_disable_dwb_mux(
MPC_DWB0_MUX, 0xf);
}
-void mpc3_set_out_rate_control(
- struct mpc *mpc,
- int opp_id,
- bool enable,
- bool rate_2x_mode,
- struct mpc_dwb_flow_control *flow_control)
-{
- struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
-
- REG_UPDATE_2(MUX[opp_id],
- MPC_OUT_RATE_CONTROL_DISABLE, !enable,
- MPC_OUT_RATE_CONTROL, rate_2x_mode);
-
- if (flow_control)
- REG_UPDATE_2(MUX[opp_id],
- MPC_OUT_FLOW_CONTROL_MODE, flow_control->flow_ctrl_mode,
- MPC_OUT_FLOW_CONTROL_COUNT, flow_control->flow_ctrl_cnt1);
-}
-
enum dc_lut_mode mpc3_get_ogam_current(struct mpc *mpc, int mpcc_id)
{
/*Contrary to DCN2 and DCN1 wherein a single status register field holds this info;
@@ -1490,8 +1501,8 @@ static const struct mpc_funcs dcn30_mpc_funcs = {
.read_mpcc_state = mpc3_read_mpcc_state,
.insert_plane = mpc1_insert_plane,
.remove_mpcc = mpc1_remove_mpcc,
- .mpc_init = mpc1_mpc_init,
- .mpc_init_single_inst = mpc1_mpc_init_single_inst,
+ .mpc_init = mpc3_mpc_init,
+ .mpc_init_single_inst = mpc3_mpc_init_single_inst,
.update_blending = mpc2_update_blending,
.cursor_lock = mpc1_cursor_lock,
.get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp,
@@ -1508,7 +1519,6 @@ static const struct mpc_funcs dcn30_mpc_funcs = {
.set_dwb_mux = mpc3_set_dwb_mux,
.disable_dwb_mux = mpc3_disable_dwb_mux,
.is_dwb_idle = mpc3_is_dwb_idle,
- .set_out_rate_control = mpc3_set_out_rate_control,
.set_gamut_remap = mpc3_set_gamut_remap,
.program_shaper = mpc3_program_shaper,
.acquire_rmu = mpcc3_acquire_rmu,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h
index 9cb96ae95a2f75..ce93003dae0113 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h
@@ -1007,6 +1007,13 @@ void dcn30_mpc_construct(struct dcn30_mpc *mpc30,
int num_mpcc,
int num_rmu);
+void mpc3_mpc_init(
+ struct mpc *mpc);
+
+void mpc3_mpc_init_single_inst(
+ struct mpc *mpc,
+ unsigned int mpcc_id);
+
bool mpc3_program_shaper(
struct mpc *mpc,
const struct pwl_params *params,
@@ -1078,13 +1085,6 @@ bool mpc3_is_dwb_idle(
struct mpc *mpc,
int dwb_id);
-void mpc3_set_out_rate_control(
- struct mpc *mpc,
- int opp_id,
- bool enable,
- bool rate_2x_mode,
- struct mpc_dwb_flow_control *flow_control);
-
void mpc3_power_on_ogam_lut(
struct mpc *mpc, int mpcc_id,
bool power_on);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
index e224a028d68acc..8a0460e8630977 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
@@ -248,14 +248,12 @@ void dcn32_link_encoder_construct(
enc10->base.hpd_source = init_data->hpd_source;
enc10->base.connector = init_data->connector;
- enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
-
- enc10->base.features = *enc_features;
if (enc10->base.connector.id == CONNECTOR_ID_USBC)
enc10->base.features.flags.bits.DP_IS_USB_C = 1;
- if (enc10->base.connector.id == CONNECTOR_ID_USBC)
- enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+
+ enc10->base.features = *enc_features;
enc10->base.transmitter = init_data->transmitter;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
index e789e654c38705..e408e859b35561 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
@@ -47,7 +47,7 @@ void mpc32_mpc_init(struct mpc *mpc)
struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
int mpcc_id;
- mpc1_mpc_init(mpc);
+ mpc3_mpc_init(mpc);
if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc) {
if (mpc30->mpc_mask->MPCC_MCM_SHAPER_MEM_LOW_PWR_MODE && mpc30->mpc_mask->MPCC_MCM_3DLUT_MEM_LOW_PWR_MODE) {
@@ -991,7 +991,7 @@ static const struct mpc_funcs dcn32_mpc_funcs = {
.insert_plane = mpc1_insert_plane,
.remove_mpcc = mpc1_remove_mpcc,
.mpc_init = mpc32_mpc_init,
- .mpc_init_single_inst = mpc1_mpc_init_single_inst,
+ .mpc_init_single_inst = mpc3_mpc_init_single_inst,
.update_blending = mpc2_update_blending,
.cursor_lock = mpc1_cursor_lock,
.get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp,
@@ -1008,7 +1008,6 @@ static const struct mpc_funcs dcn32_mpc_funcs = {
.set_dwb_mux = mpc3_set_dwb_mux,
.disable_dwb_mux = mpc3_disable_dwb_mux,
.is_dwb_idle = mpc3_is_dwb_idle,
- .set_out_rate_control = mpc3_set_out_rate_control,
.set_gamut_remap = mpc3_set_gamut_remap,
.program_shaper = mpc32_program_shaper,
.program_3dlut = mpc32_program_3dlut,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
index 81e349d5835bbe..da94e5309fbaf0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
@@ -184,6 +184,8 @@ void dcn35_link_encoder_construct(
enc10->base.hpd_source = init_data->hpd_source;
enc10->base.connector = init_data->connector;
+ if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+ enc10->base.features.flags.bits.DP_IS_USB_C = 1;
enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
@@ -238,8 +240,6 @@ void dcn35_link_encoder_construct(
}
enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
- if (enc10->base.connector.id == CONNECTOR_ID_USBC)
- enc10->base.features.flags.bits.DP_IS_USB_C = 1;
if (bp_funcs->get_connector_speed_cap_info)
result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index c4a5efd2dda51c..a94b6d546cd1a1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -25,40 +25,8 @@
# It provides the general basic services required by other DAL
# subcomponents.
-ifdef CONFIG_X86
-dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
-dml_ccflags := $(dml_ccflags-y) -msse
-endif
-
-ifdef CONFIG_PPC64
-dml_ccflags := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_ARM64
-dml_rcflags := -mgeneral-regs-only
-endif
-
-ifdef CONFIG_LOONGARCH
-dml_ccflags := -mfpu=64
-dml_rcflags := -msoft-float
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifneq ($(call gcc-min-version, 70100),y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-dml_ccflags += -mpreferred-stack-boundary=4
-else
-dml_ccflags += -msse2
-endif
-endif
+dml_ccflags := $(CC_FLAGS_FPU)
+dml_rcflags := $(CC_FLAGS_NO_FPU)
ifneq ($(CONFIG_FRAME_WARN),0)
ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index 80bebfc268db0f..21e0eef3269b10 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -166,8 +166,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
.num_states = 5,
.sr_exit_time_us = 28.0,
.sr_enter_plus_exit_time_us = 30.0,
- .sr_exit_z8_time_us = 210.0,
- .sr_enter_plus_exit_z8_time_us = 320.0,
+ .sr_exit_z8_time_us = 250.0,
+ .sr_enter_plus_exit_z8_time_us = 350.0,
.fclk_change_latency_us = 24.0,
.usr_retraining_latency_us = 2,
.writeback_latency_us = 12.0,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
index dc9e1b758ed6a1..b3ffab77cf8894 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
@@ -98,55 +98,114 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = {
.clock_limits = {
{
.state = 0,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 400.0,
+ .fabricclk_mhz = 400.0,
+ .socclk_mhz = 600.0,
+ .dram_speed_mts = 3200.0,
+ .dispclk_mhz = 600.0,
+ .dppclk_mhz = 600.0,
.phyclk_mhz = 600.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 186.0,
+ .dscclk_mhz = 200.0,
.dtbclk_mhz = 600.0,
},
{
.state = 1,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 600.0,
+ .fabricclk_mhz = 1000.0,
+ .socclk_mhz = 733.0,
+ .dram_speed_mts = 6400.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
+ .dscclk_mhz = 266.7,
.dtbclk_mhz = 600.0,
},
{
.state = 2,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 738.0,
+ .fabricclk_mhz = 1200.0,
+ .socclk_mhz = 880.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
+ .dscclk_mhz = 266.7,
.dtbclk_mhz = 600.0,
},
{
.state = 3,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 800.0,
+ .fabricclk_mhz = 1400.0,
+ .socclk_mhz = 978.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 960.0,
+ .dppclk_mhz = 960.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 371.0,
+ .dscclk_mhz = 320.0,
.dtbclk_mhz = 600.0,
},
{
.state = 4,
+ .dcfclk_mhz = 873.0,
+ .fabricclk_mhz = 1600.0,
+ .socclk_mhz = 1100.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1066.7,
+ .dppclk_mhz = 1066.7,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 355.6,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 5,
+ .dcfclk_mhz = 960.0,
+ .fabricclk_mhz = 1700.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
.dispclk_mhz = 1200.0,
.dppclk_mhz = 1200.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
+ .dscclk_mhz = 400.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 6,
+ .dcfclk_mhz = 1067.0,
+ .fabricclk_mhz = 1850.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1371.4,
+ .dppclk_mhz = 1371.4,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 457.1,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 7,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 2000.0,
+ .socclk_mhz = 1467.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1600.0,
+ .dppclk_mhz = 1600.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 533.3,
.dtbclk_mhz = 600.0,
},
},
- .num_states = 5,
+ .num_states = 8,
.sr_exit_time_us = 28.0,
.sr_enter_plus_exit_time_us = 30.0,
- .sr_exit_z8_time_us = 210.0,
- .sr_enter_plus_exit_z8_time_us = 320.0,
+ .sr_exit_z8_time_us = 250.0,
+ .sr_enter_plus_exit_z8_time_us = 350.0,
.fclk_change_latency_us = 24.0,
.usr_retraining_latency_us = 2,
.writeback_latency_us = 12.0,
@@ -177,6 +236,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = {
.do_urgent_latency_adjustment = 0,
.urgent_latency_adjustment_fabric_clock_component_us = 0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .num_chans = 4,
+ .dram_clock_change_latency_us = 11.72,
+ .dispclk_dppclk_vco_speed_mhz = 2400.0,
};
/*
@@ -340,6 +402,8 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc,
clock_limits[i].socclk_mhz;
dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+ clock_limits[i].dtbclk_mhz;
dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
clk_table->num_entries;
dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
@@ -352,6 +416,8 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc,
clk_table->num_entries;
dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+ clk_table->num_entries;
}
}
@@ -551,6 +617,7 @@ void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context)
if (context->res_ctx.pipe_ctx[i].plane_state)
plane_count++;
}
+
/*dcn351 does not support z9/z10*/
if (context->stream_count == 0 || plane_count == 0) {
support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
@@ -564,11 +631,9 @@ void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context)
dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency;
-
/*for psr1/psr-su, we allow z8 and z10 based on latency, for replay with IPS enabled, it will enter ips2*/
- if (is_pwrseq0 && (is_psr || is_replay))
+ if (is_pwrseq0 && (is_psr || is_replay))
support = allow_z8 ? allow_z8 : DCN_ZSTATE_SUPPORT_DISALLOW;
-
}
context->bw_ctx.bw.dcn.clk.zstate_support = support;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
index acff3449b8d787..4f6c804a26ad6c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile
@@ -24,40 +24,8 @@
#
# Makefile for dml2.
-ifdef CONFIG_X86
-dml2_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float
-dml2_ccflags := $(dml2_ccflags-y) -msse
-endif
-
-ifdef CONFIG_PPC64
-dml2_ccflags := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_ARM64
-dml2_rcflags := -mgeneral-regs-only
-endif
-
-ifdef CONFIG_LOONGARCH
-dml2_ccflags := -mfpu=64
-dml2_rcflags := -msoft-float
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-dml2_ccflags += -mpreferred-stack-boundary=4
-else
-dml2_ccflags += -msse2
-endif
-endif
+dml2_ccflags := $(CC_FLAGS_FPU)
+dml2_rcflags := $(CC_FLAGS_NO_FPU)
ifneq ($(CONFIG_FRAME_WARN),0)
ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y)
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 17a58f41fc6a85..a20f28a5d2e7b0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -228,17 +228,13 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s
break;
case dml_project_dcn35:
+ case dml_project_dcn351:
out->num_chans = 4;
out->round_trip_ping_latency_dcfclk_cycles = 106;
out->smn_latency_us = 2;
out->dispclk_dppclk_vco_speed_mhz = 3600;
break;
- case dml_project_dcn351:
- out->num_chans = 16;
- out->round_trip_ping_latency_dcfclk_cycles = 1100;
- out->smn_latency_us = 2;
- break;
}
/* ---Overrides if available--- */
if (dml2->config.bbox_overrides.dram_num_chan)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 9d5df4c0da5979..0ba1feaf96c0d7 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -1185,7 +1185,8 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
if (dccg) {
dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
- dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+ if (dccg && dccg->funcs->set_dtbclk_dto)
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
}
} else if (dccg && dccg->funcs->disable_symclk_se) {
dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
index 3a9cc8ac0c0793..093f4387553ce3 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
@@ -69,29 +69,6 @@
#define FN(reg_name, field_name) \
hws->shifts->field_name, hws->masks->field_name
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
@@ -183,10 +160,6 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx
struct pipe_ctx *odm_pipe;
int opp_cnt = 0;
int opp_inst[MAX_PIPES] = {0};
- bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing));
- struct mpc_dwb_flow_control flow_control;
- struct mpc *mpc = dc->res_pool->mpc;
- int i;
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
@@ -199,20 +172,6 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt);
- if (mpc->funcs->set_out_rate_control) {
- for (i = 0; i < opp_cnt; ++i) {
- mpc->funcs->set_out_rate_control(
- mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
- }
- }
-
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index c0b526cf178654..7668229438da22 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -966,29 +966,6 @@ void dcn32_init_hw(struct dc *dc)
}
}
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
@@ -1103,10 +1080,6 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
struct pipe_ctx *odm_pipe;
int opp_cnt = 0;
int opp_inst[MAX_PIPES] = {0};
- bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing));
- struct mpc_dwb_flow_control flow_control;
- struct mpc *mpc = dc->res_pool->mpc;
- int i;
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
@@ -1119,20 +1092,6 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt);
- if (mpc->funcs->set_out_rate_control) {
- for (i = 0; i < opp_cnt; ++i) {
- mpc->funcs->set_out_rate_control(
- mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
- }
- }
-
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 4b92df23ff0db9..a5560b3fc39ba9 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -358,29 +358,6 @@ void dcn35_init_hw(struct dc *dc)
}
}
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
@@ -474,10 +451,6 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
struct pipe_ctx *odm_pipe;
int opp_cnt = 0;
int opp_inst[MAX_PIPES] = {0};
- bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing));
- struct mpc_dwb_flow_control flow_control;
- struct mpc *mpc = dc->res_pool->mpc;
- int i;
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
@@ -490,20 +463,6 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt);
- if (mpc->funcs->set_out_rate_control) {
- for (i = 0; i < opp_cnt; ++i) {
- mpc->funcs->set_out_rate_control(
- mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
- }
- }
-
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
index ab17fa1c64e8c5..670255c9bc8228 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
@@ -67,7 +67,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = {
.prepare_bandwidth = dcn35_prepare_bandwidth,
.optimize_bandwidth = dcn35_optimize_bandwidth,
.update_bandwidth = dcn20_update_bandwidth,
- .set_drr = dcn10_set_drr,
+ .set_drr = dcn35_set_drr,
.get_position = dcn10_get_position,
.set_static_screen_control = dcn35_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
index f07a4c7e48bc23..52eab8fccb7f16 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
@@ -267,9 +267,6 @@ static void optc32_setup_manual_trigger(struct timing_generator *optc)
OTG_V_TOTAL_MAX_SEL, 1,
OTG_FORCE_LOCK_ON_EVENT, 0,
OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */
-
- // Setup manual flow control for EOF via TRIG_A
- optc->funcs->setup_manual_trigger(optc);
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
index 5b486400dfdb5b..909e14261f9b49 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
@@ -700,6 +700,8 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_dcc = DCC_ENABLE,
.disable_dpp_power_gate = true,
.disable_hubp_power_gate = true,
+ .disable_optc_power_gate = true, /*should the same as above two*/
+ .disable_hpo_power_gate = true, /*dmubfw force domain25 on*/
.disable_clock_gate = false,
.disable_dsc_power_gate = true,
.vsr_support = true,
@@ -742,12 +744,13 @@ static const struct dc_debug_options debug_defaults_drv = {
},
.seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT,
.enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/
+ .minimum_z8_residency_time = 2100,
.using_dml2 = true,
.support_eDP1_5 = true,
.enable_hpo_pg_support = false,
.enable_legacy_fast_update = true,
.enable_single_display_2to1_odm_policy = true,
- .disable_idle_power_optimizations = true,
+ .disable_idle_power_optimizations = false,
.dmcub_emulation = false,
.disable_boot_optimizations = false,
.disable_unbounded_requesting = false,
@@ -758,8 +761,10 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_z10 = true,
.ignore_pg = true,
.psp_disabled_wa = true,
- .ips2_eval_delay_us = 200,
- .ips2_entry_delay_us = 400
+ .ips2_eval_delay_us = 2000,
+ .ips2_entry_delay_us = 800,
+ .disable_dmub_reallow_idle = true,
+ .static_screen_wait_frames = 2,
};
static const struct dc_panel_config panel_config_defaults = {
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index 738ee763f24a51..84f9b412a4f117 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -147,15 +147,12 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
}
/* VSC packet set to 4 for PSR-SU, or 2 for PSR1 */
- if (stream->link->psr_settings.psr_feature_enabled) {
- if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
- vsc_packet_revision = vsc_packet_rev4;
- else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
- vsc_packet_revision = vsc_packet_rev2;
- }
-
- if (stream->link->replay_settings.config.replay_supported)
+ if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
+ vsc_packet_revision = vsc_packet_rev4;
+ else if (stream->link->replay_settings.config.replay_supported)
vsc_packet_revision = vsc_packet_rev4;
+ else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
+ vsc_packet_revision = vsc_packet_rev2;
/* Update to revision 5 for extended colorimetry support */
if (stream->use_vsc_sdp_for_colorimetry)
diff --git a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h
index beadb9e42850c7..ca83e9e5c3ffb8 100644
--- a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h
+++ b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h
@@ -234,7 +234,8 @@ union UMSCHAPI__SET_HW_RESOURCES {
uint32_t enable_level_process_quantum_check : 1;
uint32_t is_vcn0_enabled : 1;
uint32_t is_vcn1_enabled : 1;
- uint32_t reserved : 27;
+ uint32_t use_rs64mem_for_proc_ctx_csa : 1;
+ uint32_t reserved : 26;
};
uint32_t uint32_all;
};
@@ -297,9 +298,12 @@ union UMSCHAPI__ADD_QUEUE {
struct {
uint32_t is_context_suspended : 1;
- uint32_t reserved : 31;
+ uint32_t collaboration_mode : 1;
+ uint32_t reserved : 30;
};
struct UMSCH_API_STATUS api_status;
+ uint32_t process_csa_array_index;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -314,6 +318,7 @@ union UMSCHAPI__REMOVE_QUEUE {
uint64_t context_csa_addr;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -337,6 +342,7 @@ union UMSCHAPI__SUSPEND {
uint32_t suspend_fence_value;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -356,6 +362,7 @@ union UMSCHAPI__RESUME {
enum UMSCH_ENGINE_TYPE engine_type;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -404,6 +411,7 @@ union UMSCHAPI__UPDATE_AFFINITY {
union UMSCH_AFFINITY affinity;
uint64_t context_csa_addr;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -417,6 +425,7 @@ union UMSCHAPI__CHANGE_CONTEXT_PRIORITY_LEVEL {
uint64_t context_quantum;
uint64_t context_csa_addr;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index f09b9d49297e81..bbd0169010c2d5 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -4261,6 +4261,13 @@ static int amdgpu_od_set_init(struct amdgpu_device *adev)
}
}
+ /*
+ * If gpu_od is the only member in the list, that means gpu_od is an
+ * empty directory, so remove it.
+ */
+ if (list_is_singular(&adev->pm.od_kobj_list))
+ goto err_out;
+
return 0;
err_out:
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 246b211b1e85f7..65333141b1c1b0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -735,7 +735,7 @@ static int smu_early_init(void *handle)
smu->adev = adev;
smu->pm_enabled = !!amdgpu_dpm;
smu->is_apu = false;
- smu->smu_baco.state = SMU_BACO_STATE_EXIT;
+ smu->smu_baco.state = SMU_BACO_STATE_NONE;
smu->smu_baco.platform_support = false;
smu->user_dpm_profile.fan_mode = -1;
@@ -1966,10 +1966,25 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
return 0;
}
+static int smu_reset_mp1_state(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+ int ret = 0;
+
+ if ((!adev->in_runpm) && (!adev->in_suspend) &&
+ (!amdgpu_in_reset(adev)) && amdgpu_ip_version(adev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev))
+ ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
+
+ return ret;
+}
+
static int smu_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = adev->powerplay.pp_handle;
+ int ret;
if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
return 0;
@@ -1987,7 +2002,15 @@ static int smu_hw_fini(void *handle)
adev->pm.dpm_enabled = false;
- return smu_smc_hw_cleanup(smu);
+ ret = smu_smc_hw_cleanup(smu);
+ if (ret)
+ return ret;
+
+ ret = smu_reset_mp1_state(smu);
+ if (ret)
+ return ret;
+
+ return 0;
}
static void smu_late_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index a870bdd49a4e3c..1fa81575788c54 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -424,6 +424,7 @@ enum smu_reset_mode {
enum smu_baco_state {
SMU_BACO_STATE_ENTER = 0,
SMU_BACO_STATE_EXIT,
+ SMU_BACO_STATE_NONE,
};
struct smu_baco_context {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
index 5bb7a63c0602b7..97522c0852589d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
@@ -144,6 +144,37 @@ typedef struct {
uint32_t MaxGfxClk;
} DpmClocks_t;
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t VClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t VPEClocks[NUM_VPE_DPM_LEVELS];
+ uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS];
+ uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS];
+
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t Vcn0ClkLevelsEnabled; //Applies to both Vclk0 and Dclk0
+ uint8_t Vcn1ClkLevelsEnabled; //Applies to both Vclk1 and Dclk1
+ uint8_t VpeClkLevelsEnabled;
+ uint8_t NumMemPstatesEnabled;
+ uint8_t NumFclkLevelsEnabled;
+ uint8_t spare;
+
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks_t_v14_0_1;
+
typedef struct {
uint16_t CoreFrequency[16]; //Target core frequency [MHz]
uint16_t CorePower[16]; //CAC calculated core power [mW]
@@ -224,7 +255,7 @@ typedef enum {
#define TABLE_CUSTOM_DPM 2 // Called by Driver
#define TABLE_BIOS_GPIO_CONFIG 3 // Called by BIOS
#define TABLE_DPMCLOCKS 4 // Called by Driver and VBIOS
-#define TABLE_SPARE0 5 // Unused
+#define TABLE_MOMENTARY_PM 5 // Called by Tools
#define TABLE_MODERN_STDBY 6 // Called by Tools for Modern Standby Log
#define TABLE_SMU_METRICS 7 // Called by Driver and SMF/PMF
#define TABLE_COUNT 8
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h
index 356e0f57a426ff..ddb62586008319 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h
@@ -42,7 +42,7 @@
#define FEATURE_EDC_BIT 7
#define FEATURE_PLL_POWER_DOWN_BIT 8
#define FEATURE_VDDOFF_BIT 9
-#define FEATURE_VCN_DPM_BIT 10
+#define FEATURE_VCN_DPM_BIT 10 /* this is for both VCN0 and VCN1 */
#define FEATURE_DS_MPM_BIT 11
#define FEATURE_FCLK_DPM_BIT 12
#define FEATURE_SOCCLK_DPM_BIT 13
@@ -56,9 +56,9 @@
#define FEATURE_DS_GFXCLK_BIT 21
#define FEATURE_DS_SOCCLK_BIT 22
#define FEATURE_DS_LCLK_BIT 23
-#define FEATURE_LOW_POWER_DCNCLKS_BIT 24 // for all DISP clks
+#define FEATURE_LOW_POWER_DCNCLKS_BIT 24
#define FEATURE_DS_SHUBCLK_BIT 25
-#define FEATURE_SPARE0_BIT 26 //SPARE
+#define FEATURE_RESERVED0_BIT 26
#define FEATURE_ZSTATES_BIT 27
#define FEATURE_IOMMUL2_PG_BIT 28
#define FEATURE_DS_FCLK_BIT 29
@@ -66,8 +66,8 @@
#define FEATURE_DS_MP1CLK_BIT 31
#define FEATURE_WHISPER_MODE_BIT 32
#define FEATURE_SMU_LOW_POWER_BIT 33
-#define FEATURE_SMART_L3_RINSER_BIT 34
-#define FEATURE_SPARE1_BIT 35 //SPARE
+#define FEATURE_RESERVED1_BIT 34 /* v14_0_0 SMART_L3_RINSER; v14_0_1 RESERVED1 */
+#define FEATURE_GFX_DEM_BIT 35 /* v14_0_0 SPARE; v14_0_1 GFX_DEM */
#define FEATURE_PSI_BIT 36
#define FEATURE_PROCHOT_BIT 37
#define FEATURE_CPUOFF_BIT 38
@@ -77,11 +77,11 @@
#define FEATURE_PERF_LIMIT_BIT 42
#define FEATURE_CORE_DLDO_BIT 43
#define FEATURE_DVO_BIT 44
-#define FEATURE_DS_VCN_BIT 45
+#define FEATURE_DS_VCN_BIT 45 /* v14_0_1 this is for both VCN0 and VCN1 */
#define FEATURE_CPPC_BIT 46
#define FEATURE_CPPC_PREFERRED_CORES 47
#define FEATURE_DF_CSTATES_BIT 48
-#define FEATURE_SPARE2_BIT 49 //SPARE
+#define FEATURE_FAST_PSTATE_CLDO_BIT 49 /* v14_0_0 SPARE */
#define FEATURE_ATHUB_PG_BIT 50
#define FEATURE_VDDOFF_ECO_BIT 51
#define FEATURE_ZSTATES_ECO_BIT 52
@@ -93,8 +93,8 @@
#define FEATURE_DS_IPUCLK_BIT 58
#define FEATURE_DS_VPECLK_BIT 59
#define FEATURE_VPE_DPM_BIT 60
-#define FEATURE_SPARE_61 61
-#define FEATURE_FP_DIDT 62
+#define FEATURE_SMART_L3_RINSER_BIT 61 /* v14_0_0 SPARE*/
+#define FEATURE_PCC_BIT 62 /* v14_0_0 FP_DIDT v14_0_1 PCC_BIT */
#define NUM_FEATURES 63
// Firmware Header/Footer
@@ -151,6 +151,43 @@ typedef struct {
// MP1_EXT_SCRATCH7 = RTOS Current Job
} FwStatus_t;
+typedef struct {
+ // MP1_EXT_SCRATCH0
+ uint32_t DpmHandlerID : 8;
+ uint32_t ActivityMonitorID : 8;
+ uint32_t DpmTimerID : 8;
+ uint32_t DpmHubID : 4;
+ uint32_t DpmHubTask : 4;
+ // MP1_EXT_SCRATCH1
+ uint32_t CclkSyncStatus : 8;
+ uint32_t ZstateStatus : 4;
+ uint32_t Cpu1VddOff : 4;
+ uint32_t DstateFun : 4;
+ uint32_t DstateDev : 4;
+ uint32_t GfxOffStatus : 2;
+ uint32_t Cpu0Off : 2;
+ uint32_t Cpu1Off : 2;
+ uint32_t Cpu0VddOff : 2;
+ // MP1_EXT_SCRATCH2
+ uint32_t P2JobHandler :32;
+ // MP1_EXT_SCRATCH3
+ uint32_t PostCode :32;
+ // MP1_EXT_SCRATCH4
+ uint32_t MsgPortBusy :15;
+ uint32_t RsmuPmiP1Pending : 1;
+ uint32_t RsmuPmiP2PendingCnt : 8;
+ uint32_t DfCstateExitPending : 1;
+ uint32_t Pc6EntryPending : 1;
+ uint32_t Pc6ExitPending : 1;
+ uint32_t WarmResetPending : 1;
+ uint32_t Mp0ClkPending : 1;
+ uint32_t InWhisperMode : 1;
+ uint32_t spare2 : 2;
+ // MP1_EXT_SCRATCH5
+ uint32_t IdleMask :32;
+ // MP1_EXT_SCRATCH6 = RTOS threads' status
+ // MP1_EXT_SCRATCH7 = RTOS Current Job
+} FwStatus_t_v14_0_1;
#pragma pack(pop)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
index 8a8a57c56bc0c4..c4dc5881d8df09 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
@@ -54,14 +54,14 @@
#define PPSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team
#define PPSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version
#define PPSMC_MSG_GetDriverIfVersion 0x03 ///< Get PMFW_DRIVER_IF version
-#define PPSMC_MSG_SPARE0 0x04 ///< SPARE
-#define PPSMC_MSG_SPARE1 0x05 ///< SPARE
-#define PPSMC_MSG_PowerDownVcn 0x06 ///< Power down VCN
-#define PPSMC_MSG_PowerUpVcn 0x07 ///< Power up VCN; VCN is power gated by default
-#define PPSMC_MSG_SetHardMinVcn 0x08 ///< For wireless display
+#define PPSMC_MSG_PowerDownVcn1 0x04 ///< Power down VCN1
+#define PPSMC_MSG_PowerUpVcn1 0x05 ///< Power up VCN1; VCN1 is power gated by default
+#define PPSMC_MSG_PowerDownVcn0 0x06 ///< Power down VCN0
+#define PPSMC_MSG_PowerUpVcn0 0x07 ///< Power up VCN0; VCN0 is power gated by default
+#define PPSMC_MSG_SetHardMinVcn0 0x08 ///< For wireless display
#define PPSMC_MSG_SetSoftMinGfxclk 0x09 ///< Set SoftMin for GFXCLK, argument is frequency in MHz
-#define PPSMC_MSG_SPARE2 0x0A ///< SPARE
-#define PPSMC_MSG_SPARE3 0x0B ///< SPARE
+#define PPSMC_MSG_SetHardMinVcn1 0x0A ///< For wireless display
+#define PPSMC_MSG_SetSoftMinVcn1 0x0B ///< Set soft min for VCN1 clocks (VCLK1 and DCLK1)
#define PPSMC_MSG_PrepareMp1ForUnload 0x0C ///< Prepare PMFW for GFX driver unload
#define PPSMC_MSG_SetDriverDramAddrHigh 0x0D ///< Set high 32 bits of DRAM address for Driver table transfer
#define PPSMC_MSG_SetDriverDramAddrLow 0x0E ///< Set low 32 bits of DRAM address for Driver table transfer
@@ -71,36 +71,32 @@
#define PPSMC_MSG_GetEnabledSmuFeatures 0x12 ///< Get enabled features in PMFW
#define PPSMC_MSG_SetHardMinSocclkByFreq 0x13 ///< Set hard min for SOC CLK
#define PPSMC_MSG_SetSoftMinFclk 0x14 ///< Set hard min for FCLK
-#define PPSMC_MSG_SetSoftMinVcn 0x15 ///< Set soft min for VCN clocks (VCLK and DCLK)
-
+#define PPSMC_MSG_SetSoftMinVcn0 0x15 ///< Set soft min for VCN0 clocks (VCLK0 and DCLK0)
#define PPSMC_MSG_EnableGfxImu 0x16 ///< Enable GFX IMU
-
-#define PPSMC_MSG_spare_0x17 0x17
-#define PPSMC_MSG_spare_0x18 0x18
+#define PPSMC_MSG_spare_0x17 0x17 ///< Get GFX clock frequency
+#define PPSMC_MSG_spare_0x18 0x18 ///< Get FCLK frequency
#define PPSMC_MSG_AllowGfxOff 0x19 ///< Inform PMFW of allowing GFXOFF entry
#define PPSMC_MSG_DisallowGfxOff 0x1A ///< Inform PMFW of disallowing GFXOFF entry
#define PPSMC_MSG_SetSoftMaxGfxClk 0x1B ///< Set soft max for GFX CLK
#define PPSMC_MSG_SetHardMinGfxClk 0x1C ///< Set hard min for GFX CLK
-
#define PPSMC_MSG_SetSoftMaxSocclkByFreq 0x1D ///< Set soft max for SOC CLK
#define PPSMC_MSG_SetSoftMaxFclkByFreq 0x1E ///< Set soft max for FCLK
-#define PPSMC_MSG_SetSoftMaxVcn 0x1F ///< Set soft max for VCN clocks (VCLK and DCLK)
-#define PPSMC_MSG_spare_0x20 0x20
-#define PPSMC_MSG_PowerDownJpeg 0x21 ///< Power down Jpeg
-#define PPSMC_MSG_PowerUpJpeg 0x22 ///< Power up Jpeg; VCN is power gated by default
-
+#define PPSMC_MSG_SetSoftMaxVcn0 0x1F ///< Set soft max for VCN0 clocks (VCLK0 and DCLK0)
+#define PPSMC_MSG_spare_0x20 0x20 ///< Set power limit percentage
+#define PPSMC_MSG_PowerDownJpeg0 0x21 ///< Power down Jpeg of VCN0
+#define PPSMC_MSG_PowerUpJpeg0 0x22 ///< Power up Jpeg of VCN0; VCN0 is power gated by default
#define PPSMC_MSG_SetHardMinFclkByFreq 0x23 ///< Set hard min for FCLK
#define PPSMC_MSG_SetSoftMinSocclkByFreq 0x24 ///< Set soft min for SOC CLK
#define PPSMC_MSG_AllowZstates 0x25 ///< Inform PMFM of allowing Zstate entry, i.e. no Miracast activity
-#define PPSMC_MSG_Reserved 0x26 ///< Not used
-#define PPSMC_MSG_Reserved1 0x27 ///< Not used, previously PPSMC_MSG_RequestActiveWgp
-#define PPSMC_MSG_Reserved2 0x28 ///< Not used, previously PPSMC_MSG_QueryActiveWgp
+#define PPSMC_MSG_PowerDownJpeg1 0x26 ///< Power down Jpeg of VCN1
+#define PPSMC_MSG_PowerUpJpeg1 0x27 ///< Power up Jpeg of VCN1; VCN1 is power gated by default
+#define PPSMC_MSG_SetSoftMaxVcn1 0x28 ///< Set soft max for VCN1 clocks (VCLK1 and DCLK1)
#define PPSMC_MSG_PowerDownIspByTile 0x29 ///< ISP is power gated by default
#define PPSMC_MSG_PowerUpIspByTile 0x2A ///< This message is used to power up ISP tiles and enable the ISP DPM
#define PPSMC_MSG_SetHardMinIspiclkByFreq 0x2B ///< Set HardMin by frequency for ISPICLK
#define PPSMC_MSG_SetHardMinIspxclkByFreq 0x2C ///< Set HardMin by frequency for ISPXCLK
-#define PPSMC_MSG_PowerDownUmsch 0x2D ///< Power down VCN.UMSCH (aka VSCH) scheduler
-#define PPSMC_MSG_PowerUpUmsch 0x2E ///< Power up VCN.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerDownUmsch 0x2D ///< Power down VCN0.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerUpUmsch 0x2E ///< Power up VCN0.UMSCH (aka VSCH) scheduler
#define PPSMC_Message_IspStutterOn_MmhubPgDis 0x2F ///< ISP StutterOn mmHub PgDis
#define PPSMC_Message_IspStutterOff_MmhubPgEn 0x30 ///< ISP StufferOff mmHub PgEn
#define PPSMC_MSG_PowerUpVpe 0x31 ///< Power up VPE
@@ -110,7 +106,9 @@
#define PPSMC_MSG_DisableLSdma 0x35 ///< Disable LSDMA
#define PPSMC_MSG_SetSoftMaxVpe 0x36 ///<
#define PPSMC_MSG_SetSoftMinVpe 0x37 ///<
-#define PPSMC_Message_Count 0x38 ///< Total number of PPSMC messages
+#define PPSMC_MSG_AllocMALLCache 0x38 ///< Allocating MALL Cache
+#define PPSMC_MSG_ReleaseMALLCache 0x39 ///< Releasing MALL Cache
+#define PPSMC_Message_Count 0x3A ///< Total number of PPSMC messages
/** @}*/
/**
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index a941fdbf78b6b3..af427cc7dbb845 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -115,6 +115,10 @@
__SMU_DUMMY_MAP(PowerDownVcn), \
__SMU_DUMMY_MAP(PowerUpJpeg), \
__SMU_DUMMY_MAP(PowerDownJpeg), \
+ __SMU_DUMMY_MAP(PowerUpJpeg0), \
+ __SMU_DUMMY_MAP(PowerDownJpeg0), \
+ __SMU_DUMMY_MAP(PowerUpJpeg1), \
+ __SMU_DUMMY_MAP(PowerDownJpeg1), \
__SMU_DUMMY_MAP(BacoAudioD3PME), \
__SMU_DUMMY_MAP(ArmD3), \
__SMU_DUMMY_MAP(RunDcBtc), \
@@ -135,6 +139,8 @@
__SMU_DUMMY_MAP(PowerUpSdma), \
__SMU_DUMMY_MAP(SetHardMinIspclkByFreq), \
__SMU_DUMMY_MAP(SetHardMinVcn), \
+ __SMU_DUMMY_MAP(SetHardMinVcn0), \
+ __SMU_DUMMY_MAP(SetHardMinVcn1), \
__SMU_DUMMY_MAP(SetAllowFclkSwitch), \
__SMU_DUMMY_MAP(SetMinVideoGfxclkFreq), \
__SMU_DUMMY_MAP(ActiveProcessNotify), \
@@ -150,6 +156,8 @@
__SMU_DUMMY_MAP(SetPhyclkVoltageByFreq), \
__SMU_DUMMY_MAP(SetDppclkVoltageByFreq), \
__SMU_DUMMY_MAP(SetSoftMinVcn), \
+ __SMU_DUMMY_MAP(SetSoftMinVcn0), \
+ __SMU_DUMMY_MAP(SetSoftMinVcn1), \
__SMU_DUMMY_MAP(EnablePostCode), \
__SMU_DUMMY_MAP(GetGfxclkFrequency), \
__SMU_DUMMY_MAP(GetFclkFrequency), \
@@ -161,6 +169,8 @@
__SMU_DUMMY_MAP(SetSoftMaxSocclkByFreq), \
__SMU_DUMMY_MAP(SetSoftMaxFclkByFreq), \
__SMU_DUMMY_MAP(SetSoftMaxVcn), \
+ __SMU_DUMMY_MAP(SetSoftMaxVcn0), \
+ __SMU_DUMMY_MAP(SetSoftMaxVcn1), \
__SMU_DUMMY_MAP(PowerGateMmHub), \
__SMU_DUMMY_MAP(UpdatePmeRestore), \
__SMU_DUMMY_MAP(GpuChangeState), \
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
index 3f7463c1c1a919..4af1985ae44668 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
@@ -27,6 +27,7 @@
#define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7
+#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_1 0x6
#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1
#define FEATURE_MASK(feature) (1ULL << feature)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 9c03296f92cdd4..67117ced7c6ae6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2751,7 +2751,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
switch (mp1_state) {
case PP_MP1_STATE_UNLOAD:
- ret = smu_cmn_set_mp1_state(smu, mp1_state);
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_PrepareMp1ForUnload,
+ 0x55, NULL);
+
+ if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+ ret = smu_v13_0_disable_pmfw_state(smu);
+
break;
default:
/* Ignore others */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index bb98156b2fa1d5..949131bd1ecb21 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -226,8 +226,18 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
struct amdgpu_device *adev = smu->adev;
int ret = 0;
- if (!en && !adev->in_s0ix)
+ if (!en && !adev->in_s0ix) {
+ /* Adds a GFX reset as workaround just before sending the
+ * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
+ * an invalid state.
+ */
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
+ SMU_RESET_MODE_2, NULL);
+ if (ret)
+ return ret;
+
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
+ }
return ret;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 3957af057d54ff..c977ebe88001df 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2294,6 +2294,17 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
return sizeof(*gpu_metrics);
}
+static void smu_v13_0_6_restore_pci_config(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ pci_write_config_dword(adev->pdev, i * 4,
+ adev->pdev->saved_config_space[i]);
+ pci_restore_msi_state(adev->pdev);
+}
+
static int smu_v13_0_6_mode2_reset(struct smu_context *smu)
{
int ret = 0, index;
@@ -2315,6 +2326,20 @@ static int smu_v13_0_6_mode2_reset(struct smu_context *smu)
/* Restore the config space saved during init */
amdgpu_device_load_pci_state(adev->pdev);
+ /* Certain platforms have switches which assign virtual BAR values to
+ * devices. OS uses the virtual BAR values and device behind the switch
+ * is assgined another BAR value. When device's config space registers
+ * are queried, switch returns the virtual BAR values. When mode-2 reset
+ * is performed, switch is unaware of it, and will continue to return
+ * the same virtual values to the OS.This affects
+ * pci_restore_config_space() API as it doesn't write the value saved if
+ * the current value read from config space is the same as what is
+ * saved. As a workaround, make sure the config space is restored
+ * always.
+ */
+ if (!(adev->flags & AMD_IS_APU))
+ smu_v13_0_6_restore_pci_config(smu);
+
dev_dbg(smu->adev->dev, "wait for reset ack\n");
do {
ret = smu_cmn_wait_for_response(smu);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
index b06a3cc4330542..07a65e005785d6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
@@ -234,7 +234,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
break;
case IP_VERSION(14, 0, 1):
- smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
+ smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_1;
break;
default:
@@ -1402,9 +1402,22 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu,
if (adev->vcn.harvest_config & (1 << i))
continue;
- ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
- SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
- i << 16U, NULL);
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) {
+ if (i == 0)
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0,
+ i << 16U, NULL);
+ else if (i == 1)
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1,
+ i << 16U, NULL);
+ } else {
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
+ i << 16U, NULL);
+ }
+
if (ret)
return ret;
}
@@ -1415,9 +1428,34 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu,
int smu_v14_0_set_jpeg_enable(struct smu_context *smu,
bool enable)
{
- return smu_cmn_send_smc_msg_with_param(smu, enable ?
- SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg,
- 0, NULL);
+ struct amdgpu_device *adev = smu->adev;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) {
+ if (i == 0)
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpJpeg0 : SMU_MSG_PowerDownJpeg0,
+ i << 16U, NULL);
+ else if (i == 1 && amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpJpeg1 : SMU_MSG_PowerDownJpeg1,
+ i << 16U, NULL);
+ } else {
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg,
+ i << 16U, NULL);
+ }
+
+ if (ret)
+ return ret;
+ }
+
+ return ret;
}
int smu_v14_0_run_btc(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
index 9310c4758e38ce..63399c00cc28ff 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
@@ -70,9 +70,12 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1),
MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1),
MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1),
- MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1),
- MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 1),
- MSG_MAP(SetHardMinVcn, PPSMC_MSG_SetHardMinVcn, 1),
+ MSG_MAP(PowerDownVcn0, PPSMC_MSG_PowerDownVcn0, 1),
+ MSG_MAP(PowerUpVcn0, PPSMC_MSG_PowerUpVcn0, 1),
+ MSG_MAP(SetHardMinVcn0, PPSMC_MSG_SetHardMinVcn0, 1),
+ MSG_MAP(PowerDownVcn1, PPSMC_MSG_PowerDownVcn1, 1),
+ MSG_MAP(PowerUpVcn1, PPSMC_MSG_PowerUpVcn1, 1),
+ MSG_MAP(SetHardMinVcn1, PPSMC_MSG_SetHardMinVcn1, 1),
MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxclk, 1),
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 1),
MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1),
@@ -83,7 +86,8 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(GetEnabledSmuFeatures, PPSMC_MSG_GetEnabledSmuFeatures, 1),
MSG_MAP(SetHardMinSocclkByFreq, PPSMC_MSG_SetHardMinSocclkByFreq, 1),
MSG_MAP(SetSoftMinFclk, PPSMC_MSG_SetSoftMinFclk, 1),
- MSG_MAP(SetSoftMinVcn, PPSMC_MSG_SetSoftMinVcn, 1),
+ MSG_MAP(SetSoftMinVcn0, PPSMC_MSG_SetSoftMinVcn0, 1),
+ MSG_MAP(SetSoftMinVcn1, PPSMC_MSG_SetSoftMinVcn1, 1),
MSG_MAP(EnableGfxImu, PPSMC_MSG_EnableGfxImu, 1),
MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1),
MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1),
@@ -91,9 +95,12 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(SetHardMinGfxClk, PPSMC_MSG_SetHardMinGfxClk, 1),
MSG_MAP(SetSoftMaxSocclkByFreq, PPSMC_MSG_SetSoftMaxSocclkByFreq, 1),
MSG_MAP(SetSoftMaxFclkByFreq, PPSMC_MSG_SetSoftMaxFclkByFreq, 1),
- MSG_MAP(SetSoftMaxVcn, PPSMC_MSG_SetSoftMaxVcn, 1),
- MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 1),
- MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg, 1),
+ MSG_MAP(SetSoftMaxVcn0, PPSMC_MSG_SetSoftMaxVcn0, 1),
+ MSG_MAP(SetSoftMaxVcn1, PPSMC_MSG_SetSoftMaxVcn1, 1),
+ MSG_MAP(PowerDownJpeg0, PPSMC_MSG_PowerDownJpeg0, 1),
+ MSG_MAP(PowerUpJpeg0, PPSMC_MSG_PowerUpJpeg0, 1),
+ MSG_MAP(PowerDownJpeg1, PPSMC_MSG_PowerDownJpeg1, 1),
+ MSG_MAP(PowerUpJpeg1, PPSMC_MSG_PowerUpJpeg1, 1),
MSG_MAP(SetHardMinFclkByFreq, PPSMC_MSG_SetHardMinFclkByFreq, 1),
MSG_MAP(SetSoftMinSocclkByFreq, PPSMC_MSG_SetSoftMinSocclkByFreq, 1),
MSG_MAP(PowerDownIspByTile, PPSMC_MSG_PowerDownIspByTile, 1),
@@ -154,7 +161,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu)
SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
- SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, sizeof(DpmClocks_t),
+ SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
@@ -164,7 +171,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu)
goto err0_out;
smu_table->metrics_time = 0;
- smu_table->clocks_table = kzalloc(sizeof(DpmClocks_t), GFP_KERNEL);
+ smu_table->clocks_table = kzalloc(max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)), GFP_KERNEL);
if (!smu_table->clocks_table)
goto err1_out;
@@ -586,6 +593,60 @@ static int smu_v14_0_0_mode2_reset(struct smu_context *smu)
return ret;
}
+static int smu_v14_0_1_get_dpm_freq_by_index(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t dpm_level,
+ uint32_t *freq)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ if (!clk_table || clk_type >= SMU_CLK_COUNT)
+ return -EINVAL;
+
+ switch (clk_type) {
+ case SMU_SOCCLK:
+ if (dpm_level >= clk_table->NumSocClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->SocClocks[dpm_level];
+ break;
+ case SMU_VCLK:
+ if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->VClocks0[dpm_level];
+ break;
+ case SMU_DCLK:
+ if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->DClocks0[dpm_level];
+ break;
+ case SMU_VCLK1:
+ if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->VClocks1[dpm_level];
+ break;
+ case SMU_DCLK1:
+ if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->DClocks1[dpm_level];
+ break;
+ case SMU_UCLK:
+ case SMU_MCLK:
+ if (dpm_level >= clk_table->NumMemPstatesEnabled)
+ return -EINVAL;
+ *freq = clk_table->MemPstateTable[dpm_level].MemClk;
+ break;
+ case SMU_FCLK:
+ if (dpm_level >= clk_table->NumFclkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->FclkClocks_Freq[dpm_level];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t dpm_level,
@@ -630,6 +691,19 @@ static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu,
return 0;
}
+static int smu_v14_0_common_get_dpm_freq_by_index(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t dpm_level,
+ uint32_t *freq)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+
+ return 0;
+}
+
static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
enum smu_clk_type clk_type)
{
@@ -650,6 +724,8 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
break;
case SMU_VCLK:
case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
feature_id = SMU_FEATURE_VCN_DPM_BIT;
break;
default:
@@ -659,6 +735,126 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
return smu_cmn_feature_is_enabled(smu, feature_id);
}
+static int smu_v14_0_1_get_dpm_ultimate_freq(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *min,
+ uint32_t *max)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+ uint32_t clock_limit;
+ uint32_t max_dpm_level, min_dpm_level;
+ int ret = 0;
+
+ if (!smu_v14_0_0_clk_dpm_is_enabled(smu, clk_type)) {
+ switch (clk_type) {
+ case SMU_MCLK:
+ case SMU_UCLK:
+ clock_limit = smu->smu_table.boot_values.uclk;
+ break;
+ case SMU_FCLK:
+ clock_limit = smu->smu_table.boot_values.fclk;
+ break;
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ clock_limit = smu->smu_table.boot_values.gfxclk;
+ break;
+ case SMU_SOCCLK:
+ clock_limit = smu->smu_table.boot_values.socclk;
+ break;
+ case SMU_VCLK:
+ case SMU_VCLK1:
+ clock_limit = smu->smu_table.boot_values.vclk;
+ break;
+ case SMU_DCLK:
+ case SMU_DCLK1:
+ clock_limit = smu->smu_table.boot_values.dclk;
+ break;
+ default:
+ clock_limit = 0;
+ break;
+ }
+
+ /* clock in Mhz unit */
+ if (min)
+ *min = clock_limit / 100;
+ if (max)
+ *max = clock_limit / 100;
+
+ return 0;
+ }
+
+ if (max) {
+ switch (clk_type) {
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ *max = clk_table->MaxGfxClk;
+ break;
+ case SMU_MCLK:
+ case SMU_UCLK:
+ case SMU_FCLK:
+ max_dpm_level = 0;
+ break;
+ case SMU_SOCCLK:
+ max_dpm_level = clk_table->NumSocClkLevelsEnabled - 1;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ max_dpm_level = clk_table->Vcn0ClkLevelsEnabled - 1;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ max_dpm_level = clk_table->Vcn1ClkLevelsEnabled - 1;
+ break;
+ default:
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
+ if (ret)
+ goto failed;
+ }
+ }
+
+ if (min) {
+ switch (clk_type) {
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ *min = clk_table->MinGfxClk;
+ break;
+ case SMU_MCLK:
+ case SMU_UCLK:
+ min_dpm_level = clk_table->NumMemPstatesEnabled - 1;
+ break;
+ case SMU_FCLK:
+ min_dpm_level = clk_table->NumFclkLevelsEnabled - 1;
+ break;
+ case SMU_SOCCLK:
+ min_dpm_level = 0;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ min_dpm_level = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
+ if (ret)
+ goto failed;
+ }
+ }
+
+failed:
+ return ret;
+}
+
static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *min,
@@ -729,7 +925,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
}
if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
if (ret)
goto failed;
}
@@ -761,7 +957,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
}
if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
if (ret)
goto failed;
}
@@ -771,6 +967,19 @@ failed:
return ret;
}
+static int smu_v14_0_common_get_dpm_ultimate_freq(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *min,
+ uint32_t *max)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_ultimate_freq(smu, clk_type, min, max);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_ultimate_freq(smu, clk_type, min, max);
+
+ return 0;
+}
+
static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *value)
@@ -804,6 +1013,37 @@ static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu,
return smu_v14_0_0_get_smu_metrics_data(smu, member_type, value);
}
+static int smu_v14_0_1_get_dpm_level_count(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *count)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ switch (clk_type) {
+ case SMU_SOCCLK:
+ *count = clk_table->NumSocClkLevelsEnabled;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ *count = clk_table->Vcn0ClkLevelsEnabled;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ *count = clk_table->Vcn1ClkLevelsEnabled;
+ break;
+ case SMU_MCLK:
+ *count = clk_table->NumMemPstatesEnabled;
+ break;
+ case SMU_FCLK:
+ *count = clk_table->NumFclkLevelsEnabled;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *count)
@@ -833,6 +1073,18 @@ static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu,
return 0;
}
+static int smu_v14_0_common_get_dpm_level_count(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *count)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_level_count(smu, clk_type, count);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_level_count(smu, clk_type, count);
+
+ return 0;
+}
+
static int smu_v14_0_0_print_clk_levels(struct smu_context *smu,
enum smu_clk_type clk_type, char *buf)
{
@@ -859,18 +1111,20 @@ static int smu_v14_0_0_print_clk_levels(struct smu_context *smu,
case SMU_SOCCLK:
case SMU_VCLK:
case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
case SMU_MCLK:
case SMU_FCLK:
ret = smu_v14_0_0_get_current_clk_freq(smu, clk_type, &cur_value);
if (ret)
break;
- ret = smu_v14_0_0_get_dpm_level_count(smu, clk_type, &count);
+ ret = smu_v14_0_common_get_dpm_level_count(smu, clk_type, &count);
if (ret)
break;
for (i = 0; i < count; i++) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, i, &value);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, i, &value);
if (ret)
break;
@@ -933,8 +1187,13 @@ static int smu_v14_0_0_set_soft_freq_limited_range(struct smu_context *smu,
break;
case SMU_VCLK:
case SMU_DCLK:
- msg_set_min = SMU_MSG_SetHardMinVcn;
- msg_set_max = SMU_MSG_SetSoftMaxVcn;
+ msg_set_min = SMU_MSG_SetHardMinVcn0;
+ msg_set_max = SMU_MSG_SetSoftMaxVcn0;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ msg_set_min = SMU_MSG_SetHardMinVcn1;
+ msg_set_max = SMU_MSG_SetSoftMaxVcn1;
break;
default:
return -EINVAL;
@@ -964,11 +1223,11 @@ static int smu_v14_0_0_force_clk_levels(struct smu_context *smu,
case SMU_FCLK:
case SMU_VCLK:
case SMU_DCLK:
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
if (ret)
break;
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
if (ret)
break;
@@ -993,25 +1252,25 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu,
switch (level) {
case AMD_DPM_FORCED_LEVEL_HIGH:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
sclk_min = sclk_max;
fclk_min = fclk_max;
socclk_min = socclk_max;
break;
case AMD_DPM_FORCED_LEVEL_LOW:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
sclk_max = sclk_min;
fclk_max = fclk_min;
socclk_max = socclk_min;
break;
case AMD_DPM_FORCED_LEVEL_AUTO:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
break;
case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
@@ -1060,6 +1319,18 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu,
return ret;
}
+static int smu_v14_0_1_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ smu->gfx_default_hard_min_freq = clk_table->MinGfxClk;
+ smu->gfx_default_soft_max_freq = clk_table->MaxGfxClk;
+ smu->gfx_actual_hard_min_freq = 0;
+ smu->gfx_actual_soft_max_freq = 0;
+
+ return 0;
+}
+
static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
{
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
@@ -1072,6 +1343,16 @@ static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *sm
return 0;
}
+static int smu_v14_0_common_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_set_fine_grain_gfx_freq_parameters(smu);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_set_fine_grain_gfx_freq_parameters(smu);
+
+ return 0;
+}
+
static int smu_v14_0_0_set_vpe_enable(struct smu_context *smu,
bool enable)
{
@@ -1088,6 +1369,25 @@ static int smu_v14_0_0_set_umsch_mm_enable(struct smu_context *smu,
0, NULL);
}
+static int smu_14_0_1_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+ uint8_t idx;
+
+ /* Only the Clock information of SOC and VPE is copied to provide VPE DPM settings for use. */
+ for (idx = 0; idx < NUM_SOCCLK_DPM_LEVELS; idx++) {
+ clock_table->SocClocks[idx].Freq = (idx < clk_table->NumSocClkLevelsEnabled) ? clk_table->SocClocks[idx]:0;
+ clock_table->SocClocks[idx].Vol = 0;
+ }
+
+ for (idx = 0; idx < NUM_VPE_DPM_LEVELS; idx++) {
+ clock_table->VPEClocks[idx].Freq = (idx < clk_table->VpeClkLevelsEnabled) ? clk_table->VPEClocks[idx]:0;
+ clock_table->VPEClocks[idx].Vol = 0;
+ }
+
+ return 0;
+}
+
static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
{
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
@@ -1107,6 +1407,16 @@ static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *
return 0;
}
+static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_14_0_0_get_dpm_table(smu, clock_table);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_14_0_1_get_dpm_table(smu, clock_table);
+
+ return 0;
+}
+
static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
.check_fw_status = smu_v14_0_check_fw_status,
.check_fw_version = smu_v14_0_check_fw_version,
@@ -1128,16 +1438,16 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
.set_driver_table_location = smu_v14_0_set_driver_table_location,
.gfx_off_control = smu_v14_0_gfx_off_control,
.mode2_reset = smu_v14_0_0_mode2_reset,
- .get_dpm_ultimate_freq = smu_v14_0_0_get_dpm_ultimate_freq,
+ .get_dpm_ultimate_freq = smu_v14_0_common_get_dpm_ultimate_freq,
.od_edit_dpm_table = smu_v14_0_od_edit_dpm_table,
.print_clk_levels = smu_v14_0_0_print_clk_levels,
.force_clk_levels = smu_v14_0_0_force_clk_levels,
.set_performance_level = smu_v14_0_0_set_performance_level,
- .set_fine_grain_gfx_freq_parameters = smu_v14_0_0_set_fine_grain_gfx_freq_parameters,
+ .set_fine_grain_gfx_freq_parameters = smu_v14_0_common_set_fine_grain_gfx_freq_parameters,
.set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu,
.dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable,
.dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable,
- .get_dpm_clock_table = smu_14_0_0_get_dpm_table,
+ .get_dpm_clock_table = smu_v14_0_common_get_dpm_table,
};
static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu)
diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c
index ebb6d8ebd44eb6..1e9259416980ec 100644
--- a/drivers/gpu/drm/ast/ast_dp.c
+++ b/drivers/gpu/drm/ast/ast_dp.c
@@ -180,6 +180,7 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
{
struct ast_device *ast = to_ast_device(dev);
u8 video_on_off = on;
+ u32 i = 0;
// Video On/Off
ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xE3, (u8) ~AST_DP_VIDEO_ENABLE, on);
@@ -192,6 +193,8 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
ASTDP_MIRROR_VIDEO_ENABLE) != video_on_off) {
// wait 1 ms
mdelay(1);
+ if (++i > 200)
+ break;
}
}
}
diff --git a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c
index bd61e20770a5be..14a2a8473682b0 100644
--- a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c
@@ -52,7 +52,7 @@
* @adapter: I2C adapter for the DDC bus
* @offset: register offset
* @buffer: buffer for return data
- * @size: sizo of the buffer
+ * @size: size of the buffer
*
* Reads @size bytes from the DP dual mode adaptor registers
* starting at @offset.
@@ -116,7 +116,7 @@ EXPORT_SYMBOL(drm_dp_dual_mode_read);
* @adapter: I2C adapter for the DDC bus
* @offset: register offset
* @buffer: buffer for write data
- * @size: sizo of the buffer
+ * @size: size of the buffer
*
* Writes @size bytes to the DP dual mode adaptor registers
* starting at @offset.
diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c
index 266826eac4a75b..f5d4be89786609 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -4111,6 +4111,13 @@ int drm_dp_bw_overhead(int lane_count, int hactive,
u32 overhead = 1000000;
int symbol_cycles;
+ if (lane_count == 0 || hactive == 0 || bpp_x16 == 0) {
+ DRM_DEBUG_KMS("Invalid BW overhead params: lane_count %d, hactive %d, bpp_x16 %d.%04d\n",
+ lane_count, hactive,
+ bpp_x16 >> 4, (bpp_x16 & 0xf) * 625);
+ return 0;
+ }
+
/*
* DP Standard v2.1 2.6.4.1
* SSC downspread and ref clock variation margin:
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index 871e4e2129d6da..0683a129b36285 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -777,6 +777,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
unsigned int total_modes_count = 0;
struct drm_client_offset *offsets;
unsigned int connector_count = 0;
+ /* points to modes protected by mode_config.mutex */
struct drm_display_mode **modes;
struct drm_crtc **crtcs;
int i, ret = 0;
@@ -845,7 +846,6 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
drm_client_pick_crtcs(client, connectors, connector_count,
crtcs, modes, 0, width, height);
}
- mutex_unlock(&dev->mode_config.mutex);
drm_client_modeset_release(client);
@@ -875,6 +875,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
modeset->y = offset->y;
}
}
+ mutex_unlock(&dev->mode_config.mutex);
mutex_unlock(&client->modeset_mutex);
out:
diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
index e440f458b6633d..93337543aac32b 100644
--- a/drivers/gpu/drm/drm_gem_atomic_helper.c
+++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
@@ -224,8 +224,8 @@ __drm_gem_duplicate_shadow_plane_state(struct drm_plane *plane,
__drm_atomic_helper_plane_duplicate_state(plane, &new_shadow_plane_state->base);
- drm_format_conv_state_copy(&shadow_plane_state->fmtcnv_state,
- &new_shadow_plane_state->fmtcnv_state);
+ drm_format_conv_state_copy(&new_shadow_plane_state->fmtcnv_state,
+ &shadow_plane_state->fmtcnv_state);
}
EXPORT_SYMBOL(__drm_gem_duplicate_shadow_plane_state);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 7352bde299d547..03bd3c7bd0dc2c 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -582,7 +582,12 @@ int drm_gem_map_attach(struct dma_buf *dma_buf,
{
struct drm_gem_object *obj = dma_buf->priv;
- if (!obj->funcs->get_sg_table)
+ /*
+ * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers
+ * that implement their own ->map_dma_buf() do not.
+ */
+ if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf &&
+ !obj->funcs->get_sg_table)
return -ENOSYS;
return drm_gem_pin(obj);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 734412aae94dde..a9bf426f69b365 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -164,26 +164,6 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
*value = gpu->identity.eco_id;
break;
- case ETNAVIV_PARAM_GPU_NN_CORE_COUNT:
- *value = gpu->identity.nn_core_count;
- break;
-
- case ETNAVIV_PARAM_GPU_NN_MAD_PER_CORE:
- *value = gpu->identity.nn_mad_per_core;
- break;
-
- case ETNAVIV_PARAM_GPU_TP_CORE_COUNT:
- *value = gpu->identity.tp_core_count;
- break;
-
- case ETNAVIV_PARAM_GPU_ON_CHIP_SRAM_SIZE:
- *value = gpu->identity.on_chip_sram_size;
- break;
-
- case ETNAVIV_PARAM_GPU_AXI_SRAM_SIZE:
- *value = gpu->identity.axi_sram_size;
- break;
-
default:
DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
return -EINVAL;
@@ -663,8 +643,8 @@ static void etnaviv_gpu_enable_mlcg(struct etnaviv_gpu *gpu)
/* Disable TX clock gating on affected core revisions. */
if (etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
- etnaviv_is_model_rev(gpu, GC2000, 0x6202) ||
- etnaviv_is_model_rev(gpu, GC2000, 0x6203))
+ etnaviv_is_model_rev(gpu, GC7000, 0x6202) ||
+ etnaviv_is_model_rev(gpu, GC7000, 0x6203))
pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_TX;
/* Disable SE and RA clock gating on affected core revisions. */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 7d5e9158e13c1a..197e0037732ec8 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -54,18 +54,6 @@ struct etnaviv_chip_identity {
/* Number of Neural Network cores. */
u32 nn_core_count;
- /* Number of MAD units per Neural Network core. */
- u32 nn_mad_per_core;
-
- /* Number of Tensor Processing cores. */
- u32 tp_core_count;
-
- /* Size in bytes of the SRAM inside the NPU. */
- u32 on_chip_sram_size;
-
- /* Size in bytes of the SRAM across the AXI bus. */
- u32 axi_sram_size;
-
/* Size of the vertex cache. */
u32 vertex_cache_size;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
index d8e7334de8ceac..8665f2658d51b3 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
@@ -17,10 +17,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
.thread_count = 128,
.shader_core_count = 1,
.nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 8,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
@@ -52,11 +48,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
.register_max = 64,
.thread_count = 256,
.shader_core_count = 1,
- .nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 8,
.vertex_output_buffer_size = 512,
.pixel_pipes = 1,
@@ -89,10 +80,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
.thread_count = 512,
.shader_core_count = 2,
.nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
@@ -125,10 +112,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
.thread_count = 512,
.shader_core_count = 2,
.nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
@@ -160,11 +143,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
.register_max = 64,
.thread_count = 512,
.shader_core_count = 2,
- .nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
@@ -197,10 +175,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
.thread_count = 1024,
.shader_core_count = 4,
.nn_core_count = 0,
- .nn_mad_per_core = 0,
- .tp_core_count = 0,
- .on_chip_sram_size = 0,
- .axi_sram_size = 0,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 2,
@@ -233,10 +207,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
.thread_count = 256,
.shader_core_count = 1,
.nn_core_count = 8,
- .nn_mad_per_core = 64,
- .tp_core_count = 4,
- .on_chip_sram_size = 524288,
- .axi_sram_size = 1048576,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
@@ -269,10 +239,6 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
.thread_count = 256,
.shader_core_count = 1,
.nn_core_count = 6,
- .nn_mad_per_core = 64,
- .tp_core_count = 3,
- .on_chip_sram_size = 262144,
- .axi_sram_size = 0,
.vertex_cache_size = 16,
.vertex_output_buffer_size = 1024,
.pixel_pipes = 1,
diff --git a/drivers/gpu/drm/gma500/Makefile b/drivers/gpu/drm/gma500/Makefile
index 4f302cd5e1a6ca..58fed80c7392a0 100644
--- a/drivers/gpu/drm/gma500/Makefile
+++ b/drivers/gpu/drm/gma500/Makefile
@@ -34,7 +34,6 @@ gma500_gfx-y += \
psb_intel_lvds.o \
psb_intel_modes.o \
psb_intel_sdvo.o \
- psb_lid.o \
psb_irq.o
gma500_gfx-$(CONFIG_ACPI) += opregion.o
diff --git a/drivers/gpu/drm/gma500/mmu.c b/drivers/gpu/drm/gma500/mmu.c
index a70b01ccdf7022..4d78b33eaa8230 100644
--- a/drivers/gpu/drm/gma500/mmu.c
+++ b/drivers/gpu/drm/gma500/mmu.c
@@ -5,6 +5,7 @@
**************************************************************************/
#include <linux/highmem.h>
+#include <linux/vmalloc.h>
#include "mmu.h"
#include "psb_drv.h"
diff --git a/drivers/gpu/drm/gma500/psb_device.c b/drivers/gpu/drm/gma500/psb_device.c
index dcfcd7b89d4a1d..6dece8f0e380f7 100644
--- a/drivers/gpu/drm/gma500/psb_device.c
+++ b/drivers/gpu/drm/gma500/psb_device.c
@@ -73,8 +73,7 @@ static int psb_backlight_setup(struct drm_device *dev)
}
psb_intel_lvds_set_brightness(dev, PSB_MAX_BRIGHTNESS);
- /* This must occur after the backlight is properly initialised */
- psb_lid_timer_init(dev_priv);
+
return 0;
}
@@ -259,8 +258,6 @@ static int psb_chip_setup(struct drm_device *dev)
static void psb_chip_teardown(struct drm_device *dev)
{
- struct drm_psb_private *dev_priv = to_drm_psb_private(dev);
- psb_lid_timer_takedown(dev_priv);
gma_intel_teardown_gmbus(dev);
}
diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h
index c5edfa4aa4ccdd..83c17689c454f7 100644
--- a/drivers/gpu/drm/gma500/psb_drv.h
+++ b/drivers/gpu/drm/gma500/psb_drv.h
@@ -162,7 +162,6 @@
#define PSB_NUM_VBLANKS 2
#define PSB_WATCHDOG_DELAY (HZ * 2)
-#define PSB_LID_DELAY (HZ / 10)
#define PSB_MAX_BRIGHTNESS 100
@@ -491,11 +490,7 @@ struct drm_psb_private {
/* Hotplug handling */
struct work_struct hotplug_work;
- /* LID-Switch */
- spinlock_t lid_lock;
- struct timer_list lid_timer;
struct psb_intel_opregion opregion;
- u32 lid_last_state;
/* Watchdog */
uint32_t apm_reg;
@@ -591,10 +586,6 @@ struct psb_ops {
int i2c_bus; /* I2C bus identifier for Moorestown */
};
-/* psb_lid.c */
-extern void psb_lid_timer_init(struct drm_psb_private *dev_priv);
-extern void psb_lid_timer_takedown(struct drm_psb_private *dev_priv);
-
/* modesetting */
extern void psb_modeset_init(struct drm_device *dev);
extern void psb_modeset_cleanup(struct drm_device *dev);
diff --git a/drivers/gpu/drm/gma500/psb_lid.c b/drivers/gpu/drm/gma500/psb_lid.c
deleted file mode 100644
index 58a7fe39263601..00000000000000
--- a/drivers/gpu/drm/gma500/psb_lid.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/**************************************************************************
- * Copyright (c) 2007, Intel Corporation.
- *
- * Authors: Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
- **************************************************************************/
-
-#include <linux/spinlock.h>
-
-#include "psb_drv.h"
-#include "psb_intel_reg.h"
-#include "psb_reg.h"
-
-static void psb_lid_timer_func(struct timer_list *t)
-{
- struct drm_psb_private *dev_priv = from_timer(dev_priv, t, lid_timer);
- struct drm_device *dev = (struct drm_device *)&dev_priv->dev;
- struct timer_list *lid_timer = &dev_priv->lid_timer;
- unsigned long irq_flags;
- u32 __iomem *lid_state = dev_priv->opregion.lid_state;
- u32 pp_status;
-
- if (readl(lid_state) == dev_priv->lid_last_state)
- goto lid_timer_schedule;
-
- if ((readl(lid_state)) & 0x01) {
- /*lid state is open*/
- REG_WRITE(PP_CONTROL, REG_READ(PP_CONTROL) | POWER_TARGET_ON);
- do {
- pp_status = REG_READ(PP_STATUS);
- } while ((pp_status & PP_ON) == 0 &&
- (pp_status & PP_SEQUENCE_MASK) != 0);
-
- if (REG_READ(PP_STATUS) & PP_ON) {
- /*FIXME: should be backlight level before*/
- psb_intel_lvds_set_brightness(dev, 100);
- } else {
- DRM_DEBUG("LVDS panel never powered up");
- return;
- }
- } else {
- psb_intel_lvds_set_brightness(dev, 0);
-
- REG_WRITE(PP_CONTROL, REG_READ(PP_CONTROL) & ~POWER_TARGET_ON);
- do {
- pp_status = REG_READ(PP_STATUS);
- } while ((pp_status & PP_ON) == 0);
- }
- dev_priv->lid_last_state = readl(lid_state);
-
-lid_timer_schedule:
- spin_lock_irqsave(&dev_priv->lid_lock, irq_flags);
- if (!timer_pending(lid_timer)) {
- lid_timer->expires = jiffies + PSB_LID_DELAY;
- add_timer(lid_timer);
- }
- spin_unlock_irqrestore(&dev_priv->lid_lock, irq_flags);
-}
-
-void psb_lid_timer_init(struct drm_psb_private *dev_priv)
-{
- struct timer_list *lid_timer = &dev_priv->lid_timer;
- unsigned long irq_flags;
-
- spin_lock_init(&dev_priv->lid_lock);
- spin_lock_irqsave(&dev_priv->lid_lock, irq_flags);
-
- timer_setup(lid_timer, psb_lid_timer_func, 0);
-
- lid_timer->expires = jiffies + PSB_LID_DELAY;
-
- add_timer(lid_timer);
- spin_unlock_irqrestore(&dev_priv->lid_lock, irq_flags);
-}
-
-void psb_lid_timer_takedown(struct drm_psb_private *dev_priv)
-{
- del_timer_sync(&dev_priv->lid_timer);
-}
-
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3ef6ed41e62b4a..fba73c38e23569 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -33,9 +33,9 @@ endif
subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror
# Fine grained warnings disable
-CFLAGS_i915_pci.o = $(call cc-disable-warning, override-init)
-CFLAGS_display/intel_display_device.o = $(call cc-disable-warning, override-init)
-CFLAGS_display/intel_fbdev.o = $(call cc-disable-warning, override-init)
+CFLAGS_i915_pci.o = -Wno-override-init
+CFLAGS_display/intel_display_device.o = -Wno-override-init
+CFLAGS_display/intel_fbdev.o = -Wno-override-init
# Support compiling the display code separately for both i915 and xe
# drivers. Define I915 when building i915.
@@ -118,6 +118,7 @@ gt-y += \
gt/intel_ggtt_fencing.o \
gt/intel_gt.o \
gt/intel_gt_buffer_pool.o \
+ gt/intel_gt_ccs_mode.o \
gt/intel_gt_clock_utils.o \
gt/intel_gt_debugfs.o \
gt/intel_gt_engines_debugfs.o \
diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c
index dfe0b07a122d15..06ec04e667e32f 100644
--- a/drivers/gpu/drm/i915/display/g4x_dp.c
+++ b/drivers/gpu/drm/i915/display/g4x_dp.c
@@ -717,7 +717,6 @@ static void g4x_enable_dp(struct intel_atomic_state *state,
{
intel_enable_dp(state, encoder, pipe_config, conn_state);
intel_edp_backlight_on(pipe_config, conn_state);
- encoder->audio_enable(encoder, pipe_config, conn_state);
}
static void vlv_enable_dp(struct intel_atomic_state *state,
@@ -726,7 +725,6 @@ static void vlv_enable_dp(struct intel_atomic_state *state,
const struct drm_connector_state *conn_state)
{
intel_edp_backlight_on(pipe_config, conn_state);
- encoder->audio_enable(encoder, pipe_config, conn_state);
}
static void g4x_pre_enable_dp(struct intel_atomic_state *state,
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index eda4a8b885904d..ac456a2275dbad 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1155,7 +1155,6 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
}
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP);
- intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
/* ensure all panel commands dispatched before enabling transcoder */
wait_for_cmds_dispatched_to_panel(encoder);
@@ -1256,6 +1255,8 @@ static void gen11_dsi_enable(struct intel_atomic_state *state,
/* step6d: enable dsi transcoder */
gen11_dsi_enable_transcoder(encoder);
+ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
+
/* step7: enable backlight */
intel_backlight_enable(crtc_state, conn_state);
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON);
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index fe52c06271ef05..52bd3576835b6b 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1955,16 +1955,12 @@ static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915,
* these devices we split the init OTP sequence into a deassert sequence and
* the actual init OTP part.
*/
-static void fixup_mipi_sequences(struct drm_i915_private *i915,
- struct intel_panel *panel)
+static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
{
u8 *init_otp;
int len;
- /* Limit this to VLV for now. */
- if (!IS_VALLEYVIEW(i915))
- return;
-
/* Limit this to v1 vid-mode sequences */
if (panel->vbt.dsi.config->is_cmd_mode ||
panel->vbt.dsi.seq_version != 1)
@@ -2000,6 +1996,41 @@ static void fixup_mipi_sequences(struct drm_i915_private *i915,
panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1;
}
+/*
+ * Some machines (eg. Lenovo 82TQ) appear to have broken
+ * VBT sequences:
+ * - INIT_OTP is not present at all
+ * - what should be in INIT_OTP is in DISPLAY_ON
+ * - what should be in DISPLAY_ON is in BACKLIGHT_ON
+ * (along with the actual backlight stuff)
+ *
+ * To make those work we simply swap DISPLAY_ON and INIT_OTP.
+ *
+ * TODO: Do we need to limit this to specific machines,
+ * or examine the contents of the sequences to
+ * avoid false positives?
+ */
+static void icl_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] &&
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]) {
+ drm_dbg_kms(&i915->drm, "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n");
+
+ swap(panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP],
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]);
+ }
+}
+
+static void fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (DISPLAY_VER(i915) >= 11)
+ icl_fixup_mipi_sequences(i915, panel);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_fixup_mipi_sequences(i915, panel);
+}
+
static void
parse_mipi_sequence(struct drm_i915_private *i915,
struct intel_panel *panel)
@@ -3351,6 +3382,9 @@ bool intel_bios_encoder_supports_dp_dual_mode(const struct intel_bios_encoder_da
{
const struct child_device_config *child = &devdata->child;
+ if (!devdata)
+ return false;
+
if (!intel_bios_encoder_supports_dp(devdata) ||
!intel_bios_encoder_supports_hdmi(devdata))
return false;
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index ed89b86ea625aa..f672bfd70d4551 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2534,7 +2534,8 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state)
intel_atomic_get_old_cdclk_state(state);
const struct intel_cdclk_state *new_cdclk_state =
intel_atomic_get_new_cdclk_state(state);
- enum pipe pipe = new_cdclk_state->pipe;
+ struct intel_cdclk_config cdclk_config;
+ enum pipe pipe;
if (!intel_cdclk_changed(&old_cdclk_state->actual,
&new_cdclk_state->actual))
@@ -2543,12 +2544,25 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state)
if (IS_DG2(i915))
intel_cdclk_pcode_pre_notify(state);
- if (pipe == INVALID_PIPE ||
- old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) {
- drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+ if (new_cdclk_state->disable_pipes) {
+ cdclk_config = new_cdclk_state->actual;
+ pipe = INVALID_PIPE;
+ } else {
+ if (new_cdclk_state->actual.cdclk >= old_cdclk_state->actual.cdclk) {
+ cdclk_config = new_cdclk_state->actual;
+ pipe = new_cdclk_state->pipe;
+ } else {
+ cdclk_config = old_cdclk_state->actual;
+ pipe = INVALID_PIPE;
+ }
- intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
+ cdclk_config.voltage_level = max(new_cdclk_state->actual.voltage_level,
+ old_cdclk_state->actual.voltage_level);
}
+
+ drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+
+ intel_set_cdclk(i915, &cdclk_config, pipe);
}
/**
@@ -2566,7 +2580,7 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state)
intel_atomic_get_old_cdclk_state(state);
const struct intel_cdclk_state *new_cdclk_state =
intel_atomic_get_new_cdclk_state(state);
- enum pipe pipe = new_cdclk_state->pipe;
+ enum pipe pipe;
if (!intel_cdclk_changed(&old_cdclk_state->actual,
&new_cdclk_state->actual))
@@ -2575,12 +2589,15 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state)
if (IS_DG2(i915))
intel_cdclk_pcode_post_notify(state);
- if (pipe != INVALID_PIPE &&
- old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) {
- drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+ if (!new_cdclk_state->disable_pipes &&
+ new_cdclk_state->actual.cdclk < old_cdclk_state->actual.cdclk)
+ pipe = new_cdclk_state->pipe;
+ else
+ pipe = INVALID_PIPE;
+
+ drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
- intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
- }
+ intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
}
static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state)
@@ -3058,6 +3075,7 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa
return NULL;
cdclk_state->pipe = INVALID_PIPE;
+ cdclk_state->disable_pipes = false;
return &cdclk_state->base;
}
@@ -3236,6 +3254,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state)
if (ret)
return ret;
+ new_cdclk_state->disable_pipes = true;
+
drm_dbg_kms(&dev_priv->drm,
"Modeset required for cdclk change\n");
}
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h
index 48fd7d39e0cd9c..71bc032bfef16e 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.h
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.h
@@ -51,6 +51,9 @@ struct intel_cdclk_state {
/* bitmask of active pipes */
u8 active_pipes;
+
+ /* update cdclk with pipes disabled */
+ bool disable_pipes;
};
int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index f8b33999d43fcc..0d3da55e1c24d5 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -36,12 +36,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *dev_priv =
to_i915(plane_state->uapi.plane->dev);
- const struct drm_framebuffer *fb = plane_state->hw.fb;
- struct drm_i915_gem_object *obj = intel_fb_obj(fb);
u32 base;
if (DISPLAY_INFO(dev_priv)->cursor_needs_physical)
- base = i915_gem_object_get_dma_address(obj, 0);
+ base = plane_state->phys_dma_addr;
else
base = intel_plane_ggtt_offset(plane_state);
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index c587a8efeafcf5..c17462b4c2ac19 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -4256,7 +4256,12 @@ static bool m_n_equal(const struct intel_link_m_n *m_n_1,
static bool crtcs_port_sync_compatible(const struct intel_crtc_state *crtc_state1,
const struct intel_crtc_state *crtc_state2)
{
+ /*
+ * FIXME the modeset sequence is currently wrong and
+ * can't deal with bigjoiner + port sync at the same time.
+ */
return crtc_state1->hw.active && crtc_state2->hw.active &&
+ !crtc_state1->bigjoiner_pipes && !crtc_state2->bigjoiner_pipes &&
crtc_state1->output_types == crtc_state2->output_types &&
crtc_state1->output_format == crtc_state2->output_format &&
crtc_state1->lane_count == crtc_state2->lane_count &&
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index ab2f52d21bad8b..8af9e6128277af 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2709,15 +2709,6 @@ static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state)
*/
intel_de_write(dev_priv, PIPESRC(pipe),
PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1));
-
- if (!crtc_state->enable_psr2_su_region_et)
- return;
-
- width = drm_rect_width(&crtc_state->psr2_su_area);
- height = drm_rect_height(&crtc_state->psr2_su_area);
-
- intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(pipe),
- PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1));
}
static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state)
diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h
index fe42688137863c..9b1bce2624b9ea 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.h
+++ b/drivers/gpu/drm/i915/display/intel_display_device.h
@@ -47,6 +47,7 @@ struct drm_printer;
#define HAS_DPT(i915) (DISPLAY_VER(i915) >= 13)
#define HAS_DSB(i915) (DISPLAY_INFO(i915)->has_dsb)
#define HAS_DSC(__i915) (DISPLAY_RUNTIME_INFO(__i915)->has_dsc)
+#define HAS_DSC_MST(__i915) (DISPLAY_VER(__i915) >= 12 && HAS_DSC(__i915))
#define HAS_FBC(i915) (DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0)
#define HAS_FPGA_DBG_UNCLAIMED(i915) (DISPLAY_INFO(i915)->has_fpga_dbg)
#define HAS_FW_BLC(i915) (DISPLAY_VER(i915) >= 3)
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index e67cd5b02e84ff..bf3f942e19c3d3 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -727,6 +727,7 @@ struct intel_plane_state {
#define PLANE_HAS_FENCE BIT(0)
struct intel_fb_view view;
+ u32 phys_dma_addr; /* for cursor_needs_physical */
/* Plane pxp decryption state */
bool decrypt;
@@ -1422,6 +1423,8 @@ struct intel_crtc_state {
u32 psr2_man_track_ctl;
+ u32 pipe_srcsz_early_tpt;
+
struct drm_rect psr2_su_area;
/* Variable Refresh Rate state */
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index f0c3ed37b350b9..e583515f9b25a3 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -67,6 +67,7 @@
#include "intel_dp_tunnel.h"
#include "intel_dpio_phy.h"
#include "intel_dpll.h"
+#include "intel_drrs.h"
#include "intel_fifo_underrun.h"
#include "intel_hdcp.h"
#include "intel_hdmi.h"
@@ -498,7 +499,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp)
/* The values must be in increasing order */
static const int mtl_rates[] = {
162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000,
- 810000, 1000000, 1350000, 2000000,
+ 810000, 1000000, 2000000,
};
static const int icl_rates[] = {
162000, 216000, 270000, 324000, 432000, 540000, 648000, 810000,
@@ -1421,7 +1422,8 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp,
if (DISPLAY_VER(dev_priv) >= 12)
return true;
- if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A)
+ if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A &&
+ !intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST))
return true;
return false;
@@ -1916,8 +1918,9 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp,
dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1);
for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) {
- if (valid_dsc_bpp[i] < dsc_min_bpp ||
- valid_dsc_bpp[i] > dsc_max_bpp)
+ if (valid_dsc_bpp[i] < dsc_min_bpp)
+ continue;
+ if (valid_dsc_bpp[i] > dsc_max_bpp)
break;
ret = dsc_compute_link_config(intel_dp,
@@ -2683,15 +2686,6 @@ intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp,
intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA);
}
-static bool cpu_transcoder_has_drrs(struct drm_i915_private *i915,
- enum transcoder cpu_transcoder)
-{
- if (HAS_DOUBLE_BUFFERED_M_N(i915))
- return true;
-
- return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder);
-}
-
static bool can_enable_drrs(struct intel_connector *connector,
const struct intel_crtc_state *pipe_config,
const struct drm_display_mode *downclock_mode)
@@ -2714,7 +2708,7 @@ static bool can_enable_drrs(struct intel_connector *connector,
if (pipe_config->has_pch_encoder)
return false;
- if (!cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder))
+ if (!intel_cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder))
return false;
return downclock_mode &&
@@ -2731,7 +2725,11 @@ intel_dp_drrs_compute_config(struct intel_connector *connector,
intel_panel_downclock_mode(connector, &pipe_config->hw.adjusted_mode);
int pixel_clock;
- if (has_seamless_m_n(connector))
+ /*
+ * FIXME all joined pipes share the same transcoder.
+ * Need to account for that when updating M/N live.
+ */
+ if (has_seamless_m_n(connector) && !pipe_config->bigjoiner_pipes)
pipe_config->update_m_n = true;
if (!can_enable_drrs(connector, pipe_config, downclock_mode)) {
@@ -6565,6 +6563,7 @@ intel_dp_init_connector(struct intel_digital_port *dig_port,
intel_connector->get_hw_state = intel_ddi_connector_get_hw_state;
else
intel_connector->get_hw_state = intel_connector_get_hw_state;
+ intel_connector->sync_state = intel_dp_connector_sync_state;
if (!intel_edp_init_connector(intel_dp, intel_connector)) {
intel_dp_aux_fini(intel_dp);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
index b98a87883fefb0..9db43bd81ce2fa 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
@@ -691,12 +691,15 @@ int intel_dp_hdcp_get_remote_capability(struct intel_connector *connector,
u8 bcaps;
int ret;
+ *hdcp_capable = false;
+ *hdcp2_capable = false;
if (!intel_encoder_is_mst(connector->encoder))
return -EINVAL;
ret = _intel_dp_hdcp2_get_capability(aux, hdcp2_capable);
if (ret)
- return ret;
+ drm_dbg_kms(&i915->drm,
+ "HDCP2 DPCD capability read failed err: %d\n", ret);
ret = intel_dp_hdcp_read_bcaps(aux, i915, &bcaps);
if (ret)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 53aec023ce92fa..b651c990af85f7 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -1355,7 +1355,7 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
return 0;
}
- if (DISPLAY_VER(dev_priv) >= 10 &&
+ if (HAS_DSC_MST(dev_priv) &&
drm_dp_sink_supports_dsc(intel_connector->dp.dsc_dpcd)) {
/*
* TBD pass the connector BPC,
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index ff480f171f75a2..b6d24410740f85 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2554,7 +2554,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params,
static bool
ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915)
{
- return (((IS_ELKHARTLAKE(i915) || IS_JASPERLAKE(i915)) &&
+ return ((IS_ELKHARTLAKE(i915) &&
IS_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) ||
IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) &&
i915->display.dpll.ref_clks.nssc == 38400;
diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c
index 169ef38ff18833..597f8bd6aa1a0e 100644
--- a/drivers/gpu/drm/i915/display/intel_drrs.c
+++ b/drivers/gpu/drm/i915/display/intel_drrs.c
@@ -63,6 +63,15 @@ const char *intel_drrs_type_str(enum drrs_type drrs_type)
return str[drrs_type];
}
+bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915,
+ enum transcoder cpu_transcoder)
+{
+ if (HAS_DOUBLE_BUFFERED_M_N(i915))
+ return true;
+
+ return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder);
+}
+
static void
intel_drrs_set_refresh_rate_pipeconf(struct intel_crtc *crtc,
enum drrs_refresh_rate refresh_rate)
@@ -312,9 +321,8 @@ static int intel_drrs_debugfs_status_show(struct seq_file *m, void *unused)
mutex_lock(&crtc->drrs.mutex);
seq_printf(m, "DRRS capable: %s\n",
- str_yes_no(crtc_state->has_drrs ||
- HAS_DOUBLE_BUFFERED_M_N(i915) ||
- intel_cpu_transcoder_has_m2_n2(i915, crtc_state->cpu_transcoder)));
+ str_yes_no(intel_cpu_transcoder_has_drrs(i915,
+ crtc_state->cpu_transcoder)));
seq_printf(m, "DRRS enabled: %s\n",
str_yes_no(crtc_state->has_drrs));
diff --git a/drivers/gpu/drm/i915/display/intel_drrs.h b/drivers/gpu/drm/i915/display/intel_drrs.h
index 8ef5f93a80ffd5..0982f95eab727a 100644
--- a/drivers/gpu/drm/i915/display/intel_drrs.h
+++ b/drivers/gpu/drm/i915/display/intel_drrs.h
@@ -9,12 +9,15 @@
#include <linux/types.h>
enum drrs_type;
+enum transcoder;
struct drm_i915_private;
struct intel_atomic_state;
struct intel_crtc;
struct intel_crtc_state;
struct intel_connector;
+bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915,
+ enum transcoder cpu_transcoder);
const char *intel_drrs_type_str(enum drrs_type drrs_type);
bool intel_drrs_is_active(struct intel_crtc *crtc);
void intel_drrs_activate(const struct intel_crtc_state *crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index d62e050185e7c3..e4515bf920388e 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -340,6 +340,17 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state)
return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency));
}
+static u32 dsb_chicken(struct intel_crtc *crtc)
+{
+ if (crtc->mode_flags & I915_MODE_FLAG_VRR)
+ return DSB_CTRL_WAIT_SAFE_WINDOW |
+ DSB_CTRL_NO_WAIT_VBLANK |
+ DSB_INST_WAIT_SAFE_WINDOW |
+ DSB_INST_NO_WAIT_VBLANK;
+ else
+ return 0;
+}
+
static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
int dewake_scanline)
{
@@ -361,6 +372,9 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
intel_de_write_fw(dev_priv, DSB_CTRL(pipe, dsb->id),
ctrl | DSB_ENABLE);
+ intel_de_write_fw(dev_priv, DSB_CHICKEN(pipe, dsb->id),
+ dsb_chicken(crtc));
+
intel_de_write_fw(dev_priv, DSB_HEAD(pipe, dsb->id),
intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf));
diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index 7b42aef37d2f72..b6df9baf481b69 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -255,6 +255,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state)
return PTR_ERR(vma);
plane_state->ggtt_vma = vma;
+
+ /*
+ * Pre-populate the dma address before we enter the vblank
+ * evade critical section as i915_gem_object_get_dma_address()
+ * will trigger might_sleep() even if it won't actually sleep,
+ * which is the case when the fb has already been pinned.
+ */
+ if (phys_cursor)
+ plane_state->phys_dma_addr =
+ i915_gem_object_get_dma_address(intel_fb_obj(fb), 0);
} else {
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 6927785fd6ff2f..aabd018bd73743 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1422,6 +1422,17 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
return;
}
+ /*
+ * FIXME figure out what is wrong with PSR+bigjoiner and
+ * fix it. Presumably something related to the fact that
+ * PSR is a transcoder level feature.
+ */
+ if (crtc_state->bigjoiner_pipes) {
+ drm_dbg_kms(&dev_priv->drm,
+ "PSR disabled due to bigjoiner\n");
+ return;
+ }
+
if (CAN_PANEL_REPLAY(intel_dp))
crtc_state->has_panel_replay = true;
else
@@ -1994,6 +2005,7 @@ static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp)
void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state)
{
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
struct intel_encoder *encoder;
@@ -2013,6 +2025,12 @@ void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_st
intel_de_write(dev_priv, PSR2_MAN_TRK_CTL(cpu_transcoder),
crtc_state->psr2_man_track_ctl);
+
+ if (!crtc_state->enable_psr2_su_region_et)
+ return;
+
+ intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(crtc->pipe),
+ crtc_state->pipe_srcsz_early_tpt);
}
static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state,
@@ -2051,6 +2069,20 @@ exit:
crtc_state->psr2_man_track_ctl = val;
}
+static u32 psr2_pipe_srcsz_early_tpt_calc(struct intel_crtc_state *crtc_state,
+ bool full_update)
+{
+ int width, height;
+
+ if (!crtc_state->enable_psr2_su_region_et || full_update)
+ return 0;
+
+ width = drm_rect_width(&crtc_state->psr2_su_area);
+ height = drm_rect_height(&crtc_state->psr2_su_area);
+
+ return PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1);
+}
+
static void clip_area_update(struct drm_rect *overlap_damage_area,
struct drm_rect *damage_area,
struct drm_rect *pipe_src)
@@ -2095,21 +2127,36 @@ static void intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_st
* cursor fully when cursor is in SU area.
*/
static void
-intel_psr2_sel_fetch_et_alignment(struct intel_crtc_state *crtc_state,
- struct intel_plane_state *cursor_state)
+intel_psr2_sel_fetch_et_alignment(struct intel_atomic_state *state,
+ struct intel_crtc *crtc)
{
- struct drm_rect inter;
+ struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
+ struct intel_plane_state *new_plane_state;
+ struct intel_plane *plane;
+ int i;
- if (!crtc_state->enable_psr2_su_region_et ||
- !cursor_state->uapi.visible)
+ if (!crtc_state->enable_psr2_su_region_et)
return;
- inter = crtc_state->psr2_su_area;
- if (!drm_rect_intersect(&inter, &cursor_state->uapi.dst))
- return;
+ for_each_new_intel_plane_in_state(state, plane, new_plane_state, i) {
+ struct drm_rect inter;
+
+ if (new_plane_state->uapi.crtc != crtc_state->uapi.crtc)
+ continue;
+
+ if (plane->id != PLANE_CURSOR)
+ continue;
+
+ if (!new_plane_state->uapi.visible)
+ continue;
- clip_area_update(&crtc_state->psr2_su_area, &cursor_state->uapi.dst,
- &crtc_state->pipe_src);
+ inter = crtc_state->psr2_su_area;
+ if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst))
+ continue;
+
+ clip_area_update(&crtc_state->psr2_su_area, &new_plane_state->uapi.dst,
+ &crtc_state->pipe_src);
+ }
}
/*
@@ -2152,8 +2199,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
{
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
- struct intel_plane_state *new_plane_state, *old_plane_state,
- *cursor_plane_state = NULL;
+ struct intel_plane_state *new_plane_state, *old_plane_state;
struct intel_plane *plane;
bool full_update = false;
int i, ret;
@@ -2238,13 +2284,6 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
damaged_area.x2 += new_plane_state->uapi.dst.x1 - src.x1;
clip_area_update(&crtc_state->psr2_su_area, &damaged_area, &crtc_state->pipe_src);
-
- /*
- * Cursor plane new state is stored to adjust su area to cover
- * cursor are fully.
- */
- if (plane->id == PLANE_CURSOR)
- cursor_plane_state = new_plane_state;
}
/*
@@ -2273,9 +2312,13 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
if (ret)
return ret;
- /* Adjust su area to cover cursor fully as necessary */
- if (cursor_plane_state)
- intel_psr2_sel_fetch_et_alignment(crtc_state, cursor_plane_state);
+ /*
+ * Adjust su area to cover cursor fully as necessary (early
+ * transport). This needs to be done after
+ * drm_atomic_add_affected_planes to ensure visible cursor is added into
+ * affected planes even when cursor is not updated by itself.
+ */
+ intel_psr2_sel_fetch_et_alignment(state, crtc);
intel_psr2_sel_fetch_pipe_alignment(crtc_state);
@@ -2338,6 +2381,8 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
skip_sel_fetch_set_loop:
psr2_man_trk_ctl_calc(crtc_state, full_update);
+ crtc_state->pipe_srcsz_early_tpt =
+ psr2_pipe_srcsz_early_tpt_calc(crtc_state, full_update);
return 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index 5f9e748adc89ee..0cd9c183f6212f 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -1842,8 +1842,6 @@ static void intel_disable_sdvo(struct intel_atomic_state *state,
struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
u32 temp;
- encoder->audio_disable(encoder, old_crtc_state, conn_state);
-
intel_sdvo_set_active_outputs(intel_sdvo, 0);
if (0)
intel_sdvo_set_encoder_power_state(intel_sdvo,
@@ -1935,8 +1933,6 @@ static void intel_enable_sdvo(struct intel_atomic_state *state,
intel_sdvo_set_encoder_power_state(intel_sdvo,
DRM_MODE_DPMS_ON);
intel_sdvo_set_active_outputs(intel_sdvo, intel_sdvo_connector->output_flag);
-
- encoder->audio_enable(encoder, pipe_config, conn_state);
}
static enum drm_mode_status
diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c
index 5d905f932cb4b3..f542ee1db1d970 100644
--- a/drivers/gpu/drm/i915/display/intel_vrr.c
+++ b/drivers/gpu/drm/i915/display/intel_vrr.c
@@ -117,6 +117,13 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state,
const struct drm_display_info *info = &connector->base.display_info;
int vmin, vmax;
+ /*
+ * FIXME all joined pipes share the same transcoder.
+ * Need to account for that during VRR toggle/push/etc.
+ */
+ if (crtc_state->bigjoiner_pipes)
+ return;
+
if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
return;
@@ -187,10 +194,11 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state)
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
/*
- * TRANS_SET_CONTEXT_LATENCY with VRR enabled
- * requires this chicken bit on ADL/DG2.
+ * This bit seems to have two meanings depending on the platform:
+ * TGL: generate VRR "safe window" for DSB vblank waits
+ * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR
*/
- if (DISPLAY_VER(dev_priv) == 13)
+ if (IS_DISPLAY_VER(dev_priv, 12, 13))
intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder),
0, PIPE_VBLANK_WITH_DELAY);
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index e941e2e4fd14c2..860574d04f881a 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -2295,6 +2295,9 @@ static u8 skl_get_plane_caps(struct drm_i915_private *i915,
if (HAS_4TILE(i915))
caps |= INTEL_PLANE_CAP_TILING_4;
+ if (!IS_ENABLED(I915) && !HAS_FLAT_CCS(i915))
+ return caps;
+
if (skl_plane_has_rc_ccs(i915, pipe, plane_id)) {
caps |= INTEL_PLANE_CAP_CCS_RC;
if (DISPLAY_VER(i915) >= 12)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 0ba955611dfb54..8780aa24310535 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -5,6 +5,7 @@
*/
#include <drm/drm_cache.h>
+#include <linux/vmalloc.h>
#include "gt/intel_gt.h"
#include "gt/intel_tlb.h"
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
index b2a5882b8f81a0..07565701873922 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -4,6 +4,7 @@
* Copyright © 2016 Intel Corporation
*/
+#include <linux/vmalloc.h>
#include "mock_dmabuf.h"
static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment,
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index fa46d2308b0ed3..81bf2216371be6 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -961,6 +961,9 @@ static int gen8_init_rsvd(struct i915_address_space *vm)
struct i915_vma *vma;
int ret;
+ if (!intel_gt_needs_wa_16018031267(vm->gt))
+ return 0;
+
/* The memory will be used only by GPU. */
obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
I915_BO_ALLOC_VOLATILE |
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 1ade568ffbfa43..7a6dc371c384eb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -908,6 +908,23 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
info->engine_mask &= ~BIT(GSC0);
}
+ /*
+ * Do not create the command streamer for CCS slices beyond the first.
+ * All the workload submitted to the first engine will be shared among
+ * all the slices.
+ *
+ * Once the user will be allowed to customize the CCS mode, then this
+ * check needs to be removed.
+ */
+ if (IS_DG2(gt->i915)) {
+ u8 first_ccs = __ffs(CCS_MASK(gt));
+
+ /* Mask off all the CCS engine */
+ info->engine_mask &= ~GENMASK(CCS3, CCS0);
+ /* Put back in the first CCS engine */
+ info->engine_mask |= BIT(_CCS(first_ccs));
+ }
+
return info->engine_mask;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 96bdb93a948d1b..fb7bff27b45a34 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -279,9 +279,6 @@ static int __engine_park(struct intel_wakeref *wf)
intel_engine_park_heartbeat(engine);
intel_breadcrumbs_park(engine->breadcrumbs);
- /* Must be reset upon idling, or we may miss the busy wakeup. */
- GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN);
-
if (engine->park)
engine->park(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 42aade0faf2d14..b061a0a0d6b082 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3272,6 +3272,9 @@ static void execlists_park(struct intel_engine_cs *engine)
{
cancel_timer(&engine->execlists.timer);
cancel_timer(&engine->execlists.preempt);
+
+ /* Reset upon idling, or we may delay the busy wakeup. */
+ WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN);
}
static void add_to_engine(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index a425db5ed3a22c..6a2c2718bcc38e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -1024,6 +1024,12 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
return I915_MAP_WC;
}
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt)
+{
+ /* Wa_16018031267, Wa_16018063123 */
+ return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71));
+}
+
bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
{
return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 608f5c87292857..003eb93b826fd0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -82,17 +82,18 @@ struct drm_printer;
##__VA_ARGS__); \
} while (0)
-#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
- IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \
- engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
-
static inline bool gt_is_root(struct intel_gt *gt)
{
return !gt->info.id;
}
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt);
bool intel_gt_needs_wa_22016122933(struct intel_gt *gt);
+#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
+ intel_gt_needs_wa_16018031267(engine->gt) && \
+ engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
+
static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
{
return container_of(uc, struct intel_gt, uc);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644
index 00000000000000..044219c5960a53
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+ int cslice;
+ u32 mode = 0;
+ int first_ccs = __ffs(CCS_MASK(gt));
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /* Build the value for the fixed CCS load balancing */
+ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+ if (CCS_MASK(gt) & BIT(cslice))
+ /*
+ * If available, assign the cslice
+ * to the first available engine...
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+ else
+ /*
+ * ... otherwise, mark the cslice as
+ * unavailable if no CCS dispatches here
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice,
+ XEHP_CCS_MODE_CSLICE_MASK);
+ }
+
+ intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
new file mode 100644
index 00000000000000..9e5549caeb2693
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 50962cfd1353ae..743fe356672274 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1477,8 +1477,14 @@
#define ECOBITS_PPGTT_CACHE4B (0 << 8)
#define GEN12_RCU_MODE _MMIO(0x14800)
+#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0)
+#define XEHP_CCS_MODE _MMIO(0x14804)
+#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */
+#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1)
+#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH))
+
#define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168)
#define CHV_FGT_DISABLE_SS0 (1 << 10)
#define CHV_FGT_DISABLE_SS1 (1 << 11)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index d67d44611c2834..6ec3582c973577 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -10,6 +10,7 @@
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
@@ -51,7 +52,8 @@
* registers belonging to BCS, VCS or VECS should be implemented in
* xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
* engine's MMIO range but that are part of of the common RCS/CCS reset domain
- * should be implemented in general_render_compute_wa_init().
+ * should be implemented in general_render_compute_wa_init(). The settings
+ * about the CCS load balancing should be added in ccs_engine_wa_mode().
*
* - GT workarounds: the list of these WAs is applied whenever these registers
* revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -1653,6 +1655,7 @@ static void
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* Wa_14018575942 / Wa_18018781329 */
+ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
/* Wa_22016670082 */
@@ -2853,6 +2856,28 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
}
+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+ struct intel_gt *gt = engine->gt;
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /*
+ * Wa_14019159160: This workaround, along with others, leads to
+ * significant challenges in utilizing load balancing among the
+ * CCS slices. Consequently, an architectural decision has been
+ * made to completely disable automatic CCS load balancing.
+ */
+ wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+
+ /*
+ * After having disabled automatic load balancing we need to
+ * assign all slices to a single CCS. We will call it CCS mode 1
+ */
+ intel_gt_apply_ccs_mode(gt);
+}
+
/*
* The workarounds in this function apply to shared registers in
* the general render reset domain that aren't tied to a
@@ -3003,8 +3028,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
* to a single RCS/CCS engine's workaround list since
* they're reset as part of the general render domain reset.
*/
- if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
general_render_compute_wa_init(engine, wal);
+ ccs_engine_wa_mode(engine, wal);
+ }
if (engine->class == COMPUTE_CLASS)
ccs_engine_wa_init(engine, wal);
diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c b/drivers/gpu/drm/i915/gt/shmem_utils.c
index bccc3a1200bc6e..1fb6ff77fd8991 100644
--- a/drivers/gpu/drm/i915/gt/shmem_utils.c
+++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/shmem_fs.h>
+#include <linux/vmalloc.h>
#include "i915_drv.h"
#include "gem/i915_gem_object.h"
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f3dcae4b9d455e..0f83c6d4376ffb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1403,14 +1403,17 @@ static void guc_cancel_busyness_worker(struct intel_guc *guc)
* Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
* every possible call stack is unfeasible. It would be too intrusive to many
* areas that really don't care about the GuC backend. However, there is the
- * 'reset_in_progress' flag available, so just use that.
+ * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked.
+ * So just use those. Note that testing both is required due to the hideously
+ * complex nature of the i915 driver's reset code paths.
*
* And note that in the case of a reset occurring during driver unload
- * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set
- * is fine because there is another cancel in _finish (when the reset flag is
- * not).
+ * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the
+ * reset flag/mutex are set) is fine because there is another explicit cancel in
+ * intel_guc_submission_fini (when the reset flag/mutex are not).
*/
- if (guc_to_gt(guc)->uc.reset_in_progress)
+ if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
+ test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
cancel_delayed_work(&guc->timestamp.work);
else
cancel_delayed_work_sync(&guc->timestamp.work);
@@ -1424,8 +1427,6 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
unsigned long flags;
ktime_t unused;
- guc_cancel_busyness_worker(guc);
-
spin_lock_irqsave(&guc->timestamp.lock, flags);
guc_update_pm_timestamp(guc, &unused);
@@ -2004,13 +2005,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
- /*
- * Ensure the busyness worker gets cancelled even on a fatal wedge.
- * Note that reset_prepare is not allowed to because it confuses lockdep.
- */
- if (guc_submission_initialized(guc))
- guc_cancel_busyness_worker(guc);
-
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
!intel_guc_is_fw_running(guc) ||
@@ -2136,6 +2130,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
if (!guc->submission_initialized)
return;
+ guc_fini_engine_stats(guc);
guc_flush_destroyed_contexts(guc);
guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 6dfe5d9456c69e..399bc319180b04 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -637,6 +637,10 @@ void intel_uc_reset_finish(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
+ /*
+ * NB: The wedge code path results in prepare -> prepare -> finish -> finish.
+ * So this function is sometimes called with the in-progress flag not set.
+ */
uc->reset_in_progress = false;
/* Firmware expected to be running when this function is called */
diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c
index 4dd52ac2043e7a..d800d267f0e9bd 100644
--- a/drivers/gpu/drm/i915/gvt/firmware.c
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -30,6 +30,7 @@
#include <linux/firmware.h>
#include <linux/crc32.h>
+#include <linux/vmalloc.h>
#include "i915_drv.h"
#include "gvt.h"
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 094fca9b0e73d1..58cca4906f4135 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -39,6 +39,7 @@
#include "trace.h"
#include "gt/intel_gt_regs.h"
+#include <linux/vmalloc.h>
#if defined(VERBOSE_DEBUG)
#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index efcb00472be247..ea9c300927671f 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -52,6 +52,7 @@
#include "display/skl_watermark_regs.h"
#include "display/vlv_dsi_pll_regs.h"
#include "gt/intel_gt_regs.h"
+#include <linux/vmalloc.h>
/* XXX FIXME i915 has changed PP_XXX definition */
#define PCH_PP_STATUS _MMIO(0xc7200)
diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
index 5b5def6ddef7ae..780762f28aa4fb 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -33,6 +33,7 @@
*
*/
+#include <linux/vmalloc.h>
#include "i915_drv.h"
#include "i915_reg.h"
#include "gvt.h"
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 08ad1bd651f103..63c751ca411964 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -34,6 +34,7 @@
#include "i915_drv.h"
#include "gvt.h"
#include "i915_pvinfo.h"
+#include <linux/vmalloc.h>
void populate_pvinfo_page(struct intel_vgpu *vgpu)
{
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 9ee902d5b72c49..4b9233c07a22c6 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -800,7 +800,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto out_cleanup_modeset2;
ret = intel_pxp_init(i915);
- if (ret != -ENODEV)
+ if (ret && ret != -ENODEV)
drm_dbg(&i915->drm, "pxp init failed with %d\n", ret);
ret = intel_display_driver_probe(i915);
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
index 8c3f443c8347e0..b758fd110c2045 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -72,12 +72,13 @@ hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat,
struct intel_uncore *uncore = ddat->uncore;
intel_wakeref_t wakeref;
- mutex_lock(&hwmon->hwmon_lock);
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
+ mutex_lock(&hwmon->hwmon_lock);
- with_intel_runtime_pm(uncore->rpm, wakeref)
intel_uncore_rmw(uncore, reg, clear, set);
- mutex_unlock(&hwmon->hwmon_lock);
+ mutex_unlock(&hwmon->hwmon_lock);
+ }
}
/*
@@ -136,20 +137,21 @@ hwm_energy(struct hwm_drvdata *ddat, long *energy)
else
rgaddr = hwmon->rg.energy_status_all;
- mutex_lock(&hwmon->hwmon_lock);
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
+ mutex_lock(&hwmon->hwmon_lock);
- with_intel_runtime_pm(uncore->rpm, wakeref)
reg_val = intel_uncore_read(uncore, rgaddr);
- if (reg_val >= ei->reg_val_prev)
- ei->accum_energy += reg_val - ei->reg_val_prev;
- else
- ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
- ei->reg_val_prev = reg_val;
+ if (reg_val >= ei->reg_val_prev)
+ ei->accum_energy += reg_val - ei->reg_val_prev;
+ else
+ ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
+ ei->reg_val_prev = reg_val;
- *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
- hwmon->scl_shift_energy);
- mutex_unlock(&hwmon->hwmon_lock);
+ *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
+ hwmon->scl_shift_energy);
+ mutex_unlock(&hwmon->hwmon_lock);
+ }
}
static ssize_t
@@ -404,6 +406,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
/* Block waiting for GuC reset to complete when needed */
for (;;) {
+ wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
mutex_lock(&hwmon->hwmon_lock);
prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
@@ -417,14 +420,13 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
}
mutex_unlock(&hwmon->hwmon_lock);
+ intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
schedule();
}
finish_wait(&ddat->waitq, &wait);
if (ret)
- goto unlock;
-
- wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
+ goto exit;
/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
if (val == PL1_DISABLE) {
@@ -444,9 +446,8 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
exit:
- intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
-unlock:
mutex_unlock(&hwmon->hwmon_lock);
+ intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c
index ba82277254b762..cc41974cee7462 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -25,6 +25,8 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/cpufeature.h>
+#include <linux/bug.h>
+#include <linux/build_bug.h>
#include <asm/fpu/api.h>
#include "i915_memcpy.h"
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e00557e1a57f0c..3b2e49ce29ba03 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4599,7 +4599,7 @@
#define MTL_CHICKEN_TRANS(trans) _MMIO_TRANS((trans), \
_MTL_CHICKEN_TRANS_A, \
_MTL_CHICKEN_TRANS_B)
-#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* ADL/DG2 */
+#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* tgl+ */
#define SKL_UNMASK_VBL_TO_PIPE_IN_SRD REG_BIT(30) /* skl+ */
#define HSW_FRAME_START_DELAY_MASK REG_GENMASK(28, 27)
#define HSW_FRAME_START_DELAY(x) REG_FIELD_PREP(HSW_FRAME_START_DELAY_MASK, x)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d09aad34ba37fa..b70715b1411d67 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -34,6 +34,7 @@
#include "gt/intel_engine.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_tlb.h"
@@ -103,12 +104,42 @@ static inline struct i915_vma *active_to_vma(struct i915_active *ref)
static int __i915_vma_active(struct i915_active *ref)
{
- return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT;
+ struct i915_vma *vma = active_to_vma(ref);
+
+ if (!i915_vma_tryget(vma))
+ return -ENOENT;
+
+ /*
+ * Exclude global GTT VMA from holding a GT wakeref
+ * while active, otherwise GPU never goes idle.
+ */
+ if (!i915_vma_is_ggtt(vma)) {
+ /*
+ * Since we and our _retire() counterpart can be
+ * called asynchronously, storing a wakeref tracking
+ * handle inside struct i915_vma is not safe, and
+ * there is no other good place for that. Hence,
+ * use untracked variants of intel_gt_pm_get/put().
+ */
+ intel_gt_pm_get_untracked(vma->vm->gt);
+ }
+
+ return 0;
}
static void __i915_vma_retire(struct i915_active *ref)
{
- i915_vma_put(active_to_vma(ref));
+ struct i915_vma *vma = active_to_vma(ref);
+
+ if (!i915_vma_is_ggtt(vma)) {
+ /*
+ * Since we can be called from atomic contexts,
+ * use an async variant of intel_gt_pm_put().
+ */
+ intel_gt_pm_put_async_untracked(vma->vm->gt);
+ }
+
+ i915_vma_put(vma);
}
static struct i915_vma *
@@ -1404,7 +1435,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
struct i915_vma_work *work = NULL;
struct dma_fence *moving = NULL;
struct i915_vma_resource *vma_res = NULL;
- intel_wakeref_t wakeref = 0;
+ intel_wakeref_t wakeref;
unsigned int bound;
int err;
@@ -1424,8 +1455,14 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
if (err)
return err;
- if (flags & PIN_GLOBAL)
- wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
+ /*
+ * In case of a global GTT, we must hold a runtime-pm wakeref
+ * while global PTEs are updated. In other cases, we hold
+ * the rpm reference while the VMA is active. Since runtime
+ * resume may require allocations, which are forbidden inside
+ * vm->mutex, get the first rpm wakeref outside of the mutex.
+ */
+ wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
if (flags & vma->vm->bind_async_flags) {
/* lock VM */
@@ -1561,8 +1598,7 @@ err_fence:
if (work)
dma_fence_work_commit_imm(&work->base);
err_rpm:
- if (wakeref)
- intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
if (moving)
dma_fence_put(moving);
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index 9b6d87c8b5831c..5a01d60e51861e 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -28,6 +28,7 @@
#include "gt/intel_context.h"
#include "gt/intel_ring.h"
#include "gt/shmem_utils.h"
+#include <linux/vmalloc.h>
/**
* DOC: Intel GVT-g host support
diff --git a/drivers/gpu/drm/imagination/pvr_vm_mips.c b/drivers/gpu/drm/imagination/pvr_vm_mips.c
index b7fef3c797e6ca..6563dcde109cda 100644
--- a/drivers/gpu/drm/imagination/pvr_vm_mips.c
+++ b/drivers/gpu/drm/imagination/pvr_vm_mips.c
@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/types.h>
+#include <linux/vmalloc.h>
/**
* pvr_vm_mips_init() - Initialise MIPS FW pagetable
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
index 4f2e3feabc0f8a..3e519869b632c6 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
@@ -4,6 +4,7 @@
*/
#include <linux/dma-buf.h>
+#include <linux/vmalloc.h>
#include <drm/drm.h>
#include <drm/drm_device.h>
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 0674aca0f8a3f5..cf0b1de1c07124 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1377,6 +1377,10 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
if (adreno_is_a618(gpu))
gpu->ubwc_config.highest_bank_bit = 14;
+ if (adreno_is_a619(gpu))
+ /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
+ gpu->ubwc_config.highest_bank_bit = 13;
+
if (adreno_is_a619_holi(gpu))
gpu->ubwc_config.highest_bank_bit = 13;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 1f5245fc2cdc6c..a847a0f7a73c9f 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -852,7 +852,7 @@ static void a6xx_get_shader_block(struct msm_gpu *gpu,
(block->type << 8) | i);
in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
- block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
+ block->size, out);
out += block->size * sizeof(u32);
}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
index 9a9f7092c526a6..a3e60ac70689e7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
@@ -324,6 +324,7 @@ static const struct dpu_wb_cfg x1e80100_wb[] = {
},
};
+/* TODO: INTF 3, 8 and 7 are used for MST, marked as INTF_NONE for now */
static const struct dpu_intf_cfg x1e80100_intf[] = {
{
.name = "intf_0", .id = INTF_0,
@@ -358,8 +359,8 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
.name = "intf_3", .id = INTF_3,
.base = 0x37000, .len = 0x280,
.features = INTF_SC7280_MASK,
- .type = INTF_DP,
- .controller_id = MSM_DP_CONTROLLER_1,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_0, /* pair with intf_0 for DP MST */
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31),
@@ -368,7 +369,7 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
.base = 0x38000, .len = 0x280,
.features = INTF_SC7280_MASK,
.type = INTF_DP,
- .controller_id = MSM_DP_CONTROLLER_2,
+ .controller_id = MSM_DP_CONTROLLER_1,
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 20),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 21),
@@ -381,6 +382,33 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 22),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 23),
+ }, {
+ .name = "intf_6", .id = INTF_6,
+ .base = 0x3A000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_DP,
+ .controller_id = MSM_DP_CONTROLLER_2,
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16),
+ }, {
+ .name = "intf_7", .id = INTF_7,
+ .base = 0x3b000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_2, /* pair with intf_6 for DP MST */
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 18),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 19),
+ }, {
+ .name = "intf_8", .id = INTF_8,
+ .base = 0x3c000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_1, /* pair with intf_4 for DP MST */
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13),
},
};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
index ef871239adb2a3..68fae048a9a837 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
@@ -459,15 +459,15 @@ int dpu_core_perf_debugfs_init(struct dpu_kms *dpu_kms, struct dentry *parent)
&perf->core_clk_rate);
debugfs_create_u32("enable_bw_release", 0600, entry,
(u32 *)&perf->enable_bw_release);
- debugfs_create_u32("threshold_low", 0600, entry,
+ debugfs_create_u32("threshold_low", 0400, entry,
(u32 *)&perf->perf_cfg->max_bw_low);
- debugfs_create_u32("threshold_high", 0600, entry,
+ debugfs_create_u32("threshold_high", 0400, entry,
(u32 *)&perf->perf_cfg->max_bw_high);
- debugfs_create_u32("min_core_ib", 0600, entry,
+ debugfs_create_u32("min_core_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_core_ib);
- debugfs_create_u32("min_llcc_ib", 0600, entry,
+ debugfs_create_u32("min_llcc_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_llcc_ib);
- debugfs_create_u32("min_dram_ib", 0600, entry,
+ debugfs_create_u32("min_dram_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_dram_ib);
debugfs_create_file("perf_mode", 0600, entry,
(u32 *)perf, &dpu_core_perf_mode_fops);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
index 946dd0135dffcf..6a0a74832fb64d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
@@ -525,14 +525,14 @@ int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms,
int ret;
if (!irq_cb) {
- DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
- DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
+ DPU_ERROR("IRQ=[%d, %d] NULL callback\n",
+ DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
return -EINVAL;
}
if (!dpu_core_irq_is_valid(irq_idx)) {
- DPU_ERROR("invalid IRQ=[%d, %d]\n",
- DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
+ DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
+ DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index c4cb82af5c2f2f..ffbfde92258986 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -484,7 +484,7 @@ static void dp_display_handle_video_request(struct dp_display_private *dp)
}
}
-static int dp_display_handle_port_ststus_changed(struct dp_display_private *dp)
+static int dp_display_handle_port_status_changed(struct dp_display_private *dp)
{
int rc = 0;
@@ -541,7 +541,7 @@ static int dp_display_usbpd_attention_cb(struct device *dev)
drm_dbg_dp(dp->drm_dev, "hpd_state=%d sink_request=%d\n",
dp->hpd_state, sink_request);
if (sink_request & DS_PORT_STATUS_CHANGED)
- rc = dp_display_handle_port_ststus_changed(dp);
+ rc = dp_display_handle_port_status_changed(dp);
else
rc = dp_display_handle_irq_hpd(dp);
}
@@ -588,6 +588,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
ret = dp_display_usbpd_configure_cb(&pdev->dev);
if (ret) { /* link train failed */
dp->hpd_state = ST_DISCONNECTED;
+ pm_runtime_put_sync(&pdev->dev);
} else {
dp->hpd_state = ST_MAINLINK_READY;
}
@@ -645,6 +646,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
dp_display_host_phy_exit(dp);
dp->hpd_state = ST_DISCONNECTED;
dp_display_notify_disconnect(&dp->dp_display.pdev->dev);
+ pm_runtime_put_sync(&pdev->dev);
mutex_unlock(&dp->event_mutex);
return 0;
}
diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
index e3f61c39df69b4..80166f702a0dba 100644
--- a/drivers/gpu/drm/msm/msm_fb.c
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -89,7 +89,7 @@ int msm_framebuffer_prepare(struct drm_framebuffer *fb,
for (i = 0; i < n; i++) {
ret = msm_gem_get_and_pin_iova(fb->obj[i], aspace, &msm_fb->iova[i]);
- drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)",
+ drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)\n",
fb->base.id, i, msm_fb->iova[i], ret);
if (ret)
return ret;
@@ -176,7 +176,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
const struct msm_format *format;
int ret, i, n;
- drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)",
+ drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)\n",
mode_cmd, mode_cmd->width, mode_cmd->height,
(char *)&mode_cmd->pixel_format);
@@ -232,7 +232,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
refcount_set(&msm_fb->dirtyfb, 1);
- drm_dbg_state(dev, "create: FB ID: %d (%p)", fb->base.id, fb);
+ drm_dbg_state(dev, "create: FB ID: %d (%p)\n", fb->base.id, fb);
return fb;
diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c
index 84c21ec2ceeae0..af6a6fcb11736f 100644
--- a/drivers/gpu/drm/msm/msm_kms.c
+++ b/drivers/gpu/drm/msm/msm_kms.c
@@ -149,7 +149,7 @@ int msm_crtc_enable_vblank(struct drm_crtc *crtc)
struct msm_kms *kms = priv->kms;
if (!kms)
return -ENXIO;
- drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+ drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
return vblank_ctrl_queue_work(priv, crtc, true);
}
@@ -160,7 +160,7 @@ void msm_crtc_disable_vblank(struct drm_crtc *crtc)
struct msm_kms *kms = priv->kms;
if (!kms)
return;
- drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+ drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
vblank_ctrl_queue_work(priv, crtc, false);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 479effcf607e26..79cfab53f80e25 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -23,6 +23,7 @@
*/
#include "nouveau_drv.h"
+#include "nouveau_bios.h"
#include "nouveau_reg.h"
#include "dispnv04/hw.h"
#include "nouveau_encoder.h"
@@ -1677,7 +1678,7 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf)
*/
if (nv_match_device(dev, 0x0201, 0x1462, 0x8851)) {
if (*conn == 0xf2005014 && *conf == 0xffffffff) {
- fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 1);
+ fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, DCB_OUTPUT_B);
return false;
}
}
@@ -1763,26 +1764,26 @@ fabricate_dcb_encoder_table(struct drm_device *dev, struct nvbios *bios)
#ifdef __powerpc__
/* Apple iMac G4 NV17 */
if (of_machine_is_compatible("PowerMac4,5")) {
- fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, 1);
- fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, 2);
+ fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, DCB_OUTPUT_B);
+ fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, DCB_OUTPUT_C);
return;
}
#endif
/* Make up some sane defaults */
fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG,
- bios->legacy.i2c_indices.crt, 1, 1);
+ bios->legacy.i2c_indices.crt, 1, DCB_OUTPUT_B);
if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0)
fabricate_dcb_output(dcb, DCB_OUTPUT_TV,
bios->legacy.i2c_indices.tv,
- all_heads, 0);
+ all_heads, DCB_OUTPUT_A);
else if (bios->tmds.output0_script_ptr ||
bios->tmds.output1_script_ptr)
fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS,
bios->legacy.i2c_indices.panel,
- all_heads, 1);
+ all_heads, DCB_OUTPUT_B);
}
static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 12feecf71e752d..6fb65b01d77804 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -378,9 +378,9 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
dma_addr_t *dma_addrs;
struct nouveau_fence *fence;
- src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
- dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
- dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL);
+ src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
+ dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
+ dma_addrs = kvcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL | __GFP_NOFAIL);
migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT,
npages);
@@ -406,11 +406,11 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
migrate_device_pages(src_pfns, dst_pfns, npages);
nouveau_dmem_fence_done(&fence);
migrate_device_finalize(src_pfns, dst_pfns, npages);
- kfree(src_pfns);
- kfree(dst_pfns);
+ kvfree(src_pfns);
+ kvfree(dst_pfns);
for (i = 0; i < npages; i++)
dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
- kfree(dma_addrs);
+ kvfree(dma_addrs);
}
void
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 7de7707ec6a895..a72c45809484ab 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -225,12 +225,18 @@ nouveau_dp_detect(struct nouveau_connector *nv_connector,
u8 *dpcd = nv_encoder->dp.dpcd;
int ret = NOUVEAU_DP_NONE, hpd;
- /* If we've already read the DPCD on an eDP device, we don't need to
- * reread it as it won't change
+ /* eDP ports don't support hotplugging - so there's no point in probing eDP ports unless we
+ * haven't probed them once before.
*/
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP &&
- dpcd[DP_DPCD_REV] != 0)
- return NOUVEAU_DP_SST;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ if (connector->status == connector_status_connected)
+ return NOUVEAU_DP_SST;
+ else if (connector->status == connector_status_disconnected)
+ return NOUVEAU_DP_NONE;
+ }
+
+ // Ensure that the aux bus is enabled for probing
+ drm_dp_dpcd_set_powered(&nv_connector->aux, true);
mutex_lock(&nv_encoder->dp.hpd_irq_lock);
if (mstm) {
@@ -293,6 +299,13 @@ out:
if (mstm && !mstm->suspended && ret != NOUVEAU_DP_MST)
nv50_mstm_remove(mstm);
+ /* GSP doesn't like when we try to do aux transactions on a port it considers disconnected,
+ * and since we don't really have a usecase for that anyway - just disable the aux bus here
+ * if we've decided the connector is disconnected
+ */
+ if (ret == NOUVEAU_DP_NONE)
+ drm_dp_dpcd_set_powered(&nv_connector->aux, false);
+
mutex_unlock(&nv_encoder->dp.hpd_irq_lock);
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 0a0a11dc9ec03e..ee02cd833c5e43 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -812,15 +812,15 @@ op_remap(struct drm_gpuva_op_remap *r,
struct drm_gpuva_op_unmap *u = r->unmap;
struct nouveau_uvma *uvma = uvma_from_va(u->va);
u64 addr = uvma->va.va.addr;
- u64 range = uvma->va.va.range;
+ u64 end = uvma->va.va.addr + uvma->va.va.range;
if (r->prev)
addr = r->prev->va.addr + r->prev->va.range;
if (r->next)
- range = r->next->va.addr - addr;
+ end = r->next->va.addr;
- op_unmap_range(u, addr, range);
+ op_unmap_range(u, addr, end - addr);
}
static int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 986e8d547c9424..060c74a80eb14b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -420,7 +420,7 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
return ret;
} else {
ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb,
- &args, sizeof(args));;
+ &args, sizeof(args));
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
index 4bf486b5710136..cb05f7f48a98bb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
@@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name)
return ERR_PTR(-EINVAL);
}
+static void of_fini(void *p)
+{
+ kfree(p);
+}
+
const struct nvbios_source
nvbios_of = {
.name = "OpenFirmware",
.init = of_init,
- .fini = (void(*)(void *))kfree,
+ .fini = of_fini,
.read = of_read,
.size = of_size,
.rw = false,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
index 7bcbc4895ec221..271bfa038f5bc9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
@@ -25,6 +25,7 @@
#include <subdev/bios.h>
#include <subdev/bios/init.h>
+#include <subdev/gsp.h>
void
gm107_devinit_disable(struct nvkm_devinit *init)
@@ -33,10 +34,13 @@ gm107_devinit_disable(struct nvkm_devinit *init)
u32 r021c00 = nvkm_rd32(device, 0x021c00);
u32 r021c04 = nvkm_rd32(device, 0x021c04);
- if (r021c00 & 0x00000001)
- nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
- if (r021c00 & 0x00000004)
- nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+ /* gsp only wants to enable/disable display */
+ if (!nvkm_gsp_rm(device->gsp)) {
+ if (r021c00 & 0x00000001)
+ nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
+ if (r021c00 & 0x00000004)
+ nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+ }
if (r021c04 & 0x00000001)
nvkm_subdev_disable(device, NVKM_ENGINE_DISP, 0);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c
index 11b4c9c274a1a5..666eb93b1742ca 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c
@@ -41,6 +41,7 @@ r535_devinit_new(const struct nvkm_devinit_func *hw,
rm->dtor = r535_devinit_dtor;
rm->post = hw->post;
+ rm->disable = hw->disable;
ret = nv50_devinit_new_(rm, device, type, inst, pdevinit);
if (ret)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 9994cbd6f1c40c..9858c1438aa7fe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -1112,7 +1112,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
rpc->numEntries = NV_GSP_REG_NUM_ENTRIES;
str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]);
- strings = (char *)&rpc->entries[NV_GSP_REG_NUM_ENTRIES];
+ strings = (char *)rpc + str_offset;
for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++) {
int name_len = strlen(r535_registry_entries[i].name) + 1;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
index a7f3fc342d87e0..dd5b5a17ece0be 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
@@ -222,8 +222,11 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
void __iomem *map = NULL;
/* Already mapped? */
- if (refcount_inc_not_zero(&iobj->maps))
+ if (refcount_inc_not_zero(&iobj->maps)) {
+ /* read barrier match the wmb on refcount set */
+ smp_rmb();
return iobj->map;
+ }
/* Take the lock, and re-check that another thread hasn't
* already mapped the object in the meantime.
@@ -250,6 +253,8 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
iobj->base.memory.ptrs = &nv50_instobj_fast;
else
iobj->base.memory.ptrs = &nv50_instobj_slow;
+ /* barrier to ensure the ptrs are written before refcount is set */
+ smp_wmb();
refcount_set(&iobj->maps, 1);
}
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index 3421e8389222a4..9ea0c64c26b5c8 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -9,6 +9,7 @@
#include <linux/shmem_fs.h>
#include <linux/spinlock.h>
#include <linux/pfn_t.h>
+#include <linux/vmalloc.h>
#include <drm/drm_prime.h>
#include <drm/drm_vma_manager.h>
diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36672e.c b/drivers/gpu/drm/panel/panel-novatek-nt36672e.c
index cb7406d7446695..c39fe0fc5d69c6 100644
--- a/drivers/gpu/drm/panel/panel-novatek-nt36672e.c
+++ b/drivers/gpu/drm/panel/panel-novatek-nt36672e.c
@@ -614,8 +614,6 @@ static void nt36672e_panel_remove(struct mipi_dsi_device *dsi)
struct nt36672e_panel *ctx = mipi_dsi_get_drvdata(dsi);
mipi_dsi_detach(ctx->dsi);
- mipi_dsi_device_unregister(ctx->dsi);
-
drm_panel_remove(&ctx->panel);
}
diff --git a/drivers/gpu/drm/panel/panel-visionox-rm69299.c b/drivers/gpu/drm/panel/panel-visionox-rm69299.c
index 775144695283f5..b15ca56a09a74a 100644
--- a/drivers/gpu/drm/panel/panel-visionox-rm69299.c
+++ b/drivers/gpu/drm/panel/panel-visionox-rm69299.c
@@ -253,8 +253,6 @@ static void visionox_rm69299_remove(struct mipi_dsi_device *dsi)
struct visionox_rm69299 *ctx = mipi_dsi_get_drvdata(dsi);
mipi_dsi_detach(ctx->dsi);
- mipi_dsi_device_unregister(ctx->dsi);
-
drm_panel_remove(&ctx->panel);
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index 9063ce2546422f..fd8e44992184fa 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -441,19 +441,19 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev)
gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present);
ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO,
- val, !val, 1, 1000);
+ val, !val, 1, 2000);
if (ret)
dev_err(pfdev->dev, "shader power transition timeout");
gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO,
- val, !val, 1, 1000);
+ val, !val, 1, 2000);
if (ret)
dev_err(pfdev->dev, "tiler power transition timeout");
gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present);
ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO,
- val, !val, 0, 1000);
+ val, !val, 0, 2000);
if (ret)
dev_err(pfdev->dev, "l2 power transition timeout");
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index f38385fe76bbb4..b91019cd5acb19 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -502,11 +502,18 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
mapping_set_unevictable(mapping);
for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
+ /* Can happen if the last fault only partially filled this
+ * section of the pages array before failing. In that case
+ * we skip already filled pages.
+ */
+ if (pages[i])
+ continue;
+
pages[i] = shmem_read_mapping_page(mapping, i);
if (IS_ERR(pages[i])) {
ret = PTR_ERR(pages[i]);
pages[i] = NULL;
- goto err_pages;
+ goto err_unlock;
}
}
@@ -514,7 +521,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL);
if (ret)
- goto err_pages;
+ goto err_unlock;
ret = dma_map_sgtable(pfdev->dev, sgt, DMA_BIDIRECTIONAL, 0);
if (ret)
@@ -537,8 +544,6 @@ out:
err_map:
sg_free_table(sgt);
-err_pages:
- drm_gem_shmem_put_pages(&bo->base);
err_unlock:
dma_resv_unlock(obj->resv);
err_bo:
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index 281edab518cdd3..d6ea01f3797be7 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -421,7 +421,6 @@ int qxl_surface_id_alloc(struct qxl_device *qdev,
{
uint32_t handle;
int idr_ret;
- int count = 0;
again:
idr_preload(GFP_ATOMIC);
spin_lock(&qdev->surf_id_idr_lock);
@@ -433,7 +432,6 @@ again:
handle = idr_ret;
if (handle >= qdev->rom->n_surfaces) {
- count++;
spin_lock(&qdev->surf_id_idr_lock);
idr_remove(&qdev->surf_id_idr, handle);
spin_unlock(&qdev->surf_id_idr_lock);
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index dd0f834d881ce1..506ae1f5e099ff 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -145,7 +145,7 @@ static int qxl_process_single_command(struct qxl_device *qdev,
struct qxl_release *release;
struct qxl_bo *cmd_bo;
void *fb_cmd;
- int i, ret, num_relocs;
+ int i, ret;
int unwritten;
switch (cmd->type) {
@@ -200,7 +200,6 @@ static int qxl_process_single_command(struct qxl_device *qdev,
}
/* fill out reloc info structs */
- num_relocs = 0;
for (i = 0; i < cmd->relocs_num; ++i) {
struct drm_qxl_reloc reloc;
struct drm_qxl_reloc __user *u = u64_to_user_ptr(cmd->relocs);
@@ -230,7 +229,6 @@ static int qxl_process_single_command(struct qxl_device *qdev,
reloc_info[i].dst_bo = cmd_bo;
reloc_info[i].dst_offset = reloc.dst_offset + release->release_offset;
}
- num_relocs++;
/* reserve and validate the reloc dst bo */
if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle) {
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 368d26da0d6a23..9febc8b73f09ef 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -58,16 +58,56 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr,
signed long timeout)
{
struct qxl_device *qdev;
+ struct qxl_release *release;
+ int count = 0, sc = 0;
+ bool have_drawable_releases;
unsigned long cur, end = jiffies + timeout;
qdev = container_of(fence->lock, struct qxl_device, release_lock);
+ release = container_of(fence, struct qxl_release, base);
+ have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;
- if (!wait_event_timeout(qdev->release_event,
- (dma_fence_is_signaled(fence) ||
- (qxl_io_notify_oom(qdev), 0)),
- timeout))
- return 0;
+retry:
+ sc++;
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+
+ qxl_io_notify_oom(qdev);
+
+ for (count = 0; count < 11; count++) {
+ if (!qxl_queue_garbage_collect(qdev, true))
+ break;
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+ }
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+
+ if (have_drawable_releases || sc < 4) {
+ if (sc > 2)
+ /* back off */
+ usleep_range(500, 1000);
+
+ if (time_after(jiffies, end))
+ return 0;
+
+ if (have_drawable_releases && sc > 300) {
+ DMA_FENCE_WARN(fence,
+ "failed to wait on release %llu after spincount %d\n",
+ fence->context & ~0xf0000000, sc);
+ goto signaled;
+ }
+ goto retry;
+ }
+ /*
+ * yeah, original sync_obj_wait gave up after 3 spins when
+ * have_drawable_releases is not set.
+ */
+signaled:
cur = jiffies;
if (time_after(cur, end))
return 0;
diff --git a/drivers/gpu/drm/radeon/pptable.h b/drivers/gpu/drm/radeon/pptable.h
index 94947229888ba7..b7f22597ee95e7 100644
--- a/drivers/gpu/drm/radeon/pptable.h
+++ b/drivers/gpu/drm/radeon/pptable.h
@@ -424,7 +424,7 @@ typedef struct _ATOM_PPLIB_SUMO_CLOCK_INFO{
typedef struct _ATOM_PPLIB_STATE_V2
{
//number of valid dpm levels in this state; Driver uses it to calculate the whole
- //size of the state: sizeof(ATOM_PPLIB_STATE_V2) + (ucNumDPMLevels - 1) * sizeof(UCHAR)
+ //size of the state: struct_size(ATOM_PPLIB_STATE_V2, clockInfoIndex, ucNumDPMLevels)
UCHAR ucNumDPMLevels;
//a index to the array of nonClockInfos
@@ -432,14 +432,14 @@ typedef struct _ATOM_PPLIB_STATE_V2
/**
* Driver will read the first ucNumDPMLevels in this array
*/
- UCHAR clockInfoIndex[1];
+ UCHAR clockInfoIndex[] __counted_by(ucNumDPMLevels);
} ATOM_PPLIB_STATE_V2;
typedef struct _StateArray{
//how many states we have
UCHAR ucNumEntries;
- ATOM_PPLIB_STATE_V2 states[1];
+ ATOM_PPLIB_STATE_V2 states[] __counted_by(ucNumEntries);
}StateArray;
@@ -450,7 +450,7 @@ typedef struct _ClockInfoArray{
//sizeof(ATOM_PPLIB_CLOCK_INFO)
UCHAR ucEntrySize;
- UCHAR clockInfo[1];
+ UCHAR clockInfo[] __counted_by(ucNumEntries);
}ClockInfoArray;
typedef struct _NonClockInfoArray{
@@ -460,7 +460,7 @@ typedef struct _NonClockInfoArray{
//sizeof(ATOM_PPLIB_NONCLOCK_INFO)
UCHAR ucEntrySize;
- ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[1];
+ ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[] __counted_by(ucNumEntries);
}NonClockInfoArray;
typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index bb1f0a3371ab5d..10793a433bf586 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -923,8 +923,12 @@ bool radeon_get_atom_connector_info_from_supported_devices_table(struct
max_device = ATOM_MAX_SUPPORTED_DEVICE_INFO;
for (i = 0; i < max_device; i++) {
- ATOM_CONNECTOR_INFO_I2C ci =
- supported_devices->info.asConnInfo[i];
+ ATOM_CONNECTOR_INFO_I2C ci;
+
+ if (frev > 1)
+ ci = supported_devices->info_2d1.asConnInfo[i];
+ else
+ ci = supported_devices->info.asConnInfo[i];
bios_connectors[i].valid = false;
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
index 48170694ac6b89..18efb3fe1c000f 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -17,9 +17,7 @@
static const uint32_t formats_cluster[] = {
DRM_FORMAT_XRGB2101010,
- DRM_FORMAT_ARGB2101010,
DRM_FORMAT_XBGR2101010,
- DRM_FORMAT_ABGR2101010,
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
DRM_FORMAT_XBGR8888,
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 3c4f5a392b0646..58c8161289fea9 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -71,13 +71,19 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
entity->guilty = guilty;
entity->num_sched_list = num_sched_list;
entity->priority = priority;
+ /*
+ * It's perfectly valid to initialize an entity without having a valid
+ * scheduler attached. It's just not valid to use the scheduler before it
+ * is initialized itself.
+ */
entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
RCU_INIT_POINTER(entity->last_scheduled, NULL);
RB_CLEAR_NODE(&entity->rb_tree_node);
- if (!sched_list[0]->sched_rq) {
- /* Warn drivers not to do this and to fix their DRM
- * calling order.
+ if (num_sched_list && !sched_list[0]->sched_rq) {
+ /* Since every entry covered by num_sched_list
+ * should be non-NULL and therefore we warn drivers
+ * not to do this and to fix their DRM calling order.
*/
pr_warn("%s: called with uninitialized scheduler\n", __func__);
} else if (num_sched_list) {
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 112438d965ffbe..6e1fd6985ffcb7 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -288,17 +288,23 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
enum ttm_caching caching,
unsigned int order)
{
- if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
+ if (pool->use_dma_alloc)
return &pool->caching[caching].orders[order];
#ifdef CONFIG_X86
switch (caching) {
case ttm_write_combined:
+ if (pool->nid != NUMA_NO_NODE)
+ return &pool->caching[caching].orders[order];
+
if (pool->use_dma32)
return &global_dma32_write_combined[order];
return &global_write_combined[order];
case ttm_uncached:
+ if (pool->nid != NUMA_NO_NODE)
+ return &pool->caching[caching].orders[order];
+
if (pool->use_dma32)
return &global_dma32_uncached[order];
@@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
pool->use_dma_alloc = use_dma_alloc;
pool->use_dma32 = use_dma32;
- if (use_dma_alloc || nid != NUMA_NO_NODE) {
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
- for (j = 0; j < NR_PAGE_ORDERS; ++j)
- ttm_pool_type_init(&pool->caching[i].orders[j],
- pool, i, j);
+ for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+ for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+ struct ttm_pool_type *pt;
+
+ /* Initialize only pool types which are actually used */
+ pt = ttm_pool_select_type(pool, i, j);
+ if (pt != &pool->caching[i].orders[j])
+ continue;
+
+ ttm_pool_type_init(pt, pool, i, j);
+ }
}
}
EXPORT_SYMBOL(ttm_pool_init);
@@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
{
unsigned int i, j;
- if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
- for (j = 0; j < NR_PAGE_ORDERS; ++j)
- ttm_pool_type_fini(&pool->caching[i].orders[j]);
+ for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+ for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+ struct ttm_pool_type *pt;
+
+ pt = ttm_pool_select_type(pool, i, j);
+ if (pt != &pool->caching[i].orders[j])
+ continue;
+
+ ttm_pool_type_fini(pt);
+ }
}
/* We removed the pool types from the LRU, but we need to also make sure
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 578a7c37f00bd7..d776e3f87064fa 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -92,7 +92,7 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
*/
if (bdev->pool.use_dma_alloc && cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
page_flags |= TTM_TT_FLAG_DECRYPTED;
- drm_info(ddev, "TT memory decryption enabled.");
+ drm_info_once(ddev, "TT memory decryption enabled.");
}
bo->ttm = bdev->funcs->ttm_tt_create(bo, page_flags);
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
index a07ede668cc161..a165cbcdd27b94 100644
--- a/drivers/gpu/drm/v3d/v3d_bo.c
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -21,6 +21,7 @@
#include <linux/dma-buf.h>
#include <linux/pfn_t.h>
+#include <linux/vmalloc.h>
#include "v3d_drv.h"
#include "uapi/drm/v3d_drm.h"
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index 2e04f6cb661e4f..ce6b2fb341d1f8 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -105,7 +105,6 @@ v3d_irq(int irq, void *arg)
struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_BIN];
- file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN];
file->jobs_sent[V3D_BIN]++;
v3d->queue[V3D_BIN].jobs_sent++;
@@ -126,7 +125,6 @@ v3d_irq(int irq, void *arg)
struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_RENDER];
- file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER];
file->jobs_sent[V3D_RENDER]++;
v3d->queue[V3D_RENDER].jobs_sent++;
@@ -147,7 +145,6 @@ v3d_irq(int irq, void *arg)
struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_CSD];
- file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD];
file->jobs_sent[V3D_CSD]++;
v3d->queue[V3D_CSD].jobs_sent++;
@@ -195,7 +192,6 @@ v3d_hub_irq(int irq, void *arg)
struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_TFU];
- file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU];
file->jobs_sent[V3D_TFU]++;
v3d->queue[V3D_TFU].jobs_sent++;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c
index ae2de914eb890e..2731f6ded1c28c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_binding.c
@@ -54,6 +54,7 @@
#include "vmwgfx_drv.h"
#include "vmwgfx_binding.h"
#include "device_include/svga3d_reg.h"
+#include <linux/vmalloc.h>
#define VMW_BINDING_RT_BIT 0
#define VMW_BINDING_PS_BIT 1
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
index c52c7bf1485b1f..717d624e9a0522 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
@@ -456,8 +456,10 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
.no_wait_gpu = false
};
u32 j, initial_line = dst_offset / dst_stride;
- struct vmw_bo_blit_line_data d;
+ struct vmw_bo_blit_line_data d = {0};
int ret = 0;
+ struct page **dst_pages = NULL;
+ struct page **src_pages = NULL;
/* Buffer objects need to be either pinned or reserved: */
if (!(dst->pin_count))
@@ -477,12 +479,35 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
return ret;
}
+ if (!src->ttm->pages && src->ttm->sg) {
+ src_pages = kvmalloc_array(src->ttm->num_pages,
+ sizeof(struct page *), GFP_KERNEL);
+ if (!src_pages)
+ return -ENOMEM;
+ ret = drm_prime_sg_to_page_array(src->ttm->sg, src_pages,
+ src->ttm->num_pages);
+ if (ret)
+ goto out;
+ }
+ if (!dst->ttm->pages && dst->ttm->sg) {
+ dst_pages = kvmalloc_array(dst->ttm->num_pages,
+ sizeof(struct page *), GFP_KERNEL);
+ if (!dst_pages) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = drm_prime_sg_to_page_array(dst->ttm->sg, dst_pages,
+ dst->ttm->num_pages);
+ if (ret)
+ goto out;
+ }
+
d.mapped_dst = 0;
d.mapped_src = 0;
d.dst_addr = NULL;
d.src_addr = NULL;
- d.dst_pages = dst->ttm->pages;
- d.src_pages = src->ttm->pages;
+ d.dst_pages = dst->ttm->pages ? dst->ttm->pages : dst_pages;
+ d.src_pages = src->ttm->pages ? src->ttm->pages : src_pages;
d.dst_num_pages = PFN_UP(dst->resource->size);
d.src_num_pages = PFN_UP(src->resource->size);
d.dst_prot = ttm_io_prot(dst, dst->resource, PAGE_KERNEL);
@@ -504,6 +529,10 @@ out:
kunmap_atomic(d.src_addr);
if (d.dst_addr)
kunmap_atomic(d.dst_addr);
+ if (src_pages)
+ kvfree(src_pages);
+ if (dst_pages)
+ kvfree(dst_pages);
return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index bfd41ce3c8f4fc..00144632c600eb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -204,6 +204,7 @@ int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv,
VMW_BO_DOMAIN_VRAM,
VMW_BO_DOMAIN_VRAM);
buf->places[0].lpfn = PFN_UP(bo->resource->size);
+ buf->busy_places[0].lpfn = PFN_UP(bo->resource->size);
ret = ttm_bo_validate(bo, &buf->placement, &ctx);
/* For some reason we didn't end up at the start of vram */
@@ -377,7 +378,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv,
{
struct ttm_operation_ctx ctx = {
.interruptible = params->bo_type != ttm_bo_type_kernel,
- .no_wait_gpu = false
+ .no_wait_gpu = false,
+ .resv = params->resv,
};
struct ttm_device *bdev = &dev_priv->bdev;
struct drm_device *vdev = &dev_priv->drm;
@@ -394,8 +396,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv,
vmw_bo_placement_set(vmw_bo, params->domain, params->busy_domain);
ret = ttm_bo_init_reserved(bdev, &vmw_bo->tbo, params->bo_type,
- &vmw_bo->placement, 0, &ctx, NULL,
- NULL, destroy);
+ &vmw_bo->placement, 0, &ctx,
+ params->sg, params->resv, destroy);
if (unlikely(ret))
return ret;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
index 0d496dc9c6af7a..f349642e6190d6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
@@ -55,6 +55,8 @@ struct vmw_bo_params {
enum ttm_bo_type bo_type;
size_t size;
bool pin;
+ struct dma_resv *resv;
+ struct sg_table *sg;
};
/**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
index 195ff8792e5aba..dd4ca6a9c690bd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
@@ -31,6 +31,7 @@
#include <drm/ttm/ttm_placement.h>
#include <linux/sched/signal.h>
+#include <linux/vmalloc.h>
bool vmw_supports_3d(struct vmw_private *dev_priv)
{
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c b/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c
index 829df395c2ed7f..6e6beff9e2621b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_devcaps.c
@@ -25,6 +25,7 @@
*
**************************************************************************/
+#include <linux/vmalloc.h>
#include "vmwgfx_devcaps.h"
#include "vmwgfx_drv.h"
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index d3e308fdfd5be8..bd6cebadf37ee6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -53,6 +53,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/version.h>
+#include <linux/vmalloc.h>
#define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices"
@@ -666,11 +667,12 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
[vmw_dma_map_populate] = "Caching DMA mappings.",
[vmw_dma_map_bind] = "Giving up DMA mappings early."};
- /* TTM currently doesn't fully support SEV encryption. */
- if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
- return -EINVAL;
-
- if (vmw_force_coherent)
+ /*
+ * When running with SEV we always want dma mappings, because
+ * otherwise ttm tt pool pages will bounce through swiotlb running
+ * out of available space.
+ */
+ if (vmw_force_coherent || cc_platform_has(CC_ATTR_MEM_ENCRYPT))
dev_priv->map_mode = vmw_dma_alloc_coherent;
else if (vmw_restrict_iommu)
dev_priv->map_mode = vmw_dma_map_bind;
@@ -1444,12 +1446,15 @@ static void vmw_debugfs_resource_managers_init(struct vmw_private *vmw)
root, "system_ttm");
ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, TTM_PL_VRAM),
root, "vram_ttm");
- ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR),
- root, "gmr_ttm");
- ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB),
- root, "mob_ttm");
- ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM),
- root, "system_mob_ttm");
+ if (vmw->has_gmr)
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR),
+ root, "gmr_ttm");
+ if (vmw->has_mob) {
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB),
+ root, "mob_ttm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM),
+ root, "system_mob_ttm");
+ }
}
static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
@@ -1624,6 +1629,7 @@ static const struct drm_driver driver = {
.prime_fd_to_handle = vmw_prime_fd_to_handle,
.prime_handle_to_fd = vmw_prime_handle_to_fd,
+ .gem_prime_import_sg_table = vmw_prime_import_sg_table,
.fops = &vmwgfx_driver_fops,
.name = VMWGFX_DRIVER_NAME,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 12efecc17df664..b019a1a1787af5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1130,6 +1130,9 @@ extern int vmw_prime_handle_to_fd(struct drm_device *dev,
struct drm_file *file_priv,
uint32_t handle, uint32_t flags,
int *prime_fd);
+struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *table);
/*
* MemoryOBject management - vmwgfx_mob.c
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index cc3086e649eb5c..2e52d73eba4840 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -35,6 +35,7 @@
#include <linux/sync_file.h>
#include <linux/hashtable.h>
+#include <linux/vmalloc.h>
/*
* Helper macro to get dx_ctx_node if available otherwise print an error
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
index 12787bb9c111d1..d6bcaf078b1f40 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
@@ -149,6 +149,38 @@ out_no_bo:
return ret;
}
+struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *table)
+{
+ int ret;
+ struct vmw_private *dev_priv = vmw_priv(dev);
+ struct drm_gem_object *gem = NULL;
+ struct vmw_bo *vbo;
+ struct vmw_bo_params params = {
+ .domain = (dev_priv->has_mob) ? VMW_BO_DOMAIN_SYS : VMW_BO_DOMAIN_VRAM,
+ .busy_domain = VMW_BO_DOMAIN_SYS,
+ .bo_type = ttm_bo_type_sg,
+ .size = attach->dmabuf->size,
+ .pin = false,
+ .resv = attach->dmabuf->resv,
+ .sg = table,
+
+ };
+
+ dma_resv_lock(params.resv, NULL);
+
+ ret = vmw_bo_create(dev_priv, &params, &vbo);
+ if (ret != 0)
+ goto out_no_bo;
+
+ vbo->tbo.base.funcs = &vmw_gem_object_funcs;
+
+ gem = &vbo->tbo.base;
+out_no_bo:
+ dma_resv_unlock(params.resv);
+ return gem;
+}
int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index a1da5678c73140..835d1eed8dd931 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -31,6 +31,7 @@
#include <drm/vmwgfx_drm.h>
#include <linux/pci.h>
+#include <linux/vmalloc.h>
int vmw_getparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index cd4925346ed45a..84ae4e10a2ebec 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -933,6 +933,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane,
int vmw_du_crtc_atomic_check(struct drm_crtc *crtc,
struct drm_atomic_state *state)
{
+ struct vmw_private *vmw = vmw_priv(crtc->dev);
struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state,
crtc);
struct vmw_display_unit *du = vmw_crtc_to_du(new_state->crtc);
@@ -940,9 +941,13 @@ int vmw_du_crtc_atomic_check(struct drm_crtc *crtc,
bool has_primary = new_state->plane_mask &
drm_plane_mask(crtc->primary);
- /* We always want to have an active plane with an active CRTC */
- if (has_primary != new_state->enable)
- return -EINVAL;
+ /*
+ * This is fine in general, but broken userspace might expect
+ * some actual rendering so give a clue as why it's blank.
+ */
+ if (new_state->enable && !has_primary)
+ drm_dbg_driver(&vmw->drm,
+ "CRTC without a primary plane will be blank.\n");
if (new_state->connector_mask != connector_mask &&
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index a94947b588e85f..19a843da87b789 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -243,10 +243,10 @@ struct vmw_framebuffer_bo {
static const uint32_t __maybe_unused vmw_primary_plane_formats[] = {
- DRM_FORMAT_XRGB1555,
- DRM_FORMAT_RGB565,
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_RGB565,
+ DRM_FORMAT_XRGB1555,
};
static const uint32_t __maybe_unused vmw_cursor_plane_formats[] = {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
index 2d72a5ee7c0c71..c99cad44499157 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
@@ -75,8 +75,12 @@ int vmw_prime_fd_to_handle(struct drm_device *dev,
int fd, u32 *handle)
{
struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+ int ret = ttm_prime_fd_to_handle(tfile, fd, handle);
- return ttm_prime_fd_to_handle(tfile, fd, handle);
+ if (ret)
+ ret = drm_gem_prime_fd_to_handle(dev, file_priv, fd, handle);
+
+ return ret;
}
int vmw_prime_handle_to_fd(struct drm_device *dev,
@@ -85,5 +89,12 @@ int vmw_prime_handle_to_fd(struct drm_device *dev,
int *prime_fd)
{
struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
- return ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd);
+ int ret;
+
+ if (handle > VMWGFX_NUM_MOB)
+ ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd);
+ else
+ ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd);
+
+ return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index 4d23d0a70bcb7e..621d98b376bbbc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -188,13 +188,18 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
switch (dev_priv->map_mode) {
case vmw_dma_map_bind:
case vmw_dma_map_populate:
- vsgt->sgt = &vmw_tt->sgt;
- ret = sg_alloc_table_from_pages_segment(
- &vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
- (unsigned long)vsgt->num_pages << PAGE_SHIFT,
- dma_get_max_seg_size(dev_priv->drm.dev), GFP_KERNEL);
- if (ret)
- goto out_sg_alloc_fail;
+ if (vmw_tt->dma_ttm.page_flags & TTM_TT_FLAG_EXTERNAL) {
+ vsgt->sgt = vmw_tt->dma_ttm.sg;
+ } else {
+ vsgt->sgt = &vmw_tt->sgt;
+ ret = sg_alloc_table_from_pages_segment(&vmw_tt->sgt,
+ vsgt->pages, vsgt->num_pages, 0,
+ (unsigned long)vsgt->num_pages << PAGE_SHIFT,
+ dma_get_max_seg_size(dev_priv->drm.dev),
+ GFP_KERNEL);
+ if (ret)
+ goto out_sg_alloc_fail;
+ }
ret = vmw_ttm_map_for_dma(vmw_tt);
if (unlikely(ret != 0))
@@ -209,8 +214,9 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
return 0;
out_map_fail:
- sg_free_table(vmw_tt->vsgt.sgt);
- vmw_tt->vsgt.sgt = NULL;
+ drm_warn(&dev_priv->drm, "VSG table map failed!");
+ sg_free_table(vsgt->sgt);
+ vsgt->sgt = NULL;
out_sg_alloc_fail:
return ret;
}
@@ -356,15 +362,17 @@ static void vmw_ttm_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
static int vmw_ttm_populate(struct ttm_device *bdev,
struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
{
- int ret;
+ bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
- /* TODO: maybe completely drop this ? */
if (ttm_tt_is_populated(ttm))
return 0;
- ret = ttm_pool_alloc(&bdev->pool, ttm, ctx);
+ if (external && ttm->sg)
+ return drm_prime_sg_to_dma_addr_array(ttm->sg,
+ ttm->dma_address,
+ ttm->num_pages);
- return ret;
+ return ttm_pool_alloc(&bdev->pool, ttm, ctx);
}
static void vmw_ttm_unpopulate(struct ttm_device *bdev,
@@ -372,6 +380,10 @@ static void vmw_ttm_unpopulate(struct ttm_device *bdev,
{
struct vmw_ttm_tt *vmw_tt = container_of(ttm, struct vmw_ttm_tt,
dma_ttm);
+ bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
+
+ if (external)
+ return;
vmw_ttm_unbind(bdev, ttm);
@@ -390,6 +402,7 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
{
struct vmw_ttm_tt *vmw_be;
int ret;
+ bool external = bo->type == ttm_bo_type_sg;
vmw_be = kzalloc(sizeof(*vmw_be), GFP_KERNEL);
if (!vmw_be)
@@ -398,7 +411,10 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
vmw_be->dev_priv = vmw_priv_from_ttm(bo->bdev);
vmw_be->mob = NULL;
- if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
+ if (external)
+ page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+
+ if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent || external)
ret = ttm_sg_tt_init(&vmw_be->dma_ttm, bo, page_flags,
ttm_cached);
else
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 5a428ca00f10f2..c29a850859ad5a 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -172,8 +172,8 @@ subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
-Ddrm_i915_gem_object=xe_bo \
-Ddrm_i915_private=xe_device
-CFLAGS_i915-display/intel_fbdev.o = $(call cc-disable-warning, override-init)
-CFLAGS_i915-display/intel_display_device.o = $(call cc-disable-warning, override-init)
+CFLAGS_i915-display/intel_fbdev.o = -Wno-override-init
+CFLAGS_i915-display/intel_display_device.o = -Wno-override-init
# Rule to build SOC code shared with i915
$(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c
index b21da7b745a5e7..a9c1f9885c6bb4 100644
--- a/drivers/gpu/drm/xe/display/intel_fb_bo.c
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c
@@ -31,7 +31,7 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
ret = ttm_bo_reserve(&bo->ttm, true, false, NULL);
if (ret)
- return ret;
+ goto err;
if (!(bo->flags & XE_BO_SCANOUT_BIT)) {
/*
@@ -42,12 +42,16 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
*/
if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) {
ttm_bo_unreserve(&bo->ttm);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err;
}
bo->flags |= XE_BO_SCANOUT_BIT;
}
ttm_bo_unreserve(&bo->ttm);
+ return 0;
+err:
+ xe_bo_put(bo);
return ret;
}
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index e4db069f0db3f1..6ec375c1c4b6c0 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -108,11 +108,6 @@ int xe_display_create(struct xe_device *xe)
xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
drmm_mutex_init(&xe->drm, &xe->sb_lock);
- drmm_mutex_init(&xe->drm, &xe->display.backlight.lock);
- drmm_mutex_init(&xe->drm, &xe->display.audio.mutex);
- drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex);
- drmm_mutex_init(&xe->drm, &xe->display.pps.mutex);
- drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex);
xe->enabled_irq_mask = ~0;
err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 0b1266c88a6af3..deddc8be48c0af 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -125,7 +125,7 @@
#define RING_EXECLIST_STATUS_LO(base) XE_REG((base) + 0x234)
#define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4)
-#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244)
+#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 6603a0ea79c5af..9c0837b6fdfc8d 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -144,9 +144,6 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
.mem_type = XE_PL_TT,
};
*c += 1;
-
- if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
- bo->props.preferred_mem_type = XE_PL_TT;
}
}
@@ -181,25 +178,15 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
}
places[*c] = place;
*c += 1;
-
- if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
- bo->props.preferred_mem_type = mem_type;
}
static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
u32 bo_flags, u32 *c)
{
- if (bo->props.preferred_gt == XE_GT1) {
- if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
- if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
- } else {
- if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
- if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
- }
+ if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
+ add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
+ if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
+ add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
}
static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
@@ -223,17 +210,8 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
{
u32 c = 0;
- bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
-
- /* The order of placements should indicate preferred location */
-
- if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
- try_add_system(xe, bo, bo_flags, &c);
- try_add_vram(xe, bo, bo_flags, &c);
- } else {
- try_add_vram(xe, bo, bo_flags, &c);
- try_add_system(xe, bo, bo_flags, &c);
- }
+ try_add_vram(xe, bo, bo_flags, &c);
+ try_add_system(xe, bo, bo_flags, &c);
try_add_stolen(xe, bo, bo_flags, &c);
if (!c)
@@ -1126,13 +1104,6 @@ static void xe_gem_object_close(struct drm_gem_object *obj,
}
}
-static bool should_migrate_to_system(struct xe_bo *bo)
-{
- struct xe_device *xe = xe_bo_device(bo);
-
- return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
-}
-
static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
@@ -1141,7 +1112,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
struct xe_bo *bo = ttm_to_xe_bo(tbo);
bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK;
vm_fault_t ret;
- int idx, r = 0;
+ int idx;
if (needs_rpm)
xe_device_mem_access_get(xe);
@@ -1153,17 +1124,8 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
if (drm_dev_enter(ddev, &idx)) {
trace_xe_bo_cpu_fault(bo);
- if (should_migrate_to_system(bo)) {
- r = xe_bo_migrate(bo, XE_PL_TT);
- if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
- ret = VM_FAULT_NOPAGE;
- else if (r)
- ret = VM_FAULT_SIGBUS;
- }
- if (!ret)
- ret = ttm_bo_vm_fault_reserved(vmf,
- vmf->vma->vm_page_prot,
- TTM_BO_VM_NUM_PREFAULT);
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
drm_dev_exit(idx);
} else {
ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
@@ -1291,9 +1253,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
bo->flags = flags;
bo->cpu_caching = cpu_caching;
bo->ttm.base.funcs = &xe_gem_object_funcs;
- bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
- bo->props.preferred_gt = XE_BO_PROPS_INVALID;
- bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
INIT_LIST_HEAD(&bo->pinned_link);
#ifdef CONFIG_PROC_FS
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 14ef13b7b421f3..86422e113d3962 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -56,25 +56,6 @@ struct xe_bo {
*/
struct list_head client_link;
#endif
- /** @props: BO user controlled properties */
- struct {
- /** @preferred_mem: preferred memory class for this BO */
- s16 preferred_mem_class;
- /** @prefered_gt: preferred GT for this BO */
- s16 preferred_gt;
- /** @preferred_mem_type: preferred memory type */
- s32 preferred_mem_type;
- /**
- * @cpu_atomic: the CPU expects to do atomics operations to
- * this BO
- */
- bool cpu_atomic;
- /**
- * @device_atomic: the device expects to do atomics operations
- * to this BO
- */
- bool device_atomic;
- } props;
/** @freed: List node for delayed put. */
struct llist_node freed;
/** @created: Whether the bo has passed initial creation */
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index ca85e81fdb4438..d32ff3857e6583 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -193,6 +193,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
struct xe_device *xe = to_xe_device(dev);
+ if (xe->preempt_fence_wq)
+ destroy_workqueue(xe->preempt_fence_wq);
+
if (xe->ordered_wq)
destroy_workqueue(xe->ordered_wq);
@@ -258,9 +261,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
INIT_LIST_HEAD(&xe->pinned.external_vram);
INIT_LIST_HEAD(&xe->pinned.evicted);
+ xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
- if (!xe->ordered_wq || !xe->unordered_wq) {
+ if (!xe->ordered_wq || !xe->unordered_wq ||
+ !xe->preempt_fence_wq) {
+ /*
+ * Cleanup done in xe_device_destroy via
+ * drmm_add_action_or_reset register above
+ */
drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
err = -ENOMEM;
goto err;
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 14be34d9f5434b..d413bc2c6be5a0 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -58,7 +58,7 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe)
static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id)
{
- if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id > XE_MAX_GT_PER_TILE))
+ if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE))
gt_id = 0;
return gt_id ? tile->media_gt : tile->primary_gt;
@@ -79,7 +79,7 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
if (MEDIA_VER(xe) >= 13) {
gt = xe_tile_get_gt(root_tile, gt_id);
} else {
- if (drm_WARN_ON(&xe->drm, gt_id > XE_MAX_TILES_PER_DEVICE))
+ if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE))
gt_id = 0;
gt = xe->tiles[gt_id].primary_gt;
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 9785eef2e5a4e6..8e3a222b41cf0a 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -363,6 +363,9 @@ struct xe_device {
/** @ufence_wq: user fence wait queue */
wait_queue_head_t ufence_wq;
+ /** @preempt_fence_wq: used to serialize preempt fences */
+ struct workqueue_struct *preempt_fence_wq;
+
/** @ordered_wq: used to serialize compute mode resume */
struct workqueue_struct *ordered_wq;
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 826c8b38967250..cc5e0f75de3c73 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -94,48 +94,16 @@
* Unlock all
*/
+/*
+ * Add validation and rebinding to the drm_exec locking loop, since both can
+ * trigger eviction which may require sleeping dma_resv locks.
+ */
static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
{
struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
- struct drm_gem_object *obj;
- unsigned long index;
- int num_fences;
- int ret;
-
- ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
- if (ret)
- return ret;
-
- /*
- * 1 fence slot for the final submit, and 1 more for every per-tile for
- * GPU bind and 1 extra for CPU bind. Note that there are potentially
- * many vma per object/dma-resv, however the fence slot will just be
- * re-used, since they are largely the same timeline and the seqno
- * should be in order. In the case of CPU bind there is dummy fence used
- * for all CPU binds, so no need to have a per-tile slot for that.
- */
- num_fences = 1 + 1 + vm->xe->info.tile_count;
- /*
- * We don't know upfront exactly how many fence slots we will need at
- * the start of the exec, since the TTM bo_validate above can consume
- * numerous fence slots. Also due to how the dma_resv_reserve_fences()
- * works it only ensures that at least that many fence slots are
- * available i.e if there are already 10 slots available and we reserve
- * two more, it can just noop without reserving anything. With this it
- * is quite possible that TTM steals some of the fence slots and then
- * when it comes time to do the vma binding and final exec stage we are
- * lacking enough fence slots, leading to some nasty BUG_ON() when
- * adding the fences. Hence just add our own fences here, after the
- * validate stage.
- */
- drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) {
- ret = dma_resv_reserve_fences(obj->resv, num_fences);
- if (ret)
- return ret;
- }
-
- return 0;
+ /* The fence slot added here is intended for the exec sched job. */
+ return xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
}
int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -152,7 +120,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct drm_exec *exec = &vm_exec.exec;
u32 i, num_syncs = 0, num_ufence = 0;
struct xe_sched_job *job;
- struct dma_fence *rebind_fence;
struct xe_vm *vm;
bool write_locked, skip_retry = false;
ktime_t end = 0;
@@ -290,39 +257,7 @@ retry:
goto err_exec;
}
- /*
- * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
- * VM mode only.
- */
- rebind_fence = xe_vm_rebind(vm, false);
- if (IS_ERR(rebind_fence)) {
- err = PTR_ERR(rebind_fence);
- goto err_put_job;
- }
-
- /*
- * We store the rebind_fence in the VM so subsequent execs don't get
- * scheduled before the rebinds of userptrs / evicted BOs is complete.
- */
- if (rebind_fence) {
- dma_fence_put(vm->rebind_fence);
- vm->rebind_fence = rebind_fence;
- }
- if (vm->rebind_fence) {
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
- &vm->rebind_fence->flags)) {
- dma_fence_put(vm->rebind_fence);
- vm->rebind_fence = NULL;
- } else {
- dma_fence_get(vm->rebind_fence);
- err = drm_sched_job_add_dependency(&job->drm,
- vm->rebind_fence);
- if (err)
- goto err_put_job;
- }
- }
-
- /* Wait behind munmap style rebinds */
+ /* Wait behind rebinds */
if (!xe_vm_in_lr_mode(vm)) {
err = drm_sched_job_add_resv_dependencies(&job->drm,
xe_vm_resv(vm),
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 11e150f4c0c1f7..ead25d5e723ea5 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -448,7 +448,7 @@ find_hw_engine(struct xe_device *xe,
{
u32 idx;
- if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class))
+ if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
return NULL;
if (eci.gt_id >= xe->info.gt_count)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 62b3d9d1d7cdd4..462b331950320c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -148,6 +148,11 @@ struct xe_exec_queue {
const struct xe_ring_ops *ring_ops;
/** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
struct drm_sched_entity *entity;
+ /**
+ * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed
+ * Protected by @vm's resv. Unused if @vm == NULL.
+ */
+ u64 tlb_flush_seqno;
/** @lrc: logical ring context for this exec queue */
struct xe_lrc lrc[];
};
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index a0afe1ba6dd5ce..f9705430ada930 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -378,7 +378,9 @@ static int gt_fw_domain_init(struct xe_gt *gt)
err);
/* Initialize CCS mode sysfs after early initialization of HW engines */
- xe_gt_ccs_mode_sysfs_init(gt);
+ err = xe_gt_ccs_mode_sysfs_init(gt);
+ if (err)
+ goto err_force_wake;
/*
* Stash hardware-reported version. Since this register does not exist
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
index 529fc286cd06c6..396aeb5b992424 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
@@ -167,25 +167,20 @@ static void xe_gt_ccs_mode_sysfs_fini(struct drm_device *drm, void *arg)
* and it is expected that there are no open drm clients while doing so.
* The number of available compute slices is exposed to user through a per-gt
* 'num_cslices' sysfs interface.
+ *
+ * Returns: Returns error value for failure and 0 for success.
*/
-void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt)
+int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
int err;
if (!xe_gt_ccs_mode_enabled(gt))
- return;
+ return 0;
err = sysfs_create_files(gt->sysfs, gt_ccs_mode_attrs);
- if (err) {
- drm_warn(&xe->drm, "Sysfs creation for ccs_mode failed err: %d\n", err);
- return;
- }
+ if (err)
+ return err;
- err = drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt);
- if (err) {
- sysfs_remove_files(gt->sysfs, gt_ccs_mode_attrs);
- drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
- __func__, err);
- }
+ return drmm_add_action_or_reset(&xe->drm, xe_gt_ccs_mode_sysfs_fini, gt);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
index f39975aaaab0db..f8779852cf0d26 100644
--- a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
@@ -12,7 +12,7 @@
#include "xe_platform_types.h"
void xe_gt_apply_ccs_mode(struct xe_gt *gt);
-void xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt);
+int xe_gt_ccs_mode_sysfs_init(struct xe_gt *gt);
static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt)
{
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 241c294270d916..fa9e9853c53ba6 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
{
struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma);
- unsigned int num_shared = 2; /* slots for bind + move */
int err;
- err = xe_vm_prepare_vma(exec, vma, num_shared);
+ err = xe_vm_lock_vma(exec, vma);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index f03e077f81a04f..e598a4363d0190 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -61,7 +61,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
spin_lock_init(&gt->tlb_invalidation.pending_lock);
spin_lock_init(&gt->tlb_invalidation.lock);
- gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
xe_gt_tlb_fence_timeout);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 70c615dd149865..07b2f724ec4568 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -177,13 +177,6 @@ struct xe_gt {
* xe_gt_tlb_fence_timeout after the timeut interval is over.
*/
struct delayed_work fence_tdr;
- /** @tlb_invalidation.fence_context: context for TLB invalidation fences */
- u64 fence_context;
- /**
- * @tlb_invalidation.fence_seqno: seqno to TLB invalidation fences, protected by
- * tlb_invalidation.lock
- */
- u32 fence_seqno;
/** @tlb_invalidation.lock: protects TLB invalidation fences */
spinlock_t lock;
} tlb_invalidation;
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 355edd4d758af7..7f32547f94b266 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -1054,10 +1054,10 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
adj_len);
break;
case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
- ret = xe_guc_relay_process_guc2pf(&guc->relay, payload, adj_len);
+ ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
break;
case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
- ret = xe_guc_relay_process_guc2vf(&guc->relay, payload, adj_len);
+ ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len);
break;
default:
drm_err(&xe->drm, "unexpected action 0x%04x\n", action);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index ff77bc8da1b270..e2a4c3b5e9ff84 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1220,7 +1220,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
init_waitqueue_head(&ge->suspend_wait);
timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
- q->sched_props.job_timeout_ms;
+ msecs_to_jiffies(q->sched_props.job_timeout_ms);
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
get_submit_wq(guc),
q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64,
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index b545f850087cd8..6b9b1cbedd379e 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -53,7 +53,6 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc)
struct xe_gt *gt = huc_to_gt(huc);
struct xe_device *xe = gt_to_xe(gt);
struct xe_bo *bo;
- int err;
/* we use a single object for both input and output */
bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL,
@@ -66,13 +65,7 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc)
huc->gsc_pkt = bo;
- err = drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc);
- if (err) {
- free_gsc_pkt(&xe->drm, huc);
- return err;
- }
-
- return 0;
+ return drmm_add_action_or_reset(&xe->drm, free_gsc_pkt, huc);
}
int xe_huc_init(struct xe_huc *huc)
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index b82233a4160624..9ac7fbe201b3c2 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -290,7 +290,7 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a
* As y can be < 2, we compute tau4 = (4 | x) << y
* and then add 2 when doing the final right shift to account for units
*/
- tau4 = ((1 << x_w) | x) << y;
+ tau4 = (u64)((1 << x_w) | x) << y;
/* val in hwmon interface units (millisec) */
out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
@@ -330,7 +330,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *
r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
- tau4 = ((1 << x_w) | x) << y;
+ tau4 = (u64)((1 << x_w) | x) << y;
max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
if (val > max_win)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 7ad853b0788af4..57066faf575eec 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -97,7 +97,6 @@ static void set_offsets(u32 *regs,
#define REG16(x) \
(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
(((x) >> 2) & 0x7f)
-#define END 0
{
const u32 base = hwe->mmio_base;
@@ -168,7 +167,7 @@ static const u8 gen12_xcs_offsets[] = {
REG16(0x274),
REG16(0x270),
- END
+ 0
};
static const u8 dg2_xcs_offsets[] = {
@@ -202,7 +201,7 @@ static const u8 dg2_xcs_offsets[] = {
REG16(0x274),
REG16(0x270),
- END
+ 0
};
static const u8 gen12_rcs_offsets[] = {
@@ -298,7 +297,7 @@ static const u8 gen12_rcs_offsets[] = {
REG(0x084),
NOP(1),
- END
+ 0
};
static const u8 xehp_rcs_offsets[] = {
@@ -339,7 +338,7 @@ static const u8 xehp_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END
+ 0
};
static const u8 dg2_rcs_offsets[] = {
@@ -382,7 +381,7 @@ static const u8 dg2_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END
+ 0
};
static const u8 mtl_rcs_offsets[] = {
@@ -425,7 +424,7 @@ static const u8 mtl_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END
+ 0
};
#define XE2_CTX_COMMON \
@@ -471,7 +470,7 @@ static const u8 xe2_rcs_offsets[] = {
LRI(1, 0), /* [0x47] */
REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */
- END
+ 0
};
static const u8 xe2_bcs_offsets[] = {
@@ -482,16 +481,15 @@ static const u8 xe2_bcs_offsets[] = {
REG16(0x200), /* [0x42] BCS_SWCTRL */
REG16(0x204), /* [0x44] BLIT_CCTL */
- END
+ 0
};
static const u8 xe2_xcs_offsets[] = {
XE2_CTX_COMMON,
- END
+ 0
};
-#undef END
#undef REG16
#undef REG
#undef LRI
@@ -527,9 +525,8 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
- regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
- CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+ regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
+ CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
/* TODO: Timestamp */
}
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index ee1bb938c49348..2ba4fb9511f63f 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -227,7 +227,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
if (vm->flags & XE_VM_FLAG_64K && level == 1)
flags = XE_PDE_64K;
- entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
+ entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) *
XE_PAGE_SIZE, pat_index);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
entry | flags);
@@ -235,7 +235,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
/* Write PDE's that point to our BO. */
for (i = 0; i < num_entries - num_level; i++) {
- entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
+ entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE,
pat_index);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
@@ -291,7 +291,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
#define VM_SA_UPDATE_UNIT_SIZE (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
#define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
drm_suballoc_manager_init(&m->vm_update_sa,
- (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
+ (size_t)(map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
NUM_VMUSA_UNIT_PER_PAGE, 0);
m->pt_bo = bo;
@@ -490,7 +490,7 @@ static void emit_pte(struct xe_migrate *m,
struct xe_vm *vm = m->q->vm;
u16 pat_index;
u32 ptes;
- u64 ofs = at_pt * XE_PAGE_SIZE;
+ u64 ofs = (u64)at_pt * XE_PAGE_SIZE;
u64 cur_ofs;
/* Indirect access needs compression enabled uncached PAT index */
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index 7bce2a332603c0..7d50c6e89d8e7d 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -49,7 +49,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence)
struct xe_exec_queue *q = pfence->q;
pfence->error = q->ops->suspend(q);
- queue_work(system_unbound_wq, &pfence->preempt_work);
+ queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work);
return true;
}
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 7f54bc3e389d58..4efc8c1a3d7a99 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1135,8 +1135,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
spin_lock_irq(&gt->tlb_invalidation.lock);
dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
&gt->tlb_invalidation.lock,
- gt->tlb_invalidation.fence_context,
- ++gt->tlb_invalidation.fence_seqno);
+ dma_fence_context_alloc(1), 1);
spin_unlock_irq(&gt->tlb_invalidation.lock);
INIT_LIST_HEAD(&ifence->base.link);
@@ -1236,6 +1235,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
err = xe_pt_prepare_bind(tile, vma, entries, &num_entries);
if (err)
goto err;
+
+ err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+ if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+ if (err)
+ goto err;
+
xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
@@ -1254,11 +1260,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
* non-faulting LR, in particular on user-space batch buffer chaining,
* it needs to be done here.
*/
- if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) ||
- (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
+ if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
if (!ifence)
return ERR_PTR(-ENOMEM);
+ } else if (rebind && !xe_vm_in_lr_mode(vm)) {
+ /* We bump also if batch_invalidate_tlb is true */
+ vm->tlb_flush_seqno++;
}
rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
@@ -1297,7 +1305,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
}
/* add shared fence now for pagetable delayed destroy */
- dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind &&
+ dma_resv_add_fence(xe_vm_resv(vm), fence, rebind ||
last_munmap_rebind ?
DMA_RESV_USAGE_KERNEL :
DMA_RESV_USAGE_BOOKKEEP);
@@ -1576,6 +1584,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
struct dma_fence *fence = NULL;
struct invalidation_fence *ifence;
struct xe_range_fence *rfence;
+ int err;
LLIST_HEAD(deferred);
@@ -1593,6 +1602,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
num_entries);
+ err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+ if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+ if (err)
+ return ERR_PTR(err);
+
ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
if (!ifence)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 92bb06c0586eb4..075f9eaef03122 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -132,7 +132,7 @@ query_engine_cycles(struct xe_device *xe,
return -EINVAL;
eci = &resp.eci;
- if (eci->gt_id > XE_MAX_GT_PER_TILE)
+ if (eci->gt_id >= XE_MAX_GT_PER_TILE)
return -EINVAL;
gt = xe_device_get_gt(xe, eci->gt_id);
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index c4edffcd4a3206..5b2b37b598130a 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -219,10 +219,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
{
u32 dw[MAX_JOB_SIZE_DW], i = 0;
u32 ppgtt_flag = get_ppgtt_flag(job);
- struct xe_vm *vm = job->q->vm;
struct xe_gt *gt = job->q->gt;
- if (vm && vm->batch_invalidate_tlb) {
+ if (job->ring_ops_flush_tlb) {
dw[i++] = preparser_disable(true);
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, true, dw, i);
@@ -270,7 +269,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
struct xe_gt *gt = job->q->gt;
struct xe_device *xe = gt_to_xe(gt);
bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
- struct xe_vm *vm = job->q->vm;
dw[i++] = preparser_disable(true);
@@ -282,13 +280,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
}
- if (vm && vm->batch_invalidate_tlb)
+ if (job->ring_ops_flush_tlb)
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, true, dw, i);
dw[i++] = preparser_disable(false);
- if (!vm || !vm->batch_invalidate_tlb)
+ if (!job->ring_ops_flush_tlb)
i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, dw, i);
@@ -317,7 +315,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
struct xe_gt *gt = job->q->gt;
struct xe_device *xe = gt_to_xe(gt);
bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
- struct xe_vm *vm = job->q->vm;
u32 mask_flags = 0;
dw[i++] = preparser_disable(true);
@@ -327,7 +324,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
- i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i);
+ i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i);
/* hsdes: 1809175790 */
if (has_aux_ccs(xe))
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 8151ddafb94075..b0c7fa4693cfe4 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -250,6 +250,16 @@ bool xe_sched_job_completed(struct xe_sched_job *job)
void xe_sched_job_arm(struct xe_sched_job *job)
{
+ struct xe_exec_queue *q = job->q;
+ struct xe_vm *vm = q->vm;
+
+ if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
+ (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
+ xe_vm_assert_held(vm);
+ q->tlb_flush_seqno = vm->tlb_flush_seqno;
+ job->ring_ops_flush_tlb = true;
+ }
+
drm_sched_job_arm(&job->drm);
}
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index b1d83da50a53da..5e12724219fdd4 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -39,6 +39,8 @@ struct xe_sched_job {
} user_fence;
/** @migrate_flush_flags: Additional flush flags for migration jobs */
u32 migrate_flush_flags;
+ /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
+ bool ring_ops_flush_tlb;
/** @batch_addr: batch buffer address of job */
u64 batch_addr[];
};
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index f88faef4142bde..3d4c8f342e215e 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -482,17 +482,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
return 0;
}
+/**
+ * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
+ * @vm: The vm for which we are rebinding.
+ * @exec: The struct drm_exec with the locked GEM objects.
+ * @num_fences: The number of fences to reserve for the operation, not
+ * including rebinds and validations.
+ *
+ * Validates all evicted gem objects and rebinds their vmas. Note that
+ * rebindings may cause evictions and hence the validation-rebind
+ * sequence is rerun until there are no more objects to validate.
+ *
+ * Return: 0 on success, negative error code on error. In particular,
+ * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
+ * the drm_exec transaction needs to be restarted.
+ */
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ struct drm_gem_object *obj;
+ unsigned long index;
+ int ret;
+
+ do {
+ ret = drm_gpuvm_validate(&vm->gpuvm, exec);
+ if (ret)
+ return ret;
+
+ ret = xe_vm_rebind(vm, false);
+ if (ret)
+ return ret;
+ } while (!list_empty(&vm->gpuvm.evict.list));
+
+ drm_exec_for_each_locked_object(exec, index, obj) {
+ ret = dma_resv_reserve_fences(obj->resv, num_fences);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
bool *done)
{
int err;
- /*
- * 1 fence for each preempt fence plus a fence for each tile from a
- * possible rebind
- */
- err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
- vm->xe->info.tile_count);
+ err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
if (err)
return err;
@@ -507,7 +543,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
return 0;
}
- err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
+ err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
if (err)
return err;
@@ -515,14 +551,19 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
if (err)
return err;
- return drm_gpuvm_validate(&vm->gpuvm, exec);
+ /*
+ * Add validation and rebinding to the locking loop since both can
+ * cause evictions which may require blocing dma_resv locks.
+ * The fence reservation here is intended for the new preempt fences
+ * we attach at the end of the rebind work.
+ */
+ return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}
static void preempt_rebind_work_func(struct work_struct *w)
{
struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
struct drm_exec exec;
- struct dma_fence *rebind_fence;
unsigned int fence_count = 0;
LIST_HEAD(preempt_fences);
ktime_t end = 0;
@@ -568,18 +609,11 @@ retry:
if (err)
goto out_unlock;
- rebind_fence = xe_vm_rebind(vm, true);
- if (IS_ERR(rebind_fence)) {
- err = PTR_ERR(rebind_fence);
+ err = xe_vm_rebind(vm, true);
+ if (err)
goto out_unlock;
- }
-
- if (rebind_fence) {
- dma_fence_wait(rebind_fence, false);
- dma_fence_put(rebind_fence);
- }
- /* Wait on munmap style VM unbinds */
+ /* Wait on rebinds and munmap style VM unbinds */
wait = dma_resv_wait_timeout(xe_vm_resv(vm),
DMA_RESV_USAGE_KERNEL,
false, MAX_SCHEDULE_TIMEOUT);
@@ -773,14 +807,14 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
struct xe_sync_entry *syncs, u32 num_syncs,
bool first_op, bool last_op);
-struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
+int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
- struct dma_fence *fence = NULL;
+ struct dma_fence *fence;
struct xe_vma *vma, *next;
lockdep_assert_held(&vm->lock);
if (xe_vm_in_lr_mode(vm) && !rebind_worker)
- return NULL;
+ return 0;
xe_vm_assert_held(vm);
list_for_each_entry_safe(vma, next, &vm->rebind_list,
@@ -788,17 +822,17 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
xe_assert(vm->xe, vma->tile_present);
list_del_init(&vma->combined_links.rebind);
- dma_fence_put(fence);
if (rebind_worker)
trace_xe_vma_rebind_worker(vma);
else
trace_xe_vma_rebind_exec(vma);
fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
if (IS_ERR(fence))
- return fence;
+ return PTR_ERR(fence);
+ dma_fence_put(fence);
}
- return fence;
+ return 0;
}
static void xe_vma_free(struct xe_vma *vma)
@@ -1004,35 +1038,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
}
/**
- * xe_vm_prepare_vma() - drm_exec utility to lock a vma
+ * xe_vm_lock_vma() - drm_exec utility to lock a vma
* @exec: The drm_exec object we're currently locking for.
* @vma: The vma for witch we want to lock the vm resv and any attached
* object's resv.
- * @num_shared: The number of dma-fence slots to pre-allocate in the
- * objects' reservation objects.
*
* Return: 0 on success, negative error code on error. In particular
* may return -EDEADLK on WW transaction contention and -EINTR if
* an interruptible wait is terminated by a signal.
*/
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
- unsigned int num_shared)
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
struct xe_vm *vm = xe_vma_vm(vma);
struct xe_bo *bo = xe_vma_bo(vma);
int err;
XE_WARN_ON(!vm);
- if (num_shared)
- err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
- else
- err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
- if (!err && bo && !bo->vm) {
- if (num_shared)
- err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
- else
- err = drm_exec_lock_obj(exec, &bo->ttm.base);
- }
+
+ err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+ if (!err && bo && !bo->vm)
+ err = drm_exec_lock_obj(exec, &bo->ttm.base);
return err;
}
@@ -1044,7 +1069,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma)
drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
- err = xe_vm_prepare_vma(&exec, vma, 0);
+ err = xe_vm_lock_vma(&exec, vma);
drm_exec_retry_on_contention(&exec);
if (XE_WARN_ON(err))
break;
@@ -1552,6 +1577,16 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe->usm.num_vm_in_fault_mode--;
else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
xe->usm.num_vm_in_non_fault_mode--;
+
+ if (vm->usm.asid) {
+ void *lookup;
+
+ xe_assert(xe, xe->info.has_asid);
+ xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
+
+ lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+ xe_assert(xe, lookup == vm);
+ }
mutex_unlock(&xe->usm.lock);
for_each_tile(tile, xe, id)
@@ -1567,29 +1602,19 @@ static void vm_destroy_work_func(struct work_struct *w)
struct xe_device *xe = vm->xe;
struct xe_tile *tile;
u8 id;
- void *lookup;
/* xe_vm_close_and_put was not called? */
xe_assert(xe, !vm->size);
mutex_destroy(&vm->snap_mutex);
- if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
+ if (!(vm->flags & XE_VM_FLAG_MIGRATION))
xe_device_mem_access_put(xe);
- if (xe->info.has_asid && vm->usm.asid) {
- mutex_lock(&xe->usm.lock);
- lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
- xe_assert(xe, lookup == vm);
- mutex_unlock(&xe->usm.lock);
- }
- }
-
for_each_tile(tile, xe, id)
XE_WARN_ON(vm->pt_root[id]);
trace_xe_vm_free(vm);
- dma_fence_put(vm->rebind_fence);
kfree(vm);
}
@@ -2512,7 +2537,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
lockdep_assert_held_write(&vm->lock);
- err = xe_vm_prepare_vma(exec, vma, 1);
+ err = xe_vm_lock_vma(exec, vma);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 6df1f1c7f85d98..306cd0934a190b 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -207,7 +207,7 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
int xe_vm_userptr_check_repin(struct xe_vm *vm);
-struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
+int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
int xe_vm_invalidate_vma(struct xe_vma *vma);
@@ -242,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
- unsigned int num_shared);
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
+
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
/**
* xe_vm_resv() - Return's the vm's reservation object
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index ae5fb565f6bf48..badf3945083d56 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -177,9 +177,6 @@ struct xe_vm {
*/
struct list_head rebind_list;
- /** @rebind_fence: rebind fence from execbuf */
- struct dma_fence *rebind_fence;
-
/**
* @destroy_work: worker to destroy VM, needed as a dma_fence signaling
* from an irq context can be last put and the destroy needs to be able
@@ -264,6 +261,11 @@ struct xe_vm {
bool capture_once;
} error_capture;
+ /**
+ * @tlb_flush_seqno: Required TLB flush seqno for the next exec.
+ * protected by the vm resv.
+ */
+ u64 tlb_flush_seqno;
/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
bool batch_invalidate_tlb;
/** @xef: XE file handle for tracking this VM's drm client */
diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c
index 3ad2b4cfd1f0b2..63112ed975c472 100644
--- a/drivers/gpu/drm/xen/xen_drm_front_gem.c
+++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c
@@ -11,6 +11,7 @@
#include <linux/dma-buf.h>
#include <linux/scatterlist.h>
#include <linux/shmem_fs.h>
+#include <linux/vmalloc.h>
#include <drm/drm_gem.h>
#include <drm/drm_prime.h>
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 783975d1384fc4..7c52757a89db9a 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -351,11 +351,6 @@ static int host1x_device_uevent(const struct device *dev,
return 0;
}
-static int host1x_dma_configure(struct device *dev)
-{
- return of_dma_configure(dev, dev->of_node, true);
-}
-
static const struct dev_pm_ops host1x_device_pm_ops = {
.suspend = pm_generic_suspend,
.resume = pm_generic_resume,
@@ -369,7 +364,6 @@ const struct bus_type host1x_bus_type = {
.name = "host1x",
.match = host1x_device_match,
.uevent = host1x_device_uevent,
- .dma_configure = host1x_dma_configure,
.pm = &host1x_device_pm_ops,
};
@@ -458,8 +452,6 @@ static int host1x_device_add(struct host1x *host1x,
device->dev.bus = &host1x_bus_type;
device->dev.parent = host1x->dev;
- of_dma_configure(&device->dev, host1x->dev->of_node, true);
-
device->dev.dma_parms = &device->dma_parms;
dma_set_max_seg_size(&device->dev, UINT_MAX);
diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index e6a8b6d8eab707..3c3c497b6b9114 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -965,9 +965,7 @@ static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev,
}
break;
case REPORT_TYPE_MOUSE:
- workitem->reports_supported |= STD_MOUSE | HIDPP;
- if (djrcv_dev->type == recvr_type_mouse_only)
- workitem->reports_supported |= MULTIMEDIA;
+ workitem->reports_supported |= STD_MOUSE | HIDPP | MULTIMEDIA;
break;
}
}
diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c
index f9cceaeffd0814..da5ea5a23b087c 100644
--- a/drivers/hid/hid-mcp2221.c
+++ b/drivers/hid/hid-mcp2221.c
@@ -944,9 +944,11 @@ static void mcp2221_hid_unregister(void *ptr)
/* This is needed to be sure hid_hw_stop() isn't called twice by the subsystem */
static void mcp2221_remove(struct hid_device *hdev)
{
+#if IS_REACHABLE(CONFIG_IIO)
struct mcp2221 *mcp = hid_get_drvdata(hdev);
cancel_delayed_work_sync(&mcp->init_work);
+#endif
}
#if IS_REACHABLE(CONFIG_IIO)
diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c
index ab5953fc24367a..80e0f23c1c33ec 100644
--- a/drivers/hid/hid-nintendo.c
+++ b/drivers/hid/hid-nintendo.c
@@ -481,10 +481,10 @@ static const struct joycon_ctlr_button_mapping n64con_button_mappings[] = {
{ BTN_TR, JC_BTN_R, },
{ BTN_TR2, JC_BTN_LSTICK, }, /* ZR */
{ BTN_START, JC_BTN_PLUS, },
- { BTN_FORWARD, JC_BTN_Y, }, /* C UP */
- { BTN_BACK, JC_BTN_ZR, }, /* C DOWN */
- { BTN_LEFT, JC_BTN_X, }, /* C LEFT */
- { BTN_RIGHT, JC_BTN_MINUS, }, /* C RIGHT */
+ { BTN_SELECT, JC_BTN_Y, }, /* C UP */
+ { BTN_X, JC_BTN_ZR, }, /* C DOWN */
+ { BTN_Y, JC_BTN_X, }, /* C LEFT */
+ { BTN_C, JC_BTN_MINUS, }, /* C RIGHT */
{ BTN_MODE, JC_BTN_HOME, },
{ BTN_Z, JC_BTN_CAP, },
{ /* sentinel */ },
diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c
index 2df1ab3c31cc54..d965382196c69e 100644
--- a/drivers/hid/i2c-hid/i2c-hid-core.c
+++ b/drivers/hid/i2c-hid/i2c-hid-core.c
@@ -64,7 +64,6 @@
/* flags */
#define I2C_HID_STARTED 0
#define I2C_HID_RESET_PENDING 1
-#define I2C_HID_READ_PENDING 2
#define I2C_HID_PWR_ON 0x00
#define I2C_HID_PWR_SLEEP 0x01
@@ -190,15 +189,10 @@ static int i2c_hid_xfer(struct i2c_hid *ihid,
msgs[n].len = recv_len;
msgs[n].buf = recv_buf;
n++;
-
- set_bit(I2C_HID_READ_PENDING, &ihid->flags);
}
ret = i2c_transfer(client->adapter, msgs, n);
- if (recv_len)
- clear_bit(I2C_HID_READ_PENDING, &ihid->flags);
-
if (ret != n)
return ret < 0 ? ret : -EIO;
@@ -556,9 +550,6 @@ static irqreturn_t i2c_hid_irq(int irq, void *dev_id)
{
struct i2c_hid *ihid = dev_id;
- if (test_bit(I2C_HID_READ_PENDING, &ihid->flags))
- return IRQ_HANDLED;
-
i2c_hid_get_input(ihid);
return IRQ_HANDLED;
@@ -735,12 +726,15 @@ static int i2c_hid_parse(struct hid_device *hid)
mutex_lock(&ihid->reset_lock);
do {
ret = i2c_hid_start_hwreset(ihid);
- if (ret)
+ if (ret == 0)
+ ret = i2c_hid_finish_hwreset(ihid);
+ else
msleep(1000);
} while (tries-- > 0 && ret);
+ mutex_unlock(&ihid->reset_lock);
if (ret)
- goto abort_reset;
+ return ret;
use_override = i2c_hid_get_dmi_hid_report_desc_override(client->name,
&rsize);
@@ -750,11 +744,8 @@ static int i2c_hid_parse(struct hid_device *hid)
i2c_hid_dbg(ihid, "Using a HID report descriptor override\n");
} else {
rdesc = kzalloc(rsize, GFP_KERNEL);
-
- if (!rdesc) {
- ret = -ENOMEM;
- goto abort_reset;
- }
+ if (!rdesc)
+ return -ENOMEM;
i2c_hid_dbg(ihid, "asking HID report descriptor\n");
@@ -763,23 +754,10 @@ static int i2c_hid_parse(struct hid_device *hid)
rdesc, rsize);
if (ret) {
hid_err(hid, "reading report descriptor failed\n");
- goto abort_reset;
+ goto out;
}
}
- /*
- * Windows directly reads the report-descriptor after sending reset
- * and then waits for resets completion afterwards. Some touchpads
- * actually wait for the report-descriptor to be read before signalling
- * reset completion.
- */
- ret = i2c_hid_finish_hwreset(ihid);
-abort_reset:
- clear_bit(I2C_HID_RESET_PENDING, &ihid->flags);
- mutex_unlock(&ihid->reset_lock);
- if (ret)
- goto out;
-
i2c_hid_dbg(ihid, "Report Descriptor: %*ph\n", rsize, rdesc);
ret = hid_parse_report(hid, rdesc, rsize);
diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c
index a49c6affd7c4c4..dd5fc60874ba1d 100644
--- a/drivers/hid/intel-ish-hid/ipc/ipc.c
+++ b/drivers/hid/intel-ish-hid/ipc/ipc.c
@@ -948,6 +948,7 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev)
if (!dev)
return NULL;
+ dev->devc = &pdev->dev;
ishtp_device_init(dev);
init_waitqueue_head(&dev->wait_hw_ready);
@@ -983,7 +984,6 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev)
}
dev->ops = &ish_hw_ops;
- dev->devc = &pdev->dev;
dev->mtu = IPC_PAYLOAD_SIZE - sizeof(struct ishtp_msg_hdr);
return dev;
}
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index adbf674355b2b8..fb8cd8469328ee 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -153,7 +153,9 @@ void vmbus_free_ring(struct vmbus_channel *channel)
hv_ringbuffer_cleanup(&channel->inbound);
if (channel->ringbuffer_page) {
- __free_pages(channel->ringbuffer_page,
+ /* In a CoCo VM leak the memory if it didn't get re-encrypted */
+ if (!channel->ringbuffer_gpadlhandle.decrypted)
+ __free_pages(channel->ringbuffer_page,
get_order(channel->ringbuffer_pagecount
<< PAGE_SHIFT));
channel->ringbuffer_page = NULL;
@@ -436,9 +438,18 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
ret = create_gpadl_header(type, kbuffer, size, send_offset, &msginfo);
- if (ret)
+ if (ret) {
+ gpadl->decrypted = false;
return ret;
+ }
+ /*
+ * Set the "decrypted" flag to true for the set_memory_decrypted()
+ * success case. In the failure case, the encryption state of the
+ * memory is unknown. Leave "decrypted" as true to ensure the
+ * memory will be leaked instead of going back on the free list.
+ */
+ gpadl->decrypted = true;
ret = set_memory_decrypted((unsigned long)kbuffer,
PFN_UP(size));
if (ret) {
@@ -527,9 +538,15 @@ cleanup:
kfree(msginfo);
- if (ret)
- set_memory_encrypted((unsigned long)kbuffer,
- PFN_UP(size));
+ if (ret) {
+ /*
+ * If set_memory_encrypted() fails, the decrypted flag is
+ * left as true so the memory is leaked instead of being
+ * put back on the free list.
+ */
+ if (!set_memory_encrypted((unsigned long)kbuffer, PFN_UP(size)))
+ gpadl->decrypted = false;
+ }
return ret;
}
@@ -850,6 +867,8 @@ post_msg_err:
if (ret)
pr_warn("Fail to set mem host visibility in GPADL teardown %d.\n", ret);
+ gpadl->decrypted = ret;
+
return ret;
}
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 3cabeeabb1cacf..f001ae880e1dbe 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -237,8 +237,17 @@ int vmbus_connect(void)
vmbus_connection.monitor_pages[0], 1);
ret |= set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[1], 1);
- if (ret)
+ if (ret) {
+ /*
+ * If set_memory_decrypted() fails, the encryption state
+ * of the memory is unknown. So leak the memory instead
+ * of risking returning decrypted memory to the free list.
+ * For simplicity, always handle both pages the same.
+ */
+ vmbus_connection.monitor_pages[0] = NULL;
+ vmbus_connection.monitor_pages[1] = NULL;
goto cleanup;
+ }
/*
* Set_memory_decrypted() will change the memory contents if
@@ -337,13 +346,19 @@ void vmbus_disconnect(void)
vmbus_connection.int_page = NULL;
}
- set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1);
- set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1);
+ if (vmbus_connection.monitor_pages[0]) {
+ if (!set_memory_encrypted(
+ (unsigned long)vmbus_connection.monitor_pages[0], 1))
+ hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
+ vmbus_connection.monitor_pages[0] = NULL;
+ }
- hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
- hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
- vmbus_connection.monitor_pages[0] = NULL;
- vmbus_connection.monitor_pages[1] = NULL;
+ if (vmbus_connection.monitor_pages[1]) {
+ if (!set_memory_encrypted(
+ (unsigned long)vmbus_connection.monitor_pages[1], 1))
+ hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
+ vmbus_connection.monitor_pages[1] = NULL;
+ }
}
/*
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 4cb17603a8289b..e140cbf8d4c795 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -131,7 +131,7 @@ static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);
@@ -142,7 +142,7 @@ static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->state);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);
@@ -153,7 +153,7 @@ static ssize_t monitor_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);
@@ -164,8 +164,8 @@ static ssize_t class_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "{%pUl}\n",
- &hv_dev->channel->offermsg.offer.if_type);
+ return sysfs_emit(buf, "{%pUl}\n",
+ &hv_dev->channel->offermsg.offer.if_type);
}
static DEVICE_ATTR_RO(class_id);
@@ -176,8 +176,8 @@ static ssize_t device_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "{%pUl}\n",
- &hv_dev->channel->offermsg.offer.if_instance);
+ return sysfs_emit(buf, "{%pUl}\n",
+ &hv_dev->channel->offermsg.offer.if_instance);
}
static DEVICE_ATTR_RO(device_id);
@@ -186,7 +186,7 @@ static ssize_t modalias_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
+ return sysfs_emit(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
}
static DEVICE_ATTR_RO(modalias);
@@ -199,7 +199,7 @@ static ssize_t numa_node_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
+ return sysfs_emit(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
}
static DEVICE_ATTR_RO(numa_node);
#endif
@@ -212,9 +212,8 @@ static ssize_t server_monitor_pending_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);
@@ -226,9 +225,8 @@ static ssize_t client_monitor_pending_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);
@@ -240,9 +238,8 @@ static ssize_t server_monitor_latency_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_latency(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);
@@ -254,9 +251,8 @@ static ssize_t client_monitor_latency_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_latency(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);
@@ -268,9 +264,8 @@ static ssize_t server_monitor_conn_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_conn_id(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);
@@ -282,9 +277,8 @@ static ssize_t client_monitor_conn_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_conn_id(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);
@@ -303,7 +297,7 @@ static ssize_t out_intr_mask_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
+ return sysfs_emit(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);
@@ -321,7 +315,7 @@ static ssize_t out_read_index_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_read_index);
+ return sysfs_emit(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);
@@ -340,7 +334,7 @@ static ssize_t out_write_index_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_write_index);
+ return sysfs_emit(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);
@@ -359,7 +353,7 @@ static ssize_t out_read_bytes_avail_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
+ return sysfs_emit(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);
@@ -378,7 +372,7 @@ static ssize_t out_write_bytes_avail_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
+ return sysfs_emit(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);
@@ -396,7 +390,7 @@ static ssize_t in_intr_mask_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
+ return sysfs_emit(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);
@@ -414,7 +408,7 @@ static ssize_t in_read_index_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_read_index);
+ return sysfs_emit(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);
@@ -432,7 +426,7 @@ static ssize_t in_write_index_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_write_index);
+ return sysfs_emit(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);
@@ -451,7 +445,7 @@ static ssize_t in_read_bytes_avail_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
+ return sysfs_emit(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);
@@ -470,7 +464,7 @@ static ssize_t in_write_bytes_avail_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
+ return sysfs_emit(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);
@@ -480,7 +474,7 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
- int buf_size = PAGE_SIZE, n_written, tot_written;
+ int n_written;
struct list_head *cur;
if (!channel)
@@ -488,25 +482,21 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
mutex_lock(&vmbus_connection.channel_mutex);
- tot_written = snprintf(buf, buf_size, "%u:%u\n",
- channel->offermsg.child_relid, channel->target_cpu);
+ n_written = sysfs_emit(buf, "%u:%u\n",
+ channel->offermsg.child_relid,
+ channel->target_cpu);
list_for_each(cur, &channel->sc_list) {
- if (tot_written >= buf_size - 1)
- break;
cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
- n_written = scnprintf(buf + tot_written,
- buf_size - tot_written,
- "%u:%u\n",
- cur_sc->offermsg.child_relid,
- cur_sc->target_cpu);
- tot_written += n_written;
+ n_written += sysfs_emit_at(buf, n_written, "%u:%u\n",
+ cur_sc->offermsg.child_relid,
+ cur_sc->target_cpu);
}
mutex_unlock(&vmbus_connection.channel_mutex);
- return tot_written;
+ return n_written;
}
static DEVICE_ATTR_RO(channel_vp_mapping);
@@ -516,7 +506,7 @@ static ssize_t vendor_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
+ return sysfs_emit(buf, "0x%x\n", hv_dev->vendor_id);
}
static DEVICE_ATTR_RO(vendor);
@@ -526,7 +516,7 @@ static ssize_t device_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "0x%x\n", hv_dev->device_id);
+ return sysfs_emit(buf, "0x%x\n", hv_dev->device_id);
}
static DEVICE_ATTR_RO(device);
@@ -551,7 +541,7 @@ static ssize_t driver_override_show(struct device *dev,
ssize_t len;
device_lock(dev);
- len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+ len = sysfs_emit(buf, "%s\n", hv_dev->driver_override);
device_unlock(dev);
return len;
@@ -2619,7 +2609,7 @@ static struct syscore_ops hv_synic_syscore_ops = {
.resume = hv_synic_resume,
};
-static int __init hv_acpi_init(void)
+static int __init vmbus_init(void)
{
int ret;
@@ -2630,7 +2620,7 @@ static int __init hv_acpi_init(void)
return 0;
/*
- * Get ACPI resources first.
+ * Get VMBus resources first.
*/
ret = platform_driver_register(&vmbus_platform_driver);
if (ret)
@@ -2717,5 +2707,5 @@ static void __exit vmbus_exit(void)
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver");
-subsys_initcall(hv_acpi_init);
+subsys_initcall(vmbus_init);
module_exit(vmbus_exit);
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
index 6136776482e6d8..96a32b21366994 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.c
+++ b/drivers/hwtracing/coresight/coresight-trbe.c
@@ -17,6 +17,7 @@
#include <asm/barrier.h>
#include <asm/cpufeature.h>
+#include <linux/vmalloc.h>
#include "coresight-self-hosted-trace.h"
#include "coresight-trbe.h"
diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c
index cc7f879bb175d4..86c8efecd7c264 100644
--- a/drivers/hwtracing/intel_th/core.c
+++ b/drivers/hwtracing/intel_th/core.c
@@ -871,7 +871,7 @@ intel_th_alloc(struct device *dev, const struct intel_th_drvdata *drvdata,
if (!th)
return ERR_PTR(-ENOMEM);
- th->id = ida_simple_get(&intel_th_ida, 0, 0, GFP_KERNEL);
+ th->id = ida_alloc(&intel_th_ida, GFP_KERNEL);
if (th->id < 0) {
err = th->id;
goto err_alloc;
@@ -931,7 +931,7 @@ err_chrdev:
"intel_th/output");
err_ida:
- ida_simple_remove(&intel_th_ida, th->id);
+ ida_free(&intel_th_ida, th->id);
err_alloc:
kfree(th);
@@ -964,7 +964,7 @@ void intel_th_free(struct intel_th *th)
__unregister_chrdev(th->major, 0, TH_POSSIBLE_OUTPUTS,
"intel_th/output");
- ida_simple_remove(&intel_th_ida, th->id);
+ ida_free(&intel_th_ida, th->id);
kfree(th);
}
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index a6861660cb8ca7..79870dd7a0146e 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -536,11 +536,12 @@ static int i801_block_transaction_by_block(struct i801_priv *priv,
if (read_write == I2C_SMBUS_READ ||
command == I2C_SMBUS_BLOCK_PROC_CALL) {
- status = i801_get_block_len(priv);
- if (status < 0)
+ len = i801_get_block_len(priv);
+ if (len < 0) {
+ status = len;
goto out;
+ }
- len = status;
data->block[0] = len;
inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
for (i = 0; i < len; i++)
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 76f79b68cef845..888ca636f3f3b0 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -324,6 +324,7 @@ static void decode_ISR(unsigned int val)
decode_bits(KERN_DEBUG "ISR", isr_bits, ARRAY_SIZE(isr_bits), val);
}
+#ifdef CONFIG_I2C_PXA_SLAVE
static const struct bits icr_bits[] = {
PXA_BIT(ICR_START, "START", NULL),
PXA_BIT(ICR_STOP, "STOP", NULL),
@@ -342,7 +343,6 @@ static const struct bits icr_bits[] = {
PXA_BIT(ICR_UR, "UR", "ur"),
};
-#ifdef CONFIG_I2C_PXA_SLAVE
static void decode_ICR(unsigned int val)
{
decode_bits(KERN_DEBUG "ICR", icr_bits, ARRAY_SIZE(icr_bits), val);
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index ff5c486a1dbb1f..db0d1ac82910e5 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -2200,13 +2200,18 @@ static int i2c_check_for_quirks(struct i2c_adapter *adap, struct i2c_msg *msgs,
* Returns negative errno, else the number of messages executed.
*
* Adapter lock must be held when calling this function. No debug logging
- * takes place. adap->algo->master_xfer existence isn't checked.
+ * takes place.
*/
int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
{
unsigned long orig_jiffies;
int ret, try;
+ if (!adap->algo->master_xfer) {
+ dev_dbg(&adap->dev, "I2C level transfers not supported\n");
+ return -EOPNOTSUPP;
+ }
+
if (WARN_ON(!msgs || num < 1))
return -EINVAL;
@@ -2273,11 +2278,6 @@ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
{
int ret;
- if (!adap->algo->master_xfer) {
- dev_dbg(&adap->dev, "I2C level transfers not supported\n");
- return -EOPNOTSUPP;
- }
-
/* REVISIT the fault reporting model here is weak:
*
* - When we get an error after receiving N bytes from a slave,
diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c
index 61839be501c21a..63c3566a533bd8 100644
--- a/drivers/iio/accel/mxc4005.c
+++ b/drivers/iio/accel/mxc4005.c
@@ -5,6 +5,7 @@
* Copyright (c) 2014, Intel Corporation.
*/
+#include <linux/delay.h>
#include <linux/module.h>
#include <linux/i2c.h>
#include <linux/iio/iio.h>
@@ -27,11 +28,16 @@
#define MXC4005_REG_ZOUT_UPPER 0x07
#define MXC4005_REG_ZOUT_LOWER 0x08
+#define MXC4005_REG_INT_MASK0 0x0A
+
#define MXC4005_REG_INT_MASK1 0x0B
#define MXC4005_REG_INT_MASK1_BIT_DRDYE 0x01
+#define MXC4005_REG_INT_CLR0 0x00
+
#define MXC4005_REG_INT_CLR1 0x01
#define MXC4005_REG_INT_CLR1_BIT_DRDYC 0x01
+#define MXC4005_REG_INT_CLR1_SW_RST 0x10
#define MXC4005_REG_CONTROL 0x0D
#define MXC4005_REG_CONTROL_MASK_FSR GENMASK(6, 5)
@@ -39,6 +45,9 @@
#define MXC4005_REG_DEVICE_ID 0x0E
+/* Datasheet does not specify a reset time, this is a conservative guess */
+#define MXC4005_RESET_TIME_US 2000
+
enum mxc4005_axis {
AXIS_X,
AXIS_Y,
@@ -62,6 +71,8 @@ struct mxc4005_data {
s64 timestamp __aligned(8);
} scan;
bool trigger_enabled;
+ unsigned int control;
+ unsigned int int_mask1;
};
/*
@@ -113,7 +124,9 @@ static bool mxc4005_is_readable_reg(struct device *dev, unsigned int reg)
static bool mxc4005_is_writeable_reg(struct device *dev, unsigned int reg)
{
switch (reg) {
+ case MXC4005_REG_INT_CLR0:
case MXC4005_REG_INT_CLR1:
+ case MXC4005_REG_INT_MASK0:
case MXC4005_REG_INT_MASK1:
case MXC4005_REG_CONTROL:
return true;
@@ -330,23 +343,20 @@ static int mxc4005_set_trigger_state(struct iio_trigger *trig,
{
struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig);
struct mxc4005_data *data = iio_priv(indio_dev);
+ unsigned int val;
int ret;
mutex_lock(&data->mutex);
- if (state) {
- ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1,
- MXC4005_REG_INT_MASK1_BIT_DRDYE);
- } else {
- ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1,
- ~MXC4005_REG_INT_MASK1_BIT_DRDYE);
- }
+ val = state ? MXC4005_REG_INT_MASK1_BIT_DRDYE : 0;
+ ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, val);
if (ret < 0) {
mutex_unlock(&data->mutex);
dev_err(data->dev, "failed to update reg_int_mask1");
return ret;
}
+ data->int_mask1 = val;
data->trigger_enabled = state;
mutex_unlock(&data->mutex);
@@ -382,6 +392,21 @@ static int mxc4005_chip_init(struct mxc4005_data *data)
dev_dbg(data->dev, "MXC4005 chip id %02x\n", reg);
+ ret = regmap_write(data->regmap, MXC4005_REG_INT_CLR1,
+ MXC4005_REG_INT_CLR1_SW_RST);
+ if (ret < 0)
+ return dev_err_probe(data->dev, ret, "resetting chip\n");
+
+ fsleep(MXC4005_RESET_TIME_US);
+
+ ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK0, 0);
+ if (ret < 0)
+ return dev_err_probe(data->dev, ret, "writing INT_MASK0\n");
+
+ ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, 0);
+ if (ret < 0)
+ return dev_err_probe(data->dev, ret, "writing INT_MASK1\n");
+
return 0;
}
@@ -469,6 +494,58 @@ static int mxc4005_probe(struct i2c_client *client)
return devm_iio_device_register(&client->dev, indio_dev);
}
+static int mxc4005_suspend(struct device *dev)
+{
+ struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct mxc4005_data *data = iio_priv(indio_dev);
+ int ret;
+
+ /* Save control to restore it on resume */
+ ret = regmap_read(data->regmap, MXC4005_REG_CONTROL, &data->control);
+ if (ret < 0)
+ dev_err(data->dev, "failed to read reg_control\n");
+
+ return ret;
+}
+
+static int mxc4005_resume(struct device *dev)
+{
+ struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct mxc4005_data *data = iio_priv(indio_dev);
+ int ret;
+
+ ret = regmap_write(data->regmap, MXC4005_REG_INT_CLR1,
+ MXC4005_REG_INT_CLR1_SW_RST);
+ if (ret) {
+ dev_err(data->dev, "failed to reset chip: %d\n", ret);
+ return ret;
+ }
+
+ fsleep(MXC4005_RESET_TIME_US);
+
+ ret = regmap_write(data->regmap, MXC4005_REG_CONTROL, data->control);
+ if (ret) {
+ dev_err(data->dev, "failed to restore control register\n");
+ return ret;
+ }
+
+ ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK0, 0);
+ if (ret) {
+ dev_err(data->dev, "failed to restore interrupt 0 mask\n");
+ return ret;
+ }
+
+ ret = regmap_write(data->regmap, MXC4005_REG_INT_MASK1, data->int_mask1);
+ if (ret) {
+ dev_err(data->dev, "failed to restore interrupt 1 mask\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(mxc4005_pm_ops, mxc4005_suspend, mxc4005_resume);
+
static const struct acpi_device_id mxc4005_acpi_match[] = {
{"MXC4005", 0},
{"MXC6655", 0},
@@ -496,6 +573,7 @@ static struct i2c_driver mxc4005_driver = {
.name = MXC4005_DRV_NAME,
.acpi_match_table = mxc4005_acpi_match,
.of_match_table = mxc4005_of_match,
+ .pm = pm_sleep_ptr(&mxc4005_pm_ops),
},
.probe = mxc4005_probe,
.id_table = mxc4005_id,
diff --git a/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c b/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c
index 3b0f9598a7c773..fa205f17bd905f 100644
--- a/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c
+++ b/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c
@@ -70,13 +70,13 @@ int inv_sensors_timestamp_update_odr(struct inv_sensors_timestamp *ts,
}
EXPORT_SYMBOL_NS_GPL(inv_sensors_timestamp_update_odr, IIO_INV_SENSORS_TIMESTAMP);
-static bool inv_validate_period(struct inv_sensors_timestamp *ts, uint32_t period, uint32_t mult)
+static bool inv_validate_period(struct inv_sensors_timestamp *ts, uint32_t period)
{
uint32_t period_min, period_max;
/* check that period is acceptable */
- period_min = ts->min_period * mult;
- period_max = ts->max_period * mult;
+ period_min = ts->min_period * ts->mult;
+ period_max = ts->max_period * ts->mult;
if (period > period_min && period < period_max)
return true;
else
@@ -84,15 +84,15 @@ static bool inv_validate_period(struct inv_sensors_timestamp *ts, uint32_t perio
}
static bool inv_update_chip_period(struct inv_sensors_timestamp *ts,
- uint32_t mult, uint32_t period)
+ uint32_t period)
{
uint32_t new_chip_period;
- if (!inv_validate_period(ts, period, mult))
+ if (!inv_validate_period(ts, period))
return false;
/* update chip internal period estimation */
- new_chip_period = period / mult;
+ new_chip_period = period / ts->mult;
inv_update_acc(&ts->chip_period, new_chip_period);
ts->period = ts->mult * ts->chip_period.val;
@@ -101,6 +101,9 @@ static bool inv_update_chip_period(struct inv_sensors_timestamp *ts,
static void inv_align_timestamp_it(struct inv_sensors_timestamp *ts)
{
+ const int64_t period_min = ts->min_period * ts->mult;
+ const int64_t period_max = ts->max_period * ts->mult;
+ int64_t add_max, sub_max;
int64_t delta, jitter;
int64_t adjust;
@@ -108,11 +111,13 @@ static void inv_align_timestamp_it(struct inv_sensors_timestamp *ts)
delta = ts->it.lo - ts->timestamp;
/* adjust timestamp while respecting jitter */
+ add_max = period_max - (int64_t)ts->period;
+ sub_max = period_min - (int64_t)ts->period;
jitter = INV_SENSORS_TIMESTAMP_JITTER((int64_t)ts->period, ts->chip.jitter);
if (delta > jitter)
- adjust = jitter;
+ adjust = add_max;
else if (delta < -jitter)
- adjust = -jitter;
+ adjust = sub_max;
else
adjust = 0;
@@ -120,16 +125,14 @@ static void inv_align_timestamp_it(struct inv_sensors_timestamp *ts)
}
void inv_sensors_timestamp_interrupt(struct inv_sensors_timestamp *ts,
- uint32_t fifo_period, size_t fifo_nb,
- size_t sensor_nb, int64_t timestamp)
+ size_t sample_nb, int64_t timestamp)
{
struct inv_sensors_timestamp_interval *it;
int64_t delta, interval;
- const uint32_t fifo_mult = fifo_period / ts->chip.clock_period;
uint32_t period;
bool valid = false;
- if (fifo_nb == 0)
+ if (sample_nb == 0)
return;
/* update interrupt timestamp and compute chip and sensor periods */
@@ -139,14 +142,14 @@ void inv_sensors_timestamp_interrupt(struct inv_sensors_timestamp *ts,
delta = it->up - it->lo;
if (it->lo != 0) {
/* compute period: delta time divided by number of samples */
- period = div_s64(delta, fifo_nb);
- valid = inv_update_chip_period(ts, fifo_mult, period);
+ period = div_s64(delta, sample_nb);
+ valid = inv_update_chip_period(ts, period);
}
/* no previous data, compute theoritical value from interrupt */
if (ts->timestamp == 0) {
/* elapsed time: sensor period * sensor samples number */
- interval = (int64_t)ts->period * (int64_t)sensor_nb;
+ interval = (int64_t)ts->period * (int64_t)sample_nb;
ts->timestamp = it->up - interval;
return;
}
diff --git a/drivers/iio/imu/adis16475.c b/drivers/iio/imu/adis16475.c
index 01f55cc902faad..060a21c70460d2 100644
--- a/drivers/iio/imu/adis16475.c
+++ b/drivers/iio/imu/adis16475.c
@@ -1289,6 +1289,7 @@ static int adis16475_config_sync_mode(struct adis16475 *st)
struct device *dev = &st->adis.spi->dev;
const struct adis16475_sync *sync;
u32 sync_mode;
+ u16 val;
/* default to internal clk */
st->clk_freq = st->info->int_clk * 1000;
@@ -1350,8 +1351,9 @@ static int adis16475_config_sync_mode(struct adis16475 *st)
* I'm keeping this for simplicity and avoiding extra variables
* in chip_info.
*/
+ val = ADIS16475_SYNC_MODE(sync->sync_mode);
ret = __adis_update_bits(&st->adis, ADIS16475_REG_MSG_CTRL,
- ADIS16475_SYNC_MODE_MASK, sync->sync_mode);
+ ADIS16475_SYNC_MODE_MASK, val);
if (ret)
return ret;
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c
index b52f328fd26ce7..9cde9a9337ad00 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c
@@ -509,20 +509,20 @@ int inv_icm42600_buffer_fifo_parse(struct inv_icm42600_state *st)
return 0;
/* handle gyroscope timestamp and FIFO data parsing */
- ts = iio_priv(st->indio_gyro);
- inv_sensors_timestamp_interrupt(ts, st->fifo.period, st->fifo.nb.total,
- st->fifo.nb.gyro, st->timestamp.gyro);
if (st->fifo.nb.gyro > 0) {
+ ts = iio_priv(st->indio_gyro);
+ inv_sensors_timestamp_interrupt(ts, st->fifo.nb.gyro,
+ st->timestamp.gyro);
ret = inv_icm42600_gyro_parse_fifo(st->indio_gyro);
if (ret)
return ret;
}
/* handle accelerometer timestamp and FIFO data parsing */
- ts = iio_priv(st->indio_accel);
- inv_sensors_timestamp_interrupt(ts, st->fifo.period, st->fifo.nb.total,
- st->fifo.nb.accel, st->timestamp.accel);
if (st->fifo.nb.accel > 0) {
+ ts = iio_priv(st->indio_accel);
+ inv_sensors_timestamp_interrupt(ts, st->fifo.nb.accel,
+ st->timestamp.accel);
ret = inv_icm42600_accel_parse_fifo(st->indio_accel);
if (ret)
return ret;
@@ -550,9 +550,7 @@ int inv_icm42600_buffer_hwfifo_flush(struct inv_icm42600_state *st,
if (st->fifo.nb.gyro > 0) {
ts = iio_priv(st->indio_gyro);
- inv_sensors_timestamp_interrupt(ts, st->fifo.period,
- st->fifo.nb.total, st->fifo.nb.gyro,
- gyro_ts);
+ inv_sensors_timestamp_interrupt(ts, st->fifo.nb.gyro, gyro_ts);
ret = inv_icm42600_gyro_parse_fifo(st->indio_gyro);
if (ret)
return ret;
@@ -560,9 +558,7 @@ int inv_icm42600_buffer_hwfifo_flush(struct inv_icm42600_state *st,
if (st->fifo.nb.accel > 0) {
ts = iio_priv(st->indio_accel);
- inv_sensors_timestamp_interrupt(ts, st->fifo.period,
- st->fifo.nb.total, st->fifo.nb.accel,
- accel_ts);
+ inv_sensors_timestamp_interrupt(ts, st->fifo.nb.accel, accel_ts);
ret = inv_icm42600_accel_parse_fifo(st->indio_accel);
if (ret)
return ret;
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
index d4f9b5d8d28d6d..d404e59eb5e855 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
@@ -113,7 +113,7 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p)
goto end_session;
/* Each FIFO data contains all sensors, so same number for FIFO and sensor data */
fifo_period = NSEC_PER_SEC / INV_MPU6050_DIVIDER_TO_FIFO_RATE(st->chip_config.divider);
- inv_sensors_timestamp_interrupt(&st->timestamp, fifo_period, nb, nb, pf->timestamp);
+ inv_sensors_timestamp_interrupt(&st->timestamp, nb, pf->timestamp);
inv_sensors_timestamp_apply_odr(&st->timestamp, fifo_period, nb, 0);
/* clear internal data buffer for avoiding kernel data leak */
diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c
index fe8734468ed352..62e9e93d915dc8 100644
--- a/drivers/iio/pressure/bmp280-core.c
+++ b/drivers/iio/pressure/bmp280-core.c
@@ -1233,6 +1233,7 @@ const struct bmp280_chip_info bmp380_chip_info = {
.chip_id = bmp380_chip_ids,
.num_chip_id = ARRAY_SIZE(bmp380_chip_ids),
.regmap_config = &bmp380_regmap_config,
+ .spi_read_extra_byte = true,
.start_up_time = 2000,
.channels = bmp380_channels,
.num_channels = 2,
diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c
index a444d4b2978b58..4e19ea0b4d3984 100644
--- a/drivers/iio/pressure/bmp280-spi.c
+++ b/drivers/iio/pressure/bmp280-spi.c
@@ -96,15 +96,10 @@ static int bmp280_spi_probe(struct spi_device *spi)
chip_info = spi_get_device_match_data(spi);
- switch (chip_info->chip_id[0]) {
- case BMP380_CHIP_ID:
- case BMP390_CHIP_ID:
+ if (chip_info->spi_read_extra_byte)
bmp_regmap_bus = &bmp380_regmap_bus;
- break;
- default:
+ else
bmp_regmap_bus = &bmp280_regmap_bus;
- break;
- }
regmap = devm_regmap_init(&spi->dev,
bmp_regmap_bus,
@@ -127,7 +122,7 @@ static const struct of_device_id bmp280_of_spi_match[] = {
{ .compatible = "bosch,bmp180", .data = &bmp180_chip_info },
{ .compatible = "bosch,bmp181", .data = &bmp180_chip_info },
{ .compatible = "bosch,bmp280", .data = &bmp280_chip_info },
- { .compatible = "bosch,bme280", .data = &bmp280_chip_info },
+ { .compatible = "bosch,bme280", .data = &bme280_chip_info },
{ .compatible = "bosch,bmp380", .data = &bmp380_chip_info },
{ .compatible = "bosch,bmp580", .data = &bmp580_chip_info },
{ },
@@ -139,7 +134,7 @@ static const struct spi_device_id bmp280_spi_id[] = {
{ "bmp180", (kernel_ulong_t)&bmp180_chip_info },
{ "bmp181", (kernel_ulong_t)&bmp180_chip_info },
{ "bmp280", (kernel_ulong_t)&bmp280_chip_info },
- { "bme280", (kernel_ulong_t)&bmp280_chip_info },
+ { "bme280", (kernel_ulong_t)&bme280_chip_info },
{ "bmp380", (kernel_ulong_t)&bmp380_chip_info },
{ "bmp580", (kernel_ulong_t)&bmp580_chip_info },
{ }
diff --git a/drivers/iio/pressure/bmp280.h b/drivers/iio/pressure/bmp280.h
index 4012387d795656..5812a344ed8e88 100644
--- a/drivers/iio/pressure/bmp280.h
+++ b/drivers/iio/pressure/bmp280.h
@@ -423,6 +423,7 @@ struct bmp280_chip_info {
int num_chip_id;
const struct regmap_config *regmap_config;
+ bool spi_read_extra_byte;
const struct iio_chan_spec *channels;
int num_channels;
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index bf0df6ee4f7857..07fb8d3c037f00 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1026,23 +1026,26 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
}
}
-static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id)
+static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id,
+ enum ib_cm_state old_state)
{
struct cm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- pr_err("%s: cm_id=%p timed out. state=%d refcnt=%d\n", __func__,
- cm_id, cm_id->state, refcount_read(&cm_id_priv->refcount));
+ pr_err("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__,
+ cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount));
}
static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
struct cm_id_private *cm_id_priv;
+ enum ib_cm_state old_state;
struct cm_work *work;
int ret;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irq(&cm_id_priv->lock);
+ old_state = cm_id->state;
retest:
switch (cm_id->state) {
case IB_CM_LISTEN:
@@ -1151,7 +1154,7 @@ retest:
msecs_to_jiffies(
CM_DESTROY_ID_WAIT_TIMEOUT));
if (!ret) /* timeout happened */
- cm_destroy_id_wait_timeout(cm_id);
+ cm_destroy_id_wait_timeout(cm_id, old_state);
} while (!ret);
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 0c3c4e64812c58..3e43687a7f6f73 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -188,7 +188,8 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
mdev = dev->mdev;
mdev_port_num = 1;
}
- if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) {
+ if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1 &&
+ !mlx5_core_mp_enabled(mdev)) {
/* set local port to one for Function-Per-Port HCA. */
mdev = dev->mdev;
mdev_port_num = 1;
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 455e966eeff390..b27791029fa934 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -439,6 +439,7 @@ static int remove_device_files(struct super_block *sb,
return PTR_ERR(dir);
}
simple_recursive_removal(dir, NULL);
+ dput(dir);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index ae466e72fc43b3..255677bc12b2ab 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -33,6 +33,8 @@ void rxe_dealloc(struct ib_device *ib_dev)
if (rxe->tfm)
crypto_free_shash(rxe->tfm);
+
+ mutex_destroy(&rxe->usdev_lock);
}
/* initialize rxe device parameters */
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index f50848ed5575db..6fadaddb2b908f 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -208,6 +208,7 @@ static const struct xpad_device {
{ 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 },
{ 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 },
{ 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 },
+ { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", 0, XTYPE_XBOXONE },
{ 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX },
{ 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX },
{ 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX },
@@ -487,6 +488,7 @@ static const struct usb_device_id xpad_table[] = {
{ USB_DEVICE(0x0738, 0x4540) }, /* Mad Catz Beat Pad */
XPAD_XBOXONE_VENDOR(0x0738), /* Mad Catz FightStick TE 2 */
XPAD_XBOX360_VENDOR(0x07ff), /* Mad Catz Gamepad */
+ XPAD_XBOXONE_VENDOR(0x0b05), /* ASUS controllers */
XPAD_XBOX360_VENDOR(0x0c12), /* Zeroplus X-Box 360 controllers */
XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f Xbox 360 controllers */
XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f Xbox One controllers */
diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 5d1010cafed8d3..7e9b996b47c833 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -176,6 +176,8 @@ static struct icc_path *path_init(struct device *dev, struct icc_node *dst,
path->num_nodes = num_nodes;
+ mutex_lock(&icc_bw_lock);
+
for (i = num_nodes - 1; i >= 0; i--) {
node->provider->users++;
hlist_add_head(&path->reqs[i].req_node, &node->req_list);
@@ -186,6 +188,8 @@ static struct icc_path *path_init(struct device *dev, struct icc_node *dst,
node = node->reverse;
}
+ mutex_unlock(&icc_bw_lock);
+
return path;
}
@@ -792,12 +796,16 @@ void icc_put(struct icc_path *path)
pr_err("%s: error (%d)\n", __func__, ret);
mutex_lock(&icc_lock);
+ mutex_lock(&icc_bw_lock);
+
for (i = 0; i < path->num_nodes; i++) {
node = path->reqs[i].node;
hlist_del(&path->reqs[i].req_node);
if (!WARN_ON(!node->provider->users))
node->provider->users--;
}
+
+ mutex_unlock(&icc_bw_lock);
mutex_unlock(&icc_lock);
kfree_const(path->name);
diff --git a/drivers/interconnect/qcom/x1e80100.c b/drivers/interconnect/qcom/x1e80100.c
index 99824675ee3f49..654abb9ce08eed 100644
--- a/drivers/interconnect/qcom/x1e80100.c
+++ b/drivers/interconnect/qcom/x1e80100.c
@@ -116,15 +116,6 @@ static struct qcom_icc_node xm_sdc2 = {
.links = { X1E80100_SLAVE_A2NOC_SNOC },
};
-static struct qcom_icc_node ddr_perf_mode_master = {
- .name = "ddr_perf_mode_master",
- .id = X1E80100_MASTER_DDR_PERF_MODE,
- .channels = 1,
- .buswidth = 4,
- .num_links = 1,
- .links = { X1E80100_SLAVE_DDR_PERF_MODE },
-};
-
static struct qcom_icc_node qup0_core_master = {
.name = "qup0_core_master",
.id = X1E80100_MASTER_QUP_CORE_0,
@@ -688,14 +679,6 @@ static struct qcom_icc_node qns_a2noc_snoc = {
.links = { X1E80100_MASTER_A2NOC_SNOC },
};
-static struct qcom_icc_node ddr_perf_mode_slave = {
- .name = "ddr_perf_mode_slave",
- .id = X1E80100_SLAVE_DDR_PERF_MODE,
- .channels = 1,
- .buswidth = 4,
- .num_links = 0,
-};
-
static struct qcom_icc_node qup0_core_slave = {
.name = "qup0_core_slave",
.id = X1E80100_SLAVE_QUP_CORE_0,
@@ -1377,12 +1360,6 @@ static struct qcom_icc_bcm bcm_acv = {
.nodes = { &ebi },
};
-static struct qcom_icc_bcm bcm_acv_perf = {
- .name = "ACV_PERF",
- .num_nodes = 1,
- .nodes = { &ddr_perf_mode_slave },
-};
-
static struct qcom_icc_bcm bcm_ce0 = {
.name = "CE0",
.num_nodes = 1,
@@ -1583,18 +1560,15 @@ static const struct qcom_icc_desc x1e80100_aggre2_noc = {
};
static struct qcom_icc_bcm * const clk_virt_bcms[] = {
- &bcm_acv_perf,
&bcm_qup0,
&bcm_qup1,
&bcm_qup2,
};
static struct qcom_icc_node * const clk_virt_nodes[] = {
- [MASTER_DDR_PERF_MODE] = &ddr_perf_mode_master,
[MASTER_QUP_CORE_0] = &qup0_core_master,
[MASTER_QUP_CORE_1] = &qup1_core_master,
[MASTER_QUP_CORE_2] = &qup2_core_master,
- [SLAVE_DDR_PERF_MODE] = &ddr_perf_mode_slave,
[SLAVE_QUP_CORE_0] = &qup0_core_slave,
[SLAVE_QUP_CORE_1] = &qup1_core_slave,
[SLAVE_QUP_CORE_2] = &qup2_core_slave,
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index f482aab420f787..52575ba9a141fe 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -134,13 +134,14 @@ static inline int get_pci_sbdf_id(struct pci_dev *pdev)
return PCI_SEG_DEVID_TO_SBDF(seg, devid);
}
-static inline void *alloc_pgtable_page(int nid, gfp_t gfp)
+static inline void *alloc_pgtable_page_noprof(int nid, gfp_t gfp)
{
struct page *page;
- page = alloc_pages_node(nid, gfp | __GFP_ZERO, 0);
+ page = alloc_pages_node_noprof(nid, gfp | __GFP_ZERO, 0);
return page ? page_address(page) : NULL;
}
+#define alloc_pgtable_page(...) alloc_hooks(alloc_pgtable_page_noprof(__VA_ARGS__))
/*
* This must be called after device probe completes. During probe
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index e7a44929f0daf7..ac6754a85f3507 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3228,30 +3228,33 @@ out:
static void iommu_snp_enable(void)
{
#ifdef CONFIG_KVM_AMD_SEV
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return;
/*
* The SNP support requires that IOMMU must be enabled, and is
- * not configured in the passthrough mode.
+ * configured with V1 page table (DTE[Mode] = 0 is not supported).
*/
if (no_iommu || iommu_default_passthrough()) {
- pr_err("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
- return;
+ pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
+ goto disable_snp;
+ }
+
+ if (amd_iommu_pgtable != AMD_IOMMU_V1) {
+ pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
+ goto disable_snp;
}
amd_iommu_snp_en = check_feature(FEATURE_SNP);
if (!amd_iommu_snp_en) {
- pr_err("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
- return;
+ pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
+ goto disable_snp;
}
pr_info("IOMMU SNP support enabled.\n");
+ return;
- /* Enforce IOMMU v1 pagetable when SNP is enabled. */
- if (amd_iommu_pgtable != AMD_IOMMU_V1) {
- pr_warn("Forcing use of AMD IOMMU v1 page table due to SNP.\n");
- amd_iommu_pgtable = AMD_IOMMU_V1;
- }
+disable_snp:
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
#endif
}
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index d35c1b8c8e65ce..e692217fcb2801 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1692,26 +1692,29 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
static u16 domain_id_alloc(void)
{
+ unsigned long flags;
int id;
- spin_lock(&pd_bitmap_lock);
+ spin_lock_irqsave(&pd_bitmap_lock, flags);
id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
BUG_ON(id == 0);
if (id > 0 && id < MAX_DOMAIN_ID)
__set_bit(id, amd_iommu_pd_alloc_bitmap);
else
id = 0;
- spin_unlock(&pd_bitmap_lock);
+ spin_unlock_irqrestore(&pd_bitmap_lock, flags);
return id;
}
static void domain_id_free(int id)
{
- spin_lock(&pd_bitmap_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&pd_bitmap_lock, flags);
if (id > 0 && id < MAX_DOMAIN_ID)
__clear_bit(id, amd_iommu_pd_alloc_bitmap);
- spin_unlock(&pd_bitmap_lock);
+ spin_unlock_irqrestore(&pd_bitmap_lock, flags);
}
static void free_gcr3_tbl_level1(u64 *tbl)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 5ed036225e69bb..41f93c3ab160d0 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1139,7 +1139,8 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
* requires a breaking update, zero the V bit, write all qwords
* but 0, then set qword 0
*/
- unused_update.data[0] = entry->data[0] & (~STRTAB_STE_0_V);
+ unused_update.data[0] = entry->data[0] &
+ cpu_to_le64(~STRTAB_STE_0_V);
entry_set(smmu, sid, entry, &unused_update, 0, 1);
entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1);
entry_set(smmu, sid, entry, target, 0, 1);
@@ -1453,14 +1454,17 @@ static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
}
-static void arm_smmu_make_bypass_ste(struct arm_smmu_ste *target)
+static void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
+ struct arm_smmu_ste *target)
{
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
STRTAB_STE_0_V |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
- target->data[1] = cpu_to_le64(
- FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
+
+ if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
+ target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+ STRTAB_STE_1_SHCFG_INCOMING));
}
static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
@@ -1523,6 +1527,7 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
u64 vtcr_val;
+ struct arm_smmu_device *smmu = master->smmu;
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
@@ -1531,9 +1536,11 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
target->data[1] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_EATS,
- master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0) |
- FIELD_PREP(STRTAB_STE_1_SHCFG,
- STRTAB_STE_1_SHCFG_INCOMING));
+ master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+
+ if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
+ target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+ STRTAB_STE_1_SHCFG_INCOMING));
vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
@@ -1560,7 +1567,8 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
* This can safely directly manipulate the STE memory without a sync sequence
* because the STE table has not been installed in the SMMU yet.
*/
-static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
+static void arm_smmu_init_initial_stes(struct arm_smmu_device *smmu,
+ struct arm_smmu_ste *strtab,
unsigned int nent)
{
unsigned int i;
@@ -1569,7 +1577,7 @@ static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
if (disable_bypass)
arm_smmu_make_abort_ste(strtab);
else
- arm_smmu_make_bypass_ste(strtab);
+ arm_smmu_make_bypass_ste(smmu, strtab);
strtab++;
}
}
@@ -1597,7 +1605,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
return -ENOMEM;
}
- arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
+ arm_smmu_init_initial_stes(smmu, desc->l2ptr, 1 << STRTAB_SPLIT);
arm_smmu_write_strtab_l1_desc(strtab, desc);
return 0;
}
@@ -2637,8 +2645,9 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
struct device *dev)
{
struct arm_smmu_ste ste;
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- arm_smmu_make_bypass_ste(&ste);
+ arm_smmu_make_bypass_ste(master->smmu, &ste);
return arm_smmu_attach_dev_ste(dev, &ste);
}
@@ -3264,7 +3273,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
cfg->strtab_base_cfg = reg;
- arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents);
+ arm_smmu_init_initial_stes(smmu, strtab, cfg->num_l1_ents);
return 0;
}
@@ -3777,6 +3786,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
return -ENXIO;
}
+ if (reg & IDR1_ATTR_TYPES_OVR)
+ smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;
+
/* Queue sizes, capped to ensure natural alignment */
smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_CMDQS, reg));
@@ -3992,7 +4004,7 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
* STE table is not programmed to HW, see
* arm_smmu_initial_bypass_stes()
*/
- arm_smmu_make_bypass_ste(
+ arm_smmu_make_bypass_ste(smmu,
arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
}
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 23baf117e7e4b5..2a19bb63e5c6d2 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -44,6 +44,7 @@
#define IDR1_TABLES_PRESET (1 << 30)
#define IDR1_QUEUES_PRESET (1 << 29)
#define IDR1_REL (1 << 28)
+#define IDR1_ATTR_TYPES_OVR (1 << 27)
#define IDR1_CMDQS GENMASK(25, 21)
#define IDR1_EVTQS GENMASK(20, 16)
#define IDR1_PRIQS GENMASK(15, 11)
@@ -647,6 +648,7 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_SVA (1 << 17)
#define ARM_SMMU_FEAT_E2H (1 << 18)
#define ARM_SMMU_FEAT_NESTING (1 << 19)
+#define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20)
u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index e4cb26f6a9434e..07d087eecc17c3 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1723,7 +1723,7 @@ static const struct dma_map_ops iommu_dma_ops = {
.flags = DMA_F_PCI_P2PDMA_SUPPORTED,
.alloc = iommu_dma_alloc,
.free = iommu_dma_free,
- .alloc_pages = dma_common_alloc_pages,
+ .alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
.alloc_noncontiguous = iommu_dma_alloc_noncontiguous,
.free_noncontiguous = iommu_dma_free_noncontiguous,
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 50eb9aed47cc58..a7ecd90303dc42 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4299,9 +4299,11 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
dev_iommu_priv_set(dev, info);
- ret = device_rbtree_insert(iommu, info);
- if (ret)
- goto free;
+ if (pdev && pci_ats_supported(pdev)) {
+ ret = device_rbtree_insert(iommu, info);
+ if (ret)
+ goto free;
+ }
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
ret = intel_pasid_alloc_table(dev);
@@ -4336,7 +4338,8 @@ static void intel_iommu_release_device(struct device *dev)
struct intel_iommu *iommu = info->iommu;
mutex_lock(&iommu->iopf_lock);
- device_rbtree_remove(info);
+ if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
+ device_rbtree_remove(info);
mutex_unlock(&iommu->iopf_lock);
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c
index cf43e798eca499..44083d01852dbf 100644
--- a/drivers/iommu/intel/perfmon.c
+++ b/drivers/iommu/intel/perfmon.c
@@ -438,7 +438,7 @@ static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
iommu_pmu_set_filter(domain, event->attr.config1,
IOMMU_PMU_FILTER_DOMAIN, idx,
event->attr.config1);
- iommu_pmu_set_filter(pasid, event->attr.config1,
+ iommu_pmu_set_filter(pasid, event->attr.config2,
IOMMU_PMU_FILTER_PASID, idx,
event->attr.config1);
iommu_pmu_set_filter(ats, event->attr.config2,
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index c1bed89b102614..ee3b469e2da155 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -66,7 +66,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
struct page *pages;
int irq, ret;
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
+ pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
if (!pages) {
pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
iommu->name);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 098869007c69e5..a95a483def2d2a 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3354,6 +3354,7 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
+ struct group_device *device;
void *curr;
int ret;
@@ -3363,10 +3364,18 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
if (!group)
return -ENODEV;
- if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner)
+ if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner ||
+ pasid == IOMMU_NO_PASID)
return -EINVAL;
mutex_lock(&group->mutex);
+ for_each_group_device(group, device) {
+ if (pasid >= device->dev->iommu->max_pasids) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ }
+
curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
if (curr) {
ret = xa_err(curr) ? : -EBUSY;
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 99d4b075df49e4..76656fe0470d7d 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -37,6 +37,7 @@ config IOMMUFD_TEST
depends on DEBUG_KERNEL
depends on FAULT_INJECTION
depends on RUNTIME_TESTING_MENU
+ select IOMMUFD_DRIVER
default n
help
This is dangerous, do not enable unless running
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index b8c47f18bc2612..6a2707fe7a78c0 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1790,6 +1790,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = {
{ .compatible = "mediatek,mt8365-m4u", .data = &mt8365_data},
{}
};
+MODULE_DEVICE_TABLE(of, mtk_iommu_of_ids);
static struct platform_driver mtk_iommu_driver = {
.probe = mtk_iommu_probe,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index a9fa2a54dc9b39..d6e4002200bd33 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -600,6 +600,7 @@ static const struct of_device_id mtk_iommu_v1_of_ids[] = {
{ .compatible = "mediatek,mt2701-m4u", },
{}
};
+MODULE_DEVICE_TABLE(of, mtk_iommu_v1_of_ids);
static const struct component_master_ops mtk_iommu_v1_com_ops = {
.bind = mtk_iommu_v1_bind,
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index a55528469278c7..4b021a67bdfe48 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -316,7 +316,7 @@ static int armada_370_xp_msi_init(struct device_node *node,
return 0;
}
#else
-static void armada_370_xp_msi_reenable_percpu(void) {}
+static __maybe_unused void armada_370_xp_msi_reenable_percpu(void) {}
static inline int armada_370_xp_msi_init(struct device_node *node,
phys_addr_t main_int_phys_base)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index fca888b36680df..5f7d3db3afd824 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -786,6 +786,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
struct its_cmd_block *cmd,
struct its_cmd_desc *desc)
{
+ struct its_vpe *vpe = valid_vpe(its, desc->its_vmapp_cmd.vpe);
unsigned long vpt_addr, vconf_addr;
u64 target;
bool alloc;
@@ -798,6 +799,11 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
if (is_v4_1(its)) {
alloc = !atomic_dec_return(&desc->its_vmapp_cmd.vpe->vmapp_count);
its_encode_alloc(cmd, alloc);
+ /*
+ * Unmapping a VPE is self-synchronizing on GICv4.1,
+ * no need to issue a VSYNC.
+ */
+ vpe = NULL;
}
goto out;
@@ -832,7 +838,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
out:
its_fixup_cmd(cmd);
- return valid_vpe(its, desc->its_vmapp_cmd.vpe);
+ return vpe;
}
static struct its_vpe *its_build_vmapti_cmd(struct its_node *its,
@@ -4561,13 +4567,8 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
irqd_set_resend_when_in_progress(irq_get_irq_data(virq + i));
}
- if (err) {
- if (i > 0)
- its_vpe_irq_domain_free(domain, virq, i);
-
- its_lpi_free(bitmap, base, nr_ids);
- its_free_prop_table(vprop_page);
- }
+ if (err)
+ its_vpe_irq_domain_free(domain, virq, i);
return err;
}
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 2776ca5fc33f39..b215b28cad7b76 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -401,23 +401,23 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
static int data_sock_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int len)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int err = 0, opt = 0;
if (*debug & DEBUG_SOCKET)
printk(KERN_DEBUG "%s(%p, %d, %x, optval, %d)\n", __func__, sock,
- level, optname, len);
+ level, optname, optlen);
lock_sock(sk);
switch (optname) {
case MISDN_TIME_STAMP:
- if (copy_from_sockptr(&opt, optval, sizeof(int))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt)
_pms(sk)->cmask |= MISDN_TIME_STAMP;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 37b9f8f1ae1a27..7f3dc8ee6ab8dd 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -4221,7 +4221,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
} else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
} else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
- if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
+ if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) {
r = -EINVAL;
ti->error = "Invalid bitmap_flush_interval argument";
goto bad;
diff --git a/drivers/md/dm-vdo/murmurhash3.c b/drivers/md/dm-vdo/murmurhash3.c
index 00c9b9c050011c..3a989efae1420a 100644
--- a/drivers/md/dm-vdo/murmurhash3.c
+++ b/drivers/md/dm-vdo/murmurhash3.c
@@ -8,33 +8,14 @@
#include "murmurhash3.h"
+#include <asm/unaligned.h>
+
static inline u64 rotl64(u64 x, s8 r)
{
return (x << r) | (x >> (64 - r));
}
#define ROTL64(x, y) rotl64(x, y)
-static __always_inline u64 getblock64(const u64 *p, int i)
-{
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- return p[i];
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return __builtin_bswap64(p[i]);
-#else
-#error "can't figure out byte order"
-#endif
-}
-
-static __always_inline void putblock64(u64 *p, int i, u64 value)
-{
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- p[i] = value;
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- p[i] = __builtin_bswap64(value);
-#else
-#error "can't figure out byte order"
-#endif
-}
/* Finalization mix - force all bits of a hash block to avalanche */
@@ -60,6 +41,8 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out)
const u64 c1 = 0x87c37b91114253d5LLU;
const u64 c2 = 0x4cf5ad432745937fLLU;
+ u64 *hash_out = out;
+
/* body */
const u64 *blocks = (const u64 *)(data);
@@ -67,8 +50,8 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out)
int i;
for (i = 0; i < nblocks; i++) {
- u64 k1 = getblock64(blocks, i * 2 + 0);
- u64 k2 = getblock64(blocks, i * 2 + 1);
+ u64 k1 = get_unaligned_le64(&blocks[i * 2]);
+ u64 k2 = get_unaligned_le64(&blocks[i * 2 + 1]);
k1 *= c1;
k1 = ROTL64(k1, 31);
@@ -154,7 +137,7 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out)
break;
default:
break;
- };
+ }
}
/* finalization */
@@ -170,6 +153,6 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out)
h1 += h2;
h2 += h1;
- putblock64((u64 *)out, 0, h1);
- putblock64((u64 *)out, 1, h2);
+ put_unaligned_le64(h1, &hash_out[0]);
+ put_unaligned_le64(h2, &hash_out[1]);
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 56aa2a8b9d7153..7d0746b37c8ec7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -765,7 +765,7 @@ static struct table_device *open_table_device(struct mapped_device *md,
return td;
out_blkdev_put:
- fput(bdev_file);
+ __fput_sync(bdev_file);
out_free_td:
kfree(td);
return ERR_PTR(r);
@@ -778,7 +778,13 @@ static void close_table_device(struct table_device *td, struct mapped_device *md
{
if (md->disk->slave_dir)
bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
- fput(td->dm_dev.bdev_file);
+
+ /* Leverage async fput() if DMF_DEFERRED_REMOVE set */
+ if (unlikely(test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
+ fput(td->dm_dev.bdev_file);
+ else
+ __fput_sync(td->dm_dev.bdev_file);
+
put_dax(td->dm_dev.dax_dev);
list_del(&td->list);
kfree(td);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index be8ac24f50b6ad..7b8a71ca66dde0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1558,7 +1558,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
for (j = 0; j < i; j++)
if (r1_bio->bios[j])
rdev_dec_pending(conf->mirrors[j].rdev, mddev);
- free_r1bio(r1_bio);
+ mempool_free(r1_bio, &conf->r1bio_pool);
allow_barrier(conf, bio->bi_iter.bi_sector);
if (bio->bi_opf & REQ_NOWAIT) {
diff --git a/drivers/media/cec/platform/sti/stih-cec.c b/drivers/media/cec/platform/sti/stih-cec.c
index a20fc5c0c88d0e..99978a7c8d9b70 100644
--- a/drivers/media/cec/platform/sti/stih-cec.c
+++ b/drivers/media/cec/platform/sti/stih-cec.c
@@ -6,6 +6,7 @@
*/
#include <linux/clk.h>
#include <linux/interrupt.h>
+#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
diff --git a/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c b/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
index 4c34344dc7dcb8..d7027d600208fc 100644
--- a/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
+++ b/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
@@ -50,12 +50,12 @@ static void mtk_vcodec_vpu_reset_dec_handler(void *priv)
dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
- mutex_lock(&dev->dev_mutex);
+ mutex_lock(&dev->dev_ctx_lock);
list_for_each_entry(ctx, &dev->ctx_list, list) {
ctx->state = MTK_STATE_ABORT;
mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
}
- mutex_unlock(&dev->dev_mutex);
+ mutex_unlock(&dev->dev_ctx_lock);
}
static void mtk_vcodec_vpu_reset_enc_handler(void *priv)
@@ -65,12 +65,12 @@ static void mtk_vcodec_vpu_reset_enc_handler(void *priv)
dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
- mutex_lock(&dev->dev_mutex);
+ mutex_lock(&dev->dev_ctx_lock);
list_for_each_entry(ctx, &dev->ctx_list, list) {
ctx->state = MTK_STATE_ABORT;
mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
}
- mutex_unlock(&dev->dev_mutex);
+ mutex_unlock(&dev->dev_ctx_lock);
}
static const struct mtk_vcodec_fw_ops mtk_vcodec_vpu_msg = {
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
index f47c98faf068b6..2073781ccadb15 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
@@ -268,7 +268,9 @@ static int fops_vcodec_open(struct file *file)
ctx->dev->vdec_pdata->init_vdec_params(ctx);
+ mutex_lock(&dev->dev_ctx_lock);
list_add(&ctx->list, &dev->ctx_list);
+ mutex_unlock(&dev->dev_ctx_lock);
mtk_vcodec_dbgfs_create(ctx);
mutex_unlock(&dev->dev_mutex);
@@ -311,7 +313,9 @@ static int fops_vcodec_release(struct file *file)
v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
mtk_vcodec_dbgfs_remove(dev, ctx->id);
+ mutex_lock(&dev->dev_ctx_lock);
list_del_init(&ctx->list);
+ mutex_unlock(&dev->dev_ctx_lock);
kfree(ctx);
mutex_unlock(&dev->dev_mutex);
return 0;
@@ -404,6 +408,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
for (i = 0; i < MTK_VDEC_HW_MAX; i++)
mutex_init(&dev->dec_mutex[i]);
mutex_init(&dev->dev_mutex);
+ mutex_init(&dev->dev_ctx_lock);
spin_lock_init(&dev->irqlock);
snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h
index 849b89dd205c21..85b2c0d3d8bcdd 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h
+++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h
@@ -241,6 +241,7 @@ struct mtk_vcodec_dec_ctx {
*
* @dec_mutex: decoder hardware lock
* @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
* @decode_workqueue: decode work queue
*
* @irqlock: protect data access by irq handler and work thread
@@ -282,6 +283,7 @@ struct mtk_vcodec_dec_dev {
/* decoder hardware mutex lock */
struct mutex dec_mutex[MTK_VDEC_HW_MAX];
struct mutex dev_mutex;
+ struct mutex dev_ctx_lock;
struct workqueue_struct *decode_workqueue;
spinlock_t irqlock;
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
index 06ed47df693bfd..21836dd6ef85a3 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
@@ -869,7 +869,6 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx)
inst->vpu.codec_type = ctx->current_codec;
inst->vpu.capture_type = ctx->capture_fourcc;
- ctx->drv_handle = inst;
err = vpu_dec_init(&inst->vpu);
if (err) {
mtk_vdec_err(ctx, "vdec_hevc init err=%d", err);
@@ -898,6 +897,7 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx)
mtk_vdec_debug(ctx, "lat hevc instance >> %p, codec_type = 0x%x",
inst, inst->vpu.codec_type);
+ ctx->drv_handle = inst;
return 0;
error_free_inst:
kfree(inst);
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
index 19407f9bc773c3..987b3d71b662ac 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
@@ -449,7 +449,7 @@ static int vdec_vp8_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
inst->frm_cnt, y_fb_dma, c_fb_dma, fb);
inst->cur_fb = fb;
- dec->bs_dma = (unsigned long)bs->dma_addr;
+ dec->bs_dma = (uint64_t)bs->dma_addr;
dec->bs_sz = bs->size;
dec->cur_y_fb_dma = y_fb_dma;
dec->cur_c_fb_dma = c_fb_dma;
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
index 55355fa7009083..039082f600c813 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
@@ -16,6 +16,7 @@
#include "../vdec_drv_base.h"
#include "../vdec_vpu_if.h"
+#define VP9_MAX_SUPER_FRAMES_NUM 8
#define VP9_SUPER_FRAME_BS_SZ 64
#define MAX_VP9_DPB_SIZE 9
@@ -133,11 +134,11 @@ struct vp9_sf_ref_fb {
*/
struct vdec_vp9_vsi {
unsigned char sf_bs_buf[VP9_SUPER_FRAME_BS_SZ];
- struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_FRM_BUF_NUM-1];
+ struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_SUPER_FRAMES_NUM];
int sf_next_ref_fb_idx;
unsigned int sf_frm_cnt;
- unsigned int sf_frm_offset[VP9_MAX_FRM_BUF_NUM-1];
- unsigned int sf_frm_sz[VP9_MAX_FRM_BUF_NUM-1];
+ unsigned int sf_frm_offset[VP9_MAX_SUPER_FRAMES_NUM];
+ unsigned int sf_frm_sz[VP9_MAX_SUPER_FRAMES_NUM];
unsigned int sf_frm_idx;
unsigned int sf_init;
struct vdec_fb fb;
@@ -526,7 +527,7 @@ static void vp9_swap_frm_bufs(struct vdec_vp9_inst *inst)
/* if this super frame and it is not last sub-frame, get next fb for
* sub-frame decode
*/
- if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt - 1)
+ if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt)
vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst);
}
@@ -735,7 +736,7 @@ static void get_free_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb)
static int validate_vsi_array_indexes(struct vdec_vp9_inst *inst,
struct vdec_vp9_vsi *vsi) {
- if (vsi->sf_frm_idx >= VP9_MAX_FRM_BUF_NUM - 1) {
+ if (vsi->sf_frm_idx > VP9_MAX_SUPER_FRAMES_NUM) {
mtk_vdec_err(inst->ctx, "Invalid vsi->sf_frm_idx=%u.", vsi->sf_frm_idx);
return -EIO;
}
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
index cf48d09b78d7a1..eea709d9382091 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
@@ -1074,7 +1074,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst
unsigned int mi_row;
unsigned int mi_col;
unsigned int offset;
- unsigned int pa;
+ dma_addr_t pa;
unsigned int size;
struct vdec_vp9_slice_tiles *tiles;
unsigned char *pos;
@@ -1109,7 +1109,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst
pos = va + offset;
end = va + bs->size;
/* truncated */
- pa = (unsigned int)bs->dma_addr + offset;
+ pa = bs->dma_addr + offset;
tb = instance->tile.va;
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c
index 82e57ae983d557..da6be556727bb1 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c
@@ -77,12 +77,14 @@ static bool vpu_dec_check_ap_inst(struct mtk_vcodec_dec_dev *dec_dev, struct vde
struct mtk_vcodec_dec_ctx *ctx;
int ret = false;
+ mutex_lock(&dec_dev->dev_ctx_lock);
list_for_each_entry(ctx, &dec_dev->ctx_list, list) {
if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
ret = true;
break;
}
}
+ mutex_unlock(&dec_dev->dev_ctx_lock);
return ret;
}
diff --git a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c
index 6319f24bc714b5..3cb8a16222220e 100644
--- a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c
+++ b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c
@@ -177,7 +177,9 @@ static int fops_vcodec_open(struct file *file)
mtk_v4l2_venc_dbg(2, ctx, "Create instance [%d]@%p m2m_ctx=%p ",
ctx->id, ctx, ctx->m2m_ctx);
+ mutex_lock(&dev->dev_ctx_lock);
list_add(&ctx->list, &dev->ctx_list);
+ mutex_unlock(&dev->dev_ctx_lock);
mutex_unlock(&dev->dev_mutex);
mtk_v4l2_venc_dbg(0, ctx, "%s encoder [%d]", dev_name(&dev->plat_dev->dev),
@@ -212,7 +214,9 @@ static int fops_vcodec_release(struct file *file)
v4l2_fh_exit(&ctx->fh);
v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
+ mutex_lock(&dev->dev_ctx_lock);
list_del_init(&ctx->list);
+ mutex_unlock(&dev->dev_ctx_lock);
kfree(ctx);
mutex_unlock(&dev->dev_mutex);
return 0;
@@ -294,6 +298,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
mutex_init(&dev->enc_mutex);
mutex_init(&dev->dev_mutex);
+ mutex_init(&dev->dev_ctx_lock);
spin_lock_init(&dev->irqlock);
snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
diff --git a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h
index a042f607ed8d16..0bd85d0fb379ac 100644
--- a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h
+++ b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h
@@ -178,6 +178,7 @@ struct mtk_vcodec_enc_ctx {
*
* @enc_mutex: encoder hardware lock.
* @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
* @encode_workqueue: encode work queue
*
* @enc_irq: h264 encoder irq resource
@@ -205,6 +206,7 @@ struct mtk_vcodec_enc_dev {
/* encoder hardware mutex lock */
struct mutex enc_mutex;
struct mutex dev_mutex;
+ struct mutex dev_ctx_lock;
struct workqueue_struct *encode_workqueue;
int enc_irq;
diff --git a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c
index 84ad1cc6ad171e..51bb7ee141b9e5 100644
--- a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c
+++ b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c
@@ -47,12 +47,14 @@ static bool vpu_enc_check_ap_inst(struct mtk_vcodec_enc_dev *enc_dev, struct ven
struct mtk_vcodec_enc_ctx *ctx;
int ret = false;
+ mutex_lock(&enc_dev->dev_ctx_lock);
list_for_each_entry(ctx, &enc_dev->ctx_list, list) {
if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
ret = true;
break;
}
}
+ mutex_unlock(&enc_dev->dev_ctx_lock);
return ret;
}
diff --git a/drivers/media/rc/mtk-cir.c b/drivers/media/rc/mtk-cir.c
index 4e294e59d3cba8..b2f82b2d1c8d40 100644
--- a/drivers/media/rc/mtk-cir.c
+++ b/drivers/media/rc/mtk-cir.c
@@ -8,6 +8,7 @@
#include <linux/clk.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/io.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c
index 96ae0294ac102a..fc5fd392717720 100644
--- a/drivers/media/rc/serial_ir.c
+++ b/drivers/media/rc/serial_ir.c
@@ -18,6 +18,7 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
+#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/serial_reg.h>
#include <linux/types.h>
diff --git a/drivers/media/rc/st_rc.c b/drivers/media/rc/st_rc.c
index 28477aa955635d..988b09191c4c74 100644
--- a/drivers/media/rc/st_rc.c
+++ b/drivers/media/rc/st_rc.c
@@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/clk.h>
#include <linux/interrupt.h>
+#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
diff --git a/drivers/media/rc/sunxi-cir.c b/drivers/media/rc/sunxi-cir.c
index bf58c965ead8cf..b49df8355e6b39 100644
--- a/drivers/media/rc/sunxi-cir.c
+++ b/drivers/media/rc/sunxi-cir.c
@@ -12,6 +12,7 @@
#include <linux/clk.h>
#include <linux/interrupt.h>
+#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
index 1a64364700eb0f..0ad2ff9065aad0 100644
--- a/drivers/misc/cardreader/rtsx_pcr.c
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -1002,7 +1002,7 @@ static irqreturn_t rtsx_pci_isr(int irq, void *dev_id)
} else {
pcr->card_removed |= SD_EXIST;
pcr->card_inserted &= ~SD_EXIST;
- if (PCI_PID(pcr) == PID_5261) {
+ if ((PCI_PID(pcr) == PID_5261) || (PCI_PID(pcr) == PID_5264)) {
rtsx_pci_write_register(pcr, RTS5261_FW_STATUS,
RTS5261_EXPRESS_LINK_FAIL_MASK, 0);
pcr->extra_caps |= EXTRA_CAPS_SD_EXPRESS;
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 572333ead5fb8b..4bd4f32bcdabb8 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -758,15 +758,6 @@ static int at24_probe(struct i2c_client *client)
}
pm_runtime_enable(dev);
- at24->nvmem = devm_nvmem_register(dev, &nvmem_config);
- if (IS_ERR(at24->nvmem)) {
- pm_runtime_disable(dev);
- if (!pm_runtime_status_suspended(dev))
- regulator_disable(at24->vcc_reg);
- return dev_err_probe(dev, PTR_ERR(at24->nvmem),
- "failed to register nvmem\n");
- }
-
/*
* Perform a one-byte test read to verify that the chip is functional,
* unless powering on the device is to be avoided during probe (i.e.
@@ -782,6 +773,15 @@ static int at24_probe(struct i2c_client *client)
}
}
+ at24->nvmem = devm_nvmem_register(dev, &nvmem_config);
+ if (IS_ERR(at24->nvmem)) {
+ pm_runtime_disable(dev);
+ if (!pm_runtime_status_suspended(dev))
+ regulator_disable(at24->vcc_reg);
+ return dev_err_probe(dev, PTR_ERR(at24->nvmem),
+ "failed to register nvmem\n");
+ }
+
/* If this a SPD EEPROM, probe for DDR3 thermal sensor */
if (cdata == &at24_data_spd)
at24_probe_temp_sensor(client);
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index aac36750d2c54a..c3a6657dcd4a29 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -115,6 +115,8 @@
#define MEI_DEV_ID_ARL_S 0x7F68 /* Arrow Lake Point S */
#define MEI_DEV_ID_ARL_H 0x7770 /* Arrow Lake Point H */
+#define MEI_DEV_ID_LNL_M 0xA870 /* Lunar Lake Point M */
+
/*
* MEI HW Section
*/
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index b5757993c9b2af..7f59dd38c32f52 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -116,12 +116,14 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)},
- {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_SPS_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ARL_S, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ARL_H, MEI_ME_PCH15_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_LNL_M, MEI_ME_PCH15_CFG)},
+
/* required last entry */
{0, }
};
diff --git a/drivers/misc/mei/platform-vsc.c b/drivers/misc/mei/platform-vsc.c
index 6c9f00bcb94b18..b543e6b9f3cfd6 100644
--- a/drivers/misc/mei/platform-vsc.c
+++ b/drivers/misc/mei/platform-vsc.c
@@ -400,25 +400,40 @@ static void mei_vsc_remove(struct platform_device *pdev)
static int mei_vsc_suspend(struct device *dev)
{
struct mei_device *mei_dev = dev_get_drvdata(dev);
+ struct mei_vsc_hw *hw = mei_dev_to_vsc_hw(mei_dev);
mei_stop(mei_dev);
+ mei_disable_interrupts(mei_dev);
+
+ vsc_tp_free_irq(hw->tp);
+
return 0;
}
static int mei_vsc_resume(struct device *dev)
{
struct mei_device *mei_dev = dev_get_drvdata(dev);
+ struct mei_vsc_hw *hw = mei_dev_to_vsc_hw(mei_dev);
int ret;
- ret = mei_restart(mei_dev);
+ ret = vsc_tp_request_irq(hw->tp);
if (ret)
return ret;
+ ret = mei_restart(mei_dev);
+ if (ret)
+ goto err_free;
+
/* start timer if stopped in suspend */
schedule_delayed_work(&mei_dev->timer_work, HZ);
return 0;
+
+err_free:
+ vsc_tp_free_irq(hw->tp);
+
+ return ret;
}
static DEFINE_SIMPLE_DEV_PM_OPS(mei_vsc_pm_ops, mei_vsc_suspend, mei_vsc_resume);
diff --git a/drivers/misc/mei/pxp/mei_pxp.c b/drivers/misc/mei/pxp/mei_pxp.c
index b1e4c23b31a329..49abc95677cdac 100644
--- a/drivers/misc/mei/pxp/mei_pxp.c
+++ b/drivers/misc/mei/pxp/mei_pxp.c
@@ -236,8 +236,11 @@ static int mei_pxp_component_match(struct device *dev, int subcomponent,
pdev = to_pci_dev(dev);
- if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8) ||
- pdev->vendor != PCI_VENDOR_ID_INTEL)
+ if (pdev->vendor != PCI_VENDOR_ID_INTEL)
+ return 0;
+
+ if (pdev->class != (PCI_CLASS_DISPLAY_VGA << 8) &&
+ pdev->class != (PCI_CLASS_DISPLAY_OTHER << 8))
return 0;
if (subcomponent != I915_COMPONENT_PXP)
diff --git a/drivers/misc/mei/vsc-tp.c b/drivers/misc/mei/vsc-tp.c
index ecfb70cd057ca0..e6a98dba8a735e 100644
--- a/drivers/misc/mei/vsc-tp.c
+++ b/drivers/misc/mei/vsc-tp.c
@@ -94,6 +94,27 @@ static const struct acpi_gpio_mapping vsc_tp_acpi_gpios[] = {
{}
};
+static irqreturn_t vsc_tp_isr(int irq, void *data)
+{
+ struct vsc_tp *tp = data;
+
+ atomic_inc(&tp->assert_cnt);
+
+ wake_up(&tp->xfer_wait);
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t vsc_tp_thread_isr(int irq, void *data)
+{
+ struct vsc_tp *tp = data;
+
+ if (tp->event_notify)
+ tp->event_notify(tp->event_notify_context);
+
+ return IRQ_HANDLED;
+}
+
/* wakeup firmware and wait for response */
static int vsc_tp_wakeup_request(struct vsc_tp *tp)
{
@@ -384,6 +405,37 @@ int vsc_tp_register_event_cb(struct vsc_tp *tp, vsc_tp_event_cb_t event_cb,
EXPORT_SYMBOL_NS_GPL(vsc_tp_register_event_cb, VSC_TP);
/**
+ * vsc_tp_request_irq - request irq for vsc_tp device
+ * @tp: vsc_tp device handle
+ */
+int vsc_tp_request_irq(struct vsc_tp *tp)
+{
+ struct spi_device *spi = tp->spi;
+ struct device *dev = &spi->dev;
+ int ret;
+
+ irq_set_status_flags(spi->irq, IRQ_DISABLE_UNLAZY);
+ ret = request_threaded_irq(spi->irq, vsc_tp_isr, vsc_tp_thread_isr,
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ dev_name(dev), tp);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(vsc_tp_request_irq, VSC_TP);
+
+/**
+ * vsc_tp_free_irq - free irq for vsc_tp device
+ * @tp: vsc_tp device handle
+ */
+void vsc_tp_free_irq(struct vsc_tp *tp)
+{
+ free_irq(tp->spi->irq, tp);
+}
+EXPORT_SYMBOL_NS_GPL(vsc_tp_free_irq, VSC_TP);
+
+/**
* vsc_tp_intr_synchronize - synchronize vsc_tp interrupt
* @tp: vsc_tp device handle
*/
@@ -413,27 +465,6 @@ void vsc_tp_intr_disable(struct vsc_tp *tp)
}
EXPORT_SYMBOL_NS_GPL(vsc_tp_intr_disable, VSC_TP);
-static irqreturn_t vsc_tp_isr(int irq, void *data)
-{
- struct vsc_tp *tp = data;
-
- atomic_inc(&tp->assert_cnt);
-
- return IRQ_WAKE_THREAD;
-}
-
-static irqreturn_t vsc_tp_thread_isr(int irq, void *data)
-{
- struct vsc_tp *tp = data;
-
- wake_up(&tp->xfer_wait);
-
- if (tp->event_notify)
- tp->event_notify(tp->event_notify_context);
-
- return IRQ_HANDLED;
-}
-
static int vsc_tp_match_any(struct acpi_device *adev, void *data)
{
struct acpi_device **__adev = data;
@@ -490,10 +521,9 @@ static int vsc_tp_probe(struct spi_device *spi)
tp->spi = spi;
irq_set_status_flags(spi->irq, IRQ_DISABLE_UNLAZY);
- ret = devm_request_threaded_irq(dev, spi->irq, vsc_tp_isr,
- vsc_tp_thread_isr,
- IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
- dev_name(dev), tp);
+ ret = request_threaded_irq(spi->irq, vsc_tp_isr, vsc_tp_thread_isr,
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ dev_name(dev), tp);
if (ret)
return ret;
@@ -522,6 +552,8 @@ static int vsc_tp_probe(struct spi_device *spi)
err_destroy_lock:
mutex_destroy(&tp->mutex);
+ free_irq(spi->irq, tp);
+
return ret;
}
@@ -532,6 +564,8 @@ static void vsc_tp_remove(struct spi_device *spi)
platform_device_unregister(tp->pdev);
mutex_destroy(&tp->mutex);
+
+ free_irq(spi->irq, tp);
}
static const struct acpi_device_id vsc_tp_acpi_ids[] = {
diff --git a/drivers/misc/mei/vsc-tp.h b/drivers/misc/mei/vsc-tp.h
index f9513ddc3e4093..14ca195cbddccf 100644
--- a/drivers/misc/mei/vsc-tp.h
+++ b/drivers/misc/mei/vsc-tp.h
@@ -37,6 +37,9 @@ int vsc_tp_xfer(struct vsc_tp *tp, u8 cmd, const void *obuf, size_t olen,
int vsc_tp_register_event_cb(struct vsc_tp *tp, vsc_tp_event_cb_t event_cb,
void *context);
+int vsc_tp_request_irq(struct vsc_tp *tp);
+void vsc_tp_free_irq(struct vsc_tp *tp);
+
void vsc_tp_intr_enable(struct vsc_tp *tp);
void vsc_tp_intr_disable(struct vsc_tp *tp);
void vsc_tp_intr_synchronize(struct vsc_tp *tp);
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 64a3492e8002fb..90c51b12148e8d 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -413,7 +413,7 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user(
struct mmc_blk_ioc_data *idata;
int err;
- idata = kmalloc(sizeof(*idata), GFP_KERNEL);
+ idata = kzalloc(sizeof(*idata), GFP_KERNEL);
if (!idata) {
err = -ENOMEM;
goto out;
@@ -488,7 +488,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
if (idata->flags & MMC_BLK_IOC_DROP)
return 0;
- if (idata->flags & MMC_BLK_IOC_SBC)
+ if (idata->flags & MMC_BLK_IOC_SBC && i > 0)
prev_idata = idatas[i - 1];
/*
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index b88d6dec209f57..9a5f75163acaee 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -300,6 +300,7 @@ static void moxart_transfer_pio(struct moxart_host *host)
remain = sgm->length;
if (remain > host->data_len)
remain = host->data_len;
+ sgm->consumed = 0;
if (data->flags & MMC_DATA_WRITE) {
while (remain > 0) {
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 088f8ed4fdc464..a8ee0df4714823 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -1114,10 +1114,25 @@ static void mmc_omap_set_power(struct mmc_omap_slot *slot, int power_on,
host = slot->host;
- if (slot->vsd)
- gpiod_set_value(slot->vsd, power_on);
- if (slot->vio)
- gpiod_set_value(slot->vio, power_on);
+ if (power_on) {
+ if (slot->vsd) {
+ gpiod_set_value(slot->vsd, power_on);
+ msleep(1);
+ }
+ if (slot->vio) {
+ gpiod_set_value(slot->vio, power_on);
+ msleep(1);
+ }
+ } else {
+ if (slot->vio) {
+ gpiod_set_value(slot->vio, power_on);
+ msleep(50);
+ }
+ if (slot->vsd) {
+ gpiod_set_value(slot->vsd, power_on);
+ msleep(50);
+ }
+ }
if (slot->pdata->set_power != NULL)
slot->pdata->set_power(mmc_dev(slot->mmc), slot->id, power_on,
@@ -1254,18 +1269,18 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id)
slot->pdata = &host->pdata->slots[id];
/* Check for some optional GPIO controls */
- slot->vsd = gpiod_get_index_optional(host->dev, "vsd",
- id, GPIOD_OUT_LOW);
+ slot->vsd = devm_gpiod_get_index_optional(host->dev, "vsd",
+ id, GPIOD_OUT_LOW);
if (IS_ERR(slot->vsd))
return dev_err_probe(host->dev, PTR_ERR(slot->vsd),
"error looking up VSD GPIO\n");
- slot->vio = gpiod_get_index_optional(host->dev, "vio",
- id, GPIOD_OUT_LOW);
+ slot->vio = devm_gpiod_get_index_optional(host->dev, "vio",
+ id, GPIOD_OUT_LOW);
if (IS_ERR(slot->vio))
return dev_err_probe(host->dev, PTR_ERR(slot->vio),
"error looking up VIO GPIO\n");
- slot->cover = gpiod_get_index_optional(host->dev, "cover",
- id, GPIOD_IN);
+ slot->cover = devm_gpiod_get_index_optional(host->dev, "cover",
+ id, GPIOD_IN);
if (IS_ERR(slot->cover))
return dev_err_probe(host->dev, PTR_ERR(slot->cover),
"error looking up cover switch GPIO\n");
@@ -1379,13 +1394,6 @@ static int mmc_omap_probe(struct platform_device *pdev)
if (IS_ERR(host->virt_base))
return PTR_ERR(host->virt_base);
- host->slot_switch = gpiod_get_optional(host->dev, "switch",
- GPIOD_OUT_LOW);
- if (IS_ERR(host->slot_switch))
- return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
- "error looking up slot switch GPIO\n");
-
-
INIT_WORK(&host->slot_release_work, mmc_omap_slot_release_work);
INIT_WORK(&host->send_stop_work, mmc_omap_send_stop_work);
@@ -1404,6 +1412,12 @@ static int mmc_omap_probe(struct platform_device *pdev)
host->dev = &pdev->dev;
platform_set_drvdata(pdev, host);
+ host->slot_switch = devm_gpiod_get_optional(host->dev, "switch",
+ GPIOD_OUT_LOW);
+ if (IS_ERR(host->slot_switch))
+ return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
+ "error looking up slot switch GPIO\n");
+
host->id = pdev->id;
host->irq = irq;
host->phys_base = res->start;
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 668e0aceeebac9..e113b99a3eab59 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -2694,6 +2694,11 @@ static __maybe_unused int sdhci_msm_runtime_suspend(struct device *dev)
struct sdhci_host *host = dev_get_drvdata(dev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
+ host->runtime_suspended = true;
+ spin_unlock_irqrestore(&host->lock, flags);
/* Drop the performance vote */
dev_pm_opp_set_rate(dev, 0);
@@ -2708,6 +2713,7 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev)
struct sdhci_host *host = dev_get_drvdata(dev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+ unsigned long flags;
int ret;
ret = clk_bulk_prepare_enable(ARRAY_SIZE(msm_host->bulk_clks),
@@ -2726,7 +2732,15 @@ static __maybe_unused int sdhci_msm_runtime_resume(struct device *dev)
dev_pm_opp_set_rate(dev, msm_host->clk_rate);
- return sdhci_msm_ice_resume(msm_host);
+ ret = sdhci_msm_ice_resume(msm_host);
+ if (ret)
+ return ret;
+
+ spin_lock_irqsave(&host->lock, flags);
+ host->runtime_suspended = false;
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ return ret;
}
static const struct dev_pm_ops sdhci_msm_pm_ops = {
diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index ab4b964d405844..f2e4a93ed1d61a 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -626,6 +626,7 @@ static int th1520_execute_tuning(struct sdhci_host *host, u32 opcode)
/* perform tuning */
sdhci_start_tuning(host);
+ host->tuning_loop_count = 128;
host->tuning_err = __sdhci_execute_tuning(host, opcode);
if (host->tuning_err) {
/* disable auto-tuning upon tuning error */
@@ -999,6 +1000,17 @@ free_pltfm:
return err;
}
+static void dwcmshc_disable_card_clk(struct sdhci_host *host)
+{
+ u16 ctrl;
+
+ ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+ if (ctrl & SDHCI_CLOCK_CARD_EN) {
+ ctrl &= ~SDHCI_CLOCK_CARD_EN;
+ sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL);
+ }
+}
+
static void dwcmshc_remove(struct platform_device *pdev)
{
struct sdhci_host *host = platform_get_drvdata(pdev);
@@ -1006,8 +1018,14 @@ static void dwcmshc_remove(struct platform_device *pdev)
struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
struct rk35xx_priv *rk_priv = priv->priv;
+ pm_runtime_get_sync(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_put_noidle(&pdev->dev);
+
sdhci_remove_host(host, 0);
+ dwcmshc_disable_card_clk(host);
+
clk_disable_unprepare(pltfm_host->clk);
clk_disable_unprepare(priv->bus_clk);
if (rk_priv)
@@ -1099,17 +1117,6 @@ static void dwcmshc_enable_card_clk(struct sdhci_host *host)
}
}
-static void dwcmshc_disable_card_clk(struct sdhci_host *host)
-{
- u16 ctrl;
-
- ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
- if (ctrl & SDHCI_CLOCK_CARD_EN) {
- ctrl &= ~SDHCI_CLOCK_CARD_EN;
- sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL);
- }
-}
-
static int dwcmshc_runtime_suspend(struct device *dev)
{
struct sdhci_host *host = dev_get_drvdata(dev);
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index e78faef67d7ab5..94076b09557198 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -1439,6 +1439,9 @@ static int __maybe_unused sdhci_omap_runtime_suspend(struct device *dev)
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host);
+ if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+ mmc_retune_needed(host->mmc);
+
if (omap_host->con != -EINVAL)
sdhci_runtime_suspend_host(host);
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index 97a00ec9a4d489..caacdc0a381945 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -209,7 +209,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
if (dev->bdev_file) {
invalidate_mapping_pages(dev->bdev_file->f_mapping, 0, -1);
- fput(dev->bdev_file);
+ bdev_fput(dev->bdev_file);
}
kfree(dev);
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 5887feb347a4e4..0de87bc6384054 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -900,7 +900,7 @@ static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd,
config.name = compatible;
config.id = NVMEM_DEVID_AUTO;
config.owner = THIS_MODULE;
- config.add_legacy_fixed_of_cells = true;
+ config.add_legacy_fixed_of_cells = !mtd_type_is_nand(mtd);
config.type = NVMEM_TYPE_OTP;
config.root_only = true;
config.ignore_wp = true;
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index a8d12c71f987be..1b2ec0fec60c7a 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -857,7 +857,7 @@ static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
struct brcmnand_soc *soc = ctrl->soc;
int i;
- if (soc->read_data_bus) {
+ if (soc && soc->read_data_bus) {
soc->read_data_bus(soc, flash_cache, buffer, fc_words);
} else {
for (i = 0; i < fc_words; i++)
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 5243fab9face00..8db7fc42457111 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -53,7 +53,7 @@ static unsigned long doc_locations[] __initdata = {
0xe8000, 0xea000, 0xec000, 0xee000,
#endif
#endif
- 0xffffffff };
+};
static struct mtd_info *doclist = NULL;
@@ -1554,7 +1554,7 @@ static int __init init_nanddoc(void)
if (ret < 0)
return ret;
} else {
- for (i = 0; (doc_locations[i] != 0xffffffff); i++) {
+ for (i = 0; i < ARRAY_SIZE(doc_locations); i++) {
doc_probe(doc_locations[i]);
}
}
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index b079605c84d382..b8cff9240b286c 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2815,7 +2815,7 @@ static int qcom_misc_cmd_type_exec(struct nand_chip *chip, const struct nand_sub
host->cfg0_raw & ~(7 << CW_PER_PAGE));
nandc_set_reg(chip, NAND_DEV0_CFG1, host->cfg1_raw);
instrs = 3;
- } else {
+ } else if (q_op.cmd_reg != OP_RESET_DEVICE) {
return 0;
}
@@ -2830,9 +2830,8 @@ static int qcom_misc_cmd_type_exec(struct nand_chip *chip, const struct nand_sub
nandc_set_reg(chip, NAND_EXEC_CMD, 1);
write_reg_dma(nandc, NAND_FLASH_CMD, instrs, NAND_BAM_NEXT_SGL);
- (q_op.cmd_reg == OP_BLOCK_ERASE) ? write_reg_dma(nandc, NAND_DEV0_CFG0,
- 2, NAND_BAM_NEXT_SGL) : read_reg_dma(nandc,
- NAND_FLASH_STATUS, 1, NAND_BAM_NEXT_SGL);
+ if (q_op.cmd_reg == OP_BLOCK_ERASE)
+ write_reg_dma(nandc, NAND_DEV0_CFG0, 2, NAND_BAM_NEXT_SGL);
write_reg_dma(nandc, NAND_EXEC_CMD, 1, NAND_BAM_NEXT_SGL);
read_reg_dma(nandc, NAND_FLASH_STATUS, 1, NAND_BAM_NEXT_SGL);
diff --git a/drivers/mux/core.c b/drivers/mux/core.c
index 7758161129320f..78c0022697ec7c 100644
--- a/drivers/mux/core.c
+++ b/drivers/mux/core.c
@@ -64,7 +64,7 @@ static void mux_chip_release(struct device *dev)
{
struct mux_chip *mux_chip = to_mux_chip(dev);
- ida_simple_remove(&mux_ida, mux_chip->id);
+ ida_free(&mux_ida, mux_chip->id);
kfree(mux_chip);
}
@@ -111,7 +111,7 @@ struct mux_chip *mux_chip_alloc(struct device *dev,
mux_chip->dev.of_node = dev->of_node;
dev_set_drvdata(&mux_chip->dev, mux_chip);
- mux_chip->id = ida_simple_get(&mux_ida, 0, 0, GFP_KERNEL);
+ mux_chip->id = ida_alloc(&mux_ida, GFP_KERNEL);
if (mux_chip->id < 0) {
int err = mux_chip->id;
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 767f66c37f6b5c..8090390edaf9db 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -950,20 +950,173 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
mutex_unlock(&priv->reg_mutex);
}
-/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std
- * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA
- * must only be propagated to C-VLAN and MAC Bridge components. That means
- * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports,
- * these frames are supposed to be processed by the CPU (software). So we make
- * the switch only forward them to the CPU port. And if received from a CPU
- * port, forward to a single port. The software is responsible of making the
- * switch conform to the latter by setting a single port as destination port on
- * the special tag.
+/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL)
+ * of the Open Systems Interconnection basic reference model (OSI/RM) are
+ * described; the medium access control (MAC) and logical link control (LLC)
+ * sublayers. The MAC sublayer is the one facing the physical layer.
*
- * This switch intellectual property cannot conform to this part of the standard
- * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC
- * DAs, it also includes :22-FF which the scope of propagation is not supposed
- * to be restricted for these MAC DAs.
+ * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A
+ * Bridge component comprises a MAC Relay Entity for interconnecting the Ports
+ * of the Bridge, at least two Ports, and higher layer entities with at least a
+ * Spanning Tree Protocol Entity included.
+ *
+ * Each Bridge Port also functions as an end station and shall provide the MAC
+ * Service to an LLC Entity. Each instance of the MAC Service is provided to a
+ * distinct LLC Entity that supports protocol identification, multiplexing, and
+ * demultiplexing, for protocol data unit (PDU) transmission and reception by
+ * one or more higher layer entities.
+ *
+ * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC
+ * Entity associated with each Bridge Port is modeled as being directly
+ * connected to the attached Local Area Network (LAN).
+ *
+ * On the switch with CPU port architecture, CPU port functions as Management
+ * Port, and the Management Port functionality is provided by software which
+ * functions as an end station. Software is connected to an IEEE 802 LAN that is
+ * wholly contained within the system that incorporates the Bridge. Software
+ * provides access to the LLC Entity associated with each Bridge Port by the
+ * value of the source port field on the special tag on the frame received by
+ * software.
+ *
+ * We call frames that carry control information to determine the active
+ * topology and current extent of each Virtual Local Area Network (VLAN), i.e.,
+ * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration
+ * Protocol Data Units (MVRPDUs), and frames from other link constrained
+ * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and
+ * Link Layer Discovery Protocol (LLDP), link-local frames. They are not
+ * forwarded by a Bridge. Permanently configured entries in the filtering
+ * database (FDB) ensure that such frames are discarded by the Forwarding
+ * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail:
+ *
+ * Each of the reserved MAC addresses specified in Table 8-1
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be
+ * permanently configured in the FDB in C-VLAN components and ERs.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-2
+ * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently
+ * configured in the FDB in S-VLAN components.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-3
+ * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in
+ * TPMR components.
+ *
+ * The FDB entries for reserved MAC addresses shall specify filtering for all
+ * Bridge Ports and all VIDs. Management shall not provide the capability to
+ * modify or remove entries for reserved MAC addresses.
+ *
+ * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of
+ * propagation of PDUs within a Bridged Network, as follows:
+ *
+ * The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no
+ * conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN)
+ * component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward.
+ * PDUs transmitted using this destination address, or any other addresses
+ * that appear in Table 8-1, Table 8-2, and Table 8-3
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can
+ * therefore travel no further than those stations that can be reached via a
+ * single individual LAN from the originating station.
+ *
+ * The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an
+ * address that no conformant S-VLAN component, C-VLAN component, or MAC
+ * Bridge can forward; however, this address is relayed by a TPMR component.
+ * PDUs using this destination address, or any of the other addresses that
+ * appear in both Table 8-1 and Table 8-2 but not in Table 8-3
+ * (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by
+ * any TPMRs but will propagate no further than the nearest S-VLAN component,
+ * C-VLAN component, or MAC Bridge.
+ *
+ * The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address
+ * that no conformant C-VLAN component, MAC Bridge can forward; however, it is
+ * relayed by TPMR components and S-VLAN components. PDUs using this
+ * destination address, or any of the other addresses that appear in Table 8-1
+ * but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]),
+ * will be relayed by TPMR components and S-VLAN components but will propagate
+ * no further than the nearest C-VLAN component or MAC Bridge.
+ *
+ * Because the LLC Entity associated with each Bridge Port is provided via CPU
+ * port, we must not filter these frames but forward them to CPU port.
+ *
+ * In a Bridge, the transmission Port is majorly decided by ingress and egress
+ * rules, FDB, and spanning tree Port State functions of the Forwarding Process.
+ * For link-local frames, only CPU port should be designated as destination port
+ * in the FDB, and the other functions of the Forwarding Process must not
+ * interfere with the decision of the transmission Port. We call this process
+ * trapping frames to CPU port.
+ *
+ * Therefore, on the switch with CPU port architecture, link-local frames must
+ * be trapped to CPU port, and certain link-local frames received by a Port of a
+ * Bridge comprising a TPMR component or an S-VLAN component must be excluded
+ * from it.
+ *
+ * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port
+ * MAC Relay (TPMR) component as a TPMR component supports only a subset of the
+ * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port
+ * doesn't count) of this architecture will either function as a standard MAC
+ * Bridge or a standard VLAN Bridge.
+ *
+ * Therefore, a Bridge of this architecture can only comprise S-VLAN components,
+ * C-VLAN components, or MAC Bridge components. Since there's no TPMR component,
+ * we don't need to relay PDUs using the destination addresses specified on the
+ * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge
+ * section where they must be relayed by TPMR components.
+ *
+ * One option to trap link-local frames to CPU port is to add static FDB entries
+ * with CPU port designated as destination port. However, because that
+ * Independent VLAN Learning (IVL) is being used on every VID, each entry only
+ * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC
+ * Bridge component or a C-VLAN component, there would have to be 16 times 4096
+ * entries. This switch intellectual property can only hold a maximum of 2048
+ * entries. Using this option, there also isn't a mechanism to prevent
+ * link-local frames from being discarded when the spanning tree Port State of
+ * the reception Port is discarding.
+ *
+ * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4
+ * registers. Whilst this applies to every VID, it doesn't contain all of the
+ * reserved MAC addresses without affecting the remaining Standard Group MAC
+ * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the
+ * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination
+ * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF
+ * destination addresses which may be relayed by MAC Bridges or VLAN Bridges.
+ * The latter option provides better but not complete conformance.
+ *
+ * This switch intellectual property also does not provide a mechanism to trap
+ * link-local frames with specific destination addresses to CPU port by Bridge,
+ * to conform to the filtering rules for the distinct Bridge components.
+ *
+ * Therefore, regardless of the type of the Bridge component, link-local frames
+ * with these destination addresses will be trapped to CPU port:
+ *
+ * 01-80-C2-00-00-[00,01,02,03,0E]
+ *
+ * In a Bridge comprising a MAC Bridge component or a C-VLAN component:
+ *
+ * Link-local frames with these destination addresses won't be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F]
+ *
+ * In a Bridge comprising an S-VLAN component:
+ *
+ * Link-local frames with these destination addresses will be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-00
+ *
+ * Link-local frames with these destination addresses won't be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-[04,05,06,07,08,09,0A]
+ *
+ * To trap link-local frames to CPU port as conformant as this switch
+ * intellectual property can allow, link-local frames are made to be regarded as
+ * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual
+ * property only lets the frames regarded as BPDUs bypass the spanning tree Port
+ * State function of the Forwarding Process.
+ *
+ * The only remaining interference is the ingress rules. When the reception Port
+ * has no PVID assigned on software, VLAN-untagged frames won't be allowed in.
+ * There doesn't seem to be a mechanism on the switch intellectual property to
+ * have link-local frames bypass this function of the Forwarding Process.
*/
static void
mt753x_trap_frames(struct mt7530_priv *priv)
@@ -971,35 +1124,43 @@ mt753x_trap_frames(struct mt7530_priv *priv)
/* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them
* VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK |
- MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
- MT753X_BPDU_PORT_FW_MASK,
- MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_BPC,
+ MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK |
+ MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
+ MT753X_BPDU_PORT_FW_MASK,
+ MT753X_PAE_BPDU_FR |
+ MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
/* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress
* them VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK |
- MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK |
- MT753X_R01_PORT_FW_MASK,
- MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_RGAC1,
+ MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK |
+ MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR |
+ MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK,
+ MT753X_R02_BPDU_FR |
+ MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_R01_BPDU_FR |
+ MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
/* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress
* them VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK |
- MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK |
- MT753X_R03_PORT_FW_MASK,
- MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_RGAC2,
+ MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK |
+ MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR |
+ MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK,
+ MT753X_R0E_BPDU_FR |
+ MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_R03_BPDU_FR |
+ MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
}
static void
@@ -1722,14 +1883,16 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
static int mt753x_mirror_port_get(unsigned int id, u32 val)
{
- return (id == ID_MT7531) ? MT7531_MIRROR_PORT_GET(val) :
- MIRROR_PORT(val);
+ return (id == ID_MT7531 || id == ID_MT7988) ?
+ MT7531_MIRROR_PORT_GET(val) :
+ MIRROR_PORT(val);
}
static int mt753x_mirror_port_set(unsigned int id, u32 val)
{
- return (id == ID_MT7531) ? MT7531_MIRROR_PORT_SET(val) :
- MIRROR_PORT(val);
+ return (id == ID_MT7531 || id == ID_MT7988) ?
+ MT7531_MIRROR_PORT_SET(val) :
+ MIRROR_PORT(val);
}
static int mt753x_port_mirror_add(struct dsa_switch *ds, int port,
@@ -2268,8 +2431,6 @@ mt7530_setup(struct dsa_switch *ds)
SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST |
SYS_CTRL_REG_RST);
- mt7530_pll_setup(priv);
-
/* Lower Tx driving for TRGMII path */
for (i = 0; i < NUM_TRGMII_CTRL; i++)
mt7530_write(priv, MT7530_TRGMII_TD_ODT(i),
@@ -2285,6 +2446,9 @@ mt7530_setup(struct dsa_switch *ds)
val |= MHWTRAP_MANUAL;
mt7530_write(priv, MT7530_MHWTRAP, val);
+ if ((val & HWTRAP_XTAL_MASK) == HWTRAP_XTAL_40MHZ)
+ mt7530_pll_setup(priv);
+
mt753x_trap_frames(priv);
/* Enable and reset MIB counters */
@@ -2318,6 +2482,9 @@ mt7530_setup(struct dsa_switch *ds)
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
+ /* Allow mirroring frames received on the local port (monitor port). */
+ mt7530_set(priv, MT753X_AGC, LOCAL_EN);
+
/* Setup VLAN ID 0 for VLAN-unaware bridges */
ret = mt7530_setup_vlan0(priv);
if (ret)
@@ -2429,6 +2596,9 @@ mt7531_setup_common(struct dsa_switch *ds)
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
+ /* Allow mirroring frames received on the local port (monitor port). */
+ mt7530_set(priv, MT753X_AGC, LOCAL_EN);
+
/* Flush the FDB table */
ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL);
if (ret < 0)
@@ -2504,18 +2674,25 @@ mt7531_setup(struct dsa_switch *ds)
mt7530_rmw(priv, MT7531_GPIO_MODE0, MT7531_GPIO0_MASK,
MT7531_GPIO0_INTERRUPT);
- /* Enable PHY core PLL, since phy_device has not yet been created
- * provided for phy_[read,write]_mmd_indirect is called, we provide
- * our own mt7531_ind_mmd_phy_[read,write] to complete this
- * function.
+ /* Enable Energy-Efficient Ethernet (EEE) and PHY core PLL, since
+ * phy_device has not yet been created provided for
+ * phy_[read,write]_mmd_indirect is called, we provide our own
+ * mt7531_ind_mmd_phy_[read,write] to complete this function.
*/
val = mt7531_ind_c45_phy_read(priv, MT753X_CTRL_PHY_ADDR,
MDIO_MMD_VEND2, CORE_PLL_GROUP4);
- val |= MT7531_PHY_PLL_BYPASS_MODE;
+ val |= MT7531_RG_SYSPLL_DMY2 | MT7531_PHY_PLL_BYPASS_MODE;
val &= ~MT7531_PHY_PLL_OFF;
mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2,
CORE_PLL_GROUP4, val);
+ /* Disable EEE advertisement on the switch PHYs. */
+ for (i = MT753X_CTRL_PHY_ADDR;
+ i < MT753X_CTRL_PHY_ADDR + MT7530_NUM_PHYS; i++) {
+ mt7531_ind_c45_phy_write(priv, i, MDIO_MMD_AN, MDIO_AN_EEE_ADV,
+ 0);
+ }
+
mt7531_setup_common(ds);
/* Setup VLAN ID 0 for VLAN-unaware bridges */
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index d17b318e6ee488..a08053390b285e 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -32,6 +32,10 @@ enum mt753x_id {
#define SYSC_REG_RSTCTRL 0x34
#define RESET_MCM BIT(2)
+/* Register for ARL global control */
+#define MT753X_AGC 0xc
+#define LOCAL_EN BIT(7)
+
/* Registers to mac forward control for unknown frames */
#define MT7530_MFC 0x10
#define BC_FFP(x) (((x) & 0xff) << 24)
@@ -65,6 +69,7 @@ enum mt753x_id {
/* Registers for BPDU and PAE frame control*/
#define MT753X_BPC 0x24
+#define MT753X_PAE_BPDU_FR BIT(25)
#define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x)
#define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16)
@@ -75,20 +80,24 @@ enum mt753x_id {
/* Register for :01 and :02 MAC DA frame control */
#define MT753X_RGAC1 0x28
+#define MT753X_R02_BPDU_FR BIT(25)
#define MT753X_R02_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x)
#define MT753X_R02_PORT_FW_MASK GENMASK(18, 16)
#define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x)
+#define MT753X_R01_BPDU_FR BIT(9)
#define MT753X_R01_EG_TAG_MASK GENMASK(8, 6)
#define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x)
#define MT753X_R01_PORT_FW_MASK GENMASK(2, 0)
/* Register for :03 and :0E MAC DA frame control */
#define MT753X_RGAC2 0x2c
+#define MT753X_R0E_BPDU_FR BIT(25)
#define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x)
#define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16)
#define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x)
+#define MT753X_R03_BPDU_FR BIT(9)
#define MT753X_R03_EG_TAG_MASK GENMASK(8, 6)
#define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x)
#define MT753X_R03_PORT_FW_MASK GENMASK(2, 0)
@@ -616,6 +625,7 @@ enum mt7531_clk_skew {
#define RG_SYSPLL_DDSFBK_EN BIT(12)
#define RG_SYSPLL_BIAS_EN BIT(11)
#define RG_SYSPLL_BIAS_LPF_EN BIT(10)
+#define MT7531_RG_SYSPLL_DMY2 BIT(6)
#define MT7531_PHY_PLL_OFF BIT(5)
#define MT7531_PHY_PLL_BYPASS_MODE BIT(4)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 9ed1821184ece5..59b5dd0e2f41d2 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -566,13 +566,61 @@ static void mv88e6xxx_translate_cmode(u8 cmode, unsigned long *supported)
phy_interface_set_rgmii(supported);
}
-static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
- struct phylink_config *config)
+static void
+mv88e6250_setup_supported_interfaces(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config)
{
unsigned long *supported = config->supported_interfaces;
+ int err;
+ u16 reg;
- /* Translate the default cmode */
- mv88e6xxx_translate_cmode(chip->ports[port].cmode, supported);
+ err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, &reg);
+ if (err) {
+ dev_err(chip->dev, "p%d: failed to read port status\n", port);
+ return;
+ }
+
+ switch (reg & MV88E6250_PORT_STS_PORTMODE_MASK) {
+ case MV88E6250_PORT_STS_PORTMODE_MII_10_HALF_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_100_HALF_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_10_FULL_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_100_FULL_PHY:
+ __set_bit(PHY_INTERFACE_MODE_REVMII, supported);
+ break;
+
+ case MV88E6250_PORT_STS_PORTMODE_MII_HALF:
+ case MV88E6250_PORT_STS_PORTMODE_MII_FULL:
+ __set_bit(PHY_INTERFACE_MODE_MII, supported);
+ break;
+
+ case MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_200_RMII_FULL_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_HALF_PHY:
+ case MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL_PHY:
+ __set_bit(PHY_INTERFACE_MODE_REVRMII, supported);
+ break;
+
+ case MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL:
+ case MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL:
+ __set_bit(PHY_INTERFACE_MODE_RMII, supported);
+ break;
+
+ case MV88E6250_PORT_STS_PORTMODE_MII_100_RGMII:
+ __set_bit(PHY_INTERFACE_MODE_RGMII, supported);
+ break;
+
+ default:
+ dev_err(chip->dev,
+ "p%d: invalid port mode in status register: %04x\n",
+ port, reg);
+ }
+}
+
+static void mv88e6250_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
+ struct phylink_config *config)
+{
+ if (!mv88e6xxx_phy_is_internal(chip, port))
+ mv88e6250_setup_supported_interfaces(chip, port, config);
config->mac_capabilities = MAC_SYM_PAUSE | MAC_10 | MAC_100;
}
@@ -5503,8 +5551,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.family = MV88E6XXX_FAMILY_6250,
.name = "Marvell 88E6020",
.num_databases = 64,
- .num_ports = 4,
+ /* Ports 2-4 are not routed to pins
+ * => usable ports 0, 1, 5, 6
+ */
+ .num_ports = 7,
.num_internal_phys = 2,
+ .invalid_port_mask = BIT(2) | BIT(3) | BIT(4),
.max_vid = 4095,
.port_base_addr = 0x8,
.phy_base_addr = 0x0,
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index 86deeb347cbc1d..ddadeb9bfdaeed 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -25,10 +25,25 @@
#define MV88E6250_PORT_STS_PORTMODE_PHY_100_HALF 0x0900
#define MV88E6250_PORT_STS_PORTMODE_PHY_10_FULL 0x0a00
#define MV88E6250_PORT_STS_PORTMODE_PHY_100_FULL 0x0b00
-#define MV88E6250_PORT_STS_PORTMODE_MII_10_HALF 0x0c00
-#define MV88E6250_PORT_STS_PORTMODE_MII_100_HALF 0x0d00
-#define MV88E6250_PORT_STS_PORTMODE_MII_10_FULL 0x0e00
-#define MV88E6250_PORT_STS_PORTMODE_MII_100_FULL 0x0f00
+/* - Modes with PHY suffix use output instead of input clock
+ * - Modes without RMII or RGMII use MII
+ * - Modes without speed do not have a fixed speed specified in the manual
+ * ("DC to x MHz" - variable clock support?)
+ */
+#define MV88E6250_PORT_STS_PORTMODE_MII_DISABLED 0x0000
+#define MV88E6250_PORT_STS_PORTMODE_MII_100_RGMII 0x0100
+#define MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL_PHY 0x0200
+#define MV88E6250_PORT_STS_PORTMODE_MII_200_RMII_FULL_PHY 0x0400
+#define MV88E6250_PORT_STS_PORTMODE_MII_DUAL_100_RMII_FULL 0x0600
+#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL 0x0700
+#define MV88E6250_PORT_STS_PORTMODE_MII_HALF 0x0800
+#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_HALF_PHY 0x0900
+#define MV88E6250_PORT_STS_PORTMODE_MII_FULL 0x0a00
+#define MV88E6250_PORT_STS_PORTMODE_MII_10_100_RMII_FULL_PHY 0x0b00
+#define MV88E6250_PORT_STS_PORTMODE_MII_10_HALF_PHY 0x0c00
+#define MV88E6250_PORT_STS_PORTMODE_MII_100_HALF_PHY 0x0d00
+#define MV88E6250_PORT_STS_PORTMODE_MII_10_FULL_PHY 0x0e00
+#define MV88E6250_PORT_STS_PORTMODE_MII_100_FULL_PHY 0x0f00
#define MV88E6XXX_PORT_STS_LINK 0x0800
#define MV88E6XXX_PORT_STS_DUPLEX 0x0400
#define MV88E6XXX_PORT_STS_SPEED_MASK 0x0300
diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
index 833e55e4b96129..52ddb4ef259e93 100644
--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -94,7 +94,7 @@ int sja1110_pcs_mdio_read_c45(struct mii_bus *bus, int phy, int mmd, int reg)
return tmp & 0xffff;
}
-int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int reg, int mmd,
+int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int mmd, int reg,
u16 val)
{
struct sja1105_mdio_private *mdio_priv = bus->priv;
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 9e9e4a03f1a8c9..2d8a66ea82fab7 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -351,7 +351,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
io_sq->bounce_buf_ctrl.next_to_use = 0;
- size = io_sq->bounce_buf_ctrl.buffer_size *
+ size = (size_t)io_sq->bounce_buf_ctrl.buffer_size *
io_sq->bounce_buf_ctrl.buffers_num;
dev_node = dev_to_node(ena_dev->dmadev);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 09e7da1a69c9f0..be5acfa41ee0ce 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -718,8 +718,11 @@ void ena_unmap_tx_buff(struct ena_ring *tx_ring,
static void ena_free_tx_bufs(struct ena_ring *tx_ring)
{
bool print_once = true;
+ bool is_xdp_ring;
u32 i;
+ is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid);
+
for (i = 0; i < tx_ring->ring_size; i++) {
struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
@@ -739,10 +742,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring)
ena_unmap_tx_buff(tx_ring, tx_info);
- dev_kfree_skb_any(tx_info->skb);
+ if (is_xdp_ring)
+ xdp_return_frame(tx_info->xdpf);
+ else
+ dev_kfree_skb_any(tx_info->skb);
}
- netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
- tx_ring->qid));
+
+ if (!is_xdp_ring)
+ netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->qid));
}
static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
@@ -3481,10 +3489,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
{
struct ena_ring *tx_ring;
struct ena_ring *rx_ring;
- int i, budget, rc;
+ int qid, budget, rc;
int io_queue_count;
io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
+
/* Make sure the driver doesn't turn the device in other process */
smp_rmb();
@@ -3497,27 +3506,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
return;
- budget = ENA_MONITORED_TX_QUEUES;
+ budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES);
- for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
- tx_ring = &adapter->tx_ring[i];
- rx_ring = &adapter->rx_ring[i];
+ qid = adapter->last_monitored_tx_qid;
+
+ while (budget) {
+ qid = (qid + 1) % io_queue_count;
+
+ tx_ring = &adapter->tx_ring[qid];
+ rx_ring = &adapter->rx_ring[qid];
rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
if (unlikely(rc))
return;
- rc = !ENA_IS_XDP_INDEX(adapter, i) ?
+ rc = !ENA_IS_XDP_INDEX(adapter, qid) ?
check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
if (unlikely(rc))
return;
budget--;
- if (!budget)
- break;
}
- adapter->last_monitored_tx_qid = i % io_queue_count;
+ adapter->last_monitored_tx_qid = qid;
}
/* trigger napi schedule after 2 consecutive detections */
diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c
index 337c435d3ce998..5b175e7e92a10b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_xdp.c
+++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c
@@ -89,7 +89,7 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
rc = ena_xdp_tx_map_frame(tx_ring, tx_info, xdpf, &ena_tx_ctx);
if (unlikely(rc))
- return rc;
+ goto err;
ena_tx_ctx.req_id = req_id;
@@ -112,7 +112,9 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
error_unmap_dma:
ena_unmap_tx_buff(tx_ring, tx_info);
+err:
tx_info->xdpf = NULL;
+
return rc;
}
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 9662ee72814c0c..536635e5772799 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -593,6 +593,16 @@ err_out:
pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
}
+void pdsc_pci_reset_thread(struct work_struct *work)
+{
+ struct pdsc *pdsc = container_of(work, struct pdsc, pci_reset_work);
+ struct pci_dev *pdev = pdsc->pdev;
+
+ pci_dev_get(pdev);
+ pci_reset_function(pdev);
+ pci_dev_put(pdev);
+}
+
static void pdsc_check_pci_health(struct pdsc *pdsc)
{
u8 fw_status;
@@ -607,7 +617,8 @@ static void pdsc_check_pci_health(struct pdsc *pdsc)
if (fw_status != PDS_RC_BAD_PCI)
return;
- pci_reset_function(pdsc->pdev);
+ /* prevent deadlock between pdsc_reset_prepare and pdsc_health_thread */
+ queue_work(pdsc->wq, &pdsc->pci_reset_work);
}
void pdsc_health_thread(struct work_struct *work)
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index 92d7657dd6147e..a3e17a0c187a6a 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -197,6 +197,7 @@ struct pdsc {
struct pdsc_qcq notifyqcq;
u64 last_eid;
struct pdsc_viftype *viftype_status;
+ struct work_struct pci_reset_work;
};
/** enum pds_core_dbell_bits - bitwise composition of dbell values.
@@ -313,5 +314,6 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
void pdsc_fw_down(struct pdsc *pdsc);
void pdsc_fw_up(struct pdsc *pdsc);
+void pdsc_pci_reset_thread(struct work_struct *work);
#endif /* _PDSC_H_ */
diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
index e494e1298dc9a3..495ef4ef8c103d 100644
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -229,6 +229,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc)
.reset.opcode = PDS_CORE_CMD_RESET,
};
+ if (!pdsc_is_fw_running(pdsc))
+ return 0;
+
return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
}
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index ab6133e7db422d..660268ff95623f 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -239,6 +239,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid);
pdsc->wq = create_singlethread_workqueue(wq_name);
INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+ INIT_WORK(&pdsc->pci_reset_work, pdsc_pci_reset_thread);
timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
index dd06b68b33ed61..82768b0e90262b 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
@@ -392,7 +392,9 @@ static void umac_reset(struct bcmasp_intf *intf)
umac_wl(intf, 0x0, UMC_CMD);
umac_wl(intf, UMC_CMD_SW_RESET, UMC_CMD);
usleep_range(10, 100);
- umac_wl(intf, 0x0, UMC_CMD);
+ /* We hold the umac in reset and bring it out of
+ * reset when phy link is up.
+ */
}
static void umac_set_hw_addr(struct bcmasp_intf *intf,
@@ -412,6 +414,8 @@ static void umac_enable_set(struct bcmasp_intf *intf, u32 mask,
u32 reg;
reg = umac_rl(intf, UMC_CMD);
+ if (reg & UMC_CMD_SW_RESET)
+ return;
if (enable)
reg |= mask;
else
@@ -430,13 +434,10 @@ static void umac_init(struct bcmasp_intf *intf)
umac_wl(intf, 0x800, UMC_FRM_LEN);
umac_wl(intf, 0xffff, UMC_PAUSE_CNTRL);
umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ);
- umac_enable_set(intf, UMC_CMD_PROMISC, 1);
}
-static int bcmasp_tx_poll(struct napi_struct *napi, int budget)
+static int bcmasp_tx_reclaim(struct bcmasp_intf *intf)
{
- struct bcmasp_intf *intf =
- container_of(napi, struct bcmasp_intf, tx_napi);
struct bcmasp_intf_stats64 *stats = &intf->stats64;
struct device *kdev = &intf->parent->pdev->dev;
unsigned long read, released = 0;
@@ -479,10 +480,16 @@ static int bcmasp_tx_poll(struct napi_struct *napi, int budget)
DESC_RING_COUNT);
}
- /* Ensure all descriptors have been written to DRAM for the hardware
- * to see updated contents.
- */
- wmb();
+ return released;
+}
+
+static int bcmasp_tx_poll(struct napi_struct *napi, int budget)
+{
+ struct bcmasp_intf *intf =
+ container_of(napi, struct bcmasp_intf, tx_napi);
+ int released = 0;
+
+ released = bcmasp_tx_reclaim(intf);
napi_complete(&intf->tx_napi);
@@ -658,6 +665,12 @@ static void bcmasp_adj_link(struct net_device *dev)
UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE |
UMC_CMD_TX_PAUSE_IGNORE);
reg |= cmd_bits;
+ if (reg & UMC_CMD_SW_RESET) {
+ reg &= ~UMC_CMD_SW_RESET;
+ umac_wl(intf, reg, UMC_CMD);
+ udelay(2);
+ reg |= UMC_CMD_TX_EN | UMC_CMD_RX_EN | UMC_CMD_PROMISC;
+ }
umac_wl(intf, reg, UMC_CMD);
active = phy_init_eee(phydev, 0) >= 0;
@@ -788,6 +801,7 @@ static void bcmasp_init_tx(struct bcmasp_intf *intf)
intf->tx_spb_dma_read = intf->tx_spb_dma_addr;
intf->tx_spb_index = 0;
intf->tx_spb_clean_index = 0;
+ memset(intf->tx_cbs, 0, sizeof(struct bcmasp_tx_cb) * DESC_RING_COUNT);
/* Make sure channels are disabled */
tx_spb_ctrl_wl(intf, 0x0, TX_SPB_CTRL_ENABLE);
@@ -876,6 +890,8 @@ static void bcmasp_netif_deinit(struct net_device *dev)
} while (timeout-- > 0);
tx_spb_dma_wl(intf, 0x0, TX_SPB_DMA_FIFO_CTRL);
+ bcmasp_tx_reclaim(intf);
+
umac_enable_set(intf, UMC_CMD_TX_EN, 0);
phy_stop(dev->phydev);
@@ -1035,19 +1051,12 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
/* Indicate that the MAC is responsible for PHY PM */
phydev->mac_managed_pm = true;
- } else if (!intf->wolopts) {
- ret = phy_resume(dev->phydev);
- if (ret)
- goto err_phy_disable;
}
umac_reset(intf);
umac_init(intf);
- /* Disable the UniMAC RX/TX */
- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 0);
-
umac_set_hw_addr(intf, dev->dev_addr);
intf->old_duplex = -1;
@@ -1062,9 +1071,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
netif_napi_add(intf->ndev, &intf->rx_napi, bcmasp_rx_poll);
bcmasp_enable_rx(intf, 1);
- /* Turn on UniMAC TX/RX */
- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 1);
-
intf->crc_fwd = !!(umac_rl(intf, UMC_CMD) & UMC_CMD_CRC_FWD);
bcmasp_netif_start(dev);
@@ -1306,7 +1312,14 @@ static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf)
if (intf->wolopts & WAKE_FILTER)
bcmasp_netfilt_suspend(intf);
- /* UniMAC receive needs to be turned on */
+ /* Bring UniMAC out of reset if needed and enable RX */
+ reg = umac_rl(intf, UMC_CMD);
+ if (reg & UMC_CMD_SW_RESET)
+ reg &= ~UMC_CMD_SW_RESET;
+
+ reg |= UMC_CMD_RX_EN | UMC_CMD_PROMISC;
+ umac_wl(intf, reg, UMC_CMD);
+
umac_enable_set(intf, UMC_CMD_RX_EN, 1);
if (intf->parent->wol_irq > 0) {
@@ -1324,7 +1337,6 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf)
{
struct device *kdev = &intf->parent->pdev->dev;
struct net_device *dev = intf->ndev;
- int ret = 0;
if (!netif_running(dev))
return 0;
@@ -1334,10 +1346,6 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf)
bcmasp_netif_deinit(dev);
if (!intf->wolopts) {
- ret = phy_suspend(dev->phydev);
- if (ret)
- goto out;
-
if (intf->internal_phy)
bcmasp_ephy_enable_set(intf, false);
else
@@ -1354,11 +1362,7 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf)
clk_disable_unprepare(intf->parent->clk);
- return ret;
-
-out:
- bcmasp_netif_init(dev, false);
- return ret;
+ return 0;
}
static void bcmasp_resume_from_wol(struct bcmasp_intf *intf)
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 3e4fb3c3e8342a..1be6d14030bcff 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -2009,12 +2009,14 @@ static int b44_set_pauseparam(struct net_device *dev,
bp->flags |= B44_FLAG_TX_PAUSE;
else
bp->flags &= ~B44_FLAG_TX_PAUSE;
- if (bp->flags & B44_FLAG_PAUSE_AUTO) {
- b44_halt(bp);
- b44_init_rings(bp);
- b44_init_hw(bp, B44_FULL_RESET);
- } else {
- __b44_set_flow_ctrl(bp, bp->flags);
+ if (netif_running(dev)) {
+ if (bp->flags & B44_FLAG_PAUSE_AUTO) {
+ b44_halt(bp);
+ b44_init_rings(bp);
+ b44_init_hw(bp, B44_FULL_RESET);
+ } else {
+ __b44_set_flow_ctrl(bp, bp->flags);
+ }
}
spin_unlock_irq(&bp->lock);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 493b724848c8f4..2c2ee79c4d7795 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1778,7 +1778,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping);
if (!skb) {
bnxt_abort_tpa(cpr, idx, agg_bufs);
- cpr->sw_stats.rx.rx_oom_discards += 1;
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
} else {
@@ -1788,7 +1788,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
new_data = __bnxt_alloc_rx_frag(bp, &new_mapping, GFP_ATOMIC);
if (!new_data) {
bnxt_abort_tpa(cpr, idx, agg_bufs);
- cpr->sw_stats.rx.rx_oom_discards += 1;
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
@@ -1804,7 +1804,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
if (!skb) {
skb_free_frag(data);
bnxt_abort_tpa(cpr, idx, agg_bufs);
- cpr->sw_stats.rx.rx_oom_discards += 1;
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
skb_reserve(skb, bp->rx_offset);
@@ -1815,7 +1815,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, idx, agg_bufs, true);
if (!skb) {
/* Page reuse already handled by bnxt_rx_pages(). */
- cpr->sw_stats.rx.rx_oom_discards += 1;
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1;
return NULL;
}
}
@@ -2094,11 +2094,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp,
cp_cons, agg_bufs,
false);
- if (!frag_len) {
- cpr->sw_stats.rx.rx_oom_discards += 1;
- rc = -ENOMEM;
- goto next_rx;
- }
+ if (!frag_len)
+ goto oom_next_rx;
}
xdp_active = true;
}
@@ -2121,9 +2118,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
else
bnxt_xdp_buff_frags_free(rxr, &xdp);
}
- cpr->sw_stats.rx.rx_oom_discards += 1;
- rc = -ENOMEM;
- goto next_rx;
+ goto oom_next_rx;
}
} else {
u32 payload;
@@ -2134,29 +2129,21 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
payload = 0;
skb = bp->rx_skb_func(bp, rxr, cons, data, data_ptr, dma_addr,
payload | len);
- if (!skb) {
- cpr->sw_stats.rx.rx_oom_discards += 1;
- rc = -ENOMEM;
- goto next_rx;
- }
+ if (!skb)
+ goto oom_next_rx;
}
if (agg_bufs) {
if (!xdp_active) {
skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, cp_cons, agg_bufs, false);
- if (!skb) {
- cpr->sw_stats.rx.rx_oom_discards += 1;
- rc = -ENOMEM;
- goto next_rx;
- }
+ if (!skb)
+ goto oom_next_rx;
} else {
skb = bnxt_xdp_build_skb(bp, skb, agg_bufs, rxr->page_pool, &xdp, rxcmp1);
if (!skb) {
/* we should be able to free the old skb here */
bnxt_xdp_buff_frags_free(rxr, &xdp);
- cpr->sw_stats.rx.rx_oom_discards += 1;
- rc = -ENOMEM;
- goto next_rx;
+ goto oom_next_rx;
}
}
}
@@ -2234,6 +2221,11 @@ next_rx_no_prod_no_len:
*raw_cons = tmp_raw_cons;
return rc;
+
+oom_next_rx:
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_oom_discards += 1;
+ rc = -ENOMEM;
+ goto next_rx;
}
/* In netpoll mode, if we are using a combined completion ring, we need to
@@ -2280,7 +2272,7 @@ static int bnxt_force_rx_discard(struct bnxt *bp,
}
rc = bnxt_rx_pkt(bp, cpr, raw_cons, event);
if (rc && rc != -EBUSY)
- cpr->sw_stats.rx.rx_netpoll_discards += 1;
+ cpr->bnapi->cp_ring.sw_stats.rx.rx_netpoll_discards += 1;
return rc;
}
@@ -9089,7 +9081,7 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp)
BNXT_FW_HEALTH_WIN_BASE +
BNXT_GRC_REG_CHIP_NUM);
}
- if (!BNXT_CHIP_P5(bp))
+ if (!BNXT_CHIP_P5_PLUS(bp))
return;
status_loc = BNXT_GRC_REG_STATUS_P5 |
@@ -11758,6 +11750,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
/* VF-reps may need to be re-opened after the PF is re-opened */
if (BNXT_PF(bp))
bnxt_vf_reps_open(bp);
+ if (bp->ptp_cfg)
+ atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS);
bnxt_ptp_init_rtc(bp, true);
bnxt_ptp_cfg_tstamp_filters(bp);
bnxt_cfg_usr_fltrs(bp);
@@ -13035,6 +13029,16 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
bnxt_rtnl_unlock_sp(bp);
}
+static void bnxt_fw_fatal_close(struct bnxt *bp)
+{
+ bnxt_tx_disable(bp);
+ bnxt_disable_napi(bp);
+ bnxt_disable_int_sync(bp);
+ bnxt_free_irq(bp);
+ bnxt_clear_int_mode(bp);
+ pci_disable_device(bp->pdev);
+}
+
static void bnxt_fw_reset_close(struct bnxt *bp)
{
bnxt_ulp_stop(bp);
@@ -13048,12 +13052,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp)
pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID, &val);
if (val == 0xffff)
bp->fw_reset_min_dsecs = 0;
- bnxt_tx_disable(bp);
- bnxt_disable_napi(bp);
- bnxt_disable_int_sync(bp);
- bnxt_free_irq(bp);
- bnxt_clear_int_mode(bp);
- pci_disable_device(bp->pdev);
+ bnxt_fw_fatal_close(bp);
}
__bnxt_close_nic(bp, true, false);
bnxt_vf_reps_free(bp);
@@ -15371,6 +15370,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct bnxt *bp = netdev_priv(netdev);
+ bool abort = false;
netdev_info(netdev, "PCI I/O error detected\n");
@@ -15379,16 +15379,27 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
bnxt_ulp_stop(bp);
- if (state == pci_channel_io_perm_failure) {
+ if (test_and_set_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
+ netdev_err(bp->dev, "Firmware reset already in progress\n");
+ abort = true;
+ }
+
+ if (abort || state == pci_channel_io_perm_failure) {
rtnl_unlock();
return PCI_ERS_RESULT_DISCONNECT;
}
- if (state == pci_channel_io_frozen)
+ /* Link is not reliable anymore if state is pci_channel_io_frozen
+ * so we disable bus master to prevent any potential bad DMAs before
+ * freeing kernel memory.
+ */
+ if (state == pci_channel_io_frozen) {
set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state);
+ bnxt_fw_fatal_close(bp);
+ }
if (netif_running(netdev))
- bnxt_close(netdev);
+ __bnxt_close_nic(bp, true, true);
if (pci_is_enabled(pdev))
pci_disable_device(pdev);
@@ -15472,6 +15483,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
}
reset_exit:
+ clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
bnxt_clear_reservations(bp, true);
rtnl_unlock();
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 93f9bd55020f27..195c02dc068305 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -210,6 +210,9 @@ void bnxt_ulp_start(struct bnxt *bp, int err)
if (err)
return;
+ if (edev->ulp_tbl->msix_requested)
+ bnxt_fill_msix_vecs(bp, edev->msix_entries);
+
if (aux_priv) {
struct auxiliary_device *adev;
@@ -392,12 +395,13 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp)
if (!edev)
goto aux_dev_uninit;
+ aux_priv->edev = edev;
+
ulp = kzalloc(sizeof(*ulp), GFP_KERNEL);
if (!ulp)
goto aux_dev_uninit;
edev->ulp_tbl = ulp;
- aux_priv->edev = edev;
bp->edev = edev;
bnxt_set_edev_info(edev, bp);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 7396e2823e3285..b1f84b37032a78 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3280,7 +3280,7 @@ static void bcmgenet_get_hw_addr(struct bcmgenet_priv *priv,
}
/* Returns a reusable dma control register value */
-static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv)
+static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv, bool flush_rx)
{
unsigned int i;
u32 reg;
@@ -3305,6 +3305,14 @@ static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv)
udelay(10);
bcmgenet_umac_writel(priv, 0, UMAC_TX_FLUSH);
+ if (flush_rx) {
+ reg = bcmgenet_rbuf_ctrl_get(priv);
+ bcmgenet_rbuf_ctrl_set(priv, reg | BIT(0));
+ udelay(10);
+ bcmgenet_rbuf_ctrl_set(priv, reg);
+ udelay(10);
+ }
+
return dma_ctrl;
}
@@ -3368,8 +3376,8 @@ static int bcmgenet_open(struct net_device *dev)
bcmgenet_set_hw_addr(priv, dev->dev_addr);
- /* Disable RX/TX DMA and flush TX queues */
- dma_ctrl = bcmgenet_dma_disable(priv);
+ /* Disable RX/TX DMA and flush TX and RX queues */
+ dma_ctrl = bcmgenet_dma_disable(priv, true);
/* Reinitialize TDMA and RDMA and SW housekeeping */
ret = bcmgenet_init_dma(priv);
@@ -4235,7 +4243,7 @@ static int bcmgenet_resume(struct device *d)
bcmgenet_hfb_create_rxnfc_filter(priv, rule);
/* Disable RX/TX DMA and flush TX queues */
- dma_ctrl = bcmgenet_dma_disable(priv);
+ dma_ctrl = bcmgenet_dma_disable(priv, false);
/* Reinitialize TDMA and RDMA and SW housekeeping */
ret = bcmgenet_init_dma(priv);
diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
index 7246e13dd559fc..97291bfbeea589 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
@@ -312,7 +312,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf,
void *kern_buf;
/* Copy the user space buf */
- kern_buf = memdup_user(buf, nbytes);
+ kern_buf = memdup_user_nul(buf, nbytes);
if (IS_ERR(kern_buf))
return PTR_ERR(kern_buf);
@@ -372,7 +372,7 @@ bnad_debugfs_write_regwr(struct file *file, const char __user *buf,
void *kern_buf;
/* Copy the user space buf */
- kern_buf = memdup_user(buf, nbytes);
+ kern_buf = memdup_user_nul(buf, nbytes);
if (IS_ERR(kern_buf))
return PTR_ERR(kern_buf);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index d7693fdf640d53..8bd213da8fb6f5 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2454,8 +2454,6 @@ static int fec_enet_mii_probe(struct net_device *ndev)
fep->link = 0;
fep->full_duplex = 0;
- phy_dev->mac_managed_pm = true;
-
phy_attached_info(phy_dev);
return 0;
@@ -2467,10 +2465,12 @@ static int fec_enet_mii_init(struct platform_device *pdev)
struct net_device *ndev = platform_get_drvdata(pdev);
struct fec_enet_private *fep = netdev_priv(ndev);
bool suppress_preamble = false;
+ struct phy_device *phydev;
struct device_node *node;
int err = -ENXIO;
u32 mii_speed, holdtime;
u32 bus_freq;
+ int addr;
/*
* The i.MX28 dual fec interfaces are not equal.
@@ -2584,6 +2584,13 @@ static int fec_enet_mii_init(struct platform_device *pdev)
goto err_out_free_mdiobus;
of_node_put(node);
+ /* find all the PHY devices on the bus and set mac_managed_pm to true */
+ for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+ phydev = mdiobus_get_phy(fep->mii_bus, addr);
+ if (phydev)
+ phydev->mac_managed_pm = true;
+ }
+
mii_cnt++;
/* save fec0 mii_bus */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
index f3c9395d8351cb..618f66d9586b39 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
@@ -85,7 +85,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle,
hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS,
true);
- desc.data[0] = cpu_to_le32(tqp->index & 0x1ff);
+ desc.data[0] = cpu_to_le32(tqp->index);
ret = hclge_comm_cmd_send(hw, &desc, 1);
if (ret) {
dev_err(&hw->cmq.csq.pdev->dev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 999a0ee162a644..941cb529d671fb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -78,6 +78,9 @@ static const struct hns3_stats hns3_rxq_stats[] = {
#define HNS3_NIC_LB_TEST_NO_MEM_ERR 1
#define HNS3_NIC_LB_TEST_TX_CNT_ERR 2
#define HNS3_NIC_LB_TEST_RX_CNT_ERR 3
+#define HNS3_NIC_LB_TEST_UNEXECUTED 4
+
+static int hns3_get_sset_count(struct net_device *netdev, int stringset);
static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
{
@@ -418,18 +421,26 @@ static void hns3_do_external_lb(struct net_device *ndev,
static void hns3_self_test(struct net_device *ndev,
struct ethtool_test *eth_test, u64 *data)
{
+ int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST);
struct hns3_nic_priv *priv = netdev_priv(ndev);
struct hnae3_handle *h = priv->ae_handle;
int st_param[HNAE3_LOOP_NONE][2];
bool if_running = netif_running(ndev);
+ int i;
+
+ /* initialize the loopback test result, avoid marking an unexcuted
+ * loopback test as PASS.
+ */
+ for (i = 0; i < cnt; i++)
+ data[i] = HNS3_NIC_LB_TEST_UNEXECUTED;
if (hns3_nic_resetting(ndev)) {
netdev_err(ndev, "dev resetting!");
- return;
+ goto failure;
}
if (!(eth_test->flags & ETH_TEST_FL_OFFLINE))
- return;
+ goto failure;
if (netif_msg_ifdown(h))
netdev_info(ndev, "self test start\n");
@@ -451,6 +462,10 @@ static void hns3_self_test(struct net_device *ndev,
if (netif_msg_ifdown(h))
netdev_info(ndev, "self test end\n");
+ return;
+
+failure:
+ eth_test->flags |= ETH_TEST_FL_FAILED;
}
static void hns3_update_limit_promisc_mode(struct net_device *netdev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index b4afb66efe5c5c..ff6a2ed23ddb6b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11626,6 +11626,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
if (ret)
goto err_pci_uninit;
+ devl_lock(hdev->devlink);
+
/* Firmware command queue initialize */
ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw);
if (ret)
@@ -11805,6 +11807,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
hclge_task_schedule(hdev, round_jiffies_relative(HZ));
+ devl_unlock(hdev->devlink);
return 0;
err_mdiobus_unreg:
@@ -11817,6 +11820,7 @@ err_msi_uninit:
err_cmd_uninit:
hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw);
err_devlink_uninit:
+ devl_unlock(hdev->devlink);
hclge_devlink_uninit(hdev);
err_pci_uninit:
pcim_iounmap(pdev, hdev->hw.hw.io_base);
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index 1fef6bb5a5fbc8..4b6e7536170abc 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -628,6 +628,7 @@ struct e1000_phy_info {
u32 id;
u32 reset_delay_us; /* in usec */
u32 revision;
+ u32 retry_count;
enum e1000_media_type media_type;
@@ -644,6 +645,7 @@ struct e1000_phy_info {
bool polarity_correction;
bool speed_downgraded;
bool autoneg_wait_to_complete;
+ bool retry_enabled;
};
struct e1000_nvm_info {
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 19e450a5bd314f..f9e94be36e97f2 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -222,11 +222,18 @@ out:
if (hw->mac.type >= e1000_pch_lpt) {
/* Only unforce SMBus if ME is not active */
if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* Unforce SMBus mode in PHY */
e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
+ e1000e_enable_phy_retry(hw);
+
/* Unforce SMBus mode in MAC */
mac_reg = er32(CTRL_EXT);
mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
@@ -310,6 +317,11 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
goto out;
}
+ /* There is no guarantee that the PHY is accessible at this time
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* The MAC-PHY interconnect may be in SMBus mode. If the PHY is
* inaccessible and resetting the PHY is not blocked, toggle the
* LANPHYPC Value bit to force the interconnect to PCIe mode.
@@ -380,6 +392,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
break;
}
+ e1000e_enable_phy_retry(hw);
+
hw->phy.ops.release(hw);
if (!ret_val) {
@@ -449,6 +463,11 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
phy->id = e1000_phy_unknown;
+ if (hw->mac.type == e1000_pch_mtp) {
+ phy->retry_count = 2;
+ e1000e_enable_phy_retry(hw);
+ }
+
ret_val = e1000_init_phy_workarounds_pchlan(hw);
if (ret_val)
return ret_val;
@@ -1146,18 +1165,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
if (ret_val)
goto out;
- /* Force SMBus mode in PHY */
- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
- if (ret_val)
- goto release;
- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
-
- /* Force SMBus mode in MAC */
- mac_reg = er32(CTRL_EXT);
- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_reg);
-
/* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
* LPLU and disable Gig speed when entering ULP
*/
@@ -1313,6 +1320,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
/* Toggle LANPHYPC Value bit */
e1000_toggle_lanphypc_pch_lpt(hw);
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* Unforce SMBus mode in PHY */
ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
if (ret_val) {
@@ -1333,6 +1345,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+ e1000e_enable_phy_retry(hw);
+
/* Unforce SMBus mode in MAC */
mac_reg = er32(CTRL_EXT);
mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index cc8c531ec3dff3..3692fce201959f 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6623,6 +6623,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
struct e1000_hw *hw = &adapter->hw;
u32 ctrl, ctrl_ext, rctl, status, wufc;
int retval = 0;
+ u16 smb_ctrl;
/* Runtime suspend should only enable wakeup for link changes */
if (runtime)
@@ -6696,6 +6697,23 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
if (retval)
return retval;
}
+
+ /* Force SMBUS to allow WOL */
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
+ e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl);
+ smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
+ e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl);
+
+ e1000e_enable_phy_retry(hw);
+
+ /* Force SMBus mode in MAC */
+ ctrl_ext = er32(CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, ctrl_ext);
}
/* Ensure that the appropriate bits are set in LPI_CTRL
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index 5e329156d1bae0..93544f1cc2a51b 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -107,6 +107,16 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw)
return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0);
}
+void e1000e_disable_phy_retry(struct e1000_hw *hw)
+{
+ hw->phy.retry_enabled = false;
+}
+
+void e1000e_enable_phy_retry(struct e1000_hw *hw)
+{
+ hw->phy.retry_enabled = true;
+}
+
/**
* e1000e_read_phy_reg_mdic - Read MDI control register
* @hw: pointer to the HW structure
@@ -118,55 +128,73 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw)
**/
s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
{
+ u32 i, mdic = 0, retry_counter, retry_max;
struct e1000_phy_info *phy = &hw->phy;
- u32 i, mdic = 0;
+ bool success;
if (offset > MAX_PHY_REG_ADDRESS) {
e_dbg("PHY Address %d is out of range\n", offset);
return -E1000_ERR_PARAM;
}
+ retry_max = phy->retry_enabled ? phy->retry_count : 0;
+
/* Set up Op-code, Phy Address, and register offset in the MDI
* Control register. The MAC will take care of interfacing with the
* PHY to retrieve the desired data.
*/
- mdic = ((offset << E1000_MDIC_REG_SHIFT) |
- (phy->addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_READ));
+ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
+ success = true;
- ew32(MDIC, mdic);
+ mdic = ((offset << E1000_MDIC_REG_SHIFT) |
+ (phy->addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_READ));
- /* Poll the ready bit to see if the MDI read completed
- * Increasing the time out as testing showed failures with
- * the lower time out
- */
- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
- udelay(50);
- mdic = er32(MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset);
- return -E1000_ERR_PHY;
- }
- if (mdic & E1000_MDIC_ERROR) {
- e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
- return -E1000_ERR_PHY;
- }
- if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
- e_dbg("MDI Read offset error - requested %d, returned %d\n",
- offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
- return -E1000_ERR_PHY;
+ ew32(MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+ usleep_range(50, 60);
+ mdic = er32(MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ e_dbg("MDI Read PHY Reg Address %d did not complete\n",
+ offset);
+ success = false;
+ }
+ if (mdic & E1000_MDIC_ERROR) {
+ e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
+ success = false;
+ }
+ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
+ e_dbg("MDI Read offset error - requested %d, returned %d\n",
+ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
+ success = false;
+ }
+
+ /* Allow some time after each MDIC transaction to avoid
+ * reading duplicate data in the next MDIC transaction.
+ */
+ if (hw->mac.type == e1000_pch2lan)
+ usleep_range(100, 150);
+
+ if (success) {
+ *data = (u16)mdic;
+ return 0;
+ }
+
+ if (retry_counter != retry_max) {
+ e_dbg("Perform retry on PHY transaction...\n");
+ mdelay(10);
+ }
}
- *data = (u16)mdic;
- /* Allow some time after each MDIC transaction to avoid
- * reading duplicate data in the next MDIC transaction.
- */
- if (hw->mac.type == e1000_pch2lan)
- udelay(100);
- return 0;
+ return -E1000_ERR_PHY;
}
/**
@@ -179,56 +207,72 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
**/
s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
{
+ u32 i, mdic = 0, retry_counter, retry_max;
struct e1000_phy_info *phy = &hw->phy;
- u32 i, mdic = 0;
+ bool success;
if (offset > MAX_PHY_REG_ADDRESS) {
e_dbg("PHY Address %d is out of range\n", offset);
return -E1000_ERR_PARAM;
}
+ retry_max = phy->retry_enabled ? phy->retry_count : 0;
+
/* Set up Op-code, Phy Address, and register offset in the MDI
* Control register. The MAC will take care of interfacing with the
* PHY to retrieve the desired data.
*/
- mdic = (((u32)data) |
- (offset << E1000_MDIC_REG_SHIFT) |
- (phy->addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_WRITE));
+ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
+ success = true;
- ew32(MDIC, mdic);
+ mdic = (((u32)data) |
+ (offset << E1000_MDIC_REG_SHIFT) |
+ (phy->addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_WRITE));
- /* Poll the ready bit to see if the MDI read completed
- * Increasing the time out as testing showed failures with
- * the lower time out
- */
- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
- udelay(50);
- mdic = er32(MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset);
- return -E1000_ERR_PHY;
- }
- if (mdic & E1000_MDIC_ERROR) {
- e_dbg("MDI Write PHY Red Address %d Error\n", offset);
- return -E1000_ERR_PHY;
- }
- if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
- e_dbg("MDI Write offset error - requested %d, returned %d\n",
- offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
- return -E1000_ERR_PHY;
- }
+ ew32(MDIC, mdic);
- /* Allow some time after each MDIC transaction to avoid
- * reading duplicate data in the next MDIC transaction.
- */
- if (hw->mac.type == e1000_pch2lan)
- udelay(100);
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+ usleep_range(50, 60);
+ mdic = er32(MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ e_dbg("MDI Write PHY Reg Address %d did not complete\n",
+ offset);
+ success = false;
+ }
+ if (mdic & E1000_MDIC_ERROR) {
+ e_dbg("MDI Write PHY Reg Address %d Error\n", offset);
+ success = false;
+ }
+ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
+ e_dbg("MDI Write offset error - requested %d, returned %d\n",
+ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
+ success = false;
+ }
- return 0;
+ /* Allow some time after each MDIC transaction to avoid
+ * reading duplicate data in the next MDIC transaction.
+ */
+ if (hw->mac.type == e1000_pch2lan)
+ usleep_range(100, 150);
+
+ if (success)
+ return 0;
+
+ if (retry_counter != retry_max) {
+ e_dbg("Perform retry on PHY transaction...\n");
+ mdelay(10);
+ }
+ }
+
+ return -E1000_ERR_PHY;
}
/**
diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h
index c48777d0952352..049bb325b4b14f 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.h
+++ b/drivers/net/ethernet/intel/e1000e/phy.h
@@ -51,6 +51,8 @@ s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data);
s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data);
void e1000_power_up_phy_copper(struct e1000_hw *hw);
void e1000_power_down_phy_copper(struct e1000_hw *hw);
+void e1000e_disable_phy_retry(struct e1000_hw *hw);
+void e1000e_enable_phy_retry(struct e1000_hw *hw);
s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data);
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index ba24f3fa92c371..2fbabcdb5bb5f3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -955,6 +955,7 @@ struct i40e_q_vector {
struct rcu_head rcu; /* to avoid race with update stats on free */
char name[I40E_INT_NAME_STR_LEN];
bool arm_wb_state;
+ bool in_busy_poll;
int irq_num; /* IRQ assigned to this q_vector */
} ____cacheline_internodealigned_in_smp;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f86578857e8aee..ffb9f9f15c5232 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1253,8 +1253,11 @@ int i40e_count_filters(struct i40e_vsi *vsi)
int bkt;
int cnt = 0;
- hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
- ++cnt;
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+ if (f->state == I40E_FILTER_NEW ||
+ f->state == I40E_FILTER_ACTIVE)
+ ++cnt;
+ }
return cnt;
}
@@ -3911,6 +3914,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
q_vector->tx.target_itr >> 1);
q_vector->tx.current_itr = q_vector->tx.target_itr;
+ /* Set ITR for software interrupts triggered after exiting
+ * busy-loop polling.
+ */
+ wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1),
+ I40E_ITR_20K);
+
wr32(hw, I40E_PFINT_RATEN(vector - 1),
i40e_intrl_usec_to_reg(vsi->int_rate_limit));
@@ -16098,8 +16107,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
val = FIELD_GET(I40E_PRTGL_SAH_MFS_MASK,
rd32(&pf->hw, I40E_PRTGL_SAH));
if (val < MAX_FRAME_SIZE_DEFAULT)
- dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n",
- pf->hw.port, val);
+ dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n",
+ pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT);
/* Add a filter to drop all Flow control frames from any VSI from being
* transmitted. By doing so we stop a malicious VF from sending out
@@ -16641,7 +16650,7 @@ static int __init i40e_init_module(void)
* since we need to be able to guarantee forward progress even under
* memory pressure.
*/
- i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name);
+ i40e_wq = alloc_workqueue("%s", 0, 0, i40e_driver_name);
if (!i40e_wq) {
pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 14ab642cafdb26..432afbb6420137 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -333,8 +333,11 @@
#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
#define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5
+#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT)
#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT)
#define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */
#define I40E_PFINT_ICR0_INTEVENT_SHIFT 0
#define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 0d7177083708f2..1a12b732818eef 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2630,7 +2630,22 @@ process_next:
return failure ? budget : (int)total_rx_packets;
}
-static inline u32 i40e_buildreg_itr(const int type, u16 itr)
+/**
+ * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register
+ * @itr_idx: interrupt throttling index
+ * @interval: interrupt throttling interval value in usecs
+ * @force_swint: force software interrupt
+ *
+ * The function builds a value for I40E_PFINT_DYN_CTLN register that
+ * is used to update interrupt throttling interval for specified ITR index
+ * and optionally enforces a software interrupt. If the @itr_idx is equal
+ * to I40E_ITR_NONE then no interval change is applied and only @force_swint
+ * parameter is taken into account. If the interval change and enforced
+ * software interrupt are not requested then the built value just enables
+ * appropriate vector interrupt.
+ **/
+static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval,
+ bool force_swint)
{
u32 val;
@@ -2644,23 +2659,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
* an event in the PBA anyway so we need to rely on the automask
* to hold pending events for us until the interrupt is re-enabled
*
- * The itr value is reported in microseconds, and the register
- * value is recorded in 2 microsecond units. For this reason we
- * only need to shift by the interval shift - 1 instead of the
- * full value.
+ * We have to shift the given value as it is reported in microseconds
+ * and the register value is recorded in 2 microsecond units.
*/
- itr &= I40E_ITR_MASK;
+ interval >>= 1;
+ /* 1. Enable vector interrupt
+ * 2. Update the interval for the specified ITR index
+ * (I40E_ITR_NONE in the register is used to indicate that
+ * no interval update is requested)
+ */
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
- (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval);
+
+ /* 3. Enforce software interrupt trigger if requested
+ * (These software interrupts rate is limited by ITR2 that is
+ * set to 20K interrupts per second)
+ */
+ if (force_swint)
+ val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+ I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK,
+ I40E_SW_ITR);
return val;
}
-/* a small macro to shorten up some long lines */
-#define INTREG I40E_PFINT_DYN_CTLN
-
/* The act of updating the ITR will cause it to immediately trigger. In order
* to prevent this from throwing off adaptive update statistics we defer the
* update so that it can only happen so often. So after either Tx or Rx are
@@ -2679,8 +2704,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_q_vector *q_vector)
{
+ enum i40e_dyn_idx itr_idx = I40E_ITR_NONE;
struct i40e_hw *hw = &vsi->back->hw;
- u32 intval;
+ u16 interval = 0;
+ u32 itr_val;
/* If we don't have MSIX, then we only need to re-enable icr0 */
if (!test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) {
@@ -2702,8 +2729,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
*/
if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
/* Rx ITR needs to be reduced, this is highest priority */
- intval = i40e_buildreg_itr(I40E_RX_ITR,
- q_vector->rx.target_itr);
+ itr_idx = I40E_RX_ITR;
+ interval = q_vector->rx.target_itr;
q_vector->rx.current_itr = q_vector->rx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
@@ -2712,25 +2739,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
/* Tx ITR needs to be reduced, this is second priority
* Tx ITR needs to be increased more than Rx, fourth priority
*/
- intval = i40e_buildreg_itr(I40E_TX_ITR,
- q_vector->tx.target_itr);
+ itr_idx = I40E_TX_ITR;
+ interval = q_vector->tx.target_itr;
q_vector->tx.current_itr = q_vector->tx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
/* Rx ITR needs to be increased, third priority */
- intval = i40e_buildreg_itr(I40E_RX_ITR,
- q_vector->rx.target_itr);
+ itr_idx = I40E_RX_ITR;
+ interval = q_vector->rx.target_itr;
q_vector->rx.current_itr = q_vector->rx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else {
/* No ITR update, lowest priority */
- intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
if (q_vector->itr_countdown)
q_vector->itr_countdown--;
}
- if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(q_vector->reg_idx), intval);
+ /* Do not update interrupt control register if VSI is down */
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ return;
+
+ /* Update ITR interval if necessary and enforce software interrupt
+ * if we are exiting busy poll.
+ */
+ if (q_vector->in_busy_poll) {
+ itr_val = i40e_buildreg_itr(itr_idx, interval, true);
+ q_vector->in_busy_poll = false;
+ } else {
+ itr_val = i40e_buildreg_itr(itr_idx, interval, false);
+ }
+ wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val);
}
/**
@@ -2845,6 +2883,8 @@ tx_only:
*/
if (likely(napi_complete_done(napi, work_done)))
i40e_update_enable_itr(vsi, q_vector);
+ else
+ q_vector->in_busy_poll = true;
return min(work_done, budget - 1);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index abf15067eb5de4..2cdc7de6301c13 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -68,6 +68,7 @@ enum i40e_dyn_idx {
/* these are indexes into ITRN registers */
#define I40E_RX_ITR I40E_IDX_ITR0
#define I40E_TX_ITR I40E_IDX_ITR1
+#define I40E_SW_ITR I40E_IDX_ITR2
/* Supported RSS offloads */
#define I40E_DEFAULT_RSS_HENA ( \
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 83a34e98bdc79d..232b65b9c8eacd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1624,8 +1624,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
{
struct i40e_hw *hw = &pf->hw;
struct i40e_vf *vf;
- int i, v;
u32 reg;
+ int i;
/* If we don't have any VFs, then there is nothing to reset */
if (!pf->num_alloc_vfs)
@@ -1636,11 +1636,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
return false;
/* Begin reset on all VFs at once */
- for (v = 0; v < pf->num_alloc_vfs; v++) {
- vf = &pf->vf[v];
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* If VF is being reset no need to trigger reset again */
if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
- i40e_trigger_vf_reset(&pf->vf[v], flr);
+ i40e_trigger_vf_reset(vf, flr);
}
/* HW requires some time to make sure it can flush the FIFO for a VF
@@ -1649,14 +1648,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
* the VFs using a simple iterator that increments once that VF has
* finished resetting.
*/
- for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
+ for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) {
usleep_range(10000, 20000);
/* Check each VF in sequence, beginning with the VF to fail
* the previous check.
*/
- while (v < pf->num_alloc_vfs) {
- vf = &pf->vf[v];
+ while (vf < &pf->vf[pf->num_alloc_vfs]) {
if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) {
reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK))
@@ -1666,7 +1664,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
/* If the current VF has finished resetting, move on
* to the next VF in sequence.
*/
- v++;
+ ++vf;
}
}
@@ -1676,39 +1674,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
/* Display a warning if at least one VF didn't manage to reset in
* time, but continue on with the operation.
*/
- if (v < pf->num_alloc_vfs)
+ if (vf < &pf->vf[pf->num_alloc_vfs])
dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
- pf->vf[v].vf_id);
+ vf->vf_id);
usleep_range(10000, 20000);
/* Begin disabling all the rings associated with VFs, but do not wait
* between each VF.
*/
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* On initial reset, we don't have any queues to disable */
- if (pf->vf[v].lan_vsi_idx == 0)
+ if (vf->lan_vsi_idx == 0)
continue;
/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;
- i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]);
+ i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]);
}
/* Now that we've notified HW to disable all of the VF rings, wait
* until they finish.
*/
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* On initial reset, we don't have any queues to disable */
- if (pf->vf[v].lan_vsi_idx == 0)
+ if (vf->lan_vsi_idx == 0)
continue;
/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;
- i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]);
+ i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]);
}
/* Hw may need up to 50ms to finish disabling the RX queues. We
@@ -1717,12 +1715,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
mdelay(50);
/* Finish the reset on each VF */
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;
- i40e_cleanup_reset_vf(&pf->vf[v]);
+ i40e_cleanup_reset_vf(vf);
}
i40e_flush(hw);
@@ -3139,11 +3137,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
/* Allow to delete VF primary MAC only if it was not set
* administratively by PF or if VF is trusted.
*/
- if (ether_addr_equal(addr, vf->default_lan_addr.addr) &&
- i40e_can_vf_change_mac(vf))
- was_unimac_deleted = true;
- else
- continue;
+ if (ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+ if (i40e_can_vf_change_mac(vf))
+ was_unimac_deleted = true;
+ else
+ continue;
+ }
if (i40e_del_mac_filter(vsi, al->list[i].addr)) {
ret = -EINVAL;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index ef2440f3abf8b6..166832a4213a28 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -3503,6 +3503,34 @@ static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter)
}
/**
+ * iavf_is_tc_config_same - Compare the mqprio TC config with the
+ * TC config already configured on this adapter.
+ * @adapter: board private structure
+ * @mqprio_qopt: TC config received from kernel.
+ *
+ * This function compares the TC config received from the kernel
+ * with the config already configured on the adapter.
+ *
+ * Return: True if configuration is same, false otherwise.
+ **/
+static bool iavf_is_tc_config_same(struct iavf_adapter *adapter,
+ struct tc_mqprio_qopt *mqprio_qopt)
+{
+ struct virtchnl_channel_info *ch = &adapter->ch_config.ch_info[0];
+ int i;
+
+ if (adapter->num_tc != mqprio_qopt->num_tc)
+ return false;
+
+ for (i = 0; i < adapter->num_tc; i++) {
+ if (ch[i].count != mqprio_qopt->count[i] ||
+ ch[i].offset != mqprio_qopt->offset[i])
+ return false;
+ }
+ return true;
+}
+
+/**
* __iavf_setup_tc - configure multiple traffic classes
* @netdev: network interface device structure
* @type_data: tc offload data
@@ -3559,7 +3587,7 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data)
if (ret)
return ret;
/* Return if same TC config is requested */
- if (adapter->num_tc == num_tc)
+ if (iavf_is_tc_config_same(adapter, &mqprio_qopt->qopt))
return 0;
adapter->num_tc = num_tc;
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 8040317c95617e..1f3e7a6903e56e 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -593,8 +593,9 @@ struct ice_aqc_recipe_data_elem {
struct ice_aqc_recipe_to_profile {
__le16 profile_id;
u8 rsvd[6];
- DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES);
+ __le64 recipe_assoc;
};
+static_assert(sizeof(struct ice_aqc_recipe_to_profile) == 16);
/* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3)
*/
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index db4b2844e1f718..d9f6cc71d900aa 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1002,8 +1002,8 @@ static void ice_get_itr_intrl_gran(struct ice_hw *hw)
*/
int ice_init_hw(struct ice_hw *hw)
{
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
- void *mac_buf __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
+ void *mac_buf __free(kfree) = NULL;
u16 mac_buf_len;
int status;
@@ -3272,7 +3272,7 @@ int ice_update_link_info(struct ice_port_info *pi)
return status;
if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) {
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
@@ -3420,7 +3420,7 @@ ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
int
ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
{
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
struct ice_aqc_set_phy_cfg_data cfg = { 0 };
struct ice_hw *hw;
int status;
@@ -3561,7 +3561,7 @@ int
ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
enum ice_fec_mode fec)
{
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
struct ice_hw *hw;
int status;
diff --git a/drivers/net/ethernet/intel/ice/ice_debugfs.c b/drivers/net/ethernet/intel/ice/ice_debugfs.c
index d252d98218d084..9fc0fd95a13d8f 100644
--- a/drivers/net/ethernet/intel/ice/ice_debugfs.c
+++ b/drivers/net/ethernet/intel/ice/ice_debugfs.c
@@ -171,7 +171,7 @@ ice_debugfs_module_write(struct file *filp, const char __user *buf,
if (*ppos != 0 || count > 8)
return -EINVAL;
- cmd_buf = memdup_user(buf, count);
+ cmd_buf = memdup_user_nul(buf, count);
if (IS_ERR(cmd_buf))
return PTR_ERR(cmd_buf);
@@ -257,7 +257,7 @@ ice_debugfs_nr_messages_write(struct file *filp, const char __user *buf,
if (*ppos != 0 || count > 4)
return -EINVAL;
- cmd_buf = memdup_user(buf, count);
+ cmd_buf = memdup_user_nul(buf, count);
if (IS_ERR(cmd_buf))
return PTR_ERR(cmd_buf);
@@ -332,7 +332,7 @@ ice_debugfs_enable_write(struct file *filp, const char __user *buf,
if (*ppos != 0 || count > 2)
return -EINVAL;
- cmd_buf = memdup_user(buf, count);
+ cmd_buf = memdup_user_nul(buf, count);
if (IS_ERR(cmd_buf))
return PTR_ERR(cmd_buf);
@@ -428,7 +428,7 @@ ice_debugfs_log_size_write(struct file *filp, const char __user *buf,
if (*ppos != 0 || count > 5)
return -EINVAL;
- cmd_buf = memdup_user(buf, count);
+ cmd_buf = memdup_user_nul(buf, count);
if (IS_ERR(cmd_buf))
return PTR_ERR(cmd_buf);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 255a9c8151b451..78b833b3e1d7ef 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -941,11 +941,11 @@ static u64 ice_loopback_test(struct net_device *netdev)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
struct ice_pf *pf = orig_vsi->back;
+ u8 *tx_frame __free(kfree) = NULL;
u8 broadcast[ETH_ALEN], ret = 0;
int num_frames, valid_frames;
struct ice_tx_ring *tx_ring;
struct ice_rx_ring *rx_ring;
- u8 *tx_frame __free(kfree);
int i;
netdev_info(netdev, "loopback test\n");
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index f97128b69f87ec..f0e76f0a6d6031 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -2041,7 +2041,7 @@ int ice_init_lag(struct ice_pf *pf)
/* associate recipes to profiles */
for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) {
err = ice_aq_get_recipe_to_profile(&pf->hw, n,
- (u8 *)&recipe_bits, NULL);
+ &recipe_bits, NULL);
if (err)
continue;
@@ -2049,7 +2049,7 @@ int ice_init_lag(struct ice_pf *pf)
recipe_bits |= BIT(lag->pf_recipe) |
BIT(lag->lport_recipe);
ice_aq_map_recipe_to_profile(&pf->hw, n,
- (u8 *)&recipe_bits, NULL);
+ recipe_bits, NULL);
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index ee3f0d3e3f6dbd..558422120312ba 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3091,7 +3091,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
{
struct ice_vsi_cfg_params params = {};
struct ice_coalesce_stored *coalesce;
- int prev_num_q_vectors = 0;
+ int prev_num_q_vectors;
struct ice_pf *pf;
int ret;
@@ -3105,13 +3105,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
return -EINVAL;
- coalesce = kcalloc(vsi->num_q_vectors,
- sizeof(struct ice_coalesce_stored), GFP_KERNEL);
- if (!coalesce)
- return -ENOMEM;
-
- prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
-
ret = ice_vsi_realloc_stat_arrays(vsi);
if (ret)
goto err_vsi_cfg;
@@ -3121,6 +3114,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
if (ret)
goto err_vsi_cfg;
+ coalesce = kcalloc(vsi->num_q_vectors,
+ sizeof(struct ice_coalesce_stored), GFP_KERNEL);
+ if (!coalesce)
+ return -ENOMEM;
+
+ prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
+
ret = ice_vsi_cfg_tc_lan(pf, vsi);
if (ret) {
if (vsi_flags & ICE_VSI_FLAG_INIT) {
@@ -3139,8 +3139,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
err_vsi_cfg_tc_lan:
ice_vsi_decfg(vsi);
-err_vsi_cfg:
kfree(coalesce);
+err_vsi_cfg:
return ret;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index d4baae8c3b720a..b4ea935e830054 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -2025,12 +2025,12 @@ error_out:
* ice_aq_map_recipe_to_profile - Map recipe to packet profile
* @hw: pointer to the HW struct
* @profile_id: package profile ID to associate the recipe with
- * @r_bitmap: Recipe bitmap filled in and need to be returned as response
+ * @r_assoc: Recipe bitmap filled in and need to be returned as response
* @cd: pointer to command details structure or NULL
* Recipe to profile association (0x0291)
*/
int
-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc,
struct ice_sq_cd *cd)
{
struct ice_aqc_recipe_to_profile *cmd;
@@ -2042,7 +2042,7 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
/* Set the recipe ID bit in the bitmask to let the device know which
* profile we are associating the recipe to
*/
- memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc));
+ cmd->recipe_assoc = cpu_to_le64(r_assoc);
return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
}
@@ -2051,12 +2051,12 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
* ice_aq_get_recipe_to_profile - Map recipe to packet profile
* @hw: pointer to the HW struct
* @profile_id: package profile ID to associate the recipe with
- * @r_bitmap: Recipe bitmap filled in and need to be returned as response
+ * @r_assoc: Recipe bitmap filled in and need to be returned as response
* @cd: pointer to command details structure or NULL
* Associate profile ID with given recipe (0x0293)
*/
int
-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc,
struct ice_sq_cd *cd)
{
struct ice_aqc_recipe_to_profile *cmd;
@@ -2069,7 +2069,7 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
if (!status)
- memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc));
+ *r_assoc = le64_to_cpu(cmd->recipe_assoc);
return status;
}
@@ -2108,6 +2108,7 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid)
static void ice_get_recp_to_prof_map(struct ice_hw *hw)
{
DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+ u64 recp_assoc;
u16 i;
for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) {
@@ -2115,8 +2116,9 @@ static void ice_get_recp_to_prof_map(struct ice_hw *hw)
bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES);
bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES);
- if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL))
+ if (ice_aq_get_recipe_to_profile(hw, i, &recp_assoc, NULL))
continue;
+ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES);
bitmap_copy(profile_to_recipe[i], r_bitmap,
ICE_MAX_NUM_RECIPES);
for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES)
@@ -5390,22 +5392,24 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
*/
list_for_each_entry(fvit, &rm->fv_list, list_entry) {
DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+ u64 recp_assoc;
u16 j;
status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id,
- (u8 *)r_bitmap, NULL);
+ &recp_assoc, NULL);
if (status)
goto err_unroll;
+ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES);
bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap,
ICE_MAX_NUM_RECIPES);
status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
if (status)
goto err_unroll;
+ bitmap_to_arr64(&recp_assoc, r_bitmap, ICE_MAX_NUM_RECIPES);
status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id,
- (u8 *)r_bitmap,
- NULL);
+ recp_assoc, NULL);
ice_release_change_lock(hw);
if (status)
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index db7e501b7e0a48..89ffa1b51b5ad1 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -424,10 +424,10 @@ int ice_aq_add_recipe(struct ice_hw *hw,
struct ice_aqc_recipe_data_elem *s_recipe_list,
u16 num_recipes, struct ice_sq_cd *cd);
int
-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc,
struct ice_sq_cd *cd);
int
-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc,
struct ice_sq_cd *cd);
#endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
index b890410a2bc0ba..688ccb0615ab9f 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -28,6 +28,8 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
* - ICE_TC_FLWR_FIELD_VLAN_TPID (present if specified)
* - Tunnel flag (present if tunnel)
*/
+ if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS)
+ lkups_cnt++;
if (flags & ICE_TC_FLWR_FIELD_TENANT_ID)
lkups_cnt++;
@@ -363,6 +365,11 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
/* Always add direction metadata */
ice_rule_add_direction_metadata(&list[ICE_TC_METADATA_LKUP_IDX]);
+ if (tc_fltr->direction == ICE_ESWITCH_FLTR_EGRESS) {
+ ice_rule_add_src_vsi_metadata(&list[i]);
+ i++;
+ }
+
rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type);
if (tc_fltr->tunnel_type != TNL_LAST) {
i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list, i);
@@ -772,7 +779,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
int ret;
int i;
- if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) {
+ if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT) {
NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)");
return -EOPNOTSUPP;
}
@@ -820,6 +827,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
/* specify the cookie as filter_rule_id */
rule_info.fltr_rule_id = fltr->cookie;
+ rule_info.src_vsi = vsi->idx;
ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added);
if (ret == -EEXIST) {
@@ -1481,7 +1489,10 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
(BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
- BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel");
return -EOPNOTSUPP;
} else {
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index 21d26e19338a69..d10a4be965b591 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -856,6 +856,11 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
return 0;
}
+ if (flags & ICE_VF_RESET_LOCK)
+ mutex_lock(&vf->cfg_lock);
+ else
+ lockdep_assert_held(&vf->cfg_lock);
+
lag = pf->lag;
mutex_lock(&pf->lag_mutex);
if (lag && lag->bonded && lag->primary) {
@@ -867,11 +872,6 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
act_prt = ICE_LAG_INVALID_PORT;
}
- if (flags & ICE_VF_RESET_LOCK)
- mutex_lock(&vf->cfg_lock);
- else
- lockdep_assert_held(&vf->cfg_lock);
-
if (ice_is_vf_disabled(vf)) {
vsi = ice_get_vf_vsi(vf);
if (!vsi) {
@@ -956,14 +956,14 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
ice_mbx_clear_malvf(&vf->mbx_info);
out_unlock:
- if (flags & ICE_VF_RESET_LOCK)
- mutex_unlock(&vf->cfg_lock);
-
if (lag && lag->bonded && lag->primary &&
act_prt != ICE_LAG_INVALID_PORT)
ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
mutex_unlock(&pf->lag_mutex);
+ if (flags & ICE_VF_RESET_LOCK)
+ mutex_unlock(&vf->cfg_lock);
+
return err;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
index 80dc4bcdd3a41c..b3e1bdcb80f84d 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
@@ -26,24 +26,22 @@ static void ice_port_vlan_on(struct ice_vsi *vsi)
struct ice_vsi_vlan_ops *vlan_ops;
struct ice_pf *pf = vsi->back;
- if (ice_is_dvm_ena(&pf->hw)) {
- vlan_ops = &vsi->outer_vlan_ops;
-
- /* setup outer VLAN ops */
- vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
- vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+ /* setup inner VLAN ops */
+ vlan_ops = &vsi->inner_vlan_ops;
- /* setup inner VLAN ops */
- vlan_ops = &vsi->inner_vlan_ops;
+ if (ice_is_dvm_ena(&pf->hw)) {
vlan_ops->add_vlan = noop_vlan_arg;
vlan_ops->del_vlan = noop_vlan_arg;
vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
- } else {
- vlan_ops = &vsi->inner_vlan_ops;
+ /* setup outer VLAN ops */
+ vlan_ops = &vsi->outer_vlan_ops;
+ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
+ vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+ } else {
vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan;
}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 6dd7a66bb8979a..f5bc4a2780745e 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -2941,6 +2941,8 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq,
rx_ptype = le16_get_bits(rx_desc->ptype_err_fflags0,
VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M);
+ skb->protocol = eth_type_trans(skb, rxq->vport->netdev);
+
decoded = rxq->vport->rx_ptype_lkup[rx_ptype];
/* If we don't know the ptype we can't do anything else with it. Just
* pass it up the stack as-is.
@@ -2951,8 +2953,6 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq,
/* process RSS/hash */
idpf_rx_hash(rxq, skb, rx_desc, &decoded);
- skb->protocol = eth_type_trans(skb, rxq->vport->netdev);
-
if (le16_get_bits(rx_desc->hdrlen_flags,
VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M))
return idpf_rx_rsc(rxq, skb, rx_desc, &decoded);
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 90316dc5863087..6bc56c7c181e48 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -298,6 +298,7 @@ struct igc_adapter {
/* LEDs */
struct mutex led_mutex;
+ struct igc_led_classdev *leds;
};
void igc_up(struct igc_adapter *adapter);
@@ -723,6 +724,7 @@ void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts);
void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter);
int igc_led_setup(struct igc_adapter *adapter);
+void igc_led_free(struct igc_adapter *adapter);
#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
diff --git a/drivers/net/ethernet/intel/igc/igc_leds.c b/drivers/net/ethernet/intel/igc/igc_leds.c
index bf240c5daf8657..3929b25b6ae6eb 100644
--- a/drivers/net/ethernet/intel/igc/igc_leds.c
+++ b/drivers/net/ethernet/intel/igc/igc_leds.c
@@ -236,8 +236,8 @@ static void igc_led_get_name(struct igc_adapter *adapter, int index, char *buf,
pci_dev_id(adapter->pdev), index);
}
-static void igc_setup_ldev(struct igc_led_classdev *ldev,
- struct net_device *netdev, int index)
+static int igc_setup_ldev(struct igc_led_classdev *ldev,
+ struct net_device *netdev, int index)
{
struct igc_adapter *adapter = netdev_priv(netdev);
struct led_classdev *led_cdev = &ldev->led;
@@ -257,24 +257,46 @@ static void igc_setup_ldev(struct igc_led_classdev *ldev,
led_cdev->hw_control_get = igc_led_hw_control_get;
led_cdev->hw_control_get_device = igc_led_hw_control_get_device;
- devm_led_classdev_register(&netdev->dev, led_cdev);
+ return led_classdev_register(&netdev->dev, led_cdev);
}
int igc_led_setup(struct igc_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
- struct device *dev = &netdev->dev;
struct igc_led_classdev *leds;
- int i;
+ int i, err;
mutex_init(&adapter->led_mutex);
- leds = devm_kcalloc(dev, IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ leds = kcalloc(IGC_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
if (!leds)
return -ENOMEM;
- for (i = 0; i < IGC_NUM_LEDS; i++)
- igc_setup_ldev(leds + i, netdev, i);
+ for (i = 0; i < IGC_NUM_LEDS; i++) {
+ err = igc_setup_ldev(leds + i, netdev, i);
+ if (err)
+ goto err;
+ }
+
+ adapter->leds = leds;
return 0;
+
+err:
+ for (i--; i >= 0; i--)
+ led_classdev_unregister(&((leds + i)->led));
+
+ kfree(leds);
+ return err;
+}
+
+void igc_led_free(struct igc_adapter *adapter)
+{
+ struct igc_led_classdev *leds = adapter->leds;
+ int i;
+
+ for (i = 0; i < IGC_NUM_LEDS; i++)
+ led_classdev_unregister(&((leds + i)->led));
+
+ kfree(leds);
}
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 2e1cfbd82f4fd5..4d975d620a8e4b 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1642,10 +1642,6 @@ done:
if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) &&
skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
- /* FIXME: add support for retrieving timestamps from
- * the other timer registers before skipping the
- * timestamping request.
- */
unsigned long flags;
u32 tstamp_flags;
@@ -7025,6 +7021,9 @@ static void igc_remove(struct pci_dev *pdev)
cancel_work_sync(&adapter->watchdog_task);
hrtimer_cancel(&adapter->hrtimer);
+ if (IS_ENABLED(CONFIG_IGC_LEDS))
+ igc_led_free(adapter);
+
/* Release control of h/w to f/w. If f/w is AMT enabled, this
* would have already happened in close and is redundant.
*/
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 13a6fca31004a8..866024f2b9eeb3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -914,7 +914,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
goto err_out;
}
- xs = kzalloc(sizeof(*xs), GFP_KERNEL);
+ algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
+ if (unlikely(!algo)) {
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ xs = kzalloc(sizeof(*xs), GFP_ATOMIC);
if (unlikely(!xs)) {
err = -ENOMEM;
goto err_out;
@@ -930,14 +936,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4));
xs->xso.dev = adapter->netdev;
- algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
- if (unlikely(!algo)) {
- err = -ENOENT;
- goto err_xs;
- }
-
aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8;
- xs->aead = kzalloc(aead_len, GFP_KERNEL);
+ xs->aead = kzalloc(aead_len, GFP_ATOMIC);
if (unlikely(!xs->aead)) {
err = -ENOMEM;
goto err_xs;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c
index 2e2c3be8a0b429..e6eb98d70f3c42 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c
@@ -15,6 +15,7 @@
#include <linux/io.h>
#include <linux/pci.h>
#include <linux/etherdevice.h>
+#include <linux/vmalloc.h>
#include "octep_config.h"
#include "octep_main.h"
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c
index 2eab21e43048a1..445b626efe11d7 100644
--- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c
@@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
#include "octep_vf_config.h"
#include "octep_vf_main.h"
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 3c0f55b3e48ea4..b86f3224f0b783 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -808,6 +808,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id,
if (!is_lmac_valid(cgx, lmac_id))
return -ENODEV;
+ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
+ cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
+ cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0;
+ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
+
cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK;
cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index 72e060cf6b6181..e9bf9231b0185d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
continue;
lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu));
for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) {
+ if (iter >= MAX_LMAC_COUNT)
+ continue;
lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu),
iter);
rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index 2500f5ba4f5a42..881d704644fbee 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -999,12 +999,10 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp,
u16 pcifunc;
int ret, lf;
- cmd_buf = memdup_user(buffer, count + 1);
+ cmd_buf = memdup_user_nul(buffer, count);
if (IS_ERR(cmd_buf))
return -ENOMEM;
- cmd_buf[count] = '\0';
-
cmd_buf_tmp = strchr(cmd_buf, '\n');
if (cmd_buf_tmp) {
*cmd_buf_tmp = '\0';
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index d39001cdc707ee..00af8888e3291a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -4819,18 +4819,18 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
*/
rvu_write64(rvu, blkaddr, NIX_AF_CFG,
rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL);
+ }
- /* Set chan/link to backpressure TL3 instead of TL2 */
- rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
+ /* Set chan/link to backpressure TL3 instead of TL2 */
+ rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
- /* Disable SQ manager's sticky mode operation (set TM6 = 0)
- * This sticky mode is known to cause SQ stalls when multiple
- * SQs are mapped to same SMQ and transmitting pkts at a time.
- */
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
- cfg &= ~BIT_ULL(15);
- rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
- }
+ /* Disable SQ manager's sticky mode operation (set TM6 = 0)
+ * This sticky mode is known to cause SQ stalls when multiple
+ * SQs are mapped to same SMQ and transmitting pkts at a time.
+ */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
+ cfg &= ~BIT_ULL(15);
+ rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
ltdefs = rvu->kpu.lt_def;
/* Calibrate X2P bus to check if CGX/LBK links are fine */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index e350242bbafbad..e8b73b9d75e311 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -1657,7 +1657,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz,
struct npc_coalesced_kpu_prfl *img_data = NULL;
int i = 0, rc = -EINVAL;
void __iomem *kpu_prfl_addr;
- u16 offset;
+ u32 offset;
img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr;
if (le64_to_cpu(img_data->signature) == KPU_SIGN &&
@@ -2181,7 +2181,6 @@ void rvu_npc_freemem(struct rvu *rvu)
kfree(pkind->rsrc.bmap);
npc_mcam_rsrcs_deinit(rvu);
- kfree(mcam->counters.bmap);
if (rvu->kpu_prfl_addr)
iounmap(rvu->kpu_prfl_addr);
else
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index b40bd0e4675148..3f46d5e0fb2ecb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1933,7 +1933,7 @@ int otx2_open(struct net_device *netdev)
* mcam entries are enabled to receive the packets. Hence disable the
* packet I/O.
*/
- if (err == EIO)
+ if (err == -EIO)
goto err_disable_rxtx;
else if (err)
goto err_tx_stop_queues;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 87bdb93cb066e9..f4655a8c0705d7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -689,6 +689,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
struct flow_match_control match;
+ u32 val;
flow_rule_match_control(rule, &match);
if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
@@ -697,12 +698,14 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
}
if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
+ val = match.key->flags & FLOW_DIS_IS_FRAGMENT;
if (ntohs(flow_spec->etype) == ETH_P_IP) {
- flow_spec->ip_flag = IPV4_FLAG_MORE;
+ flow_spec->ip_flag = val ? IPV4_FLAG_MORE : 0;
flow_mask->ip_flag = IPV4_FLAG_MORE;
req->features |= BIT_ULL(NPC_IPFRAG_IPV4);
} else if (ntohs(flow_spec->etype) == ETH_P_IPV6) {
- flow_spec->next_header = IPPROTO_FRAGMENT;
+ flow_spec->next_header = val ?
+ IPPROTO_FRAGMENT : 0;
flow_mask->next_header = 0xff;
req->features |= BIT_ULL(NPC_IPFRAG_IPV6);
} else {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
index 1e77bbf5d22a1a..1723e9912ae07c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
@@ -382,6 +382,7 @@ static void otx2_qos_read_txschq_cfg_tl(struct otx2_qos_node *parent,
otx2_qos_read_txschq_cfg_tl(node, cfg);
cnt = cfg->static_node_pos[node->level];
cfg->schq_contig_list[node->level][cnt] = node->schq;
+ cfg->schq_index_used[node->level][cnt] = true;
cfg->schq_contig[node->level]++;
cfg->static_node_pos[node->level]++;
otx2_qos_read_txschq_cfg_schq(node, cfg);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index c895e265ae0ebc..61334a71058c75 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -1074,13 +1074,13 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev)
static void
mtk_wed_stop(struct mtk_wed_device *dev)
{
+ mtk_wed_dma_disable(dev);
mtk_wed_set_ext_int(dev, false);
wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0);
wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0);
wdma_w32(dev, MTK_WDMA_INT_MASK, 0);
wdma_w32(dev, MTK_WDMA_INT_GRP2, 0);
- wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0);
if (!mtk_wed_get_rx_capa(dev))
return;
@@ -1093,7 +1093,6 @@ static void
mtk_wed_deinit(struct mtk_wed_device *dev)
{
mtk_wed_stop(dev);
- mtk_wed_dma_disable(dev);
wed_clr(dev, MTK_WED_CTRL,
MTK_WED_CTRL_WDMA_INT_AGENT_EN |
@@ -2605,9 +2604,6 @@ mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask)
static void
mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask)
{
- if (!dev->running)
- return;
-
mtk_wed_set_ext_int(dev, !!mask);
wed_w32(dev, MTK_WED_INT_MASK, mask);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 86f1854698b4e8..883c044852f1df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -95,9 +95,15 @@ static inline void mlx5e_ptp_metadata_fifo_push(struct mlx5e_ptp_metadata_fifo *
}
static inline u8
+mlx5e_ptp_metadata_fifo_peek(struct mlx5e_ptp_metadata_fifo *fifo)
+{
+ return fifo->data[fifo->mask & fifo->cc];
+}
+
+static inline void
mlx5e_ptp_metadata_fifo_pop(struct mlx5e_ptp_metadata_fifo *fifo)
{
- return fifo->data[fifo->mask & fifo->cc++];
+ fifo->cc++;
}
static inline void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index e87e26f2c669c2..6743806b848060 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -83,24 +83,25 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
txq_ix = mlx5e_qid_from_qos(chs, node_qid);
- WARN_ON(node_qid > priv->htb_max_qos_sqs);
- if (node_qid == priv->htb_max_qos_sqs) {
- struct mlx5e_sq_stats *stats, **stats_list = NULL;
-
- if (priv->htb_max_qos_sqs == 0) {
- stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
- sizeof(*stats_list),
- GFP_KERNEL);
- if (!stats_list)
- return -ENOMEM;
- }
+ WARN_ON(node_qid >= mlx5e_htb_cur_leaf_nodes(priv->htb));
+ if (!priv->htb_qos_sq_stats) {
+ struct mlx5e_sq_stats **stats_list;
+
+ stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+ sizeof(*stats_list), GFP_KERNEL);
+ if (!stats_list)
+ return -ENOMEM;
+
+ WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+ }
+
+ if (!priv->htb_qos_sq_stats[node_qid]) {
+ struct mlx5e_sq_stats *stats;
+
stats = kzalloc(sizeof(*stats), GFP_KERNEL);
- if (!stats) {
- kvfree(stats_list);
+ if (!stats)
return -ENOMEM;
- }
- if (stats_list)
- WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+
WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
/* Order htb_max_qos_sqs increment after writing the array pointer.
* Pairs with smp_load_acquire in en_stats.c.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 0ab9db31953025..22918b2ef7f128 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -108,7 +108,10 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
mlx5e_reset_txqsq_cc_pc(sq);
sq->stats->recover++;
clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ rtnl_lock();
mlx5e_activate_txqsq(sq);
+ rtnl_unlock();
+
if (sq->channel)
mlx5e_trigger_napi_icosq(sq->channel);
else
@@ -179,12 +182,16 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx)
carrier_ok = netif_carrier_ok(netdev);
netif_carrier_off(netdev);
+ rtnl_lock();
mlx5e_deactivate_priv_channels(priv);
+ rtnl_unlock();
mlx5e_ptp_close(chs->ptp);
err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
+ rtnl_lock();
mlx5e_activate_priv_channels(priv);
+ rtnl_unlock();
/* return carrier back if needed */
if (carrier_ok)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
index bcafb4bf94154f..8d9a3b5ec973b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
@@ -179,6 +179,13 @@ u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels)
return min_t(u32, rqt_size, max_cap_rqt_size);
}
+#define MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH 256
+
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void)
+{
+ return MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH / MLX5E_UNIFORM_SPREAD_RQT_FACTOR;
+}
+
void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
{
mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
index e0bc30308c7700..2f9e04a8418f14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
@@ -38,6 +38,7 @@ static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt)
}
u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels);
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void);
int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn, u32 *vhca_id);
int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids,
unsigned int num_rqns,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
index f675b1926340f9..f66bbc8464645e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
@@ -57,6 +57,7 @@ int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
{
+ mutex_lock(selq->state_lock);
WARN_ON_ONCE(selq->is_prepared);
kvfree(selq->standby);
@@ -67,6 +68,7 @@ void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
kvfree(selq->standby);
selq->standby = NULL;
+ mutex_unlock(selq->state_lock);
}
void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
index b2cabd6ab86cb9..cc9bcc42003242 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -1640,6 +1640,7 @@ static const struct macsec_ops macsec_offload_ops = {
.mdo_add_secy = mlx5e_macsec_add_secy,
.mdo_upd_secy = mlx5e_macsec_upd_secy,
.mdo_del_secy = mlx5e_macsec_del_secy,
+ .rx_uses_md_dst = true,
};
bool mlx5e_macsec_handle_tx_skb(struct mlx5e_macsec *macsec, struct sk_buff *skb)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index c7f542d0b8f08c..93cf23278d93c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -46,6 +46,10 @@ struct arfs_table {
struct hlist_head rules_hash[ARFS_HASH_SIZE];
};
+enum {
+ MLX5E_ARFS_STATE_ENABLED,
+};
+
enum arfs_type {
ARFS_IPV4_TCP,
ARFS_IPV6_TCP,
@@ -60,6 +64,7 @@ struct mlx5e_arfs_tables {
spinlock_t arfs_lock;
int last_filter_id;
struct workqueue_struct *wq;
+ unsigned long state;
};
struct arfs_tuple {
@@ -170,6 +175,8 @@ int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
return err;
}
}
+ set_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state);
+
return 0;
}
@@ -455,6 +462,8 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs)
int i;
int j;
+ clear_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state);
+
spin_lock_bh(&arfs->arfs_lock);
mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) {
hlist_del_init(&rule->hlist);
@@ -627,17 +636,8 @@ static void arfs_handle_work(struct work_struct *work)
struct mlx5_flow_handle *rule;
arfs = mlx5e_fs_get_arfs(priv->fs);
- mutex_lock(&priv->state_lock);
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- spin_lock_bh(&arfs->arfs_lock);
- hlist_del(&arfs_rule->hlist);
- spin_unlock_bh(&arfs->arfs_lock);
-
- mutex_unlock(&priv->state_lock);
- kfree(arfs_rule);
- goto out;
- }
- mutex_unlock(&priv->state_lock);
+ if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state))
+ return;
if (!arfs_rule->rule) {
rule = arfs_add_rule(priv, arfs_rule);
@@ -753,6 +753,11 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
return -EPROTONOSUPPORT;
spin_lock_bh(&arfs->arfs_lock);
+ if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) {
+ spin_unlock_bh(&arfs->arfs_lock);
+ return -EPERM;
+ }
+
arfs_rule = arfs_find_rule(arfs_t, &fk);
if (arfs_rule) {
if (arfs_rule->rxq == rxq_index || work_busy(&arfs_rule->arfs_work)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index cc51ce16df14ab..67a29826bb5702 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -451,6 +451,34 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
mutex_lock(&priv->state_lock);
+ if (mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc == ETH_RSS_HASH_XOR) {
+ unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+ if (count > xor8_max_channels) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: Requested number of channels (%d) exceeds the maximum allowed by the XOR8 RSS hfunc (%d)\n",
+ __func__, count, xor8_max_channels);
+ goto out;
+ }
+ }
+
+ /* If RXFH is configured, changing the channels number is allowed only if
+ * it does not require resizing the RSS table. This is because the previous
+ * configuration may no longer be compatible with the new RSS table.
+ */
+ if (netif_is_rxfh_configured(priv->netdev)) {
+ int cur_rqt_size = mlx5e_rqt_size(priv->mdev, cur_params->num_channels);
+ int new_rqt_size = mlx5e_rqt_size(priv->mdev, count);
+
+ if (new_rqt_size != cur_rqt_size) {
+ err = -EINVAL;
+ netdev_err(priv->netdev,
+ "%s: RXFH is configured, block changing channels number that affects RSS table size (new: %d, current: %d)\n",
+ __func__, new_rqt_size, cur_rqt_size);
+ goto out;
+ }
+ }
+
/* Don't allow changing the number of channels if HTB offload is active,
* because the numeration of the QoS SQs will change, while per-queue
* qdiscs are attached.
@@ -561,12 +589,12 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
static void
mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
- struct mlx5_core_dev *mdev = priv->mdev;
int tc;
int i;
for (i = 0; i < priv->channels.num; ++i) {
struct mlx5e_channel *c = priv->channels.c[i];
+ struct mlx5_core_dev *mdev = c->mdev;
for (tc = 0; tc < c->num_tc; tc++) {
mlx5_core_modify_cq_moderation(mdev,
@@ -580,11 +608,11 @@ mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coal
static void
mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
- struct mlx5_core_dev *mdev = priv->mdev;
int i;
for (i = 0; i < priv->channels.num; ++i) {
struct mlx5e_channel *c = priv->channels.c[i];
+ struct mlx5_core_dev *mdev = c->mdev;
mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
coal->rx_coalesce_usecs,
@@ -1281,17 +1309,30 @@ int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh,
struct mlx5e_priv *priv = netdev_priv(dev);
u32 *rss_context = &rxfh->rss_context;
u8 hfunc = rxfh->hfunc;
+ unsigned int count;
int err;
mutex_lock(&priv->state_lock);
+
+ count = priv->channels.params.num_channels;
+
+ if (hfunc == ETH_RSS_HASH_XOR) {
+ unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+ if (count > xor8_max_channels) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n",
+ __func__, count, xor8_max_channels);
+ goto unlock;
+ }
+ }
+
if (*rss_context && rxfh->rss_delete) {
err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context);
goto unlock;
}
if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
- unsigned int count = priv->channels.params.num_channels;
-
err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count);
if (err)
goto unlock;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 91848eae45655f..319930c04093ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -209,8 +209,8 @@ static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data)
*data,
mlx5e_devcom_event_mpv,
priv);
- if (IS_ERR_OR_NULL(priv->devcom))
- return -EOPNOTSUPP;
+ if (IS_ERR(priv->devcom))
+ return PTR_ERR(priv->devcom);
if (mlx5_core_is_mp_master(priv->mdev)) {
mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_UP,
@@ -5726,9 +5726,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
kfree(priv->tx_rates);
kfree(priv->txq2sq);
destroy_workqueue(priv->wq);
- mutex_lock(&priv->state_lock);
mlx5e_selq_cleanup(&priv->selq);
- mutex_unlock(&priv->state_lock);
free_cpumask_var(priv->scratchpad.cpumask);
for (i = 0; i < priv->htb_max_qos_sqs; i++)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 2fa076b23fbead..e21a3b4128ce88 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -398,6 +398,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) {
u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
+ mlx5e_ptp_metadata_fifo_pop(&sq->ptpsq->metadata_freelist);
+
mlx5e_skb_cb_hwtstamp_init(skb);
mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
metadata_index);
@@ -496,9 +498,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
err_drop:
stats->dropped++;
- if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
- mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
- be32_to_cpu(eseg->flow_table_metadata));
dev_kfree_skb_any(skb);
mlx5e_tx_flush(sq);
}
@@ -657,7 +656,7 @@ static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb,
{
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
eseg->flow_table_metadata =
- cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist));
+ cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist));
}
static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 3047d7015c5256..1789800faaeb62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1868,6 +1868,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
if (err)
goto abort;
+ dev->priv.eswitch = esw;
err = esw_offloads_init(esw);
if (err)
goto reps_err;
@@ -1892,11 +1893,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
else
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (MLX5_ESWITCH_MANAGER(dev) &&
- mlx5_esw_vport_match_metadata_supported(esw))
- esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
-
- dev->priv.eswitch = esw;
BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
esw_info(dev,
@@ -1908,6 +1904,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
reps_err:
mlx5_esw_vports_cleanup(esw);
+ dev->priv.eswitch = NULL;
abort:
if (esw->work_queue)
destroy_workqueue(esw->work_queue);
@@ -1926,7 +1923,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw_info(esw->dev, "cleanup\n");
- esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
WARN_ON(refcount_read(&esw->qos.refcnt));
mutex_destroy(&esw->state_lock);
@@ -1937,6 +1933,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
mutex_destroy(&esw->offloads.encap_tbl_lock);
mutex_destroy(&esw->offloads.decap_tbl_lock);
esw_offloads_cleanup(esw);
+ esw->dev->priv.eswitch = NULL;
mlx5_esw_vports_cleanup(esw);
debugfs_remove_recursive(esw->debugfs_root);
devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index baaae628b0a0f6..844d3e3a65ddf0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -43,6 +43,7 @@
#include "rdma.h"
#include "en.h"
#include "fs_core.h"
+#include "lib/mlx5.h"
#include "lib/devcom.h"
#include "lib/eq.h"
#include "lib/fs_chains.h"
@@ -2476,6 +2477,10 @@ int esw_offloads_init(struct mlx5_eswitch *esw)
if (err)
return err;
+ if (MLX5_ESWITCH_MANAGER(esw->dev) &&
+ mlx5_esw_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
err = devl_params_register(priv_to_devlink(esw->dev),
esw_devlink_params,
ARRAY_SIZE(esw_devlink_params));
@@ -3055,7 +3060,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key)
key,
mlx5_esw_offloads_devcom_event,
esw);
- if (IS_ERR_OR_NULL(esw->devcom))
+ if (IS_ERR(esw->devcom))
return;
mlx5_devcom_send_event(esw->devcom,
@@ -3707,6 +3712,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && mlx5_get_sd(esw->dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't change E-Switch mode to switchdev when multi-PF netdev (Socket Direct) is configured.");
+ return -EPERM;
+ }
+
mlx5_lag_disable_change(esw->dev);
err = mlx5_esw_try_lock(esw);
if (err < 0) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index e6bfa7e4f146ca..cf085a478e3e4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1664,6 +1664,16 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
return err;
}
+static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1,
+ struct mlx5_pkt_reformat *p2)
+{
+ return p1->owner == p2->owner &&
+ (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ?
+ p1->id == p2->id :
+ mlx5_fs_dr_action_get_pkt_reformat_id(p1) ==
+ mlx5_fs_dr_action_get_pkt_reformat_id(p2));
+}
+
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
struct mlx5_flow_destination *d2)
{
@@ -1675,8 +1685,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
(d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
- (d1->vport.pkt_reformat->id ==
- d2->vport.pkt_reformat->id) : true)) ||
+ mlx5_pkt_reformat_cmp(d1->vport.pkt_reformat,
+ d2->vport.pkt_reformat) : true)) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
d1->ft == d2->ft) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
@@ -1808,8 +1818,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
}
trace_mlx5_fs_set_fte(fte, false);
+ /* Link newly added rules into the tree. */
for (i = 0; i < handle->num_rules; i++) {
- if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
+ if (!handle->rule[i]->node.parent) {
tree_add_node(&handle->rule[i]->node, &fte->node);
trace_mlx5_fs_add_rule(handle->rule[i]);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index d14459e5c04fc5..69d482f7c5a299 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -703,8 +703,10 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev)
return err;
}
- if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
mlx5_lag_port_sel_destroy(ldev);
+ ldev->buckets = 1;
+ }
if (mlx5_lag_has_drop_rule(ldev))
mlx5_lag_drop_rule_cleanup(ldev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
index e7d59cfa8708e1..7b0766c89f4cf0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -220,7 +220,7 @@ mlx5_devcom_register_component(struct mlx5_devcom_dev *devc,
struct mlx5_devcom_comp *comp;
if (IS_ERR_OR_NULL(devc))
- return NULL;
+ return ERR_PTR(-EINVAL);
mutex_lock(&comp_list_lock);
comp = devcom_component_get(devc, id, key, handler);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index 5b28084e8a03c7..dd5d186dc6148f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -213,8 +213,8 @@ static int sd_register(struct mlx5_core_dev *dev)
sd = mlx5_get_sd(dev);
devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP,
sd->group_id, NULL, dev);
- if (!devcom)
- return -ENOMEM;
+ if (IS_ERR(devcom))
+ return PTR_ERR(devcom);
sd->devcom = devcom;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c2593625c09ad6..331ce47f51a17a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -956,7 +956,7 @@ static void mlx5_register_hca_devcom_comp(struct mlx5_core_dev *dev)
mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS,
mlx5_query_nic_system_image_guid(dev),
NULL, dev);
- if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp))
+ if (IS_ERR(dev->priv.hca_devcom_comp))
mlx5_core_err(dev, "Failed to register devcom HCA component\n");
}
@@ -1480,6 +1480,14 @@ int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev)
if (err)
goto err_register;
+ err = mlx5_crdump_enable(dev);
+ if (err)
+ mlx5_core_err(dev, "mlx5_crdump_enable failed with error code %d\n", err);
+
+ err = mlx5_hwmon_dev_register(dev);
+ if (err)
+ mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
+
mutex_unlock(&dev->intf_state_mutex);
return 0;
@@ -1505,7 +1513,10 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
int err;
devl_lock(devlink);
+ devl_register(devlink);
err = mlx5_init_one_devl_locked(dev);
+ if (err)
+ devl_unregister(devlink);
devl_unlock(devlink);
return err;
}
@@ -1517,6 +1528,8 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
devl_lock(devlink);
mutex_lock(&dev->intf_state_mutex);
+ mlx5_hwmon_dev_unregister(dev);
+ mlx5_crdump_disable(dev);
mlx5_unregister_device(dev);
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
@@ -1534,6 +1547,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
mlx5_function_teardown(dev, true);
out:
mutex_unlock(&dev->intf_state_mutex);
+ devl_unregister(devlink);
devl_unlock(devlink);
}
@@ -1680,16 +1694,23 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev)
}
devl_lock(devlink);
+ devl_register(devlink);
+
err = mlx5_devlink_params_register(priv_to_devlink(dev));
- devl_unlock(devlink);
if (err) {
mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
- goto query_hca_caps_err;
+ goto params_reg_err;
}
+ devl_unlock(devlink);
return 0;
+params_reg_err:
+ devl_unregister(devlink);
+ devl_unlock(devlink);
query_hca_caps_err:
+ devl_unregister(devlink);
+ devl_unlock(devlink);
mlx5_function_disable(dev, true);
out:
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
@@ -1702,6 +1723,7 @@ void mlx5_uninit_one_light(struct mlx5_core_dev *dev)
devl_lock(devlink);
mlx5_devlink_params_unregister(priv_to_devlink(dev));
+ devl_unregister(devlink);
devl_unlock(devlink);
if (dev->state != MLX5_DEVICE_STATE_UP)
return;
@@ -1943,16 +1965,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_init_one;
}
- err = mlx5_crdump_enable(dev);
- if (err)
- dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
-
- err = mlx5_hwmon_dev_register(dev);
- if (err)
- mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
-
pci_save_state(pdev);
- devlink_register(devlink);
return 0;
err_init_one:
@@ -1973,16 +1986,9 @@ static void remove_one(struct pci_dev *pdev)
struct devlink *devlink = priv_to_devlink(dev);
set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
- /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using
- * devlink notify APIs.
- * Hence, we must drain them before unregistering the devlink.
- */
mlx5_drain_fw_reset(dev);
mlx5_drain_health_wq(dev);
- devlink_unregister(devlink);
mlx5_sriov_disable(pdev, false);
- mlx5_hwmon_dev_unregister(dev);
- mlx5_crdump_disable(dev);
mlx5_uninit_one(dev);
mlx5_pci_close(dev);
mlx5_mdev_uninit(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 4dcf995cb1a204..6bac8ad70ba60b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -19,6 +19,7 @@
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
+#define MLX5_IRQ_VEC_COMP_BASE 1
#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
@@ -246,6 +247,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
return;
}
+ vecidx -= MLX5_IRQ_VEC_COMP_BASE;
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}
@@ -585,7 +587,7 @@ struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
struct mlx5_irq_pool *pool = table->pcif_pool;
struct irq_affinity_desc af_desc;
- int offset = 1;
+ int offset = MLX5_IRQ_VEC_COMP_BASE;
if (!pool->xa_num_irqs.max)
offset = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index bc863e1f062e6b..7ebe712808275a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -75,7 +75,6 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
goto peer_devlink_set_err;
}
- devlink_register(devlink);
return 0;
peer_devlink_set_err:
@@ -101,7 +100,6 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
devlink = priv_to_devlink(mdev);
set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
mlx5_drain_health_wq(mdev);
- devlink_unregister(devlink);
if (mlx5_dev_is_lightweight(mdev))
mlx5_uninit_one_light(mdev);
else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
index 64f4cc284aea41..030a5776c93740 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
@@ -205,12 +205,11 @@ dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size)
}
static int
-dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
+dr_dump_rule_action_mem(struct seq_file *file, char *buff, const u64 rule_id,
struct mlx5dr_rule_action_member *action_mem)
{
struct mlx5dr_action *action = action_mem->action;
const u64 action_id = DR_DBG_PTR_TO_ID(action);
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
u64 hit_tbl_ptr, miss_tbl_ptr;
u32 hit_tbl_id, miss_tbl_id;
int ret;
@@ -488,10 +487,9 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
}
static int
-dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
+dr_dump_rule_mem(struct seq_file *file, char *buff, struct mlx5dr_ste *ste,
bool is_rx, const u64 rule_id, u8 format_ver)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
char hw_ste_dump[DR_HEX_SIZE];
u32 mem_rec_type;
int ret;
@@ -522,7 +520,8 @@ dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
}
static int
-dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
+dr_dump_rule_rx_tx(struct seq_file *file, char *buff,
+ struct mlx5dr_rule_rx_tx *rule_rx_tx,
bool is_rx, const u64 rule_id, u8 format_ver)
{
struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
@@ -533,7 +532,7 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
return 0;
while (i--) {
- ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id,
+ ret = dr_dump_rule_mem(file, buff, ste_arr[i], is_rx, rule_id,
format_ver);
if (ret < 0)
return ret;
@@ -542,7 +541,8 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
return 0;
}
-static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
+static noinline_for_stack int
+dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
{
struct mlx5dr_rule_action_member *action_mem;
const u64 rule_id = DR_DBG_PTR_TO_ID(rule);
@@ -565,19 +565,19 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
return ret;
if (rx->nic_matcher) {
- ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver);
+ ret = dr_dump_rule_rx_tx(file, buff, rx, true, rule_id, format_ver);
if (ret < 0)
return ret;
}
if (tx->nic_matcher) {
- ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver);
+ ret = dr_dump_rule_rx_tx(file, buff, tx, false, rule_id, format_ver);
if (ret < 0)
return ret;
}
list_for_each_entry(action_mem, &rule->rule_actions_list, list) {
- ret = dr_dump_rule_action_mem(file, rule_id, action_mem);
+ ret = dr_dump_rule_action_mem(file, buff, rule_id, action_mem);
if (ret < 0)
return ret;
}
@@ -586,10 +586,10 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
}
static int
-dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
+dr_dump_matcher_mask(struct seq_file *file, char *buff,
+ struct mlx5dr_match_param *mask,
u8 criteria, const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
char dump[DR_HEX_SIZE];
int ret;
@@ -681,10 +681,10 @@ dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
}
static int
-dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
+dr_dump_matcher_builder(struct seq_file *file, char *buff,
+ struct mlx5dr_ste_build *builder,
u32 index, bool is_rx, const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -702,11 +702,10 @@ dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
}
static int
-dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_matcher_rx_tx(struct seq_file *file, char *buff, bool is_rx,
struct mlx5dr_matcher_rx_tx *matcher_rx_tx,
const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
enum dr_dump_rec_type rec_type;
u64 s_icm_addr, e_icm_addr;
int i, ret;
@@ -731,7 +730,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
return ret;
for (i = 0; i < matcher_rx_tx->num_of_builders; i++) {
- ret = dr_dump_matcher_builder(file,
+ ret = dr_dump_matcher_builder(file, buff,
&matcher_rx_tx->ste_builder[i],
i, is_rx, matcher_id);
if (ret < 0)
@@ -741,7 +740,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
return 0;
}
-static int
+static noinline_for_stack int
dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
{
struct mlx5dr_matcher_rx_tx *rx = &matcher->rx;
@@ -763,19 +762,19 @@ dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
if (ret)
return ret;
- ret = dr_dump_matcher_mask(file, &matcher->mask,
+ ret = dr_dump_matcher_mask(file, buff, &matcher->mask,
matcher->match_criteria, matcher_id);
if (ret < 0)
return ret;
if (rx->nic_tbl) {
- ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id);
+ ret = dr_dump_matcher_rx_tx(file, buff, true, rx, matcher_id);
if (ret < 0)
return ret;
}
if (tx->nic_tbl) {
- ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id);
+ ret = dr_dump_matcher_rx_tx(file, buff, false, tx, matcher_id);
if (ret < 0)
return ret;
}
@@ -803,11 +802,10 @@ dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher)
}
static int
-dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_table_rx_tx(struct seq_file *file, char *buff, bool is_rx,
struct mlx5dr_table_rx_tx *table_rx_tx,
const u64 table_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
enum dr_dump_rec_type rec_type;
u64 s_icm_addr;
int ret;
@@ -829,7 +827,8 @@ dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
return 0;
}
-static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
+static noinline_for_stack int
+dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
{
struct mlx5dr_table_rx_tx *rx = &table->rx;
struct mlx5dr_table_rx_tx *tx = &table->tx;
@@ -848,14 +847,14 @@ static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
return ret;
if (rx->nic_dmn) {
- ret = dr_dump_table_rx_tx(file, true, rx,
+ ret = dr_dump_table_rx_tx(file, buff, true, rx,
DR_DBG_PTR_TO_ID(table));
if (ret < 0)
return ret;
}
if (tx->nic_dmn) {
- ret = dr_dump_table_rx_tx(file, false, tx,
+ ret = dr_dump_table_rx_tx(file, buff, false, tx,
DR_DBG_PTR_TO_ID(table));
if (ret < 0)
return ret;
@@ -881,10 +880,10 @@ static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl)
}
static int
-dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
+dr_dump_send_ring(struct seq_file *file, char *buff,
+ struct mlx5dr_send_ring *ring,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -902,13 +901,13 @@ dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
return 0;
}
-static noinline_for_stack int
+static int
dr_dump_domain_info_flex_parser(struct seq_file *file,
+ char *buff,
const char *flex_parser_name,
const u8 flex_parser_value,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -925,11 +924,11 @@ dr_dump_domain_info_flex_parser(struct seq_file *file,
return 0;
}
-static noinline_for_stack int
-dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
+static int
+dr_dump_domain_info_caps(struct seq_file *file, char *buff,
+ struct mlx5dr_cmd_caps *caps,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
struct mlx5dr_cmd_vport_cap *vport_caps;
unsigned long i, vports_num;
int ret;
@@ -969,34 +968,35 @@ dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
}
static int
-dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info,
+dr_dump_domain_info(struct seq_file *file, char *buff,
+ struct mlx5dr_domain_info *info,
const u64 domain_id)
{
int ret;
- ret = dr_dump_domain_info_caps(file, &info->caps, domain_id);
+ ret = dr_dump_domain_info_caps(file, buff, &info->caps, domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw0",
info->caps.flex_parser_id_icmp_dw0,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw1",
info->caps.flex_parser_id_icmp_dw1,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw0",
info->caps.flex_parser_id_icmpv6_dw0,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw1",
info->caps.flex_parser_id_icmpv6_dw1,
domain_id);
if (ret < 0)
@@ -1032,12 +1032,12 @@ dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn)
if (ret)
return ret;
- ret = dr_dump_domain_info(file, &dmn->info, domain_id);
+ ret = dr_dump_domain_info(file, buff, &dmn->info, domain_id);
if (ret < 0)
return ret;
if (dmn->info.supp_sw_steering) {
- ret = dr_dump_send_ring(file, dmn->send_ring, domain_id);
+ ret = dr_dump_send_ring(file, buff, dmn->send_ring, domain_id);
if (ret < 0)
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index 3d09fa54598f1a..ba303868686a77 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/phy.h>
#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include "mlxbf_gige.h"
@@ -139,13 +140,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
control |= MLXBF_GIGE_CONTROL_PORT_EN;
writeq(control, priv->base + MLXBF_GIGE_CONTROL);
- err = mlxbf_gige_request_irqs(priv);
- if (err)
- return err;
mlxbf_gige_cache_stats(priv);
err = mlxbf_gige_clean_port(priv);
if (err)
- goto free_irqs;
+ return err;
/* Clear driver's valid_polarity to match hardware,
* since the above call to clean_port() resets the
@@ -157,7 +155,7 @@ static int mlxbf_gige_open(struct net_device *netdev)
err = mlxbf_gige_tx_init(priv);
if (err)
- goto free_irqs;
+ goto phy_deinit;
err = mlxbf_gige_rx_init(priv);
if (err)
goto tx_deinit;
@@ -166,6 +164,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
napi_enable(&priv->napi);
netif_start_queue(netdev);
+ err = mlxbf_gige_request_irqs(priv);
+ if (err)
+ goto napi_deinit;
+
/* Set bits in INT_EN that we care about */
int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR |
MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS |
@@ -182,11 +184,17 @@ static int mlxbf_gige_open(struct net_device *netdev)
return 0;
+napi_deinit:
+ netif_stop_queue(netdev);
+ napi_disable(&priv->napi);
+ netif_napi_del(&priv->napi);
+ mlxbf_gige_rx_deinit(priv);
+
tx_deinit:
mlxbf_gige_tx_deinit(priv);
-free_irqs:
- mlxbf_gige_free_irqs(priv);
+phy_deinit:
+ phy_stop(phydev);
return err;
}
@@ -485,8 +493,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev)
{
struct mlxbf_gige *priv = platform_get_drvdata(pdev);
- writeq(0, priv->base + MLXBF_GIGE_INT_EN);
- mlxbf_gige_clean_port(priv);
+ rtnl_lock();
+ netif_device_detach(priv->netdev);
+
+ if (netif_running(priv->netdev))
+ dev_close(priv->netdev);
+
+ rtnl_unlock();
}
static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index e4d7739bd7c888..4a79c0d7e7ad85 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -849,7 +849,7 @@ free_skb:
static const struct mlxsw_listener mlxsw_emad_rx_listener =
MLXSW_RXL(mlxsw_emad_rx_listener_func, ETHEMAD, TRAP_TO_CPU, false,
- EMAD, DISCARD);
+ EMAD, FORWARD);
static int mlxsw_emad_tlv_enable(struct mlxsw_core *mlxsw_core)
{
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 53b150b7ae4e70..6c06b059276084 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -1357,24 +1357,20 @@ static struct mlxsw_linecards_event_ops mlxsw_env_event_ops = {
.got_inactive = mlxsw_env_got_inactive,
};
-static int mlxsw_env_max_module_eeprom_len_query(struct mlxsw_env *mlxsw_env)
+static void mlxsw_env_max_module_eeprom_len_query(struct mlxsw_env *mlxsw_env)
{
char mcam_pl[MLXSW_REG_MCAM_LEN];
- bool mcia_128b_supported;
+ bool mcia_128b_supported = false;
int err;
mlxsw_reg_mcam_pack(mcam_pl,
MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
err = mlxsw_reg_query(mlxsw_env->core, MLXSW_REG(mcam), mcam_pl);
- if (err)
- return err;
-
- mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_MCIA_128B,
- &mcia_128b_supported);
+ if (!err)
+ mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_MCIA_128B,
+ &mcia_128b_supported);
mlxsw_env->max_eeprom_len = mcia_128b_supported ? 128 : 48;
-
- return 0;
}
int mlxsw_env_init(struct mlxsw_core *mlxsw_core,
@@ -1445,15 +1441,11 @@ int mlxsw_env_init(struct mlxsw_core *mlxsw_core,
if (err)
goto err_type_set;
- err = mlxsw_env_max_module_eeprom_len_query(env);
- if (err)
- goto err_eeprom_len_query;
-
+ mlxsw_env_max_module_eeprom_len_query(env);
env->line_cards[0]->active = true;
return 0;
-err_eeprom_len_query:
err_type_set:
mlxsw_env_module_event_disable(env, 0);
err_mlxsw_env_module_event_enable:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index af99bf17eb36de..f42a1b1c936873 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1530,7 +1530,7 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
{
struct pci_dev *pdev = mlxsw_pci->pdev;
char mcam_pl[MLXSW_REG_MCAM_LEN];
- bool pci_reset_supported;
+ bool pci_reset_supported = false;
u32 sys_status;
int err;
@@ -1548,11 +1548,9 @@ mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
mlxsw_reg_mcam_pack(mcam_pl,
MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
- if (err)
- return err;
-
- mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
- &pci_reset_supported);
+ if (!err)
+ mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
+ &pci_reset_supported);
if (pci_reset_supported) {
pci_dbg(pdev, "Starting PCI reset flow\n");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index f20052776b3f2e..92a406f02eae74 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -10,6 +10,7 @@
#include <linux/netdevice.h>
#include <linux/mutex.h>
#include <linux/refcount.h>
+#include <linux/idr.h>
#include <net/devlink.h>
#include <trace/events/mlxsw.h>
@@ -58,41 +59,43 @@ int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp,
static int mlxsw_sp_acl_tcam_region_id_get(struct mlxsw_sp_acl_tcam *tcam,
u16 *p_id)
{
- u16 id;
+ int id;
- id = find_first_zero_bit(tcam->used_regions, tcam->max_regions);
- if (id < tcam->max_regions) {
- __set_bit(id, tcam->used_regions);
- *p_id = id;
- return 0;
- }
- return -ENOBUFS;
+ id = ida_alloc_max(&tcam->used_regions, tcam->max_regions - 1,
+ GFP_KERNEL);
+ if (id < 0)
+ return id;
+
+ *p_id = id;
+
+ return 0;
}
static void mlxsw_sp_acl_tcam_region_id_put(struct mlxsw_sp_acl_tcam *tcam,
u16 id)
{
- __clear_bit(id, tcam->used_regions);
+ ida_free(&tcam->used_regions, id);
}
static int mlxsw_sp_acl_tcam_group_id_get(struct mlxsw_sp_acl_tcam *tcam,
u16 *p_id)
{
- u16 id;
+ int id;
- id = find_first_zero_bit(tcam->used_groups, tcam->max_groups);
- if (id < tcam->max_groups) {
- __set_bit(id, tcam->used_groups);
- *p_id = id;
- return 0;
- }
- return -ENOBUFS;
+ id = ida_alloc_max(&tcam->used_groups, tcam->max_groups - 1,
+ GFP_KERNEL);
+ if (id < 0)
+ return id;
+
+ *p_id = id;
+
+ return 0;
}
static void mlxsw_sp_acl_tcam_group_id_put(struct mlxsw_sp_acl_tcam *tcam,
u16 id)
{
- __clear_bit(id, tcam->used_groups);
+ ida_free(&tcam->used_groups, id);
}
struct mlxsw_sp_acl_tcam_pattern {
@@ -715,7 +718,9 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work)
rehash.dw.work);
int credits = MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS;
+ mutex_lock(&vregion->lock);
mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion, &credits);
+ mutex_unlock(&vregion->lock);
if (credits < 0)
/* Rehash gone out of credits so it was interrupted.
* Schedule the work as soon as possible to continue.
@@ -726,6 +731,17 @@ static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work)
}
static void
+mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(struct mlxsw_sp_acl_tcam_rehash_ctx *ctx)
+{
+ /* The entry markers are relative to the current chunk and therefore
+ * needs to be reset together with the chunk marker.
+ */
+ ctx->current_vchunk = NULL;
+ ctx->start_ventry = NULL;
+ ctx->stop_ventry = NULL;
+}
+
+static void
mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(struct mlxsw_sp_acl_tcam_vchunk *vchunk)
{
struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion;
@@ -747,7 +763,7 @@ mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(struct mlxsw_sp_acl_tcam_vregion *v
* the current chunk pointer to make sure all chunks
* are properly migrated.
*/
- vregion->rehash.ctx.current_vchunk = NULL;
+ mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(&vregion->rehash.ctx);
}
static struct mlxsw_sp_acl_tcam_vregion *
@@ -820,10 +836,14 @@ mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_tcam *tcam = vregion->tcam;
if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) {
+ struct mlxsw_sp_acl_tcam_rehash_ctx *ctx = &vregion->rehash.ctx;
+
mutex_lock(&tcam->lock);
list_del(&vregion->tlist);
mutex_unlock(&tcam->lock);
- cancel_delayed_work_sync(&vregion->rehash.dw);
+ if (cancel_delayed_work_sync(&vregion->rehash.dw) &&
+ ctx->hints_priv)
+ ops->region_rehash_hints_put(ctx->hints_priv);
}
mlxsw_sp_acl_tcam_vgroup_vregion_detach(mlxsw_sp, vregion);
if (vregion->region2)
@@ -1154,8 +1174,14 @@ mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_tcam_ventry *ventry,
bool *activity)
{
- return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp,
- ventry->entry, activity);
+ struct mlxsw_sp_acl_tcam_vregion *vregion = ventry->vchunk->vregion;
+ int err;
+
+ mutex_lock(&vregion->lock);
+ err = mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, ventry->entry,
+ activity);
+ mutex_unlock(&vregion->lock);
+ return err;
}
static int
@@ -1189,6 +1215,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_start(struct mlxsw_sp *mlxsw_sp,
{
struct mlxsw_sp_acl_tcam_chunk *new_chunk;
+ WARN_ON(vchunk->chunk2);
+
new_chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region);
if (IS_ERR(new_chunk))
return PTR_ERR(new_chunk);
@@ -1207,7 +1235,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_end(struct mlxsw_sp *mlxsw_sp,
{
mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2);
vchunk->chunk2 = NULL;
- ctx->current_vchunk = NULL;
+ mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx);
}
static int
@@ -1230,6 +1258,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+ if (list_empty(&vchunk->ventry_list))
+ goto out;
+
/* If the migration got interrupted, we have the ventry to start from
* stored in context.
*/
@@ -1239,6 +1270,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp,
ventry = list_first_entry(&vchunk->ventry_list,
typeof(*ventry), list);
+ WARN_ON(ventry->vchunk != vchunk);
+
list_for_each_entry_from(ventry, &vchunk->ventry_list, list) {
/* During rollback, once we reach the ventry that failed
* to migrate, we are done.
@@ -1279,6 +1312,7 @@ mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp,
}
}
+out:
mlxsw_sp_acl_tcam_vchunk_migrate_end(mlxsw_sp, vchunk, ctx);
return 0;
}
@@ -1292,6 +1326,9 @@ mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_acl_tcam_vchunk *vchunk;
int err;
+ if (list_empty(&vregion->vchunk_list))
+ return 0;
+
/* If the migration got interrupted, we have the vchunk
* we are working on stored in context.
*/
@@ -1320,16 +1357,17 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp,
int err, err2;
trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion);
- mutex_lock(&vregion->lock);
err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion,
ctx, credits);
if (err) {
+ if (ctx->this_is_rollback)
+ return err;
/* In case migration was not successful, we need to swap
* so the original region pointer is assigned again
* to vregion->region.
*/
swap(vregion->region, vregion->region2);
- ctx->current_vchunk = NULL;
+ mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx);
ctx->this_is_rollback = true;
err2 = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion,
ctx, credits);
@@ -1340,7 +1378,6 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp,
/* Let the rollback to be continued later on. */
}
}
- mutex_unlock(&vregion->lock);
trace_mlxsw_sp_acl_tcam_vregion_migrate_end(mlxsw_sp, vregion);
return err;
}
@@ -1389,6 +1426,7 @@ mlxsw_sp_acl_tcam_vregion_rehash_start(struct mlxsw_sp *mlxsw_sp,
ctx->hints_priv = hints_priv;
ctx->this_is_rollback = false;
+ mlxsw_sp_acl_tcam_rehash_ctx_vchunk_reset(ctx);
return 0;
@@ -1441,7 +1479,8 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp,
err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion,
ctx, credits);
if (err) {
- dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n");
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n");
+ return;
}
if (*credits >= 0)
@@ -1549,19 +1588,11 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
if (max_tcam_regions < max_regions)
max_regions = max_tcam_regions;
- tcam->used_regions = bitmap_zalloc(max_regions, GFP_KERNEL);
- if (!tcam->used_regions) {
- err = -ENOMEM;
- goto err_alloc_used_regions;
- }
+ ida_init(&tcam->used_regions);
tcam->max_regions = max_regions;
max_groups = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_GROUPS);
- tcam->used_groups = bitmap_zalloc(max_groups, GFP_KERNEL);
- if (!tcam->used_groups) {
- err = -ENOMEM;
- goto err_alloc_used_groups;
- }
+ ida_init(&tcam->used_groups);
tcam->max_groups = max_groups;
tcam->max_group_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
ACL_MAX_GROUP_SIZE);
@@ -1575,10 +1606,8 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp,
return 0;
err_tcam_init:
- bitmap_free(tcam->used_groups);
-err_alloc_used_groups:
- bitmap_free(tcam->used_regions);
-err_alloc_used_regions:
+ ida_destroy(&tcam->used_groups);
+ ida_destroy(&tcam->used_regions);
mlxsw_sp_acl_tcam_rehash_params_unregister(mlxsw_sp);
err_rehash_params_register:
mutex_destroy(&tcam->lock);
@@ -1591,8 +1620,8 @@ void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops;
ops->fini(mlxsw_sp, tcam->priv);
- bitmap_free(tcam->used_groups);
- bitmap_free(tcam->used_regions);
+ ida_destroy(&tcam->used_groups);
+ ida_destroy(&tcam->used_regions);
mlxsw_sp_acl_tcam_rehash_params_unregister(mlxsw_sp);
mutex_destroy(&tcam->lock);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
index 462bf448497d33..79a1d860651253 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
@@ -6,15 +6,16 @@
#include <linux/list.h>
#include <linux/parman.h>
+#include <linux/idr.h>
#include "reg.h"
#include "spectrum.h"
#include "core_acl_flex_keys.h"
struct mlxsw_sp_acl_tcam {
- unsigned long *used_regions; /* bit array */
+ struct ida used_regions;
unsigned int max_regions;
- unsigned long *used_groups; /* bit array */
+ struct ida used_groups;
unsigned int max_groups;
unsigned int max_group_size;
struct mutex lock; /* guards vregion list */
diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h
index e5ec0a363aff84..31f75b4a67fd79 100644
--- a/drivers/net/ethernet/micrel/ks8851.h
+++ b/drivers/net/ethernet/micrel/ks8851.h
@@ -368,7 +368,6 @@ union ks8851_tx_hdr {
* @rdfifo: FIFO read callback
* @wrfifo: FIFO write callback
* @start_xmit: start_xmit() implementation callback
- * @rx_skb: rx_skb() implementation callback
* @flush_tx_work: flush_tx_work() implementation callback
*
* The @statelock is used to protect information in the structure which may
@@ -423,8 +422,6 @@ struct ks8851_net {
struct sk_buff *txp, bool irq);
netdev_tx_t (*start_xmit)(struct sk_buff *skb,
struct net_device *dev);
- void (*rx_skb)(struct ks8851_net *ks,
- struct sk_buff *skb);
void (*flush_tx_work)(struct ks8851_net *ks);
};
diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
index 0bf13b38b8f5b9..d4cdf3d4f55257 100644
--- a/drivers/net/ethernet/micrel/ks8851_common.c
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -232,16 +232,6 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt)
}
/**
- * ks8851_rx_skb - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb)
-{
- ks->rx_skb(ks, skb);
-}
-
-/**
* ks8851_rx_pkts - receive packets from the host
* @ks: The device information.
*
@@ -309,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
ks8851_dbg_dumpkkt(ks, rxpkt);
skb->protocol = eth_type_trans(skb, ks->netdev);
- ks8851_rx_skb(ks, skb);
+ __netif_rx(skb);
ks->netdev->stats.rx_packets++;
ks->netdev->stats.rx_bytes += rxlen;
@@ -340,6 +330,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
unsigned long flags;
unsigned int status;
+ local_bh_disable();
+
ks8851_lock(ks, &flags);
status = ks8851_rdreg16(ks, KS_ISR);
@@ -416,6 +408,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
if (status & IRQ_LCI)
mii_check_link(&ks->mii);
+ local_bh_enable();
+
return IRQ_HANDLED;
}
diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c
index 2a7f2985426703..381b9cd285ebd0 100644
--- a/drivers/net/ethernet/micrel/ks8851_par.c
+++ b/drivers/net/ethernet/micrel/ks8851_par.c
@@ -210,16 +210,6 @@ static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp,
iowrite16_rep(ksp->hw_addr, txp->data, len / 2);
}
-/**
- * ks8851_rx_skb_par - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb)
-{
- netif_rx(skb);
-}
-
static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks)
{
return ks8851_rdreg16_par(ks, KS_TXQCR);
@@ -298,7 +288,6 @@ static int ks8851_probe_par(struct platform_device *pdev)
ks->rdfifo = ks8851_rdfifo_par;
ks->wrfifo = ks8851_wrfifo_par;
ks->start_xmit = ks8851_start_xmit_par;
- ks->rx_skb = ks8851_rx_skb_par;
#define STD_IRQ (IRQ_LCI | /* Link Change */ \
IRQ_RXI | /* RX done */ \
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 2f803377c9f9dd..670c1de966db88 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -299,16 +299,6 @@ static unsigned int calc_txlen(unsigned int len)
}
/**
- * ks8851_rx_skb_spi - receive skbuff
- * @ks: The device state
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
-{
- netif_rx(skb);
-}
-
-/**
* ks8851_tx_work - process tx packet(s)
* @work: The work strucutre what was scheduled.
*
@@ -435,7 +425,6 @@ static int ks8851_probe_spi(struct spi_device *spi)
ks->rdfifo = ks8851_rdfifo_spi;
ks->wrfifo = ks8851_wrfifo_spi;
ks->start_xmit = ks8851_start_xmit_spi;
- ks->rx_skb = ks8851_rx_skb_spi;
ks->flush_tx_work = ks8851_flush_tx_work_spi;
#define STD_IRQ (IRQ_LCI | /* Link Change */ \
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index bd8aa83b47e5ee..75a988c0bd794a 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -25,6 +25,8 @@
#define PCS_POWER_STATE_DOWN 0x6
#define PCS_POWER_STATE_UP 0x4
+#define RFE_RD_FIFO_TH_3_DWORDS 0x3
+
static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter)
{
u32 chip_rev;
@@ -3272,6 +3274,21 @@ static void lan743x_full_cleanup(struct lan743x_adapter *adapter)
lan743x_pci_cleanup(adapter);
}
+static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter)
+{
+ u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_;
+
+ if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) {
+ u32 misc_ctl;
+
+ misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0);
+ misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_;
+ misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_,
+ RFE_RD_FIFO_TH_3_DWORDS);
+ lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl);
+ }
+}
+
static int lan743x_hardware_init(struct lan743x_adapter *adapter,
struct pci_dev *pdev)
{
@@ -3287,6 +3304,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
pci11x1x_strap_get_status(adapter);
spin_lock_init(&adapter->eth_syslock_spinlock);
mutex_init(&adapter->sgmii_rw_lock);
+ pci11x1x_set_rfe_rd_fifo_threshold(adapter);
} else {
adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS;
adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS;
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index be79cb0ae5af33..645bc048e52ef5 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -26,6 +26,7 @@
#define ID_REV_CHIP_REV_MASK_ (0x0000FFFF)
#define ID_REV_CHIP_REV_A0_ (0x00000000)
#define ID_REV_CHIP_REV_B0_ (0x00000010)
+#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0)
#define FPGA_REV (0x04)
#define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF)
@@ -311,6 +312,9 @@
#define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8)
#define SGMII_CTL_SGMII_POWER_DN_ BIT(1)
+#define MISC_CTL_0 (0x920)
+#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4)
+
/* Vendor Specific SGMII MMD details */
#define SR_VSMMD_PCS_ID1 0x0004
#define SR_VSMMD_PCS_ID2 0x0005
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
index 3a1b1a1f5a1951..60dd2fd603a855 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
@@ -731,7 +731,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5,
bool sgmii = false, inband_aneg = false;
int err;
- if (port->conf.inband) {
+ if (conf->inband) {
if (conf->portmode == PHY_INTERFACE_MODE_SGMII ||
conf->portmode == PHY_INTERFACE_MODE_QSGMII)
inband_aneg = true; /* Cisco-SGMII in-band-aneg */
@@ -948,7 +948,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5,
if (err)
return -EINVAL;
- if (port->conf.inband) {
+ if (conf->inband) {
/* Enable/disable 1G counters in ASM */
spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev),
ASM_PORT_CFG_CSC_STAT_DIS,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
index 523e0c470894f7..55f255a3c9db69 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
@@ -36,6 +36,27 @@ struct sparx5_tc_flower_template {
u16 l3_proto; /* protocol specified in the template */
};
+/* SparX-5 VCAP fragment types:
+ * 0 = no fragment, 1 = initial fragment,
+ * 2 = suspicious fragment, 3 = valid follow-up fragment
+ */
+enum { /* key / mask */
+ FRAG_NOT = 0x03, /* 0 / 3 */
+ FRAG_SOME = 0x11, /* 1 / 1 */
+ FRAG_FIRST = 0x13, /* 1 / 3 */
+ FRAG_LATER = 0x33, /* 3 / 3 */
+ FRAG_INVAL = 0xff, /* invalid */
+};
+
+/* Flower fragment flag to VCAP fragment type mapping */
+static const u8 sparx5_vcap_frag_map[4][4] = { /* is_frag */
+ { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_FIRST }, /* 0/0 */
+ { FRAG_NOT, FRAG_NOT, FRAG_INVAL, FRAG_INVAL }, /* 0/1 */
+ { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_INVAL }, /* 1/0 */
+ { FRAG_SOME, FRAG_LATER, FRAG_INVAL, FRAG_FIRST } /* 1/1 */
+ /* 0/0 0/1 1/0 1/1 <-- first_frag */
+};
+
static int
sparx5_tc_flower_es0_tpid(struct vcap_tc_flower_parse_usage *st)
{
@@ -145,29 +166,27 @@ sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st)
flow_rule_match_control(st->frule, &mt);
if (mt.mask->flags) {
- if (mt.mask->flags & FLOW_DIS_FIRST_FRAG) {
- if (mt.key->flags & FLOW_DIS_FIRST_FRAG) {
- value = 1; /* initial fragment */
- mask = 0x3;
- } else {
- if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) {
- value = 3; /* follow up fragment */
- mask = 0x3;
- } else {
- value = 0; /* no fragment */
- mask = 0x3;
- }
- }
- } else {
- if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) {
- value = 3; /* follow up fragment */
- mask = 0x3;
- } else {
- value = 0; /* no fragment */
- mask = 0x3;
- }
+ u8 is_frag_key = !!(mt.key->flags & FLOW_DIS_IS_FRAGMENT);
+ u8 is_frag_mask = !!(mt.mask->flags & FLOW_DIS_IS_FRAGMENT);
+ u8 is_frag_idx = (is_frag_key << 1) | is_frag_mask;
+
+ u8 first_frag_key = !!(mt.key->flags & FLOW_DIS_FIRST_FRAG);
+ u8 first_frag_mask = !!(mt.mask->flags & FLOW_DIS_FIRST_FRAG);
+ u8 first_frag_idx = (first_frag_key << 1) | first_frag_mask;
+
+ /* Lookup verdict based on the 2 + 2 input bits */
+ u8 vdt = sparx5_vcap_frag_map[is_frag_idx][first_frag_idx];
+
+ if (vdt == FRAG_INVAL) {
+ NL_SET_ERR_MSG_MOD(st->fco->common.extack,
+ "Match on invalid fragment flag combination");
+ return -EINVAL;
}
+ /* Extract VCAP fragment key and mask from verdict */
+ value = (vdt >> 4) & 0x3;
+ mask = vdt & 0x3;
+
err = vcap_rule_add_key_u32(st->vrule,
VCAP_KF_L3_FRAGMENT_TYPE,
value, mask);
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index 2729a2c5acf9cc..11021c34e47e7c 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -3,6 +3,7 @@
#include <net/mana/gdma.h>
#include <net/mana/hw_channel.h>
+#include <linux/vmalloc.h>
static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id)
{
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 59287c6e6cee6f..d8af5e7e15b4d8 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -601,7 +601,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
*alloc_size = mtu + MANA_RXBUF_PAD + *headroom;
- *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN);
+ *datasize = mtu + ETH_HLEN;
}
static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
index 4c043052198d47..00882ffc7a029e 100644
--- a/drivers/net/ethernet/realtek/r8169.h
+++ b/drivers/net/ethernet/realtek/r8169.h
@@ -73,6 +73,7 @@ enum mac_version {
};
struct rtl8169_private;
+struct r8169_led_classdev;
void r8169_apply_firmware(struct rtl8169_private *tp);
u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp);
@@ -84,7 +85,8 @@ void r8169_get_led_name(struct rtl8169_private *tp, int idx,
char *buf, int buf_len);
int rtl8168_get_led_mode(struct rtl8169_private *tp);
int rtl8168_led_mod_ctrl(struct rtl8169_private *tp, u16 mask, u16 val);
-void rtl8168_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev);
int rtl8125_get_led_mode(struct rtl8169_private *tp, int index);
int rtl8125_set_led_mode(struct rtl8169_private *tp, int index, u16 mode);
-void rtl8125_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev);
+void r8169_remove_leds(struct r8169_led_classdev *leds);
diff --git a/drivers/net/ethernet/realtek/r8169_leds.c b/drivers/net/ethernet/realtek/r8169_leds.c
index 7c5dc9d0df855e..e10bee706bc691 100644
--- a/drivers/net/ethernet/realtek/r8169_leds.c
+++ b/drivers/net/ethernet/realtek/r8169_leds.c
@@ -146,22 +146,22 @@ static void rtl8168_setup_ldev(struct r8169_led_classdev *ldev,
led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
/* ignore errors */
- devm_led_classdev_register(&ndev->dev, led_cdev);
+ led_classdev_register(&ndev->dev, led_cdev);
}
-void rtl8168_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev)
{
- /* bind resource mgmt to netdev */
- struct device *dev = &ndev->dev;
struct r8169_led_classdev *leds;
int i;
- leds = devm_kcalloc(dev, RTL8168_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ leds = kcalloc(RTL8168_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
if (!leds)
- return;
+ return NULL;
for (i = 0; i < RTL8168_NUM_LEDS; i++)
rtl8168_setup_ldev(leds + i, ndev, i);
+
+ return leds;
}
static int rtl8125_led_hw_control_is_supported(struct led_classdev *led_cdev,
@@ -245,20 +245,31 @@ static void rtl8125_setup_led_ldev(struct r8169_led_classdev *ldev,
led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
/* ignore errors */
- devm_led_classdev_register(&ndev->dev, led_cdev);
+ led_classdev_register(&ndev->dev, led_cdev);
}
-void rtl8125_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev)
{
- /* bind resource mgmt to netdev */
- struct device *dev = &ndev->dev;
struct r8169_led_classdev *leds;
int i;
- leds = devm_kcalloc(dev, RTL8125_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ leds = kcalloc(RTL8125_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
if (!leds)
- return;
+ return NULL;
for (i = 0; i < RTL8125_NUM_LEDS; i++)
rtl8125_setup_led_ldev(leds + i, ndev, i);
+
+ return leds;
+}
+
+void r8169_remove_leds(struct r8169_led_classdev *leds)
+{
+ if (!leds)
+ return;
+
+ for (struct r8169_led_classdev *l = leds; l->ndev; l++)
+ led_classdev_unregister(&l->led);
+
+ kfree(leds);
}
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 5c879a5c86d70b..0fc5fe564ae50b 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -647,6 +647,8 @@ struct rtl8169_private {
const char *fw_name;
struct rtl_fw *rtl_fw;
+ struct r8169_led_classdev *leds;
+
u32 ocp_base;
};
@@ -1314,17 +1316,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
}
+static void rtl_dash_loop_wait(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long usecs, int n, bool high)
+{
+ if (!tp->dash_enabled)
+ return;
+ rtl_loop_wait(tp, c, usecs, n, high);
+}
+
+static void rtl_dash_loop_wait_high(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long d, int n)
+{
+ rtl_dash_loop_wait(tp, c, d, n, true);
+}
+
+static void rtl_dash_loop_wait_low(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long d, int n)
+{
+ rtl_dash_loop_wait(tp, c, d, n, false);
+}
+
static void rtl8168dp_driver_start(struct rtl8169_private *tp)
{
r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
- rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
}
static void rtl8168ep_driver_start(struct rtl8169_private *tp)
{
r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START);
r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
- rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
+ rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
}
static void rtl8168_driver_start(struct rtl8169_private *tp)
@@ -1338,7 +1363,7 @@ static void rtl8168_driver_start(struct rtl8169_private *tp)
static void rtl8168dp_driver_stop(struct rtl8169_private *tp)
{
r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP);
- rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
}
static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
@@ -1346,7 +1371,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
rtl8168ep_stop_cmac(tp);
r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP);
r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
- rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
}
static void rtl8168_driver_stop(struct rtl8169_private *tp)
@@ -5021,6 +5046,9 @@ static void rtl_remove_one(struct pci_dev *pdev)
cancel_work_sync(&tp->wk.work);
+ if (IS_ENABLED(CONFIG_R8169_LEDS))
+ r8169_remove_leds(tp->leds);
+
unregister_netdev(tp->dev);
if (tp->dash_type != RTL_DASH_NONE)
@@ -5141,6 +5169,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
struct mii_bus *new_bus;
int ret;
+ /* On some boards with this chip version the BIOS is buggy and misses
+ * to reset the PHY page selector. This results in the PHY ID read
+ * accessing registers on a different page, returning a more or
+ * less random value. Fix this by resetting the page selector first.
+ */
+ if (tp->mac_version == RTL_GIGA_MAC_VER_25 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_26)
+ r8169_mdio_write(tp, 0x1f, 0);
+
new_bus = devm_mdiobus_alloc(&pdev->dev);
if (!new_bus)
return -ENOMEM;
@@ -5469,9 +5506,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (IS_ENABLED(CONFIG_R8169_LEDS)) {
if (rtl_is_8125(tp))
- rtl8125_init_leds(dev);
+ tp->leds = rtl8125_init_leds(dev);
else if (tp->mac_version > RTL_GIGA_MAC_VER_06)
- rtl8168_init_leds(dev);
+ tp->leds = rtl8168_init_leds(dev);
}
netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n",
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index d1be030c88483a..9b1f639f64a10c 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -769,25 +769,28 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
dma_addr_t dma_addr;
int rx_packets = 0;
u8 desc_status;
- u16 pkt_len;
+ u16 desc_len;
u8 die_dt;
int entry;
int limit;
int i;
- entry = priv->cur_rx[q] % priv->num_rx_ring[q];
limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
stats = &priv->stats[q];
- desc = &priv->rx_ring[q].desc[entry];
- for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) {
+ for (i = 0; i < limit; i++, priv->cur_rx[q]++) {
+ entry = priv->cur_rx[q] % priv->num_rx_ring[q];
+ desc = &priv->rx_ring[q].desc[entry];
+ if (rx_packets == *quota || desc->die_dt == DT_FEMPTY)
+ break;
+
/* Descriptor type must be checked before all other reads */
dma_rmb();
desc_status = desc->msc;
- pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
+ desc_len = le16_to_cpu(desc->ds_cc) & RX_DS;
/* We use 0-byte descriptors to mark the DMA mapping errors */
- if (!pkt_len)
+ if (!desc_len)
continue;
if (desc_status & MSC_MC)
@@ -808,25 +811,25 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
switch (die_dt) {
case DT_FSINGLE:
skb = ravb_get_skb_gbeth(ndev, entry, desc);
- skb_put(skb, pkt_len);
+ skb_put(skb, desc_len);
skb->protocol = eth_type_trans(skb, ndev);
if (ndev->features & NETIF_F_RXCSUM)
ravb_rx_csum_gbeth(skb);
napi_gro_receive(&priv->napi[q], skb);
rx_packets++;
- stats->rx_bytes += pkt_len;
+ stats->rx_bytes += desc_len;
break;
case DT_FSTART:
priv->rx_1st_skb = ravb_get_skb_gbeth(ndev, entry, desc);
- skb_put(priv->rx_1st_skb, pkt_len);
+ skb_put(priv->rx_1st_skb, desc_len);
break;
case DT_FMID:
skb = ravb_get_skb_gbeth(ndev, entry, desc);
skb_copy_to_linear_data_offset(priv->rx_1st_skb,
priv->rx_1st_skb->len,
skb->data,
- pkt_len);
- skb_put(priv->rx_1st_skb, pkt_len);
+ desc_len);
+ skb_put(priv->rx_1st_skb, desc_len);
dev_kfree_skb(skb);
break;
case DT_FEND:
@@ -834,23 +837,20 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
skb_copy_to_linear_data_offset(priv->rx_1st_skb,
priv->rx_1st_skb->len,
skb->data,
- pkt_len);
- skb_put(priv->rx_1st_skb, pkt_len);
+ desc_len);
+ skb_put(priv->rx_1st_skb, desc_len);
dev_kfree_skb(skb);
priv->rx_1st_skb->protocol =
eth_type_trans(priv->rx_1st_skb, ndev);
if (ndev->features & NETIF_F_RXCSUM)
- ravb_rx_csum_gbeth(skb);
+ ravb_rx_csum_gbeth(priv->rx_1st_skb);
+ stats->rx_bytes += priv->rx_1st_skb->len;
napi_gro_receive(&priv->napi[q],
priv->rx_1st_skb);
rx_packets++;
- stats->rx_bytes += pkt_len;
break;
}
}
-
- entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
- desc = &priv->rx_ring[q].desc[entry];
}
/* Refill the RX ring buffers. */
@@ -891,30 +891,29 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
- int entry = priv->cur_rx[q] % priv->num_rx_ring[q];
- int boguscnt = (priv->dirty_rx[q] + priv->num_rx_ring[q]) -
- priv->cur_rx[q];
struct net_device_stats *stats = &priv->stats[q];
struct ravb_ex_rx_desc *desc;
+ unsigned int limit, i;
struct sk_buff *skb;
dma_addr_t dma_addr;
struct timespec64 ts;
+ int rx_packets = 0;
u8 desc_status;
u16 pkt_len;
- int limit;
+ int entry;
+
+ limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
+ for (i = 0; i < limit; i++, priv->cur_rx[q]++) {
+ entry = priv->cur_rx[q] % priv->num_rx_ring[q];
+ desc = &priv->rx_ring[q].ex_desc[entry];
+ if (rx_packets == *quota || desc->die_dt == DT_FEMPTY)
+ break;
- boguscnt = min(boguscnt, *quota);
- limit = boguscnt;
- desc = &priv->rx_ring[q].ex_desc[entry];
- while (desc->die_dt != DT_FEMPTY) {
/* Descriptor type must be checked before all other reads */
dma_rmb();
desc_status = desc->msc;
pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
- if (--boguscnt < 0)
- break;
-
/* We use 0-byte descriptors to mark the DMA mapping errors */
if (!pkt_len)
continue;
@@ -960,12 +959,9 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
if (ndev->features & NETIF_F_RXCSUM)
ravb_rx_csum(skb);
napi_gro_receive(&priv->napi[q], skb);
- stats->rx_packets++;
+ rx_packets++;
stats->rx_bytes += pkt_len;
}
-
- entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
- desc = &priv->rx_ring[q].ex_desc[entry];
}
/* Refill the RX ring buffers. */
@@ -995,9 +991,9 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
desc->die_dt = DT_FEMPTY;
}
- *quota -= limit - (++boguscnt);
-
- return boguscnt <= 0;
+ stats->rx_packets += rx_packets;
+ *quota -= rx_packets;
+ return *quota == 0;
}
/* Packet receive function for Ethernet AVB */
@@ -1324,12 +1320,12 @@ static int ravb_poll(struct napi_struct *napi, int budget)
int q = napi - priv->napi;
int mask = BIT(q);
int quota = budget;
+ bool unmask;
/* Processing RX Descriptor Ring */
/* Clear RX interrupt */
ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
- if (ravb_rx(ndev, &quota, q))
- goto out;
+ unmask = !ravb_rx(ndev, &quota, q);
/* Processing TX Descriptor Ring */
spin_lock_irqsave(&priv->lock, flags);
@@ -1339,6 +1335,18 @@ static int ravb_poll(struct napi_struct *napi, int budget)
netif_wake_subqueue(ndev, q);
spin_unlock_irqrestore(&priv->lock, flags);
+ /* Receive error message handling */
+ priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
+ if (info->nc_queues)
+ priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
+ if (priv->rx_over_errors != ndev->stats.rx_over_errors)
+ ndev->stats.rx_over_errors = priv->rx_over_errors;
+ if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
+ ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
+
+ if (!unmask)
+ goto out;
+
napi_complete(napi);
/* Re-enable RX/TX interrupts */
@@ -1352,14 +1360,6 @@ static int ravb_poll(struct napi_struct *napi, int budget)
}
spin_unlock_irqrestore(&priv->lock, flags);
- /* Receive error message handling */
- priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
- if (info->nc_queues)
- priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
- if (priv->rx_over_errors != ndev->stats.rx_over_errors)
- ndev->stats.rx_over_errors = priv->rx_over_errors;
- if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
- ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
out:
return budget - quota;
}
@@ -2722,19 +2722,18 @@ static int ravb_setup_irq(struct ravb_private *priv, const char *irq_name,
struct platform_device *pdev = priv->pdev;
struct net_device *ndev = priv->ndev;
struct device *dev = &pdev->dev;
- const char *dev_name;
+ const char *devname = dev_name(dev);
unsigned long flags;
int error, irq_num;
if (irq_name) {
- dev_name = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", ndev->name, ch);
- if (!dev_name)
+ devname = devm_kasprintf(dev, GFP_KERNEL, "%s:%s", devname, ch);
+ if (!devname)
return -ENOMEM;
irq_num = platform_get_irq_byname(pdev, irq_name);
flags = 0;
} else {
- dev_name = ndev->name;
irq_num = platform_get_irq(pdev, 0);
flags = IRQF_SHARED;
}
@@ -2744,9 +2743,9 @@ static int ravb_setup_irq(struct ravb_private *priv, const char *irq_name,
if (irq)
*irq = irq_num;
- error = devm_request_irq(dev, irq_num, handler, flags, dev_name, ndev);
+ error = devm_request_irq(dev, irq_num, handler, flags, devname, ndev);
if (error)
- netdev_err(ndev, "cannot request IRQ %s\n", dev_name);
+ netdev_err(ndev, "cannot request IRQ %s\n", devname);
return error;
}
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 475e1e8c1d35f3..0786eb0da39143 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -50,7 +50,7 @@
* the macros available to do this only define GCC 8.
*/
__diag_push();
-__diag_ignore(GCC, 8, "-Woverride-init",
+__diag_ignore_all("-Woverride-init",
"logic to initialize all and then override some is OK");
static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
SH_ETH_OFFSET_DEFAULTS,
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index a6fefe675ef152..3b7d4ac1e7be07 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -553,6 +553,7 @@ extern const struct stmmac_hwtimestamp stmmac_ptp;
extern const struct stmmac_mode_ops dwmac4_ring_mode_ops;
struct mac_link {
+ u32 caps;
u32 speed_mask;
u32 speed10;
u32 speed100;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index b21d99faa2d04c..e1b761dcfa1dd5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1096,6 +1096,8 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv)
priv->dev->priv_flags |= IFF_UNICAST_FLT;
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000;
/* The loopback bit seems to be re-set when link change
* Simply mask it each time
* Speed 10/100/1000 are set in BIT(2)/BIT(3)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 3927609abc4411..8555299443f4ed 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -539,6 +539,8 @@ int dwmac1000_setup(struct stmmac_priv *priv)
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000;
mac->link.duplex = GMAC_CONTROL_DM;
mac->link.speed10 = GMAC_CONTROL_PS;
mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index a6e8d7bd95886f..7667d103cd0ebd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -175,6 +175,8 @@ int dwmac100_setup(struct stmmac_priv *priv)
dev_info(priv->device, "\tDWMAC100\n");
mac->pcsr = priv->ioaddr;
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100;
mac->link.duplex = MAC_CONTROL_F;
mac->link.speed10 = 0;
mac->link.speed100 = 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 6b6d0de0961975..a38226d7cc6a99 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -70,7 +70,10 @@ static void dwmac4_core_init(struct mac_device_info *hw,
static void dwmac4_phylink_get_caps(struct stmmac_priv *priv)
{
- priv->phylink_config.mac_capabilities |= MAC_2500FD;
+ if (priv->plat->tx_queues_to_use > 1)
+ priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD | MAC_1000HD);
+ else
+ priv->hw->link.caps |= (MAC_10HD | MAC_100HD | MAC_1000HD);
}
static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
@@ -92,19 +95,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw,
u32 prio, u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
- u32 base_register;
- u32 value;
+ u32 clear_mask = 0;
+ u32 ctrl2, ctrl3;
+ int i;
- base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3;
- if (queue >= 4)
- queue -= 4;
+ ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2);
+ ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3);
- value = readl(ioaddr + base_register);
+ /* The software must ensure that the same priority
+ * is not mapped to multiple Rx queues
+ */
+ for (i = 0; i < 4; i++)
+ clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) &
+ GMAC_RXQCTRL_PSRQX_MASK(i));
- value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue);
- value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+ ctrl2 &= ~clear_mask;
+ ctrl3 &= ~clear_mask;
+
+ /* First assign new priorities to a queue, then
+ * clear them from others queues
+ */
+ if (queue < 4) {
+ ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
GMAC_RXQCTRL_PSRQX_MASK(queue);
- writel(value, ioaddr + base_register);
+
+ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
+ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
+ } else {
+ queue -= 4;
+
+ ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+ GMAC_RXQCTRL_PSRQX_MASK(queue);
+
+ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
+ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
+ }
}
static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
@@ -1356,6 +1381,8 @@ int dwmac4_setup(struct stmmac_priv *priv)
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000 | MAC_2500FD;
mac->link.duplex = GMAC_CONFIG_DM;
mac->link.speed10 = GMAC_CONFIG_PS;
mac->link.speed100 = GMAC_CONFIG_FES | GMAC_CONFIG_PS;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 1af2f89a0504ab..f8e7775bb63364 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -47,14 +47,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw,
writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN);
}
-static void xgmac_phylink_get_caps(struct stmmac_priv *priv)
-{
- priv->phylink_config.mac_capabilities |= MAC_2500FD | MAC_5000FD |
- MAC_10000FD | MAC_25000FD |
- MAC_40000FD | MAC_50000FD |
- MAC_100000FD;
-}
-
static void dwxgmac2_set_mac(void __iomem *ioaddr, bool enable)
{
u32 tx = readl(ioaddr + XGMAC_TX_CONFIG);
@@ -105,17 +97,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio,
u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
- u32 value, reg;
+ u32 clear_mask = 0;
+ u32 ctrl2, ctrl3;
+ int i;
- reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3;
- if (queue >= 4)
+ ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2);
+ ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3);
+
+ /* The software must ensure that the same priority
+ * is not mapped to multiple Rx queues
+ */
+ for (i = 0; i < 4; i++)
+ clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) &
+ XGMAC_PSRQ(i));
+
+ ctrl2 &= ~clear_mask;
+ ctrl3 &= ~clear_mask;
+
+ /* First assign new priorities to a queue, then
+ * clear them from others queues
+ */
+ if (queue < 4) {
+ ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
+ XGMAC_PSRQ(queue);
+
+ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
+ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
+ } else {
queue -= 4;
- value = readl(ioaddr + reg);
- value &= ~XGMAC_PSRQ(queue);
- value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue);
+ ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
+ XGMAC_PSRQ(queue);
- writel(value, ioaddr + reg);
+ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
+ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
+ }
}
static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio,
@@ -1516,7 +1532,6 @@ static void dwxgmac3_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *
const struct stmmac_ops dwxgmac210_ops = {
.core_init = dwxgmac2_core_init,
- .phylink_get_caps = xgmac_phylink_get_caps,
.set_mac = dwxgmac2_set_mac,
.rx_ipc = dwxgmac2_rx_ipc,
.rx_queue_enable = dwxgmac2_rx_queue_enable,
@@ -1577,7 +1592,6 @@ static void dwxlgmac2_rx_queue_enable(struct mac_device_info *hw, u8 mode,
const struct stmmac_ops dwxlgmac2_ops = {
.core_init = dwxgmac2_core_init,
- .phylink_get_caps = xgmac_phylink_get_caps,
.set_mac = dwxgmac2_set_mac,
.rx_ipc = dwxgmac2_rx_ipc,
.rx_queue_enable = dwxlgmac2_rx_queue_enable,
@@ -1637,6 +1651,9 @@ int dwxgmac2_setup(struct stmmac_priv *priv)
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_1000FD | MAC_2500FD | MAC_5000FD |
+ MAC_10000FD;
mac->link.duplex = 0;
mac->link.speed10 = XGMAC_CONFIG_SS_10_MII;
mac->link.speed100 = XGMAC_CONFIG_SS_100_MII;
@@ -1674,6 +1691,11 @@ int dwxlgmac2_setup(struct stmmac_priv *priv)
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_1000FD | MAC_2500FD | MAC_5000FD |
+ MAC_10000FD | MAC_25000FD |
+ MAC_40000FD | MAC_50000FD |
+ MAC_100000FD;
mac->link.duplex = 0;
mac->link.speed1000 = XLGMAC_CONFIG_SS_1000;
mac->link.speed2500 = XLGMAC_CONFIG_SS_2500;
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h
index dff02d75d51971..5d1ea3e07459a3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc.h
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h
@@ -52,6 +52,7 @@ struct stmmac_counters {
unsigned int mmc_tx_excessdef;
unsigned int mmc_tx_pause_frame;
unsigned int mmc_tx_vlan_frame_g;
+ unsigned int mmc_tx_oversize_g;
unsigned int mmc_tx_lpi_usec;
unsigned int mmc_tx_lpi_tran;
@@ -80,6 +81,7 @@ struct stmmac_counters {
unsigned int mmc_rx_fifo_overflow;
unsigned int mmc_rx_vlan_frames_gb;
unsigned int mmc_rx_watchdog_error;
+ unsigned int mmc_rx_error;
unsigned int mmc_rx_lpi_usec;
unsigned int mmc_rx_lpi_tran;
unsigned int mmc_rx_discard_frames_gb;
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index 7eb477faa75a38..0fab842902a850 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -53,6 +53,7 @@
#define MMC_TX_EXCESSDEF 0x6c
#define MMC_TX_PAUSE_FRAME 0x70
#define MMC_TX_VLAN_FRAME_G 0x74
+#define MMC_TX_OVERSIZE_G 0x78
/* MMC RX counter registers */
#define MMC_RX_FRAMECOUNT_GB 0x80
@@ -79,6 +80,13 @@
#define MMC_RX_FIFO_OVERFLOW 0xd4
#define MMC_RX_VLAN_FRAMES_GB 0xd8
#define MMC_RX_WATCHDOG_ERROR 0xdc
+#define MMC_RX_ERROR 0xe0
+
+#define MMC_TX_LPI_USEC 0xec
+#define MMC_TX_LPI_TRAN 0xf0
+#define MMC_RX_LPI_USEC 0xf4
+#define MMC_RX_LPI_TRAN 0xf8
+
/* IPC*/
#define MMC_RX_IPC_INTR_MASK 0x100
#define MMC_RX_IPC_INTR 0x108
@@ -283,6 +291,9 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
mmc->mmc_tx_excessdef += readl(mmcaddr + MMC_TX_EXCESSDEF);
mmc->mmc_tx_pause_frame += readl(mmcaddr + MMC_TX_PAUSE_FRAME);
mmc->mmc_tx_vlan_frame_g += readl(mmcaddr + MMC_TX_VLAN_FRAME_G);
+ mmc->mmc_tx_oversize_g += readl(mmcaddr + MMC_TX_OVERSIZE_G);
+ mmc->mmc_tx_lpi_usec += readl(mmcaddr + MMC_TX_LPI_USEC);
+ mmc->mmc_tx_lpi_tran += readl(mmcaddr + MMC_TX_LPI_TRAN);
/* MMC RX counter registers */
mmc->mmc_rx_framecount_gb += readl(mmcaddr + MMC_RX_FRAMECOUNT_GB);
@@ -316,6 +327,10 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
mmc->mmc_rx_fifo_overflow += readl(mmcaddr + MMC_RX_FIFO_OVERFLOW);
mmc->mmc_rx_vlan_frames_gb += readl(mmcaddr + MMC_RX_VLAN_FRAMES_GB);
mmc->mmc_rx_watchdog_error += readl(mmcaddr + MMC_RX_WATCHDOG_ERROR);
+ mmc->mmc_rx_error += readl(mmcaddr + MMC_RX_ERROR);
+ mmc->mmc_rx_lpi_usec += readl(mmcaddr + MMC_RX_LPI_USEC);
+ mmc->mmc_rx_lpi_tran += readl(mmcaddr + MMC_RX_LPI_TRAN);
+
/* IPv4 */
mmc->mmc_rx_ipv4_gd += readl(mmcaddr + MMC_RX_IPV4_GD);
mmc->mmc_rx_ipv4_hderr += readl(mmcaddr + MMC_RX_IPV4_HDERR);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index e1537a57815f38..542e2633a6f522 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -212,6 +212,7 @@ static const struct stmmac_stats stmmac_mmc[] = {
STMMAC_MMC_STAT(mmc_tx_excessdef),
STMMAC_MMC_STAT(mmc_tx_pause_frame),
STMMAC_MMC_STAT(mmc_tx_vlan_frame_g),
+ STMMAC_MMC_STAT(mmc_tx_oversize_g),
STMMAC_MMC_STAT(mmc_tx_lpi_usec),
STMMAC_MMC_STAT(mmc_tx_lpi_tran),
STMMAC_MMC_STAT(mmc_rx_framecount_gb),
@@ -238,6 +239,7 @@ static const struct stmmac_stats stmmac_mmc[] = {
STMMAC_MMC_STAT(mmc_rx_fifo_overflow),
STMMAC_MMC_STAT(mmc_rx_vlan_frames_gb),
STMMAC_MMC_STAT(mmc_rx_watchdog_error),
+ STMMAC_MMC_STAT(mmc_rx_error),
STMMAC_MMC_STAT(mmc_rx_lpi_usec),
STMMAC_MMC_STAT(mmc_rx_lpi_tran),
STMMAC_MMC_STAT(mmc_rx_discard_frames_gb),
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 24cd80490d19cf..7c6fb14b555508 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1198,17 +1198,6 @@ static int stmmac_init_phy(struct net_device *dev)
return ret;
}
-static void stmmac_set_half_duplex(struct stmmac_priv *priv)
-{
- /* Half-Duplex can only work with single tx queue */
- if (priv->plat->tx_queues_to_use > 1)
- priv->phylink_config.mac_capabilities &=
- ~(MAC_10HD | MAC_100HD | MAC_1000HD);
- else
- priv->phylink_config.mac_capabilities |=
- (MAC_10HD | MAC_100HD | MAC_1000HD);
-}
-
static int stmmac_phy_setup(struct stmmac_priv *priv)
{
struct stmmac_mdio_bus_data *mdio_bus_data;
@@ -1236,15 +1225,11 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
xpcs_get_interfaces(priv->hw->xpcs,
priv->phylink_config.supported_interfaces);
- priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
- MAC_10FD | MAC_100FD |
- MAC_1000FD;
-
- stmmac_set_half_duplex(priv);
-
/* Get the MAC specific capabilities */
stmmac_mac_phylink_get_caps(priv);
+ priv->phylink_config.mac_capabilities = priv->hw->link.caps;
+
max_speed = priv->plat->max_speed;
if (max_speed)
phylink_limit_mac_speed(&priv->phylink_config, max_speed);
@@ -7342,6 +7327,7 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
{
struct stmmac_priv *priv = netdev_priv(dev);
int ret = 0, i;
+ int max_speed;
if (netif_running(dev))
stmmac_release(dev);
@@ -7355,7 +7341,14 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
priv->rss.table[i] = ethtool_rxfh_indir_default(i,
rx_cnt);
- stmmac_set_half_duplex(priv);
+ stmmac_mac_phylink_get_caps(priv);
+
+ priv->phylink_config.mac_capabilities = priv->hw->link.caps;
+
+ max_speed = priv->plat->max_speed;
+ if (max_speed)
+ phylink_limit_mac_speed(&priv->phylink_config, max_speed);
+
stmmac_napi_add(dev);
if (netif_running(dev))
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 2939a21ca74f3c..1d00e21808c1c3 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2793,6 +2793,8 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common)
static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
{
+ struct am65_cpsw_rx_chn *rx_chan = &common->rx_chns;
+ struct am65_cpsw_tx_chn *tx_chan = common->tx_chns;
struct device *dev = common->dev;
struct am65_cpsw_port *port;
int ret = 0, i;
@@ -2805,6 +2807,22 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
if (ret)
return ret;
+ /* The DMA Channels are not guaranteed to be in a clean state.
+ * Reset and disable them to ensure that they are back to the
+ * clean state and ready to be used.
+ */
+ for (i = 0; i < common->tx_ch_num; i++) {
+ k3_udma_glue_reset_tx_chn(tx_chan[i].tx_chn, &tx_chan[i],
+ am65_cpsw_nuss_tx_cleanup);
+ k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn);
+ }
+
+ for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++)
+ k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan,
+ am65_cpsw_nuss_rx_cleanup, !!i);
+
+ k3_udma_glue_disable_rx_chn(rx_chan->rx_chn);
+
ret = am65_cpsw_nuss_register_devlink(common);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c
index c66618d91c28fe..f89716b1cfb640 100644
--- a/drivers/net/ethernet/ti/am65-cpts.c
+++ b/drivers/net/ethernet/ti/am65-cpts.c
@@ -784,6 +784,11 @@ static bool am65_cpts_match_tx_ts(struct am65_cpts *cpts,
struct am65_cpts_skb_cb_data *skb_cb =
(struct am65_cpts_skb_cb_data *)skb->cb;
+ if ((ptp_classify_raw(skb) & PTP_CLASS_V1) &&
+ ((mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK) ==
+ (skb_cb->skb_mtype_seqid & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK)))
+ mtype_seqid = skb_cb->skb_mtype_seqid;
+
if (mtype_seqid == skb_cb->skb_mtype_seqid) {
u64 ns = event->timestamp;
diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
index cf7b73f8f45072..b69af69a1ccd36 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c
@@ -421,12 +421,14 @@ static int prueth_init_rx_chns(struct prueth_emac *emac,
if (!i)
fdqring_id = k3_udma_glue_rx_flow_get_fdq_id(rx_chn->rx_chn,
i);
- rx_chn->irq[i] = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i);
- if (rx_chn->irq[i] <= 0) {
- ret = rx_chn->irq[i];
+ ret = k3_udma_glue_rx_get_irq(rx_chn->rx_chn, i);
+ if (ret <= 0) {
+ if (!ret)
+ ret = -ENXIO;
netdev_err(ndev, "Failed to get rx dma irq");
goto fail;
}
+ rx_chn->irq[i] = ret;
}
return 0;
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 6dff2c85682d8b..6fae161cbcb822 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -1598,7 +1598,7 @@ static void wx_set_num_queues(struct wx *wx)
*/
static int wx_acquire_msix_vectors(struct wx *wx)
{
- struct irq_affinity affd = {0, };
+ struct irq_affinity affd = { .pre_vectors = 1 };
int nvecs, i;
/* We start by asking for one vector per queue pair */
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
index 5b5d5e4310d127..93295916b1d2b8 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
@@ -571,7 +571,7 @@ static int txgbe_clock_register(struct txgbe *txgbe)
char clk_name[32];
struct clk *clk;
- snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d",
+ snprintf(clk_name, sizeof(clk_name), "i2c_designware.%d",
pci_dev_id(pdev));
clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000);
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 9df39cf8b09750..1072e2210aed32 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1443,7 +1443,7 @@ static int temac_probe(struct platform_device *pdev)
}
/* map device registers */
- lp->regs = devm_platform_ioremap_resource_byname(pdev, 0);
+ lp->regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(lp->regs)) {
dev_err(&pdev->dev, "could not map TEMAC registers\n");
return -ENOMEM;
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 2f6739fe78af2e..6c2835086b57ea 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -822,7 +822,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;
- if (!pskb_inet_may_pull(skb))
+ if (!skb_vlan_inet_prepare(skb))
return -EINVAL;
if (!gs4)
@@ -929,7 +929,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;
- if (!pskb_inet_may_pull(skb))
+ if (!skb_vlan_inet_prepare(skb))
return -EINVAL;
if (!gs6)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index ba4704c2c640b8..e62d6cbdf9bc64 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -1098,11 +1098,12 @@ out_hashtable:
static void gtp_dellink(struct net_device *dev, struct list_head *head)
{
struct gtp_dev *gtp = netdev_priv(dev);
+ struct hlist_node *next;
struct pdp_ctx *pctx;
int i;
for (i = 0; i < gtp->hash_size; i++)
- hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid)
+ hlist_for_each_entry_safe(pctx, next, &gtp->tid_hash[i], hlist_tid)
pdp_context_delete(pctx);
list_del_rcu(&gtp->list);
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index a6fcbda64ecc60..2b6ec979a62f21 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -154,8 +154,11 @@ static void free_netvsc_device(struct rcu_head *head)
int i;
kfree(nvdev->extension);
- vfree(nvdev->recv_buf);
- vfree(nvdev->send_buf);
+
+ if (!nvdev->recv_buf_gpadl_handle.decrypted)
+ vfree(nvdev->recv_buf);
+ if (!nvdev->send_buf_gpadl_handle.decrypted)
+ vfree(nvdev->send_buf);
bitmap_free(nvdev->send_section_map);
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 0206b84284ab5e..ff016c11b4a038 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -999,10 +999,12 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb)
struct metadata_dst *md_dst;
struct macsec_rxh_data *rxd;
struct macsec_dev *macsec;
+ bool is_macsec_md_dst;
rcu_read_lock();
rxd = macsec_data_rcu(skb->dev);
md_dst = skb_metadata_dst(skb);
+ is_macsec_md_dst = md_dst && md_dst->type == METADATA_MACSEC;
list_for_each_entry_rcu(macsec, &rxd->secys, secys) {
struct sk_buff *nskb;
@@ -1013,14 +1015,42 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb)
* the SecTAG, so we have to deduce which port to deliver to.
*/
if (macsec_is_offloaded(macsec) && netif_running(ndev)) {
- struct macsec_rx_sc *rx_sc = NULL;
+ const struct macsec_ops *ops;
- if (md_dst && md_dst->type == METADATA_MACSEC)
- rx_sc = find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci);
+ ops = macsec_get_ops(macsec, NULL);
- if (md_dst && md_dst->type == METADATA_MACSEC && !rx_sc)
+ if (ops->rx_uses_md_dst && !is_macsec_md_dst)
continue;
+ if (is_macsec_md_dst) {
+ struct macsec_rx_sc *rx_sc;
+
+ /* All drivers that implement MACsec offload
+ * support using skb metadata destinations must
+ * indicate that they do so.
+ */
+ DEBUG_NET_WARN_ON_ONCE(!ops->rx_uses_md_dst);
+ rx_sc = find_rx_sc(&macsec->secy,
+ md_dst->u.macsec_info.sci);
+ if (!rx_sc)
+ continue;
+ /* device indicated macsec offload occurred */
+ skb->dev = ndev;
+ skb->pkt_type = PACKET_HOST;
+ eth_skb_pkt_type(skb, ndev);
+ ret = RX_HANDLER_ANOTHER;
+ goto out;
+ }
+
+ /* This datapath is insecure because it is unable to
+ * enforce isolation of broadcast/multicast traffic and
+ * unicast traffic with promiscuous mode on the macsec
+ * netdev. Since the core stack has no mechanism to
+ * check that the hardware did indeed receive MACsec
+ * traffic, it is possible that the response handling
+ * done by the MACsec port was to a plaintext packet.
+ * This violates the MACsec protocol standard.
+ */
if (ether_addr_equal_64bits(hdr->h_dest,
ndev->dev_addr)) {
/* exact match, divert skb to this port */
@@ -1036,14 +1066,10 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb)
break;
nskb->dev = ndev;
- if (ether_addr_equal_64bits(hdr->h_dest,
- ndev->broadcast))
- nskb->pkt_type = PACKET_BROADCAST;
- else
- nskb->pkt_type = PACKET_MULTICAST;
+ eth_skb_pkt_type(nskb, ndev);
__netif_rx(nskb);
- } else if (rx_sc || ndev->flags & IFF_PROMISC) {
+ } else if (ndev->flags & IFF_PROMISC) {
skb->dev = ndev;
skb->pkt_type = PACKET_HOST;
ret = RX_HANDLER_ANOTHER;
diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c
index fa8c6fdcf30181..d7aaefb5226b62 100644
--- a/drivers/net/phy/dp83869.c
+++ b/drivers/net/phy/dp83869.c
@@ -695,7 +695,8 @@ static int dp83869_configure_mode(struct phy_device *phydev,
phy_ctrl_val = dp83869->mode;
if (phydev->interface == PHY_INTERFACE_MODE_MII) {
if (dp83869->mode == DP83869_100M_MEDIA_CONVERT ||
- dp83869->mode == DP83869_RGMII_100_BASE) {
+ dp83869->mode == DP83869_RGMII_100_BASE ||
+ dp83869->mode == DP83869_RGMII_COPPER_ETHERNET) {
phy_ctrl_val |= DP83869_OP_MODE_MII;
} else {
phydev_err(phydev, "selected op-mode is not valid with MII mode\n");
diff --git a/drivers/net/phy/mediatek-ge-soc.c b/drivers/net/phy/mediatek-ge-soc.c
index 0f3a1538a8b8ee..f4f9412d0cd7e2 100644
--- a/drivers/net/phy/mediatek-ge-soc.c
+++ b/drivers/net/phy/mediatek-ge-soc.c
@@ -216,6 +216,9 @@
#define MTK_PHY_LED_ON_LINK1000 BIT(0)
#define MTK_PHY_LED_ON_LINK100 BIT(1)
#define MTK_PHY_LED_ON_LINK10 BIT(2)
+#define MTK_PHY_LED_ON_LINK (MTK_PHY_LED_ON_LINK10 |\
+ MTK_PHY_LED_ON_LINK100 |\
+ MTK_PHY_LED_ON_LINK1000)
#define MTK_PHY_LED_ON_LINKDOWN BIT(3)
#define MTK_PHY_LED_ON_FDX BIT(4) /* Full duplex */
#define MTK_PHY_LED_ON_HDX BIT(5) /* Half duplex */
@@ -231,6 +234,12 @@
#define MTK_PHY_LED_BLINK_100RX BIT(3)
#define MTK_PHY_LED_BLINK_10TX BIT(4)
#define MTK_PHY_LED_BLINK_10RX BIT(5)
+#define MTK_PHY_LED_BLINK_RX (MTK_PHY_LED_BLINK_10RX |\
+ MTK_PHY_LED_BLINK_100RX |\
+ MTK_PHY_LED_BLINK_1000RX)
+#define MTK_PHY_LED_BLINK_TX (MTK_PHY_LED_BLINK_10TX |\
+ MTK_PHY_LED_BLINK_100TX |\
+ MTK_PHY_LED_BLINK_1000TX)
#define MTK_PHY_LED_BLINK_COLLISION BIT(6)
#define MTK_PHY_LED_BLINK_RX_CRC_ERR BIT(7)
#define MTK_PHY_LED_BLINK_RX_IDLE_ERR BIT(8)
@@ -1247,11 +1256,9 @@ static int mt798x_phy_led_hw_control_get(struct phy_device *phydev, u8 index,
if (blink < 0)
return -EIO;
- if ((on & (MTK_PHY_LED_ON_LINK1000 | MTK_PHY_LED_ON_LINK100 |
- MTK_PHY_LED_ON_LINK10)) ||
- (blink & (MTK_PHY_LED_BLINK_1000RX | MTK_PHY_LED_BLINK_100RX |
- MTK_PHY_LED_BLINK_10RX | MTK_PHY_LED_BLINK_1000TX |
- MTK_PHY_LED_BLINK_100TX | MTK_PHY_LED_BLINK_10TX)))
+ if ((on & (MTK_PHY_LED_ON_LINK | MTK_PHY_LED_ON_FDX | MTK_PHY_LED_ON_HDX |
+ MTK_PHY_LED_ON_LINKDOWN)) ||
+ (blink & (MTK_PHY_LED_BLINK_RX | MTK_PHY_LED_BLINK_TX)))
set_bit(bit_netdev, &priv->led_state);
else
clear_bit(bit_netdev, &priv->led_state);
@@ -1269,7 +1276,7 @@ static int mt798x_phy_led_hw_control_get(struct phy_device *phydev, u8 index,
if (!rules)
return 0;
- if (on & (MTK_PHY_LED_ON_LINK1000 | MTK_PHY_LED_ON_LINK100 | MTK_PHY_LED_ON_LINK10))
+ if (on & MTK_PHY_LED_ON_LINK)
*rules |= BIT(TRIGGER_NETDEV_LINK);
if (on & MTK_PHY_LED_ON_LINK10)
@@ -1287,10 +1294,10 @@ static int mt798x_phy_led_hw_control_get(struct phy_device *phydev, u8 index,
if (on & MTK_PHY_LED_ON_HDX)
*rules |= BIT(TRIGGER_NETDEV_HALF_DUPLEX);
- if (blink & (MTK_PHY_LED_BLINK_1000RX | MTK_PHY_LED_BLINK_100RX | MTK_PHY_LED_BLINK_10RX))
+ if (blink & MTK_PHY_LED_BLINK_RX)
*rules |= BIT(TRIGGER_NETDEV_RX);
- if (blink & (MTK_PHY_LED_BLINK_1000TX | MTK_PHY_LED_BLINK_100TX | MTK_PHY_LED_BLINK_10TX))
+ if (blink & MTK_PHY_LED_BLINK_TX)
*rules |= BIT(TRIGGER_NETDEV_TX);
return 0;
@@ -1323,15 +1330,19 @@ static int mt798x_phy_led_hw_control_set(struct phy_device *phydev, u8 index,
on |= MTK_PHY_LED_ON_LINK1000;
if (rules & BIT(TRIGGER_NETDEV_RX)) {
- blink |= MTK_PHY_LED_BLINK_10RX |
- MTK_PHY_LED_BLINK_100RX |
- MTK_PHY_LED_BLINK_1000RX;
+ blink |= (on & MTK_PHY_LED_ON_LINK) ?
+ (((on & MTK_PHY_LED_ON_LINK10) ? MTK_PHY_LED_BLINK_10RX : 0) |
+ ((on & MTK_PHY_LED_ON_LINK100) ? MTK_PHY_LED_BLINK_100RX : 0) |
+ ((on & MTK_PHY_LED_ON_LINK1000) ? MTK_PHY_LED_BLINK_1000RX : 0)) :
+ MTK_PHY_LED_BLINK_RX;
}
if (rules & BIT(TRIGGER_NETDEV_TX)) {
- blink |= MTK_PHY_LED_BLINK_10TX |
- MTK_PHY_LED_BLINK_100TX |
- MTK_PHY_LED_BLINK_1000TX;
+ blink |= (on & MTK_PHY_LED_ON_LINK) ?
+ (((on & MTK_PHY_LED_ON_LINK10) ? MTK_PHY_LED_BLINK_10TX : 0) |
+ ((on & MTK_PHY_LED_ON_LINK100) ? MTK_PHY_LED_BLINK_100TX : 0) |
+ ((on & MTK_PHY_LED_ON_LINK1000) ? MTK_PHY_LED_BLINK_1000TX : 0)) :
+ MTK_PHY_LED_BLINK_TX;
}
if (blink || on)
@@ -1344,9 +1355,7 @@ static int mt798x_phy_led_hw_control_set(struct phy_device *phydev, u8 index,
MTK_PHY_LED0_ON_CTRL,
MTK_PHY_LED_ON_FDX |
MTK_PHY_LED_ON_HDX |
- MTK_PHY_LED_ON_LINK10 |
- MTK_PHY_LED_ON_LINK100 |
- MTK_PHY_LED_ON_LINK1000,
+ MTK_PHY_LED_ON_LINK,
on);
if (ret)
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 8b8634600c5190..ddb50a0e2bc822 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -2431,6 +2431,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts,
struct lan8814_ptp_rx_ts *rx_ts, *tmp;
int txcfg = 0, rxcfg = 0;
int pkt_ts_enable;
+ int tx_mod;
ptp_priv->hwts_tx_type = config->tx_type;
ptp_priv->rx_filter = config->rx_filter;
@@ -2477,9 +2478,14 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts,
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable);
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable);
- if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC)
+ tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD);
+ if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) {
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
- PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) {
+ lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
+ tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ }
if (config->rx_filter != HWTSTAMP_FILTER_NONE)
lan8814_config_ts_intr(ptp_priv->phydev, true);
@@ -2537,7 +2543,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts,
}
}
-static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
+static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
{
struct ptp_header *ptp_header;
u32 type;
@@ -2547,7 +2553,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
ptp_header = ptp_parse_header(skb, type);
skb_pull_inline(skb, ETH_HLEN);
+ if (!ptp_header)
+ return false;
+
*sig = (__force u16)(ntohs(ptp_header->sequence_id));
+ return true;
}
static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv,
@@ -2559,7 +2569,8 @@ static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv,
bool ret = false;
u16 skb_sig;
- lan8814_get_sig_rx(skb, &skb_sig);
+ if (!lan8814_get_sig_rx(skb, &skb_sig))
+ return ret;
/* Iterate over all RX timestamps and match it with the received skbs */
spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags);
@@ -2834,7 +2845,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm)
return 0;
}
-static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
+static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
{
struct ptp_header *ptp_header;
u32 type;
@@ -2842,7 +2853,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
type = ptp_classify_raw(skb);
ptp_header = ptp_parse_header(skb, type);
+ if (!ptp_header)
+ return false;
+
*sig = (__force u16)(ntohs(ptp_header->sequence_id));
+ return true;
}
static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv,
@@ -2856,7 +2871,8 @@ static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv,
spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags);
skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) {
- lan8814_get_sig_tx(skb, &skb_sig);
+ if (!lan8814_get_sig_tx(skb, &skb_sig))
+ continue;
if (memcmp(&skb_sig, &seq_id, sizeof(seq_id)))
continue;
@@ -2910,7 +2926,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv,
spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags);
skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) {
- lan8814_get_sig_rx(skb, &skb_sig);
+ if (!lan8814_get_sig_rx(skb, &skb_sig))
+ continue;
if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id)))
continue;
diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c
index 4717c59d51d042..e79657f76bea23 100644
--- a/drivers/net/phy/qcom/at803x.c
+++ b/drivers/net/phy/qcom/at803x.c
@@ -797,7 +797,7 @@ static int at8031_parse_dt(struct phy_device *phydev)
static int at8031_probe(struct phy_device *phydev)
{
- struct at803x_priv *priv = phydev->priv;
+ struct at803x_priv *priv;
int mode_cfg;
int ccr;
int ret;
@@ -806,6 +806,8 @@ static int at8031_probe(struct phy_device *phydev)
if (ret)
return ret;
+ priv = phydev->priv;
+
/* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping
* options.
*/
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 0b3f21cba552f2..92da8c03d960c9 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2125,14 +2125,16 @@ static ssize_t tun_put_user(struct tun_struct *tun,
tun_is_little_endian(tun), true,
vlan_hlen)) {
struct skb_shared_info *sinfo = skb_shinfo(skb);
- pr_err("unexpected GSO type: "
- "0x%x, gso_size %d, hdr_len %d\n",
- sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
- tun16_to_cpu(tun, gso.hdr_len));
- print_hex_dump(KERN_ERR, "tun: ",
- DUMP_PREFIX_NONE,
- 16, 1, skb->head,
- min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+
+ if (net_ratelimit()) {
+ netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
+ sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
+ tun16_to_cpu(tun, gso.hdr_len));
+ print_hex_dump(KERN_ERR, "tun: ",
+ DUMP_PREFIX_NONE,
+ 16, 1, skb->head,
+ min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+ }
WARN_ON_ONCE(1);
return -EINVAL;
}
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 88e084534853dd..df9d767cb52424 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1273,6 +1273,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev)
if (is_valid_ether_addr(mac)) {
eth_hw_addr_set(dev->net, mac);
+ if (!is_local_ether_addr(mac))
+ dev->net->addr_assign_type = NET_ADDR_PERM;
} else {
netdev_info(dev->net, "invalid MAC address, using random\n");
eth_hw_addr_random(dev->net);
@@ -1315,6 +1317,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
netif_set_tso_max_size(dev->net, 16384);
+ ax88179_reset(dev);
+
return 0;
}
@@ -1452,21 +1456,16 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
/* Skip IP alignment pseudo header */
skb_pull(skb, 2);
- skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd);
ax88179_rx_checksum(skb, pkt_hdr);
return 1;
}
- ax_skb = skb_clone(skb, GFP_ATOMIC);
+ ax_skb = netdev_alloc_skb_ip_align(dev->net, pkt_len);
if (!ax_skb)
return 0;
- skb_trim(ax_skb, pkt_len);
-
- /* Skip IP alignment pseudo header */
- skb_pull(ax_skb, 2);
+ skb_put(ax_skb, pkt_len);
+ memcpy(ax_skb->data, skb->data + 2, pkt_len);
- skb->truesize = pkt_len_plus_padd +
- SKB_DATA_ALIGN(sizeof(struct sk_buff));
ax88179_rx_checksum(ax_skb, pkt_hdr);
usbnet_skb_return(dev, ax_skb);
@@ -1693,7 +1692,6 @@ static const struct driver_info ax88179_info = {
.unbind = ax88179_unbind,
.status = ax88179_status,
.link_reset = ax88179_link_reset,
- .reset = ax88179_reset,
.stop = ax88179_stop,
.flags = FLAG_ETHER | FLAG_FRAMING_AX,
.rx_fixup = ax88179_rx_fixup,
@@ -1706,7 +1704,6 @@ static const struct driver_info ax88178a_info = {
.unbind = ax88179_unbind,
.status = ax88179_status,
.link_reset = ax88179_link_reset,
- .reset = ax88179_reset,
.stop = ax88179_stop,
.flags = FLAG_ETHER | FLAG_FRAMING_AX,
.rx_fixup = ax88179_rx_fixup,
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index e2e181378f4124..a5469cf5cf6706 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1368,6 +1368,9 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990 */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a9, 0)}, /* Telit FN920C04 */
{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */
{QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */
{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */
@@ -1431,6 +1434,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */
{QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */
{QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */
+ {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */
/* 4. Gobi 1000 devices */
{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c22d1118a13333..115c3c5414f2a7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3807,6 +3807,7 @@ static int virtnet_set_rxfh(struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
+ bool update = false;
int i;
if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
@@ -3814,13 +3815,28 @@ static int virtnet_set_rxfh(struct net_device *dev,
return -EOPNOTSUPP;
if (rxfh->indir) {
+ if (!vi->has_rss)
+ return -EOPNOTSUPP;
+
for (i = 0; i < vi->rss_indir_table_size; ++i)
vi->ctrl->rss.indirection_table[i] = rxfh->indir[i];
+ update = true;
}
- if (rxfh->key)
+
+ if (rxfh->key) {
+ /* If either _F_HASH_REPORT or _F_RSS are negotiated, the
+ * device provides hash calculation capabilities, that is,
+ * hash_key is configured.
+ */
+ if (!vi->has_rss && !vi->has_rss_hash_report)
+ return -EOPNOTSUPP;
+
memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size);
+ update = true;
+ }
- virtnet_commit_rss_command(vi);
+ if (update)
+ virtnet_commit_rss_command(vi);
return 0;
}
@@ -4729,13 +4745,15 @@ static int virtnet_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
vi->has_rss_hash_report = true;
- if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
vi->has_rss = true;
- if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_indir_table_size =
virtio_cread16(vdev, offsetof(struct virtio_net_config,
rss_max_indirection_table_length));
+ }
+
+ if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_key_size =
virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 3495591a5c29b2..ba319fc2195719 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -1615,6 +1615,10 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan,
if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
return false;
+ /* Ignore packets from invalid src-address */
+ if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
+ return false;
+
/* Get address from the outer IP header */
if (vxlan_get_sk_family(vs) == AF_INET) {
saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index a6a37d67a50ad5..9f4bf41a3d41e4 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -9020,6 +9020,7 @@ static void ath11k_mac_op_ipv6_changed(struct ieee80211_hw *hw,
offload = &arvif->arp_ns_offload;
count = 0;
+ /* Note: read_lock_bh() calls rcu_read_lock() */
read_lock_bh(&idev->lock);
memset(offload->ipv6_addr, 0, sizeof(offload->ipv6_addr));
@@ -9050,7 +9051,8 @@ static void ath11k_mac_op_ipv6_changed(struct ieee80211_hw *hw,
}
/* get anycast address */
- for (ifaca6 = idev->ac_list; ifaca6; ifaca6 = ifaca6->aca_next) {
+ for (ifaca6 = rcu_dereference(idev->ac_list); ifaca6;
+ ifaca6 = rcu_dereference(ifaca6->aca_next)) {
if (count >= ATH11K_IPV6_MAX_COUNT)
goto generate;
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c
index 072b0a5827d19f..eca1457caa0cad 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/bz.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/bz.c
@@ -10,7 +10,7 @@
#include "fw/api/txq.h"
/* Highest firmware API version supported */
-#define IWL_BZ_UCODE_API_MAX 90
+#define IWL_BZ_UCODE_API_MAX 89
/* Lowest firmware API version supported */
#define IWL_BZ_UCODE_API_MIN 80
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c
index 9b79279fd76cad..dbbcb2d0968c09 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/sc.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/sc.c
@@ -10,7 +10,7 @@
#include "fw/api/txq.h"
/* Highest firmware API version supported */
-#define IWL_SC_UCODE_API_MAX 90
+#define IWL_SC_UCODE_API_MAX 89
/* Lowest firmware API version supported */
#define IWL_SC_UCODE_API_MIN 82
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index db6d7013df6654..c3bdf433d8f7b3 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -3081,8 +3081,6 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx)
struct iwl_fw_dbg_params params = {0};
struct iwl_fwrt_dump_data *dump_data =
&fwrt->dump.wks[wk_idx].dump_data;
- u32 policy;
- u32 time_point;
if (!test_bit(wk_idx, &fwrt->dump.active_wks))
return;
@@ -3113,13 +3111,16 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx)
iwl_fw_dbg_stop_restart_recording(fwrt, &params, false);
- policy = le32_to_cpu(dump_data->trig->apply_policy);
- time_point = le32_to_cpu(dump_data->trig->time_point);
+ if (iwl_trans_dbg_ini_valid(fwrt->trans)) {
+ u32 policy = le32_to_cpu(dump_data->trig->apply_policy);
+ u32 time_point = le32_to_cpu(dump_data->trig->time_point);
- if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) {
- IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n");
- iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0);
+ if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) {
+ IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n");
+ iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0);
+ }
}
+
if (fwrt->trans->dbg.last_tp_resetfw == IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY)
iwl_force_nmi(fwrt->trans);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 553c6fffc7c66d..52518a47554e70 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1260,15 +1260,15 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
if (IS_ERR_OR_NULL(vif))
return 1;
- if (ieee80211_vif_is_mld(vif) && vif->cfg.assoc) {
+ if (hweight16(vif->active_links) > 1) {
/*
- * Select the 'best' link. May need to revisit, it seems
- * better to not optimize for throughput but rather range,
- * reliability and power here - and select 2.4 GHz ...
+ * Select the 'best' link.
+ * May need to revisit, it seems better to not optimize
+ * for throughput but rather range, reliability and
+ * power here - and select 2.4 GHz ...
*/
- primary_link =
- iwl_mvm_mld_get_primary_link(mvm, vif,
- vif->active_links);
+ primary_link = iwl_mvm_mld_get_primary_link(mvm, vif,
+ vif->active_links);
if (WARN_ONCE(primary_link < 0, "no primary link in 0x%x\n",
vif->active_links))
@@ -1277,6 +1277,8 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
ret = ieee80211_set_active_links(vif, BIT(primary_link));
if (ret)
return ret;
+ } else if (vif->active_links) {
+ primary_link = __ffs(vif->active_links);
} else {
primary_link = 0;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index 51b01f7528beec..7fe57ecd0682b8 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@ -748,7 +748,9 @@ void iwl_mvm_vif_dbgfs_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
{
struct dentry *dbgfs_dir = vif->debugfs_dir;
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- char buf[100];
+ char buf[3 * 3 + 11 + (NL80211_WIPHY_NAME_MAXLEN + 1) +
+ (7 + IFNAMSIZ + 1) + 6 + 1];
+ char name[7 + IFNAMSIZ + 1];
/* this will happen in monitor mode */
if (!dbgfs_dir)
@@ -761,10 +763,11 @@ void iwl_mvm_vif_dbgfs_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
* find
* netdev:wlan0 -> ../../../ieee80211/phy0/netdev:wlan0/iwlmvm/
*/
- snprintf(buf, 100, "../../../%pd3/iwlmvm", dbgfs_dir);
+ snprintf(name, sizeof(name), "%pd", dbgfs_dir);
+ snprintf(buf, sizeof(buf), "../../../%pd3/iwlmvm", dbgfs_dir);
- mvmvif->dbgfs_slink = debugfs_create_symlink(dbgfs_dir->d_name.name,
- mvm->debugfs_dir, buf);
+ mvmvif->dbgfs_slink =
+ debugfs_create_symlink(name, mvm->debugfs_dir, buf);
}
void iwl_mvm_vif_dbgfs_rm_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index 4863a3c746406e..d84d7e955bb021 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -53,6 +53,8 @@ int iwl_mvm_ftm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
if (!pasn)
return -ENOBUFS;
+ iwl_mvm_ftm_remove_pasn_sta(mvm, addr);
+
pasn->cipher = iwl_mvm_cipher_to_location_cipher(cipher);
switch (pasn->cipher) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/link.c b/drivers/net/wireless/intel/iwlwifi/mvm/link.c
index f13f13e6b71af1..fe5bba8561d0c6 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/link.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/link.c
@@ -46,6 +46,27 @@ static int iwl_mvm_link_cmd_send(struct iwl_mvm *mvm,
return ret;
}
+int iwl_mvm_set_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_mvm_vif_link_info *link_info =
+ mvmvif->link[link_conf->link_id];
+
+ if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) {
+ link_info->fw_link_id = iwl_mvm_get_free_fw_link_id(mvm,
+ mvmvif);
+ if (link_info->fw_link_id >=
+ ARRAY_SIZE(mvm->link_id_to_link_conf))
+ return -EINVAL;
+
+ rcu_assign_pointer(mvm->link_id_to_link_conf[link_info->fw_link_id],
+ link_conf);
+ }
+
+ return 0;
+}
+
int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf)
{
@@ -55,19 +76,14 @@ int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct iwl_link_config_cmd cmd = {};
unsigned int cmd_id = WIDE_ID(MAC_CONF_GROUP, LINK_CONFIG_CMD);
u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id, 1);
+ int ret;
if (WARN_ON_ONCE(!link_info))
return -EINVAL;
- if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) {
- link_info->fw_link_id = iwl_mvm_get_free_fw_link_id(mvm,
- mvmvif);
- if (link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf))
- return -EINVAL;
-
- rcu_assign_pointer(mvm->link_id_to_link_conf[link_info->fw_link_id],
- link_conf);
- }
+ ret = iwl_mvm_set_link_mapping(mvm, vif, link_conf);
+ if (ret)
+ return ret;
/* Update SF - Disable if needed. if this fails, SF might still be on
* while many macs are bound, which is forbidden - so fail the binding.
@@ -248,6 +264,25 @@ send_cmd:
return ret;
}
+int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_mvm_vif_link_info *link_info =
+ mvmvif->link[link_conf->link_id];
+
+ /* mac80211 thought we have the link, but it was never configured */
+ if (WARN_ON(!link_info ||
+ link_info->fw_link_id >=
+ ARRAY_SIZE(mvm->link_id_to_link_conf)))
+ return -EINVAL;
+
+ RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id],
+ NULL);
+ iwl_mvm_release_fw_link_id(mvm, link_info->fw_link_id);
+ return 0;
+}
+
int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf)
{
@@ -257,15 +292,11 @@ int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct iwl_link_config_cmd cmd = {};
int ret;
- /* mac80211 thought we have the link, but it was never configured */
- if (WARN_ON(!link_info ||
- link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf)))
+ ret = iwl_mvm_unset_link_mapping(mvm, vif, link_conf);
+ if (ret)
return 0;
- RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id],
- NULL);
cmd.link_id = cpu_to_le32(link_info->fw_link_id);
- iwl_mvm_release_fw_link_id(mvm, link_info->fw_link_id);
link_info->fw_link_id = IWL_MVM_FW_LINK_ID_INVALID;
cmd.spec_link_id = link_conf->link_id;
cmd.phy_id = cpu_to_le32(FW_CTXT_INVALID);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 1935630d3def00..8f4b063d6243ed 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -360,7 +360,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
if (mvm->mld_api_is_used && mvm->nvm_data->sku_cap_11be_enable &&
!iwlwifi_mod_params.disable_11ax &&
!iwlwifi_mod_params.disable_11be)
- hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_MLO;
+ hw->wiphy->flags |= WIPHY_FLAG_DISABLE_WEXT;
/* With MLD FW API, it tracks timing by itself,
* no need for any timing from the host
@@ -1577,8 +1577,14 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
mvmvif->mvm = mvm;
/* the first link always points to the default one */
+ mvmvif->deflink.fw_link_id = IWL_MVM_FW_LINK_ID_INVALID;
+ mvmvif->deflink.active = 0;
mvmvif->link[0] = &mvmvif->deflink;
+ ret = iwl_mvm_set_link_mapping(mvm, vif, &vif->bss_conf);
+ if (ret)
+ goto out;
+
/*
* Not much to do here. The stack will not allow interface
* types or combinations that we didn't advertise, so we
@@ -1783,6 +1789,7 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw,
mvm->p2p_device_vif = NULL;
}
+ iwl_mvm_unset_link_mapping(mvm, vif, &vif->bss_conf);
iwl_mvm_mac_ctxt_remove(mvm, vif);
RCU_INIT_POINTER(mvm->vif_id_to_mac[mvmvif->id], NULL);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
index 1628bf55458fcb..23e64a757cfe86 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
@@ -855,10 +855,15 @@ int iwl_mvm_mld_rm_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
int iwl_mvm_mld_rm_sta_id(struct iwl_mvm *mvm, u8 sta_id)
{
- int ret = iwl_mvm_mld_rm_sta_from_fw(mvm, sta_id);
+ int ret;
lockdep_assert_held(&mvm->mutex);
+ if (WARN_ON(sta_id == IWL_MVM_INVALID_STA))
+ return 0;
+
+ ret = iwl_mvm_mld_rm_sta_from_fw(mvm, sta_id);
+
RCU_INIT_POINTER(mvm->fw_id_to_mac_id[sta_id], NULL);
RCU_INIT_POINTER(mvm->fw_id_to_link_sta[sta_id], NULL);
return ret;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 44571114fb154b..f0b24f00938bd5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1916,11 +1916,15 @@ int iwl_mvm_binding_remove_vif(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
u32 iwl_mvm_get_lmac_id(struct iwl_mvm *mvm, enum nl80211_band band);
/* Links */
+int iwl_mvm_set_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf);
int iwl_mvm_link_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf,
u32 changes, bool active);
+int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf);
int iwl_mvm_disable_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
index 2ecd32bed752ff..045c862a8fc4fc 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
@@ -132,14 +132,18 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm)
if (ret)
return ERR_PTR(ret);
- if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size))
+ if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) !=
+ resp_size)) {
+ iwl_free_resp(&cmd);
return ERR_PTR(-EIO);
+ }
resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL);
+ iwl_free_resp(&cmd);
+
if (!resp)
return ERR_PTR(-ENOMEM);
- iwl_free_resp(&cmd);
return resp;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 1484eaedf45292..ce8d83c771a70d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -236,21 +236,13 @@ static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm,
static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm,
struct napi_struct *napi,
struct sk_buff *skb, int queue,
- struct ieee80211_sta *sta,
- struct ieee80211_link_sta *link_sta)
+ struct ieee80211_sta *sta)
{
if (unlikely(iwl_mvm_check_pn(mvm, skb, queue, sta))) {
kfree_skb(skb);
return;
}
- if (sta && sta->valid_links && link_sta) {
- struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
-
- rx_status->link_valid = 1;
- rx_status->link_id = link_sta->link_id;
- }
-
ieee80211_rx_napi(mvm->hw, sta, skb, napi);
}
@@ -588,7 +580,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm,
while ((skb = __skb_dequeue(skb_list))) {
iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb,
reorder_buf->queue,
- sta, NULL /* FIXME */);
+ sta);
reorder_buf->num_stored--;
}
}
@@ -2213,6 +2205,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
if (IS_ERR(sta))
sta = NULL;
link_sta = rcu_dereference(mvm->fw_id_to_link_sta[id]);
+
+ if (sta && sta->valid_links && link_sta) {
+ rx_status->link_valid = 1;
+ rx_status->link_id = link_sta->link_id;
+ }
}
} else if (!is_multicast_ether_addr(hdr->addr2)) {
/*
@@ -2356,8 +2353,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
!(desc->amsdu_info & IWL_RX_MPDU_AMSDU_LAST_SUBFRAME))
rx_status->flag |= RX_FLAG_AMSDU_MORE;
- iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta,
- link_sta);
+ iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta);
}
out:
rcu_read_unlock();
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index f3e3986b4c72f2..11559563ae3816 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -2813,7 +2813,8 @@ static int iwl_mvm_build_scan_cmd(struct iwl_mvm *mvm,
if (ver_handler->version != scan_ver)
continue;
- return ver_handler->handler(mvm, vif, params, type, uid);
+ err = ver_handler->handler(mvm, vif, params, type, uid);
+ return err ? : uid;
}
err = iwl_mvm_scan_umac(mvm, vif, params, type, uid);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index a59d264a11c52f..ad960faceb0d8f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -879,9 +879,8 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
struct iwl_rx_packet *pkt = rxb_addr(rxb);
struct iwl_mvm_session_prot_notif *notif = (void *)pkt->data;
unsigned int ver =
- iwl_fw_lookup_cmd_ver(mvm->fw,
- WIDE_ID(MAC_CONF_GROUP,
- SESSION_PROTECTION_CMD), 2);
+ iwl_fw_lookup_notif_ver(mvm->fw, MAC_CONF_GROUP,
+ SESSION_PROTECTION_NOTIF, 2);
int id = le32_to_cpu(notif->mac_link_id);
struct ieee80211_vif *vif;
struct iwl_mvm_vif *mvmvif;
diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
index 33973a60d0bf41..6229c785c84576 100644
--- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
@@ -1589,9 +1589,9 @@ void iwl_txq_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
return;
tfd_num = iwl_txq_get_cmd_index(txq, ssn);
- read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr);
spin_lock_bh(&txq->lock);
+ read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr);
if (!test_bit(txq_id, trans->txqs.queue_used)) {
IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n",
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c
index 367459bd134574..708132d5be2a6a 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c
@@ -2233,7 +2233,7 @@ static void rtw8922a_btc_init_cfg(struct rtw89_dev *rtwdev)
* Shared-Ant && BTG-path:WL mask(0x55f), others:WL THRU(0x5ff)
*/
if (btc->ant_type == BTC_ANT_SHARED && btc->btg_pos == path)
- rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff);
+ rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x55f);
else
rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff);
diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c
index b55fe320633c74..59e1fc0018df3f 100644
--- a/drivers/net/wireless/virtual/mac80211_hwsim.c
+++ b/drivers/net/wireless/virtual/mac80211_hwsim.c
@@ -3899,7 +3899,7 @@ static int hwsim_pmsr_report_nl(struct sk_buff *msg, struct genl_info *info)
}
nla_for_each_nested(peer, peers, rem) {
- struct cfg80211_pmsr_result result;
+ struct cfg80211_pmsr_result result = {};
err = mac80211_hwsim_parse_pmsr_result(peer, &result, info);
if (err)
diff --git a/drivers/net/wwan/t7xx/t7xx_cldma.c b/drivers/net/wwan/t7xx/t7xx_cldma.c
index 9f43f256db1d06..f0a4783baf1f32 100644
--- a/drivers/net/wwan/t7xx/t7xx_cldma.c
+++ b/drivers/net/wwan/t7xx/t7xx_cldma.c
@@ -106,7 +106,7 @@ bool t7xx_cldma_tx_addr_is_set(struct t7xx_cldma_hw *hw_info, unsigned int qno)
{
u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE;
- return ioread64(hw_info->ap_pdn_base + offset);
+ return ioread64_lo_hi(hw_info->ap_pdn_base + offset);
}
void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address,
@@ -117,7 +117,7 @@ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qn
reg = tx_rx == MTK_RX ? hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 :
hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0;
- iowrite64(address, reg + offset);
+ iowrite64_lo_hi(address, reg + offset);
}
void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno,
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
index abc41a7089fa4f..97163e1e5783ed 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
+++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
@@ -137,8 +137,9 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool
return -ENODEV;
}
- gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 +
- queue->index * sizeof(u64));
+ gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base +
+ REG_CLDMA_DL_CURRENT_ADDRL_0 +
+ queue->index * sizeof(u64));
if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100)
return 0;
@@ -316,8 +317,8 @@ static void t7xx_cldma_txq_empty_hndl(struct cldma_queue *queue)
struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info;
/* Check current processing TGPD, 64-bit address is in a table by Q index */
- ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 +
- queue->index * sizeof(u64));
+ ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 +
+ queue->index * sizeof(u64));
if (req->gpd_addr != ul_curr_addr) {
spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags);
dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n",
diff --git a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
index 76da4c15e3de17..f071ec7ff23d50 100644
--- a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
+++ b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
@@ -75,7 +75,7 @@ static void t7xx_pcie_mac_atr_tables_dis(void __iomem *pbase, enum t7xx_atr_src_
for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) {
offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i;
reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset;
- iowrite64(0, reg);
+ iowrite64_lo_hi(0, reg);
}
}
@@ -112,17 +112,17 @@ static int t7xx_pcie_mac_atr_cfg(struct t7xx_pci_dev *t7xx_dev, struct t7xx_atr_
reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset;
value = cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT;
- iowrite64(value, reg);
+ iowrite64_lo_hi(value, reg);
reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset;
iowrite32(cfg->trsl_id, reg);
reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset;
value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0);
- iowrite64(value, reg);
+ iowrite64_lo_hi(value, reg);
/* Ensure ATR is set */
- ioread64(reg);
+ ioread64_lo_hi(reg);
return 0;
}
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index ad29f370034e4f..8d2aee88526c69 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
return NULL;
}
skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
+ skb_mark_for_recycle(skb);
/* Align ip header to a 16 bytes boundary */
skb_reserve(skb, NET_IP_ALIGN);
diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
index 7eb17f46a8153d..9e1a34e23af26e 100644
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c
@@ -424,7 +424,8 @@ struct trf7970a {
enum trf7970a_state state;
struct device *dev;
struct spi_device *spi;
- struct regulator *regulator;
+ struct regulator *vin_regulator;
+ struct regulator *vddio_regulator;
struct nfc_digital_dev *ddev;
u32 quirks;
bool is_initiator;
@@ -1883,7 +1884,7 @@ static int trf7970a_power_up(struct trf7970a *trf)
if (trf->state != TRF7970A_ST_PWR_OFF)
return 0;
- ret = regulator_enable(trf->regulator);
+ ret = regulator_enable(trf->vin_regulator);
if (ret) {
dev_err(trf->dev, "%s - Can't enable VIN: %d\n", __func__, ret);
return ret;
@@ -1926,7 +1927,7 @@ static int trf7970a_power_down(struct trf7970a *trf)
if (trf->en2_gpiod && !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW))
gpiod_set_value_cansleep(trf->en2_gpiod, 0);
- ret = regulator_disable(trf->regulator);
+ ret = regulator_disable(trf->vin_regulator);
if (ret)
dev_err(trf->dev, "%s - Can't disable VIN: %d\n", __func__,
ret);
@@ -2065,37 +2066,37 @@ static int trf7970a_probe(struct spi_device *spi)
mutex_init(&trf->lock);
INIT_DELAYED_WORK(&trf->timeout_work, trf7970a_timeout_work_handler);
- trf->regulator = devm_regulator_get(&spi->dev, "vin");
- if (IS_ERR(trf->regulator)) {
- ret = PTR_ERR(trf->regulator);
+ trf->vin_regulator = devm_regulator_get(&spi->dev, "vin");
+ if (IS_ERR(trf->vin_regulator)) {
+ ret = PTR_ERR(trf->vin_regulator);
dev_err(trf->dev, "Can't get VIN regulator: %d\n", ret);
goto err_destroy_lock;
}
- ret = regulator_enable(trf->regulator);
+ ret = regulator_enable(trf->vin_regulator);
if (ret) {
dev_err(trf->dev, "Can't enable VIN: %d\n", ret);
goto err_destroy_lock;
}
- uvolts = regulator_get_voltage(trf->regulator);
+ uvolts = regulator_get_voltage(trf->vin_regulator);
if (uvolts > 4000000)
trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3;
- trf->regulator = devm_regulator_get(&spi->dev, "vdd-io");
- if (IS_ERR(trf->regulator)) {
- ret = PTR_ERR(trf->regulator);
+ trf->vddio_regulator = devm_regulator_get(&spi->dev, "vdd-io");
+ if (IS_ERR(trf->vddio_regulator)) {
+ ret = PTR_ERR(trf->vddio_regulator);
dev_err(trf->dev, "Can't get VDD_IO regulator: %d\n", ret);
- goto err_destroy_lock;
+ goto err_disable_vin_regulator;
}
- ret = regulator_enable(trf->regulator);
+ ret = regulator_enable(trf->vddio_regulator);
if (ret) {
dev_err(trf->dev, "Can't enable VDD_IO: %d\n", ret);
- goto err_destroy_lock;
+ goto err_disable_vin_regulator;
}
- if (regulator_get_voltage(trf->regulator) == 1800000) {
+ if (regulator_get_voltage(trf->vddio_regulator) == 1800000) {
trf->io_ctrl = TRF7970A_REG_IO_CTRL_IO_LOW;
dev_dbg(trf->dev, "trf7970a config vdd_io to 1.8V\n");
}
@@ -2108,7 +2109,7 @@ static int trf7970a_probe(struct spi_device *spi)
if (!trf->ddev) {
dev_err(trf->dev, "Can't allocate NFC digital device\n");
ret = -ENOMEM;
- goto err_disable_regulator;
+ goto err_disable_vddio_regulator;
}
nfc_digital_set_parent_dev(trf->ddev, trf->dev);
@@ -2137,8 +2138,10 @@ err_shutdown:
trf7970a_shutdown(trf);
err_free_ddev:
nfc_digital_free_device(trf->ddev);
-err_disable_regulator:
- regulator_disable(trf->regulator);
+err_disable_vddio_regulator:
+ regulator_disable(trf->vddio_regulator);
+err_disable_vin_regulator:
+ regulator_disable(trf->vin_regulator);
err_destroy_lock:
mutex_destroy(&trf->lock);
return ret;
@@ -2157,7 +2160,8 @@ static void trf7970a_remove(struct spi_device *spi)
nfc_digital_unregister_device(trf->ddev);
nfc_digital_free_device(trf->ddev);
- regulator_disable(trf->regulator);
+ regulator_disable(trf->vddio_regulator);
+ regulator_disable(trf->vin_regulator);
mutex_destroy(&trf->lock);
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 943d72bdd794ca..27281a9a8951db 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2076,6 +2076,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT;
struct queue_limits lim;
struct nvme_id_ns_nvm *nvm = NULL;
+ struct nvme_zone_info zi = {};
struct nvme_id_ns *id;
sector_t capacity;
unsigned lbaf;
@@ -2088,9 +2089,10 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (id->ncap == 0) {
/* namespace not allocated or attached */
info->is_removed = true;
- ret = -ENODEV;
+ ret = -ENXIO;
goto out;
}
+ lbaf = nvme_lbaf_index(id->flbas);
if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) {
ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm);
@@ -2098,8 +2100,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
goto out;
}
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ ns->head->ids.csi == NVME_CSI_ZNS) {
+ ret = nvme_query_zone_info(ns, lbaf, &zi);
+ if (ret < 0)
+ goto out;
+ }
+
blk_mq_freeze_queue(ns->disk->queue);
- lbaf = nvme_lbaf_index(id->flbas);
ns->head->lba_shift = id->lbaf[lbaf].ds;
ns->head->nuse = le64_to_cpu(id->nuse);
capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
@@ -2112,13 +2120,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
capacity = 0;
nvme_config_discard(ns, &lim);
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
- ns->head->ids.csi == NVME_CSI_ZNS) {
- ret = nvme_update_zone_info(ns, lbaf, &lim);
- if (ret) {
- blk_mq_unfreeze_queue(ns->disk->queue);
- goto out;
- }
- }
+ ns->head->ids.csi == NVME_CSI_ZNS)
+ nvme_update_zone_info(ns, &lim, &zi);
ret = queue_limits_commit_update(ns->disk->queue, &lim);
if (ret) {
blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2201,6 +2204,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
}
if (!ret && nvme_ns_head_multipath(ns->head)) {
+ struct queue_limits *ns_lim = &ns->disk->queue->limits;
struct queue_limits lim;
blk_mq_freeze_queue(ns->head->disk->queue);
@@ -2212,7 +2216,26 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
nvme_mpath_revalidate_paths(ns);
+ /*
+ * queue_limits mixes values that are the hardware limitations
+ * for bio splitting with what is the device configuration.
+ *
+ * For NVMe the device configuration can change after e.g. a
+ * Format command, and we really want to pick up the new format
+ * value here. But we must still stack the queue limits to the
+ * least common denominator for multipathing to split the bios
+ * properly.
+ *
+ * To work around this, we explicitly set the device
+ * configuration to those that we just queried, but only stack
+ * the splitting limits in to make sure we still obey possibly
+ * lower limitations of other controllers.
+ */
lim = queue_limits_start_update(ns->head->disk->queue);
+ lim.logical_block_size = ns_lim->logical_block_size;
+ lim.physical_block_size = ns_lim->physical_block_size;
+ lim.io_min = ns_lim->io_min;
+ lim.io_opt = ns_lim->io_opt;
queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
ns->head->disk->disk_name);
ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 68a5d971657bb5..a5b29e9ad342df 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2428,7 +2428,7 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl)
* controller. Called after last nvme_put_ctrl() call
*/
static void
-nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
+nvme_fc_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
@@ -3384,7 +3384,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .free_ctrl = nvme_fc_nvme_ctrl_freed,
+ .free_ctrl = nvme_fc_free_ctrl,
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_delete_ctrl,
.get_address = nvmf_get_address,
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 24193fcb8bd584..d0ed64dc7380e5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -1036,10 +1036,18 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
}
#endif /* CONFIG_NVME_MULTIPATH */
+struct nvme_zone_info {
+ u64 zone_size;
+ unsigned int max_open_zones;
+ unsigned int max_active_zones;
+};
+
int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
-int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
- struct queue_limits *lim);
+int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
+ struct nvme_zone_info *zi);
+void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
+ struct nvme_zone_info *zi);
#ifdef CONFIG_BLK_DEV_ZONED
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd,
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 722384bcc765cd..77aa0f440a6d2a 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -35,8 +35,8 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
return 0;
}
-int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
- struct queue_limits *lim)
+int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
+ struct nvme_zone_info *zi)
{
struct nvme_effects_log *log = ns->head->effects;
struct nvme_command c = { };
@@ -89,27 +89,34 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
goto free_data;
}
- ns->head->zsze =
- nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze));
- if (!is_power_of_2(ns->head->zsze)) {
+ zi->zone_size = le64_to_cpu(id->lbafe[lbaf].zsze);
+ if (!is_power_of_2(zi->zone_size)) {
dev_warn(ns->ctrl->device,
- "invalid zone size:%llu for namespace:%u\n",
- ns->head->zsze, ns->head->ns_id);
+ "invalid zone size: %llu for namespace: %u\n",
+ zi->zone_size, ns->head->ns_id);
status = -ENODEV;
goto free_data;
}
+ zi->max_open_zones = le32_to_cpu(id->mor) + 1;
+ zi->max_active_zones = le32_to_cpu(id->mar) + 1;
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
- lim->zoned = 1;
- lim->max_open_zones = le32_to_cpu(id->mor) + 1;
- lim->max_active_zones = le32_to_cpu(id->mar) + 1;
- lim->chunk_sectors = ns->head->zsze;
- lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
free_data:
kfree(id);
return status;
}
+void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
+ struct nvme_zone_info *zi)
+{
+ lim->zoned = 1;
+ lim->max_open_zones = zi->max_open_zones;
+ lim->max_active_zones = zi->max_active_zones;
+ lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
+ lim->chunk_sectors = ns->head->zsze =
+ nvme_lba_to_sect(ns->head, zi->zone_size);
+ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
+}
+
static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
unsigned int nr_zones, size_t *buflen)
{
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 77a6e817b31596..a2325330bf2214 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1613,6 +1613,11 @@ static struct config_group *nvmet_subsys_make(struct config_group *group,
return ERR_PTR(-EINVAL);
}
+ if (sysfs_streq(name, nvmet_disc_subsys->subsysnqn)) {
+ pr_err("can't create subsystem using unique discovery NQN\n");
+ return ERR_PTR(-EINVAL);
+ }
+
subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME);
if (IS_ERR(subsys))
return ERR_CAST(subsys);
@@ -2159,7 +2164,49 @@ static const struct config_item_type nvmet_hosts_type = {
static struct config_group nvmet_hosts_group;
+static ssize_t nvmet_root_discovery_nqn_show(struct config_item *item,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%s\n", nvmet_disc_subsys->subsysnqn);
+}
+
+static ssize_t nvmet_root_discovery_nqn_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct list_head *entry;
+ size_t len;
+
+ len = strcspn(page, "\n");
+ if (!len || len > NVMF_NQN_FIELD_LEN - 1)
+ return -EINVAL;
+
+ down_write(&nvmet_config_sem);
+ list_for_each(entry, &nvmet_subsystems_group.cg_children) {
+ struct config_item *item =
+ container_of(entry, struct config_item, ci_entry);
+
+ if (!strncmp(config_item_name(item), page, len)) {
+ pr_err("duplicate NQN %s\n", config_item_name(item));
+ up_write(&nvmet_config_sem);
+ return -EINVAL;
+ }
+ }
+ memset(nvmet_disc_subsys->subsysnqn, 0, NVMF_NQN_FIELD_LEN);
+ memcpy(nvmet_disc_subsys->subsysnqn, page, len);
+ up_write(&nvmet_config_sem);
+
+ return len;
+}
+
+CONFIGFS_ATTR(nvmet_root_, discovery_nqn);
+
+static struct configfs_attribute *nvmet_root_attrs[] = {
+ &nvmet_root_attr_discovery_nqn,
+ NULL,
+};
+
static const struct config_item_type nvmet_root_type = {
+ .ct_attrs = nvmet_root_attrs,
.ct_owner = THIS_MODULE,
};
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 6bbe4df0166ca5..8860a3eb71ec89 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1541,6 +1541,13 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
}
down_read(&nvmet_config_sem);
+ if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn,
+ NVMF_NQN_SIZE)) {
+ if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) {
+ up_read(&nvmet_config_sem);
+ return nvmet_disc_subsys;
+ }
+ }
list_for_each_entry(p, &port->subsystems, entry) {
if (!strncmp(p->subsys->subsysnqn, subsysnqn,
NVMF_NQN_SIZE)) {
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index fd229f310c931f..337ee1cb09ae64 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1115,16 +1115,21 @@ nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc)
}
static bool
-nvmet_fc_assoc_exits(struct nvmet_fc_tgtport *tgtport, u64 association_id)
+nvmet_fc_assoc_exists(struct nvmet_fc_tgtport *tgtport, u64 association_id)
{
struct nvmet_fc_tgt_assoc *a;
+ bool found = false;
+ rcu_read_lock();
list_for_each_entry_rcu(a, &tgtport->assoc_list, a_list) {
- if (association_id == a->association_id)
- return true;
+ if (association_id == a->association_id) {
+ found = true;
+ break;
+ }
}
+ rcu_read_unlock();
- return false;
+ return found;
}
static struct nvmet_fc_tgt_assoc *
@@ -1164,13 +1169,11 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
ran = ran << BYTES_FOR_QID_SHIFT;
spin_lock_irqsave(&tgtport->lock, flags);
- rcu_read_lock();
- if (!nvmet_fc_assoc_exits(tgtport, ran)) {
+ if (!nvmet_fc_assoc_exists(tgtport, ran)) {
assoc->association_id = ran;
list_add_tail_rcu(&assoc->a_list, &tgtport->assoc_list);
done = true;
}
- rcu_read_unlock();
spin_unlock_irqrestore(&tgtport->lock, flags);
} while (!done);
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 3bf27052832f30..4d57a4e3410546 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -9,6 +9,7 @@
#define pr_fmt(fmt) "OF: " fmt
+#include <linux/device.h>
#include <linux/of.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -667,6 +668,17 @@ void of_changeset_destroy(struct of_changeset *ocs)
{
struct of_changeset_entry *ce, *cen;
+ /*
+ * When a device is deleted, the device links to/from it are also queued
+ * for deletion. Until these device links are freed, the devices
+ * themselves aren't freed. If the device being deleted is due to an
+ * overlay change, this device might be holding a reference to a device
+ * node that will be freed. So, wait until all already pending device
+ * links are deleted before freeing a device node. This ensures we don't
+ * free any device node that has a non-zero reference count.
+ */
+ device_link_wait_removal();
+
list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
__of_changeset_entry_destroy(ce);
}
diff --git a/drivers/of/module.c b/drivers/of/module.c
index 0e8aa974f0f2bb..780fd82a7ecc58 100644
--- a/drivers/of/module.c
+++ b/drivers/of/module.c
@@ -16,19 +16,28 @@ ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len)
ssize_t csize;
ssize_t tsize;
+ /*
+ * Prevent a kernel oops in vsnprintf() -- it only allows passing a
+ * NULL ptr when the length is also 0. Also filter out the negative
+ * lengths...
+ */
+ if ((len > 0 && !str) || len < 0)
+ return -EINVAL;
+
/* Name & Type */
/* %p eats all alphanum characters, so %c must be used here */
csize = snprintf(str, len, "of:N%pOFn%c%s", np, 'T',
of_node_get_device_type(np));
tsize = csize;
+ if (csize >= len)
+ csize = len > 0 ? len - 1 : 0;
len -= csize;
- if (str)
- str += csize;
+ str += csize;
of_property_for_each_string(np, "compatible", p, compat) {
csize = strlen(compat) + 1;
tsize += csize;
- if (csize > len)
+ if (csize >= len)
continue;
csize = snprintf(str, len, "C%s", compat);
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 9ce0d20a6c5812..feef537257d057 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1022,7 +1022,7 @@ static const struct dma_map_ops ccio_ops = {
.map_sg = ccio_map_sg,
.unmap_sg = ccio_unmap_sg,
.get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
+ .alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
};
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index 784037837f65e0..fc3863c09f83d3 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1090,7 +1090,7 @@ static const struct dma_map_ops sba_ops = {
.map_sg = sba_map_sg,
.unmap_sg = sba_unmap_sg,
.get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
+ .alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
};
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index bf4833221816d4..eff7f5df08e27f 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3766,14 +3766,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003e, quirk_no_bus_reset);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset);
/*
- * Apparently the LSI / Agere FW643 can't recover after a Secondary Bus
- * Reset and requires a power-off or suspend/resume and rescan. Prevent
- * use of that reset.
- */
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5900, quirk_no_bus_reset);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5901, quirk_no_bus_reset);
-
-/*
* Some TI KeyStone C667X devices do not support bus/hot reset. The PCIESS
* automatically disables LTSSM when Secondary Bus Reset is received and
* the device stops working. Prevent bus reset for these devices. With
diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c
index a9277dcf90ce0c..89dd38343f93ce 100644
--- a/drivers/perf/alibaba_uncore_drw_pmu.c
+++ b/drivers/perf/alibaba_uncore_drw_pmu.c
@@ -709,6 +709,7 @@ static int ali_drw_pmu_probe(struct platform_device *pdev)
drw_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.event_init = ali_drw_pmu_event_init,
.add = ali_drw_pmu_add,
@@ -746,18 +747,14 @@ static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
struct ali_drw_pmu_irq *irq;
struct ali_drw_pmu *drw_pmu;
unsigned int target;
- int ret;
- cpumask_t node_online_cpus;
irq = hlist_entry_safe(node, struct ali_drw_pmu_irq, node);
if (cpu != irq->cpu)
return 0;
- ret = cpumask_and(&node_online_cpus,
- cpumask_of_node(cpu_to_node(cpu)), cpu_online_mask);
- if (ret)
- target = cpumask_any_but(&node_online_cpus, cpu);
- else
+ target = cpumask_any_and_but(cpumask_of_node(cpu_to_node(cpu)),
+ cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
index bbc7285fd934a3..07446d784a1a64 100644
--- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -492,6 +492,7 @@ int meson_ddr_pmu_create(struct platform_device *pdev)
*pmu = (struct ddr_pmu) {
.pmu = {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.attr_groups = attr_groups,
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 6be03f81ae5db7..a7fd806779197f 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -1409,6 +1409,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
cci_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.name = cci_pmu->model->name,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = cci_pmu_enable,
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 641471bd5eff40..f4495ff6525f65 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -1265,6 +1265,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
/* Perf driver registration */
ccn->dt.pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = ccn->dev,
.attr_groups = arm_ccn_pmu_attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = arm_ccn_pmu_event_init,
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 7ef9c7e4836b72..e26ad1d3ed0bb7 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -1950,20 +1950,20 @@ static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_no
struct arm_cmn *cmn;
unsigned int target;
int node;
- cpumask_t mask;
cmn = hlist_entry_safe(cpuhp_node, struct arm_cmn, cpuhp_node);
if (cpu != cmn->cpu)
return 0;
node = dev_to_node(cmn->dev);
- if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
- cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
- target = cpumask_any(&mask);
- else
+
+ target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
+
if (target < nr_cpu_ids)
arm_cmn_migrate(cmn, target);
+
return 0;
}
@@ -2482,6 +2482,7 @@ static int arm_cmn_probe(struct platform_device *pdev)
cmn->cpu = cpumask_local_spread(0, dev_to_node(cmn->dev));
cmn->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = cmn->dev,
.attr_groups = arm_cmn_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index b9a252272f1e9f..ba0cf2f466ef00 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -1206,6 +1206,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
cspmu->pmu = (struct pmu){
.task_ctx_nr = perf_invalid_context,
.module = cspmu->impl.module,
+ .parent = cspmu->dev,
.pmu_enable = arm_cspmu_enable,
.pmu_disable = arm_cspmu_disable,
.event_init = arm_cspmu_event_init,
@@ -1322,8 +1323,7 @@ static int arm_cspmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
- int dst;
- struct cpumask online_supported;
+ unsigned int dst;
struct arm_cspmu *cspmu =
hlist_entry_safe(node, struct arm_cspmu, cpuhp_node);
@@ -1333,9 +1333,8 @@ static int arm_cspmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
return 0;
/* Choose a new CPU to migrate ownership of the PMU to */
- cpumask_and(&online_supported, &cspmu->associated_cpus,
- cpu_online_mask);
- dst = cpumask_any_but(&online_supported, cpu);
+ dst = cpumask_any_and_but(&cspmu->associated_cpus,
+ cpu_online_mask, cpu);
if (dst >= nr_cpu_ids)
return 0;
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 8a81be2dd5ecf5..2ec96e204c403e 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -673,6 +673,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev)
dmc620_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.event_init = dmc620_pmu_event_init,
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index bae3ca37f846ee..92248a24a1aabe 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -230,15 +230,6 @@ static const struct attribute_group *dsu_pmu_attr_groups[] = {
NULL,
};
-static int dsu_pmu_get_online_cpu_any_but(struct dsu_pmu *dsu_pmu, int cpu)
-{
- struct cpumask online_supported;
-
- cpumask_and(&online_supported,
- &dsu_pmu->associated_cpus, cpu_online_mask);
- return cpumask_any_but(&online_supported, cpu);
-}
-
static inline bool dsu_pmu_counter_valid(struct dsu_pmu *dsu_pmu, u32 idx)
{
return (idx < dsu_pmu->num_counters) ||
@@ -751,6 +742,7 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
dsu_pmu->pmu = (struct pmu) {
.task_ctx_nr = perf_invalid_context,
+ .parent = &pdev->dev,
.module = THIS_MODULE,
.pmu_enable = dsu_pmu_enable,
.pmu_disable = dsu_pmu_disable,
@@ -827,14 +819,16 @@ static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
- int dst;
- struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
- cpuhp_node);
+ struct dsu_pmu *dsu_pmu;
+ unsigned int dst;
+
+ dsu_pmu = hlist_entry_safe(node, struct dsu_pmu, cpuhp_node);
if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu))
return 0;
- dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu);
+ dst = cpumask_any_and_but(&dsu_pmu->associated_cpus,
+ cpu_online_mask, cpu);
/* If there are no active CPUs in the DSU, leave IRQ disabled */
if (dst >= nr_cpu_ids)
return 0;
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 3596db36cbff4f..4b1a9a92ea117b 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -196,6 +196,7 @@ int arm_pmu_device_probe(struct platform_device *pdev,
if (!pmu)
return -ENOMEM;
+ pmu->pmu.parent = &pdev->dev;
pmu->plat_device = pdev;
ret = pmu_parse_irqs(pmu);
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 719aa953a1c4d6..d5fa92ba837397 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -860,6 +860,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
smmu_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = smmu_pmu_enable,
.pmu_disable = smmu_pmu_disable,
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 35f0de03416fc0..9100d82bfabc0d 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -932,6 +932,7 @@ static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)
spe_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = &spe_pmu->pdev->dev,
.capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
.attr_groups = arm_spe_pmu_attr_groups,
/*
diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c
index 957058ad0099e2..c5e328f2384194 100644
--- a/drivers/perf/dwc_pcie_pmu.c
+++ b/drivers/perf/dwc_pcie_pmu.c
@@ -690,9 +690,8 @@ static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_n
{
struct dwc_pcie_pmu *pcie_pmu;
struct pci_dev *pdev;
- int node;
- cpumask_t mask;
unsigned int target;
+ int node;
pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
/* Nothing to do if this CPU doesn't own the PMU */
@@ -702,10 +701,9 @@ static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_n
pcie_pmu->on_cpu = -1;
pdev = pcie_pmu->pdev;
node = dev_to_node(&pdev->dev);
- if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
- cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
- target = cpumask_any(&mask);
- else
+
+ target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target >= nr_cpu_ids) {
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 4e8fa5a48fcfe0..1bbdb29743c442 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -651,6 +651,7 @@ static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
*pmu = (struct ddr_pmu) {
.pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = dev,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
.task_ctx_nr = perf_invalid_context,
.attr_groups = attr_groups,
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index 5d1f0e9fdb08d2..b1e4739fbdb0a9 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -673,7 +673,6 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
unsigned int target;
- cpumask_t mask;
int numa_node;
/* Nothing to do if this CPU doesn't own the PMU */
@@ -684,10 +683,10 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
/* Choose a local CPU from all online cpus. */
numa_node = dev_to_node(&pcie_pmu->pdev->dev);
- if (cpumask_and(&mask, cpumask_of_node(numa_node), cpu_online_mask) &&
- cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
- target = cpumask_any(&mask);
- else
+
+ target = cpumask_any_and_but(cpumask_of_node(numa_node),
+ cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
target = cpumask_any_but(cpu_online_mask, cpu);
if (target >= nr_cpu_ids) {
@@ -807,6 +806,7 @@ static int hisi_pcie_alloc_pmu(struct pci_dev *pdev, struct hisi_pcie_pmu *pcie_
pcie_pmu->pmu = (struct pmu) {
.name = name,
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.event_init = hisi_pcie_pmu_event_init,
.pmu_enable = hisi_pcie_pmu_enable,
.pmu_disable = hisi_pcie_pmu_disable,
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 04031450d5feca..a60e4c96609876 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -504,7 +504,6 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu,
node);
- cpumask_t pmu_online_cpus;
unsigned int target;
if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus))
@@ -518,9 +517,8 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
hisi_pmu->on_cpu = -1;
/* Choose a new CPU to migrate ownership of the PMU to */
- cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus,
- cpu_online_mask);
- target = cpumask_any_but(&pmu_online_cpus, cpu);
+ target = cpumask_any_and_but(&hisi_pmu->associated_cpus,
+ cpu_online_mask, cpu);
if (target >= nr_cpu_ids)
return 0;
@@ -538,6 +536,7 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module)
struct pmu *pmu = &hisi_pmu->pmu;
pmu->module = module;
+ pmu->parent = hisi_pmu->dev;
pmu->task_ctx_nr = perf_invalid_context;
pmu->event_init = hisi_uncore_pmu_event_init;
pmu->pmu_enable = hisi_uncore_pmu_enable;
diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c
index 16869bf5bf4cca..5236acdcc2e18f 100644
--- a/drivers/perf/hisilicon/hns3_pmu.c
+++ b/drivers/perf/hisilicon/hns3_pmu.c
@@ -1419,6 +1419,7 @@ static int hns3_pmu_alloc_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
hns3_pmu->pmu = (struct pmu) {
.name = name,
.module = THIS_MODULE,
+ .parent = &pdev->dev,
.event_init = hns3_pmu_event_init,
.pmu_enable = hns3_pmu_enable,
.pmu_disable = hns3_pmu_disable,
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index 148df5ae8ef832..980e3051edd725 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -801,9 +801,8 @@ static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
- struct cluster_pmu *cluster;
struct l2cache_pmu *l2cache_pmu;
- cpumask_t cluster_online_cpus;
+ struct cluster_pmu *cluster;
unsigned int target;
l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
@@ -820,9 +819,8 @@ static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
cluster->on_cpu = -1;
/* Any other CPU for this cluster which is still online */
- cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus,
- cpu_online_mask);
- target = cpumask_any_but(&cluster_online_cpus, cpu);
+ target = cpumask_any_and_but(&cluster->cluster_cpus,
+ cpu_online_mask, cpu);
if (target >= nr_cpu_ids) {
disable_irq(cluster->irq);
return 0;
@@ -904,6 +902,7 @@ static int l2_cache_pmu_probe(struct platform_device *pdev)
l2cache_pmu->pmu = (struct pmu) {
/* suffix is instance id for future use with multiple sockets */
.name = "l2cache_0",
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = l2_cache_pmu_enable,
.pmu_disable = l2_cache_pmu_disable,
diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c
index f16783d03db7b6..37786e88514e8c 100644
--- a/drivers/perf/qcom_l3_pmu.c
+++ b/drivers/perf/qcom_l3_pmu.c
@@ -748,6 +748,7 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
return -ENOMEM;
l3pmu->pmu = (struct pmu) {
+ .parent = &pdev->dev,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = qcom_l3_cache__pmu_enable,
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index c78a6fd6c57f61..b4efdddb2ad91f 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -313,6 +313,10 @@ static int riscv_pmu_event_init(struct perf_event *event)
u64 event_config = 0;
uint64_t cmask;
+ /* driver does not support branch stack sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
hwc->flags = 0;
mapped_event = rvpmu->event_map(event, &event_config);
if (mapped_event < 0) {
diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c
index fa0bccf4edf2ea..04487ad7fba0b5 100644
--- a/drivers/perf/riscv_pmu_legacy.c
+++ b/drivers/perf/riscv_pmu_legacy.c
@@ -136,6 +136,7 @@ static int pmu_legacy_device_probe(struct platform_device *pdev)
pmu = riscv_pmu_alloc();
if (!pmu)
return -ENOMEM;
+ pmu->pmu.parent = &pdev->dev;
pmu_legacy_init(pmu);
return 0;
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 8cbe6e5f9c39a6..82636273d72659 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -1043,7 +1043,6 @@ static struct ctl_table sbi_pmu_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
- { }
};
static int pmu_sbi_device_probe(struct platform_device *pdev)
@@ -1081,6 +1080,7 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
}
pmu->pmu.attr_groups = riscv_pmu_attr_groups;
+ pmu->pmu.parent = &pdev->dev;
pmu->cmask = cmask;
pmu->ctr_start = pmu_sbi_ctr_start;
pmu->ctr_stop = pmu_sbi_ctr_stop;
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index e16d10c763de19..faf763d2c95cb8 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -504,24 +504,19 @@ static void tx2_uncore_event_update(struct perf_event *event)
static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev)
{
- int i = 0;
- struct acpi_tx2_pmu_device {
- __u8 id[ACPI_ID_LEN];
- enum tx2_uncore_type type;
- } devices[] = {
+ struct acpi_device_id devices[] = {
{"CAV901D", PMU_TYPE_L3C},
{"CAV901F", PMU_TYPE_DMC},
{"CAV901E", PMU_TYPE_CCPI2},
- {"", PMU_TYPE_INVALID}
+ {}
};
+ const struct acpi_device_id *id;
- while (devices[i].type != PMU_TYPE_INVALID) {
- if (!strcmp(acpi_device_hid(adev), devices[i].id))
- break;
- i++;
- }
+ id = acpi_match_acpi_device(devices, adev);
+ if (!id)
+ return PMU_TYPE_INVALID;
- return devices[i].type;
+ return (enum tx2_uncore_type)id->driver_data;
}
static bool tx2_uncore_validate_event(struct pmu *pmu,
@@ -729,6 +724,7 @@ static int tx2_uncore_pmu_register(
/* Perf event registration */
tx2_pmu->pmu = (struct pmu) {
.module = THIS_MODULE,
+ .parent = tx2_pmu->dev,
.attr_groups = tx2_pmu->attr_groups,
.task_ctx_nr = perf_invalid_context,
.event_init = tx2_uncore_event_init,
@@ -932,9 +928,8 @@ static int tx2_uncore_pmu_online_cpu(unsigned int cpu,
static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
struct hlist_node *hpnode)
{
- int new_cpu;
struct tx2_uncore_pmu *tx2_pmu;
- struct cpumask cpu_online_mask_temp;
+ unsigned int new_cpu;
tx2_pmu = hlist_entry_safe(hpnode,
struct tx2_uncore_pmu, hpnode);
@@ -945,11 +940,8 @@ static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
if (tx2_pmu->hrtimer_callback)
hrtimer_cancel(&tx2_pmu->hrtimer);
- cpumask_copy(&cpu_online_mask_temp, cpu_online_mask);
- cpumask_clear_cpu(cpu, &cpu_online_mask_temp);
- new_cpu = cpumask_any_and(
- cpumask_of_node(tx2_pmu->node),
- &cpu_online_mask_temp);
+ new_cpu = cpumask_any_and_but(cpumask_of_node(tx2_pmu->node),
+ cpu_online_mask, cpu);
tx2_pmu->cpu = new_cpu;
if (new_cpu >= nr_cpu_ids)
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 0d49343d704b7c..8823b4c6b55613 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1102,6 +1102,7 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
/* Perf driver registration */
pmu_dev->pmu = (struct pmu) {
+ .parent = pmu_dev->parent->dev,
.attr_groups = pmu_dev->attr_groups,
.task_ctx_nr = perf_invalid_context,
.pmu_enable = xgene_perf_pmu_enable,
diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
index b700f52b7b6799..11fcb1867118c3 100644
--- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
+++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
@@ -110,8 +110,10 @@ static int imx8_pcie_phy_power_on(struct phy *phy)
/* Source clock from SoC internal PLL */
writel(ANA_PLL_CLK_OUT_TO_EXT_IO_SEL,
imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG062);
- writel(AUX_PLL_REFCLK_SEL_SYS_PLL,
- imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063);
+ if (imx8_phy->drvdata->variant != IMX8MM) {
+ writel(AUX_PLL_REFCLK_SEL_SYS_PLL,
+ imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG063);
+ }
val = ANA_AUX_RX_TX_SEL_TX | ANA_AUX_TX_TERM;
writel(val | ANA_AUX_RX_TERM_GND_EN,
imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG064);
diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
index 41162d7228c919..1d1db1737422a8 100644
--- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
+++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
@@ -603,7 +603,7 @@ static void comphy_gbe_phy_init(struct mvebu_a3700_comphy_lane *lane,
u16 val;
fix_idx = 0;
- for (addr = 0; addr < 512; addr++) {
+ for (addr = 0; addr < ARRAY_SIZE(gbe_phy_init); addr++) {
/*
* All PHY register values are defined in full for 3.125Gbps
* SERDES speed. The values required for 1.25 Gbps are almost
@@ -611,11 +611,12 @@ static void comphy_gbe_phy_init(struct mvebu_a3700_comphy_lane *lane,
* comparison to 3.125 Gbps values. These register values are
* stored in "gbe_phy_init_fix" array.
*/
- if (!is_1gbps && gbe_phy_init_fix[fix_idx].addr == addr) {
+ if (!is_1gbps &&
+ fix_idx < ARRAY_SIZE(gbe_phy_init_fix) &&
+ gbe_phy_init_fix[fix_idx].addr == addr) {
/* Use new value */
val = gbe_phy_init_fix[fix_idx].value;
- if (fix_idx < ARRAY_SIZE(gbe_phy_init_fix))
- fix_idx++;
+ fix_idx++;
} else {
val = gbe_phy_init[addr];
}
diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c
index 03fb0d4b75d744..20d4c020a83c1f 100644
--- a/drivers/phy/qualcomm/phy-qcom-m31.c
+++ b/drivers/phy/qualcomm/phy-qcom-m31.c
@@ -297,7 +297,7 @@ static int m31usb_phy_probe(struct platform_device *pdev)
return dev_err_probe(dev, PTR_ERR(qphy->phy),
"failed to create phy\n");
- qphy->vreg = devm_regulator_get(dev, "vdda-phy");
+ qphy->vreg = devm_regulator_get(dev, "vdd");
if (IS_ERR(qphy->vreg))
return dev_err_probe(dev, PTR_ERR(qphy->vreg),
"failed to get vreg\n");
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 7d585a4bbbba95..c21cdb8dbfe746 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -77,6 +77,7 @@ enum qphy_reg_layout {
QPHY_COM_BIAS_EN_CLKBUFLR_EN,
QPHY_DP_PHY_STATUS,
+ QPHY_DP_PHY_VCO_DIV,
QPHY_TX_TX_POL_INV,
QPHY_TX_TX_DRV_LVL,
@@ -102,6 +103,7 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
[QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN,
[QPHY_DP_PHY_STATUS] = QSERDES_V3_DP_PHY_STATUS,
+ [QPHY_DP_PHY_VCO_DIV] = QSERDES_V3_DP_PHY_VCO_DIV,
[QPHY_TX_TX_POL_INV] = QSERDES_V3_TX_TX_POL_INV,
[QPHY_TX_TX_DRV_LVL] = QSERDES_V3_TX_TX_DRV_LVL,
@@ -126,6 +128,7 @@ static const unsigned int qmp_v45_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
[QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN,
[QPHY_DP_PHY_STATUS] = QSERDES_V4_DP_PHY_STATUS,
+ [QPHY_DP_PHY_VCO_DIV] = QSERDES_V4_DP_PHY_VCO_DIV,
[QPHY_TX_TX_POL_INV] = QSERDES_V4_TX_TX_POL_INV,
[QPHY_TX_TX_DRV_LVL] = QSERDES_V4_TX_TX_DRV_LVL,
@@ -150,6 +153,7 @@ static const unsigned int qmp_v5_5nm_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
[QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN,
[QPHY_DP_PHY_STATUS] = QSERDES_V5_DP_PHY_STATUS,
+ [QPHY_DP_PHY_VCO_DIV] = QSERDES_V5_DP_PHY_VCO_DIV,
[QPHY_TX_TX_POL_INV] = QSERDES_V5_5NM_TX_TX_POL_INV,
[QPHY_TX_TX_DRV_LVL] = QSERDES_V5_5NM_TX_TX_DRV_LVL,
@@ -174,6 +178,7 @@ static const unsigned int qmp_v6_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
[QPHY_COM_BIAS_EN_CLKBUFLR_EN] = QSERDES_V6_COM_PLL_BIAS_EN_CLK_BUFLR_EN,
[QPHY_DP_PHY_STATUS] = QSERDES_V6_DP_PHY_STATUS,
+ [QPHY_DP_PHY_VCO_DIV] = QSERDES_V6_DP_PHY_VCO_DIV,
[QPHY_TX_TX_POL_INV] = QSERDES_V6_TX_TX_POL_INV,
[QPHY_TX_TX_DRV_LVL] = QSERDES_V6_TX_TX_DRV_LVL,
@@ -2150,9 +2155,9 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp)
writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL);
if (reverse)
- writel(0x4c, qmp->pcs + QSERDES_DP_PHY_MODE);
+ writel(0x4c, qmp->dp_dp_phy + QSERDES_DP_PHY_MODE);
else
- writel(0x5c, qmp->pcs + QSERDES_DP_PHY_MODE);
+ writel(0x5c, qmp->dp_dp_phy + QSERDES_DP_PHY_MODE);
return reverse;
}
@@ -2162,6 +2167,7 @@ static int qmp_combo_configure_dp_clocks(struct qmp_combo *qmp)
const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
u32 phy_vco_div;
unsigned long pixel_freq;
+ const struct qmp_phy_cfg *cfg = qmp->cfg;
switch (dp_opts->link_rate) {
case 1620:
@@ -2184,7 +2190,7 @@ static int qmp_combo_configure_dp_clocks(struct qmp_combo *qmp)
/* Other link rates aren't supported */
return -EINVAL;
}
- writel(phy_vco_div, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_VCO_DIV);
+ writel(phy_vco_div, qmp->dp_dp_phy + cfg->regs[QPHY_DP_PHY_VCO_DIV]);
clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000);
clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h
index f5cfacf9be964e..181057421c113f 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h
@@ -7,6 +7,7 @@
#define QCOM_PHY_QMP_DP_PHY_V5_H_
/* Only for QMP V5 PHY - DP PHY registers */
+#define QSERDES_V5_DP_PHY_VCO_DIV 0x070
#define QSERDES_V5_DP_PHY_AUX_INTERRUPT_STATUS 0x0d8
#define QSERDES_V5_DP_PHY_STATUS 0x0dc
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h
index 01a20d3be4b812..fa967a1af058f7 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h
@@ -7,6 +7,7 @@
#define QCOM_PHY_QMP_DP_PHY_V6_H_
/* Only for QMP V6 PHY - DP PHY registers */
+#define QSERDES_V6_DP_PHY_VCO_DIV 0x070
#define QSERDES_V6_DP_PHY_AUX_INTERRUPT_STATUS 0x0e0
#define QSERDES_V6_DP_PHY_STATUS 0x0e4
diff --git a/drivers/phy/rockchip/Kconfig b/drivers/phy/rockchip/Kconfig
index a34f67bb7e61ad..b60a4b60451e5c 100644
--- a/drivers/phy/rockchip/Kconfig
+++ b/drivers/phy/rockchip/Kconfig
@@ -87,6 +87,7 @@ config PHY_ROCKCHIP_SAMSUNG_HDPTX
tristate "Rockchip Samsung HDMI/eDP Combo PHY driver"
depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF
select GENERIC_PHY
+ select RATIONAL
help
Enable this to support the Rockchip HDMI/eDP Combo PHY
with Samsung IP block.
diff --git a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c
index 76b9cf417591de..bf74e429ff46e7 100644
--- a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c
+++ b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c
@@ -125,12 +125,15 @@ struct rockchip_combphy_grfcfg {
};
struct rockchip_combphy_cfg {
+ unsigned int num_phys;
+ unsigned int phy_ids[3];
const struct rockchip_combphy_grfcfg *grfcfg;
int (*combphy_cfg)(struct rockchip_combphy_priv *priv);
};
struct rockchip_combphy_priv {
u8 type;
+ int id;
void __iomem *mmio;
int num_clks;
struct clk_bulk_data *clks;
@@ -320,7 +323,7 @@ static int rockchip_combphy_probe(struct platform_device *pdev)
struct rockchip_combphy_priv *priv;
const struct rockchip_combphy_cfg *phy_cfg;
struct resource *res;
- int ret;
+ int ret, id;
phy_cfg = of_device_get_match_data(dev);
if (!phy_cfg) {
@@ -338,6 +341,15 @@ static int rockchip_combphy_probe(struct platform_device *pdev)
return ret;
}
+ /* find the phy-id from the io address */
+ priv->id = -ENODEV;
+ for (id = 0; id < phy_cfg->num_phys; id++) {
+ if (res->start == phy_cfg->phy_ids[id]) {
+ priv->id = id;
+ break;
+ }
+ }
+
priv->dev = dev;
priv->type = PHY_NONE;
priv->cfg = phy_cfg;
@@ -562,6 +574,12 @@ static const struct rockchip_combphy_grfcfg rk3568_combphy_grfcfgs = {
};
static const struct rockchip_combphy_cfg rk3568_combphy_cfgs = {
+ .num_phys = 3,
+ .phy_ids = {
+ 0xfe820000,
+ 0xfe830000,
+ 0xfe840000,
+ },
.grfcfg = &rk3568_combphy_grfcfgs,
.combphy_cfg = rk3568_combphy_cfg,
};
@@ -578,8 +596,14 @@ static int rk3588_combphy_cfg(struct rockchip_combphy_priv *priv)
rockchip_combphy_param_write(priv->phy_grf, &cfg->con1_for_pcie, true);
rockchip_combphy_param_write(priv->phy_grf, &cfg->con2_for_pcie, true);
rockchip_combphy_param_write(priv->phy_grf, &cfg->con3_for_pcie, true);
- rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l0_sel, true);
- rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l1_sel, true);
+ switch (priv->id) {
+ case 1:
+ rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l0_sel, true);
+ break;
+ case 2:
+ rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l1_sel, true);
+ break;
+ }
break;
case PHY_TYPE_USB3:
/* Set SSC downward spread spectrum */
@@ -736,6 +760,12 @@ static const struct rockchip_combphy_grfcfg rk3588_combphy_grfcfgs = {
};
static const struct rockchip_combphy_cfg rk3588_combphy_cfgs = {
+ .num_phys = 3,
+ .phy_ids = {
+ 0xfee00000,
+ 0xfee10000,
+ 0xfee20000,
+ },
.grfcfg = &rk3588_combphy_grfcfgs,
.combphy_cfg = rk3588_combphy_cfg,
};
diff --git a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c
index 121e5961ce1147..9857ee45b89e0d 100644
--- a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c
+++ b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c
@@ -40,6 +40,8 @@
#define RK3588_BIFURCATION_LANE_0_1 BIT(0)
#define RK3588_BIFURCATION_LANE_2_3 BIT(1)
#define RK3588_LANE_AGGREGATION BIT(2)
+#define RK3588_PCIE1LN_SEL_EN (GENMASK(1, 0) << 16)
+#define RK3588_PCIE30_PHY_MODE_EN (GENMASK(2, 0) << 16)
struct rockchip_p3phy_ops;
@@ -132,7 +134,7 @@ static const struct rockchip_p3phy_ops rk3568_ops = {
static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv)
{
u32 reg = 0;
- u8 mode = 0;
+ u8 mode = RK3588_LANE_AGGREGATION; /* default */
int ret;
/* Deassert PCIe PMA output clamp mode */
@@ -140,31 +142,24 @@ static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv)
/* Set bifurcation if needed */
for (int i = 0; i < priv->num_lanes; i++) {
- if (!priv->lanes[i])
- mode |= (BIT(i) << 3);
-
if (priv->lanes[i] > 1)
- mode |= (BIT(i) >> 1);
- }
-
- if (!mode)
- reg = RK3588_LANE_AGGREGATION;
- else {
- if (mode & (BIT(0) | BIT(1)))
- reg |= RK3588_BIFURCATION_LANE_0_1;
-
- if (mode & (BIT(2) | BIT(3)))
- reg |= RK3588_BIFURCATION_LANE_2_3;
+ mode &= ~RK3588_LANE_AGGREGATION;
+ if (priv->lanes[i] == 3)
+ mode |= RK3588_BIFURCATION_LANE_0_1;
+ if (priv->lanes[i] == 4)
+ mode |= RK3588_BIFURCATION_LANE_2_3;
}
- regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg);
+ reg = mode;
+ regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0,
+ RK3588_PCIE30_PHY_MODE_EN | reg);
/* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */
if (!IS_ERR(priv->pipe_grf)) {
- reg = (mode & (BIT(6) | BIT(7))) >> 6;
+ reg = mode & (RK3588_BIFURCATION_LANE_0_1 | RK3588_BIFURCATION_LANE_2_3);
if (reg)
regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON,
- (reg << 16) | reg);
+ RK3588_PCIE1LN_SEL_EN | reg);
}
reset_control_deassert(priv->p30phy);
diff --git a/drivers/phy/ti/phy-tusb1210.c b/drivers/phy/ti/phy-tusb1210.c
index 13cd614e12a1d3..751fecd466e3e3 100644
--- a/drivers/phy/ti/phy-tusb1210.c
+++ b/drivers/phy/ti/phy-tusb1210.c
@@ -69,7 +69,6 @@ struct tusb1210 {
struct delayed_work chg_det_work;
struct notifier_block psy_nb;
struct power_supply *psy;
- struct power_supply *charger;
#endif
};
@@ -236,19 +235,24 @@ static const char * const tusb1210_chargers[] = {
static bool tusb1210_get_online(struct tusb1210 *tusb)
{
+ struct power_supply *charger = NULL;
union power_supply_propval val;
- int i;
+ bool online = false;
+ int i, ret;
- for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !tusb->charger; i++)
- tusb->charger = power_supply_get_by_name(tusb1210_chargers[i]);
+ for (i = 0; i < ARRAY_SIZE(tusb1210_chargers) && !charger; i++)
+ charger = power_supply_get_by_name(tusb1210_chargers[i]);
- if (!tusb->charger)
+ if (!charger)
return false;
- if (power_supply_get_property(tusb->charger, POWER_SUPPLY_PROP_ONLINE, &val))
- return false;
+ ret = power_supply_get_property(charger, POWER_SUPPLY_PROP_ONLINE, &val);
+ if (ret == 0)
+ online = val.intval;
+
+ power_supply_put(charger);
- return val.intval;
+ return online;
}
static void tusb1210_chg_det_work(struct work_struct *work)
@@ -473,9 +477,6 @@ static void tusb1210_remove_charger_detect(struct tusb1210 *tusb)
cancel_delayed_work_sync(&tusb->chg_det_work);
power_supply_unregister(tusb->psy);
}
-
- if (tusb->charger)
- power_supply_put(tusb->charger);
}
#else
static void tusb1210_probe_charger_detect(struct tusb1210 *tusb) { }
diff --git a/drivers/pinctrl/aspeed/Makefile b/drivers/pinctrl/aspeed/Makefile
index 489ea1778353f7..db2a7600ae2bde 100644
--- a/drivers/pinctrl/aspeed/Makefile
+++ b/drivers/pinctrl/aspeed/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
# Aspeed pinctrl support
-ccflags-y += $(call cc-option,-Woverride-init)
+ccflags-y += -Woverride-init
obj-$(CONFIG_PINCTRL_ASPEED) += pinctrl-aspeed.o pinmux-aspeed.o
obj-$(CONFIG_PINCTRL_ASPEED_G4) += pinctrl-aspeed-g4.o
obj-$(CONFIG_PINCTRL_ASPEED_G5) += pinctrl-aspeed-g5.o
diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index ac97724c59bae9..4e87f5b875c0e8 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -231,6 +231,7 @@ static const unsigned int byt_score_pins_map[BYT_NGPIO_SCORE] = {
/* SCORE groups */
static const unsigned int byt_score_uart1_pins[] = { 70, 71, 72, 73 };
static const unsigned int byt_score_uart2_pins[] = { 74, 75, 76, 77 };
+static const unsigned int byt_score_uart3_pins[] = { 57, 61 };
static const unsigned int byt_score_pwm0_pins[] = { 94 };
static const unsigned int byt_score_pwm1_pins[] = { 95 };
@@ -278,37 +279,38 @@ static const unsigned int byt_score_plt_clk5_pins[] = { 101 };
static const unsigned int byt_score_smbus_pins[] = { 51, 52, 53 };
static const struct intel_pingroup byt_score_groups[] = {
- PIN_GROUP("uart1_grp", byt_score_uart1_pins, 1),
- PIN_GROUP("uart2_grp", byt_score_uart2_pins, 1),
- PIN_GROUP("pwm0_grp", byt_score_pwm0_pins, 1),
- PIN_GROUP("pwm1_grp", byt_score_pwm1_pins, 1),
- PIN_GROUP("ssp2_grp", byt_score_ssp2_pins, 1),
- PIN_GROUP("sio_spi_grp", byt_score_sio_spi_pins, 1),
- PIN_GROUP("i2c5_grp", byt_score_i2c5_pins, 1),
- PIN_GROUP("i2c6_grp", byt_score_i2c6_pins, 1),
- PIN_GROUP("i2c4_grp", byt_score_i2c4_pins, 1),
- PIN_GROUP("i2c3_grp", byt_score_i2c3_pins, 1),
- PIN_GROUP("i2c2_grp", byt_score_i2c2_pins, 1),
- PIN_GROUP("i2c1_grp", byt_score_i2c1_pins, 1),
- PIN_GROUP("i2c0_grp", byt_score_i2c0_pins, 1),
- PIN_GROUP("ssp0_grp", byt_score_ssp0_pins, 1),
- PIN_GROUP("ssp1_grp", byt_score_ssp1_pins, 1),
- PIN_GROUP("sdcard_grp", byt_score_sdcard_pins, byt_score_sdcard_mux_values),
- PIN_GROUP("sdio_grp", byt_score_sdio_pins, 1),
- PIN_GROUP("emmc_grp", byt_score_emmc_pins, 1),
- PIN_GROUP("lpc_grp", byt_score_ilb_lpc_pins, 1),
- PIN_GROUP("sata_grp", byt_score_sata_pins, 1),
- PIN_GROUP("plt_clk0_grp", byt_score_plt_clk0_pins, 1),
- PIN_GROUP("plt_clk1_grp", byt_score_plt_clk1_pins, 1),
- PIN_GROUP("plt_clk2_grp", byt_score_plt_clk2_pins, 1),
- PIN_GROUP("plt_clk3_grp", byt_score_plt_clk3_pins, 1),
- PIN_GROUP("plt_clk4_grp", byt_score_plt_clk4_pins, 1),
- PIN_GROUP("plt_clk5_grp", byt_score_plt_clk5_pins, 1),
- PIN_GROUP("smbus_grp", byt_score_smbus_pins, 1),
+ PIN_GROUP_GPIO("uart1_grp", byt_score_uart1_pins, 1),
+ PIN_GROUP_GPIO("uart2_grp", byt_score_uart2_pins, 1),
+ PIN_GROUP_GPIO("uart3_grp", byt_score_uart3_pins, 1),
+ PIN_GROUP_GPIO("pwm0_grp", byt_score_pwm0_pins, 1),
+ PIN_GROUP_GPIO("pwm1_grp", byt_score_pwm1_pins, 1),
+ PIN_GROUP_GPIO("ssp2_grp", byt_score_ssp2_pins, 1),
+ PIN_GROUP_GPIO("sio_spi_grp", byt_score_sio_spi_pins, 1),
+ PIN_GROUP_GPIO("i2c5_grp", byt_score_i2c5_pins, 1),
+ PIN_GROUP_GPIO("i2c6_grp", byt_score_i2c6_pins, 1),
+ PIN_GROUP_GPIO("i2c4_grp", byt_score_i2c4_pins, 1),
+ PIN_GROUP_GPIO("i2c3_grp", byt_score_i2c3_pins, 1),
+ PIN_GROUP_GPIO("i2c2_grp", byt_score_i2c2_pins, 1),
+ PIN_GROUP_GPIO("i2c1_grp", byt_score_i2c1_pins, 1),
+ PIN_GROUP_GPIO("i2c0_grp", byt_score_i2c0_pins, 1),
+ PIN_GROUP_GPIO("ssp0_grp", byt_score_ssp0_pins, 1),
+ PIN_GROUP_GPIO("ssp1_grp", byt_score_ssp1_pins, 1),
+ PIN_GROUP_GPIO("sdcard_grp", byt_score_sdcard_pins, byt_score_sdcard_mux_values),
+ PIN_GROUP_GPIO("sdio_grp", byt_score_sdio_pins, 1),
+ PIN_GROUP_GPIO("emmc_grp", byt_score_emmc_pins, 1),
+ PIN_GROUP_GPIO("lpc_grp", byt_score_ilb_lpc_pins, 1),
+ PIN_GROUP_GPIO("sata_grp", byt_score_sata_pins, 1),
+ PIN_GROUP_GPIO("plt_clk0_grp", byt_score_plt_clk0_pins, 1),
+ PIN_GROUP_GPIO("plt_clk1_grp", byt_score_plt_clk1_pins, 1),
+ PIN_GROUP_GPIO("plt_clk2_grp", byt_score_plt_clk2_pins, 1),
+ PIN_GROUP_GPIO("plt_clk3_grp", byt_score_plt_clk3_pins, 1),
+ PIN_GROUP_GPIO("plt_clk4_grp", byt_score_plt_clk4_pins, 1),
+ PIN_GROUP_GPIO("plt_clk5_grp", byt_score_plt_clk5_pins, 1),
+ PIN_GROUP_GPIO("smbus_grp", byt_score_smbus_pins, 1),
};
static const char * const byt_score_uart_groups[] = {
- "uart1_grp", "uart2_grp",
+ "uart1_grp", "uart2_grp", "uart3_grp",
};
static const char * const byt_score_pwm_groups[] = {
"pwm0_grp", "pwm1_grp",
@@ -332,12 +334,14 @@ static const char * const byt_score_plt_clk_groups[] = {
};
static const char * const byt_score_smbus_groups[] = { "smbus_grp" };
static const char * const byt_score_gpio_groups[] = {
- "uart1_grp", "uart2_grp", "pwm0_grp", "pwm1_grp", "ssp0_grp",
- "ssp1_grp", "ssp2_grp", "sio_spi_grp", "i2c0_grp", "i2c1_grp",
- "i2c2_grp", "i2c3_grp", "i2c4_grp", "i2c5_grp", "i2c6_grp",
- "sdcard_grp", "sdio_grp", "emmc_grp", "lpc_grp", "sata_grp",
- "plt_clk0_grp", "plt_clk1_grp", "plt_clk2_grp", "plt_clk3_grp",
- "plt_clk4_grp", "plt_clk5_grp", "smbus_grp",
+ "uart1_grp_gpio", "uart2_grp_gpio", "uart3_grp_gpio", "pwm0_grp_gpio",
+ "pwm1_grp_gpio", "ssp0_grp_gpio", "ssp1_grp_gpio", "ssp2_grp_gpio",
+ "sio_spi_grp_gpio", "i2c0_grp_gpio", "i2c1_grp_gpio", "i2c2_grp_gpio",
+ "i2c3_grp_gpio", "i2c4_grp_gpio", "i2c5_grp_gpio", "i2c6_grp_gpio",
+ "sdcard_grp_gpio", "sdio_grp_gpio", "emmc_grp_gpio", "lpc_grp_gpio",
+ "sata_grp_gpio", "plt_clk0_grp_gpio", "plt_clk1_grp_gpio",
+ "plt_clk2_grp_gpio", "plt_clk3_grp_gpio", "plt_clk4_grp_gpio",
+ "plt_clk5_grp_gpio", "smbus_grp_gpio",
};
static const struct intel_function byt_score_functions[] = {
@@ -456,8 +460,8 @@ static const struct intel_pingroup byt_sus_groups[] = {
PIN_GROUP("usb_oc_grp_gpio", byt_sus_usb_over_current_pins, byt_sus_usb_over_current_gpio_mode_values),
PIN_GROUP("usb_ulpi_grp_gpio", byt_sus_usb_ulpi_pins, byt_sus_usb_ulpi_gpio_mode_values),
PIN_GROUP("pcu_spi_grp_gpio", byt_sus_pcu_spi_pins, byt_sus_pcu_spi_gpio_mode_values),
- PIN_GROUP("pmu_clk1_grp", byt_sus_pmu_clk1_pins, 1),
- PIN_GROUP("pmu_clk2_grp", byt_sus_pmu_clk2_pins, 1),
+ PIN_GROUP_GPIO("pmu_clk1_grp", byt_sus_pmu_clk1_pins, 1),
+ PIN_GROUP_GPIO("pmu_clk2_grp", byt_sus_pmu_clk2_pins, 1),
};
static const char * const byt_sus_usb_groups[] = {
@@ -469,7 +473,7 @@ static const char * const byt_sus_pmu_clk_groups[] = {
};
static const char * const byt_sus_gpio_groups[] = {
"usb_oc_grp_gpio", "usb_ulpi_grp_gpio", "pcu_spi_grp_gpio",
- "pmu_clk1_grp", "pmu_clk2_grp",
+ "pmu_clk1_grp_gpio", "pmu_clk2_grp_gpio",
};
static const struct intel_function byt_sus_functions[] = {
diff --git a/drivers/pinctrl/intel/pinctrl-intel.h b/drivers/pinctrl/intel/pinctrl-intel.h
index fde65e18cd145e..6981e2fab93f34 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.h
+++ b/drivers/pinctrl/intel/pinctrl-intel.h
@@ -179,6 +179,10 @@ struct intel_community {
.modes = __builtin_choose_expr(__builtin_constant_p((m)), NULL, (m)), \
}
+#define PIN_GROUP_GPIO(n, p, m) \
+ PIN_GROUP(n, p, m), \
+ PIN_GROUP(n "_gpio", p, 0)
+
#define FUNCTION(n, g) \
{ \
.func = PINCTRL_PINFUNCTION((n), (g), ARRAY_SIZE(g)), \
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 49f89b70dcecb4..7f66ec73199a9c 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -1159,7 +1159,7 @@ static int amd_gpio_probe(struct platform_device *pdev)
}
ret = devm_request_irq(&pdev->dev, gpio_dev->irq, amd_gpio_irq_handler,
- IRQF_SHARED | IRQF_ONESHOT, KBUILD_MODNAME, gpio_dev);
+ IRQF_SHARED | IRQF_COND_ONESHOT, KBUILD_MODNAME, gpio_dev);
if (ret)
goto out2;
diff --git a/drivers/platform/chrome/cros_ec_uart.c b/drivers/platform/chrome/cros_ec_uart.c
index 8ea867c2a01a37..62bc24f6dcc7a8 100644
--- a/drivers/platform/chrome/cros_ec_uart.c
+++ b/drivers/platform/chrome/cros_ec_uart.c
@@ -263,12 +263,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
if (!ec_dev)
return -ENOMEM;
- ret = devm_serdev_device_open(dev, serdev);
- if (ret) {
- dev_err(dev, "Unable to open UART device");
- return ret;
- }
-
serdev_device_set_drvdata(serdev, ec_dev);
init_waitqueue_head(&ec_uart->response.wait_queue);
@@ -280,14 +274,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
return ret;
}
- ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
- if (ret < 0) {
- dev_err(dev, "Failed to set up host baud rate (%d)", ret);
- return ret;
- }
-
- serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
-
/* Initialize ec_dev for cros_ec */
ec_dev->phys_name = dev_name(dev);
ec_dev->dev = dev;
@@ -301,6 +287,20 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops);
+ ret = devm_serdev_device_open(dev, serdev);
+ if (ret) {
+ dev_err(dev, "Unable to open UART device");
+ return ret;
+ }
+
+ ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
+ if (ret < 0) {
+ dev_err(dev, "Failed to set up host baud rate (%d)", ret);
+ return ret;
+ }
+
+ serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
+
return cros_ec_register(ec_dev);
}
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index ee2e164f86b9c2..38c932df6446ac 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -598,6 +598,15 @@ static const struct dmi_system_id acer_quirks[] __initconst = {
.driver_data = &quirk_acer_predator_v4,
},
{
+ .callback = dmi_matched,
+ .ident = "Acer Predator PH18-71",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Predator PH18-71"),
+ },
+ .driver_data = &quirk_acer_predator_v4,
+ },
+ {
.callback = set_force_caps,
.ident = "Acer Aspire Switch 10E SW3-016",
.matches = {
diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c
index b456370166b6bb..b4f49720c87f62 100644
--- a/drivers/platform/x86/amd/pmc/pmc-quirks.c
+++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c
@@ -208,6 +208,15 @@ static const struct dmi_system_id fwbug_list[] = {
DMI_MATCH(DMI_BIOS_VERSION, "03.03"),
}
},
+ {
+ .ident = "Framework Laptop 13 (Phoenix)",
+ .driver_data = &quirk_spurious_8042,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Laptop 13 (AMD Ryzen 7040Series)"),
+ DMI_MATCH(DMI_BIOS_VERSION, "03.05"),
+ }
+ },
{}
};
diff --git a/drivers/platform/x86/amd/pmf/Makefile b/drivers/platform/x86/amd/pmf/Makefile
index 6b26e48ce8ad2a..7d6079b02589cb 100644
--- a/drivers/platform/x86/amd/pmf/Makefile
+++ b/drivers/platform/x86/amd/pmf/Makefile
@@ -7,4 +7,4 @@
obj-$(CONFIG_AMD_PMF) += amd-pmf.o
amd-pmf-objs := core.o acpi.o sps.o \
auto-mode.o cnqf.o \
- tee-if.o spc.o
+ tee-if.o spc.o pmf-quirks.o
diff --git a/drivers/platform/x86/amd/pmf/acpi.c b/drivers/platform/x86/amd/pmf/acpi.c
index d0cf46e2fc8e8a..1157ec148880b5 100644
--- a/drivers/platform/x86/amd/pmf/acpi.c
+++ b/drivers/platform/x86/amd/pmf/acpi.c
@@ -343,7 +343,10 @@ static int apmf_if_verify_interface(struct amd_pmf_dev *pdev)
if (err)
return err;
- pdev->supported_func = output.supported_functions;
+ /* only set if not already set by a quirk */
+ if (!pdev->supported_func)
+ pdev->supported_func = output.supported_functions;
+
dev_dbg(pdev->dev, "supported functions:0x%x notifications:0x%x version:%u\n",
output.supported_functions, output.notification_mask, output.version);
@@ -437,7 +440,7 @@ int apmf_check_smart_pc(struct amd_pmf_dev *pmf_dev)
status = acpi_walk_resources(ahandle, METHOD_NAME__CRS, apmf_walk_resources, pmf_dev);
if (ACPI_FAILURE(status)) {
- dev_err(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status);
+ dev_dbg(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status);
return -EINVAL;
}
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index 5d4f80698a8b88..64e6e34a2a9acd 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -445,6 +445,7 @@ static int amd_pmf_probe(struct platform_device *pdev)
mutex_init(&dev->lock);
mutex_init(&dev->update_mutex);
+ amd_pmf_quirks_init(dev);
apmf_acpi_init(dev);
platform_set_drvdata(pdev, dev);
amd_pmf_dbgfs_register(dev);
diff --git a/drivers/platform/x86/amd/pmf/pmf-quirks.c b/drivers/platform/x86/amd/pmf/pmf-quirks.c
new file mode 100644
index 00000000000000..0b2eb0ae85febd
--- /dev/null
+++ b/drivers/platform/x86/amd/pmf/pmf-quirks.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Platform Management Framework Driver Quirks
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#include <linux/dmi.h>
+
+#include "pmf.h"
+
+struct quirk_entry {
+ u32 supported_func;
+};
+
+static struct quirk_entry quirk_no_sps_bug = {
+ .supported_func = 0x4003,
+};
+
+static const struct dmi_system_id fwbug_list[] = {
+ {
+ .ident = "ROG Zephyrus G14",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "GA403UV"),
+ },
+ .driver_data = &quirk_no_sps_bug,
+ },
+ {}
+};
+
+void amd_pmf_quirks_init(struct amd_pmf_dev *dev)
+{
+ const struct dmi_system_id *dmi_id;
+ struct quirk_entry *quirks;
+
+ dmi_id = dmi_first_match(fwbug_list);
+ if (!dmi_id)
+ return;
+
+ quirks = dmi_id->driver_data;
+ if (quirks->supported_func) {
+ dev->supported_func = quirks->supported_func;
+ pr_info("Using supported funcs quirk to avoid %s platform firmware bug\n",
+ dmi_id->ident);
+ }
+}
+
diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h
index 8c4df5753f40d4..eeedd0c0395a89 100644
--- a/drivers/platform/x86/amd/pmf/pmf.h
+++ b/drivers/platform/x86/amd/pmf/pmf.h
@@ -720,4 +720,7 @@ int apmf_check_smart_pc(struct amd_pmf_dev *pmf_dev);
void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in);
void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in);
+/* Quirk infrastructure */
+void amd_pmf_quirks_init(struct amd_pmf_dev *dev);
+
#endif /* PMF_H */
diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c
index 7457ca2b27a60b..c7a8276458640a 100644
--- a/drivers/platform/x86/intel/hid.c
+++ b/drivers/platform/x86/intel/hid.c
@@ -49,6 +49,8 @@ static const struct acpi_device_id intel_hid_ids[] = {
{"INTC1076", 0},
{"INTC1077", 0},
{"INTC1078", 0},
+ {"INTC107B", 0},
+ {"INTC10CB", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, intel_hid_ids);
@@ -504,6 +506,7 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
struct platform_device *device = context;
struct intel_hid_priv *priv = dev_get_drvdata(&device->dev);
unsigned long long ev_index;
+ struct key_entry *ke;
int err;
/*
@@ -545,11 +548,15 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
if (event == 0xc0 || !priv->array)
return;
- if (!sparse_keymap_entry_from_scancode(priv->array, event)) {
+ ke = sparse_keymap_entry_from_scancode(priv->array, event);
+ if (!ke) {
dev_info(&device->dev, "unknown event 0x%x\n", event);
return;
}
+ if (ke->type == KE_IGNORE)
+ return;
+
wakeup:
pm_wakeup_hard_event(&device->dev);
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index 08df9494603c5e..30951f7131cd98 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -719,6 +719,7 @@ static struct miscdevice isst_if_char_driver = {
};
static const struct x86_cpu_id hpm_cpu_ids[] = {
+ X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, NULL),
X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, NULL),
{}
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
index bd75d61ff8a661..ef730200a04bd9 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
@@ -29,7 +29,7 @@
#include "uncore-frequency-common.h"
#define UNCORE_MAJOR_VERSION 0
-#define UNCORE_MINOR_VERSION 1
+#define UNCORE_MINOR_VERSION 2
#define UNCORE_HEADER_INDEX 0
#define UNCORE_FABRIC_CLUSTER_OFFSET 8
@@ -329,7 +329,7 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_
goto remove_clusters;
}
- if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) != UNCORE_MINOR_VERSION)
+ if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) > UNCORE_MINOR_VERSION)
dev_info(&auxdev->dev, "Uncore: Ignore: Unsupported minor version:%lx\n",
TPMI_MINOR_VERSION(pd_info->ufs_header_ver));
diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c
index 084c355c86f5fa..79bb2c801daa97 100644
--- a/drivers/platform/x86/intel/vbtn.c
+++ b/drivers/platform/x86/intel/vbtn.c
@@ -136,8 +136,6 @@ static int intel_vbtn_input_setup(struct platform_device *device)
priv->switches_dev->id.bustype = BUS_HOST;
if (priv->has_switches) {
- detect_tablet_mode(&device->dev);
-
ret = input_register_device(priv->switches_dev);
if (ret)
return ret;
@@ -258,9 +256,6 @@ static const struct dmi_system_id dmi_switches_allow_list[] = {
static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
{
- unsigned long long vgbs;
- acpi_status status;
-
/* See dual_accel_detect.h for more info */
if (dual_accel)
return false;
@@ -268,8 +263,7 @@ static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
if (!dmi_check_system(dmi_switches_allow_list))
return false;
- status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs);
- return ACPI_SUCCESS(status);
+ return acpi_has_method(handle, "VGBS");
}
static int intel_vbtn_probe(struct platform_device *device)
@@ -316,6 +310,9 @@ static int intel_vbtn_probe(struct platform_device *device)
if (ACPI_FAILURE(status))
dev_err(&device->dev, "Error VBDL failed with ACPI status %d\n", status);
}
+ // Check switches after buttons since VBDL may have side effects.
+ if (has_switches)
+ detect_tablet_mode(&device->dev);
device_init_wakeup(&device->dev, true);
/*
diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c
index ad3c39e9e9f586..e714ee6298dda8 100644
--- a/drivers/platform/x86/lg-laptop.c
+++ b/drivers/platform/x86/lg-laptop.c
@@ -736,7 +736,7 @@ static int acpi_add(struct acpi_device *device)
default:
year = 2019;
}
- pr_info("product: %s year: %d\n", product, year);
+ pr_info("product: %s year: %d\n", product ?: "unknown", year);
if (year >= 2019)
battery_limit_use_wmbb = 1;
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 291f14ef67024a..77244c9aa60d23 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -264,6 +264,7 @@ static const struct key_entry toshiba_acpi_keymap[] = {
{ KE_KEY, 0xb32, { KEY_NEXTSONG } },
{ KE_KEY, 0xb33, { KEY_PLAYPAUSE } },
{ KE_KEY, 0xb5a, { KEY_MEDIA } },
+ { KE_IGNORE, 0x0e00, { KEY_RESERVED } }, /* Wake from sleep */
{ KE_IGNORE, 0x1430, { KEY_RESERVED } }, /* Wake from sleep */
{ KE_IGNORE, 0x1501, { KEY_RESERVED } }, /* Output changed */
{ KE_IGNORE, 0x1502, { KEY_RESERVED } }, /* HDMI plugged/unplugged */
@@ -3523,9 +3524,10 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event)
(dev->kbd_mode == SCI_KBD_MODE_ON) ?
LED_FULL : LED_OFF);
break;
+ case 0x8e: /* Power button pressed */
+ break;
case 0x85: /* Unknown */
case 0x8d: /* Unknown */
- case 0x8e: /* Unknown */
case 0x94: /* Unknown */
case 0x95: /* Unknown */
default:
diff --git a/drivers/platform/x86/uv_sysfs.c b/drivers/platform/x86/uv_sysfs.c
index 38d1b692d3c0aa..40e0108771893b 100644
--- a/drivers/platform/x86/uv_sysfs.c
+++ b/drivers/platform/x86/uv_sysfs.c
@@ -11,6 +11,7 @@
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/kobject.h>
+#include <linux/vmalloc.h>
#include <asm/uv/bios.h>
#include <asm/uv/uv.h>
#include <asm/uv/uv_hub.h>
diff --git a/drivers/power/supply/mt6360_charger.c b/drivers/power/supply/mt6360_charger.c
index 1305cba61edd4b..aca123783efccd 100644
--- a/drivers/power/supply/mt6360_charger.c
+++ b/drivers/power/supply/mt6360_charger.c
@@ -588,7 +588,7 @@ static const struct regulator_ops mt6360_chg_otg_ops = {
};
static const struct regulator_desc mt6360_otg_rdesc = {
- .of_match = "usb-otg-vbus",
+ .of_match = "usb-otg-vbus-regulator",
.name = "usb-otg-vbus",
.ops = &mt6360_chg_otg_ops,
.owner = THIS_MODULE,
diff --git a/drivers/power/supply/rt9455_charger.c b/drivers/power/supply/rt9455_charger.c
index c345a77f9f78c0..e4dbacd50a437d 100644
--- a/drivers/power/supply/rt9455_charger.c
+++ b/drivers/power/supply/rt9455_charger.c
@@ -192,6 +192,7 @@ static const int rt9455_voreg_values[] = {
4450000, 4450000, 4450000, 4450000, 4450000, 4450000, 4450000, 4450000
};
+#if IS_ENABLED(CONFIG_USB_PHY)
/*
* When the charger is in boost mode, REG02[7:2] represent boost output
* voltage.
@@ -207,6 +208,7 @@ static const int rt9455_boost_voltage_values[] = {
5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000,
5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000, 5600000,
};
+#endif
/* REG07[3:0] (VMREG) in uV */
static const int rt9455_vmreg_values[] = {
diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c
index 42f93d4c6ee329..af972cdc04b53c 100644
--- a/drivers/pps/clients/pps_parport.c
+++ b/drivers/pps/clients/pps_parport.c
@@ -148,7 +148,7 @@ static void parport_attach(struct parport *port)
return;
}
- index = ida_simple_get(&pps_client_index, 0, 0, GFP_KERNEL);
+ index = ida_alloc(&pps_client_index, GFP_KERNEL);
memset(&pps_client_cb, 0, sizeof(pps_client_cb));
pps_client_cb.private = device;
pps_client_cb.irq_func = parport_irq;
@@ -188,7 +188,7 @@ err_release_dev:
err_unregister_dev:
parport_unregister_device(device->pardev);
err_free:
- ida_simple_remove(&pps_client_index, index);
+ ida_free(&pps_client_index, index);
kfree(device);
}
@@ -208,7 +208,7 @@ static void parport_detach(struct parport *port)
pps_unregister_source(device->pps);
parport_release(pardev);
parport_unregister_device(pardev);
- ida_simple_remove(&pps_client_index, device->index);
+ ida_free(&pps_client_index, device->index);
kfree(device);
}
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index d70f793ce4b38d..403525cc17833c 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -443,7 +443,7 @@ of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
if (IS_ERR(pwm))
return pwm;
- if (args->args_count > 1)
+ if (args->args_count > 0)
pwm->args.period = args->args[0];
pwm->args.polarity = PWM_POLARITY_NORMAL;
diff --git a/drivers/pwm/pwm-dwc-core.c b/drivers/pwm/pwm-dwc-core.c
index 043736972cb921..c8425493b95d85 100644
--- a/drivers/pwm/pwm-dwc-core.c
+++ b/drivers/pwm/pwm-dwc-core.c
@@ -172,7 +172,6 @@ struct pwm_chip *dwc_pwm_alloc(struct device *dev)
dwc->clk_ns = 10;
chip->ops = &dwc_pwm_ops;
- dev_set_drvdata(dev, chip);
return chip;
}
EXPORT_SYMBOL_GPL(dwc_pwm_alloc);
diff --git a/drivers/pwm/pwm-dwc.c b/drivers/pwm/pwm-dwc.c
index 676eaf8d7a53f7..fb3eadf6fbc464 100644
--- a/drivers/pwm/pwm-dwc.c
+++ b/drivers/pwm/pwm-dwc.c
@@ -31,26 +31,34 @@ static const struct dwc_pwm_info ehl_pwm_info = {
.size = 0x1000,
};
-static int dwc_pwm_init_one(struct device *dev, void __iomem *base, unsigned int offset)
+static int dwc_pwm_init_one(struct device *dev, struct dwc_pwm_drvdata *ddata, unsigned int idx)
{
struct pwm_chip *chip;
struct dwc_pwm *dwc;
+ int ret;
chip = dwc_pwm_alloc(dev);
if (IS_ERR(chip))
return PTR_ERR(chip);
dwc = to_dwc_pwm(chip);
- dwc->base = base + offset;
+ dwc->base = ddata->io_base + (ddata->info->size * idx);
- return devm_pwmchip_add(dev, chip);
+ ret = devm_pwmchip_add(dev, chip);
+ if (ret)
+ return ret;
+
+ ddata->chips[idx] = chip;
+ return 0;
}
static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
{
const struct dwc_pwm_info *info;
struct device *dev = &pci->dev;
- int i, ret;
+ struct dwc_pwm_drvdata *ddata;
+ unsigned int idx;
+ int ret;
ret = pcim_enable_device(pci);
if (ret)
@@ -63,17 +71,25 @@ static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
return dev_err_probe(dev, ret, "Failed to iomap PCI BAR\n");
info = (const struct dwc_pwm_info *)id->driver_data;
-
- for (i = 0; i < info->nr; i++) {
- /*
- * No need to check for pcim_iomap_table() failure,
- * pcim_iomap_regions() already does it for us.
- */
- ret = dwc_pwm_init_one(dev, pcim_iomap_table(pci)[0], i * info->size);
+ ddata = devm_kzalloc(dev, struct_size(ddata, chips, info->nr), GFP_KERNEL);
+ if (!ddata)
+ return -ENOMEM;
+
+ /*
+ * No need to check for pcim_iomap_table() failure,
+ * pcim_iomap_regions() already does it for us.
+ */
+ ddata->io_base = pcim_iomap_table(pci)[0];
+ ddata->info = info;
+
+ for (idx = 0; idx < ddata->info->nr; idx++) {
+ ret = dwc_pwm_init_one(dev, ddata, idx);
if (ret)
return ret;
}
+ dev_set_drvdata(dev, ddata);
+
pm_runtime_put(dev);
pm_runtime_allow(dev);
@@ -88,19 +104,24 @@ static void dwc_pwm_remove(struct pci_dev *pci)
static int dwc_pwm_suspend(struct device *dev)
{
- struct pwm_chip *chip = dev_get_drvdata(dev);
- struct dwc_pwm *dwc = to_dwc_pwm(chip);
- int i;
-
- for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
- if (chip->pwms[i].state.enabled) {
- dev_err(dev, "PWM %u in use by consumer (%s)\n",
- i, chip->pwms[i].label);
- return -EBUSY;
+ struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev);
+ unsigned int idx;
+
+ for (idx = 0; idx < ddata->info->nr; idx++) {
+ struct pwm_chip *chip = ddata->chips[idx];
+ struct dwc_pwm *dwc = to_dwc_pwm(chip);
+ unsigned int i;
+
+ for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+ if (chip->pwms[i].state.enabled) {
+ dev_err(dev, "PWM %u in use by consumer (%s)\n",
+ i, chip->pwms[i].label);
+ return -EBUSY;
+ }
+ dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i));
+ dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i));
+ dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i));
}
- dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i));
- dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i));
- dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i));
}
return 0;
@@ -108,14 +129,19 @@ static int dwc_pwm_suspend(struct device *dev)
static int dwc_pwm_resume(struct device *dev)
{
- struct pwm_chip *chip = dev_get_drvdata(dev);
- struct dwc_pwm *dwc = to_dwc_pwm(chip);
- int i;
-
- for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
- dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i));
- dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i));
- dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i));
+ struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev);
+ unsigned int idx;
+
+ for (idx = 0; idx < ddata->info->nr; idx++) {
+ struct pwm_chip *chip = ddata->chips[idx];
+ struct dwc_pwm *dwc = to_dwc_pwm(chip);
+ unsigned int i;
+
+ for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+ dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i));
+ dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i));
+ dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i));
+ }
}
return 0;
diff --git a/drivers/pwm/pwm-dwc.h b/drivers/pwm/pwm-dwc.h
index a8b074841ae805..c6e2df5a612271 100644
--- a/drivers/pwm/pwm-dwc.h
+++ b/drivers/pwm/pwm-dwc.h
@@ -38,6 +38,12 @@ struct dwc_pwm_info {
unsigned int size;
};
+struct dwc_pwm_drvdata {
+ const struct dwc_pwm_info *info;
+ void __iomem *io_base;
+ struct pwm_chip *chips[];
+};
+
struct dwc_pwm_ctx {
u32 cnt;
u32 cnt2;
diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c
index d79a96679a26c9..d6596583ed4e78 100644
--- a/drivers/pwm/pwm-img.c
+++ b/drivers/pwm/pwm-img.c
@@ -284,9 +284,9 @@ static int img_pwm_probe(struct platform_device *pdev)
return PTR_ERR(imgchip->sys_clk);
}
- imgchip->pwm_clk = devm_clk_get(&pdev->dev, "imgchip");
+ imgchip->pwm_clk = devm_clk_get(&pdev->dev, "pwm");
if (IS_ERR(imgchip->pwm_clk)) {
- dev_err(&pdev->dev, "failed to get imgchip clock\n");
+ dev_err(&pdev->dev, "failed to get pwm clock\n");
return PTR_ERR(imgchip->pwm_clk);
}
diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c
index 2f4ac9591c8f5a..271dfad05d6835 100644
--- a/drivers/ras/amd/fmpm.c
+++ b/drivers/ras/amd/fmpm.c
@@ -150,6 +150,8 @@ static unsigned int max_nr_fru;
/* Total length of record including headers and list of descriptor entries. */
static size_t max_rec_len;
+#define FMPM_MAX_REC_LEN (sizeof(struct fru_rec) + (sizeof(struct cper_fru_poison_desc) * 255))
+
/* Total number of SPA entries across all FRUs. */
static unsigned int spa_nr_entries;
@@ -475,6 +477,16 @@ static void set_rec_fields(struct fru_rec *rec)
struct cper_section_descriptor *sec_desc = &rec->sec_desc;
struct cper_record_header *hdr = &rec->hdr;
+ /*
+ * This is a saved record created with fewer max_nr_entries.
+ * Update the record lengths and keep everything else as-is.
+ */
+ if (hdr->record_length && hdr->record_length < max_rec_len) {
+ pr_debug("Growing record 0x%016llx from %u to %zu bytes\n",
+ hdr->record_id, hdr->record_length, max_rec_len);
+ goto update_lengths;
+ }
+
memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
hdr->revision = CPER_RECORD_REV;
hdr->signature_end = CPER_SIG_END;
@@ -489,19 +501,21 @@ static void set_rec_fields(struct fru_rec *rec)
hdr->error_severity = CPER_SEV_RECOVERABLE;
hdr->validation_bits = 0;
- hdr->record_length = max_rec_len;
hdr->creator_id = CPER_CREATOR_FMP;
hdr->notification_type = CPER_NOTIFY_MCE;
hdr->record_id = cper_next_record_id();
hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR;
sec_desc->section_offset = sizeof(struct cper_record_header);
- sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header);
sec_desc->revision = CPER_SEC_REV;
sec_desc->validation_bits = 0;
sec_desc->flags = CPER_SEC_PRIMARY;
sec_desc->section_type = CPER_SECTION_TYPE_FMP;
sec_desc->section_severity = CPER_SEV_RECOVERABLE;
+
+update_lengths:
+ hdr->record_length = max_rec_len;
+ sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header);
}
static int save_new_records(void)
@@ -512,16 +526,18 @@ static int save_new_records(void)
int ret = 0;
for_each_fru(i, rec) {
- if (rec->hdr.record_length)
+ /* No need to update saved records that match the current record size. */
+ if (rec->hdr.record_length == max_rec_len)
continue;
+ if (!rec->hdr.record_length)
+ set_bit(i, new_records);
+
set_rec_fields(rec);
ret = update_record_on_storage(rec);
if (ret)
goto out_clear;
-
- set_bit(i, new_records);
}
return ret;
@@ -641,12 +657,7 @@ static int get_saved_records(void)
int ret, pos;
ssize_t len;
- /*
- * Assume saved records match current max size.
- *
- * However, this may not be true depending on module parameters.
- */
- old = kmalloc(max_rec_len, GFP_KERNEL);
+ old = kmalloc(FMPM_MAX_REC_LEN, GFP_KERNEL);
if (!old) {
ret = -ENOMEM;
goto out;
@@ -663,21 +674,31 @@ static int get_saved_records(void)
* Make sure to clear temporary buffer between reads to avoid
* leftover data from records of various sizes.
*/
- memset(old, 0, max_rec_len);
+ memset(old, 0, FMPM_MAX_REC_LEN);
- len = erst_read_record(record_id, &old->hdr, max_rec_len,
+ len = erst_read_record(record_id, &old->hdr, FMPM_MAX_REC_LEN,
sizeof(struct fru_rec), &CPER_CREATOR_FMP);
if (len < 0)
continue;
- if (len > max_rec_len) {
- pr_debug("Found record larger than max_rec_len\n");
+ new = get_valid_record(old);
+ if (!new) {
+ erst_clear(record_id);
continue;
}
- new = get_valid_record(old);
- if (!new)
- erst_clear(record_id);
+ if (len > max_rec_len) {
+ unsigned int saved_nr_entries;
+
+ saved_nr_entries = len - sizeof(struct fru_rec);
+ saved_nr_entries /= sizeof(struct cper_fru_poison_desc);
+
+ pr_warn("Saved record found with %u entries.\n", saved_nr_entries);
+ pr_warn("Please increase max_nr_entries to %u.\n", saved_nr_entries);
+
+ ret = -EINVAL;
+ goto out_end;
+ }
/* Restore the record */
memcpy(new, old, len);
diff --git a/drivers/ras/debugfs.h b/drivers/ras/debugfs.h
index 4749ccdeeba122..5a2f48439258cd 100644
--- a/drivers/ras/debugfs.h
+++ b/drivers/ras/debugfs.h
@@ -4,6 +4,10 @@
#include <linux/debugfs.h>
+#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *ras_get_debugfs_root(void);
+#else
+static inline struct dentry *ras_get_debugfs_root(void) { return NULL; }
+#endif /* DEBUG_FS */
#endif /* __RAS_DEBUGFS_H__ */
diff --git a/drivers/regulator/irq_helpers.c b/drivers/regulator/irq_helpers.c
index fe7ae0f3f46af9..5ab1a0befe12f7 100644
--- a/drivers/regulator/irq_helpers.c
+++ b/drivers/regulator/irq_helpers.c
@@ -352,6 +352,9 @@ void *regulator_irq_helper(struct device *dev,
h->irq = irq;
h->desc = *d;
+ h->desc.name = devm_kstrdup(dev, d->name, GFP_KERNEL);
+ if (!h->desc.name)
+ return ERR_PTR(-ENOMEM);
ret = init_rdev_state(dev, h, rdev, common_errs, per_rdev_errs,
rdev_amount);
diff --git a/drivers/regulator/mt6360-regulator.c b/drivers/regulator/mt6360-regulator.c
index ad6587a378d09c..24cc9fc94e900a 100644
--- a/drivers/regulator/mt6360-regulator.c
+++ b/drivers/regulator/mt6360-regulator.c
@@ -319,15 +319,15 @@ static unsigned int mt6360_regulator_of_map_mode(unsigned int hw_mode)
}
}
-#define MT6360_REGULATOR_DESC(_name, _sname, ereg, emask, vreg, vmask, \
- mreg, mmask, streg, stmask, vranges, \
- vcnts, offon_delay, irq_tbls) \
+#define MT6360_REGULATOR_DESC(match, _name, _sname, ereg, emask, vreg, \
+ vmask, mreg, mmask, streg, stmask, \
+ vranges, vcnts, offon_delay, irq_tbls) \
{ \
.desc = { \
.name = #_name, \
.supply_name = #_sname, \
.id = MT6360_REGULATOR_##_name, \
- .of_match = of_match_ptr(#_name), \
+ .of_match = of_match_ptr(match), \
.regulators_node = of_match_ptr("regulator"), \
.of_map_mode = mt6360_regulator_of_map_mode, \
.owner = THIS_MODULE, \
@@ -351,21 +351,29 @@ static unsigned int mt6360_regulator_of_map_mode(unsigned int hw_mode)
}
static const struct mt6360_regulator_desc mt6360_regulator_descs[] = {
- MT6360_REGULATOR_DESC(BUCK1, BUCK1_VIN, 0x117, 0x40, 0x110, 0xff, 0x117, 0x30, 0x117, 0x04,
+ MT6360_REGULATOR_DESC("buck1", BUCK1, BUCK1_VIN,
+ 0x117, 0x40, 0x110, 0xff, 0x117, 0x30, 0x117, 0x04,
buck_vout_ranges, 256, 0, buck1_irq_tbls),
- MT6360_REGULATOR_DESC(BUCK2, BUCK2_VIN, 0x127, 0x40, 0x120, 0xff, 0x127, 0x30, 0x127, 0x04,
+ MT6360_REGULATOR_DESC("buck2", BUCK2, BUCK2_VIN,
+ 0x127, 0x40, 0x120, 0xff, 0x127, 0x30, 0x127, 0x04,
buck_vout_ranges, 256, 0, buck2_irq_tbls),
- MT6360_REGULATOR_DESC(LDO6, LDO_VIN3, 0x137, 0x40, 0x13B, 0xff, 0x137, 0x30, 0x137, 0x04,
+ MT6360_REGULATOR_DESC("ldo6", LDO6, LDO_VIN3,
+ 0x137, 0x40, 0x13B, 0xff, 0x137, 0x30, 0x137, 0x04,
ldo_vout_ranges1, 256, 0, ldo6_irq_tbls),
- MT6360_REGULATOR_DESC(LDO7, LDO_VIN3, 0x131, 0x40, 0x135, 0xff, 0x131, 0x30, 0x131, 0x04,
+ MT6360_REGULATOR_DESC("ldo7", LDO7, LDO_VIN3,
+ 0x131, 0x40, 0x135, 0xff, 0x131, 0x30, 0x131, 0x04,
ldo_vout_ranges1, 256, 0, ldo7_irq_tbls),
- MT6360_REGULATOR_DESC(LDO1, LDO_VIN1, 0x217, 0x40, 0x21B, 0xff, 0x217, 0x30, 0x217, 0x04,
+ MT6360_REGULATOR_DESC("ldo1", LDO1, LDO_VIN1,
+ 0x217, 0x40, 0x21B, 0xff, 0x217, 0x30, 0x217, 0x04,
ldo_vout_ranges2, 256, 0, ldo1_irq_tbls),
- MT6360_REGULATOR_DESC(LDO2, LDO_VIN1, 0x211, 0x40, 0x215, 0xff, 0x211, 0x30, 0x211, 0x04,
+ MT6360_REGULATOR_DESC("ldo2", LDO2, LDO_VIN1,
+ 0x211, 0x40, 0x215, 0xff, 0x211, 0x30, 0x211, 0x04,
ldo_vout_ranges2, 256, 0, ldo2_irq_tbls),
- MT6360_REGULATOR_DESC(LDO3, LDO_VIN1, 0x205, 0x40, 0x209, 0xff, 0x205, 0x30, 0x205, 0x04,
+ MT6360_REGULATOR_DESC("ldo3", LDO3, LDO_VIN1,
+ 0x205, 0x40, 0x209, 0xff, 0x205, 0x30, 0x205, 0x04,
ldo_vout_ranges2, 256, 100, ldo3_irq_tbls),
- MT6360_REGULATOR_DESC(LDO5, LDO_VIN2, 0x20B, 0x40, 0x20F, 0x7f, 0x20B, 0x30, 0x20B, 0x04,
+ MT6360_REGULATOR_DESC("ldo5", LDO5, LDO_VIN2,
+ 0x20B, 0x40, 0x20F, 0x7f, 0x20B, 0x30, 0x20B, 0x04,
ldo_vout_ranges3, 128, 100, ldo5_irq_tbls),
};
diff --git a/drivers/regulator/qcom-refgen-regulator.c b/drivers/regulator/qcom-refgen-regulator.c
index 656fe330d38f0a..063e12c08e75f7 100644
--- a/drivers/regulator/qcom-refgen-regulator.c
+++ b/drivers/regulator/qcom-refgen-regulator.c
@@ -140,6 +140,7 @@ static const struct of_device_id qcom_refgen_match_table[] = {
{ .compatible = "qcom,sm8250-refgen-regulator", .data = &sm8250_refgen_desc },
{ }
};
+MODULE_DEVICE_TABLE(of, qcom_refgen_match_table);
static struct platform_driver qcom_refgen_driver = {
.probe = qcom_refgen_probe,
diff --git a/drivers/regulator/tps65132-regulator.c b/drivers/regulator/tps65132-regulator.c
index a06f5f2d79329d..9c2f0dd42613d4 100644
--- a/drivers/regulator/tps65132-regulator.c
+++ b/drivers/regulator/tps65132-regulator.c
@@ -267,10 +267,17 @@ static const struct i2c_device_id tps65132_id[] = {
};
MODULE_DEVICE_TABLE(i2c, tps65132_id);
+static const struct of_device_id __maybe_unused tps65132_of_match[] = {
+ { .compatible = "ti,tps65132" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, tps65132_of_match);
+
static struct i2c_driver tps65132_i2c_driver = {
.driver = {
.name = "tps65132",
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
+ .of_match_table = of_match_ptr(tps65132_of_match),
},
.probe = tps65132_probe,
.id_table = tps65132_id,
diff --git a/drivers/regulator/vqmmc-ipq4019-regulator.c b/drivers/regulator/vqmmc-ipq4019-regulator.c
index 086da36abc0b49..4955616517ce9c 100644
--- a/drivers/regulator/vqmmc-ipq4019-regulator.c
+++ b/drivers/regulator/vqmmc-ipq4019-regulator.c
@@ -84,6 +84,7 @@ static const struct of_device_id regulator_ipq4019_of_match[] = {
{ .compatible = "qcom,vqmmc-ipq4019-regulator", },
{},
};
+MODULE_DEVICE_TABLE(of, regulator_ipq4019_of_match);
static struct platform_driver ipq4019_regulator_driver = {
.probe = ipq4019_regulator_probe,
diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c
index 37173cb0f5f5a2..c57694be9bd32e 100644
--- a/drivers/s390/char/raw3270.c
+++ b/drivers/s390/char/raw3270.c
@@ -162,7 +162,8 @@ struct raw3270_request *raw3270_request_alloc(size_t size)
/*
* Setup ccw.
*/
- rq->ccw.cda = virt_to_dma32(rq->buffer);
+ if (rq->buffer)
+ rq->ccw.cda = virt_to_dma32(rq->buffer);
rq->ccw.flags = CCW_FLAG_SLI;
return rq;
@@ -188,7 +189,8 @@ int raw3270_request_reset(struct raw3270_request *rq)
return -EBUSY;
rq->ccw.cmd_code = 0;
rq->ccw.count = 0;
- rq->ccw.cda = virt_to_dma32(rq->buffer);
+ if (rq->buffer)
+ rq->ccw.cda = virt_to_dma32(rq->buffer);
rq->ccw.flags = CCW_FLAG_SLI;
rq->rescnt = 0;
rq->rc = 0;
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index f95d12345d98a6..920f550bc313bf 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -363,10 +363,8 @@ int ccw_device_set_online(struct ccw_device *cdev)
spin_lock_irq(cdev->ccwlock);
ret = ccw_device_online(cdev);
- spin_unlock_irq(cdev->ccwlock);
- if (ret == 0)
- wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
- else {
+ if (ret) {
+ spin_unlock_irq(cdev->ccwlock);
CIO_MSG_EVENT(0, "ccw_device_online returned %d, "
"device 0.%x.%04x\n",
ret, cdev->private->dev_id.ssid,
@@ -375,7 +373,12 @@ int ccw_device_set_online(struct ccw_device *cdev)
put_device(&cdev->dev);
return ret;
}
- spin_lock_irq(cdev->ccwlock);
+ /* Wait until a final state is reached */
+ while (!dev_fsm_final_state(cdev)) {
+ spin_unlock_irq(cdev->ccwlock);
+ wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
+ spin_lock_irq(cdev->ccwlock);
+ }
/* Check if online processing was successful */
if ((cdev->private->state != DEV_STATE_ONLINE) &&
(cdev->private->state != DEV_STATE_W4SENSE)) {
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 65d8b2cfd6262d..42791fa0b80e26 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -504,6 +504,11 @@ callback:
ccw_device_done(cdev, DEV_STATE_ONLINE);
/* Deliver fake irb to device driver, if needed. */
if (cdev->private->flags.fake_irb) {
+ CIO_MSG_EVENT(2, "fakeirb: deliver device 0.%x.%04x intparm %lx type=%d\n",
+ cdev->private->dev_id.ssid,
+ cdev->private->dev_id.devno,
+ cdev->private->intparm,
+ cdev->private->flags.fake_irb);
create_fake_irb(&cdev->private->dma_area->irb,
cdev->private->flags.fake_irb);
cdev->private->flags.fake_irb = 0;
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 40c97f8730751f..acd6790dba4dd1 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -208,6 +208,10 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
if (!cdev->private->flags.fake_irb) {
cdev->private->flags.fake_irb = FAKE_CMD_IRB;
cdev->private->intparm = intparm;
+ CIO_MSG_EVENT(2, "fakeirb: queue device 0.%x.%04x intparm %lx type=%d\n",
+ cdev->private->dev_id.ssid,
+ cdev->private->dev_id.devno, intparm,
+ cdev->private->flags.fake_irb);
return 0;
} else
/* There's already a fake I/O around. */
@@ -551,6 +555,10 @@ int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw,
if (!cdev->private->flags.fake_irb) {
cdev->private->flags.fake_irb = FAKE_TM_IRB;
cdev->private->intparm = intparm;
+ CIO_MSG_EVENT(2, "fakeirb: queue device 0.%x.%04x intparm %lx type=%d\n",
+ cdev->private->dev_id.ssid,
+ cdev->private->dev_id.devno, intparm,
+ cdev->private->flags.fake_irb);
return 0;
} else
/* There's already a fake I/O around. */
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 3d9f0834c78bf1..a1cb39f4b7a279 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -722,8 +722,8 @@ static void qdio_handle_activate_check(struct qdio_irq *irq_ptr,
lgr_info_log();
}
-static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat,
- int dstat)
+static int qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat,
+ int dstat, int dcc)
{
DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qest irq");
@@ -731,15 +731,18 @@ static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat,
goto error;
if (dstat & ~(DEV_STAT_DEV_END | DEV_STAT_CHN_END))
goto error;
+ if (dcc == 1)
+ return -EAGAIN;
if (!(dstat & DEV_STAT_DEV_END))
goto error;
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED);
- return;
+ return 0;
error:
DBF_ERROR("%4x EQ:error", irq_ptr->schid.sch_no);
DBF_ERROR("ds: %2x cs:%2x", dstat, cstat);
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
+ return -EIO;
}
/* qdio interrupt handler */
@@ -748,7 +751,7 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
{
struct qdio_irq *irq_ptr = cdev->private->qdio_data;
struct subchannel_id schid;
- int cstat, dstat;
+ int cstat, dstat, rc, dcc;
if (!intparm || !irq_ptr) {
ccw_device_get_schid(cdev, &schid);
@@ -768,10 +771,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
qdio_irq_check_sense(irq_ptr, irb);
cstat = irb->scsw.cmd.cstat;
dstat = irb->scsw.cmd.dstat;
+ dcc = scsw_cmd_is_valid_cc(&irb->scsw) ? irb->scsw.cmd.cc : 0;
+ rc = 0;
switch (irq_ptr->state) {
case QDIO_IRQ_STATE_INACTIVE:
- qdio_establish_handle_irq(irq_ptr, cstat, dstat);
+ rc = qdio_establish_handle_irq(irq_ptr, cstat, dstat, dcc);
break;
case QDIO_IRQ_STATE_CLEANUP:
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
@@ -785,12 +790,25 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
if (cstat || dstat)
qdio_handle_activate_check(irq_ptr, intparm, cstat,
dstat);
+ else if (dcc == 1)
+ rc = -EAGAIN;
break;
case QDIO_IRQ_STATE_STOPPED:
break;
default:
WARN_ON_ONCE(1);
}
+
+ if (rc == -EAGAIN) {
+ DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qint retry");
+ rc = ccw_device_start(cdev, irq_ptr->ccw, intparm, 0, 0);
+ if (!rc)
+ return;
+ DBF_ERROR("%4x RETRY ERR", irq_ptr->schid.sch_no);
+ DBF_ERROR("rc:%4x", rc);
+ qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
+ }
+
wake_up(&cdev->private->wait_q);
}
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index 2c8e964425dc38..43778b088ffac5 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -292,13 +292,16 @@ out:
static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
{
clear_bit(dmb->sba_idx, ism->sba_bitmap);
- dma_free_coherent(&ism->pdev->dev, dmb->dmb_len,
- dmb->cpu_addr, dmb->dma_addr);
+ dma_unmap_page(&ism->pdev->dev, dmb->dma_addr, dmb->dmb_len,
+ DMA_FROM_DEVICE);
+ folio_put(virt_to_folio(dmb->cpu_addr));
}
static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
{
+ struct folio *folio;
unsigned long bit;
+ int rc;
if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev))
return -EINVAL;
@@ -315,14 +318,30 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
test_and_set_bit(dmb->sba_idx, ism->sba_bitmap))
return -EINVAL;
- dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len,
- &dmb->dma_addr,
- GFP_KERNEL | __GFP_NOWARN |
- __GFP_NOMEMALLOC | __GFP_NORETRY);
- if (!dmb->cpu_addr)
- clear_bit(dmb->sba_idx, ism->sba_bitmap);
+ folio = folio_alloc(GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC |
+ __GFP_NORETRY, get_order(dmb->dmb_len));
- return dmb->cpu_addr ? 0 : -ENOMEM;
+ if (!folio) {
+ rc = -ENOMEM;
+ goto out_bit;
+ }
+
+ dmb->cpu_addr = folio_address(folio);
+ dmb->dma_addr = dma_map_page(&ism->pdev->dev,
+ virt_to_page(dmb->cpu_addr), 0,
+ dmb->dmb_len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&ism->pdev->dev, dmb->dma_addr)) {
+ rc = -ENOMEM;
+ goto out_free;
+ }
+
+ return 0;
+
+out_free:
+ kfree(dmb->cpu_addr);
+out_bit:
+ clear_bit(dmb->sba_idx, ism->sba_bitmap);
+ return rc;
}
int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index a0cce6872075d4..f0b8b709649f29 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1179,6 +1179,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev,
}
}
+/**
+ * qeth_irq() - qeth interrupt handler
+ * @cdev: ccw device
+ * @intparm: expect pointer to iob
+ * @irb: Interruption Response Block
+ *
+ * In the good path:
+ * corresponding qeth channel is locked with last used iob as active_cmd.
+ * But this function is also called for error interrupts.
+ *
+ * Caller ensures that:
+ * Interrupts are disabled; ccw device lock is held;
+ *
+ */
static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
struct irb *irb)
{
@@ -1220,11 +1234,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
iob = (struct qeth_cmd_buffer *) (addr_t)intparm;
}
- qeth_unlock_channel(card, channel);
-
rc = qeth_check_irb_error(card, cdev, irb);
if (rc) {
/* IO was terminated, free its resources. */
+ qeth_unlock_channel(card, channel);
if (iob)
qeth_cancel_cmd(iob, rc);
return;
@@ -1268,6 +1281,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
rc = qeth_get_problem(card, cdev, irb);
if (rc) {
card->read_or_write_problem = 1;
+ qeth_unlock_channel(card, channel);
if (iob)
qeth_cancel_cmd(iob, rc);
qeth_clear_ipacmd_list(card);
@@ -1276,6 +1290,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
}
}
+ if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) {
+ /* channel command hasn't started: retry.
+ * active_cmd is still set to last iob
+ */
+ QETH_CARD_TEXT(card, 2, "irqcc1");
+ rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob),
+ (addr_t)iob, 0, 0, iob->timeout);
+ if (rc) {
+ QETH_DBF_MESSAGE(2,
+ "ccw retry on %x failed, rc = %i\n",
+ CARD_DEVID(card), rc);
+ QETH_CARD_TEXT_(card, 2, " err%d", rc);
+ qeth_unlock_channel(card, channel);
+ qeth_cancel_cmd(iob, rc);
+ }
+ return;
+ }
+
+ qeth_unlock_channel(card, channel);
+
if (iob) {
/* sanity check: */
if (irb->scsw.cmd.count > iob->length) {
diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c
index 2c246e80c1c4d6..d91659811eb3c5 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c
@@ -833,7 +833,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba,
BNX2FC_TGT_DBG(tgt, "Freeing up session resources\n");
- spin_lock_bh(&tgt->cq_lock);
ctx_base_ptr = tgt->ctx_base;
tgt->ctx_base = NULL;
@@ -889,7 +888,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba,
tgt->sq, tgt->sq_dma);
tgt->sq = NULL;
}
- spin_unlock_bh(&tgt->cq_lock);
if (ctx_base_ptr)
iounmap(ctx_base_ptr);
diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c
index 1befcd5b2a0f93..fa07a6f54003ee 100644
--- a/drivers/scsi/ch.c
+++ b/drivers/scsi/ch.c
@@ -102,7 +102,9 @@ do { \
#define MAX_RETRIES 1
-static struct class * ch_sysfs_class;
+static const struct class ch_sysfs_class = {
+ .name = "scsi_changer",
+};
typedef struct {
struct kref ref;
@@ -930,7 +932,7 @@ static int ch_probe(struct device *dev)
mutex_init(&ch->lock);
kref_init(&ch->ref);
ch->device = sd;
- class_dev = device_create(ch_sysfs_class, dev,
+ class_dev = device_create(&ch_sysfs_class, dev,
MKDEV(SCSI_CHANGER_MAJOR, ch->minor), ch,
"s%s", ch->name);
if (IS_ERR(class_dev)) {
@@ -955,7 +957,7 @@ static int ch_probe(struct device *dev)
return 0;
destroy_dev:
- device_destroy(ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor));
+ device_destroy(&ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor));
put_device:
scsi_device_put(sd);
remove_idr:
@@ -974,7 +976,7 @@ static int ch_remove(struct device *dev)
dev_set_drvdata(dev, NULL);
spin_unlock(&ch_index_lock);
- device_destroy(ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR,ch->minor));
+ device_destroy(&ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor));
scsi_device_put(ch->device);
kref_put(&ch->ref, ch_destroy);
return 0;
@@ -1003,11 +1005,9 @@ static int __init init_ch_module(void)
int rc;
printk(KERN_INFO "SCSI Media Changer driver v" VERSION " \n");
- ch_sysfs_class = class_create("scsi_changer");
- if (IS_ERR(ch_sysfs_class)) {
- rc = PTR_ERR(ch_sysfs_class);
+ rc = class_register(&ch_sysfs_class);
+ if (rc)
return rc;
- }
rc = register_chrdev(SCSI_CHANGER_MAJOR,"ch",&changer_fops);
if (rc < 0) {
printk("Unable to get major %d for SCSI-Changer\n",
@@ -1022,7 +1022,7 @@ static int __init init_ch_module(void)
fail2:
unregister_chrdev(SCSI_CHANGER_MAJOR, "ch");
fail1:
- class_destroy(ch_sysfs_class);
+ class_unregister(&ch_sysfs_class);
return rc;
}
@@ -1030,7 +1030,7 @@ static void __exit exit_ch_module(void)
{
scsi_unregister_driver(&ch_template.gendrv);
unregister_chrdev(SCSI_CHANGER_MAJOR, "ch");
- class_destroy(ch_sysfs_class);
+ class_unregister(&ch_sysfs_class);
idr_destroy(&ch_index_idr);
}
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index debd3697411974..e8382cc5cf23c0 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -28,7 +28,12 @@ MODULE_AUTHOR("Manoj N. Kumar <manoj@linux.vnet.ibm.com>");
MODULE_AUTHOR("Matthew R. Ochs <mrochs@linux.vnet.ibm.com>");
MODULE_LICENSE("GPL");
-static struct class *cxlflash_class;
+static char *cxlflash_devnode(const struct device *dev, umode_t *mode);
+static const struct class cxlflash_class = {
+ .name = "cxlflash",
+ .devnode = cxlflash_devnode,
+};
+
static u32 cxlflash_major;
static DECLARE_BITMAP(cxlflash_minor, CXLFLASH_MAX_ADAPTERS);
@@ -3602,7 +3607,7 @@ static int init_chrdev(struct cxlflash_cfg *cfg)
goto err1;
}
- char_dev = device_create(cxlflash_class, NULL, devno,
+ char_dev = device_create(&cxlflash_class, NULL, devno,
NULL, "cxlflash%d", minor);
if (IS_ERR(char_dev)) {
rc = PTR_ERR(char_dev);
@@ -3880,14 +3885,12 @@ static int cxlflash_class_init(void)
cxlflash_major = MAJOR(devno);
- cxlflash_class = class_create("cxlflash");
- if (IS_ERR(cxlflash_class)) {
- rc = PTR_ERR(cxlflash_class);
+ rc = class_register(&cxlflash_class);
+ if (rc) {
pr_err("%s: class_create failed rc=%d\n", __func__, rc);
goto err;
}
- cxlflash_class->devnode = cxlflash_devnode;
out:
pr_debug("%s: returning rc=%d\n", __func__, rc);
return rc;
@@ -3903,7 +3906,7 @@ static void cxlflash_class_exit(void)
{
dev_t devno = MKDEV(cxlflash_major, 0);
- class_destroy(cxlflash_class);
+ class_unregister(&cxlflash_class);
unregister_chrdev_region(devno, CXLFLASH_MAX_ADAPTERS);
}
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 097dfe4b620dce..35f8e00850d6cb 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1797,7 +1797,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
if (dev_is_sata(device)) {
struct ata_link *link = &device->sata_dev.ap->link;
- rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT,
+ rc = ata_wait_after_reset(link, jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT,
smp_ata_check_ready_type);
} else {
msleep(2000);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 7d2a33514538c2..34f96cc35342bc 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -2244,7 +2244,15 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) &&
(sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) {
- ts->stat = SAS_PROTO_RESPONSE;
+ if (task->ata_task.use_ncq) {
+ struct domain_device *device = task->dev;
+ struct hisi_sas_device *sas_dev = device->lldd_dev;
+
+ sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR;
+ slot->abort = 1;
+ } else {
+ ts->stat = SAS_PROTO_RESPONSE;
+ }
} else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
ts->residual = trans_tx_fail_type;
ts->stat = SAS_DATA_UNDERRUN;
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 4f495a41ec4aae..2d92549e524319 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -353,12 +353,13 @@ static void scsi_host_dev_release(struct device *dev)
if (shost->shost_state == SHOST_CREATED) {
/*
- * Free the shost_dev device name here if scsi_host_alloc()
- * and scsi_host_put() have been called but neither
+ * Free the shost_dev device name and remove the proc host dir
+ * here if scsi_host_{alloc,put}() have been called but neither
* scsi_host_add() nor scsi_remove_host() has been called.
* This avoids that the memory allocated for the shost_dev
- * name is leaked.
+ * name as well as the proc dir structure are leaked.
*/
+ scsi_proc_hostdir_rm(shost->hostt);
kfree(dev_name(&shost->shost_dev));
}
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index a2204674b6808f..f6e6db8b8aba91 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -135,7 +135,7 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
static inline void *alloc_smp_req(int size)
{
- u8 *p = kzalloc(size, GFP_KERNEL);
+ u8 *p = kzalloc(ALIGN(size, ARCH_DMA_MINALIGN), GFP_KERNEL);
if (p)
p[0] = SMP_REQUEST;
return p;
@@ -1621,6 +1621,16 @@ out_err:
/* ---------- Domain revalidation ---------- */
+static void sas_get_sas_addr_and_dev_type(struct smp_disc_resp *disc_resp,
+ u8 *sas_addr,
+ enum sas_device_type *type)
+{
+ memcpy(sas_addr, disc_resp->disc.attached_sas_addr, SAS_ADDR_SIZE);
+ *type = to_dev_type(&disc_resp->disc);
+ if (*type == SAS_PHY_UNUSED)
+ memset(sas_addr, 0, SAS_ADDR_SIZE);
+}
+
static int sas_get_phy_discover(struct domain_device *dev,
int phy_id, struct smp_disc_resp *disc_resp)
{
@@ -1674,13 +1684,8 @@ int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id,
return -ENOMEM;
res = sas_get_phy_discover(dev, phy_id, disc_resp);
- if (res == 0) {
- memcpy(sas_addr, disc_resp->disc.attached_sas_addr,
- SAS_ADDR_SIZE);
- *type = to_dev_type(&disc_resp->disc);
- if (*type == 0)
- memset(sas_addr, 0, SAS_ADDR_SIZE);
- }
+ if (res == 0)
+ sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, type);
kfree(disc_resp);
return res;
}
@@ -1940,6 +1945,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
struct expander_device *ex = &dev->ex_dev;
struct ex_phy *phy = &ex->ex_phy[phy_id];
enum sas_device_type type = SAS_PHY_UNUSED;
+ struct smp_disc_resp *disc_resp;
u8 sas_addr[SAS_ADDR_SIZE];
char msg[80] = "";
int res;
@@ -1951,33 +1957,41 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
SAS_ADDR(dev->sas_addr), phy_id, msg);
memset(sas_addr, 0, SAS_ADDR_SIZE);
- res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type);
+ disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+ if (!disc_resp)
+ return -ENOMEM;
+
+ res = sas_get_phy_discover(dev, phy_id, disc_resp);
switch (res) {
case SMP_RESP_NO_PHY:
phy->phy_state = PHY_NOT_PRESENT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out_free_resp;
case SMP_RESP_PHY_VACANT:
phy->phy_state = PHY_VACANT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out_free_resp;
case SMP_RESP_FUNC_ACC:
break;
case -ECOMM:
break;
default:
- return res;
+ goto out_free_resp;
}
+ if (res == 0)
+ sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, &type);
+
if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) {
phy->phy_state = PHY_EMPTY;
sas_unregister_devs_sas_addr(dev, phy_id, last);
/*
- * Even though the PHY is empty, for convenience we discover
- * the PHY to update the PHY info, like negotiated linkrate.
+ * Even though the PHY is empty, for convenience we update
+ * the PHY info, like negotiated linkrate.
*/
- sas_ex_phy_discover(dev, phy_id);
- return res;
+ if (res == 0)
+ sas_set_ex_phy(dev, phy_id, disc_resp);
+ goto out_free_resp;
} else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) &&
dev_type_flutter(type, phy->attached_dev_type)) {
struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id);
@@ -1989,7 +2003,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
action = ", needs recovery";
pr_debug("ex %016llx phy%02d broadcast flutter%s\n",
SAS_ADDR(dev->sas_addr), phy_id, action);
- return res;
+ goto out_free_resp;
}
/* we always have to delete the old device when we went here */
@@ -1998,7 +2012,10 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
SAS_ADDR(phy->attached_sas_addr));
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return sas_discover_new(dev, phy_id);
+ res = sas_discover_new(dev, phy_id);
+out_free_resp:
+ kfree(disc_resp);
+ return res;
}
/**
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 30d20d37554f6d..98ca7df003efb3 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -1333,7 +1333,6 @@ struct lpfc_hba {
struct timer_list fabric_block_timer;
unsigned long bit_flags;
atomic_t num_rsrc_err;
- atomic_t num_cmd_success;
unsigned long last_rsrc_error_time;
unsigned long last_ramp_down_time;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -1438,6 +1437,7 @@ struct lpfc_hba {
struct timer_list inactive_vmid_poll;
/* RAS Support */
+ spinlock_t ras_fwlog_lock; /* do not take while holding another lock */
struct lpfc_ras_fwlog ras_fwlog;
uint32_t iocb_cnt;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 365c7e96070bb7..3c534b3cfe9186 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -5865,9 +5865,9 @@ lpfc_ras_fwlog_buffsize_set(struct lpfc_hba *phba, uint val)
if (phba->cfg_ras_fwlog_func != PCI_FUNC(phba->pcidev->devfn))
return -EINVAL;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
state = phba->ras_fwlog.state;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
if (state == REG_INPROGRESS) {
lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "6147 RAS Logging "
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index d80e6e81053b0a..529df1768fa89f 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -2513,7 +2513,7 @@ static int lpfcdiag_loop_self_reg(struct lpfc_hba *phba, uint16_t *rpi)
return -ENOMEM;
}
- dmabuff = (struct lpfc_dmabuf *)mbox->ctx_buf;
+ dmabuff = mbox->ctx_buf;
mbox->ctx_buf = NULL;
mbox->ctx_ndlp = NULL;
status = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO);
@@ -3169,10 +3169,10 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job)
}
cmdwqe = &cmdiocbq->wqe;
- memset(cmdwqe, 0, sizeof(union lpfc_wqe));
+ memset(cmdwqe, 0, sizeof(*cmdwqe));
if (phba->sli_rev < LPFC_SLI_REV4) {
rspwqe = &rspiocbq->wqe;
- memset(rspwqe, 0, sizeof(union lpfc_wqe));
+ memset(rspwqe, 0, sizeof(*rspwqe));
}
INIT_LIST_HEAD(&head);
@@ -3376,7 +3376,7 @@ lpfc_bsg_issue_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
unsigned long flags;
uint8_t *pmb, *pmb_buf;
- dd_data = pmboxq->ctx_ndlp;
+ dd_data = pmboxq->ctx_u.dd_data;
/*
* The outgoing buffer is readily referred from the dma buffer,
@@ -3553,7 +3553,7 @@ lpfc_bsg_issue_mbox_ext_handle_job(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
struct lpfc_sli_config_mbox *sli_cfg_mbx;
uint8_t *pmbx;
- dd_data = pmboxq->ctx_buf;
+ dd_data = pmboxq->ctx_u.dd_data;
/* Determine if job has been aborted */
spin_lock_irqsave(&phba->ct_ev_lock, flags);
@@ -3940,7 +3940,7 @@ lpfc_bsg_sli_cfg_read_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_read_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->ctx_buf = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -4112,7 +4112,7 @@ lpfc_bsg_sli_cfg_write_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->ctx_buf = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -4460,7 +4460,7 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->ctx_buf = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -4747,7 +4747,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job,
if (mbox_req->inExtWLen || mbox_req->outExtWLen) {
from = pmbx;
ext = from + sizeof(MAILBOX_t);
- pmboxq->ctx_buf = ext;
+ pmboxq->ext_buf = ext;
pmboxq->in_ext_byte_len =
mbox_req->inExtWLen * sizeof(uint32_t);
pmboxq->out_ext_byte_len =
@@ -4875,7 +4875,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_mbox_cmpl;
/* setup context field to pass wait_queue pointer to wake function */
- pmboxq->ctx_ndlp = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -5070,12 +5070,12 @@ lpfc_bsg_get_ras_config(struct bsg_job *job)
bsg_reply->reply_data.vendor_reply.vendor_rsp;
/* Current logging state */
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state == ACTIVE)
ras_reply->state = LPFC_RASLOG_STATE_RUNNING;
else
ras_reply->state = LPFC_RASLOG_STATE_STOPPED;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
ras_reply->log_level = phba->ras_fwlog.fw_loglevel;
ras_reply->log_buff_sz = phba->cfg_ras_fwlog_buffsize;
@@ -5132,13 +5132,13 @@ lpfc_bsg_set_ras_config(struct bsg_job *job)
if (action == LPFC_RASACTION_STOP_LOGGING) {
/* Check if already disabled */
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state != ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
rc = -ESRCH;
goto ras_job_error;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
/* Disable logging */
lpfc_ras_stop_fwlog(phba);
@@ -5149,10 +5149,10 @@ lpfc_bsg_set_ras_config(struct bsg_job *job)
* FW-logging with new log-level. Return status
* "Logging already Running" to caller.
**/
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state != INACTIVE)
action_status = -EINPROGRESS;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
/* Enable logging */
rc = lpfc_sli4_ras_fwlog_init(phba, log_level,
@@ -5268,13 +5268,13 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job)
goto ras_job_error;
/* Logging to be stopped before reading */
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state == ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
rc = -EINPROGRESS;
goto ras_job_error;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
if (job->request_len <
sizeof(struct fc_bsg_request) +
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index ab5af10c8a16ca..a2d2b02b34187f 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -2194,12 +2194,12 @@ static int lpfc_debugfs_ras_log_data(struct lpfc_hba *phba,
memset(buffer, 0, size);
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (phba->ras_fwlog.state != ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
return -EINVAL;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
list_for_each_entry_safe(dmabuf, next,
&phba->ras_fwlog.fwlog_buff_list, list) {
@@ -2250,13 +2250,13 @@ lpfc_debugfs_ras_log_open(struct inode *inode, struct file *file)
int size;
int rc = -ENOMEM;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (phba->ras_fwlog.state != ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
rc = -EINVAL;
goto out;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
if (check_mul_overflow(LPFC_RAS_MIN_BUFF_POST_SIZE,
phba->cfg_ras_fwlog_buffsize, &size))
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 28e56542e0720e..f7c28dc73bf67d 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -4437,23 +4437,23 @@ lpfc_els_retry_delay(struct timer_list *t)
unsigned long flags;
struct lpfc_work_evt *evtp = &ndlp->els_retry_evt;
+ /* Hold a node reference for outstanding queued work */
+ if (!lpfc_nlp_get(ndlp))
+ return;
+
spin_lock_irqsave(&phba->hbalock, flags);
if (!list_empty(&evtp->evt_listp)) {
spin_unlock_irqrestore(&phba->hbalock, flags);
+ lpfc_nlp_put(ndlp);
return;
}
- /* We need to hold the node by incrementing the reference
- * count until the queued work is done
- */
- evtp->evt_arg1 = lpfc_nlp_get(ndlp);
- if (evtp->evt_arg1) {
- evtp->evt = LPFC_EVT_ELS_RETRY;
- list_add_tail(&evtp->evt_listp, &phba->work_list);
- lpfc_worker_wake_up(phba);
- }
+ evtp->evt_arg1 = ndlp;
+ evtp->evt = LPFC_EVT_ELS_RETRY;
+ list_add_tail(&evtp->evt_listp, &phba->work_list);
spin_unlock_irqrestore(&phba->hbalock, flags);
- return;
+
+ lpfc_worker_wake_up(phba);
}
/**
@@ -7238,7 +7238,7 @@ lpfc_get_rdp_info(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context)
goto rdp_fail;
mbox->vport = rdp_context->ndlp->vport;
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a0;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+ mbox->ctx_u.rdp = rdp_context;
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED) {
lpfc_mbox_rsrc_cleanup(phba, mbox, MBOX_THD_UNLOCKED);
@@ -7290,7 +7290,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
mbox->in_ext_byte_len = DMP_SFF_PAGE_A0_SIZE;
mbox->out_ext_byte_len = DMP_SFF_PAGE_A0_SIZE;
mbox->mbox_offset_word = 5;
- mbox->ctx_buf = virt;
+ mbox->ext_buf = virt;
} else {
bf_set(lpfc_mbx_memory_dump_type3_length,
&mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A0_SIZE);
@@ -7298,7 +7298,6 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
}
mbox->vport = phba->pport;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30);
if (rc == MBX_NOT_FINISHED) {
@@ -7307,7 +7306,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
}
if (phba->sli_rev == LPFC_SLI_REV4)
- mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
+ mp = mbox->ctx_buf;
else
mp = mpsave;
@@ -7350,7 +7349,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
mbox->in_ext_byte_len = DMP_SFF_PAGE_A2_SIZE;
mbox->out_ext_byte_len = DMP_SFF_PAGE_A2_SIZE;
mbox->mbox_offset_word = 5;
- mbox->ctx_buf = virt;
+ mbox->ext_buf = virt;
} else {
bf_set(lpfc_mbx_memory_dump_type3_length,
&mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A2_SIZE);
@@ -7358,7 +7357,6 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
}
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30);
if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) {
rc = 1;
@@ -7500,9 +7498,9 @@ lpfc_els_lcb_rsp(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
int rc;
mb = &pmb->u.mb;
- lcb_context = (struct lpfc_lcb_context *)pmb->ctx_ndlp;
+ lcb_context = pmb->ctx_u.lcb;
ndlp = lcb_context->ndlp;
- pmb->ctx_ndlp = NULL;
+ memset(&pmb->ctx_u, 0, sizeof(pmb->ctx_u));
pmb->ctx_buf = NULL;
shdr = (union lpfc_sli4_cfg_shdr *)
@@ -7642,7 +7640,7 @@ lpfc_sli4_set_beacon(struct lpfc_vport *vport,
lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
LPFC_MBOX_OPCODE_SET_BEACON_CONFIG, len,
LPFC_SLI4_MBX_EMBED);
- mbox->ctx_ndlp = (void *)lcb_context;
+ mbox->ctx_u.lcb = lcb_context;
mbox->vport = phba->pport;
mbox->mbox_cmpl = lpfc_els_lcb_rsp;
bf_set(lpfc_mbx_set_beacon_port_num, &mbox->u.mqe.un.beacon_config,
@@ -8639,9 +8637,9 @@ lpfc_els_rsp_rls_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
mb = &pmb->u.mb;
ndlp = pmb->ctx_ndlp;
- rxid = (uint16_t)((unsigned long)(pmb->ctx_buf) & 0xffff);
- oxid = (uint16_t)(((unsigned long)(pmb->ctx_buf) >> 16) & 0xffff);
- pmb->ctx_buf = NULL;
+ rxid = (uint16_t)(pmb->ctx_u.ox_rx_id & 0xffff);
+ oxid = (uint16_t)((pmb->ctx_u.ox_rx_id >> 16) & 0xffff);
+ memset(&pmb->ctx_u, 0, sizeof(pmb->ctx_u));
pmb->ctx_ndlp = NULL;
if (mb->mbxStatus) {
@@ -8745,8 +8743,7 @@ lpfc_els_rcv_rls(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC);
if (mbox) {
lpfc_read_lnk_stat(phba, mbox);
- mbox->ctx_buf = (void *)((unsigned long)
- (ox_id << 16 | ctx));
+ mbox->ctx_u.ox_rx_id = ox_id << 16 | ctx;
mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
if (!mbox->ctx_ndlp)
goto node_err;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index a7a2309a629faf..e42fa9c822b502 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -257,7 +257,9 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
if (evtp->evt_arg1) {
evtp->evt = LPFC_EVT_DEV_LOSS;
list_add_tail(&evtp->evt_listp, &phba->work_list);
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
lpfc_worker_wake_up(phba);
+ return;
}
spin_unlock_irqrestore(&phba->hbalock, iflags);
} else {
@@ -275,10 +277,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
lpfc_disc_state_machine(vport, ndlp, NULL,
NLP_EVT_DEVICE_RM);
}
-
}
-
- return;
}
/**
@@ -3429,7 +3428,7 @@ static void
lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ struct lpfc_dmabuf *mp = pmb->ctx_buf;
struct lpfc_vport *vport = pmb->vport;
struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
struct serv_parm *sp = &vport->fc_sparam;
@@ -3737,7 +3736,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
struct lpfc_mbx_read_top *la;
struct lpfc_sli_ring *pring;
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
+ struct lpfc_dmabuf *mp = pmb->ctx_buf;
uint8_t attn_type;
/* Unblock ELS traffic */
@@ -3851,8 +3850,8 @@ void
lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
struct lpfc_vport *vport = pmb->vport;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_dmabuf *mp = pmb->ctx_buf;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
/* The driver calls the state machine with the pmb pointer
* but wants to make sure a stale ctx_buf isn't acted on.
@@ -4066,7 +4065,7 @@ lpfc_create_static_vport(struct lpfc_hba *phba)
* the dump routine is a single-use construct.
*/
if (pmb->ctx_buf) {
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
pmb->ctx_buf = NULL;
@@ -4089,7 +4088,7 @@ lpfc_create_static_vport(struct lpfc_hba *phba)
if (phba->sli_rev == LPFC_SLI_REV4) {
byte_count = pmb->u.mqe.un.mb_words[5];
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
if (byte_count > sizeof(struct static_vport_info) -
offset)
byte_count = sizeof(struct static_vport_info)
@@ -4169,7 +4168,7 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
struct lpfc_vport *vport = pmb->vport;
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
pmb->ctx_ndlp = NULL;
@@ -4307,7 +4306,7 @@ void
lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
struct lpfc_vport *vport = pmb->vport;
int rc;
@@ -4431,7 +4430,7 @@ lpfc_mbx_cmpl_fc_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
struct lpfc_vport *vport = pmb->vport;
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
pmb->ctx_ndlp = NULL;
if (mb->mbxStatus) {
@@ -5174,7 +5173,7 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
struct lpfc_vport *vport = pmb->vport;
struct lpfc_nodelist *ndlp;
- ndlp = (struct lpfc_nodelist *)(pmb->ctx_ndlp);
+ ndlp = pmb->ctx_ndlp;
if (!ndlp)
return;
lpfc_issue_els_logo(vport, ndlp, 0);
@@ -5496,7 +5495,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
if ((mb = phba->sli.mbox_active)) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
!(mb->mbox_flag & LPFC_MBX_IMED_UNREG) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
mb->ctx_ndlp = NULL;
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
}
@@ -5507,7 +5506,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
list_for_each_entry(mb, &phba->sli.mboxq_cmpl, list) {
if ((mb->u.mb.mbxCommand != MBX_REG_LOGIN64) ||
(mb->mbox_flag & LPFC_MBX_IMED_UNREG) ||
- (ndlp != (struct lpfc_nodelist *)mb->ctx_ndlp))
+ (ndlp != mb->ctx_ndlp))
continue;
mb->ctx_ndlp = NULL;
@@ -5517,7 +5516,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
!(mb->mbox_flag & LPFC_MBX_IMED_UNREG) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
list_del(&mb->list);
lpfc_mbox_rsrc_cleanup(phba, mb, MBOX_THD_LOCKED);
@@ -6357,7 +6356,7 @@ void
lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
struct lpfc_vport *vport = pmb->vport;
pmb->ctx_ndlp = NULL;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 88b2e57d90c2e3..f7a0aa3625f4e1 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -460,7 +460,7 @@ lpfc_config_port_post(struct lpfc_hba *phba)
return -EIO;
}
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
/* This dmabuf was allocated by lpfc_read_sparam. The dmabuf is no
* longer needed. Prevent unintended ctx_buf access as the mbox is
@@ -2217,7 +2217,7 @@ lpfc_handle_latt(struct lpfc_hba *phba)
/* Cleanup any outstanding ELS commands */
lpfc_els_flush_all_cmd(phba);
psli->slistat.link_event++;
- lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf);
+ lpfc_read_topology(phba, pmb, pmb->ctx_buf);
pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology;
pmb->vport = vport;
/* Block ELS IOCBs until we have processed this mbox command */
@@ -5454,7 +5454,7 @@ lpfc_sli4_async_link_evt(struct lpfc_hba *phba,
phba->sli.slistat.link_event++;
/* Create lpfc_handle_latt mailbox command from link ACQE */
- lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf);
+ lpfc_read_topology(phba, pmb, pmb->ctx_buf);
pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology;
pmb->vport = phba->pport;
@@ -6347,7 +6347,7 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc)
phba->sli.slistat.link_event++;
/* Create lpfc_handle_latt mailbox command from link ACQE */
- lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf);
+ lpfc_read_topology(phba, pmb, pmb->ctx_buf);
pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology;
pmb->vport = phba->pport;
@@ -7705,6 +7705,9 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
"NVME" : " "),
(phba->nvmet_support ? "NVMET" : " "));
+ /* ras_fwlog state */
+ spin_lock_init(&phba->ras_fwlog_lock);
+
/* Initialize the IO buffer list used by driver for SLI3 SCSI */
spin_lock_init(&phba->scsi_buf_list_get_lock);
INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get);
@@ -13055,7 +13058,7 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
rc = request_threaded_irq(eqhdl->irq,
&lpfc_sli4_hba_intr_handler,
&lpfc_sli4_hba_intr_handler_th,
- IRQF_ONESHOT, name, eqhdl);
+ 0, name, eqhdl);
if (rc) {
lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
"0486 MSI-X fast-path (%d) "
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index f7c41958036bb7..e98f1c2b22202e 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -102,7 +102,7 @@ lpfc_mbox_rsrc_cleanup(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox,
{
struct lpfc_dmabuf *mp;
- mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
+ mp = mbox->ctx_buf;
mbox->ctx_buf = NULL;
/* Release the generic BPL buffer memory. */
@@ -204,10 +204,8 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset,
uint16_t region_id)
{
MAILBOX_t *mb;
- void *ctx;
mb = &pmb->u.mb;
- ctx = pmb->ctx_buf;
/* Setup to dump VPD region */
memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
@@ -219,7 +217,6 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset,
mb->un.varDmp.word_cnt = (DMP_RSP_SIZE / sizeof (uint32_t));
mb->un.varDmp.co = 0;
mb->un.varDmp.resp_offset = 0;
- pmb->ctx_buf = ctx;
mb->mbxOwner = OWN_HOST;
return;
}
@@ -236,11 +233,8 @@ void
lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb;
- void *ctx;
mb = &pmb->u.mb;
- /* Save context so that we can restore after memset */
- ctx = pmb->ctx_buf;
/* Setup to dump VPD region */
memset(pmb, 0, sizeof(LPFC_MBOXQ_t));
@@ -254,7 +248,6 @@ lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
mb->un.varDmp.word_cnt = WAKE_UP_PARMS_WORD_SIZE;
mb->un.varDmp.co = 0;
mb->un.varDmp.resp_offset = 0;
- pmb->ctx_buf = ctx;
return;
}
@@ -372,7 +365,7 @@ lpfc_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb,
/* Save address for later completion and set the owner to host so that
* the FW knows this mailbox is available for processing.
*/
- pmb->ctx_buf = (uint8_t *)mp;
+ pmb->ctx_buf = mp;
mb->mbxOwner = OWN_HOST;
return (0);
}
@@ -1816,7 +1809,7 @@ lpfc_sli4_mbox_cmd_free(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
}
/* Reinitialize the context pointers to avoid stale usage. */
mbox->ctx_buf = NULL;
- mbox->context3 = NULL;
+ memset(&mbox->ctx_u, 0, sizeof(mbox->ctx_u));
kfree(mbox->sge_array);
/* Finally, free the mailbox command itself */
mempool_free(mbox, phba->mbox_mem_pool);
@@ -2366,8 +2359,7 @@ lpfc_mbx_cmpl_rdp_link_stat(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
{
MAILBOX_t *mb;
int rc = FAILURE;
- struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mboxq->ctx_ndlp);
+ struct lpfc_rdp_context *rdp_context = mboxq->ctx_u.rdp;
mb = &mboxq->u.mb;
if (mb->mbxStatus)
@@ -2385,9 +2377,8 @@ mbx_failed:
static void
lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
{
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
- struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mbox->ctx_ndlp);
+ struct lpfc_dmabuf *mp = mbox->ctx_buf;
+ struct lpfc_rdp_context *rdp_context = mbox->ctx_u.rdp;
if (bf_get(lpfc_mqe_status, &mbox->u.mqe))
goto error_mbox_free;
@@ -2401,7 +2392,7 @@ lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
/* Save the dma buffer for cleanup in the final completion. */
mbox->ctx_buf = mp;
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_link_stat;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+ mbox->ctx_u.rdp = rdp_context;
if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) == MBX_NOT_FINISHED)
goto error_mbox_free;
@@ -2416,9 +2407,8 @@ void
lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
{
int rc;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
- struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mbox->ctx_ndlp);
+ struct lpfc_dmabuf *mp = mbox->ctx_buf;
+ struct lpfc_rdp_context *rdp_context = mbox->ctx_u.rdp;
if (bf_get(lpfc_mqe_status, &mbox->u.mqe))
goto error;
@@ -2448,7 +2438,7 @@ lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a2;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+ mbox->ctx_u.rdp = rdp_context;
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED)
goto error;
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 8e425be7c7c99c..c4172791c26751 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -300,7 +300,7 @@ lpfc_defer_plogi_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *login_mbox)
int rc;
ndlp = login_mbox->ctx_ndlp;
- save_iocb = login_mbox->context3;
+ save_iocb = login_mbox->ctx_u.save_iocb;
if (mb->mbxStatus == MBX_SUCCESS) {
/* Now that REG_RPI completed successfully,
@@ -640,7 +640,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
if (!login_mbox->ctx_ndlp)
goto out;
- login_mbox->context3 = save_iocb; /* For PLOGI ACC */
+ login_mbox->ctx_u.save_iocb = save_iocb; /* For PLOGI ACC */
spin_lock_irq(&ndlp->lock);
ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI);
@@ -682,8 +682,8 @@ lpfc_mbx_cmpl_resume_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
struct lpfc_nodelist *ndlp;
uint32_t cmd;
- elsiocb = (struct lpfc_iocbq *)mboxq->ctx_buf;
- ndlp = (struct lpfc_nodelist *)mboxq->ctx_ndlp;
+ elsiocb = mboxq->ctx_u.save_iocb;
+ ndlp = mboxq->ctx_ndlp;
vport = mboxq->vport;
cmd = elsiocb->drvrTimeout;
@@ -1875,7 +1875,7 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
/* cleanup any ndlp on mbox q waiting for reglogin cmpl */
if ((mb = phba->sli.mbox_active)) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND;
lpfc_nlp_put(ndlp);
mb->ctx_ndlp = NULL;
@@ -1886,7 +1886,7 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
spin_lock_irq(&phba->hbalock);
list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND;
lpfc_nlp_put(ndlp);
list_del(&mb->list);
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 09c53b85bcb8d6..c5792eaf3f64cb 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2616,9 +2616,9 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
/* No concern about the role change on the nvme remoteport.
* The transport will update it.
*/
- spin_lock_irq(&vport->phba->hbalock);
+ spin_lock_irq(&ndlp->lock);
ndlp->fc4_xpt_flags |= NVME_XPT_UNREG_WAIT;
- spin_unlock_irq(&vport->phba->hbalock);
+ spin_unlock_irq(&ndlp->lock);
/* Don't let the host nvme transport keep sending keep-alives
* on this remoteport. Vport is unloading, no recovery. The
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 8258b771bd009e..561ced5503c634 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1586,7 +1586,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
wqe = &nvmewqe->wqe;
/* Initialize WQE */
- memset(wqe, 0, sizeof(union lpfc_wqe));
+ memset(wqe, 0, sizeof(*wqe));
ctx_buf->iocbq->cmd_dmabuf = NULL;
spin_lock(&phba->sli4_hba.sgl_list_lock);
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index c0038eaae7b0ae..4a6e5223a22418 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -167,11 +167,10 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba)
struct Scsi_Host *shost;
struct scsi_device *sdev;
unsigned long new_queue_depth;
- unsigned long num_rsrc_err, num_cmd_success;
+ unsigned long num_rsrc_err;
int i;
num_rsrc_err = atomic_read(&phba->num_rsrc_err);
- num_cmd_success = atomic_read(&phba->num_cmd_success);
/*
* The error and success command counters are global per
@@ -186,20 +185,16 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba)
for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
shost = lpfc_shost_from_vport(vports[i]);
shost_for_each_device(sdev, shost) {
- new_queue_depth =
- sdev->queue_depth * num_rsrc_err /
- (num_rsrc_err + num_cmd_success);
- if (!new_queue_depth)
- new_queue_depth = sdev->queue_depth - 1;
+ if (num_rsrc_err >= sdev->queue_depth)
+ new_queue_depth = 1;
else
new_queue_depth = sdev->queue_depth -
- new_queue_depth;
+ num_rsrc_err;
scsi_change_queue_depth(sdev, new_queue_depth);
}
}
lpfc_destroy_vport_work_array(phba, vports);
atomic_set(&phba->num_rsrc_err, 0);
- atomic_set(&phba->num_cmd_success, 0);
}
/**
@@ -5336,16 +5331,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
}
err = lpfc_bg_scsi_prep_dma_buf(phba, lpfc_cmd);
} else {
- if (vport->phba->cfg_enable_bg) {
- lpfc_printf_vlog(vport,
- KERN_INFO, LOG_SCSI_CMD,
- "9038 BLKGRD: rcvd PROT_NORMAL cmd: "
- "x%x reftag x%x cnt %u pt %x\n",
- cmnd->cmnd[0],
- scsi_prot_ref_tag(cmnd),
- scsi_logical_block_count(cmnd),
- (cmnd->cmnd[1]>>5));
- }
err = lpfc_scsi_prep_dma_buf(phba, lpfc_cmd);
}
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 1f8a9b5945cbae..a028e008dd1ee8 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1217,9 +1217,9 @@ lpfc_set_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
empty = list_empty(&phba->active_rrq_list);
list_add_tail(&rrq->list, &phba->active_rrq_list);
phba->hba_flag |= HBA_RRQ_ACTIVE;
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
if (empty)
lpfc_worker_wake_up(phba);
- spin_unlock_irqrestore(&phba->hbalock, iflags);
return 0;
out:
spin_unlock_irqrestore(&phba->hbalock, iflags);
@@ -2830,7 +2830,7 @@ lpfc_sli_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
*/
pmboxq->mbox_flag |= LPFC_MBX_WAKE;
spin_lock_irqsave(&phba->hbalock, drvr_flag);
- pmbox_done = (struct completion *)pmboxq->context3;
+ pmbox_done = pmboxq->ctx_u.mbox_wait;
if (pmbox_done)
complete(pmbox_done);
spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
@@ -2885,7 +2885,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
if (!test_bit(FC_UNLOADING, &phba->pport->load_flag) &&
pmb->u.mb.mbxCommand == MBX_REG_LOGIN64 &&
!pmb->u.mb.mbxStatus) {
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
if (mp) {
pmb->ctx_buf = NULL;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
@@ -2914,12 +2914,12 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
}
if (pmb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ ndlp = pmb->ctx_ndlp;
lpfc_nlp_put(ndlp);
}
if (pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) {
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ ndlp = pmb->ctx_ndlp;
/* Check to see if there are any deferred events to process */
if (ndlp) {
@@ -2952,7 +2952,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
/* This nlp_put pairs with lpfc_sli4_resume_rpi */
if (pmb->u.mb.mbxCommand == MBX_RESUME_RPI) {
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ ndlp = pmb->ctx_ndlp;
lpfc_nlp_put(ndlp);
}
@@ -5819,7 +5819,7 @@ lpfc_sli4_read_fcoe_params(struct lpfc_hba *phba)
goto out_free_mboxq;
}
- mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
+ mp = mboxq->ctx_buf;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
@@ -6849,9 +6849,9 @@ lpfc_ras_stop_fwlog(struct lpfc_hba *phba)
{
struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = INACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
/* Disable FW logging to host memory */
writel(LPFC_CTL_PDEV_CTL_DDL_RAS,
@@ -6894,9 +6894,9 @@ lpfc_sli4_ras_dma_free(struct lpfc_hba *phba)
ras_fwlog->lwpd.virt = NULL;
}
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = INACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
}
/**
@@ -6998,9 +6998,9 @@ lpfc_sli4_ras_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
goto disable_ras;
}
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = ACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
mempool_free(pmb, phba->mbox_mem_pool);
return;
@@ -7032,9 +7032,9 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba,
uint32_t len = 0, fwlog_buffsize, fwlog_entry_count;
int rc = 0;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = INACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
fwlog_buffsize = (LPFC_RAS_MIN_BUFF_POST_SIZE *
phba->cfg_ras_fwlog_buffsize);
@@ -7095,9 +7095,9 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba,
mbx_fwlog->u.request.lwpd.addr_lo = putPaddrLow(ras_fwlog->lwpd.phys);
mbx_fwlog->u.request.lwpd.addr_hi = putPaddrHigh(ras_fwlog->lwpd.phys);
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = REG_INPROGRESS;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
mbox->vport = phba->pport;
mbox->mbox_cmpl = lpfc_sli4_ras_mbox_cmpl;
@@ -8766,7 +8766,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
mboxq->vport = vport;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
- mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
+ mp = mboxq->ctx_buf;
if (rc == MBX_SUCCESS) {
memcpy(&vport->fc_sparam, mp->virt, sizeof(struct serv_parm));
rc = 0;
@@ -9548,8 +9548,8 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
}
/* Copy the mailbox extension data */
- if (pmbox->in_ext_byte_len && pmbox->ctx_buf) {
- lpfc_sli_pcimem_bcopy(pmbox->ctx_buf,
+ if (pmbox->in_ext_byte_len && pmbox->ext_buf) {
+ lpfc_sli_pcimem_bcopy(pmbox->ext_buf,
(uint8_t *)phba->mbox_ext,
pmbox->in_ext_byte_len);
}
@@ -9562,10 +9562,10 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
= MAILBOX_HBA_EXT_OFFSET;
/* Copy the mailbox extension data */
- if (pmbox->in_ext_byte_len && pmbox->ctx_buf)
+ if (pmbox->in_ext_byte_len && pmbox->ext_buf)
lpfc_memcpy_to_slim(phba->MBslimaddr +
MAILBOX_HBA_EXT_OFFSET,
- pmbox->ctx_buf, pmbox->in_ext_byte_len);
+ pmbox->ext_buf, pmbox->in_ext_byte_len);
if (mbx->mbxCommand == MBX_CONFIG_PORT)
/* copy command data into host mbox for cmpl */
@@ -9688,9 +9688,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
lpfc_sli_pcimem_bcopy(phba->mbox, mbx,
MAILBOX_CMD_SIZE);
/* Copy the mailbox extension data */
- if (pmbox->out_ext_byte_len && pmbox->ctx_buf) {
+ if (pmbox->out_ext_byte_len && pmbox->ext_buf) {
lpfc_sli_pcimem_bcopy(phba->mbox_ext,
- pmbox->ctx_buf,
+ pmbox->ext_buf,
pmbox->out_ext_byte_len);
}
} else {
@@ -9698,9 +9698,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
lpfc_memcpy_from_slim(mbx, phba->MBslimaddr,
MAILBOX_CMD_SIZE);
/* Copy the mailbox extension data */
- if (pmbox->out_ext_byte_len && pmbox->ctx_buf) {
+ if (pmbox->out_ext_byte_len && pmbox->ext_buf) {
lpfc_memcpy_from_slim(
- pmbox->ctx_buf,
+ pmbox->ext_buf,
phba->MBslimaddr +
MAILBOX_HBA_EXT_OFFSET,
pmbox->out_ext_byte_len);
@@ -11373,18 +11373,18 @@ lpfc_sli_post_recovery_event(struct lpfc_hba *phba,
unsigned long iflags;
struct lpfc_work_evt *evtp = &ndlp->recovery_evt;
+ /* Hold a node reference for outstanding queued work */
+ if (!lpfc_nlp_get(ndlp))
+ return;
+
spin_lock_irqsave(&phba->hbalock, iflags);
if (!list_empty(&evtp->evt_listp)) {
spin_unlock_irqrestore(&phba->hbalock, iflags);
+ lpfc_nlp_put(ndlp);
return;
}
- /* Incrementing the reference count until the queued work is done. */
- evtp->evt_arg1 = lpfc_nlp_get(ndlp);
- if (!evtp->evt_arg1) {
- spin_unlock_irqrestore(&phba->hbalock, iflags);
- return;
- }
+ evtp->evt_arg1 = ndlp;
evtp->evt = LPFC_EVT_RECOVER_PORT;
list_add_tail(&evtp->evt_listp, &phba->work_list);
spin_unlock_irqrestore(&phba->hbalock, iflags);
@@ -13262,9 +13262,9 @@ lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq,
/* setup wake call as IOCB callback */
pmboxq->mbox_cmpl = lpfc_sli_wake_mbox_wait;
- /* setup context3 field to pass wait_queue pointer to wake function */
+ /* setup ctx_u field to pass wait_queue pointer to wake function */
init_completion(&mbox_done);
- pmboxq->context3 = &mbox_done;
+ pmboxq->ctx_u.mbox_wait = &mbox_done;
/* now issue the command */
retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT);
if (retval == MBX_BUSY || retval == MBX_SUCCESS) {
@@ -13272,7 +13272,7 @@ lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq,
msecs_to_jiffies(timeout * 1000));
spin_lock_irqsave(&phba->hbalock, flag);
- pmboxq->context3 = NULL;
+ pmboxq->ctx_u.mbox_wait = NULL;
/*
* if LPFC_MBX_WAKE flag is set the mailbox is completed
* else do not free the resources.
@@ -13813,10 +13813,10 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id)
lpfc_sli_pcimem_bcopy(mbox, pmbox,
MAILBOX_CMD_SIZE);
if (pmb->out_ext_byte_len &&
- pmb->ctx_buf)
+ pmb->ext_buf)
lpfc_sli_pcimem_bcopy(
phba->mbox_ext,
- pmb->ctx_buf,
+ pmb->ext_buf,
pmb->out_ext_byte_len);
}
if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) {
@@ -13830,10 +13830,8 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id)
pmbox->un.varWords[0], 0);
if (!pmbox->mbxStatus) {
- mp = (struct lpfc_dmabuf *)
- (pmb->ctx_buf);
- ndlp = (struct lpfc_nodelist *)
- pmb->ctx_ndlp;
+ mp = pmb->ctx_buf;
+ ndlp = pmb->ctx_ndlp;
/* Reg_LOGIN of dflt RPI was
* successful. new lets get
@@ -14340,8 +14338,8 @@ lpfc_sli4_sp_handle_mbox_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe)
mcqe_status,
pmbox->un.varWords[0], 0);
if (mcqe_status == MB_CQE_STATUS_SUCCESS) {
- mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ mp = pmb->ctx_buf;
+ ndlp = pmb->ctx_ndlp;
/* Reg_LOGIN of dflt RPI was successful. Mark the
* node as having an UNREG_LOGIN in progress to stop
@@ -19823,14 +19821,15 @@ lpfc_sli4_remove_rpis(struct lpfc_hba *phba)
* lpfc_sli4_resume_rpi - Remove the rpi bitmask region
* @ndlp: pointer to lpfc nodelist data structure.
* @cmpl: completion call-back.
- * @arg: data to load as MBox 'caller buffer information'
+ * @iocbq: data to load as mbox ctx_u information
*
* This routine is invoked to remove the memory region that
* provided rpi via a bitmask.
**/
int
lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp,
- void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *arg)
+ void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *),
+ struct lpfc_iocbq *iocbq)
{
LPFC_MBOXQ_t *mboxq;
struct lpfc_hba *phba = ndlp->phba;
@@ -19859,7 +19858,7 @@ lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp,
lpfc_resume_rpi(mboxq, ndlp);
if (cmpl) {
mboxq->mbox_cmpl = cmpl;
- mboxq->ctx_buf = arg;
+ mboxq->ctx_u.save_iocb = iocbq;
} else
mboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
mboxq->ctx_ndlp = ndlp;
@@ -20676,7 +20675,7 @@ lpfc_sli4_get_config_region23(struct lpfc_hba *phba, char *rgn23_data)
if (lpfc_sli4_dump_cfg_rg23(phba, mboxq))
goto out;
mqe = &mboxq->u.mqe;
- mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
+ mp = mboxq->ctx_buf;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
if (rc)
goto out;
@@ -21035,7 +21034,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport)
(mb->u.mb.mbxCommand == MBX_REG_VPI))
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- act_mbx_ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
+ act_mbx_ndlp = mb->ctx_ndlp;
/* This reference is local to this routine. The
* reference is removed at routine exit.
@@ -21064,7 +21063,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport)
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
+ ndlp = mb->ctx_ndlp;
/* Unregister the RPI when mailbox complete */
mb->mbox_flag |= LPFC_MBX_IMED_UNREG;
restart_loop = 1;
@@ -21084,7 +21083,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport)
while (!list_empty(&mbox_cmd_list)) {
list_remove_head(&mbox_cmd_list, mb, LPFC_MBOXQ_t, list);
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
+ ndlp = mb->ctx_ndlp;
mb->ctx_ndlp = NULL;
if (ndlp) {
spin_lock(&ndlp->lock);
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index c911a39cb46b8c..cf7c42ec030679 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -1,7 +1,7 @@
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2004-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
@@ -182,11 +182,29 @@ typedef struct lpfcMboxq {
struct lpfc_mqe mqe;
} u;
struct lpfc_vport *vport; /* virtual port pointer */
- void *ctx_ndlp; /* an lpfc_nodelist pointer */
- void *ctx_buf; /* an lpfc_dmabuf pointer */
- void *context3; /* a generic pointer. Code must
- * accommodate the actual datatype.
- */
+ struct lpfc_nodelist *ctx_ndlp; /* caller ndlp pointer */
+ struct lpfc_dmabuf *ctx_buf; /* caller buffer information */
+ void *ext_buf; /* extended buffer for extended mbox
+ * cmds. Not a generic pointer.
+ * Use for storing virtual address.
+ */
+
+ /* Pointers that are seldom used during mbox execution, but require
+ * a saved context.
+ */
+ union {
+ unsigned long ox_rx_id; /* Used in els_rsp_rls_acc */
+ struct lpfc_rdp_context *rdp; /* Used in get_rdp_info */
+ struct lpfc_lcb_context *lcb; /* Used in set_beacon */
+ struct completion *mbox_wait; /* Used in issue_mbox_wait */
+ struct bsg_job_data *dd_data; /* Used in bsg_issue_mbox_cmpl
+ * and
+ * bsg_issue_mbox_ext_handle_job
+ */
+ struct lpfc_iocbq *save_iocb; /* Used in defer_plogi_acc and
+ * lpfc_mbx_cmpl_resume_rpi
+ */
+ } ctx_u;
void (*mbox_cmpl) (struct lpfc_hba *, struct lpfcMboxq *);
uint8_t mbox_flag;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 2541a8fba093fa..c1e9ec0243bacb 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -1,7 +1,7 @@
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2009-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
@@ -1118,8 +1118,9 @@ void lpfc_sli4_free_rpi(struct lpfc_hba *, int);
void lpfc_sli4_remove_rpis(struct lpfc_hba *);
void lpfc_sli4_async_event_proc(struct lpfc_hba *);
void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *);
-int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
- void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *);
+int lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp,
+ void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *),
+ struct lpfc_iocbq *iocbq);
void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba);
void lpfc_sli4_nvme_pci_offline_aborted(struct lpfc_hba *phba,
struct lpfc_io_buf *lpfc_ncmd);
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 56f5889dbaf934..915f2f11fb5585 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
* included with this package. *
*******************************************************************/
-#define LPFC_DRIVER_VERSION "14.4.0.0"
+#define LPFC_DRIVER_VERSION "14.4.0.1"
#define LPFC_DRIVER_NAME "lpfc"
/* Used for SLI 2/3 */
diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c
index 0f79840b949861..4439167a51882d 100644
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -166,7 +166,7 @@ lpfc_vport_sparm(struct lpfc_hba *phba, struct lpfc_vport *vport)
}
}
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm));
memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName,
sizeof (struct lpfc_name));
@@ -674,10 +674,6 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
lpfc_free_sysfs_attr(vport);
lpfc_debugfs_terminate(vport);
- /* Remove FC host to break driver binding. */
- fc_remove_host(shost);
- scsi_remove_host(shost);
-
/* Send the DA_ID and Fabric LOGO to cleanup Nameserver entries. */
ndlp = lpfc_findnode_did(vport, Fabric_DID);
if (!ndlp)
@@ -721,6 +717,10 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
skip_logo:
+ /* Remove FC host to break driver binding. */
+ fc_remove_host(shost);
+ scsi_remove_host(shost);
+
lpfc_cleanup(vport);
/* Remove scsi host now. The nodes are cleaned up. */
diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c
index 0380996b5ad27a..55d590b919476e 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_app.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_app.c
@@ -1644,7 +1644,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job)
if ((mpirep_offset != 0xFF) &&
drv_bufs[mpirep_offset].bsg_buf_len) {
drv_buf_iter = &drv_bufs[mpirep_offset];
- drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) - 1 +
+ drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) +
mrioc->reply_sz);
bsg_reply_buf = kzalloc(drv_buf_iter->kern_buf_len, GFP_KERNEL);
diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c
index d32ad46318cb09..dabb91f0f75df9 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_transport.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c
@@ -7,6 +7,8 @@
*
*/
+#include <linux/vmalloc.h>
+
#include "mpi3mr.h"
/**
diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c
index ca2e932dd9b701..f684eb5e04898a 100644
--- a/drivers/scsi/myrb.c
+++ b/drivers/scsi/myrb.c
@@ -1775,9 +1775,9 @@ static ssize_t raid_state_show(struct device *dev,
name = myrb_devstate_name(ldev_info->state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->state);
} else {
struct myrb_pdev_state *pdev_info = sdev->hostdata;
@@ -1796,9 +1796,9 @@ static ssize_t raid_state_show(struct device *dev,
else
name = myrb_devstate_name(pdev_info->state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
pdev_info->state);
}
return ret;
@@ -1886,11 +1886,11 @@ static ssize_t raid_level_show(struct device *dev,
name = myrb_raidlevel_name(ldev_info->raid_level);
if (!name)
- return snprintf(buf, 32, "Invalid (%02X)\n",
+ return snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->state);
- return snprintf(buf, 32, "%s\n", name);
+ return snprintf(buf, 64, "%s\n", name);
}
- return snprintf(buf, 32, "Physical Drive\n");
+ return snprintf(buf, 64, "Physical Drive\n");
}
static DEVICE_ATTR_RO(raid_level);
@@ -1903,15 +1903,15 @@ static ssize_t rebuild_show(struct device *dev,
unsigned char status;
if (sdev->channel < myrb_logical_channel(sdev->host))
- return snprintf(buf, 32, "physical device - not rebuilding\n");
+ return snprintf(buf, 64, "physical device - not rebuilding\n");
status = myrb_get_rbld_progress(cb, &rbld_buf);
if (rbld_buf.ldev_num != sdev->id ||
status != MYRB_STATUS_SUCCESS)
- return snprintf(buf, 32, "not rebuilding\n");
+ return snprintf(buf, 64, "not rebuilding\n");
- return snprintf(buf, 32, "rebuilding block %u of %u\n",
+ return snprintf(buf, 64, "rebuilding block %u of %u\n",
rbld_buf.ldev_size - rbld_buf.blocks_left,
rbld_buf.ldev_size);
}
diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c
index a1eec65a9713f5..e824be9d9bbb94 100644
--- a/drivers/scsi/myrs.c
+++ b/drivers/scsi/myrs.c
@@ -947,9 +947,9 @@ static ssize_t raid_state_show(struct device *dev,
name = myrs_devstate_name(ldev_info->dev_state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->dev_state);
} else {
struct myrs_pdev_info *pdev_info;
@@ -958,9 +958,9 @@ static ssize_t raid_state_show(struct device *dev,
pdev_info = sdev->hostdata;
name = myrs_devstate_name(pdev_info->dev_state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
pdev_info->dev_state);
}
return ret;
@@ -1066,13 +1066,13 @@ static ssize_t raid_level_show(struct device *dev,
ldev_info = sdev->hostdata;
name = myrs_raid_level_name(ldev_info->raid_level);
if (!name)
- return snprintf(buf, 32, "Invalid (%02X)\n",
+ return snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->dev_state);
} else
name = myrs_raid_level_name(MYRS_RAID_PHYSICAL);
- return snprintf(buf, 32, "%s\n", name);
+ return snprintf(buf, 64, "%s\n", name);
}
static DEVICE_ATTR_RO(raid_level);
@@ -1086,7 +1086,7 @@ static ssize_t rebuild_show(struct device *dev,
unsigned char status;
if (sdev->channel < cs->ctlr_info->physchan_present)
- return snprintf(buf, 32, "physical device - not rebuilding\n");
+ return snprintf(buf, 64, "physical device - not rebuilding\n");
ldev_info = sdev->hostdata;
ldev_num = ldev_info->ldev_num;
@@ -1098,11 +1098,11 @@ static ssize_t rebuild_show(struct device *dev,
return -EIO;
}
if (ldev_info->rbld_active) {
- return snprintf(buf, 32, "rebuilding block %zu of %zu\n",
+ return snprintf(buf, 64, "rebuilding block %zu of %zu\n",
(size_t)ldev_info->rbld_lba,
(size_t)ldev_info->cfg_devsize);
} else
- return snprintf(buf, 32, "not rebuilding\n");
+ return snprintf(buf, 64, "not rebuilding\n");
}
static ssize_t rebuild_store(struct device *dev,
@@ -1190,7 +1190,7 @@ static ssize_t consistency_check_show(struct device *dev,
unsigned short ldev_num;
if (sdev->channel < cs->ctlr_info->physchan_present)
- return snprintf(buf, 32, "physical device - not checking\n");
+ return snprintf(buf, 64, "physical device - not checking\n");
ldev_info = sdev->hostdata;
if (!ldev_info)
@@ -1198,11 +1198,11 @@ static ssize_t consistency_check_show(struct device *dev,
ldev_num = ldev_info->ldev_num;
myrs_get_ldev_info(cs, ldev_num, ldev_info);
if (ldev_info->cc_active)
- return snprintf(buf, 32, "checking block %zu of %zu\n",
+ return snprintf(buf, 64, "checking block %zu of %zu\n",
(size_t)ldev_info->cc_lba,
(size_t)ldev_info->cfg_devsize);
else
- return snprintf(buf, 32, "not checking\n");
+ return snprintf(buf, 64, "not checking\n");
}
static ssize_t consistency_check_store(struct device *dev,
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index e8bcc3a88732a1..0614b7e366b776 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -61,7 +61,9 @@ static atomic_t pmcraid_adapter_count = ATOMIC_INIT(0);
* pmcraid_minor - minor number(s) to use
*/
static unsigned int pmcraid_major;
-static struct class *pmcraid_class;
+static const struct class pmcraid_class = {
+ .name = PMCRAID_DEVFILE,
+};
static DECLARE_BITMAP(pmcraid_minor, PMCRAID_MAX_ADAPTERS);
/*
@@ -4723,7 +4725,7 @@ static int pmcraid_setup_chrdev(struct pmcraid_instance *pinstance)
if (error)
pmcraid_release_minor(minor);
else
- device_create(pmcraid_class, NULL, MKDEV(pmcraid_major, minor),
+ device_create(&pmcraid_class, NULL, MKDEV(pmcraid_major, minor),
NULL, "%s%u", PMCRAID_DEVFILE, minor);
return error;
}
@@ -4739,7 +4741,7 @@ static int pmcraid_setup_chrdev(struct pmcraid_instance *pinstance)
static void pmcraid_release_chrdev(struct pmcraid_instance *pinstance)
{
pmcraid_release_minor(MINOR(pinstance->cdev.dev));
- device_destroy(pmcraid_class,
+ device_destroy(&pmcraid_class,
MKDEV(pmcraid_major, MINOR(pinstance->cdev.dev)));
cdev_del(&pinstance->cdev);
}
@@ -5390,10 +5392,10 @@ static int __init pmcraid_init(void)
}
pmcraid_major = MAJOR(dev);
- pmcraid_class = class_create(PMCRAID_DEVFILE);
- if (IS_ERR(pmcraid_class)) {
- error = PTR_ERR(pmcraid_class);
+ error = class_register(&pmcraid_class);
+
+ if (error) {
pmcraid_err("failed to register with sysfs, error = %x\n",
error);
goto out_unreg_chrdev;
@@ -5402,7 +5404,7 @@ static int __init pmcraid_init(void)
error = pmcraid_netlink_init();
if (error) {
- class_destroy(pmcraid_class);
+ class_unregister(&pmcraid_class);
goto out_unreg_chrdev;
}
@@ -5413,7 +5415,7 @@ static int __init pmcraid_init(void)
pmcraid_err("failed to register pmcraid driver, error = %x\n",
error);
- class_destroy(pmcraid_class);
+ class_unregister(&pmcraid_class);
pmcraid_netlink_release();
out_unreg_chrdev:
@@ -5432,7 +5434,7 @@ static void __exit pmcraid_exit(void)
unregister_chrdev_region(MKDEV(pmcraid_major, 0),
PMCRAID_MAX_ADAPTERS);
pci_unregister_driver(&pmcraid_driver);
- class_destroy(pmcraid_class);
+ class_unregister(&pmcraid_class);
}
module_init(pmcraid_init);
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 44449c70a375f3..76eeba435fd046 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -2741,7 +2741,13 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
return;
if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
- qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
+ /* Will wait for wind down of adapter */
+ ql_dbg(ql_dbg_aer, fcport->vha, 0x900c,
+ "%s pci offline detected (id %06x)\n", __func__,
+ fcport->d_id.b24);
+ qla_pci_set_eeh_busy(fcport->vha);
+ qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24,
+ 0, WAIT_TARGET);
return;
}
}
@@ -2763,7 +2769,11 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
vha = fcport->vha;
if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
- qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
+ /* Will wait for wind down of adapter */
+ ql_dbg(ql_dbg_aer, fcport->vha, 0x900b,
+ "%s pci offline detected (id %06x)\n", __func__,
+ fcport->d_id.b24);
+ qla_pci_set_eeh_busy(vha);
qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24,
0, WAIT_TARGET);
return;
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index deb642607deb6f..2f49baf131e26f 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -82,7 +82,7 @@ typedef union {
#include "qla_nvme.h"
#define QLA2XXX_DRIVER_NAME "qla2xxx"
#define QLA2XXX_APIDEV "ql2xapidev"
-#define QLA2XXX_MANUFACTURER "Marvell Semiconductor, Inc."
+#define QLA2XXX_MANUFACTURER "Marvell"
/*
* We have MAILBOX_REGISTER_COUNT sized arrays in a few places,
diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c
index 26e6b3e3af4317..dcde55c8ee5dea 100644
--- a/drivers/scsi/qla2xxx/qla_edif.c
+++ b/drivers/scsi/qla2xxx/qla_edif.c
@@ -1100,7 +1100,7 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
if (fcport->edif.enable) {
- if (pcnt > app_req.num_ports)
+ if (pcnt >= app_req.num_ports)
break;
app_reply->elem[pcnt].rekey_count =
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 09cb9413670a5e..7309310d2ab943 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -44,7 +44,7 @@ extern int qla2x00_fabric_login(scsi_qla_host_t *, fc_port_t *, uint16_t *);
extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *);
extern int qla24xx_els_dcmd_iocb(scsi_qla_host_t *, int, port_id_t);
-extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *, bool);
+extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *);
extern void qla2x00_els_dcmd2_free(scsi_qla_host_t *vha,
struct els_plogi *els_plogi);
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index a314cfc5b263f2..8377624d76c98e 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1193,8 +1193,12 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport)
return rval;
done_free_sp:
- /* ref: INIT */
- kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ /*
+ * use qla24xx_async_gnl_sp_done to purge all pending gnl request.
+ * kref_put is call behind the scene.
+ */
+ sp->u.iocb_cmd.u.mbx.in_mb[0] = MBS_COMMAND_ERROR;
+ qla24xx_async_gnl_sp_done(sp, QLA_COMMAND_ERROR);
fcport->flags &= ~(FCF_ASYNC_SENT);
done:
fcport->flags &= ~(FCF_ASYNC_ACTIVE);
@@ -2665,6 +2669,40 @@ exit:
return rval;
}
+static void qla_enable_fce_trace(scsi_qla_host_t *vha)
+{
+ int rval;
+ struct qla_hw_data *ha = vha->hw;
+
+ if (ha->fce) {
+ ha->flags.fce_enabled = 1;
+ memset(ha->fce, 0, fce_calc_size(ha->fce_bufs));
+ rval = qla2x00_enable_fce_trace(vha,
+ ha->fce_dma, ha->fce_bufs, ha->fce_mb, &ha->fce_bufs);
+
+ if (rval) {
+ ql_log(ql_log_warn, vha, 0x8033,
+ "Unable to reinitialize FCE (%d).\n", rval);
+ ha->flags.fce_enabled = 0;
+ }
+ }
+}
+
+static void qla_enable_eft_trace(scsi_qla_host_t *vha)
+{
+ int rval;
+ struct qla_hw_data *ha = vha->hw;
+
+ if (ha->eft) {
+ memset(ha->eft, 0, EFT_SIZE);
+ rval = qla2x00_enable_eft_trace(vha, ha->eft_dma, EFT_NUM_BUFFERS);
+
+ if (rval) {
+ ql_log(ql_log_warn, vha, 0x8034,
+ "Unable to reinitialize EFT (%d).\n", rval);
+ }
+ }
+}
/*
* qla2x00_initialize_adapter
* Initialize board.
@@ -3668,9 +3706,8 @@ qla24xx_chip_diag(scsi_qla_host_t *vha)
}
static void
-qla2x00_init_fce_trace(scsi_qla_host_t *vha)
+qla2x00_alloc_fce_trace(scsi_qla_host_t *vha)
{
- int rval;
dma_addr_t tc_dma;
void *tc;
struct qla_hw_data *ha = vha->hw;
@@ -3699,27 +3736,17 @@ qla2x00_init_fce_trace(scsi_qla_host_t *vha)
return;
}
- rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS,
- ha->fce_mb, &ha->fce_bufs);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x00bf,
- "Unable to initialize FCE (%d).\n", rval);
- dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc, tc_dma);
- return;
- }
-
ql_dbg(ql_dbg_init, vha, 0x00c0,
"Allocated (%d KB) for FCE...\n", FCE_SIZE / 1024);
- ha->flags.fce_enabled = 1;
ha->fce_dma = tc_dma;
ha->fce = tc;
+ ha->fce_bufs = FCE_NUM_BUFFERS;
}
static void
-qla2x00_init_eft_trace(scsi_qla_host_t *vha)
+qla2x00_alloc_eft_trace(scsi_qla_host_t *vha)
{
- int rval;
dma_addr_t tc_dma;
void *tc;
struct qla_hw_data *ha = vha->hw;
@@ -3744,14 +3771,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha)
return;
}
- rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x00c2,
- "Unable to initialize EFT (%d).\n", rval);
- dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc, tc_dma);
- return;
- }
-
ql_dbg(ql_dbg_init, vha, 0x00c3,
"Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024);
@@ -3759,13 +3778,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha)
ha->eft = tc;
}
-static void
-qla2x00_alloc_offload_mem(scsi_qla_host_t *vha)
-{
- qla2x00_init_fce_trace(vha);
- qla2x00_init_eft_trace(vha);
-}
-
void
qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
{
@@ -3820,10 +3832,10 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
if (ha->tgt.atio_ring)
mq_size += ha->tgt.atio_q_length * sizeof(request_t);
- qla2x00_init_fce_trace(vha);
+ qla2x00_alloc_fce_trace(vha);
if (ha->fce)
fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE;
- qla2x00_init_eft_trace(vha);
+ qla2x00_alloc_eft_trace(vha);
if (ha->eft)
eft_size = EFT_SIZE;
}
@@ -4253,7 +4265,6 @@ qla2x00_setup_chip(scsi_qla_host_t *vha)
struct qla_hw_data *ha = vha->hw;
struct device_reg_2xxx __iomem *reg = &ha->iobase->isp;
unsigned long flags;
- uint16_t fw_major_version;
int done_once = 0;
if (IS_P3P_TYPE(ha)) {
@@ -4320,7 +4331,6 @@ execute_fw_with_lr:
goto failed;
enable_82xx_npiv:
- fw_major_version = ha->fw_major_version;
if (IS_P3P_TYPE(ha))
qla82xx_check_md_needed(vha);
else
@@ -4349,12 +4359,11 @@ enable_82xx_npiv:
if (rval != QLA_SUCCESS)
goto failed;
- if (!fw_major_version && !(IS_P3P_TYPE(ha)))
- qla2x00_alloc_offload_mem(vha);
-
if (ql2xallocfwdump && !(IS_P3P_TYPE(ha)))
qla2x00_alloc_fw_dump(vha);
+ qla_enable_fce_trace(vha);
+ qla_enable_eft_trace(vha);
} else {
goto failed;
}
@@ -7487,12 +7496,12 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
int
qla2x00_abort_isp(scsi_qla_host_t *vha)
{
- int rval;
uint8_t status = 0;
struct qla_hw_data *ha = vha->hw;
struct scsi_qla_host *vp, *tvp;
struct req_que *req = ha->req_q_map[0];
unsigned long flags;
+ fc_port_t *fcport;
if (vha->flags.online) {
qla2x00_abort_isp_cleanup(vha);
@@ -7561,6 +7570,15 @@ qla2x00_abort_isp(scsi_qla_host_t *vha)
"ISP Abort - ISP reg disconnect post nvmram config, exiting.\n");
return status;
}
+
+ /* User may have updated [fcp|nvme] prefer in flash */
+ list_for_each_entry(fcport, &vha->vp_fcports, list) {
+ if (NVME_PRIORITY(ha, fcport))
+ fcport->do_prli_nvme = 1;
+ else
+ fcport->do_prli_nvme = 0;
+ }
+
if (!qla2x00_restart_isp(vha)) {
clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags);
@@ -7581,31 +7599,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha)
if (IS_QLA81XX(ha) || IS_QLA8031(ha))
qla2x00_get_fw_version(vha);
- if (ha->fce) {
- ha->flags.fce_enabled = 1;
- memset(ha->fce, 0,
- fce_calc_size(ha->fce_bufs));
- rval = qla2x00_enable_fce_trace(vha,
- ha->fce_dma, ha->fce_bufs, ha->fce_mb,
- &ha->fce_bufs);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x8033,
- "Unable to reinitialize FCE "
- "(%d).\n", rval);
- ha->flags.fce_enabled = 0;
- }
- }
- if (ha->eft) {
- memset(ha->eft, 0, EFT_SIZE);
- rval = qla2x00_enable_eft_trace(vha,
- ha->eft_dma, EFT_NUM_BUFFERS);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x8034,
- "Unable to reinitialize EFT "
- "(%d).\n", rval);
- }
- }
} else { /* failed the ISP abort */
vha->flags.online = 1;
if (test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) {
@@ -7655,6 +7649,14 @@ qla2x00_abort_isp(scsi_qla_host_t *vha)
atomic_inc(&vp->vref_count);
spin_unlock_irqrestore(&ha->vport_slock, flags);
+ /* User may have updated [fcp|nvme] prefer in flash */
+ list_for_each_entry(fcport, &vp->vp_fcports, list) {
+ if (NVME_PRIORITY(ha, fcport))
+ fcport->do_prli_nvme = 1;
+ else
+ fcport->do_prli_nvme = 0;
+ }
+
qla2x00_vp_abort_isp(vp);
spin_lock_irqsave(&ha->vport_slock, flags);
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index df90169f82440a..0b41e8a0660262 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -2587,6 +2587,33 @@ void
qla2x00_sp_release(struct kref *kref)
{
struct srb *sp = container_of(kref, struct srb, cmd_kref);
+ struct scsi_qla_host *vha = sp->vha;
+
+ switch (sp->type) {
+ case SRB_CT_PTHRU_CMD:
+ /* GPSC & GFPNID use fcport->ct_desc.ct_sns for both req & rsp */
+ if (sp->u.iocb_cmd.u.ctarg.req &&
+ (!sp->fcport ||
+ sp->u.iocb_cmd.u.ctarg.req != sp->fcport->ct_desc.ct_sns)) {
+ dma_free_coherent(&vha->hw->pdev->dev,
+ sp->u.iocb_cmd.u.ctarg.req_allocated_size,
+ sp->u.iocb_cmd.u.ctarg.req,
+ sp->u.iocb_cmd.u.ctarg.req_dma);
+ sp->u.iocb_cmd.u.ctarg.req = NULL;
+ }
+ if (sp->u.iocb_cmd.u.ctarg.rsp &&
+ (!sp->fcport ||
+ sp->u.iocb_cmd.u.ctarg.rsp != sp->fcport->ct_desc.ct_sns)) {
+ dma_free_coherent(&vha->hw->pdev->dev,
+ sp->u.iocb_cmd.u.ctarg.rsp_allocated_size,
+ sp->u.iocb_cmd.u.ctarg.rsp,
+ sp->u.iocb_cmd.u.ctarg.rsp_dma);
+ sp->u.iocb_cmd.u.ctarg.rsp = NULL;
+ }
+ break;
+ default:
+ break;
+ }
sp->free(sp);
}
@@ -2610,7 +2637,8 @@ static void qla2x00_els_dcmd_sp_free(srb_t *sp)
{
struct srb_iocb *elsio = &sp->u.iocb_cmd;
- kfree(sp->fcport);
+ if (sp->fcport)
+ qla2x00_free_fcport(sp->fcport);
if (elsio->u.els_logo.els_logo_pyld)
dma_free_coherent(&sp->vha->hw->pdev->dev, DMA_POOL_SIZE,
@@ -2692,7 +2720,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode,
*/
sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
if (!sp) {
- kfree(fcport);
+ qla2x00_free_fcport(fcport);
ql_log(ql_log_info, vha, 0x70e6,
"SRB allocation failed\n");
return -ENOMEM;
@@ -2723,6 +2751,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode,
if (!elsio->u.els_logo.els_logo_pyld) {
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ qla2x00_free_fcport(fcport);
return QLA_FUNCTION_FAILED;
}
@@ -2747,6 +2776,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode,
if (rval != QLA_SUCCESS) {
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ qla2x00_free_fcport(fcport);
return QLA_FUNCTION_FAILED;
}
@@ -3012,7 +3042,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res)
int
qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
- fc_port_t *fcport, bool wait)
+ fc_port_t *fcport)
{
srb_t *sp;
struct srb_iocb *elsio = NULL;
@@ -3027,8 +3057,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
if (!sp) {
ql_log(ql_log_info, vha, 0x70e6,
"SRB allocation failed\n");
- fcport->flags &= ~FCF_ASYNC_ACTIVE;
- return -ENOMEM;
+ goto done;
}
fcport->flags |= FCF_ASYNC_SENT;
@@ -3037,9 +3066,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
ql_dbg(ql_dbg_io, vha, 0x3073,
"%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24);
- if (wait)
- sp->flags = SRB_WAKEUP_ON_COMP;
-
sp->type = SRB_ELS_DCMD;
sp->name = "ELS_DCMD";
sp->fcport = fcport;
@@ -3055,7 +3081,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
if (!elsio->u.els_plogi.els_plogi_pyld) {
rval = QLA_FUNCTION_FAILED;
- goto out;
+ goto done_free_sp;
}
resp_ptr = elsio->u.els_plogi.els_resp_pyld =
@@ -3064,7 +3090,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
if (!elsio->u.els_plogi.els_resp_pyld) {
rval = QLA_FUNCTION_FAILED;
- goto out;
+ goto done_free_sp;
}
ql_dbg(ql_dbg_io, vha, 0x3073, "PLOGI %p %p\n", ptr, resp_ptr);
@@ -3080,7 +3106,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) {
struct fc_els_flogi *p = ptr;
-
p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC);
}
@@ -3089,10 +3114,11 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
(uint8_t *)elsio->u.els_plogi.els_plogi_pyld,
sizeof(*elsio->u.els_plogi.els_plogi_pyld));
- init_completion(&elsio->u.els_plogi.comp);
rval = qla2x00_start_sp(sp);
if (rval != QLA_SUCCESS) {
- rval = QLA_FUNCTION_FAILED;
+ fcport->flags |= FCF_LOGIN_NEEDED;
+ set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+ goto done_free_sp;
} else {
ql_dbg(ql_dbg_disc, vha, 0x3074,
"%s PLOGI sent, hdl=%x, loopid=%x, to port_id %06x from port_id %06x\n",
@@ -3100,21 +3126,15 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
fcport->d_id.b24, vha->d_id.b24);
}
- if (wait) {
- wait_for_completion(&elsio->u.els_plogi.comp);
-
- if (elsio->u.els_plogi.comp_status != CS_COMPLETE)
- rval = QLA_FUNCTION_FAILED;
- } else {
- goto done;
- }
+ return rval;
-out:
- fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+done_free_sp:
qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi);
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
done:
+ fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+ qla2x00_set_fcport_disc_state(fcport, DSC_DELETED);
return rval;
}
@@ -3918,7 +3938,7 @@ qla2x00_start_sp(srb_t *sp)
return -EAGAIN;
}
- pkt = __qla2x00_alloc_iocbs(sp->qpair, sp);
+ pkt = qla2x00_alloc_iocbs_ready(sp->qpair, sp);
if (!pkt) {
rval = -EAGAIN;
ql_log(ql_log_warn, vha, 0x700c,
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 21ec32b4fb2809..0cd6f3e1488249 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -194,7 +194,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
if (ha->flags.purge_mbox || chip_reset != ha->chip_reset ||
ha->flags.eeh_busy) {
ql_log(ql_log_warn, vha, 0xd035,
- "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n",
+ "Purge mbox: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n",
ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]);
rval = QLA_ABORTED;
goto premature_exit;
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index dd674378f2f392..1e2f52210f6050 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -4602,6 +4602,7 @@ fail_free_init_cb:
ha->init_cb_dma = 0;
fail_free_vp_map:
kfree(ha->vp_map);
+ ha->vp_map = NULL;
fail:
ql_log(ql_log_fatal, NULL, 0x0030,
"Memory allocation failure.\n");
@@ -5583,7 +5584,7 @@ qla2x00_do_work(struct scsi_qla_host *vha)
break;
case QLA_EVT_ELS_PLOGI:
qla24xx_els_dcmd2_iocb(vha, ELS_DCMD_PLOGI,
- e->u.fcport.fcport, false);
+ e->u.fcport.fcport);
break;
case QLA_EVT_SA_REPLACE:
rc = qla24xx_issue_sa_replace_iocb(vha, e);
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 2ef2dbac0db273..d7551b1443e4a7 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -1062,6 +1062,16 @@ void qlt_free_session_done(struct work_struct *work)
"%s: sess %p logout completed\n", __func__, sess);
}
+ /* check for any straggling io left behind */
+ if (!(sess->flags & FCF_FCP2_DEVICE) &&
+ qla2x00_eh_wait_for_pending_commands(sess->vha, sess->d_id.b24, 0, WAIT_TARGET)) {
+ ql_log(ql_log_warn, vha, 0x3027,
+ "IO not return. Resetting.\n");
+ set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+ qla2xxx_wake_dpc(vha);
+ qla2x00_wait_for_chip_reset(vha);
+ }
+
if (sess->logo_ack_needed) {
sess->logo_ack_needed = 0;
qla24xx_async_notify_ack(vha, sess,
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index d903563e969eb3..7627fd807bc3ed 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -6,9 +6,9 @@
/*
* Driver version
*/
-#define QLA2XXX_VERSION "10.02.09.100-k"
+#define QLA2XXX_VERSION "10.02.09.200-k"
#define QLA_DRIVER_MAJOR_VER 10
#define QLA_DRIVER_MINOR_VER 2
#define QLA_DRIVER_PATCH_VER 9
-#define QLA_DRIVER_BETA_VER 100
+#define QLA_DRIVER_BETA_VER 200
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 2e28e2360c8574..5b3230ef51fe61 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -635,10 +635,9 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
if (blk_queue_add_random(q))
add_disk_randomness(req->q->disk);
- if (!blk_rq_is_passthrough(req)) {
- WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
- cmd->flags &= ~SCMD_INITIALIZED;
- }
+ WARN_ON_ONCE(!blk_rq_is_passthrough(req) &&
+ !(cmd->flags & SCMD_INITIALIZED));
+ cmd->flags = 0;
/*
* Calling rcu_barrier() is not necessary here because the
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 8d06475de17a33..ffd7e7e72933c5 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1642,6 +1642,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel,
}
EXPORT_SYMBOL(scsi_add_device);
+int scsi_resume_device(struct scsi_device *sdev)
+{
+ struct device *dev = &sdev->sdev_gendev;
+ int ret = 0;
+
+ device_lock(dev);
+
+ /*
+ * Bail out if the device or its queue are not running. Otherwise,
+ * the rescan may block waiting for commands to be executed, with us
+ * holding the device lock. This can result in a potential deadlock
+ * in the power management core code when system resume is on-going.
+ */
+ if (sdev->sdev_state != SDEV_RUNNING ||
+ blk_queue_pm_only(sdev->request_queue)) {
+ ret = -EWOULDBLOCK;
+ goto unlock;
+ }
+
+ if (dev->driver && try_module_get(dev->driver->owner)) {
+ struct scsi_driver *drv = to_scsi_driver(dev->driver);
+
+ if (drv->resume)
+ ret = drv->resume(dev);
+ module_put(dev->driver->owner);
+ }
+
+unlock:
+ device_unlock(dev);
+
+ return ret;
+}
+EXPORT_SYMBOL(scsi_resume_device);
+
int scsi_rescan_device(struct scsi_device *sdev)
{
struct device *dev = &sdev->sdev_gendev;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index ccff8f2e2e75bd..65cdc8b77e3585 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3120,6 +3120,7 @@ static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer)
{
struct scsi_device *sdp = sdkp->device;
const struct scsi_io_group_descriptor *desc, *start, *end;
+ u16 permanent_stream_count_old;
struct scsi_sense_hdr sshdr;
struct scsi_mode_data data;
int res;
@@ -3140,12 +3141,13 @@ static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer)
for (desc = start; desc < end; desc++)
if (!desc->st_enble || !sd_is_perm_stream(sdkp, desc - start))
break;
+ permanent_stream_count_old = sdkp->permanent_stream_count;
sdkp->permanent_stream_count = desc - start;
if (sdkp->rscs && sdkp->permanent_stream_count < 2)
sd_printk(KERN_INFO, sdkp,
"Unexpected: RSCS has been set and the permanent stream count is %u\n",
sdkp->permanent_stream_count);
- else if (sdkp->permanent_stream_count)
+ else if (sdkp->permanent_stream_count != permanent_stream_count_old)
sd_printk(KERN_INFO, sdkp, "permanent stream count = %d\n",
sdkp->permanent_stream_count);
}
@@ -3920,7 +3922,7 @@ static int sd_probe(struct device *dev)
error = device_add_disk(dev, gd, NULL);
if (error) {
- put_device(&sdkp->disk_dev);
+ device_unregister(&sdkp->disk_dev);
put_disk(gd);
goto out;
}
@@ -4108,7 +4110,21 @@ static int sd_suspend_runtime(struct device *dev)
return sd_suspend_common(dev, true);
}
-static int sd_resume(struct device *dev, bool runtime)
+static int sd_resume(struct device *dev)
+{
+ struct scsi_disk *sdkp = dev_get_drvdata(dev);
+
+ sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+
+ if (opal_unlock_from_suspend(sdkp->opal_dev)) {
+ sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int sd_resume_common(struct device *dev, bool runtime)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
int ret;
@@ -4124,7 +4140,7 @@ static int sd_resume(struct device *dev, bool runtime)
sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
ret = sd_start_stop_device(sdkp, 1);
if (!ret) {
- opal_unlock_from_suspend(sdkp->opal_dev);
+ sd_resume(dev);
sdkp->suspended = false;
}
@@ -4143,7 +4159,7 @@ static int sd_resume_system(struct device *dev)
return 0;
}
- return sd_resume(dev, false);
+ return sd_resume_common(dev, false);
}
static int sd_resume_runtime(struct device *dev)
@@ -4170,7 +4186,7 @@ static int sd_resume_runtime(struct device *dev)
"Failed to clear sense data\n");
}
- return sd_resume(dev, true);
+ return sd_resume_common(dev, true);
}
static const struct dev_pm_ops sd_pm_ops = {
@@ -4193,6 +4209,7 @@ static struct scsi_driver sd_template = {
.pm = &sd_pm_ops,
},
.rescan = sd_rescan,
+ .resume = sd_resume,
.init_command = sd_init_command,
.uninit_command = sd_uninit_command,
.done = sd_done,
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 86210e4dd0d353..baf870a03ecf6c 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -285,6 +285,7 @@ sg_open(struct inode *inode, struct file *filp)
int dev = iminor(inode);
int flags = filp->f_flags;
struct request_queue *q;
+ struct scsi_device *device;
Sg_device *sdp;
Sg_fd *sfp;
int retval;
@@ -301,11 +302,12 @@ sg_open(struct inode *inode, struct file *filp)
/* This driver's module count bumped by fops_get in <linux/fs.h> */
/* Prevent the device driver from vanishing while we sleep */
- retval = scsi_device_get(sdp->device);
+ device = sdp->device;
+ retval = scsi_device_get(device);
if (retval)
goto sg_put;
- retval = scsi_autopm_get_device(sdp->device);
+ retval = scsi_autopm_get_device(device);
if (retval)
goto sdp_put;
@@ -313,7 +315,7 @@ sg_open(struct inode *inode, struct file *filp)
* check if O_NONBLOCK. Permits SCSI commands to be issued
* during error recovery. Tread carefully. */
if (!((flags & O_NONBLOCK) ||
- scsi_block_when_processing_errors(sdp->device))) {
+ scsi_block_when_processing_errors(device))) {
retval = -ENXIO;
/* we are in error recovery for this device */
goto error_out;
@@ -344,7 +346,7 @@ sg_open(struct inode *inode, struct file *filp)
if (sdp->open_cnt < 1) { /* no existing opens */
sdp->sgdebug = 0;
- q = sdp->device->request_queue;
+ q = device->request_queue;
sdp->sg_tablesize = queue_max_segments(q);
}
sfp = sg_add_sfp(sdp);
@@ -370,10 +372,11 @@ out_undo:
error_mutex_locked:
mutex_unlock(&sdp->open_rel_lock);
error_out:
- scsi_autopm_put_device(sdp->device);
+ scsi_autopm_put_device(device);
sdp_put:
- scsi_device_put(sdp->device);
- goto sg_put;
+ kref_put(&sdp->d_ref, sg_device_destroy);
+ scsi_device_put(device);
+ return retval;
}
/* Release resources associated with a successful sg_open()
@@ -1424,7 +1427,9 @@ static const struct file_operations sg_fops = {
.llseek = no_llseek,
};
-static struct class *sg_sysfs_class;
+static const struct class sg_sysfs_class = {
+ .name = "scsi_generic"
+};
static int sg_sysfs_valid = 0;
@@ -1526,7 +1531,7 @@ sg_add_device(struct device *cl_dev)
if (sg_sysfs_valid) {
struct device *sg_class_member;
- sg_class_member = device_create(sg_sysfs_class, cl_dev->parent,
+ sg_class_member = device_create(&sg_sysfs_class, cl_dev->parent,
MKDEV(SCSI_GENERIC_MAJOR,
sdp->index),
sdp, "%s", sdp->name);
@@ -1616,7 +1621,7 @@ sg_remove_device(struct device *cl_dev)
read_unlock_irqrestore(&sdp->sfd_lock, iflags);
sysfs_remove_link(&scsidp->sdev_gendev.kobj, "generic");
- device_destroy(sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index));
+ device_destroy(&sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index));
cdev_del(sdp->cdev);
sdp->cdev = NULL;
@@ -1687,11 +1692,9 @@ init_sg(void)
SG_MAX_DEVS, "sg");
if (rc)
return rc;
- sg_sysfs_class = class_create("scsi_generic");
- if ( IS_ERR(sg_sysfs_class) ) {
- rc = PTR_ERR(sg_sysfs_class);
+ rc = class_register(&sg_sysfs_class);
+ if (rc)
goto err_out;
- }
sg_sysfs_valid = 1;
rc = scsi_register_interface(&sg_interface);
if (0 == rc) {
@@ -1700,7 +1703,7 @@ init_sg(void)
#endif /* CONFIG_SCSI_PROC_FS */
return 0;
}
- class_destroy(sg_sysfs_class);
+ class_unregister(&sg_sysfs_class);
register_sg_sysctls();
err_out:
unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS);
@@ -1715,7 +1718,7 @@ exit_sg(void)
remove_proc_subtree("scsi/sg", NULL);
#endif /* CONFIG_SCSI_PROC_FS */
scsi_unregister_interface(&sg_interface);
- class_destroy(sg_sysfs_class);
+ class_unregister(&sg_sysfs_class);
sg_sysfs_valid = 0;
unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0),
SG_MAX_DEVS);
@@ -2207,6 +2210,7 @@ sg_remove_sfp_usercontext(struct work_struct *work)
{
struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work);
struct sg_device *sdp = sfp->parentdp;
+ struct scsi_device *device = sdp->device;
Sg_request *srp;
unsigned long iflags;
@@ -2232,8 +2236,8 @@ sg_remove_sfp_usercontext(struct work_struct *work)
"sg_remove_sfp: sfp=0x%p\n", sfp));
kfree(sfp);
- scsi_device_put(sdp->device);
kref_put(&sdp->d_ref, sg_device_destroy);
+ scsi_device_put(device);
module_put(THIS_MODULE);
}
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 338aa8c429682c..5a9bcf8e0792e5 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -87,7 +87,7 @@ static int try_rdio = 1;
static int try_wdio = 1;
static int debug_flag;
-static struct class st_sysfs_class;
+static const struct class st_sysfs_class;
static const struct attribute_group *st_dev_groups[];
static const struct attribute_group *st_drv_groups[];
@@ -4438,7 +4438,7 @@ static void scsi_tape_release(struct kref *kref)
return;
}
-static struct class st_sysfs_class = {
+static const struct class st_sysfs_class = {
.name = "scsi_tape",
.dev_groups = st_dev_groups,
};
diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig
index 50c664b65f4d44..1b7afb19ccd637 100644
--- a/drivers/soc/mediatek/Kconfig
+++ b/drivers/soc/mediatek/Kconfig
@@ -72,6 +72,7 @@ config MTK_SOCINFO
tristate "MediaTek SoC Information"
default y
depends on NVMEM_MTK_EFUSE
+ select SOC_BUS
help
The MediaTek SoC Information (mtk-socinfo) driver provides
information about the SoC to the userspace including the
diff --git a/drivers/soc/mediatek/mtk-svs.c b/drivers/soc/mediatek/mtk-svs.c
index c832f5c670bcf0..9a91298c125397 100644
--- a/drivers/soc/mediatek/mtk-svs.c
+++ b/drivers/soc/mediatek/mtk-svs.c
@@ -1768,6 +1768,7 @@ static int svs_bank_resource_setup(struct svs_platform *svsp)
const struct svs_bank_pdata *bdata;
struct svs_bank *svsb;
struct dev_pm_opp *opp;
+ char tz_name_buf[20];
unsigned long freq;
int count, ret;
u32 idx, i;
@@ -1819,10 +1820,12 @@ static int svs_bank_resource_setup(struct svs_platform *svsp)
}
if (!IS_ERR_OR_NULL(bdata->tzone_name)) {
- svsb->tzd = thermal_zone_get_zone_by_name(bdata->tzone_name);
+ snprintf(tz_name_buf, ARRAY_SIZE(tz_name_buf),
+ "%s-thermal", bdata->tzone_name);
+ svsb->tzd = thermal_zone_get_zone_by_name(tz_name_buf);
if (IS_ERR(svsb->tzd)) {
dev_err(svsb->dev, "cannot get \"%s\" thermal zone\n",
- bdata->tzone_name);
+ tz_name_buf);
return PTR_ERR(svsb->tzd);
}
}
diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c
index 7cd24bd8e2248e..6bcf8e75273c4e 100644
--- a/drivers/soundwire/amd_manager.c
+++ b/drivers/soundwire/amd_manager.c
@@ -130,6 +130,19 @@ static void amd_sdw_set_frameshape(struct amd_sdw_manager *amd_manager)
writel(frame_size, amd_manager->mmio + ACP_SW_FRAMESIZE);
}
+static void amd_sdw_wake_enable(struct amd_sdw_manager *amd_manager, bool enable)
+{
+ u32 wake_ctrl;
+
+ wake_ctrl = readl(amd_manager->mmio + ACP_SW_STATE_CHANGE_STATUS_MASK_8TO11);
+ if (enable)
+ wake_ctrl |= AMD_SDW_WAKE_INTR_MASK;
+ else
+ wake_ctrl &= ~AMD_SDW_WAKE_INTR_MASK;
+
+ writel(wake_ctrl, amd_manager->mmio + ACP_SW_STATE_CHANGE_STATUS_MASK_8TO11);
+}
+
static void amd_sdw_ctl_word_prep(u32 *lower_word, u32 *upper_word, struct sdw_msg *msg,
int cmd_offset)
{
@@ -1095,6 +1108,7 @@ static int __maybe_unused amd_suspend(struct device *dev)
}
if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) {
+ amd_sdw_wake_enable(amd_manager, false);
return amd_sdw_clock_stop(amd_manager);
} else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) {
/*
@@ -1121,6 +1135,7 @@ static int __maybe_unused amd_suspend_runtime(struct device *dev)
return 0;
}
if (amd_manager->power_mode_mask & AMD_SDW_CLK_STOP_MODE) {
+ amd_sdw_wake_enable(amd_manager, true);
return amd_sdw_clock_stop(amd_manager);
} else if (amd_manager->power_mode_mask & AMD_SDW_POWER_OFF_MODE) {
ret = amd_sdw_clock_stop(amd_manager);
diff --git a/drivers/soundwire/amd_manager.h b/drivers/soundwire/amd_manager.h
index 418b679e0b1a68..707065468e05a0 100644
--- a/drivers/soundwire/amd_manager.h
+++ b/drivers/soundwire/amd_manager.h
@@ -152,7 +152,7 @@
#define AMD_SDW0_EXT_INTR_MASK 0x200000
#define AMD_SDW1_EXT_INTR_MASK 4
#define AMD_SDW_IRQ_MASK_0TO7 0x77777777
-#define AMD_SDW_IRQ_MASK_8TO11 0x000d7777
+#define AMD_SDW_IRQ_MASK_8TO11 0x000c7777
#define AMD_SDW_IRQ_ERROR_MASK 0xff
#define AMD_SDW_MAX_FREQ_NUM 1
#define AMD_SDW0_MAX_TX_PORTS 3
@@ -190,6 +190,7 @@
#define AMD_SDW_CLK_RESUME_REQ 2
#define AMD_SDW_CLK_RESUME_DONE 3
#define AMD_SDW_WAKE_STAT_MASK BIT(16)
+#define AMD_SDW_WAKE_INTR_MASK BIT(16)
static u32 amd_sdw_freq_tbl[AMD_SDW_MAX_FREQ_NUM] = {
AMD_SDW_DEFAULT_CLK_FREQ,
diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index 7cc219d78551ad..e358ac5b450977 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -623,7 +623,7 @@ static int spi_engine_probe(struct platform_device *pdev)
version = readl(spi_engine->base + ADI_AXI_REG_VERSION);
if (ADI_AXI_PCORE_VER_MAJOR(version) != 1) {
- dev_err(&pdev->dev, "Unsupported peripheral version %u.%u.%c\n",
+ dev_err(&pdev->dev, "Unsupported peripheral version %u.%u.%u\n",
ADI_AXI_PCORE_VER_MAJOR(version),
ADI_AXI_PCORE_VER_MINOR(version),
ADI_AXI_PCORE_VER_PATCH(version));
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index 079035db7dd859..92a662d1b55cf2 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -852,39 +852,39 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
fsl_lpspi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(fsl_lpspi->base)) {
ret = PTR_ERR(fsl_lpspi->base);
- goto out_controller_put;
+ return ret;
}
fsl_lpspi->base_phys = res->start;
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
ret = irq;
- goto out_controller_put;
+ return ret;
}
ret = devm_request_irq(&pdev->dev, irq, fsl_lpspi_isr, 0,
dev_name(&pdev->dev), fsl_lpspi);
if (ret) {
dev_err(&pdev->dev, "can't get irq%d: %d\n", irq, ret);
- goto out_controller_put;
+ return ret;
}
fsl_lpspi->clk_per = devm_clk_get(&pdev->dev, "per");
if (IS_ERR(fsl_lpspi->clk_per)) {
ret = PTR_ERR(fsl_lpspi->clk_per);
- goto out_controller_put;
+ return ret;
}
fsl_lpspi->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
if (IS_ERR(fsl_lpspi->clk_ipg)) {
ret = PTR_ERR(fsl_lpspi->clk_ipg);
- goto out_controller_put;
+ return ret;
}
/* enable the clock */
ret = fsl_lpspi_init_rpm(fsl_lpspi);
if (ret)
- goto out_controller_put;
+ return ret;
ret = pm_runtime_get_sync(fsl_lpspi->dev);
if (ret < 0) {
@@ -945,8 +945,6 @@ out_pm_get:
pm_runtime_dont_use_autosuspend(fsl_lpspi->dev);
pm_runtime_put_sync(fsl_lpspi->dev);
pm_runtime_disable(fsl_lpspi->dev);
-out_controller_put:
- spi_controller_put(controller);
return ret;
}
diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c
index 35ef5e8e2ffd25..77e9738e42f60e 100644
--- a/drivers/spi/spi-hisi-kunpeng.c
+++ b/drivers/spi/spi-hisi-kunpeng.c
@@ -151,8 +151,6 @@ static const struct debugfs_reg32 hisi_spi_regs[] = {
HISI_SPI_DBGFS_REG("ENR", HISI_SPI_ENR),
HISI_SPI_DBGFS_REG("FIFOC", HISI_SPI_FIFOC),
HISI_SPI_DBGFS_REG("IMR", HISI_SPI_IMR),
- HISI_SPI_DBGFS_REG("DIN", HISI_SPI_DIN),
- HISI_SPI_DBGFS_REG("DOUT", HISI_SPI_DOUT),
HISI_SPI_DBGFS_REG("SR", HISI_SPI_SR),
HISI_SPI_DBGFS_REG("RISR", HISI_SPI_RISR),
HISI_SPI_DBGFS_REG("ISR", HISI_SPI_ISR),
diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index 969965d7bc98b5..cc18d320370f97 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -725,6 +725,8 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
spi_bus->spi_int[iter] = devm_kzalloc(&pdev->dev,
sizeof(struct pci1xxxx_spi_internal),
GFP_KERNEL);
+ if (!spi_bus->spi_int[iter])
+ return -ENOMEM;
spi_sub_ptr = spi_bus->spi_int[iter];
spi_sub_ptr->spi_host = devm_spi_alloc_host(dev, sizeof(struct spi_controller));
if (!spi_sub_ptr->spi_host)
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 9fcbe040cb2f2e..f726d86704287e 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -430,7 +430,7 @@ static bool s3c64xx_spi_can_dma(struct spi_controller *host,
struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
if (sdd->rx_dma.ch && sdd->tx_dma.ch)
- return xfer->len > sdd->fifo_depth;
+ return xfer->len >= sdd->fifo_depth;
return false;
}
@@ -826,10 +826,9 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host,
return status;
}
- if (!is_polling(sdd) && (xfer->len > fifo_len) &&
+ if (!is_polling(sdd) && xfer->len >= fifo_len &&
sdd->rx_dma.ch && sdd->tx_dma.ch) {
use_dma = 1;
-
} else if (xfer->len >= fifo_len) {
tx_buf = xfer->tx_buf;
rx_buf = xfer->rx_buf;
diff --git a/drivers/staging/media/atomisp/pci/hmm/hmm.c b/drivers/staging/media/atomisp/pci/hmm/hmm.c
index bb12644fd033b5..3e2899ad85174f 100644
--- a/drivers/staging/media/atomisp/pci/hmm/hmm.c
+++ b/drivers/staging/media/atomisp/pci/hmm/hmm.c
@@ -205,7 +205,7 @@ static ia_css_ptr __hmm_alloc(size_t bytes, enum hmm_bo_type type,
}
dev_dbg(atomisp_dev, "pages: 0x%08x (%zu bytes), type: %d, vmalloc %p\n",
- bo->start, bytes, type, vmalloc);
+ bo->start, bytes, type, vmalloc_noprof);
return bo->start;
diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
index 258aa0e37f5544..4c3684dd902ed4 100644
--- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
+++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
@@ -937,8 +937,9 @@ static int create_component(struct vchiq_mmal_instance *instance,
/* build component create message */
m.h.type = MMAL_MSG_TYPE_COMPONENT_CREATE;
m.u.component_create.client_component = component->client_component;
- strncpy(m.u.component_create.name, name,
- sizeof(m.u.component_create.name));
+ strscpy_pad(m.u.component_create.name, name,
+ sizeof(m.u.component_create.name));
+ m.u.component_create.pid = 0;
ret = send_synchronous_mmal_msg(instance, &m,
sizeof(m.u.component_create),
diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c
index 6797200211836d..d9a6242264b787 100644
--- a/drivers/target/iscsi/iscsi_target_erl1.c
+++ b/drivers/target/iscsi/iscsi_target_erl1.c
@@ -583,7 +583,7 @@ int iscsit_dataout_datapduinorder_no_fbit(
struct iscsi_pdu *pdu)
{
int i, send_recovery_r2t = 0, recovery = 0;
- u32 length = 0, offset = 0, pdu_count = 0, xfer_len = 0;
+ u32 length = 0, offset = 0, pdu_count = 0;
struct iscsit_conn *conn = cmd->conn;
struct iscsi_pdu *first_pdu = NULL;
@@ -596,7 +596,6 @@ int iscsit_dataout_datapduinorder_no_fbit(
if (cmd->pdu_list[i].seq_no == pdu->seq_no) {
if (!first_pdu)
first_pdu = &cmd->pdu_list[i];
- xfer_len += cmd->pdu_list[i].length;
pdu_count++;
} else if (pdu_count)
break;
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index c1fbcdd1618264..c40217f44b1bc5 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -3672,6 +3672,8 @@ static int __init target_core_init_configfs(void)
{
struct configfs_subsystem *subsys = &target_core_fabrics;
struct t10_alua_lu_gp *lu_gp;
+ struct cred *kern_cred;
+ const struct cred *old_cred;
int ret;
pr_debug("TARGET_CORE[0]: Loading Generic Kernel Storage"
@@ -3748,11 +3750,21 @@ static int __init target_core_init_configfs(void)
if (ret < 0)
goto out;
+ /* We use the kernel credentials to access the target directory */
+ kern_cred = prepare_kernel_cred(&init_task);
+ if (!kern_cred) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ old_cred = override_creds(kern_cred);
target_init_dbroot();
+ revert_creds(old_cred);
+ put_cred(kern_cred);
return 0;
out:
+ target_xcopy_release_pt();
configfs_unregister_subsystem(subsys);
core_dev_release_virtual_lun0();
rd_module_exit();
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 50dec24e967a00..8fd7cf1932cd44 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -214,7 +214,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
res = dfc->power_ops->get_real_power(df, power, freq, voltage);
if (!res) {
- state = dfc->capped_state;
+ state = dfc->max_state - dfc->capped_state;
/* Convert EM power into milli-Watts first */
rcu_read_lock();
diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 1b17dc4c219cc9..e25e48d76aa79c 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -606,7 +606,7 @@ static int allocate_actors_buffer(struct power_allocator_params *params,
/* There might be no cooling devices yet. */
if (!num_actors) {
- ret = -EINVAL;
+ ret = 0;
goto clean_state;
}
@@ -679,11 +679,6 @@ static int power_allocator_bind(struct thermal_zone_device *tz)
return -ENOMEM;
get_governor_trips(tz, params);
- if (!params->trip_max) {
- dev_warn(&tz->device, "power_allocator: missing trip_max\n");
- kfree(params);
- return -EINVAL;
- }
ret = check_power_actors(tz, params);
if (ret < 0) {
@@ -714,9 +709,10 @@ static int power_allocator_bind(struct thermal_zone_device *tz)
else
params->sustainable_power = tz->tzp->sustainable_power;
- estimate_pid_constants(tz, tz->tzp->sustainable_power,
- params->trip_switch_on,
- params->trip_max->temperature);
+ if (params->trip_max)
+ estimate_pid_constants(tz, tz->tzp->sustainable_power,
+ params->trip_switch_on,
+ params->trip_max->temperature);
reset_pid_controller(params);
diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c
index c617e8b9f0ddfe..d78d54ae2605e8 100644
--- a/drivers/thermal/thermal_debugfs.c
+++ b/drivers/thermal/thermal_debugfs.c
@@ -616,6 +616,7 @@ void thermal_debug_tz_trip_up(struct thermal_zone_device *tz,
tze->trip_stats[trip_id].timestamp = now;
tze->trip_stats[trip_id].max = max(tze->trip_stats[trip_id].max, temperature);
tze->trip_stats[trip_id].min = min(tze->trip_stats[trip_id].min, temperature);
+ tze->trip_stats[trip_id].count++;
tze->trip_stats[trip_id].avg = tze->trip_stats[trip_id].avg +
(temperature - tze->trip_stats[trip_id].avg) /
tze->trip_stats[trip_id].count;
diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c
index 09f6050dd04161..497abf0d47cac5 100644
--- a/drivers/thermal/thermal_trip.c
+++ b/drivers/thermal/thermal_trip.c
@@ -65,7 +65,6 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz)
{
const struct thermal_trip *trip;
int low = -INT_MAX, high = INT_MAX;
- bool same_trip = false;
int ret;
lockdep_assert_held(&tz->lock);
@@ -74,36 +73,22 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz)
return;
for_each_trip(tz, trip) {
- bool low_set = false;
int trip_low;
trip_low = trip->temperature - trip->hysteresis;
- if (trip_low < tz->temperature && trip_low > low) {
+ if (trip_low < tz->temperature && trip_low > low)
low = trip_low;
- low_set = true;
- same_trip = false;
- }
if (trip->temperature > tz->temperature &&
- trip->temperature < high) {
+ trip->temperature < high)
high = trip->temperature;
- same_trip = low_set;
- }
}
/* No need to change trip points */
if (tz->prev_low_trip == low && tz->prev_high_trip == high)
return;
- /*
- * If "high" and "low" are the same, skip the change unless this is the
- * first time.
- */
- if (same_trip && (tz->prev_low_trip != -INT_MAX ||
- tz->prev_high_trip != INT_MAX))
- return;
-
tz->prev_low_trip = low;
tz->prev_high_trip = high;
diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 6ffc4e81ffed78..326433df5880e2 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -3180,22 +3180,29 @@ void tb_switch_unconfigure_link(struct tb_switch *sw)
{
struct tb_port *up, *down;
- if (sw->is_unplugged)
- return;
if (!tb_route(sw) || tb_switch_is_icm(sw))
return;
+ /*
+ * Unconfigure downstream port so that wake-on-connect can be
+ * configured after router unplug. No need to unconfigure upstream port
+ * since its router is unplugged.
+ */
up = tb_upstream_port(sw);
- if (tb_switch_is_usb4(up->sw))
- usb4_port_unconfigure(up);
- else
- tb_lc_unconfigure_port(up);
-
down = up->remote;
if (tb_switch_is_usb4(down->sw))
usb4_port_unconfigure(down);
else
tb_lc_unconfigure_port(down);
+
+ if (sw->is_unplugged)
+ return;
+
+ up = tb_upstream_port(sw);
+ if (tb_switch_is_usb4(up->sw))
+ usb4_port_unconfigure(up);
+ else
+ tb_lc_unconfigure_port(up);
}
static void tb_switch_credits_init(struct tb_switch *sw)
@@ -3441,7 +3448,26 @@ static int tb_switch_set_wake(struct tb_switch *sw, unsigned int flags)
return tb_lc_set_wake(sw, flags);
}
-int tb_switch_resume(struct tb_switch *sw)
+static void tb_switch_check_wakes(struct tb_switch *sw)
+{
+ if (device_may_wakeup(&sw->dev)) {
+ if (tb_switch_is_usb4(sw))
+ usb4_switch_check_wakes(sw);
+ }
+}
+
+/**
+ * tb_switch_resume() - Resume a switch after sleep
+ * @sw: Switch to resume
+ * @runtime: Is this resume from runtime suspend or system sleep
+ *
+ * Resumes and re-enumerates router (and all its children), if still plugged
+ * after suspend. Don't enumerate device router whose UID was changed during
+ * suspend. If this is resume from system sleep, notifies PM core about the
+ * wakes occurred during suspend. Disables all wakes, except USB4 wake of
+ * upstream port for USB4 routers that shall be always enabled.
+ */
+int tb_switch_resume(struct tb_switch *sw, bool runtime)
{
struct tb_port *port;
int err;
@@ -3490,6 +3516,9 @@ int tb_switch_resume(struct tb_switch *sw)
if (err)
return err;
+ if (!runtime)
+ tb_switch_check_wakes(sw);
+
/* Disable wakes */
tb_switch_set_wake(sw, 0);
@@ -3519,7 +3548,8 @@ int tb_switch_resume(struct tb_switch *sw)
*/
if (tb_port_unlock(port))
tb_port_warn(port, "failed to unlock port\n");
- if (port->remote && tb_switch_resume(port->remote->sw)) {
+ if (port->remote &&
+ tb_switch_resume(port->remote->sw, runtime)) {
tb_port_warn(port,
"lost during suspend, disconnecting\n");
tb_sw_set_unplugged(port->remote->sw);
diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index c5ce7a694b27df..3e44c78ac40929 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -1801,6 +1801,12 @@ static struct tb_port *tb_find_dp_out(struct tb *tb, struct tb_port *in)
continue;
}
+ /* Needs to be on different routers */
+ if (in->sw == port->sw) {
+ tb_port_dbg(port, "skipping DP OUT on same router\n");
+ continue;
+ }
+
tb_port_dbg(port, "DP OUT available\n");
/*
@@ -2936,7 +2942,7 @@ static int tb_resume_noirq(struct tb *tb)
if (!tb_switch_is_usb4(tb->root_switch))
tb_switch_reset(tb->root_switch);
- tb_switch_resume(tb->root_switch);
+ tb_switch_resume(tb->root_switch, false);
tb_free_invalid_tunnels(tb);
tb_free_unplugged_children(tb->root_switch);
tb_restore_children(tb->root_switch);
@@ -3062,7 +3068,7 @@ static int tb_runtime_resume(struct tb *tb)
struct tb_tunnel *tunnel, *n;
mutex_lock(&tb->lock);
- tb_switch_resume(tb->root_switch);
+ tb_switch_resume(tb->root_switch, true);
tb_free_invalid_tunnels(tb);
tb_restore_children(tb->root_switch);
list_for_each_entry_safe(tunnel, n, &tcm->tunnel_list, list)
diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h
index feed8ecaf712e8..18aae4ccaed596 100644
--- a/drivers/thunderbolt/tb.h
+++ b/drivers/thunderbolt/tb.h
@@ -827,7 +827,7 @@ int tb_switch_configuration_valid(struct tb_switch *sw);
int tb_switch_add(struct tb_switch *sw);
void tb_switch_remove(struct tb_switch *sw);
void tb_switch_suspend(struct tb_switch *sw, bool runtime);
-int tb_switch_resume(struct tb_switch *sw);
+int tb_switch_resume(struct tb_switch *sw, bool runtime);
int tb_switch_reset(struct tb_switch *sw);
int tb_switch_wait_for_bit(struct tb_switch *sw, u32 offset, u32 bit,
u32 value, int timeout_msec);
@@ -1288,6 +1288,7 @@ static inline bool tb_switch_is_usb4(const struct tb_switch *sw)
return usb4_switch_version(sw) > 0;
}
+void usb4_switch_check_wakes(struct tb_switch *sw);
int usb4_switch_setup(struct tb_switch *sw);
int usb4_switch_configuration_valid(struct tb_switch *sw);
int usb4_switch_read_uid(struct tb_switch *sw, u64 *uid);
diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c
index 9860b49d7a2b20..78b06e922fdace 100644
--- a/drivers/thunderbolt/usb4.c
+++ b/drivers/thunderbolt/usb4.c
@@ -155,7 +155,13 @@ static inline int usb4_switch_op_data(struct tb_switch *sw, u16 opcode,
tx_dwords, rx_data, rx_dwords);
}
-static void usb4_switch_check_wakes(struct tb_switch *sw)
+/**
+ * usb4_switch_check_wakes() - Check for wakes and notify PM core about them
+ * @sw: Router whose wakes to check
+ *
+ * Checks wakes occurred during suspend and notify the PM core about them.
+ */
+void usb4_switch_check_wakes(struct tb_switch *sw)
{
bool wakeup_usb4 = false;
struct usb4_port *usb4;
@@ -163,9 +169,6 @@ static void usb4_switch_check_wakes(struct tb_switch *sw)
bool wakeup = false;
u32 val;
- if (!device_may_wakeup(&sw->dev))
- return;
-
if (tb_route(sw)) {
if (tb_sw_read(sw, &val, TB_CFG_SWITCH, ROUTER_CS_6, 1))
return;
@@ -244,8 +247,6 @@ int usb4_switch_setup(struct tb_switch *sw)
u32 val = 0;
int ret;
- usb4_switch_check_wakes(sw);
-
if (!tb_route(sw))
return 0;
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index a3acbf0f5da1be..1300c92b8702a3 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -356,9 +356,9 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
long rate;
int ret;
+ clk_disable_unprepare(d->clk);
rate = clk_round_rate(d->clk, newrate);
- if (rate > 0 && p->uartclk != rate) {
- clk_disable_unprepare(d->clk);
+ if (rate > 0) {
/*
* Note that any clock-notifer worker will block in
* serial8250_update_uartclk() until we are done.
@@ -366,8 +366,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios,
ret = clk_set_rate(d->clk, newrate);
if (!ret)
p->uartclk = rate;
- clk_prepare_enable(d->clk);
}
+ clk_prepare_enable(d->clk);
dw8250_do_set_termios(p, termios, old);
}
diff --git a/drivers/tty/serial/8250/8250_lpc18xx.c b/drivers/tty/serial/8250/8250_lpc18xx.c
index 7984ee05af1dad..47e1a056a60c34 100644
--- a/drivers/tty/serial/8250/8250_lpc18xx.c
+++ b/drivers/tty/serial/8250/8250_lpc18xx.c
@@ -151,7 +151,7 @@ static int lpc18xx_serial_probe(struct platform_device *pdev)
ret = uart_read_port_properties(&uart.port);
if (ret)
- return ret;
+ goto dis_uart_clk;
uart.port.iotype = UPIO_MEM32;
uart.port.regshift = 2;
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 0d35c77fad9eb1..e2e4f99f9d3471 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -5010,12 +5010,6 @@ static const struct pci_device_id serial_pci_tbl[] = {
{ PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATRO_B,
PCI_ANY_ID, PCI_ANY_ID, 0, 0,
pbn_b0_bt_2_115200 },
- { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATTRO_A,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0,
- pbn_b0_bt_2_115200 },
- { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_QUATTRO_B,
- PCI_ANY_ID, PCI_ANY_ID, 0, 0,
- pbn_b0_bt_2_115200 },
{ PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_OCTO_A,
PCI_ANY_ID, PCI_ANY_ID, 0, 0,
pbn_b0_bt_4_460800 },
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 4749331fe618ca..1e8853eae5042b 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1086,11 +1086,13 @@ static void mxs_auart_set_ldisc(struct uart_port *port,
static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
{
- u32 istat;
+ u32 istat, stat;
struct mxs_auart_port *s = context;
u32 mctrl_temp = s->mctrl_prev;
- u32 stat = mxs_read(s, REG_STAT);
+ uart_port_lock(&s->port);
+
+ stat = mxs_read(s, REG_STAT);
istat = mxs_read(s, REG_INTR);
/* ack irq */
@@ -1126,6 +1128,8 @@ static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
istat &= ~AUART_INTR_TXIS;
}
+ uart_port_unlock(&s->port);
+
return IRQ_HANDLED;
}
diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c
index 05d97e89511e69..92195f984de1b5 100644
--- a/drivers/tty/serial/pmac_zilog.c
+++ b/drivers/tty/serial/pmac_zilog.c
@@ -210,7 +210,6 @@ static bool pmz_receive_chars(struct uart_pmac_port *uap)
{
struct tty_port *port;
unsigned char ch, r1, drop, flag;
- int loops = 0;
/* Sanity check, make sure the old bug is no longer happening */
if (uap->port.state == NULL) {
@@ -291,25 +290,12 @@ static bool pmz_receive_chars(struct uart_pmac_port *uap)
if (r1 & Rx_OVR)
tty_insert_flip_char(port, 0, TTY_OVERRUN);
next_char:
- /* We can get stuck in an infinite loop getting char 0 when the
- * line is in a wrong HW state, we break that here.
- * When that happens, I disable the receive side of the driver.
- * Note that what I've been experiencing is a real irq loop where
- * I'm getting flooded regardless of the actual port speed.
- * Something strange is going on with the HW
- */
- if ((++loops) > 1000)
- goto flood;
ch = read_zsreg(uap, R0);
if (!(ch & Rx_CH_AV))
break;
}
return true;
- flood:
- pmz_interrupt_control(uap, 0);
- pmz_error("pmz: rx irq flood !\n");
- return true;
}
static void pmz_status_handle(struct uart_pmac_port *uap)
diff --git a/drivers/tty/serial/serial_base.h b/drivers/tty/serial/serial_base.h
index c74c548f0db62a..b6c38d2edfd401 100644
--- a/drivers/tty/serial/serial_base.h
+++ b/drivers/tty/serial/serial_base.h
@@ -22,6 +22,7 @@ struct serial_ctrl_device {
struct serial_port_device {
struct device dev;
struct uart_port *port;
+ unsigned int tx_enabled:1;
};
int serial_base_ctrl_init(void);
@@ -30,6 +31,9 @@ void serial_base_ctrl_exit(void);
int serial_base_port_init(void);
void serial_base_port_exit(void);
+void serial_base_port_startup(struct uart_port *port);
+void serial_base_port_shutdown(struct uart_port *port);
+
int serial_base_driver_register(struct device_driver *driver);
void serial_base_driver_unregister(struct device_driver *driver);
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index ff85ebd3a007db..c476d884356dbd 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -156,7 +156,7 @@ static void __uart_start(struct uart_state *state)
* enabled, serial_port_runtime_resume() calls start_tx() again
* after enabling the device.
*/
- if (pm_runtime_active(&port_dev->dev))
+ if (!pm_runtime_enabled(port->dev) || pm_runtime_active(&port_dev->dev))
port->ops->start_tx(port);
pm_runtime_mark_last_busy(&port_dev->dev);
pm_runtime_put_autosuspend(&port_dev->dev);
@@ -323,16 +323,26 @@ static int uart_startup(struct tty_struct *tty, struct uart_state *state,
bool init_hw)
{
struct tty_port *port = &state->port;
+ struct uart_port *uport;
int retval;
if (tty_port_initialized(port))
- return 0;
+ goto out_base_port_startup;
retval = uart_port_startup(tty, state, init_hw);
- if (retval)
+ if (retval) {
set_bit(TTY_IO_ERROR, &tty->flags);
+ return retval;
+ }
- return retval;
+out_base_port_startup:
+ uport = uart_port_check(state);
+ if (!uport)
+ return -EIO;
+
+ serial_base_port_startup(uport);
+
+ return 0;
}
/*
@@ -355,6 +365,9 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state)
if (tty)
set_bit(TTY_IO_ERROR, &tty->flags);
+ if (uport)
+ serial_base_port_shutdown(uport);
+
if (tty_port_initialized(port)) {
tty_port_set_initialized(port, false);
@@ -1775,6 +1788,7 @@ static void uart_tty_port_shutdown(struct tty_port *port)
uport->ops->stop_rx(uport);
uart_port_unlock_irq(uport);
+ serial_base_port_shutdown(uport);
uart_port_shutdown(port);
/*
@@ -1788,6 +1802,7 @@ static void uart_tty_port_shutdown(struct tty_port *port)
* Free the transmit buffer.
*/
uart_port_lock_irq(uport);
+ uart_circ_clear(&state->xmit);
buf = state->xmit.buf;
state->xmit.buf = NULL;
uart_port_unlock_irq(uport);
diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c
index 22b9eeb23e68ad..7e3a1c7b097c3c 100644
--- a/drivers/tty/serial/serial_port.c
+++ b/drivers/tty/serial/serial_port.c
@@ -39,8 +39,12 @@ static int serial_port_runtime_resume(struct device *dev)
/* Flush any pending TX for the port */
uart_port_lock_irqsave(port, &flags);
+ if (!port_dev->tx_enabled)
+ goto unlock;
if (__serial_port_busy(port))
port->ops->start_tx(port);
+
+unlock:
uart_port_unlock_irqrestore(port, flags);
out:
@@ -60,6 +64,11 @@ static int serial_port_runtime_suspend(struct device *dev)
return 0;
uart_port_lock_irqsave(port, &flags);
+ if (!port_dev->tx_enabled) {
+ uart_port_unlock_irqrestore(port, flags);
+ return 0;
+ }
+
busy = __serial_port_busy(port);
if (busy)
port->ops->start_tx(port);
@@ -71,6 +80,31 @@ static int serial_port_runtime_suspend(struct device *dev)
return busy ? -EBUSY : 0;
}
+static void serial_base_port_set_tx(struct uart_port *port,
+ struct serial_port_device *port_dev,
+ bool enabled)
+{
+ unsigned long flags;
+
+ uart_port_lock_irqsave(port, &flags);
+ port_dev->tx_enabled = enabled;
+ uart_port_unlock_irqrestore(port, flags);
+}
+
+void serial_base_port_startup(struct uart_port *port)
+{
+ struct serial_port_device *port_dev = port->port_dev;
+
+ serial_base_port_set_tx(port, port_dev, true);
+}
+
+void serial_base_port_shutdown(struct uart_port *port)
+{
+ struct serial_port_device *port_dev = port->port_dev;
+
+ serial_base_port_set_tx(port, port_dev, false);
+}
+
static DEFINE_RUNTIME_DEV_PM_OPS(serial_port_pm,
serial_port_runtime_suspend,
serial_port_runtime_resume, NULL);
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index 58d169e5c1db03..4fa5a03ebac08e 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -861,6 +861,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
u32 sr;
unsigned int size;
+ irqreturn_t ret = IRQ_NONE;
sr = readl_relaxed(port->membase + ofs->isr);
@@ -869,11 +870,14 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
(sr & USART_SR_TC)) {
stm32_usart_tc_interrupt_disable(port);
stm32_usart_rs485_rts_disable(port);
+ ret = IRQ_HANDLED;
}
- if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG)
+ if ((sr & USART_SR_RTOF) && ofs->icr != UNDEF_REG) {
writel_relaxed(USART_ICR_RTOCF,
port->membase + ofs->icr);
+ ret = IRQ_HANDLED;
+ }
if ((sr & USART_SR_WUF) && ofs->icr != UNDEF_REG) {
/* Clear wake up flag and disable wake up interrupt */
@@ -882,6 +886,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_WUFIE);
if (irqd_is_wakeup_set(irq_get_irq_data(port->irq)))
pm_wakeup_event(tport->tty->dev, 0);
+ ret = IRQ_HANDLED;
}
/*
@@ -896,6 +901,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
uart_unlock_and_check_sysrq(port);
if (size)
tty_flip_buffer_push(tport);
+ ret = IRQ_HANDLED;
}
}
@@ -903,6 +909,7 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
uart_port_lock(port);
stm32_usart_transmit_chars(port);
uart_port_unlock(port);
+ ret = IRQ_HANDLED;
}
/* Receiver timeout irq for DMA RX */
@@ -912,9 +919,10 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
uart_unlock_and_check_sysrq(port);
if (size)
tty_flip_buffer_push(tport);
+ ret = IRQ_HANDLED;
}
- return IRQ_HANDLED;
+ return ret;
}
static void stm32_usart_set_mctrl(struct uart_port *port, unsigned int mctrl)
@@ -1084,6 +1092,7 @@ static int stm32_usart_startup(struct uart_port *port)
val |= USART_CR2_SWAP;
writel_relaxed(val, port->membase + ofs->cr2);
}
+ stm32_port->throttled = false;
/* RX FIFO Flush */
if (ofs->rqr != UNDEF_REG)
diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 8db81f1a12d5fc..768bf87cd80d3f 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -94,7 +94,7 @@ void ufshcd_mcq_config_mac(struct ufs_hba *hba, u32 max_active_cmds)
val = ufshcd_readl(hba, REG_UFS_MCQ_CFG);
val &= ~MCQ_CFG_MAC_MASK;
- val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds);
+ val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds - 1);
ufshcd_writel(hba, val, REG_UFS_MCQ_CFG);
}
EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac);
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index e30fd125988d7a..a0f8e930167d70 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3217,7 +3217,9 @@ retry:
/* MCQ mode */
if (is_mcq_enabled(hba)) {
- err = ufshcd_clear_cmd(hba, lrbp->task_tag);
+ /* successfully cleared the command, retry if needed */
+ if (ufshcd_clear_cmd(hba, lrbp->task_tag) == 0)
+ err = -EAGAIN;
hba->dev_cmd.complete = NULL;
return err;
}
@@ -9791,7 +9793,10 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
/* UFS device & link must be active before we enter in this function */
if (!ufshcd_is_ufs_dev_active(hba) || !ufshcd_is_link_active(hba)) {
- ret = -EINVAL;
+ /* Wait err handler finish or trigger err recovery */
+ if (!ufshcd_eh_in_progress(hba))
+ ufshcd_force_error_recovery(hba);
+ ret = -EBUSY;
goto enable_scaling;
}
diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index 8d68bd21ae7332..7a00004bfd0361 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -47,7 +47,7 @@ enum {
TSTBUS_MAX,
};
-#define QCOM_UFS_MAX_GEAR 4
+#define QCOM_UFS_MAX_GEAR 5
#define QCOM_UFS_MAX_LANE 2
enum {
@@ -67,26 +67,32 @@ static const struct __ufs_qcom_bw_table {
[MODE_PWM][UFS_PWM_G2][UFS_LANE_1] = { 1844, 1000 },
[MODE_PWM][UFS_PWM_G3][UFS_LANE_1] = { 3688, 1000 },
[MODE_PWM][UFS_PWM_G4][UFS_LANE_1] = { 7376, 1000 },
+ [MODE_PWM][UFS_PWM_G5][UFS_LANE_1] = { 14752, 1000 },
[MODE_PWM][UFS_PWM_G1][UFS_LANE_2] = { 1844, 1000 },
[MODE_PWM][UFS_PWM_G2][UFS_LANE_2] = { 3688, 1000 },
[MODE_PWM][UFS_PWM_G3][UFS_LANE_2] = { 7376, 1000 },
[MODE_PWM][UFS_PWM_G4][UFS_LANE_2] = { 14752, 1000 },
+ [MODE_PWM][UFS_PWM_G5][UFS_LANE_2] = { 29504, 1000 },
[MODE_HS_RA][UFS_HS_G1][UFS_LANE_1] = { 127796, 1000 },
[MODE_HS_RA][UFS_HS_G2][UFS_LANE_1] = { 255591, 1000 },
[MODE_HS_RA][UFS_HS_G3][UFS_LANE_1] = { 1492582, 102400 },
[MODE_HS_RA][UFS_HS_G4][UFS_LANE_1] = { 2915200, 204800 },
+ [MODE_HS_RA][UFS_HS_G5][UFS_LANE_1] = { 5836800, 409600 },
[MODE_HS_RA][UFS_HS_G1][UFS_LANE_2] = { 255591, 1000 },
[MODE_HS_RA][UFS_HS_G2][UFS_LANE_2] = { 511181, 1000 },
[MODE_HS_RA][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 },
[MODE_HS_RA][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 },
+ [MODE_HS_RA][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 },
[MODE_HS_RB][UFS_HS_G1][UFS_LANE_1] = { 149422, 1000 },
[MODE_HS_RB][UFS_HS_G2][UFS_LANE_1] = { 298189, 1000 },
[MODE_HS_RB][UFS_HS_G3][UFS_LANE_1] = { 1492582, 102400 },
[MODE_HS_RB][UFS_HS_G4][UFS_LANE_1] = { 2915200, 204800 },
+ [MODE_HS_RB][UFS_HS_G5][UFS_LANE_1] = { 5836800, 409600 },
[MODE_HS_RB][UFS_HS_G1][UFS_LANE_2] = { 298189, 1000 },
[MODE_HS_RB][UFS_HS_G2][UFS_LANE_2] = { 596378, 1000 },
[MODE_HS_RB][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 },
[MODE_HS_RB][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 },
+ [MODE_HS_RB][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 },
[MODE_MAX][0][0] = { 7643136, 307200 },
};
@@ -1210,8 +1216,10 @@ static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up)
list_for_each_entry(clki, head, list) {
if (!IS_ERR_OR_NULL(clki->clk) &&
- !strcmp(clki->name, "core_clk_unipro")) {
- if (is_scale_up)
+ !strcmp(clki->name, "core_clk_unipro")) {
+ if (!clki->max_freq)
+ cycles_in_1us = 150; /* default for backwards compatibility */
+ else if (is_scale_up)
cycles_in_1us = ceil(clki->max_freq, (1000 * 1000));
else
cycles_in_1us = ceil(clk_get_rate(clki->clk), (1000 * 1000));
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index bb77de6fa067ef..009158fef2a8f1 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -792,7 +792,7 @@ static int uio_mmap_dma_coherent(struct vm_area_struct *vma)
*/
vma->vm_pgoff = 0;
- addr = (void *)mem->addr;
+ addr = (void *)(uintptr_t)mem->addr;
ret = dma_mmap_coherent(mem->dma_device,
vma,
addr,
diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
index d5f9384df1255f..13cc35ab5d29a7 100644
--- a/drivers/uio/uio_dmem_genirq.c
+++ b/drivers/uio/uio_dmem_genirq.c
@@ -60,7 +60,7 @@ static int uio_dmem_genirq_open(struct uio_info *info, struct inode *inode)
addr = dma_alloc_coherent(&priv->pdev->dev, uiomem->size,
&uiomem->dma_addr, GFP_KERNEL);
- uiomem->addr = addr ? (phys_addr_t) addr : DMEM_MAP_ERROR;
+ uiomem->addr = addr ? (uintptr_t) addr : DMEM_MAP_ERROR;
++uiomem;
}
priv->refcnt++;
@@ -89,7 +89,7 @@ static int uio_dmem_genirq_release(struct uio_info *info, struct inode *inode)
break;
if (uiomem->addr) {
dma_free_coherent(uiomem->dma_device, uiomem->size,
- (void *) uiomem->addr,
+ (void *) (uintptr_t) uiomem->addr,
uiomem->dma_addr);
}
uiomem->addr = DMEM_MAP_ERROR;
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 20d9762331bd76..6be3462b109ff2 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -181,12 +181,14 @@ hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
{
if (pdata->send_gpadl.gpadl_handle) {
vmbus_teardown_gpadl(dev->channel, &pdata->send_gpadl);
- vfree(pdata->send_buf);
+ if (!pdata->send_gpadl.decrypted)
+ vfree(pdata->send_buf);
}
if (pdata->recv_gpadl.gpadl_handle) {
vmbus_teardown_gpadl(dev->channel, &pdata->recv_gpadl);
- vfree(pdata->recv_buf);
+ if (!pdata->recv_gpadl.decrypted)
+ vfree(pdata->recv_buf);
}
}
@@ -295,7 +297,8 @@ hv_uio_probe(struct hv_device *dev,
ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
RECV_BUFFER_SIZE, &pdata->recv_gpadl);
if (ret) {
- vfree(pdata->recv_buf);
+ if (!pdata->recv_gpadl.decrypted)
+ vfree(pdata->recv_buf);
goto fail_close;
}
@@ -317,7 +320,8 @@ hv_uio_probe(struct hv_device *dev,
ret = vmbus_establish_gpadl(channel, pdata->send_buf,
SEND_BUFFER_SIZE, &pdata->send_gpadl);
if (ret) {
- vfree(pdata->send_buf);
+ if (!pdata->send_gpadl.decrypted)
+ vfree(pdata->send_buf);
goto fail_close;
}
diff --git a/drivers/uio/uio_pruss.c b/drivers/uio/uio_pruss.c
index 72b33f7d4c40fc..f67881cba645ba 100644
--- a/drivers/uio/uio_pruss.c
+++ b/drivers/uio/uio_pruss.c
@@ -191,7 +191,7 @@ static int pruss_probe(struct platform_device *pdev)
p->mem[1].size = sram_pool_sz;
p->mem[1].memtype = UIO_MEM_PHYS;
- p->mem[2].addr = (phys_addr_t) gdev->ddr_vaddr;
+ p->mem[2].addr = (uintptr_t) gdev->ddr_vaddr;
p->mem[2].dma_addr = gdev->ddr_paddr;
p->mem[2].size = extram_pool_sz;
p->mem[2].memtype = UIO_MEM_DMA_COHERENT;
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 3ee8455585b6be..9446660e231bb3 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -130,7 +130,6 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem);
#define HUB_DEBOUNCE_STEP 25
#define HUB_DEBOUNCE_STABLE 100
-static void hub_release(struct kref *kref);
static int usb_reset_and_verify_device(struct usb_device *udev);
static int hub_port_disable(struct usb_hub *hub, int port1, int set_state);
static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1,
@@ -720,14 +719,14 @@ static void kick_hub_wq(struct usb_hub *hub)
*/
intf = to_usb_interface(hub->intfdev);
usb_autopm_get_interface_no_resume(intf);
- kref_get(&hub->kref);
+ hub_get(hub);
if (queue_work(hub_wq, &hub->events))
return;
/* the work has already been scheduled */
usb_autopm_put_interface_async(intf);
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
}
void usb_kick_hub_wq(struct usb_device *hdev)
@@ -1095,7 +1094,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
goto init2;
goto init3;
}
- kref_get(&hub->kref);
+ hub_get(hub);
/* The superspeed hub except for root hub has to use Hub Depth
* value as an offset into the route string to locate the bits
@@ -1343,7 +1342,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
device_unlock(&hdev->dev);
}
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
}
/* Implement the continuations for the delays above */
@@ -1759,6 +1758,16 @@ static void hub_release(struct kref *kref)
kfree(hub);
}
+void hub_get(struct usb_hub *hub)
+{
+ kref_get(&hub->kref);
+}
+
+void hub_put(struct usb_hub *hub)
+{
+ kref_put(&hub->kref, hub_release);
+}
+
static unsigned highspeed_hubs;
static void hub_disconnect(struct usb_interface *intf)
@@ -1807,7 +1816,7 @@ static void hub_disconnect(struct usb_interface *intf)
onboard_hub_destroy_pdevs(&hub->onboard_hub_devs);
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
}
static bool hub_descriptor_is_sane(struct usb_host_interface *desc)
@@ -5934,7 +5943,7 @@ out_hdev_lock:
/* Balance the stuff in kick_hub_wq() and allow autosuspend */
usb_autopm_put_interface(intf);
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
kcov_remote_stop();
}
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index 43ce21c96a5114..183b69dc295547 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -129,6 +129,8 @@ extern void usb_hub_remove_port_device(struct usb_hub *hub,
extern int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub,
int port1, bool set);
extern struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev);
+extern void hub_get(struct usb_hub *hub);
+extern void hub_put(struct usb_hub *hub);
extern int hub_port_debounce(struct usb_hub *hub, int port1,
bool must_be_connected);
extern int usb_clear_port_feature(struct usb_device *hdev,
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 5b5e613a11e599..e7da2fca11a48c 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -51,16 +51,29 @@ static ssize_t disable_show(struct device *dev,
struct usb_port *port_dev = to_usb_port(dev);
struct usb_device *hdev = to_usb_device(dev->parent->parent);
struct usb_hub *hub = usb_hub_to_struct_hub(hdev);
- struct usb_interface *intf = to_usb_interface(hub->intfdev);
+ struct usb_interface *intf = to_usb_interface(dev->parent);
int port1 = port_dev->portnum;
u16 portstatus, unused;
bool disabled;
int rc;
+ struct kernfs_node *kn;
+ if (!hub)
+ return -ENODEV;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -70,9 +83,13 @@ static ssize_t disable_show(struct device *dev,
usb_hub_port_status(hub, port1, &portstatus, &unused);
disabled = !usb_port_is_power_on(hub, portstatus);
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
if (rc)
return rc;
@@ -86,19 +103,32 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
struct usb_port *port_dev = to_usb_port(dev);
struct usb_device *hdev = to_usb_device(dev->parent->parent);
struct usb_hub *hub = usb_hub_to_struct_hub(hdev);
- struct usb_interface *intf = to_usb_interface(hub->intfdev);
+ struct usb_interface *intf = to_usb_interface(dev->parent);
int port1 = port_dev->portnum;
bool disabled;
int rc;
+ struct kernfs_node *kn;
+ if (!hub)
+ return -ENODEV;
rc = kstrtobool(buf, &disabled);
if (rc)
return rc;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -119,9 +149,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
if (!rc)
rc = count;
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
return rc;
}
@@ -419,8 +453,10 @@ static void usb_port_shutdown(struct device *dev)
{
struct usb_port *port_dev = to_usb_port(dev);
- if (port_dev->child)
+ if (port_dev->child) {
usb_disable_usb2_hardware_lpm(port_dev->child);
+ usb_unlocked_disable_lpm(port_dev->child);
+ }
}
static const struct dev_pm_ops usb_port_pm_ops = {
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index f98263e21c2a71..d83231d6736ac6 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -1217,14 +1217,24 @@ static ssize_t interface_authorized_store(struct device *dev,
{
struct usb_interface *intf = to_usb_interface(dev);
bool val;
+ struct kernfs_node *kn;
if (kstrtobool(buf, &val) != 0)
return -EINVAL;
- if (val)
+ if (val) {
usb_authorize_interface(intf);
- else
- usb_deauthorize_interface(intf);
+ } else {
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister intf.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (kn) {
+ usb_deauthorize_interface(intf);
+ sysfs_unbreak_active_protection(kn);
+ }
+ }
return count;
}
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index c92a1da46a0147..a141f83aba0cce 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -729,8 +729,14 @@ struct dwc2_dregs_backup {
* struct dwc2_hregs_backup - Holds host registers state before
* entering partial power down
* @hcfg: Backup of HCFG register
+ * @hflbaddr: Backup of HFLBADDR register
* @haintmsk: Backup of HAINTMSK register
+ * @hcchar: Backup of HCCHAR register
+ * @hcsplt: Backup of HCSPLT register
* @hcintmsk: Backup of HCINTMSK register
+ * @hctsiz: Backup of HCTSIZ register
+ * @hdma: Backup of HCDMA register
+ * @hcdmab: Backup of HCDMAB register
* @hprt0: Backup of HPTR0 register
* @hfir: Backup of HFIR register
* @hptxfsiz: Backup of HPTXFSIZ register
@@ -738,8 +744,14 @@ struct dwc2_dregs_backup {
*/
struct dwc2_hregs_backup {
u32 hcfg;
+ u32 hflbaddr;
u32 haintmsk;
+ u32 hcchar[MAX_EPS_CHANNELS];
+ u32 hcsplt[MAX_EPS_CHANNELS];
u32 hcintmsk[MAX_EPS_CHANNELS];
+ u32 hctsiz[MAX_EPS_CHANNELS];
+ u32 hcidma[MAX_EPS_CHANNELS];
+ u32 hcidmab[MAX_EPS_CHANNELS];
u32 hprt0;
u32 hfir;
u32 hptxfsiz;
@@ -1086,6 +1098,7 @@ struct dwc2_hsotg {
bool needs_byte_swap;
/* DWC OTG HW Release versions */
+#define DWC2_CORE_REV_4_30a 0x4f54430a
#define DWC2_CORE_REV_2_71a 0x4f54271a
#define DWC2_CORE_REV_2_72a 0x4f54272a
#define DWC2_CORE_REV_2_80a 0x4f54280a
@@ -1323,6 +1336,7 @@ int dwc2_backup_global_registers(struct dwc2_hsotg *hsotg);
int dwc2_restore_global_registers(struct dwc2_hsotg *hsotg);
void dwc2_enable_acg(struct dwc2_hsotg *hsotg);
+void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup);
/* This function should be called on every hardware interrupt. */
irqreturn_t dwc2_handle_common_intr(int irq, void *dev);
diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c
index 158ede7538548e..26d752a4c3ca95 100644
--- a/drivers/usb/dwc2/core_intr.c
+++ b/drivers/usb/dwc2/core_intr.c
@@ -297,7 +297,8 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg)
/* Exit gadget mode clock gating. */
if (hsotg->params.power_down ==
- DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended)
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
dwc2_gadget_exit_clock_gating(hsotg, 0);
}
@@ -322,10 +323,11 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg)
* @hsotg: Programming view of DWC_otg controller
*
*/
-static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg)
+void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup)
{
u32 glpmcfg;
- u32 i = 0;
+ u32 pcgctl;
+ u32 dctl;
if (hsotg->lx_state != DWC2_L1) {
dev_err(hsotg->dev, "Core isn't in DWC2_L1 state\n");
@@ -334,37 +336,57 @@ static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg)
glpmcfg = dwc2_readl(hsotg, GLPMCFG);
if (dwc2_is_device_mode(hsotg)) {
- dev_dbg(hsotg->dev, "Exit from L1 state\n");
+ dev_dbg(hsotg->dev, "Exit from L1 state, remotewakeup=%d\n", remotewakeup);
glpmcfg &= ~GLPMCFG_ENBLSLPM;
- glpmcfg &= ~GLPMCFG_HIRD_THRES_EN;
+ glpmcfg &= ~GLPMCFG_HIRD_THRES_MASK;
dwc2_writel(hsotg, glpmcfg, GLPMCFG);
- do {
- glpmcfg = dwc2_readl(hsotg, GLPMCFG);
+ pcgctl = dwc2_readl(hsotg, PCGCTL);
+ pcgctl &= ~PCGCTL_ENBL_SLEEP_GATING;
+ dwc2_writel(hsotg, pcgctl, PCGCTL);
- if (!(glpmcfg & (GLPMCFG_COREL1RES_MASK |
- GLPMCFG_L1RESUMEOK | GLPMCFG_SLPSTS)))
- break;
+ glpmcfg = dwc2_readl(hsotg, GLPMCFG);
+ if (glpmcfg & GLPMCFG_ENBESL) {
+ glpmcfg |= GLPMCFG_RSTRSLPSTS;
+ dwc2_writel(hsotg, glpmcfg, GLPMCFG);
+ }
+
+ if (remotewakeup) {
+ if (dwc2_hsotg_wait_bit_set(hsotg, GLPMCFG, GLPMCFG_L1RESUMEOK, 1000)) {
+ dev_warn(hsotg->dev, "%s: timeout GLPMCFG_L1RESUMEOK\n", __func__);
+ goto fail;
+ return;
+ }
+
+ dctl = dwc2_readl(hsotg, DCTL);
+ dctl |= DCTL_RMTWKUPSIG;
+ dwc2_writel(hsotg, dctl, DCTL);
- udelay(1);
- } while (++i < 200);
+ if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_WKUPINT, 1000)) {
+ dev_warn(hsotg->dev, "%s: timeout GINTSTS_WKUPINT\n", __func__);
+ goto fail;
+ return;
+ }
+ }
- if (i == 200) {
- dev_err(hsotg->dev, "Failed to exit L1 sleep state in 200us.\n");
+ glpmcfg = dwc2_readl(hsotg, GLPMCFG);
+ if (glpmcfg & GLPMCFG_COREL1RES_MASK || glpmcfg & GLPMCFG_SLPSTS ||
+ glpmcfg & GLPMCFG_L1RESUMEOK) {
+ goto fail;
return;
}
- dwc2_gadget_init_lpm(hsotg);
+
+ /* Inform gadget to exit from L1 */
+ call_gadget(hsotg, resume);
+ /* Change to L0 state */
+ hsotg->lx_state = DWC2_L0;
+ hsotg->bus_suspended = false;
+fail: dwc2_gadget_init_lpm(hsotg);
} else {
/* TODO */
dev_err(hsotg->dev, "Host side LPM is not supported.\n");
return;
}
-
- /* Change to L0 state */
- hsotg->lx_state = DWC2_L0;
-
- /* Inform gadget to exit from L1 */
- call_gadget(hsotg, resume);
}
/*
@@ -385,7 +407,7 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg)
dev_dbg(hsotg->dev, "%s lxstate = %d\n", __func__, hsotg->lx_state);
if (hsotg->lx_state == DWC2_L1) {
- dwc2_wakeup_from_lpm_l1(hsotg);
+ dwc2_wakeup_from_lpm_l1(hsotg, false);
return;
}
@@ -408,7 +430,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg)
/* Exit gadget mode clock gating. */
if (hsotg->params.power_down ==
- DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended)
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
dwc2_gadget_exit_clock_gating(hsotg, 0);
} else {
/* Change to L0 state */
@@ -425,7 +448,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg)
}
if (hsotg->params.power_down ==
- DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended)
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
dwc2_host_exit_clock_gating(hsotg, 1);
/*
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index b517a7216de22a..b2f6da5b65ccd0 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -1415,6 +1415,10 @@ static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req,
ep->name, req, req->length, req->buf, req->no_interrupt,
req->zero, req->short_not_ok);
+ if (hs->lx_state == DWC2_L1) {
+ dwc2_wakeup_from_lpm_l1(hs, true);
+ }
+
/* Prevent new request submission when controller is suspended */
if (hs->lx_state != DWC2_L0) {
dev_dbg(hs->dev, "%s: submit request only in active state\n",
@@ -3727,6 +3731,12 @@ irq_retry:
if (hsotg->in_ppd && hsotg->lx_state == DWC2_L2)
dwc2_exit_partial_power_down(hsotg, 0, true);
+ /* Exit gadget mode clock gating. */
+ if (hsotg->params.power_down ==
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
+ dwc2_gadget_exit_clock_gating(hsotg, 0);
+
hsotg->lx_state = DWC2_L0;
}
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 35c7a4df8e7175..dd5b1c5691e11e 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -2701,8 +2701,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions(
hsotg->available_host_channels--;
}
qh = list_entry(qh_ptr, struct dwc2_qh, qh_list_entry);
- if (dwc2_assign_and_init_hc(hsotg, qh))
+ if (dwc2_assign_and_init_hc(hsotg, qh)) {
+ if (hsotg->params.uframe_sched)
+ hsotg->available_host_channels++;
break;
+ }
/*
* Move the QH from the periodic ready schedule to the
@@ -2735,8 +2738,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions(
hsotg->available_host_channels--;
}
- if (dwc2_assign_and_init_hc(hsotg, qh))
+ if (dwc2_assign_and_init_hc(hsotg, qh)) {
+ if (hsotg->params.uframe_sched)
+ hsotg->available_host_channels++;
break;
+ }
/*
* Move the QH from the non-periodic inactive schedule to the
@@ -4143,6 +4149,8 @@ void dwc2_host_complete(struct dwc2_hsotg *hsotg, struct dwc2_qtd *qtd,
urb->actual_length);
if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
+ if (!hsotg->params.dma_desc_enable)
+ urb->start_frame = qtd->qh->start_active_frame;
urb->error_count = dwc2_hcd_urb_get_error_count(qtd->urb);
for (i = 0; i < urb->number_of_packets; ++i) {
urb->iso_frame_desc[i].actual_length =
@@ -4649,7 +4657,7 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
}
if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE &&
- hsotg->bus_suspended) {
+ hsotg->bus_suspended && !hsotg->params.no_clock_gating) {
if (dwc2_is_device_mode(hsotg))
dwc2_gadget_exit_clock_gating(hsotg, 0);
else
@@ -5406,9 +5414,16 @@ int dwc2_backup_host_registers(struct dwc2_hsotg *hsotg)
/* Backup Host regs */
hr = &hsotg->hr_backup;
hr->hcfg = dwc2_readl(hsotg, HCFG);
+ hr->hflbaddr = dwc2_readl(hsotg, HFLBADDR);
hr->haintmsk = dwc2_readl(hsotg, HAINTMSK);
- for (i = 0; i < hsotg->params.host_channels; ++i)
+ for (i = 0; i < hsotg->params.host_channels; ++i) {
+ hr->hcchar[i] = dwc2_readl(hsotg, HCCHAR(i));
+ hr->hcsplt[i] = dwc2_readl(hsotg, HCSPLT(i));
hr->hcintmsk[i] = dwc2_readl(hsotg, HCINTMSK(i));
+ hr->hctsiz[i] = dwc2_readl(hsotg, HCTSIZ(i));
+ hr->hcidma[i] = dwc2_readl(hsotg, HCDMA(i));
+ hr->hcidmab[i] = dwc2_readl(hsotg, HCDMAB(i));
+ }
hr->hprt0 = dwc2_read_hprt0(hsotg);
hr->hfir = dwc2_readl(hsotg, HFIR);
@@ -5442,10 +5457,17 @@ int dwc2_restore_host_registers(struct dwc2_hsotg *hsotg)
hr->valid = false;
dwc2_writel(hsotg, hr->hcfg, HCFG);
+ dwc2_writel(hsotg, hr->hflbaddr, HFLBADDR);
dwc2_writel(hsotg, hr->haintmsk, HAINTMSK);
- for (i = 0; i < hsotg->params.host_channels; ++i)
+ for (i = 0; i < hsotg->params.host_channels; ++i) {
+ dwc2_writel(hsotg, hr->hcchar[i], HCCHAR(i));
+ dwc2_writel(hsotg, hr->hcsplt[i], HCSPLT(i));
dwc2_writel(hsotg, hr->hcintmsk[i], HCINTMSK(i));
+ dwc2_writel(hsotg, hr->hctsiz[i], HCTSIZ(i));
+ dwc2_writel(hsotg, hr->hcidma[i], HCDMA(i));
+ dwc2_writel(hsotg, hr->hcidmab[i], HCDMAB(i));
+ }
dwc2_writel(hsotg, hr->hprt0, HPRT0);
dwc2_writel(hsotg, hr->hfir, HFIR);
@@ -5610,10 +5632,12 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup,
dwc2_writel(hsotg, hr->hcfg, HCFG);
/* De-assert Wakeup Logic */
- gpwrdn = dwc2_readl(hsotg, GPWRDN);
- gpwrdn &= ~GPWRDN_PMUACTV;
- dwc2_writel(hsotg, gpwrdn, GPWRDN);
- udelay(10);
+ if (!(rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) {
+ gpwrdn = dwc2_readl(hsotg, GPWRDN);
+ gpwrdn &= ~GPWRDN_PMUACTV;
+ dwc2_writel(hsotg, gpwrdn, GPWRDN);
+ udelay(10);
+ }
hprt0 = hr->hprt0;
hprt0 |= HPRT0_PWR;
@@ -5638,6 +5662,13 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup,
hprt0 |= HPRT0_RES;
dwc2_writel(hsotg, hprt0, HPRT0);
+ /* De-assert Wakeup Logic */
+ if ((rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) {
+ gpwrdn = dwc2_readl(hsotg, GPWRDN);
+ gpwrdn &= ~GPWRDN_PMUACTV;
+ dwc2_writel(hsotg, gpwrdn, GPWRDN);
+ udelay(10);
+ }
/* Wait for Resume time and then program HPRT again */
mdelay(100);
hprt0 &= ~HPRT0_RES;
diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c
index 6b4d825e97a2d9..994a78ad084b1c 100644
--- a/drivers/usb/dwc2/hcd_ddma.c
+++ b/drivers/usb/dwc2/hcd_ddma.c
@@ -559,7 +559,7 @@ static void dwc2_init_isoc_dma_desc(struct dwc2_hsotg *hsotg,
idx = qh->td_last;
inc = qh->host_interval;
hsotg->frame_number = dwc2_hcd_get_frame_number(hsotg);
- cur_idx = dwc2_frame_list_idx(hsotg->frame_number);
+ cur_idx = idx;
next_idx = dwc2_desclist_idx_inc(qh->td_last, inc, qh->dev_speed);
/*
@@ -866,20 +866,27 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg,
{
struct dwc2_dma_desc *dma_desc;
struct dwc2_hcd_iso_packet_desc *frame_desc;
+ u16 frame_desc_idx;
+ struct urb *usb_urb;
u16 remain = 0;
int rc = 0;
if (!qtd->urb)
return -EINVAL;
+ usb_urb = qtd->urb->priv;
+
dma_sync_single_for_cpu(hsotg->dev, qh->desc_list_dma + (idx *
sizeof(struct dwc2_dma_desc)),
sizeof(struct dwc2_dma_desc),
DMA_FROM_DEVICE);
dma_desc = &qh->desc_list[idx];
+ frame_desc_idx = (idx - qtd->isoc_td_first) & (usb_urb->number_of_packets - 1);
- frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index_last];
+ frame_desc = &qtd->urb->iso_descs[frame_desc_idx];
+ if (idx == qtd->isoc_td_first)
+ usb_urb->start_frame = dwc2_hcd_get_frame_number(hsotg);
dma_desc->buf = (u32)(qtd->urb->dma + frame_desc->offset);
if (chan->ep_is_in)
remain = (dma_desc->status & HOST_DMA_ISOC_NBYTES_MASK) >>
@@ -900,7 +907,7 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg,
frame_desc->status = 0;
}
- if (++qtd->isoc_frame_index == qtd->urb->packet_count) {
+ if (++qtd->isoc_frame_index == usb_urb->number_of_packets) {
/*
* urb->status is not used for isoc transfers here. The
* individual frame_desc status are used instead.
@@ -1005,11 +1012,11 @@ static void dwc2_complete_isoc_xfer_ddma(struct dwc2_hsotg *hsotg,
return;
idx = dwc2_desclist_idx_inc(idx, qh->host_interval,
chan->speed);
- if (!rc)
+ if (rc == 0)
continue;
- if (rc == DWC2_CMPL_DONE)
- break;
+ if (rc == DWC2_CMPL_DONE || rc == DWC2_CMPL_STOP)
+ goto stop_scan;
/* rc == DWC2_CMPL_STOP */
diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h
index 13abdd5f675299..12f8c7f86dc980 100644
--- a/drivers/usb/dwc2/hw.h
+++ b/drivers/usb/dwc2/hw.h
@@ -698,7 +698,7 @@
#define TXSTS_QTOP_TOKEN_MASK (0x3 << 25)
#define TXSTS_QTOP_TOKEN_SHIFT 25
#define TXSTS_QTOP_TERMINATE BIT(24)
-#define TXSTS_QSPCAVAIL_MASK (0xff << 16)
+#define TXSTS_QSPCAVAIL_MASK (0x7f << 16)
#define TXSTS_QSPCAVAIL_SHIFT 16
#define TXSTS_FSPCAVAIL_MASK (0xffff << 0)
#define TXSTS_FSPCAVAIL_SHIFT 0
diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index b1d48019e944f3..7b84416dfc2b11 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -331,7 +331,7 @@ static void dwc2_driver_remove(struct platform_device *dev)
/* Exit clock gating when driver is removed. */
if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE &&
- hsotg->bus_suspended) {
+ hsotg->bus_suspended && !hsotg->params.no_clock_gating) {
if (dwc2_is_device_mode(hsotg))
dwc2_gadget_exit_clock_gating(hsotg, 0);
else
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 3e55838c000144..100041320e8dd2 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -104,6 +104,27 @@ static int dwc3_get_dr_mode(struct dwc3 *dwc)
return 0;
}
+void dwc3_enable_susphy(struct dwc3 *dwc, bool enable)
+{
+ u32 reg;
+
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0));
+ if (enable && !dwc->dis_u3_susphy_quirk)
+ reg |= DWC3_GUSB3PIPECTL_SUSPHY;
+ else
+ reg &= ~DWC3_GUSB3PIPECTL_SUSPHY;
+
+ dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), reg);
+
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+ if (enable && !dwc->dis_u2_susphy_quirk)
+ reg |= DWC3_GUSB2PHYCFG_SUSPHY;
+ else
+ reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
+
+ dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
+}
+
void dwc3_set_prtcap(struct dwc3 *dwc, u32 mode)
{
u32 reg;
@@ -585,11 +606,8 @@ static int dwc3_core_ulpi_init(struct dwc3 *dwc)
*/
static int dwc3_phy_setup(struct dwc3 *dwc)
{
- unsigned int hw_mode;
u32 reg;
- hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0);
-
reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0));
/*
@@ -599,21 +617,16 @@ static int dwc3_phy_setup(struct dwc3 *dwc)
reg &= ~DWC3_GUSB3PIPECTL_UX_EXIT_PX;
/*
- * Above 1.94a, it is recommended to set DWC3_GUSB3PIPECTL_SUSPHY
- * to '0' during coreConsultant configuration. So default value
- * will be '0' when the core is reset. Application needs to set it
- * to '1' after the core initialization is completed.
- */
- if (!DWC3_VER_IS_WITHIN(DWC3, ANY, 194A))
- reg |= DWC3_GUSB3PIPECTL_SUSPHY;
-
- /*
- * For DRD controllers, GUSB3PIPECTL.SUSPENDENABLE must be cleared after
- * power-on reset, and it can be set after core initialization, which is
- * after device soft-reset during initialization.
+ * Above DWC_usb3.0 1.94a, it is recommended to set
+ * DWC3_GUSB3PIPECTL_SUSPHY to '0' during coreConsultant configuration.
+ * So default value will be '0' when the core is reset. Application
+ * needs to set it to '1' after the core initialization is completed.
+ *
+ * Similarly for DRD controllers, GUSB3PIPECTL.SUSPENDENABLE must be
+ * cleared after power-on reset, and it can be set after core
+ * initialization.
*/
- if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD)
- reg &= ~DWC3_GUSB3PIPECTL_SUSPHY;
+ reg &= ~DWC3_GUSB3PIPECTL_SUSPHY;
if (dwc->u2ss_inp3_quirk)
reg |= DWC3_GUSB3PIPECTL_U2SSINP3OK;
@@ -639,9 +652,6 @@ static int dwc3_phy_setup(struct dwc3 *dwc)
if (dwc->tx_de_emphasis_quirk)
reg |= DWC3_GUSB3PIPECTL_TX_DEEPH(dwc->tx_de_emphasis);
- if (dwc->dis_u3_susphy_quirk)
- reg &= ~DWC3_GUSB3PIPECTL_SUSPHY;
-
if (dwc->dis_del_phy_power_chg_quirk)
reg &= ~DWC3_GUSB3PIPECTL_DEPOCHANGE;
@@ -689,24 +699,15 @@ static int dwc3_phy_setup(struct dwc3 *dwc)
}
/*
- * Above 1.94a, it is recommended to set DWC3_GUSB2PHYCFG_SUSPHY to
- * '0' during coreConsultant configuration. So default value will
- * be '0' when the core is reset. Application needs to set it to
- * '1' after the core initialization is completed.
- */
- if (!DWC3_VER_IS_WITHIN(DWC3, ANY, 194A))
- reg |= DWC3_GUSB2PHYCFG_SUSPHY;
-
- /*
- * For DRD controllers, GUSB2PHYCFG.SUSPHY must be cleared after
- * power-on reset, and it can be set after core initialization, which is
- * after device soft-reset during initialization.
+ * Above DWC_usb3.0 1.94a, it is recommended to set
+ * DWC3_GUSB2PHYCFG_SUSPHY to '0' during coreConsultant configuration.
+ * So default value will be '0' when the core is reset. Application
+ * needs to set it to '1' after the core initialization is completed.
+ *
+ * Similarly for DRD controllers, GUSB2PHYCFG.SUSPHY must be cleared
+ * after power-on reset, and it can be set after core initialization.
*/
- if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD)
- reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
-
- if (dwc->dis_u2_susphy_quirk)
- reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
+ reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
if (dwc->dis_enblslpm_quirk)
reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM;
@@ -1227,21 +1228,6 @@ static int dwc3_core_init(struct dwc3 *dwc)
if (ret)
goto err_exit_phy;
- if (hw_mode == DWC3_GHWPARAMS0_MODE_DRD &&
- !DWC3_VER_IS_WITHIN(DWC3, ANY, 194A)) {
- if (!dwc->dis_u3_susphy_quirk) {
- reg = dwc3_readl(dwc->regs, DWC3_GUSB3PIPECTL(0));
- reg |= DWC3_GUSB3PIPECTL_SUSPHY;
- dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), reg);
- }
-
- if (!dwc->dis_u2_susphy_quirk) {
- reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
- reg |= DWC3_GUSB2PHYCFG_SUSPHY;
- dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
- }
- }
-
dwc3_core_setup_global_control(dwc);
dwc3_core_num_eps(dwc);
@@ -1519,6 +1505,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
else
dwc->sysdev = dwc->dev;
+ dwc->sys_wakeup = device_may_wakeup(dwc->sysdev);
+
ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name);
if (ret >= 0) {
dwc->usb_psy = power_supply_get_by_name(usb_psy_name);
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index c07edfc954f72e..180dd8d29287c6 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1133,6 +1133,7 @@ struct dwc3_scratchpad_array {
* 3 - Reserved
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
+ * @sys_wakeup: set if the device may do system wakeup.
* @wakeup_configured: set if the device is configured for remote wakeup.
* @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
@@ -1357,6 +1358,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
+ unsigned sys_wakeup:1;
unsigned wakeup_configured:1;
unsigned suspended:1;
@@ -1578,6 +1580,7 @@ int dwc3_event_buffers_setup(struct dwc3 *dwc);
void dwc3_event_buffers_cleanup(struct dwc3 *dwc);
int dwc3_core_soft_reset(struct dwc3 *dwc);
+void dwc3_enable_susphy(struct dwc3 *dwc, bool enable);
#if IS_ENABLED(CONFIG_USB_DWC3_HOST) || IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE)
int dwc3_host_init(struct dwc3 *dwc);
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 39564e17f3b07a..497deed38c0c1e 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -51,7 +51,6 @@
#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1
#define PCI_DEVICE_ID_INTEL_MTLS 0x7f6f
#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e
-#define PCI_DEVICE_ID_INTEL_ARLH 0x7ec1
#define PCI_DEVICE_ID_INTEL_ARLH_PCH 0x777e
#define PCI_DEVICE_ID_INTEL_TGL 0x9a15
#define PCI_DEVICE_ID_AMD_MR 0x163a
@@ -423,7 +422,6 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
{ PCI_DEVICE_DATA(INTEL, MTLP, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, MTL, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) },
- { PCI_DEVICE_DATA(INTEL, ARLH, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) },
diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 72bb722da2f258..d96ffbe520397a 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -226,7 +226,8 @@ void dwc3_ep0_stall_and_restart(struct dwc3 *dwc)
/* reinitialize physical ep1 */
dep = dwc->eps[1];
- dep->flags = DWC3_EP_ENABLED;
+ dep->flags &= DWC3_EP_RESOURCE_ALLOCATED;
+ dep->flags |= DWC3_EP_ENABLED;
/* stall is always issued on EP0 */
dep = dwc->eps[0];
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 40c52dbc28d3b4..f94f68f1e7d2b7 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2924,6 +2924,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc)
dwc3_ep0_out_start(dwc);
dwc3_gadget_enable_irq(dwc);
+ dwc3_enable_susphy(dwc, true);
return 0;
@@ -2955,6 +2956,9 @@ static int dwc3_gadget_start(struct usb_gadget *g,
dwc->gadget_driver = driver;
spin_unlock_irqrestore(&dwc->lock, flags);
+ if (dwc->sys_wakeup)
+ device_wakeup_enable(dwc->sysdev);
+
return 0;
}
@@ -2970,6 +2974,9 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
struct dwc3 *dwc = gadget_to_dwc(g);
unsigned long flags;
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
spin_lock_irqsave(&dwc->lock, flags);
dwc->gadget_driver = NULL;
dwc->max_cfg_eps = 0;
@@ -4651,6 +4658,10 @@ int dwc3_gadget_init(struct dwc3 *dwc)
else
dwc3_gadget_set_speed(dwc->gadget, dwc->maximum_speed);
+ /* No system wakeup if no gadget driver bound */
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
return 0;
err5:
@@ -4680,6 +4691,7 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
if (!dwc->gadget)
return;
+ dwc3_enable_susphy(dwc, false);
usb_del_gadget(dwc->gadget);
dwc3_gadget_free_endpoints(dwc);
usb_put_gadget(dwc->gadget);
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index 5a5cb6ce9946d3..a171b27a7845af 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -10,10 +10,13 @@
#include <linux/irq.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/usb.h>
+#include <linux/usb/hcd.h>
#include "../host/xhci-port.h"
#include "../host/xhci-ext-caps.h"
#include "../host/xhci-caps.h"
+#include "../host/xhci-plat.h"
#include "core.h"
#define XHCI_HCSPARAMS1 0x4
@@ -57,6 +60,24 @@ static void dwc3_power_off_all_roothub_ports(struct dwc3 *dwc)
}
}
+static void dwc3_xhci_plat_start(struct usb_hcd *hcd)
+{
+ struct platform_device *pdev;
+ struct dwc3 *dwc;
+
+ if (!usb_hcd_is_primary_hcd(hcd))
+ return;
+
+ pdev = to_platform_device(hcd->self.controller);
+ dwc = dev_get_drvdata(pdev->dev.parent);
+
+ dwc3_enable_susphy(dwc, true);
+}
+
+static const struct xhci_plat_priv dwc3_xhci_plat_quirk = {
+ .plat_start = dwc3_xhci_plat_start,
+};
+
static void dwc3_host_fill_xhci_irq_res(struct dwc3 *dwc,
int irq, char *name)
{
@@ -167,12 +188,25 @@ int dwc3_host_init(struct dwc3 *dwc)
}
}
+ ret = platform_device_add_data(xhci, &dwc3_xhci_plat_quirk,
+ sizeof(struct xhci_plat_priv));
+ if (ret)
+ goto err;
+
ret = platform_device_add(xhci);
if (ret) {
dev_err(dwc->dev, "failed to register xHCI device\n");
goto err;
}
+ if (dwc->sys_wakeup) {
+ /* Restore wakeup setting if switched from device */
+ device_wakeup_enable(dwc->sysdev);
+
+ /* Pass on wakeup setting to the new xhci platform device */
+ device_init_wakeup(&xhci->dev, true);
+ }
+
return 0;
err:
platform_device_put(xhci);
@@ -181,6 +215,10 @@ err:
void dwc3_host_exit(struct dwc3 *dwc)
{
+ if (dwc->sys_wakeup)
+ device_init_wakeup(&dwc->xhci->dev, false);
+
+ dwc3_enable_susphy(dwc, false);
platform_device_unregister(dwc->xhci);
dwc->xhci = NULL;
}
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 0ace45b66a31c4..0e151b54aae82a 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -2112,7 +2112,7 @@ unknown:
buf[5] = 0x01;
switch (ctrl->bRequestType & USB_RECIP_MASK) {
case USB_RECIP_DEVICE:
- if (w_index != 0x4 || (w_value >> 8))
+ if (w_index != 0x4 || (w_value & 0xff))
break;
buf[6] = w_index;
/* Number of ext compat interfaces */
@@ -2128,9 +2128,9 @@ unknown:
}
break;
case USB_RECIP_INTERFACE:
- if (w_index != 0x5 || (w_value >> 8))
+ if (w_index != 0x5 || (w_value & 0xff))
break;
- interface = w_value & 0xFF;
+ interface = w_value >> 8;
if (interface >= MAX_CONFIG_INTERFACES ||
!os_desc_cfg->interface[interface])
break;
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index bffbc1dc651f9e..a057cbedf3c9b3 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -46,6 +46,8 @@
#define FUNCTIONFS_MAGIC 0xa647361 /* Chosen by a honest dice roll ;) */
+#define DMABUF_ENQUEUE_TIMEOUT_MS 5000
+
MODULE_IMPORT_NS(DMA_BUF);
/* Reference counter handling */
@@ -850,6 +852,7 @@ static void ffs_user_copy_worker(struct work_struct *work)
work);
int ret = io_data->status;
bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD;
+ unsigned long flags;
if (io_data->read && ret > 0) {
kthread_use_mm(io_data->mm);
@@ -862,6 +865,11 @@ static void ffs_user_copy_worker(struct work_struct *work)
if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd)
eventfd_signal(io_data->ffs->ffs_eventfd);
+ spin_lock_irqsave(&io_data->ffs->eps_lock, flags);
+ usb_ep_free_request(io_data->ep, io_data->req);
+ io_data->req = NULL;
+ spin_unlock_irqrestore(&io_data->ffs->eps_lock, flags);
+
if (io_data->read)
kfree(io_data->to_free);
ffs_free_buffer(io_data);
@@ -875,7 +883,6 @@ static void ffs_epfile_async_io_complete(struct usb_ep *_ep,
struct ffs_data *ffs = io_data->ffs;
io_data->status = req->status ? req->status : req->actual;
- usb_ep_free_request(_ep, req);
INIT_WORK(&io_data->work, ffs_user_copy_worker);
queue_work(ffs->io_completion_wq, &io_data->work);
@@ -1578,10 +1585,13 @@ static int ffs_dmabuf_transfer(struct file *file,
struct ffs_dmabuf_priv *priv;
struct ffs_dma_fence *fence;
struct usb_request *usb_req;
+ enum dma_resv_usage resv_dir;
struct dma_buf *dmabuf;
+ unsigned long timeout;
struct ffs_ep *ep;
bool cookie;
u32 seqno;
+ long retl;
int ret;
if (req->flags & ~USB_FFS_DMABUF_TRANSFER_MASK)
@@ -1615,17 +1625,14 @@ static int ffs_dmabuf_transfer(struct file *file,
goto err_attachment_put;
/* Make sure we don't have writers */
- if (!dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_WRITE)) {
- pr_vdebug("FFS WRITE fence is not signaled\n");
- ret = -EBUSY;
- goto err_resv_unlock;
- }
-
- /* If we're writing to the DMABUF, make sure we don't have readers */
- if (epfile->in &&
- !dma_resv_test_signaled(dmabuf->resv, DMA_RESV_USAGE_READ)) {
- pr_vdebug("FFS READ fence is not signaled\n");
- ret = -EBUSY;
+ timeout = nonblock ? 0 : msecs_to_jiffies(DMABUF_ENQUEUE_TIMEOUT_MS);
+ retl = dma_resv_wait_timeout(dmabuf->resv,
+ dma_resv_usage_rw(epfile->in),
+ true, timeout);
+ if (retl == 0)
+ retl = -EBUSY;
+ if (retl < 0) {
+ ret = (int)retl;
goto err_resv_unlock;
}
@@ -1665,8 +1672,9 @@ static int ffs_dmabuf_transfer(struct file *file,
dma_fence_init(&fence->base, &ffs_dmabuf_fence_ops,
&priv->lock, priv->context, seqno);
- dma_resv_add_fence(dmabuf->resv, &fence->base,
- dma_resv_usage_rw(epfile->in));
+ resv_dir = epfile->in ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ;
+
+ dma_resv_add_fence(dmabuf->resv, &fence->base, resv_dir);
dma_resv_unlock(dmabuf->resv);
/* Now that the dma_fence is in place, queue the transfer. */
@@ -3803,7 +3811,7 @@ static int ffs_func_setup(struct usb_function *f,
__ffs_event_add(ffs, FUNCTIONFS_SETUP);
spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags);
- return creq->wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0;
+ return ffs->ev.setup.wLength == 0 ? USB_GADGET_DELAYED_STATUS : 0;
}
static bool ffs_func_req_match(struct usb_function *f,
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index 28f4e6552e8459..0acc32ed99609f 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -878,7 +878,7 @@ static int ncm_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
if (alt > 1)
goto fail;
- if (ncm->port.in_ep->enabled) {
+ if (ncm->netdev) {
DBG(cdev, "reset ncm\n");
ncm->netdev = NULL;
gether_disconnect(&ncm->port);
@@ -1367,7 +1367,7 @@ static void ncm_disable(struct usb_function *f)
DBG(cdev, "ncm deactivated\n");
- if (ncm->port.in_ep->enabled) {
+ if (ncm->netdev) {
ncm->netdev = NULL;
gether_disconnect(&ncm->port);
}
diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c
index 7e704b2bcfd1ce..a4377df612f51e 100644
--- a/drivers/usb/gadget/function/uvc_configfs.c
+++ b/drivers/usb/gadget/function/uvc_configfs.c
@@ -92,10 +92,10 @@ static int __uvcg_iter_item_entries(const char *page, size_t len,
while (pg - page < len) {
i = 0;
- while (i < sizeof(buf) && (pg - page < len) &&
+ while (i < bufsize && (pg - page < len) &&
*pg != '\0' && *pg != '\n')
buf[i++] = *pg++;
- if (i == sizeof(buf)) {
+ if (i == bufsize) {
ret = -EINVAL;
goto out_free_buf;
}
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 9d4150124fdb82..b3a9d18a8dcd19 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -292,7 +292,9 @@ int usb_ep_queue(struct usb_ep *ep,
{
int ret = 0;
- if (WARN_ON_ONCE(!ep->enabled && ep->address)) {
+ if (!ep->enabled && ep->address) {
+ pr_debug("USB gadget: queue request to disabled ep 0x%x (%s)\n",
+ ep->address, ep->name);
ret = -ESHUTDOWN;
goto out;
}
diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index e82d03224f940f..3432ebfae97879 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -868,7 +868,7 @@ fsl_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags)
{
struct fsl_ep *ep = container_of(_ep, struct fsl_ep, ep);
struct fsl_req *req = container_of(_req, struct fsl_req, req);
- struct fsl_udc *udc;
+ struct fsl_udc *udc = ep->udc;
unsigned long flags;
int ret;
@@ -878,7 +878,7 @@ fsl_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags)
dev_vdbg(&udc->gadget.dev, "%s, bad params\n", __func__);
return -EINVAL;
}
- if (unlikely(!_ep || !ep->ep.desc)) {
+ if (unlikely(!ep->ep.desc)) {
dev_vdbg(&udc->gadget.dev, "%s, bad ep\n", __func__);
return -EINVAL;
}
@@ -887,7 +887,6 @@ fsl_ep_queue(struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags)
return -EMSGSIZE;
}
- udc = ep->udc;
if (!udc->driver || udc->gadget.speed == USB_SPEED_UNKNOWN)
return -ESHUTDOWN;
diff --git a/drivers/usb/host/xhci-plat.h b/drivers/usb/host/xhci-plat.h
index 2d15386f2c504b..6475130eac4b38 100644
--- a/drivers/usb/host/xhci-plat.h
+++ b/drivers/usb/host/xhci-plat.h
@@ -8,7 +8,9 @@
#ifndef _XHCI_PLAT_H
#define _XHCI_PLAT_H
-#include "xhci.h" /* for hcd_to_xhci() */
+struct device;
+struct platform_device;
+struct usb_hcd;
struct xhci_plat_priv {
const char *firmware_name;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 52278afea94be0..575f0fd9c9f11e 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3133,7 +3133,7 @@ static int xhci_handle_events(struct xhci_hcd *xhci, struct xhci_interrupter *ir
irqreturn_t xhci_irq(struct usb_hcd *hcd)
{
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
- irqreturn_t ret = IRQ_NONE;
+ irqreturn_t ret = IRQ_HANDLED;
u32 status;
spin_lock(&xhci->lock);
@@ -3141,12 +3141,13 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
status = readl(&xhci->op_regs->status);
if (status == ~(u32)0) {
xhci_hc_died(xhci);
- ret = IRQ_HANDLED;
goto out;
}
- if (!(status & STS_EINT))
+ if (!(status & STS_EINT)) {
+ ret = IRQ_NONE;
goto out;
+ }
if (status & STS_HCE) {
xhci_warn(xhci, "WARNING: Host Controller Error\n");
@@ -3156,7 +3157,6 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
if (status & STS_FATAL) {
xhci_warn(xhci, "WARNING: Host System Error\n");
xhci_halt(xhci);
- ret = IRQ_HANDLED;
goto out;
}
@@ -3167,7 +3167,6 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
*/
status |= STS_EINT;
writel(status, &xhci->op_regs->status);
- ret = IRQ_HANDLED;
/* This is the handler of the primary interrupter */
xhci_handle_events(xhci, xhci->interrupters[0]);
diff --git a/drivers/usb/host/xhci-rzv2m.c b/drivers/usb/host/xhci-rzv2m.c
index ec65b24eafa868..4f59867d7117cf 100644
--- a/drivers/usb/host/xhci-rzv2m.c
+++ b/drivers/usb/host/xhci-rzv2m.c
@@ -6,6 +6,7 @@
*/
#include <linux/usb/rzv2m_usb3drd.h>
+#include "xhci.h"
#include "xhci-plat.h"
#include "xhci-rzv2m.h"
diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h
index 1740000d54c295..5762564b9d7337 100644
--- a/drivers/usb/host/xhci-trace.h
+++ b/drivers/usb/host/xhci-trace.h
@@ -172,8 +172,7 @@ DECLARE_EVENT_CLASS(xhci_log_free_virt_dev,
__field(void *, vdev)
__field(unsigned long long, out_ctx)
__field(unsigned long long, in_ctx)
- __field(int, hcd_portnum)
- __field(int, hw_portnum)
+ __field(int, slot_id)
__field(u16, current_mel)
),
@@ -181,13 +180,12 @@ DECLARE_EVENT_CLASS(xhci_log_free_virt_dev,
__entry->vdev = vdev;
__entry->in_ctx = (unsigned long long) vdev->in_ctx->dma;
__entry->out_ctx = (unsigned long long) vdev->out_ctx->dma;
- __entry->hcd_portnum = (int) vdev->rhub_port->hcd_portnum;
- __entry->hw_portnum = (int) vdev->rhub_port->hw_portnum;
+ __entry->slot_id = (int) vdev->slot_id;
__entry->current_mel = (u16) vdev->current_mel;
),
- TP_printk("vdev %p ctx %llx | %llx hcd_portnum %d hw_portnum %d current_mel %d",
- __entry->vdev, __entry->in_ctx, __entry->out_ctx,
- __entry->hcd_portnum, __entry->hw_portnum, __entry->current_mel
+ TP_printk("vdev %p slot %d ctx %llx | %llx current_mel %d",
+ __entry->vdev, __entry->slot_id, __entry->in_ctx,
+ __entry->out_ctx, __entry->current_mel
)
);
diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c
index c6101ed2d9d49a..d8049275a023c6 100644
--- a/drivers/usb/misc/onboard_usb_hub.c
+++ b/drivers/usb/misc/onboard_usb_hub.c
@@ -78,7 +78,7 @@ static int onboard_hub_power_on(struct onboard_hub *hub)
err = regulator_bulk_enable(hub->pdata->num_supplies, hub->supplies);
if (err) {
dev_err(hub->dev, "failed to enable supplies: %pe\n", ERR_PTR(err));
- return err;
+ goto disable_clk;
}
fsleep(hub->pdata->reset_us);
@@ -87,6 +87,10 @@ static int onboard_hub_power_on(struct onboard_hub *hub)
hub->is_powered_on = true;
return 0;
+
+disable_clk:
+ clk_disable_unprepare(hub->clk);
+ return err;
}
static int onboard_hub_power_off(struct onboard_hub *hub)
diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c
index 35770e608c6497..2d30fc1be30669 100644
--- a/drivers/usb/misc/usb-ljca.c
+++ b/drivers/usb/misc/usb-ljca.c
@@ -518,8 +518,10 @@ static int ljca_new_client_device(struct ljca_adapter *adap, u8 type, u8 id,
int ret;
client = kzalloc(sizeof *client, GFP_KERNEL);
- if (!client)
+ if (!client) {
+ kfree(data);
return -ENOMEM;
+ }
client->type = type;
client->id = id;
@@ -535,8 +537,10 @@ static int ljca_new_client_device(struct ljca_adapter *adap, u8 type, u8 id,
auxdev->dev.release = ljca_auxdev_release;
ret = auxiliary_device_init(auxdev);
- if (ret)
+ if (ret) {
+ kfree(data);
goto err_free;
+ }
ljca_auxdev_acpi_bind(adap, auxdev, adr, id);
@@ -590,12 +594,8 @@ static int ljca_enumerate_gpio(struct ljca_adapter *adap)
valid_pin[i] = get_unaligned_le32(&desc->bank_desc[i].valid_pins);
bitmap_from_arr32(gpio_info->valid_pin_map, valid_pin, gpio_num);
- ret = ljca_new_client_device(adap, LJCA_CLIENT_GPIO, 0, "ljca-gpio",
+ return ljca_new_client_device(adap, LJCA_CLIENT_GPIO, 0, "ljca-gpio",
gpio_info, LJCA_GPIO_ACPI_ADR);
- if (ret)
- kfree(gpio_info);
-
- return ret;
}
static int ljca_enumerate_i2c(struct ljca_adapter *adap)
@@ -629,10 +629,8 @@ static int ljca_enumerate_i2c(struct ljca_adapter *adap)
ret = ljca_new_client_device(adap, LJCA_CLIENT_I2C, i,
"ljca-i2c", i2c_info,
LJCA_I2C1_ACPI_ADR + i);
- if (ret) {
- kfree(i2c_info);
+ if (ret)
return ret;
- }
}
return 0;
@@ -669,10 +667,8 @@ static int ljca_enumerate_spi(struct ljca_adapter *adap)
ret = ljca_new_client_device(adap, LJCA_CLIENT_SPI, i,
"ljca-spi", spi_info,
LJCA_SPI1_ACPI_ADR + i);
- if (ret) {
- kfree(spi_info);
+ if (ret)
return ret;
- }
}
return 0;
diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c
index 8f735a86cd1972..fdcffebf415cda 100644
--- a/drivers/usb/phy/phy-generic.c
+++ b/drivers/usb/phy/phy-generic.c
@@ -262,13 +262,6 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop)
return dev_err_probe(dev, PTR_ERR(nop->vbus_draw),
"could not get vbus regulator\n");
- nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus");
- if (PTR_ERR(nop->vbus_draw) == -ENODEV)
- nop->vbus_draw = NULL;
- if (IS_ERR(nop->vbus_draw))
- return dev_err_probe(dev, PTR_ERR(nop->vbus_draw),
- "could not get vbus regulator\n");
-
nop->dev = dev;
nop->phy.dev = nop->dev;
nop->phy.label = "nop-xceiv";
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 55a65d941ccbfb..8a5846d4adf67e 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -255,6 +255,10 @@ static void option_instat_callback(struct urb *urb);
#define QUECTEL_PRODUCT_EM061K_LMS 0x0124
#define QUECTEL_PRODUCT_EC25 0x0125
#define QUECTEL_PRODUCT_EM060K_128 0x0128
+#define QUECTEL_PRODUCT_EM060K_129 0x0129
+#define QUECTEL_PRODUCT_EM060K_12a 0x012a
+#define QUECTEL_PRODUCT_EM060K_12b 0x012b
+#define QUECTEL_PRODUCT_EM060K_12c 0x012c
#define QUECTEL_PRODUCT_EG91 0x0191
#define QUECTEL_PRODUCT_EG95 0x0195
#define QUECTEL_PRODUCT_BG96 0x0296
@@ -1218,6 +1222,18 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0x00, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_129, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12a, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12b, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_12c, 0xff, 0xff, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) },
@@ -1360,6 +1376,12 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(2) | RSVD(3) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1083, 0xff), /* Telit FE990 (ECM) */
.driver_info = NCTRL(0) | RSVD(1) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a0, 0xff), /* Telit FN20C04 (rmnet) */
+ .driver_info = RSVD(0) | NCTRL(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a4, 0xff), /* Telit FN20C04 (rmnet) */
+ .driver_info = RSVD(0) | NCTRL(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a9, 0xff), /* Telit FN20C04 (rmnet) */
+ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
.driver_info = NCTRL(0) | RSVD(1) | RSVD(3) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
@@ -2052,6 +2074,10 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(3) },
{ USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9803, 0xff),
.driver_info = RSVD(4) },
+ { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b05), /* Longsung U8300 */
+ .driver_info = RSVD(4) | RSVD(5) },
+ { USB_DEVICE(LONGCHEER_VENDOR_ID, 0x9b3c), /* Longsung U9300 */
+ .driver_info = RSVD(0) | RSVD(4) },
{ USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) },
{ USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) },
{ USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) },
@@ -2272,15 +2298,29 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */
{ USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0115, 0xff), /* Fibocom FM135 (laptop MBIM) */
+ .driver_info = RSVD(5) },
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a3, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */
{ USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */
.driver_info = RSVD(4) },
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a04, 0xff) }, /* Fibocom FM650-CN (ECM mode) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a05, 0xff) }, /* Fibocom FM650-CN (NCM mode) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a06, 0xff) }, /* Fibocom FM650-CN (RNDIS mode) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a07, 0xff) }, /* Fibocom FM650-CN (MBIM mode) */
{ USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */
{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */
{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */
{ USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */
+ { USB_DEVICE(0x33f8, 0x0104), /* Rolling RW101-GL (laptop RMNET) */
+ .driver_info = RSVD(4) | RSVD(5) },
+ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a2, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */
+ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a3, 0xff) }, /* Rolling RW101-GL (laptop MBIM) */
+ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x01a4, 0xff), /* Rolling RW101-GL (laptop MBIM) */
+ .driver_info = RSVD(4) },
+ { USB_DEVICE_INTERFACE_CLASS(0x33f8, 0x0115, 0xff), /* Rolling RW135-GL (laptop MBIM) */
+ .driver_info = RSVD(5) },
{ USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) },
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 71ace274761f18..08953f0d4532aa 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
@@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
struct uas_dev_info *devinfo)
{
struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd);
- struct urb *urb;
int err;
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return err;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
@@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_FROM_DEVICE);
if (!cmdinfo->data_in_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_IN_URB;
}
@@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
@@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_TO_DEVICE);
if (!cmdinfo->data_out_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_OUT_URB;
}
@@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
@@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (cmdinfo->state & ALLOC_CMD_URB) {
cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd);
if (!cmdinfo->cmd_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_CMD_URB;
}
@@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
@@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd)
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 389c7f0b8d9358..9610e647a8d480 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -1310,6 +1310,7 @@ static ssize_t select_usb_power_delivery_store(struct device *dev,
{
struct typec_port *port = to_typec_port(dev);
struct usb_power_delivery *pd;
+ int ret;
if (!port->ops || !port->ops->pd_set)
return -EOPNOTSUPP;
@@ -1318,7 +1319,11 @@ static ssize_t select_usb_power_delivery_store(struct device *dev,
if (!pd)
return -EINVAL;
- return port->ops->pd_set(port, pd);
+ ret = port->ops->pd_set(port, pd);
+ if (ret)
+ return ret;
+
+ return size;
}
static ssize_t select_usb_power_delivery_show(struct device *dev,
diff --git a/drivers/usb/typec/mux/it5205.c b/drivers/usb/typec/mux/it5205.c
index 5535932e42cdee..4357cc67a86722 100644
--- a/drivers/usb/typec/mux/it5205.c
+++ b/drivers/usb/typec/mux/it5205.c
@@ -22,7 +22,7 @@
#include <linux/usb/typec_mux.h>
#define IT5205_REG_CHIP_ID(x) (0x4 + (x))
-#define IT5205FN_CHIP_ID 0x35323035 /* "5205" */
+#define IT5205FN_CHIP_ID 0x35303235 /* "5025" -> "5205" */
/* MUX power down register */
#define IT5205_REG_MUXPDR 0x10
diff --git a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c
index e48412cdcb0fb5..d3958c061a972c 100644
--- a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c
+++ b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c
@@ -104,14 +104,18 @@ static int qcom_pmic_typec_probe(struct platform_device *pdev)
ret = tcpm->port_start(tcpm, tcpm->tcpm_port);
if (ret)
- goto fwnode_remove;
+ goto port_unregister;
ret = tcpm->pdphy_start(tcpm, tcpm->tcpm_port);
if (ret)
- goto fwnode_remove;
+ goto port_stop;
return 0;
+port_stop:
+ tcpm->port_stop(tcpm);
+port_unregister:
+ tcpm_unregister_port(tcpm->tcpm_port);
fwnode_remove:
fwnode_remove_software_node(tcpm->tcpc.fwnode);
diff --git a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c
index 6560f4fc98d5a3..5b7f52b74a40aa 100644
--- a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c
+++ b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec_pdphy.c
@@ -475,10 +475,8 @@ static int qcom_pmic_typec_pdphy_enable(struct pmic_typec_pdphy *pmic_typec_pdph
qcom_pmic_typec_pdphy_reset_off(pmic_typec_pdphy);
done:
- if (ret) {
- regulator_disable(pmic_typec_pdphy->vdd_pdphy);
+ if (ret)
dev_err(dev, "pdphy_enable fail %d\n", ret);
- }
return ret;
}
@@ -524,12 +522,17 @@ static int qcom_pmic_typec_pdphy_start(struct pmic_typec *tcpm,
ret = pmic_typec_pdphy_reset(pmic_typec_pdphy);
if (ret)
- return ret;
+ goto err_disable_vdd_pdhy;
for (i = 0; i < pmic_typec_pdphy->nr_irqs; i++)
enable_irq(pmic_typec_pdphy->irq_data[i].irq);
return 0;
+
+err_disable_vdd_pdhy:
+ regulator_disable(pmic_typec_pdphy->vdd_pdphy);
+
+ return ret;
}
static void qcom_pmic_typec_pdphy_stop(struct pmic_typec *tcpm)
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index ae2b6c94482d5f..ab6ed6111ed05c 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -6855,14 +6855,14 @@ static int tcpm_pd_set(struct typec_port *p, struct usb_power_delivery *pd)
if (data->sink_desc.pdo[0]) {
for (i = 0; i < PDO_MAX_OBJECTS && data->sink_desc.pdo[i]; i++)
port->snk_pdo[i] = data->sink_desc.pdo[i];
- port->nr_snk_pdo = i + 1;
+ port->nr_snk_pdo = i;
port->operating_snk_mw = data->operating_snk_mw;
}
if (data->source_desc.pdo[0]) {
for (i = 0; i < PDO_MAX_OBJECTS && data->source_desc.pdo[i]; i++)
- port->snk_pdo[i] = data->source_desc.pdo[i];
- port->nr_src_pdo = i + 1;
+ port->src_pdo[i] = data->source_desc.pdo[i];
+ port->nr_src_pdo = i;
}
switch (port->state) {
@@ -6910,7 +6910,9 @@ static int tcpm_pd_set(struct typec_port *p, struct usb_power_delivery *pd)
port->port_source_caps = data->source_cap;
port->port_sink_caps = data->sink_cap;
+ typec_port_set_usb_power_delivery(p, NULL);
port->selected_pd = pd;
+ typec_port_set_usb_power_delivery(p, port->selected_pd);
unlock:
mutex_unlock(&port->lock);
return ret;
@@ -6943,9 +6945,7 @@ static void tcpm_port_unregister_pd(struct tcpm_port *port)
port->port_source_caps = NULL;
for (i = 0; i < port->pd_count; i++) {
usb_power_delivery_unregister_capabilities(port->pd_list[i]->sink_cap);
- kfree(port->pd_list[i]->sink_cap);
usb_power_delivery_unregister_capabilities(port->pd_list[i]->source_cap);
- kfree(port->pd_list[i]->source_cap);
devm_kfree(port->dev, port->pd_list[i]);
port->pd_list[i] = NULL;
usb_power_delivery_unregister(port->pds[i]);
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index cf52cb34d28592..bd6ae92aa39e7a 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -151,8 +151,12 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd)
if (!(cci & UCSI_CCI_COMMAND_COMPLETE))
return -EIO;
- if (cci & UCSI_CCI_NOT_SUPPORTED)
+ if (cci & UCSI_CCI_NOT_SUPPORTED) {
+ if (ucsi_acknowledge_command(ucsi) < 0)
+ dev_err(ucsi->dev,
+ "ACK of unsupported command failed\n");
return -EOPNOTSUPP;
+ }
if (cci & UCSI_CCI_ERROR) {
if (cmd == UCSI_GET_ERROR_STATUS)
@@ -1133,17 +1137,21 @@ static int ucsi_check_cable(struct ucsi_connector *con)
if (ret < 0)
return ret;
- ret = ucsi_get_cable_identity(con);
- if (ret < 0)
- return ret;
+ if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE) {
+ ret = ucsi_get_cable_identity(con);
+ if (ret < 0)
+ return ret;
+ }
- ret = ucsi_register_plug(con);
- if (ret < 0)
- return ret;
+ if (con->ucsi->cap.features & UCSI_CAP_ALT_MODE_DETAILS) {
+ ret = ucsi_register_plug(con);
+ if (ret < 0)
+ return ret;
- ret = ucsi_register_altmodes(con, UCSI_RECIPIENT_SOP_P);
- if (ret < 0)
- return ret;
+ ret = ucsi_register_altmodes(con, UCSI_RECIPIENT_SOP_P);
+ if (ret < 0)
+ return ret;
+ }
return 0;
}
@@ -1189,8 +1197,10 @@ static void ucsi_handle_connector_change(struct work_struct *work)
ucsi_register_partner(con);
ucsi_partner_task(con, ucsi_check_connection, 1, HZ);
ucsi_partner_task(con, ucsi_check_connector_capability, 1, HZ);
- ucsi_partner_task(con, ucsi_get_partner_identity, 1, HZ);
- ucsi_partner_task(con, ucsi_check_cable, 1, HZ);
+ if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE)
+ ucsi_partner_task(con, ucsi_get_partner_identity, 1, HZ);
+ if (con->ucsi->cap.features & UCSI_CAP_CABLE_DETAILS)
+ ucsi_partner_task(con, ucsi_check_cable, 1, HZ);
if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) ==
UCSI_CONSTAT_PWR_OPMODE_PD)
@@ -1215,11 +1225,11 @@ static void ucsi_handle_connector_change(struct work_struct *work)
if (con->status.change & UCSI_CONSTAT_CAM_CHANGE)
ucsi_partner_task(con, ucsi_check_altmodes, 1, 0);
- clear_bit(EVENT_PENDING, &con->ucsi->flags);
-
mutex_lock(&ucsi->ppm_lock);
+ clear_bit(EVENT_PENDING, &con->ucsi->flags);
ret = ucsi_acknowledge_connector_change(ucsi);
mutex_unlock(&ucsi->ppm_lock);
+
if (ret)
dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret);
@@ -1237,7 +1247,7 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num)
struct ucsi_connector *con = &ucsi->connector[num - 1];
if (!(ucsi->ntfy & UCSI_ENABLE_NTFY_CONNECTOR_CHANGE)) {
- dev_dbg(ucsi->dev, "Bogus connector change event\n");
+ dev_dbg(ucsi->dev, "Early connector change event\n");
return;
}
@@ -1260,13 +1270,47 @@ static int ucsi_reset_connector(struct ucsi_connector *con, bool hard)
static int ucsi_reset_ppm(struct ucsi *ucsi)
{
- u64 command = UCSI_PPM_RESET;
+ u64 command;
unsigned long tmo;
u32 cci;
int ret;
mutex_lock(&ucsi->ppm_lock);
+ ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci));
+ if (ret < 0)
+ goto out;
+
+ /*
+ * If UCSI_CCI_RESET_COMPLETE is already set we must clear
+ * the flag before we start another reset. Send a
+ * UCSI_SET_NOTIFICATION_ENABLE command to achieve this.
+ * Ignore a timeout and try the reset anyway if this fails.
+ */
+ if (cci & UCSI_CCI_RESET_COMPLETE) {
+ command = UCSI_SET_NOTIFICATION_ENABLE;
+ ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command,
+ sizeof(command));
+ if (ret < 0)
+ goto out;
+
+ tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS);
+ do {
+ ret = ucsi->ops->read(ucsi, UCSI_CCI,
+ &cci, sizeof(cci));
+ if (ret < 0)
+ goto out;
+ if (cci & UCSI_CCI_COMMAND_COMPLETE)
+ break;
+ if (time_is_before_jiffies(tmo))
+ break;
+ msleep(20);
+ } while (1);
+
+ WARN_ON(cci & UCSI_CCI_RESET_COMPLETE);
+ }
+
+ command = UCSI_PPM_RESET;
ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command,
sizeof(command));
if (ret < 0)
@@ -1589,8 +1633,10 @@ static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con)
ucsi_register_partner(con);
ucsi_pwr_opmode_change(con);
ucsi_port_psy_changed(con);
- ucsi_get_partner_identity(con);
- ucsi_check_cable(con);
+ if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE)
+ ucsi_get_partner_identity(con);
+ if (con->ucsi->cap.features & UCSI_CAP_CABLE_DETAILS)
+ ucsi_check_cable(con);
}
/* Only notify USB controller if partner supports USB data */
@@ -1636,6 +1682,7 @@ static int ucsi_init(struct ucsi *ucsi)
{
struct ucsi_connector *con, *connector;
u64 command, ntfy;
+ u32 cci;
int ret;
int i;
@@ -1688,6 +1735,15 @@ static int ucsi_init(struct ucsi *ucsi)
ucsi->connector = connector;
ucsi->ntfy = ntfy;
+
+ mutex_lock(&ucsi->ppm_lock);
+ ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci));
+ mutex_unlock(&ucsi->ppm_lock);
+ if (ret)
+ return ret;
+ if (UCSI_CCI_CONNECTOR(cci))
+ ucsi_connector_change(ucsi, UCSI_CCI_CONNECTOR(cci));
+
return 0;
err_unregister:
diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h
index 32daf5f5865053..0e7c92eb1b2278 100644
--- a/drivers/usb/typec/ucsi/ucsi.h
+++ b/drivers/usb/typec/ucsi/ucsi.h
@@ -206,7 +206,7 @@ struct ucsi_capability {
#define UCSI_CAP_ATTR_POWER_OTHER BIT(10)
#define UCSI_CAP_ATTR_POWER_VBUS BIT(14)
u8 num_connectors;
- u8 features;
+ u16 features;
#define UCSI_CAP_SET_UOM BIT(0)
#define UCSI_CAP_SET_PDM BIT(1)
#define UCSI_CAP_ALT_MODE_DETAILS BIT(2)
@@ -215,7 +215,8 @@ struct ucsi_capability {
#define UCSI_CAP_CABLE_DETAILS BIT(5)
#define UCSI_CAP_EXT_SUPPLY_NOTIFICATIONS BIT(6)
#define UCSI_CAP_PD_RESET BIT(7)
- u16 reserved_1;
+#define UCSI_CAP_GET_PD_MESSAGE BIT(8)
+ u8 reserved_1;
u8 num_alt_modes;
u8 reserved_2;
u16 bc_version;
diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index 928eacbeb21ac4..7b3ac133ef8618 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -23,10 +23,11 @@ struct ucsi_acpi {
void *base;
struct completion complete;
unsigned long flags;
+#define UCSI_ACPI_SUPPRESS_EVENT 0
+#define UCSI_ACPI_COMMAND_PENDING 1
+#define UCSI_ACPI_ACK_PENDING 2
guid_t guid;
u64 cmd;
- bool dell_quirk_probed;
- bool dell_quirk_active;
};
static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func)
@@ -79,9 +80,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
int ret;
if (ack)
- set_bit(ACK_PENDING, &ua->flags);
+ set_bit(UCSI_ACPI_ACK_PENDING, &ua->flags);
else
- set_bit(COMMAND_PENDING, &ua->flags);
+ set_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags);
ret = ucsi_acpi_async_write(ucsi, offset, val, val_len);
if (ret)
@@ -92,9 +93,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
out_clear_bit:
if (ack)
- clear_bit(ACK_PENDING, &ua->flags);
+ clear_bit(UCSI_ACPI_ACK_PENDING, &ua->flags);
else
- clear_bit(COMMAND_PENDING, &ua->flags);
+ clear_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags);
return ret;
}
@@ -129,51 +130,40 @@ static const struct ucsi_operations ucsi_zenbook_ops = {
};
/*
- * Some Dell laptops expect that an ACK command with the
- * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate)
- * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set.
- * If this is not done events are not delivered to OSPM and
- * subsequent commands will timeout.
+ * Some Dell laptops don't like ACK commands with the
+ * UCSI_ACK_CONNECTOR_CHANGE but not the UCSI_ACK_COMMAND_COMPLETE
+ * bit set. To work around this send a dummy command and bundle the
+ * UCSI_ACK_CONNECTOR_CHANGE with the UCSI_ACK_COMMAND_COMPLETE
+ * for the dummy command.
*/
static int
ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset,
const void *val, size_t val_len)
{
struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
- u64 cmd = *(u64 *)val, ack = 0;
+ u64 cmd = *(u64 *)val;
+ u64 dummycmd = UCSI_GET_CAPABILITY;
int ret;
- if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI &&
- cmd & UCSI_ACK_CONNECTOR_CHANGE)
- ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE;
-
- ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len);
- if (ret != 0)
- return ret;
- if (ack == 0)
- return ret;
-
- if (!ua->dell_quirk_probed) {
- ua->dell_quirk_probed = true;
-
- cmd = UCSI_GET_CAPABILITY;
- ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd,
- sizeof(cmd));
- if (ret == 0)
- return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL,
- &ack, sizeof(ack));
- if (ret != -ETIMEDOUT)
+ if (cmd == (UCSI_ACK_CC_CI | UCSI_ACK_CONNECTOR_CHANGE)) {
+ cmd |= UCSI_ACK_COMMAND_COMPLETE;
+
+ /*
+ * The UCSI core thinks it is sending a connector change ack
+ * and will accept new connector change events. We don't want
+ * this to happen for the dummy command as its response will
+ * still report the very event that the core is trying to clear.
+ */
+ set_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags);
+ ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &dummycmd,
+ sizeof(dummycmd));
+ clear_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags);
+
+ if (ret < 0)
return ret;
-
- ua->dell_quirk_active = true;
- dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n");
- dev_err(ua->dev, "Firmware bug: Enabling workaround\n");
}
- if (!ua->dell_quirk_active)
- return ret;
-
- return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack));
+ return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd));
}
static const struct ucsi_operations ucsi_dell_ops = {
@@ -209,13 +199,14 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data)
if (ret)
return;
- if (UCSI_CCI_CONNECTOR(cci))
+ if (UCSI_CCI_CONNECTOR(cci) &&
+ !test_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags))
ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci));
if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags))
complete(&ua->complete);
if (cci & UCSI_CCI_COMMAND_COMPLETE &&
- test_bit(COMMAND_PENDING, &ua->flags))
+ test_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags))
complete(&ua->complete);
}
diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c
index 932e7bf6944735..ce08eb33e5beca 100644
--- a/drivers/usb/typec/ucsi/ucsi_glink.c
+++ b/drivers/usb/typec/ucsi/ucsi_glink.c
@@ -255,6 +255,20 @@ static void pmic_glink_ucsi_notify(struct work_struct *work)
static void pmic_glink_ucsi_register(struct work_struct *work)
{
struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work);
+ int orientation;
+ int i;
+
+ for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) {
+ if (!ucsi->port_orientation[i])
+ continue;
+ orientation = gpiod_get_value(ucsi->port_orientation[i]);
+
+ if (orientation >= 0) {
+ typec_switch_set(ucsi->port_switch[i],
+ orientation ? TYPEC_ORIENTATION_REVERSE
+ : TYPEC_ORIENTATION_NORMAL);
+ }
+ }
ucsi_register(ucsi->ucsi);
}
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index b246067e074bc0..6cb96a1e8b7df4 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -967,7 +967,7 @@ vdpa_dev_blk_seg_size_config_fill(struct sk_buff *msg, u64 features,
val_u32 = __virtio32_to_cpu(true, config->size_max);
- return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SEG_SIZE, val_u32);
+ return nla_put_u32(msg, VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, val_u32);
}
/* fill the block size*/
@@ -1089,7 +1089,7 @@ static int vdpa_dev_blk_ro_config_fill(struct sk_buff *msg, u64 features)
u8 ro;
ro = ((features & BIT_ULL(VIRTIO_BLK_F_RO)) == 0) ? 0 : 1;
- if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_READ_ONLY, ro))
+ if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_READ_ONLY, ro))
return -EMSGSIZE;
return 0;
@@ -1100,7 +1100,7 @@ static int vdpa_dev_blk_flush_config_fill(struct sk_buff *msg, u64 features)
u8 flush;
flush = ((features & BIT_ULL(VIRTIO_BLK_F_FLUSH)) == 0) ? 0 : 1;
- if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_CFG_FLUSH, flush))
+ if (nla_put_u8(msg, VDPA_ATTR_DEV_BLK_FLUSH, flush))
return -EMSGSIZE;
return 0;
diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c
index 68e8f006dfdbf7..c51f5e4c3dd6d2 100644
--- a/drivers/vfio/pci/pds/dirty.c
+++ b/drivers/vfio/pci/pds/dirty.c
@@ -3,6 +3,7 @@
#include <linux/interval_tree.h>
#include <linux/vfio.h>
+#include <linux/vmalloc.h>
#include <linux/pds/pds_common.h>
#include <linux/pds/pds_core_if.h>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b5c15fe8f9fcf9..3a0218171cfaa0 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -518,7 +518,7 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
spinlock_t *ptl;
int ret;
- ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
+ ret = follow_pte(vma, vaddr, &ptep, &ptl);
if (ret) {
bool unlocked = false;
@@ -532,7 +532,7 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
if (ret)
return ret;
- ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
+ ret = follow_pte(vma, vaddr, &ptep, &ptl);
if (ret)
return ret;
}
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 045f666b4f12a2..8995730ce0bfc8 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2515,7 +2515,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
- vq_err(vq, "Guest moved used index from %u to %u",
+ vq_err(vq, "Guest moved avail index from %u to %u",
last_avail_idx, vq->avail_idx);
return -EFAULT;
}
@@ -2799,9 +2799,19 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
r = vhost_get_avail_idx(vq, &avail_idx);
if (unlikely(r))
return false;
+
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ if (vq->avail_idx != vq->last_avail_idx) {
+ /* Since we have updated avail_idx, the following
+ * call to vhost_get_vq_desc() will read available
+ * ring entries. Make sure that read happens after
+ * the avail_idx read.
+ */
+ smp_rmb();
+ return false;
+ }
- return vq->avail_idx == vq->last_avail_idx;
+ return true;
}
EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
@@ -2838,9 +2848,19 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
&vq->avail->idx, r);
return false;
}
+
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ if (vq->avail_idx != vq->last_avail_idx) {
+ /* Since we have updated avail_idx, the following
+ * call to vhost_get_vq_desc() will read available
+ * ring entries. Make sure that read happens after
+ * the avail_idx read.
+ */
+ smp_rmb();
+ return true;
+ }
- return vq->avail_idx != vq->last_avail_idx;
+ return false;
}
EXPORT_SYMBOL_GPL(vhost_enable_notify);
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index e3179e987cdb32..197b6d5268e941 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -494,6 +494,7 @@ config FB_SBUS_HELPERS
select FB_CFB_COPYAREA
select FB_CFB_FILLRECT
select FB_CFB_IMAGEBLIT
+ select FB_IOMEM_FOPS
config FB_BW2
bool "BWtwo support"
@@ -514,6 +515,7 @@ config FB_CG6
depends on (FB = y) && (SPARC && FB_SBUS)
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
+ select FB_IOMEM_FOPS
help
This is the frame buffer device driver for the CGsix (GX, TurboGX)
frame buffer.
@@ -523,6 +525,7 @@ config FB_FFB
depends on FB_SBUS && SPARC64
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
+ select FB_IOMEM_FOPS
help
This is the frame buffer device driver for the Creator, Creator3D,
and Elite3D graphics boards.
diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index dae96c9f61cf87..806ecd32219b69 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -196,7 +196,7 @@ err_mutex_unlock:
*/
static vm_fault_t fb_deferred_io_page_mkwrite(struct fb_info *info, struct vm_fault *vmf)
{
- unsigned long offset = vmf->address - vmf->vma->vm_start;
+ unsigned long offset = vmf->pgoff << PAGE_SHIFT;
struct page *page = vmf->page;
file_update_time(vmf->vma->vm_file);
diff --git a/drivers/virt/acrn/mm.c b/drivers/virt/acrn/mm.c
index fa5d9ca6be5706..db8ff1d0ac23ce 100644
--- a/drivers/virt/acrn/mm.c
+++ b/drivers/virt/acrn/mm.c
@@ -12,6 +12,7 @@
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include "acrn_drv.h"
@@ -155,43 +156,82 @@ int acrn_vm_memseg_unmap(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
{
struct vm_memory_region_batch *regions_info;
- int nr_pages, i = 0, order, nr_regions = 0;
+ int nr_pages, i, order, nr_regions = 0;
struct vm_memory_mapping *region_mapping;
struct vm_memory_region_op *vm_region;
struct page **pages = NULL, *page;
void *remap_vaddr;
int ret, pinned;
u64 user_vm_pa;
- unsigned long pfn;
struct vm_area_struct *vma;
if (!vm || !memmap)
return -EINVAL;
+ /* Get the page number of the map region */
+ nr_pages = memmap->len >> PAGE_SHIFT;
+ if (!nr_pages)
+ return -EINVAL;
+
mmap_read_lock(current->mm);
vma = vma_lookup(current->mm, memmap->vma_base);
if (vma && ((vma->vm_flags & VM_PFNMAP) != 0)) {
+ unsigned long start_pfn, cur_pfn;
+ spinlock_t *ptl;
+ bool writable;
+ pte_t *ptep;
+
if ((memmap->vma_base + memmap->len) > vma->vm_end) {
mmap_read_unlock(current->mm);
return -EINVAL;
}
- ret = follow_pfn(vma, memmap->vma_base, &pfn);
+ for (i = 0; i < nr_pages; i++) {
+ ret = follow_pte(vma, memmap->vma_base + i * PAGE_SIZE,
+ &ptep, &ptl);
+ if (ret)
+ break;
+
+ cur_pfn = pte_pfn(ptep_get(ptep));
+ if (i == 0)
+ start_pfn = cur_pfn;
+ writable = !!pte_write(ptep_get(ptep));
+ pte_unmap_unlock(ptep, ptl);
+
+ /* Disallow write access if the PTE is not writable. */
+ if (!writable &&
+ (memmap->attr & ACRN_MEM_ACCESS_WRITE)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ /* Disallow refcounted pages. */
+ if (pfn_valid(cur_pfn) &&
+ !PageReserved(pfn_to_page(cur_pfn))) {
+ ret = -EFAULT;
+ break;
+ }
+
+ /* Disallow non-contiguous ranges. */
+ if (cur_pfn != start_pfn + i) {
+ ret = -EINVAL;
+ break;
+ }
+ }
mmap_read_unlock(current->mm);
- if (ret < 0) {
+
+ if (ret) {
dev_dbg(acrn_dev.this_device,
"Failed to lookup PFN at VMA:%pK.\n", (void *)memmap->vma_base);
return ret;
}
return acrn_mm_region_add(vm, memmap->user_vm_pa,
- PFN_PHYS(pfn), memmap->len,
+ PFN_PHYS(start_pfn), memmap->len,
ACRN_MEM_TYPE_WB, memmap->attr);
}
mmap_read_unlock(current->mm);
- /* Get the page number of the map region */
- nr_pages = memmap->len >> PAGE_SHIFT;
pages = vzalloc(array_size(nr_pages, sizeof(*pages)));
if (!pages)
return -ENOMEM;
@@ -235,12 +275,11 @@ int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
mutex_unlock(&vm->regions_mapping_lock);
/* Calculate count of vm_memory_region_op */
- while (i < nr_pages) {
+ for (i = 0; i < nr_pages; i += 1 << order) {
page = pages[i];
VM_BUG_ON_PAGE(PageTail(page), page);
order = compound_order(page);
nr_regions++;
- i += 1 << order;
}
/* Prepare the vm_memory_region_batch */
@@ -257,8 +296,7 @@ int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
regions_info->vmid = vm->vmid;
regions_info->regions_gpa = virt_to_phys(vm_region);
user_vm_pa = memmap->user_vm_pa;
- i = 0;
- while (i < nr_pages) {
+ for (i = 0; i < nr_pages; i += 1 << order) {
u32 region_size;
page = pages[i];
@@ -274,7 +312,6 @@ int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
vm_region++;
user_vm_pa += region_size;
- i += 1 << order;
}
/* Inform the ACRN Hypervisor to set up EPT mappings */
diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c
index b67a28da47026d..a1c467a0e9f719 100644
--- a/drivers/virt/vmgenid.c
+++ b/drivers/virt/vmgenid.c
@@ -68,7 +68,6 @@ out:
static void vmgenid_notify(struct acpi_device *device, u32 event)
{
struct vmgenid_state *state = acpi_driver_data(device);
- char *envp[] = { "NEW_VMGENID=1", NULL };
u8 old_id[VMGENID_SIZE];
memcpy(old_id, state->this_id, sizeof(old_id));
@@ -76,7 +75,6 @@ static void vmgenid_notify(struct acpi_device *device, u32 event)
if (!memcmp(old_id, state->this_id, sizeof(old_id)))
return;
add_vmfork_randomness(state->this_id, sizeof(state->this_id));
- kobject_uevent_env(&device->dev.kobj, KOBJ_CHANGE, envp);
}
static const struct acpi_device_id vmgenid_ids[] = {
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index f173587893cb34..9510c551dce864 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -362,14 +362,16 @@ static const struct bus_type virtio_bus = {
.remove = virtio_dev_remove,
};
-int register_virtio_driver(struct virtio_driver *driver)
+int __register_virtio_driver(struct virtio_driver *driver, struct module *owner)
{
/* Catch this early. */
BUG_ON(driver->feature_table_size && !driver->feature_table);
driver->driver.bus = &virtio_bus;
+ driver->driver.owner = owner;
+
return driver_register(&driver->driver);
}
-EXPORT_SYMBOL_GPL(register_virtio_driver);
+EXPORT_SYMBOL_GPL(__register_virtio_driver);
void unregister_virtio_driver(struct virtio_driver *driver)
{
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 8e322329444237..e8355f55a8f7e8 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -21,6 +21,7 @@
#include <linux/bitmap.h>
#include <linux/lockdep.h>
#include <linux/log2.h>
+#include <linux/vmalloc.h>
#include <acpi/acpi_numa.h>
diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c
index 76f6f26265a3b8..29257d2639dbf0 100644
--- a/drivers/xen/grant-dma-ops.c
+++ b/drivers/xen/grant-dma-ops.c
@@ -282,7 +282,7 @@ static int xen_grant_dma_supported(struct device *dev, u64 mask)
static const struct dma_map_ops xen_grant_dma_ops = {
.alloc = xen_grant_dma_alloc,
.free = xen_grant_dma_free,
- .alloc_pages = xen_grant_dma_alloc_pages,
+ .alloc_pages_op = xen_grant_dma_alloc_pages,
.free_pages = xen_grant_dma_free_pages,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 0e6c6c25d154f5..1c4ef5111651d6 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -403,7 +403,7 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
.dma_supported = xen_swiotlb_dma_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
+ .alloc_pages_op = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
.max_mapping_size = swiotlb_max_mapping_size,
};
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 29281b7c388703..0d6138bee2a3d1 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -49,9 +49,6 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
static inline void v9fs_fid_add_modes(struct p9_fid *fid, unsigned int s_flags,
unsigned int s_cache, unsigned int f_flags)
{
- if (fid->qid.type != P9_QTFILE)
- return;
-
if ((!s_cache) ||
((fid->qid.version == 0) && !(s_flags & V9FS_IGNORE_QV)) ||
(s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) {
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 9defa12208f98a..1775fcc7f0e8ef 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -179,13 +179,14 @@ extern int v9fs_vfs_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags);
-extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid);
+extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid,
+ bool new);
extern const struct inode_operations v9fs_dir_inode_operations_dotl;
extern const struct inode_operations v9fs_file_inode_operations_dotl;
extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
extern const struct netfs_request_ops v9fs_req_ops;
extern struct inode *v9fs_fid_iget_dotl(struct super_block *sb,
- struct p9_fid *fid);
+ struct p9_fid *fid, bool new);
/* other default globals */
#define V9FS_PORT 564
@@ -224,12 +225,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
*/
static inline struct inode *
v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+ struct super_block *sb, bool new)
{
if (v9fs_proto_dotl(v9ses))
- return v9fs_fid_iget_dotl(sb, fid);
+ return v9fs_fid_iget_dotl(sb, fid, new);
else
- return v9fs_fid_iget(sb, fid);
+ return v9fs_fid_iget(sb, fid, new);
}
#endif
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index abdbbaee518462..348cc90bf9c56b 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -520,6 +520,7 @@ const struct file_operations v9fs_file_operations = {
.splice_read = v9fs_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync,
+ .setlease = simple_nosetlease,
};
const struct file_operations v9fs_file_operations_dotl = {
@@ -534,4 +535,5 @@ const struct file_operations v9fs_file_operations_dotl = {
.splice_read = v9fs_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync_dotl,
+ .setlease = simple_nosetlease,
};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 360a5304ec03ce..7a3308d776060e 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -83,7 +83,7 @@ static int p9mode2perm(struct v9fs_session_info *v9ses,
int res;
int mode = stat->mode;
- res = mode & S_IALLUGO;
+ res = mode & 0777; /* S_IRWXUGO */
if (v9fs_proto_dotu(v9ses)) {
if ((mode & P9_DMSETUID) == P9_DMSETUID)
res |= S_ISUID;
@@ -178,6 +178,9 @@ int v9fs_uflags2omode(int uflags, int extended)
break;
}
+ if (uflags & O_TRUNC)
+ ret |= P9_OTRUNC;
+
if (extended) {
if (uflags & O_EXCL)
ret |= P9_OEXCL;
@@ -344,20 +347,25 @@ void v9fs_evict_inode(struct inode *inode)
struct v9fs_inode __maybe_unused *v9inode = V9FS_I(inode);
__le32 __maybe_unused version;
- truncate_inode_pages_final(&inode->i_data);
+ if (!is_bad_inode(inode)) {
+ truncate_inode_pages_final(&inode->i_data);
- version = cpu_to_le32(v9inode->qid.version);
- netfs_clear_inode_writeback(inode, &version);
+ version = cpu_to_le32(v9inode->qid.version);
+ netfs_clear_inode_writeback(inode, &version);
- clear_inode(inode);
- filemap_fdatawrite(&inode->i_data);
+ clear_inode(inode);
+ filemap_fdatawrite(&inode->i_data);
#ifdef CONFIG_9P_FSCACHE
- fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false);
+ if (v9fs_inode_cookie(v9inode))
+ fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false);
#endif
+ } else
+ clear_inode(inode);
}
-struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid)
+struct inode *
+v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid, bool new)
{
dev_t rdev;
int retval;
@@ -369,8 +377,18 @@ struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid)
inode = iget_locked(sb, QID2INO(&fid->qid));
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
- return inode;
+ if (!(inode->i_state & I_NEW)) {
+ if (!new) {
+ goto done;
+ } else {
+ p9_debug(P9_DEBUG_VFS, "WARNING: Inode collision %ld\n",
+ inode->i_ino);
+ iput(inode);
+ remove_inode_hash(inode);
+ inode = iget_locked(sb, QID2INO(&fid->qid));
+ WARN_ON(!(inode->i_state & I_NEW));
+ }
+ }
/*
* initialize the inode with the stat info
@@ -394,11 +412,11 @@ struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid)
v9fs_set_netfs_context(inode);
v9fs_cache_inode_get_cookie(inode);
unlock_new_inode(inode);
+done:
return inode;
error:
iget_failed(inode);
return ERR_PTR(retval);
-
}
/**
@@ -430,8 +448,15 @@ static int v9fs_at_to_dotl_flags(int flags)
*/
static void v9fs_dec_count(struct inode *inode)
{
- if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
- drop_nlink(inode);
+ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) {
+ if (inode->i_nlink) {
+ drop_nlink(inode);
+ } else {
+ p9_debug(P9_DEBUG_VFS,
+ "WARNING: unexpected i_nlink zero %d inode %ld\n",
+ inode->i_nlink, inode->i_ino);
+ }
+ }
}
/**
@@ -482,6 +507,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
} else
v9fs_dec_count(inode);
+ if (inode->i_nlink <= 0) /* no more refs unhash it */
+ remove_inode_hash(inode);
+
v9fs_invalidate_inode_attr(inode);
v9fs_invalidate_inode_attr(dir);
@@ -547,7 +575,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
/*
* instantiate inode and assign the unopened fid to the dentry
*/
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS,
@@ -676,7 +704,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
else if (IS_ERR(fid))
inode = ERR_CAST(fid);
else
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, false);
/*
* If we had a rename on the server and a parallel lookup
* for the new name, then make sure we instantiate with
@@ -1057,8 +1085,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
struct v9fs_session_info *v9ses = sb->s_fs_info;
struct v9fs_inode *v9inode = V9FS_I(inode);
- set_nlink(inode, 1);
-
inode_set_atime(inode, stat->atime, 0);
inode_set_mtime(inode, stat->mtime, 0);
inode_set_ctime(inode, stat->mtime, 0);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index ef9db3e035062b..c61b97bd13b9a7 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -52,7 +52,10 @@ static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
return current_fsgid();
}
-struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid)
+
+
+struct inode *
+v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid, bool new)
{
int retval;
struct inode *inode;
@@ -62,8 +65,18 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid)
inode = iget_locked(sb, QID2INO(&fid->qid));
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
- return inode;
+ if (!(inode->i_state & I_NEW)) {
+ if (!new) {
+ goto done;
+ } else { /* deal with race condition in inode number reuse */
+ p9_debug(P9_DEBUG_ERROR, "WARNING: Inode collision %lx\n",
+ inode->i_ino);
+ iput(inode);
+ remove_inode_hash(inode);
+ inode = iget_locked(sb, QID2INO(&fid->qid));
+ WARN_ON(!(inode->i_state & I_NEW));
+ }
+ }
/*
* initialize the inode with the stat info
@@ -78,11 +91,11 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid)
retval = v9fs_init_inode(v9ses, inode, &fid->qid,
st->st_mode, new_decode_dev(st->st_rdev));
+ v9fs_stat2inode_dotl(st, inode, 0);
kfree(st);
if (retval)
goto error;
- v9fs_stat2inode_dotl(st, inode, 0);
v9fs_set_netfs_context(inode);
v9fs_cache_inode_get_cookie(inode);
retval = v9fs_get_acl(inode, fid);
@@ -90,12 +103,11 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid)
goto error;
unlock_new_inode(inode);
-
+done:
return inode;
error:
iget_failed(inode);
return ERR_PTR(retval);
-
}
struct dotl_openflag_map {
@@ -247,7 +259,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
goto out;
}
- inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
+ inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -297,7 +309,6 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap,
umode_t omode)
{
int err;
- struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
kgid_t gid;
const unsigned char *name;
@@ -307,7 +318,6 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap,
struct posix_acl *dacl = NULL, *pacl = NULL;
p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
- v9ses = v9fs_inode2v9ses(dir);
omode |= S_IFDIR;
if (dir->i_mode & S_ISGID)
@@ -342,7 +352,7 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap,
}
/* instantiate inode and assign the unopened fid to the dentry */
- inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
+ inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -739,7 +749,6 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir,
kgid_t gid;
const unsigned char *name;
umode_t mode;
- struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
struct inode *inode;
struct p9_qid qid;
@@ -749,7 +758,6 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir,
dir->i_ino, dentry, omode,
MAJOR(rdev), MINOR(rdev));
- v9ses = v9fs_inode2v9ses(dir);
dfid = v9fs_parent_fid(dentry);
if (IS_ERR(dfid)) {
err = PTR_ERR(dfid);
@@ -780,7 +788,7 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir,
err);
goto error;
}
- inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
+ inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 4236058c7bbd18..f52fdf42945cf1 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -139,7 +139,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
else
sb->s_d_op = &v9fs_dentry_operations;
- inode = v9fs_get_inode_from_fid(v9ses, fid, sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, sb, true);
if (IS_ERR(inode)) {
retval = PTR_ERR(inode);
goto release_sb;
@@ -244,6 +244,21 @@ done:
return res;
}
+static int v9fs_drop_inode(struct inode *inode)
+{
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ if (v9ses->cache & (CACHE_META|CACHE_LOOSE))
+ return generic_drop_inode(inode);
+ /*
+ * in case of non cached mode always drop the
+ * inode because we want the inode attribute
+ * to always match that on the server.
+ */
+ return 1;
+}
+
static int v9fs_write_inode(struct inode *inode,
struct writeback_control *wbc)
{
@@ -268,6 +283,7 @@ static const struct super_operations v9fs_super_ops = {
.alloc_inode = v9fs_alloc_inode,
.free_inode = v9fs_free_inode,
.statfs = simple_statfs,
+ .drop_inode = v9fs_drop_inode,
.evict_inode = v9fs_evict_inode,
.show_options = v9fs_show_options,
.umount_begin = v9fs_umount_begin,
@@ -278,6 +294,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
.alloc_inode = v9fs_alloc_inode,
.free_inode = v9fs_free_inode,
.statfs = v9fs_statfs,
+ .drop_inode = v9fs_drop_inode,
.evict_inode = v9fs_evict_inode,
.show_options = v9fs_show_options,
.umount_begin = v9fs_umount_begin,
diff --git a/fs/aio.c b/fs/aio.c
index 9cdaa2faa53633..0f4f531c97800c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1202,8 +1202,8 @@ static void aio_complete(struct aio_kiocb *iocb)
spin_lock_irqsave(&ctx->wait.lock, flags);
list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry)
if (avail >= curr->min_nr) {
- list_del_init_careful(&curr->w.entry);
wake_up_process(curr->w.private);
+ list_del_init_careful(&curr->w.entry);
}
spin_unlock_irqrestore(&ctx->wait.lock, flags);
}
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index b02796c8a59533..66ca0bbee63949 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -17,6 +17,7 @@ bcachefs-y := \
btree_journal_iter.o \
btree_key_cache.o \
btree_locking.o \
+ btree_node_scan.o \
btree_trans_commit.o \
btree_update.o \
btree_update_interior.o \
@@ -37,6 +38,7 @@ bcachefs-y := \
error.o \
extents.o \
extent_update.o \
+ eytzinger.o \
fs.o \
fs-common.o \
fs-ioctl.o \
@@ -67,6 +69,7 @@ bcachefs-y := \
quota.o \
rebalance.o \
recovery.o \
+ recovery_passes.o \
reflink.o \
replicas.o \
sb-clean.o \
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c
index 3640f417cce118..5c180fdc3efbdf 100644
--- a/fs/bcachefs/acl.c
+++ b/fs/bcachefs/acl.c
@@ -281,7 +281,6 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
- struct bkey_s_c_xattr xattr;
struct posix_acl *acl = NULL;
struct bkey_s_c k;
int ret;
@@ -290,28 +289,27 @@ retry:
ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
&hash, inode_inum(inode), &search, 0);
- if (ret) {
- if (!bch2_err_matches(ret, ENOENT))
- acl = ERR_PTR(ret);
- goto out;
- }
+ if (ret)
+ goto err;
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
- if (ret) {
- acl = ERR_PTR(ret);
- goto out;
- }
+ if (ret)
+ goto err;
- xattr = bkey_s_c_to_xattr(k);
+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
- le16_to_cpu(xattr.v->x_val_len));
+ le16_to_cpu(xattr.v->x_val_len));
+ ret = PTR_ERR_OR_ZERO(acl);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
- if (!IS_ERR(acl))
+ if (ret)
+ acl = !bch2_err_matches(ret, ENOENT) ? ERR_PTR(ret) : NULL;
+
+ if (!IS_ERR_OR_NULL(acl))
set_cached_acl(&inode->v, type, acl);
-out:
- if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart))
- goto retry;
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 893e38f9db807f..4ff56fa4d53920 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1713,34 +1713,37 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
if (ret)
goto out;
- if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
- a->v.gen++;
- SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
- goto write;
- }
-
- if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
- bch2_trans_inconsistent(trans,
- "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
- "%s",
- a->v.journal_seq,
- c->journal.flushed_seq_ondisk,
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ if (a->v.dirty_sectors) {
+ if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+ trans, "attempting to discard bucket with dirty data\n%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = -EIO;
- }
goto out;
}
if (a->v.data_type != BCH_DATA_need_discard) {
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
- bch2_trans_inconsistent(trans,
- "bucket incorrectly set in need_discard btree\n"
- "%s",
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
- ret = -EIO;
+ if (data_type_is_empty(a->v.data_type) &&
+ BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
+ a->v.gen++;
+ SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
+ goto write;
}
+ if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+ trans, "bucket incorrectly set in need_discard btree\n"
+ "%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ ret = -EIO;
+ goto out;
+ }
+
+ if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
+ if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+ trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s",
+ a->v.journal_seq,
+ c->journal.flushed_seq_ondisk,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ ret = -EIO;
goto out;
}
@@ -1835,6 +1838,7 @@ static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpo
if (ret)
goto err;
+ BUG_ON(a->v.dirty_sectors);
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
a->v.data_type = alloc_data_type(a->v, a->v.data_type);
@@ -1942,6 +1946,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
goto out;
BUG_ON(a->v.data_type != BCH_DATA_cached);
+ BUG_ON(a->v.dirty_sectors);
if (!a->v.cached_sectors)
bch_err(c, "invalidating empty bucket, confused");
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 214b15c84d1f32..a1fc30adf9129d 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -188,8 +188,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
{
switch (watermark) {
- case BCH_WATERMARK_reclaim:
+ case BCH_WATERMARK_interior_updates:
return 0;
+ case BCH_WATERMARK_reclaim:
+ return OPEN_BUCKETS_COUNT / 6;
case BCH_WATERMARK_btree:
case BCH_WATERMARK_btree_copygc:
return OPEN_BUCKETS_COUNT / 4;
diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h
index b91b7a46105608..c2226e947c41fb 100644
--- a/fs/bcachefs/alloc_types.h
+++ b/fs/bcachefs/alloc_types.h
@@ -22,7 +22,8 @@ struct bucket_alloc_state {
x(copygc) \
x(btree) \
x(btree_copygc) \
- x(reclaim)
+ x(reclaim) \
+ x(interior_updates)
enum bch_watermark {
#define x(name) BCH_WATERMARK_##name,
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 8cb35ea572cb95..a200442010025a 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -8,6 +8,7 @@
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
+#include "checksum.h"
#include "error.h"
#include <linux/mm.h>
@@ -29,8 +30,7 @@ static bool extent_matches_bp(struct bch_fs *c,
if (p.ptr.cached)
continue;
- bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
- &bucket2, &bp2);
+ bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2);
if (bpos_eq(bucket, bucket2) &&
!memcmp(&bp, &bp2, sizeof(bp)))
return true;
@@ -44,13 +44,20 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
struct printbuf *err)
{
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
+
+ /* these will be caught by fsck */
+ if (!bch2_dev_exists2(c, bp.k->p.inode))
+ return 0;
+
+ struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode);
struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
int ret = 0;
- bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
+ bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
+ !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
c, err,
- backpointer_pos_wrong,
- "backpointer at wrong pos");
+ backpointer_bucket_offset_wrong,
+ "backpointer bucket_offset wrong");
fsck_err:
return ret;
}
@@ -378,7 +385,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_
backpointer_to_missing_alloc,
"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
alloc_iter.pos.inode, alloc_iter.pos.offset,
- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = bch2_btree_delete_at(trans, bp_iter, 0);
goto out;
}
@@ -414,6 +421,84 @@ struct extents_to_bp_state {
struct bkey_buf last_flushed;
};
+static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
+ struct bkey_s_c extent, unsigned dev)
+{
+ struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
+ int ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
+
+ bch2_bkey_drop_device(bkey_i_to_s(n), dev);
+ return bch2_btree_insert_trans(trans, btree, n, 0);
+}
+
+static int check_extent_checksum(struct btree_trans *trans,
+ enum btree_id btree, struct bkey_s_c extent,
+ enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct printbuf buf = PRINTBUF;
+ void *data_buf = NULL;
+ struct bio *bio = NULL;
+ size_t bytes;
+ int ret = 0;
+
+ if (bkey_is_btree_ptr(extent.k))
+ return false;
+
+ bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
+ if (p.ptr.dev == dev)
+ goto found;
+ BUG();
+found:
+ if (!p.crc.csum_type)
+ return false;
+
+ bytes = p.crc.compressed_size << 9;
+
+ struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
+ if (!bch2_dev_get_ioref(ca, READ))
+ return false;
+
+ data_buf = kvmalloc(bytes, GFP_KERNEL);
+ if (!data_buf) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
+ bio->bi_iter.bi_sector = p.ptr.offset;
+ bch2_bio_map(bio, data_buf, bytes);
+ ret = submit_bio_wait(bio);
+ if (ret)
+ goto err;
+
+ prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
+ prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree));
+ bch2_bkey_val_to_text(&buf, c, extent);
+ prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree));
+ bch2_bkey_val_to_text(&buf, c, extent2);
+
+ struct nonce nonce = extent_nonce(extent.k->version, p.crc);
+ struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
+ if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
+ c, dup_backpointer_to_bad_csum_extent,
+ "%s", buf.buf))
+ ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
+fsck_err:
+err:
+ if (bio)
+ bio_put(bio);
+ kvfree(data_buf);
+ percpu_ref_put(&ca->io_ref);
+ printbuf_exit(&buf);
+ return ret;
+}
+
static int check_bp_exists(struct btree_trans *trans,
struct extents_to_bp_state *s,
struct bpos bucket,
@@ -421,7 +506,8 @@ static int check_bp_exists(struct btree_trans *trans,
struct bkey_s_c orig_k)
{
struct bch_fs *c = trans->c;
- struct btree_iter bp_iter = { NULL };
+ struct btree_iter bp_iter = {};
+ struct btree_iter other_extent_iter = {};
struct printbuf buf = PRINTBUF;
struct bkey_s_c bp_k;
struct bkey_buf tmp;
@@ -429,13 +515,19 @@ static int check_bp_exists(struct btree_trans *trans,
bch2_bkey_buf_init(&tmp);
+ if (!bch2_dev_bucket_exists(c, bucket)) {
+ prt_str(&buf, "extent for nonexistent device:bucket ");
+ bch2_bpos_to_text(&buf, bucket);
+ prt_str(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+ bch_err(c, "%s", buf.buf);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+
if (bpos_lt(bucket, s->bucket_start) ||
bpos_gt(bucket, s->bucket_end))
return 0;
- if (!bch2_dev_bucket_exists(c, bucket))
- goto missing;
-
bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
bucket_pos_to_bp(c, bucket, bp.bucket_offset),
0);
@@ -461,21 +553,94 @@ static int check_bp_exists(struct btree_trans *trans,
ret = -BCH_ERR_transaction_restart_write_buffer_flush;
goto out;
}
- goto missing;
+
+ goto check_existing_bp;
}
out:
err:
fsck_err:
+ bch2_trans_iter_exit(trans, &other_extent_iter);
bch2_trans_iter_exit(trans, &bp_iter);
bch2_bkey_buf_exit(&tmp, c);
printbuf_exit(&buf);
return ret;
+check_existing_bp:
+ /* Do we have a backpointer for a different extent? */
+ if (bp_k.k->type != KEY_TYPE_backpointer)
+ goto missing;
+
+ struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
+
+ struct bkey_s_c other_extent =
+ bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
+ ret = bkey_err(other_extent);
+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+ ret = 0;
+ if (ret)
+ goto err;
+
+ if (!other_extent.k)
+ goto missing;
+
+ if (bch2_extents_match(orig_k, other_extent)) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+ prt_str(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, other_extent);
+ bch_err(c, "%s", buf.buf);
+
+ if (other_extent.k->size <= orig_k.k->size) {
+ ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
+ if (ret)
+ goto err;
+ goto out;
+ } else {
+ ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
+ if (ret)
+ goto err;
+ goto missing;
+ }
+ }
+
+ ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
+ if (ret < 0)
+ goto err;
+ if (ret) {
+ ret = 0;
+ goto missing;
+ }
+
+ ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
+ if (ret < 0)
+ goto err;
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ printbuf_reset(&buf);
+ prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode);
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+ prt_str(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, other_extent);
+ bch_err(c, "%s", buf.buf);
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ goto err;
missing:
+ printbuf_reset(&buf);
prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
bch2_btree_id_str(bp.btree_id), bp.level);
bch2_bkey_val_to_text(&buf, c, orig_k);
- prt_printf(&buf, "\nbp pos ");
- bch2_bpos_to_text(&buf, bp_iter.pos);
+ prt_printf(&buf, "\n got: ");
+ bch2_bkey_val_to_text(&buf, c, bp_k);
+
+ struct bkey_i_backpointer n_bp_k;
+ bkey_backpointer_init(&n_bp_k.k_i);
+ n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
+ n_bp_k.v = bp;
+ prt_printf(&buf, "\n want: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
@@ -502,8 +667,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
if (p.ptr.cached)
continue;
- bch2_extent_ptr_to_bp(c, btree, level,
- k, p, &bucket_pos, &bp);
+ bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp);
ret = check_bp_exists(trans, s, bucket_pos, bp, k);
if (ret)
diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
index 327365a9feac4e..85949b9fd880ce 100644
--- a/fs/bcachefs/backpointers.h
+++ b/fs/bcachefs/backpointers.h
@@ -53,14 +53,11 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
u64 bucket_offset)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
- struct bpos ret;
-
- ret = POS(bucket.inode,
- (bucket_to_sector(ca, bucket.offset) <<
- MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+ struct bpos ret = POS(bucket.inode,
+ (bucket_to_sector(ca, bucket.offset) <<
+ MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
-
return ret;
}
@@ -90,20 +87,40 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i);
}
-static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
- struct bkey_s_c k, struct extent_ptr_decoded p)
+static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
+ struct extent_ptr_decoded p,
+ const union bch_extent_entry *entry)
{
- return level ? BCH_DATA_btree :
- p.has_ec ? BCH_DATA_stripe :
- BCH_DATA_user;
+ switch (k.k->type) {
+ case KEY_TYPE_btree_ptr:
+ case KEY_TYPE_btree_ptr_v2:
+ return BCH_DATA_btree;
+ case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_v:
+ return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user;
+ case KEY_TYPE_stripe: {
+ const struct bch_extent_ptr *ptr = &entry->ptr;
+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+
+ BUG_ON(ptr < s.v->ptrs ||
+ ptr >= s.v->ptrs + s.v->nr_blocks);
+
+ return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
+ ? BCH_DATA_parity
+ : BCH_DATA_user;
+ }
+ default:
+ BUG();
+ }
}
static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k, struct extent_ptr_decoded p,
+ const union bch_extent_entry *entry,
struct bpos *bucket_pos, struct bch_backpointer *bp)
{
- enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);
s64 sectors = level ? btree_sectors(c) : k.k->size;
u32 bucket_offset;
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 799aa32b6b4d99..91c3c1fef233d1 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -209,7 +209,7 @@
#include "fifo.h"
#include "nocow_locking_types.h"
#include "opts.h"
-#include "recovery_types.h"
+#include "recovery_passes_types.h"
#include "sb-errors_types.h"
#include "seqmutex.h"
#include "time_stats.h"
@@ -456,6 +456,7 @@ enum bch_time_stats {
#include "alloc_types.h"
#include "btree_types.h"
+#include "btree_node_scan_types.h"
#include "btree_write_buffer_types.h"
#include "buckets_types.h"
#include "buckets_waiting_for_journal_types.h"
@@ -614,6 +615,7 @@ struct bch_dev {
*/
#define BCH_FS_FLAGS() \
+ x(new_fs) \
x(started) \
x(may_go_rw) \
x(rw) \
@@ -707,6 +709,8 @@ struct btree_trans_buf {
x(stripe_delete) \
x(reflink) \
x(fallocate) \
+ x(fsync) \
+ x(dio_write) \
x(discard) \
x(discard_fast) \
x(invalidate) \
@@ -796,6 +800,7 @@ struct bch_fs {
u64 features;
u64 compat;
unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
+ u64 btrees_lost_data;
} sb;
@@ -810,7 +815,6 @@ struct bch_fs {
/* snapshot.c: */
struct snapshot_table __rcu *snapshots;
- size_t snapshot_table_size;
struct mutex snapshot_table_lock;
struct rw_semaphore snapshot_create_lock;
@@ -1104,6 +1108,8 @@ struct bch_fs {
struct journal_keys journal_keys;
struct list_head journal_iters;
+ struct find_btree_nodes found_btree_nodes;
+
u64 last_bucket_seq_cleanup;
u64 counters_on_mount[BCH_COUNTER_NR];
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index bff8750ac0d743..f7fbfccd2b1e4d 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -578,7 +578,8 @@ struct bch_member {
__le64 nbuckets; /* device size */
__le16 first_bucket; /* index of first bucket used */
__le16 bucket_size; /* sectors */
- __le32 pad;
+ __u8 btree_bitmap_shift;
+ __u8 pad[3];
__le64 last_mount; /* time_t */
__le64 flags;
@@ -587,6 +588,7 @@ struct bch_member {
__le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
__le64 errors_reset_time;
__le64 seq;
+ __le64 btree_allocated_bitmap;
};
#define BCH_MEMBER_V1_BYTES 56
@@ -818,6 +820,7 @@ struct bch_sb_field_ext {
struct bch_sb_field field;
__le64 recovery_passes_required[2];
__le64 errors_silent[8];
+ __le64 btrees_lost_data;
};
struct bch_sb_field_downgrade_entry {
@@ -875,7 +878,8 @@ struct bch_sb_field_downgrade {
x(rebalance_work, BCH_VERSION(1, 3)) \
x(member_seq, BCH_VERSION(1, 4)) \
x(subvolume_fs_parent, BCH_VERSION(1, 5)) \
- x(btree_subvolume_children, BCH_VERSION(1, 6))
+ x(btree_subvolume_children, BCH_VERSION(1, 6)) \
+ x(mi_btree_bitmap, BCH_VERSION(1, 7))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -1313,7 +1317,7 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
x(write_buffer_keys, 11) \
x(datetime, 12)
-enum {
+enum bch_jset_entry_type {
#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
BCH_JSET_ENTRY_TYPES()
#undef x
@@ -1359,7 +1363,7 @@ struct jset_entry_blacklist_v2 {
x(inodes, 1) \
x(key_version, 2)
-enum {
+enum bch_fs_usage_type {
#define x(f, nr) BCH_FS_USAGE_##f = nr,
BCH_FS_USAGE_TYPES()
#undef x
@@ -1500,7 +1504,8 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_stripe)) \
x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \
BIT_ULL(KEY_TYPE_reflink_v)| \
- BIT_ULL(KEY_TYPE_indirect_inline_data)) \
+ BIT_ULL(KEY_TYPE_indirect_inline_data)| \
+ BIT_ULL(KEY_TYPE_error)) \
x(subvolumes, 8, 0, \
BIT_ULL(KEY_TYPE_subvolume)) \
x(snapshots, 9, 0, \
@@ -1534,6 +1539,20 @@ enum btree_id {
BTREE_ID_NR
};
+static inline bool btree_id_is_alloc(enum btree_id id)
+{
+ switch (id) {
+ case BTREE_ID_alloc:
+ case BTREE_ID_backpointers:
+ case BTREE_ID_need_discard:
+ case BTREE_ID_freespace:
+ case BTREE_ID_bucket_gens:
+ return true;
+ default:
+ return false;
+ }
+}
+
#define BTREE_MAX_DEPTH 4U
/* Btree nodes */
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index cf23ff47bed8be..3a45d128f608db 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -314,6 +314,12 @@ static inline unsigned bkeyp_key_u64s(const struct bkey_format *format,
return bkey_packed(k) ? format->key_u64s : BKEY_U64s;
}
+static inline bool bkeyp_u64s_valid(const struct bkey_format *f,
+ const struct bkey_packed *k)
+{
+ return ((unsigned) k->u64s - bkeyp_key_u64s(f, k) <= U8_MAX - BKEY_U64s);
+}
+
static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
const struct bkey_packed *k)
{
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 5e52684764eb14..db336a43fc083a 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -171,11 +171,15 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
if (type >= BKEY_TYPE_NR)
return 0;
- bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) &&
+ bkey_fsck_err_on((type == BKEY_TYPE_btree ||
+ (flags & BKEY_INVALID_COMMIT)) &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
bkey_invalid_type_for_btree,
"invalid key type for btree %s (%s)",
- bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]);
+ bch2_btree_node_type_str(type),
+ k.k->type < KEY_TYPE_MAX
+ ? bch2_bkey_types[k.k->type]
+ : "(unknown)");
if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
bkey_fsck_err_on(k.k->size == 0, c, err,
diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
index 3fd1085b6c61ee..3bb477840eab6b 100644
--- a/fs/bcachefs/bset.c
+++ b/fs/bcachefs/bset.c
@@ -134,18 +134,24 @@ void bch2_dump_btree_node_iter(struct btree *b,
printbuf_exit(&buf);
}
-#ifdef CONFIG_BCACHEFS_DEBUG
-
-void __bch2_verify_btree_nr_keys(struct btree *b)
+struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b)
{
struct bset_tree *t;
struct bkey_packed *k;
- struct btree_nr_keys nr = { 0 };
+ struct btree_nr_keys nr = {};
for_each_bset(b, t)
bset_tree_for_each_key(b, t, k)
if (!bkey_deleted(k))
btree_keys_account_key_add(&nr, t - b->set, k);
+ return nr;
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+void __bch2_verify_btree_nr_keys(struct btree *b)
+{
+ struct btree_nr_keys nr = bch2_btree_node_count_keys(b);
BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
}
diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h
index 79c77baaa38386..120a79fd456bd5 100644
--- a/fs/bcachefs/bset.h
+++ b/fs/bcachefs/bset.h
@@ -458,6 +458,8 @@ struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
/* Accounting: */
+struct btree_nr_keys bch2_btree_node_count_keys(struct btree *);
+
static inline void btree_keys_account_key(struct btree_nr_keys *n,
unsigned bset,
struct bkey_packed *k,
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 562561a9a510e8..02c70e813face0 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -709,9 +709,31 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
- u32 seq;
- BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
+ if (unlikely(level >= BTREE_MAX_DEPTH)) {
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u",
+ level, BTREE_MAX_DEPTH);
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(!bkey_is_btree_ptr(&k->k))) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
+ printbuf_exit(&buf);
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
+ printbuf_exit(&buf);
+ return ERR_PTR(ret);
+ }
+
/*
* Parent node must be locked, else we could read in a btree node that's
* been freed:
@@ -752,34 +774,26 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
}
set_btree_node_read_in_flight(b);
-
six_unlock_write(&b->c.lock);
- seq = six_lock_seq(&b->c.lock);
- six_unlock_intent(&b->c.lock);
- /* Unlock before doing IO: */
- if (path && sync)
- bch2_trans_unlock_noassert(trans);
-
- bch2_btree_node_read(trans, b, sync);
+ if (path) {
+ u32 seq = six_lock_seq(&b->c.lock);
- if (!sync)
- return NULL;
+ /* Unlock before doing IO: */
+ six_unlock_intent(&b->c.lock);
+ bch2_trans_unlock_noassert(trans);
- if (path) {
- int ret = bch2_trans_relock(trans) ?:
- bch2_btree_path_relock_intent(trans, path);
- if (ret) {
- BUG_ON(!trans->restarted);
- return ERR_PTR(ret);
- }
- }
+ bch2_btree_node_read(trans, b, sync);
- if (!six_relock_type(&b->c.lock, lock_type, seq)) {
- BUG_ON(!path);
+ if (!sync)
+ return NULL;
- trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
- return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
+ if (!six_relock_type(&b->c.lock, lock_type, seq))
+ b = NULL;
+ } else {
+ bch2_btree_node_read(trans, b, sync);
+ if (lock_type == SIX_LOCK_read)
+ six_lock_downgrade(&b->c.lock);
}
return b;
@@ -808,7 +822,8 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
prt_printf(&buf, "\nmax ");
bch2_bpos_to_text(&buf, b->data->max_key);
- bch2_fs_inconsistent(c, "%s", buf.buf);
+ bch2_fs_topology_error(c, "%s", buf.buf);
+
printbuf_exit(&buf);
}
@@ -1111,18 +1126,19 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
- struct btree *b;
BUG_ON(path && !btree_node_locked(path, level + 1));
BUG_ON(level >= BTREE_MAX_DEPTH);
- b = btree_cache_find(bc, k);
+ struct btree *b = btree_cache_find(bc, k);
if (b)
return 0;
b = bch2_btree_node_fill(trans, path, k, btree_id,
level, SIX_LOCK_read, false);
- return PTR_ERR_OR_ZERO(b);
+ if (!IS_ERR_OR_NULL(b))
+ six_unlock_read(&b->c.lock);
+ return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
}
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
@@ -1134,6 +1150,8 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
b = btree_cache_find(bc, k);
if (!b)
return;
+
+ BUG_ON(b == btree_node_root(trans->c, b));
wait_on_io:
/* not allowed to wait on io with btree locks held: */
@@ -1145,6 +1163,8 @@ wait_on_io:
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
+ if (unlikely(b->hash_val != btree_ptr_hash_val(k)))
+ goto out;
if (btree_node_dirty(b)) {
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
@@ -1159,7 +1179,7 @@ wait_on_io:
btree_node_data_free(c, b);
bch2_btree_node_hash_remove(bc, b);
mutex_unlock(&bc->lock);
-
+out:
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
}
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index bdaed29f084a4d..791470b0c65455 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -7,11 +7,13 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
+#include "backpointers.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_journal_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
+#include "btree_node_scan.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_gc.h"
@@ -24,7 +26,7 @@
#include "journal.h"
#include "keylist.h"
#include "move.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "reflink.h"
#include "replicas.h"
#include "super-io.h"
@@ -40,6 +42,7 @@
#define DROP_THIS_NODE 10
#define DROP_PREV_NODE 11
+#define DID_FILL_FROM_SCAN 12
static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
{
@@ -70,90 +73,6 @@ static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
__gc_pos_set(c, new_pos);
}
-/*
- * Missing: if an interior btree node is empty, we need to do something -
- * perhaps just kill it
- */
-static int bch2_gc_check_topology(struct bch_fs *c,
- struct btree *b,
- struct bkey_buf *prev,
- struct bkey_buf cur,
- bool is_last)
-{
- struct bpos node_start = b->data->min_key;
- struct bpos node_end = b->data->max_key;
- struct bpos expected_start = bkey_deleted(&prev->k->k)
- ? node_start
- : bpos_successor(prev->k->k.p);
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
- int ret = 0;
-
- if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
- struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);
-
- if (!bpos_eq(expected_start, bp->v.min_key)) {
- bch2_topology_error(c);
-
- if (bkey_deleted(&prev->k->k)) {
- prt_printf(&buf1, "start of node: ");
- bch2_bpos_to_text(&buf1, node_start);
- } else {
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k));
- }
- bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k));
-
- if (__fsck_err(c,
- FSCK_CAN_FIX|
- FSCK_CAN_IGNORE|
- FSCK_NO_RATELIMIT,
- btree_node_topology_bad_min_key,
- "btree node with incorrect min_key at btree %s level %u:\n"
- " prev %s\n"
- " cur %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) {
- bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
- goto err;
- } else {
- set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
- }
- }
- }
-
- if (is_last && !bpos_eq(cur.k->k.p, node_end)) {
- bch2_topology_error(c);
-
- printbuf_reset(&buf1);
- printbuf_reset(&buf2);
-
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k));
- bch2_bpos_to_text(&buf2, node_end);
-
- if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT,
- btree_node_topology_bad_max_key,
- "btree node with incorrect max_key at btree %s level %u:\n"
- " %s\n"
- " expected %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf) &&
- should_restart_for_topology_repair(c)) {
- bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
- goto err;
- } else {
- set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
- }
- }
-
- bch2_bkey_buf_copy(prev, c, cur.k);
-err:
-fsck_err:
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
- return ret;
-}
-
static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
{
switch (b->key.k.type) {
@@ -212,6 +131,17 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
struct bkey_i_btree_ptr_v2 *new;
int ret;
+ if (c->opts.verbose) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, " -> ");
+ bch2_bpos_to_text(&buf, new_min);
+
+ bch_info(c, "%s(): %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
+
new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
if (!new)
return -BCH_ERR_ENOMEM_gc_repair_key;
@@ -237,6 +167,17 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
struct bkey_i_btree_ptr_v2 *new;
int ret;
+ if (c->opts.verbose) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, " -> ");
+ bch2_bpos_to_text(&buf, new_max);
+
+ bch_info(c, "%s(): %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
+
ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
if (ret)
return ret;
@@ -268,127 +209,138 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
return 0;
}
-static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
- struct btree *prev, struct btree *cur)
+static int btree_check_node_boundaries(struct bch_fs *c, struct btree *b,
+ struct btree *prev, struct btree *cur,
+ struct bpos *pulled_from_scan)
{
struct bpos expected_start = !prev
? b->data->min_key
: bpos_successor(prev->key.k.p);
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
int ret = 0;
- if (!prev) {
- prt_printf(&buf1, "start of node: ");
- bch2_bpos_to_text(&buf1, b->data->min_key);
- } else {
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key));
+ BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
+ b->data->min_key));
+
+ if (bpos_eq(expected_start, cur->data->min_key))
+ return 0;
+
+ prt_printf(&buf, " at btree %s level %u:\n parent: ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+ if (prev) {
+ prt_printf(&buf, "\n prev: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key));
}
- bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key));
-
- if (prev &&
- bpos_gt(expected_start, cur->data->min_key) &&
- BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {
- /* cur overwrites prev: */
-
- if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key,
- cur->data->min_key), c,
- btree_node_topology_overwritten_by_next_node,
- "btree node overwritten by next node at btree %s level %u:\n"
- " node %s\n"
- " next %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf)) {
- ret = DROP_PREV_NODE;
- goto out;
- }
+ prt_str(&buf, "\n next: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key));
- if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p,
- bpos_predecessor(cur->data->min_key)), c,
- btree_node_topology_bad_max_key,
- "btree node with incorrect max_key at btree %s level %u:\n"
- " node %s\n"
- " next %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf))
- ret = set_node_max(c, prev,
- bpos_predecessor(cur->data->min_key));
- } else {
- /* prev overwrites cur: */
-
- if (mustfix_fsck_err_on(bpos_ge(expected_start,
- cur->data->max_key), c,
- btree_node_topology_overwritten_by_prev_node,
- "btree node overwritten by prev node at btree %s level %u:\n"
- " prev %s\n"
- " node %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf)) {
- ret = DROP_THIS_NODE;
- goto out;
- }
+ if (bpos_lt(expected_start, cur->data->min_key)) { /* gap */
+ if (b->c.level == 1 &&
+ bpos_lt(*pulled_from_scan, cur->data->min_key)) {
+ ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
+ expected_start,
+ bpos_predecessor(cur->data->min_key));
+ if (ret)
+ goto err;
- if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c,
- btree_node_topology_bad_min_key,
- "btree node with incorrect min_key at btree %s level %u:\n"
- " prev %s\n"
- " node %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf))
- ret = set_node_min(c, cur, expected_start);
+ *pulled_from_scan = cur->data->min_key;
+ ret = DID_FILL_FROM_SCAN;
+ } else {
+ if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
+ "btree node with incorrect min_key%s", buf.buf))
+ ret = set_node_min(c, cur, expected_start);
+ }
+ } else { /* overlap */
+ if (prev && BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) { /* cur overwrites prev */
+ if (bpos_ge(prev->data->min_key, cur->data->min_key)) { /* fully? */
+ if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_next_node,
+ "btree node overwritten by next node%s", buf.buf))
+ ret = DROP_PREV_NODE;
+ } else {
+ if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
+ "btree node with incorrect max_key%s", buf.buf))
+ ret = set_node_max(c, prev,
+ bpos_predecessor(cur->data->min_key));
+ }
+ } else {
+ if (bpos_ge(expected_start, cur->data->max_key)) { /* fully? */
+ if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_prev_node,
+ "btree node overwritten by prev node%s", buf.buf))
+ ret = DROP_THIS_NODE;
+ } else {
+ if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
+ "btree node with incorrect min_key%s", buf.buf))
+ ret = set_node_min(c, cur, expected_start);
+ }
+ }
}
-out:
+err:
fsck_err:
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
+ printbuf_exit(&buf);
return ret;
}
static int btree_repair_node_end(struct bch_fs *c, struct btree *b,
- struct btree *child)
+ struct btree *child, struct bpos *pulled_from_scan)
{
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
int ret = 0;
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key));
- bch2_bpos_to_text(&buf2, b->key.k.p);
+ if (bpos_eq(child->key.k.p, b->key.k.p))
+ return 0;
- if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c,
- btree_node_topology_bad_max_key,
- "btree node with incorrect max_key at btree %s level %u:\n"
- " %s\n"
- " expected %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf)) {
- ret = set_node_max(c, child, b->key.k.p);
- if (ret)
- goto err;
+ prt_printf(&buf, "at btree %s level %u:\n parent: ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+ prt_str(&buf, "\n child: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key));
+
+ if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
+ "btree node with incorrect max_key%s", buf.buf)) {
+ if (b->c.level == 1 &&
+ bpos_lt(*pulled_from_scan, b->key.k.p)) {
+ ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
+ bpos_successor(child->key.k.p), b->key.k.p);
+ if (ret)
+ goto err;
+
+ *pulled_from_scan = b->key.k.p;
+ ret = DID_FILL_FROM_SCAN;
+ } else {
+ ret = set_node_max(c, child, b->key.k.p);
+ }
}
err:
fsck_err:
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
+ printbuf_exit(&buf);
return ret;
}
-static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
+static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b,
+ struct bpos *pulled_from_scan)
{
struct bch_fs *c = trans->c;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bkey_buf prev_k, cur_k;
struct btree *prev = NULL, *cur = NULL;
- bool have_child, dropped_children = false;
+ bool have_child, new_pass = false;
struct printbuf buf = PRINTBUF;
int ret = 0;
if (!b->c.level)
return 0;
-again:
- prev = NULL;
- have_child = dropped_children = false;
+
bch2_bkey_buf_init(&prev_k);
bch2_bkey_buf_init(&cur_k);
+again:
+ cur = prev = NULL;
+ have_child = new_pass = false;
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
iter.prefetch = true;
@@ -415,11 +367,17 @@ again:
b->c.level - 1,
buf.buf)) {
bch2_btree_node_evict(trans, cur_k.k);
+ cur = NULL;
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
- cur = NULL;
if (ret)
break;
+
+ if (!btree_id_is_alloc(b->c.btree_id)) {
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ break;
+ }
continue;
}
@@ -427,7 +385,23 @@ again:
if (ret)
break;
- ret = btree_repair_node_boundaries(c, b, prev, cur);
+ if (bch2_btree_node_is_stale(c, cur)) {
+ bch_info(c, "btree node %s older than nodes found by scanning", buf.buf);
+ six_unlock_read(&cur->c.lock);
+ bch2_btree_node_evict(trans, cur_k.k);
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, cur_k.k->k.p);
+ cur = NULL;
+ if (ret)
+ break;
+ continue;
+ }
+
+ ret = btree_check_node_boundaries(c, b, prev, cur, pulled_from_scan);
+ if (ret == DID_FILL_FROM_SCAN) {
+ new_pass = true;
+ ret = 0;
+ }
if (ret == DROP_THIS_NODE) {
six_unlock_read(&cur->c.lock);
@@ -445,6 +419,7 @@ again:
prev = NULL;
if (ret == DROP_PREV_NODE) {
+ bch_info(c, "dropped prev node");
bch2_btree_node_evict(trans, prev_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, prev_k.k->k.p);
@@ -452,8 +427,6 @@ again:
break;
bch2_btree_and_journal_iter_exit(&iter);
- bch2_bkey_buf_exit(&prev_k, c);
- bch2_bkey_buf_exit(&cur_k, c);
goto again;
} else if (ret)
break;
@@ -465,7 +438,11 @@ again:
if (!ret && !IS_ERR_OR_NULL(prev)) {
BUG_ON(cur);
- ret = btree_repair_node_end(c, b, prev);
+ ret = btree_repair_node_end(c, b, prev, pulled_from_scan);
+ if (ret == DID_FILL_FROM_SCAN) {
+ new_pass = true;
+ ret = 0;
+ }
}
if (!IS_ERR_OR_NULL(prev))
@@ -479,6 +456,10 @@ again:
goto err;
bch2_btree_and_journal_iter_exit(&iter);
+
+ if (new_pass)
+ goto again;
+
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
iter.prefetch = true;
@@ -495,7 +476,7 @@ again:
if (ret)
goto err;
- ret = bch2_btree_repair_topology_recurse(trans, cur);
+ ret = bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan);
six_unlock_read(&cur->c.lock);
cur = NULL;
@@ -503,7 +484,7 @@ again:
bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
- dropped_children = true;
+ new_pass = true;
}
if (ret)
@@ -530,12 +511,14 @@ fsck_err:
six_unlock_read(&cur->c.lock);
bch2_btree_and_journal_iter_exit(&iter);
- bch2_bkey_buf_exit(&prev_k, c);
- bch2_bkey_buf_exit(&cur_k, c);
- if (!ret && dropped_children)
+ if (!ret && new_pass)
goto again;
+ BUG_ON(!ret && bch2_btree_node_check_topology(trans, b));
+
+ bch2_bkey_buf_exit(&prev_k, c);
+ bch2_bkey_buf_exit(&cur_k, c);
printbuf_exit(&buf);
return ret;
}
@@ -543,32 +526,63 @@ fsck_err:
int bch2_check_topology(struct bch_fs *c)
{
struct btree_trans *trans = bch2_trans_get(c);
- struct btree *b;
- unsigned i;
+ struct bpos pulled_from_scan = POS_MIN;
int ret = 0;
- for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
+ for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
+ bool reconstructed_root = false;
- if (!r->alive)
- continue;
+ if (r->error) {
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ break;
+reconstruct_root:
+ bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i));
- b = r->b;
- if (btree_node_fake(b))
- continue;
+ r->alive = false;
+ r->error = 0;
+
+ if (!bch2_btree_has_scanned_nodes(c, i)) {
+ mustfix_fsck_err(c, btree_root_unreadable_and_scan_found_nothing,
+ "no nodes found for btree %s, continue?", bch2_btree_id_str(i));
+ bch2_btree_root_alloc_fake(c, i, 0);
+ } else {
+ bch2_btree_root_alloc_fake(c, i, 1);
+ bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
+ if (ret)
+ break;
+ }
+
+ reconstructed_root = true;
+ }
+
+ struct btree *b = r->b;
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
- ret = bch2_btree_repair_topology_recurse(trans, b);
+ ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan);
six_unlock_read(&b->c.lock);
if (ret == DROP_THIS_NODE) {
- bch_err(c, "empty btree root - repair unimplemented");
- ret = -BCH_ERR_fsck_repair_unimplemented;
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+ mutex_lock(&c->btree_cache.lock);
+ list_move(&b->list, &c->btree_cache.freeable);
+ mutex_unlock(&c->btree_cache.lock);
+
+ r->b = NULL;
+
+ if (!reconstructed_root)
+ goto reconstruct_root;
+
+ bch_err(c, "empty btree root %s", bch2_btree_id_str(i));
+ bch2_btree_root_alloc_fake(c, i, 0);
+ r->alive = false;
+ ret = 0;
}
}
-
+fsck_err:
bch2_trans_put(trans);
-
return ret;
}
@@ -591,7 +605,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
- enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, p, entry_c);
if (fsck_err_on(!g->gen_valid,
c, ptr_to_missing_alloc_key,
@@ -657,7 +671,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
continue;
if (fsck_err_on(bucket_data_type(g->data_type) &&
- bucket_data_type(g->data_type) != data_type, c,
+ bucket_data_type(g->data_type) !=
+ bucket_data_type(data_type), c,
ptr_bucket_data_type_mismatch,
"bucket %u:%zu different types of data in same bucket: %s, %s\n"
"while marking %s",
@@ -698,18 +713,13 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
}
if (do_update) {
- struct bkey_ptrs ptrs;
- union bch_extent_entry *entry;
- struct bch_extent_ptr *ptr;
- struct bkey_i *new;
-
if (is_root) {
bch_err(c, "cannot update btree roots yet");
ret = -EINVAL;
goto err;
}
- new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
+ struct bkey_i *new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
if (!new) {
ret = -BCH_ERR_ENOMEM_gc_repair_key;
bch_err_msg(c, ret, "allocating new key");
@@ -724,7 +734,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
* btree node isn't there anymore, the read path will
* sort it out:
*/
- ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bucket *g = PTR_GC_BUCKET(ca, ptr);
@@ -732,19 +742,26 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
ptr->gen = g->gen;
}
} else {
- bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- struct bucket *g = PTR_GC_BUCKET(ca, ptr);
- enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);
-
- (ptr->cached &&
- (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
- (!ptr->cached &&
- gen_cmp(ptr->gen, g->gen) < 0) ||
- gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
- (g->data_type &&
- g->data_type != data_type);
- }));
+ struct bkey_ptrs ptrs;
+ union bch_extent_entry *entry;
+restart_drop_ptrs:
+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+ bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+ struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry);
+
+ if ((p.ptr.cached &&
+ (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) ||
+ (!p.ptr.cached &&
+ gen_cmp(p.ptr.gen, g->gen) < 0) ||
+ gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX ||
+ (g->data_type &&
+ g->data_type != data_type)) {
+ bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr);
+ goto restart_drop_ptrs;
+ }
+ }
again:
ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
bkey_extent_entry_for_each(ptrs, entry) {
@@ -774,12 +791,6 @@ found:
}
}
- ret = bch2_journal_key_insert_take(c, btree_id, level, new);
- if (ret) {
- kfree(new);
- goto err;
- }
-
if (level)
bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
@@ -793,6 +804,12 @@ found:
bch_info(c, "new key %s", buf.buf);
}
+ ret = bch2_journal_key_insert_take(c, btree_id, level, new);
+ if (ret) {
+ kfree(new);
+ goto err;
+ }
+
*k = bkey_i_to_s_c(new);
}
err:
@@ -811,6 +828,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
struct bch_fs *c = trans->c;
struct bkey deleted = KEY(0, 0, 0);
struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
+ struct printbuf buf = PRINTBUF;
int ret = 0;
deleted.p = k->k->p;
@@ -819,10 +837,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
BUG_ON(bch2_journal_seq_verify &&
k->k->version.lo > atomic64_read(&c->journal.seq));
- ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
- if (ret)
- goto err;
-
if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c,
bkey_version_in_future,
"key version number higher than recorded: %llu > %llu",
@@ -831,52 +845,57 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
atomic64_set(&c->key_version, k->k->version.lo);
}
+ ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
+ if (ret)
+ goto err;
+
+ if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, *k),
+ c, btree_bitmap_not_marked,
+ "btree ptr not marked in member info btree allocated bitmap\n %s",
+ (bch2_bkey_val_to_text(&buf, c, *k),
+ buf.buf))) {
+ mutex_lock(&c->sb_lock);
+ bch2_dev_btree_bitmap_mark(c, *k);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ }
+
ret = commit_do(trans, NULL, NULL, 0,
- bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
+ bch2_key_trigger(trans, btree_id, level, old,
+ unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
fsck_err:
err:
+ printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial)
{
- struct bch_fs *c = trans->c;
struct btree_node_iter iter;
struct bkey unpacked;
struct bkey_s_c k;
- struct bkey_buf prev, cur;
int ret = 0;
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret)
+ return ret;
+
if (!btree_node_type_needs_gc(btree_node_type(b)))
return 0;
bch2_btree_node_iter_init_from_start(&iter, b);
- bch2_bkey_buf_init(&prev);
- bch2_bkey_buf_init(&cur);
- bkey_init(&prev.k->k);
while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
&k, initial);
if (ret)
- break;
+ return ret;
bch2_btree_node_iter_advance(&iter, b);
-
- if (b->c.level) {
- bch2_bkey_buf_reassemble(&cur, c, k);
-
- ret = bch2_gc_check_topology(c, b, &prev, cur,
- bch2_btree_node_iter_end(&iter));
- if (ret)
- break;
- }
}
- bch2_bkey_buf_exit(&cur, c);
- bch2_bkey_buf_exit(&prev, c);
- return ret;
+ return 0;
}
static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id,
@@ -925,14 +944,16 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
struct bch_fs *c = trans->c;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
- struct bkey_buf cur, prev;
+ struct bkey_buf cur;
struct printbuf buf = PRINTBUF;
int ret = 0;
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret)
+ return ret;
+
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
- bch2_bkey_buf_init(&prev);
bch2_bkey_buf_init(&cur);
- bkey_init(&prev.k->k);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
BUG_ON(bpos_lt(k.k->p, b->data->min_key));
@@ -943,20 +964,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
if (ret)
goto fsck_err;
- if (b->c.level) {
- bch2_bkey_buf_reassemble(&cur, c, k);
- k = bkey_i_to_s_c(cur.k);
-
- bch2_btree_and_journal_iter_advance(&iter);
-
- ret = bch2_gc_check_topology(c, b,
- &prev, cur,
- !bch2_btree_and_journal_iter_peek(&iter).k);
- if (ret)
- goto fsck_err;
- } else {
- bch2_btree_and_journal_iter_advance(&iter);
- }
+ bch2_btree_and_journal_iter_advance(&iter);
}
if (b->c.level > target_depth) {
@@ -1015,7 +1023,6 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
}
fsck_err:
bch2_bkey_buf_exit(&cur, c);
- bch2_bkey_buf_exit(&prev, c);
bch2_btree_and_journal_iter_exit(&iter);
printbuf_exit(&buf);
return ret;
@@ -1033,9 +1040,6 @@ static int bch2_gc_btree_init(struct btree_trans *trans,
b = bch2_btree_id_root(c, btree_id)->b;
- if (btree_node_fake(b))
- return 0;
-
six_lock_read(&b->c.lock, NULL, NULL);
printbuf_reset(&buf);
bch2_bpos_to_text(&buf, b->data->min_key);
@@ -1583,7 +1587,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
- return ret;
+ goto out;
if (!r->refcount)
new->k.type = KEY_TYPE_deleted;
@@ -1591,6 +1595,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
*bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
ret = bch2_trans_update(trans, iter, new, 0);
}
+out:
fsck_err:
printbuf_exit(&buf);
return ret;
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 34df8ccc5fecc2..debb0edc3455af 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -654,6 +654,7 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
*/
bch2_bset_set_no_aux_tree(b, b->set);
bch2_btree_build_aux_trees(b);
+ b->nr = bch2_btree_node_count_keys(b);
struct bkey_s_c k;
struct bkey unpacked;
@@ -830,7 +831,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b,
(rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0);
}
-static bool __bkey_valid(struct bch_fs *c, struct btree *b,
+static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
struct bset *i, struct bkey_packed *k)
{
if (bkey_p_next(k) > vstruct_last(i))
@@ -839,7 +840,7 @@ static bool __bkey_valid(struct bch_fs *c, struct btree *b,
if (k->format > KEY_FORMAT_CURRENT)
return false;
- if (k->u64s < bkeyp_key_u64s(&b->format, k))
+ if (!bkeyp_u64s_valid(&b->format, k))
return false;
struct printbuf buf = PRINTBUF;
@@ -883,11 +884,13 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
"invalid bkey format %u", k->format))
goto drop_this_key;
- if (btree_err_on(k->u64s < bkeyp_key_u64s(&b->format, k),
+ if (btree_err_on(!bkeyp_u64s_valid(&b->format, k),
-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i,
btree_node_bkey_bad_u64s,
- "k->u64s too small (%u < %u)", k->u64s, bkeyp_key_u64s(&b->format, k)))
+ "bad k->u64s %u (min %u max %zu)", k->u64s,
+ bkeyp_key_u64s(&b->format, k),
+ U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k)))
goto drop_this_key;
if (!write)
@@ -946,13 +949,12 @@ drop_this_key:
* do
*/
- if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
+ if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
for (next_good_key = 1;
next_good_key < (u64 *) vstruct_last(i) - (u64 *) k;
next_good_key++)
- if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
+ if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
goto got_good_key;
-
}
/*
@@ -1263,10 +1265,12 @@ out:
return retry_read;
fsck_err:
if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
- ret == -BCH_ERR_btree_node_read_err_must_retry)
+ ret == -BCH_ERR_btree_node_read_err_must_retry) {
retry_read = 1;
- else
+ } else {
set_btree_node_read_error(b);
+ bch2_btree_lost_data(c, b->c.btree_id);
+ }
goto out;
}
@@ -1327,6 +1331,7 @@ start:
if (!can_retry) {
set_btree_node_read_error(b);
+ bch2_btree_lost_data(c, b->c.btree_id);
break;
}
}
@@ -1335,7 +1340,9 @@ start:
rb->start_time);
bio_put(&rb->bio);
- if (saw_error && !btree_node_read_error(b)) {
+ if (saw_error &&
+ !btree_node_read_error(b) &&
+ c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
printbuf_reset(&buf);
bch2_bpos_to_text(&buf, b->key.k.p);
bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
@@ -1526,9 +1533,10 @@ fsck_err:
ret = -1;
}
- if (ret)
+ if (ret) {
set_btree_node_read_error(b);
- else if (*saw_error)
+ bch2_btree_lost_data(c, b->c.btree_id);
+ } else if (*saw_error)
bch2_btree_node_rewrite_async(c, b);
for (i = 0; i < ra->nr; i++) {
@@ -1657,13 +1665,14 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
prt_str(&buf, "btree node read error: no device to read from\n at ");
bch2_btree_pos_to_text(&buf, c, b);
- bch_err(c, "%s", buf.buf);
+ bch_err_ratelimited(c, "%s", buf.buf);
if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
bch2_fatal_error(c);
set_btree_node_read_error(b);
+ bch2_btree_lost_data(c, b->c.btree_id);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
printbuf_exit(&buf);
@@ -1860,7 +1869,7 @@ static void btree_node_write_work(struct work_struct *work)
} else {
ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
- BCH_WATERMARK_reclaim|
+ BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw,
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 51bcdc6c6d1cda..2a211a4bebd153 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -927,8 +927,22 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
if (ret)
goto err;
} else {
- bch2_bkey_buf_unpack(&tmp, c, l->b,
- bch2_btree_node_iter_peek(&l->iter, l->b));
+ struct bkey_packed *k = bch2_btree_node_iter_peek(&l->iter, l->b);
+ if (!k) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "node not found at pos ");
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_str(&buf, " within parent node ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&l->b->key));
+
+ bch2_fs_fatal_error(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ ret = -BCH_ERR_btree_need_topology_repair;
+ goto err;
+ }
+
+ bch2_bkey_buf_unpack(&tmp, c, l->b, k);
if ((flags & BTREE_ITER_PREFETCH) &&
c->opts.btree_node_prefetch) {
@@ -962,7 +976,6 @@ err:
return ret;
}
-
static int bch2_btree_path_traverse_all(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
@@ -2790,6 +2803,31 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
struct btree_transaction_stats *s = btree_trans_stats(trans);
s->max_mem = max(s->max_mem, new_bytes);
+ if (trans->used_mempool) {
+ if (trans->mem_bytes >= new_bytes)
+ goto out_change_top;
+
+ /* No more space from mempool item, need malloc new one */
+ new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN);
+ if (unlikely(!new_mem)) {
+ bch2_trans_unlock(trans);
+
+ new_mem = kmalloc(new_bytes, GFP_KERNEL);
+ if (!new_mem)
+ return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+
+ ret = bch2_trans_relock(trans);
+ if (ret) {
+ kfree(new_mem);
+ return ERR_PTR(ret);
+ }
+ }
+ memcpy(new_mem, trans->mem, trans->mem_top);
+ trans->used_mempool = false;
+ mempool_free(trans->mem, &c->btree_trans_mem_pool);
+ goto out_new_mem;
+ }
+
new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
if (unlikely(!new_mem)) {
bch2_trans_unlock(trans);
@@ -2798,6 +2836,8 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
new_bytes = BTREE_TRANS_MEM_MAX;
+ memcpy(new_mem, trans->mem, trans->mem_top);
+ trans->used_mempool = true;
kfree(trans->mem);
}
@@ -2811,7 +2851,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
if (ret)
return ERR_PTR(ret);
}
-
+out_new_mem:
trans->mem = new_mem;
trans->mem_bytes = new_bytes;
@@ -2819,7 +2859,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
}
-
+out_change_top:
p = trans->mem + trans->mem_top;
trans->mem_top += size;
memset(p, 0, size);
@@ -3093,7 +3133,7 @@ void bch2_trans_put(struct btree_trans *trans)
if (paths_allocated != trans->_paths_allocated)
kvfree_rcu_mightsleep(paths_allocated);
- if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
+ if (trans->used_mempool)
mempool_free(trans->mem, &c->btree_trans_mem_pool);
else
kfree(trans->mem);
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 24772538e4cc74..1c70836dd7cce4 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -498,8 +498,13 @@ static inline void set_btree_iter_dontneed(struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
- if (!trans->restarted)
- btree_iter_path(trans, iter)->preserve = false;
+ if (!iter->path || trans->restarted)
+ return;
+
+ struct btree_path *path = btree_iter_path(trans, iter);
+ path->preserve = false;
+ if (path->ref == 1)
+ path->should_be_locked = false;
}
void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
@@ -642,7 +647,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *);
static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{
- if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8)
+ if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_NORMAL_LIMIT - 8)
return __bch2_btree_trans_too_many_iters(trans);
return 0;
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index 50e04356d72c8e..1e8cf49a693531 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -130,12 +130,30 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree
return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx);
}
+static void journal_iter_verify(struct journal_iter *iter)
+{
+ struct journal_keys *keys = iter->keys;
+ size_t gap_size = keys->size - keys->nr;
+
+ BUG_ON(iter->idx >= keys->gap &&
+ iter->idx < keys->gap + gap_size);
+
+ if (iter->idx < keys->size) {
+ struct journal_key *k = keys->data + iter->idx;
+
+ int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
+ cmp_int(k->level, iter->level);
+ BUG_ON(cmp < 0);
+ }
+}
+
static void journal_iters_fix(struct bch_fs *c)
{
struct journal_keys *keys = &c->journal_keys;
/* The key we just inserted is immediately before the gap: */
size_t gap_end = keys->gap + (keys->size - keys->nr);
- struct btree_and_journal_iter *iter;
+ struct journal_key *new_key = &keys->data[keys->gap - 1];
+ struct journal_iter *iter;
/*
* If an iterator points one after the key we just inserted, decrement
@@ -143,9 +161,14 @@ static void journal_iters_fix(struct bch_fs *c)
* decrement was unnecessary, bch2_btree_and_journal_iter_peek() will
* handle that:
*/
- list_for_each_entry(iter, &c->journal_iters, journal.list)
- if (iter->journal.idx == gap_end)
- iter->journal.idx = keys->gap - 1;
+ list_for_each_entry(iter, &c->journal_iters, list) {
+ journal_iter_verify(iter);
+ if (iter->idx == gap_end &&
+ new_key->btree_id == iter->btree_id &&
+ new_key->level == iter->level)
+ iter->idx = keys->gap - 1;
+ journal_iter_verify(iter);
+ }
}
static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap)
@@ -192,7 +215,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
if (idx > keys->gap)
idx -= keys->size - keys->nr;
+ size_t old_gap = keys->gap;
+
if (keys->nr == keys->size) {
+ journal_iters_move_gap(c, old_gap, keys->size);
+ old_gap = keys->size;
+
struct journal_keys new_keys = {
.nr = keys->nr,
.size = max_t(size_t, keys->size, 8) * 2,
@@ -216,7 +244,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
keys->gap = keys->nr;
}
- journal_iters_move_gap(c, keys->gap, idx);
+ journal_iters_move_gap(c, old_gap, idx);
move_gap(keys, idx);
@@ -261,6 +289,22 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
return bch2_journal_key_insert(c, id, level, &whiteout);
}
+bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree,
+ unsigned level, struct bpos pos)
+{
+ struct journal_keys *keys = &trans->c->journal_keys;
+ size_t idx = bch2_journal_key_search(keys, btree, level, pos);
+
+ if (!trans->journal_replay_not_finished)
+ return false;
+
+ return (idx < keys->size &&
+ keys->data[idx].btree_id == btree &&
+ keys->data[idx].level == level &&
+ bpos_eq(keys->data[idx].k->k.p, pos) &&
+ bkey_deleted(&keys->data[idx].k->k));
+}
+
void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
unsigned level, struct bpos pos)
{
@@ -285,16 +329,21 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)
static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
{
- struct journal_key *k = iter->keys->data + iter->idx;
+ journal_iter_verify(iter);
+
+ while (iter->idx < iter->keys->size) {
+ struct journal_key *k = iter->keys->data + iter->idx;
+
+ int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
+ cmp_int(k->level, iter->level);
+ if (cmp > 0)
+ break;
+ BUG_ON(cmp);
- while (k < iter->keys->data + iter->keys->size &&
- k->btree_id == iter->btree_id &&
- k->level == iter->level) {
if (!k->overwritten)
return bkey_i_to_s_c(k->k);
bch2_journal_iter_advance(iter);
- k = iter->keys->data + iter->idx;
}
return bkey_s_c_null;
@@ -314,6 +363,8 @@ static void bch2_journal_iter_init(struct bch_fs *c,
iter->level = level;
iter->keys = &c->journal_keys;
iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);
+
+ journal_iter_verify(iter);
}
static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
@@ -363,7 +414,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
{
- struct bkey_s_c btree_k, journal_k, ret;
+ struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret;
if (iter->prefetch && iter->journal.level)
btree_and_journal_iter_prefetch(iter);
@@ -375,9 +426,10 @@ again:
bpos_lt(btree_k.k->p, iter->pos))
bch2_journal_iter_advance_btree(iter);
- while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
- bpos_lt(journal_k.k->p, iter->pos))
- bch2_journal_iter_advance(&iter->journal);
+ if (iter->trans->journal_replay_not_finished)
+ while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
+ bpos_lt(journal_k.k->p, iter->pos))
+ bch2_journal_iter_advance(&iter->journal);
ret = journal_k.k &&
(!btree_k.k || bpos_le(journal_k.k->p, btree_k.k->p))
@@ -417,10 +469,15 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
iter->trans = trans;
iter->b = b;
iter->node_iter = node_iter;
- bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
- INIT_LIST_HEAD(&iter->journal.list);
iter->pos = b->data->min_key;
iter->at_end = false;
+ INIT_LIST_HEAD(&iter->journal.list);
+
+ if (trans->journal_replay_not_finished) {
+ bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
+ if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags))
+ list_add(&iter->journal.list, &trans->c->journal_iters);
+ }
}
/*
@@ -435,7 +492,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
bch2_btree_node_iter_init_from_start(&node_iter, b);
__bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
- list_add(&iter->journal.list, &trans->c->journal_iters);
}
/* sort and dedup all keys in the journal: */
@@ -548,3 +604,22 @@ int bch2_journal_keys_sort(struct bch_fs *c)
bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_read, keys->nr);
return 0;
}
+
+void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree,
+ unsigned level_min, unsigned level_max,
+ struct bpos start, struct bpos end)
+{
+ struct journal_keys *keys = &c->journal_keys;
+ size_t dst = 0;
+
+ move_gap(keys, keys->nr);
+
+ darray_for_each(*keys, i)
+ if (!(i->btree_id == btree &&
+ i->level >= level_min &&
+ i->level <= level_max &&
+ bpos_ge(i->k->k.p, start) &&
+ bpos_le(i->k->k.p, end)))
+ keys->data[dst++] = *i;
+ keys->nr = keys->gap = dst;
+}
diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h
index c9d19da3ea0480..af25046ebcaa76 100644
--- a/fs/bcachefs/btree_journal_iter.h
+++ b/fs/bcachefs/btree_journal_iter.h
@@ -40,8 +40,8 @@ int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
unsigned, struct bkey_i *);
int bch2_journal_key_delete(struct bch_fs *, enum btree_id,
unsigned, struct bpos);
-void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id,
- unsigned, struct bpos);
+bool bch2_key_deleted_in_journal(struct btree_trans *, enum btree_id, unsigned, struct bpos);
+void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, unsigned, struct bpos);
void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
@@ -66,4 +66,8 @@ void bch2_journal_entries_free(struct bch_fs *);
int bch2_journal_keys_sort(struct bch_fs *);
+void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id,
+ unsigned, unsigned,
+ struct bpos, struct bpos);
+
#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 581edcb0911bfa..e8c1c530cd95f5 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -169,6 +169,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
} else {
mutex_lock(&bc->lock);
list_move_tail(&ck->list, &bc->freed_pcpu);
+ bc->nr_freed_pcpu++;
mutex_unlock(&bc->lock);
}
}
@@ -245,6 +246,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
if (!list_empty(&bc->freed_pcpu)) {
ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list);
list_del_init(&ck->list);
+ bc->nr_freed_pcpu--;
}
mutex_unlock(&bc->lock);
}
@@ -659,7 +661,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
commit_flags |= BCH_WATERMARK_reclaim;
if (ck->journal.seq != journal_last_seq(j) ||
- j->watermark == BCH_WATERMARK_stripe)
+ !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags))
commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
ret = bch2_btree_iter_traverse(&b_iter) ?:
@@ -840,8 +842,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
* Newest freed entries are at the end of the list - once we hit one
* that's too new to be freed, we can bail out:
*/
- scanned += bc->nr_freed_nonpcpu;
-
list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
@@ -855,11 +855,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
bc->nr_freed_nonpcpu--;
}
- if (scanned >= nr)
- goto out;
-
- scanned += bc->nr_freed_pcpu;
-
list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
@@ -873,9 +868,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
bc->nr_freed_pcpu--;
}
- if (scanned >= nr)
- goto out;
-
rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
if (bc->shrink_iter >= tbl->size)
@@ -891,12 +883,12 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
ck = container_of(pos, struct bkey_cached, hash);
- if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
goto next;
-
- if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
+ } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
- else if (bkey_cached_lock_for_evict(ck)) {
+ goto next;
+ } else if (bkey_cached_lock_for_evict(ck)) {
bkey_cached_evict(bc, ck);
bkey_cached_free(bc, ck);
}
@@ -914,7 +906,6 @@ next:
} while (scanned < nr && bc->shrink_iter != start);
rcu_read_unlock();
-out:
memalloc_nofs_restore(flags);
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
mutex_unlock(&bc->lock);
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index b9b151e693ed60..f2caf491957efc 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -440,33 +440,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b)
{
- struct btree_path *linked;
- unsigned i, iter;
- int ret;
-
- /*
- * XXX BIG FAT NOTICE
- *
- * Drop all read locks before taking a write lock:
- *
- * This is a hack, because bch2_btree_node_lock_write_nofail() is a
- * hack - but by dropping read locks first, this should never fail, and
- * we only use this in code paths where whatever read locks we've
- * already taken are no longer needed:
- */
-
- trans_for_each_path(trans, linked, iter) {
- if (!linked->nodes_locked)
- continue;
-
- for (i = 0; i < BTREE_MAX_DEPTH; i++)
- if (btree_node_read_locked(linked, i)) {
- btree_node_unlock(trans, linked, i);
- btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
- }
- }
-
- ret = __btree_node_lock_write(trans, path, b, true);
+ int ret = __btree_node_lock_write(trans, path, b, true);
BUG_ON(ret);
}
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
new file mode 100644
index 00000000000000..c60794264da289
--- /dev/null
+++ b/fs/bcachefs/btree_node_scan.c
@@ -0,0 +1,521 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_journal_iter.h"
+#include "btree_node_scan.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "error.h"
+#include "journal_io.h"
+#include "recovery_passes.h"
+
+#include <linux/kthread.h>
+#include <linux/sort.h>
+
+struct find_btree_nodes_worker {
+ struct closure *cl;
+ struct find_btree_nodes *f;
+ struct bch_dev *ca;
+};
+
+static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n)
+{
+ prt_printf(out, "%s l=%u seq=%u cookie=%llx ", bch2_btree_id_str(n->btree_id), n->level, n->seq, n->cookie);
+ bch2_bpos_to_text(out, n->min_key);
+ prt_str(out, "-");
+ bch2_bpos_to_text(out, n->max_key);
+
+ if (n->range_updated)
+ prt_str(out, " range updated");
+ if (n->overwritten)
+ prt_str(out, " overwritten");
+
+ for (unsigned i = 0; i < n->nr_ptrs; i++) {
+ prt_char(out, ' ');
+ bch2_extent_ptr_to_text(out, c, n->ptrs + i);
+ }
+}
+
+static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes)
+{
+ printbuf_indent_add(out, 2);
+ darray_for_each(nodes, i) {
+ found_btree_node_to_text(out, c, i);
+ prt_newline(out);
+ }
+ printbuf_indent_sub(out, 2);
+}
+
+static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f)
+{
+ struct bkey_i_btree_ptr_v2 *bp = bkey_btree_ptr_v2_init(k);
+
+ set_bkey_val_u64s(&bp->k, sizeof(struct bch_btree_ptr_v2) / sizeof(u64) + f->nr_ptrs);
+ bp->k.p = f->max_key;
+ bp->v.seq = cpu_to_le64(f->cookie);
+ bp->v.sectors_written = 0;
+ bp->v.flags = 0;
+ bp->v.min_key = f->min_key;
+ SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated);
+ memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs);
+}
+
+static bool found_btree_node_is_readable(struct btree_trans *trans,
+ const struct found_btree_node *f)
+{
+ struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k;
+
+ found_btree_node_to_key(&k.k, f);
+
+ struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false);
+ bool ret = !IS_ERR_OR_NULL(b);
+ if (ret)
+ six_unlock_read(&b->c.lock);
+
+ /*
+ * We might update this node's range; if that happens, we need the node
+ * to be re-read so the read path can trim keys that are no longer in
+ * this node
+ */
+ if (b != btree_node_root(trans->c, b))
+ bch2_btree_node_evict(trans, &k.k);
+ return ret;
+}
+
+static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
+{
+ const struct found_btree_node *l = _l;
+ const struct found_btree_node *r = _r;
+
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ cmp_int(l->level, r->level) ?:
+ cmp_int(l->cookie, r->cookie);
+}
+
+/*
+ * Given two found btree nodes, if their sequence numbers are equal, take the
+ * one that's readable:
+ */
+static int found_btree_node_cmp_time(const struct found_btree_node *l,
+ const struct found_btree_node *r)
+{
+ return cmp_int(l->seq, r->seq);
+}
+
+static int found_btree_node_cmp_pos(const void *_l, const void *_r)
+{
+ const struct found_btree_node *l = _l;
+ const struct found_btree_node *r = _r;
+
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ -cmp_int(l->level, r->level) ?:
+ bpos_cmp(l->min_key, r->min_key) ?:
+ -found_btree_node_cmp_time(l, r);
+}
+
+static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
+ struct bio *bio, struct btree_node *bn, u64 offset)
+{
+ struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
+ bio->bi_iter.bi_sector = offset;
+ bch2_bio_map(bio, bn, PAGE_SIZE);
+
+ submit_bio_wait(bio);
+ if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
+ "IO error in try_read_btree_node() at %llu: %s",
+ offset, bch2_blk_status_to_str(bio->bi_status)))
+ return;
+
+ if (le64_to_cpu(bn->magic) != bset_magic(c))
+ return;
+
+ if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
+ struct nonce nonce = btree_nonce(&bn->keys, 0);
+ unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
+
+ bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes);
+ }
+
+ if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
+ return;
+
+ if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH)
+ return;
+
+ rcu_read_lock();
+ struct found_btree_node n = {
+ .btree_id = BTREE_NODE_ID(bn),
+ .level = BTREE_NODE_LEVEL(bn),
+ .seq = BTREE_NODE_SEQ(bn),
+ .cookie = le64_to_cpu(bn->keys.seq),
+ .min_key = bn->min_key,
+ .max_key = bn->max_key,
+ .nr_ptrs = 1,
+ .ptrs[0].type = 1 << BCH_EXTENT_ENTRY_ptr,
+ .ptrs[0].offset = offset,
+ .ptrs[0].dev = ca->dev_idx,
+ .ptrs[0].gen = *bucket_gen(ca, sector_to_bucket(ca, offset)),
+ };
+ rcu_read_unlock();
+
+ if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
+ mutex_lock(&f->lock);
+ if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
+ bch_err(c, "try_read_btree_node() can't handle endian conversion");
+ f->ret = -EINVAL;
+ goto unlock;
+ }
+
+ if (darray_push(&f->nodes, n))
+ f->ret = -ENOMEM;
+unlock:
+ mutex_unlock(&f->lock);
+ }
+}
+
+static int read_btree_nodes_worker(void *p)
+{
+ struct find_btree_nodes_worker *w = p;
+ struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
+ struct bch_dev *ca = w->ca;
+ void *buf = (void *) __get_free_page(GFP_KERNEL);
+ struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
+ unsigned long last_print = jiffies;
+
+ if (!buf || !bio) {
+ bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
+ w->f->ret = -ENOMEM;
+ goto err;
+ }
+
+ for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++)
+ for (unsigned bucket_offset = 0;
+ bucket_offset + btree_sectors(c) <= ca->mi.bucket_size;
+ bucket_offset += btree_sectors(c)) {
+ if (time_after(jiffies, last_print + HZ * 30)) {
+ u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset;
+ u64 end_sector = ca->mi.nbuckets * ca->mi.bucket_size;
+
+ bch_info(ca, "%s: %2u%% done", __func__,
+ (unsigned) div64_u64(cur_sector * 100, end_sector));
+ last_print = jiffies;
+ }
+
+ u64 sector = bucket * ca->mi.bucket_size + bucket_offset;
+
+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
+ !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
+ continue;
+
+ try_read_btree_node(w->f, ca, bio, buf, sector);
+ }
+err:
+ bio_put(bio);
+ free_page((unsigned long) buf);
+ percpu_ref_get(&ca->io_ref);
+ closure_put(w->cl);
+ kfree(w);
+ return 0;
+}
+
+static int read_btree_nodes(struct find_btree_nodes *f)
+{
+ struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+ struct closure cl;
+ int ret = 0;
+
+ closure_init_stack(&cl);
+
+ for_each_online_member(c, ca) {
+ if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
+ continue;
+
+ struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
+ struct task_struct *t;
+
+ if (!w) {
+ percpu_ref_put(&ca->io_ref);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ percpu_ref_get(&ca->io_ref);
+ closure_get(&cl);
+ w->cl = &cl;
+ w->f = f;
+ w->ca = ca;
+
+ t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
+ ret = IS_ERR_OR_NULL(t);
+ if (ret) {
+ percpu_ref_put(&ca->io_ref);
+ closure_put(&cl);
+ f->ret = ret;
+ bch_err(c, "error starting kthread: %i", ret);
+ break;
+ }
+ }
+err:
+ closure_sync(&cl);
+ return f->ret ?: ret;
+}
+
+static void bubble_up(struct found_btree_node *n, struct found_btree_node *end)
+{
+ while (n + 1 < end &&
+ found_btree_node_cmp_pos(n, n + 1) > 0) {
+ swap(n[0], n[1]);
+ n++;
+ }
+}
+
+static int handle_overwrites(struct bch_fs *c,
+ struct found_btree_node *start,
+ struct found_btree_node *end)
+{
+ struct found_btree_node *n;
+again:
+ for (n = start + 1;
+ n < end &&
+ n->btree_id == start->btree_id &&
+ n->level == start->level &&
+ bpos_lt(n->min_key, start->max_key);
+ n++) {
+ int cmp = found_btree_node_cmp_time(start, n);
+
+ if (cmp > 0) {
+ if (bpos_cmp(start->max_key, n->max_key) >= 0)
+ n->overwritten = true;
+ else {
+ n->range_updated = true;
+ n->min_key = bpos_successor(start->max_key);
+ n->range_updated = true;
+ bubble_up(n, end);
+ goto again;
+ }
+ } else if (cmp < 0) {
+ BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0);
+
+ start->max_key = bpos_predecessor(n->min_key);
+ start->range_updated = true;
+ } else if (n->level) {
+ n->overwritten = true;
+ } else {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "overlapping btree nodes with same seq! halting\n ");
+ found_btree_node_to_text(&buf, c, start);
+ prt_str(&buf, "\n ");
+ found_btree_node_to_text(&buf, c, n);
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+ }
+
+ return 0;
+}
+
+int bch2_scan_for_btree_nodes(struct bch_fs *c)
+{
+ struct find_btree_nodes *f = &c->found_btree_nodes;
+ struct printbuf buf = PRINTBUF;
+ size_t dst;
+ int ret = 0;
+
+ if (f->nodes.nr)
+ return 0;
+
+ mutex_init(&f->lock);
+
+ ret = read_btree_nodes(f);
+ if (ret)
+ return ret;
+
+ if (!f->nodes.nr) {
+ bch_err(c, "%s: no btree nodes found", __func__);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (0 && c->opts.verbose) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "%s: nodes found:\n", __func__);
+ found_btree_nodes_to_text(&buf, c, f->nodes);
+ bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ }
+
+ sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);
+
+ dst = 0;
+ darray_for_each(f->nodes, i) {
+ struct found_btree_node *prev = dst ? f->nodes.data + dst - 1 : NULL;
+
+ if (prev &&
+ prev->cookie == i->cookie) {
+ if (prev->nr_ptrs == ARRAY_SIZE(prev->ptrs)) {
+ bch_err(c, "%s: found too many replicas for btree node", __func__);
+ ret = -EINVAL;
+ goto err;
+ }
+ prev->ptrs[prev->nr_ptrs++] = i->ptrs[0];
+ } else {
+ f->nodes.data[dst++] = *i;
+ }
+ }
+ f->nodes.nr = dst;
+
+ sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
+
+ if (0 && c->opts.verbose) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__);
+ found_btree_nodes_to_text(&buf, c, f->nodes);
+ bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ }
+
+ dst = 0;
+ darray_for_each(f->nodes, i) {
+ if (i->overwritten)
+ continue;
+
+ ret = handle_overwrites(c, i, &darray_top(f->nodes));
+ if (ret)
+ goto err;
+
+ BUG_ON(i->overwritten);
+ f->nodes.data[dst++] = *i;
+ }
+ f->nodes.nr = dst;
+
+ if (c->opts.verbose) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
+ found_btree_nodes_to_text(&buf, c, f->nodes);
+ bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ }
+
+ eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
+err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int found_btree_node_range_start_cmp(const void *_l, const void *_r)
+{
+ const struct found_btree_node *l = _l;
+ const struct found_btree_node *r = _r;
+
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ -cmp_int(l->level, r->level) ?:
+ bpos_cmp(l->max_key, r->min_key);
+}
+
+#define for_each_found_btree_node_in_range(_f, _search, _idx) \
+ for (size_t _idx = eytzinger0_find_gt((_f)->nodes.data, (_f)->nodes.nr, \
+ sizeof((_f)->nodes.data[0]), \
+ found_btree_node_range_start_cmp, &search); \
+ _idx < (_f)->nodes.nr && \
+ (_f)->nodes.data[_idx].btree_id == _search.btree_id && \
+ (_f)->nodes.data[_idx].level == _search.level && \
+ bpos_lt((_f)->nodes.data[_idx].min_key, _search.max_key); \
+ _idx = eytzinger0_next(_idx, (_f)->nodes.nr))
+
+bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b)
+{
+ struct find_btree_nodes *f = &c->found_btree_nodes;
+
+ struct found_btree_node search = {
+ .btree_id = b->c.btree_id,
+ .level = b->c.level,
+ .min_key = b->data->min_key,
+ .max_key = b->key.k.p,
+ };
+
+ for_each_found_btree_node_in_range(f, search, idx)
+ if (f->nodes.data[idx].seq > BTREE_NODE_SEQ(b->data))
+ return true;
+ return false;
+}
+
+bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
+{
+ struct found_btree_node search = {
+ .btree_id = btree,
+ .level = 0,
+ .min_key = POS_MIN,
+ .max_key = SPOS_MAX,
+ };
+
+ for_each_found_btree_node_in_range(&c->found_btree_nodes, search, idx)
+ return true;
+ return false;
+}
+
+int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
+ unsigned level, struct bpos node_min, struct bpos node_max)
+{
+ if (btree_id_is_alloc(btree))
+ return 0;
+
+ struct find_btree_nodes *f = &c->found_btree_nodes;
+
+ int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ return ret;
+
+ if (c->opts.verbose) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level);
+ bch2_bpos_to_text(&buf, node_min);
+ prt_str(&buf, " - ");
+ bch2_bpos_to_text(&buf, node_max);
+
+ bch_info(c, "%s(): %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ struct found_btree_node search = {
+ .btree_id = btree,
+ .level = level,
+ .min_key = node_min,
+ .max_key = node_max,
+ };
+
+ for_each_found_btree_node_in_range(f, search, idx) {
+ struct found_btree_node n = f->nodes.data[idx];
+
+ n.range_updated |= bpos_lt(n.min_key, node_min);
+ n.min_key = bpos_max(n.min_key, node_min);
+
+ n.range_updated |= bpos_gt(n.max_key, node_max);
+ n.max_key = bpos_min(n.max_key, node_max);
+
+ struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
+
+ found_btree_node_to_key(&tmp.k, &n);
+
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
+ bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+
+ BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL));
+
+ ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void bch2_find_btree_nodes_exit(struct find_btree_nodes *f)
+{
+ darray_exit(&f->nodes);
+}
diff --git a/fs/bcachefs/btree_node_scan.h b/fs/bcachefs/btree_node_scan.h
new file mode 100644
index 00000000000000..08687b209787be
--- /dev/null
+++ b/fs/bcachefs/btree_node_scan.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_NODE_SCAN_H
+#define _BCACHEFS_BTREE_NODE_SCAN_H
+
+int bch2_scan_for_btree_nodes(struct bch_fs *);
+bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *);
+bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id);
+int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos);
+void bch2_find_btree_nodes_exit(struct find_btree_nodes *);
+
+#endif /* _BCACHEFS_BTREE_NODE_SCAN_H */
diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h
new file mode 100644
index 00000000000000..abb7b27d556a9f
--- /dev/null
+++ b/fs/bcachefs/btree_node_scan_types.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_NODE_SCAN_TYPES_H
+#define _BCACHEFS_BTREE_NODE_SCAN_TYPES_H
+
+#include "darray.h"
+
+struct found_btree_node {
+ bool range_updated:1;
+ bool overwritten:1;
+ u8 btree_id;
+ u8 level;
+ u32 seq;
+ u64 cookie;
+
+ struct bpos min_key;
+ struct bpos max_key;
+
+ unsigned nr_ptrs;
+ struct bch_extent_ptr ptrs[BCH_REPLICAS_MAX];
+};
+
+typedef DARRAY(struct found_btree_node) found_btree_nodes;
+
+struct find_btree_nodes {
+ int ret;
+ struct mutex lock;
+ found_btree_nodes nodes;
+};
+
+#endif /* _BCACHEFS_BTREE_NODE_SCAN_TYPES_H */
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 30d69a6d133eec..bbec91e8e6506f 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -318,7 +318,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
!(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
i->k->k.p.snapshot &&
- bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
+ bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot) > 0);
}
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
@@ -397,12 +397,13 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
struct bkey_cached *ck = (void *) path->l[0].b;
unsigned new_u64s;
struct bkey_i *new_k;
+ unsigned watermark = flags & BCH_WATERMARK_MASK;
EBUG_ON(path->level);
- if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
- bch2_btree_key_cache_must_wait(c) &&
- !(flags & BCH_TRANS_COMMIT_journal_reclaim))
+ if (watermark < BCH_WATERMARK_reclaim &&
+ !test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
+ bch2_btree_key_cache_must_wait(c))
return -BCH_ERR_btree_insert_need_journal_reclaim;
/*
@@ -499,9 +500,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
}
static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
- struct btree_insert_entry *btree_id_start)
+ unsigned btree_id_start)
{
- struct btree_insert_entry *i;
bool trans_trigger_run;
int ret, overwrite;
@@ -514,13 +514,13 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
do {
trans_trigger_run = false;
- for (i = btree_id_start;
- i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
+ for (unsigned i = btree_id_start;
+ i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
i++) {
- if (i->btree_id != btree_id)
+ if (trans->updates[i].btree_id != btree_id)
continue;
- ret = run_one_trans_trigger(trans, i, overwrite);
+ ret = run_one_trans_trigger(trans, trans->updates + i, overwrite);
if (ret < 0)
return ret;
if (ret)
@@ -534,8 +534,7 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
{
- struct btree_insert_entry *btree_id_start = trans->updates;
- unsigned btree_id = 0;
+ unsigned btree_id = 0, btree_id_start = 0;
int ret = 0;
/*
@@ -549,8 +548,8 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
if (btree_id == BTREE_ID_alloc)
continue;
- while (btree_id_start < trans->updates + trans->nr_updates &&
- btree_id_start->btree_id < btree_id)
+ while (btree_id_start < trans->nr_updates &&
+ trans->updates[btree_id_start].btree_id < btree_id)
btree_id_start++;
ret = run_btree_triggers(trans, btree_id, btree_id_start);
@@ -558,11 +557,13 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
return ret;
}
- trans_for_each_update(trans, i) {
+ for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+ struct btree_insert_entry *i = trans->updates + idx;
+
if (i->btree_id > BTREE_ID_alloc)
break;
if (i->btree_id == BTREE_ID_alloc) {
- ret = run_btree_triggers(trans, BTREE_ID_alloc, i);
+ ret = run_btree_triggers(trans, BTREE_ID_alloc, idx);
if (ret)
return ret;
break;
@@ -826,7 +827,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
struct bch_fs *c = trans->c;
int ret = 0, u64s_delta = 0;
- trans_for_each_update(trans, i) {
+ for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+ struct btree_insert_entry *i = trans->updates + idx;
if (i->cached)
continue;
@@ -887,6 +889,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
int ret, unsigned long trace_ip)
{
struct bch_fs *c = trans->c;
+ enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
switch (ret) {
case -BCH_ERR_btree_insert_btree_node_full:
@@ -905,7 +908,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
* flag
*/
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
- (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) {
+ watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
break;
}
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 9404d96c38f3b3..c69b233c41bb3d 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -321,9 +321,9 @@ struct bkey_cached {
struct btree_bkey_cached_common c;
unsigned long flags;
+ unsigned long btree_trans_barrier_seq;
u16 u64s;
bool valid;
- u32 btree_trans_barrier_seq;
struct bkey_cached_key key;
struct rhash_head hash;
@@ -364,7 +364,21 @@ struct btree_insert_entry {
unsigned long ip_allocated;
};
+/* Number of btree paths we preallocate, usually enough */
#define BTREE_ITER_INITIAL 64
+/*
+ * Lmiit for btree_trans_too_many_iters(); this is enough that almost all code
+ * paths should run inside this limit, and if they don't it usually indicates a
+ * bug (leaking/duplicated btree paths).
+ *
+ * exception: some fsck paths
+ *
+ * bugs with excessive path usage seem to have possibly been eliminated now, so
+ * we might consider eliminating this (and btree_trans_too_many_iter()) at some
+ * point.
+ */
+#define BTREE_ITER_NORMAL_LIMIT 256
+/* never exceed limit */
#define BTREE_ITER_MAX (1U << 10)
struct btree_trans_commit_hook;
diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c
index a4b40c1656a54b..8e47e260eba59b 100644
--- a/fs/bcachefs/btree_update.c
+++ b/fs/bcachefs/btree_update.c
@@ -38,6 +38,9 @@ static noinline int extent_front_merge(struct btree_trans *trans,
struct bkey_i *update;
int ret;
+ if (unlikely(trans->journal_replay_not_finished))
+ return 0;
+
update = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(update);
if (ret)
@@ -69,6 +72,9 @@ static noinline int extent_back_merge(struct btree_trans *trans,
struct bch_fs *c = trans->c;
int ret;
+ if (unlikely(trans->journal_replay_not_finished))
+ return 0;
+
ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
if (ret < 0)
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index b2f5f2e50f7e19..b4efd8cc4d1a2b 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "bkey_buf.h"
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_gc.h"
@@ -18,12 +19,21 @@
#include "journal.h"
#include "journal_reclaim.h"
#include "keylist.h"
+#include "recovery_passes.h"
#include "replicas.h"
+#include "sb-members.h"
#include "super-io.h"
#include "trace.h"
#include <linux/random.h>
+static const char * const bch2_btree_update_modes[] = {
+#define x(t) #t,
+ BTREE_UPDATE_MODES()
+#undef x
+ NULL
+};
+
static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
btree_path_idx_t, struct btree *, struct keylist *);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
@@ -44,56 +54,103 @@ static btree_path_idx_t get_unlocked_mut_path(struct btree_trans *trans,
return path_idx;
}
-/* Debug code: */
-
/*
* Verify that child nodes correctly span parent node's range:
*/
-static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
+int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
{
-#ifdef CONFIG_BCACHEFS_DEBUG
- struct bpos next_node = b->data->min_key;
- struct btree_node_iter iter;
+ struct bch_fs *c = trans->c;
+ struct bpos node_min = b->key.k.type == KEY_TYPE_btree_ptr_v2
+ ? bkey_i_to_btree_ptr_v2(&b->key)->v.min_key
+ : b->data->min_key;
+ struct btree_and_journal_iter iter;
struct bkey_s_c k;
- struct bkey_s_c_btree_ptr_v2 bp;
- struct bkey unpacked;
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
+ struct bkey_buf prev;
+ int ret = 0;
- BUG_ON(!b->c.level);
+ BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
+ b->data->min_key));
- if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
- return;
+ if (!b->c.level)
+ return 0;
- bch2_btree_node_iter_init_from_start(&iter, b);
+ bch2_bkey_buf_init(&prev);
+ bkey_init(&prev.k->k);
+ bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
- while (1) {
- k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked);
+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
if (k.k->type != KEY_TYPE_btree_ptr_v2)
- break;
- bp = bkey_s_c_to_btree_ptr_v2(k);
+ goto out;
- if (!bpos_eq(next_node, bp.v->min_key)) {
- bch2_dump_btree_node(c, b);
- bch2_bpos_to_text(&buf1, next_node);
- bch2_bpos_to_text(&buf2, bp.v->min_key);
- panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf);
- }
+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
- bch2_btree_node_iter_advance(&iter, b);
+ struct bpos expected_min = bkey_deleted(&prev.k->k)
+ ? node_min
+ : bpos_successor(prev.k->k.p);
- if (bch2_btree_node_iter_end(&iter)) {
- if (!bpos_eq(k.k->p, b->key.k.p)) {
- bch2_dump_btree_node(c, b);
- bch2_bpos_to_text(&buf1, b->key.k.p);
- bch2_bpos_to_text(&buf2, k.k->p);
- panic("expected end %s got %s\n", buf1.buf, buf2.buf);
- }
- break;
+ if (!bpos_eq(expected_min, bp.v->min_key)) {
+ bch2_topology_error(c);
+
+ printbuf_reset(&buf);
+ prt_str(&buf, "end of prev node doesn't match start of next node\n"),
+ prt_printf(&buf, " in btree %s level %u node ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, "\n prev ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
+ prt_str(&buf, "\n next ");
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ need_fsck_err(c, btree_node_topology_bad_min_key, "%s", buf.buf);
+ goto topology_repair;
}
- next_node = bpos_successor(k.k->p);
+ bch2_bkey_buf_reassemble(&prev, c, k);
+ bch2_btree_and_journal_iter_advance(&iter);
+ }
+
+ if (bkey_deleted(&prev.k->k)) {
+ bch2_topology_error(c);
+
+ printbuf_reset(&buf);
+ prt_str(&buf, "empty interior node\n");
+ prt_printf(&buf, " in btree %s level %u node ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+ need_fsck_err(c, btree_node_topology_empty_interior_node, "%s", buf.buf);
+ goto topology_repair;
+ } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
+ bch2_topology_error(c);
+
+ printbuf_reset(&buf);
+ prt_str(&buf, "last child node doesn't end at end of parent node\n");
+ prt_printf(&buf, " in btree %s level %u node ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, "\n last key ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
+
+ need_fsck_err(c, btree_node_topology_bad_max_key, "%s", buf.buf);
+ goto topology_repair;
}
-#endif
+out:
+fsck_err:
+ bch2_btree_and_journal_iter_exit(&iter);
+ bch2_bkey_buf_exit(&prev, c);
+ printbuf_exit(&buf);
+ return ret;
+topology_repair:
+ if ((c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) &&
+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) {
+ bch2_inconsistent_error(c);
+ ret = -BCH_ERR_btree_need_topology_repair;
+ } else {
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
+ }
+ goto out;
}
/* Calculate ideal packed bkey format for new btree nodes: */
@@ -254,7 +311,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct open_buckets obs = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
- unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim
+ unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim
? BTREE_NODE_RESERVE
: 0;
int ret;
@@ -549,6 +606,26 @@ static void btree_update_add_key(struct btree_update *as,
bch2_keylist_push(keys);
}
+static bool btree_update_new_nodes_marked_sb(struct btree_update *as)
+{
+ for_each_keylist_key(&as->new_keys, k)
+ if (!bch2_dev_btree_bitmap_marked(as->c, bkey_i_to_s_c(k)))
+ return false;
+ return true;
+}
+
+static void btree_update_new_nodes_mark_sb(struct btree_update *as)
+{
+ struct bch_fs *c = as->c;
+
+ mutex_lock(&c->sb_lock);
+ for_each_keylist_key(&as->new_keys, k)
+ bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k));
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
+
/*
* The transactional part of an interior btree node update, where we journal the
* update we did to the interior node and update alloc info:
@@ -606,6 +683,9 @@ static void btree_update_nodes_written(struct btree_update *as)
if (ret)
goto err;
+ if (!btree_update_new_nodes_marked_sb(as))
+ btree_update_new_nodes_mark_sb(as);
+
/*
* Wait for any in flight writes to finish before we free the old nodes
* on disk:
@@ -638,7 +718,7 @@ static void btree_update_nodes_written(struct btree_update *as)
* which may require allocations as well.
*/
ret = commit_do(trans, &as->disk_res, &journal_seq,
- BCH_WATERMARK_reclaim|
+ BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_journal_reclaim,
@@ -648,9 +728,13 @@ static void btree_update_nodes_written(struct btree_update *as)
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
"%s", bch2_err_str(ret));
err:
- if (as->b) {
-
- b = as->b;
+ /*
+ * We have to be careful because another thread might be getting ready
+ * to free as->b and calling btree_update_reparent() on us - we'll
+ * recheck under btree_update_lock below:
+ */
+ b = READ_ONCE(as->b);
+ if (b) {
btree_path_idx_t path_idx = get_unlocked_mut_path(trans,
as->btree_id, b->c.level, b->key.k.p);
struct btree_path *path = trans->paths + path_idx;
@@ -794,15 +878,17 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
{
struct bch_fs *c = as->c;
- mutex_lock(&c->btree_interior_update_lock);
- list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
-
- BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+ BUG_ON(as->mode != BTREE_UPDATE_none);
+ BUG_ON(as->update_level_end < b->c.level);
BUG_ON(!btree_node_dirty(b));
BUG_ON(!b->c.level);
- as->mode = BTREE_INTERIOR_UPDATING_NODE;
+ mutex_lock(&c->btree_interior_update_lock);
+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+
+ as->mode = BTREE_UPDATE_node;
as->b = b;
+ as->update_level_end = b->c.level;
set_btree_node_write_blocked(b);
list_add(&as->write_blocked_list, &b->write_blocked);
@@ -824,7 +910,7 @@ static void btree_update_reparent(struct btree_update *as,
lockdep_assert_held(&c->btree_interior_update_lock);
child->b = NULL;
- child->mode = BTREE_INTERIOR_UPDATING_AS;
+ child->mode = BTREE_UPDATE_update;
bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal,
bch2_update_reparent_journal_pin_flush);
@@ -835,7 +921,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
struct bkey_i *insert = &b->key;
struct bch_fs *c = as->c;
- BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+ BUG_ON(as->mode != BTREE_UPDATE_none);
BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
ARRAY_SIZE(as->journal_entries));
@@ -849,7 +935,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
mutex_lock(&c->btree_interior_update_lock);
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
- as->mode = BTREE_INTERIOR_UPDATING_ROOT;
+ as->mode = BTREE_UPDATE_root;
mutex_unlock(&c->btree_interior_update_lock);
}
@@ -1027,7 +1113,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
struct bch_fs *c = as->c;
u64 start_time = as->start_time;
- BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE);
+ BUG_ON(as->mode == BTREE_UPDATE_none);
if (as->took_gc_lock)
up_read(&as->c->gc_lock);
@@ -1044,7 +1130,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
static struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
- unsigned level, bool split, unsigned flags)
+ unsigned level_start, bool split, unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_update *as;
@@ -1052,7 +1138,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc)
? BCH_DISK_RESERVATION_NOFAIL : 0;
unsigned nr_nodes[2] = { 0, 0 };
- unsigned update_level = level;
+ unsigned level_end = level_start;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
int ret = 0;
u32 restart_count = trans->restart_count;
@@ -1067,34 +1153,30 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
flags &= ~BCH_WATERMARK_MASK;
flags |= watermark;
- if (watermark < c->journal.watermark) {
- struct journal_res res = { 0 };
- unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
-
- if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
- watermark != BCH_WATERMARK_reclaim)
- journal_flags |= JOURNAL_RES_GET_NONBLOCK;
+ if (watermark < BCH_WATERMARK_reclaim &&
+ test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) {
+ if (flags & BCH_TRANS_COMMIT_journal_reclaim)
+ return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock);
- ret = drop_locks_do(trans,
- bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
- if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
- ret = -BCH_ERR_journal_reclaim_would_deadlock;
+ bch2_trans_unlock(trans);
+ wait_event(c->journal.wait, !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags));
+ ret = bch2_trans_relock(trans);
if (ret)
return ERR_PTR(ret);
}
while (1) {
- nr_nodes[!!update_level] += 1 + split;
- update_level++;
+ nr_nodes[!!level_end] += 1 + split;
+ level_end++;
- ret = bch2_btree_path_upgrade(trans, path, update_level + 1);
+ ret = bch2_btree_path_upgrade(trans, path, level_end + 1);
if (ret)
return ERR_PTR(ret);
- if (!btree_path_node(path, update_level)) {
+ if (!btree_path_node(path, level_end)) {
/* Allocating new root? */
nr_nodes[1] += split;
- update_level = BTREE_MAX_DEPTH;
+ level_end = BTREE_MAX_DEPTH;
break;
}
@@ -1102,11 +1184,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
* Always check for space for two keys, even if we won't have to
* split at prior level - it might have been a merge instead:
*/
- if (bch2_btree_node_insert_fits(path->l[update_level].b,
+ if (bch2_btree_node_insert_fits(path->l[level_end].b,
BKEY_BTREE_PTR_U64s_MAX * 2))
break;
- split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
+ split = path->l[level_end].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
}
if (!down_read_trylock(&c->gc_lock)) {
@@ -1120,13 +1202,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS);
memset(as, 0, sizeof(*as));
closure_init(&as->cl, NULL);
- as->c = c;
- as->start_time = start_time;
- as->ip_started = _RET_IP_;
- as->mode = BTREE_INTERIOR_NO_UPDATE;
- as->took_gc_lock = true;
- as->btree_id = path->btree_id;
- as->update_level = update_level;
+ as->c = c;
+ as->start_time = start_time;
+ as->ip_started = _RET_IP_;
+ as->mode = BTREE_UPDATE_none;
+ as->watermark = watermark;
+ as->took_gc_lock = true;
+ as->btree_id = path->btree_id;
+ as->update_level_start = level_start;
+ as->update_level_end = level_end;
INIT_LIST_HEAD(&as->list);
INIT_LIST_HEAD(&as->unwritten_list);
INIT_LIST_HEAD(&as->write_blocked_list);
@@ -1168,7 +1252,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
*/
if (bch2_err_matches(ret, ENOSPC) &&
(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
- watermark != BCH_WATERMARK_reclaim) {
+ watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
goto err;
}
@@ -1220,23 +1304,29 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
bch2_recalc_btree_reserve(c);
}
-static void bch2_btree_set_root(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b)
+static int bch2_btree_set_root(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ bool nofail)
{
struct bch_fs *c = as->c;
- struct btree *old;
trace_and_count(c, btree_node_set_root, trans, b);
- old = btree_node_root(c, b);
+ struct btree *old = btree_node_root(c, b);
/*
* Ensure no one is using the old root while we switch to the
* new root:
*/
- bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+ if (nofail) {
+ bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+ } else {
+ int ret = bch2_btree_node_lock_write(trans, path, &old->c);
+ if (ret)
+ return ret;
+ }
bch2_btree_set_root_inmem(c, b);
@@ -1250,6 +1340,7 @@ static void bch2_btree_set_root(struct btree_update *as,
* depend on the new root would have to update the new root.
*/
bch2_btree_node_unlock_write(trans, path, old);
+ return 0;
}
/* Interior node updates: */
@@ -1316,12 +1407,12 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
}
static void
-__bch2_btree_insert_keys_interior(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b,
- struct btree_node_iter node_iter,
- struct keylist *keys)
+bch2_btree_insert_keys_interior(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ struct btree_node_iter node_iter,
+ struct keylist *keys)
{
struct bkey_i *insert = bch2_keylist_front(keys);
struct bkey_packed *k;
@@ -1380,9 +1471,16 @@ static void __btree_split_node(struct btree_update *as,
if (bkey_deleted(k))
continue;
+ uk = bkey_unpack_key(b, k);
+
+ if (b->c.level &&
+ u64s < n1_u64s &&
+ u64s + k->u64s >= n1_u64s &&
+ bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p))
+ n1_u64s += k->u64s;
+
i = u64s >= n1_u64s;
u64s += k->u64s;
- uk = bkey_unpack_key(b, k);
if (!i)
n1_pos = uk.p;
bch2_bkey_format_add_key(&format[i], &uk);
@@ -1441,8 +1539,7 @@ static void __btree_split_node(struct btree_update *as,
bch2_verify_btree_nr_keys(n[i]);
- if (b->c.level)
- btree_node_interior_verify(as->c, n[i]);
+ BUG_ON(bch2_btree_node_check_topology(trans, n[i]));
}
}
@@ -1471,9 +1568,9 @@ static void btree_split_insert_keys(struct btree_update *as,
bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p);
- __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
+ bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
- btree_node_interior_verify(as->c, b);
+ BUG_ON(bch2_btree_node_check_topology(trans, b));
}
}
@@ -1488,9 +1585,14 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
u64 start_time = local_clock();
int ret = 0;
+ bch2_verify_btree_nr_keys(b);
BUG_ON(!parent && (b != btree_node_root(c, b)));
BUG_ON(parent && !btree_node_intent_locked(trans->paths + path, b->c.level + 1));
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret)
+ return ret;
+
bch2_btree_interior_update_will_free_node(as, b);
if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
@@ -1581,15 +1683,16 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
if (parent) {
/* Split a non root node */
ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys);
- if (ret)
- goto err;
} else if (n3) {
- bch2_btree_set_root(as, trans, trans->paths + path, n3);
+ ret = bch2_btree_set_root(as, trans, trans->paths + path, n3, false);
} else {
/* Root filled up but didn't need to be split */
- bch2_btree_set_root(as, trans, trans->paths + path, n1);
+ ret = bch2_btree_set_root(as, trans, trans->paths + path, n1, false);
}
+ if (ret)
+ goto err;
+
if (n3) {
bch2_btree_update_get_open_buckets(as, n3);
bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
@@ -1646,27 +1749,6 @@ err:
goto out;
}
-static void
-bch2_btree_insert_keys_interior(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b,
- struct keylist *keys)
-{
- struct btree_path *linked;
- unsigned i;
-
- __bch2_btree_insert_keys_interior(as, trans, path, b,
- path->l[b->c.level].iter, keys);
-
- btree_update_updated_node(as, b);
-
- trans_for_each_path_with_node(trans, b, linked, i)
- bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
-
- bch2_trans_verify_paths(trans);
-}
-
/**
* bch2_btree_insert_node - insert bkeys into a given btree node
*
@@ -1687,7 +1769,8 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
struct keylist *keys)
{
struct bch_fs *c = as->c;
- struct btree_path *path = trans->paths + path_idx;
+ struct btree_path *path = trans->paths + path_idx, *linked;
+ unsigned i;
int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
@@ -1710,9 +1793,19 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
goto split;
}
- btree_node_interior_verify(c, b);
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret) {
+ bch2_btree_node_unlock_write(trans, path, b);
+ return ret;
+ }
+
+ bch2_btree_insert_keys_interior(as, trans, path, b,
+ path->l[b->c.level].iter, keys);
+
+ trans_for_each_path_with_node(trans, b, linked, i)
+ bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
- bch2_btree_insert_keys_interior(as, trans, path, b, keys);
+ bch2_trans_verify_paths(trans);
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
@@ -1726,16 +1819,17 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
bch2_maybe_compact_whiteouts(c, b))
bch2_trans_node_reinit_iter(trans, b);
+ btree_update_updated_node(as, b);
bch2_btree_node_unlock_write(trans, path, b);
- btree_node_interior_verify(c, b);
+ BUG_ON(bch2_btree_node_check_topology(trans, b));
return 0;
split:
/*
* We could attempt to avoid the transaction restart, by calling
* bch2_btree_path_upgrade() and allocating more nodes:
*/
- if (b->c.level >= as->update_level) {
+ if (b->c.level >= as->update_level_end) {
trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
}
@@ -1801,7 +1895,9 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans *
bch2_keylist_add(&as->parent_keys, &b->key);
btree_split_insert_keys(as, trans, path_idx, n, &as->parent_keys);
- bch2_btree_set_root(as, trans, path, n);
+ int ret = bch2_btree_set_root(as, trans, path, n, true);
+ BUG_ON(ret);
+
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bch2_trans_node_add(trans, path, n);
@@ -1818,9 +1914,12 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path,
{
struct bch_fs *c = trans->c;
struct btree *b = bch2_btree_id_root(c, trans->paths[path].btree_id)->b;
+
+ if (btree_node_fake(b))
+ return bch2_btree_split_leaf(trans, path, flags);
+
struct btree_update *as =
- bch2_btree_update_start(trans, trans->paths + path,
- b->c.level, true, flags);
+ bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags);
if (IS_ERR(as))
return PTR_ERR(as);
@@ -1851,6 +1950,22 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
BUG_ON(!trans->paths[path].should_be_locked);
BUG_ON(!btree_node_locked(&trans->paths[path], level));
+ /*
+ * Work around a deadlock caused by the btree write buffer not doing
+ * merges and leaving tons of merges for us to do - we really don't need
+ * to be doing merges at all from the interior update path, and if the
+ * interior update path is generating too many new interior updates we
+ * deadlock:
+ */
+ if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates)
+ return 0;
+
+ if ((flags & BCH_WATERMARK_MASK) <= BCH_WATERMARK_reclaim) {
+ flags &= ~BCH_WATERMARK_MASK;
+ flags |= BCH_WATERMARK_btree;
+ flags |= BCH_TRANS_COMMIT_journal_reclaim;
+ }
+
b = trans->paths[path].l[level].b;
if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) ||
@@ -1996,6 +2111,10 @@ err:
bch2_path_put(trans, new_path, true);
bch2_path_put(trans, sib_path, true);
bch2_trans_verify_locks(trans);
+ if (ret == -BCH_ERR_journal_reclaim_would_deadlock)
+ ret = 0;
+ if (!ret)
+ ret = bch2_trans_relock(trans);
return ret;
err_free_update:
bch2_btree_node_free_never_used(as, trans, n);
@@ -2041,12 +2160,13 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
if (parent) {
bch2_keylist_add(&as->parent_keys, &n->key);
ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys);
- if (ret)
- goto err;
} else {
- bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n);
+ ret = bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n, false);
}
+ if (ret)
+ goto err;
+
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
@@ -2391,7 +2511,7 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
bch2_btree_set_root_inmem(c, b);
}
-static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
+static int __bch2_btree_root_alloc_fake(struct btree_trans *trans, enum btree_id id, unsigned level)
{
struct bch_fs *c = trans->c;
struct closure cl;
@@ -2410,7 +2530,7 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
set_btree_node_fake(b);
set_btree_node_need_rewrite(b);
- b->c.level = 0;
+ b->c.level = level;
b->c.btree_id = id;
bkey_btree_ptr_init(&b->key);
@@ -2437,9 +2557,23 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
return 0;
}
-void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
+void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level)
+{
+ bch2_trans_run(c, __bch2_btree_root_alloc_fake(trans, id, level));
+}
+
+static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
{
- bch2_trans_run(c, __bch2_btree_root_alloc(trans, id));
+ prt_printf(out, "%ps: btree=%s l=%u-%u watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
+ (void *) as->ip_started,
+ bch2_btree_id_str(as->btree_id),
+ as->update_level_start,
+ as->update_level_end,
+ bch2_watermarks[as->watermark],
+ bch2_btree_update_modes[as->mode],
+ as->nodes_written,
+ closure_nr_remaining(&as->cl),
+ as->journal.seq);
}
void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
@@ -2448,12 +2582,7 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
mutex_lock(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list)
- prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
- (void *) as->ip_started,
- as->mode,
- as->nodes_written,
- closure_nr_remaining(&as->cl),
- as->journal.seq);
+ bch2_btree_update_to_text(out, as);
mutex_unlock(&c->btree_interior_update_lock);
}
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index f651dd48aaa049..c1a479ebaad121 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -10,6 +10,20 @@
#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
+int bch2_btree_node_check_topology(struct btree_trans *, struct btree *);
+
+#define BTREE_UPDATE_MODES() \
+ x(none) \
+ x(node) \
+ x(root) \
+ x(update)
+
+enum btree_update_mode {
+#define x(n) BTREE_UPDATE_##n,
+ BTREE_UPDATE_MODES()
+#undef x
+};
+
/*
* Tracks an in progress split/rewrite of a btree node and the update to the
* parent node:
@@ -37,24 +51,19 @@ struct btree_update {
struct list_head list;
struct list_head unwritten_list;
- /* What kind of update are we doing? */
- enum {
- BTREE_INTERIOR_NO_UPDATE,
- BTREE_INTERIOR_UPDATING_NODE,
- BTREE_INTERIOR_UPDATING_ROOT,
- BTREE_INTERIOR_UPDATING_AS,
- } mode;
-
+ enum btree_update_mode mode;
+ enum bch_watermark watermark;
unsigned nodes_written:1;
unsigned took_gc_lock:1;
enum btree_id btree_id;
- unsigned update_level;
+ unsigned update_level_start;
+ unsigned update_level_end;
struct disk_reservation disk_res;
/*
- * BTREE_INTERIOR_UPDATING_NODE:
+ * BTREE_UPDATE_node:
* The update that made the new nodes visible was a regular update to an
* existing interior node - @b. We can't write out the update to @b
* until the new nodes we created are finished writing, so we block @b
@@ -163,7 +172,7 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *,
struct bkey_i *, unsigned, bool);
void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
-void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
+void bch2_btree_root_alloc_fake(struct bch_fs *, enum btree_id, unsigned);
static inline unsigned btree_update_reserve_required(struct bch_fs *c,
struct btree *b)
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 5cbad8445782c4..36a6f42aba5e6f 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -11,6 +11,7 @@
#include "journal_reclaim.h"
#include <linux/prefetch.h>
+#include <linux/sort.h>
static int bch2_btree_write_buffer_journal_flush(struct journal *,
struct journal_entry_pin *, u64);
@@ -46,6 +47,14 @@ static inline bool wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_ke
#endif
}
+static int wb_key_seq_cmp(const void *_l, const void *_r)
+{
+ const struct btree_write_buffered_key *l = _l;
+ const struct btree_write_buffered_key *r = _r;
+
+ return cmp_int(l->journal_seq, r->journal_seq);
+}
+
/* Compare excluding idx, the low 24 bits: */
static inline bool wb_key_eq(const void *_l, const void *_r)
{
@@ -307,6 +316,16 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) {
bch2_btree_node_unlock_write(trans, path, path->l[0].b);
write_locked = false;
+
+ ret = lockrestart_do(trans,
+ bch2_btree_iter_traverse(&iter) ?:
+ bch2_foreground_maybe_merge(trans, iter.path, 0,
+ BCH_WATERMARK_reclaim|
+ BCH_TRANS_COMMIT_journal_reclaim|
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc));
+ if (ret)
+ goto err;
}
}
@@ -357,6 +376,11 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
*/
trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr);
+ sort(wb->flushing.keys.data,
+ wb->flushing.keys.nr,
+ sizeof(wb->flushing.keys.data[0]),
+ wb_key_seq_cmp, NULL);
+
darray_for_each(wb->flushing.keys, i) {
if (!i->journal_seq)
continue;
@@ -368,10 +392,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
ret = commit_do(trans, NULL, NULL,
BCH_WATERMARK_reclaim|
+ BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc|
- BCH_TRANS_COMMIT_no_journal_res|
- BCH_TRANS_COMMIT_journal_reclaim,
+ BCH_TRANS_COMMIT_no_journal_res ,
btree_write_buffered_insert(trans, i));
if (ret)
goto err;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 96edf2c34d433d..941401a210f569 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -525,6 +525,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
"different types of data in same bucket: %s, %s",
bch2_data_type_str(g->data_type),
bch2_data_type_str(data_type))) {
+ BUG();
ret = -EIO;
goto err;
}
@@ -628,6 +629,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
bch2_data_type_str(ptr_data_type),
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ BUG();
ret = -EIO;
goto err;
}
@@ -815,14 +817,14 @@ static int __mark_pointer(struct btree_trans *trans,
static int bch2_trigger_pointer(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k, struct extent_ptr_decoded p,
- s64 *sectors,
- unsigned flags)
+ const union bch_extent_entry *entry,
+ s64 *sectors, unsigned flags)
{
bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
struct bpos bucket;
struct bch_backpointer bp;
- bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp);
+ bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, entry, &bucket, &bp);
*sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);
if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
@@ -851,7 +853,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
if (flags & BTREE_TRIGGER_GC) {
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
- enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);
percpu_down_read(&c->mark_lock);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
@@ -979,7 +981,7 @@ static int __trigger_extent(struct btree_trans *trans,
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
s64 disk_sectors;
- ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags);
+ ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags);
if (ret < 0)
return ret;
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 6387e039f78975..f9af5adabe8363 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -226,6 +226,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma
fallthrough;
case BCH_WATERMARK_btree_copygc:
case BCH_WATERMARK_reclaim:
+ case BCH_WATERMARK_interior_updates:
break;
}
@@ -394,14 +395,6 @@ static inline const char *bch2_data_type_str(enum bch_data_type type)
: "(invalid data type)";
}
-static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type)
-{
- if (type < BCH_DATA_NR)
- prt_str(out, __bch2_data_types[type]);
- else
- prt_printf(out, "(invalid data type %u)", type);
-}
-
/* disk reservations: */
static inline void bch2_disk_reservation_put(struct bch_fs *c,
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 38defa19d52d70..4d14f19f51850e 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -7,7 +7,7 @@
#include "chardev.h"
#include "journal.h"
#include "move.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "super.h"
#include "super-io.h"
@@ -134,42 +134,38 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
struct fsck_thread {
struct thread_with_stdio thr;
struct bch_fs *c;
- char **devs;
- size_t nr_devs;
struct bch_opts opts;
};
static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
{
struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
- if (thr->devs)
- for (size_t i = 0; i < thr->nr_devs; i++)
- kfree(thr->devs[i]);
- kfree(thr->devs);
kfree(thr);
}
static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
{
struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
- struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
-
- if (IS_ERR(c))
- return PTR_ERR(c);
+ struct bch_fs *c = thr->c;
- int ret = 0;
- if (test_bit(BCH_FS_errors_fixed, &c->flags))
- ret |= 1;
- if (test_bit(BCH_FS_error, &c->flags))
- ret |= 4;
+ int ret = PTR_ERR_OR_ZERO(c);
+ if (ret)
+ return ret;
- bch2_fs_stop(c);
+ ret = bch2_fs_start(thr->c);
+ if (ret)
+ goto err;
- if (ret & 1)
+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name);
- if (ret & 4)
+ ret |= 1;
+ }
+ if (test_bit(BCH_FS_error, &c->flags)) {
bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name);
-
+ ret |= 4;
+ }
+err:
+ bch2_fs_stop(c);
return ret;
}
@@ -182,7 +178,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
{
struct bch_ioctl_fsck_offline arg;
struct fsck_thread *thr = NULL;
- u64 *devs = NULL;
+ darray_str(devs) = {};
long ret = 0;
if (copy_from_user(&arg, user_arg, sizeof(arg)))
@@ -194,29 +190,32 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
- !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
- !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
- ret = -ENOMEM;
- goto err;
- }
+ for (size_t i = 0; i < arg.nr_devs; i++) {
+ u64 dev_u64;
+ ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64));
+ if (ret)
+ goto err;
- thr->opts = bch2_opts_empty();
- thr->nr_devs = arg.nr_devs;
+ char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX);
+ ret = PTR_ERR_OR_ZERO(dev_str);
+ if (ret)
+ goto err;
- if (copy_from_user(devs, &user_arg->devs[0],
- array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) {
- ret = -EINVAL;
- goto err;
+ ret = darray_push(&devs, dev_str);
+ if (ret) {
+ kfree(dev_str);
+ goto err;
+ }
}
- for (size_t i = 0; i < arg.nr_devs; i++) {
- thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
- ret = PTR_ERR_OR_ZERO(thr->devs[i]);
- if (ret)
- goto err;
+ thr = kzalloc(sizeof(*thr), GFP_KERNEL);
+ if (!thr) {
+ ret = -ENOMEM;
+ goto err;
}
+ thr->opts = bch2_opts_empty();
+
if (arg.opts) {
char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
@@ -230,15 +229,28 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
- ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops);
-err:
- if (ret < 0) {
- if (thr)
- bch2_fsck_thread_exit(&thr->thr);
- pr_err("ret %s", bch2_err_str(ret));
- }
- kfree(devs);
+ /* We need request_key() to be called before we punt to kthread: */
+ opt_set(thr->opts, nostart, true);
+
+ bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops);
+
+ thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
+
+ if (!IS_ERR(thr->c) &&
+ thr->c->opts.errors == BCH_ON_ERROR_panic)
+ thr->c->opts.errors = BCH_ON_ERROR_ro;
+
+ ret = __bch2_run_thread_with_stdio(&thr->thr);
+out:
+ darray_for_each(devs, i)
+ kfree(*i);
+ darray_exit(&devs);
return ret;
+err:
+ if (thr)
+ bch2_fsck_thread_exit(&thr->thr);
+ pr_err("ret %s", bch2_err_str(ret));
+ goto out;
}
static long bch2_global_ioctl(unsigned cmd, void __user *arg)
diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
index 4701457f6381ca..7ed779b411f61e 100644
--- a/fs/bcachefs/checksum.c
+++ b/fs/bcachefs/checksum.c
@@ -429,15 +429,20 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
extent_nonce(version, crc_old), bio);
if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) {
- bch_err(c, "checksum error in %s() (memory corruption or bug?)\n"
- "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)",
- __func__,
- crc_old.csum.hi,
- crc_old.csum.lo,
- merged.hi,
- merged.lo,
- bch2_csum_types[crc_old.csum_type],
- bch2_csum_types[new_csum_type]);
+ struct printbuf buf = PRINTBUF;
+ prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n"
+ "expected %0llx:%0llx got %0llx:%0llx (old type ",
+ __func__,
+ crc_old.csum.hi,
+ crc_old.csum.lo,
+ merged.hi,
+ merged.lo);
+ bch2_prt_csum_type(&buf, crc_old.csum_type);
+ prt_str(&buf, " new type ");
+ bch2_prt_csum_type(&buf, new_csum_type);
+ prt_str(&buf, ")");
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
return -EIO;
}
diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h
index 1b8c2c1016dc63..e40499fde9a401 100644
--- a/fs/bcachefs/checksum.h
+++ b/fs/bcachefs/checksum.h
@@ -61,11 +61,12 @@ static inline void bch2_csum_err_msg(struct printbuf *out,
struct bch_csum expected,
struct bch_csum got)
{
- prt_printf(out, "checksum error: got ");
+ prt_str(out, "checksum error, type ");
+ bch2_prt_csum_type(out, type);
+ prt_str(out, ": got ");
bch2_csum_to_text(out, type, got);
prt_str(out, " should be ");
bch2_csum_to_text(out, type, expected);
- prt_printf(out, " type %s", bch2_csum_types[type]);
}
int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h
index 58c2eb45570ff0..607fd5e232c902 100644
--- a/fs/bcachefs/compress.h
+++ b/fs/bcachefs/compress.h
@@ -47,14 +47,6 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
}
-static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type)
-{
- if (type < BCH_COMPRESSION_TYPE_NR)
- prt_str(out, __bch2_compression_types[type]);
- else
- prt_printf(out, "(invalid compression type %u)", type);
-}
-
int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
struct bch_extent_crc_unpacked *);
int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 4150feca42a2e6..0022b51ce3c09c 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -14,6 +14,7 @@
#include "move.h"
#include "nocow_locking.h"
#include "rebalance.h"
+#include "snapshot.h"
#include "subvolume.h"
#include "trace.h"
@@ -509,6 +510,14 @@ int bch2_data_update_init(struct btree_trans *trans,
unsigned ptrs_locked = 0;
int ret = 0;
+ /*
+ * fs is corrupt we have a key for a snapshot node that doesn't exist,
+ * and we have to check for this because we go rw before repairing the
+ * snapshots table - just skip it, we can move it later.
+ */
+ if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot)))
+ return -BCH_ERR_data_update_done;
+
bch2_bkey_buf_init(&m->k);
bch2_bkey_buf_reassemble(&m->k, c, k);
m->btree_id = btree_id;
@@ -571,8 +580,7 @@ int bch2_data_update_init(struct btree_trans *trans,
move_ctxt_wait_event(ctxt,
(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0)) ||
- (!atomic_read(&ctxt->read_sectors) &&
- !atomic_read(&ctxt->write_sectors)));
+ list_empty(&ctxt->ios));
if (!locked)
bch2_bucket_nocow_lock(&c->nocow_locks,
@@ -590,6 +598,8 @@ int bch2_data_update_init(struct btree_trans *trans,
i++;
}
+ unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have));
+
/*
* If current extent durability is less than io_opts.data_replicas,
* we're not trying to rereplicate the extent up to data_replicas here -
@@ -599,7 +609,7 @@ int bch2_data_update_init(struct btree_trans *trans,
* rereplicate, currently, so that users don't get an unexpected -ENOSPC
*/
if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) &&
- durability_have >= io_opts.data_replicas) {
+ !durability_required) {
m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
m->data_opts.rewrite_ptrs = 0;
/* if iter == NULL, it's just a promote */
@@ -608,11 +618,18 @@ int bch2_data_update_init(struct btree_trans *trans,
goto done;
}
- m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+ m->op.nr_replicas = min(durability_removing, durability_required) +
m->data_opts.extra_replicas;
- m->op.nr_replicas_required = m->op.nr_replicas;
- BUG_ON(!m->op.nr_replicas);
+ /*
+ * If device(s) were set to durability=0 after data was written to them
+ * we can end up with a duribilty=0 extent, and the normal algorithm
+ * that tries not to increase durability doesn't work:
+ */
+ if (!(durability_have + durability_removing))
+ m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+
+ m->op.nr_replicas_required = m->op.nr_replicas;
if (reserve_sectors) {
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 208ce6f0fc4317..cd99b739941447 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -13,6 +13,7 @@
#include "btree_iter.h"
#include "btree_locking.h"
#include "btree_update.h"
+#include "btree_update_interior.h"
#include "buckets.h"
#include "debug.h"
#include "error.h"
@@ -668,7 +669,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
- do {
+ while (1) {
err = flush_buf(i);
if (err)
return err;
@@ -676,9 +677,12 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
if (!i->size)
break;
+ if (done)
+ break;
+
done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
i->iter++;
- } while (!done);
+ }
if (i->buf.allocation_failure)
return -ENOMEM;
@@ -693,13 +697,45 @@ static const struct file_operations journal_pins_ops = {
.read = bch2_journal_pins_read,
};
+static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ int err;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ if (!i->iter) {
+ bch2_btree_updates_to_text(&i->buf, c);
+ i->iter++;
+ }
+
+ err = flush_buf(i);
+ if (err)
+ return err;
+
+ if (i->buf.allocation_failure)
+ return -ENOMEM;
+
+ return i->ret;
+}
+
+static const struct file_operations btree_updates_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_btree_updates_read,
+};
+
static int btree_transaction_stats_open(struct inode *inode, struct file *file)
{
struct bch_fs *c = inode->i_private;
struct dump_iter *i;
i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
-
if (!i)
return -ENOMEM;
@@ -866,6 +902,20 @@ void bch2_fs_debug_exit(struct bch_fs *c)
debugfs_remove_recursive(c->fs_debug_dir);
}
+static void bch2_fs_debug_btree_init(struct bch_fs *c, struct btree_debug *bd)
+{
+ struct dentry *d;
+
+ d = debugfs_create_dir(bch2_btree_id_str(bd->id), c->btree_debug_dir);
+
+ debugfs_create_file("keys", 0400, d, bd, &btree_debug_ops);
+
+ debugfs_create_file("formats", 0400, d, bd, &btree_format_debug_ops);
+
+ debugfs_create_file("bfloat-failed", 0400, d, bd,
+ &bfloat_failed_debug_ops);
+}
+
void bch2_fs_debug_init(struct bch_fs *c)
{
struct btree_debug *bd;
@@ -888,6 +938,9 @@ void bch2_fs_debug_init(struct bch_fs *c)
debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
c->btree_debug, &journal_pins_ops);
+ debugfs_create_file("btree_updates", 0400, c->fs_debug_dir,
+ c->btree_debug, &btree_updates_ops);
+
debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
c, &btree_transaction_stats_op);
@@ -902,21 +955,7 @@ void bch2_fs_debug_init(struct bch_fs *c)
bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
bd++) {
bd->id = bd - c->btree_debug;
- debugfs_create_file(bch2_btree_id_str(bd->id),
- 0400, c->btree_debug_dir, bd,
- &btree_debug_ops);
-
- snprintf(name, sizeof(name), "%s-formats",
- bch2_btree_id_str(bd->id));
-
- debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
- &btree_format_debug_ops);
-
- snprintf(name, sizeof(name), "%s-bfloat-failed",
- bch2_btree_id_str(bd->id));
-
- debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
- &bfloat_failed_debug_ops);
+ bch2_fs_debug_btree_init(c, bd);
}
}
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 082075244e16ae..556a217108d32e 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -131,29 +131,33 @@ fsck_err:
void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
- const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
- unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
+ const struct bch_stripe *sp = bkey_s_c_to_stripe(k).v;
+ struct bch_stripe s = {};
+
+ memcpy(&s, sp, min(sizeof(s), bkey_val_bytes(k.k)));
+
+ unsigned nr_data = s.nr_blocks - s.nr_redundant;
+
+ prt_printf(out, "algo %u sectors %u blocks %u:%u csum ",
+ s.algorithm,
+ le16_to_cpu(s.sectors),
+ nr_data,
+ s.nr_redundant);
+ bch2_prt_csum_type(out, s.csum_type);
+ prt_printf(out, " gran %u", 1U << s.csum_granularity_bits);
+
+ for (unsigned i = 0; i < s.nr_blocks; i++) {
+ const struct bch_extent_ptr *ptr = sp->ptrs + i;
+
+ if ((void *) ptr >= bkey_val_end(k))
+ break;
+
+ bch2_extent_ptr_to_text(out, c, ptr);
- prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
- s->algorithm,
- le16_to_cpu(s->sectors),
- nr_data,
- s->nr_redundant,
- s->csum_type,
- 1U << s->csum_granularity_bits);
-
- for (i = 0; i < s->nr_blocks; i++) {
- const struct bch_extent_ptr *ptr = s->ptrs + i;
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- u32 offset;
- u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-
- prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
- if (i < nr_data)
- prt_printf(out, "#%u", stripe_blockcount_get(s, i));
- prt_printf(out, " gen %u", ptr->gen);
- if (ptr_stale(ca, ptr))
- prt_printf(out, " stale");
+ if (s.csum_type < BCH_CSUM_NR &&
+ i < nr_data &&
+ stripe_blockcount_offset(&s, i) < bkey_val_bytes(k.k))
+ prt_printf(out, "#%u", stripe_blockcount_get(sp, i));
}
}
@@ -607,10 +611,8 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
struct printbuf err = PRINTBUF;
struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev);
- prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n",
- want.hi, want.lo,
- got.hi, got.lo,
- bch2_csum_types[v->csum_type]);
+ prt_str(&err, "stripe ");
+ bch2_csum_err_msg(&err, v->csum_type, want, got);
prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i);
bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
bch_err_ratelimited(ca, "%s", err.buf);
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index f4369b02e805f0..f042616888b0a1 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -32,6 +32,8 @@ static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
static inline unsigned stripe_csum_offset(const struct bch_stripe *s,
unsigned dev, unsigned csum_idx)
{
+ EBUG_ON(s->csum_type >= BCH_CSUM_NR);
+
unsigned csum_bytes = bch_crc_bytes[s->csum_type];
return sizeof(struct bch_stripe) +
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index af25d8ec60f221..01a79fa3eacb21 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -252,7 +252,8 @@
x(BCH_ERR_nopromote, nopromote_in_flight) \
x(BCH_ERR_nopromote, nopromote_no_writes) \
x(BCH_ERR_nopromote, nopromote_enomem) \
- x(0, need_inode_lock)
+ x(0, need_inode_lock) \
+ x(0, invalid_snapshot_node)
enum bch_errcode {
BCH_ERR_START = 2048,
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 043431206799d8..82a6656c941c5f 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "error.h"
-#include "recovery.h"
+#include "journal.h"
+#include "recovery_passes.h"
#include "super.h"
#include "thread_with_file.h"
@@ -16,7 +17,8 @@ bool bch2_inconsistent_error(struct bch_fs *c)
return false;
case BCH_ON_ERROR_ro:
if (bch2_fs_emergency_read_only(c))
- bch_err(c, "inconsistency detected - emergency read only");
+ bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
+ journal_cur_seq(&c->journal));
return true;
case BCH_ON_ERROR_panic:
panic(bch2_fmt(c, "panic after error"));
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index ae1d6674c512d4..36caedf72d89ab 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -32,6 +32,12 @@ bool bch2_inconsistent_error(struct bch_fs *);
int bch2_topology_error(struct bch_fs *);
+#define bch2_fs_topology_error(c, ...) \
+({ \
+ bch_err(c, "btree topology error: " __VA_ARGS__); \
+ bch2_topology_error(c); \
+})
+
#define bch2_fs_inconsistent(c, ...) \
({ \
bch_err(c, __VA_ARGS__); \
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 61395b113df9bd..1a331e53920485 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -189,13 +189,18 @@ int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags,
struct printbuf *err)
{
+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
int ret = 0;
- bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err,
- btree_ptr_v2_val_too_big,
+ bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX,
+ c, err, btree_ptr_v2_val_too_big,
"value too big (%zu > %zu)",
bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
+ bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p),
+ c, err, btree_ptr_v2_min_key_bad,
+ "min_key > key");
+
ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
fsck_err:
return ret;
@@ -973,6 +978,33 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
return bkey_deleted(k.k);
}
+void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
+{
+ struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
+ ? bch_dev_bkey_exists(c, ptr->dev)
+ : NULL;
+
+ if (!ca) {
+ prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
+ (u64) ptr->offset, ptr->gen,
+ ptr->cached ? " cached" : "");
+ } else {
+ u32 offset;
+ u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
+
+ prt_printf(out, "ptr: %u:%llu:%u gen %u",
+ ptr->dev, b, offset, ptr->gen);
+ if (ptr->cached)
+ prt_str(out, " cached");
+ if (ptr->unwritten)
+ prt_str(out, " unwritten");
+ if (b >= ca->mi.first_bucket &&
+ b < ca->mi.nbuckets &&
+ ptr_stale(ca, ptr))
+ prt_printf(out, " stale");
+ }
+}
+
void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
@@ -988,42 +1020,22 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, " ");
switch (__extent_entry_type(entry)) {
- case BCH_EXTENT_ENTRY_ptr: {
- const struct bch_extent_ptr *ptr = entry_to_ptr(entry);
- struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
- ? bch_dev_bkey_exists(c, ptr->dev)
- : NULL;
-
- if (!ca) {
- prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
- (u64) ptr->offset, ptr->gen,
- ptr->cached ? " cached" : "");
- } else {
- u32 offset;
- u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-
- prt_printf(out, "ptr: %u:%llu:%u gen %u",
- ptr->dev, b, offset, ptr->gen);
- if (ptr->cached)
- prt_str(out, " cached");
- if (ptr->unwritten)
- prt_str(out, " unwritten");
- if (ca && ptr_stale(ca, ptr))
- prt_printf(out, " stale");
- }
+ case BCH_EXTENT_ENTRY_ptr:
+ bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry));
break;
- }
+
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128: {
struct bch_extent_crc_unpacked crc =
bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
- prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ",
+ prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
crc.compressed_size,
crc.uncompressed_size,
- crc.offset, crc.nonce,
- bch2_csum_types[crc.csum_type]);
+ crc.offset, crc.nonce);
+ bch2_prt_csum_type(out, crc.csum_type);
+ prt_str(out, " compress ");
bch2_prt_compression_type(out, crc.compression_type);
break;
}
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index fd2669cdd76f3b..528e817eacbdad 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -596,30 +596,6 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
return ret;
}
-static inline unsigned bch2_bkey_ptr_data_type(struct bkey_s_c k, const struct bch_extent_ptr *ptr)
-{
- switch (k.k->type) {
- case KEY_TYPE_btree_ptr:
- case KEY_TYPE_btree_ptr_v2:
- return BCH_DATA_btree;
- case KEY_TYPE_extent:
- case KEY_TYPE_reflink_v:
- return BCH_DATA_user;
- case KEY_TYPE_stripe: {
- struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-
- BUG_ON(ptr < s.v->ptrs ||
- ptr >= s.v->ptrs + s.v->nr_blocks);
-
- return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
- ? BCH_DATA_parity
- : BCH_DATA_user;
- }
- default:
- BUG();
- }
-}
-
unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
@@ -700,6 +676,7 @@ bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
+void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c,
diff --git a/fs/bcachefs/eytzinger.c b/fs/bcachefs/eytzinger.c
new file mode 100644
index 00000000000000..0f955c3c76a7bc
--- /dev/null
+++ b/fs/bcachefs/eytzinger.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "eytzinger.h"
+
+/**
+ * is_aligned - is this pointer & size okay for word-wide copying?
+ * @base: pointer to data
+ * @size: size of each element
+ * @align: required alignment (typically 4 or 8)
+ *
+ * Returns true if elements can be copied using word loads and stores.
+ * The size must be a multiple of the alignment, and the base address must
+ * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+ *
+ * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
+ * to "if ((a | b) & mask)", so we do that by hand.
+ */
+__attribute_const__ __always_inline
+static bool is_aligned(const void *base, size_t size, unsigned char align)
+{
+ unsigned char lsbits = (unsigned char)size;
+
+ (void)base;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ lsbits |= (unsigned char)(uintptr_t)base;
+#endif
+ return (lsbits & (align - 1)) == 0;
+}
+
+/**
+ * swap_words_32 - swap two elements in 32-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 4)
+ *
+ * Exchange the two objects in memory. This exploits base+index addressing,
+ * which basically all CPUs have, to minimize loop overhead computations.
+ *
+ * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
+ * bottom of the loop, even though the zero flag is still valid from the
+ * subtract (since the intervening mov instructions don't alter the flags).
+ * Gcc 8.1.0 doesn't have that problem.
+ */
+static void swap_words_32(void *a, void *b, size_t n)
+{
+ do {
+ u32 t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+ } while (n);
+}
+
+/**
+ * swap_words_64 - swap two elements in 64-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 8)
+ *
+ * Exchange the two objects in memory. This exploits base+index
+ * addressing, which basically all CPUs have, to minimize loop overhead
+ * computations.
+ *
+ * We'd like to use 64-bit loads if possible. If they're not, emulating
+ * one requires base+index+4 addressing which x86 has but most other
+ * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
+ * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
+ * x32 ABI). Are there any cases the kernel needs to worry about?
+ */
+static void swap_words_64(void *a, void *b, size_t n)
+{
+ do {
+#ifdef CONFIG_64BIT
+ u64 t = *(u64 *)(a + (n -= 8));
+ *(u64 *)(a + n) = *(u64 *)(b + n);
+ *(u64 *)(b + n) = t;
+#else
+ /* Use two 32-bit transfers to avoid base+index+4 addressing */
+ u32 t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+
+ t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+#endif
+ } while (n);
+}
+
+/**
+ * swap_bytes - swap two elements a byte at a time
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size
+ *
+ * This is the fallback if alignment doesn't allow using larger chunks.
+ */
+static void swap_bytes(void *a, void *b, size_t n)
+{
+ do {
+ char t = ((char *)a)[--n];
+ ((char *)a)[n] = ((char *)b)[n];
+ ((char *)b)[n] = t;
+ } while (n);
+}
+
+/*
+ * The values are arbitrary as long as they can't be confused with
+ * a pointer, but small integers make for the smallest compare
+ * instructions.
+ */
+#define SWAP_WORDS_64 (swap_r_func_t)0
+#define SWAP_WORDS_32 (swap_r_func_t)1
+#define SWAP_BYTES (swap_r_func_t)2
+#define SWAP_WRAPPER (swap_r_func_t)3
+
+struct wrapper {
+ cmp_func_t cmp;
+ swap_func_t swap_func;
+};
+
+/*
+ * The function pointer is last to make tail calls most efficient if the
+ * compiler decides not to inline this function.
+ */
+static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
+{
+ if (swap_func == SWAP_WRAPPER) {
+ ((const struct wrapper *)priv)->swap_func(a, b, (int)size);
+ return;
+ }
+
+ if (swap_func == SWAP_WORDS_64)
+ swap_words_64(a, b, size);
+ else if (swap_func == SWAP_WORDS_32)
+ swap_words_32(a, b, size);
+ else if (swap_func == SWAP_BYTES)
+ swap_bytes(a, b, size);
+ else
+ swap_func(a, b, (int)size, priv);
+}
+
+#define _CMP_WRAPPER ((cmp_r_func_t)0L)
+
+static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
+{
+ if (cmp == _CMP_WRAPPER)
+ return ((const struct wrapper *)priv)->cmp(a, b);
+ return cmp(a, b, priv);
+}
+
+static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
+ cmp_r_func_t cmp_func, const void *priv,
+ size_t l, size_t r)
+{
+ return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ cmp_func, priv);
+}
+
+static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
+ swap_r_func_t swap_func, const void *priv,
+ size_t l, size_t r)
+{
+ do_swap(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ size, swap_func, priv);
+}
+
+void eytzinger0_sort_r(void *base, size_t n, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv)
+{
+ int i, c, r;
+
+ /* called from 'sort' without swap function, let's pick the default */
+ if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
+ swap_func = NULL;
+
+ if (!swap_func) {
+ if (is_aligned(base, size, 8))
+ swap_func = SWAP_WORDS_64;
+ else if (is_aligned(base, size, 4))
+ swap_func = SWAP_WORDS_32;
+ else
+ swap_func = SWAP_BYTES;
+ }
+
+ /* heapify */
+ for (i = n / 2 - 1; i >= 0; --i) {
+ for (r = i; r * 2 + 1 < n; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < n &&
+ eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+ c++;
+
+ if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+ break;
+
+ eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ }
+ }
+
+ /* sort */
+ for (i = n - 1; i > 0; --i) {
+ eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
+
+ for (r = 0; r * 2 + 1 < i; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < i &&
+ eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+ c++;
+
+ if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+ break;
+
+ eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ }
+ }
+}
+
+void eytzinger0_sort(void *base, size_t n, size_t size,
+ cmp_func_t cmp_func,
+ swap_func_t swap_func)
+{
+ struct wrapper w = {
+ .cmp = cmp_func,
+ .swap_func = swap_func,
+ };
+
+ return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
+}
diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h
index b04750dbf870bc..24840aee335c0f 100644
--- a/fs/bcachefs/eytzinger.h
+++ b/fs/bcachefs/eytzinger.h
@@ -5,23 +5,33 @@
#include <linux/bitops.h>
#include <linux/log2.h>
-#include "util.h"
+#ifdef EYTZINGER_DEBUG
+#define EYTZINGER_BUG_ON(cond) BUG_ON(cond)
+#else
+#define EYTZINGER_BUG_ON(cond)
+#endif
/*
* Traversal for trees in eytzinger layout - a full binary tree layed out in an
- * array
- */
-
-/*
- * One based indexing version:
+ * array.
+ *
+ * Consider using an eytzinger tree any time you would otherwise be doing binary
+ * search over an array. Binary search is a worst case scenario for branch
+ * prediction and prefetching, but in an eytzinger tree every node's children
+ * are adjacent in memory, thus we can prefetch children before knowing the
+ * result of the comparison, assuming multiple nodes fit on a cacheline.
*
- * With one based indexing each level of the tree starts at a power of two -
- * good for cacheline alignment:
+ * Two variants are provided, for one based indexing and zero based indexing.
+ *
+ * Zero based indexing is more convenient, but one based indexing has better
+ * alignment and thus better performance because each new level of the tree
+ * starts at a power of two, and thus if element 0 was cacheline aligned, each
+ * new level will be as well.
*/
static inline unsigned eytzinger1_child(unsigned i, unsigned child)
{
- EBUG_ON(child > 1);
+ EYTZINGER_BUG_ON(child > 1);
return (i << 1) + child;
}
@@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size)
static inline unsigned eytzinger1_next(unsigned i, unsigned size)
{
- EBUG_ON(i > size);
+ EYTZINGER_BUG_ON(i > size);
if (eytzinger1_right_child(i) <= size) {
i = eytzinger1_right_child(i);
@@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)
static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
{
- EBUG_ON(i > size);
+ EYTZINGER_BUG_ON(i > size);
if (eytzinger1_left_child(i) <= size) {
i = eytzinger1_left_child(i) + 1;
@@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
unsigned shift = __fls(size) - b;
int s;
- EBUG_ON(!i || i > size);
+ EYTZINGER_BUG_ON(!i || i > size);
i ^= 1U << b;
i <<= 1;
@@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
unsigned shift;
int s;
- EBUG_ON(!i || i > size);
+ EYTZINGER_BUG_ON(!i || i > size);
/*
* sign bit trick:
@@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
static inline unsigned eytzinger0_child(unsigned i, unsigned child)
{
- EBUG_ON(child > 1);
+ EYTZINGER_BUG_ON(child > 1);
return (i << 1) + 1 + child;
}
@@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
(_i) != -1; \
(_i) = eytzinger0_next((_i), (_size)))
-typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);
-
/* return greatest node <= @search, or -1 if not found */
-static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
- eytzinger_cmp_fn cmp, const void *search)
+static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
+ cmp_func_t cmp, const void *search)
{
unsigned i, n = 0;
@@ -244,21 +252,38 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
do {
i = n;
- n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
+ n = eytzinger0_child(i, cmp(base + i * size, search) <= 0);
} while (n < nr);
if (n & 1) {
- /* @i was greater than @search, return previous node: */
-
- if (i == eytzinger0_first(nr))
- return -1;
-
+ /*
+ * @i was greater than @search, return previous node:
+ *
+ * if @i was leftmost/smallest element,
+ * eytzinger0_prev(eytzinger0_first())) returns -1, as expected
+ */
return eytzinger0_prev(i, nr);
} else {
return i;
}
}
+static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
+ cmp_func_t cmp, const void *search)
+{
+ ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
+
+ /*
+ * if eytitzinger0_find_le() returned -1 - no element was <= search - we
+ * want to return the first element; next/prev identities mean this work
+ * as expected
+ *
+ * similarly if find_le() returns last element, we should return -1;
+ * identities mean this all works out:
+ */
+ return eytzinger0_next(idx, nr);
+}
+
#define eytzinger0_find(base, nr, size, _cmp, search) \
({ \
void *_base = (base); \
@@ -269,13 +294,13 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
int _res; \
\
while (_i < _nr && \
- (_res = _cmp(_search, _base + _i * _size, _size))) \
+ (_res = _cmp(_search, _base + _i * _size))) \
_i = eytzinger0_child(_i, _res > 0); \
_i; \
})
-void eytzinger0_sort(void *, size_t, size_t,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t));
+void eytzinger0_sort_r(void *, size_t, size_t,
+ cmp_r_func_t, swap_r_func_t, const void *);
+void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
#endif /* _EYTZINGER_H */
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 33cb6da3a5ad28..b889370a508811 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -387,6 +387,8 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
ret = dio->op.error ?: ((long) dio->written << 9);
bio_put(&dio->op.wbio.bio);
+ bch2_write_ref_put(dio->op.c, BCH_WRITE_REF_dio_write);
+
/* inode->i_dio_count is our ref on inode and thus bch_fs */
inode_dio_end(&inode->v);
@@ -536,7 +538,7 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
if (likely(!dio->iter.count) || dio->op.error)
break;
- bio_reset(bio, NULL, REQ_OP_WRITE);
+ bio_reset(bio, NULL, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
}
out:
return bch2_dio_write_done(dio);
@@ -590,22 +592,25 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
prefetch(&inode->ei_inode);
prefetch((void *) &inode->ei_inode + 64);
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write))
+ return -EROFS;
+
inode_lock(&inode->v);
ret = generic_write_checks(req, iter);
if (unlikely(ret <= 0))
- goto err;
+ goto err_put_write_ref;
ret = file_remove_privs(file);
if (unlikely(ret))
- goto err;
+ goto err_put_write_ref;
ret = file_update_time(file);
if (unlikely(ret))
- goto err;
+ goto err_put_write_ref;
if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
- goto err;
+ goto err_put_write_ref;
inode_dio_begin(&inode->v);
bch2_pagecache_block_get(inode);
@@ -618,7 +623,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
bio = bio_alloc_bioset(NULL,
bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
- REQ_OP_WRITE,
+ REQ_OP_WRITE | REQ_SYNC | REQ_IDLE,
GFP_KERNEL,
&c->dio_write_bioset);
dio = container_of(bio, struct dio_write, op.wbio.bio);
@@ -645,7 +650,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
}
ret = bch2_dio_write_loop(dio);
-err:
+out:
if (locked)
inode_unlock(&inode->v);
return ret;
@@ -653,7 +658,9 @@ err_put_bio:
bch2_pagecache_block_put(inode);
bio_put(bio);
inode_dio_end(&inode->v);
- goto err;
+err_put_write_ref:
+ bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
+ goto out;
}
void bch2_fs_fs_io_direct_exit(struct bch_fs *c)
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 8c70123b6a0c80..20b40477425f49 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -174,18 +174,18 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
static int bch2_flush_inode(struct bch_fs *c,
struct bch_inode_info *inode)
{
- struct bch_inode_unpacked u;
- int ret;
-
if (c->opts.journal_flush_disabled)
return 0;
- ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u);
- if (ret)
- return ret;
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync))
+ return -EROFS;
- return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
- bch2_inode_flush_nocow_writes(c, inode);
+ struct bch_inode_unpacked u;
+ int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?:
+ bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
+ bch2_inode_flush_nocow_writes(c, inode);
+ bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
+ return ret;
}
int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 0ccee05f6887b3..fce690007edfce 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -188,7 +188,8 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
BUG_ON(!old);
if (unlikely(old != inode)) {
- discard_new_inode(&inode->v);
+ __destroy_inode(&inode->v);
+ kmem_cache_free(bch2_inode_cache, inode);
inode = old;
} else {
mutex_lock(&c->vfs_inodes_lock);
@@ -225,8 +226,10 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
if (unlikely(!inode)) {
int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM);
- if (ret && inode)
- discard_new_inode(&inode->v);
+ if (ret && inode) {
+ __destroy_inode(&inode->v);
+ kmem_cache_free(bch2_inode_cache, inode);
+ }
if (ret)
return ERR_PTR(ret);
}
@@ -1997,6 +2000,7 @@ out:
return dget(sb->s_root);
err_put_super:
+ __bch2_fs_stop(c);
deactivate_locked_super(sb);
return ERR_PTR(bch2_err_class(ret));
}
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 47d4eefaba7ba0..8e2010212cc371 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -12,7 +12,7 @@
#include "fsck.h"
#include "inode.h"
#include "keylist.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "snapshot.h"
#include "super.h"
#include "xattr.h"
@@ -63,9 +63,7 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol,
u32 *snapshot, u64 *inum)
{
struct bch_subvolume s;
- int ret;
-
- ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
+ int ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
*snapshot = le32_to_cpu(s.snapshot);
*inum = le64_to_cpu(s.inode);
@@ -158,9 +156,10 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
- ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
- &dir_hash_info, &iter,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
+ &dir_hash_info, &iter,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
bch2_trans_iter_exit(trans, &iter);
err:
bch_err_fn(c, ret);
@@ -169,7 +168,8 @@ err:
/* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
- struct bch_inode_unpacked *lostfound)
+ struct bch_inode_unpacked *lostfound,
+ u64 reattaching_inum)
{
struct bch_fs *c = trans->c;
struct qstr lostfound_str = QSTR("lost+found");
@@ -184,19 +184,36 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
return ret;
subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };
- u32 subvol_snapshot;
- ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol),
- &subvol_snapshot, &root_inum.inum);
- bch_err_msg(c, ret, "looking up root subvol");
+ struct bch_subvolume subvol;
+ ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol),
+ false, 0, &subvol);
+ bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
+ le32_to_cpu(st.master_subvol), snapshot);
if (ret)
return ret;
+ if (!subvol.inode) {
+ struct btree_iter iter;
+ struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)),
+ 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(subvol);
+ if (ret)
+ return ret;
+
+ subvol->v.inode = cpu_to_le64(reattaching_inum);
+ bch2_trans_iter_exit(trans, &iter);
+ }
+
+ root_inum.inum = le64_to_cpu(subvol.inode);
+
struct bch_inode_unpacked root_inode;
struct bch_hash_info root_hash_info;
u32 root_inode_snapshot = snapshot;
ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
- bch_err_msg(c, ret, "looking up root inode");
+ bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
+ root_inum.inum, le32_to_cpu(st.master_subvol));
if (ret)
return ret;
@@ -292,7 +309,7 @@ static int reattach_inode(struct btree_trans *trans,
snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
}
- ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
+ ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum);
if (ret)
return ret;
@@ -363,6 +380,112 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume
return ret;
}
+static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum)
+{
+ struct bch_fs *c = trans->c;
+
+ if (!bch2_snapshot_is_leaf(c, snapshotid)) {
+ bch_err(c, "need to reconstruct subvol, but have interior node snapshot");
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+
+ /*
+ * If inum isn't set, that means we're being called from check_dirents,
+ * not check_inodes - the root of this subvolume doesn't exist or we
+ * would have found it there:
+ */
+ if (!inum) {
+ struct btree_iter inode_iter = {};
+ struct bch_inode_unpacked new_inode;
+ u64 cpu = raw_smp_processor_id();
+
+ bch2_inode_init_early(c, &new_inode);
+ bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);
+
+ new_inode.bi_subvol = subvolid;
+
+ int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
+ bch2_btree_iter_traverse(&inode_iter) ?:
+ bch2_inode_write(trans, &inode_iter, &new_inode);
+ bch2_trans_iter_exit(trans, &inode_iter);
+ if (ret)
+ return ret;
+
+ inum = new_inode.bi_inum;
+ }
+
+ bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum);
+
+ struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
+ int ret = PTR_ERR_OR_ZERO(new_subvol);
+ if (ret)
+ return ret;
+
+ bkey_subvolume_init(&new_subvol->k_i);
+ new_subvol->k.p.offset = subvolid;
+ new_subvol->v.snapshot = cpu_to_le32(snapshotid);
+ new_subvol->v.inode = cpu_to_le64(inum);
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0);
+ if (ret)
+ return ret;
+
+ struct btree_iter iter;
+ struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_snapshots, POS(0, snapshotid),
+ 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(s);
+ bch_err_msg(c, ret, "getting snapshot %u", snapshotid);
+ if (ret)
+ return ret;
+
+ u32 snapshot_tree = le32_to_cpu(s->v.tree);
+
+ s->v.subvol = cpu_to_le32(subvolid);
+ SET_BCH_SNAPSHOT_SUBVOL(&s->v, true);
+ bch2_trans_iter_exit(trans, &iter);
+
+ struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_snapshot_trees, POS(0, snapshot_tree),
+ 0, snapshot_tree);
+ ret = PTR_ERR_OR_ZERO(st);
+ bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree);
+ if (ret)
+ return ret;
+
+ if (!st->v.master_subvol)
+ st->v.master_subvol = cpu_to_le32(subvolid);
+
+ bch2_trans_iter_exit(trans, &iter);
+ return 0;
+}
+
+static int reconstruct_inode(struct btree_trans *trans, u32 snapshot, u64 inum, u64 size, unsigned mode)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_inode_unpacked new_inode;
+
+ bch2_inode_init_early(c, &new_inode);
+ bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, mode|0755, 0, NULL);
+ new_inode.bi_size = size;
+ new_inode.bi_inum = inum;
+
+ return __bch2_fsck_write_inode(trans, &new_inode, snapshot);
+}
+
+static int reconstruct_reg_inode(struct btree_trans *trans, u32 snapshot, u64 inum)
+{
+ struct btree_iter iter = {};
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0);
+ struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter);
+ bch2_trans_iter_exit(trans, &iter);
+ int ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ return reconstruct_inode(trans, snapshot, inum, k.k->p.offset << 9, S_IFREG);
+}
+
struct snapshots_seen_entry {
u32 id;
u32 equiv;
@@ -1064,6 +1187,11 @@ static int check_inode(struct btree_trans *trans,
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
+ if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
+ ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum);
+ goto do_update;
+ }
+
if (fsck_err_on(ret,
c, inode_bi_subvol_missing,
"inode %llu:%u bi_subvol points to missing subvolume %u",
@@ -1081,7 +1209,7 @@ static int check_inode(struct btree_trans *trans,
do_update = true;
}
}
-
+do_update:
if (do_update) {
ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
bch_err_msg(c, ret, "in fsck updating inode");
@@ -1130,8 +1258,8 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal
i->count = count2;
if (i->count != count2) {
- bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
- w->last_pos.inode, i->snapshot, i->count, count2);
+ bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
+ w->last_pos.inode, i->snapshot, i->count, count2);
return -BCH_ERR_internal_fsck_err;
}
@@ -1371,10 +1499,6 @@ static int check_overlapping_extents(struct btree_trans *trans,
goto err;
}
- ret = extent_ends_at(c, extent_ends, seen, k);
- if (ret)
- goto err;
-
extent_ends->last_pos = k.k->p;
err:
return ret;
@@ -1438,6 +1562,17 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
goto err;
if (k.k->type != KEY_TYPE_whiteout) {
+ if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
+ ret = reconstruct_reg_inode(trans, k.k->p.snapshot, k.k->p.inode) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ inode->last_pos.inode--;
+ ret = -BCH_ERR_transaction_restart_nested;
+ goto err;
+ }
+
if (fsck_err_on(!i, c, extent_in_missing_inode,
"extent in missing inode:\n %s",
(printbuf_reset(&buf),
@@ -1504,6 +1639,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
i->seen_this_pos = true;
}
+
+ if (k.k->type != KEY_TYPE_whiteout) {
+ ret = extent_ends_at(c, extent_ends, s, k);
+ if (ret)
+ goto err;
+ }
out:
err:
fsck_err:
@@ -1584,8 +1725,8 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
return count2;
if (i->count != count2) {
- bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
- i->count, count2);
+ bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
+ w->last_pos.inode, i->snapshot, i->count, count2);
i->count = count2;
if (i->inode.bi_nlink == i->count)
continue;
@@ -1782,6 +1923,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
u32 parent_snapshot;
+ u32 new_parent_subvol = 0;
u64 parent_inum;
struct printbuf buf = PRINTBUF;
int ret = 0;
@@ -1790,6 +1932,27 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
+ if (ret ||
+ (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) {
+ int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
+ if (ret2 && !bch2_err_matches(ret, ENOENT))
+ return ret2;
+ }
+
+ if (ret &&
+ !new_parent_subvol &&
+ (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
+ /*
+ * Couldn't find a subvol for dirent's snapshot - but we lost
+ * subvols, so we need to reconstruct:
+ */
+ ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0);
+ if (ret)
+ return ret;
+
+ parent_snapshot = d.k->p.snapshot;
+ }
+
if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
"dirent parent_subvol points to missing subvolume\n%s",
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
@@ -1798,10 +1961,10 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
"dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
parent_snapshot,
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
- u32 new_parent_subvol;
- ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
- if (ret)
- goto err;
+ if (!new_parent_subvol) {
+ bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
ret = PTR_ERR_OR_ZERO(new_dirent);
@@ -1847,9 +2010,16 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
if (ret && !bch2_err_matches(ret, ENOENT))
- return ret;
+ goto err;
+
+ if (ret) {
+ bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum);
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ ret = 0;
+ goto err;
+ }
- if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol,
+ if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol,
c, inode_bi_parent_wrong,
"subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
target_inum,
@@ -1857,13 +2027,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
subvol_root.bi_parent_subvol = parent_subvol;
ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
if (ret)
- return ret;
+ goto err;
}
ret = check_dirent_target(trans, iter, d, &subvol_root,
target_snapshot);
if (ret)
- return ret;
+ goto err;
out:
err:
fsck_err:
@@ -1880,7 +2050,6 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct snapshots_seen *s)
{
struct bch_fs *c = trans->c;
- struct bkey_s_c_dirent d;
struct inode_walker_entry *i;
struct printbuf buf = PRINTBUF;
struct bpos equiv;
@@ -1919,6 +2088,17 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
dir->first_this_inode = false;
+ if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
+ ret = reconstruct_inode(trans, k.k->p.snapshot, k.k->p.inode, 0, S_IFDIR) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ dir->last_pos.inode--;
+ ret = -BCH_ERR_transaction_restart_nested;
+ goto err;
+ }
+
if (fsck_err_on(!i, c, dirent_in_missing_dir_inode,
"dirent in nonexisting directory:\n%s",
(printbuf_reset(&buf),
@@ -1953,7 +2133,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (k.k->type != KEY_TYPE_dirent)
goto out;
- d = bkey_s_c_to_dirent(k);
+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
if (d.v->d_type == DT_SUBVOL) {
ret = check_dirent_to_subvol(trans, iter, d);
@@ -2098,17 +2278,21 @@ static int check_root_trans(struct btree_trans *trans)
if (mustfix_fsck_err_on(ret, c, root_subvol_missing,
"root subvol missing")) {
- struct bkey_i_subvolume root_subvol;
+ struct bkey_i_subvolume *root_subvol =
+ bch2_trans_kmalloc(trans, sizeof(*root_subvol));
+ ret = PTR_ERR_OR_ZERO(root_subvol);
+ if (ret)
+ goto err;
snapshot = U32_MAX;
inum = BCACHEFS_ROOT_INO;
- bkey_subvolume_init(&root_subvol.k_i);
- root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
- root_subvol.v.flags = 0;
- root_subvol.v.snapshot = cpu_to_le32(snapshot);
- root_subvol.v.inode = cpu_to_le64(inum);
- ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0);
+ bkey_subvolume_init(&root_subvol->k_i);
+ root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL;
+ root_subvol->v.flags = 0;
+ root_subvol->v.snapshot = cpu_to_le32(snapshot);
+ root_subvol->v.inode = cpu_to_le64(inum);
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0);
bch_err_msg(c, ret, "writing root subvol");
if (ret)
goto err;
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 2b5e06770ab39e..ca4a066e9a5428 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -552,8 +552,8 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
prt_printf(out, "bi_sectors=%llu", inode->bi_sectors);
prt_newline(out);
- prt_newline(out);
prt_printf(out, "bi_version=%llu", inode->bi_version);
+ prt_newline(out);
#define x(_name, _bits) \
prt_printf(out, #_name "=%llu", (u64) inode->_name); \
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index 1baf78594ccaf8..82f9170dab3fdc 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -264,6 +264,7 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
ret = 0;
err:
bch2_logged_op_finish(trans, op_k);
+ bch_err_fn(c, ret);
return ret;
}
@@ -476,6 +477,7 @@ case LOGGED_OP_FINSERT_finish:
break;
}
err:
+ bch_err_fn(c, ret);
bch2_logged_op_finish(trans, op_k);
bch2_trans_iter_exit(trans, &iter);
return ret;
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 725fcf46f6312c..eb1f9d6f5a196e 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -247,7 +247,7 @@ static void journal_entry_err_msg(struct printbuf *out,
if (entry) {
prt_str(out, " type=");
- prt_str(out, bch2_jset_entry_types[entry->type]);
+ bch2_prt_jset_entry_type(out, entry->type);
}
if (!jset) {
@@ -403,7 +403,8 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs
jset_entry_for_each_key(entry, k) {
if (!first) {
prt_newline(out);
- prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+ bch2_prt_jset_entry_type(out, entry->type);
+ prt_str(out, ": ");
}
prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k));
@@ -563,9 +564,9 @@ static void journal_entry_usage_to_text(struct printbuf *out, struct bch_fs *c,
struct jset_entry_usage *u =
container_of(entry, struct jset_entry_usage, entry);
- prt_printf(out, "type=%s v=%llu",
- bch2_fs_usage_types[u->entry.btree_id],
- le64_to_cpu(u->v));
+ prt_str(out, "type=");
+ bch2_prt_fs_usage_type(out, u->entry.btree_id);
+ prt_printf(out, " v=%llu", le64_to_cpu(u->v));
}
static int journal_entry_data_usage_validate(struct bch_fs *c,
@@ -827,11 +828,11 @@ int bch2_journal_entry_validate(struct bch_fs *c,
void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c,
struct jset_entry *entry)
{
+ bch2_prt_jset_entry_type(out, entry->type);
+
if (entry->type < BCH_JSET_ENTRY_NR) {
- prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+ prt_str(out, ": ");
bch2_jset_entry_ops[entry->type].to_text(out, c, entry);
- } else {
- prt_printf(out, "(unknown type %u)", entry->type);
}
}
@@ -1722,7 +1723,7 @@ static void journal_write_endio(struct bio *bio)
percpu_ref_put(&ca->io_ref);
}
-static CLOSURE_CALLBACK(do_journal_write)
+static CLOSURE_CALLBACK(journal_write_submit)
{
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
@@ -1767,6 +1768,44 @@ static CLOSURE_CALLBACK(do_journal_write)
continue_at(cl, journal_write_done, j->wq);
}
+static CLOSURE_CALLBACK(journal_write_preflush)
+{
+ closure_type(w, struct journal_buf, io);
+ struct journal *j = container_of(w, struct journal, buf[w->idx]);
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
+ if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
+ spin_lock(&j->lock);
+ closure_wait(&j->async_wait, cl);
+ spin_unlock(&j->lock);
+
+ continue_at(cl, journal_write_preflush, j->wq);
+ return;
+ }
+
+ if (w->separate_flush) {
+ for_each_rw_member(c, ca) {
+ percpu_ref_get(&ca->io_ref);
+
+ struct journal_device *ja = &ca->journal;
+ struct bio *bio = &ja->bio[w->idx]->bio;
+ bio_reset(bio, ca->disk_sb.bdev,
+ REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
+ bio->bi_end_io = journal_write_endio;
+ bio->bi_private = ca;
+ closure_bio_submit(bio, cl);
+ }
+
+ continue_at(cl, journal_write_submit, j->wq);
+ } else {
+ /*
+ * no need to punt to another work item if we're not waiting on
+ * preflushes
+ */
+ journal_write_submit(&cl->work);
+ }
+}
+
static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
@@ -2032,23 +2071,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
goto err;
if (!JSET_NO_FLUSH(w->data))
- closure_wait_event(&j->async_wait, j->seq_ondisk + 1 == le64_to_cpu(w->data->seq));
-
- if (!JSET_NO_FLUSH(w->data) && w->separate_flush) {
- for_each_rw_member(c, ca) {
- percpu_ref_get(&ca->io_ref);
-
- struct journal_device *ja = &ca->journal;
- struct bio *bio = &ja->bio[w->idx]->bio;
- bio_reset(bio, ca->disk_sb.bdev,
- REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
- bio->bi_end_io = journal_write_endio;
- bio->bi_private = ca;
- closure_bio_submit(bio, cl);
- }
- }
-
- continue_at(cl, do_journal_write, j->wq);
+ continue_at(cl, journal_write_preflush, j->wq);
+ else
+ continue_at(cl, journal_write_submit, j->wq);
return;
no_io:
continue_at(cl, journal_write_done, j->wq);
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index ab811c0dad26ac..04a577848b015c 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -67,6 +67,8 @@ void bch2_journal_set_watermark(struct journal *j)
track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
trace_and_count(c, journal_full, c);
+ mod_bit(JOURNAL_SPACE_LOW, &j->flags, low_on_space || low_on_pin);
+
swap(watermark, j->watermark);
if (watermark > j->watermark)
journal_wake(j);
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index b5303874fc35b3..37a024e034d495 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -95,8 +95,7 @@ out:
return ret ?: bch2_blacklist_table_initialize(c);
}
-static int journal_seq_blacklist_table_cmp(const void *_l,
- const void *_r, size_t size)
+static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
{
const struct journal_seq_blacklist_table_entry *l = _l;
const struct journal_seq_blacklist_table_entry *r = _r;
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 8c053cb64ca5ee..b5161b5d76a008 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -134,6 +134,7 @@ enum journal_flags {
JOURNAL_STARTED,
JOURNAL_MAY_SKIP_FLUSH,
JOURNAL_NEED_FLUSH_WRITE,
+ JOURNAL_SPACE_LOW,
};
/* Reasons we may fail to get a journal reservation: */
diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c
index 9fac838d123e8e..b82f8209041ffb 100644
--- a/fs/bcachefs/logged_ops.c
+++ b/fs/bcachefs/logged_ops.c
@@ -37,7 +37,6 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
struct bkey_buf sk;
u32 restart_count = trans->restart_count;
- int ret;
if (!fn)
return 0;
@@ -45,11 +44,11 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
bch2_bkey_buf_init(&sk);
bch2_bkey_buf_reassemble(&sk, c, k);
- ret = drop_locks_do(trans, (bch2_fs_lazy_rw(c), 0)) ?:
- fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count);
+ fn->resume(trans, sk.k);
bch2_bkey_buf_exit(&sk, c);
- return ret;
+
+ return trans_was_restarted(trans, restart_count);
}
int bch2_resume_logged_ops(struct bch_fs *c)
diff --git a/fs/bcachefs/mean_and_variance_test.c b/fs/bcachefs/mean_and_variance_test.c
index db63b3f3b338ad..4c298e74723db3 100644
--- a/fs/bcachefs/mean_and_variance_test.c
+++ b/fs/bcachefs/mean_and_variance_test.c
@@ -136,20 +136,8 @@ static void mean_and_variance_test_1(struct kunit *test)
d, mean, stddev, weighted_mean, weighted_stddev);
}
-static void mean_and_variance_test_2(struct kunit *test)
-{
- s64 d[] = { 100, 10, 10, 10, 10, 10, 10 };
- s64 mean[] = { 10, 10, 10, 10, 10, 10, 10 };
- s64 stddev[] = { 9, 9, 9, 9, 9, 9, 9 };
- s64 weighted_mean[] = { 32, 27, 22, 19, 17, 15, 14 };
- s64 weighted_stddev[] = { 38, 35, 31, 27, 24, 21, 18 };
-
- do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
- d, mean, stddev, weighted_mean, weighted_stddev);
-}
-
/* Test behaviour where we switch from one steady state to another: */
-static void mean_and_variance_test_3(struct kunit *test)
+static void mean_and_variance_test_2(struct kunit *test)
{
s64 d[] = { 100, 100, 100, 100, 100 };
s64 mean[] = { 22, 32, 40, 46, 50 };
@@ -161,18 +149,6 @@ static void mean_and_variance_test_3(struct kunit *test)
d, mean, stddev, weighted_mean, weighted_stddev);
}
-static void mean_and_variance_test_4(struct kunit *test)
-{
- s64 d[] = { 100, 100, 100, 100, 100 };
- s64 mean[] = { 10, 11, 12, 13, 14 };
- s64 stddev[] = { 9, 13, 15, 17, 19 };
- s64 weighted_mean[] = { 32, 49, 61, 71, 78 };
- s64 weighted_stddev[] = { 38, 44, 44, 41, 38 };
-
- do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
- d, mean, stddev, weighted_mean, weighted_stddev);
-}
-
static void mean_and_variance_fast_divpow2(struct kunit *test)
{
s64 i;
@@ -230,8 +206,6 @@ static struct kunit_case mean_and_variance_test_cases[] = {
KUNIT_CASE(mean_and_variance_weighted_advanced_test),
KUNIT_CASE(mean_and_variance_test_1),
KUNIT_CASE(mean_and_variance_test_2),
- KUNIT_CASE(mean_and_variance_test_3),
- KUNIT_CASE(mean_and_variance_test_4),
{}
};
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index 08ea0cfc4aef08..bb068fd724656c 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -7,6 +7,7 @@
#include "disk_groups.h"
#include "error.h"
#include "opts.h"
+#include "recovery_passes.h"
#include "super-io.h"
#include "util.h"
@@ -42,7 +43,7 @@ const char * const __bch2_btree_ids[] = {
NULL
};
-const char * const bch2_csum_types[] = {
+static const char * const __bch2_csum_types[] = {
BCH_CSUM_TYPES()
NULL
};
@@ -52,7 +53,7 @@ const char * const bch2_csum_opts[] = {
NULL
};
-const char * const __bch2_compression_types[] = {
+static const char * const __bch2_compression_types[] = {
BCH_COMPRESSION_TYPES()
NULL
};
@@ -82,18 +83,39 @@ const char * const bch2_member_states[] = {
NULL
};
-const char * const bch2_jset_entry_types[] = {
+static const char * const __bch2_jset_entry_types[] = {
BCH_JSET_ENTRY_TYPES()
NULL
};
-const char * const bch2_fs_usage_types[] = {
+static const char * const __bch2_fs_usage_types[] = {
BCH_FS_USAGE_TYPES()
NULL
};
#undef x
+static void prt_str_opt_boundscheck(struct printbuf *out, const char * const opts[],
+ unsigned nr, const char *type, unsigned idx)
+{
+ if (idx < nr)
+ prt_str(out, opts[idx]);
+ else
+ prt_printf(out, "(unknown %s %u)", type, idx);
+}
+
+#define PRT_STR_OPT_BOUNDSCHECKED(name, type) \
+void bch2_prt_##name(struct printbuf *out, type t) \
+{ \
+ prt_str_opt_boundscheck(out, __bch2_##name##s, ARRAY_SIZE(__bch2_##name##s) - 1, #name, t);\
+}
+
+PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type, enum bch_jset_entry_type);
+PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type, enum bch_fs_usage_type);
+PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type);
+PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type);
+PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type);
+
static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
struct printbuf *err)
{
@@ -205,6 +227,9 @@ const struct bch_option bch2_opt_table[] = {
#define OPT_STR(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = ARRAY_SIZE(_choices), \
.choices = _choices
+#define OPT_STR_NOLIMIT(_choices) .type = BCH_OPT_STR, \
+ .min = 0, .max = U64_MAX, \
+ .choices = _choices
#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 136083c11f3a3a..84e452835a17d8 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -16,18 +16,20 @@ extern const char * const bch2_version_upgrade_opts[];
extern const char * const bch2_sb_features[];
extern const char * const bch2_sb_compat[];
extern const char * const __bch2_btree_ids[];
-extern const char * const bch2_csum_types[];
extern const char * const bch2_csum_opts[];
-extern const char * const __bch2_compression_types[];
extern const char * const bch2_compression_opts[];
extern const char * const bch2_str_hash_types[];
extern const char * const bch2_str_hash_opts[];
extern const char * const __bch2_data_types[];
extern const char * const bch2_member_states[];
-extern const char * const bch2_jset_entry_types[];
-extern const char * const bch2_fs_usage_types[];
extern const char * const bch2_d_types[];
+void bch2_prt_jset_entry_type(struct printbuf *, enum bch_jset_entry_type);
+void bch2_prt_fs_usage_type(struct printbuf *, enum bch_fs_usage_type);
+void bch2_prt_data_type(struct printbuf *, enum bch_data_type);
+void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type);
+void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type);
+
static inline const char *bch2_d_type_str(unsigned d_type)
{
return (d_type < BCH_DT_MAX ? bch2_d_types[d_type] : NULL) ?: "(bad d_type)";
@@ -362,12 +364,17 @@ enum fsck_err_opts {
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
- NULL, "Don't replay the journal") \
- x(keep_journal, u8, \
+ NULL, "Exit recovery immediately prior to journal replay")\
+ x(recovery_pass_last, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_STR_NOLIMIT(bch2_recovery_passes), \
+ BCH2_NO_SB_OPT, 0, \
+ NULL, "Exit recovery after specified pass") \
+ x(retain_recovery_info, u8, \
0, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
- NULL, "Don't free journal entries/keys after startup")\
+ NULL, "Don't free journal entries/keys, scanned btree nodes after startup")\
x(read_entire_journal, u8, \
0, \
OPT_BOOL(), \
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 03f9d6afe46788..be5b4761932700 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -1,35 +1,31 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
-#include "backpointers.h"
-#include "bkey_buf.h"
#include "alloc_background.h"
-#include "btree_gc.h"
+#include "bkey_buf.h"
#include "btree_journal_iter.h"
+#include "btree_node_scan.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "buckets.h"
#include "dirent.h"
-#include "ec.h"
#include "errcode.h"
#include "error.h"
#include "fs-common.h"
-#include "fsck.h"
#include "journal_io.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
-#include "lru.h"
#include "logged_ops.h"
#include "move.h"
#include "quota.h"
#include "rebalance.h"
#include "recovery.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "sb-clean.h"
#include "sb-downgrade.h"
#include "snapshot.h"
-#include "subvolume.h"
#include "super-io.h"
#include <linux/sort.h>
@@ -37,22 +33,22 @@
#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-static bool btree_id_is_alloc(enum btree_id id)
+void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
{
- switch (id) {
- case BTREE_ID_alloc:
- case BTREE_ID_backpointers:
- case BTREE_ID_need_discard:
- case BTREE_ID_freespace:
- case BTREE_ID_bucket_gens:
- return true;
- default:
- return false;
+ u64 b = BIT_ULL(btree);
+
+ if (!(c->sb.btrees_lost_data & b)) {
+ bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree));
+
+ mutex_lock(&c->sb_lock);
+ bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
}
}
/* for -o reconstruct_alloc: */
-static void do_reconstruct_alloc(struct bch_fs *c)
+static void bch2_reconstruct_alloc(struct bch_fs *c)
{
bch2_journal_log_msg(c, "dropping alloc info");
bch_info(c, "dropping and reconstructing all alloc info");
@@ -87,15 +83,17 @@ static void do_reconstruct_alloc(struct bch_fs *c)
c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
- struct journal_keys *keys = &c->journal_keys;
- size_t src, dst;
- move_gap(keys, keys->nr);
-
- for (src = 0, dst = 0; src < keys->nr; src++)
- if (!btree_id_is_alloc(keys->data[src].btree_id))
- keys->data[dst++] = keys->data[src];
- keys->nr = keys->gap = dst;
+ bch2_shoot_down_journal_keys(c, BTREE_ID_alloc,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_freespace,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
}
/*
@@ -186,7 +184,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
return cmp_int(l->journal_seq, r->journal_seq);
}
-static int bch2_journal_replay(struct bch_fs *c)
+int bch2_journal_replay(struct bch_fs *c)
{
struct journal_keys *keys = &c->journal_keys;
DARRAY(struct journal_key *) keys_sorted = { 0 };
@@ -194,6 +192,7 @@ static int bch2_journal_replay(struct bch_fs *c)
u64 start_seq = c->journal_replay_seq_start;
u64 end_seq = c->journal_replay_seq_start;
struct btree_trans *trans = bch2_trans_get(c);
+ bool immediate_flush = false;
int ret = 0;
if (keys->nr) {
@@ -215,6 +214,13 @@ static int bch2_journal_replay(struct bch_fs *c)
darray_for_each(*keys, k) {
cond_resched();
+ /*
+ * k->allocated means the key wasn't read in from the journal,
+ * rather it was from early repair code
+ */
+ if (k->allocated)
+ immediate_flush = true;
+
/* Skip fastpath if we're low on space in the journal */
ret = c->journal.watermark ? -1 :
commit_do(trans, NULL, NULL,
@@ -243,7 +249,10 @@ static int bch2_journal_replay(struct bch_fs *c)
struct journal_key *k = *kp;
- replay_now_at(j, k->journal_seq);
+ if (k->journal_seq)
+ replay_now_at(j, k->journal_seq);
+ else
+ replay_now_at(j, j->replay_journal_seq_end);
ret = commit_do(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc|
@@ -266,7 +275,8 @@ static int bch2_journal_replay(struct bch_fs *c)
bch2_trans_put(trans);
trans = NULL;
- if (!c->opts.keep_journal)
+ if (!c->opts.retain_recovery_info &&
+ c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
bch2_journal_keys_put_initial(c);
replay_now_at(j, j->replay_journal_seq_end);
@@ -274,6 +284,12 @@ static int bch2_journal_replay(struct bch_fs *c)
bch2_journal_set_replay_done(j);
+ /* if we did any repair, flush it immediately */
+ if (immediate_flush) {
+ bch2_journal_flush_all_pins(&c->journal);
+ ret = bch2_journal_meta(&c->journal);
+ }
+
if (keys->nr)
bch2_journal_log_msg(c, "journal replay finished");
err:
@@ -423,10 +439,9 @@ static int journal_replay_early(struct bch_fs *c,
static int read_btree_roots(struct bch_fs *c)
{
- unsigned i;
int ret = 0;
- for (i = 0; i < btree_id_nr_alive(c); i++) {
+ for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
if (!r->alive)
@@ -435,186 +450,46 @@ static int read_btree_roots(struct bch_fs *c)
if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc)
continue;
- if (r->error) {
- __fsck_err(c,
- btree_id_is_alloc(i)
- ? FSCK_CAN_IGNORE : 0,
- btree_root_bkey_invalid,
- "invalid btree root %s",
- bch2_btree_id_str(i));
- if (i == BTREE_ID_alloc)
+ if (mustfix_fsck_err_on((ret = r->error),
+ c, btree_root_bkey_invalid,
+ "invalid btree root %s",
+ bch2_btree_id_str(i)) ||
+ mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)),
+ c, btree_root_read_error,
+ "error reading btree root %s l=%u: %s",
+ bch2_btree_id_str(i), r->level, bch2_err_str(ret))) {
+ if (btree_id_is_alloc(i)) {
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs);
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
- }
+ r->error = 0;
+ } else if (!(c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) {
+ bch_info(c, "will run btree node scan");
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
+ }
- ret = bch2_btree_root_read(c, i, &r->key, r->level);
- if (ret) {
- fsck_err(c,
- btree_root_read_error,
- "error reading btree root %s",
- bch2_btree_id_str(i));
- if (btree_id_is_alloc(i))
- c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
ret = 0;
+ bch2_btree_lost_data(c, i);
}
}
- for (i = 0; i < BTREE_ID_NR; i++) {
+ for (unsigned i = 0; i < BTREE_ID_NR; i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
- if (!r->b) {
+ if (!r->b && !r->error) {
r->alive = false;
r->level = 0;
- bch2_btree_root_alloc(c, i);
+ bch2_btree_root_alloc_fake(c, i, 0);
}
}
fsck_err:
return ret;
}
-static int bch2_initialize_subvolumes(struct bch_fs *c)
-{
- struct bkey_i_snapshot_tree root_tree;
- struct bkey_i_snapshot root_snapshot;
- struct bkey_i_subvolume root_volume;
- int ret;
-
- bkey_snapshot_tree_init(&root_tree.k_i);
- root_tree.k.p.offset = 1;
- root_tree.v.master_subvol = cpu_to_le32(1);
- root_tree.v.root_snapshot = cpu_to_le32(U32_MAX);
-
- bkey_snapshot_init(&root_snapshot.k_i);
- root_snapshot.k.p.offset = U32_MAX;
- root_snapshot.v.flags = 0;
- root_snapshot.v.parent = 0;
- root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
- root_snapshot.v.tree = cpu_to_le32(1);
- SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
-
- bkey_subvolume_init(&root_volume.k_i);
- root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
- root_volume.v.flags = 0;
- root_volume.v.snapshot = cpu_to_le32(U32_MAX);
- root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
-
- ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
- bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
- bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
- bch_err_fn(c, ret);
- return ret;
-}
-
-static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
-{
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bch_inode_unpacked inode;
- int ret;
-
- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
- SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
- ret = bkey_err(k);
- if (ret)
- return ret;
-
- if (!bkey_is_inode(k.k)) {
- bch_err(trans->c, "root inode not found");
- ret = -BCH_ERR_ENOENT_inode;
- goto err;
- }
-
- ret = bch2_inode_unpack(k, &inode);
- BUG_ON(ret);
-
- inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
-
- ret = bch2_inode_write(trans, &iter, &inode);
-err:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
-}
-
-/* set bi_subvol on root inode */
-noinline_for_stack
-static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
-{
- int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
- __bch2_fs_upgrade_for_subvolumes(trans));
- bch_err_fn(c, ret);
- return ret;
-}
-
-const char * const bch2_recovery_passes[] = {
-#define x(_fn, ...) #_fn,
- BCH_RECOVERY_PASSES()
-#undef x
- NULL
-};
-
-static int bch2_check_allocations(struct bch_fs *c)
-{
- return bch2_gc(c, true, c->opts.norecovery);
-}
-
-static int bch2_set_may_go_rw(struct bch_fs *c)
-{
- struct journal_keys *keys = &c->journal_keys;
-
- /*
- * After we go RW, the journal keys buffer can't be modified (except for
- * setting journal_key->overwritten: it will be accessed by multiple
- * threads
- */
- move_gap(keys, keys->nr);
-
- set_bit(BCH_FS_may_go_rw, &c->flags);
-
- if (keys->nr || c->opts.fsck || !c->sb.clean)
- return bch2_fs_read_write_early(c);
- return 0;
-}
-
-struct recovery_pass_fn {
- int (*fn)(struct bch_fs *);
- unsigned when;
-};
-
-static struct recovery_pass_fn recovery_pass_fns[] = {
-#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when },
- BCH_RECOVERY_PASSES()
-#undef x
-};
-
-u64 bch2_recovery_passes_to_stable(u64 v)
-{
- static const u8 map[] = {
-#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
- BCH_RECOVERY_PASSES()
-#undef x
- };
-
- u64 ret = 0;
- for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
- if (v & BIT_ULL(i))
- ret |= BIT_ULL(map[i]);
- return ret;
-}
-
-u64 bch2_recovery_passes_from_stable(u64 v)
-{
- static const u8 map[] = {
-#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
- BCH_RECOVERY_PASSES()
-#undef x
- };
-
- u64 ret = 0;
- for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
- if (v & BIT_ULL(i))
- ret |= BIT_ULL(map[i]);
- return ret;
-}
-
static bool check_version_upgrade(struct bch_fs *c)
{
unsigned latest_version = bcachefs_metadata_version_current;
@@ -687,96 +562,6 @@ static bool check_version_upgrade(struct bch_fs *c)
return false;
}
-u64 bch2_fsck_recovery_passes(void)
-{
- u64 ret = 0;
-
- for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
- if (recovery_pass_fns[i].when & PASS_FSCK)
- ret |= BIT_ULL(i);
- return ret;
-}
-
-static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-{
- struct recovery_pass_fn *p = recovery_pass_fns + pass;
-
- if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
- return false;
- if (c->recovery_passes_explicit & BIT_ULL(pass))
- return true;
- if ((p->when & PASS_FSCK) && c->opts.fsck)
- return true;
- if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
- return true;
- if (p->when & PASS_ALWAYS)
- return true;
- return false;
-}
-
-static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-{
- struct recovery_pass_fn *p = recovery_pass_fns + pass;
- int ret;
-
- if (!(p->when & PASS_SILENT))
- bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
- bch2_recovery_passes[pass]);
- ret = p->fn(c);
- if (ret)
- return ret;
- if (!(p->when & PASS_SILENT))
- bch2_print(c, KERN_CONT " done\n");
-
- return 0;
-}
-
-static int bch2_run_recovery_passes(struct bch_fs *c)
-{
- int ret = 0;
-
- while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
- if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
- unsigned pass = c->curr_recovery_pass;
-
- ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
- if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
- (ret && c->curr_recovery_pass < pass))
- continue;
- if (ret)
- break;
-
- c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
- }
- c->curr_recovery_pass++;
- c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
- }
-
- return ret;
-}
-
-int bch2_run_online_recovery_passes(struct bch_fs *c)
-{
- int ret = 0;
-
- for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
- struct recovery_pass_fn *p = recovery_pass_fns + i;
-
- if (!(p->when & PASS_ONLINE))
- continue;
-
- ret = bch2_run_recovery_pass(c, i);
- if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
- i = c->curr_recovery_pass;
- continue;
- }
- if (ret)
- break;
- }
-
- return ret;
-}
-
int bch2_fs_recovery(struct bch_fs *c)
{
struct bch_sb_field_clean *clean = NULL;
@@ -809,24 +594,14 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
- if (c->opts.fsck && c->opts.norecovery) {
- bch_err(c, "cannot select both norecovery and fsck");
- ret = -EINVAL;
- goto err;
- }
+ if (c->opts.norecovery)
+ c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1;
if (!c->opts.nochanges) {
mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
- struct bch_sb_field_ext *ext =
- bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
- if (!ext) {
- ret = -BCH_ERR_ENOSPC_sb;
- mutex_unlock(&c->sb_lock);
- goto err;
- }
-
if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
ext->recovery_passes_required[0] |=
cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
@@ -885,7 +660,7 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
- if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
+ if (!c->sb.clean || c->opts.fsck || c->opts.retain_recovery_info) {
struct genradix_iter iter;
struct journal_replay **i;
@@ -965,7 +740,7 @@ use_clean:
c->journal_replay_seq_end = blacklist_seq - 1;
if (c->opts.reconstruct_alloc)
- do_reconstruct_alloc(c);
+ bch2_reconstruct_alloc(c);
zero_out_btree_mem_ptr(&c->journal_keys);
@@ -1017,6 +792,12 @@ use_clean:
clear_bit(BCH_FS_fsck_running, &c->flags);
+ /* fsync if we fixed errors */
+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
+ bch2_journal_flush_all_pins(&c->journal);
+ bch2_journal_meta(&c->journal);
+ }
+
/* If we fixed errors, verify that fs is actually clean now: */
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
test_bit(BCH_FS_errors_fixed, &c->flags) &&
@@ -1051,6 +832,7 @@ use_clean:
}
mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
@@ -1064,15 +846,18 @@ use_clean:
write_sb = true;
}
- if (!test_bit(BCH_FS_error, &c->flags)) {
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
- if (ext &&
- (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) ||
- !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) {
- memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required));
- memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
- write_sb = true;
- }
+ if (!test_bit(BCH_FS_error, &c->flags) &&
+ !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent))) {
+ memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
+ write_sb = true;
+ }
+
+ if (c->opts.fsck &&
+ !test_bit(BCH_FS_error, &c->flags) &&
+ c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 &&
+ ext->btrees_lost_data) {
+ ext->btrees_lost_data = 0;
+ write_sb = true;
}
if (c->opts.fsck &&
@@ -1113,9 +898,10 @@ use_clean:
out:
bch2_flush_fsck_errs(c);
- if (!c->opts.keep_journal &&
- test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+ if (!c->opts.retain_recovery_info) {
bch2_journal_keys_put_initial(c);
+ bch2_find_btree_nodes_exit(&c->found_btree_nodes);
+ }
kfree(clean);
if (!ret &&
@@ -1141,6 +927,7 @@ int bch2_fs_initialize(struct bch_fs *c)
int ret;
bch_notice(c, "initializing new filesystem");
+ set_bit(BCH_FS_new_fs, &c->flags);
mutex_lock(&c->sb_lock);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
@@ -1155,11 +942,11 @@ int bch2_fs_initialize(struct bch_fs *c)
}
mutex_unlock(&c->sb_lock);
- c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns);
+ c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
set_bit(BCH_FS_may_go_rw, &c->flags);
for (unsigned i = 0; i < BTREE_ID_NR; i++)
- bch2_btree_root_alloc(c, i);
+ bch2_btree_root_alloc_fake(c, i, 0);
for_each_member_device(c, ca)
bch2_dev_usage_init(ca);
@@ -1230,7 +1017,7 @@ int bch2_fs_initialize(struct bch_fs *c)
if (ret)
goto err;
- c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1;
+ c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c);
diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h
index 4e9d24719b2e85..4bf818de1f2feb 100644
--- a/fs/bcachefs/recovery.h
+++ b/fs/bcachefs/recovery.h
@@ -2,37 +2,9 @@
#ifndef _BCACHEFS_RECOVERY_H
#define _BCACHEFS_RECOVERY_H
-extern const char * const bch2_recovery_passes[];
+void bch2_btree_lost_data(struct bch_fs *, enum btree_id);
-u64 bch2_recovery_passes_to_stable(u64 v);
-u64 bch2_recovery_passes_from_stable(u64 v);
-
-/*
- * For when we need to rewind recovery passes and run a pass we skipped:
- */
-static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c,
- enum bch_recovery_pass pass)
-{
- if (c->recovery_passes_explicit & BIT_ULL(pass))
- return 0;
-
- bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
- bch2_recovery_passes[pass], pass,
- bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
-
- c->recovery_passes_explicit |= BIT_ULL(pass);
-
- if (c->curr_recovery_pass >= pass) {
- c->curr_recovery_pass = pass;
- c->recovery_passes_complete &= (1ULL << pass) >> 1;
- return -BCH_ERR_restart_recovery;
- } else {
- return 0;
- }
-}
-
-int bch2_run_online_recovery_passes(struct bch_fs *);
-u64 bch2_fsck_recovery_passes(void);
+int bch2_journal_replay(struct bch_fs *);
int bch2_fs_recovery(struct bch_fs *);
int bch2_fs_initialize(struct bch_fs *);
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
new file mode 100644
index 00000000000000..0cec0f7d970352
--- /dev/null
+++ b/fs/bcachefs/recovery_passes.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "backpointers.h"
+#include "btree_gc.h"
+#include "btree_node_scan.h"
+#include "ec.h"
+#include "fsck.h"
+#include "inode.h"
+#include "journal.h"
+#include "lru.h"
+#include "logged_ops.h"
+#include "rebalance.h"
+#include "recovery.h"
+#include "recovery_passes.h"
+#include "snapshot.h"
+#include "subvolume.h"
+#include "super.h"
+#include "super-io.h"
+
+const char * const bch2_recovery_passes[] = {
+#define x(_fn, ...) #_fn,
+ BCH_RECOVERY_PASSES()
+#undef x
+ NULL
+};
+
+static int bch2_check_allocations(struct bch_fs *c)
+{
+ return bch2_gc(c, true, false);
+}
+
+static int bch2_set_may_go_rw(struct bch_fs *c)
+{
+ struct journal_keys *keys = &c->journal_keys;
+
+ /*
+ * After we go RW, the journal keys buffer can't be modified (except for
+ * setting journal_key->overwritten: it will be accessed by multiple
+ * threads
+ */
+ move_gap(keys, keys->nr);
+
+ set_bit(BCH_FS_may_go_rw, &c->flags);
+
+ if (keys->nr || c->opts.fsck || !c->sb.clean || c->recovery_passes_explicit)
+ return bch2_fs_read_write_early(c);
+ return 0;
+}
+
+struct recovery_pass_fn {
+ int (*fn)(struct bch_fs *);
+ unsigned when;
+};
+
+static struct recovery_pass_fn recovery_pass_fns[] = {
+#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when },
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+static const u8 passes_to_stable_map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
+{
+ return passes_to_stable_map[pass];
+}
+
+u64 bch2_recovery_passes_to_stable(u64 v)
+{
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(passes_to_stable_map[i]);
+ return ret;
+}
+
+u64 bch2_recovery_passes_from_stable(u64 v)
+{
+ static const u8 map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+ };
+
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(map[i]);
+ return ret;
+}
+
+/*
+ * For when we need to rewind recovery passes and run a pass we skipped:
+ */
+int bch2_run_explicit_recovery_pass(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ if (c->recovery_passes_explicit & BIT_ULL(pass))
+ return 0;
+
+ bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
+ bch2_recovery_passes[pass], pass,
+ bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
+
+ c->recovery_passes_explicit |= BIT_ULL(pass);
+
+ if (c->curr_recovery_pass >= pass) {
+ c->curr_recovery_pass = pass;
+ c->recovery_passes_complete &= (1ULL << pass) >> 1;
+ return -BCH_ERR_restart_recovery;
+ } else {
+ return 0;
+ }
+}
+
+int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+
+ mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+ if (!test_bit_le64(s, ext->recovery_passes_required)) {
+ __set_bit_le64(s, ext->recovery_passes_required);
+ bch2_write_super(c);
+ }
+ mutex_unlock(&c->sb_lock);
+
+ return bch2_run_explicit_recovery_pass(c, pass);
+}
+
+static void bch2_clear_recovery_pass_required(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+
+ mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+ if (test_bit_le64(s, ext->recovery_passes_required)) {
+ __clear_bit_le64(s, ext->recovery_passes_required);
+ bch2_write_super(c);
+ }
+ mutex_unlock(&c->sb_lock);
+}
+
+u64 bch2_fsck_recovery_passes(void)
+{
+ u64 ret = 0;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
+ if (recovery_pass_fns[i].when & PASS_FSCK)
+ ret |= BIT_ULL(i);
+ return ret;
+}
+
+static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
+{
+ struct recovery_pass_fn *p = recovery_pass_fns + pass;
+
+ if (c->recovery_passes_explicit & BIT_ULL(pass))
+ return true;
+ if ((p->when & PASS_FSCK) && c->opts.fsck)
+ return true;
+ if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
+ return true;
+ if (p->when & PASS_ALWAYS)
+ return true;
+ return false;
+}
+
+static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
+{
+ struct recovery_pass_fn *p = recovery_pass_fns + pass;
+ int ret;
+
+ if (!(p->when & PASS_SILENT))
+ bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
+ bch2_recovery_passes[pass]);
+ ret = p->fn(c);
+ if (ret)
+ return ret;
+ if (!(p->when & PASS_SILENT))
+ bch2_print(c, KERN_CONT " done\n");
+
+ return 0;
+}
+
+int bch2_run_online_recovery_passes(struct bch_fs *c)
+{
+ int ret = 0;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
+ struct recovery_pass_fn *p = recovery_pass_fns + i;
+
+ if (!(p->when & PASS_ONLINE))
+ continue;
+
+ ret = bch2_run_recovery_pass(c, i);
+ if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
+ i = c->curr_recovery_pass;
+ continue;
+ }
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int bch2_run_recovery_passes(struct bch_fs *c)
+{
+ int ret = 0;
+
+ while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
+ if (c->opts.recovery_pass_last &&
+ c->curr_recovery_pass > c->opts.recovery_pass_last)
+ break;
+
+ if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
+ unsigned pass = c->curr_recovery_pass;
+
+ ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
+ if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
+ (ret && c->curr_recovery_pass < pass))
+ continue;
+ if (ret)
+ break;
+
+ c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
+ }
+
+ c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
+
+ if (!test_bit(BCH_FS_error, &c->flags))
+ bch2_clear_recovery_pass_required(c, c->curr_recovery_pass);
+
+ c->curr_recovery_pass++;
+ }
+
+ return ret;
+}
diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h
new file mode 100644
index 00000000000000..99b464e127b80b
--- /dev/null
+++ b/fs/bcachefs/recovery_passes.h
@@ -0,0 +1,17 @@
+#ifndef _BCACHEFS_RECOVERY_PASSES_H
+#define _BCACHEFS_RECOVERY_PASSES_H
+
+extern const char * const bch2_recovery_passes[];
+
+u64 bch2_recovery_passes_to_stable(u64 v);
+u64 bch2_recovery_passes_from_stable(u64 v);
+
+u64 bch2_fsck_recovery_passes(void);
+
+int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
+int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);
+
+int bch2_run_online_recovery_passes(struct bch_fs *);
+int bch2_run_recovery_passes(struct bch_fs *);
+
+#endif /* _BCACHEFS_RECOVERY_PASSES_H */
diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_passes_types.h
index 4959e95e7c7465..773aea9a0080fd 100644
--- a/fs/bcachefs/recovery_types.h
+++ b/fs/bcachefs/recovery_passes_types.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_RECOVERY_TYPES_H
-#define _BCACHEFS_RECOVERY_TYPES_H
+#ifndef _BCACHEFS_RECOVERY_PASSES_TYPES_H
+#define _BCACHEFS_RECOVERY_PASSES_TYPES_H
#define PASS_SILENT BIT(0)
#define PASS_FSCK BIT(1)
@@ -13,6 +13,7 @@
* must never change:
*/
#define BCH_RECOVERY_PASSES() \
+ x(scan_for_btree_nodes, 37, 0) \
x(check_topology, 4, 0) \
x(alloc_read, 0, PASS_ALWAYS) \
x(stripes_read, 1, PASS_ALWAYS) \
@@ -31,13 +32,13 @@
x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \
x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
x(bucket_gens_init, 17, 0) \
+ x(reconstruct_snapshots, 38, 0) \
x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \
x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \
x(fs_upgrade_for_subvolumes, 22, 0) \
- x(resume_logged_ops, 23, PASS_ALWAYS) \
x(check_inodes, 24, PASS_FSCK) \
x(check_extents, 25, PASS_FSCK) \
x(check_indirect_extents, 26, PASS_FSCK) \
@@ -47,6 +48,7 @@
x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \
x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
x(check_nlinks, 31, PASS_FSCK) \
+ x(resume_logged_ops, 23, PASS_ALWAYS) \
x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \
x(fix_reflink_p, 33, 0) \
x(set_fs_needs_rebalance, 34, 0) \
@@ -56,6 +58,7 @@ enum bch_recovery_pass {
#define x(n, id, when) BCH_RECOVERY_PASS_##n,
BCH_RECOVERY_PASSES()
#undef x
+ BCH_RECOVERY_PASS_NR
};
/* But we also need stable identifiers that can be used in the superblock */
@@ -65,4 +68,4 @@ enum bch_recovery_pass_stable {
#undef x
};
-#endif /* _BCACHEFS_RECOVERY_TYPES_H */
+#endif /* _BCACHEFS_RECOVERY_PASSES_TYPES_H */
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index c47c66c2b394dc..ff7864731a073d 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -185,8 +185,7 @@ not_found:
} else {
bkey_error_init(update);
update->k.p = p.k->p;
- update->k.p.offset = next_idx;
- update->k.size = next_idx - *idx;
+ update->k.size = p.k->size;
set_bkey_val_u64s(&update->k, 0);
}
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index cc2672c120312c..678b9c20e2514b 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -6,12 +6,15 @@
#include "replicas.h"
#include "super-io.h"
+#include <linux/sort.h>
+
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
struct bch_replicas_cpu *);
/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
-static int bch2_memcmp(const void *l, const void *r, size_t size)
+static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
+ size_t size = (size_t) priv;
return memcmp(l, r, size);
}
@@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
- eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
+ eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
+ bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}
static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
@@ -228,7 +232,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
verify_replicas_entry(search);
-#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size)
+#define entry_cmp(_l, _r) memcmp(_l, _r, entry_size)
idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
entry_cmp, search);
#undef entry_cmp
@@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
{
unsigned i;
- sort_cmp_size(cpu_r->entries,
- cpu_r->nr,
- cpu_r->entry_size,
- bch2_memcmp, NULL);
+ sort_r(cpu_r->entries,
+ cpu_r->nr,
+ cpu_r->entry_size,
+ bch2_memcmp, NULL,
+ (void *)(size_t)cpu_r->entry_size);
for (i = 0; i < cpu_r->nr; i++) {
struct bch_replicas_entry_v1 *e =
diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c
index 5980ba2563fe9f..35ca3f138de6fa 100644
--- a/fs/bcachefs/sb-clean.c
+++ b/fs/bcachefs/sb-clean.c
@@ -29,6 +29,14 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle
for (entry = clean->start;
entry < (struct jset_entry *) vstruct_end(&clean->field);
entry = vstruct_next(entry)) {
+ if (vstruct_end(entry) > vstruct_end(&clean->field)) {
+ bch_err(c, "journal entry (u64s %u) overran end of superblock clean section (u64s %u) by %zu",
+ le16_to_cpu(entry->u64s), le32_to_cpu(clean->field.u64s),
+ (u64 *) vstruct_end(entry) - (u64 *) vstruct_end(&clean->field));
+ bch2_sb_error_count(c, BCH_FSCK_ERR_sb_clean_entry_overrun);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+
ret = bch2_journal_entry_validate(c, NULL, entry,
le16_to_cpu(c->disk_sb.sb->version),
BCH_SB_BIG_ENDIAN(c->disk_sb.sb),
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index e4396cb0bacb03..a98ef940b7a328 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -7,7 +7,7 @@
#include "bcachefs.h"
#include "darray.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "super-io.h"
@@ -51,7 +51,10 @@
BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \
x(btree_subvolume_children, \
BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \
- BCH_FSCK_ERR_subvol_children_not_set)
+ BCH_FSCK_ERR_subvol_children_not_set) \
+ x(mi_btree_bitmap, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_btree_bitmap_not_marked)
#define DOWNGRADE_TABLE()
diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h
index 5178bf579f7c53..06c7a644f4a442 100644
--- a/fs/bcachefs/sb-errors_types.h
+++ b/fs/bcachefs/sb-errors_types.h
@@ -130,7 +130,7 @@
x(bucket_gens_nonzero_for_invalid_buckets, 122) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
x(need_discard_freespace_key_bad, 124) \
- x(backpointer_pos_wrong, 125) \
+ x(backpointer_bucket_offset_wrong, 125) \
x(backpointer_to_missing_device, 126) \
x(backpointer_to_missing_alloc, 127) \
x(backpointer_to_missing_ptr, 128) \
@@ -265,7 +265,14 @@
x(subvol_children_bad, 257) \
x(subvol_loop, 258) \
x(subvol_unreachable, 259) \
- x(btree_node_bkey_bad_u64s, 260)
+ x(btree_node_bkey_bad_u64s, 260) \
+ x(btree_node_topology_empty_interior_node, 261) \
+ x(btree_ptr_v2_min_key_bad, 262) \
+ x(btree_root_unreadable_and_scan_found_nothing, 263) \
+ x(snapshot_node_missing, 264) \
+ x(dup_backpointer_to_bad_csum_extent, 265) \
+ x(btree_bitmap_not_marked, 266) \
+ x(sb_clean_entry_overrun, 267)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index eff5ce18c69c06..5b8e621ac5eb57 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "btree_cache.h"
#include "disk_groups.h"
#include "opts.h"
#include "replicas.h"
@@ -426,3 +427,55 @@ void bch2_dev_errors_reset(struct bch_dev *ca)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
}
+
+/*
+ * Per member "range has btree nodes" bitmap:
+ *
+ * This is so that if we ever have to run the btree node scan to repair we don't
+ * have to scan full devices:
+ */
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
+{
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
+ if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev),
+ ptr->offset, btree_sectors(c)))
+ return false;
+ return true;
+}
+
+static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
+ u64 start, unsigned sectors)
+{
+ struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
+ u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);
+
+ u64 end = start + sectors;
+
+ int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
+ if (resize > 0) {
+ u64 new_bitmap = 0;
+
+ for (unsigned i = 0; i < 64; i++)
+ if (bitmap & BIT_ULL(i))
+ new_bitmap |= BIT_ULL(i >> resize);
+ bitmap = new_bitmap;
+ m->btree_bitmap_shift += resize;
+ }
+
+ for (unsigned bit = start >> m->btree_bitmap_shift;
+ (u64) bit << m->btree_bitmap_shift < end;
+ bit++)
+ bitmap |= BIT_ULL(bit);
+
+ m->btree_allocated_bitmap = cpu_to_le64(bitmap);
+}
+
+void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
+{
+ lockdep_assert_held(&c->sb_lock);
+
+ struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
+ __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
+}
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index be0a941832715a..5efa64eca5f85a 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -3,6 +3,7 @@
#define _BCACHEFS_SB_MEMBERS_H
#include "darray.h"
+#include "bkey_types.h"
extern char * const bch2_member_error_strs[];
@@ -220,6 +221,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
: 1,
.freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
.valid = bch2_member_exists(mi),
+ .btree_bitmap_shift = mi->btree_bitmap_shift,
+ .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap),
};
}
@@ -228,4 +231,22 @@ void bch2_sb_members_from_cpu(struct bch_fs *);
void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
void bch2_dev_errors_reset(struct bch_dev *);
+static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start, unsigned sectors)
+{
+ u64 end = start + sectors;
+
+ if (end > 64ULL << ca->mi.btree_bitmap_shift)
+ return false;
+
+ for (unsigned bit = start >> ca->mi.btree_bitmap_shift;
+ (u64) bit << ca->mi.btree_bitmap_shift < end;
+ bit++)
+ if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit)))
+ return false;
+ return true;
+}
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
+void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
+
#endif /* _BCACHEFS_SB_MEMBERS_H */
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 39debe814bf392..544322d5c25170 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -8,6 +8,7 @@
#include "errcode.h"
#include "error.h"
#include "fs.h"
+#include "recovery_passes.h"
#include "snapshot.h"
#include <linux/random.h>
@@ -93,8 +94,10 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans,
static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
{
- while (id && id < ancestor)
- id = __snapshot_t(t, id)->parent;
+ while (id && id < ancestor) {
+ const struct snapshot_t *s = __snapshot_t(t, id);
+ id = s ? s->parent : 0;
+ }
return id == ancestor;
}
@@ -110,6 +113,8 @@ static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancest
static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
{
const struct snapshot_t *s = __snapshot_t(t, id);
+ if (!s)
+ return 0;
if (s->skip[2] <= ancestor)
return s->skip[2];
@@ -120,6 +125,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances
return s->parent;
}
+static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
+{
+ const struct snapshot_t *s = __snapshot_t(t, id);
+ if (!s)
+ return false;
+
+ return test_bit(ancestor - id - 1, s->is_ancestor);
+}
+
bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
bool ret;
@@ -127,7 +141,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
rcu_read_lock();
struct snapshot_table *t = rcu_dereference(c->snapshots);
- if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
+ if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
goto out;
}
@@ -135,13 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
id = get_ancestor_below(t, id, ancestor);
- if (id && id < ancestor) {
- ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
+ ret = id && id < ancestor
+ ? test_ancestor_bitmap(t, id, ancestor)
+ : id == ancestor;
- EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
- } else {
- ret = id == ancestor;
- }
+ EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
out:
rcu_read_unlock();
@@ -151,36 +163,39 @@ out:
static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
- size_t new_size;
struct snapshot_table *new, *old;
- new_size = max(16UL, roundup_pow_of_two(idx + 1));
+ size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
+ size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
- new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
+ new = kvzalloc(new_bytes, GFP_KERNEL);
if (!new)
return NULL;
+ new->nr = new_size;
+
old = rcu_dereference_protected(c->snapshots, true);
if (old)
- memcpy(new->s,
- rcu_dereference_protected(c->snapshots, true)->s,
- sizeof(new->s[0]) * c->snapshot_table_size);
+ memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);
rcu_assign_pointer(c->snapshots, new);
- c->snapshot_table_size = new_size;
- kvfree_rcu_mightsleep(old);
+ kvfree_rcu(old, rcu);
- return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+ return &rcu_dereference_protected(c->snapshots,
+ lockdep_is_held(&c->snapshot_table_lock))->s[idx];
}
static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
+ struct snapshot_table *table =
+ rcu_dereference_protected(c->snapshots,
+ lockdep_is_held(&c->snapshot_table_lock));
lockdep_assert_held(&c->snapshot_table_lock);
- if (likely(idx < c->snapshot_table_size))
- return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+ if (likely(table && idx < table->nr))
+ return &table->s[idx];
return __snapshot_t_mut(c, id);
}
@@ -567,6 +582,13 @@ static int check_snapshot_tree(struct btree_trans *trans,
u32 subvol_id;
ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
+ bch_err_fn(c, ret);
+
+ if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
+ ret = 0;
+ goto err;
+ }
+
if (ret)
goto err;
@@ -724,7 +746,6 @@ static int check_snapshot(struct btree_trans *trans,
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
u32 real_depth;
struct printbuf buf = PRINTBUF;
- bool should_have_subvol;
u32 i, id;
int ret = 0;
@@ -770,7 +791,7 @@ static int check_snapshot(struct btree_trans *trans,
}
}
- should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
+ bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
!BCH_SNAPSHOT_DELETED(&s);
if (should_have_subvol) {
@@ -872,6 +893,154 @@ int bch2_check_snapshots(struct bch_fs *c)
return ret;
}
+static int check_snapshot_exists(struct btree_trans *trans, u32 id)
+{
+ struct bch_fs *c = trans->c;
+
+ if (bch2_snapshot_equiv(c, id))
+ return 0;
+
+ u32 tree_id;
+ int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
+ if (ret)
+ return ret;
+
+ struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot));
+ ret = PTR_ERR_OR_ZERO(snapshot);
+ if (ret)
+ return ret;
+
+ bkey_snapshot_init(&snapshot->k_i);
+ snapshot->k.p = POS(0, id);
+ snapshot->v.tree = cpu_to_le32(tree_id);
+ snapshot->v.btime.lo = cpu_to_le64(bch2_current_time(c));
+
+ return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
+ bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
+ bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?:
+ bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i));
+}
+
+/* Figure out which snapshot nodes belong in the same tree: */
+struct snapshot_tree_reconstruct {
+ enum btree_id btree;
+ struct bpos cur_pos;
+ snapshot_id_list cur_ids;
+ DARRAY(snapshot_id_list) trees;
+};
+
+static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
+{
+ darray_for_each(r->trees, i)
+ darray_exit(i);
+ darray_exit(&r->trees);
+ darray_exit(&r->cur_ids);
+}
+
+static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+ return r->btree == BTREE_ID_inodes
+ ? r->cur_pos.offset == pos.offset
+ : r->cur_pos.inode == pos.inode;
+}
+
+static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
+{
+ darray_for_each(*l, i)
+ if (snapshot_list_has_id(r, *i))
+ return true;
+ return false;
+}
+
+static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
+{
+ bool first = true;
+ darray_for_each(*s, i) {
+ if (!first)
+ prt_char(out, ' ');
+ first = false;
+ prt_printf(out, "%u", *i);
+ }
+}
+
+static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
+{
+ if (r->cur_ids.nr) {
+ darray_for_each(r->trees, i)
+ if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
+ int ret = snapshot_list_merge(c, i, &r->cur_ids);
+ if (ret)
+ return ret;
+ goto out;
+ }
+ darray_push(&r->trees, r->cur_ids);
+ darray_init(&r->cur_ids);
+ }
+out:
+ r->cur_ids.nr = 0;
+ return 0;
+}
+
+static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+ if (!same_snapshot(r, pos))
+ snapshot_tree_reconstruct_next(c, r);
+ r->cur_pos = pos;
+ return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
+}
+
+int bch2_reconstruct_snapshots(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct printbuf buf = PRINTBUF;
+ struct snapshot_tree_reconstruct r = {};
+ int ret = 0;
+
+ for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
+ if (btree_type_has_snapshots(btree)) {
+ r.btree = btree;
+
+ ret = for_each_btree_key(trans, iter, btree, POS_MIN,
+ BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({
+ get_snapshot_trees(c, &r, k.k->p);
+ }));
+ if (ret)
+ goto err;
+
+ snapshot_tree_reconstruct_next(c, &r);
+ }
+ }
+
+ darray_for_each(r.trees, t) {
+ printbuf_reset(&buf);
+ snapshot_id_list_to_text(&buf, t);
+
+ darray_for_each(*t, id) {
+ if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
+ c, snapshot_node_missing,
+ "snapshot node %u from tree %s missing", *id, buf.buf)) {
+ if (t->nr > 1) {
+ bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ goto err;
+ }
+
+ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_snapshot_exists(trans, *id));
+ if (ret)
+ goto err;
+ }
+ }
+ }
+fsck_err:
+err:
+ bch2_trans_put(trans);
+ snapshot_tree_reconstruct_exit(&r);
+ printbuf_exit(&buf);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
/*
* Mark a snapshot as deleted, for future cleanup:
*/
@@ -1682,6 +1851,20 @@ int bch2_snapshots_read(struct bch_fs *c)
POS_MIN, 0, k,
(set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
bch_err_fn(c, ret);
+
+ /*
+ * It's important that we check if we need to reconstruct snapshots
+ * before going RW, so we mark that pass as required in the superblock -
+ * otherwise, we could end up deleting keys with missing snapshot nodes
+ * instead
+ */
+ BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
+ test_bit(BCH_FS_may_go_rw, &c->flags));
+
+ if (bch2_err_matches(ret, EIO) ||
+ (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
+ ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
+
return ret;
}
diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
index 7c66ffc06385dd..b7d2fed37c4f31 100644
--- a/fs/bcachefs/snapshot.h
+++ b/fs/bcachefs/snapshot.h
@@ -33,7 +33,11 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
{
- return &t->s[U32_MAX - id];
+ u32 idx = U32_MAX - id;
+
+ return likely(t && idx < t->nr)
+ ? &t->s[idx]
+ : NULL;
}
static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
@@ -44,7 +48,8 @@ static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
{
rcu_read_lock();
- id = snapshot_t(c, id)->tree;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ id = s ? s->tree : 0;
rcu_read_unlock();
return id;
@@ -52,7 +57,8 @@ static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{
- return snapshot_t(c, id)->parent;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ return s ? s->parent : 0;
}
static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
@@ -66,19 +72,19 @@ static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
-#ifdef CONFIG_BCACHEFS_DEBUG
- u32 parent = snapshot_t(c, id)->parent;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ if (!s)
+ return 0;
- if (parent &&
- snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1)
+ u32 parent = s->parent;
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBU) &&
+ parent &&
+ s->depth != snapshot_t(c, parent)->depth + 1)
panic("id %u depth=%u parent %u depth=%u\n",
id, snapshot_t(c, id)->depth,
parent, snapshot_t(c, parent)->depth);
return parent;
-#else
- return snapshot_t(c, id)->parent;
-#endif
}
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
@@ -116,7 +122,8 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
{
- return snapshot_t(c, id)->equiv;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ return s ? s->equiv : 0;
}
static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
@@ -133,38 +140,22 @@ static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
return id == bch2_snapshot_equiv(c, id);
}
-static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
+static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
{
- const struct snapshot_t *s;
- bool ret;
-
rcu_read_lock();
- s = snapshot_t(c, id);
- ret = s->children[0];
+ const struct snapshot_t *s = snapshot_t(c, id);
+ int ret = s ? s->children[0] : -BCH_ERR_invalid_snapshot_node;
rcu_read_unlock();
return ret;
}
-static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
-{
- return !bch2_snapshot_is_internal_node(c, id);
-}
-
-static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
+static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
{
- const struct snapshot_t *s;
- u32 parent = __bch2_snapshot_parent(c, id);
-
- if (!parent)
- return 0;
-
- s = snapshot_t(c, __bch2_snapshot_parent(c, id));
- if (id == s->children[0])
- return s->children[1];
- if (id == s->children[1])
- return s->children[0];
- return 0;
+ int ret = bch2_snapshot_is_internal_node(c, id);
+ if (ret < 0)
+ return ret;
+ return !ret;
}
static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
@@ -218,15 +209,34 @@ static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list
static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id)
{
- int ret;
-
BUG_ON(snapshot_list_has_id(s, id));
- ret = darray_push(s, id);
+ int ret = darray_push(s, id);
if (ret)
bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
return ret;
}
+static inline int snapshot_list_add_nodup(struct bch_fs *c, snapshot_id_list *s, u32 id)
+{
+ int ret = snapshot_list_has_id(s, id)
+ ? 0
+ : darray_push(s, id);
+ if (ret)
+ bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
+ return ret;
+}
+
+static inline int snapshot_list_merge(struct bch_fs *c, snapshot_id_list *dst, snapshot_id_list *src)
+{
+ darray_for_each(*src, i) {
+ int ret = snapshot_list_add_nodup(c, dst, *i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s);
int bch2_snapshot_get_subvol(struct btree_trans *, u32,
@@ -238,6 +248,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
int bch2_check_snapshot_trees(struct bch_fs *);
int bch2_check_snapshots(struct bch_fs *);
+int bch2_reconstruct_snapshots(struct bch_fs *);
int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
void bch2_delete_dead_snapshots_work(struct work_struct *);
@@ -249,7 +260,7 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
struct bpos pos)
{
if (!btree_type_has_snapshots(id) ||
- bch2_snapshot_is_leaf(trans->c, pos.snapshot))
+ bch2_snapshot_is_leaf(trans->c, pos.snapshot) > 0)
return 0;
return __bch2_key_has_snapshot_overwrites(trans, id, pos);
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index ce7aed12194238..88a79c82327687 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -595,6 +595,78 @@ err:
return ret;
}
+int bch2_initialize_subvolumes(struct bch_fs *c)
+{
+ struct bkey_i_snapshot_tree root_tree;
+ struct bkey_i_snapshot root_snapshot;
+ struct bkey_i_subvolume root_volume;
+ int ret;
+
+ bkey_snapshot_tree_init(&root_tree.k_i);
+ root_tree.k.p.offset = 1;
+ root_tree.v.master_subvol = cpu_to_le32(1);
+ root_tree.v.root_snapshot = cpu_to_le32(U32_MAX);
+
+ bkey_snapshot_init(&root_snapshot.k_i);
+ root_snapshot.k.p.offset = U32_MAX;
+ root_snapshot.v.flags = 0;
+ root_snapshot.v.parent = 0;
+ root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
+ root_snapshot.v.tree = cpu_to_le32(1);
+ SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
+
+ bkey_subvolume_init(&root_volume.k_i);
+ root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
+ root_volume.v.flags = 0;
+ root_volume.v.snapshot = cpu_to_le32(U32_MAX);
+ root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
+
+ ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
+ bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
+ bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bch_inode_unpacked inode;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ if (!bkey_is_inode(k.k)) {
+ bch_err(trans->c, "root inode not found");
+ ret = -BCH_ERR_ENOENT_inode;
+ goto err;
+ }
+
+ ret = bch2_inode_unpack(k, &inode);
+ BUG_ON(ret);
+
+ inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
+
+ ret = bch2_inode_write(trans, &iter, &inode);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+/* set bi_subvol on root inode */
+int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
+{
+ int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
+ __bch2_fs_upgrade_for_subvolumes(trans));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
int bch2_fs_subvolumes_init(struct bch_fs *c)
{
INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h
index 903c05162c0688..d2015d549bd2a3 100644
--- a/fs/bcachefs/subvolume.h
+++ b/fs/bcachefs/subvolume.h
@@ -37,6 +37,9 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *);
int bch2_subvolume_unlink(struct btree_trans *, u32);
int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);
+int bch2_initialize_subvolumes(struct bch_fs *);
+int bch2_fs_upgrade_for_subvolumes(struct bch_fs *);
+
int bch2_fs_subvolumes_init(struct bch_fs *);
#endif /* _BCACHEFS_SUBVOLUME_H */
diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h
index ae644adfc39168..9b10c8947828e0 100644
--- a/fs/bcachefs/subvolume_types.h
+++ b/fs/bcachefs/subvolume_types.h
@@ -20,6 +20,8 @@ struct snapshot_t {
};
struct snapshot_table {
+ struct rcu_head rcu;
+ size_t nr;
#ifndef RUST_BINDGEN
DECLARE_FLEX_ARRAY(struct snapshot_t, s);
#else
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index ad28e370b6404c..08ea3dbbbe97ce 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -8,7 +8,7 @@
#include "journal.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "quota.h"
#include "sb-clean.h"
@@ -143,7 +143,7 @@ void bch2_free_super(struct bch_sb_handle *sb)
{
kfree(sb->bio);
if (!IS_ERR_OR_NULL(sb->s_bdev_file))
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
kfree(sb->holder);
kfree(sb->sb_name);
@@ -527,9 +527,11 @@ static void bch2_sb_update(struct bch_fs *c)
memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
- if (ext)
+ if (ext) {
le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
sizeof(c->sb.errors_silent) * 8);
+ c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
+ }
for_each_member_device(c, ca) {
struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
@@ -698,8 +700,11 @@ retry:
return -ENOMEM;
sb->sb_name = kstrdup(path, GFP_KERNEL);
- if (!sb->sb_name)
- return -ENOMEM;
+ if (!sb->sb_name) {
+ ret = -ENOMEM;
+ prt_printf(&err, "error allocating memory for sb_name");
+ goto err;
+ }
#ifndef __KERNEL__
if (opt_get(*opts, direct_io) == false)
@@ -1162,6 +1167,11 @@ static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
kfree(errors_silent);
}
+
+ prt_printf(out, "Btrees with missing data:");
+ prt_tab(out);
+ prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data));
+ prt_newline(out);
}
static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 1ad6e5cd9476c8..88e214c609bb2b 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -15,6 +15,7 @@
#include "btree_gc.h"
#include "btree_journal_iter.h"
#include "btree_key_cache.h"
+#include "btree_node_scan.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_write_buffer.h"
@@ -287,8 +288,13 @@ static void __bch2_fs_read_only(struct bch_fs *c)
if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
!test_bit(BCH_FS_emergency_ro, &c->flags))
set_bit(BCH_FS_clean_shutdown, &c->flags);
+
bch2_fs_journal_stop(&c->journal);
+ bch_info(c, "%sshutdown complete, journal seq %llu",
+ test_bit(BCH_FS_clean_shutdown, &c->flags) ? "" : "un",
+ c->journal.seq_ondisk);
+
/*
* After stopping journal:
*/
@@ -365,7 +371,7 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_emergency_ro, &c->flags) &&
test_bit(BCH_FS_started, &c->flags) &&
test_bit(BCH_FS_clean_shutdown, &c->flags) &&
- !c->opts.norecovery) {
+ c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) {
BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
BUG_ON(atomic_read(&c->btree_cache.dirty));
BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
@@ -510,7 +516,8 @@ err:
int bch2_fs_read_write(struct bch_fs *c)
{
- if (c->opts.norecovery)
+ if (c->opts.recovery_pass_last &&
+ c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay)
return -BCH_ERR_erofs_norecovery;
if (c->opts.nochanges)
@@ -535,7 +542,9 @@ static void __bch2_fs_free(struct bch_fs *c)
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_exit(&c->times[i]);
+ bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
+ bch2_fs_allocator_background_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
bch2_fs_snapshots_exit(c);
@@ -559,6 +568,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
bch2_journal_keys_put_initial(c);
+ bch2_find_btree_nodes_exit(&c->found_btree_nodes);
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
@@ -1015,8 +1025,16 @@ int bch2_fs_start(struct bch_fs *c)
for_each_online_member(c, ca)
bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
+ struct bch_sb_field_ext *ext =
+ bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
mutex_unlock(&c->sb_lock);
+ if (!ext) {
+ bch_err(c, "insufficient space in superblock for sb_field_ext");
+ ret = -BCH_ERR_ENOSPC_sb;
+ goto err;
+ }
+
for_each_rw_member(c, ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h
index ec784d975f6655..11bcef170c2c22 100644
--- a/fs/bcachefs/super_types.h
+++ b/fs/bcachefs/super_types.h
@@ -37,6 +37,8 @@ struct bch_member_cpu {
u8 durability;
u8 freespace_initialized;
u8 valid;
+ u8 btree_bitmap_shift;
+ u64 btree_allocated_bitmap;
};
#endif /* _BCACHEFS_SUPER_TYPES_H */
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index c86a93a8d8fc81..5be92fe3f4ea4e 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -17,7 +17,6 @@
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_update.h"
-#include "btree_update_interior.h"
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
@@ -26,6 +25,7 @@
#include "ec.h"
#include "inode.h"
#include "journal.h"
+#include "journal_reclaim.h"
#include "keylist.h"
#include "move.h"
#include "movinggc.h"
@@ -139,6 +139,7 @@ do { \
write_attribute(trigger_gc);
write_attribute(trigger_discards);
write_attribute(trigger_invalidates);
+write_attribute(trigger_journal_flush);
write_attribute(prune_cache);
write_attribute(btree_wakeup);
rw_attribute(btree_gc_periodic);
@@ -166,7 +167,6 @@ read_attribute(btree_write_stats);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
read_attribute(journal_debug);
-read_attribute(btree_updates);
read_attribute(btree_cache);
read_attribute(btree_key_cache);
read_attribute(stripes_heap);
@@ -415,9 +415,6 @@ SHOW(bch2_fs)
if (attr == &sysfs_journal_debug)
bch2_journal_debug_to_text(out, &c->journal);
- if (attr == &sysfs_btree_updates)
- bch2_btree_updates_to_text(out, c);
-
if (attr == &sysfs_btree_cache)
bch2_btree_cache_to_text(out, c);
@@ -505,7 +502,7 @@ STORE(bch2_fs)
/* Debugging: */
- if (!test_bit(BCH_FS_rw, &c->flags))
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
return -EROFS;
if (attr == &sysfs_prune_cache) {
@@ -538,6 +535,11 @@ STORE(bch2_fs)
if (attr == &sysfs_trigger_invalidates)
bch2_do_invalidates(c);
+ if (attr == &sysfs_trigger_journal_flush) {
+ bch2_journal_flush_all_pins(&c->journal);
+ bch2_journal_meta(&c->journal);
+ }
+
#ifdef CONFIG_BCACHEFS_TESTS
if (attr == &sysfs_perf_test) {
char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
@@ -558,6 +560,7 @@ STORE(bch2_fs)
size = ret;
}
#endif
+ bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
return size;
}
SYSFS_OPS(bch2_fs);
@@ -639,7 +642,6 @@ SYSFS_OPS(bch2_fs_internal);
struct attribute *bch2_fs_internal_files[] = {
&sysfs_flags,
&sysfs_journal_debug,
- &sysfs_btree_updates,
&sysfs_btree_cache,
&sysfs_btree_key_cache,
&sysfs_new_stripes,
@@ -657,6 +659,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_trigger_gc,
&sysfs_trigger_discards,
&sysfs_trigger_invalidates,
+ &sysfs_trigger_journal_flush,
&sysfs_prune_cache,
&sysfs_btree_wakeup,
diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
index b3fe9fc577470f..bfec656f94c075 100644
--- a/fs/bcachefs/tests.c
+++ b/fs/bcachefs/tests.c
@@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek(&iter);
+ k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX));
ret = bkey_err(k);
if (ret)
goto err;
diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c
index 940db15d6a939b..b1af7ac430f662 100644
--- a/fs/bcachefs/thread_with_file.c
+++ b/fs/bcachefs/thread_with_file.c
@@ -294,16 +294,27 @@ static int thread_with_stdio_fn(void *arg)
return 0;
}
-int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
- const struct thread_with_stdio_ops *ops)
+void bch2_thread_with_stdio_init(struct thread_with_stdio *thr,
+ const struct thread_with_stdio_ops *ops)
{
stdio_buf_init(&thr->stdio.input);
stdio_buf_init(&thr->stdio.output);
thr->ops = ops;
+}
+int __bch2_run_thread_with_stdio(struct thread_with_stdio *thr)
+{
return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, thread_with_stdio_fn);
}
+int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
+ const struct thread_with_stdio_ops *ops)
+{
+ bch2_thread_with_stdio_init(thr, ops);
+
+ return __bch2_run_thread_with_stdio(thr);
+}
+
int bch2_run_thread_with_stdout(struct thread_with_stdio *thr,
const struct thread_with_stdio_ops *ops)
{
diff --git a/fs/bcachefs/thread_with_file.h b/fs/bcachefs/thread_with_file.h
index af54ea8f5b0ff8..1d63d14d7dcae8 100644
--- a/fs/bcachefs/thread_with_file.h
+++ b/fs/bcachefs/thread_with_file.h
@@ -63,6 +63,9 @@ struct thread_with_stdio {
const struct thread_with_stdio_ops *ops;
};
+void bch2_thread_with_stdio_init(struct thread_with_stdio *,
+ const struct thread_with_stdio_ops *);
+int __bch2_run_thread_with_stdio(struct thread_with_stdio *);
int bch2_run_thread_with_stdio(struct thread_with_stdio *,
const struct thread_with_stdio_ops *);
int bch2_run_thread_with_stdout(struct thread_with_stdio *,
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 216fadf16928b9..92c6ad75e702ab 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -707,149 +707,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
}
}
-static int alignment_ok(const void *base, size_t align)
-{
- return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
- ((unsigned long)base & (align - 1)) == 0;
-}
-
-static void u32_swap(void *a, void *b, size_t size)
-{
- u32 t = *(u32 *)a;
- *(u32 *)a = *(u32 *)b;
- *(u32 *)b = t;
-}
-
-static void u64_swap(void *a, void *b, size_t size)
-{
- u64 t = *(u64 *)a;
- *(u64 *)a = *(u64 *)b;
- *(u64 *)b = t;
-}
-
-static void generic_swap(void *a, void *b, size_t size)
-{
- char t;
-
- do {
- t = *(char *)a;
- *(char *)a++ = *(char *)b;
- *(char *)b++ = t;
- } while (--size > 0);
-}
-
-static inline int do_cmp(void *base, size_t n, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- size_t l, size_t r)
-{
- return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
- base + inorder_to_eytzinger0(r, n) * size,
- size);
-}
-
-static inline void do_swap(void *base, size_t n, size_t size,
- void (*swap_func)(void *, void *, size_t),
- size_t l, size_t r)
-{
- swap_func(base + inorder_to_eytzinger0(l, n) * size,
- base + inorder_to_eytzinger0(r, n) * size,
- size);
-}
-
-void eytzinger0_sort(void *base, size_t n, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t))
-{
- int i, c, r;
-
- if (!swap_func) {
- if (size == 4 && alignment_ok(base, 4))
- swap_func = u32_swap;
- else if (size == 8 && alignment_ok(base, 8))
- swap_func = u64_swap;
- else
- swap_func = generic_swap;
- }
-
- /* heapify */
- for (i = n / 2 - 1; i >= 0; --i) {
- for (r = i; r * 2 + 1 < n; r = c) {
- c = r * 2 + 1;
-
- if (c + 1 < n &&
- do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
- c++;
-
- if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
- break;
-
- do_swap(base, n, size, swap_func, r, c);
- }
- }
-
- /* sort */
- for (i = n - 1; i > 0; --i) {
- do_swap(base, n, size, swap_func, 0, i);
-
- for (r = 0; r * 2 + 1 < i; r = c) {
- c = r * 2 + 1;
-
- if (c + 1 < i &&
- do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
- c++;
-
- if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
- break;
-
- do_swap(base, n, size, swap_func, r, c);
- }
- }
-}
-
-void sort_cmp_size(void *base, size_t num, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t size))
-{
- /* pre-scale counters for performance */
- int i = (num/2 - 1) * size, n = num * size, c, r;
-
- if (!swap_func) {
- if (size == 4 && alignment_ok(base, 4))
- swap_func = u32_swap;
- else if (size == 8 && alignment_ok(base, 8))
- swap_func = u64_swap;
- else
- swap_func = generic_swap;
- }
-
- /* heapify */
- for ( ; i >= 0; i -= size) {
- for (r = i; r * 2 + size < n; r = c) {
- c = r * 2 + size;
- if (c < n - size &&
- cmp_func(base + c, base + c + size, size) < 0)
- c += size;
- if (cmp_func(base + r, base + c, size) >= 0)
- break;
- swap_func(base + r, base + c, size);
- }
- }
-
- /* sort */
- for (i = n - size; i > 0; i -= size) {
- swap_func(base, base + i, size);
- for (r = 0; r * 2 + size < i; r = c) {
- c = r * 2 + size;
- if (c < i - size &&
- cmp_func(base + c, base + c + size, size) < 0)
- c += size;
- if (cmp_func(base + r, base + c, size) >= 0)
- break;
- swap_func(base + r, base + c, size);
- }
- }
-}
-
#if 0
void eytzinger1_test(void)
{
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 175aee3074c7d5..5cf885b09986ac 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -631,10 +631,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
memset(s + bytes, c, rem);
}
-void sort_cmp_size(void *base, size_t num, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t));
-
/* just the memmove, doesn't update @_nr */
#define __array_insert_item(_array, _nr, _pos) \
memmove(&(_array)[(_pos) + 1], \
@@ -792,9 +788,27 @@ static inline int copy_from_user_errcode(void *to, const void __user *from, unsi
#endif
+static inline void mod_bit(long nr, volatile unsigned long *addr, bool v)
+{
+ if (v)
+ set_bit(nr, addr);
+ else
+ clear_bit(nr, addr);
+}
+
static inline void __set_bit_le64(size_t bit, __le64 *addr)
{
addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
}
+static inline void __clear_bit_le64(size_t bit, __le64 *addr)
+{
+ addr[bit / 64] &= ~cpu_to_le64(BIT_ULL(bit % 64));
+}
+
+static inline bool test_bit_le64(size_t bit, __le64 *addr)
+{
+ return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0;
+}
+
#endif /* _BCACHEFS_UTIL_H */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5397b552fbeb54..ac178ad388239a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1928,7 +1928,7 @@ static void free_note_info(struct elf_note_info *info)
threads = t->next;
WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
for (i = 1; i < info->thread_notes; ++i)
- kfree(t->notes[i].data);
+ kvfree(t->notes[i].data);
kfree(t);
}
kfree(info->psinfo.data);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 1920ed69279b58..3314249e867474 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1359,7 +1359,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
rcu_read_unlock();
- strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
+ get_task_comm(psinfo->pr_fname, p);
return 0;
}
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index c1e6a5bbeeaffe..58110c96866736 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -2776,20 +2776,14 @@ struct btrfs_data_container *init_data_container(u32 total_bytes)
size_t alloc_bytes;
alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
- data = kvmalloc(alloc_bytes, GFP_KERNEL);
+ data = kvzalloc(alloc_bytes, GFP_KERNEL);
if (!data)
return ERR_PTR(-ENOMEM);
- if (total_bytes >= sizeof(*data)) {
+ if (total_bytes >= sizeof(*data))
data->bytes_left = total_bytes - sizeof(*data);
- data->bytes_missing = 0;
- } else {
+ else
data->bytes_missing = sizeof(*data) - total_bytes;
- data->bytes_left = 0;
- }
-
- data->elem_cnt = 0;
- data->elem_missed = 0;
return data;
}
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 5f7587ca1ca772..1e09aeea69c22e 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1559,7 +1559,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* needing to allocate extents from the block group.
*/
used = btrfs_space_info_used(space_info, true);
- if (space_info->total_bytes - block_group->length < used) {
+ if (space_info->total_bytes - block_group->length < used &&
+ block_group->zone_unusable < block_group->length) {
/*
* Add a reference for the list, compensate for the ref
* drop under the "next" label for the
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index dd6f566a383f00..121ab890bd0557 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1133,6 +1133,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
if (ret)
return ret;
+ ret = btrfs_record_root_in_trans(trans, node->root);
+ if (ret)
+ return ret;
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
return ret;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index beedd6ed64d39b..257d044bca9158 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3464,6 +3464,14 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (root_id != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_ref generic_ref = { 0 };
+ /*
+ * Assert that the extent buffer is not cleared due to
+ * EXTENT_BUFFER_ZONED_ZEROOUT. Please refer
+ * btrfs_clear_buffer_dirty() and btree_csum_one_bio() for
+ * detail.
+ */
+ ASSERT(btrfs_header_bytenr(buf) != 0);
+
btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
buf->start, buf->len, parent,
btrfs_header_owner(buf));
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7441245b1ceb15..2776112dbdf8d4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -681,31 +681,21 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
gfp_t extra_gfp)
{
+ const gfp_t gfp = GFP_NOFS | extra_gfp;
unsigned int allocated;
for (allocated = 0; allocated < nr_pages;) {
unsigned int last = allocated;
- allocated = alloc_pages_bulk_array(GFP_NOFS | extra_gfp,
- nr_pages, page_array);
-
- if (allocated == nr_pages)
- return 0;
-
- /*
- * During this iteration, no page could be allocated, even
- * though alloc_pages_bulk_array() falls back to alloc_page()
- * if it could not bulk-allocate. So we must be out of memory.
- */
- if (allocated == last) {
+ allocated = alloc_pages_bulk_array(gfp, nr_pages, page_array);
+ if (unlikely(allocated == last)) {
+ /* No progress, fail and do cleanup. */
for (int i = 0; i < allocated; i++) {
__free_page(page_array[i]);
page_array[i] = NULL;
}
return -ENOMEM;
}
-
- memalloc_retry_wait(GFP_NOFS);
}
return 0;
}
@@ -4154,7 +4144,7 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
* The actual zeroout of the buffer will happen later in
* btree_csum_one_bio.
*/
- if (btrfs_is_zoned(fs_info)) {
+ if (btrfs_is_zoned(fs_info) && test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
set_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags);
return;
}
@@ -4193,6 +4183,7 @@ void set_extent_buffer_dirty(struct extent_buffer *eb)
num_folios = num_extent_folios(eb);
WARN_ON(atomic_read(&eb->refs) == 0);
WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
+ WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags));
if (!was_dirty) {
bool subpage = eb->fs_info->nodesize < PAGE_SIZE;
@@ -4333,6 +4324,19 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags))
goto done;
+ /*
+ * Between the initial test_bit(EXTENT_BUFFER_UPTODATE) and the above
+ * test_and_set_bit(EXTENT_BUFFER_READING), someone else could have
+ * started and finished reading the same eb. In this case, UPTODATE
+ * will now be set, and we shouldn't read it in again.
+ */
+ if (unlikely(test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))) {
+ clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
+ smp_mb__after_atomic();
+ wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
+ return 0;
+ }
+
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
check_buffer_tree_ref(eb);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 347ca13d15a975..24a048210b1571 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -309,7 +309,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
btrfs_warn(fs_info,
"no extent map found for inode %llu (root %lld) when unpinning extent range [%llu, %llu), generation %llu",
btrfs_ino(inode), btrfs_root_id(inode->root),
- start, len, gen);
+ start, start + len, gen);
ret = -ENOENT;
goto out;
}
@@ -318,7 +318,7 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
btrfs_warn(fs_info,
"found extent map for inode %llu (root %lld) with unexpected start offset %llu when unpinning extent range [%llu, %llu), generation %llu",
btrfs_ino(inode), btrfs_root_id(inode->root),
- em->start, start, len, gen);
+ em->start, start, start + len, gen);
ret = -EUCLEAN;
goto out;
}
@@ -340,9 +340,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
em->mod_len = em->len;
}
- free_extent_map(em);
out:
write_unlock(&tree->lock);
+ free_extent_map(em);
return ret;
}
@@ -629,13 +629,13 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
*/
ret = merge_extent_mapping(em_tree, existing,
em, start);
- if (ret) {
+ if (WARN_ON(ret)) {
free_extent_map(em);
*em_in = NULL;
- WARN_ONCE(ret,
-"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu\n",
- existing->start, existing->len,
- orig_start, orig_len, start);
+ btrfs_warn(fs_info,
+"extent map merge error existing [%llu, %llu) with em [%llu, %llu) start %llu",
+ existing->start, extent_map_end(existing),
+ orig_start, orig_start + orig_len, start);
}
free_extent_map(existing);
}
@@ -817,7 +817,7 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
split->block_len = em->block_len;
split->orig_start = em->orig_start;
} else {
- const u64 diff = start + len - em->start;
+ const u64 diff = end - em->start;
split->block_len = split->len;
split->block_start += diff;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 37701531eeb1ba..7fed887e700c4e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1145,13 +1145,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
0, *alloc_hint, &ins, 1, 1);
if (ret) {
/*
- * Here we used to try again by going back to non-compressed
- * path for ENOSPC. But we can't reserve space even for
- * compressed size, how could it work for uncompressed size
- * which requires larger size? So here we directly go error
- * path.
+ * We can't reserve contiguous space for the compressed size.
+ * Unlikely, but it's possible that we could have enough
+ * non-contiguous space for the uncompressed size instead. So
+ * fall back to uncompressed.
*/
- goto out_free;
+ submit_uncompressed_range(inode, async_extent, locked_page);
+ goto done;
}
/* Here we're doing allocation and writeback of the compressed pages */
@@ -1203,7 +1203,6 @@ done:
out_free_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
-out_free:
mapping_set_error(inode->vfs_inode.i_mapping, -EIO);
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
@@ -2533,7 +2532,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
*/
if (bits & EXTENT_CLEAR_META_RESV &&
root != fs_info->tree_root)
- btrfs_delalloc_release_metadata(inode, len, false);
+ btrfs_delalloc_release_metadata(inode, len, true);
/* For sanity tests. */
if (btrfs_is_testing(fs_info))
@@ -4503,6 +4502,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
struct btrfs_trans_handle *trans;
struct btrfs_block_rsv block_rsv;
u64 root_flags;
+ u64 qgroup_reserved = 0;
int ret;
down_write(&fs_info->subvol_sem);
@@ -4547,12 +4547,20 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
goto out_undead;
+ qgroup_reserved = block_rsv.qgroup_rsv_reserved;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_release;
}
+ ret = btrfs_record_root_in_trans(trans, root);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out_end_trans;
+ }
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
@@ -4611,7 +4619,9 @@ out_end_trans:
ret = btrfs_end_transaction(trans);
inode->i_flags |= S_DEAD;
out_release:
- btrfs_subvolume_release_metadata(root, &block_rsv);
+ btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_undead:
if (ret) {
spin_lock(&dest->root_item_lock);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 294e31edec9d3b..0493272a7668ed 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -613,6 +613,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
int ret;
dev_t anon_dev;
u64 objectid;
+ u64 qgroup_reserved = 0;
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
if (!root_item)
@@ -650,13 +651,18 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
trans_num_items, false);
if (ret)
goto out_new_inode_args;
+ qgroup_reserved = block_rsv.qgroup_rsv_reserved;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_subvolume_release_metadata(root, &block_rsv);
- goto out_new_inode_args;
+ goto out_release_rsv;
}
+ ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+ if (ret)
+ goto out;
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
/* Tree log can't currently deal with an inode which is a new root. */
@@ -767,9 +773,11 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
out:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
- btrfs_subvolume_release_metadata(root, &block_rsv);
-
btrfs_end_transaction(trans);
+out_release_rsv:
+ btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_new_inode_args:
btrfs_new_inode_args_destroy(&new_inode_args);
out_inode:
@@ -791,6 +799,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_pending_snapshot *pending_snapshot;
unsigned int trans_num_items;
struct btrfs_trans_handle *trans;
+ struct btrfs_block_rsv *block_rsv;
+ u64 qgroup_reserved = 0;
int ret;
/* We do not support snapshotting right now. */
@@ -827,19 +837,19 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
goto free_pending;
}
- btrfs_init_block_rsv(&pending_snapshot->block_rsv,
- BTRFS_BLOCK_RSV_TEMP);
+ block_rsv = &pending_snapshot->block_rsv;
+ btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
* 1 to add dir item
* 1 to add dir index
* 1 to update parent inode item
*/
trans_num_items = create_subvol_num_items(inherit) + 3;
- ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
- &pending_snapshot->block_rsv,
+ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, block_rsv,
trans_num_items, false);
if (ret)
goto free_pending;
+ qgroup_reserved = block_rsv->qgroup_rsv_reserved;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
@@ -852,6 +862,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
ret = PTR_ERR(trans);
goto fail;
}
+ ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+ if (ret) {
+ btrfs_end_transaction(trans);
+ goto fail;
+ }
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->pending_snapshot = pending_snapshot;
@@ -881,7 +898,9 @@ fail:
if (ret && pending_snapshot->snap)
pending_snapshot->snap->anon_dev = 0;
btrfs_put_root(pending_snapshot->snap);
- btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
+ btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
free_pending:
if (pending_snapshot->anon_dev)
free_anon_bdev(pending_snapshot->anon_dev);
@@ -3739,15 +3758,43 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
goto drop_write;
}
- down_write(&fs_info->subvol_sem);
-
switch (sa->cmd) {
case BTRFS_QUOTA_CTL_ENABLE:
case BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA:
+ down_write(&fs_info->subvol_sem);
ret = btrfs_quota_enable(fs_info, sa);
+ up_write(&fs_info->subvol_sem);
break;
case BTRFS_QUOTA_CTL_DISABLE:
+ /*
+ * Lock the cleaner mutex to prevent races with concurrent
+ * relocation, because relocation may be building backrefs for
+ * blocks of the quota root while we are deleting the root. This
+ * is like dropping fs roots of deleted snapshots/subvolumes, we
+ * need the same protection.
+ *
+ * This also prevents races between concurrent tasks trying to
+ * disable quotas, because we will unlock and relock
+ * qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
+ *
+ * We take this here because we have the dependency of
+ *
+ * inode_lock -> subvol_sem
+ *
+ * because of rename. With relocation we can prealloc extents,
+ * so that makes the dependency chain
+ *
+ * cleaner_mutex -> inode_lock -> subvol_sem
+ *
+ * so we must take the cleaner_mutex here before we take the
+ * subvol_sem. The deadlock can't actually happen, but this
+ * quiets lockdep.
+ */
+ mutex_lock(&fs_info->cleaner_mutex);
+ down_write(&fs_info->subvol_sem);
ret = btrfs_quota_disable(fs_info);
+ up_write(&fs_info->subvol_sem);
+ mutex_unlock(&fs_info->cleaner_mutex);
break;
default:
ret = -EINVAL;
@@ -3755,7 +3802,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
}
kfree(sa);
- up_write(&fs_info->subvol_sem);
drop_write:
mnt_drop_write_file(file);
return ret;
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index c96dd66fd0f722..210d9c82e2ae05 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -7,7 +7,7 @@
#ifdef CONFIG_PRINTK
-#define STATE_STRING_PREFACE ": state "
+#define STATE_STRING_PREFACE " state "
#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT + 1)
/*
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5f90f0605b12f7..364acc9bbe730c 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1342,16 +1342,10 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
lockdep_assert_held_write(&fs_info->subvol_sem);
/*
- * Lock the cleaner mutex to prevent races with concurrent relocation,
- * because relocation may be building backrefs for blocks of the quota
- * root while we are deleting the root. This is like dropping fs roots
- * of deleted snapshots/subvolumes, we need the same protection.
- *
- * This also prevents races between concurrent tasks trying to disable
- * quotas, because we will unlock and relock qgroup_ioctl_lock across
- * BTRFS_FS_QUOTA_ENABLED changes.
+ * Relocation will mess with backrefs, so make sure we have the
+ * cleaner_mutex held to protect us from relocate.
*/
- mutex_lock(&fs_info->cleaner_mutex);
+ lockdep_assert_held(&fs_info->cleaner_mutex);
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root)
@@ -1373,9 +1367,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
btrfs_qgroup_wait_for_completion(fs_info, false);
+ /*
+ * We have nothing held here and no trans handle, just return the error
+ * if there is one.
+ */
ret = flush_reservations(fs_info);
if (ret)
- goto out_unlock_cleaner;
+ return ret;
/*
* 1 For the root item
@@ -1439,9 +1437,6 @@ out:
btrfs_end_transaction(trans);
else if (trans)
ret = btrfs_commit_transaction(trans);
-out_unlock_cleaner:
- mutex_unlock(&fs_info->cleaner_mutex);
-
return ret;
}
@@ -4495,6 +4490,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
BTRFS_QGROUP_RSV_META_PREALLOC);
trace_qgroup_meta_convert(root, num_bytes);
qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
+ if (!sb_rdonly(fs_info->sb))
+ add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS);
}
/*
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 4bb538a372ce56..7007f9e0c97282 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -548,13 +548,3 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
}
return ret;
}
-
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- u64 qgroup_to_release;
-
- btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release);
- btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release);
-}
diff --git a/fs/btrfs/root-tree.h b/fs/btrfs/root-tree.h
index 6f929cf3bd4967..8f5739e732b9b6 100644
--- a/fs/btrfs/root-tree.h
+++ b/fs/btrfs/root-tree.h
@@ -18,8 +18,6 @@ struct btrfs_trans_handle;
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int nitems, bool use_global_rsv);
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv);
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
u64 ref_id, u64 dirid, u64 sequence,
const struct fscrypt_str *name);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c4bd0e60db5925..4b22cfe9a98cb0 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1012,6 +1012,7 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
struct btrfs_fs_info *fs_info = sctx->fs_info;
int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
stripe->bg->length);
+ unsigned long repaired;
int mirror;
int i;
@@ -1078,16 +1079,15 @@ out:
* Submit the repaired sectors. For zoned case, we cannot do repair
* in-place, but queue the bg to be relocated.
*/
- if (btrfs_is_zoned(fs_info)) {
- if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+ bitmap_andnot(&repaired, &stripe->init_error_bitmap, &stripe->error_bitmap,
+ stripe->nr_sectors);
+ if (!sctx->readonly && !bitmap_empty(&repaired, stripe->nr_sectors)) {
+ if (btrfs_is_zoned(fs_info)) {
btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start);
- } else if (!sctx->readonly) {
- unsigned long repaired;
-
- bitmap_andnot(&repaired, &stripe->init_error_bitmap,
- &stripe->error_bitmap, stripe->nr_sectors);
- scrub_write_sectors(sctx, stripe, repaired, false);
- wait_scrub_stripe_io(stripe);
+ } else {
+ scrub_write_sectors(sctx, stripe, repaired, false);
+ wait_scrub_stripe_io(stripe);
+ }
}
scrub_stripe_report_errors(sctx, stripe);
@@ -2812,7 +2812,17 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
gen = btrfs_get_last_trans_committed(fs_info);
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
- bytenr = btrfs_sb_offset(i);
+ ret = btrfs_sb_log_location(scrub_dev, i, 0, &bytenr);
+ if (ret == -ENOENT)
+ break;
+
+ if (ret) {
+ spin_lock(&sctx->stat_lock);
+ sctx->stat.super_errors++;
+ spin_unlock(&sctx->stat_lock);
+ continue;
+ }
+
if (bytenr + BTRFS_SUPER_INFO_SIZE >
scrub_dev->commit_total_bytes)
break;
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 253cce7ffecfe5..47b5d301038eed 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -847,6 +847,11 @@ static int test_case_7(struct btrfs_fs_info *fs_info)
goto out;
}
+ if (em->block_start != SZ_32K + SZ_4K) {
+ test_err("em->block_start is %llu, expected 36K", em->block_start);
+ goto out;
+ }
+
free_extent_map(em);
read_lock(&em_tree->lock);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 46e8426adf4f15..85f359e0e0a7f2 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -745,14 +745,6 @@ again:
h->reloc_reserved = reloc_reserved;
}
- /*
- * Now that we have found a transaction to be a part of, convert the
- * qgroup reservation from prealloc to pertrans. A different transaction
- * can't race in and free our pertrans out from under us.
- */
- if (qgroup_reserved)
- btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
-
got_it:
if (!current->journal_info)
current->journal_info = h;
@@ -786,8 +778,15 @@ got_it:
* not just freed.
*/
btrfs_end_transaction(h);
- return ERR_PTR(ret);
+ goto reserve_fail;
}
+ /*
+ * Now that we have found a transaction to be a part of, convert the
+ * qgroup reservation from prealloc to pertrans. A different transaction
+ * can't race in and free our pertrans out from under us.
+ */
+ if (qgroup_reserved)
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
return h;
@@ -1495,6 +1494,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
radix_tree_tag_clear(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
+ btrfs_qgroup_free_meta_all_pertrans(root);
spin_unlock(&fs_info->fs_roots_radix_lock);
btrfs_free_log(trans, root);
@@ -1519,7 +1519,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
if (ret2)
return ret2;
spin_lock(&fs_info->fs_roots_radix_lock);
- btrfs_qgroup_free_meta_all_pertrans(root);
}
}
spin_unlock(&fs_info->fs_roots_radix_lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1dc1f1946ae0eb..ef6bd2f4251b52 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -692,6 +692,16 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
device->bdev = file_bdev(bdev_file);
clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+ if (device->devt != device->bdev->bd_dev) {
+ btrfs_warn(NULL,
+ "device %s maj:min changed from %d:%d to %d:%d",
+ device->name->str, MAJOR(device->devt),
+ MINOR(device->devt), MAJOR(device->bdev->bd_dev),
+ MINOR(device->bdev->bd_dev));
+
+ device->devt = device->bdev->bd_dev;
+ }
+
fs_devices->open_devices++;
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -1174,23 +1184,30 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *device;
struct btrfs_device *latest_dev = NULL;
struct btrfs_device *tmp_device;
+ int ret = 0;
list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
dev_list) {
- int ret;
+ int ret2;
- ret = btrfs_open_one_device(fs_devices, device, flags, holder);
- if (ret == 0 &&
+ ret2 = btrfs_open_one_device(fs_devices, device, flags, holder);
+ if (ret2 == 0 &&
(!latest_dev || device->generation > latest_dev->generation)) {
latest_dev = device;
- } else if (ret == -ENODATA) {
+ } else if (ret2 == -ENODATA) {
fs_devices->num_devices--;
list_del(&device->dev_list);
btrfs_free_device(device);
}
+ if (ret == 0 && ret2 != 0)
+ ret = ret2;
}
- if (fs_devices->open_devices == 0)
+
+ if (fs_devices->open_devices == 0) {
+ if (ret)
+ return ret;
return -EINVAL;
+ }
fs_devices->opened = 1;
fs_devices->latest_dev = latest_dev;
@@ -3438,6 +3455,7 @@ again:
* alignment and size).
*/
ret = -EUCLEAN;
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
goto error;
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 5a3d5ec75c5a94..4cba80b34387c1 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1574,11 +1574,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
if (!map)
return -EINVAL;
- cache->physical_map = btrfs_clone_chunk_map(map, GFP_NOFS);
- if (!cache->physical_map) {
- ret = -ENOMEM;
- goto out;
- }
+ cache->physical_map = map;
zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS);
if (!zone_info) {
@@ -1690,7 +1686,6 @@ out:
}
bitmap_free(active);
kfree(zone_info);
- btrfs_free_chunk_map(map);
return ret;
}
@@ -2175,6 +2170,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
struct btrfs_chunk_map *map;
const bool is_metadata = (block_group->flags &
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
+ struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
int ret = 0;
int i;
@@ -2250,6 +2246,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
btrfs_clear_data_reloc_bg(block_group);
spin_unlock(&block_group->lock);
+ down_read(&dev_replace->rwsem);
map = block_group->physical_map;
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *device = map->stripes[i].dev;
@@ -2266,13 +2263,16 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
zinfo->zone_size >> SECTOR_SHIFT);
memalloc_nofs_restore(nofs_flags);
- if (ret)
+ if (ret) {
+ up_read(&dev_replace->rwsem);
return ret;
+ }
if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
zinfo->reserved_active_zones++;
btrfs_dev_clear_active_zone(device, physical);
}
+ up_read(&dev_replace->rwsem);
if (!fully_written)
btrfs_dec_block_group_ro(block_group);
diff --git a/fs/buffer.c b/fs/buffer.c
index 4f73d23c2c4691..ed698caa8834b2 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -687,30 +687,37 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);
-/*
- * Add a page to the dirty page list.
- *
- * It is a sad fact of life that this function is called from several places
- * deeply under spinlocking. It may not sleep.
- *
- * If the page has buffers, the uptodate buffers are set dirty, to preserve
- * dirty-state coherency between the page and the buffers. It the page does
- * not have buffers then when they are later attached they will all be set
- * dirty.
- *
- * The buffers are dirtied before the page is dirtied. There's a small race
- * window in which a writepage caller may see the page cleanness but not the
- * buffer dirtiness. That's fine. If this code were to set the page dirty
- * before the buffers, a concurrent writepage caller could clear the page dirty
- * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
- * page on the dirty page list.
- *
- * We use i_private_lock to lock against try_to_free_buffers while using the
- * page's buffer list. Also use this to protect against clean buffers being
- * added to the page after it was set dirty.
- *
- * FIXME: may need to call ->reservepage here as well. That's rather up to the
- * address_space though.
+/**
+ * block_dirty_folio - Mark a folio as dirty.
+ * @mapping: The address space containing this folio.
+ * @folio: The folio to mark dirty.
+ *
+ * Filesystems which use buffer_heads can use this function as their
+ * ->dirty_folio implementation. Some filesystems need to do a little
+ * work before calling this function. Filesystems which do not use
+ * buffer_heads should call filemap_dirty_folio() instead.
+ *
+ * If the folio has buffers, the uptodate buffers are set dirty, to
+ * preserve dirty-state coherency between the folio and the buffers.
+ * Buffers added to a dirty folio are created dirty.
+ *
+ * The buffers are dirtied before the folio is dirtied. There's a small
+ * race window in which writeback may see the folio cleanness but not the
+ * buffer dirtiness. That's fine. If this code were to set the folio
+ * dirty before the buffers, writeback could clear the folio dirty flag,
+ * see a bunch of clean buffers and we'd end up with dirty buffers/clean
+ * folio on the dirty folio list.
+ *
+ * We use i_private_lock to lock against try_to_free_buffers() while
+ * using the folio's buffer list. This also prevents clean buffers
+ * being added to the folio after it was set dirty.
+ *
+ * Context: May only be called from process context. Does not sleep.
+ * Caller must ensure that @folio cannot be truncated during this call,
+ * typically by holding the folio lock or having a page in the folio
+ * mapped and holding the page table lock.
+ *
+ * Return: True if the folio was dirtied; false if it was already dirtied.
*/
bool block_dirty_folio(struct address_space *mapping, struct folio *folio)
{
@@ -1219,26 +1226,28 @@ void mark_buffer_write_io_error(struct buffer_head *bh)
}
EXPORT_SYMBOL(mark_buffer_write_io_error);
-/*
- * Decrement a buffer_head's reference count. If all buffers against a page
- * have zero reference count, are clean and unlocked, and if the page is clean
- * and unlocked then try_to_free_buffers() may strip the buffers from the page
- * in preparation for freeing it (sometimes, rarely, buffers are removed from
- * a page but it ends up not being freed, and buffers may later be reattached).
+/**
+ * __brelse - Release a buffer.
+ * @bh: The buffer to release.
+ *
+ * This variant of brelse() can be called if @bh is guaranteed to not be NULL.
*/
-void __brelse(struct buffer_head * buf)
+void __brelse(struct buffer_head *bh)
{
- if (atomic_read(&buf->b_count)) {
- put_bh(buf);
+ if (atomic_read(&bh->b_count)) {
+ put_bh(bh);
return;
}
WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
}
EXPORT_SYMBOL(__brelse);
-/*
- * bforget() is like brelse(), except it discards any
- * potentially dirty data.
+/**
+ * __bforget - Discard any dirty data in a buffer.
+ * @bh: The buffer to forget.
+ *
+ * This variant of bforget() can be called if @bh is guaranteed to not
+ * be NULL.
*/
void __bforget(struct buffer_head *bh)
{
@@ -1415,6 +1424,11 @@ EXPORT_SYMBOL(__find_get_block);
* @size: The size of buffer_heads for this @bdev.
* @gfp: The memory allocation flags to use.
*
+ * The returned buffer head has its reference count incremented, but is
+ * not locked. The caller should call brelse() when it has finished
+ * with the buffer. The buffer may not be uptodate. If needed, the
+ * caller can bring it uptodate either by reading it or overwriting it.
+ *
* Return: The buffer head, or NULL if memory could not be allocated.
*/
struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block,
@@ -1446,20 +1460,29 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
EXPORT_SYMBOL(__breadahead);
/**
- * __bread_gfp() - reads a specified block and returns the bh
- * @bdev: the block_device to read from
- * @block: number of block
- * @size: size (in bytes) to read
- * @gfp: page allocation flag
- *
- * Reads a specified block, and returns buffer head that contains it.
- * The page cache can be allocated from non-movable area
- * not to prevent page migration if you set gfp to zero.
- * It returns NULL if the block was unreadable.
+ * __bread_gfp() - Read a block.
+ * @bdev: The block device to read from.
+ * @block: Block number in units of block size.
+ * @size: The block size of this device in bytes.
+ * @gfp: Not page allocation flags; see below.
+ *
+ * You are not expected to call this function. You should use one of
+ * sb_bread(), sb_bread_unmovable() or __bread().
+ *
+ * Read a specified block, and return the buffer head that refers to it.
+ * If @gfp is 0, the memory will be allocated using the block device's
+ * default GFP flags. If @gfp is __GFP_MOVABLE, the memory may be
+ * allocated from a movable area. Do not pass in a complete set of
+ * GFP flags.
+ *
+ * The returned buffer head has its refcount increased. The caller should
+ * call brelse() when it has finished with the buffer.
+ *
+ * Context: May sleep waiting for I/O.
+ * Return: NULL if the block was unreadable.
*/
-struct buffer_head *
-__bread_gfp(struct block_device *bdev, sector_t block,
- unsigned size, gfp_t gfp)
+struct buffer_head *__bread_gfp(struct block_device *bdev, sector_t block,
+ unsigned size, gfp_t gfp)
{
struct buffer_head *bh;
@@ -2861,26 +2884,6 @@ int sync_dirty_buffer(struct buffer_head *bh)
}
EXPORT_SYMBOL(sync_dirty_buffer);
-/*
- * try_to_free_buffers() checks if all the buffers on this particular folio
- * are unused, and releases them if so.
- *
- * Exclusion against try_to_free_buffers may be obtained by either
- * locking the folio or by holding its mapping's i_private_lock.
- *
- * If the folio is dirty but all the buffers are clean then we need to
- * be sure to mark the folio clean as well. This is because the folio
- * may be against a block device, and a later reattachment of buffers
- * to a dirty folio will set *all* buffers dirty. Which would corrupt
- * filesystem data on the same device.
- *
- * The same applies to regular filesystem folios: if all the buffers are
- * clean then we set the folio clean and proceed. To do that, we require
- * total exclusion from block_dirty_folio(). That is obtained with
- * i_private_lock.
- *
- * try_to_free_buffers() is non-blocking.
- */
static inline int buffer_busy(struct buffer_head *bh)
{
return atomic_read(&bh->b_count) |
@@ -2914,6 +2917,30 @@ failed:
return false;
}
+/**
+ * try_to_free_buffers - Release buffers attached to this folio.
+ * @folio: The folio.
+ *
+ * If any buffers are in use (dirty, under writeback, elevated refcount),
+ * no buffers will be freed.
+ *
+ * If the folio is dirty but all the buffers are clean then we need to
+ * be sure to mark the folio clean as well. This is because the folio
+ * may be against a block device, and a later reattachment of buffers
+ * to a dirty folio will set *all* buffers dirty. Which would corrupt
+ * filesystem data on the same device.
+ *
+ * The same applies to regular filesystem folios: if all the buffers are
+ * clean then we set the folio clean and proceed. To do that, we require
+ * total exclusion from block_dirty_folio(). That is obtained with
+ * i_private_lock.
+ *
+ * Exclusion against try_to_free_buffers may be obtained by either
+ * locking the folio or by holding its mapping's i_private_lock.
+ *
+ * Context: Process context. @folio must be locked. Will not sleep.
+ * Return: true if all buffers attached to this folio were freed.
+ */
bool try_to_free_buffers(struct folio *folio)
{
struct address_space * const mapping = folio->mapping;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1340d77124ae4d..ee9caf7916fb95 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -795,8 +795,10 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
ihold(inode);
if (wbc->sync_mode == WB_SYNC_NONE &&
- ceph_inode_to_fs_client(inode)->write_congested)
+ ceph_inode_to_fs_client(inode)->write_congested) {
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
+ }
wait_on_page_fscache(page);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 55051ad09c1919..c4941ba245ac3d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4783,13 +4783,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
ceph_vinop(inode));
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
ci->i_ceph_flags |= CEPH_I_FLUSH;
if (!list_empty(&ci->i_cap_delay_list))
list_del_init(&ci->i_cap_delay_list);
list_add_tail(&ci->i_cap_delay_list,
&mdsc->cap_unlink_delay_list);
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
/*
* Fire the work immediately, because the MDS maybe
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3ab9c268a8bb39..360b686c3c67cf 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2504,7 +2504,7 @@ static void ceph_cap_unlink_work(struct work_struct *work)
struct ceph_client *cl = mdsc->fsc->client;
doutc(cl, "begin\n");
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
while (!list_empty(&mdsc->cap_unlink_delay_list)) {
struct ceph_inode_info *ci;
struct inode *inode;
@@ -2516,15 +2516,15 @@ static void ceph_cap_unlink_work(struct work_struct *work)
inode = igrab(&ci->netfs.inode);
if (inode) {
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
doutc(cl, "on %p %llx.%llx\n", inode,
ceph_vinop(inode));
ceph_check_caps(ci, CHECK_CAPS_FLUSH);
iput(inode);
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
}
}
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
doutc(cl, "done\n");
}
@@ -5404,7 +5404,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
INIT_LIST_HEAD(&mdsc->cap_wait_list);
spin_lock_init(&mdsc->cap_delay_lock);
INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
- spin_lock_init(&mdsc->cap_unlink_delay_lock);
INIT_LIST_HEAD(&mdsc->snap_flush_list);
spin_lock_init(&mdsc->snap_flush_lock);
mdsc->last_cap_flush_tid = 1;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 03f8ff00874f72..b88e8041522412 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -461,9 +461,8 @@ struct ceph_mds_client {
struct delayed_work delayed_work; /* delayed work */
unsigned long last_renew_caps; /* last time we renewed our caps */
struct list_head cap_delay_list; /* caps with delayed release */
- spinlock_t cap_delay_lock; /* protects cap_delay_list */
struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */
- spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */
+ spinlock_t cap_delay_lock; /* protects cap_delay_list and cap_unlink_delay_list */
struct list_head snap_flush_list; /* cap_snaps ready to flush */
spinlock_t snap_flush_lock;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 39e75131fd5aa0..9901057a15ba79 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb)
sb->s_mtd = NULL;
} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
sync_blockdev(sb->s_bdev);
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
}
kfree(sbi);
}
diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c
index b4002aea7cdb9d..40de69860dcf97 100644
--- a/fs/crypto/inline_crypt.c
+++ b/fs/crypto/inline_crypt.c
@@ -284,7 +284,7 @@ static bool bh_get_inode_and_lblk_num(const struct buffer_head *bh,
const struct inode **inode_ret,
u64 *lblk_num_ret)
{
- struct page *page = bh->b_page;
+ struct folio *folio = bh->b_folio;
const struct address_space *mapping;
const struct inode *inode;
@@ -292,13 +292,13 @@ static bool bh_get_inode_and_lblk_num(const struct buffer_head *bh,
* The ext4 journal (jbd2) can submit a buffer_head it directly created
* for a non-pagecache page. fscrypt doesn't care about these.
*/
- mapping = page_mapping(page);
+ mapping = folio_mapping(folio);
if (!mapping)
return false;
inode = mapping->host;
*inode_ret = inode;
- *lblk_num_ret = ((u64)page->index << (PAGE_SHIFT - inode->i_blkbits)) +
+ *lblk_num_ret = ((u64)folio->index << (PAGE_SHIFT - inode->i_blkbits)) +
(bh_offset(bh) >> inode->i_blkbits);
return true;
}
diff --git a/fs/dax.c b/fs/dax.c
index 423fc1607dfae5..becb4a6920c6aa 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1207,17 +1207,17 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
struct vm_area_struct *vma = vmf->vma;
struct inode *inode = mapping->host;
pgtable_t pgtable = NULL;
- struct page *zero_page;
+ struct folio *zero_folio;
spinlock_t *ptl;
pmd_t pmd_entry;
pfn_t pfn;
- zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
+ zero_folio = mm_get_huge_zero_folio(vmf->vma->vm_mm);
- if (unlikely(!zero_page))
+ if (unlikely(!zero_folio))
goto fallback;
- pfn = page_to_pfn_t(zero_page);
+ pfn = page_to_pfn_t(&zero_folio->page);
*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn,
DAX_PMD | DAX_ZERO_PAGE);
@@ -1237,17 +1237,17 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
mm_inc_nr_ptes(vma->vm_mm);
}
- pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
+ pmd_entry = mk_pmd(&zero_folio->page, vmf->vma->vm_page_prot);
pmd_entry = pmd_mkhuge(pmd_entry);
set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
spin_unlock(ptl);
- trace_dax_pmd_load_hole(inode, vmf, zero_page, *entry);
+ trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
return VM_FAULT_NOPAGE;
fallback:
if (pgtable)
pte_free(vma->vm_mm, pgtable);
- trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
+ trace_dax_pmd_load_hole_fallback(inode, vmf, zero_folio, *entry);
return VM_FAULT_FALLBACK;
}
#else
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 8aff1a724805a6..62da538d91cbd4 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -151,7 +151,7 @@ static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
if (WARN_ON(len == 0))
source = NETFS_INVALID_READ;
if (source != NETFS_READ_FROM_CACHE) {
- erofs_err(NULL, "prepare_read failed (source %d)", source);
+ erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source);
return -EIO;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 39c67119f43bfd..d28ccfc0352b1a 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -84,13 +84,6 @@ struct erofs_dev_context {
bool flatdev;
};
-struct erofs_fs_context {
- struct erofs_mount_opts opt;
- struct erofs_dev_context *devs;
- char *fsid;
- char *domain_id;
-};
-
/* all filesystem-wide lz4 configurations */
struct erofs_sb_lz4_info {
/* # of pages needed for EROFS lz4 rolling decompression */
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 69308fd73e4a92..30b49b2eee5340 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -370,18 +370,18 @@ out:
return ret;
}
-static void erofs_default_options(struct erofs_fs_context *ctx)
+static void erofs_default_options(struct erofs_sb_info *sbi)
{
#ifdef CONFIG_EROFS_FS_ZIP
- ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
- ctx->opt.max_sync_decompress_pages = 3;
- ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
+ sbi->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
+ sbi->opt.max_sync_decompress_pages = 3;
+ sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
#endif
#ifdef CONFIG_EROFS_FS_XATTR
- set_opt(&ctx->opt, XATTR_USER);
+ set_opt(&sbi->opt, XATTR_USER);
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
- set_opt(&ctx->opt, POSIX_ACL);
+ set_opt(&sbi->opt, POSIX_ACL);
#endif
}
@@ -426,17 +426,16 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
{
#ifdef CONFIG_FS_DAX
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *sbi = fc->s_fs_info;
switch (mode) {
case EROFS_MOUNT_DAX_ALWAYS:
- warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
- set_opt(&ctx->opt, DAX_ALWAYS);
- clear_opt(&ctx->opt, DAX_NEVER);
+ set_opt(&sbi->opt, DAX_ALWAYS);
+ clear_opt(&sbi->opt, DAX_NEVER);
return true;
case EROFS_MOUNT_DAX_NEVER:
- set_opt(&ctx->opt, DAX_NEVER);
- clear_opt(&ctx->opt, DAX_ALWAYS);
+ set_opt(&sbi->opt, DAX_NEVER);
+ clear_opt(&sbi->opt, DAX_ALWAYS);
return true;
default:
DBG_BUGON(1);
@@ -451,7 +450,7 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
static int erofs_fc_parse_param(struct fs_context *fc,
struct fs_parameter *param)
{
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *sbi = fc->s_fs_info;
struct fs_parse_result result;
struct erofs_device_info *dif;
int opt, ret;
@@ -464,9 +463,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
case Opt_user_xattr:
#ifdef CONFIG_EROFS_FS_XATTR
if (result.boolean)
- set_opt(&ctx->opt, XATTR_USER);
+ set_opt(&sbi->opt, XATTR_USER);
else
- clear_opt(&ctx->opt, XATTR_USER);
+ clear_opt(&sbi->opt, XATTR_USER);
#else
errorfc(fc, "{,no}user_xattr options not supported");
#endif
@@ -474,16 +473,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
case Opt_acl:
#ifdef CONFIG_EROFS_FS_POSIX_ACL
if (result.boolean)
- set_opt(&ctx->opt, POSIX_ACL);
+ set_opt(&sbi->opt, POSIX_ACL);
else
- clear_opt(&ctx->opt, POSIX_ACL);
+ clear_opt(&sbi->opt, POSIX_ACL);
#else
errorfc(fc, "{,no}acl options not supported");
#endif
break;
case Opt_cache_strategy:
#ifdef CONFIG_EROFS_FS_ZIP
- ctx->opt.cache_strategy = result.uint_32;
+ sbi->opt.cache_strategy = result.uint_32;
#else
errorfc(fc, "compression not supported, cache_strategy ignored");
#endif
@@ -505,27 +504,27 @@ static int erofs_fc_parse_param(struct fs_context *fc,
kfree(dif);
return -ENOMEM;
}
- down_write(&ctx->devs->rwsem);
- ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL);
- up_write(&ctx->devs->rwsem);
+ down_write(&sbi->devs->rwsem);
+ ret = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL);
+ up_write(&sbi->devs->rwsem);
if (ret < 0) {
kfree(dif->path);
kfree(dif);
return ret;
}
- ++ctx->devs->extra_devices;
+ ++sbi->devs->extra_devices;
break;
#ifdef CONFIG_EROFS_FS_ONDEMAND
case Opt_fsid:
- kfree(ctx->fsid);
- ctx->fsid = kstrdup(param->string, GFP_KERNEL);
- if (!ctx->fsid)
+ kfree(sbi->fsid);
+ sbi->fsid = kstrdup(param->string, GFP_KERNEL);
+ if (!sbi->fsid)
return -ENOMEM;
break;
case Opt_domain_id:
- kfree(ctx->domain_id);
- ctx->domain_id = kstrdup(param->string, GFP_KERNEL);
- if (!ctx->domain_id)
+ kfree(sbi->domain_id);
+ sbi->domain_id = kstrdup(param->string, GFP_KERNEL);
+ if (!sbi->domain_id)
return -ENOMEM;
break;
#else
@@ -582,8 +581,7 @@ static const struct export_operations erofs_export_ops = {
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
- struct erofs_sb_info *sbi;
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
int err;
sb->s_magic = EROFS_SUPER_MAGIC;
@@ -591,19 +589,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_op = &erofs_sops;
- sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- if (!sbi)
- return -ENOMEM;
-
- sb->s_fs_info = sbi;
- sbi->opt = ctx->opt;
- sbi->devs = ctx->devs;
- ctx->devs = NULL;
- sbi->fsid = ctx->fsid;
- ctx->fsid = NULL;
- sbi->domain_id = ctx->domain_id;
- ctx->domain_id = NULL;
-
sbi->blkszbits = PAGE_SHIFT;
if (erofs_is_fscache_mode(sb)) {
sb->s_blocksize = PAGE_SIZE;
@@ -707,9 +692,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
static int erofs_fc_get_tree(struct fs_context *fc)
{
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *sbi = fc->s_fs_info;
- if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid)
+ if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid)
return get_tree_nodev(fc, erofs_fc_fill_super);
return get_tree_bdev(fc, erofs_fc_fill_super);
@@ -719,19 +704,19 @@ static int erofs_fc_reconfigure(struct fs_context *fc)
{
struct super_block *sb = fc->root->d_sb;
struct erofs_sb_info *sbi = EROFS_SB(sb);
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *new_sbi = fc->s_fs_info;
DBG_BUGON(!sb_rdonly(sb));
- if (ctx->fsid || ctx->domain_id)
+ if (new_sbi->fsid || new_sbi->domain_id)
erofs_info(sb, "ignoring reconfiguration for fsid|domain_id.");
- if (test_opt(&ctx->opt, POSIX_ACL))
+ if (test_opt(&new_sbi->opt, POSIX_ACL))
fc->sb_flags |= SB_POSIXACL;
else
fc->sb_flags &= ~SB_POSIXACL;
- sbi->opt = ctx->opt;
+ sbi->opt = new_sbi->opt;
fc->sb_flags |= SB_RDONLY;
return 0;
@@ -762,12 +747,15 @@ static void erofs_free_dev_context(struct erofs_dev_context *devs)
static void erofs_fc_free(struct fs_context *fc)
{
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *sbi = fc->s_fs_info;
- erofs_free_dev_context(ctx->devs);
- kfree(ctx->fsid);
- kfree(ctx->domain_id);
- kfree(ctx);
+ if (!sbi)
+ return;
+
+ erofs_free_dev_context(sbi->devs);
+ kfree(sbi->fsid);
+ kfree(sbi->domain_id);
+ kfree(sbi);
}
static const struct fs_context_operations erofs_context_ops = {
@@ -779,38 +767,35 @@ static const struct fs_context_operations erofs_context_ops = {
static int erofs_init_fs_context(struct fs_context *fc)
{
- struct erofs_fs_context *ctx;
+ struct erofs_sb_info *sbi;
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi)
return -ENOMEM;
- ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
- if (!ctx->devs) {
- kfree(ctx);
+
+ sbi->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
+ if (!sbi->devs) {
+ kfree(sbi);
return -ENOMEM;
}
- fc->fs_private = ctx;
+ fc->s_fs_info = sbi;
- idr_init(&ctx->devs->tree);
- init_rwsem(&ctx->devs->rwsem);
- erofs_default_options(ctx);
+ idr_init(&sbi->devs->tree);
+ init_rwsem(&sbi->devs->rwsem);
+ erofs_default_options(sbi);
fc->ops = &erofs_context_ops;
return 0;
}
static void erofs_kill_sb(struct super_block *sb)
{
- struct erofs_sb_info *sbi;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
- if (erofs_is_fscache_mode(sb))
+ if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid)
kill_anon_super(sb);
else
kill_block_super(sb);
- sbi = EROFS_SB(sb);
- if (!sbi)
- return;
-
erofs_free_dev_context(sbi->devs);
fs_put_dax(sbi->dax_dev, NULL);
erofs_fscache_unregister_fs(sb);
diff --git a/fs/exec.c b/fs/exec.c
index ff6f26671cfc02..0c5f06d08c355a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -67,6 +67,7 @@
#include <linux/time_namespace.h>
#include <linux/user_events.h>
#include <linux/rseq.h>
+#include <linux/ksm.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -268,6 +269,14 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
}
/*
+ * Need to be called with mmap write lock
+ * held, to avoid race with ksmd.
+ */
+ err = ksm_execve(mm);
+ if (err)
+ goto err_ksm;
+
+ /*
* Place the stack at the largest stack address the architecture
* supports. Later, we'll move this to an appropriate place. We don't
* use STACK_TOP because that can depend on attributes which aren't
@@ -288,6 +297,8 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
bprm->p = vma->vm_end - sizeof(void *);
return 0;
err:
+ ksm_exit(mm);
+err_ksm:
mmap_write_unlock(mm);
err_free:
bprm->vma = NULL;
@@ -895,6 +906,7 @@ int transfer_args_to_stack(struct linux_binprm *bprm,
goto out;
}
+ bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE;
*sp_location = sp;
out:
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cfb8449c731f9a..044135796f2b6e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5668,7 +5668,7 @@ failed_mount:
brelse(sbi->s_sbh);
if (sbi->s_journal_bdev_file) {
invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
- fput(sbi->s_journal_bdev_file);
+ bdev_fput(sbi->s_journal_bdev_file);
}
out_fail:
invalidate_bdev(sb->s_bdev);
@@ -5913,7 +5913,7 @@ static struct file *ext4_get_journal_blkdev(struct super_block *sb,
out_bh:
brelse(bh);
out_bdev:
- fput(bdev_file);
+ bdev_fput(bdev_file);
return ERR_PTR(errno);
}
@@ -5952,7 +5952,7 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb,
out_journal:
jbd2_journal_destroy(journal);
out_bdev:
- fput(bdev_file);
+ bdev_fput(bdev_file);
return ERR_PTR(errno);
}
@@ -7327,7 +7327,7 @@ static void ext4_kill_sb(struct super_block *sb)
kill_block_super(sb);
if (bdev_file)
- fput(bdev_file);
+ bdev_fput(bdev_file);
}
static struct file_system_type ext4_fs_type = {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d9494b5fc7c185..961e6ff77c7235 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -4082,11 +4082,12 @@ const struct address_space_operations f2fs_dblock_aops = {
void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
- struct address_space *mapping = page_mapping(page);
+ struct folio *folio = page_folio(page);
+ struct address_space *mapping = folio->mapping;
unsigned long flags;
xa_lock_irqsave(&mapping->i_pages, flags);
- __xa_clear_mark(&mapping->i_pages, page_index(page),
+ __xa_clear_mark(&mapping->i_pages, folio->index,
PAGECACHE_TAG_DIRTY);
xa_unlock_irqrestore(&mapping->i_pages, flags);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a6867f26f14183..a4bc26dfdb1af5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1558,7 +1558,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
for (i = 0; i < sbi->s_ndevs; i++) {
if (i > 0)
- fput(FDEV(i).bdev_file);
+ bdev_fput(FDEV(i).bdev_file);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
#endif
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 00235b8a1823af..acbec5bdd5210a 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -269,6 +269,18 @@ enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
/**
* fat_parse_long - Parse extended directory entry.
*
+ * @dir: Pointer to the inode that represents the directory.
+ * @pos: On input, contains the starting position to read from.
+ * On output, updated with the new position.
+ * @bh: Pointer to the buffer head that may be used for reading directory
+ * entries. May be updated.
+ * @de: On input, points to the current directory entry.
+ * On output, points to the next directory entry.
+ * @unicode: Pointer to a buffer where the parsed Unicode long filename will be
+ * stored.
+ * @nr_slots: Pointer to a variable that will store the number of longname
+ * slots found.
+ *
* This function returns zero on success, negative value on error, or one of
* the following:
*
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index b6cad106c37e44..0b2da7b7e2ad01 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -310,6 +310,10 @@ struct cuse_init_args {
/**
* cuse_process_init_reply - finish initializing CUSE channel
*
+ * @fm: The fuse mount information containing the CUSE connection.
+ * @args: The arguments passed to the init reply.
+ * @error: The error code signifying if any error occurred during the process.
+ *
* This function creates the character device and sets up all the
* required data structures for it. Please read the comment at the
* top of this file for high level overview.
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4a6df591add61c..2b0d4781f39484 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1321,6 +1321,7 @@ retry:
err = fuse_do_statx(inode, file, stat);
if (err == -ENOSYS) {
fc->no_statx = 1;
+ err = 0;
goto retry;
}
} else {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a56e7bffd0004e..b57ce41576407b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1362,7 +1362,7 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
bool *exclusive)
{
struct inode *inode = file_inode(iocb->ki_filp);
- struct fuse_file *ff = iocb->ki_filp->private_data;
+ struct fuse_inode *fi = get_fuse_inode(inode);
*exclusive = fuse_dio_wr_exclusive_lock(iocb, from);
if (*exclusive) {
@@ -1377,7 +1377,7 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
* have raced, so check it again.
*/
if (fuse_io_past_eof(iocb, from) ||
- fuse_file_uncached_io_start(inode, ff, NULL) != 0) {
+ fuse_inode_uncached_io_start(fi, NULL) != 0) {
inode_unlock_shared(inode);
inode_lock(inode);
*exclusive = true;
@@ -1388,13 +1388,13 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
{
struct inode *inode = file_inode(iocb->ki_filp);
- struct fuse_file *ff = iocb->ki_filp->private_data;
+ struct fuse_inode *fi = get_fuse_inode(inode);
if (exclusive) {
inode_unlock(inode);
} else {
/* Allow opens in caching mode after last parallel dio end */
- fuse_file_uncached_io_end(inode, ff);
+ fuse_inode_uncached_io_end(fi);
inode_unlock_shared(inode);
}
}
@@ -2574,8 +2574,10 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
* First mmap of direct_io file enters caching inode io mode.
* Also waits for parallel dio writers to go into serial mode
* (exclusive instead of shared lock).
+ * After first mmap, the inode stays in caching io mode until
+ * the direct_io file release.
*/
- rc = fuse_file_cached_io_start(inode, ff);
+ rc = fuse_file_cached_io_open(inode, ff);
if (rc)
return rc;
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index b24084b60864ee..f2391961031374 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1394,9 +1394,10 @@ int fuse_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
/* iomode.c */
-int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff);
-int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb);
-void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff);
+int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff);
+int fuse_inode_uncached_io_start(struct fuse_inode *fi,
+ struct fuse_backing *fb);
+void fuse_inode_uncached_io_end(struct fuse_inode *fi);
int fuse_file_io_open(struct file *file, struct inode *inode);
void fuse_file_io_release(struct fuse_file *ff, struct inode *inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3a5d888783353c..99e44ea7d8756d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -175,6 +175,7 @@ static void fuse_evict_inode(struct inode *inode)
}
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
+ WARN_ON(fi->iocachectr != 0);
WARN_ON(!list_empty(&fi->write_files));
WARN_ON(!list_empty(&fi->queued_writes));
}
diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c
index c653ddcf057872..c99e285f3183ef 100644
--- a/fs/fuse/iomode.c
+++ b/fs/fuse/iomode.c
@@ -21,12 +21,13 @@ static inline bool fuse_is_io_cache_wait(struct fuse_inode *fi)
}
/*
- * Start cached io mode.
+ * Called on cached file open() and on first mmap() of direct_io file.
+ * Takes cached_io inode mode reference to be dropped on file release.
*
* Blocks new parallel dio writes and waits for the in-progress parallel dio
* writes to complete.
*/
-int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff)
+int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff)
{
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -67,10 +68,9 @@ int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff)
return 0;
}
-static void fuse_file_cached_io_end(struct inode *inode, struct fuse_file *ff)
+static void fuse_file_cached_io_release(struct fuse_file *ff,
+ struct fuse_inode *fi)
{
- struct fuse_inode *fi = get_fuse_inode(inode);
-
spin_lock(&fi->lock);
WARN_ON(fi->iocachectr <= 0);
WARN_ON(ff->iomode != IOM_CACHED);
@@ -82,16 +82,15 @@ static void fuse_file_cached_io_end(struct inode *inode, struct fuse_file *ff)
}
/* Start strictly uncached io mode where cache access is not allowed */
-int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb)
+int fuse_inode_uncached_io_start(struct fuse_inode *fi, struct fuse_backing *fb)
{
- struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_backing *oldfb;
int err = 0;
spin_lock(&fi->lock);
/* deny conflicting backing files on same fuse inode */
oldfb = fuse_inode_backing(fi);
- if (oldfb && oldfb != fb) {
+ if (fb && oldfb && oldfb != fb) {
err = -EBUSY;
goto unlock;
}
@@ -99,12 +98,10 @@ int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struc
err = -ETXTBSY;
goto unlock;
}
- WARN_ON(ff->iomode != IOM_NONE);
fi->iocachectr--;
- ff->iomode = IOM_UNCACHED;
/* fuse inode holds a single refcount of backing file */
- if (!oldfb) {
+ if (fb && !oldfb) {
oldfb = fuse_inode_backing_set(fi, fb);
WARN_ON_ONCE(oldfb != NULL);
} else {
@@ -115,15 +112,29 @@ unlock:
return err;
}
-void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff)
+/* Takes uncached_io inode mode reference to be dropped on file release */
+static int fuse_file_uncached_io_open(struct inode *inode,
+ struct fuse_file *ff,
+ struct fuse_backing *fb)
{
struct fuse_inode *fi = get_fuse_inode(inode);
+ int err;
+
+ err = fuse_inode_uncached_io_start(fi, fb);
+ if (err)
+ return err;
+
+ WARN_ON(ff->iomode != IOM_NONE);
+ ff->iomode = IOM_UNCACHED;
+ return 0;
+}
+
+void fuse_inode_uncached_io_end(struct fuse_inode *fi)
+{
struct fuse_backing *oldfb = NULL;
spin_lock(&fi->lock);
WARN_ON(fi->iocachectr >= 0);
- WARN_ON(ff->iomode != IOM_UNCACHED);
- ff->iomode = IOM_NONE;
fi->iocachectr++;
if (!fi->iocachectr) {
wake_up(&fi->direct_io_waitq);
@@ -134,6 +145,15 @@ void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff)
fuse_backing_put(oldfb);
}
+/* Drop uncached_io reference from passthrough open */
+static void fuse_file_uncached_io_release(struct fuse_file *ff,
+ struct fuse_inode *fi)
+{
+ WARN_ON(ff->iomode != IOM_UNCACHED);
+ ff->iomode = IOM_NONE;
+ fuse_inode_uncached_io_end(fi);
+}
+
/*
* Open flags that are allowed in combination with FOPEN_PASSTHROUGH.
* A combination of FOPEN_PASSTHROUGH and FOPEN_DIRECT_IO means that read/write
@@ -163,7 +183,7 @@ static int fuse_file_passthrough_open(struct inode *inode, struct file *file)
return PTR_ERR(fb);
/* First passthrough file open denies caching inode io mode */
- err = fuse_file_uncached_io_start(inode, ff, fb);
+ err = fuse_file_uncached_io_open(inode, ff, fb);
if (!err)
return 0;
@@ -216,7 +236,7 @@ int fuse_file_io_open(struct file *file, struct inode *inode)
if (ff->open_flags & FOPEN_PASSTHROUGH)
err = fuse_file_passthrough_open(inode, file);
else
- err = fuse_file_cached_io_start(inode, ff);
+ err = fuse_file_cached_io_open(inode, ff);
if (err)
goto fail;
@@ -236,8 +256,10 @@ fail:
/* No more pending io and no new io possible to inode via open/mmapped file */
void fuse_file_io_release(struct fuse_file *ff, struct inode *inode)
{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
/*
- * Last parallel dio close allows caching inode io mode.
+ * Last passthrough file close allows caching inode io mode.
* Last caching file close exits caching inode io mode.
*/
switch (ff->iomode) {
@@ -245,10 +267,10 @@ void fuse_file_io_release(struct fuse_file *ff, struct inode *inode)
/* Nothing to do */
break;
case IOM_UNCACHED:
- fuse_file_uncached_io_end(inode, ff);
+ fuse_file_uncached_io_release(ff, fi);
break;
case IOM_CACHED:
- fuse_file_cached_io_end(inode, ff);
+ fuse_file_cached_io_release(ff, fi);
break;
}
}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 789af5c8fade9d..aa1626955b2cf5 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1718,7 +1718,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
struct buffer_head *dibh, *bh;
struct gfs2_holder rd_gh;
unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
- u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
+ unsigned int bsize = 1 << bsize_shift;
+ u64 lblock = (offset + bsize - 1) >> bsize_shift;
__u16 start_list[GFS2_MAX_META_HEIGHT];
__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
unsigned int start_aligned, end_aligned;
@@ -1729,7 +1730,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
u64 prev_bnr = 0;
__be64 *start, *end;
- if (offset >= maxsize) {
+ if (offset + bsize - 1 >= maxsize) {
/*
* The starting point lies beyond the allocated metadata;
* there are no blocks to deallocate.
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6502c7e776d195..2f4e88552d3f28 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -176,14 +176,12 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
- info.flags = 0;
info.length = len;
info.low_limit = current->mm->mmap_base;
info.high_limit = arch_get_mmap_end(addr, len, flags);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
return vm_unmapped_area(&info);
}
@@ -192,14 +190,13 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
- info.align_offset = 0;
addr = vm_unmapped_area(&info);
/*
@@ -249,11 +246,11 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
}
/*
- * Use mm->get_unmapped_area value as a hint to use topdown routine.
+ * Use MMF_TOPDOWN flag as a hint to use topdown routine.
* If architectures have special needs, they should define their own
* version of hugetlb_get_unmapped_area.
*/
- if (mm->get_unmapped_area == arch_get_unmapped_area_topdown)
+ if (test_bit(MMF_TOPDOWN, &mm->flags))
return hugetlb_get_unmapped_area_topdown(file, addr, len,
pgoff, flags);
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1d5abfdf0f22a6..fb0628e680c40f 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -769,7 +769,7 @@ static int ioctl_getfsuuid(struct file *file, void __user *argp)
struct fsuuid2 u = { .len = sb->s_uuid_len, };
if (!sb->s_uuid_len)
- return -ENOIOCTLCMD;
+ return -ENOTTY;
memcpy(&u.uuid[0], &sb->s_uuid, sb->s_uuid_len);
@@ -781,7 +781,7 @@ static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp)
struct super_block *sb = file_inode(file)->i_sb;
if (!strlen(sb->s_sysfs_name))
- return -ENOIOCTLCMD;
+ return -ENOTTY;
struct fs_sysfs_path u = {};
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 73389c68e25170..9609349e92e5e1 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1141,7 +1141,7 @@ journal_found:
lbmLogShutdown(log);
close: /* close external log device */
- fput(bdev_file);
+ bdev_fput(bdev_file);
free: /* free log descriptor */
mutex_unlock(&jfs_log_mutex);
@@ -1485,7 +1485,7 @@ int lmLogClose(struct super_block *sb)
bdev_file = log->bdev_file;
rc = lmLogShutdown(log);
- fput(bdev_file);
+ bdev_fput(bdev_file);
kfree(log);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e9df2f87072c68..8502ef68459b98 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -636,11 +636,18 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
* each file a separate locking class. Let's differentiate on
* whether the file has mmap or not for now.
*
- * Both paths of the branch look the same. They're supposed to
+ * For similar reasons, writable and readonly files are given different
+ * lockdep key, because the writable file /sys/power/resume may call vfs
+ * lookup helpers for arbitrary paths and readonly files can be read by
+ * overlayfs from vfs helpers when sysfs is a lower layer of overalyfs.
+ *
+ * All three cases look the same. They're supposed to
* look that way and give @of->mutex different static lockdep keys.
*/
if (has_mmap)
mutex_init(&of->mutex);
+ else if (file->f_mode & FMODE_WRITE)
+ mutex_init(&of->mutex);
else
mutex_init(&of->mutex);
diff --git a/fs/namei.c b/fs/namei.c
index ceb9ddf8dfdd4e..c5b2a25be7d048 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4050,6 +4050,8 @@ retry:
case 0: case S_IFREG:
error = vfs_create(idmap, path.dentry->d_inode,
dentry, mode, true);
+ if (!error)
+ security_path_post_mknod(idmap, dentry);
break;
case S_IFCHR: case S_IFBLK:
error = vfs_mknod(idmap, path.dentry->d_inode,
@@ -4060,11 +4062,6 @@ retry:
dentry, mode, 0);
break;
}
-
- if (error)
- goto out2;
-
- security_path_post_mknod(idmap, dentry);
out2:
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 9a0d32e4b422ad..267b622d923b1f 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -164,7 +164,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
enum netfs_how_to_modify howto;
enum netfs_folio_trace trace;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
- ssize_t written = 0, ret;
+ ssize_t written = 0, ret, ret2;
loff_t i_size, pos = iocb->ki_pos, from, to;
size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
bool maybe_trouble = false;
@@ -172,15 +172,14 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
) {
- if (pos < i_size_read(inode)) {
- ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
- if (ret < 0) {
- goto out;
- }
- }
-
wbc_attach_fdatawrite_inode(&wbc, mapping->host);
+ ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
+ if (ret < 0) {
+ wbc_detach_inode(&wbc);
+ goto out;
+ }
+
wreq = netfs_begin_writethrough(iocb, iter->count);
if (IS_ERR(wreq)) {
wbc_detach_inode(&wbc);
@@ -395,10 +394,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
out:
if (unlikely(wreq)) {
- ret = netfs_end_writethrough(wreq, iocb);
+ ret2 = netfs_end_writethrough(wreq, iocb);
wbc_detach_inode(&wbc);
- if (ret == -EIOCBQUEUED)
- return ret;
+ if (ret2 == -EIOCBQUEUED)
+ return ret2;
+ if (ret == 0)
+ ret = ret2;
}
iocb->ki_pos += written;
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index 5aa776b5a3e7f8..b17a9eb9b1488b 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -46,10 +46,7 @@ static inline void nfs_add_stats(const struct inode *inode,
nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
}
-static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void)
-{
- return alloc_percpu(struct nfs_iostats);
-}
+#define nfs_alloc_iostats() alloc_percpu(struct nfs_iostats)
static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats)
{
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 87c9547989f69e..e88aca0c6e8ef1 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -983,15 +983,7 @@ static struct workqueue_struct *callback_wq;
static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
{
trace_nfsd_cb_queue(cb->cb_clp, cb);
- return queue_delayed_work(callback_wq, &cb->cb_work, 0);
-}
-
-static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb,
- unsigned long msecs)
-{
- trace_nfsd_cb_queue(cb->cb_clp, cb);
- queue_delayed_work(callback_wq, &cb->cb_work,
- msecs_to_jiffies(msecs));
+ return queue_work(callback_wq, &cb->cb_work);
}
static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
@@ -1490,7 +1482,7 @@ static void
nfsd4_run_cb_work(struct work_struct *work)
{
struct nfsd4_callback *cb =
- container_of(work, struct nfsd4_callback, cb_work.work);
+ container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
int flags;
@@ -1502,16 +1494,8 @@ nfsd4_run_cb_work(struct work_struct *work)
clnt = clp->cl_cb_client;
if (!clnt) {
- if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
- nfsd41_destroy_cb(cb);
- else {
- /*
- * XXX: Ideally, we could wait for the client to
- * reconnect, but I haven't figured out how
- * to do that yet.
- */
- nfsd4_queue_cb_delayed(cb, 25);
- }
+ /* Callback channel broken, or client killed; give up: */
+ nfsd41_destroy_cb(cb);
return;
}
@@ -1544,7 +1528,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
cb->cb_ops = ops;
- INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work);
+ INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
cb->cb_need_restart = false;
cb->cb_holds_slot = false;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1a93c7fcf76c55..84d4093ca71317 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3042,12 +3042,9 @@ static void
nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- spin_lock(&nn->client_lock);
clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
- put_client_renew_locked(clp);
- spin_unlock(&nn->client_lock);
+ drop_client(clp);
}
static int
@@ -3831,15 +3828,20 @@ nfsd4_create_session(struct svc_rqst *rqstp,
else
cs_slot = &unconf->cl_cs_slot;
status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
- if (status) {
- if (status == nfserr_replay_cache) {
- status = nfsd4_replay_create_session(cr_ses, cs_slot);
- goto out_free_conn;
- }
+ switch (status) {
+ case nfs_ok:
+ cs_slot->sl_seqid++;
+ cr_ses->seqid = cs_slot->sl_seqid;
+ break;
+ case nfserr_replay_cache:
+ status = nfsd4_replay_create_session(cr_ses, cs_slot);
+ fallthrough;
+ case nfserr_jukebox:
+ /* The server MUST NOT cache NFS4ERR_DELAY */
+ goto out_free_conn;
+ default:
goto out_cache_error;
}
- cs_slot->sl_seqid++;
- cr_ses->seqid = cs_slot->sl_seqid;
/* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */
if (conf) {
@@ -3859,10 +3861,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
status = mark_client_expired_locked(old);
- if (status) {
- old = NULL;
- goto out_cache_error;
- }
+ if (status)
+ goto out_expired_error;
trace_nfsd_clid_replaced(&old->cl_clientid);
}
move_to_confirmed(unconf);
@@ -3894,6 +3894,17 @@ nfsd4_create_session(struct svc_rqst *rqstp,
expire_client(old);
return status;
+out_expired_error:
+ old = NULL;
+ /*
+ * Revert the slot seq_nr change so the server will process
+ * the client's resend instead of returning a cached response.
+ */
+ if (status == nfserr_jukebox) {
+ cs_slot->sl_seqid--;
+ cr_ses->seqid = cs_slot->sl_seqid;
+ goto out_free_conn;
+ }
out_cache_error:
nfsd4_cache_create_session(cr_ses, cs_slot, status);
out_free_conn:
@@ -6602,7 +6613,7 @@ deleg_reaper(struct nfsd_net *nn)
list_add(&clp->cl_ra_cblist, &cblist);
/* release in nfsd4_cb_recall_any_release */
- atomic_inc(&clp->cl_rpc_users);
+ kref_get(&clp->cl_nfsdfs.cl_ref);
set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
clp->cl_ra_time = ktime_get_boottime_seconds();
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fac938f563ad02..a644460f3a5e7a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3490,11 +3490,13 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
struct dentry *dentry, const u32 *bmval,
int ignore_crossmnt)
{
+ DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
struct nfsd4_fattr_args args;
struct svc_fh *tempfh = NULL;
int starting_len = xdr->buf->len;
__be32 *attrlen_p, status;
int attrlen_offset;
+ u32 attrmask[3];
int err;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
u32 minorversion = resp->cstate.minorversion;
@@ -3502,10 +3504,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
.mnt = exp->ex_path.mnt,
.dentry = dentry,
};
- union {
- u32 attrmask[3];
- unsigned long mask[2];
- } u;
unsigned long bit;
bool file_modified = false;
u64 size = 0;
@@ -3517,24 +3515,24 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
args.exp = exp;
args.dentry = dentry;
args.ignore_crossmnt = (ignore_crossmnt != 0);
+ args.acl = NULL;
/*
* Make a local copy of the attribute bitmap that can be modified.
*/
- memset(&u, 0, sizeof(u));
- u.attrmask[0] = bmval[0];
- u.attrmask[1] = bmval[1];
- u.attrmask[2] = bmval[2];
+ attrmask[0] = bmval[0];
+ attrmask[1] = bmval[1];
+ attrmask[2] = bmval[2];
args.rdattr_err = 0;
if (exp->ex_fslocs.migrated) {
- status = fattr_handle_absent_fs(&u.attrmask[0], &u.attrmask[1],
- &u.attrmask[2], &args.rdattr_err);
+ status = fattr_handle_absent_fs(&attrmask[0], &attrmask[1],
+ &attrmask[2], &args.rdattr_err);
if (status)
goto out;
}
args.size = 0;
- if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+ if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
&file_modified, &size);
if (status)
@@ -3553,16 +3551,16 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */
- u.attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
- if ((u.attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+ attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
+ if ((attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
- (u.attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
+ (attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
FATTR4_WORD1_SPACE_TOTAL))) {
err = vfs_statfs(&path, &args.statfs);
if (err)
goto out_nfserr;
}
- if ((u.attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
+ if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
!fhp) {
tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
status = nfserr_jukebox;
@@ -3576,11 +3574,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
} else
args.fhp = fhp;
- args.acl = NULL;
- if (u.attrmask[0] & FATTR4_WORD0_ACL) {
+ if (attrmask[0] & FATTR4_WORD0_ACL) {
err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl);
if (err == -EOPNOTSUPP)
- u.attrmask[0] &= ~FATTR4_WORD0_ACL;
+ attrmask[0] &= ~FATTR4_WORD0_ACL;
else if (err == -EINVAL) {
status = nfserr_attrnotsupp;
goto out;
@@ -3592,17 +3589,17 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
args.context = NULL;
- if ((u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
- u.attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
+ attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
err = security_inode_getsecctx(d_inode(dentry),
&args.context, &args.contextlen);
else
err = -EOPNOTSUPP;
args.contextsupport = (err == 0);
- if (u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
+ if (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
if (err == -EOPNOTSUPP)
- u.attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
else if (err)
goto out_nfserr;
}
@@ -3610,8 +3607,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
/* attrmask */
- status = nfsd4_encode_bitmap4(xdr, u.attrmask[0],
- u.attrmask[1], u.attrmask[2]);
+ status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1],
+ attrmask[2]);
if (status)
goto out;
@@ -3620,7 +3617,9 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
attrlen_p = xdr_reserve_space(xdr, XDR_UNIT);
if (!attrlen_p)
goto out_resource;
- for_each_set_bit(bit, (const unsigned long *)&u.mask,
+ bitmap_from_arr32(attr_bitmap, attrmask,
+ ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+ for_each_set_bit(bit, attr_bitmap,
ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) {
status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args);
if (status != nfs_ok)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 01c6f344564693..2ed0fcf879fd17 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -68,7 +68,7 @@ struct nfsd4_callback {
struct nfs4_client *cb_clp;
struct rpc_message cb_msg;
const struct nfsd4_callback_ops *cb_ops;
- struct delayed_work cb_work;
+ struct work_struct cb_work;
int cb_seq_status;
int cb_status;
bool cb_need_restart;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6a9464262fae6b..2e41eb4c3cec76 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1852,7 +1852,7 @@ retry:
trap = lock_rename(tdentry, fdentry);
if (IS_ERR(trap)) {
err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
- goto out;
+ goto out_want_write;
}
err = fh_fill_pre_attrs(ffhp);
if (err != nfs_ok)
@@ -1922,6 +1922,7 @@ retry:
}
out_unlock:
unlock_rename(tdentry, fdentry);
+out_want_write:
fh_drop_write(ffhp);
/*
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 65659fa0372e6c..a139970e48041d 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1857,13 +1857,22 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
}
/**
- * nilfs_btree_convert_and_insert -
- * @bmap:
- * @key:
- * @ptr:
- * @keys:
- * @ptrs:
- * @n:
+ * nilfs_btree_convert_and_insert - Convert and insert entries into a B-tree
+ * @btree: NILFS B-tree structure
+ * @key: Key of the new entry to be inserted
+ * @ptr: Pointer (block number) associated with the key to be inserted
+ * @keys: Array of keys to be inserted in addition to @key
+ * @ptrs: Array of pointers associated with @keys
+ * @n: Number of keys and pointers in @keys and @ptrs
+ *
+ * This function is used to insert a new entry specified by @key and @ptr,
+ * along with additional entries specified by @keys and @ptrs arrays, into a
+ * NILFS B-tree.
+ * It prepares the necessary changes by allocating the required blocks and any
+ * necessary intermediate nodes. It converts configurations from other forms of
+ * block mapping (the one that currently exists is direct mapping) to a B-tree.
+ *
+ * Return: 0 on success or a negative error code on failure.
*/
int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
__u64 key, __u64 ptr,
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index bc846b904b68d4..aee40db7a036fb 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -240,7 +240,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = {
#define S_SHIFT 12
static unsigned char
-nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
[S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
[S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index bf9a11d5881722..1c9ae36a03abe9 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -175,6 +175,7 @@ int nilfs_init_gcinode(struct inode *inode)
/**
* nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes
+ * @nilfs: NILFS filesystem instance
*/
void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
{
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 2e29b98ba8bab2..728e90be3570b7 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -335,8 +335,8 @@ void __nilfs_error(struct super_block *sb, const char *function,
extern struct nilfs_super_block *
nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
-extern int nilfs_store_magic_and_option(struct super_block *,
- struct nilfs_super_block *, char *);
+extern int nilfs_store_magic(struct super_block *sb,
+ struct nilfs_super_block *sbp);
extern int nilfs_check_feature_compatibility(struct super_block *,
struct nilfs_super_block *);
extern void nilfs_set_log_cursor(struct nilfs_super_block *,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 49a70c68bf3c06..e48372618ac400 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -563,6 +563,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
* checkpoint
* @nilfs: nilfs object
* @sb: super block instance
+ * @root: NILFS root instance
* @ri: pointer to a nilfs_recovery_info
*/
static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index ac24ed109ce935..e835e1f5a71201 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -29,13 +29,13 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
-#include <linux/parser.h>
#include <linux/crc32.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include "nilfs.h"
#include "export.h"
#include "mdt.h"
@@ -61,7 +61,6 @@ struct kmem_cache *nilfs_segbuf_cachep;
struct kmem_cache *nilfs_btree_path_cache;
static int nilfs_setup_super(struct super_block *sb, int is_mount);
-static int nilfs_remount(struct super_block *sb, int *flags, char *data);
void __nilfs_msg(struct super_block *sb, const char *fmt, ...)
{
@@ -702,105 +701,98 @@ static const struct super_operations nilfs_sops = {
.freeze_fs = nilfs_freeze,
.unfreeze_fs = nilfs_unfreeze,
.statfs = nilfs_statfs,
- .remount_fs = nilfs_remount,
.show_options = nilfs_show_options
};
enum {
- Opt_err_cont, Opt_err_panic, Opt_err_ro,
- Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery,
- Opt_discard, Opt_nodiscard, Opt_err,
+ Opt_err, Opt_barrier, Opt_snapshot, Opt_order, Opt_norecovery,
+ Opt_discard,
};
-static match_table_t tokens = {
- {Opt_err_cont, "errors=continue"},
- {Opt_err_panic, "errors=panic"},
- {Opt_err_ro, "errors=remount-ro"},
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_snapshot, "cp=%u"},
- {Opt_order, "order=%s"},
- {Opt_norecovery, "norecovery"},
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_err, NULL}
+static const struct constant_table nilfs_param_err[] = {
+ {"continue", NILFS_MOUNT_ERRORS_CONT},
+ {"panic", NILFS_MOUNT_ERRORS_PANIC},
+ {"remount-ro", NILFS_MOUNT_ERRORS_RO},
+ {}
};
-static int parse_options(char *options, struct super_block *sb, int is_remount)
-{
- struct the_nilfs *nilfs = sb->s_fs_info;
- char *p;
- substring_t args[MAX_OPT_ARGS];
-
- if (!options)
- return 1;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
+static const struct fs_parameter_spec nilfs_param_spec[] = {
+ fsparam_enum ("errors", Opt_err, nilfs_param_err),
+ fsparam_flag_no ("barrier", Opt_barrier),
+ fsparam_u64 ("cp", Opt_snapshot),
+ fsparam_string ("order", Opt_order),
+ fsparam_flag ("norecovery", Opt_norecovery),
+ fsparam_flag_no ("discard", Opt_discard),
+ {}
+};
- if (!*p)
- continue;
+struct nilfs_fs_context {
+ unsigned long ns_mount_opt;
+ __u64 cno;
+};
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_barrier:
- nilfs_set_opt(nilfs, BARRIER);
- break;
- case Opt_nobarrier:
+static int nilfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct nilfs_fs_context *nilfs = fc->fs_private;
+ int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
+ struct fs_parse_result result;
+ int opt;
+
+ opt = fs_parse(fc, nilfs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_barrier:
+ if (result.negated)
nilfs_clear_opt(nilfs, BARRIER);
- break;
- case Opt_order:
- if (strcmp(args[0].from, "relaxed") == 0)
- /* Ordered data semantics */
- nilfs_clear_opt(nilfs, STRICT_ORDER);
- else if (strcmp(args[0].from, "strict") == 0)
- /* Strict in-order semantics */
- nilfs_set_opt(nilfs, STRICT_ORDER);
- else
- return 0;
- break;
- case Opt_err_panic:
- nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC);
- break;
- case Opt_err_ro:
- nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO);
- break;
- case Opt_err_cont:
- nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT);
- break;
- case Opt_snapshot:
- if (is_remount) {
- nilfs_err(sb,
- "\"%s\" option is invalid for remount",
- p);
- return 0;
- }
- break;
- case Opt_norecovery:
- nilfs_set_opt(nilfs, NORECOVERY);
- break;
- case Opt_discard:
- nilfs_set_opt(nilfs, DISCARD);
- break;
- case Opt_nodiscard:
- nilfs_clear_opt(nilfs, DISCARD);
- break;
- default:
- nilfs_err(sb, "unrecognized mount option \"%s\"", p);
- return 0;
+ else
+ nilfs_set_opt(nilfs, BARRIER);
+ break;
+ case Opt_order:
+ if (strcmp(param->string, "relaxed") == 0)
+ /* Ordered data semantics */
+ nilfs_clear_opt(nilfs, STRICT_ORDER);
+ else if (strcmp(param->string, "strict") == 0)
+ /* Strict in-order semantics */
+ nilfs_set_opt(nilfs, STRICT_ORDER);
+ else
+ return -EINVAL;
+ break;
+ case Opt_err:
+ nilfs->ns_mount_opt &= ~NILFS_MOUNT_ERROR_MODE;
+ nilfs->ns_mount_opt |= result.uint_32;
+ break;
+ case Opt_snapshot:
+ if (is_remount) {
+ struct super_block *sb = fc->root->d_sb;
+
+ nilfs_err(sb,
+ "\"%s\" option is invalid for remount",
+ param->key);
+ return -EINVAL;
+ }
+ if (result.uint_64 == 0) {
+ nilfs_err(NULL,
+ "invalid option \"cp=0\": invalid checkpoint number 0");
+ return -EINVAL;
}
+ nilfs->cno = result.uint_64;
+ break;
+ case Opt_norecovery:
+ nilfs_set_opt(nilfs, NORECOVERY);
+ break;
+ case Opt_discard:
+ if (result.negated)
+ nilfs_clear_opt(nilfs, DISCARD);
+ else
+ nilfs_set_opt(nilfs, DISCARD);
+ break;
+ default:
+ return -EINVAL;
}
- return 1;
-}
-
-static inline void
-nilfs_set_default_options(struct super_block *sb,
- struct nilfs_super_block *sbp)
-{
- struct the_nilfs *nilfs = sb->s_fs_info;
- nilfs->ns_mount_opt =
- NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
+ return 0;
}
static int nilfs_setup_super(struct super_block *sb, int is_mount)
@@ -857,9 +849,8 @@ struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset);
}
-int nilfs_store_magic_and_option(struct super_block *sb,
- struct nilfs_super_block *sbp,
- char *data)
+int nilfs_store_magic(struct super_block *sb,
+ struct nilfs_super_block *sbp)
{
struct the_nilfs *nilfs = sb->s_fs_info;
@@ -870,14 +861,12 @@ int nilfs_store_magic_and_option(struct super_block *sb,
sb->s_flags |= SB_NOATIME;
#endif
- nilfs_set_default_options(sb, sbp);
-
nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid);
nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid);
nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval);
nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max);
- return !parse_options(data, sb, 0) ? -EINVAL : 0;
+ return 0;
}
int nilfs_check_feature_compatibility(struct super_block *sb,
@@ -1035,17 +1024,17 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
/**
* nilfs_fill_super() - initialize a super block instance
* @sb: super_block
- * @data: mount options
- * @silent: silent mode flag
+ * @fc: filesystem context
*
* This function is called exclusively by nilfs->ns_mount_mutex.
* So, the recovery process is protected from other simultaneous mounts.
*/
static int
-nilfs_fill_super(struct super_block *sb, void *data, int silent)
+nilfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct the_nilfs *nilfs;
struct nilfs_root *fsroot;
+ struct nilfs_fs_context *ctx = fc->fs_private;
__u64 cno;
int err;
@@ -1055,10 +1044,13 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_fs_info = nilfs;
- err = init_nilfs(nilfs, sb, (char *)data);
+ err = init_nilfs(nilfs, sb);
if (err)
goto failed_nilfs;
+ /* Copy in parsed mount options */
+ nilfs->ns_mount_opt = ctx->ns_mount_opt;
+
sb->s_op = &nilfs_sops;
sb->s_export_op = &nilfs_export_ops;
sb->s_root = NULL;
@@ -1117,34 +1109,25 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
return err;
}
-static int nilfs_remount(struct super_block *sb, int *flags, char *data)
+static int nilfs_reconfigure(struct fs_context *fc)
{
+ struct nilfs_fs_context *ctx = fc->fs_private;
+ struct super_block *sb = fc->root->d_sb;
struct the_nilfs *nilfs = sb->s_fs_info;
- unsigned long old_sb_flags;
- unsigned long old_mount_opt;
int err;
sync_filesystem(sb);
- old_sb_flags = sb->s_flags;
- old_mount_opt = nilfs->ns_mount_opt;
-
- if (!parse_options(data, sb, 1)) {
- err = -EINVAL;
- goto restore_opts;
- }
- sb->s_flags = (sb->s_flags & ~SB_POSIXACL);
err = -EINVAL;
if (!nilfs_valid_fs(nilfs)) {
nilfs_warn(sb,
"couldn't remount because the filesystem is in an incomplete recovery state");
- goto restore_opts;
+ goto ignore_opts;
}
-
- if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
+ if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb))
goto out;
- if (*flags & SB_RDONLY) {
+ if (fc->sb_flags & SB_RDONLY) {
sb->s_flags |= SB_RDONLY;
/*
@@ -1172,138 +1155,67 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
"couldn't remount RDWR because of unsupported optional features (%llx)",
(unsigned long long)features);
err = -EROFS;
- goto restore_opts;
+ goto ignore_opts;
}
sb->s_flags &= ~SB_RDONLY;
root = NILFS_I(d_inode(sb->s_root))->i_root;
err = nilfs_attach_log_writer(sb, root);
- if (err)
- goto restore_opts;
+ if (err) {
+ sb->s_flags |= SB_RDONLY;
+ goto ignore_opts;
+ }
down_write(&nilfs->ns_sem);
nilfs_setup_super(sb, true);
up_write(&nilfs->ns_sem);
}
out:
- return 0;
-
- restore_opts:
- sb->s_flags = old_sb_flags;
- nilfs->ns_mount_opt = old_mount_opt;
- return err;
-}
-
-struct nilfs_super_data {
- __u64 cno;
- int flags;
-};
-
-static int nilfs_parse_snapshot_option(const char *option,
- const substring_t *arg,
- struct nilfs_super_data *sd)
-{
- unsigned long long val;
- const char *msg = NULL;
- int err;
-
- if (!(sd->flags & SB_RDONLY)) {
- msg = "read-only option is not specified";
- goto parse_error;
- }
-
- err = kstrtoull(arg->from, 0, &val);
- if (err) {
- if (err == -ERANGE)
- msg = "too large checkpoint number";
- else
- msg = "malformed argument";
- goto parse_error;
- } else if (val == 0) {
- msg = "invalid checkpoint number 0";
- goto parse_error;
- }
- sd->cno = val;
- return 0;
-
-parse_error:
- nilfs_err(NULL, "invalid option \"%s\": %s", option, msg);
- return 1;
-}
-
-/**
- * nilfs_identify - pre-read mount options needed to identify mount instance
- * @data: mount options
- * @sd: nilfs_super_data
- */
-static int nilfs_identify(char *data, struct nilfs_super_data *sd)
-{
- char *p, *options = data;
- substring_t args[MAX_OPT_ARGS];
- int token;
- int ret = 0;
-
- do {
- p = strsep(&options, ",");
- if (p != NULL && *p) {
- token = match_token(p, tokens, args);
- if (token == Opt_snapshot)
- ret = nilfs_parse_snapshot_option(p, &args[0],
- sd);
- }
- if (!options)
- break;
- BUG_ON(options == data);
- *(options - 1) = ',';
- } while (!ret);
- return ret;
-}
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL);
+ /* Copy over parsed remount options */
+ nilfs->ns_mount_opt = ctx->ns_mount_opt;
-static int nilfs_set_bdev_super(struct super_block *s, void *data)
-{
- s->s_dev = *(dev_t *)data;
return 0;
-}
-static int nilfs_test_bdev_super(struct super_block *s, void *data)
-{
- return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
+ ignore_opts:
+ return err;
}
-static struct dentry *
-nilfs_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
+static int
+nilfs_get_tree(struct fs_context *fc)
{
- struct nilfs_super_data sd = { .flags = flags };
+ struct nilfs_fs_context *ctx = fc->fs_private;
struct super_block *s;
dev_t dev;
int err;
- if (nilfs_identify(data, &sd))
- return ERR_PTR(-EINVAL);
+ if (ctx->cno && !(fc->sb_flags & SB_RDONLY)) {
+ nilfs_err(NULL,
+ "invalid option \"cp=%llu\": read-only option is not specified",
+ ctx->cno);
+ return -EINVAL;
+ }
- err = lookup_bdev(dev_name, &dev);
+ err = lookup_bdev(fc->source, &dev);
if (err)
- return ERR_PTR(err);
+ return err;
- s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
- &dev);
+ s = sget_dev(fc, dev);
if (IS_ERR(s))
- return ERR_CAST(s);
+ return PTR_ERR(s);
if (!s->s_root) {
- err = setup_bdev_super(s, flags, NULL);
+ err = setup_bdev_super(s, fc->sb_flags, fc);
if (!err)
- err = nilfs_fill_super(s, data,
- flags & SB_SILENT ? 1 : 0);
+ err = nilfs_fill_super(s, fc);
if (err)
goto failed_super;
s->s_flags |= SB_ACTIVE;
- } else if (!sd.cno) {
+ } else if (!ctx->cno) {
if (nilfs_tree_is_busy(s->s_root)) {
- if ((flags ^ s->s_flags) & SB_RDONLY) {
+ if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
nilfs_err(s,
"the device already has a %s mount.",
sb_rdonly(s) ? "read-only" : "read/write");
@@ -1312,37 +1224,75 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
}
} else {
/*
- * Try remount to setup mount states if the current
+ * Try reconfigure to setup mount states if the current
* tree is not mounted and only snapshots use this sb.
+ *
+ * Since nilfs_reconfigure() requires fc->root to be
+ * set, set it first and release it on failure.
*/
- err = nilfs_remount(s, &flags, data);
- if (err)
+ fc->root = dget(s->s_root);
+ err = nilfs_reconfigure(fc);
+ if (err) {
+ dput(fc->root);
+ fc->root = NULL; /* prevent double release */
goto failed_super;
+ }
+ return 0;
}
}
- if (sd.cno) {
+ if (ctx->cno) {
struct dentry *root_dentry;
- err = nilfs_attach_snapshot(s, sd.cno, &root_dentry);
+ err = nilfs_attach_snapshot(s, ctx->cno, &root_dentry);
if (err)
goto failed_super;
- return root_dentry;
+ fc->root = root_dentry;
+ return 0;
}
- return dget(s->s_root);
+ fc->root = dget(s->s_root);
+ return 0;
failed_super:
deactivate_locked_super(s);
- return ERR_PTR(err);
+ return err;
+}
+
+static void nilfs_free_fc(struct fs_context *fc)
+{
+ kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations nilfs_context_ops = {
+ .parse_param = nilfs_parse_param,
+ .get_tree = nilfs_get_tree,
+ .reconfigure = nilfs_reconfigure,
+ .free = nilfs_free_fc,
+};
+
+static int nilfs_init_fs_context(struct fs_context *fc)
+{
+ struct nilfs_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->ns_mount_opt = NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
+ fc->fs_private = ctx;
+ fc->ops = &nilfs_context_ops;
+
+ return 0;
}
struct file_system_type nilfs_fs_type = {
.owner = THIS_MODULE,
.name = "nilfs2",
- .mount = nilfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = nilfs_init_fs_context,
+ .parameters = nilfs_param_spec,
};
MODULE_ALIAS_FS("nilfs2");
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 2ae2c1bbf6d17c..db322068678f40 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -659,7 +659,6 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
* init_nilfs - initialize a NILFS instance.
* @nilfs: the_nilfs structure
* @sb: super block
- * @data: mount options
*
* init_nilfs() performs common initialization per block device (e.g.
* reading the super block, getting disk layout information, initializing
@@ -668,7 +667,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
* Return Value: On success, 0 is returned. On error, a negative error
* code is returned.
*/
-int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
+int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
{
struct nilfs_super_block *sbp;
int blocksize;
@@ -686,7 +685,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto out;
- err = nilfs_store_magic_and_option(sb, sbp, data);
+ err = nilfs_store_magic(sb, sbp);
if (err)
goto failed_sbh;
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index cd4ae1b8ae165a..85da0629415dfe 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -219,10 +219,6 @@ THE_NILFS_FNS(PURGING, purging)
#define nilfs_set_opt(nilfs, opt) \
((nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt)
#define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt)
-#define nilfs_write_opt(nilfs, mask, opt) \
- ((nilfs)->ns_mount_opt = \
- (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \
- NILFS_MOUNT_##opt)) \
/**
* struct nilfs_root - nilfs root object
@@ -276,7 +272,7 @@ static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
struct the_nilfs *alloc_nilfs(struct super_block *sb);
void destroy_nilfs(struct the_nilfs *nilfs);
-int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
+int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
diff --git a/fs/ntfs3/Kconfig b/fs/ntfs3/Kconfig
index cdfdf51e55d797..7bc31d69f680dd 100644
--- a/fs/ntfs3/Kconfig
+++ b/fs/ntfs3/Kconfig
@@ -46,3 +46,12 @@ config NTFS3_FS_POSIX_ACL
NOTE: this is linux only feature. Windows will ignore these ACLs.
If you don't know what Access Control Lists are, say N.
+
+config NTFS_FS
+ tristate "NTFS file system support"
+ select NTFS3_FS
+ select BUFFER_HEAD
+ select NLS
+ help
+ This config option is here only for backward compatibility. NTFS
+ filesystem is now handled by the NTFS3 driver.
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index 5cf3d9decf646b..263635199b60d3 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -616,4 +616,11 @@ const struct file_operations ntfs_dir_operations = {
.compat_ioctl = ntfs_compat_ioctl,
#endif
};
+
+const struct file_operations ntfs_legacy_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .iterate_shared = ntfs_readdir,
+ .open = ntfs_file_open,
+};
// clang-format on
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 5418662c80d887..b73969e05052ae 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -1236,4 +1236,12 @@ const struct file_operations ntfs_file_operations = {
.fallocate = ntfs_fallocate,
.release = ntfs_file_release,
};
+
+const struct file_operations ntfs_legacy_file_operations = {
+ .llseek = generic_file_llseek,
+ .read_iter = ntfs_file_read_iter,
+ .splice_read = ntfs_file_splice_read,
+ .open = ntfs_file_open,
+ .release = ntfs_file_release,
+};
// clang-format on
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index eb7a8c9fba0183..d273eda1cf45d6 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -440,7 +440,10 @@ end_enum:
* Usually a hard links to directories are disabled.
*/
inode->i_op = &ntfs_dir_inode_operations;
- inode->i_fop = &ntfs_dir_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_dir_operations;
+ else
+ inode->i_fop = &ntfs_dir_operations;
ni->i_valid = 0;
} else if (S_ISLNK(mode)) {
ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
@@ -450,7 +453,10 @@ end_enum:
} else if (S_ISREG(mode)) {
ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
inode->i_op = &ntfs_file_inode_operations;
- inode->i_fop = &ntfs_file_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_file_operations;
+ else
+ inode->i_fop = &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
&ntfs_aops;
if (ino != MFT_REC_MFT)
@@ -1614,7 +1620,10 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
if (S_ISDIR(mode)) {
inode->i_op = &ntfs_dir_inode_operations;
- inode->i_fop = &ntfs_dir_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_dir_operations;
+ else
+ inode->i_fop = &ntfs_dir_operations;
} else if (S_ISLNK(mode)) {
inode->i_op = &ntfs_link_inode_operations;
inode->i_fop = NULL;
@@ -1623,7 +1632,10 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
inode_nohighmem(inode);
} else if (S_ISREG(mode)) {
inode->i_op = &ntfs_file_inode_operations;
- inode->i_fop = &ntfs_file_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_file_operations;
+ else
+ inode->i_fop = &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
&ntfs_aops;
init_rwsem(&ni->file.run_lock);
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 79356fd29a1414..5f4d288c6adfb9 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -493,6 +493,7 @@ struct inode *dir_search_u(struct inode *dir, const struct cpu_str *uni,
struct ntfs_fnd *fnd);
bool dir_is_empty(struct inode *dir);
extern const struct file_operations ntfs_dir_operations;
+extern const struct file_operations ntfs_legacy_dir_operations;
/* Globals from file.c */
int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
@@ -507,6 +508,7 @@ long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg);
extern const struct inode_operations ntfs_special_inode_operations;
extern const struct inode_operations ntfs_file_inode_operations;
extern const struct file_operations ntfs_file_operations;
+extern const struct file_operations ntfs_legacy_file_operations;
/* Globals from frecord.c */
void ni_remove_mi(struct ntfs_inode *ni, struct mft_inode *mi);
@@ -1154,4 +1156,6 @@ static inline void le64_sub_cpu(__le64 *var, u64 val)
*var = cpu_to_le64(le64_to_cpu(*var) - val);
}
+bool is_legacy_ntfs(struct super_block *sb);
+
#endif /* _LINUX_NTFS3_NTFS_FS_H */
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 9df7c20d066f61..b26d95a8d3274d 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -408,6 +408,12 @@ static int ntfs_fs_reconfigure(struct fs_context *fc)
struct ntfs_mount_options *new_opts = fc->fs_private;
int ro_rw;
+ /* If ntfs3 is used as legacy ntfs enforce read-only mode. */
+ if (is_legacy_ntfs(sb)) {
+ fc->sb_flags |= SB_RDONLY;
+ goto out;
+ }
+
ro_rw = sb_rdonly(sb) && !(fc->sb_flags & SB_RDONLY);
if (ro_rw && (sbi->flags & NTFS_FLAGS_NEED_REPLAY)) {
errorf(fc,
@@ -427,8 +433,6 @@ static int ntfs_fs_reconfigure(struct fs_context *fc)
fc,
"ntfs3: Cannot use different iocharset when remounting!");
- sync_filesystem(sb);
-
if (ro_rw && (sbi->volume.flags & VOLUME_FLAG_DIRTY) &&
!new_opts->force) {
errorf(fc,
@@ -436,6 +440,8 @@ static int ntfs_fs_reconfigure(struct fs_context *fc)
return -EINVAL;
}
+out:
+ sync_filesystem(sb);
swap(sbi->options, fc->fs_private);
return 0;
@@ -1613,6 +1619,8 @@ load_root:
}
#endif
+ if (is_legacy_ntfs(sb))
+ sb->s_flags |= SB_RDONLY;
return 0;
put_inode_out:
@@ -1730,7 +1738,7 @@ static const struct fs_context_operations ntfs_context_ops = {
* This will called when mount/remount. We will first initialize
* options so that if remount we can use just that.
*/
-static int ntfs_init_fs_context(struct fs_context *fc)
+static int __ntfs_init_fs_context(struct fs_context *fc)
{
struct ntfs_mount_options *opts;
struct ntfs_sb_info *sbi;
@@ -1778,6 +1786,11 @@ free_opts:
return -ENOMEM;
}
+static int ntfs_init_fs_context(struct fs_context *fc)
+{
+ return __ntfs_init_fs_context(fc);
+}
+
static void ntfs3_kill_sb(struct super_block *sb)
{
struct ntfs_sb_info *sbi = sb->s_fs_info;
@@ -1798,6 +1811,50 @@ static struct file_system_type ntfs_fs_type = {
.kill_sb = ntfs3_kill_sb,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
+
+#if IS_ENABLED(CONFIG_NTFS_FS)
+static int ntfs_legacy_init_fs_context(struct fs_context *fc)
+{
+ int ret;
+
+ ret = __ntfs_init_fs_context(fc);
+ /* If ntfs3 is used as legacy ntfs enforce read-only mode. */
+ fc->sb_flags |= SB_RDONLY;
+ return ret;
+}
+
+static struct file_system_type ntfs_legacy_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ntfs",
+ .init_fs_context = ntfs_legacy_init_fs_context,
+ .parameters = ntfs_fs_parameters,
+ .kill_sb = ntfs3_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+};
+MODULE_ALIAS_FS("ntfs");
+
+static inline void register_as_ntfs_legacy(void)
+{
+ int err = register_filesystem(&ntfs_legacy_fs_type);
+ if (err)
+ pr_warn("ntfs3: Failed to register legacy ntfs filesystem driver: %d\n", err);
+}
+
+static inline void unregister_as_ntfs_legacy(void)
+{
+ unregister_filesystem(&ntfs_legacy_fs_type);
+}
+bool is_legacy_ntfs(struct super_block *sb)
+{
+ return sb->s_type == &ntfs_legacy_fs_type;
+}
+#else
+static inline void register_as_ntfs_legacy(void) {}
+static inline void unregister_as_ntfs_legacy(void) {}
+bool is_legacy_ntfs(struct super_block *sb) { return false; }
+#endif
+
+
// clang-format on
static int __init init_ntfs_fs(void)
@@ -1832,6 +1889,7 @@ static int __init init_ntfs_fs(void)
goto out1;
}
+ register_as_ntfs_legacy();
err = register_filesystem(&ntfs_fs_type);
if (err)
goto out;
@@ -1849,6 +1907,7 @@ static void __exit exit_ntfs_fs(void)
rcu_barrier();
kmem_cache_destroy(ntfs_inode_cachep);
unregister_filesystem(&ntfs_fs_type);
+ unregister_as_ntfs_legacy();
ntfs3_exit_bitmap();
#ifdef CONFIG_PROC_FS
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b82185075de7df..f0467d3b3c880e 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2283,8 +2283,6 @@ unlock:
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
out:
- if (ret < 0)
- ret = -EIO;
return ret;
}
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 5c04dde99981c7..2018501b224937 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1274,7 +1274,7 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
{
struct dlm_query_nodeinfo *qn;
struct dlm_ctxt *dlm = NULL;
- int locked = 0, status = -EINVAL;
+ int status = -EINVAL;
qn = (struct dlm_query_nodeinfo *) msg->buf;
@@ -1290,12 +1290,11 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
}
spin_lock(&dlm->spinlock);
- locked = 1;
if (dlm->joining_node != qn->qn_nodenum) {
mlog(ML_ERROR, "Node %d queried nodes on domain %s but "
"joining node is %d\n", qn->qn_nodenum, qn->qn_domain,
dlm->joining_node);
- goto bail;
+ goto unlock;
}
/* Support for node query was added in 1.1 */
@@ -1305,14 +1304,14 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
"but active dlm protocol is %d.%d\n", qn->qn_nodenum,
qn->qn_domain, dlm->dlm_locking_proto.pv_major,
dlm->dlm_locking_proto.pv_minor);
- goto bail;
+ goto unlock;
}
status = dlm_match_nodes(dlm, qn);
+unlock:
+ spin_unlock(&dlm->spinlock);
bail:
- if (locked)
- spin_unlock(&dlm->spinlock);
spin_unlock(&dlm_domain_lock);
return status;
@@ -1528,7 +1527,6 @@ static void dlm_send_join_asserts(struct dlm_ctxt *dlm,
{
int status, node, live;
- status = 0;
node = -1;
while ((node = find_next_bit(node_map, O2NM_MAX_NODES,
node + 1)) < O2NM_MAX_NODES) {
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index b8b6a191b5cb50..96b684763b3906 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -255,9 +255,9 @@ static struct dentry *ocfs2_fh_to_dentry(struct super_block *sb,
if (fh_len < 3 || fh_type > 2)
return NULL;
- handle.ih_blkno = (u64)le32_to_cpu(fid->raw[0]) << 32;
- handle.ih_blkno |= (u64)le32_to_cpu(fid->raw[1]);
- handle.ih_generation = le32_to_cpu(fid->raw[2]);
+ handle.ih_blkno = (u64)le32_to_cpu((__force __le32)fid->raw[0]) << 32;
+ handle.ih_blkno |= (u64)le32_to_cpu((__force __le32)fid->raw[1]);
+ handle.ih_generation = le32_to_cpu((__force __le32)fid->raw[2]);
return ocfs2_get_dentry(sb, &handle);
}
@@ -269,9 +269,9 @@ static struct dentry *ocfs2_fh_to_parent(struct super_block *sb,
if (fh_type != 2 || fh_len < 6)
return NULL;
- parent.ih_blkno = (u64)le32_to_cpu(fid->raw[3]) << 32;
- parent.ih_blkno |= (u64)le32_to_cpu(fid->raw[4]);
- parent.ih_generation = le32_to_cpu(fid->raw[5]);
+ parent.ih_blkno = (u64)le32_to_cpu((__force __le32)fid->raw[3]) << 32;
+ parent.ih_blkno |= (u64)le32_to_cpu((__force __le32)fid->raw[4]);
+ parent.ih_generation = le32_to_cpu((__force __le32)fid->raw[5]);
return ocfs2_get_dentry(sb, &parent);
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0da8e7bd32616f..ccc57038a9779f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1936,6 +1936,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
inode_lock(inode);
+ /* Wait all existing dio workers, newcomers will block on i_rwsem */
+ inode_dio_wait(inode);
/*
* This prevents concurrent writes on other nodes
*/
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 999111bfc27178..2cc5c99fe94167 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1621,6 +1621,7 @@ static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info
}
static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
+__acquires(&oi->ip_lock)
{
struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
@@ -1628,6 +1629,7 @@ static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
}
static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci)
+__releases(&oi->ip_lock)
{
struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index b1550ba73f9634..71beef7f8a60b6 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -125,6 +125,7 @@ int ocfs2_fileattr_set(struct mnt_idmap *idmap,
ocfs2_inode->ip_attr = flags;
ocfs2_set_inode_flags(inode);
+ inode_set_ctime_current(inode);
status = ocfs2_mark_inode_dirty(handle, inode, bh);
if (status < 0)
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index c803c10dd97ef1..5df34561c551c6 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -212,14 +212,15 @@ static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
unsigned int num_clusters)
{
- spin_lock(&osb->osb_lock);
- if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
- osb->local_alloc_state == OCFS2_LA_THROTTLED)
- if (num_clusters >= osb->local_alloc_default_bits) {
+ if (num_clusters >= osb->local_alloc_default_bits) {
+ spin_lock(&osb->osb_lock);
+ if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
+ osb->local_alloc_state == OCFS2_LA_THROTTLED) {
cancel_delayed_work(&osb->la_enable_wq);
osb->local_alloc_state = OCFS2_LA_ENABLED;
}
- spin_unlock(&osb->osb_lock);
+ spin_unlock(&osb->osb_lock);
+ }
}
void ocfs2_la_enable_worker(struct work_struct *work)
@@ -335,7 +336,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
"found = %u, set = %u, taken = %u, off = %u\n",
num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
le32_to_cpu(alloc->id1.bitmap1.i_total),
- OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
+ le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off));
status = -EINVAL;
goto bail;
@@ -863,14 +864,8 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
numfound = bitoff = startoff = 0;
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
- while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
- if (bitoff == left) {
- /* mlog(0, "bitoff (%d) == left", bitoff); */
- break;
- }
- /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
- "numfound = %d\n", bitoff, startoff, numfound);*/
-
+ while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) <
+ left) {
/* Ok, we found a zero bit... is it contig. or do we
* start over?*/
if (bitoff == startoff) {
@@ -976,9 +971,9 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
start = count = 0;
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
- while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
- != -1) {
- if ((bit_off < left) && (bit_off == start)) {
+ while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) <
+ left) {
+ if (bit_off == start) {
count++;
start++;
continue;
@@ -1002,8 +997,7 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
goto bail;
}
}
- if (bit_off >= left)
- break;
+
count = 1;
start = bit_off + 1;
}
@@ -1220,7 +1214,7 @@ retry_enospc:
OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
trace_ocfs2_local_alloc_new_window_result(
- OCFS2_LOCAL_ALLOC(alloc)->la_bm_off,
+ le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off),
le32_to_cpu(alloc->id1.bitmap1.i_total));
bail:
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 1f9ed117e78b61..f9d6a4f9ca9219 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -685,7 +685,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
}
ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh,
- goal_bit, len);
+ goal_bit, len, 0, 0);
if (ret) {
ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len,
le16_to_cpu(gd->bg_chain));
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 9221a33f917b81..4d1ea8703fcdf3 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -566,7 +566,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
fe->i_last_eb_blk = 0;
strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL);
- ktime_get_real_ts64(&ts);
+ ktime_get_coarse_real_ts64(&ts);
fe->i_atime = fe->i_ctime = fe->i_mtime =
cpu_to_le64(ts.tv_sec);
fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
@@ -797,6 +797,7 @@ static int ocfs2_link(struct dentry *old_dentry,
ocfs2_set_links_count(fe, inode->i_nlink);
fe->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, fe_bh);
err = ocfs2_add_entry(handle, dentry, inode,
@@ -993,6 +994,7 @@ static int ocfs2_unlink(struct inode *dir,
drop_nlink(inode);
drop_nlink(inode);
ocfs2_set_links_count(fe, inode->i_nlink);
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, fe_bh);
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 7aebdbf5cc0a56..c93689b568fe08 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -883,7 +883,8 @@ struct ocfs2_group_desc
__le16 bg_free_bits_count; /* Free bits count */
__le16 bg_chain; /* What chain I am in. */
/*10*/ __le32 bg_generation;
- __le32 bg_reserved1;
+ __le16 bg_contig_free_bits; /* max contig free bits length */
+ __le16 bg_reserved1;
__le64 bg_next_group; /* Next group in my list, in
blocks */
/*20*/ __le64 bg_parent_dinode; /* dinode which owns me, in
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3f80a56d0d603c..1f303b1adf1ab9 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -630,7 +630,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
rb->rf_records.rl_count =
cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
spin_lock(&osb->osb_lock);
- rb->rf_generation = osb->s_next_generation++;
+ rb->rf_generation = cpu_to_le32(osb->s_next_generation++);
spin_unlock(&osb->osb_lock);
ocfs2_journal_dirty(handle, new_bh);
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index a9d1296d736dc4..1fe61974d9f023 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -414,7 +414,7 @@ static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
start = search_start;
while ((offset = ocfs2_find_next_zero_bit(bitmap, resmap->m_bitmap_len,
- start)) != -1) {
+ start)) < resmap->m_bitmap_len) {
/* Search reached end of the region */
if (offset >= (search_start + search_len))
break;
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index d65d43c61857a4..c4a4016d3866a3 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -91,6 +91,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
u16 cl_bpc = le16_to_cpu(cl->cl_bpc);
u16 cl_cpg = le16_to_cpu(cl->cl_cpg);
u16 old_bg_clusters;
+ u16 contig_bits;
+ __le16 old_bg_contig_free_bits;
trace_ocfs2_update_last_group_and_inode(new_clusters,
first_new_cluster);
@@ -122,6 +124,11 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
}
+ contig_bits = ocfs2_find_max_contig_free_bits(group->bg_bitmap,
+ le16_to_cpu(group->bg_bits), 0);
+ old_bg_contig_free_bits = group->bg_contig_free_bits;
+ group->bg_contig_free_bits = cpu_to_le16(contig_bits);
+
ocfs2_journal_dirty(handle, group_bh);
/* update the inode accordingly. */
@@ -160,6 +167,7 @@ out_rollback:
le16_add_cpu(&group->bg_free_bits_count, backups);
le16_add_cpu(&group->bg_bits, -1 * num_bits);
le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits);
+ group->bg_contig_free_bits = old_bg_contig_free_bits;
}
out:
if (ret)
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 166c8918c825a6..f7b483f0de2add 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -50,6 +50,10 @@ struct ocfs2_suballoc_result {
u64 sr_blkno; /* The first allocated block */
unsigned int sr_bit_offset; /* The bit in the bg */
unsigned int sr_bits; /* How many bits we claimed */
+ unsigned int sr_max_contig_bits; /* The length for contiguous
+ * free bits, only available
+ * for cluster group
+ */
};
static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
@@ -1272,6 +1276,26 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
return ret;
}
+u16 ocfs2_find_max_contig_free_bits(void *bitmap,
+ u16 total_bits, u16 start)
+{
+ u16 offset, free_bits;
+ u16 contig_bits = 0;
+
+ while (start < total_bits) {
+ offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start);
+ if (offset == total_bits)
+ break;
+
+ start = ocfs2_find_next_bit(bitmap, total_bits, offset);
+ free_bits = start - offset;
+ if (contig_bits < free_bits)
+ contig_bits = free_bits;
+ }
+
+ return contig_bits;
+}
+
static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
struct buffer_head *bg_bh,
unsigned int bits_wanted,
@@ -1280,6 +1304,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
{
void *bitmap;
u16 best_offset, best_size;
+ u16 prev_best_size = 0;
int offset, start, found, status = 0;
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
@@ -1290,10 +1315,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
found = start = best_offset = best_size = 0;
bitmap = bg->bg_bitmap;
- while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
- if (offset == total_bits)
- break;
-
+ while ((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) <
+ total_bits) {
if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
/* We found a zero, but we can't use it as it
* hasn't been put to disk yet! */
@@ -1308,6 +1331,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
/* got a zero after some ones */
found = 1;
start = offset + 1;
+ prev_best_size = best_size;
}
if (found > best_size) {
best_size = found;
@@ -1320,6 +1344,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
}
}
+ /* best_size will be allocated, we save prev_best_size */
+ res->sr_max_contig_bits = prev_best_size;
if (best_size) {
res->sr_bit_offset = best_offset;
res->sr_bits = best_size;
@@ -1337,11 +1363,16 @@ int ocfs2_block_group_set_bits(handle_t *handle,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
unsigned int bit_off,
- unsigned int num_bits)
+ unsigned int num_bits,
+ unsigned int max_contig_bits,
+ int fastpath)
{
int status;
void *bitmap = bg->bg_bitmap;
int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
+ unsigned int start = bit_off + num_bits;
+ u16 contig_bits;
+ struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
/* All callers get the descriptor via
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
@@ -1373,6 +1404,29 @@ int ocfs2_block_group_set_bits(handle_t *handle,
while(num_bits--)
ocfs2_set_bit(bit_off++, bitmap);
+ /*
+ * this is optimize path, caller set old contig value
+ * in max_contig_bits to bypass finding action.
+ */
+ if (fastpath) {
+ bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits);
+ } else if (ocfs2_is_cluster_bitmap(alloc_inode)) {
+ /*
+ * Usually, the block group bitmap allocates only 1 bit
+ * at a time, while the cluster group allocates n bits
+ * each time. Therefore, we only save the contig bits for
+ * the cluster group.
+ */
+ contig_bits = ocfs2_find_max_contig_free_bits(bitmap,
+ le16_to_cpu(bg->bg_bits), start);
+ if (contig_bits > max_contig_bits)
+ max_contig_bits = contig_bits;
+ bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits);
+ ocfs2_local_alloc_seen_free_bits(osb, max_contig_bits);
+ } else {
+ bg->bg_contig_free_bits = 0;
+ }
+
ocfs2_journal_dirty(handle, group_bh);
bail:
@@ -1486,7 +1540,12 @@ static int ocfs2_cluster_group_search(struct inode *inode,
BUG_ON(!ocfs2_is_cluster_bitmap(inode));
- if (gd->bg_free_bits_count) {
+ if (le16_to_cpu(gd->bg_contig_free_bits) &&
+ le16_to_cpu(gd->bg_contig_free_bits) < bits_wanted)
+ return -ENOSPC;
+
+ /* ->bg_contig_free_bits may un-initialized, so compare again */
+ if (le16_to_cpu(gd->bg_free_bits_count) >= bits_wanted) {
max_bits = le16_to_cpu(gd->bg_bits);
/* Tail groups in cluster bitmaps which aren't cpg
@@ -1530,13 +1589,6 @@ static int ocfs2_cluster_group_search(struct inode *inode,
* of bits. */
if (min_bits <= res->sr_bits)
search = 0; /* success */
- else if (res->sr_bits) {
- /*
- * Don't show bits which we'll be returning
- * for allocation to the local alloc bitmap.
- */
- ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits);
- }
}
return search;
@@ -1555,7 +1607,7 @@ static int ocfs2_block_group_search(struct inode *inode,
BUG_ON(min_bits != 1);
BUG_ON(ocfs2_is_cluster_bitmap(inode));
- if (bg->bg_free_bits_count) {
+ if (le16_to_cpu(bg->bg_free_bits_count) >= bits_wanted) {
ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
group_bh, bits_wanted,
le16_to_cpu(bg->bg_bits),
@@ -1715,7 +1767,8 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
}
ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
- res->sr_bit_offset, res->sr_bits);
+ res->sr_bit_offset, res->sr_bits,
+ res->sr_max_contig_bits, 0);
if (ret < 0) {
ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh,
res->sr_bits,
@@ -1849,7 +1902,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
bg,
group_bh,
res->sr_bit_offset,
- res->sr_bits);
+ res->sr_bits,
+ res->sr_max_contig_bits,
+ 0);
if (status < 0) {
ocfs2_rollback_alloc_dinode_counts(alloc_inode,
ac->ac_bh, res->sr_bits, chain);
@@ -1951,7 +2006,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
if (i == victim)
continue;
- if (!cl->cl_recs[i].c_free)
+ if (le32_to_cpu(cl->cl_recs[i].c_free) < bits_wanted)
continue;
ac->ac_chain = i;
@@ -2163,7 +2218,9 @@ int ocfs2_claim_new_inode_at_loc(handle_t *handle,
bg,
bg_bh,
res->sr_bit_offset,
- res->sr_bits);
+ res->sr_bits,
+ res->sr_max_contig_bits,
+ 0);
if (ret < 0) {
ocfs2_rollback_alloc_dinode_counts(ac->ac_inode,
ac->ac_bh, res->sr_bits, chain);
@@ -2382,11 +2439,13 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
struct buffer_head *group_bh,
unsigned int bit_off,
unsigned int num_bits,
+ unsigned int max_contig_bits,
void (*undo_fn)(unsigned int bit,
unsigned long *bmap))
{
int status;
unsigned int tmp;
+ u16 contig_bits;
struct ocfs2_group_desc *undo_bg = NULL;
struct journal_head *jh;
@@ -2433,6 +2492,20 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
num_bits);
}
+ /*
+ * TODO: even 'num_bits == 1' (the worst case, release 1 cluster),
+ * we still need to rescan whole bitmap.
+ */
+ if (ocfs2_is_cluster_bitmap(alloc_inode)) {
+ contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap,
+ le16_to_cpu(bg->bg_bits), 0);
+ if (contig_bits > max_contig_bits)
+ max_contig_bits = contig_bits;
+ bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits);
+ } else {
+ bg->bg_contig_free_bits = 0;
+ }
+
if (undo_fn)
spin_unlock(&jh->b_state_lock);
@@ -2459,6 +2532,7 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
struct ocfs2_chain_list *cl = &fe->id2.i_chain;
struct buffer_head *group_bh = NULL;
struct ocfs2_group_desc *group;
+ __le16 old_bg_contig_free_bits = 0;
/* The alloc_bh comes from ocfs2_free_dinode() or
* ocfs2_free_clusters(). The callers have all locked the
@@ -2483,9 +2557,11 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
+ if (ocfs2_is_cluster_bitmap(alloc_inode))
+ old_bg_contig_free_bits = group->bg_contig_free_bits;
status = ocfs2_block_group_clear_bits(handle, alloc_inode,
group, group_bh,
- start_bit, count, undo_fn);
+ start_bit, count, 0, undo_fn);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -2496,7 +2572,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
if (status < 0) {
mlog_errno(status);
ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh,
- start_bit, count);
+ start_bit, count,
+ le16_to_cpu(old_bg_contig_free_bits), 1);
goto bail;
}
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 9c74eace3adc15..b481b834857d33 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -79,12 +79,16 @@ void ocfs2_rollback_alloc_dinode_counts(struct inode *inode,
struct buffer_head *di_bh,
u32 num_bits,
u16 chain);
+u16 ocfs2_find_max_contig_free_bits(void *bitmap,
+ u16 total_bits, u16 start);
int ocfs2_block_group_set_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
unsigned int bit_off,
- unsigned int num_bits);
+ unsigned int num_bits,
+ unsigned int max_contig_bits,
+ int fastpath);
int ocfs2_claim_metadata(handle_t *handle,
struct ocfs2_alloc_context *ac,
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index bd08616ed8bad7..7b4db9c56e6a77 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -5,7 +5,7 @@
obj-y += proc.o
-CFLAGS_task_mmu.o += $(call cc-option,-Wno-override-init,)
+CFLAGS_task_mmu.o += -Wno-override-init
proc-y := nommu.o task_nommu.o
proc-$(CONFIG_MMU) := task_mmu.o
diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c
index 902b326e1e5607..87dcaae32ff87b 100644
--- a/fs/proc/bootconfig.c
+++ b/fs/proc/bootconfig.c
@@ -62,12 +62,12 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size)
break;
dst += ret;
}
- if (ret >= 0 && boot_command_line[0]) {
- ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n",
- boot_command_line);
- if (ret > 0)
- dst += ret;
- }
+ }
+ if (cmdline_has_extra_options() && ret >= 0 && boot_command_line[0]) {
+ ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n",
+ boot_command_line);
+ if (ret > 0)
+ dst += ret;
}
out:
kfree(key);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index dcd513dccf55cb..d19434e2a58e71 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -451,15 +451,13 @@ pde_get_unmapped_area(struct proc_dir_entry *pde, struct file *file, unsigned lo
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
- typeof_member(struct proc_ops, proc_get_unmapped_area) get_area;
+ if (pde->proc_ops->proc_get_unmapped_area)
+ return pde->proc_ops->proc_get_unmapped_area(file, orig_addr, len, pgoff, flags);
- get_area = pde->proc_ops->proc_get_unmapped_area;
#ifdef CONFIG_MMU
- if (!get_area)
- get_area = current->mm->get_unmapped_area;
+ return mm_get_unmapped_area(current->mm, file, orig_addr, len, pgoff, flags);
#endif
- if (get_area)
- return get_area(file, orig_addr, len, pgoff, flags);
+
return orig_addr;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 45af9a989d4040..245171d9164be3 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -89,8 +89,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SwapTotal: ", i.totalswap);
show_val_kb(m, "SwapFree: ", i.freeswap);
#ifdef CONFIG_ZSWAP
- seq_printf(m, "Zswap: %8lu kB\n",
- (unsigned long)(zswap_pool_total_size >> 10));
+ show_val_kb(m, "Zswap: ", zswap_total_pages());
seq_printf(m, "Zswapped: %8lu kB\n",
(unsigned long)atomic_read(&zswap_stored_pages) <<
(PAGE_SHIFT - 10));
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 195b077c0facbf..2fb64bdb64eb1d 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -67,7 +67,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
*/
ppage = pfn_to_online_page(pfn);
- if (!ppage || PageSlab(ppage) || page_has_type(ppage))
+ if (!ppage)
pcount = 0;
else
pcount = page_mapcount(ppage);
@@ -107,10 +107,13 @@ static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
return ((kflags >> kbit) & 1) << ubit;
}
-u64 stable_page_flags(struct page *page)
+u64 stable_page_flags(const struct page *page)
{
- u64 k;
- u64 u;
+ const struct folio *folio;
+ unsigned long k;
+ unsigned long mapping;
+ bool is_anon;
+ u64 u = 0;
/*
* pseudo flag: KPF_NOPAGE
@@ -118,52 +121,47 @@ u64 stable_page_flags(struct page *page)
*/
if (!page)
return 1 << KPF_NOPAGE;
+ folio = page_folio(page);
- k = page->flags;
- u = 0;
+ k = folio->flags;
+ mapping = (unsigned long)folio->mapping;
+ is_anon = mapping & PAGE_MAPPING_ANON;
/*
* pseudo flags for the well known (anonymous) memory mapped pages
- *
- * Note that page->_mapcount is overloaded in SLAB, so the
- * simple test in page_mapped() is not enough.
*/
- if (!PageSlab(page) && page_mapped(page))
+ if (page_mapped(page))
u |= 1 << KPF_MMAP;
- if (PageAnon(page))
+ if (is_anon) {
u |= 1 << KPF_ANON;
- if (PageKsm(page))
- u |= 1 << KPF_KSM;
+ if (mapping & PAGE_MAPPING_KSM)
+ u |= 1 << KPF_KSM;
+ }
/*
* compound pages: export both head/tail info
* they together define a compound page's start/end pos and order
*/
- if (PageHead(page))
- u |= 1 << KPF_COMPOUND_HEAD;
- if (PageTail(page))
+ if (page == &folio->page)
+ u |= kpf_copy_bit(k, KPF_COMPOUND_HEAD, PG_head);
+ else
u |= 1 << KPF_COMPOUND_TAIL;
- if (PageHuge(page))
+ if (folio_test_hugetlb(folio))
u |= 1 << KPF_HUGE;
/*
- * PageTransCompound can be true for non-huge compound pages (slab
- * pages or pages allocated by drivers with __GFP_COMP) because it
- * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
+ * We need to check PageLRU/PageAnon
* to make sure a given page is a thp, not a non-huge compound page.
*/
- else if (PageTransCompound(page)) {
- struct page *head = compound_head(page);
-
- if (PageLRU(head) || PageAnon(head))
+ else if (folio_test_large(folio)) {
+ if ((k & (1 << PG_lru)) || is_anon)
u |= 1 << KPF_THP;
- else if (is_huge_zero_page(head)) {
+ else if (is_huge_zero_folio(folio)) {
u |= 1 << KPF_ZERO_PAGE;
u |= 1 << KPF_THP;
}
} else if (is_zero_pfn(page_to_pfn(page)))
u |= 1 << KPF_ZERO_PAGE;
-
/*
* Caveats on high order pages: PG_buddy and PG_slab will only be set
* on the head page.
@@ -177,16 +175,17 @@ u64 stable_page_flags(struct page *page)
u |= 1 << KPF_OFFLINE;
if (PageTable(page))
u |= 1 << KPF_PGTABLE;
+ if (folio_test_slab(folio))
+ u |= 1 << KPF_SLAB;
- if (page_is_idle(page))
+#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
+ u |= kpf_copy_bit(k, KPF_IDLE, PG_idle);
+#else
+ if (folio_test_idle(folio))
u |= 1 << KPF_IDLE;
+#endif
u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
-
- u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
- if (PageTail(page) && PageSlab(page))
- u |= 1 << KPF_SLAB;
-
u |= kpf_copy_bit(k, KPF_ERROR, PG_error);
u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate);
@@ -197,7 +196,8 @@ u64 stable_page_flags(struct page *page)
u |= kpf_copy_bit(k, KPF_ACTIVE, PG_active);
u |= kpf_copy_bit(k, KPF_RECLAIM, PG_reclaim);
- if (PageSwapCache(page))
+#define SWAPCACHE ((1 << PG_swapbacked) | (1 << PG_swapcache))
+ if ((k & SWAPCACHE) == SWAPCACHE)
u |= 1 << KPF_SWAPCACHE;
u |= kpf_copy_bit(k, KPF_SWAPBACKED, PG_swapbacked);
@@ -205,7 +205,10 @@ u64 stable_page_flags(struct page *page)
u |= kpf_copy_bit(k, KPF_MLOCKED, PG_mlocked);
#ifdef CONFIG_MEMORY_FAILURE
- u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
+ if (u & (1 << KPF_HUGE))
+ u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison);
+ else
+ u |= kpf_copy_bit(page->flags, KPF_HWPOISON, PG_hwpoison);
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
@@ -231,7 +234,6 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
{
const unsigned long max_dump_pfn = get_max_dump_pfn();
u64 __user *out = (u64 __user *)buf;
- struct page *ppage;
unsigned long src = *ppos;
unsigned long pfn;
ssize_t ret = 0;
@@ -248,9 +250,9 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
* TODO: ZONE_DEVICE support requires to identify
* memmaps that were actually initialized.
*/
- ppage = pfn_to_online_page(pfn);
+ struct page *page = pfn_to_online_page(pfn);
- if (put_user(stable_page_flags(ppage), out)) {
+ if (put_user(stable_page_flags(page), out)) {
ret = -EFAULT;
break;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 23fbab954c20b6..81fbecfe5ff6c7 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -411,14 +411,14 @@ struct mem_size_stats {
};
static void smaps_page_accumulate(struct mem_size_stats *mss,
- struct page *page, unsigned long size, unsigned long pss,
+ struct folio *folio, unsigned long size, unsigned long pss,
bool dirty, bool locked, bool private)
{
mss->pss += pss;
- if (PageAnon(page))
+ if (folio_test_anon(folio))
mss->pss_anon += pss;
- else if (PageSwapBacked(page))
+ else if (folio_test_swapbacked(folio))
mss->pss_shmem += pss;
else
mss->pss_file += pss;
@@ -426,7 +426,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
if (locked)
mss->pss_locked += pss;
- if (dirty || PageDirty(page)) {
+ if (dirty || folio_test_dirty(folio)) {
mss->pss_dirty += pss;
if (private)
mss->private_dirty += size;
@@ -444,6 +444,7 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
bool compound, bool young, bool dirty, bool locked,
bool migration)
{
+ struct folio *folio = page_folio(page);
int i, nr = compound ? compound_nr(page) : 1;
unsigned long size = nr * PAGE_SIZE;
@@ -451,27 +452,28 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
* First accumulate quantities that depend only on |size| and the type
* of the compound page.
*/
- if (PageAnon(page)) {
+ if (folio_test_anon(folio)) {
mss->anonymous += size;
- if (!PageSwapBacked(page) && !dirty && !PageDirty(page))
+ if (!folio_test_swapbacked(folio) && !dirty &&
+ !folio_test_dirty(folio))
mss->lazyfree += size;
}
- if (PageKsm(page))
+ if (folio_test_ksm(folio))
mss->ksm += size;
mss->resident += size;
/* Accumulate the size in pages that have been accessed. */
- if (young || page_is_young(page) || PageReferenced(page))
+ if (young || folio_test_young(folio) || folio_test_referenced(folio))
mss->referenced += size;
/*
* Then accumulate quantities that may depend on sharing, or that may
* differ page-by-page.
*
- * page_count(page) == 1 guarantees the page is mapped exactly once.
+ * refcount == 1 guarantees the page is mapped exactly once.
* If any subpage of the compound page mapped with PTE it would elevate
- * page_count().
+ * the refcount.
*
* The page_mapcount() is called to get a snapshot of the mapcount.
* Without holding the page lock this snapshot can be slightly wrong as
@@ -480,9 +482,9 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
* especially for migration entries. Treat regular migration entries
* as mapcount == 1.
*/
- if ((page_count(page) == 1) || migration) {
- smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
- locked, true);
+ if ((folio_ref_count(folio) == 1) || migration) {
+ smaps_page_accumulate(mss, folio, size, size << PSS_SHIFT,
+ dirty, locked, true);
return;
}
for (i = 0; i < nr; i++, page++) {
@@ -490,8 +492,8 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
unsigned long pss = PAGE_SIZE << PSS_SHIFT;
if (mapcount >= 2)
pss /= mapcount;
- smaps_page_accumulate(mss, page, PAGE_SIZE, pss, dirty, locked,
- mapcount < 2);
+ smaps_page_accumulate(mss, folio, PAGE_SIZE, pss,
+ dirty, locked, mapcount < 2);
}
}
@@ -576,6 +578,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
+ struct folio *folio;
bool migration = false;
if (pmd_present(*pmd)) {
@@ -590,11 +593,12 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
}
if (IS_ERR_OR_NULL(page))
return;
- if (PageAnon(page))
+ folio = page_folio(page);
+ if (folio_test_anon(folio))
mss->anonymous_thp += HPAGE_PMD_SIZE;
- else if (PageSwapBacked(page))
+ else if (folio_test_swapbacked(folio))
mss->shmem_thp += HPAGE_PMD_SIZE;
- else if (is_zone_device_page(page))
+ else if (folio_is_zone_device(folio))
/* pass */;
else
mss->file_thp += HPAGE_PMD_SIZE;
@@ -726,19 +730,20 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
{
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
- struct page *page = NULL;
- pte_t ptent = ptep_get(pte);
+ pte_t ptent = huge_ptep_get(pte);
+ struct folio *folio = NULL;
if (pte_present(ptent)) {
- page = vm_normal_page(vma, addr, ptent);
+ folio = page_folio(pte_page(ptent));
} else if (is_swap_pte(ptent)) {
swp_entry_t swpent = pte_to_swp_entry(ptent);
if (is_pfn_swap_entry(swpent))
- page = pfn_swap_entry_to_page(swpent);
+ folio = pfn_swap_entry_folio(swpent);
}
- if (page) {
- if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte))
+ if (folio) {
+ if (folio_likely_mapped_shared(folio) ||
+ hugetlb_pmd_shared(pte))
mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
else
mss->private_hugetlb += huge_page_size(hstate_vma(vma));
@@ -866,8 +871,8 @@ static int show_smap(struct seq_file *m, void *v)
__show_smap(m, &mss, false);
seq_printf(m, "THPeligible: %8u\n",
- !!thp_vma_allowable_orders(vma, vma->vm_flags, true, false,
- true, THP_ORDERS_ALL));
+ !!thp_vma_allowable_orders(vma, vma->vm_flags,
+ TVA_SMAPS | TVA_ENFORCE_SYSFS, THP_ORDERS_ALL));
if (arch_pkeys_enabled())
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
@@ -1161,7 +1166,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
pte_t *pte, ptent;
spinlock_t *ptl;
- struct page *page;
+ struct folio *folio;
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
@@ -1173,12 +1178,12 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
if (!pmd_present(*pmd))
goto out;
- page = pmd_page(*pmd);
+ folio = pmd_folio(*pmd);
/* Clear accessed and referenced bits. */
pmdp_test_and_clear_young(vma, addr, pmd);
- test_and_clear_page_young(page);
- ClearPageReferenced(page);
+ folio_test_clear_young(folio);
+ folio_clear_referenced(folio);
out:
spin_unlock(ptl);
return 0;
@@ -1200,14 +1205,14 @@ out:
if (!pte_present(ptent))
continue;
- page = vm_normal_page(vma, addr, ptent);
- if (!page)
+ folio = vm_normal_folio(vma, addr, ptent);
+ if (!folio)
continue;
/* Clear accessed and referenced bits. */
ptep_test_and_clear_young(vma, addr, pte);
- test_and_clear_page_young(page);
- ClearPageReferenced(page);
+ folio_test_clear_young(folio);
+ folio_clear_referenced(folio);
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
@@ -1574,12 +1579,13 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
pte = huge_ptep_get(ptep);
if (pte_present(pte)) {
- struct page *page = pte_page(pte);
+ struct folio *folio = page_folio(pte_page(pte));
- if (!PageAnon(page))
+ if (!folio_test_anon(folio))
flags |= PM_FILE;
- if (page_mapcount(page) == 1)
+ if (!folio_likely_mapped_shared(folio) &&
+ !hugetlb_pmd_shared(ptep))
flags |= PM_MMAP_EXCLUSIVE;
if (huge_pte_uffd_wp(pte))
@@ -2547,28 +2553,29 @@ struct numa_maps_private {
static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
unsigned long nr_pages)
{
+ struct folio *folio = page_folio(page);
int count = page_mapcount(page);
md->pages += nr_pages;
- if (pte_dirty || PageDirty(page))
+ if (pte_dirty || folio_test_dirty(folio))
md->dirty += nr_pages;
- if (PageSwapCache(page))
+ if (folio_test_swapcache(folio))
md->swapcache += nr_pages;
- if (PageActive(page) || PageUnevictable(page))
+ if (folio_test_active(folio) || folio_test_unevictable(folio))
md->active += nr_pages;
- if (PageWriteback(page))
+ if (folio_test_writeback(folio))
md->writeback += nr_pages;
- if (PageAnon(page))
+ if (folio_test_anon(folio))
md->anon += nr_pages;
if (count > md->mapcount_max)
md->mapcount_max = count;
- md->node[page_to_nid(page)] += nr_pages;
+ md->node[folio_nid(folio)] += nr_pages;
}
static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 1fb213f379a5b6..5d08d4d159d30b 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -1370,9 +1370,8 @@ static void vmcoredd_write_header(void *buf, struct vmcoredd_data *data,
vdd_hdr->n_descsz = size + sizeof(vdd_hdr->dump_name);
vdd_hdr->n_type = NT_VMCOREDD;
- strncpy((char *)vdd_hdr->name, VMCOREDD_NOTE_NAME,
- sizeof(vdd_hdr->name));
- memcpy(vdd_hdr->dump_name, data->dump_name, sizeof(vdd_hdr->dump_name));
+ strscpy_pad(vdd_hdr->name, VMCOREDD_NOTE_NAME);
+ strscpy_pad(vdd_hdr->dump_name, data->dump_name);
}
/**
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index c7a1aa3c882b02..b45c7edc3225e4 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -35,7 +35,7 @@ static unsigned long ramfs_mmu_get_unmapped_area(struct file *file,
unsigned long addr, unsigned long len, unsigned long pgoff,
unsigned long flags)
{
- return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
}
const struct file_operations ramfs_file_operations = {
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 6474529c425306..e539ccd39e1ee7 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2589,7 +2589,7 @@ static void journal_list_init(struct super_block *sb)
static void release_journal_dev(struct reiserfs_journal *journal)
{
if (journal->j_bdev_file) {
- fput(journal->j_bdev_file);
+ bdev_fput(journal->j_bdev_file);
journal->j_bdev_file = NULL;
}
}
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 2be227532f3997..2cbb924620747f 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb)
#ifdef CONFIG_ROMFS_ON_BLOCK
if (sb->s_bdev) {
sync_blockdev(sb->s_bdev);
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
}
#endif
}
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index a0017724d52393..0ff2491c311d8a 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -417,6 +417,7 @@ smb2_close_cached_fid(struct kref *ref)
{
struct cached_fid *cfid = container_of(ref, struct cached_fid,
refcount);
+ int rc;
spin_lock(&cfid->cfids->cfid_list_lock);
if (cfid->on_list) {
@@ -430,9 +431,10 @@ smb2_close_cached_fid(struct kref *ref)
cfid->dentry = NULL;
if (cfid->is_open) {
- SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
+ rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
cfid->fid.volatile_fid);
- atomic_dec(&cfid->tcon->num_remote_opens);
+ if (rc) /* should we retry on -EBUSY or -EAGAIN? */
+ cifs_dbg(VFS, "close cached dir rc %d\n", rc);
}
free_cached_dir(cfid);
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index 226d4835c92db8..c71ae5c043060e 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -250,6 +250,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
list_for_each_entry(cfile, &tcon->openFileList, tlist) {
@@ -676,6 +678,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
}
#endif /* CONFIG_CIFS_STATS2 */
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
atomic_set(&tcon->num_smbs_sent, 0);
spin_lock(&tcon->stat_lock);
@@ -755,6 +759,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
}
#endif /* STATS2 */
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
i++;
seq_printf(m, "\n%d) %s", i, tcon->tree_name);
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index aa6f1ecb7c0e8f..39277c37185cac 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -156,6 +156,7 @@ struct workqueue_struct *decrypt_wq;
struct workqueue_struct *fileinfo_put_wq;
struct workqueue_struct *cifsoplockd_wq;
struct workqueue_struct *deferredclose_wq;
+struct workqueue_struct *serverclose_wq;
__u32 cifs_lock_secret;
/*
@@ -388,6 +389,7 @@ cifs_alloc_inode(struct super_block *sb)
* server, can not assume caching of file data or metadata.
*/
cifs_set_oplock_level(cifs_inode, 0);
+ cifs_inode->lease_granted = false;
cifs_inode->flags = 0;
spin_lock_init(&cifs_inode->writers_lock);
cifs_inode->writers = 0;
@@ -738,6 +740,8 @@ static void cifs_umount_begin(struct super_block *sb)
spin_lock(&cifs_tcp_ses_lock);
spin_lock(&tcon->tc_lock);
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_umount);
if ((tcon->tc_count > 1) || (tcon->status == TID_EXITING)) {
/* we have other mounts to same share or we have
already tried to umount this and woken up
@@ -1888,6 +1892,13 @@ init_cifs(void)
goto out_destroy_cifsoplockd_wq;
}
+ serverclose_wq = alloc_workqueue("serverclose",
+ WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ if (!serverclose_wq) {
+ rc = -ENOMEM;
+ goto out_destroy_serverclose_wq;
+ }
+
rc = cifs_init_inodecache();
if (rc)
goto out_destroy_deferredclose_wq;
@@ -1962,6 +1973,8 @@ out_destroy_decrypt_wq:
destroy_workqueue(decrypt_wq);
out_destroy_cifsiod_wq:
destroy_workqueue(cifsiod_wq);
+out_destroy_serverclose_wq:
+ destroy_workqueue(serverclose_wq);
out_clean_proc:
cifs_proc_clean();
return rc;
@@ -1991,6 +2004,7 @@ exit_cifs(void)
destroy_workqueue(cifsoplockd_wq);
destroy_workqueue(decrypt_wq);
destroy_workqueue(fileinfo_put_wq);
+ destroy_workqueue(serverclose_wq);
destroy_workqueue(cifsiod_wq);
cifs_proc_clean();
}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 7ed9d05f6890b4..6ff35570db813a 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -442,10 +442,10 @@ struct smb_version_operations {
/* set fid protocol-specific info */
void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
/* close a file */
- void (*close)(const unsigned int, struct cifs_tcon *,
+ int (*close)(const unsigned int, struct cifs_tcon *,
struct cifs_fid *);
/* close a file, returning file attributes and timestamps */
- void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
+ int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *pfile_info);
/* send a flush request to the server */
int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
@@ -1077,6 +1077,7 @@ struct cifs_ses {
and after mount option parsing we fill it */
char *domainName;
char *password;
+ char *password2; /* When key rotation used, new password may be set before it expires */
char workstation_name[CIFS_MAX_WORKSTATION_LEN];
struct session_key auth_key;
struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
@@ -1189,6 +1190,7 @@ struct cifs_fattr {
*/
struct cifs_tcon {
struct list_head tcon_list;
+ int debug_id; /* Debugging for tracing */
int tc_count;
struct list_head rlist; /* reconnect list */
spinlock_t tc_lock; /* protect anything here that is not protected */
@@ -1275,13 +1277,14 @@ struct cifs_tcon {
__u32 max_cached_dirs;
#ifdef CONFIG_CIFS_FSCACHE
u64 resource_id; /* server resource id */
+ bool fscache_acquired; /* T if we've tried acquiring a cookie */
struct fscache_volume *fscache; /* cookie for share */
+ struct mutex fscache_lock; /* Prevent regetting a cookie */
#endif
struct list_head pending_opens; /* list of incomplete opens */
struct cached_fids *cfids;
/* BB add field for back pointer to sb struct(s)? */
#ifdef CONFIG_CIFS_DFS_UPCALL
- struct list_head dfs_ses_list;
struct delayed_work dfs_cache_work;
#endif
struct delayed_work query_interfaces; /* query interfaces workqueue job */
@@ -1440,6 +1443,7 @@ struct cifsFileInfo {
bool swapfile:1;
bool oplock_break_cancelled:1;
bool status_file_deleted:1; /* file has been deleted */
+ bool offload:1; /* offload final part of _put to a wq */
unsigned int oplock_epoch; /* epoch from the lease break */
__u32 oplock_level; /* oplock/lease level from the lease break */
int count;
@@ -1448,6 +1452,7 @@ struct cifsFileInfo {
struct cifs_search_info srch_inf;
struct work_struct oplock_break; /* work for oplock breaks */
struct work_struct put; /* work for the final part of _put */
+ struct work_struct serverclose; /* work for serverclose */
struct delayed_work deferred;
bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
char *symlink_target;
@@ -1804,7 +1809,6 @@ struct cifs_mount_ctx {
struct TCP_Server_Info *server;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
- struct list_head dfs_ses_list;
};
static inline void __free_dfs_info_param(struct dfs_info3_param *param)
@@ -2105,6 +2109,7 @@ extern struct workqueue_struct *decrypt_wq;
extern struct workqueue_struct *fileinfo_put_wq;
extern struct workqueue_struct *cifsoplockd_wq;
extern struct workqueue_struct *deferredclose_wq;
+extern struct workqueue_struct *serverclose_wq;
extern __u32 cifs_lock_secret;
extern mempool_t *cifs_sm_req_poolp;
@@ -2324,4 +2329,14 @@ struct smb2_compound_vars {
struct kvec ea_iov;
};
+static inline bool cifs_ses_exiting(struct cifs_ses *ses)
+{
+ bool ret;
+
+ spin_lock(&ses->ses_lock);
+ ret = ses->ses_status == SES_EXITING;
+ spin_unlock(&ses->ses_lock);
+ return ret;
+}
+
#endif /* _CIFS_GLOB_H */
diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index c0513fbb8a59d4..c46d418c1c0c3e 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -882,7 +882,7 @@ typedef struct smb_com_open_rsp {
__u8 OplockLevel;
__u16 Fid;
__le32 CreateAction;
- struct_group(common_attributes,
+ struct_group_attr(common_attributes, __packed,
__le64 CreationTime;
__le64 LastAccessTime;
__le64 LastWriteTime;
@@ -2266,7 +2266,7 @@ typedef struct {
/* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */
/******************************************************************************/
typedef struct { /* data block encoding of response to level 263 QPathInfo */
- struct_group(common_attributes,
+ struct_group_attr(common_attributes, __packed,
__le64 CreationTime;
__le64 LastAccessTime;
__le64 LastWriteTime;
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index 0723e1b57256b8..fbc358c09da3b1 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -303,7 +303,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx,
struct TCP_Server_Info *primary_server);
extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
int from_reconnect);
-extern void cifs_put_tcon(struct cifs_tcon *tcon);
+extern void cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace);
extern void cifs_release_automount_timer(void);
@@ -530,8 +530,9 @@ extern int CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses);
extern struct cifs_ses *sesInfoAlloc(void);
extern void sesInfoFree(struct cifs_ses *);
-extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled);
-extern void tconInfoFree(struct cifs_tcon *);
+extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled,
+ enum smb3_tcon_ref_trace trace);
+extern void tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace);
extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server,
__u32 *pexpected_response_sequence_number);
@@ -721,35 +722,33 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options)
return options;
}
-struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon);
-void cifs_put_tcon_super(struct super_block *sb);
int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry);
-/* Put references of @ses and @ses->dfs_root_ses */
+/* Put references of @ses and its children */
static inline void cifs_put_smb_ses(struct cifs_ses *ses)
{
- struct cifs_ses *rses = ses->dfs_root_ses;
+ struct cifs_ses *next;
- __cifs_put_smb_ses(ses);
- if (rses)
- __cifs_put_smb_ses(rses);
+ do {
+ next = ses->dfs_root_ses;
+ __cifs_put_smb_ses(ses);
+ } while ((ses = next));
}
-/* Get an active reference of @ses and @ses->dfs_root_ses.
+/* Get an active reference of @ses and its children.
*
* NOTE: make sure to call this function when incrementing reference count of
* @ses to ensure that any DFS root session attached to it (@ses->dfs_root_ses)
* will also get its reference count incremented.
*
- * cifs_put_smb_ses() will put both references, so call it when you're done.
+ * cifs_put_smb_ses() will put all references, so call it when you're done.
*/
static inline void cifs_smb_ses_inc_refcount(struct cifs_ses *ses)
{
lockdep_assert_held(&cifs_tcp_ses_lock);
- ses->ses_count++;
- if (ses->dfs_root_ses)
- ses->dfs_root_ses->ses_count++;
+ for (; ses; ses = ses->dfs_root_ses)
+ ses->ses_count++;
}
static inline bool dfs_src_pathname_equal(const char *s1, const char *s2)
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 5aee555515730d..23b5709ddc311c 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -5854,10 +5854,8 @@ SetEARetry:
parm_data->list.EA_flags = 0;
/* we checked above that name len is less than 255 */
parm_data->list.name_len = (__u8)name_len;
- /* EA names are always ASCII */
- if (ea_name)
- strncpy(parm_data->list.name, ea_name, name_len);
- parm_data->list.name[name_len] = '\0';
+ /* EA names are always ASCII and NUL-terminated */
+ strscpy(parm_data->list.name, ea_name ?: "", name_len + 1);
parm_data->list.value_len = cpu_to_le16(ea_value_len);
/* caller ensures that ea_value_len is less than 64K but
we need to ensure that it fits within the smb */
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 9b85b5341822e7..7a16e12f5da879 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -175,6 +175,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
spin_lock(&ses->chan_lock);
for (i = 0; i < ses->chan_count; i++) {
if (!ses->chans[i].server)
@@ -232,7 +234,13 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) {
- /* check if iface is still active */
+ spin_lock(&ses->ses_lock);
+ if (ses->ses_status == SES_EXITING) {
+ spin_unlock(&ses->ses_lock);
+ continue;
+ }
+ spin_unlock(&ses->ses_lock);
+
spin_lock(&ses->chan_lock);
if (cifs_ses_get_chan_index(ses, server) ==
CIFS_INVAL_CHAN_INDEX) {
@@ -1860,6 +1868,9 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx)
ctx->sectype != ses->sectype)
return 0;
+ if (ctx->dfs_root_ses != ses->dfs_root_ses)
+ return 0;
+
/*
* If an existing session is limited to less channels than
* requested, it should not be reused
@@ -1932,7 +1943,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx)
}
/* no need to setup directory caching on IPC share, so pass in false */
- tcon = tcon_info_alloc(false);
+ tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_ipc);
if (tcon == NULL)
return -ENOMEM;
@@ -1949,7 +1960,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx)
if (rc) {
cifs_server_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc);
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc_fail);
goto out;
}
@@ -1963,31 +1974,6 @@ out:
return rc;
}
-/**
- * cifs_free_ipc - helper to release the session IPC tcon
- * @ses: smb session to unmount the IPC from
- *
- * Needs to be called everytime a session is destroyed.
- *
- * On session close, the IPC is closed and the server must release all tcons of the session.
- * No need to send a tree disconnect here.
- *
- * Besides, it will make the server to not close durable and resilient files on session close, as
- * specified in MS-SMB2 3.3.5.6 Receiving an SMB2 LOGOFF Request.
- */
-static int
-cifs_free_ipc(struct cifs_ses *ses)
-{
- struct cifs_tcon *tcon = ses->tcon_ipc;
-
- if (tcon == NULL)
- return 0;
-
- tconInfoFree(tcon);
- ses->tcon_ipc = NULL;
- return 0;
-}
-
static struct cifs_ses *
cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
{
@@ -2019,48 +2005,52 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
void __cifs_put_smb_ses(struct cifs_ses *ses)
{
struct TCP_Server_Info *server = ses->server;
+ struct cifs_tcon *tcon;
unsigned int xid;
size_t i;
+ bool do_logoff;
int rc;
+ spin_lock(&cifs_tcp_ses_lock);
spin_lock(&ses->ses_lock);
- if (ses->ses_status == SES_EXITING) {
+ cifs_dbg(FYI, "%s: id=0x%llx ses_count=%d ses_status=%u ipc=%s\n",
+ __func__, ses->Suid, ses->ses_count, ses->ses_status,
+ ses->tcon_ipc ? ses->tcon_ipc->tree_name : "none");
+ if (ses->ses_status == SES_EXITING || --ses->ses_count > 0) {
spin_unlock(&ses->ses_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
return;
}
- spin_unlock(&ses->ses_lock);
+ /* ses_count can never go negative */
+ WARN_ON(ses->ses_count < 0);
- cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
- cifs_dbg(FYI,
- "%s: ses ipc: %s\n", __func__, ses->tcon_ipc ? ses->tcon_ipc->tree_name : "NONE");
+ spin_lock(&ses->chan_lock);
+ cifs_chan_clear_need_reconnect(ses, server);
+ spin_unlock(&ses->chan_lock);
- spin_lock(&cifs_tcp_ses_lock);
- if (--ses->ses_count > 0) {
- spin_unlock(&cifs_tcp_ses_lock);
- return;
- }
- spin_lock(&ses->ses_lock);
- if (ses->ses_status == SES_GOOD)
- ses->ses_status = SES_EXITING;
+ do_logoff = ses->ses_status == SES_GOOD && server->ops->logoff;
+ ses->ses_status = SES_EXITING;
+ tcon = ses->tcon_ipc;
+ ses->tcon_ipc = NULL;
spin_unlock(&ses->ses_lock);
spin_unlock(&cifs_tcp_ses_lock);
- /* ses_count can never go negative */
- WARN_ON(ses->ses_count < 0);
-
- spin_lock(&ses->ses_lock);
- if (ses->ses_status == SES_EXITING && server->ops->logoff) {
- spin_unlock(&ses->ses_lock);
- cifs_free_ipc(ses);
+ /*
+ * On session close, the IPC is closed and the server must release all
+ * tcons of the session. No need to send a tree disconnect here.
+ *
+ * Besides, it will make the server to not close durable and resilient
+ * files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an
+ * SMB2 LOGOFF Request.
+ */
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc);
+ if (do_logoff) {
xid = get_xid();
rc = server->ops->logoff(xid, ses);
if (rc)
cifs_server_dbg(VFS, "%s: Session Logoff failure rc=%d\n",
__func__, rc);
_free_xid(xid);
- } else {
- spin_unlock(&ses->ses_lock);
- cifs_free_ipc(ses);
}
spin_lock(&cifs_tcp_ses_lock);
@@ -2193,6 +2183,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
}
++delim;
+ /* BB consider adding support for password2 (Key Rotation) for multiuser in future */
ctx->password = kstrndup(delim, len, GFP_KERNEL);
if (!ctx->password) {
cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n",
@@ -2216,6 +2207,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
kfree(ctx->username);
ctx->username = NULL;
kfree_sensitive(ctx->password);
+ /* no need to free ctx->password2 since not allocated in this path */
ctx->password = NULL;
goto out_key_put;
}
@@ -2327,6 +2319,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
if (!ses->password)
goto get_ses_fail;
}
+ /* ctx->password freed at unmount */
+ if (ctx->password2) {
+ ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
+ if (!ses->password2)
+ goto get_ses_fail;
+ }
if (ctx->domainname) {
ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL);
if (!ses->domainName)
@@ -2373,9 +2371,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
* need to lock before changing something in the session.
*/
spin_lock(&cifs_tcp_ses_lock);
+ if (ctx->dfs_root_ses)
+ cifs_smb_ses_inc_refcount(ctx->dfs_root_ses);
ses->dfs_root_ses = ctx->dfs_root_ses;
- if (ses->dfs_root_ses)
- ses->dfs_root_ses->ses_count++;
list_add(&ses->smb_ses_list, &server->smb_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
@@ -2434,6 +2432,8 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
continue;
}
++tcon->tc_count;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_find);
spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
return tcon;
@@ -2443,7 +2443,7 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
}
void
-cifs_put_tcon(struct cifs_tcon *tcon)
+cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace)
{
unsigned int xid;
struct cifs_ses *ses;
@@ -2459,6 +2459,7 @@ cifs_put_tcon(struct cifs_tcon *tcon)
cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count);
spin_lock(&cifs_tcp_ses_lock);
spin_lock(&tcon->tc_lock);
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count - 1, trace);
if (--tcon->tc_count > 0) {
spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
@@ -2495,7 +2496,7 @@ cifs_put_tcon(struct cifs_tcon *tcon)
_free_xid(xid);
cifs_fscache_release_super_cookie(tcon);
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free);
cifs_put_smb_ses(ses);
}
@@ -2549,7 +2550,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
nohandlecache = ctx->nohandlecache;
else
nohandlecache = true;
- tcon = tcon_info_alloc(!nohandlecache);
+ tcon = tcon_info_alloc(!nohandlecache, netfs_trace_tcon_ref_new);
if (tcon == NULL) {
rc = -ENOMEM;
goto out_fail;
@@ -2739,7 +2740,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
return tcon;
out_fail:
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_fail);
return ERR_PTR(rc);
}
@@ -2756,7 +2757,7 @@ cifs_put_tlink(struct tcon_link *tlink)
}
if (!IS_ERR(tlink_tcon(tlink)))
- cifs_put_tcon(tlink_tcon(tlink));
+ cifs_put_tcon(tlink_tcon(tlink), netfs_trace_tcon_ref_put_tlink);
kfree(tlink);
}
@@ -3321,11 +3322,14 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx)
int rc = 0;
if (mnt_ctx->tcon)
- cifs_put_tcon(mnt_ctx->tcon);
+ cifs_put_tcon(mnt_ctx->tcon, netfs_trace_tcon_ref_put_mnt_ctx);
else if (mnt_ctx->ses)
cifs_put_smb_ses(mnt_ctx->ses);
else if (mnt_ctx->server)
cifs_put_tcp_session(mnt_ctx->server, 0);
+ mnt_ctx->ses = NULL;
+ mnt_ctx->tcon = NULL;
+ mnt_ctx->server = NULL;
mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
free_xid(mnt_ctx->xid);
}
@@ -3604,8 +3608,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
bool isdfs;
int rc;
- INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list);
-
rc = dfs_mount_share(&mnt_ctx, &isdfs);
if (rc)
goto error;
@@ -3636,7 +3638,6 @@ out:
return rc;
error:
- dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list);
cifs_mount_put_conns(&mnt_ctx);
return rc;
}
@@ -3651,6 +3652,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
goto error;
rc = cifs_mount_get_tcon(&mnt_ctx);
+ if (!rc) {
+ /*
+ * Prevent superblock from being created with any missing
+ * connections.
+ */
+ if (WARN_ON(!mnt_ctx.server))
+ rc = -EHOSTDOWN;
+ else if (WARN_ON(!mnt_ctx.ses))
+ rc = -EACCES;
+ else if (WARN_ON(!mnt_ctx.tcon))
+ rc = -ENOENT;
+ }
if (rc)
goto error;
@@ -3988,13 +4001,14 @@ cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses)
}
static struct cifs_tcon *
-cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
{
int rc;
struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
struct cifs_ses *ses;
struct cifs_tcon *tcon = NULL;
struct smb3_fs_context *ctx;
+ char *origin_fullpath = NULL;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx == NULL)
@@ -4018,6 +4032,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
ctx->sign = master_tcon->ses->sign;
ctx->seal = master_tcon->seal;
ctx->witness = master_tcon->use_witness;
+ ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses;
rc = cifs_set_vol_auth(ctx, master_tcon->ses);
if (rc) {
@@ -4037,12 +4052,39 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
goto out;
}
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ spin_lock(&master_tcon->tc_lock);
+ if (master_tcon->origin_fullpath) {
+ spin_unlock(&master_tcon->tc_lock);
+ origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source);
+ if (IS_ERR(origin_fullpath)) {
+ tcon = ERR_CAST(origin_fullpath);
+ origin_fullpath = NULL;
+ cifs_put_smb_ses(ses);
+ goto out;
+ }
+ } else {
+ spin_unlock(&master_tcon->tc_lock);
+ }
+#endif
+
tcon = cifs_get_tcon(ses, ctx);
if (IS_ERR(tcon)) {
cifs_put_smb_ses(ses);
goto out;
}
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (origin_fullpath) {
+ spin_lock(&tcon->tc_lock);
+ tcon->origin_fullpath = origin_fullpath;
+ spin_unlock(&tcon->tc_lock);
+ origin_fullpath = NULL;
+ queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
+ dfs_cache_get_ttl() * HZ);
+ }
+#endif
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (cap_unix(ses))
reset_cifs_unix_caps(0, tcon, NULL, ctx);
@@ -4051,11 +4093,23 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
out:
kfree(ctx->username);
kfree_sensitive(ctx->password);
+ kfree(origin_fullpath);
kfree(ctx);
return tcon;
}
+static struct cifs_tcon *
+cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+{
+ struct cifs_tcon *ret;
+
+ cifs_mount_lock();
+ ret = __cifs_construct_tcon(cifs_sb, fsuid);
+ cifs_mount_unlock();
+ return ret;
+}
+
struct cifs_tcon *
cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
{
diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c
index 449c59830039bc..3ec965547e3d4d 100644
--- a/fs/smb/client/dfs.c
+++ b/fs/smb/client/dfs.c
@@ -66,33 +66,20 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path)
}
/*
- * Track individual DFS referral servers used by new DFS mount.
- *
- * On success, their lifetime will be shared by final tcon (dfs_ses_list).
- * Otherwise, they will be put by dfs_put_root_smb_sessions() in cifs_mount().
+ * Get an active reference of @ses so that next call to cifs_put_tcon() won't
+ * release it as any new DFS referrals must go through its IPC tcon.
*/
-static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
+static void add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
{
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
- struct dfs_root_ses *root_ses;
struct cifs_ses *ses = mnt_ctx->ses;
if (ses) {
- root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL);
- if (!root_ses)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&root_ses->list);
-
spin_lock(&cifs_tcp_ses_lock);
cifs_smb_ses_inc_refcount(ses);
spin_unlock(&cifs_tcp_ses_lock);
- root_ses->ses = ses;
- list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list);
}
- /* Select new DFS referral server so that new referrals go through it */
ctx->dfs_root_ses = ses;
- return 0;
}
static inline int parse_dfs_target(struct smb3_fs_context *ctx,
@@ -185,11 +172,8 @@ again:
continue;
}
- if (is_refsrv) {
- rc = add_root_smb_session(mnt_ctx);
- if (rc)
- goto out;
- }
+ if (is_refsrv)
+ add_root_smb_session(mnt_ctx);
rc = ref_walk_advance(rw);
if (!rc) {
@@ -232,6 +216,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct cifs_tcon *tcon;
char *origin_fullpath;
+ bool new_tcon = true;
int rc;
origin_fullpath = dfs_get_path(cifs_sb, ctx->source);
@@ -239,6 +224,18 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
return PTR_ERR(origin_fullpath);
rc = dfs_referral_walk(mnt_ctx);
+ if (!rc) {
+ /*
+ * Prevent superblock from being created with any missing
+ * connections.
+ */
+ if (WARN_ON(!mnt_ctx->server))
+ rc = -EHOSTDOWN;
+ else if (WARN_ON(!mnt_ctx->ses))
+ rc = -EACCES;
+ else if (WARN_ON(!mnt_ctx->tcon))
+ rc = -ENOENT;
+ }
if (rc)
goto out;
@@ -247,15 +244,14 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
if (!tcon->origin_fullpath) {
tcon->origin_fullpath = origin_fullpath;
origin_fullpath = NULL;
+ } else {
+ new_tcon = false;
}
spin_unlock(&tcon->tc_lock);
- if (list_empty(&tcon->dfs_ses_list)) {
- list_replace_init(&mnt_ctx->dfs_ses_list, &tcon->dfs_ses_list);
+ if (new_tcon) {
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
dfs_cache_get_ttl() * HZ);
- } else {
- dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list);
}
out:
@@ -298,7 +294,6 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
if (rc)
return rc;
- ctx->dfs_root_ses = mnt_ctx->ses;
/*
* If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally
* try to get an DFS referral (even cached) to determine whether it is an DFS mount.
@@ -324,7 +319,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
*isdfs = true;
add_root_smb_session(mnt_ctx);
- return __dfs_mount_share(mnt_ctx);
+ rc = __dfs_mount_share(mnt_ctx);
+ dfs_put_root_smb_sessions(mnt_ctx);
+ return rc;
}
/* Update dfs referral path of superblock */
diff --git a/fs/smb/client/dfs.h b/fs/smb/client/dfs.h
index 875ab7ae57fcdf..e5c4dcf837503a 100644
--- a/fs/smb/client/dfs.h
+++ b/fs/smb/client/dfs.h
@@ -7,7 +7,9 @@
#define _CIFS_DFS_H
#include "cifsglob.h"
+#include "cifsproto.h"
#include "fs_context.h"
+#include "dfs_cache.h"
#include "cifs_unicode.h"
#include <linux/namei.h>
@@ -114,11 +116,6 @@ static inline void ref_walk_set_tgt_hint(struct dfs_ref_walk *rw)
ref_walk_tit(rw));
}
-struct dfs_root_ses {
- struct list_head list;
- struct cifs_ses *ses;
-};
-
int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref,
struct smb3_fs_context *ctx);
int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs);
@@ -133,20 +130,32 @@ static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *p
{
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+ struct cifs_ses *rses = ctx->dfs_root_ses ?: mnt_ctx->ses;
- return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls,
+ return dfs_cache_find(mnt_ctx->xid, rses, cifs_sb->local_nls,
cifs_remap(cifs_sb), path, ref, tl);
}
-static inline void dfs_put_root_smb_sessions(struct list_head *head)
+/*
+ * cifs_get_smb_ses() already guarantees an active reference of
+ * @ses->dfs_root_ses when a new session is created, so we need to put extra
+ * references of all DFS root sessions that were used across the mount process
+ * in dfs_mount_share().
+ */
+static inline void dfs_put_root_smb_sessions(struct cifs_mount_ctx *mnt_ctx)
{
- struct dfs_root_ses *root, *tmp;
+ const struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+ struct cifs_ses *ses = ctx->dfs_root_ses;
+ struct cifs_ses *cur;
+
+ if (!ses)
+ return;
- list_for_each_entry_safe(root, tmp, head, list) {
- list_del_init(&root->list);
- cifs_put_smb_ses(root->ses);
- kfree(root);
+ for (cur = ses; cur; cur = cur->dfs_root_ses) {
+ if (cur->dfs_root_ses)
+ cifs_put_smb_ses(cur->dfs_root_ses);
}
+ cifs_put_smb_ses(ses);
}
#endif /* _CIFS_DFS_H */
diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c
index 508d831fabe378..11c8efecf7aa12 100644
--- a/fs/smb/client/dfs_cache.c
+++ b/fs/smb/client/dfs_cache.c
@@ -1172,8 +1172,8 @@ static bool is_ses_good(struct cifs_ses *ses)
return ret;
}
-/* Refresh dfs referral of tcon and mark it for reconnect if needed */
-static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_refresh)
+/* Refresh dfs referral of @ses and mark it for reconnect if needed */
+static void __refresh_ses_referral(struct cifs_ses *ses, bool force_refresh)
{
struct TCP_Server_Info *server = ses->server;
DFS_CACHE_TGT_LIST(old_tl);
@@ -1181,10 +1181,21 @@ static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_ref
bool needs_refresh = false;
struct cache_entry *ce;
unsigned int xid;
+ char *path = NULL;
int rc = 0;
xid = get_xid();
+ mutex_lock(&server->refpath_lock);
+ if (server->leaf_fullpath) {
+ path = kstrdup(server->leaf_fullpath + 1, GFP_ATOMIC);
+ if (!path)
+ rc = -ENOMEM;
+ }
+ mutex_unlock(&server->refpath_lock);
+ if (!path)
+ goto out;
+
down_read(&htable_rw_lock);
ce = lookup_cache_entry(path);
needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce);
@@ -1218,19 +1229,17 @@ out:
free_xid(xid);
dfs_cache_free_tgts(&old_tl);
dfs_cache_free_tgts(&new_tl);
- return rc;
+ kfree(path);
}
-static int refresh_tcon(struct cifs_tcon *tcon, bool force_refresh)
+static inline void refresh_ses_referral(struct cifs_ses *ses)
{
- struct TCP_Server_Info *server = tcon->ses->server;
- struct cifs_ses *ses = tcon->ses;
+ __refresh_ses_referral(ses, false);
+}
- mutex_lock(&server->refpath_lock);
- if (server->leaf_fullpath)
- __refresh_tcon(server->leaf_fullpath + 1, ses, force_refresh);
- mutex_unlock(&server->refpath_lock);
- return 0;
+static inline void force_refresh_ses_referral(struct cifs_ses *ses)
+{
+ __refresh_ses_referral(ses, true);
}
/**
@@ -1271,34 +1280,20 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
*/
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
- return refresh_tcon(tcon, true);
+ force_refresh_ses_referral(tcon->ses);
+ return 0;
}
/* Refresh all DFS referrals related to DFS tcon */
void dfs_cache_refresh(struct work_struct *work)
{
- struct TCP_Server_Info *server;
- struct dfs_root_ses *rses;
struct cifs_tcon *tcon;
struct cifs_ses *ses;
tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work);
- ses = tcon->ses;
- server = ses->server;
- mutex_lock(&server->refpath_lock);
- if (server->leaf_fullpath)
- __refresh_tcon(server->leaf_fullpath + 1, ses, false);
- mutex_unlock(&server->refpath_lock);
-
- list_for_each_entry(rses, &tcon->dfs_ses_list, list) {
- ses = rses->ses;
- server = ses->server;
- mutex_lock(&server->refpath_lock);
- if (server->leaf_fullpath)
- __refresh_tcon(server->leaf_fullpath + 1, ses, false);
- mutex_unlock(&server->refpath_lock);
- }
+ for (ses = tcon->ses; ses; ses = ses->dfs_root_ses)
+ refresh_ses_referral(ses);
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
atomic_read(&dfs_cache_ttl) * HZ);
diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c
index 89333d9bce36eb..864b194dbaa0a0 100644
--- a/fs/smb/client/dir.c
+++ b/fs/smb/client/dir.c
@@ -189,6 +189,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
int disposition;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;
*oplock = 0;
if (tcon->ses->server->oplocks)
@@ -200,6 +201,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
return PTR_ERR(full_path);
}
+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open &&
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
@@ -276,6 +281,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
desired_access |= GENERIC_READ; /* is this too little? */
if (OPEN_FMODE(oflags) & FMODE_WRITE)
desired_access |= GENERIC_WRITE;
+ if (rdwr_for_fscache == 1)
+ desired_access |= GENERIC_READ;
disposition = FILE_OVERWRITE_IF;
if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
@@ -304,6 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
if (!tcon->unix_ext && (mode & S_IWUGO) == 0)
create_options |= CREATE_OPTION_READONLY;
+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@@ -317,8 +325,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
rc = server->ops->open(xid, &oparms, oplock, buf);
if (rc) {
cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc);
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access &= ~GENERIC_READ;
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }
goto out;
}
+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
@@ -612,11 +627,18 @@ int cifs_mknod(struct mnt_idmap *idmap, struct inode *inode,
goto mknod_out;
}
+ trace_smb3_mknod_enter(xid, tcon->ses->Suid, tcon->tid, full_path);
+
rc = tcon->ses->server->ops->make_node(xid, inode, direntry, tcon,
full_path, mode,
device_number);
mknod_out:
+ if (rc)
+ trace_smb3_mknod_err(xid, tcon->ses->Suid, tcon->tid, rc);
+ else
+ trace_smb3_mknod_done(xid, tcon->ses->Suid, tcon->tid);
+
free_dentry_path(page);
free_xid(xid);
cifs_put_tlink(tlink);
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 16aadce492b2ec..9be37d0fe724e9 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
*/
}
-static inline int cifs_convert_flags(unsigned int flags)
+static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
if ((flags & O_ACCMODE) == O_RDONLY)
return GENERIC_READ;
else if ((flags & O_ACCMODE) == O_WRONLY)
- return GENERIC_WRITE;
+ return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
else if ((flags & O_ACCMODE) == O_RDWR) {
/* GENERIC_ALL is too much permission to request
can cause unnecessary access denied on create */
@@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
int create_options = CREATE_NOT_DIR;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;
if (!server->ops->open)
return -ENOSYS;
- desired_access = cifs_convert_flags(f_flags);
+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
+ desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
/*********************************************************************
* open flag mapping table:
@@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
if (f_flags & O_DIRECT)
create_options |= CREATE_NO_BUFFER;
+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
};
rc = server->ops->open(xid, &oparms, oplock, buf);
- if (rc)
+ if (rc) {
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access = cifs_convert_flags(f_flags, 0);
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }
return rc;
+ }
+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
/* TODO: Add support for calling posix query info but with passing in fid */
if (tcon->unix_ext)
@@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem)
}
static void cifsFileInfo_put_work(struct work_struct *work);
+void serverclose_work(struct work_struct *work);
struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct tcon_link *tlink, __u32 oplock,
@@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
cfile->tlink = cifs_get_tlink(tlink);
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
INIT_WORK(&cfile->put, cifsFileInfo_put_work);
+ INIT_WORK(&cfile->serverclose, serverclose_work);
INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
mutex_init(&cfile->fh_mutex);
spin_lock_init(&cfile->file_info_lock);
@@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work)
cifsFileInfo_put_final(cifs_file);
}
+void serverclose_work(struct work_struct *work)
+{
+ struct cifsFileInfo *cifs_file = container_of(work,
+ struct cifsFileInfo, serverclose);
+
+ struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
+
+ struct TCP_Server_Info *server = tcon->ses->server;
+ int rc = 0;
+ int retries = 0;
+ int MAX_RETRIES = 4;
+
+ do {
+ if (server->ops->close_getattr)
+ rc = server->ops->close_getattr(0, tcon, cifs_file);
+ else if (server->ops->close)
+ rc = server->ops->close(0, tcon, &cifs_file->fid);
+
+ if (rc == -EBUSY || rc == -EAGAIN) {
+ retries++;
+ msleep(250);
+ }
+ } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
+ );
+
+ if (retries == MAX_RETRIES)
+ pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
+
+ if (cifs_file->offload)
+ queue_work(fileinfo_put_wq, &cifs_file->put);
+ else
+ cifsFileInfo_put_final(cifs_file);
+}
+
/**
* cifsFileInfo_put - release a reference of file priv data
*
@@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
struct cifs_fid fid = {};
struct cifs_pending_open open;
bool oplock_break_cancelled;
+ bool serverclose_offloaded = false;
spin_lock(&tcon->open_file_lock);
spin_lock(&cifsi->open_file_lock);
spin_lock(&cifs_file->file_info_lock);
+
+ cifs_file->offload = offload;
if (--cifs_file->count > 0) {
spin_unlock(&cifs_file->file_info_lock);
spin_unlock(&cifsi->open_file_lock);
@@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
struct TCP_Server_Info *server = tcon->ses->server;
unsigned int xid;
+ int rc = 0;
xid = get_xid();
if (server->ops->close_getattr)
- server->ops->close_getattr(xid, tcon, cifs_file);
+ rc = server->ops->close_getattr(xid, tcon, cifs_file);
else if (server->ops->close)
- server->ops->close(xid, tcon, &cifs_file->fid);
+ rc = server->ops->close(xid, tcon, &cifs_file->fid);
_free_xid(xid);
+
+ if (rc == -EBUSY || rc == -EAGAIN) {
+ // Server close failed, hence offloading it as an async op
+ queue_work(serverclose_wq, &cifs_file->serverclose);
+ serverclose_offloaded = true;
+ }
}
if (oplock_break_cancelled)
@@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
cifs_del_pending_open(&open);
- if (offload)
- queue_work(fileinfo_put_wq, &cifs_file->put);
- else
- cifsFileInfo_put_final(cifs_file);
+ // if serverclose has been offloaded to wq (on failure), it will
+ // handle offloading put as well. If serverclose not offloaded,
+ // we need to handle offloading put here.
+ if (!serverclose_offloaded) {
+ if (offload)
+ queue_work(fileinfo_put_wq, &cifs_file->put);
+ else
+ cifsFileInfo_put_final(cifs_file);
+ }
}
int cifs_open(struct inode *inode, struct file *file)
@@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file)
use_cache:
fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
file->f_mode & FMODE_WRITE);
- if (file->f_flags & O_DIRECT &&
- (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
- file->f_flags & O_APPEND))
- cifs_invalidate_cache(file_inode(file),
- FSCACHE_INVAL_DIO_WRITE);
+ if (!(file->f_flags & O_DIRECT))
+ goto out;
+ if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
+ goto out;
+ cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
out:
free_dentry_path(page);
@@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
int disposition = FILE_OPEN;
int create_options = CREATE_NOT_DIR;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;
xid = get_xid();
mutex_lock(&cfile->fh_mutex);
@@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
- desired_access = cifs_convert_flags(cfile->f_flags);
+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
+ desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
/* O_SYNC also has bit for O_DSYNC so following check picks up either */
if (cfile->f_flags & O_SYNC)
@@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
if (server->ops->get_lease_key)
server->ops->get_lease_key(inode, &cfile->fid);
+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
/* indicate that we need to relock the file */
oparms.reconnect = true;
}
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access = cifs_convert_flags(cfile->f_flags, 0);
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }
if (rc) {
mutex_unlock(&cfile->fh_mutex);
@@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
goto reopen_error_exit;
}
+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index bdcbe6ff2739ab..3bbac925d0766b 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -37,7 +37,7 @@
#include "rfc1002pdu.h"
#include "fs_context.h"
-static DEFINE_MUTEX(cifs_mount_mutex);
+DEFINE_MUTEX(cifs_mount_mutex);
static const match_table_t cifs_smb_version_tokens = {
{ Smb_1, SMB1_VERSION_STRING },
@@ -162,6 +162,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_string("username", Opt_user),
fsparam_string("pass", Opt_pass),
fsparam_string("password", Opt_pass),
+ fsparam_string("password2", Opt_pass2),
fsparam_string("ip", Opt_ip),
fsparam_string("addr", Opt_ip),
fsparam_string("domain", Opt_domain),
@@ -345,6 +346,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
new_ctx->nodename = NULL;
new_ctx->username = NULL;
new_ctx->password = NULL;
+ new_ctx->password2 = NULL;
new_ctx->server_hostname = NULL;
new_ctx->domainname = NULL;
new_ctx->UNC = NULL;
@@ -357,6 +359,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
DUP_CTX_STR(prepath);
DUP_CTX_STR(username);
DUP_CTX_STR(password);
+ DUP_CTX_STR(password2);
DUP_CTX_STR(server_hostname);
DUP_CTX_STR(UNC);
DUP_CTX_STR(source);
@@ -745,6 +748,16 @@ static int smb3_fs_context_validate(struct fs_context *fc)
/* set the port that we got earlier */
cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port);
+ if (ctx->uid_specified && !ctx->forceuid_specified) {
+ ctx->override_uid = 1;
+ pr_notice("enabling forceuid mount option implicitly because uid= option is specified\n");
+ }
+
+ if (ctx->gid_specified && !ctx->forcegid_specified) {
+ ctx->override_gid = 1;
+ pr_notice("enabling forcegid mount option implicitly because gid= option is specified\n");
+ }
+
if (ctx->override_uid && !ctx->uid_specified) {
ctx->override_uid = 0;
pr_notice("ignoring forceuid mount option specified with no uid= option\n");
@@ -783,9 +796,9 @@ static int smb3_get_tree(struct fs_context *fc)
if (err)
return err;
- mutex_lock(&cifs_mount_mutex);
+ cifs_mount_lock();
ret = smb3_get_tree_common(fc);
- mutex_unlock(&cifs_mount_mutex);
+ cifs_mount_unlock();
return ret;
}
@@ -905,6 +918,8 @@ static int smb3_reconfigure(struct fs_context *fc)
else {
kfree_sensitive(ses->password);
ses->password = kstrdup(ctx->password, GFP_KERNEL);
+ kfree_sensitive(ses->password2);
+ ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
}
STEAL_STRING(cifs_sb, ctx, domainname);
STEAL_STRING(cifs_sb, ctx, nodename);
@@ -1014,12 +1029,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->override_uid = 0;
else
ctx->override_uid = 1;
+ ctx->forceuid_specified = true;
break;
case Opt_forcegid:
if (result.negated)
ctx->override_gid = 0;
else
ctx->override_gid = 1;
+ ctx->forcegid_specified = true;
break;
case Opt_perm:
if (result.negated)
@@ -1305,6 +1322,18 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
goto cifs_parse_mount_err;
}
break;
+ case Opt_pass2:
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
+ if (strlen(param->string) == 0)
+ break;
+
+ ctx->password2 = kstrdup(param->string, GFP_KERNEL);
+ if (ctx->password2 == NULL) {
+ cifs_errorf(fc, "OOM when copying password2 string\n");
+ goto cifs_parse_mount_err;
+ }
+ break;
case Opt_ip:
if (strlen(param->string) == 0) {
ctx->got_ip = false;
@@ -1608,6 +1637,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
cifs_parse_mount_err:
kfree_sensitive(ctx->password);
ctx->password = NULL;
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
return -EINVAL;
}
@@ -1713,6 +1744,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx)
ctx->username = NULL;
kfree_sensitive(ctx->password);
ctx->password = NULL;
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
kfree(ctx->server_hostname);
ctx->server_hostname = NULL;
kfree(ctx->UNC);
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index 7863f2248c4df8..cf577ec0dd0ac4 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -145,6 +145,7 @@ enum cifs_param {
Opt_source,
Opt_user,
Opt_pass,
+ Opt_pass2,
Opt_ip,
Opt_domain,
Opt_srcaddr,
@@ -164,6 +165,8 @@ enum cifs_param {
};
struct smb3_fs_context {
+ bool forceuid_specified;
+ bool forcegid_specified;
bool uid_specified;
bool cruid_specified;
bool gid_specified;
@@ -177,6 +180,7 @@ struct smb3_fs_context {
char *username;
char *password;
+ char *password2;
char *domainname;
char *source;
char *server_hostname;
@@ -304,4 +308,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
#define MAX_CACHED_FIDS 16
extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp);
+extern struct mutex cifs_mount_mutex;
+
+static inline void cifs_mount_lock(void)
+{
+ mutex_lock(&cifs_mount_mutex);
+}
+
+static inline void cifs_mount_unlock(void)
+{
+ mutex_unlock(&cifs_mount_mutex);
+}
+
#endif
diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c
index c4a3cb736881ae..1a895e6243ee9a 100644
--- a/fs/smb/client/fscache.c
+++ b/fs/smb/client/fscache.c
@@ -12,6 +12,16 @@
#include "cifs_fs_sb.h"
#include "cifsproto.h"
+/*
+ * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode().
+ */
+struct cifs_fscache_inode_key {
+
+ __le64 uniqueid; /* server inode number */
+ __le64 createtime; /* creation time on server */
+ u8 type; /* S_IFMT file type */
+} __packed;
+
static void cifs_fscache_fill_volume_coherency(
struct cifs_tcon *tcon,
struct cifs_fscache_volume_coherency_data *cd)
@@ -33,12 +43,23 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
char *key;
int ret = -ENOMEM;
+ if (tcon->fscache_acquired)
+ return 0;
+
+ mutex_lock(&tcon->fscache_lock);
+ if (tcon->fscache_acquired) {
+ mutex_unlock(&tcon->fscache_lock);
+ return 0;
+ }
+ tcon->fscache_acquired = true;
+
tcon->fscache = NULL;
switch (sa->sa_family) {
case AF_INET:
case AF_INET6:
break;
default:
+ mutex_unlock(&tcon->fscache_lock);
cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
return -EINVAL;
}
@@ -47,6 +68,7 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
sharename = extract_sharename(tcon->tree_name);
if (IS_ERR(sharename)) {
+ mutex_unlock(&tcon->fscache_lock);
cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
return PTR_ERR(sharename);
}
@@ -72,6 +94,11 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
}
pr_err("Cache volume key already in use (%s)\n", key);
vcookie = NULL;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_fscache_collision);
+ } else {
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_fscache_okay);
}
tcon->fscache = vcookie;
@@ -80,6 +107,7 @@ out_2:
kfree(key);
out:
kfree(sharename);
+ mutex_unlock(&tcon->fscache_lock);
return ret;
}
@@ -92,20 +120,26 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
cifs_fscache_fill_volume_coherency(tcon, &cd);
fscache_relinquish_volume(tcon->fscache, &cd, false);
tcon->fscache = NULL;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_fscache_relinq);
}
void cifs_fscache_get_inode_cookie(struct inode *inode)
{
struct cifs_fscache_inode_coherency_data cd;
+ struct cifs_fscache_inode_key key;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ key.uniqueid = cpu_to_le64(cifsi->uniqueid);
+ key.createtime = cpu_to_le64(cifsi->createtime);
+ key.type = (inode->i_mode & S_IFMT) >> 12;
cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd);
cifsi->netfs.cache =
fscache_acquire_cookie(tcon->fscache, 0,
- &cifsi->uniqueid, sizeof(cifsi->uniqueid),
+ &key, sizeof(key),
&cd, sizeof(cd),
i_size_read(&cifsi->netfs.inode));
if (cifsi->netfs.cache)
diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h
index a3d73720914f88..1f2ea9f5cc9a8a 100644
--- a/fs/smb/client/fscache.h
+++ b/fs/smb/client/fscache.h
@@ -109,6 +109,11 @@ static inline void cifs_readahead_to_fscache(struct inode *inode,
__cifs_readahead_to_fscache(inode, pos, len);
}
+static inline bool cifs_fscache_enabled(struct inode *inode)
+{
+ return fscache_cookie_enabled(cifs_inode_cookie(inode));
+}
+
#else /* CONFIG_CIFS_FSCACHE */
static inline
void cifs_fscache_fill_coherency(struct inode *inode,
@@ -124,6 +129,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {}
static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
+static inline bool cifs_fscache_enabled(struct inode *inode) { return false; }
static inline int cifs_fscache_query_occupancy(struct inode *inode,
pgoff_t first, unsigned int nr_pages,
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index d28ab0af604936..60afab5c83d410 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1105,7 +1105,8 @@ static int cifs_get_fattr(struct cifs_open_info_data *data,
} else {
cifs_open_info_to_fattr(fattr, data, sb);
}
- if (!rc && fattr->cf_flags & CIFS_FATTR_DELETE_PENDING)
+ if (!rc && *inode &&
+ (fattr->cf_flags & CIFS_FATTR_DELETE_PENDING))
cifs_mark_open_handles_for_deleted_file(*inode, full_path);
break;
case -EREMOTE:
@@ -1351,6 +1352,8 @@ cifs_find_inode(struct inode *inode, void *opaque)
{
struct cifs_fattr *fattr = opaque;
+ /* [!] The compared values must be the same in struct cifs_fscache_inode_key. */
+
/* don't match inode with different uniqueid */
if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
return 0;
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c
index c012dfdba80d45..855ac5a62edfaa 100644
--- a/fs/smb/client/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -247,7 +247,9 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) {
list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) {
- if (ses_it->Suid == out.session_id) {
+ spin_lock(&ses_it->ses_lock);
+ if (ses_it->ses_status != SES_EXITING &&
+ ses_it->Suid == out.session_id) {
ses = ses_it;
/*
* since we are using the session outside the crit
@@ -255,9 +257,11 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
* so increment its refcount
*/
cifs_smb_ses_inc_refcount(ses);
+ spin_unlock(&ses_it->ses_lock);
found = true;
goto search_end;
}
+ spin_unlock(&ses_it->ses_lock);
}
}
search_end:
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index c3771fc81328ff..07c468ddb88a89 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -98,6 +98,7 @@ sesInfoFree(struct cifs_ses *buf_to_free)
kfree(buf_to_free->serverDomain);
kfree(buf_to_free->serverNOS);
kfree_sensitive(buf_to_free->password);
+ kfree_sensitive(buf_to_free->password2);
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
kfree_sensitive(buf_to_free->auth_key.response);
@@ -110,9 +111,10 @@ sesInfoFree(struct cifs_ses *buf_to_free)
}
struct cifs_tcon *
-tcon_info_alloc(bool dir_leases_enabled)
+tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace)
{
struct cifs_tcon *ret_buf;
+ static atomic_t tcon_debug_id;
ret_buf = kzalloc(sizeof(*ret_buf), GFP_KERNEL);
if (!ret_buf)
@@ -129,7 +131,8 @@ tcon_info_alloc(bool dir_leases_enabled)
atomic_inc(&tconInfoAllocCount);
ret_buf->status = TID_NEW;
- ++ret_buf->tc_count;
+ ret_buf->debug_id = atomic_inc_return(&tcon_debug_id);
+ ret_buf->tc_count = 1;
spin_lock_init(&ret_buf->tc_lock);
INIT_LIST_HEAD(&ret_buf->openFileList);
INIT_LIST_HEAD(&ret_buf->tcon_list);
@@ -138,27 +141,26 @@ tcon_info_alloc(bool dir_leases_enabled)
atomic_set(&ret_buf->num_local_opens, 0);
atomic_set(&ret_buf->num_remote_opens, 0);
ret_buf->stats_from_time = ktime_get_real_seconds();
-#ifdef CONFIG_CIFS_DFS_UPCALL
- INIT_LIST_HEAD(&ret_buf->dfs_ses_list);
+#ifdef CONFIG_CIFS_FSCACHE
+ mutex_init(&ret_buf->fscache_lock);
#endif
+ trace_smb3_tcon_ref(ret_buf->debug_id, ret_buf->tc_count, trace);
return ret_buf;
}
void
-tconInfoFree(struct cifs_tcon *tcon)
+tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace)
{
if (tcon == NULL) {
cifs_dbg(FYI, "Null buffer passed to tconInfoFree\n");
return;
}
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, trace);
free_cached_dirs(tcon->cfids);
atomic_dec(&tconInfoAllocCount);
kfree(tcon->nativeFileSystem);
kfree_sensitive(tcon->password);
-#ifdef CONFIG_CIFS_DFS_UPCALL
- dfs_put_root_smb_sessions(&tcon->dfs_ses_list);
-#endif
kfree(tcon->origin_fullpath);
kfree(tcon);
}
@@ -487,6 +489,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid != buf->Tid)
continue;
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index a9eaba8083b0d6..212ec6f66ec65b 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
}
-static void
+static int
cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
- CIFSSMBClose(xid, tcon, fid->netfid);
+ return CIFSSMBClose(xid, tcon, fid->netfid);
}
static int
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c
index 82b84a4941dd2f..677ef6f99a5be4 100644
--- a/fs/smb/client/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
@@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
cifs_stats_inc(
@@ -697,6 +699,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
@@ -763,7 +767,7 @@ smb2_cancelled_close_fid(struct work_struct *work)
if (rc)
cifs_tcon_dbg(VFS, "Close cancelled mid failed rc:%d\n", rc);
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close_fid);
kfree(cancelled);
}
@@ -807,6 +811,8 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid,
if (tcon->tc_count <= 0) {
struct TCP_Server_Info *server = NULL;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_cancelled_close);
WARN_ONCE(tcon->tc_count < 0, "tcon refcount is negative");
spin_unlock(&cifs_tcp_ses_lock);
@@ -819,12 +825,14 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid,
return 0;
}
tcon->tc_count++;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_cancelled_close);
spin_unlock(&cifs_tcp_ses_lock);
rc = __smb2_handle_cancelled_cmd(tcon, SMB2_CLOSE_HE, 0,
persistent_fid, volatile_fid);
if (rc)
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close);
return rc;
}
@@ -852,7 +860,7 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve
rsp->PersistentFileId,
rsp->VolatileFileId);
if (rc)
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_mid);
return rc;
}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 2ed456948f34ca..28f0b7d19d534b 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -1412,14 +1412,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
memcpy(cfile->fid.create_guid, fid->create_guid, 16);
}
-static void
+static int
smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
- SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
+ return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
}
-static void
+static int
smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile)
{
@@ -1430,7 +1430,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, &file_inf);
if (rc)
- return;
+ return rc;
inode = d_inode(cfile->dentry);
@@ -1459,6 +1459,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
/* End of file and Attributes should not have to be updated on close */
spin_unlock(&inode->i_lock);
+ return rc;
}
static int
@@ -2480,6 +2481,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) {
spin_lock(&tcon->tc_lock);
@@ -2912,8 +2915,11 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
tcon = list_first_entry_or_null(&ses->tcon_list,
struct cifs_tcon,
tcon_list);
- if (tcon)
+ if (tcon) {
tcon->tc_count++;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_dfs_refer);
+ }
spin_unlock(&cifs_tcp_ses_lock);
}
@@ -2977,6 +2983,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
/* ipc tcons are not refcounted */
spin_lock(&cifs_tcp_ses_lock);
tcon->tc_count--;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_dec_dfs_refer);
/* tc_count can never go negative */
WARN_ON(tcon->tc_count < 0);
spin_unlock(&cifs_tcp_ses_lock);
@@ -3913,7 +3921,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
strcat(message, "W");
}
if (!new_oplock)
- strncpy(message, "None", sizeof(message));
+ strscpy(message, "None");
cinode->oplock = new_oplock;
cifs_dbg(FYI, "%s Lease granted on inode %p\n", message,
@@ -4961,68 +4969,84 @@ static int smb2_next_header(struct TCP_Server_Info *server, char *buf,
return 0;
}
-int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
- struct dentry *dentry, struct cifs_tcon *tcon,
- const char *full_path, umode_t mode, dev_t dev)
+static int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+ struct dentry *dentry, struct cifs_tcon *tcon,
+ const char *full_path, umode_t mode, dev_t dev)
{
- struct cifs_open_info_data buf = {};
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
struct cifs_io_parms io_parms = {};
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_fid fid;
unsigned int bytes_written;
- struct win_dev *pdev;
+ struct win_dev pdev = {};
struct kvec iov[2];
__u32 oplock = server->oplocks ? REQ_OPLOCK : 0;
int rc;
- if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode))
+ switch (mode & S_IFMT) {
+ case S_IFCHR:
+ strscpy(pdev.type, "IntxCHR");
+ pdev.major = cpu_to_le64(MAJOR(dev));
+ pdev.minor = cpu_to_le64(MINOR(dev));
+ break;
+ case S_IFBLK:
+ strscpy(pdev.type, "IntxBLK");
+ pdev.major = cpu_to_le64(MAJOR(dev));
+ pdev.minor = cpu_to_le64(MINOR(dev));
+ break;
+ case S_IFIFO:
+ strscpy(pdev.type, "LnxFIFO");
+ break;
+ default:
return -EPERM;
+ }
- oparms = (struct cifs_open_parms) {
- .tcon = tcon,
- .cifs_sb = cifs_sb,
- .desired_access = GENERIC_WRITE,
- .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR |
- CREATE_OPTION_SPECIAL),
- .disposition = FILE_CREATE,
- .path = full_path,
- .fid = &fid,
- };
+ oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, GENERIC_WRITE,
+ FILE_CREATE, CREATE_NOT_DIR |
+ CREATE_OPTION_SPECIAL, ACL_NO_MODE);
+ oparms.fid = &fid;
- rc = server->ops->open(xid, &oparms, &oplock, &buf);
+ rc = server->ops->open(xid, &oparms, &oplock, NULL);
if (rc)
return rc;
- /*
- * BB Do not bother to decode buf since no local inode yet to put
- * timestamps in, but we can reuse it safely.
- */
- pdev = (struct win_dev *)&buf.fi;
io_parms.pid = current->tgid;
io_parms.tcon = tcon;
- io_parms.length = sizeof(*pdev);
- iov[1].iov_base = pdev;
- iov[1].iov_len = sizeof(*pdev);
- if (S_ISCHR(mode)) {
- memcpy(pdev->type, "IntxCHR", 8);
- pdev->major = cpu_to_le64(MAJOR(dev));
- pdev->minor = cpu_to_le64(MINOR(dev));
- } else if (S_ISBLK(mode)) {
- memcpy(pdev->type, "IntxBLK", 8);
- pdev->major = cpu_to_le64(MAJOR(dev));
- pdev->minor = cpu_to_le64(MINOR(dev));
- } else if (S_ISFIFO(mode)) {
- memcpy(pdev->type, "LnxFIFO", 8);
- }
+ io_parms.length = sizeof(pdev);
+ iov[1].iov_base = &pdev;
+ iov[1].iov_len = sizeof(pdev);
rc = server->ops->sync_write(xid, &fid, &io_parms,
&bytes_written, iov, 1);
server->ops->close(xid, tcon, &fid);
- d_drop(dentry);
- /* FIXME: add code here to set EAs */
- cifs_free_open_info(&buf);
+ return rc;
+}
+
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+ struct dentry *dentry, struct cifs_tcon *tcon,
+ const char *full_path, umode_t mode, dev_t dev)
+{
+ struct inode *new = NULL;
+ int rc;
+
+ rc = __cifs_sfu_make_node(xid, inode, dentry, tcon,
+ full_path, mode, dev);
+ if (rc)
+ return rc;
+
+ if (tcon->posix_extensions) {
+ rc = smb311_posix_get_inode_info(&new, full_path, NULL,
+ inode->i_sb, xid);
+ } else if (tcon->unix_ext) {
+ rc = cifs_get_inode_info_unix(&new, full_path,
+ inode->i_sb, xid);
+ } else {
+ rc = cifs_get_inode_info(&new, full_path, NULL,
+ inode->i_sb, xid, NULL);
+ }
+ if (!rc)
+ d_instantiate(dentry, new);
return rc;
}
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 3ea688558e6c9b..a5efce03cb58e2 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -367,6 +367,17 @@ again:
}
rc = cifs_setup_session(0, ses, server, nls_codepage);
+ if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) {
+ /*
+ * Try alternate password for next reconnect (key rotation
+ * could be enabled on the server e.g.) if an alternate
+ * password is available and the current password is expired,
+ * but do not swap on non pwd related errors like host down
+ */
+ if (ses->password2)
+ swap(ses->password2, ses->password);
+ }
+
if ((rc == -EACCES) && !tcon->retry) {
mutex_unlock(&ses->session_mutex);
rc = -EHOSTDOWN;
@@ -3628,9 +3639,9 @@ replay_again:
memcpy(&pbuf->network_open_info,
&rsp->network_open_info,
sizeof(pbuf->network_open_info));
+ atomic_dec(&tcon->num_remote_opens);
}
- atomic_dec(&tcon->num_remote_opens);
close_exit:
SMB2_close_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
@@ -4127,6 +4138,8 @@ void smb2_reconnect_server(struct work_struct *work)
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->need_reconnect || tcon->need_reopen_files) {
tcon->tc_count++;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_reconnect_server);
list_add_tail(&tcon->rlist, &tmp_list);
tcon_selected = true;
}
@@ -4165,14 +4178,14 @@ void smb2_reconnect_server(struct work_struct *work)
if (tcon->ipc)
cifs_put_smb_ses(tcon->ses);
else
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_reconnect_server);
}
if (!ses_exist)
goto done;
/* allocate a dummy tcon struct used for reconnect */
- tcon = tcon_info_alloc(false);
+ tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_reconnect_server);
if (!tcon) {
resched = true;
list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) {
@@ -4195,7 +4208,7 @@ void smb2_reconnect_server(struct work_struct *work)
list_del_init(&ses->rlist);
cifs_put_smb_ses(ses);
}
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_reconnect_server);
done:
cifs_dbg(FYI, "Reconnecting tcons and channels finished\n");
diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h
index c72a3b2886b7ff..2fccf0d4f53d27 100644
--- a/fs/smb/client/smb2pdu.h
+++ b/fs/smb/client/smb2pdu.h
@@ -320,7 +320,7 @@ struct smb2_file_reparse_point_info {
} __packed;
struct smb2_file_network_open_info {
- struct_group(network_open_info,
+ struct_group_attr(network_open_info, __packed,
__le64 CreationTime;
__le64 LastAccessTime;
__le64 LastWriteTime;
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c
index 5a3ca62d2f07f7..02135a6053051e 100644
--- a/fs/smb/client/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
@@ -189,6 +189,8 @@ smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid)
if (tcon->tid != tid)
continue;
++tcon->tc_count;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_find_sess_tcon);
return tcon;
}
@@ -659,7 +661,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
}
spin_unlock(&server->srv_lock);
if (!is_binding && !server->session_estab) {
- strncpy(shdr->Signature, "BSRSPYL", 8);
+ strscpy(shdr->Signature, "BSRSPYL");
return 0;
}
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h
index f9c1fd32d0b8c3..604e52876cd2d9 100644
--- a/fs/smb/client/trace.h
+++ b/fs/smb/client/trace.h
@@ -3,6 +3,9 @@
* Copyright (C) 2018, Microsoft Corporation.
*
* Author(s): Steve French <stfrench@microsoft.com>
+ *
+ * Please use this 3-part article as a reference for writing new tracepoints:
+ * https://lwn.net/Articles/379903/
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cifs
@@ -15,9 +18,70 @@
#include <linux/inet.h>
/*
- * Please use this 3-part article as a reference for writing new tracepoints:
- * https://lwn.net/Articles/379903/
+ * Specify enums for tracing information.
*/
+#define smb3_tcon_ref_traces \
+ EM(netfs_trace_tcon_ref_dec_dfs_refer, "DEC DfsRef") \
+ EM(netfs_trace_tcon_ref_free, "FRE ") \
+ EM(netfs_trace_tcon_ref_free_fail, "FRE Fail ") \
+ EM(netfs_trace_tcon_ref_free_ipc, "FRE Ipc ") \
+ EM(netfs_trace_tcon_ref_free_ipc_fail, "FRE Ipc-F ") \
+ EM(netfs_trace_tcon_ref_free_reconnect_server, "FRE Reconn") \
+ EM(netfs_trace_tcon_ref_get_cancelled_close, "GET Cn-Cls") \
+ EM(netfs_trace_tcon_ref_get_dfs_refer, "GET DfsRef") \
+ EM(netfs_trace_tcon_ref_get_find, "GET Find ") \
+ EM(netfs_trace_tcon_ref_get_find_sess_tcon, "GET FndSes") \
+ EM(netfs_trace_tcon_ref_get_reconnect_server, "GET Reconn") \
+ EM(netfs_trace_tcon_ref_new, "NEW ") \
+ EM(netfs_trace_tcon_ref_new_ipc, "NEW Ipc ") \
+ EM(netfs_trace_tcon_ref_new_reconnect_server, "NEW Reconn") \
+ EM(netfs_trace_tcon_ref_put_cancelled_close, "PUT Cn-Cls") \
+ EM(netfs_trace_tcon_ref_put_cancelled_close_fid, "PUT Cn-Fid") \
+ EM(netfs_trace_tcon_ref_put_cancelled_mid, "PUT Cn-Mid") \
+ EM(netfs_trace_tcon_ref_put_mnt_ctx, "PUT MntCtx") \
+ EM(netfs_trace_tcon_ref_put_reconnect_server, "PUT Reconn") \
+ EM(netfs_trace_tcon_ref_put_tlink, "PUT Tlink ") \
+ EM(netfs_trace_tcon_ref_see_cancelled_close, "SEE Cn-Cls") \
+ EM(netfs_trace_tcon_ref_see_fscache_collision, "SEE FV-CO!") \
+ EM(netfs_trace_tcon_ref_see_fscache_okay, "SEE FV-Ok ") \
+ EM(netfs_trace_tcon_ref_see_fscache_relinq, "SEE FV-Rlq") \
+ E_(netfs_trace_tcon_ref_see_umount, "SEE Umount")
+
+#undef EM
+#undef E_
+
+/*
+ * Define those tracing enums.
+ */
+#ifndef __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY
+#define __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY
+
+#define EM(a, b) a,
+#define E_(a, b) a
+
+enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte);
+
+#undef EM
+#undef E_
+#endif
+
+/*
+ * Export enum symbols via userspace.
+ */
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define E_(a, b) TRACE_DEFINE_ENUM(a);
+
+smb3_tcon_ref_traces;
+
+#undef EM
+#undef E_
+
+/*
+ * Now redefine the EM() and E_() macros to map the enums to the strings that
+ * will be printed in the output.
+ */
+#define EM(a, b) { a, b },
+#define E_(a, b) { a, b }
/* For logging errors in read or write */
DECLARE_EVENT_CLASS(smb3_rw_err_class,
@@ -375,6 +439,7 @@ DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(get_reparse_compound_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(delete_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mkdir_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(tdis_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mknod_enter);
DECLARE_EVENT_CLASS(smb3_inf_compound_done_class,
TP_PROTO(unsigned int xid,
@@ -415,7 +480,7 @@ DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_wsl_ea_compound_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(delete_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mkdir_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(tdis_done);
-
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mknod_done);
DECLARE_EVENT_CLASS(smb3_inf_compound_err_class,
TP_PROTO(unsigned int xid,
@@ -461,6 +526,7 @@ DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_wsl_ea_compound_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mkdir_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(delete_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(tdis_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mknod_err);
/*
* For logging SMB3 Status code and Command for responses which return errors
@@ -1123,6 +1189,30 @@ DEFINE_SMB3_CREDIT_EVENT(waitff_credits);
DEFINE_SMB3_CREDIT_EVENT(overflow_credits);
DEFINE_SMB3_CREDIT_EVENT(set_credits);
+
+TRACE_EVENT(smb3_tcon_ref,
+ TP_PROTO(unsigned int tcon_debug_id, int ref,
+ enum smb3_tcon_ref_trace trace),
+ TP_ARGS(tcon_debug_id, ref, trace),
+ TP_STRUCT__entry(
+ __field(unsigned int, tcon)
+ __field(int, ref)
+ __field(enum smb3_tcon_ref_trace, trace)
+ ),
+ TP_fast_assign(
+ __entry->tcon = tcon_debug_id;
+ __entry->ref = ref;
+ __entry->trace = trace;
+ ),
+ TP_printk("TC=%08x %s r=%u",
+ __entry->tcon,
+ __print_symbolic(__entry->trace, smb3_tcon_ref_traces),
+ __entry->ref)
+ );
+
+
+#undef EM
+#undef E_
#endif /* _CIFS_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index 994d7019343297..ddf1a3aafee5c6 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -909,12 +909,15 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
list_del_init(&mid->qhead);
mid->mid_flags |= MID_DELETED;
}
+ spin_unlock(&server->mid_lock);
cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
__func__, mid->mid, mid->mid_state);
rc = -EIO;
+ goto sync_mid_done;
}
spin_unlock(&server->mid_lock);
+sync_mid_done:
release_mid(mid);
return rc;
}
@@ -1057,9 +1060,11 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
index = (uint)atomic_inc_return(&ses->chan_seq);
index %= ses->chan_count;
}
+
+ server = ses->chans[index].server;
spin_unlock(&ses->chan_lock);
- return ses->chans[index].server;
+ return server;
}
int
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 1b594307c9d5a0..202ff912815604 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -711,7 +711,7 @@ struct smb2_close_rsp {
__le16 StructureSize; /* 60 */
__le16 Flags;
__le32 Reserved;
- struct_group(network_open_info,
+ struct_group_attr(network_open_info, __packed,
__le64 CreationTime;
__le64 LastAccessTime;
__le64 LastWriteTime;
diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index 8ca8a45c4c621c..f4e55199938d58 100644
--- a/fs/smb/server/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
@@ -167,7 +167,8 @@ struct ksmbd_share_config_response {
__u16 force_uid;
__u16 force_gid;
__s8 share_name[KSMBD_REQ_MAX_SHARE_NAME];
- __u32 reserved[112]; /* Reserved room */
+ __u32 reserved[111]; /* Reserved room */
+ __u32 payload_sz;
__u32 veto_list_sz;
__s8 ____payload[];
};
@@ -339,23 +340,24 @@ enum KSMBD_TREE_CONN_STATUS {
/*
* Share config flags.
*/
-#define KSMBD_SHARE_FLAG_INVALID (0)
-#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0)
-#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1)
-#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2)
-#define KSMBD_SHARE_FLAG_READONLY BIT(3)
-#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4)
-#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5)
-#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6)
-#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7)
-#define KSMBD_SHARE_FLAG_PIPE BIT(8)
-#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9)
-#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10)
-#define KSMBD_SHARE_FLAG_STREAMS BIT(11)
-#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12)
-#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13)
-#define KSMBD_SHARE_FLAG_UPDATE BIT(14)
-#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15)
+#define KSMBD_SHARE_FLAG_INVALID (0)
+#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0)
+#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1)
+#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2)
+#define KSMBD_SHARE_FLAG_READONLY BIT(3)
+#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4)
+#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5)
+#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6)
+#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7)
+#define KSMBD_SHARE_FLAG_PIPE BIT(8)
+#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9)
+#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10)
+#define KSMBD_SHARE_FLAG_STREAMS BIT(11)
+#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12)
+#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13)
+#define KSMBD_SHARE_FLAG_UPDATE BIT(14)
+#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15)
+#define KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY BIT(16)
/*
* Tree connect request flags.
diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c
index 328a412259dc1b..a2f0a2edceb8ae 100644
--- a/fs/smb/server/mgmt/share_config.c
+++ b/fs/smb/server/mgmt/share_config.c
@@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um,
share->name = kstrdup(name, GFP_KERNEL);
if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
- share->path = kstrdup(ksmbd_share_config_path(resp),
+ int path_len = PATH_MAX;
+
+ if (resp->payload_sz)
+ path_len = resp->payload_sz - resp->veto_list_sz;
+
+ share->path = kstrndup(ksmbd_share_config_path(resp), path_len,
GFP_KERNEL);
if (share->path)
share->path_sz = strlen(share->path);
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index c0788188aa82fa..c67fbc8d6683ef 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -167,20 +167,17 @@ static void __handle_ksmbd_work(struct ksmbd_work *work,
int rc;
bool is_chained = false;
- if (conn->ops->allocate_rsp_buf(work))
- return;
-
if (conn->ops->is_transform_hdr &&
conn->ops->is_transform_hdr(work->request_buf)) {
rc = conn->ops->decrypt_req(work);
- if (rc < 0) {
- conn->ops->set_rsp_status(work, STATUS_DATA_ERROR);
- goto send;
- }
-
+ if (rc < 0)
+ return;
work->encrypted = true;
}
+ if (conn->ops->allocate_rsp_buf(work))
+ return;
+
rc = conn->ops->init_rsp_hdr(work);
if (rc) {
/* either uid or tid is not correct */
diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c
index a45f7dca482e01..606aa3c5189a28 100644
--- a/fs/smb/server/smb2ops.c
+++ b/fs/smb/server/smb2ops.c
@@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
+ (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
+ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
}
@@ -278,11 +283,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
SMB2_GLOBAL_CAP_DIRECTORY_LEASING;
- if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
- (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
- conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
- conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
-
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index d478fa0c57abdb..355824151c2d88 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -535,6 +535,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
if (cmd == SMB2_QUERY_INFO_HE) {
struct smb2_query_info_req *req;
+ if (get_rfc1002_len(work->request_buf) <
+ offsetof(struct smb2_query_info_req, OutputBufferLength))
+ return -EINVAL;
+
req = smb2_get_msg(work->request_buf);
if ((req->InfoType == SMB2_O_INFO_FILE &&
(req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
@@ -1984,7 +1988,12 @@ int smb2_tree_connect(struct ksmbd_work *work)
write_unlock(&sess->tree_conns_lock);
rsp->StructureSize = cpu_to_le16(16);
out_err1:
- rsp->Capabilities = 0;
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE &&
+ test_share_config_flag(share,
+ KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY))
+ rsp->Capabilities = SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY;
+ else
+ rsp->Capabilities = 0;
rsp->Reserved = 0;
/* default manual caching */
rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING;
@@ -3498,7 +3507,9 @@ int smb2_open(struct ksmbd_work *work)
memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
if (dh_info.type == DURABLE_REQ_V2 || dh_info.type == DURABLE_REQ) {
- if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent)
+ if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent &&
+ test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY))
fp->is_persistent = true;
else
fp->is_durable = true;
@@ -5857,8 +5868,9 @@ static int smb2_rename(struct ksmbd_work *work,
if (!file_info->ReplaceIfExists)
flags = RENAME_NOREPLACE;
- smb_break_all_levII_oplock(work, fp, 0);
rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags);
+ if (!rc)
+ smb_break_all_levII_oplock(work, fp, 0);
out:
kfree(new_name);
return rc;
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index f29bb03f0dc47b..8752ac82c557bf 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -65,6 +65,7 @@ struct ipc_msg_table_entry {
struct hlist_node ipc_table_hlist;
void *response;
+ unsigned int msg_sz;
};
static struct delayed_work ipc_timer_work;
@@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz)
}
memcpy(entry->response, payload, sz);
+ entry->msg_sz = sz;
wake_up_interruptible(&entry->wait);
ret = 0;
break;
@@ -453,6 +455,34 @@ out:
return ret;
}
+static int ipc_validate_msg(struct ipc_msg_table_entry *entry)
+{
+ unsigned int msg_sz = entry->msg_sz;
+
+ if (entry->type == KSMBD_EVENT_RPC_REQUEST) {
+ struct ksmbd_rpc_command *resp = entry->response;
+
+ msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz;
+ } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) {
+ struct ksmbd_spnego_authen_response *resp = entry->response;
+
+ msg_sz = sizeof(struct ksmbd_spnego_authen_response) +
+ resp->session_key_len + resp->spnego_blob_len;
+ } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) {
+ struct ksmbd_share_config_response *resp = entry->response;
+
+ if (resp->payload_sz) {
+ if (resp->payload_sz < resp->veto_list_sz)
+ return -EINVAL;
+
+ msg_sz = sizeof(struct ksmbd_share_config_response) +
+ resp->payload_sz;
+ }
+ }
+
+ return entry->msg_sz != msg_sz ? -EINVAL : 0;
+}
+
static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle)
{
struct ipc_msg_table_entry entry;
@@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle
ret = wait_event_interruptible_timeout(entry.wait,
entry.response != NULL,
IPC_WAIT_TIMEOUT);
+ if (entry.response) {
+ ret = ipc_validate_msg(&entry);
+ if (ret) {
+ kvfree(entry.response);
+ entry.response = NULL;
+ }
+ }
out:
down_write(&ipc_msg_table_lock);
hash_del(&entry.ipc_table_hlist);
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 22f0f3db3ac92d..51b1b0bed616ee 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -754,10 +754,15 @@ retry:
goto out4;
}
+ /*
+ * explicitly handle file overwrite case, for compatibility with
+ * filesystems that may not support rename flags (e.g: fuse)
+ */
if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
err = -EEXIST;
goto out4;
}
+ flags &= ~(RENAME_NOREPLACE);
if (old_child == trap) {
err = -EINVAL;
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index aa3411354e66d0..16bd693d0b3aa2 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -48,6 +48,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
gid_t i_gid;
int err;
+ inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
+ if (inode->i_ino == 0)
+ return -EINVAL;
+
err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
if (err)
return err;
@@ -58,7 +62,6 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
i_uid_write(inode, i_uid);
i_gid_write(inode, i_gid);
- inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0);
inode_set_atime(inode, inode_get_mtime_sec(inode), 0);
inode_set_ctime(inode, inode_get_mtime_sec(inode), 0);
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 11e4539b9eae4c..65aae7e2a859d0 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -62,27 +62,21 @@
*/
static int get_dir_index_using_name(struct super_block *sb,
u64 *next_block, int *next_offset, u64 index_start,
- int index_offset, int i_count, const char *name,
- int len)
+ int index_offset, int i_count, const char *name)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
int i, length = 0, err;
unsigned int size;
struct squashfs_dir_index *index;
- char *str;
TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count);
- index = kmalloc(sizeof(*index) + SQUASHFS_NAME_LEN * 2 + 2, GFP_KERNEL);
+ index = kmalloc(sizeof(*index) + SQUASHFS_NAME_LEN + 1, GFP_KERNEL);
if (index == NULL) {
ERROR("Failed to allocate squashfs_dir_index\n");
goto out;
}
- str = &index->name[SQUASHFS_NAME_LEN + 1];
- strncpy(str, name, len);
- str[len] = '\0';
-
for (i = 0; i < i_count; i++) {
err = squashfs_read_metadata(sb, index, &index_start,
&index_offset, sizeof(*index));
@@ -101,7 +95,7 @@ static int get_dir_index_using_name(struct super_block *sb,
index->name[size] = '\0';
- if (strcmp(index->name, str) > 0)
+ if (strcmp(index->name, name) > 0)
break;
length = le32_to_cpu(index->index);
@@ -153,7 +147,7 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
length = get_dir_index_using_name(dir->i_sb, &block, &offset,
squashfs_i(dir)->dir_idx_start,
squashfs_i(dir)->dir_idx_offset,
- squashfs_i(dir)->dir_idx_cnt, name, len);
+ squashfs_i(dir)->dir_idx_cnt, name);
while (length < i_size_read(dir)) {
/*
diff --git a/fs/super.c b/fs/super.c
index 71d9779c42b10a..69ce6c60096847 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1515,29 +1515,11 @@ static int fs_bdev_thaw(struct block_device *bdev)
return error;
}
-static void fs_bdev_super_get(void *data)
-{
- struct super_block *sb = data;
-
- spin_lock(&sb_lock);
- sb->s_count++;
- spin_unlock(&sb_lock);
-}
-
-static void fs_bdev_super_put(void *data)
-{
- struct super_block *sb = data;
-
- put_super(sb);
-}
-
const struct blk_holder_ops fs_holder_ops = {
.mark_dead = fs_bdev_mark_dead,
.sync = fs_bdev_sync,
.freeze = fs_bdev_freeze,
.thaw = fs_bdev_thaw,
- .get_holder = fs_bdev_super_get,
- .put_holder = fs_bdev_super_put,
};
EXPORT_SYMBOL_GPL(fs_holder_ops);
@@ -1562,7 +1544,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
* writable from userspace even for a read-only block device.
*/
if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
- fput(bdev_file);
+ bdev_fput(bdev_file);
return -EACCES;
}
@@ -1573,7 +1555,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
if (fc)
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
- fput(bdev_file);
+ bdev_fput(bdev_file);
return -EBUSY;
}
spin_lock(&sb_lock);
@@ -1693,7 +1675,7 @@ void kill_block_super(struct super_block *sb)
generic_shutdown_super(sb);
if (bdev) {
sync_blockdev(bdev);
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
}
}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 6b7652fb805057..7cd64021d453de 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -463,6 +463,8 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj,
kn = kernfs_find_and_get(kobj->sd, attr->name);
if (kn)
kernfs_break_active_protection(kn);
+ else
+ kobject_put(kobj);
return kn;
}
EXPORT_SYMBOL_GPL(sysfs_break_active_protection);
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index dc067eeb638744..894c6ca1e50020 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -336,6 +336,7 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode,
/**
* lookup_file - look up a file in the tracefs filesystem
+ * @parent_ei: Pointer to the eventfs_inode that represents parent of the file
* @dentry: the dentry to look up
* @mode: the permission that the file should have.
* @attr: saved attributes changed by user
@@ -389,6 +390,7 @@ static struct dentry *lookup_file(struct eventfs_inode *parent_ei,
/**
* lookup_dir_entry - look up a dir in the tracefs filesystem
* @dentry: the directory to look up
+ * @pei: Pointer to the parent eventfs_inode if available
* @ei: the eventfs_inode that represents the directory to create
*
* This function will look up a dentry for a directory represented by
@@ -478,16 +480,20 @@ void eventfs_d_release(struct dentry *dentry)
/**
* lookup_file_dentry - create a dentry for a file of an eventfs_inode
+ * @dentry: The parent dentry under which the new file's dentry will be created
* @ei: the eventfs_inode that the file will be created under
* @idx: the index into the entry_attrs[] of the @ei
- * @parent: The parent dentry of the created file.
- * @name: The name of the file to create
* @mode: The mode of the file.
* @data: The data to use to set the inode of the file with on open()
* @fops: The fops of the file to be created.
*
- * Create a dentry for a file of an eventfs_inode @ei and place it into the
- * address located at @e_dentry.
+ * This function creates a dentry for a file associated with an
+ * eventfs_inode @ei. It uses the entry attributes specified by @idx,
+ * if available. The file will have the specified @mode and its inode will be
+ * set up with @data upon open. The file operations will be set to @fops.
+ *
+ * Return: Returns a pointer to the newly created file's dentry or an error
+ * pointer.
*/
static struct dentry *
lookup_file_dentry(struct dentry *dentry,
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 60dcfafdc11a84..d2c3879745e535 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -657,7 +657,10 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
struct userfaultfd_fork_ctx *fctx;
octx = vma->vm_userfaultfd_ctx.ctx;
- if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
+ if (!octx)
+ return 0;
+
+ if (!(octx->features & UFFD_FEATURE_EVENT_FORK)) {
vma_start_write(vma);
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
@@ -895,6 +898,10 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
prev = vma;
continue;
}
+ /* Reset ptes for the whole vma range if wr-protected */
+ if (userfaultfd_wp(vma))
+ uffd_wp_range(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start, false);
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
vma = vma_modify_flags_uffd(&vmi, prev, vma, vma->vm_start,
vma->vm_end, new_flags,
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
index 2307f8037efc3d..118dedef8ebe8d 100644
--- a/fs/vboxsf/file.c
+++ b/fs/vboxsf/file.c
@@ -218,6 +218,7 @@ const struct file_operations vboxsf_reg_fops = {
.release = vboxsf_file_release,
.fsync = noop_fsync,
.splice_read = filemap_splice_read,
+ .setlease = simple_nosetlease,
};
const struct inode_operations vboxsf_reg_iops = {
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index cabe8ac4fefc5d..ffb1d565da3981 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -151,11 +151,11 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
if (!sbi->nls) {
vbg_err("vboxsf: Count not load '%s' nls\n", nls_name);
err = -EINVAL;
- goto fail_free;
+ goto fail_destroy_idr;
}
}
- sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL);
+ sbi->bdi_id = ida_alloc(&vboxsf_bdi_ida, GFP_KERNEL);
if (sbi->bdi_id < 0) {
err = sbi->bdi_id;
goto fail_free;
@@ -221,9 +221,10 @@ fail_unmap:
vboxsf_unmap_folder(sbi->root);
fail_free:
if (sbi->bdi_id >= 0)
- ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+ ida_free(&vboxsf_bdi_ida, sbi->bdi_id);
if (sbi->nls)
unload_nls(sbi->nls);
+fail_destroy_idr:
idr_destroy(&sbi->ino_idr);
kfree(sbi);
return err;
@@ -268,7 +269,7 @@ static void vboxsf_put_super(struct super_block *sb)
vboxsf_unmap_folder(sbi->root);
if (sbi->bdi_id >= 0)
- ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+ ida_free(&vboxsf_bdi_ida, sbi->bdi_id);
if (sbi->nls)
unload_nls(sbi->nls);
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
index 72ac9320e6a35f..9515bbf0b54ce8 100644
--- a/fs/vboxsf/utils.c
+++ b/fs/vboxsf/utils.c
@@ -440,7 +440,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
{
const char *in;
char *out;
- size_t out_len;
size_t out_bound_len;
size_t in_bound_len;
@@ -448,7 +447,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
in_bound_len = utf8_len;
out = name;
- out_len = 0;
/* Reserve space for terminating 0 */
out_bound_len = name_bound_len - 1;
@@ -469,7 +467,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
out += nb;
out_bound_len -= nb;
- out_len += nb;
}
*out = 0;
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index d991eec0543683..73a4b895de6704 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -530,7 +530,8 @@ xfs_validate_sb_common(
}
if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit),
- XFS_FSB_TO_B(mp, sbp->sb_width), 0, false))
+ XFS_FSB_TO_B(mp, sbp->sb_width), 0,
+ xfs_buf_daddr(bp) == XFS_SB_DADDR, false))
return -EFSCORRUPTED;
/*
@@ -1323,8 +1324,10 @@ xfs_sb_get_secondary(
}
/*
- * sunit, swidth, sectorsize(optional with 0) should be all in bytes,
- * so users won't be confused by values in error messages.
+ * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users
+ * won't be confused by values in error messages. This function returns false
+ * if the stripe geometry is invalid and the caller is unable to repair the
+ * stripe configuration later in the mount process.
*/
bool
xfs_validate_stripe_geometry(
@@ -1332,20 +1335,21 @@ xfs_validate_stripe_geometry(
__s64 sunit,
__s64 swidth,
int sectorsize,
+ bool may_repair,
bool silent)
{
if (swidth > INT_MAX) {
if (!silent)
xfs_notice(mp,
"stripe width (%lld) is too large", swidth);
- return false;
+ goto check_override;
}
if (sunit > swidth) {
if (!silent)
xfs_notice(mp,
"stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth);
- return false;
+ goto check_override;
}
if (sectorsize && (int)sunit % sectorsize) {
@@ -1353,21 +1357,21 @@ xfs_validate_stripe_geometry(
xfs_notice(mp,
"stripe unit (%lld) must be a multiple of the sector size (%d)",
sunit, sectorsize);
- return false;
+ goto check_override;
}
if (sunit && !swidth) {
if (!silent)
xfs_notice(mp,
"invalid stripe unit (%lld) and stripe width of 0", sunit);
- return false;
+ goto check_override;
}
if (!sunit && swidth) {
if (!silent)
xfs_notice(mp,
"invalid stripe width (%lld) and stripe unit of 0", swidth);
- return false;
+ goto check_override;
}
if (sunit && (int)swidth % (int)sunit) {
@@ -1375,9 +1379,27 @@ xfs_validate_stripe_geometry(
xfs_notice(mp,
"stripe width (%lld) must be a multiple of the stripe unit (%lld)",
swidth, sunit);
- return false;
+ goto check_override;
}
return true;
+
+check_override:
+ if (!may_repair)
+ return false;
+ /*
+ * During mount, mp->m_dalign will not be set unless the sunit mount
+ * option was set. If it was set, ignore the bad stripe alignment values
+ * and allow the validation and overwrite later in the mount process to
+ * attempt to overwrite the bad stripe alignment values with the values
+ * supplied by mount options.
+ */
+ if (!mp->m_dalign)
+ return false;
+ if (!silent)
+ xfs_notice(mp,
+"Will try to correct with specified mount options sunit (%d) and swidth (%d)",
+ BBTOB(mp->m_dalign), BBTOB(mp->m_swidth));
+ return true;
}
/*
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 2e8e8d63d4eb22..37b1ed1bc2095e 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -35,8 +35,9 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp,
struct xfs_trans *tp, xfs_agnumber_t agno,
struct xfs_buf **bpp);
-extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
- __s64 sunit, __s64 swidth, int sectorsize, bool silent);
+bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
+ __s64 sunit, __s64 swidth, int sectorsize, bool may_repair,
+ bool silent);
uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index abff79a77c72b6..47a20cf5205f00 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -1044,9 +1044,7 @@ xchk_irele(
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
- if (current->journal_info != NULL) {
- ASSERT(current->journal_info == sc->tp);
-
+ if (sc->tp) {
/*
* If we are in a transaction, we /cannot/ drop the inode
* ourselves, because the VFS will trigger writeback, which
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1698507d1ac73a..3f428620ebf2a3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -503,13 +503,6 @@ xfs_vm_writepages(
{
struct xfs_writepage_ctx wpc = { };
- /*
- * Writing back data in a transaction context can result in recursive
- * transactions. This is bad, so issue a warning and get out of here.
- */
- if (WARN_ON_ONCE(current->journal_info))
- return 0;
-
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 1a18c381127e21..f0fa02264edaae 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2030,7 +2030,7 @@ xfs_free_buftarg(
fs_put_dax(btp->bt_daxdev, btp->bt_mount);
/* the main block device is closed by kill_block_super */
if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
- fput(btp->bt_bdev_file);
+ bdev_fput(btp->bt_bdev_file);
kfree(btp);
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index e64265bc0b3371..74f1812b03cbd2 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -2039,8 +2039,10 @@ xfs_inodegc_want_queue_work(
* - Memory shrinkers queued the inactivation worker and it hasn't finished.
* - The queue depth exceeds the maximum allowable percpu backlog.
*
- * Note: If the current thread is running a transaction, we don't ever want to
- * wait for other transactions because that could introduce a deadlock.
+ * Note: If we are in a NOFS context here (e.g. current thread is running a
+ * transaction) the we don't want to block here as inodegc progress may require
+ * filesystem resources we hold to make progress and that could result in a
+ * deadlock. Hence we skip out of here if we are in a scoped NOFS context.
*/
static inline bool
xfs_inodegc_want_flush_work(
@@ -2048,7 +2050,7 @@ xfs_inodegc_want_flush_work(
unsigned int items,
unsigned int shrinker_hits)
{
- if (current->journal_info)
+ if (current->flags & PF_MEMALLOC_NOFS)
return false;
if (shrinker_hits > 0)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ea48774f6b76d3..d55b42b2480d6c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1301,8 +1301,19 @@ xfs_link(
*/
if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
tdp->i_projid != sip->i_projid)) {
- error = -EXDEV;
- goto error_return;
+ /*
+ * Project quota setup skips special files which can
+ * leave inodes in a PROJINHERIT directory without a
+ * project ID set. We need to allow links to be made
+ * to these "project-less" inodes because userspace
+ * expects them to succeed after project ID setup,
+ * but everything else should be rejected.
+ */
+ if (!special_file(VFS_I(sip)->i_mode) ||
+ sip->i_projid != 0) {
+ error = -EXDEV;
+ goto error_return;
+ }
}
if (!resblks) {
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c21f10ab0f5dbe..bce020374c5eba 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -485,7 +485,7 @@ xfs_open_devices(
mp->m_logdev_targp = mp->m_ddev_targp;
/* Handle won't be used, drop it */
if (logdev_file)
- fput(logdev_file);
+ bdev_fput(logdev_file);
}
return 0;
@@ -497,10 +497,10 @@ xfs_open_devices(
xfs_free_buftarg(mp->m_ddev_targp);
out_close_rtdev:
if (rtdev_file)
- fput(rtdev_file);
+ bdev_fput(rtdev_file);
out_close_logdev:
if (logdev_file)
- fput(logdev_file);
+ bdev_fput(logdev_file);
return error;
}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 3f7e3a09a49ff4..1636663707dc04 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -268,19 +268,14 @@ static inline void
xfs_trans_set_context(
struct xfs_trans *tp)
{
- ASSERT(current->journal_info == NULL);
tp->t_pflags = memalloc_nofs_save();
- current->journal_info = tp;
}
static inline void
xfs_trans_clear_context(
struct xfs_trans *tp)
{
- if (current->journal_info == tp) {
- memalloc_nofs_restore(tp->t_pflags);
- current->journal_info = NULL;
- }
+ memalloc_nofs_restore(tp->t_pflags);
}
static inline void
@@ -288,10 +283,8 @@ xfs_trans_switch_context(
struct xfs_trans *old_tp,
struct xfs_trans *new_tp)
{
- ASSERT(current->journal_info == old_tp);
new_tp->t_pflags = old_tp->t_pflags;
old_tp->t_pflags = 0;
- current->journal_info = new_tp;
}
#endif /* __XFS_TRANS_H__ */
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index c6a124e8d565fe..964fa7f2400335 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1048,7 +1048,7 @@ static int zonefs_init_zgroup(struct super_block *sb,
zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
zonefs_zgroup_name(ztype),
zgroup->g_nr_zones,
- zgroup->g_nr_zones > 1 ? "s" : "");
+ str_plural(zgroup->g_nr_zones));
return 0;
}
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 5de954e2b18aaa..e7796f373d0dac 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -911,17 +911,19 @@ static inline bool acpi_int_uid_match(struct acpi_device *adev, u64 uid2)
* acpi_dev_hid_uid_match - Match device by supplied HID and UID
* @adev: ACPI device to match.
* @hid2: Hardware ID of the device.
- * @uid2: Unique ID of the device, pass 0 or NULL to not check _UID.
+ * @uid2: Unique ID of the device, pass NULL to not check _UID.
*
* Matches HID and UID in @adev with given @hid2 and @uid2. Absence of @uid2
* will be treated as a match. If user wants to validate @uid2, it should be
* done before calling this function.
*
- * Returns: %true if matches or @uid2 is 0 or NULL, %false otherwise.
+ * Returns: %true if matches or @uid2 is NULL, %false otherwise.
*/
#define acpi_dev_hid_uid_match(adev, hid2, uid2) \
(acpi_dev_hid_match(adev, hid2) && \
- (!(uid2) || acpi_dev_uid_match(adev, uid2)))
+ /* Distinguish integer 0 from NULL @uid2 */ \
+ (_Generic(uid2, ACPI_STR_TYPES(!(uid2)), default: 0) || \
+ acpi_dev_uid_match(adev, uid2)))
void acpi_dev_clear_dependencies(struct acpi_device *supplier);
bool acpi_dev_ready_for_enumeration(const struct acpi_device *device);
diff --git a/include/acpi/platform/aclinuxex.h b/include/acpi/platform/aclinuxex.h
index 600d4e2641dae1..62cac266a1c89e 100644
--- a/include/acpi/platform/aclinuxex.h
+++ b/include/acpi/platform/aclinuxex.h
@@ -47,26 +47,19 @@ acpi_status acpi_os_terminate(void);
* However, boot has (system_state != SYSTEM_RUNNING)
* to quiet __might_sleep() in kmalloc() and resume does not.
*/
-static inline void *acpi_os_allocate(acpi_size size)
-{
- return kmalloc(size, irqs_disabled()? GFP_ATOMIC : GFP_KERNEL);
-}
+#define acpi_os_allocate(_size) \
+ kmalloc(_size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL)
-static inline void *acpi_os_allocate_zeroed(acpi_size size)
-{
- return kzalloc(size, irqs_disabled()? GFP_ATOMIC : GFP_KERNEL);
-}
+#define acpi_os_allocate_zeroed(_size) \
+ kzalloc(_size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL)
static inline void acpi_os_free(void *memory)
{
kfree(memory);
}
-static inline void *acpi_os_acquire_object(acpi_cache_t * cache)
-{
- return kmem_cache_zalloc(cache,
- irqs_disabled()? GFP_ATOMIC : GFP_KERNEL);
-}
+#define acpi_os_acquire_object(_cache) \
+ kmem_cache_zalloc(_cache, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL)
static inline acpi_thread_id acpi_os_get_thread_id(void)
{
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 0c0695763bea39..d4f581c1e21da5 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -294,5 +294,13 @@ do { \
#define io_stop_wc() do { } while (0)
#endif
+/*
+ * Architectures that guarantee an implicit smp_mb() in switch_mm()
+ * can override smp_mb__after_switch_mm.
+ */
+#ifndef smp_mb__after_switch_mm
+# define smp_mb__after_switch_mm() smp_mb()
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_GENERIC_BARRIER_H */
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 6e794420bd398c..b7de3a4eade1c2 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -156,7 +156,10 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
#else /* !CONFIG_BUG */
#ifndef HAVE_ARCH_BUG
-#define BUG() do {} while (1)
+#define BUG() do { \
+ do {} while (1); \
+ unreachable(); \
+} while (0)
#endif
#ifndef HAVE_ARCH_BUG_ON
diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h
new file mode 100644
index 00000000000000..64f536b803802e
--- /dev/null
+++ b/include/asm-generic/codetag.lds.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_GENERIC_CODETAG_LDS_H
+#define __ASM_GENERIC_CODETAG_LDS_H
+
+#define SECTION_WITH_BOUNDARIES(_name) \
+ . = ALIGN(8); \
+ __start_##_name = .; \
+ KEEP(*(_name)) \
+ __stop_##_name = .;
+
+#define CODETAG_SECTIONS() \
+ SECTION_WITH_BOUNDARIES(alloc_tags)
+
+#endif /* __ASM_GENERIC_CODETAG_LDS_H */
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
deleted file mode 100644
index 570cd4da72105c..00000000000000
--- a/include/asm-generic/export.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef __ASM_GENERIC_EXPORT_H
-#define __ASM_GENERIC_EXPORT_H
-
-/*
- * <asm/export.h> and <asm-generic/export.h> are deprecated.
- * Please include <linux/export.h> directly.
- */
-#include <linux/export.h>
-
-#endif
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 87e3d49a4e29bf..814207e7c37fcf 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -512,13 +512,9 @@ struct hv_proximity_domain_flags {
u32 proximity_info_valid : 1;
} __packed;
-/* Not a union in windows but useful for zeroing */
-union hv_proximity_domain_info {
- struct {
- u32 domain_id;
- struct hv_proximity_domain_flags flags;
- };
- u64 as_uint64;
+struct hv_proximity_domain_info {
+ u32 domain_id;
+ struct hv_proximity_domain_flags flags;
} __packed;
struct hv_lp_startup_status {
@@ -532,14 +528,13 @@ struct hv_lp_startup_status {
} __packed;
/* HvAddLogicalProcessor hypercall */
-struct hv_add_logical_processor_in {
+struct hv_input_add_logical_processor {
u32 lp_index;
u32 apic_id;
- union hv_proximity_domain_info proximity_domain_info;
- u64 flags;
+ struct hv_proximity_domain_info proximity_domain_info;
} __packed;
-struct hv_add_logical_processor_out {
+struct hv_output_add_logical_processor {
struct hv_lp_startup_status startup_status;
} __packed;
@@ -560,7 +555,7 @@ struct hv_create_vp {
u8 padding[3];
u8 subnode_type;
u64 subnode_id;
- union hv_proximity_domain_info proximity_domain_info;
+ struct hv_proximity_domain_info proximity_domain_info;
u64 flags;
} __packed;
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index bac63e874c7bf9..80de699bf6af4b 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -9,6 +9,7 @@
#include <asm/page.h> /* I/O is all done through memory accesses */
#include <linux/string.h> /* for memset() and memcpy() */
+#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/instruction_pointer.h>
@@ -991,7 +992,6 @@ static inline void iowrite64_rep(volatile void __iomem *addr,
#ifdef __KERNEL__
-#include <linux/vmalloc.h>
#define __io_virt(x) ((void __force *)(x))
/*
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 99935779682dc2..8fe7aaab25990a 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -21,6 +21,7 @@
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
+#include <acpi/acpi_numa.h>
#include <linux/cpumask.h>
#include <linux/nmi.h>
#include <asm/ptrace.h>
@@ -67,6 +68,19 @@ extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
bool hv_isolation_type_snp(void);
bool hv_isolation_type_tdx(void);
+static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node)
+{
+ struct hv_proximity_domain_info pxm_info = {};
+
+ if (node != NUMA_NO_NODE) {
+ pxm_info.domain_id = node_to_pxm(node);
+ pxm_info.flags.proximity_info_valid = 1;
+ pxm_info.flags.proximity_preferred = 1;
+ }
+
+ return pxm_info;
+}
+
/* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */
static inline int hv_result(u64 status)
{
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 879e5f8aa5e92a..7c48f5fbf8aa79 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -16,15 +16,16 @@
*
* Return: pointer to the allocated memory or %NULL on error
*/
-static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm)
+static inline pte_t *__pte_alloc_one_kernel_noprof(struct mm_struct *mm)
{
- struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL &
+ struct ptdesc *ptdesc = pagetable_alloc_noprof(GFP_PGTABLE_KERNEL &
~__GFP_HIGHMEM, 0);
if (!ptdesc)
return NULL;
return ptdesc_address(ptdesc);
}
+#define __pte_alloc_one_kernel(...) alloc_hooks(__pte_alloc_one_kernel_noprof(__VA_ARGS__))
#ifndef __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
/**
@@ -33,10 +34,11 @@ static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm)
*
* Return: pointer to the allocated memory or %NULL on error
*/
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+static inline pte_t *pte_alloc_one_kernel_noprof(struct mm_struct *mm)
{
- return __pte_alloc_one_kernel(mm);
+ return __pte_alloc_one_kernel_noprof(mm);
}
+#define pte_alloc_one_kernel(...) alloc_hooks(pte_alloc_one_kernel_noprof(__VA_ARGS__))
#endif
/**
@@ -61,11 +63,11 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
*
* Return: `struct page` referencing the ptdesc or %NULL on error
*/
-static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
+static inline pgtable_t __pte_alloc_one_noprof(struct mm_struct *mm, gfp_t gfp)
{
struct ptdesc *ptdesc;
- ptdesc = pagetable_alloc(gfp, 0);
+ ptdesc = pagetable_alloc_noprof(gfp, 0);
if (!ptdesc)
return NULL;
if (!pagetable_pte_ctor(ptdesc)) {
@@ -75,6 +77,7 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
return ptdesc_page(ptdesc);
}
+#define __pte_alloc_one(...) alloc_hooks(__pte_alloc_one_noprof(__VA_ARGS__))
#ifndef __HAVE_ARCH_PTE_ALLOC_ONE
/**
@@ -85,10 +88,11 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
*
* Return: `struct page` referencing the ptdesc or %NULL on error
*/
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
+static inline pgtable_t pte_alloc_one_noprof(struct mm_struct *mm)
{
- return __pte_alloc_one(mm, GFP_PGTABLE_USER);
+ return __pte_alloc_one_noprof(mm, GFP_PGTABLE_USER);
}
+#define pte_alloc_one(...) alloc_hooks(pte_alloc_one_noprof(__VA_ARGS__))
#endif
/*
@@ -124,14 +128,14 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
*
* Return: pointer to the allocated memory or %NULL on error
*/
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pmd_t *pmd_alloc_one_noprof(struct mm_struct *mm, unsigned long addr)
{
struct ptdesc *ptdesc;
gfp_t gfp = GFP_PGTABLE_USER;
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
- ptdesc = pagetable_alloc(gfp, 0);
+ ptdesc = pagetable_alloc_noprof(gfp, 0);
if (!ptdesc)
return NULL;
if (!pagetable_pmd_ctor(ptdesc)) {
@@ -140,6 +144,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
}
return ptdesc_address(ptdesc);
}
+#define pmd_alloc_one(...) alloc_hooks(pmd_alloc_one_noprof(__VA_ARGS__))
#endif
#ifndef __HAVE_ARCH_PMD_FREE
@@ -157,7 +162,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
#if CONFIG_PGTABLE_LEVELS > 3
-static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pud_t *__pud_alloc_one_noprof(struct mm_struct *mm, unsigned long addr)
{
gfp_t gfp = GFP_PGTABLE_USER;
struct ptdesc *ptdesc;
@@ -166,13 +171,14 @@ static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
gfp = GFP_PGTABLE_KERNEL;
gfp &= ~__GFP_HIGHMEM;
- ptdesc = pagetable_alloc(gfp, 0);
+ ptdesc = pagetable_alloc_noprof(gfp, 0);
if (!ptdesc)
return NULL;
pagetable_pud_ctor(ptdesc);
return ptdesc_address(ptdesc);
}
+#define __pud_alloc_one(...) alloc_hooks(__pud_alloc_one_noprof(__VA_ARGS__))
#ifndef __HAVE_ARCH_PUD_ALLOC_ONE
/**
@@ -184,10 +190,11 @@ static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
*
* Return: pointer to the allocated memory or %NULL on error
*/
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pud_t *pud_alloc_one_noprof(struct mm_struct *mm, unsigned long addr)
{
- return __pud_alloc_one(mm, addr);
+ return __pud_alloc_one_noprof(mm, addr);
}
+#define pud_alloc_one(...) alloc_hooks(pud_alloc_one_noprof(__VA_ARGS__))
#endif
static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f7749d0f2562f1..a6dc516a56bf9a 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -50,6 +50,8 @@
* [__nosave_begin, __nosave_end] for the nosave data
*/
+#include <asm-generic/codetag.lds.h>
+
#ifndef LOAD_OFFSET
#define LOAD_OFFSET 0
#endif
@@ -366,6 +368,7 @@
. = ALIGN(8); \
BOUNDED_SECTION_BY(__dyndbg_classes, ___dyndbg_classes) \
BOUNDED_SECTION_BY(__dyndbg, ___dyndbg) \
+ CODETAG_SECTIONS() \
LIKELY_PROFILE() \
BRANCH_PROFILE() \
TRACE_PRINTKS() \
@@ -449,10 +452,29 @@
#endif
/*
+ * Some symbol definitions will not exist yet during the first pass of the
+ * link, but are guaranteed to exist in the final link. Provide preliminary
+ * definitions that will be superseded in the final link to avoid having to
+ * rely on weak external linkage, which requires a GOT when used in position
+ * independent code.
+ */
+#define PRELIMINARY_SYMBOL_DEFINITIONS \
+ PROVIDE(kallsyms_addresses = .); \
+ PROVIDE(kallsyms_offsets = .); \
+ PROVIDE(kallsyms_names = .); \
+ PROVIDE(kallsyms_num_syms = .); \
+ PROVIDE(kallsyms_relative_base = .); \
+ PROVIDE(kallsyms_token_table = .); \
+ PROVIDE(kallsyms_token_index = .); \
+ PROVIDE(kallsyms_markers = .); \
+ PROVIDE(kallsyms_seqs_of_names = .);
+
+/*
* Read only Data
*/
#define RO_DATA(align) \
. = ALIGN((align)); \
+ PRELIMINARY_SYMBOL_DEFINITIONS \
.rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \
__start_rodata = .; \
*(.rodata) *(.rodata.*) \
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 5d61f576cfc860..e5181cc9b7c563 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -578,19 +578,20 @@ static inline void ahash_request_set_tfm(struct ahash_request *req,
*
* Return: allocated request handle in case of success, or NULL if out of memory
*/
-static inline struct ahash_request *ahash_request_alloc(
+static inline struct ahash_request *ahash_request_alloc_noprof(
struct crypto_ahash *tfm, gfp_t gfp)
{
struct ahash_request *req;
- req = kmalloc(sizeof(struct ahash_request) +
- crypto_ahash_reqsize(tfm), gfp);
+ req = kmalloc_noprof(sizeof(struct ahash_request) +
+ crypto_ahash_reqsize(tfm), gfp);
if (likely(req))
ahash_request_set_tfm(req, tfm);
return req;
}
+#define ahash_request_alloc(...) alloc_hooks(ahash_request_alloc_noprof(__VA_ARGS__))
/**
* ahash_request_free() - zeroize and free the request data structure
diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h
index 4ac46bafba9d7c..2a67793f52ad87 100644
--- a/include/crypto/internal/acompress.h
+++ b/include/crypto/internal/acompress.h
@@ -69,15 +69,16 @@ static inline void acomp_request_complete(struct acomp_req *req,
crypto_request_complete(&req->base, err);
}
-static inline struct acomp_req *__acomp_request_alloc(struct crypto_acomp *tfm)
+static inline struct acomp_req *__acomp_request_alloc_noprof(struct crypto_acomp *tfm)
{
struct acomp_req *req;
- req = kzalloc(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL);
+ req = kzalloc_noprof(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL);
if (likely(req))
acomp_request_set_tfm(req, tfm);
return req;
}
+#define __acomp_request_alloc(...) alloc_hooks(__acomp_request_alloc_noprof(__VA_ARGS__))
static inline void __acomp_request_free(struct acomp_req *req)
{
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index c8857d7bdb37f1..6c5330e316b0e2 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -861,19 +861,20 @@ static inline struct skcipher_request *skcipher_request_cast(
*
* Return: allocated request handle in case of success, or NULL if out of memory
*/
-static inline struct skcipher_request *skcipher_request_alloc(
+static inline struct skcipher_request *skcipher_request_alloc_noprof(
struct crypto_skcipher *tfm, gfp_t gfp)
{
struct skcipher_request *req;
- req = kmalloc(sizeof(struct skcipher_request) +
- crypto_skcipher_reqsize(tfm), gfp);
+ req = kmalloc_noprof(sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(tfm), gfp);
if (likely(req))
skcipher_request_set_tfm(req, tfm);
return req;
}
+#define skcipher_request_alloc(...) alloc_hooks(skcipher_request_alloc_noprof(__VA_ARGS__))
/**
* skcipher_request_free() - zeroize and free request data structure
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index eb4c369a79eb31..35d4ca4f6122c7 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void);
*/
#define kvm_pmu_update_vcpu_events(vcpu) \
do { \
- if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \
+ if (!has_vhe() && kvm_arm_support_pmu_v3()) \
vcpu->arch.pmu.events = *kvm_get_pmu_events(); \
} while (0)
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
new file mode 100644
index 00000000000000..afc9e259a2d354
--- /dev/null
+++ b/include/linux/alloc_tag.h
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * allocation tagging
+ */
+#ifndef _LINUX_ALLOC_TAG_H
+#define _LINUX_ALLOC_TAG_H
+
+#include <linux/bug.h>
+#include <linux/codetag.h>
+#include <linux/container_of.h>
+#include <linux/preempt.h>
+#include <asm/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/static_key.h>
+#include <linux/irqflags.h>
+
+struct alloc_tag_counters {
+ u64 bytes;
+ u64 calls;
+};
+
+/*
+ * An instance of this structure is created in a special ELF section at every
+ * allocation callsite. At runtime, the special section is treated as
+ * an array of these. Embedded codetag utilizes codetag framework.
+ */
+struct alloc_tag {
+ struct codetag ct;
+ struct alloc_tag_counters __percpu *counters;
+} __aligned(8);
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+
+#define CODETAG_EMPTY ((void *)1)
+
+static inline bool is_codetag_empty(union codetag_ref *ref)
+{
+ return ref->ct == CODETAG_EMPTY;
+}
+
+static inline void set_codetag_empty(union codetag_ref *ref)
+{
+ if (ref)
+ ref->ct = CODETAG_EMPTY;
+}
+
+#else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+
+static inline bool is_codetag_empty(union codetag_ref *ref) { return false; }
+static inline void set_codetag_empty(union codetag_ref *ref) {}
+
+#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+
+struct codetag_bytes {
+ struct codetag *ct;
+ s64 bytes;
+};
+
+size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep);
+
+static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct)
+{
+ return container_of(ct, struct alloc_tag, ct);
+}
+
+#ifdef ARCH_NEEDS_WEAK_PER_CPU
+/*
+ * When percpu variables are required to be defined as weak, static percpu
+ * variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION).
+ * Instead we will accound all module allocations to a single counter.
+ */
+DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
+
+#define DEFINE_ALLOC_TAG(_alloc_tag) \
+ static struct alloc_tag _alloc_tag __used __aligned(8) \
+ __section("alloc_tags") = { \
+ .ct = CODE_TAG_INIT, \
+ .counters = &_shared_alloc_tag };
+
+#else /* ARCH_NEEDS_WEAK_PER_CPU */
+
+#define DEFINE_ALLOC_TAG(_alloc_tag) \
+ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \
+ static struct alloc_tag _alloc_tag __used __aligned(8) \
+ __section("alloc_tags") = { \
+ .ct = CODE_TAG_INIT, \
+ .counters = &_alloc_tag_cntr };
+
+#endif /* ARCH_NEEDS_WEAK_PER_CPU */
+
+DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+ mem_alloc_profiling_key);
+
+static inline bool mem_alloc_profiling_enabled(void)
+{
+ return static_branch_maybe(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+ &mem_alloc_profiling_key);
+}
+
+static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
+{
+ struct alloc_tag_counters v = { 0, 0 };
+ struct alloc_tag_counters *counter;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ counter = per_cpu_ptr(tag->counters, cpu);
+ v.bytes += counter->bytes;
+ v.calls += counter->calls;
+ }
+
+ return v;
+}
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag)
+{
+ WARN_ONCE(ref && ref->ct,
+ "alloc_tag was not cleared (got tag for %s:%u)\n",
+ ref->ct->filename, ref->ct->lineno);
+
+ WARN_ONCE(!tag, "current->alloc_tag not set");
+}
+
+static inline void alloc_tag_sub_check(union codetag_ref *ref)
+{
+ WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
+}
+#else
+static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
+static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
+#endif
+
+/* Caller should verify both ref and tag to be valid */
+static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
+{
+ ref->ct = &tag->ct;
+ /*
+ * We need in increment the call counter every time we have a new
+ * allocation or when we split a large allocation into smaller ones.
+ * Each new reference for every sub-allocation needs to increment call
+ * counter because when we free each part the counter will be decremented.
+ */
+ this_cpu_inc(tag->counters->calls);
+}
+
+static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
+{
+ alloc_tag_add_check(ref, tag);
+ if (!ref || !tag)
+ return;
+
+ __alloc_tag_ref_set(ref, tag);
+}
+
+static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
+{
+ alloc_tag_add_check(ref, tag);
+ if (!ref || !tag)
+ return;
+
+ __alloc_tag_ref_set(ref, tag);
+ this_cpu_add(tag->counters->bytes, bytes);
+}
+
+static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
+{
+ struct alloc_tag *tag;
+
+ alloc_tag_sub_check(ref);
+ if (!ref || !ref->ct)
+ return;
+
+ if (is_codetag_empty(ref)) {
+ ref->ct = NULL;
+ return;
+ }
+
+ tag = ct_to_alloc_tag(ref->ct);
+
+ this_cpu_sub(tag->counters->bytes, bytes);
+ this_cpu_dec(tag->counters->calls);
+
+ ref->ct = NULL;
+}
+
+#define alloc_tag_record(p) ((p) = current->alloc_tag)
+
+#else /* CONFIG_MEM_ALLOC_PROFILING */
+
+#define DEFINE_ALLOC_TAG(_alloc_tag)
+static inline bool mem_alloc_profiling_enabled(void) { return false; }
+static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag,
+ size_t bytes) {}
+static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {}
+#define alloc_tag_record(p) do {} while (0)
+
+#endif /* CONFIG_MEM_ALLOC_PROFILING */
+
+#define alloc_hooks_tag(_tag, _do_alloc) \
+({ \
+ struct alloc_tag * __maybe_unused _old = alloc_tag_save(_tag); \
+ typeof(_do_alloc) _res = _do_alloc; \
+ alloc_tag_restore(_tag, _old); \
+ _res; \
+})
+
+#define alloc_hooks(_do_alloc) \
+({ \
+ DEFINE_ALLOC_TAG(_alloc_tag); \
+ alloc_hooks_tag(&_alloc_tag, _do_alloc); \
+})
+
+#endif /* _LINUX_ALLOC_TAG_H */
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 2ba557e067fe69..ffeb71e7215457 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -254,16 +254,12 @@ static inline unsigned long __ffs64(u64 word)
*/
static inline unsigned long fns(unsigned long word, unsigned int n)
{
- unsigned int bit;
+ unsigned int i;
- while (word) {
- bit = __ffs(word);
- if (n-- == 0)
- return bit;
- __clear_bit(bit, &word);
- }
+ for (i = 0; word && i < n; i++)
+ word &= word - 1;
- return BITS_PER_LONG;
+ return word ? __ffs(word) : BITS_PER_LONG;
}
/**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c3e8f7cf96be9e..69e7da33ca49a6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -128,6 +128,8 @@ typedef unsigned int __bitwise blk_mode_t;
#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
/* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */
#define BLK_OPEN_RESTRICT_WRITES ((__force blk_mode_t)(1 << 5))
+/* return partition scanning errors */
+#define BLK_OPEN_STRICT_SCAN ((__force blk_mode_t)(1 << 6))
struct gendisk {
/*
@@ -1505,16 +1507,6 @@ struct blk_holder_ops {
* Thaw the file system mounted on the block device.
*/
int (*thaw)(struct block_device *bdev);
-
- /*
- * If needed, get a reference to the holder.
- */
- void (*get_holder)(void *holder);
-
- /*
- * Release the holder.
- */
- void (*put_holder)(void *holder);
};
/*
@@ -1585,6 +1577,7 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
int bdev_freeze(struct block_device *bdev);
int bdev_thaw(struct block_device *bdev);
+void bdev_fput(struct file *bdev_file);
struct io_comp_batch {
struct request *req_list;
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index ca73940e26df83..3f4b4ac527ca28 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -10,6 +10,7 @@
#ifdef __KERNEL__
#include <linux/kernel.h>
#include <linux/types.h>
+bool __init cmdline_has_extra_options(void);
#else /* !__KERNEL__ */
/*
* NOTE: This is only for tools/bootconfig, because tools/bootconfig will
@@ -287,7 +288,12 @@ int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos);
int __init xbc_get_info(int *node_size, size_t *data_size);
/* XBC cleanup data structures */
-void __init xbc_exit(void);
+void __init _xbc_exit(bool early);
+
+static inline void xbc_exit(void)
+{
+ _xbc_exit(false);
+}
/* XBC embedded bootconfig data in kernel */
#ifdef CONFIG_BOOT_CONFIG_EMBED
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4f20f62f9d63da..a86da1f38b3c34 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1574,12 +1574,26 @@ struct bpf_link {
enum bpf_link_type type;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
- struct work_struct work;
+ /* rcu is used before freeing, work can be used to schedule that
+ * RCU-based freeing before that, so they never overlap
+ */
+ union {
+ struct rcu_head rcu;
+ struct work_struct work;
+ };
};
struct bpf_link_ops {
void (*release)(struct bpf_link *link);
+ /* deallocate link resources callback, called without RCU grace period
+ * waiting
+ */
void (*dealloc)(struct bpf_link *link);
+ /* deallocate link resources callback, called after RCU grace period;
+ * if underlying BPF program is sleepable we go through tasks trace
+ * RCU GP and then "classic" RCU GP
+ */
+ void (*dealloc_deferred)(struct bpf_link *link);
int (*detach)(struct bpf_link *link);
int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
struct bpf_prog *old_prog);
@@ -2230,31 +2244,14 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
size_t align, gfp_t flags);
#else
-static inline void *
-bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
- int node)
-{
- return kmalloc_node(size, flags, node);
-}
-
-static inline void *
-bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
-{
- return kzalloc(size, flags);
-}
-
-static inline void *
-bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags)
-{
- return kvcalloc(n, size, flags);
-}
-
-static inline void __percpu *
-bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
- gfp_t flags)
-{
- return __alloc_percpu_gfp(size, align, flags);
-}
+#define bpf_map_kmalloc_node(_map, _size, _flags, _node) \
+ kmalloc_node(_size, _flags, _node)
+#define bpf_map_kzalloc(_map, _size, _flags) \
+ kzalloc(_size, _flags)
+#define bpf_map_kvcalloc(_map, _n, _size, _flags) \
+ kvcalloc(_n, _size, _flags)
+#define bpf_map_alloc_percpu(_map, _size, _align, _flags) \
+ __alloc_percpu_gfp(_size, _align, _flags)
#endif
static inline int
diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
index 79b2f78eec1a06..1af241525a17cf 100644
--- a/include/linux/bpfptr.h
+++ b/include/linux/bpfptr.h
@@ -65,9 +65,9 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,
return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);
}
-static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len)
+static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len)
{
- void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN);
+ void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -77,6 +77,7 @@ static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len)
}
return p;
}
+#define kvmemdup_bpfptr(...) alloc_hooks(kvmemdup_bpfptr_noprof(__VA_ARGS__))
static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count)
{
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index d78454a4dd1f0f..a1c0bdd0cca66b 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -303,12 +303,38 @@ static inline void put_bh(struct buffer_head *bh)
atomic_dec(&bh->b_count);
}
+/**
+ * brelse - Release a buffer.
+ * @bh: The buffer to release.
+ *
+ * Decrement a buffer_head's reference count. If @bh is NULL, this
+ * function is a no-op.
+ *
+ * If all buffers on a folio have zero reference count, are clean
+ * and unlocked, and if the folio is unlocked and not under writeback
+ * then try_to_free_buffers() may strip the buffers from the folio in
+ * preparation for freeing it (sometimes, rarely, buffers are removed
+ * from a folio but it ends up not being freed, and buffers may later
+ * be reattached).
+ *
+ * Context: Any context.
+ */
static inline void brelse(struct buffer_head *bh)
{
if (bh)
__brelse(bh);
}
+/**
+ * bforget - Discard any dirty data in a buffer.
+ * @bh: The buffer to forget.
+ *
+ * Call this function instead of brelse() if the data written to a buffer
+ * no longer needs to be written back. It will clear the buffer's dirty
+ * flag so writeback of this buffer will be skipped.
+ *
+ * Context: Any context.
+ */
static inline void bforget(struct buffer_head *bh)
{
if (bh)
@@ -437,17 +463,21 @@ static inline void bh_readahead_batch(int nr, struct buffer_head *bhs[],
}
/**
- * __bread() - reads a specified block and returns the bh
- * @bdev: the block_device to read from
- * @block: number of block
- * @size: size (in bytes) to read
+ * __bread() - Read a block.
+ * @bdev: The block device to read from.
+ * @block: Block number in units of block size.
+ * @size: The block size of this device in bytes.
*
- * Reads a specified block, and returns buffer head that contains it.
- * The page cache is allocated from movable area so that it can be migrated.
- * It returns NULL if the block was unreadable.
+ * Read a specified block, and return the buffer head that refers
+ * to it. The memory is allocated from the movable area so that it can
+ * be migrated. The returned buffer head has its refcount increased.
+ * The caller should call brelse() when it has finished with the buffer.
+ *
+ * Context: May sleep waiting for I/O.
+ * Return: NULL if the block was unreadable.
*/
-static inline struct buffer_head *
-__bread(struct block_device *bdev, sector_t block, unsigned size)
+static inline struct buffer_head *__bread(struct block_device *bdev,
+ sector_t block, unsigned size)
{
return __bread_gfp(bdev, block, size, __GFP_MOVABLE);
}
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index cb0d6cd1c12f24..60693a14589462 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -90,6 +90,14 @@ enum cc_attr {
* Examples include TDX Guest.
*/
CC_ATTR_HOTPLUG_DISABLED,
+
+ /**
+ * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host.
+ *
+ * The host kernel is running with the necessary features
+ * enabled to run SEV-SNP guests.
+ */
+ CC_ATTR_HOST_SEV_SNP,
};
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
@@ -107,10 +115,14 @@ enum cc_attr {
* * FALSE - Specified Confidential Computing attribute is not active
*/
bool cc_platform_has(enum cc_attr attr);
+void cc_platform_set(enum cc_attr attr);
+void cc_platform_clear(enum cc_attr attr);
#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */
static inline bool cc_platform_has(enum cc_attr attr) { return false; }
+static inline void cc_platform_set(enum cc_attr attr) { }
+static inline void cc_platform_clear(enum cc_attr attr) { }
#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 00623f4de5e195..0fa56d67253215 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -286,6 +286,11 @@ static inline int clk_rate_exclusive_get(struct clk *clk)
return 0;
}
+static inline int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk)
+{
+ return 0;
+}
+
static inline void clk_rate_exclusive_put(struct clk *clk) {}
#endif
diff --git a/include/linux/codetag.h b/include/linux/codetag.h
new file mode 100644
index 00000000000000..c2a579ccd45588
--- /dev/null
+++ b/include/linux/codetag.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * code tagging framework
+ */
+#ifndef _LINUX_CODETAG_H
+#define _LINUX_CODETAG_H
+
+#include <linux/types.h>
+
+struct codetag_iterator;
+struct codetag_type;
+struct codetag_module;
+struct seq_buf;
+struct module;
+
+/*
+ * An instance of this structure is created in a special ELF section at every
+ * code location being tagged. At runtime, the special section is treated as
+ * an array of these.
+ */
+struct codetag {
+ unsigned int flags; /* used in later patches */
+ unsigned int lineno;
+ const char *modname;
+ const char *function;
+ const char *filename;
+} __aligned(8);
+
+union codetag_ref {
+ struct codetag *ct;
+};
+
+struct codetag_type_desc {
+ const char *section;
+ size_t tag_size;
+ void (*module_load)(struct codetag_type *cttype,
+ struct codetag_module *cmod);
+ bool (*module_unload)(struct codetag_type *cttype,
+ struct codetag_module *cmod);
+};
+
+struct codetag_iterator {
+ struct codetag_type *cttype;
+ struct codetag_module *cmod;
+ unsigned long mod_id;
+ struct codetag *ct;
+};
+
+#ifdef MODULE
+#define CT_MODULE_NAME KBUILD_MODNAME
+#else
+#define CT_MODULE_NAME NULL
+#endif
+
+#define CODE_TAG_INIT { \
+ .modname = CT_MODULE_NAME, \
+ .function = __func__, \
+ .filename = __FILE__, \
+ .lineno = __LINE__, \
+ .flags = 0, \
+}
+
+void codetag_lock_module_list(struct codetag_type *cttype, bool lock);
+bool codetag_trylock_module_list(struct codetag_type *cttype);
+struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype);
+struct codetag *codetag_next_ct(struct codetag_iterator *iter);
+
+void codetag_to_text(struct seq_buf *out, struct codetag *ct);
+
+struct codetag_type *
+codetag_register_type(const struct codetag_type_desc *desc);
+
+#if defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES)
+void codetag_load_module(struct module *mod);
+bool codetag_unload_module(struct module *mod);
+#else
+static inline void codetag_load_module(struct module *mod) {}
+static inline bool codetag_unload_module(struct module *mod) { return true; }
+#endif
+
+#endif /* _LINUX_CODETAG_H */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c00cc6c0878a1e..8c252e073bd810 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -268,7 +268,7 @@ static inline void *offset_to_ptr(const int *off)
* - When one operand is a null pointer constant (i.e. when x is an integer
* constant expression) and the other is an object pointer (i.e. our
* third operand), the conditional operator returns the type of the
- * object pointer operand (i.e. "int *). Here, within the sizeof(), we
+ * object pointer operand (i.e. "int *"). Here, within the sizeof(), we
* would then get:
* sizeof(*((int *)(...)) == sizeof(int) == 4
* - When one operand is a void pointer (i.e. when x is not an integer
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 2abaa3a825a9ec..8f8236317d5b15 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -278,6 +278,17 @@ struct ftrace_likely_data {
# define __no_kcsan
#endif
+#ifdef __SANITIZE_MEMORY__
+/*
+ * Similarly to KASAN and KCSAN, KMSAN loses function attributes of inlined
+ * functions, therefore disabling KMSAN checks also requires disabling inlining.
+ *
+ * __no_sanitize_or_inline effectively prevents KMSAN from reporting errors
+ * within the function and marks all its outputs as initialized.
+ */
+# define __no_sanitize_or_inline __no_kmsan_checks notrace __maybe_unused
+#endif
+
#ifndef __no_sanitize_or_inline
#define __no_sanitize_or_inline __always_inline
#endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 272e4e79e15c48..861c3bfc5f1700 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -221,7 +221,18 @@ void cpuhp_report_idle_dead(void);
static inline void cpuhp_report_idle_dead(void) { }
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+#ifdef CONFIG_CPU_MITIGATIONS
extern bool cpu_mitigations_off(void);
extern bool cpu_mitigations_auto_nosmt(void);
+#else
+static inline bool cpu_mitigations_off(void)
+{
+ return true;
+}
+static inline bool cpu_mitigations_auto_nosmt(void)
+{
+ return false;
+}
+#endif
#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 1c29947db84899..2b1bb52ea3df2e 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -389,6 +389,29 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
}
/**
+ * cpumask_any_and_but - pick a "random" cpu from *mask1 & *mask2, but not this one.
+ * @mask1: the first input cpumask
+ * @mask2: the second input cpumask
+ * @cpu: the cpu to ignore
+ *
+ * Returns >= nr_cpu_ids if no cpus set.
+ */
+static inline
+unsigned int cpumask_any_and_but(const struct cpumask *mask1,
+ const struct cpumask *mask2,
+ unsigned int cpu)
+{
+ unsigned int i;
+
+ cpumask_check(cpu);
+ i = cpumask_first_and(mask1, mask2);
+ if (i != cpu)
+ return i;
+
+ return cpumask_next_and(cpu, mask1, mask2);
+}
+
+/**
* cpumask_nth - get the Nth cpu in a cpumask
* @srcp: the cpumask pointer
* @cpu: the Nth cpu to find, starting from 0
@@ -1017,11 +1040,6 @@ void init_cpu_present(const struct cpumask *src);
void init_cpu_possible(const struct cpumask *src);
void init_cpu_online(const struct cpumask *src);
-static inline void reset_cpu_possible_mask(void)
-{
- bitmap_zero(cpumask_bits(&__cpu_possible_mask), NR_CPUS);
-}
-
static inline void
set_cpu_possible(unsigned int cpu, bool possible)
{
diff --git a/include/linux/damon.h b/include/linux/damon.h
index 886d07294f4e7c..f7da65e1ac041e 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -297,6 +297,7 @@ struct damos_stat {
* enum damos_filter_type - Type of memory for &struct damos_filter
* @DAMOS_FILTER_TYPE_ANON: Anonymous pages.
* @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages.
+ * @DAMOS_FILTER_TYPE_YOUNG: Recently accessed pages.
* @DAMOS_FILTER_TYPE_ADDR: Address range.
* @DAMOS_FILTER_TYPE_TARGET: Data Access Monitoring target.
* @NR_DAMOS_FILTER_TYPES: Number of filter types.
@@ -315,6 +316,7 @@ struct damos_stat {
enum damos_filter_type {
DAMOS_FILTER_TYPE_ANON,
DAMOS_FILTER_TYPE_MEMCG,
+ DAMOS_FILTER_TYPE_YOUNG,
DAMOS_FILTER_TYPE_ADDR,
DAMOS_FILTER_TYPE_TARGET,
NR_DAMOS_FILTER_TYPES,
diff --git a/include/linux/device.h b/include/linux/device.h
index 97c4b046c09d94..b9f5464f44ed81 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1247,6 +1247,7 @@ void device_link_del(struct device_link *link);
void device_link_remove(void *consumer, struct device *supplier);
void device_links_supplier_sync_state_pause(void);
void device_links_supplier_sync_state_resume(void);
+void device_link_wait_removal(void);
/* Create alias, so I can be autoloaded. */
#define MODULE_ALIAS_CHARDEV(major,minor) \
diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
index 4bdf0b96da2831..ad9e2506c2f401 100644
--- a/include/linux/dma-fence-chain.h
+++ b/include/linux/dma-fence-chain.h
@@ -86,10 +86,8 @@ dma_fence_chain_contained(struct dma_fence *fence)
*
* Returns a new struct dma_fence_chain object or NULL on failure.
*/
-static inline struct dma_fence_chain *dma_fence_chain_alloc(void)
-{
- return kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
-};
+#define dma_fence_chain_alloc() \
+ ((struct dma_fence_chain *)kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL))
/**
* dma_fence_chain_free
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index e06bad467f55ef..c3f9bb6602ba21 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -682,4 +682,11 @@ static inline bool dma_fence_is_container(struct dma_fence *fence)
return dma_fence_is_array(fence) || dma_fence_is_chain(fence);
}
+#define DMA_FENCE_WARN(f, fmt, args...) \
+ do { \
+ struct dma_fence *__ff = (f); \
+ pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\
+ ##args); \
+ } while (0)
+
#endif /* __LINUX_DMA_FENCE_H */
diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h
index 4abc60f0420928..9ee319851b5f63 100644
--- a/include/linux/dma-map-ops.h
+++ b/include/linux/dma-map-ops.h
@@ -29,7 +29,7 @@ struct dma_map_ops {
unsigned long attrs);
void (*free)(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs);
- struct page *(*alloc_pages)(struct device *dev, size_t size,
+ struct page *(*alloc_pages_op)(struct device *dev, size_t size,
dma_addr_t *dma_handle, enum dma_data_direction dir,
gfp_t gfp);
void (*free_pages)(struct device *dev, size_t size, struct page *vaddr,
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 770755df852f14..70cd7258cd29f5 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -245,7 +245,6 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
* max utilization to the allowed CPU capacity before calculating
* effective performance.
*/
- max_util = map_util_perf(max_util);
max_util = min(max_util, allowed_cpu_cap);
/*
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 224645f17c333b..297231854ada51 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -608,6 +608,31 @@ static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr,
}
/**
+ * eth_skb_pkt_type - Assign packet type if destination address does not match
+ * @skb: Assigned a packet type if address does not match @dev address
+ * @dev: Network device used to compare packet address against
+ *
+ * If the destination MAC address of the packet does not match the network
+ * device address, assign an appropriate packet type.
+ */
+static inline void eth_skb_pkt_type(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ const struct ethhdr *eth = eth_hdr(skb);
+
+ if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) {
+ if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
+ if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+ } else {
+ skb->pkt_type = PACKET_OTHERHOST;
+ }
+ }
+}
+
+/**
* eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame
* @skb: Buffer to pad
*
diff --git a/include/linux/filter.h b/include/linux/filter.h
index c99bc3df2d28e3..219ee7a7687443 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -963,6 +963,7 @@ bool bpf_jit_supports_far_kfunc_call(void);
bool bpf_jit_supports_exceptions(void);
bool bpf_jit_supports_ptr_xchg(void);
bool bpf_jit_supports_arena(void);
+u64 bpf_arch_uaddress_limit(void);
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
bool bpf_helper_changes_pkt_data(void *func);
diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h
index 5c28298a98bec8..366243ee96096d 100644
--- a/include/linux/firmware/qcom/qcom_qseecom.h
+++ b/include/linux/firmware/qcom/qcom_qseecom.h
@@ -10,6 +10,7 @@
#define __QCOM_QSEECOM_H
#include <linux/auxiliary_bus.h>
+#include <linux/dma-mapping.h>
#include <linux/types.h>
#include <linux/firmware/qcom/qcom_scm.h>
@@ -25,11 +26,56 @@ struct qseecom_client {
};
/**
+ * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client.
+ * @client: The QSEECOM client device.
+ *
+ * Returns the SCM device under which the provided QSEECOM client device
+ * operates. This function is intended to be used for DMA allocations.
+ */
+static inline struct device *qseecom_scm_dev(struct qseecom_client *client)
+{
+ return client->aux_dev.dev.parent->parent;
+}
+
+/**
+ * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client.
+ * @client: The QSEECOM client to allocate the memory for.
+ * @size: The number of bytes to allocate.
+ * @dma_handle: Pointer to where the DMA address should be stored.
+ * @gfp: Allocation flags.
+ *
+ * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for
+ * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details.
+ */
+static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp)
+{
+ return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp);
+}
+
+/**
+ * dma_free_coherent() - Free QSEECOM DMA memory.
+ * @client: The QSEECOM client for which the memory has been allocated.
+ * @size: The number of bytes allocated.
+ * @cpu_addr: Virtual memory address to free.
+ * @dma_handle: DMA memory address to free.
+ *
+ * Wrapper function for dma_free_coherent(), freeing memory previously
+ * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for
+ * details.
+ */
+static inline void qseecom_dma_free(struct qseecom_client *client, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle)
+{
+ return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle);
+}
+
+/**
* qcom_qseecom_app_send() - Send to and receive data from a given QSEE app.
* @client: The QSEECOM client associated with the target app.
- * @req: Request buffer sent to the app (must be DMA-mappable).
+ * @req: DMA address of the request buffer sent to the app.
* @req_size: Size of the request buffer.
- * @rsp: Response buffer, written to by the app (must be DMA-mappable).
+ * @rsp: DMA address of the response buffer, written to by the app.
* @rsp_size: Size of the response buffer.
*
* Sends a request to the QSEE app associated with the given client and read
@@ -43,8 +89,9 @@ struct qseecom_client {
*
* Return: Zero on success, nonzero on failure.
*/
-static inline int qcom_qseecom_app_send(struct qseecom_client *client, void *req, size_t req_size,
- void *rsp, size_t rsp_size)
+static inline int qcom_qseecom_app_send(struct qseecom_client *client,
+ dma_addr_t req, size_t req_size,
+ dma_addr_t rsp, size_t rsp_size)
{
return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size);
}
diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h
index ccaf288460546a..aaa19f93ac4306 100644
--- a/include/linux/firmware/qcom/qcom_scm.h
+++ b/include/linux/firmware/qcom/qcom_scm.h
@@ -118,8 +118,8 @@ bool qcom_scm_lmh_dcvsh_available(void);
#ifdef CONFIG_QCOM_QSEECOM
int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id);
-int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp,
- size_t rsp_size);
+int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size,
+ dma_addr_t rsp, size_t rsp_size);
#else /* CONFIG_QCOM_QSEECOM */
@@ -128,9 +128,9 @@ static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id)
return -EINVAL;
}
-static inline int qcom_scm_qseecom_app_send(u32 app_id, void *req,
- size_t req_size, void *rsp,
- size_t rsp_size)
+static inline int qcom_scm_qseecom_app_send(u32 app_id,
+ dma_addr_t req, size_t req_size,
+ dma_addr_t rsp, size_t rsp_size)
{
return -EINVAL;
}
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 6aeebe0a677704..b24d62bad0b329 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -725,9 +725,9 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size)
return __real_memchr_inv(p, c, size);
}
-extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup)
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup_noprof)
__realloc_size(2);
-__FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp)
+__FORTIFY_INLINE void *kmemdup_noprof(const void * const POS0 p, size_t size, gfp_t gfp)
{
const size_t p_size = __struct_size(p);
@@ -737,6 +737,7 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp
fortify_panic(FORTIFY_FUNC_kmemdup, FORTIFY_READ, p_size, size, NULL);
return __real_kmemdup(p, size, gfp);
}
+#define kmemdup(...) alloc_hooks(kmemdup_noprof(__VA_ARGS__))
/**
* strcpy - Copy a string into another string buffer
diff --git a/include/linux/fpu.h b/include/linux/fpu.h
new file mode 100644
index 00000000000000..2fb63e22913bcf
--- /dev/null
+++ b/include/linux/fpu.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_FPU_H
+#define _LINUX_FPU_H
+
+#ifdef _LINUX_FPU_COMPILATION_UNIT
+#error FP code must be compiled separately. See Documentation/core-api/floating-point.rst.
+#endif
+
+#include <asm/fpu.h>
+
+#endif
diff --git a/include/linux/framer/framer.h b/include/linux/framer/framer.h
index 9a9b88962c296f..2b85fe9e7f9a76 100644
--- a/include/linux/framer/framer.h
+++ b/include/linux/framer/framer.h
@@ -181,12 +181,12 @@ static inline int framer_notifier_unregister(struct framer *framer,
return -ENOSYS;
}
-struct framer *framer_get(struct device *dev, const char *con_id)
+static inline struct framer *framer_get(struct device *dev, const char *con_id)
{
return ERR_PTR(-ENOSYS);
}
-void framer_put(struct device *dev, struct framer *framer)
+static inline void framer_put(struct device *dev, struct framer *framer)
{
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 00fc429b0af0fb..4f2e2e5b6ab114 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -121,6 +121,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_PWRITE ((__force fmode_t)0x10)
/* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC ((__force fmode_t)0x20)
+/* File writes are restricted (block device specific) */
+#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40)
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
@@ -3083,11 +3085,7 @@ int setattr_should_drop_sgid(struct mnt_idmap *idmap,
* This must be used for allocating filesystems specific inodes to set
* up the inode reclaim context correctly.
*/
-static inline void *
-alloc_inode_sb(struct super_block *sb, struct kmem_cache *cache, gfp_t gfp)
-{
- return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp);
-}
+#define alloc_inode_sb(_sb, _cache, _gfp) kmem_cache_alloc_lru(_cache, &_sb->s_inode_lru, _gfp)
extern void __insert_inode_hash(struct inode *, unsigned long hashval);
static inline void insert_inode_hash(struct inode *inode)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c775ea3c601558..450c2cbcf04b8d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -6,6 +6,8 @@
#include <linux/mmzone.h>
#include <linux/topology.h>
+#include <linux/alloc_tag.h>
+#include <linux/sched.h>
struct vm_area_struct;
struct mempolicy;
@@ -175,42 +177,46 @@ static inline void arch_free_page(struct page *page, int order) { }
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
-struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
+struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
nodemask_t *nodemask);
-struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid,
+#define __alloc_pages(...) alloc_hooks(__alloc_pages_noprof(__VA_ARGS__))
+
+struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
nodemask_t *nodemask);
+#define __folio_alloc(...) alloc_hooks(__folio_alloc_noprof(__VA_ARGS__))
-unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
nodemask_t *nodemask, int nr_pages,
struct list_head *page_list,
struct page **page_array);
+#define __alloc_pages_bulk(...) alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__))
-unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
+unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp,
unsigned long nr_pages,
struct page **page_array);
+#define alloc_pages_bulk_array_mempolicy(...) \
+ alloc_hooks(alloc_pages_bulk_array_mempolicy_noprof(__VA_ARGS__))
/* Bulk allocate order-0 pages */
-static inline unsigned long
-alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list)
-{
- return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL);
-}
+#define alloc_pages_bulk_list(_gfp, _nr_pages, _list) \
+ __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _list, NULL)
-static inline unsigned long
-alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array)
-{
- return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, NULL, page_array);
-}
+#define alloc_pages_bulk_array(_gfp, _nr_pages, _page_array) \
+ __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, NULL, _page_array)
static inline unsigned long
-alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array)
+alloc_pages_bulk_array_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages,
+ struct page **page_array)
{
if (nid == NUMA_NO_NODE)
nid = numa_mem_id();
- return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array);
+ return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, NULL, page_array);
}
+#define alloc_pages_bulk_array_node(...) \
+ alloc_hooks(alloc_pages_bulk_array_node_noprof(__VA_ARGS__))
+
static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask)
{
gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN);
@@ -230,82 +236,104 @@ static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask)
* online. For more general interface, see alloc_pages_node().
*/
static inline struct page *
-__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
+__alloc_pages_node_noprof(int nid, gfp_t gfp_mask, unsigned int order)
{
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
warn_if_node_offline(nid, gfp_mask);
- return __alloc_pages(gfp_mask, order, nid, NULL);
+ return __alloc_pages_noprof(gfp_mask, order, nid, NULL);
}
+#define __alloc_pages_node(...) alloc_hooks(__alloc_pages_node_noprof(__VA_ARGS__))
+
static inline
-struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
+struct folio *__folio_alloc_node_noprof(gfp_t gfp, unsigned int order, int nid)
{
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
warn_if_node_offline(nid, gfp);
- return __folio_alloc(gfp, order, nid, NULL);
+ return __folio_alloc_noprof(gfp, order, nid, NULL);
}
+#define __folio_alloc_node(...) alloc_hooks(__folio_alloc_node_noprof(__VA_ARGS__))
+
/*
* Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE,
* prefer the current CPU's closest node. Otherwise node must be valid and
* online.
*/
-static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
- unsigned int order)
+static inline struct page *alloc_pages_node_noprof(int nid, gfp_t gfp_mask,
+ unsigned int order)
{
if (nid == NUMA_NO_NODE)
nid = numa_mem_id();
- return __alloc_pages_node(nid, gfp_mask, order);
+ return __alloc_pages_node_noprof(nid, gfp_mask, order);
}
+#define alloc_pages_node(...) alloc_hooks(alloc_pages_node_noprof(__VA_ARGS__))
+
#ifdef CONFIG_NUMA
-struct page *alloc_pages(gfp_t gfp, unsigned int order);
-struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
+struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order);
+struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order,
struct mempolicy *mpol, pgoff_t ilx, int nid);
-struct folio *folio_alloc(gfp_t gfp, unsigned int order);
-struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
+struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order);
+struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned long addr, bool hugepage);
#else
-static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
+static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order)
{
- return alloc_pages_node(numa_node_id(), gfp_mask, order);
+ return alloc_pages_node_noprof(numa_node_id(), gfp_mask, order);
}
-static inline struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
+static inline struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order,
struct mempolicy *mpol, pgoff_t ilx, int nid)
{
- return alloc_pages(gfp, order);
+ return alloc_pages_noprof(gfp, order);
}
-static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
+static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order)
{
return __folio_alloc_node(gfp, order, numa_node_id());
}
-#define vma_alloc_folio(gfp, order, vma, addr, hugepage) \
- folio_alloc(gfp, order)
+#define vma_alloc_folio_noprof(gfp, order, vma, addr, hugepage) \
+ folio_alloc_noprof(gfp, order)
#endif
+
+#define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
+#define alloc_pages_mpol(...) alloc_hooks(alloc_pages_mpol_noprof(__VA_ARGS__))
+#define folio_alloc(...) alloc_hooks(folio_alloc_noprof(__VA_ARGS__))
+#define vma_alloc_folio(...) alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__))
+
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
-static inline struct page *alloc_page_vma(gfp_t gfp,
+
+static inline struct page *alloc_page_vma_noprof(gfp_t gfp,
struct vm_area_struct *vma, unsigned long addr)
{
- struct folio *folio = vma_alloc_folio(gfp, 0, vma, addr, false);
+ struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr, false);
return &folio->page;
}
+#define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__))
+
+extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order);
+#define __get_free_pages(...) alloc_hooks(get_free_pages_noprof(__VA_ARGS__))
-extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
-extern unsigned long get_zeroed_page(gfp_t gfp_mask);
+extern unsigned long get_zeroed_page_noprof(gfp_t gfp_mask);
+#define get_zeroed_page(...) alloc_hooks(get_zeroed_page_noprof(__VA_ARGS__))
+
+void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask) __alloc_size(1);
+#define alloc_pages_exact(...) alloc_hooks(alloc_pages_exact_noprof(__VA_ARGS__))
-void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1);
void free_pages_exact(void *virt, size_t size);
-__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2);
-#define __get_free_page(gfp_mask) \
- __get_free_pages((gfp_mask), 0)
+__meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2);
+#define alloc_pages_exact_nid(...) \
+ alloc_hooks(alloc_pages_exact_nid_noprof(__VA_ARGS__))
+
+#define __get_free_page(gfp_mask) \
+ __get_free_pages((gfp_mask), 0)
-#define __get_dma_pages(gfp_mask, order) \
- __get_free_pages((gfp_mask) | GFP_DMA, (order))
+#define __get_dma_pages(gfp_mask, order) \
+ __get_free_pages((gfp_mask) | GFP_DMA, (order))
extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
@@ -374,10 +402,14 @@ extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
#ifdef CONFIG_CONTIG_ALLOC
/* The below functions must be run on a range from a single zone. */
-extern int alloc_contig_range(unsigned long start, unsigned long end,
+extern int alloc_contig_range_noprof(unsigned long start, unsigned long end,
unsigned migratetype, gfp_t gfp_mask);
-extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
- int nid, nodemask_t *nodemask);
+#define alloc_contig_range(...) alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))
+
+extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
+ int nid, nodemask_t *nodemask);
+#define alloc_contig_pages(...) alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
+
#endif
void free_contig_range(unsigned long pfn, unsigned long nr_pages);
diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h
index 868c8fb1bbc1c2..313be4ad79fd97 100644
--- a/include/linux/gfp_types.h
+++ b/include/linux/gfp_types.h
@@ -2,6 +2,8 @@
#ifndef __LINUX_GFP_TYPES_H
#define __LINUX_GFP_TYPES_H
+#include <linux/bits.h>
+
/* The typedef is in types.h but we want the documentation here */
#if 0
/**
@@ -53,6 +55,9 @@ enum {
#ifdef CONFIG_LOCKDEP
___GFP_NOLOCKDEP_BIT,
#endif
+#ifdef CONFIG_SLAB_OBJ_EXT
+ ___GFP_NO_OBJ_EXT_BIT,
+#endif
___GFP_LAST_BIT
};
@@ -93,6 +98,11 @@ enum {
#else
#define ___GFP_NOLOCKDEP 0
#endif
+#ifdef CONFIG_SLAB_OBJ_EXT
+#define ___GFP_NO_OBJ_EXT BIT(___GFP_NO_OBJ_EXT_BIT)
+#else
+#define ___GFP_NO_OBJ_EXT 0
+#endif
/*
* Physical address zone modifiers (see linux/mmzone.h - low four bits)
@@ -133,12 +143,15 @@ enum {
* node with no fallbacks or placement policy enforcements.
*
* %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
+ *
+ * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension.
*/
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL)
#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)
#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT)
+#define __GFP_NO_OBJ_EXT ((__force gfp_t)___GFP_NO_OBJ_EXT)
/**
* DOC: Watermark modifiers
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index dc75f802e28476..f8617eaf08bad7 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -646,8 +646,6 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc,
struct gpio_device *gpio_device_find(const void *data,
int (*match)(struct gpio_chip *gc,
const void *data));
-struct gpio_device *gpio_device_find_by_label(const char *label);
-struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode);
struct gpio_device *gpio_device_get(struct gpio_device *gdev);
void gpio_device_put(struct gpio_device *gdev);
@@ -814,6 +812,9 @@ struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc);
int gpio_device_get_base(struct gpio_device *gdev);
const char *gpio_device_get_label(struct gpio_device *gdev);
+struct gpio_device *gpio_device_find_by_label(const char *label);
+struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode);
+
#else /* CONFIG_GPIOLIB */
#include <asm/bug.h>
@@ -843,6 +844,18 @@ static inline const char *gpio_device_get_label(struct gpio_device *gdev)
return NULL;
}
+static inline struct gpio_device *gpio_device_find_by_label(const char *label)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
+static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
static inline int gpiochip_lock_as_irq(struct gpio_chip *gc,
unsigned int offset)
{
diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h
index 6c75c8bd44a0bb..1a14e239221f7e 100644
--- a/include/linux/gpio/property.h
+++ b/include/linux/gpio/property.h
@@ -2,7 +2,6 @@
#ifndef __LINUX_GPIO_PROPERTY_H
#define __LINUX_GPIO_PROPERTY_H
-#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */
#include <linux/property.h>
#define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \
diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h
index 7118ac28d46879..ca70eeb6393e7f 100644
--- a/include/linux/hid_bpf.h
+++ b/include/linux/hid_bpf.h
@@ -149,10 +149,8 @@ static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; }
static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {}
static inline void hid_bpf_destroy_device(struct hid_device *hid) {}
static inline void hid_bpf_device_init(struct hid_device *hid) {}
-static inline u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size)
-{
- return kmemdup(rdesc, *size, GFP_KERNEL);
-}
+#define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size) \
+ ((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL))
#endif /* CONFIG_HID_BPF */
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index de0c8910507690..c8d3ec116e291b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -64,9 +64,6 @@ ssize_t single_hugepage_flag_show(struct kobject *kobj,
enum transparent_hugepage_flag flag);
extern struct kobj_attribute shmem_enabled_attr;
-#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
-#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
-
/*
* Mask of all large folio orders supported for anonymous THP; all orders up to
* and including PMD_ORDER, except order-0 (which is not "huge") and order-1
@@ -84,17 +81,32 @@ extern struct kobj_attribute shmem_enabled_attr;
*/
#define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE)
-#define thp_vma_allowable_order(vma, vm_flags, smaps, in_pf, enforce_sysfs, order) \
- (!!thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, BIT(order)))
+#define TVA_SMAPS (1 << 0) /* Will be used for procfs */
+#define TVA_IN_PF (1 << 1) /* Page fault handler */
+#define TVA_ENFORCE_SYSFS (1 << 2) /* Obey sysfs configuration */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \
+ (!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order)))
+
+#ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
#define HPAGE_PMD_SHIFT PMD_SHIFT
-#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT)
+#define HPAGE_PUD_SHIFT PUD_SHIFT
+#else
+#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
+#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
+#endif
+
+#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
+#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
#define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))
+#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT)
-#define HPAGE_PUD_SHIFT PUD_SHIFT
-#define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT)
+#define HPAGE_PUD_ORDER (HPAGE_PUD_SHIFT-PAGE_SHIFT)
+#define HPAGE_PUD_NR (1<<HPAGE_PUD_ORDER)
#define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1))
+#define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern unsigned long transparent_hugepage_flags;
extern unsigned long huge_anon_orders_always;
@@ -210,17 +222,15 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma)
}
unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
- unsigned long vm_flags, bool smaps,
- bool in_pf, bool enforce_sysfs,
+ unsigned long vm_flags,
+ unsigned long tva_flags,
unsigned long orders);
/**
* thp_vma_allowable_orders - determine hugepage orders that are allowed for vma
* @vma: the vm area to check
* @vm_flags: use these vm_flags instead of vma->vm_flags
- * @smaps: whether answer will be used for smaps file
- * @in_pf: whether answer will be used by page fault handler
- * @enforce_sysfs: whether sysfs config should be taken into account
+ * @tva_flags: Which TVA flags to honour
* @orders: bitfield of all orders to consider
*
* Calculates the intersection of the requested hugepage orders and the allowed
@@ -233,12 +243,12 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
*/
static inline
unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
- unsigned long vm_flags, bool smaps,
- bool in_pf, bool enforce_sysfs,
+ unsigned long vm_flags,
+ unsigned long tva_flags,
unsigned long orders)
{
/* Optimization to check if required orders are enabled early. */
- if (enforce_sysfs && vma_is_anonymous(vma)) {
+ if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) {
unsigned long mask = READ_ONCE(huge_anon_orders_always);
if (vm_flags & VM_HUGEPAGE)
@@ -252,8 +262,30 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
return 0;
}
- return __thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf,
- enforce_sysfs, orders);
+ return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders);
+}
+
+enum mthp_stat_item {
+ MTHP_STAT_ANON_FAULT_ALLOC,
+ MTHP_STAT_ANON_FAULT_FALLBACK,
+ MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
+ MTHP_STAT_ANON_SWPOUT,
+ MTHP_STAT_ANON_SWPOUT_FALLBACK,
+ __MTHP_STAT_COUNT
+};
+
+struct mthp_stat {
+ unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT];
+};
+
+DECLARE_PER_CPU(struct mthp_stat, mthp_stats);
+
+static inline void count_mthp_stat(int order, enum mthp_stat_item item)
+{
+ if (order <= 0 || order > PMD_ORDER)
+ return;
+
+ this_cpu_inc(mthp_stats.stats[order][item]);
}
#define transparent_hugepage_use_zero_page() \
@@ -262,8 +294,10 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags);
+unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags,
+ vm_flags_t vm_flags);
-void folio_prep_large_rmappable(struct folio *folio);
bool can_split_folio(struct folio *folio, int *pextra_pins);
int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
unsigned int new_order);
@@ -344,17 +378,15 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, int flags, struct dev_pagemap **pgmap);
-struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
- pud_t *pud, int flags, struct dev_pagemap **pgmap);
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
-extern struct page *huge_zero_page;
+extern struct folio *huge_zero_folio;
extern unsigned long huge_zero_pfn;
-static inline bool is_huge_zero_page(struct page *page)
+static inline bool is_huge_zero_folio(const struct folio *folio)
{
- return READ_ONCE(huge_zero_page) == page;
+ return READ_ONCE(huge_zero_folio) == folio;
}
static inline bool is_huge_zero_pmd(pmd_t pmd)
@@ -367,8 +399,8 @@ static inline bool is_huge_zero_pud(pud_t pud)
return false;
}
-struct page *mm_get_huge_zero_page(struct mm_struct *mm);
-void mm_put_huge_zero_page(struct mm_struct *mm);
+struct folio *mm_get_huge_zero_folio(struct mm_struct *mm);
+void mm_put_huge_zero_folio(struct mm_struct *mm);
#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot))
@@ -378,13 +410,6 @@ static inline bool thp_migration_supported(void)
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
-#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
-#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
-#define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; })
-
-#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
-#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
-#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
static inline bool folio_test_pmd_mappable(struct folio *folio)
{
@@ -404,19 +429,25 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
}
static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
- unsigned long vm_flags, bool smaps,
- bool in_pf, bool enforce_sysfs,
+ unsigned long vm_flags,
+ unsigned long tva_flags,
unsigned long orders)
{
return 0;
}
-static inline void folio_prep_large_rmappable(struct folio *folio) {}
-
#define transparent_hugepage_flags 0UL
#define thp_get_unmapped_area NULL
+static inline unsigned long
+thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags, vm_flags_t vm_flags)
+{
+ return 0;
+}
+
static inline bool
can_split_folio(struct folio *folio, int *pextra_pins)
{
@@ -483,7 +514,7 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
return 0;
}
-static inline bool is_huge_zero_page(struct page *page)
+static inline bool is_huge_zero_folio(const struct folio *folio)
{
return false;
}
@@ -498,7 +529,7 @@ static inline bool is_huge_zero_pud(pud_t pud)
return false;
}
-static inline void mm_put_huge_zero_page(struct mm_struct *mm)
+static inline void mm_put_huge_zero_folio(struct mm_struct *mm)
{
return;
}
@@ -509,12 +540,6 @@ static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
return NULL;
}
-static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
- unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap)
-{
- return NULL;
-}
-
static inline bool thp_migration_supported(void)
{
return false;
@@ -535,16 +560,4 @@ static inline int split_folio_to_order(struct folio *folio, int new_order)
#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0)
#define split_folio(f) split_folio_to_order(f, 0)
-/*
- * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
- * limitations in the implementation like arm64 MTE can override this to
- * false
- */
-#ifndef arch_thp_swp_supported
-static inline bool arch_thp_swp_supported(void)
-{
- return true;
-}
-#endif
-
#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 77b30a8c6076b6..68244bb3637a87 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -174,8 +174,11 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pud_t *pud);
+bool hugetlbfs_pagecache_present(struct hstate *h,
+ struct vm_area_struct *vma,
+ unsigned long address);
-struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);
+struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);
extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages[MAX_NUMNODES];
@@ -272,13 +275,9 @@ void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
void hugetlb_vma_lock_release(struct kref *kref);
-
-int pmd_huge(pmd_t pmd);
-int pud_huge(pud_t pud);
long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot,
unsigned long cp_flags);
-
bool is_hugetlb_entry_migration(pte_t pte);
bool is_hugetlb_entry_hwpoisoned(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
@@ -298,8 +297,8 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
}
-static inline struct address_space *hugetlb_page_mapping_lock_write(
- struct page *hpage)
+static inline struct address_space *hugetlb_folio_mapping_lock_write(
+ struct folio *folio)
{
return NULL;
}
@@ -329,13 +328,6 @@ static inline void hugetlb_zap_end(
{
}
-static inline struct page *hugetlb_follow_page_mask(
- struct vm_area_struct *vma, unsigned long address, unsigned int flags,
- unsigned int *page_mask)
-{
- BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
-}
-
static inline int copy_hugetlb_page_range(struct mm_struct *dst,
struct mm_struct *src,
struct vm_area_struct *dst_vma,
@@ -399,16 +391,6 @@ static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
}
-static inline int pmd_huge(pmd_t pmd)
-{
- return 0;
-}
-
-static inline int pud_huge(pud_t pud)
-{
- return 0;
-}
-
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr, unsigned long len)
{
@@ -493,16 +475,6 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
#endif /* !CONFIG_HUGETLB_PAGE */
-/*
- * hugepages at page global directory. If arch support
- * hugepages at pgd level, they need to define this.
- */
-#ifndef pgd_huge
-#define pgd_huge(x) 0
-#endif
-#ifndef p4d_huge
-#define p4d_huge(x) 0
-#endif
#ifndef pgd_write
static inline int pgd_write(pgd_t pgd)
@@ -747,7 +719,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask, gfp_t gfp_mask);
+ nodemask_t *nmask, gfp_t gfp_mask,
+ bool allow_alloc_fallback);
int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
@@ -859,9 +832,9 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
#define is_hugepage_only_range is_hugepage_only_range
#endif
-#ifndef arch_clear_hugepage_flags
-static inline void arch_clear_hugepage_flags(struct page *page) { }
-#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#ifndef arch_clear_hugetlb_flags
+static inline void arch_clear_hugetlb_flags(struct folio *folio) { }
+#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
#endif
#ifndef arch_make_huge_pte
@@ -888,8 +861,8 @@ static inline int hstate_index(struct hstate *h)
return h - hstates;
}
-extern int dissolve_free_huge_page(struct page *page);
-extern int dissolve_free_huge_pages(unsigned long start_pfn,
+int dissolve_free_hugetlb_folio(struct folio *folio);
+int dissolve_free_hugetlb_folios(unsigned long start_pfn,
unsigned long end_pfn);
#ifdef CONFIG_MEMORY_FAILURE
@@ -970,6 +943,30 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
return modified_mask;
}
+static inline bool htlb_allow_alloc_fallback(int reason)
+{
+ bool allowed_fallback = false;
+
+ /*
+ * Note: the memory offline, memory failure and migration syscalls will
+ * be allowed to fallback to other nodes due to lack of a better chioce,
+ * that might break the per-node hugetlb pool. While other cases will
+ * set the __GFP_THISNODE to avoid breaking the per-node hugetlb pool.
+ */
+ switch (reason) {
+ case MR_MEMORY_HOTPLUG:
+ case MR_MEMORY_FAILURE:
+ case MR_SYSCALL:
+ case MR_MEMPOLICY_MBIND:
+ allowed_fallback = true;
+ break;
+ default:
+ break;
+ }
+
+ return allowed_fallback;
+}
+
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
@@ -1065,7 +1062,8 @@ static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask, gfp_t gfp_mask)
+ nodemask_t *nmask, gfp_t gfp_mask,
+ bool allow_alloc_fallback)
{
return NULL;
}
@@ -1150,12 +1148,12 @@ static inline int hstate_index(struct hstate *h)
return 0;
}
-static inline int dissolve_free_huge_page(struct page *page)
+static inline int dissolve_free_hugetlb_folio(struct folio *folio)
{
return 0;
}
-static inline int dissolve_free_huge_pages(unsigned long start_pfn,
+static inline int dissolve_free_hugetlb_folios(unsigned long start_pfn,
unsigned long end_pfn)
{
return 0;
@@ -1181,6 +1179,11 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
return 0;
}
+static inline bool htlb_allow_alloc_fallback(int reason)
+{
+ return false;
+}
+
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
struct mm_struct *mm, pte_t *pte)
{
@@ -1221,6 +1224,12 @@ static inline void hugetlb_register_node(struct node *node)
static inline void hugetlb_unregister_node(struct node *node)
{
}
+
+static inline bool hugetlbfs_pagecache_present(
+ struct hstate *h, struct vm_area_struct *vma, unsigned long address)
+{
+ return false;
+}
#endif /* CONFIG_HUGETLB_PAGE */
static inline spinlock_t *huge_pte_lock(struct hstate *h,
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6ef0557b4bff8e..96ceb4095425eb 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -832,6 +832,7 @@ struct vmbus_gpadl {
u32 gpadl_handle;
u32 size;
void *buffer;
+ bool decrypted;
};
struct vmbus_channel {
diff --git a/include/linux/iio/common/inv_sensors_timestamp.h b/include/linux/iio/common/inv_sensors_timestamp.h
index a47d304d1ba7c4..8d506f1e9df292 100644
--- a/include/linux/iio/common/inv_sensors_timestamp.h
+++ b/include/linux/iio/common/inv_sensors_timestamp.h
@@ -71,8 +71,7 @@ int inv_sensors_timestamp_update_odr(struct inv_sensors_timestamp *ts,
uint32_t period, bool fifo);
void inv_sensors_timestamp_interrupt(struct inv_sensors_timestamp *ts,
- uint32_t fifo_period, size_t fifo_nb,
- size_t sensor_nb, int64_t timestamp);
+ size_t sample_nb, int64_t timestamp);
static inline int64_t inv_sensors_timestamp_pop(struct inv_sensors_timestamp *ts)
{
diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h
index 1b608e00290aa1..711a1f0d1a735d 100644
--- a/include/linux/instrumented.h
+++ b/include/linux/instrumented.h
@@ -148,6 +148,41 @@ instrument_copy_from_user_after(const void *to, const void __user *from,
}
/**
+ * instrument_memcpy_before - add instrumentation before non-instrumented memcpy
+ * @to: destination address
+ * @from: source address
+ * @n: number of bytes to copy
+ *
+ * Instrument memory accesses that happen in custom memcpy implementations. The
+ * instrumentation should be inserted before the memcpy call.
+ */
+static __always_inline void instrument_memcpy_before(void *to, const void *from,
+ unsigned long n)
+{
+ kasan_check_write(to, n);
+ kasan_check_read(from, n);
+ kcsan_check_write(to, n);
+ kcsan_check_read(from, n);
+}
+
+/**
+ * instrument_memcpy_after - add instrumentation after non-instrumented memcpy
+ * @to: destination address
+ * @from: source address
+ * @n: number of bytes to copy
+ * @left: number of bytes not copied (if known)
+ *
+ * Instrument memory accesses that happen in custom memcpy implementations. The
+ * instrumentation should be inserted after the memcpy call.
+ */
+static __always_inline void instrument_memcpy_after(void *to, const void *from,
+ unsigned long n,
+ unsigned long left)
+{
+ kmsan_memmove(to, from, n - left);
+}
+
+/**
* instrument_get_user() - add instrumentation to get_user()-like macros
* @to: destination variable, may not be address-taken
*
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 76121c2bb4f824..5c9bdd3ffccc89 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -67,6 +67,8 @@
* later.
* IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers,
* depends on IRQF_PERCPU.
+ * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared
+ * interrupt.
*/
#define IRQF_SHARED 0x00000080
#define IRQF_PROBE_SHARED 0x00000100
@@ -82,6 +84,7 @@
#define IRQF_COND_SUSPEND 0x00040000
#define IRQF_NO_AUTOEN 0x00080000
#define IRQF_NO_DEBUG 0x00100000
+#define IRQF_COND_ONESHOT 0x00200000
#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
diff --git a/include/linux/io.h b/include/linux/io.h
index 235ba7d80a8f0d..e8ee40ee1843b2 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -6,6 +6,7 @@
#ifndef _LINUX_IO_H
#define _LINUX_IO_H
+#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bug.h>
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index e248936250852d..ac333ea81d3195 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -13,7 +13,7 @@ enum {
* A hint to not wake right away but delay until there are enough of
* tw's queued to match the number of CQEs the task is waiting for.
*
- * Must not be used wirh requests generating more than one CQE.
+ * Must not be used with requests generating more than one CQE.
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
*/
IOU_F_TWQ_LAZY_WAKE = 1,
@@ -294,7 +294,6 @@ struct io_ring_ctx {
struct io_submit_state submit_state;
- struct io_buffer_list *io_bl;
struct xarray io_bl_xa;
struct io_hash_table cancel_table_locked;
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 147feebd508cab..3f003d5fde5341 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -114,7 +114,7 @@ do { \
# define lockdep_softirq_enter() do { } while (0)
# define lockdep_softirq_exit() do { } while (0)
# define lockdep_hrtimer_enter(__hrtimer) false
-# define lockdep_hrtimer_exit(__context) do { } while (0)
+# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0)
# define lockdep_posixtimer_enter() do { } while (0)
# define lockdep_posixtimer_exit() do { } while (0)
# define lockdep_irq_work_enter(__work) do { } while (0)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 971f3e826e1528..9512fe33266841 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1586,10 +1586,8 @@ void jbd2_journal_put_journal_head(struct journal_head *jh);
*/
extern struct kmem_cache *jbd2_handle_cache;
-static inline handle_t *jbd2_alloc_handle(gfp_t gfp_flags)
-{
- return kmem_cache_zalloc(jbd2_handle_cache, gfp_flags);
-}
+#define jbd2_alloc_handle(_gfp_flags) \
+ ((handle_t *)kmem_cache_zalloc(jbd2_handle_cache, _gfp_flags))
static inline void jbd2_free_handle(handle_t *handle)
{
@@ -1602,10 +1600,8 @@ static inline void jbd2_free_handle(handle_t *handle)
*/
extern struct kmem_cache *jbd2_inode_cache;
-static inline struct jbd2_inode *jbd2_alloc_inode(gfp_t gfp_flags)
-{
- return kmem_cache_alloc(jbd2_inode_cache, gfp_flags);
-}
+#define jbd2_alloc_inode(_gfp_flags) \
+ ((struct jbd2_inode *)kmem_cache_alloc(jbd2_inode_cache, _gfp_flags))
static inline void jbd2_free_inode(struct jbd2_inode *jinode)
{
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 060835bb82d52f..f31bd304df4518 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -461,10 +461,8 @@ static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) {
extern bool kexec_file_dbg_print;
-#define kexec_dprintk(fmt, ...) \
- printk("%s" fmt, \
- kexec_file_dbg_print ? KERN_INFO : KERN_DEBUG, \
- ##__VA_ARGS__)
+#define kexec_dprintk(fmt, arg...) \
+ do { if (kexec_file_dbg_print) pr_info(fmt, ##arg); } while (0)
#else /* !CONFIG_KEXEC_CORE */
struct pt_regs;
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 0b35a41440ff13..6b28d642f3325e 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -36,10 +36,15 @@
* to lock the reader.
*/
-#include <linux/kernel.h>
+#include <linux/array_size.h>
#include <linux/spinlock.h>
#include <linux/stddef.h>
-#include <linux/scatterlist.h>
+#include <linux/types.h>
+
+#include <asm/barrier.h>
+#include <asm/errno.h>
+
+struct scatterlist;
struct __kfifo {
unsigned int in;
diff --git a/include/linux/kmsan-checks.h b/include/linux/kmsan-checks.h
index c4cae333deec5f..e1082dc40abc2c 100644
--- a/include/linux/kmsan-checks.h
+++ b/include/linux/kmsan-checks.h
@@ -61,6 +61,17 @@ void kmsan_check_memory(const void *address, size_t size);
void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
size_t left);
+/**
+ * kmsan_memmove() - Notify KMSAN about a data copy within kernel.
+ * @to: destination address in the kernel.
+ * @from: source address in the kernel.
+ * @size: number of bytes to copy.
+ *
+ * Invoked after non-instrumented version (e.g. implemented using assembly
+ * code) of memmove()/memcpy() is called, in order to copy KMSAN's metadata.
+ */
+void kmsan_memmove(void *to, const void *from, size_t to_copy);
+
#else
static inline void kmsan_poison_memory(const void *address, size_t size,
@@ -78,6 +89,10 @@ static inline void kmsan_copy_to_user(void __user *to, const void *from,
{
}
+static inline void kmsan_memmove(void *to, const void *from, size_t to_copy)
+{
+}
+
#endif
#endif /* _LINUX_KMSAN_CHECKS_H */
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 401348e9f92b4e..52c63a9c5a9cdf 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -45,16 +45,16 @@ static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
- int ret;
+ if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
+ return __ksm_enter(mm);
- if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) {
- ret = __ksm_enter(mm);
- if (ret)
- return ret;
- }
+ return 0;
+}
- if (test_bit(MMF_VM_MERGE_ANY, &oldmm->flags))
- set_bit(MMF_VM_MERGE_ANY, &mm->flags);
+static inline int ksm_execve(struct mm_struct *mm)
+{
+ if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+ return __ksm_enter(mm);
return 0;
}
@@ -81,15 +81,9 @@ struct folio *ksm_might_need_to_copy(struct folio *folio,
void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
-
-#ifdef CONFIG_MEMORY_FAILURE
-void collect_procs_ksm(struct page *page, struct list_head *to_kill,
- int force_early);
-#endif
-
-#ifdef CONFIG_PROC_FS
+void collect_procs_ksm(struct folio *folio, struct page *page,
+ struct list_head *to_kill, int force_early);
long ksm_process_profit(struct mm_struct *);
-#endif /* CONFIG_PROC_FS */
#else /* !CONFIG_KSM */
@@ -107,6 +101,11 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
return 0;
}
+static inline int ksm_execve(struct mm_struct *mm)
+{
+ return 0;
+}
+
static inline void ksm_exit(struct mm_struct *mm)
{
}
@@ -115,12 +114,10 @@ static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
{
}
-#ifdef CONFIG_MEMORY_FAILURE
-static inline void collect_procs_ksm(struct page *page,
+static inline void collect_procs_ksm(struct folio *folio, struct page *page,
struct list_head *to_kill, int force_early)
{
}
-#endif
#ifdef CONFIG_MMU
static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 26d68115afb826..324d792e7c7864 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -107,6 +107,7 @@ enum {
ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */
ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */
+ ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */
ATA_DFLAG_DETACH = (1 << 24),
ATA_DFLAG_DETACHED = (1 << 25),
ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 394fd0a887ae75..9aba0d0462ca69 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -349,15 +349,32 @@ struct mem_cgroup {
extern struct mem_cgroup *root_mem_cgroup;
enum page_memcg_data_flags {
- /* page->memcg_data is a pointer to an objcgs vector */
- MEMCG_DATA_OBJCGS = (1UL << 0),
+ /* page->memcg_data is a pointer to an slabobj_ext vector */
+ MEMCG_DATA_OBJEXTS = (1UL << 0),
/* page has been accounted as a non-slab kernel page */
MEMCG_DATA_KMEM = (1UL << 1),
/* the next bit after the last actual flag */
__NR_MEMCG_DATA_FLAGS = (1UL << 2),
};
-#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
+#define __FIRST_OBJEXT_FLAG __NR_MEMCG_DATA_FLAGS
+
+#else /* CONFIG_MEMCG */
+
+#define __FIRST_OBJEXT_FLAG (1UL << 0)
+
+#endif /* CONFIG_MEMCG */
+
+enum objext_flags {
+ /* slabobj_ext vector failed to allocate */
+ OBJEXTS_ALLOC_FAIL = __FIRST_OBJEXT_FLAG,
+ /* the next bit after the last actual flag */
+ __NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1),
+};
+
+#define OBJEXTS_FLAGS_MASK (__NR_OBJEXTS_FLAGS - 1)
+
+#ifdef CONFIG_MEMCG
static inline bool folio_memcg_kmem(struct folio *folio);
@@ -388,10 +405,10 @@ static inline struct mem_cgroup *__folio_memcg(struct folio *folio)
unsigned long memcg_data = folio->memcg_data;
VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
- VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
+ VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio);
VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio);
- return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
}
/*
@@ -409,10 +426,10 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
unsigned long memcg_data = folio->memcg_data;
VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
- VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
+ VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio);
VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio);
- return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ return (struct obj_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
}
/*
@@ -469,11 +486,11 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
if (memcg_data & MEMCG_DATA_KMEM) {
struct obj_cgroup *objcg;
- objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
return obj_cgroup_memcg(objcg);
}
- return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
}
/*
@@ -506,17 +523,17 @@ static inline struct mem_cgroup *folio_memcg_check(struct folio *folio)
*/
unsigned long memcg_data = READ_ONCE(folio->memcg_data);
- if (memcg_data & MEMCG_DATA_OBJCGS)
+ if (memcg_data & MEMCG_DATA_OBJEXTS)
return NULL;
if (memcg_data & MEMCG_DATA_KMEM) {
struct obj_cgroup *objcg;
- objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
return obj_cgroup_memcg(objcg);
}
- return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK);
}
static inline struct mem_cgroup *page_memcg_check(struct page *page)
@@ -552,7 +569,7 @@ retry:
static inline bool folio_memcg_kmem(struct folio *folio)
{
VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page);
- VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio);
+ VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJEXTS, folio);
return folio->memcg_data & MEMCG_DATA_KMEM;
}
@@ -818,7 +835,8 @@ static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
static inline void obj_cgroup_put(struct obj_cgroup *objcg)
{
- percpu_ref_put(&objcg->refcnt);
+ if (objcg)
+ percpu_ref_put(&objcg->refcnt);
}
static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
@@ -1059,8 +1077,6 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
void mem_cgroup_flush_stats(struct mem_cgroup *memcg);
void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg);
-void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
- int val);
void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val);
static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
@@ -1073,16 +1089,6 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
local_irq_restore(flags);
}
-static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx, int val)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __mod_memcg_lruvec_state(lruvec, idx, val);
- local_irq_restore(flags);
-}
-
void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
unsigned long count);
@@ -1576,11 +1582,6 @@ static inline void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg)
{
}
-static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx, int val)
-{
-}
-
static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
int val)
{
@@ -1632,6 +1633,19 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
}
#endif /* CONFIG_MEMCG */
+/*
+ * Extended information for slab objects stored as an array in page->memcg_data
+ * if MEMCG_DATA_OBJEXTS is set.
+ */
+struct slabobj_ext {
+#ifdef CONFIG_MEMCG_KMEM
+ struct obj_cgroup *objcg;
+#endif
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+ union codetag_ref ref;
+#endif
+} __aligned(8);
+
static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
{
__mod_lruvec_kmem_state(p, idx, 1);
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 69e78190008271..0d70788558f463 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -48,6 +48,9 @@ int mt_calc_adistance(int node, int *adist);
int mt_set_default_dram_perf(int nid, struct access_coordinate *perf,
const char *source);
int mt_perf_to_adistance(struct access_coordinate *perf, int *adist);
+struct memory_dev_type *mt_find_alloc_memory_type(int adist,
+ struct list_head *memory_types);
+void mt_put_memory_types(struct list_head *memory_types);
#ifdef CONFIG_MIGRATION
int next_demotion_node(int node);
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
@@ -136,5 +139,15 @@ static inline int mt_perf_to_adistance(struct access_coordinate *perf, int *adis
{
return -EIO;
}
+
+static inline struct memory_dev_type *mt_find_alloc_memory_type(int adist,
+ struct list_head *memory_types)
+{
+ return NULL;
+}
+
+static inline void mt_put_memory_types(struct list_head *memory_types)
+{
+}
#endif /* CONFIG_NUMA */
#endif /* _LINUX_MEMORY_TIERS_H */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 931b118336f48c..1add16f216124d 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -167,7 +167,8 @@ extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
/* Check if a vma is migratable */
extern bool vma_migratable(struct vm_area_struct *vma);
-int mpol_misplaced(struct folio *, struct vm_area_struct *, unsigned long);
+int mpol_misplaced(struct folio *folio, struct vm_fault *vmf,
+ unsigned long addr);
extern void mpol_put_task_policy(struct task_struct *);
static inline bool mpol_is_preferred_many(struct mempolicy *pol)
@@ -282,7 +283,7 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol)
#endif
static inline int mpol_misplaced(struct folio *folio,
- struct vm_area_struct *vma,
+ struct vm_fault *vmf,
unsigned long address)
{
return -1; /* no node preference */
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 16c5cc807ff6b4..7b151441341b4c 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -5,6 +5,8 @@
#ifndef _LINUX_MEMPOOL_H
#define _LINUX_MEMPOOL_H
+#include <linux/sched.h>
+#include <linux/alloc_tag.h>
#include <linux/wait.h>
#include <linux/compiler.h>
@@ -39,18 +41,32 @@ void mempool_exit(mempool_t *pool);
int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn, void *pool_data,
gfp_t gfp_mask, int node_id);
-int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+
+int mempool_init_noprof(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn, void *pool_data);
+#define mempool_init(...) \
+ alloc_hooks(mempool_init_noprof(__VA_ARGS__))
extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn, void *pool_data);
-extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
+
+extern mempool_t *mempool_create_node_noprof(int min_nr, mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn, void *pool_data,
gfp_t gfp_mask, int nid);
+#define mempool_create_node(...) \
+ alloc_hooks(mempool_create_node_noprof(__VA_ARGS__))
+
+#define mempool_create(_min_nr, _alloc_fn, _free_fn, _pool_data) \
+ mempool_create_node(_min_nr, _alloc_fn, _free_fn, _pool_data, \
+ GFP_KERNEL, NUMA_NO_NODE)
extern int mempool_resize(mempool_t *pool, int new_min_nr);
extern void mempool_destroy(mempool_t *pool);
-extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc;
+
+extern void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask) __malloc;
+#define mempool_alloc(...) \
+ alloc_hooks(mempool_alloc_noprof(__VA_ARGS__))
+
extern void *mempool_alloc_preallocated(mempool_t *pool) __malloc;
extern void mempool_free(void *element, mempool_t *pool);
@@ -62,19 +78,10 @@ extern void mempool_free(void *element, mempool_t *pool);
void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data);
void mempool_free_slab(void *element, void *pool_data);
-static inline int
-mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc)
-{
- return mempool_init(pool, min_nr, mempool_alloc_slab,
- mempool_free_slab, (void *) kc);
-}
-
-static inline mempool_t *
-mempool_create_slab_pool(int min_nr, struct kmem_cache *kc)
-{
- return mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab,
- (void *) kc);
-}
+#define mempool_init_slab_pool(_pool, _min_nr, _kc) \
+ mempool_init(_pool, (_min_nr), mempool_alloc_slab, mempool_free_slab, (void *)(_kc))
+#define mempool_create_slab_pool(_min_nr, _kc) \
+ mempool_create((_min_nr), mempool_alloc_slab, mempool_free_slab, (void *)(_kc))
/*
* a mempool_alloc_t and a mempool_free_t to kmalloc and kfree the
@@ -83,17 +90,12 @@ mempool_create_slab_pool(int min_nr, struct kmem_cache *kc)
void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data);
void mempool_kfree(void *element, void *pool_data);
-static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size)
-{
- return mempool_init(pool, min_nr, mempool_kmalloc,
- mempool_kfree, (void *) size);
-}
-
-static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
-{
- return mempool_create(min_nr, mempool_kmalloc, mempool_kfree,
- (void *) size);
-}
+#define mempool_init_kmalloc_pool(_pool, _min_nr, _size) \
+ mempool_init(_pool, (_min_nr), mempool_kmalloc, mempool_kfree, \
+ (void *)(unsigned long)(_size))
+#define mempool_create_kmalloc_pool(_min_nr, _size) \
+ mempool_create((_min_nr), mempool_kmalloc, mempool_kfree, \
+ (void *)(unsigned long)(_size))
void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data);
void mempool_kvfree(void *element, void *pool_data);
@@ -115,16 +117,11 @@ static inline mempool_t *mempool_create_kvmalloc_pool(int min_nr, size_t size)
void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data);
void mempool_free_pages(void *element, void *pool_data);
-static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order)
-{
- return mempool_init(pool, min_nr, mempool_alloc_pages,
- mempool_free_pages, (void *)(long)order);
-}
-
-static inline mempool_t *mempool_create_page_pool(int min_nr, int order)
-{
- return mempool_create(min_nr, mempool_alloc_pages, mempool_free_pages,
- (void *)(long)order);
-}
+#define mempool_init_page_pool(_pool, _min_nr, _order) \
+ mempool_init(_pool, (_min_nr), mempool_alloc_pages, \
+ mempool_free_pages, (void *)(long)(_order))
+#define mempool_create_page_pool(_min_nr, _order) \
+ mempool_create((_min_nr), mempool_alloc_pages, \
+ mempool_free_pages, (void *)(long)(_order))
#endif /* _LINUX_MEMPOOL_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0436b919f1c7fc..9849dfda44d43c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -5,6 +5,7 @@
#include <linux/errno.h>
#include <linux/mmdebug.h>
#include <linux/gfp.h>
+#include <linux/pgalloc_tag.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/mmzone.h>
@@ -1199,7 +1200,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
* debugging purposes - it does not include PTE-mapped sub-pages; look
* at folio_mapcount() or page_mapcount() instead.
*/
-static inline int folio_entire_mapcount(struct folio *folio)
+static inline int folio_entire_mapcount(const struct folio *folio)
{
VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
return atomic_read(&folio->_entire_mapcount) + 1;
@@ -1223,48 +1224,60 @@ static inline void page_mapcount_reset(struct page *page)
* a large folio, it includes the number of times this page is mapped
* as part of that folio.
*
- * The result is undefined for pages which cannot be mapped into userspace.
- * For example SLAB or special types of pages. See function page_has_type().
- * They use this field in struct page differently.
+ * Will report 0 for pages which cannot be mapped into userspace, eg
+ * slab, page tables and similar.
*/
static inline int page_mapcount(struct page *page)
{
int mapcount = atomic_read(&page->_mapcount) + 1;
+ /* Handle page_has_type() pages */
+ if (mapcount < PAGE_MAPCOUNT_RESERVE + 1)
+ mapcount = 0;
if (unlikely(PageCompound(page)))
mapcount += folio_entire_mapcount(page_folio(page));
return mapcount;
}
-int folio_total_mapcount(struct folio *folio);
+static inline int folio_large_mapcount(const struct folio *folio)
+{
+ VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
+ return atomic_read(&folio->_large_mapcount) + 1;
+}
/**
- * folio_mapcount() - Calculate the number of mappings of this folio.
+ * folio_mapcount() - Number of mappings of this folio.
* @folio: The folio.
*
- * A large folio tracks both how many times the entire folio is mapped,
- * and how many times each individual page in the folio is mapped.
- * This function calculates the total number of times the folio is
- * mapped.
+ * The folio mapcount corresponds to the number of present user page table
+ * entries that reference any part of a folio. Each such present user page
+ * table entry must be paired with exactly on folio reference.
+ *
+ * For ordindary folios, each user page table entry (PTE/PMD/PUD/...) counts
+ * exactly once.
+ *
+ * For hugetlb folios, each abstracted "hugetlb" user page table entry that
+ * references the entire folio counts exactly once, even when such special
+ * page table entries are comprised of multiple ordinary page table entries.
+ *
+ * Will report 0 for pages which cannot be mapped into userspace, such as
+ * slab, page tables and similar.
*
* Return: The number of times this folio is mapped.
*/
-static inline int folio_mapcount(struct folio *folio)
+static inline int folio_mapcount(const struct folio *folio)
{
- if (likely(!folio_test_large(folio)))
- return atomic_read(&folio->_mapcount) + 1;
- return folio_total_mapcount(folio);
-}
+ int mapcount;
-static inline bool folio_large_is_mapped(struct folio *folio)
-{
- /*
- * Reading _entire_mapcount below could be omitted if hugetlb
- * participated in incrementing nr_pages_mapped when compound mapped.
- */
- return atomic_read(&folio->_nr_pages_mapped) > 0 ||
- atomic_read(&folio->_entire_mapcount) >= 0;
+ if (likely(!folio_test_large(folio))) {
+ mapcount = atomic_read(&folio->_mapcount) + 1;
+ /* Handle page_has_type() pages */
+ if (mapcount < PAGE_MAPCOUNT_RESERVE + 1)
+ mapcount = 0;
+ return mapcount;
+ }
+ return folio_large_mapcount(folio);
}
/**
@@ -1273,11 +1286,9 @@ static inline bool folio_large_is_mapped(struct folio *folio)
*
* Return: True if any page in this folio is referenced by user page tables.
*/
-static inline bool folio_mapped(struct folio *folio)
+static inline bool folio_mapped(const struct folio *folio)
{
- if (likely(!folio_test_large(folio)))
- return atomic_read(&folio->_mapcount) >= 0;
- return folio_large_is_mapped(folio);
+ return folio_mapcount(folio) >= 1;
}
/*
@@ -1285,11 +1296,9 @@ static inline bool folio_mapped(struct folio *folio)
* For compound page it returns true if any sub-page of compound page is mapped,
* even if this particular sub-page is not itself mapped by any PTE or PMD.
*/
-static inline bool page_mapped(struct page *page)
+static inline bool page_mapped(const struct page *page)
{
- if (likely(!PageCompound(page)))
- return atomic_read(&page->_mapcount) >= 0;
- return folio_large_is_mapped(page_folio(page));
+ return folio_mapped(page_folio(page));
}
static inline struct page *virt_to_head_page(const void *x)
@@ -1315,8 +1324,6 @@ void folio_copy(struct folio *dst, struct folio *src);
unsigned long nr_free_buffer_pages(void);
-void destroy_large_folio(struct folio *folio);
-
/* Returns the number of bytes in this potentially compound page. */
static inline unsigned long page_size(struct page *page)
{
@@ -1434,27 +1441,22 @@ vm_fault_t finish_fault(struct vm_fault *vmf);
#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
-bool __put_devmap_managed_page_refs(struct page *page, int refs);
-static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
+bool __put_devmap_managed_folio_refs(struct folio *folio, int refs);
+static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs)
{
if (!static_branch_unlikely(&devmap_managed_key))
return false;
- if (!is_zone_device_page(page))
+ if (!folio_is_zone_device(folio))
return false;
- return __put_devmap_managed_page_refs(page, refs);
+ return __put_devmap_managed_folio_refs(folio, refs);
}
#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
+static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs)
{
return false;
}
#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-static inline bool put_devmap_managed_page(struct page *page)
-{
- return put_devmap_managed_page_refs(page, 1);
-}
-
/* 127: arbitrary random number, small enough to assemble well */
#define folio_ref_zero_or_close_to_overflow(folio) \
((unsigned int) folio_ref_count(folio) + 127u <= 127u)
@@ -1573,7 +1575,7 @@ static inline void put_page(struct page *page)
* For some devmap managed pages we need to catch refcount transition
* from 2 to 1:
*/
- if (put_devmap_managed_page(&folio->page))
+ if (put_devmap_managed_folio_refs(folio, 1))
return;
folio_put(folio);
}
@@ -2067,7 +2069,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
*
* Return: A positive power of two.
*/
-static inline long folio_nr_pages(struct folio *folio)
+static inline long folio_nr_pages(const struct folio *folio)
{
if (!folio_test_large(folio))
return 1;
@@ -2162,21 +2164,64 @@ static inline size_t folio_size(struct folio *folio)
}
/**
- * folio_estimated_sharers - Estimate the number of sharers of a folio.
+ * folio_likely_mapped_shared - Estimate if the folio is mapped into the page
+ * tables of more than one MM
* @folio: The folio.
*
- * folio_estimated_sharers() aims to serve as a function to efficiently
- * estimate the number of processes sharing a folio. This is done by
- * looking at the precise mapcount of the first subpage in the folio, and
- * assuming the other subpages are the same. This may not be true for large
- * folios. If you want exact mapcounts for exact calculations, look at
- * page_mapcount() or folio_total_mapcount().
+ * This function checks if the folio is currently mapped into more than one
+ * MM ("mapped shared"), or if the folio is only mapped into a single MM
+ * ("mapped exclusively").
+ *
+ * As precise information is not easily available for all folios, this function
+ * estimates the number of MMs ("sharers") that are currently mapping a folio
+ * using the number of times the first page of the folio is currently mapped
+ * into page tables.
+ *
+ * For small anonymous folios (except KSM folios) and anonymous hugetlb folios,
+ * the return value will be exactly correct, because they can only be mapped
+ * at most once into an MM, and they cannot be partially mapped.
+ *
+ * For other folios, the result can be fuzzy:
+ * #. For partially-mappable large folios (THP), the return value can wrongly
+ * indicate "mapped exclusively" (false negative) when the folio is
+ * only partially mapped into at least one MM.
+ * #. For pagecache folios (including hugetlb), the return value can wrongly
+ * indicate "mapped shared" (false positive) when two VMAs in the same MM
+ * cover the same file range.
+ * #. For (small) KSM folios, the return value can wrongly indicate "mapped
+ * shared" (false positive), when the folio is mapped multiple times into
+ * the same MM.
+ *
+ * Further, this function only considers current page table mappings that
+ * are tracked using the folio mapcount(s).
*
- * Return: The estimated number of processes sharing a folio.
+ * This function does not consider:
+ * #. If the folio might get mapped in the (near) future (e.g., swapcache,
+ * pagecache, temporary unmapping for migration).
+ * #. If the folio is mapped differently (VM_PFNMAP).
+ * #. If hugetlb page table sharing applies. Callers might want to check
+ * hugetlb_pmd_shared().
+ *
+ * Return: Whether the folio is estimated to be mapped into more than one MM.
*/
-static inline int folio_estimated_sharers(struct folio *folio)
+static inline bool folio_likely_mapped_shared(struct folio *folio)
{
- return page_mapcount(folio_page(folio, 0));
+ int mapcount = folio_mapcount(folio);
+
+ /* Only partially-mappable folios require more care. */
+ if (!folio_test_large(folio) || unlikely(folio_test_hugetlb(folio)))
+ return mapcount > 1;
+
+ /* A single mapping implies "mapped exclusively". */
+ if (mapcount <= 1)
+ return false;
+
+ /* If any page is mapped more than once we treat it "mapped shared". */
+ if (folio_entire_mapcount(folio) || mapcount > folio_nr_pages(folio))
+ return true;
+
+ /* Let's guess based on the first subpage. */
+ return atomic_read(&folio->_mapcount) > 0;
}
#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
@@ -2207,11 +2252,6 @@ static inline int arch_make_folio_accessible(struct folio *folio)
*/
#include <linux/vmstat.h>
-static __always_inline void *lowmem_page_address(const struct page *page)
-{
- return page_to_virt(page);
-}
-
#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
#define HASHED_PAGE_VIRTUAL
#endif
@@ -2234,6 +2274,11 @@ void set_page_address(struct page *page, void *virtual);
void page_address_init(void);
#endif
+static __always_inline void *lowmem_page_address(const struct page *page)
+{
+ return page_to_virt(page);
+}
+
#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
#define page_address(page) lowmem_page_address(page)
#define set_page_address(page, address) do { } while(0)
@@ -2391,12 +2436,8 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long ceiling);
int
copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
-int follow_pte(struct mm_struct *mm, unsigned long address,
+int follow_pte(struct vm_area_struct *vma, unsigned long address,
pte_t **ptepp, spinlock_t **ptlp);
-int follow_pfn(struct vm_area_struct *vma, unsigned long address,
- unsigned long *pfn);
-int follow_phys(struct vm_area_struct *vma, unsigned long address,
- unsigned int flags, unsigned long *prot, resource_size_t *phys);
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
void *buf, int len, int write);
@@ -2550,7 +2591,7 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
MM_CP_UFFD_WP_RESOLVE)
bool vma_needs_dirty_tracking(struct vm_area_struct *vma);
-int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
+bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
{
/*
@@ -2857,12 +2898,13 @@ static inline bool pagetable_is_reserved(struct ptdesc *pt)
*
* Return: The ptdesc describing the allocated page tables.
*/
-static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order)
+static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order)
{
- struct page *page = alloc_pages(gfp | __GFP_COMP, order);
+ struct page *page = alloc_pages_noprof(gfp | __GFP_COMP, order);
return page_ptdesc(page);
}
+#define pagetable_alloc(...) alloc_hooks(pagetable_alloc_noprof(__VA_ARGS__))
/**
* pagetable_free - Free pagetables
@@ -3132,6 +3174,14 @@ extern void reserve_bootmem_region(phys_addr_t start,
/* Free the reserved page into the buddy system, so it gets managed. */
static inline void free_reserved_page(struct page *page)
{
+ if (mem_alloc_profiling_enabled()) {
+ union codetag_ref *ref = get_page_tag_ref(page);
+
+ if (ref) {
+ set_codetag_empty(ref);
+ put_page_tag_ref(ref);
+ }
+ }
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
@@ -3193,8 +3243,6 @@ static inline unsigned long get_num_physpages(void)
*/
void free_area_init(unsigned long *max_zone_pfn);
unsigned long node_map_pfn_alignment(void);
-unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
- unsigned long end_pfn);
extern unsigned long absent_pages_in_range(unsigned long start_pfn,
unsigned long end_pfn);
extern void get_pfn_range_for_nid(unsigned int nid,
@@ -3210,7 +3258,6 @@ static inline int early_pfn_to_nid(unsigned long pfn)
extern int __meminit early_pfn_to_nid(unsigned long pfn);
#endif
-extern void set_dma_reserve(unsigned long new_dma_reserve);
extern void mem_init(void);
extern void __init mmap_init(void);
@@ -3222,9 +3269,6 @@ static inline void show_mem(void)
extern long si_mem_available(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
-#ifdef __HAVE_ARCH_RESERVED_KERNEL_PAGES
-extern unsigned long arch_reserved_kernel_pages(void);
-#endif
extern __printf(3, 4)
void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
@@ -3383,7 +3427,16 @@ extern int install_special_mapping(struct mm_struct *mm,
unsigned long randomize_stack_top(unsigned long stack_top);
unsigned long randomize_page(unsigned long start, unsigned long range);
-extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+unsigned long
+__get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags);
+
+static inline unsigned long
+get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ return __get_unmapped_area(file, addr, len, pgoff, flags, 0);
+}
extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
@@ -3429,6 +3482,7 @@ struct vm_unmapped_area_info {
unsigned long high_limit;
unsigned long align_mask;
unsigned long align_offset;
+ unsigned long start_gap;
};
extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);
@@ -3722,7 +3776,14 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
static inline bool want_init_on_free(void)
{
return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
- &init_on_free);
+ &init_on_free);
+}
+
+DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free);
+static inline bool want_init_mlocked_on_free(void)
+{
+ return static_branch_maybe(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON,
+ &init_mlocked_on_free);
}
extern bool _debug_pagealloc_enabled_early;
@@ -3785,24 +3846,22 @@ static inline bool page_is_guard(struct page *page)
return PageGuard(page);
}
-bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
- int migratetype);
+bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order);
static inline bool set_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype)
+ unsigned int order)
{
if (!debug_guardpage_enabled())
return false;
- return __set_page_guard(zone, page, order, migratetype);
+ return __set_page_guard(zone, page, order);
}
-void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
- int migratetype);
+void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order);
static inline void clear_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype)
+ unsigned int order)
{
if (!debug_guardpage_enabled())
return;
- __clear_page_guard(zone, page, order, migratetype);
+ __clear_page_guard(zone, page, order);
}
#else /* CONFIG_DEBUG_PAGEALLOC */
@@ -3812,9 +3871,9 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; }
static inline bool debug_guardpage_enabled(void) { return false; }
static inline bool page_is_guard(struct page *page) { return false; }
static inline bool set_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype) { return false; }
+ unsigned int order) { return false; }
static inline void clear_page_guard(struct zone *zone, struct page *page,
- unsigned int order, int migratetype) {}
+ unsigned int order) {}
#endif /* CONFIG_DEBUG_PAGEALLOC */
#ifdef __HAVE_ARCH_GATE_AREA
@@ -3969,7 +4028,6 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
-extern void shake_page(struct page *p);
extern atomic_long_t num_poisoned_pages __read_mostly;
extern int soft_offline_page(unsigned long pfn, int flags);
#ifdef CONFIG_MEMORY_FAILURE
@@ -4202,4 +4260,7 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE);
}
+void vma_pgtable_walk_begin(struct vm_area_struct *vma);
+void vma_pgtable_walk_end(struct vm_area_struct *vma);
+
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5240bd7bca338c..24323c7d0bd484 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -169,7 +169,7 @@ struct page {
/* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
atomic_t _refcount;
-#ifdef CONFIG_MEMCG
+#ifdef CONFIG_SLAB_OBJ_EXT
unsigned long memcg_data;
#endif
@@ -289,7 +289,8 @@ typedef struct {
* @virtual: Virtual address in the kernel direct map.
* @_last_cpupid: IDs of last CPU and last process that accessed the folio.
* @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
- * @_nr_pages_mapped: Do not use directly, call folio_mapcount().
+ * @_large_mapcount: Do not use directly, call folio_mapcount().
+ * @_nr_pages_mapped: Do not use outside of rmap and debug code.
* @_pincount: Do not use directly, call folio_maybe_dma_pinned().
* @_folio_nr_pages: Do not use directly, call folio_nr_pages().
* @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
@@ -331,7 +332,7 @@ struct folio {
};
atomic_t _mapcount;
atomic_t _refcount;
-#ifdef CONFIG_MEMCG
+#ifdef CONFIG_SLAB_OBJ_EXT
unsigned long memcg_data;
#endif
#if defined(WANT_PAGE_VIRTUAL)
@@ -348,8 +349,8 @@ struct folio {
struct {
unsigned long _flags_1;
unsigned long _head_1;
- unsigned long _folio_avail;
/* public: */
+ atomic_t _large_mapcount;
atomic_t _entire_mapcount;
atomic_t _nr_pages_mapped;
atomic_t _pincount;
@@ -671,6 +672,9 @@ struct vm_area_struct {
};
#ifdef CONFIG_PER_VMA_LOCK
+ /* Flag to indicate areas detached from the mm->mm_mt tree */
+ bool detached;
+
/*
* Can only be written (using WRITE_ONCE()) while holding both:
* - mmap_lock (in write mode)
@@ -687,9 +691,6 @@ struct vm_area_struct {
*/
int vm_lock_seq;
struct vma_lock *vm_lock;
-
- /* Flag to indicate areas detached from the mm->mm_mt tree */
- bool detached;
#endif
/*
@@ -777,11 +778,7 @@ struct mm_struct {
} ____cacheline_aligned_in_smp;
struct maple_tree mm_mt;
-#ifdef CONFIG_MMU
- unsigned long (*get_unmapped_area) (struct file *filp,
- unsigned long addr, unsigned long len,
- unsigned long pgoff, unsigned long flags);
-#endif
+
unsigned long mmap_base; /* base of mmap area */
unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
@@ -1170,14 +1167,15 @@ static inline void mm_init_cid(struct mm_struct *mm)
cpumask_clear(mm_cidmask(mm));
}
-static inline int mm_alloc_cid(struct mm_struct *mm)
+static inline int mm_alloc_cid_noprof(struct mm_struct *mm)
{
- mm->pcpu_cid = alloc_percpu(struct mm_cid);
+ mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid);
if (!mm->pcpu_cid)
return -ENOMEM;
mm_init_cid(mm);
return 0;
}
+#define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__))
static inline void mm_destroy_cid(struct mm_struct *mm)
{
@@ -1370,6 +1368,15 @@ enum fault_flag {
typedef unsigned int __bitwise zap_flags_t;
+/* Flags for clear_young_dirty_ptes(). */
+typedef int __bitwise cydp_t;
+
+/* Clear the access bit */
+#define CYDP_CLEAR_YOUNG ((__force cydp_t)BIT(0))
+
+/* Clear the dirty bit */
+#define CYDP_CLEAR_DIRTY ((__force cydp_t)BIT(1))
+
/*
* FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
* other. Here is what they mean, and how to use them:
diff --git a/include/linux/mman.h b/include/linux/mman.h
index dc7048824be81d..bcb201ab7a412e 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -162,6 +162,14 @@ calc_vm_flag_bits(unsigned long flags)
unsigned long vm_commit_limit(void);
+#ifndef arch_memory_deny_write_exec_supported
+static inline bool arch_memory_deny_write_exec_supported(void)
+{
+ return true;
+}
+#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported
+#endif
+
/*
* Denies creating a writable executable mapping or gaining executable permissions.
*
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 8d38dcb6d044cb..de9dc20b01ba74 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -60,16 +60,14 @@ static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
#endif /* CONFIG_TRACING */
-static inline void mmap_assert_locked(struct mm_struct *mm)
+static inline void mmap_assert_locked(const struct mm_struct *mm)
{
- lockdep_assert_held(&mm->mmap_lock);
- VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
+ rwsem_assert_held(&mm->mmap_lock);
}
-static inline void mmap_assert_write_locked(struct mm_struct *mm)
+static inline void mmap_assert_write_locked(const struct mm_struct *mm)
{
- lockdep_assert_held_write(&mm->mmap_lock);
- VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
+ rwsem_assert_held_write(&mm->mmap_lock);
}
#ifdef CONFIG_PER_VMA_LOCK
diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h
index 3921fbed0b2868..51421fdbb0bad4 100644
--- a/include/linux/oid_registry.h
+++ b/include/linux/oid_registry.h
@@ -17,10 +17,12 @@
* build_OID_registry.pl to generate the data for look_up_OID().
*/
enum OID {
+ OID_id_dsa_with_sha1, /* 1.2.840.10030.4.3 */
OID_id_dsa, /* 1.2.840.10040.4.1 */
OID_id_ecPublicKey, /* 1.2.840.10045.2.1 */
OID_id_prime192v1, /* 1.2.840.10045.3.1.1 */
OID_id_prime256v1, /* 1.2.840.10045.3.1.7 */
+ OID_id_ecdsa_with_sha1, /* 1.2.840.10045.4.1 */
OID_id_ecdsa_with_sha224, /* 1.2.840.10045.4.3.1 */
OID_id_ecdsa_with_sha256, /* 1.2.840.10045.4.3.2 */
OID_id_ecdsa_with_sha384, /* 1.2.840.10045.4.3.3 */
@@ -28,6 +30,7 @@ enum OID {
/* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */
OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */
+ OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */
OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */
OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */
OID_sha512WithRSAEncryption, /* 1.2.840.113549.1.1.13 */
@@ -64,6 +67,7 @@ enum OID {
OID_PKU2U, /* 1.3.5.1.5.2.7 */
OID_Scram, /* 1.3.6.1.5.5.14 */
OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */
+ OID_sha1, /* 1.3.14.3.2.26 */
OID_id_ansip384r1, /* 1.3.132.0.34 */
OID_sha256, /* 2.16.840.1.101.3.4.2.1 */
OID_sha384, /* 2.16.840.1.101.3.4.2.2 */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 652d77805e99df..104078afe0b16f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -109,7 +109,6 @@ enum pageflags {
PG_active,
PG_workingset,
PG_error,
- PG_slab,
PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/
PG_arch_1,
PG_reserved,
@@ -190,7 +189,6 @@ enum pageflags {
/* At least one page in this folio has the hwpoison flag set */
PG_has_hwpoisoned = PG_error,
- PG_hugetlb = PG_active,
PG_large_rmappable = PG_workingset, /* anon or file-backed */
};
@@ -458,30 +456,51 @@ static __always_inline int TestClearPage##uname(struct page *page) \
TESTSETFLAG(uname, lname, policy) \
TESTCLEARFLAG(uname, lname, policy)
+#define FOLIO_TEST_FLAG_FALSE(name) \
+static inline bool folio_test_##name(const struct folio *folio) \
+{ return false; }
+#define FOLIO_SET_FLAG_NOOP(name) \
+static inline void folio_set_##name(struct folio *folio) { }
+#define FOLIO_CLEAR_FLAG_NOOP(name) \
+static inline void folio_clear_##name(struct folio *folio) { }
+#define __FOLIO_SET_FLAG_NOOP(name) \
+static inline void __folio_set_##name(struct folio *folio) { }
+#define __FOLIO_CLEAR_FLAG_NOOP(name) \
+static inline void __folio_clear_##name(struct folio *folio) { }
+#define FOLIO_TEST_SET_FLAG_FALSE(name) \
+static inline bool folio_test_set_##name(struct folio *folio) \
+{ return false; }
+#define FOLIO_TEST_CLEAR_FLAG_FALSE(name) \
+static inline bool folio_test_clear_##name(struct folio *folio) \
+{ return false; }
+
+#define FOLIO_FLAG_FALSE(name) \
+FOLIO_TEST_FLAG_FALSE(name) \
+FOLIO_SET_FLAG_NOOP(name) \
+FOLIO_CLEAR_FLAG_NOOP(name)
+
#define TESTPAGEFLAG_FALSE(uname, lname) \
-static inline bool folio_test_##lname(const struct folio *folio) { return false; } \
+FOLIO_TEST_FLAG_FALSE(lname) \
static inline int Page##uname(const struct page *page) { return 0; }
#define SETPAGEFLAG_NOOP(uname, lname) \
-static inline void folio_set_##lname(struct folio *folio) { } \
+FOLIO_SET_FLAG_NOOP(lname) \
static inline void SetPage##uname(struct page *page) { }
#define CLEARPAGEFLAG_NOOP(uname, lname) \
-static inline void folio_clear_##lname(struct folio *folio) { } \
+FOLIO_CLEAR_FLAG_NOOP(lname) \
static inline void ClearPage##uname(struct page *page) { }
#define __CLEARPAGEFLAG_NOOP(uname, lname) \
-static inline void __folio_clear_##lname(struct folio *folio) { } \
+__FOLIO_CLEAR_FLAG_NOOP(lname) \
static inline void __ClearPage##uname(struct page *page) { }
#define TESTSETFLAG_FALSE(uname, lname) \
-static inline bool folio_test_set_##lname(struct folio *folio) \
-{ return 0; } \
+FOLIO_TEST_SET_FLAG_FALSE(lname) \
static inline int TestSetPage##uname(struct page *page) { return 0; }
#define TESTCLEARFLAG_FALSE(uname, lname) \
-static inline bool folio_test_clear_##lname(struct folio *folio) \
-{ return 0; } \
+FOLIO_TEST_CLEAR_FLAG_FALSE(lname) \
static inline int TestClearPage##uname(struct page *page) { return 0; }
#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname) \
@@ -493,9 +512,9 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
__PAGEFLAG(Locked, locked, PF_NO_TAIL)
FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE)
PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
-PAGEFLAG(Referenced, referenced, PF_HEAD)
- TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
- __SETPAGEFLAG(Referenced, referenced, PF_HEAD)
+FOLIO_FLAG(referenced, FOLIO_HEAD_PAGE)
+ FOLIO_TEST_CLEAR_FLAG(referenced, FOLIO_HEAD_PAGE)
+ __FOLIO_SET_FLAG(referenced, FOLIO_HEAD_PAGE)
PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
__CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
@@ -504,7 +523,6 @@ PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
TESTCLEARFLAG(Active, active, PF_HEAD)
PAGEFLAG(Workingset, workingset, PF_HEAD)
TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
-__PAGEFLAG(Slab, slab, PF_NO_TAIL)
PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */
/* Xen */
@@ -608,12 +626,19 @@ PAGEFLAG_FALSE(HWPoison, hwpoison)
#define __PG_HWPOISON 0
#endif
-#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
+#ifdef CONFIG_PAGE_IDLE_FLAG
+#ifdef CONFIG_64BIT
FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE)
FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE)
FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE)
FOLIO_FLAG(idle, FOLIO_HEAD_PAGE)
#endif
+/* See page_idle.h for !64BIT workaround */
+#else /* !CONFIG_PAGE_IDLE_FLAG */
+FOLIO_FLAG_FALSE(young)
+FOLIO_TEST_CLEAR_FLAG_FALSE(young)
+FOLIO_FLAG_FALSE(idle)
+#endif
/*
* PageReported() is used to track reported free pages within the Buddy
@@ -668,7 +693,7 @@ static __always_inline bool folio_mapping_flags(const struct folio *folio)
return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0;
}
-static __always_inline int PageMappingFlags(const struct page *page)
+static __always_inline bool PageMappingFlags(const struct page *page)
{
return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0;
}
@@ -689,7 +714,7 @@ static __always_inline bool __folio_test_movable(const struct folio *folio)
PAGE_MAPPING_MOVABLE;
}
-static __always_inline int __PageMovable(const struct page *page)
+static __always_inline bool __PageMovable(const struct page *page)
{
return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
PAGE_MAPPING_MOVABLE;
@@ -716,7 +741,7 @@ static __always_inline bool PageKsm(const struct page *page)
TESTPAGEFLAG_FALSE(Ksm, ksm)
#endif
-u64 stable_page_flags(struct page *page);
+u64 stable_page_flags(const struct page *page);
/**
* folio_xor_flags_has_waiters - Change some folio flags.
@@ -764,7 +789,7 @@ static inline bool folio_test_uptodate(const struct folio *folio)
return ret;
}
-static inline int PageUptodate(const struct page *page)
+static inline bool PageUptodate(const struct page *page)
{
return folio_test_uptodate(page_folio(page));
}
@@ -848,36 +873,13 @@ static inline void ClearPageCompound(struct page *page)
BUG_ON(!PageHead(page));
ClearPageHead(page);
}
-PAGEFLAG(LargeRmappable, large_rmappable, PF_SECOND)
+FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE)
#else
-TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable)
+FOLIO_FLAG_FALSE(large_rmappable)
#endif
#define PG_head_mask ((1UL << PG_head))
-#ifdef CONFIG_HUGETLB_PAGE
-int PageHuge(const struct page *page);
-SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
-CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
-
-/**
- * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs
- * @folio: The folio to test.
- *
- * Context: Any context. Caller should have a reference on the folio to
- * prevent it from being turned into a tail page.
- * Return: True for hugetlbfs folios, false for anon folios or folios
- * belonging to other filesystems.
- */
-static inline bool folio_test_hugetlb(const struct folio *folio)
-{
- return folio_test_large(folio) &&
- test_bit(PG_hugetlb, const_folio_flags(folio, 1));
-}
-#else
-TESTPAGEFLAG_FALSE(Huge, hugetlb)
-#endif
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* PageHuge() only returns true for hugetlbfs pages, but not for
@@ -934,33 +936,23 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
#endif
/*
- * Check if a page is currently marked HWPoisoned. Note that this check is
- * best effort only and inherently racy: there is no way to synchronize with
- * failing hardware.
- */
-static inline bool is_page_hwpoison(struct page *page)
-{
- if (PageHWPoison(page))
- return true;
- return PageHuge(page) && PageHWPoison(compound_head(page));
-}
-
-/*
- * For pages that are never mapped to userspace (and aren't PageSlab),
+ * For pages that are never mapped to userspace,
* page_type may be used. Because it is initialised to -1, we invert the
* sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
* __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and
- * low bits so that an underflow or overflow of page_mapcount() won't be
+ * low bits so that an underflow or overflow of _mapcount won't be
* mistaken for a page type value.
*/
#define PAGE_TYPE_BASE 0xf0000000
-/* Reserve 0x0000007f to catch underflows of page_mapcount */
+/* Reserve 0x0000007f to catch underflows of _mapcount */
#define PAGE_MAPCOUNT_RESERVE -128
#define PG_buddy 0x00000080
#define PG_offline 0x00000100
#define PG_table 0x00000200
#define PG_guard 0x00000400
+#define PG_hugetlb 0x00000800
+#define PG_slab 0x00001000
#define PageType(page, flag) \
((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -977,35 +969,38 @@ static inline int page_has_type(const struct page *page)
return page_type_has_type(page->page_type);
}
+#define FOLIO_TYPE_OPS(lname, fname) \
+static __always_inline bool folio_test_##fname(const struct folio *folio)\
+{ \
+ return folio_test_type(folio, PG_##lname); \
+} \
+static __always_inline void __folio_set_##fname(struct folio *folio) \
+{ \
+ VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \
+ folio->page.page_type &= ~PG_##lname; \
+} \
+static __always_inline void __folio_clear_##fname(struct folio *folio) \
+{ \
+ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
+ folio->page.page_type |= PG_##lname; \
+}
+
#define PAGE_TYPE_OPS(uname, lname, fname) \
+FOLIO_TYPE_OPS(lname, fname) \
static __always_inline int Page##uname(const struct page *page) \
{ \
return PageType(page, PG_##lname); \
} \
-static __always_inline int folio_test_##fname(const struct folio *folio)\
-{ \
- return folio_test_type(folio, PG_##lname); \
-} \
static __always_inline void __SetPage##uname(struct page *page) \
{ \
VM_BUG_ON_PAGE(!PageType(page, 0), page); \
page->page_type &= ~PG_##lname; \
} \
-static __always_inline void __folio_set_##fname(struct folio *folio) \
-{ \
- VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \
- folio->page.page_type &= ~PG_##lname; \
-} \
static __always_inline void __ClearPage##uname(struct page *page) \
{ \
VM_BUG_ON_PAGE(!Page##uname(page), page); \
page->page_type |= PG_##lname; \
-} \
-static __always_inline void __folio_clear_##fname(struct folio *folio) \
-{ \
- VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
- folio->page.page_type |= PG_##lname; \
-} \
+}
/*
* PageBuddy() indicates that the page is free and in the buddy system
@@ -1052,14 +1047,67 @@ PAGE_TYPE_OPS(Table, table, pgtable)
*/
PAGE_TYPE_OPS(Guard, guard, guard)
-extern bool is_free_buddy_page(struct page *page);
+FOLIO_TYPE_OPS(slab, slab)
+
+/**
+ * PageSlab - Determine if the page belongs to the slab allocator
+ * @page: The page to test.
+ *
+ * Context: Any context.
+ * Return: True for slab pages, false for any other kind of page.
+ */
+static inline bool PageSlab(const struct page *page)
+{
+ return folio_test_slab(page_folio(page));
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+FOLIO_TYPE_OPS(hugetlb, hugetlb)
+#else
+FOLIO_TEST_FLAG_FALSE(hugetlb)
+#endif
+
+/**
+ * PageHuge - Determine if the page belongs to hugetlbfs
+ * @page: The page to test.
+ *
+ * Context: Any context.
+ * Return: True for hugetlbfs pages, false for anon pages or pages
+ * belonging to other filesystems.
+ */
+static inline bool PageHuge(const struct page *page)
+{
+ return folio_test_hugetlb(page_folio(page));
+}
+
+/*
+ * Check if a page is currently marked HWPoisoned. Note that this check is
+ * best effort only and inherently racy: there is no way to synchronize with
+ * failing hardware.
+ */
+static inline bool is_page_hwpoison(const struct page *page)
+{
+ const struct folio *folio;
+
+ if (PageHWPoison(page))
+ return true;
+ folio = page_folio(page);
+ return folio_test_hugetlb(folio) && PageHWPoison(&folio->page);
+}
+
+bool is_free_buddy_page(const struct page *page);
PAGEFLAG(Isolated, isolated, PF_ANY);
static __always_inline int PageAnonExclusive(const struct page *page)
{
VM_BUG_ON_PGFLAGS(!PageAnon(page), page);
- VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+ /*
+ * HugeTLB stores this information on the head page; THP keeps it per
+ * page
+ */
+ if (PageHuge(page))
+ page = compound_head(page);
return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags);
}
@@ -1098,7 +1146,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
(1UL << PG_lru | 1UL << PG_locked | \
1UL << PG_private | 1UL << PG_private_2 | \
1UL << PG_writeback | 1UL << PG_reserved | \
- 1UL << PG_slab | 1UL << PG_active | \
+ 1UL << PG_active | \
1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK)
/*
@@ -1118,7 +1166,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
*/
#define PAGE_FLAGS_SECOND \
(0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \
- 1UL << PG_hugetlb | 1UL << PG_large_rmappable)
+ 1UL << PG_large_rmappable)
#define PAGE_FLAGS_PRIVATE \
(1UL << PG_private | 1UL << PG_private_2)
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 4ac34392823a9c..c16db006709073 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -34,8 +34,9 @@ static inline bool is_migrate_isolate(int migratetype)
#define REPORT_FAILURE 0x2
void set_pageblock_migratetype(struct page *page, int migratetype);
-int move_freepages_block(struct zone *zone, struct page *page,
- int migratetype, int *num_movable);
+
+bool move_freepages_block_isolate(struct zone *zone, struct page *page,
+ int migratetype);
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
int migratetype, int flags, gfp_t gfp_flags);
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index be98564191e676..e4b48a0dda2446 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -4,7 +4,6 @@
#include <linux/types.h>
#include <linux/stacktrace.h>
-#include <linux/stackdepot.h>
struct pglist_data;
@@ -78,7 +77,7 @@ static inline void page_ext_init(void)
}
#endif
-extern struct page_ext *page_ext_get(struct page *page);
+extern struct page_ext *page_ext_get(const struct page *page);
extern void page_ext_put(struct page_ext *page_ext);
static inline void *page_ext_data(struct page_ext *page_ext,
@@ -118,7 +117,7 @@ static inline void page_ext_init_flatmem(void)
{
}
-static inline struct page_ext *page_ext_get(struct page *page)
+static inline struct page_ext *page_ext_get(const struct page *page)
{
return NULL;
}
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index d8f34484064302..89ca0d5dc1e71d 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -6,14 +6,12 @@
#include <linux/page-flags.h>
#include <linux/page_ext.h>
-#ifdef CONFIG_PAGE_IDLE_FLAG
-
-#ifndef CONFIG_64BIT
+#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
/*
* If there is not enough space to store Idle and Young bits in page flags, use
* page ext flags instead.
*/
-static inline bool folio_test_young(struct folio *folio)
+static inline bool folio_test_young(const struct folio *folio)
{
struct page_ext *page_ext = page_ext_get(&folio->page);
bool page_young;
@@ -52,7 +50,7 @@ static inline bool folio_test_clear_young(struct folio *folio)
return page_young;
}
-static inline bool folio_test_idle(struct folio *folio)
+static inline bool folio_test_idle(const struct folio *folio)
{
struct page_ext *page_ext = page_ext_get(&folio->page);
bool page_idle;
@@ -60,7 +58,7 @@ static inline bool folio_test_idle(struct folio *folio)
if (unlikely(!page_ext))
return false;
- page_idle = test_bit(PAGE_EXT_IDLE, &page_ext->flags);
+ page_idle = test_bit(PAGE_EXT_IDLE, &page_ext->flags);
page_ext_put(page_ext);
return page_idle;
@@ -87,61 +85,5 @@ static inline void folio_clear_idle(struct folio *folio)
clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
page_ext_put(page_ext);
}
-#endif /* !CONFIG_64BIT */
-
-#else /* !CONFIG_PAGE_IDLE_FLAG */
-
-static inline bool folio_test_young(struct folio *folio)
-{
- return false;
-}
-
-static inline void folio_set_young(struct folio *folio)
-{
-}
-
-static inline bool folio_test_clear_young(struct folio *folio)
-{
- return false;
-}
-
-static inline bool folio_test_idle(struct folio *folio)
-{
- return false;
-}
-
-static inline void folio_set_idle(struct folio *folio)
-{
-}
-
-static inline void folio_clear_idle(struct folio *folio)
-{
-}
-
-#endif /* CONFIG_PAGE_IDLE_FLAG */
-
-static inline bool page_is_young(struct page *page)
-{
- return folio_test_young(page_folio(page));
-}
-
-static inline void set_page_young(struct page *page)
-{
- folio_set_young(page_folio(page));
-}
-
-static inline bool test_and_clear_page_young(struct page *page)
-{
- return folio_test_clear_young(page_folio(page));
-}
-
-static inline bool page_is_idle(struct page *page)
-{
- return folio_test_idle(page_folio(page));
-}
-
-static inline void set_page_idle(struct page *page)
-{
- folio_set_idle(page_folio(page));
-}
+#endif /* CONFIG_PAGE_IDLE_FLAG && !64BIT */
#endif /* _LINUX_MM_PAGE_IDLE_H */
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index d7c2d33baa7f8e..1acf5bac7f503b 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -139,20 +139,15 @@ static inline void folio_ref_sub(struct folio *folio, int nr)
page_ref_sub(&folio->page, nr);
}
-static inline int page_ref_sub_return(struct page *page, int nr)
+static inline int folio_ref_sub_return(struct folio *folio, int nr)
{
- int ret = atomic_sub_return(nr, &page->_refcount);
+ int ret = atomic_sub_return(nr, &folio->_refcount);
if (page_ref_tracepoint_active(page_ref_mod_and_return))
- __page_ref_mod_and_return(page, -nr, ret);
+ __page_ref_mod_and_return(&folio->page, -nr, ret);
return ret;
}
-static inline int folio_ref_sub_return(struct folio *folio, int nr)
-{
- return page_ref_sub_return(&folio->page, nr);
-}
-
static inline void page_ref_inc(struct page *page)
{
atomic_inc(&page->_refcount);
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 3f2409b968ec65..547e82cdc89a44 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -28,7 +28,7 @@ enum pageblock_bits {
NR_PAGEBLOCK_BITS
};
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE)
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -45,7 +45,11 @@ extern unsigned int pageblock_order;
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
-#else /* CONFIG_HUGETLB_PAGE */
+#elif defined(CONFIG_TRANSPARENT_HUGEPAGE)
+
+#define pageblock_order min_t(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER)
+
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
#define pageblock_order MAX_PAGE_ORDER
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d27..4e85f33721c42f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -399,7 +399,6 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
#endif
}
-struct address_space *page_mapping(struct page *);
struct address_space *folio_mapping(struct folio *);
struct address_space *swapcache_mapping(struct folio *);
@@ -542,24 +541,22 @@ static inline void *detach_page_private(struct page *page)
#endif
#ifdef CONFIG_NUMA
-struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
+struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
#else
-static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
+static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
{
- return folio_alloc(gfp, order);
+ return folio_alloc_noprof(gfp, order);
}
#endif
+#define filemap_alloc_folio(...) \
+ alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
+
static inline struct page *__page_cache_alloc(gfp_t gfp)
{
return &filemap_alloc_folio(gfp, 0)->page;
}
-static inline struct page *page_cache_alloc(struct address_space *x)
-{
- return __page_cache_alloc(mapping_gfp_mask(x));
-}
-
static inline gfp_t readahead_gfp_mask(struct address_space *x)
{
return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
@@ -1144,11 +1141,6 @@ void folio_end_writeback(struct folio *folio);
void wait_for_stable_page(struct page *page);
void folio_wait_stable(struct folio *folio);
void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
-static inline void __set_page_dirty(struct page *page,
- struct address_space *mapping, int warn)
-{
- __folio_mark_dirty(page_folio(page), mapping, warn);
-}
void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb);
void __folio_cancel_dirty(struct folio *folio);
static inline void folio_cancel_dirty(struct folio *folio)
@@ -1160,7 +1152,6 @@ static inline void folio_cancel_dirty(struct folio *folio)
bool folio_clear_dirty_for_io(struct folio *folio);
bool clear_page_dirty_for_io(struct page *page);
void folio_invalidate(struct folio *folio, size_t offset, size_t length);
-int __set_page_dirty_nobuffers(struct page *page);
bool noop_dirty_folio(struct address_space *mapping, struct folio *folio);
#ifdef CONFIG_MIGRATION
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index fcc06c300a72c3..5d3a0cccc6bf8d 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -11,8 +11,8 @@
#include <linux/types.h>
-/* 15 pointers + header align the folio_batch structure to a power of two */
-#define PAGEVEC_SIZE 15
+/* 31 pointers + header align the folio_batch structure to a power of two */
+#define PAGEVEC_SIZE 31
struct folio;
diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h
index 30581e2e04cc1f..5802e1deef24b5 100644
--- a/include/linux/pds/pds_common.h
+++ b/include/linux/pds/pds_common.h
@@ -4,6 +4,8 @@
#ifndef _PDS_COMMON_H_
#define _PDS_COMMON_H_
+#include <linux/notifier.h>
+
#define PDS_CORE_DRV_NAME "pds_core"
/* the device's internal addressing uses up to 52 bits */
diff --git a/include/linux/peci.h b/include/linux/peci.h
index 9b3d36aff431e1..90e241458ef690 100644
--- a/include/linux/peci.h
+++ b/include/linux/peci.h
@@ -58,7 +58,6 @@ static inline struct peci_controller *to_peci_controller(void *d)
/**
* struct peci_device - PECI device
* @dev: device object to register PECI device to the device model
- * @controller: manages the bus segment hosting this PECI device
* @info: PECI device characteristics
* @info.family: device family
* @info.model: device model
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 8c677f185901bc..03053de557cf60 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -2,6 +2,7 @@
#ifndef __LINUX_PERCPU_H
#define __LINUX_PERCPU_H
+#include <linux/alloc_tag.h>
#include <linux/mmdebug.h>
#include <linux/preempt.h>
#include <linux/smp.h>
@@ -9,12 +10,17 @@
#include <linux/pfn.h>
#include <linux/init.h>
#include <linux/cleanup.h>
+#include <linux/sched.h>
#include <asm/percpu.h>
/* enough to cover all DEFINE_PER_CPUs in modules */
#ifdef CONFIG_MODULES
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+#define PERCPU_MODULE_RESERVE (8 << 13)
+#else
#define PERCPU_MODULE_RESERVE (8 << 10)
+#endif
#else
#define PERCPU_MODULE_RESERVE 0
#endif
@@ -121,7 +127,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
#endif
-extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr);
extern bool is_kernel_percpu_address(unsigned long addr);
@@ -129,14 +134,16 @@ extern bool is_kernel_percpu_address(unsigned long addr);
extern void __init setup_per_cpu_areas(void);
#endif
-extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1);
-extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1);
-extern void free_percpu(void __percpu *__pdata);
+extern void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
+ gfp_t gfp) __alloc_size(1);
extern size_t pcpu_alloc_size(void __percpu *__pdata);
-DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T))
-
-extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
+#define __alloc_percpu_gfp(_size, _align, _gfp) \
+ alloc_hooks(pcpu_alloc_noprof(_size, _align, false, _gfp))
+#define __alloc_percpu(_size, _align) \
+ alloc_hooks(pcpu_alloc_noprof(_size, _align, false, GFP_KERNEL))
+#define __alloc_reserved_percpu(_size, _align) \
+ alloc_hooks(pcpu_alloc_noprof(_size, _align, true, GFP_KERNEL))
#define alloc_percpu_gfp(type, gfp) \
(typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type), \
@@ -144,6 +151,15 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
#define alloc_percpu(type) \
(typeof(type) __percpu *)__alloc_percpu(sizeof(type), \
__alignof__(type))
+#define alloc_percpu_noprof(type) \
+ ((typeof(type) __percpu *)pcpu_alloc_noprof(sizeof(type), \
+ __alignof__(type), false, GFP_KERNEL))
+
+extern void free_percpu(void __percpu *__pdata);
+
+DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T))
+
+extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
extern unsigned long pcpu_nr_pages(void);
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
new file mode 100644
index 00000000000000..86ba5d33e43bdf
--- /dev/null
+++ b/include/linux/pgalloc_tag.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * page allocation tagging
+ */
+#ifndef _LINUX_PGALLOC_TAG_H
+#define _LINUX_PGALLOC_TAG_H
+
+#include <linux/alloc_tag.h>
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+
+#include <linux/page_ext.h>
+
+extern struct page_ext_operations page_alloc_tagging_ops;
+
+static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext)
+{
+ return (void *)page_ext + page_alloc_tagging_ops.offset;
+}
+
+static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref)
+{
+ return (void *)ref - page_alloc_tagging_ops.offset;
+}
+
+/* Should be called only if mem_alloc_profiling_enabled() */
+static inline union codetag_ref *get_page_tag_ref(struct page *page)
+{
+ if (page) {
+ struct page_ext *page_ext = page_ext_get(page);
+
+ if (page_ext)
+ return codetag_ref_from_page_ext(page_ext);
+ }
+ return NULL;
+}
+
+static inline void put_page_tag_ref(union codetag_ref *ref)
+{
+ page_ext_put(page_ext_from_codetag_ref(ref));
+}
+
+static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+ unsigned int nr)
+{
+ if (mem_alloc_profiling_enabled()) {
+ union codetag_ref *ref = get_page_tag_ref(page);
+
+ if (ref) {
+ alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE * nr);
+ put_page_tag_ref(ref);
+ }
+ }
+}
+
+static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
+{
+ if (mem_alloc_profiling_enabled()) {
+ union codetag_ref *ref = get_page_tag_ref(page);
+
+ if (ref) {
+ alloc_tag_sub(ref, PAGE_SIZE * nr);
+ put_page_tag_ref(ref);
+ }
+ }
+}
+
+static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
+{
+ int i;
+ struct page_ext *page_ext;
+ union codetag_ref *ref;
+ struct alloc_tag *tag;
+
+ if (!mem_alloc_profiling_enabled())
+ return;
+
+ page_ext = page_ext_get(page);
+ if (unlikely(!page_ext))
+ return;
+
+ ref = codetag_ref_from_page_ext(page_ext);
+ if (!ref->ct)
+ goto out;
+
+ tag = ct_to_alloc_tag(ref->ct);
+ page_ext = page_ext_next(page_ext);
+ for (i = 1; i < nr; i++) {
+ /* Set new reference to point to the original tag */
+ alloc_tag_ref_set(codetag_ref_from_page_ext(page_ext), tag);
+ page_ext = page_ext_next(page_ext);
+ }
+out:
+ page_ext_put(page_ext);
+}
+
+static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
+{
+ struct alloc_tag *tag = NULL;
+
+ if (mem_alloc_profiling_enabled()) {
+ union codetag_ref *ref = get_page_tag_ref(page);
+
+ alloc_tag_sub_check(ref);
+ if (ref && ref->ct)
+ tag = ct_to_alloc_tag(ref->ct);
+ put_page_tag_ref(ref);
+ }
+
+ return tag;
+}
+
+static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
+{
+ if (mem_alloc_profiling_enabled() && tag)
+ this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
+}
+
+#else /* CONFIG_MEM_ALLOC_PROFILING */
+
+static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; }
+static inline void put_page_tag_ref(union codetag_ref *ref) {}
+static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+ unsigned int nr) {}
+static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
+static inline void pgalloc_tag_split(struct page *page, unsigned int nr) {}
+static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
+static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
+
+#endif /* CONFIG_MEM_ALLOC_PROFILING */
+
+#endif /* _LINUX_PGALLOC_TAG_H */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 85fc7554cd52bc..18019f037baeca 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -50,6 +50,8 @@
#define pmd_pgtable(pmd) pmd_page(pmd)
#endif
+#define pmd_folio(pmd) page_folio(pmd_page(pmd))
+
/*
* A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD]
*
@@ -149,9 +151,7 @@ static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address)
* a shortcut which implies the use of the kernel's pgd, instead
* of a process's
*/
-#ifndef pgd_offset_k
#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
-#endif
/*
* In many cases it is known that a virtual address is mapped at PMD or PTE
@@ -459,6 +459,50 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
}
#endif
+#ifndef clear_young_dirty_ptes
+/**
+ * clear_young_dirty_ptes - Mark PTEs that map consecutive pages of the
+ * same folio as old/clean.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to mark old/clean.
+ * @flags: Flags to modify the PTE batch semantics.
+ *
+ * May be overridden by the architecture; otherwise, implemented by
+ * get_and_clear/modify/set for each pte in the range.
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock. The PTEs map consecutive
+ * pages that belong to the same folio. The PTEs are all in the same PMD.
+ */
+static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, cydp_t flags)
+{
+ pte_t pte;
+
+ for (;;) {
+ if (flags == CYDP_CLEAR_YOUNG)
+ ptep_test_and_clear_young(vma, addr, ptep);
+ else {
+ pte = ptep_get_and_clear(vma->vm_mm, addr, ptep);
+ if (flags & CYDP_CLEAR_YOUNG)
+ pte = pte_mkold(pte);
+ if (flags & CYDP_CLEAR_DIRTY)
+ pte = pte_mkclean(pte);
+ set_pte_at(vma->vm_mm, addr, ptep, pte);
+ }
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+#endif
+
static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
@@ -708,6 +752,35 @@ static inline void pte_clear_not_present_full(struct mm_struct *mm,
}
#endif
+#ifndef clear_not_present_full_ptes
+/**
+ * clear_not_present_full_ptes - Clear multiple not present PTEs which are
+ * consecutive in the pgtable.
+ * @mm: Address space the ptes represent.
+ * @addr: Address of the first pte.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear.
+ * @full: Whether we are clearing a full mm.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over pte_clear_not_present_full().
+ *
+ * Context: The caller holds the page table lock. The PTEs are all not present.
+ * The PTEs are all in the same PMD.
+ */
+static inline void clear_not_present_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, unsigned int nr, int full)
+{
+ for (;;) {
+ pte_clear_not_present_full(mm, addr, ptep, full);
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+#endif
+
#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
extern pte_t ptep_clear_flush(struct vm_area_struct *vma,
unsigned long address,
@@ -1052,7 +1125,7 @@ static inline int arch_unmap_one(struct mm_struct *mm,
* prototypes must be defined in the arch-specific asm/pgtable.h file.
*/
#ifndef __HAVE_ARCH_PREPARE_TO_SWAP
-static inline int arch_prepare_to_swap(struct page *page)
+static inline int arch_prepare_to_swap(struct folio *folio)
{
return 0;
}
@@ -1079,7 +1152,7 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
#endif
#ifndef __HAVE_ARCH_MOVE_PTE
-#define move_pte(pte, prot, old_addr, new_addr) (pte)
+#define move_pte(pte, old_addr, new_addr) (pte)
#endif
#ifndef pte_accessible
@@ -1770,11 +1843,25 @@ typedef unsigned int pgtbl_mod_mask;
#endif
/*
- * p?d_leaf() - true if this entry is a final mapping to a physical address.
- * This differs from p?d_huge() by the fact that they are always available (if
- * the architecture supports large pages at the appropriate level) even
- * if CONFIG_HUGETLB_PAGE is not defined.
- * Only meaningful when called on a valid entry.
+ * pXd_leaf() is the API to check whether a pgtable entry is a huge page
+ * mapping. It should work globally across all archs, without any
+ * dependency on CONFIG_* options. For architectures that do not support
+ * huge mappings on specific levels, below fallbacks will be used.
+ *
+ * A leaf pgtable entry should always imply the following:
+ *
+ * - It is a "present" entry. IOW, before using this API, please check it
+ * with pXd_present() first. NOTE: it may not always mean the "present
+ * bit" is set. For example, PROT_NONE entries are always "present".
+ *
+ * - It should _never_ be a swap entry of any type. Above "present" check
+ * should have guarded this, but let's be crystal clear on this.
+ *
+ * - It should contain a huge PFN, which points to a huge page larger than
+ * PAGE_SIZE of the platform. The PFN format isn't important here.
+ *
+ * - It should cover all kinds of huge mappings (e.g., pXd_trans_huge(),
+ * pXd_devmap(), or hugetlb mappings).
*/
#ifndef pgd_leaf
#define pgd_leaf(x) false
@@ -1806,6 +1893,20 @@ typedef unsigned int pgtbl_mod_mask;
#endif
/*
+ * We always define pmd_pfn for all archs as it's used in lots of generic
+ * code. Now it happens too for pud_pfn (and can happen for larger
+ * mappings too in the future; we're not there yet). Instead of defining
+ * it for all archs (like pmd_pfn), provide a fallback.
+ *
+ * Note that returning 0 here means any arch that didn't define this can
+ * get severely wrong when it hits a real pud leaf. It's arch's
+ * responsibility to properly define it when a huge pud is possible.
+ */
+#ifndef pud_pfn
+#define pud_pfn(x) 0
+#endif
+
+/*
* Some architectures have MMUs that are configurable or selectable at boot
* time. These lead to variable PTRS_PER_x. For statically allocated arrays it
* helps to have a static maximum value.
diff --git a/include/linux/profile.h b/include/linux/profile.h
index 11db1ec516e272..04ae5ebcb637aa 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -18,13 +18,8 @@ struct proc_dir_entry;
struct notifier_block;
#if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS)
-void create_prof_cpu_mask(void);
int create_proc_profile(void);
#else
-static inline void create_prof_cpu_mask(void)
-{
-}
-
static inline int create_proc_profile(void)
{
return 0;
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 808f9d3ee54654..fd037c127bb071 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -464,11 +464,11 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
* documentation for vmalloc for which of them are legal.
*/
-static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
+static inline void **__ptr_ring_init_queue_alloc_noprof(unsigned int size, gfp_t gfp)
{
if (size > KMALLOC_MAX_SIZE / sizeof(void *))
return NULL;
- return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
+ return kvmalloc_array_noprof(size, sizeof(void *), gfp | __GFP_ZERO);
}
static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
@@ -484,9 +484,9 @@ static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
r->batch = 1;
}
-static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
+static inline int ptr_ring_init_noprof(struct ptr_ring *r, int size, gfp_t gfp)
{
- r->queue = __ptr_ring_init_queue_alloc(size, gfp);
+ r->queue = __ptr_ring_init_queue_alloc_noprof(size, gfp);
if (!r->queue)
return -ENOMEM;
@@ -497,6 +497,7 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
return 0;
}
+#define ptr_ring_init(...) alloc_hooks(ptr_ring_init_noprof(__VA_ARGS__))
/*
* Return entries into ring. Destroy entries that don't fit.
@@ -587,11 +588,11 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
* In particular if you consume ring in interrupt or BH context, you must
* disable interrupts/BH when doing so.
*/
-static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
+static inline int ptr_ring_resize_noprof(struct ptr_ring *r, int size, gfp_t gfp,
void (*destroy)(void *))
{
unsigned long flags;
- void **queue = __ptr_ring_init_queue_alloc(size, gfp);
+ void **queue = __ptr_ring_init_queue_alloc_noprof(size, gfp);
void **old;
if (!queue)
@@ -609,6 +610,7 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
return 0;
}
+#define ptr_ring_resize(...) alloc_hooks(ptr_ring_resize_noprof(__VA_ARGS__))
/*
* Note: producer lock is nested within consumer lock, so if you
@@ -616,21 +618,21 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
* In particular if you consume ring in interrupt or BH context, you must
* disable interrupts/BH when doing so.
*/
-static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
- unsigned int nrings,
- int size,
- gfp_t gfp, void (*destroy)(void *))
+static inline int ptr_ring_resize_multiple_noprof(struct ptr_ring **rings,
+ unsigned int nrings,
+ int size,
+ gfp_t gfp, void (*destroy)(void *))
{
unsigned long flags;
void ***queues;
int i;
- queues = kmalloc_array(nrings, sizeof(*queues), gfp);
+ queues = kmalloc_array_noprof(nrings, sizeof(*queues), gfp);
if (!queues)
goto noqueues;
for (i = 0; i < nrings; ++i) {
- queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
+ queues[i] = __ptr_ring_init_queue_alloc_noprof(size, gfp);
if (!queues[i])
goto nomem;
}
@@ -660,6 +662,8 @@ nomem:
noqueues:
return -ENOMEM;
}
+#define ptr_ring_resize_multiple(...) \
+ alloc_hooks(ptr_ring_resize_multiple_noprof(__VA_ARGS__))
static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
{
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 5d868505a94e43..6d92b68efbf6c3 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset);
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
&randomize_kstack_offset)) { \
u32 offset = raw_cpu_read(kstack_offset); \
- offset ^= (rand); \
+ offset = ror32(offset, 5) ^ (rand); \
raw_cpu_write(kstack_offset, offset); \
} \
} while (0)
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index b743241cfb7caa..d470303b1bbbbe 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1230,6 +1230,7 @@ int regmap_multi_reg_write_bypassed(struct regmap *map,
int regmap_raw_write_async(struct regmap *map, unsigned int reg,
const void *val, size_t val_len);
int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val);
+int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val);
int regmap_raw_read(struct regmap *map, unsigned int reg,
void *val, size_t val_len);
int regmap_noinc_read(struct regmap *map, unsigned int reg,
@@ -1739,6 +1740,13 @@ static inline int regmap_read(struct regmap *map, unsigned int reg,
return -EINVAL;
}
+static inline int regmap_read_bypassed(struct regmap *map, unsigned int reg,
+ unsigned int *val)
+{
+ WARN_ONCE(1, "regmap API is disabled");
+ return -EINVAL;
+}
+
static inline int regmap_raw_read(struct regmap *map, unsigned int reg,
void *val, size_t val_len)
{
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 4660582a33022f..ed180ca419dad9 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -320,13 +320,13 @@ devm_regulator_get_exclusive(struct device *dev, const char *id)
static inline int devm_regulator_get_enable(struct device *dev, const char *id)
{
- return -ENODEV;
+ return 0;
}
static inline int devm_regulator_get_enable_optional(struct device *dev,
const char *id)
{
- return -ENODEV;
+ return 0;
}
static inline struct regulator *__must_check
diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
index b6f3797277ff83..015c8298bebc45 100644
--- a/include/linux/rhashtable-types.h
+++ b/include/linux/rhashtable-types.h
@@ -9,6 +9,7 @@
#ifndef _LINUX_RHASHTABLE_TYPES_H
#define _LINUX_RHASHTABLE_TYPES_H
+#include <linux/alloc_tag.h>
#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/mutex.h>
@@ -88,6 +89,9 @@ struct rhashtable {
struct mutex mutex;
spinlock_t lock;
atomic_t nelems;
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+ struct alloc_tag *alloc_tag;
+#endif
};
/**
@@ -127,9 +131,12 @@ struct rhashtable_iter {
bool end_of_table;
};
-int rhashtable_init(struct rhashtable *ht,
+int rhashtable_init_noprof(struct rhashtable *ht,
const struct rhashtable_params *params);
-int rhltable_init(struct rhltable *hlt,
+#define rhashtable_init(...) alloc_hooks(rhashtable_init_noprof(__VA_ARGS__))
+
+int rhltable_init_noprof(struct rhltable *hlt,
const struct rhashtable_params *params);
+#define rhltable_init(...) alloc_hooks(rhltable_init_noprof(__VA_ARGS__))
#endif /* _LINUX_RHASHTABLE_TYPES_H */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b7944a833668a7..7229b9baf20d87 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -273,6 +273,7 @@ static inline int hugetlb_try_dup_anon_rmap(struct folio *folio,
ClearPageAnonExclusive(&folio->page);
}
atomic_inc(&folio->_entire_mapcount);
+ atomic_inc(&folio->_large_mapcount);
return 0;
}
@@ -284,7 +285,7 @@ static inline int hugetlb_try_share_anon_rmap(struct folio *folio)
VM_WARN_ON_FOLIO(!PageAnonExclusive(&folio->page), folio);
/* Paired with the memory barrier in try_grab_folio(). */
- if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+ if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
smp_mb();
if (unlikely(folio_maybe_dma_pinned(folio)))
@@ -295,7 +296,7 @@ static inline int hugetlb_try_share_anon_rmap(struct folio *folio)
* This is conceptually a smp_wmb() paired with the smp_rmb() in
* gup_must_unshare().
*/
- if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+ if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
smp_mb__after_atomic();
return 0;
}
@@ -306,6 +307,7 @@ static inline void hugetlb_add_file_rmap(struct folio *folio)
VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
atomic_inc(&folio->_entire_mapcount);
+ atomic_inc(&folio->_large_mapcount);
}
static inline void hugetlb_remove_rmap(struct folio *folio)
@@ -313,21 +315,31 @@ static inline void hugetlb_remove_rmap(struct folio *folio)
VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
atomic_dec(&folio->_entire_mapcount);
+ atomic_dec(&folio->_large_mapcount);
}
static __always_inline void __folio_dup_file_rmap(struct folio *folio,
struct page *page, int nr_pages, enum rmap_level level)
{
+ const int orig_nr_pages = nr_pages;
+
__folio_rmap_sanity_checks(folio, page, nr_pages, level);
switch (level) {
case RMAP_LEVEL_PTE:
+ if (!folio_test_large(folio)) {
+ atomic_inc(&page->_mapcount);
+ break;
+ }
+
do {
atomic_inc(&page->_mapcount);
} while (page++, --nr_pages > 0);
+ atomic_add(orig_nr_pages, &folio->_large_mapcount);
break;
case RMAP_LEVEL_PMD:
atomic_inc(&folio->_entire_mapcount);
+ atomic_inc(&folio->_large_mapcount);
break;
}
}
@@ -347,8 +359,12 @@ static inline void folio_dup_file_rmap_ptes(struct folio *folio,
{
__folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE);
}
-#define folio_dup_file_rmap_pte(folio, page) \
- folio_dup_file_rmap_ptes(folio, page, 1)
+
+static __always_inline void folio_dup_file_rmap_pte(struct folio *folio,
+ struct page *page)
+{
+ __folio_dup_file_rmap(folio, page, 1, RMAP_LEVEL_PTE);
+}
/**
* folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio
@@ -373,6 +389,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
struct page *page, int nr_pages, struct vm_area_struct *src_vma,
enum rmap_level level)
{
+ const int orig_nr_pages = nr_pages;
bool maybe_pinned;
int i;
@@ -401,11 +418,20 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
if (PageAnonExclusive(page + i))
return -EBUSY;
}
+
+ if (!folio_test_large(folio)) {
+ if (PageAnonExclusive(page))
+ ClearPageAnonExclusive(page);
+ atomic_inc(&page->_mapcount);
+ break;
+ }
+
do {
if (PageAnonExclusive(page))
ClearPageAnonExclusive(page);
atomic_inc(&page->_mapcount);
} while (page++, --nr_pages > 0);
+ atomic_add(orig_nr_pages, &folio->_large_mapcount);
break;
case RMAP_LEVEL_PMD:
if (PageAnonExclusive(page)) {
@@ -414,6 +440,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
ClearPageAnonExclusive(page);
}
atomic_inc(&folio->_entire_mapcount);
+ atomic_inc(&folio->_large_mapcount);
break;
}
return 0;
@@ -448,8 +475,13 @@ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio,
return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma,
RMAP_LEVEL_PTE);
}
-#define folio_try_dup_anon_rmap_pte(folio, page, vma) \
- folio_try_dup_anon_rmap_ptes(folio, page, 1, vma)
+
+static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio,
+ struct page *page, struct vm_area_struct *src_vma)
+{
+ return __folio_try_dup_anon_rmap(folio, page, 1, src_vma,
+ RMAP_LEVEL_PTE);
+}
/**
* folio_try_dup_anon_rmap_pmd - try duplicating a PMD mapping of a page range
@@ -541,7 +573,7 @@ static __always_inline int __folio_try_share_anon_rmap(struct folio *folio,
*/
/* Paired with the memory barrier in try_grab_folio(). */
- if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+ if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
smp_mb();
if (unlikely(folio_maybe_dma_pinned(folio)))
@@ -552,7 +584,7 @@ static __always_inline int __folio_try_share_anon_rmap(struct folio *folio,
* This is conceptually a smp_wmb() paired with the smp_rmb() in
* gup_must_unshare().
*/
- if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+ if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
smp_mb__after_atomic();
return 0;
}
@@ -698,7 +730,7 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
-int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
+unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
/*
* rmap_walk_control: To control rmap traversing for specific needs
diff --git a/include/linux/rwbase_rt.h b/include/linux/rwbase_rt.h
index 29c4e4f243e47d..f2394a409c9d5e 100644
--- a/include/linux/rwbase_rt.h
+++ b/include/linux/rwbase_rt.h
@@ -31,9 +31,9 @@ static __always_inline bool rw_base_is_locked(const struct rwbase_rt *rwb)
return atomic_read(&rwb->readers) != READER_BIAS;
}
-static inline void rw_base_assert_held_write(const struct rwbase_rt *rwb)
+static __always_inline bool rw_base_is_write_locked(const struct rwbase_rt *rwb)
{
- WARN_ON(atomic_read(&rwb->readers) != WRITER_BIAS);
+ return atomic_read(&rwb->readers) == WRITER_BIAS;
}
static __always_inline bool rw_base_is_contended(const struct rwbase_rt *rwb)
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 4f1c18992f768f..c8b543d428b0a8 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -167,14 +167,14 @@ static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem)
return rw_base_is_locked(&sem->rwbase);
}
-static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
{
WARN_ON(!rwsem_is_locked(sem));
}
-static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
{
- rw_base_assert_held_write(sem);
+ WARN_ON(!rw_base_is_write_locked(&sem->rwbase));
}
static __always_inline int rwsem_is_contended(struct rw_semaphore *sem)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3c2abbc587b49c..4118b3f959c324 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -770,6 +770,10 @@ struct task_struct {
unsigned int flags;
unsigned int ptrace;
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+ struct alloc_tag *alloc_tag;
+#endif
+
#ifdef CONFIG_SMP
int on_cpu;
struct __call_single_node wake_entry;
@@ -810,6 +814,7 @@ struct task_struct {
struct task_group *sched_task_group;
#endif
+
#ifdef CONFIG_UCLAMP_TASK
/*
* Clamp values requested for a scheduling entity.
@@ -2187,4 +2192,23 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
extern void sched_set_stop_task(int cpu, struct task_struct *stop);
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
+{
+ swap(current->alloc_tag, tag);
+ return tag;
+}
+
+static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
+{
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+ WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
+#endif
+ current->alloc_tag = old;
+}
+#else
+#define alloc_tag_save(_tag) NULL
+#define alloc_tag_restore(_tag, _old) do {} while (0)
+#endif
+
#endif
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index 02f5090ffea29c..e62ff805cfc950 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -92,9 +92,12 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_VM_MERGE_ANY 30
#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY)
+#define MMF_TOPDOWN 31 /* mm searches top down by default */
+#define MMF_TOPDOWN_MASK (1 << MMF_TOPDOWN)
+
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\
- MMF_VM_MERGE_ANY_MASK)
+ MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK)
static inline unsigned long mmf_init_flags(unsigned long flags)
{
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index b6543f9d78d6b8..91546493c43da2 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -8,6 +8,7 @@
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>
+#include <linux/sched/coredump.h>
/*
* Routines for handling mm_structs
@@ -186,6 +187,27 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags);
+unsigned long mm_get_unmapped_area(struct mm_struct *mm, struct file *filp,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags);
+
+unsigned long
+arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags, vm_flags_t vm_flags);
+unsigned long
+arch_get_unmapped_area_topdown_vmflags(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags, vm_flags_t);
+
+unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm,
+ struct file *filp,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags,
+ vm_flags_t vm_flags);
+
unsigned long
generic_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
index 35f3a4a8ceb1e3..e918f96881f569 100644
--- a/include/linux/secretmem.h
+++ b/include/linux/secretmem.h
@@ -6,25 +6,8 @@
extern const struct address_space_operations secretmem_aops;
-static inline bool folio_is_secretmem(struct folio *folio)
+static inline bool secretmem_mapping(struct address_space *mapping)
{
- struct address_space *mapping;
-
- /*
- * Using folio_mapping() is quite slow because of the actual call
- * instruction.
- * We know that secretmem pages are not compound and LRU so we can
- * save a couple of cycles here.
- */
- if (folio_test_large(folio) || !folio_test_lru(folio))
- return false;
-
- mapping = (struct address_space *)
- ((unsigned long)folio->mapping & ~PAGE_MAPPING_FLAGS);
-
- if (!mapping || mapping != folio->mapping)
- return false;
-
return mapping->a_ops == &secretmem_aops;
}
@@ -38,7 +21,7 @@ static inline bool vma_is_secretmem(struct vm_area_struct *vma)
return false;
}
-static inline bool folio_is_secretmem(struct folio *folio)
+static inline bool secretmem_mapping(struct address_space *mapping)
{
return false;
}
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a4c15db2f5e540..3fb18f7eb73eaf 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -110,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
int shmem_unuse(unsigned int type);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
struct mm_struct *mm, unsigned long vm_flags);
+#else
+static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
+ struct mm_struct *mm, unsigned long vm_flags)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_SHMEM
extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
#else
diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h
index e2d45b7cb61980..926496c9cc9c3b 100644
--- a/include/linux/skb_array.h
+++ b/include/linux/skb_array.h
@@ -177,10 +177,11 @@ static inline int skb_array_peek_len_any(struct skb_array *a)
return PTR_RING_PEEK_CALL_ANY(&a->ring, __skb_array_len_with_tag);
}
-static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp)
+static inline int skb_array_init_noprof(struct skb_array *a, int size, gfp_t gfp)
{
- return ptr_ring_init(&a->ring, size, gfp);
+ return ptr_ring_init_noprof(&a->ring, size, gfp);
}
+#define skb_array_init(...) alloc_hooks(skb_array_init_noprof(__VA_ARGS__))
static void __skb_array_destroy_skb(void *ptr)
{
@@ -198,15 +199,17 @@ static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp)
return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb);
}
-static inline int skb_array_resize_multiple(struct skb_array **rings,
- int nrings, unsigned int size,
- gfp_t gfp)
+static inline int skb_array_resize_multiple_noprof(struct skb_array **rings,
+ int nrings, unsigned int size,
+ gfp_t gfp)
{
BUILD_BUG_ON(offsetof(struct skb_array, ring));
- return ptr_ring_resize_multiple((struct ptr_ring **)rings,
- nrings, size, gfp,
- __skb_array_destroy_skb);
+ return ptr_ring_resize_multiple_noprof((struct ptr_ring **)rings,
+ nrings, size, gfp,
+ __skb_array_destroy_skb);
}
+#define skb_array_resize_multiple(...) \
+ alloc_hooks(skb_array_resize_multiple_noprof(__VA_ARGS__))
static inline void skb_array_cleanup(struct skb_array *a)
{
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0c7c67b3a87b23..b72920c9887b73 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -753,8 +753,6 @@ typedef unsigned char *sk_buff_data_t;
* @list: queue head
* @ll_node: anchor in an llist (eg socket defer_list)
* @sk: Socket we are owned by
- * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in
- * fragmentation management
* @dev: Device we arrived on/are leaving by
* @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
* @cb: Control buffer. Free for use by every layer. Put private vars here
@@ -875,10 +873,7 @@ struct sk_buff {
struct llist_node ll_node;
};
- union {
- struct sock *sk;
- int ip_defrag_offset;
- };
+ struct sock *sk;
union {
ktime_t tstamp;
@@ -3376,7 +3371,7 @@ void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason);
*
* %NULL is returned if there is no free memory.
*/
-static inline struct page *__dev_alloc_pages(gfp_t gfp_mask,
+static inline struct page *__dev_alloc_pages_noprof(gfp_t gfp_mask,
unsigned int order)
{
/* This piece of code contains several assumptions.
@@ -3389,13 +3384,11 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask,
*/
gfp_mask |= __GFP_COMP | __GFP_MEMALLOC;
- return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
+ return alloc_pages_node_noprof(NUMA_NO_NODE, gfp_mask, order);
}
+#define __dev_alloc_pages(...) alloc_hooks(__dev_alloc_pages_noprof(__VA_ARGS__))
-static inline struct page *dev_alloc_pages(unsigned int order)
-{
- return __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, order);
-}
+#define dev_alloc_pages(_order) __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, _order)
/**
* __dev_alloc_page - allocate a page for network Rx
@@ -3405,15 +3398,13 @@ static inline struct page *dev_alloc_pages(unsigned int order)
*
* %NULL is returned if there is no free memory.
*/
-static inline struct page *__dev_alloc_page(gfp_t gfp_mask)
+static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask)
{
- return __dev_alloc_pages(gfp_mask, 0);
+ return __dev_alloc_pages_noprof(gfp_mask, 0);
}
+#define __dev_alloc_page(...) alloc_hooks(__dev_alloc_page_noprof(__VA_ARGS__))
-static inline struct page *dev_alloc_page(void)
-{
- return dev_alloc_pages(0);
-}
+#define dev_alloc_page() dev_alloc_pages(0)
/**
* dev_page_is_reusable - check whether a page can be reused for network Rx
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index e65ec3fd27998a..df848425e05414 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -410,11 +410,9 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg);
-static inline struct sk_psock_link *sk_psock_init_link(void)
-{
- return kzalloc(sizeof(struct sk_psock_link),
- GFP_ATOMIC | __GFP_NOWARN);
-}
+#define sk_psock_init_link() \
+ ((struct sk_psock_link *)kzalloc(sizeof(struct sk_psock_link), \
+ GFP_ATOMIC | __GFP_NOWARN))
static inline void sk_psock_free_link(struct sk_psock_link *link)
{
@@ -461,10 +459,12 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
{
+ read_lock_bh(&sk->sk_callback_lock);
if (psock->saved_data_ready)
psock->saved_data_ready(sk);
else
sk->sk_data_ready(sk);
+ read_unlock_bh(&sk->sk_callback_lock);
}
static inline void psock_set_prog(struct bpf_prog **pprog,
diff --git a/include/linux/slab.h b/include/linux/slab.h
index e53cbfa1832558..4cc37ef22aaed1 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -56,6 +56,9 @@ enum _slab_flag_bits {
#endif
_SLAB_OBJECT_POISON,
_SLAB_CMPXCHG_DOUBLE,
+#ifdef CONFIG_SLAB_OBJ_EXT
+ _SLAB_NO_OBJ_EXT,
+#endif
_SLAB_FLAGS_LAST_BIT
};
@@ -202,6 +205,13 @@ enum _slab_flag_bits {
#endif
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
+/* Slab created using create_boot_cache */
+#ifdef CONFIG_SLAB_OBJ_EXT
+#define SLAB_NO_OBJ_EXT __SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT)
+#else
+#define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED
+#endif
+
/*
* ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
*
@@ -261,7 +271,10 @@ int kmem_cache_shrink(struct kmem_cache *s);
/*
* Common kmalloc functions provided by all allocators
*/
-void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2);
+void * __must_check krealloc_noprof(const void *objp, size_t new_size,
+ gfp_t flags) __realloc_size(2);
+#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__))
+
void kfree(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);
@@ -513,7 +526,10 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
static_assert(PAGE_SHIFT <= 20);
#define kmalloc_index(s) __kmalloc_index(s, true)
-void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
+#include <linux/alloc_tag.h>
+
+void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
+#define __kmalloc(...) alloc_hooks(__kmalloc_noprof(__VA_ARGS__))
/**
* kmem_cache_alloc - Allocate an object
@@ -525,9 +541,14 @@ void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_siz
*
* Return: pointer to the new object or %NULL in case of error
*/
-void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc;
-void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
- gfp_t gfpflags) __assume_slab_alignment __malloc;
+void *kmem_cache_alloc_noprof(struct kmem_cache *cachep,
+ gfp_t flags) __assume_slab_alignment __malloc;
+#define kmem_cache_alloc(...) alloc_hooks(kmem_cache_alloc_noprof(__VA_ARGS__))
+
+void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
+ gfp_t gfpflags) __assume_slab_alignment __malloc;
+#define kmem_cache_alloc_lru(...) alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__))
+
void kmem_cache_free(struct kmem_cache *s, void *objp);
/*
@@ -538,29 +559,40 @@ void kmem_cache_free(struct kmem_cache *s, void *objp);
* Note that interrupts must be enabled when calling these functions.
*/
void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);
-int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p);
+
+int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p);
+#define kmem_cache_alloc_bulk(...) alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__))
static __always_inline void kfree_bulk(size_t size, void **p)
{
kmem_cache_free_bulk(NULL, size, p);
}
-void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
+void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
__alloc_size(1);
-void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment
- __malloc;
+#define __kmalloc_node(...) alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__))
-void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
+void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags,
+ int node) __assume_slab_alignment __malloc;
+#define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__))
+
+void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size)
__assume_kmalloc_alignment __alloc_size(3);
-void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
- int node, size_t size) __assume_kmalloc_alignment
+void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
+ int node, size_t size) __assume_kmalloc_alignment
__alloc_size(4);
-void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment
+#define kmalloc_trace(...) alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__))
+
+#define kmalloc_node_trace(...) alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__))
+
+void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment
__alloc_size(1);
+#define kmalloc_large(...) alloc_hooks(kmalloc_large_noprof(__VA_ARGS__))
-void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alignment
+void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment
__alloc_size(1);
+#define kmalloc_large_node(...) alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__))
/**
* kmalloc - allocate kernel memory
@@ -616,37 +648,39 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align
* Try really hard to succeed the allocation but fail
* eventually.
*/
-static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
+static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size) && size) {
unsigned int index;
if (size > KMALLOC_MAX_CACHE_SIZE)
- return kmalloc_large(size, flags);
+ return kmalloc_large_noprof(size, flags);
index = kmalloc_index(size);
- return kmalloc_trace(
+ return kmalloc_trace_noprof(
kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, size);
}
- return __kmalloc(size, flags);
+ return __kmalloc_noprof(size, flags);
}
+#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__))
-static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
{
if (__builtin_constant_p(size) && size) {
unsigned int index;
if (size > KMALLOC_MAX_CACHE_SIZE)
- return kmalloc_large_node(size, flags, node);
+ return kmalloc_large_node_noprof(size, flags, node);
index = kmalloc_index(size);
- return kmalloc_node_trace(
+ return kmalloc_node_trace_noprof(
kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, node, size);
}
- return __kmalloc_node(size, flags, node);
+ return __kmalloc_node_noprof(size, flags, node);
}
+#define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__))
/**
* kmalloc_array - allocate memory for an array.
@@ -654,16 +688,17 @@ static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t fla
* @size: element size.
* @flags: the type of memory to allocate (see kmalloc).
*/
-static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_t flags)
+static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
{
size_t bytes;
if (unlikely(check_mul_overflow(n, size, &bytes)))
return NULL;
if (__builtin_constant_p(n) && __builtin_constant_p(size))
- return kmalloc(bytes, flags);
- return __kmalloc(bytes, flags);
+ return kmalloc_noprof(bytes, flags);
+ return kmalloc_noprof(bytes, flags);
}
+#define kmalloc_array(...) alloc_hooks(kmalloc_array_noprof(__VA_ARGS__))
/**
* krealloc_array - reallocate memory for an array.
@@ -672,18 +707,19 @@ static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_
* @new_size: new size of a single member of the array
* @flags: the type of memory to allocate (see kmalloc)
*/
-static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p,
- size_t new_n,
- size_t new_size,
- gfp_t flags)
+static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p,
+ size_t new_n,
+ size_t new_size,
+ gfp_t flags)
{
size_t bytes;
if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
return NULL;
- return krealloc(p, bytes, flags);
+ return krealloc_noprof(p, bytes, flags);
}
+#define krealloc_array(...) alloc_hooks(krealloc_array_noprof(__VA_ARGS__))
/**
* kcalloc - allocate memory for an array. The memory is set to zero.
@@ -691,16 +727,12 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p,
* @size: element size.
* @flags: the type of memory to allocate (see kmalloc).
*/
-static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flags)
-{
- return kmalloc_array(n, size, flags | __GFP_ZERO);
-}
+#define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO)
-void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
+void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node,
unsigned long caller) __alloc_size(1);
-#define kmalloc_node_track_caller(size, flags, node) \
- __kmalloc_node_track_caller(size, flags, node, \
- _RET_IP_)
+#define kmalloc_node_track_caller(...) \
+ alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_))
/*
* kmalloc_track_caller is a special version of kmalloc that records the
@@ -710,11 +742,12 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
* allocator where we care about the real place the memory allocation
* request comes from.
*/
-#define kmalloc_track_caller(size, flags) \
- __kmalloc_node_track_caller(size, flags, \
- NUMA_NO_NODE, _RET_IP_)
+#define kmalloc_track_caller(...) kmalloc_node_track_caller(__VA_ARGS__, NUMA_NO_NODE)
-static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
+#define kmalloc_track_caller_noprof(...) \
+ kmalloc_node_track_caller_noprof(__VA_ARGS__, NUMA_NO_NODE, _RET_IP_)
+
+static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags,
int node)
{
size_t bytes;
@@ -722,75 +755,58 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size,
if (unlikely(check_mul_overflow(n, size, &bytes)))
return NULL;
if (__builtin_constant_p(n) && __builtin_constant_p(size))
- return kmalloc_node(bytes, flags, node);
- return __kmalloc_node(bytes, flags, node);
+ return kmalloc_node_noprof(bytes, flags, node);
+ return __kmalloc_node_noprof(bytes, flags, node);
}
+#define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__))
-static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
-{
- return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
-}
+#define kcalloc_node(_n, _size, _flags, _node) \
+ kmalloc_array_node(_n, _size, (_flags) | __GFP_ZERO, _node)
/*
* Shortcuts
*/
-static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
-{
- return kmem_cache_alloc(k, flags | __GFP_ZERO);
-}
+#define kmem_cache_zalloc(_k, _flags) kmem_cache_alloc(_k, (_flags)|__GFP_ZERO)
/**
* kzalloc - allocate memory. The memory is set to zero.
* @size: how many bytes of memory are required.
* @flags: the type of memory to allocate (see kmalloc).
*/
-static inline __alloc_size(1) void *kzalloc(size_t size, gfp_t flags)
+static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags)
{
- return kmalloc(size, flags | __GFP_ZERO);
+ return kmalloc_noprof(size, flags | __GFP_ZERO);
}
+#define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__))
+#define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
-/**
- * kzalloc_node - allocate zeroed memory from a particular memory node.
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kmalloc).
- * @node: memory node from which to allocate
- */
-static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int node)
-{
- return kmalloc_node(size, flags | __GFP_ZERO, node);
-}
+extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_size(1);
+#define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__))
-extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1);
-static inline __alloc_size(1) void *kvmalloc(size_t size, gfp_t flags)
-{
- return kvmalloc_node(size, flags, NUMA_NO_NODE);
-}
-static inline __alloc_size(1) void *kvzalloc_node(size_t size, gfp_t flags, int node)
-{
- return kvmalloc_node(size, flags | __GFP_ZERO, node);
-}
-static inline __alloc_size(1) void *kvzalloc(size_t size, gfp_t flags)
-{
- return kvmalloc(size, flags | __GFP_ZERO);
-}
+#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE)
+#define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE)
+#define kvzalloc(_size, _flags) kvmalloc(_size, _flags|__GFP_ZERO)
-static inline __alloc_size(1, 2) void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
+#define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, _flags|__GFP_ZERO, _node)
+
+static inline __alloc_size(1, 2) void *kvmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
{
size_t bytes;
if (unlikely(check_mul_overflow(n, size, &bytes)))
return NULL;
- return kvmalloc(bytes, flags);
+ return kvmalloc_node_noprof(bytes, flags, NUMA_NO_NODE);
}
-static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t flags)
-{
- return kvmalloc_array(n, size, flags | __GFP_ZERO);
-}
+#define kvmalloc_array(...) alloc_hooks(kvmalloc_array_noprof(__VA_ARGS__))
+#define kvcalloc(_n, _size, _flags) kvmalloc_array(_n, _size, _flags|__GFP_ZERO)
+#define kvcalloc_noprof(_n, _size, _flags) kvmalloc_array_noprof(_n, _size, _flags|__GFP_ZERO)
-extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
+extern void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
__realloc_size(3);
+#define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__))
+
extern void kvfree(const void *addr);
DEFINE_FREE(kvfree, void *, if (_T) kvfree(_T))
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index 307961b41541a6..fc5a206c40435f 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -50,11 +50,36 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src,
return 0;
}
+/* Deprecated.
+ * This is unsafe, unless caller checked user provided optlen.
+ * Prefer copy_safe_from_sockptr() instead.
+ */
static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
{
return copy_from_sockptr_offset(dst, src, 0, size);
}
+/**
+ * copy_safe_from_sockptr: copy a struct from sockptr
+ * @dst: Destination address, in kernel space. This buffer must be @ksize
+ * bytes long.
+ * @ksize: Size of @dst struct.
+ * @optval: Source address. (in user or kernel space)
+ * @optlen: Size of @optval data.
+ *
+ * Returns:
+ * * -EINVAL: @optlen < @ksize
+ * * -EFAULT: access to userspace failed.
+ * * 0 : @ksize bytes were copied
+ */
+static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
+ sockptr_t optval, unsigned int optlen)
+{
+ if (optlen < ksize)
+ return -EINVAL;
+ return copy_from_sockptr(dst, optval, ksize);
+}
+
static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
sockptr_t src, size_t usize)
{
@@ -92,9 +117,9 @@ static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size)
return copy_to_sockptr_offset(dst, 0, src, size);
}
-static inline void *memdup_sockptr(sockptr_t src, size_t len)
+static inline void *memdup_sockptr_noprof(sockptr_t src, size_t len)
{
- void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
+ void *p = kmalloc_track_caller_noprof(len, GFP_USER | __GFP_NOWARN);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -104,10 +129,11 @@ static inline void *memdup_sockptr(sockptr_t src, size_t len)
}
return p;
}
+#define memdup_sockptr(...) alloc_hooks(memdup_sockptr_noprof(__VA_ARGS__))
-static inline void *memdup_sockptr_nul(sockptr_t src, size_t len)
+static inline void *memdup_sockptr_nul_noprof(sockptr_t src, size_t len)
{
- char *p = kmalloc_track_caller(len + 1, GFP_KERNEL);
+ char *p = kmalloc_track_caller_noprof(len + 1, GFP_KERNEL);
if (!p)
return ERR_PTR(-ENOMEM);
@@ -118,6 +144,7 @@ static inline void *memdup_sockptr_nul(sockptr_t src, size_t len)
p[len] = '\0';
return p;
}
+#define memdup_sockptr_nul(...) alloc_hooks(memdup_sockptr_nul_noprof(__VA_ARGS__))
static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count)
{
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 3c6caa5abc7c42..e9ec32fb97d4a7 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -44,10 +44,9 @@ typedef u32 depot_stack_handle_t;
union handle_parts {
depot_stack_handle_t handle;
struct {
- /* pool_index is offset by 1 */
- u32 pool_index : DEPOT_POOL_INDEX_BITS;
- u32 offset : DEPOT_OFFSET_BITS;
- u32 extra : STACK_DEPOT_EXTRA_BITS;
+ u32 pool_index_plus_1 : DEPOT_POOL_INDEX_BITS;
+ u32 offset : DEPOT_OFFSET_BITS;
+ u32 extra : STACK_DEPOT_EXTRA_BITS;
};
};
diff --git a/include/linux/string.h b/include/linux/string.h
index 9ba8b4597009c9..793c27ad7c0d21 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -282,7 +282,9 @@ extern void kfree_const(const void *x);
extern char *kstrdup(const char *s, gfp_t gfp) __malloc;
extern const char *kstrdup_const(const char *s, gfp_t gfp);
extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
-extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
+extern void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
+#define kmemdup(...) alloc_hooks(kmemdup_noprof(__VA_ARGS__))
+
extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp);
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 24cd199dd6f3a9..d33bab33099ab0 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -210,7 +210,6 @@ struct svc_rdma_recv_ctxt {
*/
struct svc_rdma_write_info {
struct svcxprt_rdma *wi_rdma;
- struct list_head wi_list;
const struct svc_rdma_chunk *wi_chunk;
@@ -239,10 +238,7 @@ struct svc_rdma_send_ctxt {
struct ib_cqe sc_cqe;
struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream;
-
- struct list_head sc_write_info_list;
struct svc_rdma_write_info sc_reply_info;
-
void *sc_xprt_buf;
int sc_page_count;
int sc_cur_sge_no;
@@ -274,14 +270,11 @@ extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc,
enum dma_data_direction dir);
-extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt);
extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
-extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
- const struct svc_rdma_pcl *write_pcl,
- struct svc_rdma_send_ctxt *sctxt,
- const struct xdr_buf *xdr);
+extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_recv_ctxt *rctxt,
+ const struct xdr_buf *xdr);
extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
const struct svc_rdma_pcl *write_pcl,
const struct svc_rdma_pcl *reply_pcl,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index f53d608daa0131..11c53692f65fb7 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -259,7 +259,20 @@ struct swap_cluster_info {
};
#define CLUSTER_FLAG_FREE 1 /* This cluster is free */
#define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */
-#define CLUSTER_FLAG_HUGE 4 /* This cluster is backing a transparent huge page */
+
+/*
+ * The first page in the swap file is the swap header, which is always marked
+ * bad to prevent it from being allocated as an entry. This also prevents the
+ * cluster to which it belongs being marked free. Therefore 0 is safe to use as
+ * a sentinel to indicate next is not valid in percpu_cluster.
+ */
+#define SWAP_NEXT_INVALID 0
+
+#ifdef CONFIG_THP_SWAP
+#define SWAP_NR_ORDERS (PMD_ORDER + 1)
+#else
+#define SWAP_NR_ORDERS 1
+#endif
/*
* We assign a cluster to each CPU, so each CPU can allocate swap entry from
@@ -267,8 +280,7 @@ struct swap_cluster_info {
* throughput.
*/
struct percpu_cluster {
- struct swap_cluster_info index; /* Current cluster index */
- unsigned int next; /* Likely next allocation offset */
+ unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */
};
struct swap_cluster_list {
@@ -394,10 +406,13 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
#define MEMCG_RECLAIM_PROACTIVE (1 << 2)
+#define MIN_SWAPPINESS 0
+#define MAX_SWAPPINESS 200
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- unsigned int reclaim_options);
+ unsigned int reclaim_options,
+ int *swappiness);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
@@ -462,14 +477,14 @@ swp_entry_t folio_alloc_swap(struct folio *folio);
bool folio_free_swap(struct folio *folio);
void put_swap_folio(struct folio *folio, swp_entry_t entry);
extern swp_entry_t get_swap_page_of_type(int);
-extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern void swap_free(swp_entry_t);
extern void swapcache_free_entries(swp_entry_t *entries, int n);
-extern int free_swap_and_cache(swp_entry_t);
+extern void free_swap_and_cache_nr(swp_entry_t entry, int nr);
int swap_type_of(dev_t device, sector_t offset);
int find_first_swap(dev_t *device);
extern unsigned int count_swap_pages(int, int);
@@ -518,8 +533,9 @@ static inline void put_swap_device(struct swap_info_struct *si)
#define free_pages_and_swap_cache(pages, nr) \
release_pages((pages), (nr));
-/* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
-#define free_swap_and_cache(e) is_pfn_swap_entry(e)
+static inline void free_swap_and_cache_nr(swp_entry_t entry, int nr)
+{
+}
static inline void free_swap_cache(struct folio *folio)
{
@@ -587,14 +603,10 @@ static inline int add_swap_extent(struct swap_info_struct *sis,
}
#endif /* CONFIG_SWAP */
-#ifdef CONFIG_THP_SWAP
-extern int split_swap_cluster(swp_entry_t entry);
-#else
-static inline int split_swap_cluster(swp_entry_t entry)
+static inline void free_swap_and_cache(swp_entry_t entry)
{
- return 0;
+ free_swap_and_cache_nr(entry, 1);
}
-#endif
#ifdef CONFIG_MEMCG
static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 48b700ba1d188a..a5c560a2f8c258 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -390,6 +390,35 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
}
#endif /* CONFIG_MIGRATION */
+#ifdef CONFIG_MEMORY_FAILURE
+
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+ return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_HWPOISON;
+}
+
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+ return 0;
+}
+#endif
+
typedef unsigned long pte_marker;
#define PTE_MARKER_UFFD_WP BIT(0)
@@ -483,8 +512,9 @@ static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry)
/*
* A pfn swap entry is a special type of swap entry that always has a pfn stored
- * in the swap offset. They are used to represent unaddressable device memory
- * and to restrict access to a page undergoing migration.
+ * in the swap offset. They can either be used to represent unaddressable device
+ * memory, to restrict access to a page undergoing migration or to represent a
+ * pfn which has been hwpoisoned and unmapped.
*/
static inline bool is_pfn_swap_entry(swp_entry_t entry)
{
@@ -492,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry)
BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
return is_migration_entry(entry) || is_device_private_entry(entry) ||
- is_device_exclusive_entry(entry);
+ is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
}
struct page_vma_mapped_walk;
@@ -561,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
}
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
-#ifdef CONFIG_MEMORY_FAILURE
-
-/*
- * Support for hardware poisoned pages
- */
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
- BUG_ON(!PageLocked(page));
- return swp_entry(SWP_HWPOISON, page_to_pfn(page));
-}
-
-static inline int is_hwpoison_entry(swp_entry_t entry)
-{
- return swp_type(entry) == SWP_HWPOISON;
-}
-
-#else
-
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
- return swp_entry(0, 0);
-}
-
-static inline int is_hwpoison_entry(swp_entry_t swp)
-{
- return 0;
-}
-#endif
-
static inline int non_swap_entry(swp_entry_t entry)
{
return swp_type(entry) >= MAX_SWAPFILES;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e619ac10cd234c..9104952d323d1d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -821,6 +821,7 @@ asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags);
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
unsigned long prot, unsigned long pgoff,
unsigned long flags);
+asmlinkage long sys_mseal(unsigned long start, size_t len, unsigned long flags);
asmlinkage long sys_mbind(unsigned long start, unsigned long len,
unsigned long mode,
const unsigned long __user *nmask,
diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h
index c6540ceea14303..0982d1d52b24d9 100644
--- a/include/linux/timecounter.h
+++ b/include/linux/timecounter.h
@@ -22,7 +22,7 @@
*
* @read: returns the current cycle value
* @mask: bitmask for two's complement
- * subtraction of non 64 bit counters,
+ * subtraction of non-64-bit counters,
* see CYCLECOUNTER_MASK() helper macro
* @mult: cycle to nanosecond multiplier
* @shift: cycle to nanosecond divisor (power of two)
@@ -35,7 +35,7 @@ struct cyclecounter {
};
/**
- * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds
+ * struct timecounter - layer above a &struct cyclecounter which counts nanoseconds
* Contains the state needed by timecounter_read() to detect
* cycle counter wrap around. Initialize with
* timecounter_init(). Also used to convert cycle counts into the
@@ -66,6 +66,8 @@ struct timecounter {
* @cycles: Cycles
* @mask: bit mask for maintaining the 'frac' field
* @frac: pointer to storage for the fractional nanoseconds.
+ *
+ * Returns: cycle counter cycles converted to nanoseconds
*/
static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
u64 cycles, u64 mask, u64 *frac)
@@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
/**
* timecounter_adjtime - Shifts the time of the clock.
+ * @tc: The &struct timecounter to adjust
* @delta: Desired change in nanoseconds.
*/
static inline void timecounter_adjtime(struct timecounter *tc, s64 delta)
@@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc,
*
* In other words, keeps track of time since the same epoch as
* the function which generated the initial time stamp.
+ *
+ * Returns: nanoseconds since the initial time stamp
*/
extern u64 timecounter_read(struct timecounter *tc);
@@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc);
*
* This allows conversion of cycle counter values which were generated
* in the past.
+ *
+ * Returns: cycle counter converted to nanoseconds since the initial time stamp
*/
extern u64 timecounter_cyc2time(const struct timecounter *tc,
u64 cycle_tstamp);
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 7e50cbd97f86e3..0ea7823b7f31f6 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -22,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv,
const struct timezone *tz);
/*
- * ktime_get() family: read the current time in a multitude of ways,
+ * ktime_get() family - read the current time in a multitude of ways.
*
* The default time reference is CLOCK_MONOTONIC, starting at
* boot time but not counting the time spent in suspend.
* For other references, use the functions with "real", "clocktai",
* "boottime" and "raw" suffixes.
*
- * To get the time in a different format, use the ones wit
+ * To get the time in a different format, use the ones with
* "ns", "ts64" and "seconds" suffix.
*
* See Documentation/core-api/timekeeping.rst for more details.
@@ -74,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void);
/**
* ktime_get_real - get the real (wall-) time in ktime_t format
+ *
+ * Returns: real (wall) time in ktime_t format
*/
static inline ktime_t ktime_get_real(void)
{
@@ -86,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void)
}
/**
- * ktime_get_boottime - Returns monotonic time since boot in ktime_t format
+ * ktime_get_boottime - Get monotonic time since boot in ktime_t format
*
* This is similar to CLOCK_MONTONIC/ktime_get, but also includes the
* time spent in suspend.
+ *
+ * Returns: monotonic time since boot in ktime_t format
*/
static inline ktime_t ktime_get_boottime(void)
{
@@ -102,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void)
}
/**
- * ktime_get_clocktai - Returns the TAI time of day in ktime_t format
+ * ktime_get_clocktai - Get the TAI time of day in ktime_t format
+ *
+ * Returns: the TAI time of day in ktime_t format
*/
static inline ktime_t ktime_get_clocktai(void)
{
@@ -144,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void)
/**
* ktime_mono_to_real - Convert monotonic time to clock realtime
+ * @mono: monotonic time to convert
+ *
+ * Returns: time converted to realtime clock
*/
static inline ktime_t ktime_mono_to_real(ktime_t mono)
{
return ktime_mono_to_any(mono, TK_OFFS_REAL);
}
+/**
+ * ktime_get_ns - Get the current time in nanoseconds
+ *
+ * Returns: current time converted to nanoseconds
+ */
static inline u64 ktime_get_ns(void)
{
return ktime_to_ns(ktime_get());
}
+/**
+ * ktime_get_real_ns - Get the current real/wall time in nanoseconds
+ *
+ * Returns: current real time converted to nanoseconds
+ */
static inline u64 ktime_get_real_ns(void)
{
return ktime_to_ns(ktime_get_real());
}
+/**
+ * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds
+ *
+ * Returns: current boottime converted to nanoseconds
+ */
static inline u64 ktime_get_boottime_ns(void)
{
return ktime_to_ns(ktime_get_boottime());
}
+/**
+ * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds
+ *
+ * Returns: current TAI time converted to nanoseconds
+ */
static inline u64 ktime_get_clocktai_ns(void)
{
return ktime_to_ns(ktime_get_clocktai());
}
+/**
+ * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds
+ *
+ * Returns: current raw monotonic time converted to nanoseconds
+ */
static inline u64 ktime_get_raw_ns(void)
{
return ktime_to_ns(ktime_get_raw());
@@ -224,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void);
extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
-/*
- * struct ktime_timestanps - Simultaneous mono/boot/real timestamps
+/**
+ * struct ktime_timestamps - Simultaneous mono/boot/real timestamps
* @mono: Monotonic timestamp
* @boot: Boottime timestamp
* @real: Realtime timestamp
@@ -242,7 +276,8 @@ struct ktime_timestamps {
* @cycles: Clocksource counter value to produce the system times
* @real: Realtime system time
* @raw: Monotonic raw system time
- * @clock_was_set_seq: The sequence number of clock was set events
+ * @cs_id: Clocksource ID
+ * @clock_was_set_seq: The sequence number of clock-was-set events
* @cs_was_changed_seq: The sequence number of clocksource change events
*/
struct system_time_snapshot {
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 14a633ba61d643..e67ecd1cbc97d6 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -22,7 +22,7 @@
#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
#endif
-/**
+/*
* @TIMER_DEFERRABLE: A deferrable timer will work normally when the
* system is busy, but will not cause a CPU to come out of idle just
* to service it; instead, the timer will be serviced when the CPU
@@ -140,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { }
* or not. Callers must ensure serialization wrt. other operations done
* to this timer, eg. interrupt contexts, or other CPUs on SMP.
*
- * return value: 1 if the timer is pending, 0 if not.
+ * Returns: 1 if the timer is pending, 0 if not.
*/
static inline int timer_pending(const struct timer_list * timer)
{
@@ -175,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer);
* See timer_delete_sync() for detailed explanation.
*
* Do not use in new code. Use timer_delete_sync() instead.
+ *
+ * Returns:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
*/
static inline int del_timer_sync(struct timer_list *timer)
{
@@ -188,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer)
* See timer_delete() for detailed explanation.
*
* Do not use in new code. Use timer_delete() instead.
+ *
+ * Returns:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
*/
static inline int del_timer(struct timer_list *timer)
{
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index ffe48e69b3f3ae..457879938fc198 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p)
p->v++;
}
-static inline void u64_stats_init(struct u64_stats_sync *syncp)
-{
- seqcount_init(&syncp->seq);
-}
+#define u64_stats_init(syncp) \
+ do { \
+ struct u64_stats_sync *__s = (syncp); \
+ seqcount_init(&__s->seq); \
+ } while (0)
static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
{
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 3748e82b627b70..e398e1dbd2d365 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -108,7 +108,7 @@ struct udp_sock {
#define udp_assign_bit(nr, sk, val) \
assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val)
-#define UDP_MAX_SEGMENTS (1 << 6UL)
+#define UDP_MAX_SEGMENTS (1 << 7UL)
#define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk)
@@ -150,6 +150,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
}
}
+DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
+#if IS_ENABLED(CONFIG_IPV6)
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+#endif
+
+static inline bool udp_encap_needed(void)
+{
+ if (static_branch_unlikely(&udp_encap_needed_key))
+ return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (static_branch_unlikely(&udpv6_encap_needed_key))
+ return true;
+#endif
+
+ return false;
+}
+
static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
{
if (!skb_is_gso(skb))
@@ -163,6 +181,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
!udp_test_bit(ACCEPT_FRAGLIST, sk))
return true;
+ /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still
+ * land in a tunnel as the socket check in udp_gro_receive cannot be
+ * foolproof.
+ */
+ if (udp_encap_needed() &&
+ READ_ONCE(udp_sk(sk)->encap_rcv) &&
+ !(skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)))
+ return true;
+
return false;
}
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b0201747a263a9..26c4325aa3734e 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -170,7 +170,7 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev);
/**
* struct virtio_driver - operations for a virtio I/O driver
- * @driver: underlying device driver (populate name and owner).
+ * @driver: underlying device driver (populate name).
* @id_table: the ids serviced by this driver.
* @feature_table: an array of feature numbers supported by this driver.
* @feature_table_size: number of entries in the feature table array.
@@ -208,7 +208,10 @@ static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
return container_of(drv, struct virtio_driver, driver);
}
-int register_virtio_driver(struct virtio_driver *drv);
+/* use a macro to avoid include chaining to get THIS_MODULE */
+#define register_virtio_driver(drv) \
+ __register_virtio_driver(drv, THIS_MODULE)
+int __register_virtio_driver(struct virtio_driver *drv, struct module *owner);
void unregister_virtio_driver(struct virtio_driver *drv);
/* module_virtio_driver() - Helper macro for drivers that don't do
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 98ea90e90439d8..e4a631ec430bb7 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -2,6 +2,8 @@
#ifndef _LINUX_VMALLOC_H
#define _LINUX_VMALLOC_H
+#include <linux/alloc_tag.h>
+#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/list.h>
@@ -138,26 +140,54 @@ extern unsigned long vmalloc_nr_pages(void);
static inline unsigned long vmalloc_nr_pages(void) { return 0; }
#endif
-extern void *vmalloc(unsigned long size) __alloc_size(1);
-extern void *vzalloc(unsigned long size) __alloc_size(1);
-extern void *vmalloc_user(unsigned long size) __alloc_size(1);
-extern void *vmalloc_node(unsigned long size, int node) __alloc_size(1);
-extern void *vzalloc_node(unsigned long size, int node) __alloc_size(1);
-extern void *vmalloc_32(unsigned long size) __alloc_size(1);
-extern void *vmalloc_32_user(unsigned long size) __alloc_size(1);
-extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
-extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
+extern void *vmalloc_noprof(unsigned long size) __alloc_size(1);
+#define vmalloc(...) alloc_hooks(vmalloc_noprof(__VA_ARGS__))
+
+extern void *vzalloc_noprof(unsigned long size) __alloc_size(1);
+#define vzalloc(...) alloc_hooks(vzalloc_noprof(__VA_ARGS__))
+
+extern void *vmalloc_user_noprof(unsigned long size) __alloc_size(1);
+#define vmalloc_user(...) alloc_hooks(vmalloc_user_noprof(__VA_ARGS__))
+
+extern void *vmalloc_node_noprof(unsigned long size, int node) __alloc_size(1);
+#define vmalloc_node(...) alloc_hooks(vmalloc_node_noprof(__VA_ARGS__))
+
+extern void *vzalloc_node_noprof(unsigned long size, int node) __alloc_size(1);
+#define vzalloc_node(...) alloc_hooks(vzalloc_node_noprof(__VA_ARGS__))
+
+extern void *vmalloc_32_noprof(unsigned long size) __alloc_size(1);
+#define vmalloc_32(...) alloc_hooks(vmalloc_32_noprof(__VA_ARGS__))
+
+extern void *vmalloc_32_user_noprof(unsigned long size) __alloc_size(1);
+#define vmalloc_32_user(...) alloc_hooks(vmalloc_32_user_noprof(__VA_ARGS__))
+
+extern void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
+#define __vmalloc(...) alloc_hooks(__vmalloc_noprof(__VA_ARGS__))
+
+extern void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller) __alloc_size(1);
-void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
+#define __vmalloc_node_range(...) alloc_hooks(__vmalloc_node_range_noprof(__VA_ARGS__))
+
+void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask,
int node, const void *caller) __alloc_size(1);
-void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
+#define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__))
+
+void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
+#define vmalloc_huge(...) alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__))
+
+extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
+#define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__))
+
+extern void *vmalloc_array_noprof(size_t n, size_t size) __alloc_size(1, 2);
+#define vmalloc_array(...) alloc_hooks(vmalloc_array_noprof(__VA_ARGS__))
+
+extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
+#define __vcalloc(...) alloc_hooks(__vcalloc_noprof(__VA_ARGS__))
-extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
-extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2);
-extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
-extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2);
+extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2);
+#define vcalloc(...) alloc_hooks(vcalloc_noprof(__VA_ARGS__))
extern void vfree(const void *addr);
extern void vfree_atomic(const void *addr);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 343906a98d6eed..735eae6e272cba 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -487,14 +487,6 @@ static inline void node_stat_sub_folio(struct folio *folio,
mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
}
-static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
- int migratetype)
-{
- __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
- if (is_migrate_cma(migratetype))
- __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
-}
-
extern const char * const vmstat_text[];
static inline const char *zone_stat_name(enum zone_stat_item item)
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 9845cb62e40b2d..112d806ddbe49d 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -355,6 +355,7 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
+unsigned long cgwb_calc_thresh(struct bdi_writeback *wb);
void wb_update_bandwidth(struct bdi_writeback *wb);
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index cb571dfcf4b167..d2e4d36246890c 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -12,14 +12,18 @@
#include <linux/bitmap.h>
#include <linux/bug.h>
#include <linux/compiler.h>
+#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/kconfig.h>
-#include <linux/kernel.h>
+#include <linux/limits.h>
+#include <linux/lockdep.h>
#include <linux/rcupdate.h>
#include <linux/sched/mm.h>
#include <linux/spinlock.h>
#include <linux/types.h>
+struct list_lru;
+
/*
* The bottom two bits of the entry determine how the XArray interprets
* the contents:
@@ -1146,7 +1150,7 @@ static inline void xa_release(struct xarray *xa, unsigned long index)
#define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT)
#define XA_CHUNK_MASK (XA_CHUNK_SIZE - 1)
#define XA_MAX_MARKS 3
-#define XA_MARK_LONGS DIV_ROUND_UP(XA_CHUNK_SIZE, BITS_PER_LONG)
+#define XA_MARK_LONGS BITS_TO_LONGS(XA_CHUNK_SIZE)
/*
* @count is the count of every non-NULL element in the ->slots array
@@ -1548,6 +1552,7 @@ void xas_create_range(struct xa_state *);
#ifdef CONFIG_XARRAY_MULTI
int xa_get_order(struct xarray *, unsigned long index);
+int xas_get_order(struct xa_state *xas);
void xas_split(struct xa_state *, void *entry, unsigned int order);
void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t);
#else
@@ -1556,6 +1561,11 @@ static inline int xa_get_order(struct xarray *xa, unsigned long index)
return 0;
}
+static inline int xas_get_order(struct xa_state *xas)
+{
+ return 0;
+}
+
static inline void xas_split(struct xa_state *xas, void *entry,
unsigned int order)
{
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 3296438eec0620..a67d62b796980a 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -53,7 +53,7 @@ void *zpool_map_handle(struct zpool *pool, unsigned long handle,
void zpool_unmap_handle(struct zpool *pool, unsigned long handle);
-u64 zpool_get_total_size(struct zpool *pool);
+u64 zpool_get_total_pages(struct zpool *pool);
/**
@@ -91,7 +91,7 @@ struct zpool_driver {
enum zpool_mapmode mm);
void (*unmap)(void *pool, unsigned long handle);
- u64 (*total_size)(void *pool);
+ u64 (*total_pages)(void *pool);
};
void zpool_register_driver(struct zpool_driver *driver);
diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 341aea4900704c..2a85b941db975d 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -7,7 +7,6 @@
struct lruvec;
-extern u64 zswap_pool_total_size;
extern atomic_t zswap_stored_pages;
#ifdef CONFIG_ZSWAP
@@ -27,6 +26,7 @@ struct zswap_lruvec_state {
atomic_long_t nr_zswap_protected;
};
+unsigned long zswap_total_pages(void);
bool zswap_store(struct folio *folio);
bool zswap_load(struct folio *folio);
void zswap_invalidate(swp_entry_t swp);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 9d06eb945509ec..62a407db1bf5ff 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -438,6 +438,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp)
refcount_inc(&ifp->refcnt);
}
+static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp)
+{
+ return refcount_inc_not_zero(&ifp->refcnt);
+}
/*
* compute link-local solicited-node multicast address
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 627ea8e2d91598..3dee0b2721aa40 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -85,6 +85,9 @@ enum unix_socket_lock_class {
U_LOCK_NORMAL,
U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */
U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */
+ U_LOCK_GC_LISTENER, /* used for listening socket while determining gc
+ * candidates to close a small race window.
+ */
};
static inline void unix_state_lock_nested(struct sock *sk,
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 9fe95a22abeb7e..eaec5d6caa29d2 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -585,6 +585,15 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
return skb;
}
+static inline int bt_copy_from_sockptr(void *dst, size_t dst_size,
+ sockptr_t src, size_t src_size)
+{
+ if (dst_size > src_size)
+ return -EINVAL;
+
+ return copy_from_sockptr(dst, src, dst_size);
+}
+
int bt_to_errno(u16 code);
__u8 bt_status(int err);
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8701ca5f31eec3..5c12761cbc0e21 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -176,6 +176,15 @@ enum {
*/
HCI_QUIRK_USE_BDADDR_PROPERTY,
+ /* When this quirk is set, the Bluetooth Device Address provided by
+ * the 'local-bd-address' fwnode property is incorrectly specified in
+ * big-endian order.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BDADDR_PROPERTY_BROKEN,
+
/* When this quirk is set, the duplicate filtering during
* scanning is based on Bluetooth devices addresses. To allow
* RSSI based updates, restart scanning if needed.
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 56fb42df44a333..e8f581f3f3ce6d 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -738,6 +738,8 @@ struct hci_conn {
__u8 le_per_adv_data[HCI_MAX_PER_AD_TOT_LEN];
__u16 le_per_adv_data_len;
__u16 le_per_adv_data_offset;
+ __u8 le_adv_phy;
+ __u8 le_adv_sec_phy;
__u8 le_tx_phy;
__u8 le_rx_phy;
__s8 rssi;
@@ -1512,7 +1514,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
enum conn_reasons conn_reason);
struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, bool dst_resolved, u8 sec_level,
- u16 conn_timeout, u8 role);
+ u16 conn_timeout, u8 role, u8 phy, u8 sec_phy);
void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status);
struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
u8 sec_level, u8 auth_type,
@@ -1905,6 +1907,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
#define privacy_mode_capable(dev) (use_ll_privacy(dev) && \
(hdev->commands[39] & 0x04))
+#define read_key_size_capable(dev) \
+ ((dev)->commands[20] & 0x10 && \
+ !test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks))
+
/* Use enhanced synchronous connection if command is supported and its quirk
* has not been set.
*/
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 2e2be4fd2bb653..1e09329acc4268 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4991,6 +4991,7 @@ struct cfg80211_ops {
* set this flag to update channels on beacon hints.
* @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link
* of an NSTR mobile AP MLD.
+ * @WIPHY_FLAG_DISABLE_WEXT: disable wireless extensions for this device
*/
enum wiphy_flags {
WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0),
@@ -5002,6 +5003,7 @@ enum wiphy_flags {
WIPHY_FLAG_4ADDR_STATION = BIT(6),
WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7),
WIPHY_FLAG_IBSS_RSN = BIT(8),
+ WIPHY_FLAG_DISABLE_WEXT = BIT(9),
WIPHY_FLAG_MESH_AUTH = BIT(10),
WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11),
WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12),
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 9ab4bf704e8643..ccf171f7eb60d4 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -175,6 +175,7 @@ void inet_csk_init_xmit_timers(struct sock *sk,
void (*delack_handler)(struct timer_list *),
void (*keepalive_handler)(struct timer_list *));
void inet_csk_clear_xmit_timers(struct sock *sk);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk);
static inline void inet_csk_schedule_ack(struct sock *sk)
{
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 5cd64bb2104df3..c286cc2e766ee0 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -361,6 +361,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb)
return pskb_network_may_pull(skb, nhlen);
}
+/* Variant of pskb_inet_may_pull().
+ */
+static inline bool skb_vlan_inet_prepare(struct sk_buff *skb)
+{
+ int nhlen = 0, maclen = ETH_HLEN;
+ __be16 type = skb->protocol;
+
+ /* Essentially this is skb_protocol(skb, true)
+ * And we get MAC len.
+ */
+ if (eth_type_vlan(type))
+ type = __vlan_get_protocol(skb, type, &maclen);
+
+ switch (type) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ nhlen = sizeof(struct ipv6hdr);
+ break;
+#endif
+ case htons(ETH_P_IP):
+ nhlen = sizeof(struct iphdr);
+ break;
+ }
+ /* For ETH_P_IPV6/ETH_P_IP we make sure to pull
+ * a base network header in skb->head.
+ */
+ if (!pskb_may_pull(skb, maclen + nhlen))
+ return false;
+
+ skb_set_network_header(skb, maclen);
+ return true;
+}
+
static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
{
const struct ip_tunnel_encap_ops *ops;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 353488ab94a294..2d7f87bc5324b4 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -953,6 +953,8 @@ enum mac80211_tx_info_flags {
* of their QoS TID or other priority field values.
* @IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX: first MLO TX, used mostly internally
* for sequence number assignment
+ * @IEEE80211_TX_CTRL_SCAN_TX: Indicates that this frame is transmitted
+ * due to scanning, not in normal operation on the interface.
* @IEEE80211_TX_CTRL_MLO_LINK: If not @IEEE80211_LINK_UNSPECIFIED, this
* frame should be transmitted on the specific link. This really is
* only relevant for frames that do not have data present, and is
@@ -973,6 +975,7 @@ enum mac80211_tx_control_flags {
IEEE80211_TX_CTRL_NO_SEQNO = BIT(7),
IEEE80211_TX_CTRL_DONT_REORDER = BIT(8),
IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX = BIT(9),
+ IEEE80211_TX_CTRL_SCAN_TX = BIT(10),
IEEE80211_TX_CTRL_MLO_LINK = 0xf0000000,
};
diff --git a/include/net/macsec.h b/include/net/macsec.h
index dbd22180cc5c34..de216cbc6b059f 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -321,6 +321,7 @@ struct macsec_context {
* for the TX tag
* @needed_tailroom: number of bytes reserved at the end of the sk_buff for the
* TX tag
+ * @rx_uses_md_dst: whether MACsec device offload supports sk_buff md_dst
*/
struct macsec_ops {
/* Device wide */
@@ -352,6 +353,7 @@ struct macsec_ops {
struct sk_buff *skb);
unsigned int needed_headroom;
unsigned int needed_tailroom;
+ bool rx_uses_md_dst;
};
void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa);
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 76147feb0d10ae..4eeedf14711b30 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -39,7 +39,6 @@ enum TRI_STATE {
#define COMP_ENTRY_SIZE 64
#define RX_BUFFERS_PER_QUEUE 512
-#define MANA_RX_DATA_ALIGN 64
#define MAX_SEND_BUFFERS_PER_QUEUE 256
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index a763dd327c6ea9..9abb7ee40d72fc 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -336,7 +336,7 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
int nf_flow_table_offload_init(void);
void nf_flow_table_offload_exit(void);
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
{
__be16 proto;
@@ -352,6 +352,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
return 0;
}
+static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
+{
+ if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+ return false;
+
+ *inner_proto = __nf_flow_pppoe_proto(skb);
+
+ return true;
+}
+
#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e27c28b612e464..3f1ed467f951f6 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -307,9 +307,23 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
return (void *)priv;
}
+
+/**
+ * enum nft_iter_type - nftables set iterator type
+ *
+ * @NFT_ITER_READ: read-only iteration over set elements
+ * @NFT_ITER_UPDATE: iteration under mutex to update set element state
+ */
+enum nft_iter_type {
+ NFT_ITER_UNSPEC,
+ NFT_ITER_READ,
+ NFT_ITER_UPDATE,
+};
+
struct nft_set;
struct nft_set_iter {
u8 genmask;
+ enum nft_iter_type type:8;
unsigned int count;
unsigned int skip;
int err;
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index f3ab0b8a4b18ad..c31bd96dafdbac 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -274,15 +274,17 @@ struct netlbl_calipso_ops {
* on success, NULL on failure.
*
*/
-static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags)
+static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc_noprof(gfp_t flags)
{
struct netlbl_lsm_cache *cache;
- cache = kzalloc(sizeof(*cache), flags);
+ cache = kzalloc_noprof(sizeof(*cache), flags);
if (cache)
refcount_set(&cache->refcount, 1);
return cache;
}
+#define netlbl_secattr_cache_alloc(...) \
+ alloc_hooks(netlbl_secattr_cache_alloc_noprof(__VA_ARGS__))
/**
* netlbl_secattr_cache_free - Frees a netlbl_lsm_cache struct
@@ -311,10 +313,11 @@ static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache)
* on failure.
*
*/
-static inline struct netlbl_lsm_catmap *netlbl_catmap_alloc(gfp_t flags)
+static inline struct netlbl_lsm_catmap *netlbl_catmap_alloc_noprof(gfp_t flags)
{
- return kzalloc(sizeof(struct netlbl_lsm_catmap), flags);
+ return kzalloc_noprof(sizeof(struct netlbl_lsm_catmap), flags);
}
+#define netlbl_catmap_alloc(...) alloc_hooks(netlbl_catmap_alloc_noprof(__VA_ARGS__))
/**
* netlbl_catmap_free - Free a LSM secattr catmap
@@ -376,10 +379,11 @@ static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr)
* pointer on success, or NULL on failure.
*
*/
-static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(gfp_t flags)
+static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc_noprof(gfp_t flags)
{
- return kzalloc(sizeof(struct netlbl_lsm_secattr), flags);
+ return kzalloc_noprof(sizeof(struct netlbl_lsm_secattr), flags);
}
+#define netlbl_secattr_alloc(...) alloc_hooks(netlbl_secattr_alloc_noprof(__VA_ARGS__))
/**
* netlbl_secattr_free - Frees a netlbl_lsm_secattr struct
diff --git a/include/net/netlink.h b/include/net/netlink.h
index c19ff921b661ad..972b5484fa6fc4 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1891,10 +1891,11 @@ static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla)
* @src: netlink attribute to duplicate from
* @gfp: GFP mask
*/
-static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp)
+static inline void *nla_memdup_noprof(const struct nlattr *src, gfp_t gfp)
{
- return kmemdup(nla_data(src), nla_len(src), gfp);
+ return kmemdup_noprof(nla_data(src), nla_len(src), gfp);
}
+#define nla_memdup(...) alloc_hooks(nla_memdup_noprof(__VA_ARGS__))
/**
* nla_nest_start_noflag - Start a new level of nested attributes
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 004e651e6067e7..29495c331d20e9 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -127,12 +127,12 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb,
}
static inline struct request_sock *
-reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
+reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener,
bool attach_listener)
{
struct request_sock *req;
- req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
+ req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN);
if (!req)
return NULL;
req->rsk_listener = NULL;
@@ -157,6 +157,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
return req;
}
+#define reqsk_alloc(...) alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__))
static inline void __reqsk_free(struct request_sock *req)
{
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index cefe0c4bdae34c..41ca14e81d55f9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -117,6 +117,7 @@ struct Qdisc {
struct qdisc_skb_head q;
struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
+ int owner;
unsigned long state;
unsigned long state2; /* must be written under qdisc spinlock */
struct Qdisc *next_sched;
diff --git a/include/net/sock.h b/include/net/sock.h
index b5e00702acc1f0..b4b553df7870c0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1410,32 +1410,34 @@ sk_memory_allocated(const struct sock *sk)
#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
extern int sysctl_mem_pcpu_rsv;
+static inline void proto_memory_pcpu_drain(struct proto *proto)
+{
+ int val = this_cpu_xchg(*proto->per_cpu_fw_alloc, 0);
+
+ if (val)
+ atomic_long_add(val, proto->memory_allocated);
+}
+
static inline void
-sk_memory_allocated_add(struct sock *sk, int amt)
+sk_memory_allocated_add(const struct sock *sk, int val)
{
- int local_reserve;
+ struct proto *proto = sk->sk_prot;
- preempt_disable();
- local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) {
- __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
- atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
- }
- preempt_enable();
+ val = this_cpu_add_return(*proto->per_cpu_fw_alloc, val);
+
+ if (unlikely(val >= READ_ONCE(sysctl_mem_pcpu_rsv)))
+ proto_memory_pcpu_drain(proto);
}
static inline void
-sk_memory_allocated_sub(struct sock *sk, int amt)
+sk_memory_allocated_sub(const struct sock *sk, int val)
{
- int local_reserve;
+ struct proto *proto = sk->sk_prot;
- preempt_disable();
- local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) {
- __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
- atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
- }
- preempt_enable();
+ val = this_cpu_sub_return(*proto->per_cpu_fw_alloc, val);
+
+ if (unlikely(val <= -READ_ONCE(sysctl_mem_pcpu_rsv)))
+ proto_memory_pcpu_drain(proto);
}
#define SK_ALLOC_PERCPU_COUNTER_BATCH 16
@@ -1759,6 +1761,13 @@ static inline void sock_owned_by_me(const struct sock *sk)
#endif
}
+static inline void sock_not_owned_by_me(const struct sock *sk)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks);
+#endif
+}
+
static inline bool sock_owned_by_user(const struct sock *sk)
{
sock_owned_by_me(sk);
diff --git a/include/net/tcx.h b/include/net/tcx.h
index 04be9377785d77..72a3e75e539fb0 100644
--- a/include/net/tcx.h
+++ b/include/net/tcx.h
@@ -75,9 +75,9 @@ tcx_entry_fetch(struct net_device *dev, bool ingress)
return rcu_dereference_rtnl(dev->tcx_egress);
}
-static inline struct bpf_mprog_entry *tcx_entry_create(void)
+static inline struct bpf_mprog_entry *tcx_entry_create_noprof(void)
{
- struct tcx_entry *tcx = kzalloc(sizeof(*tcx), GFP_KERNEL);
+ struct tcx_entry *tcx = kzalloc_noprof(sizeof(*tcx), GFP_KERNEL);
if (tcx) {
bpf_mprog_bundle_init(&tcx->bundle);
@@ -85,6 +85,7 @@ static inline struct bpf_mprog_entry *tcx_entry_create(void)
}
return NULL;
}
+#define tcx_entry_create(...) alloc_hooks(tcx_entry_create_noprof(__VA_ARGS__))
static inline void tcx_entry_free(struct bpf_mprog_entry *entry)
{
diff --git a/include/net/tls.h b/include/net/tls.h
index 340ad43971e471..33f657d3c0510a 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -111,7 +111,8 @@ struct tls_strparser {
u32 stopped : 1;
u32 copy_mode : 1;
u32 mixed_decrypted : 1;
- u32 msg_ready : 1;
+
+ bool msg_ready;
struct strp_msg stm;
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 3cb4dc9bd70e44..3d54de168a6d9d 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -188,6 +188,8 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
{
if (!compl)
return;
+ if (!compl->tx_timestamp)
+ return;
*compl->tx_timestamp = ops->tmo_fill_timestamp(priv);
}
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 2e58d5e6ac0e46..d678929441933b 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -11,6 +11,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/rdmavt_cq.h>
#include <rdma/rvt-abi.h>
+#include <linux/vmalloc.h>
/*
* Atomic bit definitions for r_aflags.
*/
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 4ce1988b2ba01c..f40915d2eceef4 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -12,6 +12,7 @@ struct request;
struct scsi_driver {
struct device_driver gendrv;
+ int (*resume)(struct device *);
void (*rescan)(struct device *);
blk_status_t (*init_command)(struct scsi_cmnd *);
void (*uninit_command)(struct scsi_cmnd *);
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index b259d42a1e1aff..129001f600fc95 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -767,6 +767,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht);
#define scsi_template_proc_dir(sht) NULL
#endif
extern void scsi_scan_host(struct Scsi_Host *);
+extern int scsi_resume_device(struct scsi_device *sdev);
extern int scsi_rescan_device(struct scsi_device *sdev);
extern void scsi_remove_host(struct Scsi_Host *);
extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h
index e0629699b56338..1a3c6f66f62055 100644
--- a/include/sound/cs35l56.h
+++ b/include/sound/cs35l56.h
@@ -267,6 +267,7 @@ struct cs35l56_base {
bool fw_patched;
bool secured;
bool can_hibernate;
+ bool fw_owns_asp1;
bool cal_data_valid;
s8 cal_index;
struct cirrus_amp_cal_data cal_data;
@@ -283,6 +284,7 @@ extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC];
extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC];
int cs35l56_set_patch(struct cs35l56_base *cs35l56_base);
+int cs35l56_init_asp1_regs_for_driver_control(struct cs35l56_base *cs35l56_base);
int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base);
int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command);
int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base);
diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h
index 1af9e68193920d..234b5baea69c8e 100644
--- a/include/sound/emu10k1.h
+++ b/include/sound/emu10k1.h
@@ -1684,8 +1684,8 @@ struct snd_emu1010 {
unsigned int clock_fallback;
unsigned int optical_in; /* 0:SPDIF, 1:ADAT */
unsigned int optical_out; /* 0:SPDIF, 1:ADAT */
- struct work_struct firmware_work;
- struct work_struct clock_work;
+ struct work_struct work;
+ struct mutex lock;
};
struct snd_emu10k1 {
@@ -1834,6 +1834,9 @@ unsigned int snd_emu10k1_ptr20_read(struct snd_emu10k1 * emu, unsigned int reg,
void snd_emu10k1_ptr20_write(struct snd_emu10k1 *emu, unsigned int reg, unsigned int chn, unsigned int data);
int snd_emu10k1_spi_write(struct snd_emu10k1 * emu, unsigned int data);
int snd_emu10k1_i2c_write(struct snd_emu10k1 *emu, u32 reg, u32 value);
+static inline void snd_emu1010_fpga_lock(struct snd_emu10k1 *emu) { mutex_lock(&emu->emu1010.lock); };
+static inline void snd_emu1010_fpga_unlock(struct snd_emu10k1 *emu) { mutex_unlock(&emu->emu1010.lock); };
+void snd_emu1010_fpga_write_lock(struct snd_emu10k1 *emu, u32 reg, u32 value);
void snd_emu1010_fpga_write(struct snd_emu10k1 *emu, u32 reg, u32 value);
void snd_emu1010_fpga_read(struct snd_emu10k1 *emu, u32 reg, u32 *value);
void snd_emu1010_fpga_link_dst_src_write(struct snd_emu10k1 *emu, u32 dst, u32 src);
diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index a8bebac1e4b28d..957295364a5e3c 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -56,6 +56,9 @@ struct hdac_ext_stream {
u32 pphcldpl;
u32 pphcldpu;
+ u32 pplcllpl;
+ u32 pplcllpu;
+
bool decoupled:1;
bool link_locked:1;
bool link_prepared;
diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h
index 53470d6a28d659..24dbe16684ae33 100644
--- a/include/sound/intel-nhlt.h
+++ b/include/sound/intel-nhlt.h
@@ -143,6 +143,9 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt,
u32 bus_id, u8 link_type, u8 vbps, u8 bps,
u8 num_ch, u32 rate, u8 dir, u8 dev_type);
+int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt,
+ u8 virtual_bus_id);
+
#else
static inline struct nhlt_acpi_table *intel_nhlt_init(struct device *dev)
@@ -184,6 +187,13 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt,
return NULL;
}
+static inline int intel_nhlt_ssp_device_type(struct device *dev,
+ struct nhlt_acpi_table *nhlt,
+ u8 virtual_bus_id)
+{
+ return -EINVAL;
+}
+
#endif
#endif
diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h
index 4038dd421150a3..1dc59005d241fb 100644
--- a/include/sound/tas2781-tlv.h
+++ b/include/sound/tas2781-tlv.h
@@ -15,7 +15,7 @@
#ifndef __TAS2781_TLV_H__
#define __TAS2781_TLV_H__
-static const DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0);
+static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0);
static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0);
#endif
diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h
index 97b09fcf7e5287..86fe6aecff1e79 100644
--- a/include/trace/events/fs_dax.h
+++ b/include/trace/events/fs_dax.h
@@ -62,14 +62,14 @@ DEFINE_PMD_FAULT_EVENT(dax_pmd_fault_done);
DECLARE_EVENT_CLASS(dax_pmd_load_hole_class,
TP_PROTO(struct inode *inode, struct vm_fault *vmf,
- struct page *zero_page,
+ struct folio *zero_folio,
void *radix_entry),
- TP_ARGS(inode, vmf, zero_page, radix_entry),
+ TP_ARGS(inode, vmf, zero_folio, radix_entry),
TP_STRUCT__entry(
__field(unsigned long, ino)
__field(unsigned long, vm_flags)
__field(unsigned long, address)
- __field(struct page *, zero_page)
+ __field(struct folio *, zero_folio)
__field(void *, radix_entry)
__field(dev_t, dev)
),
@@ -78,17 +78,17 @@ DECLARE_EVENT_CLASS(dax_pmd_load_hole_class,
__entry->ino = inode->i_ino;
__entry->vm_flags = vmf->vma->vm_flags;
__entry->address = vmf->address;
- __entry->zero_page = zero_page;
+ __entry->zero_folio = zero_folio;
__entry->radix_entry = radix_entry;
),
- TP_printk("dev %d:%d ino %#lx %s address %#lx zero_page %p "
+ TP_printk("dev %d:%d ino %#lx %s address %#lx zero_folio %p "
"radix_entry %#lx",
MAJOR(__entry->dev),
MINOR(__entry->dev),
__entry->ino,
__entry->vm_flags & VM_SHARED ? "shared" : "private",
__entry->address,
- __entry->zero_page,
+ __entry->zero_folio,
(unsigned long)__entry->radix_entry
)
)
@@ -96,8 +96,8 @@ DECLARE_EVENT_CLASS(dax_pmd_load_hole_class,
#define DEFINE_PMD_LOAD_HOLE_EVENT(name) \
DEFINE_EVENT(dax_pmd_load_hole_class, name, \
TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
- struct page *zero_page, void *radix_entry), \
- TP_ARGS(inode, vmf, zero_page, radix_entry))
+ struct folio *zero_folio, void *radix_entry), \
+ TP_ARGS(inode, vmf, zero_folio, radix_entry))
DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole);
DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole_fallback);
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index 6e2ef1d4b0028d..ab576898a1264d 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -174,10 +174,10 @@ TRACE_EVENT(mm_collapse_huge_page_swapin,
TRACE_EVENT(mm_khugepaged_scan_file,
- TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file,
+ TP_PROTO(struct mm_struct *mm, struct folio *folio, struct file *file,
int present, int swap, int result),
- TP_ARGS(mm, page, file, present, swap, result),
+ TP_ARGS(mm, folio, file, present, swap, result),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
@@ -190,7 +190,7 @@ TRACE_EVENT(mm_khugepaged_scan_file,
TP_fast_assign(
__entry->mm = mm;
- __entry->pfn = page ? page_to_pfn(page) : -1;
+ __entry->pfn = folio ? folio_pfn(folio) : -1;
__assign_str(filename, file->f_path.dentry->d_iname);
__entry->present = present;
__entry->swap = swap;
@@ -207,10 +207,10 @@ TRACE_EVENT(mm_khugepaged_scan_file,
);
TRACE_EVENT(mm_khugepaged_collapse_file,
- TP_PROTO(struct mm_struct *mm, struct page *hpage, pgoff_t index,
+ TP_PROTO(struct mm_struct *mm, struct folio *new_folio, pgoff_t index,
bool is_shmem, unsigned long addr, struct file *file,
int nr, int result),
- TP_ARGS(mm, hpage, index, addr, is_shmem, file, nr, result),
+ TP_ARGS(mm, new_folio, index, addr, is_shmem, file, nr, result),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
__field(unsigned long, hpfn)
@@ -224,7 +224,7 @@ TRACE_EVENT(mm_khugepaged_collapse_file,
TP_fast_assign(
__entry->mm = mm;
- __entry->hpfn = hpage ? page_to_pfn(hpage) : -1;
+ __entry->hpfn = new_folio ? folio_pfn(new_folio) : -1;
__entry->index = index;
__entry->addr = addr;
__entry->is_shmem = is_shmem;
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index d801409b33cfec..e46d6e82765e71 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -107,7 +107,6 @@
DEF_PAGEFLAG_NAME(lru), \
DEF_PAGEFLAG_NAME(active), \
DEF_PAGEFLAG_NAME(workingset), \
- DEF_PAGEFLAG_NAME(slab), \
DEF_PAGEFLAG_NAME(owner_priv_1), \
DEF_PAGEFLAG_NAME(arch_1), \
DEF_PAGEFLAG_NAME(reserved), \
@@ -135,6 +134,8 @@ IF_HAVE_PG_ARCH_X(arch_3)
#define DEF_PAGETYPE_NAME(_name) { PG_##_name, __stringify(_name) }
#define __def_pagetype_names \
+ DEF_PAGETYPE_NAME(slab), \
+ DEF_PAGETYPE_NAME(hugetlb), \
DEF_PAGETYPE_NAME(offline), \
DEF_PAGETYPE_NAME(guard), \
DEF_PAGETYPE_NAME(table), \
diff --git a/include/trace/events/page_ref.h b/include/trace/events/page_ref.h
index 8a99c1cd417b81..fe33a255b7d093 100644
--- a/include/trace/events/page_ref.h
+++ b/include/trace/events/page_ref.h
@@ -30,7 +30,7 @@ DECLARE_EVENT_CLASS(page_ref_mod_template,
__entry->pfn = page_to_pfn(page);
__entry->flags = page->flags;
__entry->count = page_ref_count(page);
- __entry->mapcount = page_mapcount(page);
+ __entry->mapcount = atomic_read(&page->_mapcount);
__entry->mapping = page->mapping;
__entry->mt = get_pageblock_migratetype(page);
__entry->val = v;
@@ -79,7 +79,7 @@ DECLARE_EVENT_CLASS(page_ref_mod_and_test_template,
__entry->pfn = page_to_pfn(page);
__entry->flags = page->flags;
__entry->count = page_ref_count(page);
- __entry->mapcount = page_mapcount(page);
+ __entry->mapcount = atomic_read(&page->_mapcount);
__entry->mapping = page->mapping;
__entry->mt = get_pageblock_migratetype(page);
__entry->val = v;
diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
index ba2d96a1bc2f94..f50fcafc69de20 100644
--- a/include/trace/events/rpcgss.h
+++ b/include/trace/events/rpcgss.h
@@ -609,7 +609,7 @@ TRACE_EVENT(rpcgss_context,
__field(unsigned int, timeout)
__field(u32, window_size)
__field(int, len)
- __string(acceptor, data)
+ __string_len(acceptor, data, len)
),
TP_fast_assign(
@@ -618,7 +618,7 @@ TRACE_EVENT(rpcgss_context,
__entry->timeout = timeout;
__entry->window_size = window_size;
__entry->len = len;
- strncpy(__get_str(acceptor), data, len);
+ __assign_str(acceptor, data);
),
TP_printk("win_size=%u expiry=%lu now=%lu timeout=%u acceptor=%.*s",
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 75f00965ab1586..d983c48a3b6af1 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -842,8 +842,11 @@ __SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
#define __NR_lsm_list_modules 461
__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
+#define __NR_mseal 462
+__SYSCALL(__NR_mseal, sys_mseal)
+
#undef __NR_syscalls
-#define __NR_syscalls 462
+#define __NR_syscalls 463
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
index d87410a8443aae..af024d90453ddc 100644
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -77,11 +77,6 @@ struct drm_etnaviv_timespec {
#define ETNAVIV_PARAM_GPU_PRODUCT_ID 0x1c
#define ETNAVIV_PARAM_GPU_CUSTOMER_ID 0x1d
#define ETNAVIV_PARAM_GPU_ECO_ID 0x1e
-#define ETNAVIV_PARAM_GPU_NN_CORE_COUNT 0x1f
-#define ETNAVIV_PARAM_GPU_NN_MAD_PER_CORE 0x20
-#define ETNAVIV_PARAM_GPU_TP_CORE_COUNT 0x21
-#define ETNAVIV_PARAM_GPU_ON_CHIP_SRAM_SIZE 0x22
-#define ETNAVIV_PARAM_GPU_AXI_SRAM_SIZE 0x23
#define ETNA_MAX_PIPES 4
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 9ce46edc62a5b1..2040a470ddb41b 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -913,14 +913,25 @@ enum kfd_dbg_trap_exception_code {
KFD_EC_MASK(EC_DEVICE_NEW))
#define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \
KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE))
+#define KFD_EC_MASK_PACKET (KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED))
/* Checks for exception code types for KFD search */
+#define KFD_DBG_EC_IS_VALID(ecode) (ecode > EC_NONE && ecode < EC_MAX)
#define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \
- (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE))
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE))
#define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \
- (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE))
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE))
#define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \
- (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
+#define KFD_DBG_EC_TYPE_IS_PACKET(ecode) \
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PACKET))
/* Runtime enable states */
diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
index 43c51698195ceb..842bf1201ac414 100644
--- a/include/uapi/linux/vdpa.h
+++ b/include/uapi/linux/vdpa.h
@@ -57,7 +57,7 @@ enum vdpa_attr {
VDPA_ATTR_DEV_FEATURES, /* u64 */
VDPA_ATTR_DEV_BLK_CFG_CAPACITY, /* u64 */
- VDPA_ATTR_DEV_BLK_CFG_SEG_SIZE, /* u32 */
+ VDPA_ATTR_DEV_BLK_CFG_SIZE_MAX, /* u32 */
VDPA_ATTR_DEV_BLK_CFG_BLK_SIZE, /* u32 */
VDPA_ATTR_DEV_BLK_CFG_SEG_MAX, /* u32 */
VDPA_ATTR_DEV_BLK_CFG_NUM_QUEUES, /* u16 */
@@ -70,8 +70,8 @@ enum vdpa_attr {
VDPA_ATTR_DEV_BLK_CFG_DISCARD_SEC_ALIGN,/* u32 */
VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEC, /* u32 */
VDPA_ATTR_DEV_BLK_CFG_MAX_WRITE_ZEROES_SEG, /* u32 */
- VDPA_ATTR_DEV_BLK_CFG_READ_ONLY, /* u8 */
- VDPA_ATTR_DEV_BLK_CFG_FLUSH, /* u8 */
+ VDPA_ATTR_DEV_BLK_READ_ONLY, /* u8 */
+ VDPA_ATTR_DEV_BLK_FLUSH, /* u8 */
/* new attributes must be added above here */
VDPA_ATTR_MAX,
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index bea69739061346..b95dd84eef2db2 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -179,12 +179,6 @@
/* Get the config size */
#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
-/* Get the count of all virtqueues */
-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
-
-/* Get the number of virtqueue groups. */
-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
-
/* Get the number of address spaces. */
#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
@@ -228,10 +222,17 @@
#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \
struct vhost_vring_state)
+
+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
+
+/* Get the number of virtqueue groups. */
+#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
+
/* Get the queue size of a specific virtqueue.
* userspace set the vring index in vhost_vring_state.index
* kernel set the queue size in vhost_vring_state.num
*/
-#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \
+#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \
struct vhost_vring_state)
#endif
diff --git a/include/uapi/scsi/scsi_bsg_mpi3mr.h b/include/uapi/scsi/scsi_bsg_mpi3mr.h
index c72ce387286ad9..30a5c1a5937645 100644
--- a/include/uapi/scsi/scsi_bsg_mpi3mr.h
+++ b/include/uapi/scsi/scsi_bsg_mpi3mr.h
@@ -382,7 +382,7 @@ struct mpi3mr_bsg_in_reply_buf {
__u8 mpi_reply_type;
__u8 rsvd1;
__u16 rsvd2;
- __u8 reply_buf[1];
+ __u8 reply_buf[];
};
/**
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index cb2afcebbdf514..a35e12f8e68baa 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -328,6 +328,7 @@ struct ufs_pwr_mode_info {
* @op_runtime_config: called to config Operation and runtime regs Pointers
* @get_outstanding_cqs: called to get outstanding completion queues
* @config_esi: called to config Event Specific Interrupt
+ * @config_scsi_dev: called to configure SCSI device parameters
*/
struct ufs_hba_variant_ops {
const char *name;
diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index 5d5c0b8efff2d4..c71ddb6d46914b 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -19,12 +19,6 @@
#include <vdso/time32.h>
#include <vdso/time64.h>
-#ifdef CONFIG_ARM64
-#include <asm/page-def.h>
-#else
-#include <asm/page.h>
-#endif
-
#ifdef CONFIG_ARCH_HAS_VDSO_DATA
#include <asm/vdso/data.h>
#else
@@ -132,7 +126,7 @@ extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden
*/
union vdso_data_store {
struct vdso_data data[CS_BASES];
- u8 page[PAGE_SIZE];
+ u8 page[1U << CONFIG_PAGE_SHIFT];
};
/*
diff --git a/init/Kconfig b/init/Kconfig
index aa02aec6aa7d29..0a021d6b4939ae 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -929,6 +929,9 @@ config NUMA_BALANCING_DEFAULT_ENABLED
If set, automatic NUMA balancing will be enabled if running on a NUMA
machine.
+config SLAB_OBJ_EXT
+ bool
+
menuconfig CGROUPS
bool "Control Group support"
select KERNFS
@@ -962,6 +965,7 @@ config MEMCG
bool "Memory controller"
select PAGE_COUNTER
select EVENTFD
+ select SLAB_OBJ_EXT
help
Provides control over the memory footprint of tasks in a cgroup.
@@ -1899,11 +1903,11 @@ config RUST
bool "Rust support"
depends on HAVE_RUST
depends on RUST_IS_AVAILABLE
+ depends on !CFI_CLANG
depends on !MODVERSIONS
depends on !GCC_PLUGINS
depends on !RANDSTRUCT
depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE
- select CONSTRUCTORS
help
Enables Rust support in the kernel.
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 425f4bcf4b77e0..22c7f41ff6422a 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -29,7 +29,6 @@ static struct ctl_table kern_do_mounts_initrd_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static __init int kernel_do_mounts_initrd_sysctls_init(void)
diff --git a/init/initramfs.c b/init/initramfs.c
index da79760b8be3a6..a298a3854a8018 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -367,7 +367,7 @@ static int __init do_name(void)
if (S_ISREG(mode)) {
int ml = maybe_link();
if (ml >= 0) {
- int openflags = O_WRONLY|O_CREAT;
+ int openflags = O_WRONLY|O_CREAT|O_LARGEFILE;
if (ml != 1)
openflags |= O_TRUNC;
wfile = filp_open(collected, openflags, mode);
@@ -682,7 +682,7 @@ static void __init populate_initrd_image(char *err)
printk(KERN_INFO "rootfs image is not initramfs (%s); looks like an initrd\n",
err);
- file = filp_open("/initrd.image", O_WRONLY | O_CREAT, 0700);
+ file = filp_open("/initrd.image", O_WRONLY|O_CREAT|O_LARGEFILE, 0700);
if (IS_ERR(file))
return;
diff --git a/init/main.c b/init/main.c
index 2ca52474d0c303..3f956c6beb3d8c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -327,7 +327,7 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size,
{
struct xbc_node *knode, *vnode;
char *end = buf + size;
- const char *val;
+ const char *val, *q;
int ret;
xbc_node_for_each_key_value(root, knode, val) {
@@ -345,8 +345,14 @@ static int __init xbc_snprint_cmdline(char *buf, size_t size,
continue;
}
xbc_array_for_each_value(vnode, val) {
- ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ",
- xbc_namebuf, val);
+ /*
+ * For prettier and more readable /proc/cmdline, only
+ * quote the value when necessary, i.e. when it contains
+ * whitespace.
+ */
+ q = strpbrk(val, " \t\r\n") ? "\"" : "";
+ ret = snprintf(buf, rest(buf, end), "%s=%s%s%s ",
+ xbc_namebuf, q, val, q);
if (ret < 0)
return ret;
buf += ret;
@@ -487,6 +493,11 @@ static int __init warn_bootconfig(char *str)
early_param("bootconfig", warn_bootconfig);
+bool __init cmdline_has_extra_options(void)
+{
+ return extra_command_line || extra_init_args;
+}
+
/* Change NUL term back to "=", to make "param" the whole string. */
static void __init repair_env_string(char *param, char *val)
{
@@ -631,6 +642,8 @@ static void __init setup_command_line(char *command_line)
if (!saved_command_line)
panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
+ len = xlen + strlen(command_line) + 1;
+
static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
if (!static_command_line)
panic("%s: Failed to allocate %zu bytes\n", __func__, len);
@@ -871,6 +884,19 @@ static void __init print_unknown_bootoptions(void)
memblock_free(unknown_options, len);
}
+static void __init early_numa_node_init(void)
+{
+#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
+#ifndef cpu_to_node
+ int cpu;
+
+ /* The early_cpu_to_node() should be ready here. */
+ for_each_possible_cpu(cpu)
+ set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
+#endif
+#endif
+}
+
asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector
void start_kernel(void)
{
@@ -901,6 +927,7 @@ void start_kernel(void)
setup_nr_cpu_ids();
setup_per_cpu_areas();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
+ early_numa_node_init();
boot_cpu_hotplug_init();
pr_notice("Kernel command line: %s\n", saved_command_line);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5d4b448fdc5038..d5edfb8444d78f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -147,6 +147,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
static void io_queue_sqe(struct io_kiocb *req);
struct kmem_cache *req_cachep;
+static struct workqueue_struct *iou_wq __ro_after_init;
static int __read_mostly sysctl_io_uring_disabled;
static int __read_mostly sysctl_io_uring_group = -1;
@@ -350,7 +351,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
err:
kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
- kfree(ctx->io_bl);
xa_destroy(&ctx->io_bl_xa);
kfree(ctx);
return NULL;
@@ -1982,10 +1982,15 @@ fail:
err = -EBADFD;
if (!io_file_can_poll(req))
goto fail;
- err = -ECANCELED;
- if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK)
- goto fail;
- return;
+ if (req->file->f_flags & O_NONBLOCK ||
+ req->file->f_mode & FMODE_NOWAIT) {
+ err = -ECANCELED;
+ if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK)
+ goto fail;
+ return;
+ } else {
+ req->flags &= ~REQ_F_APOLL_MULTISHOT;
+ }
}
if (req->flags & REQ_F_FORCE_ASYNC) {
@@ -2597,19 +2602,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
if (__io_cqring_events_user(ctx) >= min_events)
return 0;
- if (sig) {
-#ifdef CONFIG_COMPAT
- if (in_compat_syscall())
- ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
- sigsz);
- else
-#endif
- ret = set_user_sigmask(sig, sigsz);
-
- if (ret)
- return ret;
- }
-
init_waitqueue_func_entry(&iowq.wq, io_wake_function);
iowq.wq.private = current;
INIT_LIST_HEAD(&iowq.wq.entry);
@@ -2628,6 +2620,19 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
io_napi_adjust_timeout(ctx, &iowq, &ts);
}
+ if (sig) {
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
+ sigsz);
+ else
+#endif
+ ret = set_user_sigmask(sig, sigsz);
+
+ if (ret)
+ return ret;
+ }
+
io_napi_busy_loop(ctx, &iowq);
trace_io_uring_cqring_wait(ctx, min_events);
@@ -2926,7 +2931,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_napi_free(ctx);
kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
- kfree(ctx->io_bl);
xa_destroy(&ctx->io_bl_xa);
kfree(ctx);
}
@@ -3161,7 +3165,7 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
* noise and overhead, there's no discernable change in runtime
* over using system_wq.
*/
- queue_work(system_unbound_wq, &ctx->exit_work);
+ queue_work(iou_wq, &ctx->exit_work);
}
static int io_uring_release(struct inode *inode, struct file *file)
@@ -3443,14 +3447,15 @@ static void *io_uring_validate_mmap_request(struct file *file,
ptr = ctx->sq_sqes;
break;
case IORING_OFF_PBUF_RING: {
+ struct io_buffer_list *bl;
unsigned int bgid;
bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
- rcu_read_lock();
- ptr = io_pbuf_get_address(ctx, bgid);
- rcu_read_unlock();
- if (!ptr)
- return ERR_PTR(-EINVAL);
+ bl = io_pbuf_get_bl(ctx, bgid);
+ if (IS_ERR(bl))
+ return bl;
+ ptr = bl->buf_ring;
+ io_put_bl(ctx, bl);
break;
}
default:
@@ -3520,7 +3525,7 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
#else
addr = 0UL;
#endif
- return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
}
#else /* !CONFIG_MMU */
@@ -4185,6 +4190,8 @@ static int __init io_uring_init(void)
io_buf_cachep = KMEM_CACHE(io_buffer,
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
+ iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64);
+
#ifdef CONFIG_SYSCTL
register_sysctl_init("kernel", kernel_io_uring_disabled_table);
#endif
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 693c26da4ee1a3..3aa16e27f5099a 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -17,8 +17,6 @@
#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))
-#define BGID_ARRAY 64
-
/* BIDs are addressed by a 16-bit field in a CQE */
#define MAX_BIDS_PER_BGID (1 << 16)
@@ -40,13 +38,9 @@ struct io_buf_free {
int inuse;
};
-static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
- struct io_buffer_list *bl,
- unsigned int bgid)
+static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
+ unsigned int bgid)
{
- if (bl && bgid < BGID_ARRAY)
- return &bl[bgid];
-
return xa_load(&ctx->io_bl_xa, bgid);
}
@@ -55,7 +49,7 @@ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
{
lockdep_assert_held(&ctx->uring_lock);
- return __io_buffer_get_list(ctx, ctx->io_bl, bgid);
+ return __io_buffer_get_list(ctx, bgid);
}
static int io_buffer_add_list(struct io_ring_ctx *ctx,
@@ -67,11 +61,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx,
* always under the ->uring_lock, but the RCU lookup from mmap does.
*/
bl->bgid = bgid;
- smp_store_release(&bl->is_ready, 1);
-
- if (bgid < BGID_ARRAY)
- return 0;
-
+ atomic_set(&bl->refs, 1);
return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
}
@@ -208,24 +198,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
return ret;
}
-static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
-{
- struct io_buffer_list *bl;
- int i;
-
- bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL);
- if (!bl)
- return -ENOMEM;
-
- for (i = 0; i < BGID_ARRAY; i++) {
- INIT_LIST_HEAD(&bl[i].buf_list);
- bl[i].bgid = i;
- }
-
- smp_store_release(&ctx->io_bl, bl);
- return 0;
-}
-
/*
* Mark the given mapped range as free for reuse
*/
@@ -294,24 +266,24 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
return i;
}
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+ if (atomic_dec_and_test(&bl->refs)) {
+ __io_remove_buffers(ctx, bl, -1U);
+ kfree_rcu(bl, rcu);
+ }
+}
+
void io_destroy_buffers(struct io_ring_ctx *ctx)
{
struct io_buffer_list *bl;
struct list_head *item, *tmp;
struct io_buffer *buf;
unsigned long index;
- int i;
-
- for (i = 0; i < BGID_ARRAY; i++) {
- if (!ctx->io_bl)
- break;
- __io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
- }
xa_for_each(&ctx->io_bl_xa, index, bl) {
xa_erase(&ctx->io_bl_xa, bl->bgid);
- __io_remove_buffers(ctx, bl, -1U);
- kfree_rcu(bl, rcu);
+ io_put_bl(ctx, bl);
}
/*
@@ -489,12 +461,6 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
io_ring_submit_lock(ctx, issue_flags);
- if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
- ret = io_init_bl_list(ctx);
- if (ret)
- goto err;
- }
-
bl = io_buffer_get_list(ctx, p->bgid);
if (unlikely(!bl)) {
bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
@@ -507,14 +473,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
if (ret) {
/*
* Doesn't need rcu free as it was never visible, but
- * let's keep it consistent throughout. Also can't
- * be a lower indexed array group, as adding one
- * where lookup failed cannot happen.
+ * let's keep it consistent throughout.
*/
- if (p->bgid >= BGID_ARRAY)
- kfree_rcu(bl, rcu);
- else
- WARN_ON_ONCE(1);
+ kfree_rcu(bl, rcu);
goto err;
}
}
@@ -679,12 +640,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (reg.ring_entries >= 65536)
return -EINVAL;
- if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
- int ret = io_init_bl_list(ctx);
- if (ret)
- return ret;
- }
-
bl = io_buffer_get_list(ctx, reg.bgid);
if (bl) {
/* if mapped buffer ring OR classic exists, don't allow */
@@ -733,11 +688,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (!bl->is_buf_ring)
return -EINVAL;
- __io_remove_buffers(ctx, bl, -1U);
- if (bl->bgid >= BGID_ARRAY) {
- xa_erase(&ctx->io_bl_xa, bl->bgid);
- kfree_rcu(bl, rcu);
- }
+ xa_erase(&ctx->io_bl_xa, bl->bgid);
+ io_put_bl(ctx, bl);
return 0;
}
@@ -767,23 +719,35 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
return 0;
}
-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
+struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+ unsigned long bgid)
{
struct io_buffer_list *bl;
+ bool ret;
- bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid);
-
- if (!bl || !bl->is_mmap)
- return NULL;
/*
- * Ensure the list is fully setup. Only strictly needed for RCU lookup
- * via mmap, and in that case only for the array indexed groups. For
- * the xarray lookups, it's either visible and ready, or not at all.
+ * We have to be a bit careful here - we're inside mmap and cannot grab
+ * the uring_lock. This means the buffer_list could be simultaneously
+ * going away, if someone is trying to be sneaky. Look it up under rcu
+ * so we know it's not going away, and attempt to grab a reference to
+ * it. If the ref is already zero, then fail the mapping. If successful,
+ * the caller will call io_put_bl() to drop the the reference at at the
+ * end. This may then safely free the buffer_list (and drop the pages)
+ * at that point, vm_insert_pages() would've already grabbed the
+ * necessary vma references.
*/
- if (!smp_load_acquire(&bl->is_ready))
- return NULL;
-
- return bl->buf_ring;
+ rcu_read_lock();
+ bl = xa_load(&ctx->io_bl_xa, bgid);
+ /* must be a mmap'able buffer ring and have pages */
+ ret = false;
+ if (bl && bl->is_mmap)
+ ret = atomic_inc_not_zero(&bl->refs);
+ rcu_read_unlock();
+
+ if (ret)
+ return bl;
+
+ return ERR_PTR(-EINVAL);
}
/*
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 1c7b654ee7263a..df365b8860cf1e 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -25,12 +25,12 @@ struct io_buffer_list {
__u16 head;
__u16 mask;
+ atomic_t refs;
+
/* ring mapped provided buffers */
__u8 is_buf_ring;
/* ring mapped provided buffers, but mmap'ed by application */
__u8 is_mmap;
- /* bl is visible from an RCU point of view for lookup */
- __u8 is_ready;
};
struct io_buffer {
@@ -61,7 +61,9 @@ void __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid);
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+ unsigned long bgid);
static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
{
diff --git a/io_uring/net.c b/io_uring/net.c
index 1e7665ff6ef702..4afb475d41974b 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1276,6 +1276,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
if (req_has_async_data(req)) {
kmsg = req->async_data;
+ kmsg->msg.msg_control_user = sr->msg_control;
} else {
ret = io_sendmsg_copy_hdr(req, &iomsg);
if (ret)
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 0585ebcc9773d3..c8d48287439e5a 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -937,6 +937,13 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
ret = __io_read(req, issue_flags);
/*
+ * If the file doesn't support proper NOWAIT, then disable multishot
+ * and stay in single shot mode.
+ */
+ if (!io_file_supports_nowait(req))
+ req->flags &= ~REQ_F_APOLL_MULTISHOT;
+
+ /*
* If we get -EAGAIN, recycle our buffer and just let normal poll
* handling arm it.
*/
@@ -955,7 +962,7 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
/*
* Any successful return value will keep the multishot read armed.
*/
- if (ret > 0) {
+ if (ret > 0 && req->flags & REQ_F_APOLL_MULTISHOT) {
/*
* Put our buffer and post a CQE. If we fail to post a CQE, then
* jump to the termination path. This request is then done.
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 45cb1dabce29ae..0867535af96fb0 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -178,7 +178,6 @@ static struct ctl_table ipc_sysctls[] = {
.extra2 = SYSCTL_INT_MAX,
},
#endif
- {}
};
static struct ctl_table_set *set_lookup(struct ctl_table_root *root)
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 21fba3a6edaf7a..22ec532c7fa119 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -64,7 +64,6 @@ static struct ctl_table mq_sysctls[] = {
.extra1 = &msg_maxsize_limit_min,
.extra2 = &msg_maxsize_limit_max,
},
- {}
};
static struct ctl_table_set *set_lookup(struct ctl_table_root *root)
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 368c5d86b5b7c8..e497011261b897 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -4,7 +4,7 @@ ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y)
# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details
cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
-CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
+CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 86571e760dd613..16dbf4f6b77fa5 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -38,7 +38,7 @@
/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */
#define GUARD_SZ (1ull << sizeof(((struct bpf_insn *)0)->off) * 8)
-#define KERN_VM_SZ ((1ull << 32) + GUARD_SZ)
+#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
struct bpf_arena {
struct bpf_map map;
@@ -110,7 +110,7 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
return ERR_PTR(-EINVAL);
vm_range = (u64)attr->max_entries * PAGE_SIZE;
- if (vm_range > (1ull << 32))
+ if (vm_range > SZ_4G)
return ERR_PTR(-E2BIG);
if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32))
@@ -301,7 +301,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad
if (pgoff)
return -EINVAL;
- if (len > (1ull << 32))
+ if (len > SZ_4G)
return -E2BIG;
/* if user_vm_start was specified at arena creation time */
@@ -314,7 +314,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad
return -EINVAL;
}
- ret = current->mm->get_unmapped_area(filp, addr, len * 2, 0, flags);
+ ret = mm_get_unmapped_area(current->mm, filp, addr, len * 2, 0, flags);
if (IS_ERR_VALUE(ret))
return ret;
if ((ret >> 32) == ((ret + len - 1) >> 32))
@@ -322,7 +322,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad
if (WARN_ON_ONCE(arena->user_vm_start))
/* checks at map creation time should prevent this */
return -EFAULT;
- return round_up(ret, 1ull << 32);
+ return round_up(ret, SZ_4G);
}
static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
@@ -346,7 +346,7 @@ static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
return -EBUSY;
/* Earlier checks should prevent this */
- if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > (1ull << 32) || vma->vm_pgoff))
+ if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > SZ_4G || vma->vm_pgoff))
return -EFAULT;
if (remember_vma(arena, vma))
@@ -420,7 +420,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
if (uaddr & ~PAGE_MASK)
return 0;
pgoff = compute_pgoff(arena, uaddr);
- if (pgoff + page_cnt > page_cnt_max)
+ if (pgoff > page_cnt_max - page_cnt)
/* requested address will be outside of user VMA */
return 0;
}
@@ -447,7 +447,13 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
goto out;
uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
- /* Earlier checks make sure that uaddr32 + page_cnt * PAGE_SIZE will not overflow 32-bit */
+ /* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
+ * will not overflow 32-bit. Lower 32-bit need to represent
+ * contiguous user address range.
+ * Map these pages at kern_vm_start base.
+ * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
+ * lower 32-bit and it's ok.
+ */
ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32,
kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages);
if (ret) {
@@ -510,6 +516,11 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
if (!page)
continue;
if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */
+ /* Optimization for the common case of page_cnt==1:
+ * If page wasn't mapped into some user vma there
+ * is no need to call zap_pages which is slow. When
+ * page_cnt is big it's faster to do the batched zap.
+ */
zap_pages(arena, full_uaddr, 1);
vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE);
__free_page(page);
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
index addf3dd57b59b5..35e1ddca74d210 100644
--- a/kernel/bpf/bloom_filter.c
+++ b/kernel/bpf/bloom_filter.c
@@ -80,6 +80,18 @@ static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key
return -EOPNOTSUPP;
}
+/* Called from syscall */
+static int bloom_map_alloc_check(union bpf_attr *attr)
+{
+ if (attr->value_size > KMALLOC_MAX_SIZE)
+ /* if value_size is bigger, the user space won't be able to
+ * access the elements.
+ */
+ return -E2BIG;
+
+ return 0;
+}
+
static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
{
u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits;
@@ -191,6 +203,7 @@ static u64 bloom_map_mem_usage(const struct bpf_map *map)
BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter)
const struct bpf_map_ops bloom_filter_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
+ .map_alloc_check = bloom_map_alloc_check,
.map_alloc = bloom_map_alloc,
.map_free = bloom_map_free,
.map_get_next_key = bloom_map_get_next_key,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 696bc55de8e82e..1ea5ce5bb59933 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2942,6 +2942,15 @@ bool __weak bpf_jit_supports_arena(void)
return false;
}
+u64 __weak bpf_arch_uaddress_limit(void)
+{
+#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE)
+ return TASK_SIZE;
+#else
+ return 0;
+#endif
+}
+
/* Return TRUE if the JIT backend satisfies the following two conditions:
* 1) JIT backend supports atomic_xchg() on pointer-sized words.
* 2) Under the specific arch, the implementation of xchg() is the same
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a895878595710b..449b9a5d3fe3f3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2548,7 +2548,7 @@ __bpf_kfunc void bpf_throw(u64 cookie)
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids)
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 550f02e2cb1327..a546aba46d5da9 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -759,8 +759,7 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress);
rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
}
- if (ma->objcg)
- obj_cgroup_put(ma->objcg);
+ obj_cgroup_put(ma->objcg);
destroy_mem_alloc(ma, rcu_in_progress);
}
if (ma->caches) {
@@ -776,8 +775,7 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
}
}
- if (ma->objcg)
- obj_cgroup_put(ma->objcg);
+ obj_cgroup_put(ma->objcg);
destroy_mem_alloc(ma, rcu_in_progress);
}
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ae2ff73bde7e79..9cb89e875f0d3f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -980,7 +980,7 @@ static unsigned long bpf_get_unmapped_area(struct file *filp, unsigned long addr
if (map->ops->map_get_unmapped_area)
return map->ops->map_get_unmapped_area(filp, addr, len, pgoff, flags);
#ifdef CONFIG_MMU
- return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
#else
return addr;
#endif
@@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link)
atomic64_inc(&link->refcnt);
}
+static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
+{
+ struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
+
+ /* free bpf_link and its containing memory */
+ link->ops->dealloc_deferred(link);
+}
+
+static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
+{
+ if (rcu_trace_implies_rcu_gp())
+ bpf_link_defer_dealloc_rcu_gp(rcu);
+ else
+ call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
+}
+
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
+ bool sleepable = false;
+
bpf_link_free_id(link->id);
if (link->prog) {
+ sleepable = link->prog->sleepable;
/* detach BPF program, clean up used resources */
link->ops->release(link);
bpf_prog_put(link->prog);
}
- /* free bpf_link and its containing memory */
- link->ops->dealloc(link);
+ if (link->ops->dealloc_deferred) {
+ /* schedule BPF link deallocation; if underlying BPF program
+ * is sleepable, we need to first wait for RCU tasks trace
+ * sync, then go through "classic" RCU grace period
+ */
+ if (sleepable)
+ call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+ else
+ call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
+ }
+ if (link->ops->dealloc)
+ link->ops->dealloc(link);
}
static void bpf_link_put_deferred(struct work_struct *work)
@@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_raw_tp_link_lops = {
.release = bpf_raw_tp_link_release,
- .dealloc = bpf_raw_tp_link_dealloc,
+ .dealloc_deferred = bpf_raw_tp_link_dealloc,
.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
.fill_link_info = bpf_raw_tp_link_fill_link_info,
};
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 63749ad5ac6b8d..cb7ad1f795e18b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5682,6 +5682,13 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
return reg->type == PTR_TO_FLOW_KEYS;
}
+static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
+{
+ const struct bpf_reg_state *reg = reg_state(env, regno);
+
+ return reg->type == PTR_TO_ARENA;
+}
+
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
@@ -6694,6 +6701,11 @@ static int check_stack_access_within_bounds(
err = check_stack_slot_within_bounds(env, min_off, state, type);
if (!err && max_off > 0)
err = -EINVAL; /* out of stack access into non-negative offsets */
+ if (!err && access_size < 0)
+ /* access_size should not be negative (or overflow an int); others checks
+ * along the way should have prevented such an access.
+ */
+ err = -EFAULT; /* invalid negative access size; integer overflow? */
if (err) {
if (tnum_is_const(reg->var_off)) {
@@ -7019,7 +7031,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
if (is_ctx_reg(env, insn->dst_reg) ||
is_pkt_reg(env, insn->dst_reg) ||
is_flow_key_reg(env, insn->dst_reg) ||
- is_sk_reg(env, insn->dst_reg)) {
+ is_sk_reg(env, insn->dst_reg) ||
+ is_arena_reg(env, insn->dst_reg)) {
verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
insn->dst_reg,
reg_type_str(env, reg_state(env, insn->dst_reg)->type));
@@ -14014,6 +14027,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
return -EINVAL;
}
+ if (!env->prog->aux->arena) {
+ verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
+ return -EINVAL;
+ }
} else {
if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
insn->off != 32) || insn->imm) {
@@ -14046,8 +14063,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (insn->imm) {
/* off == BPF_ADDR_SPACE_CAST */
mark_reg_unknown(env, regs, insn->dst_reg);
- if (insn->imm == 1) /* cast from as(1) to as(0) */
+ if (insn->imm == 1) { /* cast from as(1) to as(0) */
dst_reg->type = PTR_TO_ARENA;
+ /* PTR_TO_ARENA is 32-bit */
+ dst_reg->subreg_def = env->insn_idx + 1;
+ }
} else if (insn->off == 0) {
/* case: R1 = R2
* copy register state to dest reg
@@ -18269,8 +18289,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
f = fdget(fd);
map = __bpf_map_get(f);
if (IS_ERR(map)) {
- verbose(env, "fd %d is not pointing to valid bpf_map\n",
- insn[0].imm);
+ verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
return PTR_ERR(map);
}
@@ -18359,15 +18378,18 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
}
if (!env->prog->jit_requested) {
verbose(env, "JIT is required to use arena\n");
+ fdput(f);
return -EOPNOTSUPP;
}
if (!bpf_jit_supports_arena()) {
verbose(env, "JIT doesn't support arena\n");
+ fdput(f);
return -EOPNOTSUPP;
}
env->prog->aux->arena = (void *)map;
if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
verbose(env, "arena's user address must be set via map_extra or mmap()\n");
+ fdput(f);
return -EINVAL;
}
}
@@ -19601,8 +19623,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
(((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
/* convert to 32-bit mov that clears upper 32-bit */
insn->code = BPF_ALU | BPF_MOV | BPF_X;
- /* clear off, so it's a normal 'wX = wY' from JIT pov */
+ /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
insn->off = 0;
+ insn->imm = 0;
} /* cast from as(0) to as(1) should be handled by JIT */
goto next_insn;
}
@@ -19652,6 +19675,36 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto next_insn;
}
+ /* Make it impossible to de-reference a userspace address */
+ if (BPF_CLASS(insn->code) == BPF_LDX &&
+ (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
+ struct bpf_insn *patch = &insn_buf[0];
+ u64 uaddress_limit = bpf_arch_uaddress_limit();
+
+ if (!uaddress_limit)
+ goto next_insn;
+
+ *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
+ if (insn->off)
+ *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
+ *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
+ *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
+ *patch++ = *insn;
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
+
+ cnt = patch - insn_buf;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
if (BPF_CLASS(insn->code) == BPF_LD &&
(BPF_MODE(insn->code) == BPF_ABS ||
diff --git a/kernel/configs/hardening.config b/kernel/configs/hardening.config
index 7a5bbfc024b7d0..4b4cfcba319013 100644
--- a/kernel/configs/hardening.config
+++ b/kernel/configs/hardening.config
@@ -39,11 +39,12 @@ CONFIG_UBSAN=y
CONFIG_UBSAN_TRAP=y
CONFIG_UBSAN_BOUNDS=y
# CONFIG_UBSAN_SHIFT is not set
-# CONFIG_UBSAN_DIV_ZERO
-# CONFIG_UBSAN_UNREACHABLE
-# CONFIG_UBSAN_BOOL
-# CONFIG_UBSAN_ENUM
-# CONFIG_UBSAN_ALIGNMENT
+# CONFIG_UBSAN_DIV_ZERO is not set
+# CONFIG_UBSAN_UNREACHABLE is not set
+# CONFIG_UBSAN_SIGNED_WRAP is not set
+# CONFIG_UBSAN_BOOL is not set
+# CONFIG_UBSAN_ENUM is not set
+# CONFIG_UBSAN_ALIGNMENT is not set
# Sampling-based heap out-of-bounds and use-after-free detection.
CONFIG_KFENCE=y
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8f6affd051f775..63447eb85dab6b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -3196,6 +3196,7 @@ void __init boot_cpu_hotplug_init(void)
this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);
}
+#ifdef CONFIG_CPU_MITIGATIONS
/*
* These are used for a global "mitigations=" cmdline option for toggling
* optional CPU mitigations.
@@ -3206,8 +3207,7 @@ enum cpu_mitigations {
CPU_MITIGATIONS_AUTO_NOSMT,
};
-static enum cpu_mitigations cpu_mitigations __ro_after_init =
- CPU_MITIGATIONS_AUTO;
+static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
static int __init mitigations_parse_cmdline(char *arg)
{
@@ -3223,7 +3223,6 @@ static int __init mitigations_parse_cmdline(char *arg)
return 0;
}
-early_param("mitigations", mitigations_parse_cmdline);
/* mitigations=off */
bool cpu_mitigations_off(void)
@@ -3238,3 +3237,11 @@ bool cpu_mitigations_auto_nosmt(void)
return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
}
EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
+#else
+static int __init mitigations_parse_cmdline(char *arg)
+{
+ pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n");
+ return 0;
+}
+#endif
+early_param("mitigations", mitigations_parse_cmdline);
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 78b5dc7cee3ab7..1e7ac977f7c0b4 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -4,6 +4,8 @@
* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/buildid.h>
#include <linux/init.h>
#include <linux/utsname.h>
diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c
index bbb6c3cb00e460..5b2722a93a48ca 100644
--- a/kernel/crash_reserve.c
+++ b/kernel/crash_reserve.c
@@ -109,7 +109,7 @@ static int __init parse_crashkernel_mem(char *cmdline,
size = memparse(cur, &tmp);
if (cur == tmp) {
- pr_warn("Memory value expected\n");
+ pr_warn("crashkernel: Memory value expected\n");
return -EINVAL;
}
cur = tmp;
@@ -132,7 +132,7 @@ static int __init parse_crashkernel_mem(char *cmdline,
cur++;
*crash_base = memparse(cur, &tmp);
if (cur == tmp) {
- pr_warn("Memory value expected after '@'\n");
+ pr_warn("crahskernel: Memory value expected after '@'\n");
return -EINVAL;
}
}
@@ -366,8 +366,10 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
+#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
insert_resource(&iomem_resource, &crashk_low_res);
#endif
+#endif
return 0;
}
@@ -448,8 +450,12 @@ retry:
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
+#ifdef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
+ insert_resource(&iomem_resource, &crashk_res);
+#endif
}
+#ifndef HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY
static __init int insert_crashkernel_resources(void)
{
if (crashk_res.start < crashk_res.end)
@@ -462,3 +468,4 @@ static __init int insert_crashkernel_resources(void)
}
early_initcall(insert_crashkernel_resources);
#endif
+#endif
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 58db8fd70471a1..5e2d51e1cdf694 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -570,9 +570,9 @@ static struct page *__dma_alloc_pages(struct device *dev, size_t size,
size = PAGE_ALIGN(size);
if (dma_alloc_direct(dev, ops))
return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp);
- if (!ops->alloc_pages)
+ if (!ops->alloc_pages_op)
return NULL;
- return ops->alloc_pages(dev, size, dma_handle, dir, gfp);
+ return ops->alloc_pages_op(dev, size, dma_handle, dir, gfp);
}
struct page *dma_alloc_pages(struct device *dev, size_t size,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 86fe172b595823..a5e0dfc44d24e2 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -69,11 +69,14 @@
* @alloc_size: Size of the allocated buffer.
* @list: The free list describing the number of free entries available
* from each index.
+ * @pad_slots: Number of preceding padding slots. Valid only in the first
+ * allocated non-padding slot.
*/
struct io_tlb_slot {
phys_addr_t orig_addr;
size_t alloc_size;
- unsigned int list;
+ unsigned short list;
+ unsigned short pad_slots;
};
static bool swiotlb_force_bounce;
@@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
mem->nslabs - i);
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}
memset(vaddr, 0, bytes);
@@ -821,12 +825,30 @@ void swiotlb_dev_init(struct device *dev)
#endif
}
-/*
- * Return the offset into a iotlb slot required to keep the device happy.
+/**
+ * swiotlb_align_offset() - Get required offset into an IO TLB allocation.
+ * @dev: Owning device.
+ * @align_mask: Allocation alignment mask.
+ * @addr: DMA address.
+ *
+ * Return the minimum offset from the start of an IO TLB allocation which is
+ * required for a given buffer address and allocation alignment to keep the
+ * device happy.
+ *
+ * First, the address bits covered by min_align_mask must be identical in the
+ * original address and the bounce buffer address. High bits are preserved by
+ * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra
+ * padding bytes before the bounce buffer.
+ *
+ * Second, @align_mask specifies which bits of the first allocated slot must
+ * be zero. This may require allocating additional padding slots, and then the
+ * offset (in bytes) from the first such padding slot is returned.
*/
-static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+static unsigned int swiotlb_align_offset(struct device *dev,
+ unsigned int align_mask, u64 addr)
{
- return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+ return addr & dma_get_min_align_mask(dev) &
+ (align_mask | (IO_TLB_SIZE - 1));
}
/*
@@ -841,27 +863,23 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
size_t alloc_size = mem->slots[index].alloc_size;
unsigned long pfn = PFN_DOWN(orig_addr);
unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
- unsigned int tlb_offset, orig_addr_offset;
+ int tlb_offset;
if (orig_addr == INVALID_PHYS_ADDR)
return;
- tlb_offset = tlb_addr & (IO_TLB_SIZE - 1);
- orig_addr_offset = swiotlb_align_offset(dev, orig_addr);
- if (tlb_offset < orig_addr_offset) {
- dev_WARN_ONCE(dev, 1,
- "Access before mapping start detected. orig offset %u, requested offset %u.\n",
- orig_addr_offset, tlb_offset);
- return;
- }
-
- tlb_offset -= orig_addr_offset;
- if (tlb_offset > alloc_size) {
- dev_WARN_ONCE(dev, 1,
- "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n",
- alloc_size, size, tlb_offset);
- return;
- }
+ /*
+ * It's valid for tlb_offset to be negative. This can happen when the
+ * "offset" returned by swiotlb_align_offset() is non-zero, and the
+ * tlb_addr is pointing within the first "offset" bytes of the second
+ * or subsequent slots of the allocated swiotlb area. While it's not
+ * valid for tlb_addr to be pointing within the first "offset" bytes
+ * of the first slot, there's no way to check for such an error since
+ * this function can't distinguish the first slot from the second and
+ * subsequent slots.
+ */
+ tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
+ swiotlb_align_offset(dev, 0, orig_addr);
orig_addr += tlb_offset;
alloc_size -= tlb_offset;
@@ -1005,7 +1023,7 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
unsigned long max_slots = get_max_slots(boundary_mask);
unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
unsigned int nslots = nr_slots(alloc_size), stride;
- unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr);
unsigned int index, slots_checked, count = 0, i;
unsigned long flags;
unsigned int slot_base;
@@ -1328,11 +1346,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
unsigned long attrs)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
- unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int offset;
struct io_tlb_pool *pool;
unsigned int i;
int index;
phys_addr_t tlb_addr;
+ unsigned short pad_slots;
if (!mem || !mem->nslabs) {
dev_warn_ratelimited(dev,
@@ -1349,6 +1368,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
return (phys_addr_t)DMA_MAPPING_ERROR;
}
+ offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr);
index = swiotlb_find_slots(dev, orig_addr,
alloc_size + offset, alloc_align_mask, &pool);
if (index == -1) {
@@ -1364,6 +1384,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
* This is needed when we sync the memory. Then we sync the buffer if
* needed.
*/
+ pad_slots = offset >> IO_TLB_SHIFT;
+ offset &= (IO_TLB_SIZE - 1);
+ index += pad_slots;
+ pool->slots[index].pad_slots = pad_slots;
for (i = 0; i < nr_slots(alloc_size + offset); i++)
pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
tlb_addr = slot_addr(pool->start, index) + offset;
@@ -1384,13 +1408,17 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
{
struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
unsigned long flags;
- unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
- int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
- int nslots = nr_slots(mem->slots[index].alloc_size + offset);
- int aindex = index / mem->area_nslabs;
- struct io_tlb_area *area = &mem->areas[aindex];
+ unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
+ int index, nslots, aindex;
+ struct io_tlb_area *area;
int count, i;
+ index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+ index -= mem->slots[index].pad_slots;
+ nslots = nr_slots(mem->slots[index].alloc_size + offset);
+ aindex = index / mem->area_nslabs;
+ area = &mem->areas[aindex];
+
/*
* Return the buffer to the free list by setting the corresponding
* entries to indicate the number of contiguous entries available.
@@ -1413,6 +1441,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
mem->slots[i].list = ++count;
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}
/*
@@ -1647,9 +1676,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
const char *dirname)
{
- atomic_long_set(&mem->total_used, 0);
- atomic_long_set(&mem->used_hiwater, 0);
-
mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
if (!mem->nslabs)
return;
@@ -1660,7 +1686,6 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
&fops_io_tlb_hiwater);
#ifdef CONFIG_SWIOTLB_DYNAMIC
- atomic_long_set(&mem->transient_nslabs, 0);
debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs,
mem, &fops_io_tlb_transient_used);
#endif
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 724e6d7e128f37..c5a0dc1f135f02 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7539,7 +7539,7 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
{
u64 size = 0;
-#ifdef CONFIG_HAVE_FAST_GUP
+#ifdef CONFIG_HAVE_GUP_FAST
pgd_t *pgdp, pgd;
p4d_t *p4dp, p4d;
pud_t *pudp, pud;
@@ -7587,7 +7587,7 @@ again:
if (pte_present(pte))
size = pte_leaf_size(pte);
pte_unmap(ptep);
-#endif /* CONFIG_HAVE_FAST_GUP */
+#endif /* CONFIG_HAVE_GUP_FAST */
return size;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 39a5046c2f0bf4..99076dbe27d83f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -714,6 +714,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
} else if (anon_vma_fork(tmp, mpnt))
goto fail_nomem_anon_vma_fork;
vm_flags_clear(tmp, VM_LOCKED_MASK);
+ /*
+ * Copy/update hugetlb private vma information.
+ */
+ if (is_vm_hugetlb_page(tmp))
+ hugetlb_dup_vma_private(tmp);
+
+ /*
+ * Link the vma into the MT. After using __mt_dup(), memory
+ * allocation is not necessary here, so it cannot fail.
+ */
+ vma_iter_bulk_store(&vmi, tmp);
+
+ mm->map_count++;
+
+ if (tmp->vm_ops && tmp->vm_ops->open)
+ tmp->vm_ops->open(tmp);
+
file = tmp->vm_file;
if (file) {
struct address_space *mapping = file->f_mapping;
@@ -730,25 +747,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
i_mmap_unlock_write(mapping);
}
- /*
- * Copy/update hugetlb private vma information.
- */
- if (is_vm_hugetlb_page(tmp))
- hugetlb_dup_vma_private(tmp);
-
- /*
- * Link the vma into the MT. After using __mt_dup(), memory
- * allocation is not necessary here, so it cannot fail.
- */
- vma_iter_bulk_store(&vmi, tmp);
-
- mm->map_count++;
if (!(tmp->vm_flags & VM_WIPEONFORK))
retval = copy_page_range(tmp, mpnt);
- if (tmp->vm_ops && tmp->vm_ops->open)
- tmp->vm_ops->open(tmp);
-
if (retval) {
mpnt = vma_next(&vmi);
goto loop_out;
@@ -1343,7 +1344,7 @@ static inline void __mmput(struct mm_struct *mm)
ksm_exit(mm);
khugepaged_exit(mm); /* must run before exit_mmap */
exit_mmap(mm);
- mm_put_huge_zero_page(mm);
+ mm_put_huge_zero_folio(mm);
set_mm_exe_file(mm, NULL);
if (!list_empty(&mm->mmlist)) {
spin_lock(&mmlist_lock);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ad3eaf2ab95961..bf9ae8a8686ff6 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1643,8 +1643,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
if (!((old->flags & new->flags) & IRQF_SHARED) ||
- (oldtype != (new->flags & IRQF_TRIGGER_MASK)) ||
- ((old->flags ^ new->flags) & IRQF_ONESHOT))
+ (oldtype != (new->flags & IRQF_TRIGGER_MASK)))
+ goto mismatch;
+
+ if ((old->flags & IRQF_ONESHOT) &&
+ (new->flags & IRQF_COND_ONESHOT))
+ new->flags |= IRQF_ONESHOT;
+ else if ((old->flags ^ new->flags) & IRQF_ONESHOT)
goto mismatch;
/* All handlers must agree on per-cpuness */
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 18edd57b5fe81e..22ea19a36e6e67 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -325,12 +325,6 @@ static unsigned long get_symbol_pos(unsigned long addr,
unsigned long symbol_start = 0, symbol_end = 0;
unsigned long i, low, high, mid;
- /* This kernel should never had been booted. */
- if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE))
- BUG_ON(!kallsyms_addresses);
- else
- BUG_ON(!kallsyms_offsets);
-
/* Do a binary search on the sorted kallsyms_addresses array. */
low = 0;
high = kallsyms_num_syms;
diff --git a/kernel/kallsyms_internal.h b/kernel/kallsyms_internal.h
index 27fabdcc40f579..85480274fc8fb1 100644
--- a/kernel/kallsyms_internal.h
+++ b/kernel/kallsyms_internal.h
@@ -5,27 +5,21 @@
#include <linux/types.h>
/*
- * These will be re-linked against their real values
- * during the second link stage.
+ * These will be re-linked against their real values during the second link
+ * stage. Preliminary values must be provided in the linker script using the
+ * PROVIDE() directive so that the first link stage can complete successfully.
*/
-extern const unsigned long kallsyms_addresses[] __weak;
-extern const int kallsyms_offsets[] __weak;
-extern const u8 kallsyms_names[] __weak;
+extern const unsigned long kallsyms_addresses[];
+extern const int kallsyms_offsets[];
+extern const u8 kallsyms_names[];
-/*
- * Tell the compiler that the count isn't in the small data section if the arch
- * has one (eg: FRV).
- */
-extern const unsigned int kallsyms_num_syms
-__section(".rodata") __attribute__((weak));
-
-extern const unsigned long kallsyms_relative_base
-__section(".rodata") __attribute__((weak));
+extern const unsigned int kallsyms_num_syms;
+extern const unsigned long kallsyms_relative_base;
-extern const char kallsyms_token_table[] __weak;
-extern const u16 kallsyms_token_index[] __weak;
+extern const char kallsyms_token_table[];
+extern const u16 kallsyms_token_index[];
-extern const unsigned int kallsyms_markers[] __weak;
-extern const u8 kallsyms_seqs_of_names[] __weak;
+extern const unsigned int kallsyms_markers[];
+extern const u8 kallsyms_seqs_of_names[];
#endif // LINUX_KALLSYMS_INTERNAL_H_
diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c
index 8a689b4ff4f982..2f84896a7bcbdf 100644
--- a/kernel/kallsyms_selftest.c
+++ b/kernel/kallsyms_selftest.c
@@ -82,7 +82,7 @@ static struct test_item test_items[] = {
ITEM_FUNC(kallsyms_test_func_static),
ITEM_FUNC(kallsyms_test_func),
ITEM_FUNC(kallsyms_test_func_weak),
- ITEM_FUNC(vmalloc),
+ ITEM_FUNC(vmalloc_noprof),
ITEM_FUNC(vfree),
#ifdef CONFIG_KALLSYMS_ALL
ITEM_DATA(kallsyms_test_var_bss_static),
diff --git a/kernel/kcov.c b/kernel/kcov.c
index f9ac2e9e460fc8..c3124f6d5536b7 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -627,7 +627,8 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd,
mode = kcov_get_mode(remote_arg->trace_mode);
if (mode < 0)
return mode;
- if (remote_arg->area_size > LONG_MAX / sizeof(unsigned long))
+ if ((unsigned long)remote_arg->area_size >
+ LONG_MAX / sizeof(unsigned long))
return -EINVAL;
kcov->mode = mode;
t->kcov = kcov;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9d9095e8179286..65adc815fc6e63 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1567,10 +1567,17 @@ static int check_kprobe_address_safe(struct kprobe *p,
jump_label_lock();
preempt_disable();
- /* Ensure it is not in reserved area nor out of text */
- if (!(core_kernel_text((unsigned long) p->addr) ||
- is_module_text_address((unsigned long) p->addr)) ||
- in_gate_area_no_mm((unsigned long) p->addr) ||
+ /* Ensure the address is in a text area, and find a module if exists. */
+ *probed_mod = NULL;
+ if (!core_kernel_text((unsigned long) p->addr)) {
+ *probed_mod = __module_text_address((unsigned long) p->addr);
+ if (!(*probed_mod)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ /* Ensure it is not in reserved area. */
+ if (in_gate_area_no_mm((unsigned long) p->addr) ||
within_kprobe_blacklist((unsigned long) p->addr) ||
jump_label_text_reserved(p->addr, p->addr) ||
static_call_text_reserved(p->addr, p->addr) ||
@@ -1580,8 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
goto out;
}
- /* Check if 'p' is probing a module. */
- *probed_mod = __module_text_address((unsigned long) p->addr);
+ /* Get module refcount and reject __init functions for loaded modules. */
if (*probed_mod) {
/*
* We must hold a refcount of the probed module while updating
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 495b69a71a5d7d..07fb5987b42bdf 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -228,8 +228,8 @@ KERNEL_ATTR_RW(rcu_normal);
/*
* Make /sys/kernel/notes give the raw contents of our kernel .notes section.
*/
-extern const void __start_notes __weak;
-extern const void __stop_notes __weak;
+extern const void __start_notes;
+extern const void __stop_notes;
#define notes_size (&__stop_notes - &__start_notes)
static ssize_t notes_read(struct file *filp, struct kobject *kobj,
diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index c3ced519e14ba4..f3e0329337f61c 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -236,6 +236,10 @@ choice
possible to load a signed module containing the algorithm to check
the signature on that module.
+config MODULE_SIG_SHA1
+ bool "Sign modules with SHA-1"
+ select CRYPTO_SHA1
+
config MODULE_SIG_SHA256
bool "Sign modules with SHA-256"
select CRYPTO_SHA256
@@ -265,6 +269,7 @@ endchoice
config MODULE_SIG_HASH
string
depends on MODULE_SIG || IMA_APPRAISE_MODSIG
+ default "sha1" if MODULE_SIG_SHA1
default "sha256" if MODULE_SIG_SHA256
default "sha384" if MODULE_SIG_SHA384
default "sha512" if MODULE_SIG_SHA512
diff --git a/kernel/module/main.c b/kernel/module/main.c
index e1e8a7a9d6c193..2d25eebc549dbe 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -56,6 +56,7 @@
#include <linux/dynamic_debug.h>
#include <linux/audit.h>
#include <linux/cfi.h>
+#include <linux/codetag.h>
#include <linux/debugfs.h>
#include <uapi/linux/module.h>
#include "internal.h"
@@ -1210,15 +1211,19 @@ static void *module_memory_alloc(unsigned int size, enum mod_mem_type type)
return module_alloc(size);
}
-static void module_memory_free(void *ptr, enum mod_mem_type type)
+static void module_memory_free(void *ptr, enum mod_mem_type type,
+ bool unload_codetags)
{
+ if (!unload_codetags && mod_mem_type_is_core_data(type))
+ return;
+
if (mod_mem_use_vmalloc(type))
vfree(ptr);
else
module_memfree(ptr);
}
-static void free_mod_mem(struct module *mod)
+static void free_mod_mem(struct module *mod, bool unload_codetags)
{
for_each_mod_mem_type(type) {
struct module_memory *mod_mem = &mod->mem[type];
@@ -1229,19 +1234,27 @@ static void free_mod_mem(struct module *mod)
/* Free lock-classes; relies on the preceding sync_rcu(). */
lockdep_free_key_range(mod_mem->base, mod_mem->size);
if (mod_mem->size)
- module_memory_free(mod_mem->base, type);
+ module_memory_free(mod_mem->base, type,
+ unload_codetags);
}
/* MOD_DATA hosts mod, so free it at last */
lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size);
- module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA);
+ module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA, unload_codetags);
}
/* Free a module, remove from lists, etc. */
static void free_module(struct module *mod)
{
+ bool unload_codetags;
+
trace_module_free(mod);
+ unload_codetags = codetag_unload_module(mod);
+ if (!unload_codetags)
+ pr_warn("%s: memory allocation(s) from the module still alive, cannot unload cleanly\n",
+ mod->name);
+
mod_sysfs_teardown(mod);
/*
@@ -1283,7 +1296,7 @@ static void free_module(struct module *mod)
kfree(mod->args);
percpu_modfree(mod);
- free_mod_mem(mod);
+ free_mod_mem(mod, unload_codetags);
}
void *__symbol_get(const char *symbol)
@@ -2296,7 +2309,7 @@ static int move_module(struct module *mod, struct load_info *info)
return 0;
out_enomem:
for (t--; t >= 0; t--)
- module_memory_free(mod->mem[t].base, t);
+ module_memory_free(mod->mem[t].base, t, true);
return ret;
}
@@ -2426,7 +2439,7 @@ static void module_deallocate(struct module *mod, struct load_info *info)
percpu_modfree(mod);
module_arch_freeing_init(mod);
- free_mod_mem(mod);
+ free_mod_mem(mod, true);
}
int __weak module_finalize(const Elf_Ehdr *hdr,
@@ -2995,6 +3008,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Get rid of temporary copy. */
free_copy(info, flags);
+ codetag_load_module(mod);
+
/* Done! */
trace_module_load(mod);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index e3ae93bbcb9b50..09f8397bae15fb 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -106,6 +106,12 @@ static void s2idle_enter(void)
swait_event_exclusive(s2idle_wait_head,
s2idle_state == S2IDLE_STATE_WAKE);
+ /*
+ * Kick all CPUs to ensure that they resume their timers and restore
+ * consistent system state.
+ */
+ wake_up_all_idle_cpus();
+
cpus_read_unlock();
raw_spin_lock_irq(&s2idle_lock);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ca5146006b94c6..adf99c05adcafc 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2009,6 +2009,12 @@ static int console_trylock_spinning(void)
*/
mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
+ /*
+ * Update @console_may_schedule for trylock because the previous
+ * owner may have been schedulable.
+ */
+ console_may_schedule = 0;
+
return 1;
}
diff --git a/kernel/profile.c b/kernel/profile.c
index 8a77769bc4b4cb..2b775cc5c28f96 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -344,49 +344,6 @@ void profile_tick(int type)
#include <linux/seq_file.h>
#include <linux/uaccess.h>
-static int prof_cpu_mask_proc_show(struct seq_file *m, void *v)
-{
- seq_printf(m, "%*pb\n", cpumask_pr_args(prof_cpu_mask));
- return 0;
-}
-
-static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, prof_cpu_mask_proc_show, NULL);
-}
-
-static ssize_t prof_cpu_mask_proc_write(struct file *file,
- const char __user *buffer, size_t count, loff_t *pos)
-{
- cpumask_var_t new_value;
- int err;
-
- if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
- return -ENOMEM;
-
- err = cpumask_parse_user(buffer, count, new_value);
- if (!err) {
- cpumask_copy(prof_cpu_mask, new_value);
- err = count;
- }
- free_cpumask_var(new_value);
- return err;
-}
-
-static const struct proc_ops prof_cpu_mask_proc_ops = {
- .proc_open = prof_cpu_mask_proc_open,
- .proc_read = seq_read,
- .proc_lseek = seq_lseek,
- .proc_release = single_release,
- .proc_write = prof_cpu_mask_proc_write,
-};
-
-void create_prof_cpu_mask(void)
-{
- /* create /proc/irq/prof_cpu_mask */
- proc_create("irq/prof_cpu_mask", 0600, NULL, &prof_cpu_mask_proc_ops);
-}
-
/*
* This function accesses profiling information. The returned data is
* binary: the sampling step and the actual contents of the profile
diff --git a/kernel/regset.c b/kernel/regset.c
index 586823786f397f..b2871fa68b2a7f 100644
--- a/kernel/regset.c
+++ b/kernel/regset.c
@@ -16,14 +16,14 @@ static int __regset_get(struct task_struct *target,
if (size > regset->n * regset->size)
size = regset->n * regset->size;
if (!p) {
- to_free = p = kzalloc(size, GFP_KERNEL);
+ to_free = p = kvzalloc(size, GFP_KERNEL);
if (!p)
return -ENOMEM;
}
res = regset->regset_get(target, regset,
(struct membuf){.p = p, .left = size});
if (res < 0) {
- kfree(to_free);
+ kvfree(to_free);
return res;
}
*data = p;
@@ -71,6 +71,6 @@ int copy_regset_to_user(struct task_struct *target,
ret = regset_get_alloc(target, regset, size, &buf);
if (ret > 0)
ret = copy_to_user(data, buf, ret) ? -EFAULT : 0;
- kfree(buf);
+ kvfree(buf);
return ret;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 03be0d1330a6b2..c62805dbd6088b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -696,15 +696,21 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
*
* XXX could add max_slice to the augmented data to track this.
*/
-static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static s64 entity_lag(u64 avruntime, struct sched_entity *se)
{
- s64 lag, limit;
+ s64 vlag, limit;
+
+ vlag = avruntime - se->vruntime;
+ limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
+ return clamp(vlag, -limit, limit);
+}
+
+static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
SCHED_WARN_ON(!se->on_rq);
- lag = avg_vruntime(cfs_rq) - se->vruntime;
- limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
- se->vlag = clamp(lag, -limit, limit);
+ se->vlag = entity_lag(avg_vruntime(cfs_rq), se);
}
/*
@@ -3676,11 +3682,10 @@ static inline void
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
#endif
-static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
+static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
unsigned long weight)
{
unsigned long old_weight = se->load.weight;
- u64 avruntime = avg_vruntime(cfs_rq);
s64 vlag, vslice;
/*
@@ -3761,7 +3766,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
* = V - vl'
*/
if (avruntime != se->vruntime) {
- vlag = (s64)(avruntime - se->vruntime);
+ vlag = entity_lag(avruntime, se);
vlag = div_s64(vlag * old_weight, weight);
se->vruntime = avruntime - vlag;
}
@@ -3787,25 +3792,26 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{
bool curr = cfs_rq->curr == se;
+ u64 avruntime;
if (se->on_rq) {
/* commit outstanding execution time */
- if (curr)
- update_curr(cfs_rq);
- else
+ update_curr(cfs_rq);
+ avruntime = avg_vruntime(cfs_rq);
+ if (!curr)
__dequeue_entity(cfs_rq, se);
update_load_sub(&cfs_rq->load, se->load.weight);
}
dequeue_load_avg(cfs_rq, se);
- if (!se->on_rq) {
+ if (se->on_rq) {
+ reweight_eevdf(se, avruntime, weight);
+ } else {
/*
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
* we need to scale se->vlag when w_i changes.
*/
se->vlag = div_s64(se->vlag * se->load.weight, weight);
- } else {
- reweight_eevdf(cfs_rq, se, weight);
}
update_load_set(&se->load, weight);
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 373d42c707bc5d..5891e715f00d02 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -46,7 +46,16 @@ int housekeeping_any_cpu(enum hk_type type)
if (cpu < nr_cpu_ids)
return cpu;
- return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
+ cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask);
+ if (likely(cpu < nr_cpu_ids))
+ return cpu;
+ /*
+ * Unless we have another problem this can only happen
+ * at boot time before start_secondary() brings the 1st
+ * housekeeping CPU up.
+ */
+ WARN_ON_ONCE(system_state == SYSTEM_RUNNING ||
+ type != HK_TYPE_TIMER);
}
}
return smp_processor_id();
@@ -109,6 +118,7 @@ static void __init housekeeping_setup_type(enum hk_type type,
static int __init housekeeping_setup(char *str, unsigned long flags)
{
cpumask_var_t non_housekeeping_mask, housekeeping_staging;
+ unsigned int first_cpu;
int err = 0;
if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
@@ -129,7 +139,8 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
cpumask_andnot(housekeeping_staging,
cpu_possible_mask, non_housekeeping_mask);
- if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) {
+ first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging);
+ if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) {
__cpumask_set_cpu(smp_processor_id(), housekeeping_staging);
__cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask);
if (!housekeeping.flags) {
@@ -138,6 +149,9 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
}
}
+ if (cpumask_empty(non_housekeeping_mask))
+ goto free_housekeeping_staging;
+
if (!housekeeping.flags) {
/* First setup call ("nohz_full=" or "isolcpus=") */
enum hk_type type;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d2242679239ec5..ae50f212775e5c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -79,6 +79,8 @@
# include <asm/paravirt_api_clock.h>
#endif
+#include <asm/barrier.h>
+
#include "cpupri.h"
#include "cpudeadline.h"
@@ -3445,13 +3447,19 @@ static inline void switch_mm_cid(struct rq *rq,
* between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
* Provide it here.
*/
- if (!prev->mm) // from kernel
+ if (!prev->mm) { // from kernel
smp_mb();
- /*
- * user -> user transition guarantees a memory barrier through
- * switch_mm() when current->mm changes. If current->mm is
- * unchanged, no barrier is needed.
- */
+ } else { // from user
+ /*
+ * user->user transition relies on an implicit
+ * memory barrier in switch_mm() when
+ * current->mm changes. If the architecture
+ * switch_mm() does not have an implicit memory
+ * barrier, it is emitted here. If current->mm
+ * is unchanged, no barrier is needed.
+ */
+ smp_mb__after_switch_mm();
+ }
}
if (prev->mm_cid_active) {
mm_cid_snapshot_time(rq, prev->mm);
diff --git a/kernel/sys.c b/kernel/sys.c
index f8e543f1e38a06..8bb106a56b3a5f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2408,8 +2408,11 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
return -EINVAL;
- /* PARISC cannot allow mdwe as it needs writable stacks */
- if (IS_ENABLED(CONFIG_PARISC))
+ /*
+ * EOPNOTSUPP might be more appropriate here in principle, but
+ * existing userspace depends on EINVAL specifically.
+ */
+ if (!arch_memory_deny_write_exec_supported())
return -EINVAL;
current_bits = get_current_mdwe();
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index faad00cce269c3..d7eee421d4bc33 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -196,6 +196,7 @@ COND_SYSCALL(migrate_pages);
COND_SYSCALL(move_pages);
COND_SYSCALL(set_mempolicy_home_node);
COND_SYSCALL(cachestat);
+COND_SYSCALL(mseal);
COND_SYSCALL(perf_event_open);
COND_SYSCALL(accept4);
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index 9de66bbbb3d155..4782edcbe7b9b4 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -129,15 +129,17 @@ static int posix_clock_open(struct inode *inode, struct file *fp)
goto out;
}
pccontext->clk = clk;
- fp->private_data = pccontext;
- if (clk->ops.open)
+ if (clk->ops.open) {
err = clk->ops.open(pccontext, fp->f_mode);
- else
- err = 0;
-
- if (!err) {
- get_device(clk->dev);
+ if (err) {
+ kfree(pccontext);
+ goto out;
+ }
}
+
+ fp->private_data = pccontext;
+ get_device(clk->dev);
+ err = 0;
out:
up_read(&clk->rwsem);
return err;
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index fb0fdec8719a13..d88b13076b7944 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -7,6 +7,7 @@
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -84,7 +85,7 @@ int tick_is_oneshot_available(void)
*/
static void tick_periodic(int cpu)
{
- if (tick_do_timer_cpu == cpu) {
+ if (READ_ONCE(tick_do_timer_cpu) == cpu) {
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
@@ -215,8 +216,8 @@ static void tick_setup_device(struct tick_device *td,
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
- if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
- tick_do_timer_cpu = cpu;
+ if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
tick_next_period = ktime_get();
#ifdef CONFIG_NO_HZ_FULL
/*
@@ -232,7 +233,7 @@ static void tick_setup_device(struct tick_device *td,
!tick_nohz_full_cpu(cpu)) {
tick_take_do_timer_from_boot();
tick_do_timer_boot_cpu = -1;
- WARN_ON(tick_do_timer_cpu != cpu);
+ WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
#endif
}
@@ -406,10 +407,10 @@ void tick_assert_timekeeping_handover(void)
int tick_cpu_dying(unsigned int dying_cpu)
{
/*
- * If the current CPU is the timekeeper, it's the only one that
- * can safely hand over its duty. Also all online CPUs are in
- * stop machine, guaranteed not to be idle, therefore it's safe
- * to pick any online successor.
+ * If the current CPU is the timekeeper, it's the only one that can
+ * safely hand over its duty. Also all online CPUs are in stop
+ * machine, guaranteed not to be idle, therefore there is no
+ * concurrency and it's safe to pick any online successor.
*/
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 269e21590df536..71a792cd893620 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -8,6 +8,7 @@
*
* Started by: Thomas Gleixner and Ingo Molnar
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -204,7 +205,7 @@ static inline void tick_sched_flag_clear(struct tick_sched *ts,
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
- int cpu = smp_processor_id();
+ int tick_cpu, cpu = smp_processor_id();
/*
* Check if the do_timer duty was dropped. We don't care about
@@ -216,16 +217,18 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
* If nohz_full is enabled, this should not happen because the
* 'tick_do_timer_cpu' CPU never relinquishes.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
- unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
- tick_do_timer_cpu = cpu;
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
+ tick_cpu = cpu;
}
/* Check if jiffies need an update */
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
tick_do_update_jiffies64(now);
/*
@@ -610,7 +613,7 @@ bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
* timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
- if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+ if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
return false;
return true;
}
@@ -697,6 +700,7 @@ bool tick_nohz_tick_stopped_cpu(int cpu)
/**
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
+ * @now: current ktime_t
*
* Called from interrupt entry when the CPU was idle
*
@@ -794,7 +798,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
- * This function returns -1 if NOHZ is not enabled.
+ * Return: -1 if NOHZ is not enabled, else total idle time of the @cpu
*/
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
@@ -820,7 +824,7 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
- * This function returns -1 if NOHZ is not enabled.
+ * Return: -1 if NOHZ is not enabled, else total iowait time of @cpu
*/
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
@@ -890,6 +894,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
+ int tick_cpu;
basemono = get_jiffies_update(&basejiff);
ts->last_jiffies = basejiff;
@@ -946,9 +951,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
- if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
- !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu != cpu &&
+ (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
delta = KTIME_MAX;
/* Calculate the next expiry time */
@@ -969,6 +974,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
+ int tick_cpu;
u64 expires;
/* Make sure we won't be trying to stop it twice in a row. */
@@ -1006,10 +1012,11 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* do_timer() never gets invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
- if (cpu == tick_do_timer_cpu) {
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu == cpu) {
+ WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
- } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ } else if (tick_cpu != TICK_DO_TIMER_NONE) {
tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
}
@@ -1172,15 +1179,17 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
if (tick_nohz_full_enabled()) {
+ int tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
*/
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
return false;
/* Should not happen for nohz-full */
- if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+ if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE))
return false;
}
@@ -1287,6 +1296,8 @@ void tick_nohz_irq_exit(void)
/**
* tick_nohz_idle_got_tick - Check whether or not the tick handler has run
+ *
+ * Return: %true if the tick handler has run, otherwise %false
*/
bool tick_nohz_idle_got_tick(void)
{
@@ -1305,6 +1316,8 @@ bool tick_nohz_idle_got_tick(void)
* stopped, it returns the next hrtimer.
*
* Called from power state control code with interrupts disabled
+ *
+ * Return: the next expiration time
*/
ktime_t tick_nohz_get_next_hrtimer(void)
{
@@ -1320,6 +1333,8 @@ ktime_t tick_nohz_get_next_hrtimer(void)
* The return value of this function and/or the value returned by it through the
* @delta_next pointer can be negative which must be taken into account by its
* callers.
+ *
+ * Return: the expected length of the current sleep
*/
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
@@ -1357,8 +1372,11 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
/**
* tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
* for a particular CPU.
+ * @cpu: target CPU number
*
* Called from the schedutil frequency scaling governor in scheduler context.
+ *
+ * Return: the current idle calls counter value for @cpu
*/
unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
{
@@ -1371,6 +1389,8 @@ unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
* tick_nohz_get_idle_calls - return the current idle calls counter value
*
* Called from the schedutil frequency scaling governor in scheduler context.
+ *
+ * Return: the current idle calls counter value for the current CPU
*/
unsigned long tick_nohz_get_idle_calls(void)
{
@@ -1559,7 +1579,7 @@ early_param("skew_tick", skew_tick);
/**
* tick_setup_sched_timer - setup the tick emulation timer
- * @mode: tick_nohz_mode to setup for
+ * @hrtimer: whether to use the hrtimer or not
*/
void tick_setup_sched_timer(bool hrtimer)
{
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index e11c4dc65bcb24..b4a7822f495d34 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -46,8 +46,8 @@ struct tick_device {
* @next_tick: Next tick to be fired when in dynticks mode.
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
* @idle_waketime: Time when the idle was interrupted
+ * @idle_sleeptime_seq: sequence counter for data consistency
* @idle_entrytime: Time when the idle call was entered
- * @nohz_mode: Mode - one state of tick_nohz_mode
* @last_jiffies: Base jiffies snapshot when next event was last computed
* @timer_expires_base: Base time clock monotonic for @timer_expires
* @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index dee29f1f5b75f3..3baf2fbe6848f0 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -64,15 +64,15 @@ EXPORT_SYMBOL(jiffies_64);
/*
* The timer wheel has LVL_DEPTH array levels. Each level provides an array of
- * LVL_SIZE buckets. Each level is driven by its own clock and therefor each
+ * LVL_SIZE buckets. Each level is driven by its own clock and therefore each
* level has a different granularity.
*
- * The level granularity is: LVL_CLK_DIV ^ lvl
+ * The level granularity is: LVL_CLK_DIV ^ level
* The level clock frequency is: HZ / (LVL_CLK_DIV ^ level)
*
* The array level of a newly armed timer depends on the relative expiry
* time. The farther the expiry time is away the higher the array level and
- * therefor the granularity becomes.
+ * therefore the granularity becomes.
*
* Contrary to the original timer wheel implementation, which aims for 'exact'
* expiry of the timers, this implementation removes the need for recascading
@@ -207,7 +207,7 @@ EXPORT_SYMBOL(jiffies_64);
* struct timer_base - Per CPU timer base (number of base depends on config)
* @lock: Lock protecting the timer_base
* @running_timer: When expiring timers, the lock is dropped. To make
- * sure not to race agains deleting/modifying a
+ * sure not to race against deleting/modifying a
* currently running timer, the pointer is set to the
* timer, which expires at the moment. If no timer is
* running, the pointer is NULL.
@@ -737,7 +737,7 @@ static bool timer_is_static_object(void *addr)
}
/*
- * fixup_init is called when:
+ * timer_fixup_init is called when:
* - an active object is initialized
*/
static bool timer_fixup_init(void *addr, enum debug_obj_state state)
@@ -761,7 +761,7 @@ static void stub_timer(struct timer_list *unused)
}
/*
- * fixup_activate is called when:
+ * timer_fixup_activate is called when:
* - an active object is activated
* - an unknown non-static object is activated
*/
@@ -783,7 +783,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
}
/*
- * fixup_free is called when:
+ * timer_fixup_free is called when:
* - an active object is freed
*/
static bool timer_fixup_free(void *addr, enum debug_obj_state state)
@@ -801,7 +801,7 @@ static bool timer_fixup_free(void *addr, enum debug_obj_state state)
}
/*
- * fixup_assert_init is called when:
+ * timer_fixup_assert_init is called when:
* - an untracked/uninit-ed object is found
*/
static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
@@ -914,7 +914,7 @@ static void do_init_timer(struct timer_list *timer,
* @key: lockdep class key of the fake lock used for tracking timer
* sync lock dependencies
*
- * init_timer_key() must be done to a timer prior calling *any* of the
+ * init_timer_key() must be done to a timer prior to calling *any* of the
* other timer functions.
*/
void init_timer_key(struct timer_list *timer,
@@ -1417,7 +1417,7 @@ static int __timer_delete(struct timer_list *timer, bool shutdown)
* If @shutdown is set then the lock has to be taken whether the
* timer is pending or not to protect against a concurrent rearm
* which might hit between the lockless pending check and the lock
- * aquisition. By taking the lock it is ensured that such a newly
+ * acquisition. By taking the lock it is ensured that such a newly
* enqueued timer is dequeued and cannot end up with
* timer->function == NULL in the expiry code.
*
@@ -2306,7 +2306,7 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
/*
* When timer base is not set idle, undo the effect of
- * tmigr_cpu_deactivate() to prevent inconsitent states - active
+ * tmigr_cpu_deactivate() to prevent inconsistent states - active
* timer base but inactive timer migration hierarchy.
*
* When timer base was already marked idle, nothing will be
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index c63a0afdcebed5..ccba875d2234fe 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -751,6 +751,33 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
first_childevt = evt = data->evt;
+ /*
+ * Walking the hierarchy is required in any case when a
+ * remote expiry was done before. This ensures to not lose
+ * already queued events in non active groups (see section
+ * "Required event and timerqueue update after a remote
+ * expiry" in the documentation at the top).
+ *
+ * The two call sites which are executed without a remote expiry
+ * before, are not prevented from propagating changes through
+ * the hierarchy by the return:
+ * - When entering this path by tmigr_new_timer(), @evt->ignore
+ * is never set.
+ * - tmigr_inactive_up() takes care of the propagation by
+ * itself and ignores the return value. But an immediate
+ * return is possible if there is a parent, sparing group
+ * locking at this level, because the upper walking call to
+ * the parent will take care about removing this event from
+ * within the group and update next_expiry accordingly.
+ *
+ * However if there is no parent, ie: the hierarchy has only a
+ * single level so @group is the top level group, make sure the
+ * first event information of the group is updated properly and
+ * also handled properly, so skip this fast return path.
+ */
+ if (evt->ignore && !remote && group->parent)
+ return true;
+
raw_spin_lock(&group->lock);
childstate.state = 0;
@@ -762,8 +789,11 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
* queue when the expiry time changed only or when it could be ignored.
*/
if (timerqueue_node_queued(&evt->nextevt)) {
- if ((evt->nextevt.expires == nextexp) && !evt->ignore)
+ if ((evt->nextevt.expires == nextexp) && !evt->ignore) {
+ /* Make sure not to miss a new CPU event with the same expiry */
+ evt->cpu = first_childevt->cpu;
goto check_toplvl;
+ }
if (!timerqueue_del(&group->events, &evt->nextevt))
WRITE_ONCE(group->next_expiry, KTIME_MAX);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61c541c36596d9..47345bf1d4a9f7 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -965,7 +965,7 @@ config FTRACE_RECORD_RECURSION
config FTRACE_RECORD_RECURSION_SIZE
int "Max number of recursed functions to record"
- default 128
+ default 128
depends on FTRACE_RECORD_RECURSION
help
This defines the limit of number of functions that can be
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d5d94510afd3f8..8fd292d34d8984 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -524,8 +524,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!buts->buf_size || !buts->buf_nr)
return -EINVAL;
- strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
- buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
+ strscpy_pad(buts->name, name, BLKTRACE_BDEV_SIZE);
/*
* some device names have larger paths - convert the slashes
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0a5c4efc73c367..9dc605f08a2314 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2728,7 +2728,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
.release = bpf_kprobe_multi_link_release,
- .dealloc = bpf_kprobe_multi_link_dealloc,
+ .dealloc_deferred = bpf_kprobe_multi_link_dealloc,
.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
};
@@ -3157,6 +3157,9 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link)
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
+ if (umulti_link->task)
+ put_task_struct(umulti_link->task);
+ path_put(&umulti_link->path);
}
static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
@@ -3164,9 +3167,6 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
struct bpf_uprobe_multi_link *umulti_link;
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
- if (umulti_link->task)
- put_task_struct(umulti_link->task);
- path_put(&umulti_link->path);
kvfree(umulti_link->uprobes);
kfree(umulti_link);
}
@@ -3242,7 +3242,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
.release = bpf_uprobe_multi_link_release,
- .dealloc = bpf_uprobe_multi_link_dealloc,
+ .dealloc_deferred = bpf_uprobe_multi_link_dealloc,
.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
};
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 25476ead681b84..6511dc3a00da84 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1393,7 +1393,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
- local_inc(&cpu_buffer->pages_touched);
/*
* Just make sure we have seen our old_write and synchronize
* with any interrupts that come in.
@@ -1430,8 +1429,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
*/
local_set(&next_page->page->commit, 0);
- /* Again, either we update tail_page or an interrupt does */
- (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
+ /* Either we update tail_page or an interrupt does */
+ if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page))
+ local_inc(&cpu_buffer->pages_touched);
}
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7c364b87352eed..52f75c36bbca49 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1670,6 +1670,7 @@ static int trace_format_open(struct inode *inode, struct file *file)
return 0;
}
+#ifdef CONFIG_PERF_EVENTS
static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
@@ -1684,6 +1685,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}
+#endif
static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
@@ -2152,10 +2154,12 @@ static const struct file_operations ftrace_event_format_fops = {
.release = seq_release,
};
+#ifdef CONFIG_PERF_EVENTS
static const struct file_operations ftrace_event_id_fops = {
.read = event_id_read,
.llseek = default_llseek,
};
+#endif
static const struct file_operations ftrace_event_filter_fops = {
.open = tracing_open_file_tr,
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 217169de0920ed..dfe3ee6035ecc7 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -839,7 +839,7 @@ out:
void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs)
{
struct probe_entry_arg *earg = tp->entry_arg;
- unsigned long val;
+ unsigned long val = 0;
int i;
if (!earg)
diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
index f95516cd45bbe2..1d5eadd9dd61cd 100644
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -198,18 +198,18 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_NUMBER(PG_private);
VMCOREINFO_NUMBER(PG_swapcache);
VMCOREINFO_NUMBER(PG_swapbacked);
- VMCOREINFO_NUMBER(PG_slab);
+#define PAGE_SLAB_MAPCOUNT_VALUE (~PG_slab)
+ VMCOREINFO_NUMBER(PAGE_SLAB_MAPCOUNT_VALUE);
#ifdef CONFIG_MEMORY_FAILURE
VMCOREINFO_NUMBER(PG_hwpoison);
#endif
VMCOREINFO_NUMBER(PG_head_mask);
#define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy)
VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
-#ifdef CONFIG_HUGETLB_PAGE
- VMCOREINFO_NUMBER(PG_hugetlb);
+#define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb)
+ VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE);
#define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline)
VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
-#endif
#ifdef CONFIG_KALLSYMS
VMCOREINFO_SYMBOL(kallsyms_names);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c63a5fbf1f1c2b..c4b6b77f36bf81 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -375,7 +375,7 @@ config DEBUG_INFO_SPLIT
Incompatible with older versions of ccache.
config DEBUG_INFO_BTF
- bool "Generate BTF typeinfo"
+ bool "Generate BTF type information"
depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED
depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST
depends on BPF_SYSCALL
@@ -408,7 +408,8 @@ config PAHOLE_HAS_LANG_EXCLUDE
using DEBUG_INFO_BTF_MODULES.
config DEBUG_INFO_BTF_MODULES
- def_bool y
+ bool "Generate BTF type information for kernel modules"
+ default y
depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
help
Generate compact split BTF type information for kernel modules.
@@ -968,6 +969,37 @@ config DEBUG_STACKOVERFLOW
If in doubt, say "N".
+config CODE_TAGGING
+ bool
+ select KALLSYMS
+
+config MEM_ALLOC_PROFILING
+ bool "Enable memory allocation profiling"
+ default n
+ depends on PROC_FS
+ depends on !DEBUG_FORCE_WEAK_PER_CPU
+ select CODE_TAGGING
+ select PAGE_EXTENSION
+ select SLAB_OBJ_EXT
+ help
+ Track allocation source code and record total allocation size
+ initiated at that code location. The mechanism can be used to track
+ memory leaks with a low performance and memory impact.
+
+config MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
+ bool "Enable memory allocation profiling by default"
+ default y
+ depends on MEM_ALLOC_PROFILING
+
+config MEM_ALLOC_PROFILING_DEBUG
+ bool "Memory allocation profiler debugging"
+ default n
+ depends on MEM_ALLOC_PROFILING
+ select MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
+ help
+ Adds warnings with helpful error messages for memory allocation
+ profiling.
+
source "lib/Kconfig.kasan"
source "lib/Kconfig.kfence"
source "lib/Kconfig.kmsan"
@@ -2890,7 +2922,7 @@ config TEST_FREE_PAGES
config TEST_FPU
tristate "Test floating point operations in kernel space"
- depends on X86 && !KCOV_INSTRUMENT_ALL
+ depends on ARCH_HAS_KERNEL_FPU_SUPPORT && !KCOV_INSTRUMENT_ALL
help
Enable this option to add /sys/kernel/debug/selftest_helpers/test_fpu
which will trigger a sequence of floating point operations. This is used
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index b5c0e6576749dc..537e1b3f57340c 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -122,6 +122,7 @@ config KDB_DEFAULT_ENABLE
config KDB_KEYBOARD
bool "KGDB_KDB: keyboard as input device"
depends on VT && KGDB_KDB && !PARISC
+ depends on HAS_IOPORT
default n
help
KDB can use a PS/2 type keyboard for an input device
diff --git a/lib/Makefile b/lib/Makefile
index ffc6b2341b45a5..1f6c51420bf6b3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -110,30 +110,10 @@ CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE)
obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o
obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o
-#
-# CFLAGS for compiling floating point code inside the kernel. x86/Makefile turns
-# off the generation of FPU/SSE* instructions for kernel proper but FPU_FLAGS
-# get appended last to CFLAGS and thus override those previous compiler options.
-#
-FPU_CFLAGS := -msse -msse2
-ifdef CONFIG_CC_IS_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
-#
-# The "-msse" in the first argument is there so that the
-# -mpreferred-stack-boundary=3 build error:
-#
-# -mpreferred-stack-boundary=3 is not between 4 and 12
-#
-# can be triggered. Otherwise gcc doesn't complain.
-FPU_CFLAGS += -mhard-float
-FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4)
-endif
-
obj-$(CONFIG_TEST_FPU) += test_fpu.o
-CFLAGS_test_fpu.o += $(FPU_CFLAGS)
+test_fpu-y := test_fpu_glue.o test_fpu_impl.o
+CFLAGS_test_fpu_impl.o += $(CC_FLAGS_FPU)
+CFLAGS_REMOVE_test_fpu_impl.o += $(CC_FLAGS_NO_FPU)
# Some KUnit files (hooks.o) need to be built-in even when KUnit is a module,
# so we can't just use obj-$(CONFIG_KUNIT).
@@ -233,6 +213,9 @@ obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
of-reconfig-notifier-error-inject.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+obj-$(CONFIG_CODE_TAGGING) += codetag.o
+obj-$(CONFIG_MEM_ALLOC_PROFILING) += alloc_tag.o
+
lib-$(CONFIG_GENERIC_BUG) += bug.o
obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
new file mode 100644
index 00000000000000..531dbe2f545635
--- /dev/null
+++ b/lib/alloc_tag.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/alloc_tag.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/page_ext.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+
+static struct codetag_type *alloc_tag_cttype;
+
+DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
+EXPORT_SYMBOL(_shared_alloc_tag);
+
+DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+ mem_alloc_profiling_key);
+
+static void *allocinfo_start(struct seq_file *m, loff_t *pos)
+{
+ struct codetag_iterator *iter;
+ struct codetag *ct;
+ loff_t node = *pos;
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ m->private = iter;
+ if (!iter)
+ return NULL;
+
+ codetag_lock_module_list(alloc_tag_cttype, true);
+ *iter = codetag_get_ct_iter(alloc_tag_cttype);
+ while ((ct = codetag_next_ct(iter)) != NULL && node)
+ node--;
+
+ return ct ? iter : NULL;
+}
+
+static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
+{
+ struct codetag_iterator *iter = (struct codetag_iterator *)arg;
+ struct codetag *ct = codetag_next_ct(iter);
+
+ (*pos)++;
+ if (!ct)
+ return NULL;
+
+ return iter;
+}
+
+static void allocinfo_stop(struct seq_file *m, void *arg)
+{
+ struct codetag_iterator *iter = (struct codetag_iterator *)m->private;
+
+ if (iter) {
+ codetag_lock_module_list(alloc_tag_cttype, false);
+ kfree(iter);
+ }
+}
+
+static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
+{
+ struct alloc_tag *tag = ct_to_alloc_tag(ct);
+ struct alloc_tag_counters counter = alloc_tag_read(tag);
+ s64 bytes = counter.bytes;
+
+ seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
+ codetag_to_text(out, ct);
+ seq_buf_putc(out, ' ');
+ seq_buf_putc(out, '\n');
+}
+
+static int allocinfo_show(struct seq_file *m, void *arg)
+{
+ struct codetag_iterator *iter = (struct codetag_iterator *)arg;
+ char *bufp;
+ size_t n = seq_get_buf(m, &bufp);
+ struct seq_buf buf;
+
+ seq_buf_init(&buf, bufp, n);
+ alloc_tag_to_text(&buf, iter->ct);
+ seq_commit(m, seq_buf_used(&buf));
+ return 0;
+}
+
+static const struct seq_operations allocinfo_seq_op = {
+ .start = allocinfo_start,
+ .next = allocinfo_next,
+ .stop = allocinfo_stop,
+ .show = allocinfo_show,
+};
+
+size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep)
+{
+ struct codetag_iterator iter;
+ struct codetag *ct;
+ struct codetag_bytes n;
+ unsigned int i, nr = 0;
+
+ if (can_sleep)
+ codetag_lock_module_list(alloc_tag_cttype, true);
+ else if (!codetag_trylock_module_list(alloc_tag_cttype))
+ return 0;
+
+ iter = codetag_get_ct_iter(alloc_tag_cttype);
+ while ((ct = codetag_next_ct(&iter))) {
+ struct alloc_tag_counters counter = alloc_tag_read(ct_to_alloc_tag(ct));
+
+ n.ct = ct;
+ n.bytes = counter.bytes;
+
+ for (i = 0; i < nr; i++)
+ if (n.bytes > tags[i].bytes)
+ break;
+
+ if (i < count) {
+ nr -= nr == count;
+ memmove(&tags[i + 1],
+ &tags[i],
+ sizeof(tags[0]) * (nr - i));
+ nr++;
+ tags[i] = n;
+ }
+ }
+
+ codetag_lock_module_list(alloc_tag_cttype, false);
+
+ return nr;
+}
+
+static void __init procfs_init(void)
+{
+ proc_create_seq("allocinfo", 0400, NULL, &allocinfo_seq_op);
+}
+
+static bool alloc_tag_module_unload(struct codetag_type *cttype,
+ struct codetag_module *cmod)
+{
+ struct codetag_iterator iter = codetag_get_ct_iter(cttype);
+ struct alloc_tag_counters counter;
+ bool module_unused = true;
+ struct alloc_tag *tag;
+ struct codetag *ct;
+
+ for (ct = codetag_next_ct(&iter); ct; ct = codetag_next_ct(&iter)) {
+ if (iter.cmod != cmod)
+ continue;
+
+ tag = ct_to_alloc_tag(ct);
+ counter = alloc_tag_read(tag);
+
+ if (WARN(counter.bytes,
+ "%s:%u module %s func:%s has %llu allocated at module unload",
+ ct->filename, ct->lineno, ct->modname, ct->function, counter.bytes))
+ module_unused = false;
+ }
+
+ return module_unused;
+}
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
+static bool mem_profiling_support __meminitdata = true;
+#else
+static bool mem_profiling_support __meminitdata;
+#endif
+
+static int __init setup_early_mem_profiling(char *str)
+{
+ bool enable;
+
+ if (!str || !str[0])
+ return -EINVAL;
+
+ if (!strncmp(str, "never", 5)) {
+ enable = false;
+ mem_profiling_support = false;
+ } else {
+ int res;
+
+ res = kstrtobool(str, &enable);
+ if (res)
+ return res;
+
+ mem_profiling_support = true;
+ }
+
+ if (enable != static_key_enabled(&mem_alloc_profiling_key)) {
+ if (enable)
+ static_branch_enable(&mem_alloc_profiling_key);
+ else
+ static_branch_disable(&mem_alloc_profiling_key);
+ }
+
+ return 0;
+}
+early_param("sysctl.vm.mem_profiling", setup_early_mem_profiling);
+
+static __init bool need_page_alloc_tagging(void)
+{
+ return mem_profiling_support;
+}
+
+static __init void init_page_alloc_tagging(void)
+{
+}
+
+struct page_ext_operations page_alloc_tagging_ops = {
+ .size = sizeof(union codetag_ref),
+ .need = need_page_alloc_tagging,
+ .init = init_page_alloc_tagging,
+};
+EXPORT_SYMBOL(page_alloc_tagging_ops);
+
+static struct ctl_table memory_allocation_profiling_sysctls[] = {
+ {
+ .procname = "mem_profiling",
+ .data = &mem_alloc_profiling_key,
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+ .mode = 0444,
+#else
+ .mode = 0644,
+#endif
+ .proc_handler = proc_do_static_key,
+ },
+ { }
+};
+
+static int __init alloc_tag_init(void)
+{
+ const struct codetag_type_desc desc = {
+ .section = "alloc_tags",
+ .tag_size = sizeof(struct alloc_tag),
+ .module_unload = alloc_tag_module_unload,
+ };
+
+ alloc_tag_cttype = codetag_register_type(&desc);
+ if (IS_ERR(alloc_tag_cttype))
+ return PTR_ERR(alloc_tag_cttype);
+
+ if (!mem_profiling_support)
+ memory_allocation_profiling_sysctls[0].mode = 0444;
+ register_sysctl_init("vm", memory_allocation_profiling_sysctls);
+ procfs_init();
+
+ return 0;
+}
+module_init(alloc_tag_init);
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index c59d26068a6401..97f8911ea339e6 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -61,9 +61,12 @@ static inline void * __init xbc_alloc_mem(size_t size)
return memblock_alloc(size, SMP_CACHE_BYTES);
}
-static inline void __init xbc_free_mem(void *addr, size_t size)
+static inline void __init xbc_free_mem(void *addr, size_t size, bool early)
{
- memblock_free(addr, size);
+ if (early)
+ memblock_free(addr, size);
+ else if (addr)
+ memblock_free_late(__pa(addr), size);
}
#else /* !__KERNEL__ */
@@ -73,7 +76,7 @@ static inline void *xbc_alloc_mem(size_t size)
return malloc(size);
}
-static inline void xbc_free_mem(void *addr, size_t size)
+static inline void xbc_free_mem(void *addr, size_t size, bool early)
{
free(addr);
}
@@ -898,19 +901,20 @@ static int __init xbc_parse_tree(void)
}
/**
- * xbc_exit() - Clean up all parsed bootconfig
+ * _xbc_exit() - Clean up all parsed bootconfig
+ * @early: Set true if this is called before budy system is initialized.
*
* This clears all data structures of parsed bootconfig on memory.
* If you need to reuse xbc_init() with new boot config, you can
* use this.
*/
-void __init xbc_exit(void)
+void __init _xbc_exit(bool early)
{
- xbc_free_mem(xbc_data, xbc_data_size);
+ xbc_free_mem(xbc_data, xbc_data_size, early);
xbc_data = NULL;
xbc_data_size = 0;
xbc_node_num = 0;
- xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX);
+ xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX, early);
xbc_nodes = NULL;
brace_index = 0;
}
@@ -963,7 +967,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
if (!xbc_nodes) {
if (emsg)
*emsg = "Failed to allocate bootconfig nodes";
- xbc_exit();
+ _xbc_exit(true);
return -ENOMEM;
}
memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX);
@@ -977,7 +981,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
*epos = xbc_err_pos;
if (emsg)
*emsg = xbc_err_msg;
- xbc_exit();
+ _xbc_exit(true);
} else
ret = xbc_node_num;
diff --git a/lib/build_OID_registry b/lib/build_OID_registry
index d7fc32ea8ac224..56d8bafeb848b0 100755
--- a/lib/build_OID_registry
+++ b/lib/build_OID_registry
@@ -8,6 +8,7 @@
#
use strict;
+use Cwd qw(abs_path);
my @names = ();
my @oids = ();
@@ -17,6 +18,8 @@ if ($#ARGV != 1) {
exit(2);
}
+my $abs_srctree = abs_path($ENV{'srctree'});
+
#
# Open the file to read from
#
@@ -35,7 +38,7 @@ close IN_FILE || die;
#
open C_FILE, ">$ARGV[1]" or die;
print C_FILE "/*\n";
-print C_FILE " * Automatically generated by ", $0, ". Do not edit\n";
+print C_FILE " * Automatically generated by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n";
print C_FILE " */\n";
#
diff --git a/lib/buildid.c b/lib/buildid.c
index 898301b49eb644..7954dd92e36c01 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -182,8 +182,8 @@ unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX] __ro_after_init;
*/
void __init init_vmlinux_build_id(void)
{
- extern const void __start_notes __weak;
- extern const void __stop_notes __weak;
+ extern const void __start_notes;
+ extern const void __stop_notes;
unsigned int size = &__stop_notes - &__start_notes;
build_id_parse_buf(&__start_notes, vmlinux_build_id, size);
diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c
index bf70850035c76f..404dba36bae380 100644
--- a/lib/checksum_kunit.c
+++ b/lib/checksum_kunit.c
@@ -594,13 +594,15 @@ static void test_ip_fast_csum(struct kunit *test)
static void test_csum_ipv6_magic(struct kunit *test)
{
-#if defined(CONFIG_NET)
const struct in6_addr *saddr;
const struct in6_addr *daddr;
unsigned int len;
unsigned char proto;
__wsum csum;
+ if (!IS_ENABLED(CONFIG_NET))
+ return;
+
const int daddr_offset = sizeof(struct in6_addr);
const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr);
const int proto_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) +
@@ -618,7 +620,6 @@ static void test_csum_ipv6_magic(struct kunit *test)
CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]),
csum_ipv6_magic(saddr, daddr, len, proto, csum));
}
-#endif /* !CONFIG_NET */
}
static struct kunit_case __refdata checksum_test_cases[] = {
diff --git a/lib/codetag.c b/lib/codetag.c
new file mode 100644
index 00000000000000..5ace625f2328f2
--- /dev/null
+++ b/lib/codetag.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/codetag.h>
+#include <linux/idr.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/seq_buf.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+struct codetag_type {
+ struct list_head link;
+ unsigned int count;
+ struct idr mod_idr;
+ struct rw_semaphore mod_lock; /* protects mod_idr */
+ struct codetag_type_desc desc;
+};
+
+struct codetag_range {
+ struct codetag *start;
+ struct codetag *stop;
+};
+
+struct codetag_module {
+ struct module *mod;
+ struct codetag_range range;
+};
+
+static DEFINE_MUTEX(codetag_lock);
+static LIST_HEAD(codetag_types);
+
+void codetag_lock_module_list(struct codetag_type *cttype, bool lock)
+{
+ if (lock)
+ down_read(&cttype->mod_lock);
+ else
+ up_read(&cttype->mod_lock);
+}
+
+bool codetag_trylock_module_list(struct codetag_type *cttype)
+{
+ return down_read_trylock(&cttype->mod_lock) != 0;
+}
+
+struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype)
+{
+ struct codetag_iterator iter = {
+ .cttype = cttype,
+ .cmod = NULL,
+ .mod_id = 0,
+ .ct = NULL,
+ };
+
+ return iter;
+}
+
+static inline struct codetag *get_first_module_ct(struct codetag_module *cmod)
+{
+ return cmod->range.start < cmod->range.stop ? cmod->range.start : NULL;
+}
+
+static inline
+struct codetag *get_next_module_ct(struct codetag_iterator *iter)
+{
+ struct codetag *res = (struct codetag *)
+ ((char *)iter->ct + iter->cttype->desc.tag_size);
+
+ return res < iter->cmod->range.stop ? res : NULL;
+}
+
+struct codetag *codetag_next_ct(struct codetag_iterator *iter)
+{
+ struct codetag_type *cttype = iter->cttype;
+ struct codetag_module *cmod;
+ struct codetag *ct;
+
+ lockdep_assert_held(&cttype->mod_lock);
+
+ if (unlikely(idr_is_empty(&cttype->mod_idr)))
+ return NULL;
+
+ ct = NULL;
+ while (true) {
+ cmod = idr_find(&cttype->mod_idr, iter->mod_id);
+
+ /* If module was removed move to the next one */
+ if (!cmod)
+ cmod = idr_get_next_ul(&cttype->mod_idr,
+ &iter->mod_id);
+
+ /* Exit if no more modules */
+ if (!cmod)
+ break;
+
+ if (cmod != iter->cmod) {
+ iter->cmod = cmod;
+ ct = get_first_module_ct(cmod);
+ } else
+ ct = get_next_module_ct(iter);
+
+ if (ct)
+ break;
+
+ iter->mod_id++;
+ }
+
+ iter->ct = ct;
+ return ct;
+}
+
+void codetag_to_text(struct seq_buf *out, struct codetag *ct)
+{
+ if (ct->modname)
+ seq_buf_printf(out, "%s:%u [%s] func:%s",
+ ct->filename, ct->lineno,
+ ct->modname, ct->function);
+ else
+ seq_buf_printf(out, "%s:%u func:%s",
+ ct->filename, ct->lineno, ct->function);
+}
+
+static inline size_t range_size(const struct codetag_type *cttype,
+ const struct codetag_range *range)
+{
+ return ((char *)range->stop - (char *)range->start) /
+ cttype->desc.tag_size;
+}
+
+#ifdef CONFIG_MODULES
+static void *get_symbol(struct module *mod, const char *prefix, const char *name)
+{
+ DECLARE_SEQ_BUF(sb, KSYM_NAME_LEN);
+ const char *buf;
+ void *ret;
+
+ seq_buf_printf(&sb, "%s%s", prefix, name);
+ if (seq_buf_has_overflowed(&sb))
+ return NULL;
+
+ buf = seq_buf_str(&sb);
+ preempt_disable();
+ ret = mod ?
+ (void *)find_kallsyms_symbol_value(mod, buf) :
+ (void *)kallsyms_lookup_name(buf);
+ preempt_enable();
+
+ return ret;
+}
+
+static struct codetag_range get_section_range(struct module *mod,
+ const char *section)
+{
+ return (struct codetag_range) {
+ get_symbol(mod, "__start_", section),
+ get_symbol(mod, "__stop_", section),
+ };
+}
+
+static int codetag_module_init(struct codetag_type *cttype, struct module *mod)
+{
+ struct codetag_range range;
+ struct codetag_module *cmod;
+ int err;
+
+ range = get_section_range(mod, cttype->desc.section);
+ if (!range.start || !range.stop) {
+ pr_warn("Failed to load code tags of type %s from the module %s\n",
+ cttype->desc.section,
+ mod ? mod->name : "(built-in)");
+ return -EINVAL;
+ }
+
+ /* Ignore empty ranges */
+ if (range.start == range.stop)
+ return 0;
+
+ BUG_ON(range.start > range.stop);
+
+ cmod = kmalloc(sizeof(*cmod), GFP_KERNEL);
+ if (unlikely(!cmod))
+ return -ENOMEM;
+
+ cmod->mod = mod;
+ cmod->range = range;
+
+ down_write(&cttype->mod_lock);
+ err = idr_alloc(&cttype->mod_idr, cmod, 0, 0, GFP_KERNEL);
+ if (err >= 0) {
+ cttype->count += range_size(cttype, &range);
+ if (cttype->desc.module_load)
+ cttype->desc.module_load(cttype, cmod);
+ }
+ up_write(&cttype->mod_lock);
+
+ if (err < 0) {
+ kfree(cmod);
+ return err;
+ }
+
+ return 0;
+}
+
+void codetag_load_module(struct module *mod)
+{
+ struct codetag_type *cttype;
+
+ if (!mod)
+ return;
+
+ mutex_lock(&codetag_lock);
+ list_for_each_entry(cttype, &codetag_types, link)
+ codetag_module_init(cttype, mod);
+ mutex_unlock(&codetag_lock);
+}
+
+bool codetag_unload_module(struct module *mod)
+{
+ struct codetag_type *cttype;
+ bool unload_ok = true;
+
+ if (!mod)
+ return true;
+
+ mutex_lock(&codetag_lock);
+ list_for_each_entry(cttype, &codetag_types, link) {
+ struct codetag_module *found = NULL;
+ struct codetag_module *cmod;
+ unsigned long mod_id, tmp;
+
+ down_write(&cttype->mod_lock);
+ idr_for_each_entry_ul(&cttype->mod_idr, cmod, tmp, mod_id) {
+ if (cmod->mod && cmod->mod == mod) {
+ found = cmod;
+ break;
+ }
+ }
+ if (found) {
+ if (cttype->desc.module_unload)
+ if (!cttype->desc.module_unload(cttype, cmod))
+ unload_ok = false;
+
+ cttype->count -= range_size(cttype, &cmod->range);
+ idr_remove(&cttype->mod_idr, mod_id);
+ kfree(cmod);
+ }
+ up_write(&cttype->mod_lock);
+ }
+ mutex_unlock(&codetag_lock);
+
+ return unload_ok;
+}
+
+#else /* CONFIG_MODULES */
+static int codetag_module_init(struct codetag_type *cttype, struct module *mod) { return 0; }
+#endif /* CONFIG_MODULES */
+
+struct codetag_type *
+codetag_register_type(const struct codetag_type_desc *desc)
+{
+ struct codetag_type *cttype;
+ int err;
+
+ BUG_ON(desc->tag_size <= 0);
+
+ cttype = kzalloc(sizeof(*cttype), GFP_KERNEL);
+ if (unlikely(!cttype))
+ return ERR_PTR(-ENOMEM);
+
+ cttype->desc = *desc;
+ idr_init(&cttype->mod_idr);
+ init_rwsem(&cttype->mod_lock);
+
+ err = codetag_module_init(cttype, NULL);
+ if (unlikely(err)) {
+ kfree(cttype);
+ return ERR_PTR(err);
+ }
+
+ mutex_lock(&codetag_lock);
+ list_add_tail(&cttype->link, &codetag_types);
+ mutex_unlock(&codetag_lock);
+
+ return cttype;
+}
diff --git a/lib/devres.c b/lib/devres.c
index fe0c63caeb6892..4fc152de6d8bb1 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/gfp.h>
+#include <linux/errno.h>
#include <linux/export.h>
+#include <linux/gfp_types.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
#include <linux/of_address.h>
+#include <linux/types.h>
enum devm_ioremap_type {
DEVM_IOREMAP = 0,
@@ -125,12 +128,13 @@ __devm_ioremap_resource(struct device *dev, const struct resource *res,
resource_size_t size;
void __iomem *dest_ptr;
char *pretty_name;
+ int ret;
BUG_ON(!dev);
if (!res || resource_type(res) != IORESOURCE_MEM) {
- dev_err(dev, "invalid resource %pR\n", res);
- return IOMEM_ERR_PTR(-EINVAL);
+ ret = dev_err_probe(dev, -EINVAL, "invalid resource %pR\n", res);
+ return IOMEM_ERR_PTR(ret);
}
if (type == DEVM_IOREMAP && res->flags & IORESOURCE_MEM_NONPOSTED)
@@ -144,20 +148,20 @@ __devm_ioremap_resource(struct device *dev, const struct resource *res,
else
pretty_name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL);
if (!pretty_name) {
- dev_err(dev, "can't generate pretty name for resource %pR\n", res);
- return IOMEM_ERR_PTR(-ENOMEM);
+ ret = dev_err_probe(dev, -ENOMEM, "can't generate pretty name for resource %pR\n", res);
+ return IOMEM_ERR_PTR(ret);
}
if (!devm_request_mem_region(dev, res->start, size, pretty_name)) {
- dev_err(dev, "can't request region for resource %pR\n", res);
- return IOMEM_ERR_PTR(-EBUSY);
+ ret = dev_err_probe(dev, -EBUSY, "can't request region for resource %pR\n", res);
+ return IOMEM_ERR_PTR(ret);
}
dest_ptr = __devm_ioremap(dev, res->start, size, type);
if (!dest_ptr) {
- dev_err(dev, "ioremap failed for resource %pR\n", res);
devm_release_mem_region(dev, res->start, size);
- dest_ptr = IOMEM_ERR_PTR(-ENOMEM);
+ ret = dev_err_probe(dev, -ENOMEM, "ioremap failed for resource %pR\n", res);
+ return IOMEM_ERR_PTR(ret);
}
return dest_ptr;
diff --git a/lib/kfifo.c b/lib/kfifo.c
index 12f5a347aa1370..15acdee4a8f325 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -5,13 +5,13 @@
* Copyright (C) 2009/2010 Stefani Seibold <stefani@seibold.net>
*/
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/slab.h>
#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/kfifo.h>
#include <linux/log2.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
#include <linux/uaccess.h>
-#include <linux/kfifo.h>
/*
* internal helper to calculate the unused elements in a fifo
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 55e1b35bf877ea..2d7d27e6ae3c69 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5109,18 +5109,18 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
if (size == 0 || max - min < size - 1)
return -EINVAL;
- if (mas_is_start(mas)) {
+ if (mas_is_start(mas))
mas_start(mas);
- mas->offset = mas_data_end(mas);
- } else if (mas->offset >= 2) {
- mas->offset -= 2;
- } else if (!mas_rewind_node(mas)) {
+ else if ((mas->offset < 2) && (!mas_rewind_node(mas)))
return -EBUSY;
- }
- /* Empty set. */
- if (mas_is_none(mas) || mas_is_ptr(mas))
+ if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
return mas_sparse_area(mas, min, max, size, false);
+ else if (mas->offset >= 2)
+ mas->offset -= 2;
+ else
+ mas->offset = mas_data_end(mas);
+
/* The start of the window can only be within these values. */
mas->index = min;
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 385a94aa0b999b..0e88bfe6445bfb 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -33,25 +33,6 @@ CFLAGS_REMOVE_vpermxor8.o += -msoft-float
endif
endif
-# The GCC option -ffreestanding is required in order to compile code containing
-# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel)
-ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
-NEON_FLAGS := -ffreestanding
-# Enable <arm_neon.h>
-NEON_FLAGS += -isystem $(shell $(CC) -print-file-name=include)
-ifeq ($(ARCH),arm)
-NEON_FLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=neon
-endif
-CFLAGS_recov_neon_inner.o += $(NEON_FLAGS)
-ifeq ($(ARCH),arm64)
-CFLAGS_REMOVE_recov_neon_inner.o += -mgeneral-regs-only
-CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only
-CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only
-CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only
-CFLAGS_REMOVE_neon8.o += -mgeneral-regs-only
-endif
-endif
-
quiet_cmd_unroll = UNROLL $@
cmd_unroll = $(AWK) -v N=$* -f $(srctree)/$(src)/unroll.awk < $< > $@
@@ -75,10 +56,16 @@ targets += vpermxor1.c vpermxor2.c vpermxor4.c vpermxor8.c
$(obj)/vpermxor%.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
-CFLAGS_neon1.o += $(NEON_FLAGS)
-CFLAGS_neon2.o += $(NEON_FLAGS)
-CFLAGS_neon4.o += $(NEON_FLAGS)
-CFLAGS_neon8.o += $(NEON_FLAGS)
+CFLAGS_neon1.o += $(CC_FLAGS_FPU)
+CFLAGS_neon2.o += $(CC_FLAGS_FPU)
+CFLAGS_neon4.o += $(CC_FLAGS_FPU)
+CFLAGS_neon8.o += $(CC_FLAGS_FPU)
+CFLAGS_recov_neon_inner.o += $(CC_FLAGS_FPU)
+CFLAGS_REMOVE_neon1.o += $(CC_FLAGS_NO_FPU)
+CFLAGS_REMOVE_neon2.o += $(CC_FLAGS_NO_FPU)
+CFLAGS_REMOVE_neon4.o += $(CC_FLAGS_NO_FPU)
+CFLAGS_REMOVE_neon8.o += $(CC_FLAGS_NO_FPU)
+CFLAGS_REMOVE_recov_neon_inner.o += $(CC_FLAGS_NO_FPU)
targets += neon1.c neon2.c neon4.c neon8.c
$(obj)/neon%.c: $(src)/neon.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 6ae2ba8e06a213..dbbed19f8fff99 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -130,7 +130,8 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht,
if (ntbl)
return ntbl;
- ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
+ ntbl = alloc_hooks_tag(ht->alloc_tag,
+ kmalloc_noprof(PAGE_SIZE, GFP_ATOMIC|__GFP_ZERO));
if (ntbl && leaf) {
for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
@@ -157,7 +158,8 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
- tbl = kzalloc(size, gfp);
+ tbl = alloc_hooks_tag(ht->alloc_tag,
+ kmalloc_noprof(size, gfp|__GFP_ZERO));
if (!tbl)
return NULL;
@@ -181,7 +183,9 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
int i;
static struct lock_class_key __key;
- tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
+ tbl = alloc_hooks_tag(ht->alloc_tag,
+ kvmalloc_node_noprof(struct_size(tbl, buckets, nbuckets),
+ gfp|__GFP_ZERO, NUMA_NO_NODE));
size = nbuckets;
@@ -1016,7 +1020,7 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
* .obj_hashfn = my_hash_fn,
* };
*/
-int rhashtable_init(struct rhashtable *ht,
+int rhashtable_init_noprof(struct rhashtable *ht,
const struct rhashtable_params *params)
{
struct bucket_table *tbl;
@@ -1031,6 +1035,8 @@ int rhashtable_init(struct rhashtable *ht,
spin_lock_init(&ht->lock);
memcpy(&ht->p, params, sizeof(*params));
+ alloc_tag_record(ht->alloc_tag);
+
if (params->min_size)
ht->p.min_size = roundup_pow_of_two(params->min_size);
@@ -1076,7 +1082,7 @@ int rhashtable_init(struct rhashtable *ht,
return 0;
}
-EXPORT_SYMBOL_GPL(rhashtable_init);
+EXPORT_SYMBOL_GPL(rhashtable_init_noprof);
/**
* rhltable_init - initialize a new hash list table
@@ -1087,15 +1093,15 @@ EXPORT_SYMBOL_GPL(rhashtable_init);
*
* See documentation for rhashtable_init.
*/
-int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
+int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params)
{
int err;
- err = rhashtable_init(&hlt->ht, params);
+ err = rhashtable_init_noprof(&hlt->ht, params);
hlt->ht.rhlist = true;
return err;
}
-EXPORT_SYMBOL_GPL(rhltable_init);
+EXPORT_SYMBOL_GPL(rhltable_init_noprof);
static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
void (*free_fn)(void *ptr, void *arg),
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 68b45c82c37a69..7bc2220fea8058 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -1124,7 +1124,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter,
do {
res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
extraction_flags, &off);
- if (res < 0)
+ if (res <= 0)
goto failed;
len = res;
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index af6cc19a200331..cd8f2345528510 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -330,7 +330,7 @@ static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
stack = current_pool + pool_offset;
/* Pre-initialize handle once. */
- stack->handle.pool_index = pool_index + 1;
+ stack->handle.pool_index_plus_1 = pool_index + 1;
stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
stack->handle.extra = 0;
INIT_LIST_HEAD(&stack->hash_list);
@@ -441,7 +441,7 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
const int pools_num_cached = READ_ONCE(pools_num);
union handle_parts parts = { .handle = handle };
void *pool;
- u32 pool_index = parts.pool_index - 1;
+ u32 pool_index = parts.pool_index_plus_1 - 1;
size_t offset = parts.offset << DEPOT_STACK_ALIGN;
struct stack_record *stack;
@@ -627,10 +627,10 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
/*
* Zero out zone modifiers, as we don't have specific zone
* requirements. Keep the flags related to allocation in atomic
- * contexts and I/O.
+ * contexts, I/O, nolockdep.
*/
alloc_flags &= ~GFP_ZONEMASK;
- alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
+ alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP);
alloc_flags |= __GFP_NOWARN;
page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER);
if (page)
diff --git a/lib/test_fpu.h b/lib/test_fpu.h
new file mode 100644
index 00000000000000..4459807084bcee
--- /dev/null
+++ b/lib/test_fpu.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _LIB_TEST_FPU_H
+#define _LIB_TEST_FPU_H
+
+int test_fpu(void);
+
+#endif
diff --git a/lib/test_fpu.c b/lib/test_fpu_glue.c
index e82db19fed84ac..eef282a2715f21 100644
--- a/lib/test_fpu.c
+++ b/lib/test_fpu_glue.c
@@ -17,39 +17,9 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/debugfs.h>
-#include <asm/fpu/api.h>
+#include <linux/fpu.h>
-static int test_fpu(void)
-{
- /*
- * This sequence of operations tests that rounding mode is
- * to nearest and that denormal numbers are supported.
- * Volatile variables are used to avoid compiler optimizing
- * the calculations away.
- */
- volatile double a, b, c, d, e, f, g;
-
- a = 4.0;
- b = 1e-15;
- c = 1e-310;
-
- /* Sets precision flag */
- d = a + b;
-
- /* Result depends on rounding mode */
- e = a + b / 2;
-
- /* Denormal and very large values */
- f = b / c;
-
- /* Depends on denormal support */
- g = a + c * f;
-
- if (d > a && e > a && g > a)
- return 0;
- else
- return -EINVAL;
-}
+#include "test_fpu.h"
static int test_fpu_get(void *data, u64 *val)
{
@@ -68,6 +38,9 @@ static struct dentry *selftest_dir;
static int __init test_fpu_init(void)
{
+ if (!kernel_fpu_available())
+ return -EINVAL;
+
selftest_dir = debugfs_create_dir("selftest_helpers", NULL);
if (!selftest_dir)
return -ENOMEM;
diff --git a/lib/test_fpu_impl.c b/lib/test_fpu_impl.c
new file mode 100644
index 00000000000000..777894dbbe86fa
--- /dev/null
+++ b/lib/test_fpu_impl.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/errno.h>
+
+#include "test_fpu.h"
+
+int test_fpu(void)
+{
+ /*
+ * This sequence of operations tests that rounding mode is
+ * to nearest and that denormal numbers are supported.
+ * Volatile variables are used to avoid compiler optimizing
+ * the calculations away.
+ */
+ volatile double a, b, c, d, e, f, g;
+
+ a = 4.0;
+ b = 1e-15;
+ c = 1e-310;
+
+ /* Sets precision flag */
+ d = a + b;
+
+ /* Result depends on rounding mode */
+ e = a + b / 2;
+
+ /* Denormal and very large values */
+ f = b / c;
+
+ /* Depends on denormal support */
+ g = a + c * f;
+
+ if (d > a && e > a && g > a)
+ return 0;
+ else
+ return -EINVAL;
+}
diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c
index b916801f23a896..fe2682bb21e6dc 100644
--- a/lib/test_hexdump.c
+++ b/lib/test_hexdump.c
@@ -113,7 +113,7 @@ static void __init test_hexdump_prepare_test(size_t len, int rowsize,
*p++ = ' ';
} while (p < test + rs * 2 + rs / gs + 1);
- strncpy(p, data_a, l);
+ memcpy(p, data_a, l);
p += l;
}
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 717dcb83012733..b823ba7cb6a156 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1226,8 +1226,8 @@ static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
unsigned long *src_pfns;
unsigned long *dst_pfns;
- src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
- dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
+ src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
+ dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
migrate_device_range(src_pfns, start_pfn, npages);
for (i = 0; i < npages; i++) {
@@ -1250,8 +1250,8 @@ static void dmirror_device_evict_chunk(struct dmirror_chunk *chunk)
}
migrate_device_pages(src_pfns, dst_pfns, npages);
migrate_device_finalize(src_pfns, dst_pfns, npages);
- kfree(src_pfns);
- kfree(dst_pfns);
+ kvfree(src_pfns);
+ kvfree(dst_pfns);
}
/* Removes free pages from the free list so they can't be re-allocated */
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
index 276c12140ee26d..c288df9372ede1 100644
--- a/lib/test_ubsan.c
+++ b/lib/test_ubsan.c
@@ -134,7 +134,7 @@ static const test_ubsan_fp test_ubsan_array[] = {
};
/* Excluded because they Oops the module. */
-static const test_ubsan_fp skip_ubsan_array[] = {
+static __used const test_ubsan_fp skip_ubsan_array[] = {
test_ubsan_divrem_overflow,
};
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index ebe2af2e072db3..0aea6a85099d53 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -744,15 +744,20 @@ static noinline void check_xa_multi_store_adv_add(struct xarray *xa,
do {
xas_lock_irq(&xas);
-
xas_store(&xas, p);
- XA_BUG_ON(xa, xas_error(&xas));
- XA_BUG_ON(xa, xa_load(xa, index) != p);
-
xas_unlock_irq(&xas);
+ /*
+ * In our selftest case the only failure we can expect is for
+ * there not to be enough memory as we're not mimicking the
+ * entire page cache, so verify that's the only error we can run
+ * into here. The xas_nomem() which follows will ensure to fix
+ * that condition for us so to chug on on the loop.
+ */
+ XA_BUG_ON(xa, xas_error(&xas) && xas_error(&xas) != -ENOMEM);
} while (xas_nomem(&xas, GFP_KERNEL));
XA_BUG_ON(xa, xas_error(&xas));
+ XA_BUG_ON(xa, xa_load(xa, index) != p);
}
/* mimics page_cache_delete() */
@@ -1984,6 +1989,97 @@ static noinline void check_get_order(struct xarray *xa)
}
}
+static noinline void check_xas_get_order(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+ unsigned int order;
+ unsigned long i, j;
+
+ for (order = 0; order < max_order; order++) {
+ for (i = 0; i < 10; i++) {
+ xas_set_order(&xas, i << order, order);
+ do {
+ xas_lock(&xas);
+ xas_store(&xas, xa_mk_value(i));
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+
+ for (j = i << order; j < (i + 1) << order; j++) {
+ xas_set_order(&xas, j, 0);
+ rcu_read_lock();
+ xas_load(&xas);
+ XA_BUG_ON(xa, xas_get_order(&xas) != order);
+ rcu_read_unlock();
+ }
+
+ xas_lock(&xas);
+ xas_set_order(&xas, i << order, order);
+ xas_store(&xas, NULL);
+ xas_unlock(&xas);
+ }
+ }
+}
+
+static noinline void check_xas_conflict_get_order(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+
+ void *entry;
+ int only_once;
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+ unsigned int order;
+ unsigned long i, j, k;
+
+ for (order = 0; order < max_order; order++) {
+ for (i = 0; i < 10; i++) {
+ xas_set_order(&xas, i << order, order);
+ do {
+ xas_lock(&xas);
+ xas_store(&xas, xa_mk_value(i));
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+
+ /*
+ * Ensure xas_get_order works with xas_for_each_conflict.
+ */
+ j = i << order;
+ for (k = 0; k < order; k++) {
+ only_once = 0;
+ xas_set_order(&xas, j + (1 << k), k);
+ xas_lock(&xas);
+ xas_for_each_conflict(&xas, entry) {
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ XA_BUG_ON(xa, xas_get_order(&xas) != order);
+ only_once++;
+ }
+ XA_BUG_ON(xa, only_once != 1);
+ xas_unlock(&xas);
+ }
+
+ if (order < max_order - 1) {
+ only_once = 0;
+ xas_set_order(&xas, (i & ~1UL) << order, order + 1);
+ xas_lock(&xas);
+ xas_for_each_conflict(&xas, entry) {
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ XA_BUG_ON(xa, xas_get_order(&xas) != order);
+ only_once++;
+ }
+ XA_BUG_ON(xa, only_once != 1);
+ xas_unlock(&xas);
+ }
+
+ xas_set_order(&xas, i << order, order);
+ xas_lock(&xas);
+ xas_store(&xas, NULL);
+ xas_unlock(&xas);
+ }
+ }
+}
+
+
static noinline void check_destroy(struct xarray *xa)
{
unsigned long index;
@@ -2035,6 +2131,8 @@ static int xarray_checks(void)
check_multi_store(&array);
check_multi_store_advanced(&array);
check_get_order(&array);
+ check_xas_get_order(&array);
+ check_xas_conflict_get_order(&array);
check_xa_alloc();
check_find(&array);
check_find_entry(&array);
diff --git a/lib/ubsan.c b/lib/ubsan.c
index 5fc107f61934c2..a1c983d148f16f 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -44,9 +44,10 @@ const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type)
case ubsan_shift_out_of_bounds:
return "UBSAN: shift out of bounds";
#endif
-#ifdef CONFIG_UBSAN_DIV_ZERO
+#if defined(CONFIG_UBSAN_DIV_ZERO) || defined(CONFIG_UBSAN_SIGNED_WRAP)
/*
- * SanitizerKind::IntegerDivideByZero emits
+ * SanitizerKind::IntegerDivideByZero and
+ * SanitizerKind::SignedIntegerOverflow emit
* SanitizerHandler::DivremOverflow.
*/
case ubsan_divrem_overflow:
@@ -78,6 +79,19 @@ const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type)
case ubsan_type_mismatch:
return "UBSAN: type mismatch";
#endif
+#ifdef CONFIG_UBSAN_SIGNED_WRAP
+ /*
+ * SanitizerKind::SignedIntegerOverflow emits
+ * SanitizerHandler::AddOverflow, SanitizerHandler::SubOverflow,
+ * or SanitizerHandler::MulOverflow.
+ */
+ case ubsan_add_overflow:
+ return "UBSAN: integer addition overflow";
+ case ubsan_sub_overflow:
+ return "UBSAN: integer subtraction overflow";
+ case ubsan_mul_overflow:
+ return "UBSAN: integer multiplication overflow";
+#endif
default:
return "UBSAN: unrecognized failure code";
}
diff --git a/lib/xarray.c b/lib/xarray.c
index 39f07bfc4dccac..1c87d871cacfa0 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -200,7 +200,8 @@ static void *xas_start(struct xa_state *xas)
return entry;
}
-static void *xas_descend(struct xa_state *xas, struct xa_node *node)
+static __always_inline void *xas_descend(struct xa_state *xas,
+ struct xa_node *node)
{
unsigned int offset = get_offset(xas->xa_index, node);
void *entry = xa_entry(xas->xa, node, offset);
@@ -1750,39 +1751,52 @@ unlock:
EXPORT_SYMBOL(xa_store_range);
/**
- * xa_get_order() - Get the order of an entry.
- * @xa: XArray.
- * @index: Index of the entry.
+ * xas_get_order() - Get the order of an entry.
+ * @xas: XArray operation state.
+ *
+ * Called after xas_load, the xas should not be in an error state.
*
* Return: A number between 0 and 63 indicating the order of the entry.
*/
-int xa_get_order(struct xarray *xa, unsigned long index)
+int xas_get_order(struct xa_state *xas)
{
- XA_STATE(xas, xa, index);
- void *entry;
int order = 0;
- rcu_read_lock();
- entry = xas_load(&xas);
-
- if (!entry)
- goto unlock;
-
- if (!xas.xa_node)
- goto unlock;
+ if (!xas->xa_node)
+ return 0;
for (;;) {
- unsigned int slot = xas.xa_offset + (1 << order);
+ unsigned int slot = xas->xa_offset + (1 << order);
if (slot >= XA_CHUNK_SIZE)
break;
- if (!xa_is_sibling(xas.xa_node->slots[slot]))
+ if (!xa_is_sibling(xa_entry(xas->xa, xas->xa_node, slot)))
break;
order++;
}
- order += xas.xa_node->shift;
-unlock:
+ order += xas->xa_node->shift;
+ return order;
+}
+EXPORT_SYMBOL_GPL(xas_get_order);
+
+/**
+ * xa_get_order() - Get the order of an entry.
+ * @xa: XArray.
+ * @index: Index of the entry.
+ *
+ * Return: A number between 0 and 63 indicating the order of the entry.
+ */
+int xa_get_order(struct xarray *xa, unsigned long index)
+{
+ XA_STATE(xas, xa, index);
+ int order = 0;
+ void *entry;
+
+ rcu_read_lock();
+ entry = xas_load(&xas);
+ if (entry)
+ order = xas_get_order(&xas);
rcu_read_unlock();
return order;
diff --git a/mm/Kconfig b/mm/Kconfig
index b1448aa81e15f4..50df323eaecef0 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -473,7 +473,7 @@ config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
config HAVE_MEMBLOCK_PHYS_MAP
bool
-config HAVE_FAST_GUP
+config HAVE_GUP_FAST
depends on MMU
bool
@@ -850,6 +850,12 @@ config READ_ONLY_THP_FOR_FS
endif # TRANSPARENT_HUGEPAGE
#
+# The architecture supports pgtable leaves that is larger than PAGE_SIZE
+#
+config PGTABLE_HAS_HUGE_LEAVES
+ def_bool TRANSPARENT_HUGEPAGE || HUGETLB_PAGE
+
+#
# UP and nommu archs use km based percpu allocator
#
config NEED_PER_CPU_KM
diff --git a/mm/Makefile b/mm/Makefile
index e4b5b75aaec9c1..25da205becdd24 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -5,6 +5,7 @@
KASAN_SANITIZE_slab_common.o := n
KASAN_SANITIZE_slub.o := n
+KASAN_SANITIZE_kmemleak.o := n
KCSAN_SANITIZE_kmemleak.o := n
# These produce frequent data race reports: most of them are due to races on
@@ -29,8 +30,7 @@ KCOV_INSTRUMENT_mmzone.o := n
KCOV_INSTRUMENT_vmstat.o := n
KCOV_INSTRUMENT_failslab.o := n
-CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
-CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)
+CFLAGS_init-mm.o += -Wno-override-init
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \
@@ -43,6 +43,10 @@ ifdef CONFIG_CROSS_MEMORY_ATTACH
mmu-$(CONFIG_MMU) += process_vm_access.o
endif
+ifdef CONFIG_64BIT
+mmu-$(CONFIG_MMU) += mseal.o
+endif
+
obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
maccess.o page-writeback.o folio-compat.o \
readahead.o swap.o truncate.o vmscan.o shrinker.o \
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 5f2be8c8df11f1..e61bbb1bd62218 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -39,6 +39,19 @@ struct workqueue_struct *bdi_wq;
#include <linux/debugfs.h>
#include <linux/seq_file.h>
+struct wb_stats {
+ unsigned long nr_dirty;
+ unsigned long nr_io;
+ unsigned long nr_more_io;
+ unsigned long nr_dirty_time;
+ unsigned long nr_writeback;
+ unsigned long nr_reclaimable;
+ unsigned long nr_dirtied;
+ unsigned long nr_written;
+ unsigned long dirty_thresh;
+ unsigned long wb_thresh;
+};
+
static struct dentry *bdi_debug_root;
static void bdi_debug_init(void)
@@ -46,31 +59,68 @@ static void bdi_debug_init(void)
bdi_debug_root = debugfs_create_dir("bdi", NULL);
}
-static int bdi_debug_stats_show(struct seq_file *m, void *v)
+static void collect_wb_stats(struct wb_stats *stats,
+ struct bdi_writeback *wb)
{
- struct backing_dev_info *bdi = m->private;
- struct bdi_writeback *wb = &bdi->wb;
- unsigned long background_thresh;
- unsigned long dirty_thresh;
- unsigned long wb_thresh;
- unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
struct inode *inode;
- nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
spin_lock(&wb->list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_io_list)
- nr_dirty++;
+ stats->nr_dirty++;
list_for_each_entry(inode, &wb->b_io, i_io_list)
- nr_io++;
+ stats->nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_io_list)
- nr_more_io++;
+ stats->nr_more_io++;
list_for_each_entry(inode, &wb->b_dirty_time, i_io_list)
if (inode->i_state & I_DIRTY_TIME)
- nr_dirty_time++;
+ stats->nr_dirty_time++;
spin_unlock(&wb->list_lock);
+ stats->nr_writeback += wb_stat(wb, WB_WRITEBACK);
+ stats->nr_reclaimable += wb_stat(wb, WB_RECLAIMABLE);
+ stats->nr_dirtied += wb_stat(wb, WB_DIRTIED);
+ stats->nr_written += wb_stat(wb, WB_WRITTEN);
+ stats->wb_thresh += wb_calc_thresh(wb, stats->dirty_thresh);
+}
+
+#ifdef CONFIG_CGROUP_WRITEBACK
+static void bdi_collect_stats(struct backing_dev_info *bdi,
+ struct wb_stats *stats)
+{
+ struct bdi_writeback *wb;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) {
+ if (!wb_tryget(wb))
+ continue;
+
+ collect_wb_stats(stats, wb);
+ wb_put(wb);
+ }
+ rcu_read_unlock();
+}
+#else
+static void bdi_collect_stats(struct backing_dev_info *bdi,
+ struct wb_stats *stats)
+{
+ collect_wb_stats(stats, &bdi->wb);
+}
+#endif
+
+static int bdi_debug_stats_show(struct seq_file *m, void *v)
+{
+ struct backing_dev_info *bdi = m->private;
+ unsigned long background_thresh;
+ unsigned long dirty_thresh;
+ struct wb_stats stats;
+ unsigned long tot_bw;
+
global_dirty_limits(&background_thresh, &dirty_thresh);
- wb_thresh = wb_calc_thresh(wb, dirty_thresh);
+
+ memset(&stats, 0, sizeof(stats));
+ stats.dirty_thresh = dirty_thresh;
+ bdi_collect_stats(bdi, &stats);
+ tot_bw = atomic_long_read(&bdi->tot_write_bandwidth);
seq_printf(m,
"BdiWriteback: %10lu kB\n"
@@ -87,37 +137,114 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
"b_dirty_time: %10lu\n"
"bdi_list: %10u\n"
"state: %10lx\n",
- (unsigned long) K(wb_stat(wb, WB_WRITEBACK)),
- (unsigned long) K(wb_stat(wb, WB_RECLAIMABLE)),
- K(wb_thresh),
+ K(stats.nr_writeback),
+ K(stats.nr_reclaimable),
+ K(stats.wb_thresh),
K(dirty_thresh),
K(background_thresh),
- (unsigned long) K(wb_stat(wb, WB_DIRTIED)),
- (unsigned long) K(wb_stat(wb, WB_WRITTEN)),
- (unsigned long) K(wb->write_bandwidth),
- nr_dirty,
- nr_io,
- nr_more_io,
- nr_dirty_time,
+ K(stats.nr_dirtied),
+ K(stats.nr_written),
+ K(tot_bw),
+ stats.nr_dirty,
+ stats.nr_io,
+ stats.nr_more_io,
+ stats.nr_dirty_time,
!list_empty(&bdi->bdi_list), bdi->wb.state);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats);
+static void wb_stats_show(struct seq_file *m, struct bdi_writeback *wb,
+ struct wb_stats *stats)
+{
+
+ seq_printf(m,
+ "WbCgIno: %10lu\n"
+ "WbWriteback: %10lu kB\n"
+ "WbReclaimable: %10lu kB\n"
+ "WbDirtyThresh: %10lu kB\n"
+ "WbDirtied: %10lu kB\n"
+ "WbWritten: %10lu kB\n"
+ "WbWriteBandwidth: %10lu kBps\n"
+ "b_dirty: %10lu\n"
+ "b_io: %10lu\n"
+ "b_more_io: %10lu\n"
+ "b_dirty_time: %10lu\n"
+ "state: %10lx\n\n",
+#ifdef CONFIG_CGROUP_WRITEBACK
+ cgroup_ino(wb->memcg_css->cgroup),
+#else
+ 1ul,
+#endif
+ K(stats->nr_writeback),
+ K(stats->nr_reclaimable),
+ K(stats->wb_thresh),
+ K(stats->nr_dirtied),
+ K(stats->nr_written),
+ K(wb->avg_write_bandwidth),
+ stats->nr_dirty,
+ stats->nr_io,
+ stats->nr_more_io,
+ stats->nr_dirty_time,
+ wb->state);
+}
+
+static int cgwb_debug_stats_show(struct seq_file *m, void *v)
+{
+ struct backing_dev_info *bdi = m->private;
+ unsigned long background_thresh;
+ unsigned long dirty_thresh;
+ struct bdi_writeback *wb;
+
+ global_dirty_limits(&background_thresh, &dirty_thresh);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) {
+ struct wb_stats stats = { .dirty_thresh = dirty_thresh };
+
+ if (!wb_tryget(wb))
+ continue;
+
+ collect_wb_stats(&stats, wb);
+
+ /*
+ * Calculate thresh of wb in writeback cgroup which is min of
+ * thresh in global domain and thresh in cgroup domain. Drop
+ * rcu lock because cgwb_calc_thresh may sleep in
+ * cgroup_rstat_flush. We can do so here because we have a ref.
+ */
+ if (mem_cgroup_wb_domain(wb)) {
+ rcu_read_unlock();
+ stats.wb_thresh = min(stats.wb_thresh, cgwb_calc_thresh(wb));
+ rcu_read_lock();
+ }
+
+ wb_stats_show(m, wb, &stats);
+
+ wb_put(wb);
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(cgwb_debug_stats);
+
static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
{
bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
debugfs_create_file("stats", 0444, bdi->debug_dir, bdi,
&bdi_debug_stats_fops);
+ debugfs_create_file("wb_stats", 0444, bdi->debug_dir, bdi,
+ &cgwb_debug_stats_fops);
}
static void bdi_debug_unregister(struct backing_dev_info *bdi)
{
debugfs_remove_recursive(bdi->debug_dir);
}
-#else
+#else /* CONFIG_DEBUG_FS */
static inline void bdi_debug_init(void)
{
}
@@ -128,7 +255,7 @@ static inline void bdi_debug_register(struct backing_dev_info *bdi,
static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
{
}
-#endif
+#endif /* CONFIG_DEBUG_FS */
static ssize_t read_ahead_kb_store(struct device *dev,
struct device_attribute *attr,
@@ -388,7 +515,7 @@ static void wb_update_bandwidth_workfn(struct work_struct *work)
static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
gfp_t gfp)
{
- int i, err;
+ int err;
memset(wb, 0, sizeof(*wb));
@@ -416,18 +543,10 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
if (err)
return err;
- for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
- err = percpu_counter_init(&wb->stat[i], 0, gfp);
- if (err)
- goto out_destroy_stat;
- }
-
- return 0;
+ err = percpu_counter_init_many(wb->stat, 0, gfp, NR_WB_STAT_ITEMS);
+ if (err)
+ fprop_local_destroy_percpu(&wb->completions);
-out_destroy_stat:
- while (i--)
- percpu_counter_destroy(&wb->stat[i]);
- fprop_local_destroy_percpu(&wb->completions);
return err;
}
@@ -460,13 +579,8 @@ static void wb_shutdown(struct bdi_writeback *wb)
static void wb_exit(struct bdi_writeback *wb)
{
- int i;
-
WARN_ON(delayed_work_pending(&wb->dwork));
-
- for (i = 0; i < NR_WB_STAT_ITEMS; i++)
- percpu_counter_destroy(&wb->stat[i]);
-
+ percpu_counter_destroy_many(wb->stat, NR_WB_STAT_ITEMS);
fprop_local_destroy_percpu(&wb->completions);
}
diff --git a/mm/cma.c b/mm/cma.c
index 01f5a8f71ddfa7..3e9724716bad87 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -182,10 +182,6 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
if (!size || !memblock_is_region_reserved(base, size))
return -EINVAL;
- /* alignment should be aligned with order_per_bit */
- if (!IS_ALIGNED(CMA_MIN_ALIGNMENT_PAGES, 1 << order_per_bit))
- return -EINVAL;
-
/* ensure minimal alignment required by mm core */
if (!IS_ALIGNED(base | size, CMA_MIN_ALIGNMENT_BYTES))
return -EINVAL;
diff --git a/mm/compaction.c b/mm/compaction.c
index 807b58e6eb68b3..e731d45befc780 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1851,7 +1851,7 @@ static void isolate_freepages(struct compact_control *cc)
* This is a migrate-callback that "allocates" freepages by taking pages
* from the isolated freelists in the block we are migrating to.
*/
-static struct folio *compaction_alloc(struct folio *src, unsigned long data)
+static struct folio *compaction_alloc_noprof(struct folio *src, unsigned long data)
{
struct compact_control *cc = (struct compact_control *)data;
struct folio *dst;
@@ -1898,6 +1898,11 @@ again:
return page_rmappable_folio(&dst->page);
}
+static struct folio *compaction_alloc(struct folio *src, unsigned long data)
+{
+ return alloc_hooks(compaction_alloc_noprof(src, data));
+}
+
/*
* This is a migrate-callback that "frees" freepages back to the isolated
* freelist. All pages on the freelist are from the same zone, so there is no
@@ -3345,7 +3350,6 @@ static struct ctl_table vm_compaction[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { }
};
static int __init kcompactd_init(void)
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 5e6dc312072cd0..5685ba485097d2 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -16,8 +16,8 @@
#include "../internal.h"
#include "ops-common.h"
-static bool __damon_pa_mkold(struct folio *folio, struct vm_area_struct *vma,
- unsigned long addr, void *arg)
+static bool damon_folio_mkold_one(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long addr, void *arg)
{
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
@@ -31,33 +31,38 @@ static bool __damon_pa_mkold(struct folio *folio, struct vm_area_struct *vma,
return true;
}
-static void damon_pa_mkold(unsigned long paddr)
+static void damon_folio_mkold(struct folio *folio)
{
- struct folio *folio = damon_get_folio(PHYS_PFN(paddr));
struct rmap_walk_control rwc = {
- .rmap_one = __damon_pa_mkold,
+ .rmap_one = damon_folio_mkold_one,
.anon_lock = folio_lock_anon_vma_read,
};
bool need_lock;
- if (!folio)
- return;
-
if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
folio_set_idle(folio);
- goto out;
+ return;
}
need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
if (need_lock && !folio_trylock(folio))
- goto out;
+ return;
rmap_walk(folio, &rwc);
if (need_lock)
folio_unlock(folio);
-out:
+}
+
+static void damon_pa_mkold(unsigned long paddr)
+{
+ struct folio *folio = damon_get_folio(PHYS_PFN(paddr));
+
+ if (!folio)
+ return;
+
+ damon_folio_mkold(folio);
folio_put(folio);
}
@@ -79,8 +84,8 @@ static void damon_pa_prepare_access_checks(struct damon_ctx *ctx)
}
}
-static bool __damon_pa_young(struct folio *folio, struct vm_area_struct *vma,
- unsigned long addr, void *arg)
+static bool damon_folio_young_one(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long addr, void *arg)
{
bool *accessed = arg;
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, addr, 0);
@@ -111,38 +116,44 @@ static bool __damon_pa_young(struct folio *folio, struct vm_area_struct *vma,
return *accessed == false;
}
-static bool damon_pa_young(unsigned long paddr, unsigned long *folio_sz)
+static bool damon_folio_young(struct folio *folio)
{
- struct folio *folio = damon_get_folio(PHYS_PFN(paddr));
bool accessed = false;
struct rmap_walk_control rwc = {
.arg = &accessed,
- .rmap_one = __damon_pa_young,
+ .rmap_one = damon_folio_young_one,
.anon_lock = folio_lock_anon_vma_read,
};
bool need_lock;
- if (!folio)
- return false;
-
if (!folio_mapped(folio) || !folio_raw_mapping(folio)) {
if (folio_test_idle(folio))
- accessed = false;
+ return false;
else
- accessed = true;
- goto out;
+ return true;
}
need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
if (need_lock && !folio_trylock(folio))
- goto out;
+ return false;
rmap_walk(folio, &rwc);
if (need_lock)
folio_unlock(folio);
-out:
+ return accessed;
+}
+
+static bool damon_pa_young(unsigned long paddr, unsigned long *folio_sz)
+{
+ struct folio *folio = damon_get_folio(PHYS_PFN(paddr));
+ bool accessed;
+
+ if (!folio)
+ return false;
+
+ accessed = damon_folio_young(folio);
*folio_sz = folio_size(folio);
folio_put(folio);
return accessed;
@@ -203,6 +214,11 @@ static bool __damos_pa_filter_out(struct damos_filter *filter,
matched = filter->memcg_id == mem_cgroup_id(memcg);
rcu_read_unlock();
break;
+ case DAMOS_FILTER_TYPE_YOUNG:
+ matched = damon_folio_young(folio);
+ if (matched)
+ damon_folio_mkold(folio);
+ break;
default:
break;
}
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 53a90ac678fb98..bea5bc52846a69 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -343,6 +343,7 @@ static struct damon_sysfs_scheme_filter *damon_sysfs_scheme_filter_alloc(void)
static const char * const damon_sysfs_scheme_filter_type_strs[] = {
"anon",
"memcg",
+ "young",
"addr",
"target",
};
diff --git a/mm/debug.c b/mm/debug.c
index c1c1a6a484e4c0..69e524c3e6012e 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -55,25 +55,17 @@ static void __dump_folio(struct folio *folio, struct page *page,
unsigned long pfn, unsigned long idx)
{
struct address_space *mapping = folio_mapping(folio);
- int mapcount = 0;
+ int mapcount = atomic_read(&page->_mapcount);
char *type = "";
- /*
- * page->_mapcount space in struct page is used by slab pages to
- * encode own info, and we must avoid calling page_folio() again.
- */
- if (!folio_test_slab(folio)) {
- mapcount = atomic_read(&page->_mapcount) + 1;
- if (folio_test_large(folio))
- mapcount += folio_entire_mapcount(folio);
- }
-
+ mapcount = page_type_has_type(mapcount) ? 0 : mapcount + 1;
pr_warn("page: refcount:%d mapcount:%d mapping:%p index:%#lx pfn:%#lx\n",
folio_ref_count(folio), mapcount, mapping,
folio->index + idx, pfn);
if (folio_test_large(folio)) {
- pr_warn("head: order:%u entire_mapcount:%d nr_pages_mapped:%d pincount:%d\n",
+ pr_warn("head: order:%u mapcount:%d entire_mapcount:%d nr_pages_mapped:%d pincount:%d\n",
folio_order(folio),
+ folio_mapcount(folio),
folio_entire_mapcount(folio),
folio_nr_pages_mapped(folio),
atomic_read(&folio->_pincount));
@@ -99,7 +91,8 @@ static void __dump_folio(struct folio *folio, struct page *page,
*/
pr_warn("%sflags: %pGp%s\n", type, &folio->flags,
is_migrate_cma_folio(folio, pfn) ? " CMA" : "");
- pr_warn("page_type: %pGt\n", &folio->page.page_type);
+ if (page_has_type(&folio->page))
+ pr_warn("page_type: %pGt\n", &folio->page.page_type);
print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32,
sizeof(unsigned long), page,
@@ -180,9 +173,6 @@ EXPORT_SYMBOL(dump_vma);
void dump_mm(const struct mm_struct *mm)
{
pr_emerg("mm %px task_size %lu\n"
-#ifdef CONFIG_MMU
- "get_unmapped_area %px\n"
-#endif
"mmap_base %lu mmap_legacy_base %lu\n"
"pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
@@ -208,9 +198,6 @@ void dump_mm(const struct mm_struct *mm)
"def_flags: %#lx(%pGv)\n",
mm, mm->task_size,
-#ifdef CONFIG_MMU
- mm->get_unmapped_area,
-#endif
mm->mmap_base, mm->mmap_legacy_base,
mm->pgd, atomic_read(&mm->mm_users),
atomic_read(&mm->mm_count),
diff --git a/mm/debug_page_alloc.c b/mm/debug_page_alloc.c
index 6755f0c9d4a398..d46acf989dde88 100644
--- a/mm/debug_page_alloc.c
+++ b/mm/debug_page_alloc.c
@@ -32,8 +32,7 @@ static int __init debug_guardpage_minorder_setup(char *buf)
}
early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
-bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
- int migratetype)
+bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order)
{
if (order >= debug_guardpage_minorder())
return false;
@@ -41,19 +40,12 @@ bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
__SetPageGuard(page);
INIT_LIST_HEAD(&page->buddy_list);
set_page_private(page, order);
- /* Guard pages are not available for any usage */
- if (!is_migrate_isolate(migratetype))
- __mod_zone_freepage_state(zone, -(1 << order), migratetype);
return true;
}
-void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
- int migratetype)
+void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order)
{
__ClearPageGuard(page);
-
set_page_private(page, 0);
- if (!is_migrate_isolate(migratetype))
- __mod_zone_freepage_state(zone, (1 << order), migratetype);
}
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 65c19025da3dfe..f1c9a2c5abc04c 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -30,6 +30,7 @@
#include <linux/start_kernel.h>
#include <linux/sched/mm.h>
#include <linux/io.h>
+#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
diff --git a/mm/filemap.c b/mm/filemap.c
index 7437b2bd75c1ab..ec273b00ce5fd6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -168,7 +168,7 @@ static void filemap_unaccount_folio(struct address_space *mapping,
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
if (mapping_exiting(mapping) && !folio_test_large(folio)) {
- int mapcount = page_mapcount(&folio->page);
+ int mapcount = folio_mapcount(folio);
if (folio_ref_count(folio) >= mapcount + 2) {
/*
@@ -852,23 +852,18 @@ noinline int __filemap_add_folio(struct address_space *mapping,
struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
{
XA_STATE(xas, &mapping->i_pages, index);
- bool huge = folio_test_hugetlb(folio);
- bool charged = false;
- long nr = 1;
+ void *alloced_shadow = NULL;
+ int alloced_order = 0;
+ bool huge;
+ long nr;
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
mapping_set_update(&xas, mapping);
- if (!huge) {
- int error = mem_cgroup_charge(folio, NULL, gfp);
- if (error)
- return error;
- charged = true;
- }
-
VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
xas_set_order(&xas, index, folio_order(folio));
+ huge = folio_test_hugetlb(folio);
nr = folio_nr_pages(folio);
gfp &= GFP_RECLAIM_MASK;
@@ -876,13 +871,10 @@ noinline int __filemap_add_folio(struct address_space *mapping,
folio->mapping = mapping;
folio->index = xas.xa_index;
- do {
- unsigned int order = xa_get_order(xas.xa, xas.xa_index);
+ for (;;) {
+ int order = -1, split_order = 0;
void *entry, *old = NULL;
- if (order > folio_order(folio))
- xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
- order, gfp);
xas_lock_irq(&xas);
xas_for_each_conflict(&xas, entry) {
old = entry;
@@ -890,19 +882,33 @@ noinline int __filemap_add_folio(struct address_space *mapping,
xas_set_err(&xas, -EEXIST);
goto unlock;
}
+ /*
+ * If a larger entry exists,
+ * it will be the first and only entry iterated.
+ */
+ if (order == -1)
+ order = xas_get_order(&xas);
+ }
+
+ /* entry may have changed before we re-acquire the lock */
+ if (alloced_order && (old != alloced_shadow || order != alloced_order)) {
+ xas_destroy(&xas);
+ alloced_order = 0;
}
if (old) {
- if (shadowp)
- *shadowp = old;
- /* entry may have been split before we acquired lock */
- order = xa_get_order(xas.xa, xas.xa_index);
- if (order > folio_order(folio)) {
+ if (order > 0 && order > folio_order(folio)) {
/* How to handle large swap entries? */
BUG_ON(shmem_mapping(mapping));
+ if (!alloced_order) {
+ split_order = order;
+ goto unlock;
+ }
xas_split(&xas, old, order);
xas_reset(&xas);
}
+ if (shadowp)
+ *shadowp = old;
}
xas_store(&xas, folio);
@@ -918,9 +924,24 @@ noinline int __filemap_add_folio(struct address_space *mapping,
__lruvec_stat_mod_folio(folio,
NR_FILE_THPS, nr);
}
+
unlock:
xas_unlock_irq(&xas);
- } while (xas_nomem(&xas, gfp));
+
+ /* split needed, alloc here and retry. */
+ if (split_order) {
+ xas_split_alloc(&xas, old, split_order, gfp);
+ if (xas_error(&xas))
+ goto error;
+ alloced_shadow = old;
+ alloced_order = split_order;
+ xas_reset(&xas);
+ continue;
+ }
+
+ if (!xas_nomem(&xas, gfp))
+ break;
+ }
if (xas_error(&xas))
goto error;
@@ -928,8 +949,6 @@ unlock:
trace_mm_filemap_add_to_page_cache(folio);
return 0;
error:
- if (charged)
- mem_cgroup_uncharge(folio);
folio->mapping = NULL;
/* Leave page->index set: truncation relies upon it */
folio_put_refs(folio, nr);
@@ -943,11 +962,16 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
void *shadow = NULL;
int ret;
+ ret = mem_cgroup_charge(folio, NULL, gfp);
+ if (ret)
+ return ret;
+
__folio_set_locked(folio);
ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ mem_cgroup_uncharge(folio);
__folio_clear_locked(folio);
- else {
+ } else {
/*
* The folio might have been evicted from cache only
* recently, in which case it should be activated like
@@ -966,7 +990,7 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
EXPORT_SYMBOL_GPL(filemap_add_folio);
#ifdef CONFIG_NUMA
-struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
+struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
{
int n;
struct folio *folio;
@@ -981,9 +1005,9 @@ struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
return folio;
}
- return folio_alloc(gfp, order);
+ return folio_alloc_noprof(gfp, order);
}
-EXPORT_SYMBOL(filemap_alloc_folio);
+EXPORT_SYMBOL(filemap_alloc_folio_noprof);
#endif
/*
@@ -1786,7 +1810,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
* C. Return the page to the page allocator
*
* This means that any page may have its reference count temporarily
- * increased by a speculative page cache (or fast GUP) lookup as it can
+ * increased by a speculative page cache (or GUP-fast) lookup as it can
* be allocated by another user before the RCU grace period expires.
* Because the refcount temporarily acquired here may end up being the
* last refcount on the page, any page allocation must be freeable by
@@ -3207,7 +3231,8 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
return 0;
- ptep = pte_offset_map(vmf->pmd, vmf->address);
+ ptep = pte_offset_map_nolock(vma->vm_mm, vmf->pmd, vmf->address,
+ &vmf->ptl);
if (unlikely(!ptep))
return VM_FAULT_NOPAGE;
@@ -3481,7 +3506,7 @@ skip:
static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
struct folio *folio, unsigned long start,
unsigned long addr, unsigned int nr_pages,
- unsigned int *mmap_miss)
+ unsigned long *rss, unsigned int *mmap_miss)
{
vm_fault_t ret = 0;
struct page *page = folio_page(folio, start);
@@ -3492,7 +3517,15 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
if (PageHWPoison(page + count))
goto skip;
- (*mmap_miss)++;
+ /*
+ * If there are too many folios that are recently evicted
+ * in a file, they will probably continue to be evicted.
+ * In such situation, read-ahead is only a waste of IO.
+ * Don't decrease mmap_miss in this scenario to make sure
+ * we can stop read-ahead.
+ */
+ if (!folio_test_workingset(folio))
+ (*mmap_miss)++;
/*
* NOTE: If there're PTE markers, we'll leave them to be
@@ -3507,6 +3540,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
skip:
if (count) {
set_pte_range(vmf, folio, page, count, addr);
+ *rss += count;
folio_ref_add(folio, count);
if (in_range(vmf->address, addr, count * PAGE_SIZE))
ret = VM_FAULT_NOPAGE;
@@ -3521,6 +3555,7 @@ skip:
if (count) {
set_pte_range(vmf, folio, page, count, addr);
+ *rss += count;
folio_ref_add(folio, count);
if (in_range(vmf->address, addr, count * PAGE_SIZE))
ret = VM_FAULT_NOPAGE;
@@ -3533,7 +3568,7 @@ skip:
static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf,
struct folio *folio, unsigned long addr,
- unsigned int *mmap_miss)
+ unsigned long *rss, unsigned int *mmap_miss)
{
vm_fault_t ret = 0;
struct page *page = &folio->page;
@@ -3541,7 +3576,9 @@ static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf,
if (PageHWPoison(page))
return ret;
- (*mmap_miss)++;
+ /* See comment of filemap_map_folio_range() */
+ if (!folio_test_workingset(folio))
+ (*mmap_miss)++;
/*
* NOTE: If there're PTE markers, we'll leave them to be
@@ -3555,6 +3592,7 @@ static vm_fault_t filemap_map_order0_folio(struct vm_fault *vmf,
ret = VM_FAULT_NOPAGE;
set_pte_range(vmf, folio, page, 1, addr);
+ (*rss)++;
folio_ref_inc(folio);
return ret;
@@ -3571,7 +3609,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct folio *folio;
vm_fault_t ret = 0;
- unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved;
+ unsigned long rss = 0;
+ unsigned int nr_pages = 0, mmap_miss = 0, mmap_miss_saved, folio_type;
rcu_read_lock();
folio = next_uptodate_folio(&xas, mapping, end_pgoff);
@@ -3590,6 +3629,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
folio_put(folio);
goto out;
}
+
+ folio_type = mm_counter_file(folio);
do {
unsigned long end;
@@ -3601,15 +3642,16 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
if (!folio_test_large(folio))
ret |= filemap_map_order0_folio(vmf,
- folio, addr, &mmap_miss);
+ folio, addr, &rss, &mmap_miss);
else
ret |= filemap_map_folio_range(vmf, folio,
xas.xa_index - folio->index, addr,
- nr_pages, &mmap_miss);
+ nr_pages, &rss, &mmap_miss);
folio_unlock(folio);
folio_put(folio);
} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
+ add_mm_counter(vma->vm_mm, folio_type, rss);
pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
rcu_read_unlock();
@@ -4197,7 +4239,23 @@ static void filemap_cachestat(struct address_space *mapping,
/* shmem file - in swap cache */
swp_entry_t swp = radix_to_swp_entry(folio);
+ /* swapin error results in poisoned entry */
+ if (non_swap_entry(swp))
+ goto resched;
+
+ /*
+ * Getting a swap entry from the shmem
+ * inode means we beat
+ * shmem_unuse(). rcu_read_lock()
+ * ensures swapoff waits for us before
+ * freeing the swapper space. However,
+ * we can race with swapping and
+ * invalidation, so there might not be
+ * a shadow in the swapcache (yet).
+ */
shadow = get_shadow_from_swap_cache(swp);
+ if (!shadow)
+ goto resched;
}
#endif
if (workingset_test_recent(shadow, true, &workingset))
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 50412014f16f72..f05906006b3c33 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -10,12 +10,6 @@
#include <linux/swap.h>
#include "internal.h"
-struct address_space *page_mapping(struct page *page)
-{
- return folio_mapping(page_folio(page));
-}
-EXPORT_SYMBOL(page_mapping);
-
void unlock_page(struct page *page)
{
return folio_unlock(page_folio(page));
@@ -58,12 +52,6 @@ bool set_page_dirty(struct page *page)
}
EXPORT_SYMBOL(set_page_dirty);
-int __set_page_dirty_nobuffers(struct page *page)
-{
- return filemap_dirty_folio(page_mapping(page), page_folio(page));
-}
-EXPORT_SYMBOL(__set_page_dirty_nobuffers);
-
bool clear_page_dirty_for_io(struct page *page)
{
return folio_clear_dirty_for_io(page_folio(page));
diff --git a/mm/gup.c b/mm/gup.c
index df83182ec72d5d..2f7baf96f65593 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -89,7 +89,7 @@ retry:
* belongs to this folio.
*/
if (unlikely(page_folio(page) != folio)) {
- if (!put_devmap_managed_page_refs(&folio->page, refs))
+ if (!put_devmap_managed_folio_refs(folio, refs))
folio_put_refs(folio, refs);
goto retry;
}
@@ -156,7 +156,7 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
*/
if (unlikely((flags & FOLL_LONGTERM) &&
!folio_is_longterm_pinnable(folio))) {
- if (!put_devmap_managed_page_refs(&folio->page, refs))
+ if (!put_devmap_managed_folio_refs(folio, refs))
folio_put_refs(folio, refs);
return NULL;
}
@@ -198,7 +198,7 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
refs *= GUP_PIN_COUNTING_BIAS;
}
- if (!put_devmap_managed_page_refs(&folio->page, refs))
+ if (!put_devmap_managed_folio_refs(folio, refs))
folio_put_refs(folio, refs);
}
@@ -440,7 +440,7 @@ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
}
EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
-static void unpin_user_pages_lockless(struct page **pages, unsigned long npages)
+static void gup_fast_unpin_user_pages(struct page **pages, unsigned long npages)
{
unsigned long i;
struct folio *folio;
@@ -500,23 +500,330 @@ static inline void mm_set_has_pinned_flag(unsigned long *mm_flags)
}
#ifdef CONFIG_MMU
+
+#if defined(CONFIG_ARCH_HAS_HUGEPD) || defined(CONFIG_HAVE_GUP_FAST)
+static int record_subpages(struct page *page, unsigned long sz,
+ unsigned long addr, unsigned long end,
+ struct page **pages)
+{
+ struct page *start_page;
+ int nr;
+
+ start_page = nth_page(page, (addr & (sz - 1)) >> PAGE_SHIFT);
+ for (nr = 0; addr != end; nr++, addr += PAGE_SIZE)
+ pages[nr] = nth_page(start_page, nr);
+
+ return nr;
+}
+#endif /* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */
+
+#ifdef CONFIG_ARCH_HAS_HUGEPD
+static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
+ unsigned long sz)
+{
+ unsigned long __boundary = (addr + sz) & ~(sz-1);
+ return (__boundary - 1 < end - 1) ? __boundary : end;
+}
+
+static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
+{
+ unsigned long pte_end;
+ struct page *page;
+ struct folio *folio;
+ pte_t pte;
+ int refs;
+
+ pte_end = (addr + sz) & ~(sz-1);
+ if (pte_end < end)
+ end = pte_end;
+
+ pte = huge_ptep_get(ptep);
+
+ if (!pte_access_permitted(pte, flags & FOLL_WRITE))
+ return 0;
+
+ /* hugepages are never "special" */
+ VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+ page = pte_page(pte);
+ refs = record_subpages(page, sz, addr, end, pages + *nr);
+
+ folio = try_grab_folio(page, refs, flags);
+ if (!folio)
+ return 0;
+
+ if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
+ gup_put_folio(folio, refs, flags);
+ return 0;
+ }
+
+ if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) {
+ gup_put_folio(folio, refs, flags);
+ return 0;
+ }
+
+ *nr += refs;
+ folio_set_referenced(folio);
+ return 1;
+}
+
+/*
+ * NOTE: currently GUP for a hugepd is only possible on hugetlbfs file
+ * systems on Power, which does not have issue with folio writeback against
+ * GUP updates. When hugepd will be extended to support non-hugetlbfs or
+ * even anonymous memory, we need to do extra check as what we do with most
+ * of the other folios. See writable_file_mapping_allowed() and
+ * gup_fast_folio_allowed() for more information.
+ */
+static int gup_fast_hugepd(hugepd_t hugepd, unsigned long addr,
+ unsigned int pdshift, unsigned long end, unsigned int flags,
+ struct page **pages, int *nr)
+{
+ pte_t *ptep;
+ unsigned long sz = 1UL << hugepd_shift(hugepd);
+ unsigned long next;
+
+ ptep = hugepte_offset(hugepd, addr, pdshift);
+ do {
+ next = hugepte_addr_end(addr, end, sz);
+ if (!gup_fast_hugepte(ptep, sz, addr, end, flags, pages, nr))
+ return 0;
+ } while (ptep++, addr = next, addr != end);
+
+ return 1;
+}
+
+static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
+ unsigned long addr, unsigned int pdshift,
+ unsigned int flags,
+ struct follow_page_context *ctx)
+{
+ struct page *page;
+ struct hstate *h;
+ spinlock_t *ptl;
+ int nr = 0, ret;
+ pte_t *ptep;
+
+ /* Only hugetlb supports hugepd */
+ if (WARN_ON_ONCE(!is_vm_hugetlb_page(vma)))
+ return ERR_PTR(-EFAULT);
+
+ h = hstate_vma(vma);
+ ptep = hugepte_offset(hugepd, addr, pdshift);
+ ptl = huge_pte_lock(h, vma->vm_mm, ptep);
+ ret = gup_fast_hugepd(hugepd, addr, pdshift, addr + PAGE_SIZE,
+ flags, &page, &nr);
+ spin_unlock(ptl);
+
+ if (ret) {
+ WARN_ON_ONCE(nr != 1);
+ ctx->page_mask = (1U << huge_page_order(h)) - 1;
+ return page;
+ }
+
+ return NULL;
+}
+#else /* CONFIG_ARCH_HAS_HUGEPD */
+static inline int gup_fast_hugepd(hugepd_t hugepd, unsigned long addr,
+ unsigned int pdshift, unsigned long end, unsigned int flags,
+ struct page **pages, int *nr)
+{
+ return 0;
+}
+
+static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
+ unsigned long addr, unsigned int pdshift,
+ unsigned int flags,
+ struct follow_page_context *ctx)
+{
+ return NULL;
+}
+#endif /* CONFIG_ARCH_HAS_HUGEPD */
+
+
static struct page *no_page_table(struct vm_area_struct *vma,
- unsigned int flags)
+ unsigned int flags, unsigned long address)
{
+ if (!(flags & FOLL_DUMP))
+ return NULL;
+
/*
- * When core dumping an enormous anonymous area that nobody
- * has touched so far, we don't want to allocate unnecessary pages or
+ * When core dumping, we don't want to allocate unnecessary pages or
* page tables. Return error instead of NULL to skip handle_mm_fault,
* then get_dump_page() will return NULL to leave a hole in the dump.
* But we can only make this optimization where a hole would surely
* be zero-filled if handle_mm_fault() actually did handle it.
*/
- if ((flags & FOLL_DUMP) &&
- (vma_is_anonymous(vma) || !vma->vm_ops->fault))
+ if (is_vm_hugetlb_page(vma)) {
+ struct hstate *h = hstate_vma(vma);
+
+ if (!hugetlbfs_pagecache_present(h, vma, address))
+ return ERR_PTR(-EFAULT);
+ } else if ((vma_is_anonymous(vma) || !vma->vm_ops->fault)) {
return ERR_PTR(-EFAULT);
+ }
+
return NULL;
}
+#ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
+static struct page *follow_huge_pud(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp,
+ int flags, struct follow_page_context *ctx)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ struct page *page;
+ pud_t pud = *pudp;
+ unsigned long pfn = pud_pfn(pud);
+ int ret;
+
+ assert_spin_locked(pud_lockptr(mm, pudp));
+
+ if ((flags & FOLL_WRITE) && !pud_write(pud))
+ return NULL;
+
+ if (!pud_present(pud))
+ return NULL;
+
+ pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
+
+ if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
+ pud_devmap(pud)) {
+ /*
+ * device mapped pages can only be returned if the caller
+ * will manage the page reference count.
+ *
+ * At least one of FOLL_GET | FOLL_PIN must be set, so
+ * assert that here:
+ */
+ if (!(flags & (FOLL_GET | FOLL_PIN)))
+ return ERR_PTR(-EEXIST);
+
+ if (flags & FOLL_TOUCH)
+ touch_pud(vma, addr, pudp, flags & FOLL_WRITE);
+
+ ctx->pgmap = get_dev_pagemap(pfn, ctx->pgmap);
+ if (!ctx->pgmap)
+ return ERR_PTR(-EFAULT);
+ }
+
+ page = pfn_to_page(pfn);
+
+ if (!pud_devmap(pud) && !pud_write(pud) &&
+ gup_must_unshare(vma, flags, page))
+ return ERR_PTR(-EMLINK);
+
+ ret = try_grab_page(page, flags);
+ if (ret)
+ page = ERR_PTR(ret);
+ else
+ ctx->page_mask = HPAGE_PUD_NR - 1;
+
+ return page;
+}
+
+/* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */
+static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
+{
+ /* If the pmd is writable, we can write to the page. */
+ if (pmd_write(pmd))
+ return true;
+
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ return false;
+
+ /* ... and a write-fault isn't required for other reasons. */
+ if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd))
+ return false;
+ return !userfaultfd_huge_pmd_wp(vma, pmd);
+}
+
+static struct page *follow_huge_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd,
+ unsigned int flags,
+ struct follow_page_context *ctx)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pmd_t pmdval = *pmd;
+ struct page *page;
+ int ret;
+
+ assert_spin_locked(pmd_lockptr(mm, pmd));
+
+ page = pmd_page(pmdval);
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pmd(pmdval, page, vma, flags))
+ return NULL;
+
+ /* Avoid dumping huge zero page */
+ if ((flags & FOLL_DUMP) && is_huge_zero_pmd(pmdval))
+ return ERR_PTR(-EFAULT);
+
+ if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags))
+ return NULL;
+
+ if (!pmd_write(pmdval) && gup_must_unshare(vma, flags, page))
+ return ERR_PTR(-EMLINK);
+
+ VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
+ !PageAnonExclusive(page), page);
+
+ ret = try_grab_page(page, flags);
+ if (ret)
+ return ERR_PTR(ret);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (pmd_trans_huge(pmdval) && (flags & FOLL_TOUCH))
+ touch_pmd(vma, addr, pmd, flags & FOLL_WRITE);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+ page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
+ ctx->page_mask = HPAGE_PMD_NR - 1;
+
+ return page;
+}
+
+#else /* CONFIG_PGTABLE_HAS_HUGE_LEAVES */
+static struct page *follow_huge_pud(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp,
+ int flags, struct follow_page_context *ctx)
+{
+ return NULL;
+}
+
+static struct page *follow_huge_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd,
+ unsigned int flags,
+ struct follow_page_context *ctx)
+{
+ return NULL;
+}
+#endif /* CONFIG_PGTABLE_HAS_HUGE_LEAVES */
+
static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
pte_t *pte, unsigned int flags)
{
@@ -593,7 +900,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!ptep)
- return no_page_table(vma, flags);
+ return no_page_table(vma, flags, address);
pte = ptep_get(ptep);
if (!pte_present(pte))
goto no_page;
@@ -685,7 +992,7 @@ no_page:
pte_unmap_unlock(ptep, ptl);
if (!pte_none(pte))
return NULL;
- return no_page_table(vma, flags);
+ return no_page_table(vma, flags, address);
}
static struct page *follow_pmd_mask(struct vm_area_struct *vma,
@@ -701,42 +1008,45 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
pmd = pmd_offset(pudp, address);
pmdval = pmdp_get_lockless(pmd);
if (pmd_none(pmdval))
- return no_page_table(vma, flags);
+ return no_page_table(vma, flags, address);
if (!pmd_present(pmdval))
- return no_page_table(vma, flags);
+ return no_page_table(vma, flags, address);
+ if (unlikely(is_hugepd(__hugepd(pmd_val(pmdval)))))
+ return follow_hugepd(vma, __hugepd(pmd_val(pmdval)),
+ address, PMD_SHIFT, flags, ctx);
if (pmd_devmap(pmdval)) {
ptl = pmd_lock(mm, pmd);
page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
spin_unlock(ptl);
if (page)
return page;
- return no_page_table(vma, flags);
+ return no_page_table(vma, flags, address);
}
- if (likely(!pmd_trans_huge(pmdval)))
+ if (likely(!pmd_leaf(pmdval)))
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
- return no_page_table(vma, flags);
+ return no_page_table(vma, flags, address);
ptl = pmd_lock(mm, pmd);
- if (unlikely(!pmd_present(*pmd))) {
+ pmdval = *pmd;
+ if (unlikely(!pmd_present(pmdval))) {
spin_unlock(ptl);
- return no_page_table(vma, flags);
+ return no_page_table(vma, flags, address);
}
- if (unlikely(!pmd_trans_huge(*pmd))) {
+ if (unlikely(!pmd_leaf(pmdval))) {
spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
}
- if (flags & FOLL_SPLIT_PMD) {
+ if (pmd_trans_huge(pmdval) && (flags & FOLL_SPLIT_PMD)) {
spin_unlock(ptl);
split_huge_pmd(vma, pmd, address);
/* If pmd was left empty, stuff a page table in there quickly */
return pte_alloc(mm, pmd) ? ERR_PTR(-ENOMEM) :
follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
}
- page = follow_trans_huge_pmd(vma, address, pmd, flags);
+ page = follow_huge_pmd(vma, address, pmd, flags, ctx);
spin_unlock(ptl);
- ctx->page_mask = HPAGE_PMD_NR - 1;
return page;
}
@@ -745,26 +1055,30 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
unsigned int flags,
struct follow_page_context *ctx)
{
- pud_t *pud;
+ pud_t *pudp, pud;
spinlock_t *ptl;
struct page *page;
struct mm_struct *mm = vma->vm_mm;
- pud = pud_offset(p4dp, address);
- if (pud_none(*pud))
- return no_page_table(vma, flags);
- if (pud_devmap(*pud)) {
- ptl = pud_lock(mm, pud);
- page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
+ pudp = pud_offset(p4dp, address);
+ pud = READ_ONCE(*pudp);
+ if (!pud_present(pud))
+ return no_page_table(vma, flags, address);
+ if (unlikely(is_hugepd(__hugepd(pud_val(pud)))))
+ return follow_hugepd(vma, __hugepd(pud_val(pud)),
+ address, PUD_SHIFT, flags, ctx);
+ if (pud_leaf(pud)) {
+ ptl = pud_lock(mm, pudp);
+ page = follow_huge_pud(vma, address, pudp, flags, ctx);
spin_unlock(ptl);
if (page)
return page;
- return no_page_table(vma, flags);
+ return no_page_table(vma, flags, address);
}
- if (unlikely(pud_bad(*pud)))
- return no_page_table(vma, flags);
+ if (unlikely(pud_bad(pud)))
+ return no_page_table(vma, flags, address);
- return follow_pmd_mask(vma, address, pud, flags, ctx);
+ return follow_pmd_mask(vma, address, pudp, flags, ctx);
}
static struct page *follow_p4d_mask(struct vm_area_struct *vma,
@@ -772,16 +1086,20 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
unsigned int flags,
struct follow_page_context *ctx)
{
- p4d_t *p4d;
+ p4d_t *p4dp, p4d;
- p4d = p4d_offset(pgdp, address);
- if (p4d_none(*p4d))
- return no_page_table(vma, flags);
- BUILD_BUG_ON(p4d_huge(*p4d));
- if (unlikely(p4d_bad(*p4d)))
- return no_page_table(vma, flags);
+ p4dp = p4d_offset(pgdp, address);
+ p4d = READ_ONCE(*p4dp);
+ BUILD_BUG_ON(p4d_leaf(p4d));
- return follow_pud_mask(vma, address, p4d, flags, ctx);
+ if (unlikely(is_hugepd(__hugepd(p4d_val(p4d)))))
+ return follow_hugepd(vma, __hugepd(p4d_val(p4d)),
+ address, P4D_SHIFT, flags, ctx);
+
+ if (!p4d_present(p4d) || p4d_bad(p4d))
+ return no_page_table(vma, flags, address);
+
+ return follow_pud_mask(vma, address, p4dp, flags, ctx);
}
/**
@@ -814,24 +1132,24 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
{
pgd_t *pgd;
struct mm_struct *mm = vma->vm_mm;
+ struct page *page;
- ctx->page_mask = 0;
-
- /*
- * Call hugetlb_follow_page_mask for hugetlb vmas as it will use
- * special hugetlb page table walking code. This eliminates the
- * need to check for hugetlb entries in the general walking code.
- */
- if (is_vm_hugetlb_page(vma))
- return hugetlb_follow_page_mask(vma, address, flags,
- &ctx->page_mask);
+ vma_pgtable_walk_begin(vma);
+ ctx->page_mask = 0;
pgd = pgd_offset(mm, address);
- if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- return no_page_table(vma, flags);
+ if (unlikely(is_hugepd(__hugepd(pgd_val(*pgd)))))
+ page = follow_hugepd(vma, __hugepd(pgd_val(*pgd)),
+ address, PGDIR_SHIFT, flags, ctx);
+ else if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+ page = no_page_table(vma, flags, address);
+ else
+ page = follow_p4d_mask(vma, address, pgd, flags, ctx);
+
+ vma_pgtable_walk_end(vma);
- return follow_p4d_mask(vma, address, pgd, flags, ctx);
+ return page;
}
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
@@ -1206,6 +1524,22 @@ static long __get_user_pages(struct mm_struct *mm,
/* first iteration or cross vma bound */
if (!vma || start >= vma->vm_end) {
+ /*
+ * MADV_POPULATE_(READ|WRITE) wants to handle VMA
+ * lookups+error reporting differently.
+ */
+ if (gup_flags & FOLL_MADV_POPULATE) {
+ vma = vma_lookup(mm, start);
+ if (!vma) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (check_vma_flags(vma, gup_flags)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ goto retry;
+ }
vma = gup_vma_lookup(mm, start);
if (!vma && in_gate_area(mm, start)) {
ret = get_gate_page(mm, start & PAGE_MASK,
@@ -1653,20 +1987,22 @@ long populate_vma_page_range(struct vm_area_struct *vma,
if (vma->vm_flags & VM_LOCKONFAULT)
return nr_pages;
+ /* ... similarly, we've never faulted in PROT_NONE pages */
+ if (!vma_is_accessible(vma))
+ return -EFAULT;
+
gup_flags = FOLL_TOUCH;
/*
* We want to touch writable mappings with a write fault in order
* to break COW, except for shared mappings because these don't COW
* and we would not want to dirty them for nothing.
+ *
+ * Otherwise, do a read fault, and use FOLL_FORCE in case it's not
+ * readable (ie write-only or executable).
*/
if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
gup_flags |= FOLL_WRITE;
-
- /*
- * We want mlock to succeed for regions that have any permissions
- * other than PROT_NONE.
- */
- if (vma_is_accessible(vma))
+ else
gup_flags |= FOLL_FORCE;
if (locked)
@@ -1683,35 +2019,35 @@ long populate_vma_page_range(struct vm_area_struct *vma,
}
/*
- * faultin_vma_page_range() - populate (prefault) page tables inside the
- * given VMA range readable/writable
+ * faultin_page_range() - populate (prefault) page tables inside the
+ * given range readable/writable
*
* This takes care of mlocking the pages, too, if VM_LOCKED is set.
*
- * @vma: target vma
+ * @mm: the mm to populate page tables in
* @start: start address
* @end: end address
* @write: whether to prefault readable or writable
* @locked: whether the mmap_lock is still held
*
- * Returns either number of processed pages in the vma, or a negative error
- * code on error (see __get_user_pages()).
+ * Returns either number of processed pages in the MM, or a negative error
+ * code on error (see __get_user_pages()). Note that this function reports
+ * errors related to VMAs, such as incompatible mappings, as expected by
+ * MADV_POPULATE_(READ|WRITE).
+ *
+ * The range must be page-aligned.
*
- * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
- * covered by the VMA. If it's released, *@locked will be set to 0.
+ * mm->mmap_lock must be held. If it's released, *@locked will be set to 0.
*/
-long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, bool write, int *locked)
+long faultin_page_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool write, int *locked)
{
- struct mm_struct *mm = vma->vm_mm;
unsigned long nr_pages = (end - start) / PAGE_SIZE;
int gup_flags;
long ret;
VM_BUG_ON(!PAGE_ALIGNED(start));
VM_BUG_ON(!PAGE_ALIGNED(end));
- VM_BUG_ON_VMA(start < vma->vm_start, vma);
- VM_BUG_ON_VMA(end > vma->vm_end, vma);
mmap_assert_locked(mm);
/*
@@ -1723,19 +2059,13 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
* a poisoned page.
* !FOLL_FORCE: Require proper access permissions.
*/
- gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE;
+ gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE |
+ FOLL_MADV_POPULATE;
if (write)
gup_flags |= FOLL_WRITE;
- /*
- * We want to report -EINVAL instead of -EFAULT for any permission
- * problems or incompatible mappings.
- */
- if (check_vma_flags(vma, gup_flags))
- return -EINVAL;
-
- ret = __get_user_pages(mm, start, nr_pages, gup_flags,
- NULL, locked);
+ ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked,
+ gup_flags);
lru_add_drain();
return ret;
}
@@ -2132,6 +2462,7 @@ static int migrate_longterm_unpinnable_pages(
struct migration_target_control mtc = {
.nid = NUMA_NO_NODE,
.gfp_mask = GFP_USER | __GFP_NOWARN,
+ .reason = MR_LONGTERM_PIN,
};
if (migrate_pages(movable_page_list, alloc_migration_target,
@@ -2419,7 +2750,7 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
EXPORT_SYMBOL(get_user_pages_unlocked);
/*
- * Fast GUP
+ * GUP-fast
*
* get_user_pages_fast attempts to pin user pages by walking the page
* tables directly and avoids taking locks. Thus the walker needs to be
@@ -2433,7 +2764,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
*
* Another way to achieve this is to batch up page table containing pages
* belonging to more than one mm_user, then rcu_sched a callback to free those
- * pages. Disabling interrupts will allow the fast_gup walker to both block
+ * pages. Disabling interrupts will allow the gup_fast() walker to both block
* the rcu_sched callback, and an IPI that we broadcast for splitting THPs
* (which is a relatively rare event). The code below adopts this strategy.
*
@@ -2451,15 +2782,17 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
*
* This code is based heavily on the PowerPC implementation by Nick Piggin.
*/
-#ifdef CONFIG_HAVE_FAST_GUP
+#ifdef CONFIG_HAVE_GUP_FAST
/*
- * Used in the GUP-fast path to determine whether a pin is permitted for a
- * specific folio.
+ * Used in the GUP-fast path to determine whether GUP is permitted to work on
+ * a specific folio.
*
* This call assumes the caller has pinned the folio, that the lowest page table
* level still points to this folio, and that interrupts have been disabled.
*
+ * GUP-fast must reject all secretmem folios.
+ *
* Writing to pinned file-backed dirty tracked folios is inherently problematic
* (see comment describing the writable_file_mapping_allowed() function). We
* therefore try to avoid the most egregious case of a long-term mapping doing
@@ -2469,25 +2802,34 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
* in the fast path, so instead we whitelist known good cases and if in doubt,
* fall back to the slow path.
*/
-static bool folio_fast_pin_allowed(struct folio *folio, unsigned int flags)
+static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags)
{
+ bool reject_file_backed = false;
struct address_space *mapping;
+ bool check_secretmem = false;
unsigned long mapping_flags;
/*
* If we aren't pinning then no problematic write can occur. A long term
* pin is the most egregious case so this is the one we disallow.
*/
- if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) !=
+ if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) ==
(FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE))
- return true;
+ reject_file_backed = true;
+
+ /* We hold a folio reference, so we can safely access folio fields. */
- /* The folio is pinned, so we can safely access folio fields. */
+ /* secretmem folios are always order-0 folios. */
+ if (IS_ENABLED(CONFIG_SECRETMEM) && !folio_test_large(folio))
+ check_secretmem = true;
+
+ if (!reject_file_backed && !check_secretmem)
+ return true;
if (WARN_ON_ONCE(folio_test_slab(folio)))
return false;
- /* hugetlb mappings do not require dirty-tracking. */
+ /* hugetlb neither requires dirty-tracking nor can be secretmem. */
if (folio_test_hugetlb(folio))
return true;
@@ -2523,50 +2865,48 @@ static bool folio_fast_pin_allowed(struct folio *folio, unsigned int flags)
/*
* At this point, we know the mapping is non-null and points to an
- * address_space object. The only remaining whitelisted file system is
- * shmem.
+ * address_space object.
*/
- return shmem_mapping(mapping);
+ if (check_secretmem && secretmem_mapping(mapping))
+ return false;
+ /* The only remaining allowed file system is shmem. */
+ return !reject_file_backed || shmem_mapping(mapping);
}
-static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
- unsigned int flags,
- struct page **pages)
+static void __maybe_unused gup_fast_undo_dev_pagemap(int *nr, int nr_start,
+ unsigned int flags, struct page **pages)
{
while ((*nr) - nr_start) {
- struct page *page = pages[--(*nr)];
+ struct folio *folio = page_folio(pages[--(*nr)]);
- ClearPageReferenced(page);
- if (flags & FOLL_PIN)
- unpin_user_page(page);
- else
- put_page(page);
+ folio_clear_referenced(folio);
+ gup_put_folio(folio, 1, flags);
}
}
#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
/*
- * Fast-gup relies on pte change detection to avoid concurrent pgtable
+ * GUP-fast relies on pte change detection to avoid concurrent pgtable
* operations.
*
- * To pin the page, fast-gup needs to do below in order:
+ * To pin the page, GUP-fast needs to do below in order:
* (1) pin the page (by prefetching pte), then (2) check pte not changed.
*
* For the rest of pgtable operations where pgtable updates can be racy
- * with fast-gup, we need to do (1) clear pte, then (2) check whether page
+ * with GUP-fast, we need to do (1) clear pte, then (2) check whether page
* is pinned.
*
* Above will work for all pte-level operations, including THP split.
*
- * For THP collapse, it's a bit more complicated because fast-gup may be
+ * For THP collapse, it's a bit more complicated because GUP-fast may be
* walking a pgtable page that is being freed (pte is still valid but pmd
* can be cleared already). To avoid race in such condition, we need to
* also check pmd here to make sure pmd doesn't change (corresponds to
* pmdp_collapse_flush() in the THP collapse code path).
*/
-static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
- unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
struct dev_pagemap *pgmap = NULL;
int nr_start = *nr, ret = 0;
@@ -2599,7 +2939,7 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
if (unlikely(!pgmap)) {
- undo_dev_pagemap(nr, nr_start, flags, pages);
+ gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
goto pte_unmap;
}
} else if (pte_special(pte))
@@ -2612,18 +2952,13 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
if (!folio)
goto pte_unmap;
- if (unlikely(folio_is_secretmem(folio))) {
- gup_put_folio(folio, 1, flags);
- goto pte_unmap;
- }
-
if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) ||
unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
gup_put_folio(folio, 1, flags);
goto pte_unmap;
}
- if (!folio_fast_pin_allowed(folio, flags)) {
+ if (!gup_fast_folio_allowed(folio, flags)) {
gup_put_folio(folio, 1, flags);
goto pte_unmap;
}
@@ -2668,44 +3003,45 @@ pte_unmap:
*
* For a futex to be placed on a THP tail page, get_futex_key requires a
* get_user_pages_fast_only implementation that can pin pages. Thus it's still
- * useful to have gup_huge_pmd even if we can't operate on ptes.
+ * useful to have gup_fast_pmd_leaf even if we can't operate on ptes.
*/
-static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
- unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
return 0;
}
#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static int __gup_device_huge(unsigned long pfn, unsigned long addr,
- unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages, int *nr)
{
int nr_start = *nr;
struct dev_pagemap *pgmap = NULL;
do {
+ struct folio *folio;
struct page *page = pfn_to_page(pfn);
pgmap = get_dev_pagemap(pfn, pgmap);
if (unlikely(!pgmap)) {
- undo_dev_pagemap(nr, nr_start, flags, pages);
+ gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
break;
}
if (!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)) {
- undo_dev_pagemap(nr, nr_start, flags, pages);
+ gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
break;
}
- SetPageReferenced(page);
- pages[*nr] = page;
- if (unlikely(try_grab_page(page, flags))) {
- undo_dev_pagemap(nr, nr_start, flags, pages);
+ folio = try_grab_folio(page, 1, flags);
+ if (!folio) {
+ gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
break;
}
+ folio_set_referenced(folio);
+ pages[*nr] = page;
(*nr)++;
pfn++;
} while (addr += PAGE_SIZE, addr != end);
@@ -2714,156 +3050,62 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
return addr == end;
}
-static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
- unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+static int gup_fast_devmap_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
unsigned long fault_pfn;
int nr_start = *nr;
fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
+ if (!gup_fast_devmap_leaf(fault_pfn, addr, end, flags, pages, nr))
return 0;
if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
- undo_dev_pagemap(nr, nr_start, flags, pages);
+ gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
return 0;
}
return 1;
}
-static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
- unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+static int gup_fast_devmap_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
unsigned long fault_pfn;
int nr_start = *nr;
fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
- if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
+ if (!gup_fast_devmap_leaf(fault_pfn, addr, end, flags, pages, nr))
return 0;
if (unlikely(pud_val(orig) != pud_val(*pudp))) {
- undo_dev_pagemap(nr, nr_start, flags, pages);
+ gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
return 0;
}
return 1;
}
#else
-static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
- unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+static int gup_fast_devmap_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
BUILD_BUG();
return 0;
}
-static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
- unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+static int gup_fast_devmap_pud_leaf(pud_t pud, pud_t *pudp, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
BUILD_BUG();
return 0;
}
#endif
-static int record_subpages(struct page *page, unsigned long addr,
- unsigned long end, struct page **pages)
-{
- int nr;
-
- for (nr = 0; addr != end; nr++, addr += PAGE_SIZE)
- pages[nr] = nth_page(page, nr);
-
- return nr;
-}
-
-#ifdef CONFIG_ARCH_HAS_HUGEPD
-static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
- unsigned long sz)
-{
- unsigned long __boundary = (addr + sz) & ~(sz-1);
- return (__boundary - 1 < end - 1) ? __boundary : end;
-}
-
-static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
-{
- unsigned long pte_end;
- struct page *page;
- struct folio *folio;
- pte_t pte;
- int refs;
-
- pte_end = (addr + sz) & ~(sz-1);
- if (pte_end < end)
- end = pte_end;
-
- pte = huge_ptep_get(ptep);
-
- if (!pte_access_permitted(pte, flags & FOLL_WRITE))
- return 0;
-
- /* hugepages are never "special" */
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT);
- refs = record_subpages(page, addr, end, pages + *nr);
-
- folio = try_grab_folio(page, refs, flags);
- if (!folio)
- return 0;
-
- if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
- gup_put_folio(folio, refs, flags);
- return 0;
- }
-
- if (!folio_fast_pin_allowed(folio, flags)) {
- gup_put_folio(folio, refs, flags);
- return 0;
- }
-
- if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) {
- gup_put_folio(folio, refs, flags);
- return 0;
- }
-
- *nr += refs;
- folio_set_referenced(folio);
- return 1;
-}
-
-static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
- unsigned int pdshift, unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
-{
- pte_t *ptep;
- unsigned long sz = 1UL << hugepd_shift(hugepd);
- unsigned long next;
-
- ptep = hugepte_offset(hugepd, addr, pdshift);
- do {
- next = hugepte_addr_end(addr, end, sz);
- if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
- return 0;
- } while (ptep++, addr = next, addr != end);
-
- return 1;
-}
-#else
-static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
- unsigned int pdshift, unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
-{
- return 0;
-}
-#endif /* CONFIG_ARCH_HAS_HUGEPD */
-
-static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
- unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
struct page *page;
struct folio *folio;
@@ -2875,12 +3117,12 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
if (pmd_devmap(orig)) {
if (unlikely(flags & FOLL_LONGTERM))
return 0;
- return __gup_device_huge_pmd(orig, pmdp, addr, end, flags,
- pages, nr);
+ return gup_fast_devmap_pmd_leaf(orig, pmdp, addr, end, flags,
+ pages, nr);
}
- page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT);
- refs = record_subpages(page, addr, end, pages + *nr);
+ page = pmd_page(orig);
+ refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr);
folio = try_grab_folio(page, refs, flags);
if (!folio)
@@ -2891,7 +3133,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
return 0;
}
- if (!folio_fast_pin_allowed(folio, flags)) {
+ if (!gup_fast_folio_allowed(folio, flags)) {
gup_put_folio(folio, refs, flags);
return 0;
}
@@ -2905,9 +3147,9 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
return 1;
}
-static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
- unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
struct page *page;
struct folio *folio;
@@ -2919,12 +3161,12 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
if (pud_devmap(orig)) {
if (unlikely(flags & FOLL_LONGTERM))
return 0;
- return __gup_device_huge_pud(orig, pudp, addr, end, flags,
- pages, nr);
+ return gup_fast_devmap_pud_leaf(orig, pudp, addr, end, flags,
+ pages, nr);
}
- page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT);
- refs = record_subpages(page, addr, end, pages + *nr);
+ page = pud_page(orig);
+ refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr);
folio = try_grab_folio(page, refs, flags);
if (!folio)
@@ -2935,7 +3177,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
return 0;
}
- if (!folio_fast_pin_allowed(folio, flags)) {
+ if (!gup_fast_folio_allowed(folio, flags)) {
gup_put_folio(folio, refs, flags);
return 0;
}
@@ -2950,9 +3192,9 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
return 1;
}
-static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
- unsigned long end, unsigned int flags,
- struct page **pages, int *nr)
+static int gup_fast_pgd_leaf(pgd_t orig, pgd_t *pgdp, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
int refs;
struct page *page;
@@ -2963,8 +3205,8 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
BUILD_BUG_ON(pgd_devmap(orig));
- page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT);
- refs = record_subpages(page, addr, end, pages + *nr);
+ page = pgd_page(orig);
+ refs = record_subpages(page, PGDIR_SIZE, addr, end, pages + *nr);
folio = try_grab_folio(page, refs, flags);
if (!folio)
@@ -2980,7 +3222,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
return 0;
}
- if (!folio_fast_pin_allowed(folio, flags)) {
+ if (!gup_fast_folio_allowed(folio, flags)) {
gup_put_folio(folio, refs, flags);
return 0;
}
@@ -2990,8 +3232,9 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
return 1;
}
-static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end,
- unsigned int flags, struct page **pages, int *nr)
+static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
unsigned long next;
pmd_t *pmdp;
@@ -3004,13 +3247,12 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
if (!pmd_present(pmd))
return 0;
- if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
- pmd_devmap(pmd))) {
- /* See gup_pte_range() */
+ if (unlikely(pmd_leaf(pmd))) {
+ /* See gup_fast_pte_range() */
if (pmd_protnone(pmd))
return 0;
- if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
+ if (!gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags,
pages, nr))
return 0;
@@ -3019,18 +3261,20 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
* architecture have different format for hugetlbfs
* pmd format and THP pmd format
*/
- if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
- PMD_SHIFT, next, flags, pages, nr))
+ if (!gup_fast_hugepd(__hugepd(pmd_val(pmd)), addr,
+ PMD_SHIFT, next, flags, pages, nr))
return 0;
- } else if (!gup_pte_range(pmd, pmdp, addr, next, flags, pages, nr))
+ } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags,
+ pages, nr))
return 0;
} while (pmdp++, addr = next, addr != end);
return 1;
}
-static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end,
- unsigned int flags, struct page **pages, int *nr)
+static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
unsigned long next;
pud_t *pudp;
@@ -3042,23 +3286,25 @@ static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned lo
next = pud_addr_end(addr, end);
if (unlikely(!pud_present(pud)))
return 0;
- if (unlikely(pud_huge(pud) || pud_devmap(pud))) {
- if (!gup_huge_pud(pud, pudp, addr, next, flags,
- pages, nr))
+ if (unlikely(pud_leaf(pud))) {
+ if (!gup_fast_pud_leaf(pud, pudp, addr, next, flags,
+ pages, nr))
return 0;
} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
- if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
- PUD_SHIFT, next, flags, pages, nr))
+ if (!gup_fast_hugepd(__hugepd(pud_val(pud)), addr,
+ PUD_SHIFT, next, flags, pages, nr))
return 0;
- } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr))
+ } else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags,
+ pages, nr))
return 0;
} while (pudp++, addr = next, addr != end);
return 1;
}
-static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end,
- unsigned int flags, struct page **pages, int *nr)
+static int gup_fast_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+ unsigned long end, unsigned int flags, struct page **pages,
+ int *nr)
{
unsigned long next;
p4d_t *p4dp;
@@ -3068,21 +3314,22 @@ static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned lo
p4d_t p4d = READ_ONCE(*p4dp);
next = p4d_addr_end(addr, end);
- if (p4d_none(p4d))
+ if (!p4d_present(p4d))
return 0;
- BUILD_BUG_ON(p4d_huge(p4d));
+ BUILD_BUG_ON(p4d_leaf(p4d));
if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
- if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
- P4D_SHIFT, next, flags, pages, nr))
+ if (!gup_fast_hugepd(__hugepd(p4d_val(p4d)), addr,
+ P4D_SHIFT, next, flags, pages, nr))
return 0;
- } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr))
+ } else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags,
+ pages, nr))
return 0;
} while (p4dp++, addr = next, addr != end);
return 1;
}
-static void gup_pgd_range(unsigned long addr, unsigned long end,
+static void gup_fast_pgd_range(unsigned long addr, unsigned long end,
unsigned int flags, struct page **pages, int *nr)
{
unsigned long next;
@@ -3095,24 +3342,25 @@ static void gup_pgd_range(unsigned long addr, unsigned long end,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
return;
- if (unlikely(pgd_huge(pgd))) {
- if (!gup_huge_pgd(pgd, pgdp, addr, next, flags,
- pages, nr))
+ if (unlikely(pgd_leaf(pgd))) {
+ if (!gup_fast_pgd_leaf(pgd, pgdp, addr, next, flags,
+ pages, nr))
return;
} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
- if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
- PGDIR_SHIFT, next, flags, pages, nr))
+ if (!gup_fast_hugepd(__hugepd(pgd_val(pgd)), addr,
+ PGDIR_SHIFT, next, flags, pages, nr))
return;
- } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr))
+ } else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags,
+ pages, nr))
return;
} while (pgdp++, addr = next, addr != end);
}
#else
-static inline void gup_pgd_range(unsigned long addr, unsigned long end,
+static inline void gup_fast_pgd_range(unsigned long addr, unsigned long end,
unsigned int flags, struct page **pages, int *nr)
{
}
-#endif /* CONFIG_HAVE_FAST_GUP */
+#endif /* CONFIG_HAVE_GUP_FAST */
#ifndef gup_fast_permitted
/*
@@ -3125,16 +3373,14 @@ static bool gup_fast_permitted(unsigned long start, unsigned long end)
}
#endif
-static unsigned long lockless_pages_from_mm(unsigned long start,
- unsigned long end,
- unsigned int gup_flags,
- struct page **pages)
+static unsigned long gup_fast(unsigned long start, unsigned long end,
+ unsigned int gup_flags, struct page **pages)
{
unsigned long flags;
int nr_pinned = 0;
unsigned seq;
- if (!IS_ENABLED(CONFIG_HAVE_FAST_GUP) ||
+ if (!IS_ENABLED(CONFIG_HAVE_GUP_FAST) ||
!gup_fast_permitted(start, end))
return 0;
@@ -3156,16 +3402,16 @@ static unsigned long lockless_pages_from_mm(unsigned long start,
* that come from THPs splitting.
*/
local_irq_save(flags);
- gup_pgd_range(start, end, gup_flags, pages, &nr_pinned);
+ gup_fast_pgd_range(start, end, gup_flags, pages, &nr_pinned);
local_irq_restore(flags);
/*
* When pinning pages for DMA there could be a concurrent write protect
- * from fork() via copy_page_range(), in this case always fail fast GUP.
+ * from fork() via copy_page_range(), in this case always fail GUP-fast.
*/
if (gup_flags & FOLL_PIN) {
if (read_seqcount_retry(&current->mm->write_protect_seq, seq)) {
- unpin_user_pages_lockless(pages, nr_pinned);
+ gup_fast_unpin_user_pages(pages, nr_pinned);
return 0;
} else {
sanity_check_pinned_pages(pages, nr_pinned);
@@ -3174,10 +3420,8 @@ static unsigned long lockless_pages_from_mm(unsigned long start,
return nr_pinned;
}
-static int internal_get_user_pages_fast(unsigned long start,
- unsigned long nr_pages,
- unsigned int gup_flags,
- struct page **pages)
+static int gup_fast_fallback(unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages)
{
unsigned long len, end;
unsigned long nr_pinned;
@@ -3205,7 +3449,7 @@ static int internal_get_user_pages_fast(unsigned long start,
if (unlikely(!access_ok((void __user *)start, len)))
return -EFAULT;
- nr_pinned = lockless_pages_from_mm(start, end, gup_flags, pages);
+ nr_pinned = gup_fast(start, end, gup_flags, pages);
if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY)
return nr_pinned;
@@ -3259,7 +3503,7 @@ int get_user_pages_fast_only(unsigned long start, int nr_pages,
FOLL_GET | FOLL_FAST_ONLY))
return -EINVAL;
- return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
+ return gup_fast_fallback(start, nr_pages, gup_flags, pages);
}
EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
@@ -3290,7 +3534,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages,
*/
if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_GET))
return -EINVAL;
- return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
+ return gup_fast_fallback(start, nr_pages, gup_flags, pages);
}
EXPORT_SYMBOL_GPL(get_user_pages_fast);
@@ -3318,7 +3562,7 @@ int pin_user_pages_fast(unsigned long start, int nr_pages,
{
if (!is_valid_gup_args(pages, NULL, &gup_flags, FOLL_PIN))
return -EINVAL;
- return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
+ return gup_fast_fallback(start, nr_pages, gup_flags, pages);
}
EXPORT_SYMBOL_GPL(pin_user_pages_fast);
diff --git a/mm/hmm.c b/mm/hmm.c
index 277ddcab4947d9..93aebd9cc130ea 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -424,22 +424,17 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
walk->action = ACTION_CONTINUE;
pud = READ_ONCE(*pudp);
- if (pud_none(pud)) {
+ if (!pud_present(pud)) {
spin_unlock(ptl);
return hmm_vma_walk_hole(start, end, -1, walk);
}
- if (pud_huge(pud) && pud_devmap(pud)) {
+ if (pud_leaf(pud) && pud_devmap(pud)) {
unsigned long i, npages, pfn;
unsigned int required_fault;
unsigned long *hmm_pfns;
unsigned long cpu_flags;
- if (!pud_present(pud)) {
- spin_unlock(ptl);
- return hmm_vma_walk_hole(start, end, -1, walk);
- }
-
i = (addr - range->start) >> PAGE_SHIFT;
npages = (end - addr) >> PAGE_SHIFT;
hmm_pfns = &range->hmm_pfns[i];
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9859aa4f755380..8261b56693978f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -38,6 +38,7 @@
#include <linux/sched/sysctl.h>
#include <linux/memory-tiers.h>
#include <linux/compat.h>
+#include <linux/pgalloc_tag.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
@@ -73,17 +74,20 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
struct shrink_control *sc);
static atomic_t huge_zero_refcount;
-struct page *huge_zero_page __read_mostly;
+struct folio *huge_zero_folio __read_mostly;
unsigned long huge_zero_pfn __read_mostly = ~0UL;
unsigned long huge_anon_orders_always __read_mostly;
unsigned long huge_anon_orders_madvise __read_mostly;
unsigned long huge_anon_orders_inherit __read_mostly;
unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
- unsigned long vm_flags, bool smaps,
- bool in_pf, bool enforce_sysfs,
+ unsigned long vm_flags,
+ unsigned long tva_flags,
unsigned long orders)
{
+ bool smaps = tva_flags & TVA_SMAPS;
+ bool in_pf = tva_flags & TVA_IN_PF;
+ bool enforce_sysfs = tva_flags & TVA_ENFORCE_SYSFS;
/* Check the intersection of requested and supported orders. */
orders &= vma_is_anonymous(vma) ?
THP_ORDERS_ALL_ANON : THP_ORDERS_ALL_FILE;
@@ -191,24 +195,24 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
static bool get_huge_zero_page(void)
{
- struct page *zero_page;
+ struct folio *zero_folio;
retry:
if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
return true;
- zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
+ zero_folio = folio_alloc((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
HPAGE_PMD_ORDER);
- if (!zero_page) {
+ if (!zero_folio) {
count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
return false;
}
preempt_disable();
- if (cmpxchg(&huge_zero_page, NULL, zero_page)) {
+ if (cmpxchg(&huge_zero_folio, NULL, zero_folio)) {
preempt_enable();
- __free_pages(zero_page, compound_order(zero_page));
+ folio_put(zero_folio);
goto retry;
}
- WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
+ WRITE_ONCE(huge_zero_pfn, folio_pfn(zero_folio));
/* We take additional reference here. It will be put back by shrinker */
atomic_set(&huge_zero_refcount, 2);
@@ -226,10 +230,10 @@ static void put_huge_zero_page(void)
BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
}
-struct page *mm_get_huge_zero_page(struct mm_struct *mm)
+struct folio *mm_get_huge_zero_folio(struct mm_struct *mm)
{
if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
- return READ_ONCE(huge_zero_page);
+ return READ_ONCE(huge_zero_folio);
if (!get_huge_zero_page())
return NULL;
@@ -237,10 +241,10 @@ struct page *mm_get_huge_zero_page(struct mm_struct *mm)
if (test_and_set_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
put_huge_zero_page();
- return READ_ONCE(huge_zero_page);
+ return READ_ONCE(huge_zero_folio);
}
-void mm_put_huge_zero_page(struct mm_struct *mm)
+void mm_put_huge_zero_folio(struct mm_struct *mm)
{
if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
put_huge_zero_page();
@@ -257,10 +261,10 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
- struct page *zero_page = xchg(&huge_zero_page, NULL);
- BUG_ON(zero_page == NULL);
+ struct folio *zero_folio = xchg(&huge_zero_folio, NULL);
+ BUG_ON(zero_folio == NULL);
WRITE_ONCE(huge_zero_pfn, ~0UL);
- __free_pages(zero_page, compound_order(zero_page));
+ folio_put(zero_folio);
return HPAGE_PMD_NR;
}
@@ -525,6 +529,52 @@ static const struct kobj_type thpsize_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
};
+DEFINE_PER_CPU(struct mthp_stat, mthp_stats) = {{{0}}};
+
+static unsigned long sum_mthp_stat(int order, enum mthp_stat_item item)
+{
+ unsigned long sum = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct mthp_stat *this = &per_cpu(mthp_stats, cpu);
+
+ sum += this->stats[order][item];
+ }
+
+ return sum;
+}
+
+#define DEFINE_MTHP_STAT_ATTR(_name, _index) \
+static ssize_t _name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf) \
+{ \
+ int order = to_thpsize(kobj)->order; \
+ \
+ return sysfs_emit(buf, "%lu\n", sum_mthp_stat(order, _index)); \
+} \
+static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+DEFINE_MTHP_STAT_ATTR(anon_fault_alloc, MTHP_STAT_ANON_FAULT_ALLOC);
+DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
+DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
+DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
+DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
+
+static struct attribute *stats_attrs[] = {
+ &anon_fault_alloc_attr.attr,
+ &anon_fault_fallback_attr.attr,
+ &anon_fault_fallback_charge_attr.attr,
+ &anon_swpout_attr.attr,
+ &anon_swpout_fallback_attr.attr,
+ NULL,
+};
+
+static struct attribute_group stats_attr_group = {
+ .name = "stats",
+ .attrs = stats_attrs,
+};
+
static struct thpsize *thpsize_create(int order, struct kobject *parent)
{
unsigned long size = (PAGE_SIZE << order) / SZ_1K;
@@ -548,6 +598,12 @@ static struct thpsize *thpsize_create(int order, struct kobject *parent)
return ERR_PTR(ret);
}
+ ret = sysfs_create_group(&thpsize->kobj, &stats_attr_group);
+ if (ret) {
+ kobject_put(&thpsize->kobj);
+ return ERR_PTR(ret);
+ }
+
thpsize->order = order;
return thpsize;
}
@@ -788,27 +844,19 @@ struct deferred_split *get_deferred_split_queue(struct folio *folio)
}
#endif
-void folio_prep_large_rmappable(struct folio *folio)
-{
- if (!folio || !folio_test_large(folio))
- return;
- if (folio_order(folio) > 1)
- INIT_LIST_HEAD(&folio->_deferred_list);
- folio_set_large_rmappable(folio);
-}
-
-static inline bool is_transparent_hugepage(struct folio *folio)
+static inline bool is_transparent_hugepage(const struct folio *folio)
{
if (!folio_test_large(folio))
return false;
- return is_huge_zero_page(&folio->page) ||
+ return is_huge_zero_folio(folio) ||
folio_test_large_rmappable(folio);
}
static unsigned long __thp_get_unmapped_area(struct file *filp,
unsigned long addr, unsigned long len,
- loff_t off, unsigned long flags, unsigned long size)
+ loff_t off, unsigned long flags, unsigned long size,
+ vm_flags_t vm_flags)
{
loff_t off_end = off + len;
loff_t off_align = round_up(off, size);
@@ -824,8 +872,8 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
if (len_pad < len || (off + len_pad) < off)
return 0;
- ret = current->mm->get_unmapped_area(filp, addr, len_pad,
- off >> PAGE_SHIFT, flags);
+ ret = mm_get_unmapped_area_vmflags(current->mm, filp, addr, len_pad,
+ off >> PAGE_SHIFT, flags, vm_flags);
/*
* The failure might be due to length padding. The caller will retry
@@ -843,25 +891,32 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
off_sub = (off - ret) & (size - 1);
- if (current->mm->get_unmapped_area == arch_get_unmapped_area_topdown &&
- !off_sub)
+ if (test_bit(MMF_TOPDOWN, &current->mm->flags) && !off_sub)
return ret + size;
ret += off_sub;
return ret;
}
-unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags,
+ vm_flags_t vm_flags)
{
unsigned long ret;
loff_t off = (loff_t)pgoff << PAGE_SHIFT;
- ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE);
+ ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE, vm_flags);
if (ret)
return ret;
- return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+ return mm_get_unmapped_area_vmflags(current->mm, filp, addr, len, pgoff, flags,
+ vm_flags);
+}
+
+unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ return thp_get_unmapped_area_vmflags(filp, addr, len, pgoff, flags, 0);
}
EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
@@ -880,6 +935,8 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
folio_put(folio);
count_vm_event(THP_FAULT_FALLBACK);
count_vm_event(THP_FAULT_FALLBACK_CHARGE);
+ count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_FALLBACK);
+ count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
return VM_FAULT_FALLBACK;
}
folio_throttle_swaprate(folio, gfp);
@@ -929,6 +986,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
mm_inc_nr_ptes(vma->vm_mm);
spin_unlock(vmf->ptl);
count_vm_event(THP_FAULT_ALLOC);
+ count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC);
count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC);
}
@@ -979,14 +1037,14 @@ gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma)
}
/* Caller must hold page table lock. */
-static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
+static void set_huge_zero_folio(pgtable_t pgtable, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
- struct page *zero_page)
+ struct folio *zero_folio)
{
pmd_t entry;
if (!pmd_none(*pmd))
return;
- entry = mk_pmd(zero_page, vma->vm_page_prot);
+ entry = mk_pmd(&zero_folio->page, vma->vm_page_prot);
entry = pmd_mkhuge(entry);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, haddr, pmd, entry);
@@ -999,24 +1057,27 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
gfp_t gfp;
struct folio *folio;
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
+ vm_fault_t ret;
if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
return VM_FAULT_FALLBACK;
- if (unlikely(anon_vma_prepare(vma)))
- return VM_FAULT_OOM;
+ ret = vmf_anon_prepare(vmf);
+ if (ret)
+ return ret;
khugepaged_enter_vma(vma, vma->vm_flags);
if (!(vmf->flags & FAULT_FLAG_WRITE) &&
!mm_forbids_zeropage(vma->vm_mm) &&
transparent_hugepage_use_zero_page()) {
pgtable_t pgtable;
- struct page *zero_page;
+ struct folio *zero_folio;
vm_fault_t ret;
+
pgtable = pte_alloc_one(vma->vm_mm);
if (unlikely(!pgtable))
return VM_FAULT_OOM;
- zero_page = mm_get_huge_zero_page(vma->vm_mm);
- if (unlikely(!zero_page)) {
+ zero_folio = mm_get_huge_zero_folio(vma->vm_mm);
+ if (unlikely(!zero_folio)) {
pte_free(vma->vm_mm, pgtable);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -1034,8 +1095,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
ret = handle_userfault(vmf, VM_UFFD_MISSING);
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
} else {
- set_huge_zero_page(pgtable, vma->vm_mm, vma,
- haddr, vmf->pmd, zero_page);
+ set_huge_zero_folio(pgtable, vma->vm_mm, vma,
+ haddr, vmf->pmd, zero_folio);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
spin_unlock(vmf->ptl);
}
@@ -1049,6 +1110,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, vma, haddr, true);
if (unlikely(!folio)) {
count_vm_event(THP_FAULT_FALLBACK);
+ count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp);
@@ -1228,8 +1290,8 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write)
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
-static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, bool write)
+void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, bool write)
{
pmd_t _pmd;
@@ -1344,11 +1406,11 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
*/
if (is_huge_zero_pmd(pmd)) {
/*
- * get_huge_zero_page() will never allocate a new page here,
- * since we already have a zero page to copy. It just takes a
- * reference.
+ * mm_get_huge_zero_folio() will never allocate a new
+ * folio here, since we already have a zero page to
+ * copy. It just takes a reference.
*/
- mm_get_huge_zero_page(dst_mm);
+ mm_get_huge_zero_folio(dst_mm);
goto out_zero_page;
}
@@ -1385,8 +1447,8 @@ out:
}
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
- pud_t *pud, bool write)
+void touch_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud, bool write)
{
pud_t _pud;
@@ -1398,49 +1460,6 @@ static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
update_mmu_cache_pud(vma, addr, pud);
}
-struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
- pud_t *pud, int flags, struct dev_pagemap **pgmap)
-{
- unsigned long pfn = pud_pfn(*pud);
- struct mm_struct *mm = vma->vm_mm;
- struct page *page;
- int ret;
-
- assert_spin_locked(pud_lockptr(mm, pud));
-
- if (flags & FOLL_WRITE && !pud_write(*pud))
- return NULL;
-
- if (pud_present(*pud) && pud_devmap(*pud))
- /* pass */;
- else
- return NULL;
-
- if (flags & FOLL_TOUCH)
- touch_pud(vma, addr, pud, flags & FOLL_WRITE);
-
- /*
- * device mapped pages can only be returned if the
- * caller will manage the page reference count.
- *
- * At least one of FOLL_GET | FOLL_PIN must be set, so assert that here:
- */
- if (!(flags & (FOLL_GET | FOLL_PIN)))
- return ERR_PTR(-EEXIST);
-
- pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
- *pgmap = get_dev_pagemap(pfn, *pgmap);
- if (!*pgmap)
- return ERR_PTR(-EFAULT);
- page = pfn_to_page(pfn);
-
- ret = try_grab_page(page, flags);
- if (ret)
- page = ERR_PTR(ret);
-
- return page;
-}
-
int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
struct vm_area_struct *vma)
@@ -1627,88 +1646,6 @@ static inline bool can_change_pmd_writable(struct vm_area_struct *vma,
return pmd_dirty(pmd);
}
-/* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */
-static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
- struct vm_area_struct *vma,
- unsigned int flags)
-{
- /* If the pmd is writable, we can write to the page. */
- if (pmd_write(pmd))
- return true;
-
- /* Maybe FOLL_FORCE is set to override it? */
- if (!(flags & FOLL_FORCE))
- return false;
-
- /* But FOLL_FORCE has no effect on shared mappings */
- if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
- return false;
-
- /* ... or read-only private ones */
- if (!(vma->vm_flags & VM_MAYWRITE))
- return false;
-
- /* ... or already writable ones that just need to take a write fault */
- if (vma->vm_flags & VM_WRITE)
- return false;
-
- /*
- * See can_change_pte_writable(): we broke COW and could map the page
- * writable if we have an exclusive anonymous page ...
- */
- if (!page || !PageAnon(page) || !PageAnonExclusive(page))
- return false;
-
- /* ... and a write-fault isn't required for other reasons. */
- if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd))
- return false;
- return !userfaultfd_huge_pmd_wp(vma, pmd);
-}
-
-struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
- unsigned long addr,
- pmd_t *pmd,
- unsigned int flags)
-{
- struct mm_struct *mm = vma->vm_mm;
- struct page *page;
- int ret;
-
- assert_spin_locked(pmd_lockptr(mm, pmd));
-
- page = pmd_page(*pmd);
- VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
-
- if ((flags & FOLL_WRITE) &&
- !can_follow_write_pmd(*pmd, page, vma, flags))
- return NULL;
-
- /* Avoid dumping huge zero page */
- if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
- return ERR_PTR(-EFAULT);
-
- if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags))
- return NULL;
-
- if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))
- return ERR_PTR(-EMLINK);
-
- VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
- !PageAnonExclusive(page), page);
-
- ret = try_grab_page(page, flags);
- if (ret)
- return ERR_PTR(ret);
-
- if (flags & FOLL_TOUCH)
- touch_pmd(vma, addr, pmd, flags & FOLL_WRITE);
-
- page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
- VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
-
- return page;
-}
-
/* NUMA hinting page fault entry point for trans huge pmds */
vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
@@ -1754,7 +1691,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
*/
if (node_is_toptier(nid))
last_cpupid = folio_last_cpupid(folio);
- target_nid = numa_migrate_prep(folio, vma, haddr, nid, &flags);
+ target_nid = numa_migrate_prep(folio, vmf, haddr, nid, &flags);
if (target_nid == NUMA_NO_NODE) {
folio_put(folio);
goto out_map;
@@ -1824,12 +1761,12 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
goto out;
}
- folio = pfn_folio(pmd_pfn(orig_pmd));
+ folio = pmd_folio(orig_pmd);
/*
* If other processes are mapping this folio, we couldn't discard
* the folio unless they all do MADV_FREE so let's skip the folio.
*/
- if (folio_estimated_sharers(folio) != 1)
+ if (folio_likely_mapped_shared(folio))
goto out;
if (!folio_trylock(folio))
@@ -1915,7 +1852,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
folio = page_folio(page);
folio_remove_rmap_pmd(folio, page, vma);
- VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
+ WARN_ON_ONCE(folio_mapcount(folio) < 0);
VM_BUG_ON_PAGE(!PageHead(page), page);
} else if (thp_migration_supported()) {
swp_entry_t entry;
@@ -2094,7 +2031,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (pmd_protnone(*pmd))
goto unlock;
- folio = page_folio(pmd_page(*pmd));
+ folio = pmd_folio(*pmd);
toptier = node_is_toptier(folio_nid(folio));
/*
* Skip scanning top tier node if normal numa
@@ -2259,9 +2196,6 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
goto unlock_ptls;
}
- folio_move_anon_rmap(src_folio, dst_vma);
- WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
-
src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
/* Folio got pinned from under us. Put it back and fail the move. */
if (folio_maybe_dma_pinned(src_folio)) {
@@ -2270,6 +2204,9 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
goto unlock_ptls;
}
+ folio_move_anon_rmap(src_folio, dst_vma);
+ src_folio->index = linear_page_index(dst_vma, dst_addr);
+
_dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
_dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma);
@@ -2513,12 +2450,12 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
* for this pmd), then we flush the SMP TLB and finally we write the
* non-huge version of the pmd entry with pmd_populate.
*/
- old_pmd = pmdp_invalidate(vma, haddr, pmd);
- pmd_migration = is_pmd_migration_entry(old_pmd);
+ pmd_migration = is_pmd_migration_entry(*pmd);
if (unlikely(pmd_migration)) {
swp_entry_t entry;
+ old_pmd = *pmd;
entry = pmd_to_swp_entry(old_pmd);
page = pfn_swap_entry_to_page(entry);
write = is_writable_migration_entry(entry);
@@ -2529,6 +2466,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
soft_dirty = pmd_swp_soft_dirty(old_pmd);
uffd_wp = pmd_swp_uffd_wp(old_pmd);
} else {
+ old_pmd = pmdp_invalidate(vma, haddr, pmd);
page = pmd_page(old_pmd);
folio = page_folio(page);
if (pmd_dirty(old_pmd)) {
@@ -2671,7 +2609,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
* It's safe to call pmd_page when folio is set because it's
* guaranteed that pmd is present.
*/
- if (folio && folio != page_folio(pmd_page(*pmd)))
+ if (folio && folio != pmd_folio(*pmd))
goto out;
__split_huge_pmd_locked(vma, pmd, range.start, freeze);
}
@@ -2863,7 +2801,7 @@ static void __split_huge_page_tail(struct folio *folio, int tail,
clear_compound_head(page_tail);
if (new_order) {
prep_compound_page(page_tail, new_order);
- folio_prep_large_rmappable(new_folio);
+ folio_set_large_rmappable(new_folio);
}
/* Finally unfreeze refcount. Additional reference from page cache. */
@@ -2946,6 +2884,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* Caller disabled irqs, so they are still disabled here */
split_page_owner(head, order, new_order);
+ pgalloc_tag_split(head, 1 << order);
/* See comment in __split_huge_page_tail() */
if (folio_test_anon(folio)) {
@@ -2967,9 +2906,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
shmem_uncharge(folio->mapping->host, nr_dropped);
remap_page(folio, nr);
- if (folio_test_swapcache(folio))
- split_swap_cluster(folio->swap);
-
/*
* set page to its compound_head when split to non order-0 pages, so
* we can skip unlocking it below, since PG_locked is transferred to
@@ -3013,28 +2949,48 @@ bool can_split_folio(struct folio *folio, int *pextra_pins)
}
/*
- * This function splits huge page into pages in @new_order. @page can point to
- * any subpage of huge page to split. Split doesn't change the position of
- * @page.
+ * This function splits a large folio into smaller folios of order @new_order.
+ * @page can point to any page of the large folio to split. The split operation
+ * does not change the position of @page.
+ *
+ * Prerequisites:
+ *
+ * 1) The caller must hold a reference on the @page's owning folio, also known
+ * as the large folio.
+ *
+ * 2) The large folio must be locked.
*
- * NOTE: order-1 anonymous folio is not supported because _deferred_list,
- * which is used by partially mapped folios, is stored in subpage 2 and an
- * order-1 folio only has subpage 0 and 1. File-backed order-1 folios are OK,
- * since they do not use _deferred_list.
+ * 3) The folio must not be pinned. Any unexpected folio references, including
+ * GUP pins, will result in the folio not getting split; instead, the caller
+ * will receive an -EAGAIN.
*
- * Only caller must hold pin on the @page, otherwise split fails with -EBUSY.
- * The huge page must be locked.
+ * 4) @new_order > 1, usually. Splitting to order-1 anonymous folios is not
+ * supported for non-file-backed folios, because folio->_deferred_list, which
+ * is used by partially mapped folios, is stored in subpage 2, but an order-1
+ * folio only has subpages 0 and 1. File-backed order-1 folios are supported,
+ * since they do not use _deferred_list.
+ *
+ * After splitting, the caller's folio reference will be transferred to @page,
+ * resulting in a raised refcount of @page after this call. The other pages may
+ * be freed if they are not mapped.
*
* If @list is null, tail pages will be added to LRU list, otherwise, to @list.
*
- * Pages in new_order will inherit mapping, flags, and so on from the hugepage.
+ * Pages in @new_order will inherit the mapping, flags, and so on from the
+ * huge page.
+ *
+ * Returns 0 if the huge page was split successfully.
+ *
+ * Returns -EAGAIN if the folio has unexpected reference (e.g., GUP) or if
+ * the folio was concurrently removed from the page cache.
*
- * GUP pin and PG_locked transferred to @page or the compound page @page belongs
- * to. Rest subpages can be freed if they are not mapped.
+ * Returns -EBUSY when trying to split the huge zeropage, if the folio is
+ * under writeback, if fs-specific folio metadata cannot currently be
+ * released, or if some unexpected race happened (e.g., anon VMA disappeared,
+ * truncation).
*
- * Returns 0 if the hugepage is split successfully.
- * Returns -EBUSY if the page is pinned or if anon_vma disappeared from under
- * us.
+ * Returns -EINVAL when trying to split to an order that is incompatible
+ * with the folio. Splitting to order 0 is compatible with all folios.
*/
int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
unsigned int new_order)
@@ -3045,6 +3001,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
+ bool is_thp = folio_test_pmd_mappable(folio);
int extra_pins, ret;
pgoff_t end;
bool is_hzp;
@@ -3080,7 +3037,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
}
- is_hzp = is_huge_zero_page(&folio->page);
+ is_hzp = is_huge_zero_folio(folio);
if (is_hzp) {
pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
return -EBUSY;
@@ -3223,7 +3180,8 @@ out_unlock:
i_mmap_unlock_read(mapping);
out:
xas_destroy(&xas);
- count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
+ if (is_thp)
+ count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
return ret;
}
@@ -3285,7 +3243,8 @@ void deferred_split_folio(struct folio *folio)
spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
if (list_empty(&folio->_deferred_list)) {
- count_vm_event(THP_DEFERRED_SPLIT_PAGE);
+ if (folio_test_pmd_mappable(folio))
+ count_vm_event(THP_DEFERRED_SPLIT_PAGE);
list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
ds_queue->split_queue_len++;
#ifdef CONFIG_MEMCG
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 23ef240ba48a60..417fc5cdb6eebd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1517,7 +1517,7 @@ static void __destroy_compound_gigantic_folio(struct folio *folio,
struct page *p;
atomic_set(&folio->_entire_mapcount, 0);
- atomic_set(&folio->_nr_pages_mapped, 0);
+ atomic_set(&folio->_large_mapcount, 0);
atomic_set(&folio->_pincount, 0);
for (i = 1; i < nr_pages; i++) {
@@ -1619,19 +1619,11 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
unsigned int order) { }
#endif
-static inline void __clear_hugetlb_destructor(struct hstate *h,
- struct folio *folio)
-{
- lockdep_assert_held(&hugetlb_lock);
-
- folio_clear_hugetlb(folio);
-}
-
/*
* Remove hugetlb folio from lists.
- * If vmemmap exists for the folio, update dtor so that the folio appears
- * as just a compound page. Otherwise, wait until after allocating vmemmap
- * to update dtor.
+ * If vmemmap exists for the folio, clear the hugetlb flag so that the
+ * folio appears as just a compound page. Otherwise, wait until after
+ * allocating vmemmap to clear the flag.
*
* A reference is held on the folio, except in the case of demote.
*
@@ -1662,12 +1654,12 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
}
/*
- * We can only clear the hugetlb destructor after allocating vmemmap
+ * We can only clear the hugetlb flag after allocating vmemmap
* pages. Otherwise, someone (memory error handling) may try to write
* to tail struct pages.
*/
if (!folio_test_hugetlb_vmemmap_optimized(folio))
- __clear_hugetlb_destructor(h, folio);
+ __folio_clear_hugetlb(folio);
/*
* In the case of demote we do not ref count the page as it will soon
@@ -1711,7 +1703,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
h->surplus_huge_pages_node[nid]++;
}
- folio_set_hugetlb(folio);
+ __folio_set_hugetlb(folio);
folio_change_private(folio, NULL);
/*
* We have to set hugetlb_vmemmap_optimized again as above
@@ -1734,14 +1726,14 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
*/
return;
- arch_clear_hugepage_flags(&folio->page);
+ arch_clear_hugetlb_flags(folio);
enqueue_hugetlb_folio(h, folio);
}
static void __update_and_free_hugetlb_folio(struct hstate *h,
struct folio *folio)
{
- bool clear_dtor = folio_test_hugetlb_vmemmap_optimized(folio);
+ bool clear_flag = folio_test_hugetlb_vmemmap_optimized(folio);
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
@@ -1754,11 +1746,11 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
return;
/*
- * If folio is not vmemmap optimized (!clear_dtor), then the folio
+ * If folio is not vmemmap optimized (!clear_flag), then the folio
* is no longer identified as a hugetlb page. hugetlb_vmemmap_restore_folio
* can only be passed hugetlb pages and will BUG otherwise.
*/
- if (clear_dtor && hugetlb_vmemmap_restore_folio(h, folio)) {
+ if (clear_flag && hugetlb_vmemmap_restore_folio(h, folio)) {
spin_lock_irq(&hugetlb_lock);
/*
* If we cannot allocate vmemmap pages, just refuse to free the
@@ -1779,11 +1771,11 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
/*
* If vmemmap pages were allocated above, then we need to clear the
- * hugetlb destructor under the hugetlb lock.
+ * hugetlb flag under the hugetlb lock.
*/
- if (clear_dtor) {
+ if (folio_test_hugetlb(folio)) {
spin_lock_irq(&hugetlb_lock);
- __clear_hugetlb_destructor(h, folio);
+ __folio_clear_hugetlb(folio);
spin_unlock_irq(&hugetlb_lock);
}
@@ -1796,7 +1788,8 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
destroy_compound_gigantic_folio(folio, huge_page_order(h));
free_gigantic_folio(folio, huge_page_order(h));
} else {
- __free_pages(&folio->page, huge_page_order(h));
+ INIT_LIST_HEAD(&folio->_deferred_list);
+ folio_put(folio);
}
}
@@ -1884,7 +1877,7 @@ static void bulk_vmemmap_restore_error(struct hstate *h,
list_for_each_entry_safe(folio, t_folio, non_hvo_folios, lru) {
list_del(&folio->lru);
spin_lock_irq(&hugetlb_lock);
- __clear_hugetlb_destructor(h, folio);
+ __folio_clear_hugetlb(folio);
spin_unlock_irq(&hugetlb_lock);
update_and_free_hugetlb_folio(h, folio, false);
cond_resched();
@@ -1909,7 +1902,7 @@ static void bulk_vmemmap_restore_error(struct hstate *h,
} else {
list_del(&folio->lru);
spin_lock_irq(&hugetlb_lock);
- __clear_hugetlb_destructor(h, folio);
+ __folio_clear_hugetlb(folio);
spin_unlock_irq(&hugetlb_lock);
update_and_free_hugetlb_folio(h, folio, false);
cond_resched();
@@ -1942,14 +1935,14 @@ retry:
* should only be pages on the non_hvo_folios list.
* Do note that the non_hvo_folios list could be empty.
* Without HVO enabled, ret will be 0 and there is no need to call
- * __clear_hugetlb_destructor as this was done previously.
+ * __folio_clear_hugetlb as this was done previously.
*/
VM_WARN_ON(!list_empty(folio_list));
VM_WARN_ON(ret < 0);
if (!list_empty(&non_hvo_folios) && ret) {
spin_lock_irq(&hugetlb_lock);
list_for_each_entry(folio, &non_hvo_folios, lru)
- __clear_hugetlb_destructor(h, folio);
+ __folio_clear_hugetlb(folio);
spin_unlock_irq(&hugetlb_lock);
}
@@ -1974,7 +1967,7 @@ void free_huge_folio(struct folio *folio)
{
/*
* Can't pass hstate in here because it is called from the
- * compound page destructor.
+ * generic mm code.
*/
struct hstate *h = folio_hstate(folio);
int nid = folio_nid(folio);
@@ -2031,7 +2024,7 @@ void free_huge_folio(struct folio *folio)
spin_unlock_irqrestore(&hugetlb_lock, flags);
update_and_free_hugetlb_folio(h, folio, true);
} else {
- arch_clear_hugepage_flags(&folio->page);
+ arch_clear_hugetlb_flags(folio);
enqueue_hugetlb_folio(h, folio);
spin_unlock_irqrestore(&hugetlb_lock, flags);
}
@@ -2049,7 +2042,7 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
static void init_new_hugetlb_folio(struct hstate *h, struct folio *folio)
{
- folio_set_hugetlb(folio);
+ __folio_set_hugetlb(folio);
INIT_LIST_HEAD(&folio->lru);
hugetlb_set_folio_subpool(folio, NULL);
set_hugetlb_cgroup(folio, NULL);
@@ -2124,10 +2117,10 @@ static bool __prep_compound_gigantic_folio(struct folio *folio,
set_compound_head(p, &folio->page);
}
__folio_set_head(folio);
- /* we rely on prep_new_hugetlb_folio to set the destructor */
+ /* we rely on prep_new_hugetlb_folio to set the hugetlb flag */
folio_set_order(folio, order);
atomic_set(&folio->_entire_mapcount, -1);
- atomic_set(&folio->_nr_pages_mapped, 0);
+ atomic_set(&folio->_large_mapcount, -1);
atomic_set(&folio->_pincount, 0);
return true;
@@ -2160,31 +2153,15 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
}
/*
- * PageHuge() only returns true for hugetlbfs pages, but not for normal or
- * transparent huge pages. See the PageTransHuge() documentation for more
- * details.
- */
-int PageHuge(const struct page *page)
-{
- const struct folio *folio;
-
- if (!PageCompound(page))
- return 0;
- folio = page_folio(page);
- return folio_test_hugetlb(folio);
-}
-EXPORT_SYMBOL_GPL(PageHuge);
-
-/*
* Find and lock address space (mapping) in write mode.
*
- * Upon entry, the page is locked which means that page_mapping() is
+ * Upon entry, the folio is locked which means that folio_mapping() is
* stable. Due to locking order, we can only trylock_write. If we can
* not get the lock, simply return NULL to caller.
*/
-struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage)
+struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio)
{
- struct address_space *mapping = page_mapping(hpage);
+ struct address_space *mapping = folio_mapping(folio);
if (!mapping)
return mapping;
@@ -2200,13 +2177,13 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
nodemask_t *node_alloc_noretry)
{
int order = huge_page_order(h);
- struct page *page;
+ struct folio *folio;
bool alloc_try_hard = true;
bool retry = true;
/*
- * By default we always try hard to allocate the page with
- * __GFP_RETRY_MAYFAIL flag. However, if we are allocating pages in
+ * By default we always try hard to allocate the folio with
+ * __GFP_RETRY_MAYFAIL flag. However, if we are allocating folios in
* a loop (to adjust global huge page counts) and previous allocation
* failed, do not continue to try hard on the same node. Use the
* node_alloc_noretry bitmap to manage this state information.
@@ -2219,43 +2196,42 @@ static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
if (nid == NUMA_NO_NODE)
nid = numa_mem_id();
retry:
- page = __alloc_pages(gfp_mask, order, nid, nmask);
+ folio = __folio_alloc(gfp_mask, order, nid, nmask);
- /* Freeze head page */
- if (page && !page_ref_freeze(page, 1)) {
- __free_pages(page, order);
+ if (folio && !folio_ref_freeze(folio, 1)) {
+ folio_put(folio);
if (retry) { /* retry once */
retry = false;
goto retry;
}
/* WOW! twice in a row. */
- pr_warn("HugeTLB head page unexpected inflated ref count\n");
- page = NULL;
+ pr_warn("HugeTLB unexpected inflated folio ref count\n");
+ folio = NULL;
}
/*
- * If we did not specify __GFP_RETRY_MAYFAIL, but still got a page this
- * indicates an overall state change. Clear bit so that we resume
- * normal 'try hard' allocations.
+ * If we did not specify __GFP_RETRY_MAYFAIL, but still got a
+ * folio this indicates an overall state change. Clear bit so
+ * that we resume normal 'try hard' allocations.
*/
- if (node_alloc_noretry && page && !alloc_try_hard)
+ if (node_alloc_noretry && folio && !alloc_try_hard)
node_clear(nid, *node_alloc_noretry);
/*
- * If we tried hard to get a page but failed, set bit so that
+ * If we tried hard to get a folio but failed, set bit so that
* subsequent attempts will not try as hard until there is an
* overall state change.
*/
- if (node_alloc_noretry && !page && alloc_try_hard)
+ if (node_alloc_noretry && !folio && alloc_try_hard)
node_set(nid, *node_alloc_noretry);
- if (!page) {
+ if (!folio) {
__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
return NULL;
}
__count_vm_event(HTLB_BUDDY_PGALLOC);
- return page_folio(page);
+ return folio;
}
static struct folio *__alloc_fresh_hugetlb_folio(struct hstate *h,
@@ -2401,8 +2377,8 @@ static struct folio *remove_pool_hugetlb_folio(struct hstate *h,
}
/*
- * Dissolve a given free hugepage into free buddy pages. This function does
- * nothing for in-use hugepages and non-hugepages.
+ * Dissolve a given free hugetlb folio into free buddy pages. This function
+ * does nothing for in-use hugetlb folios and non-hugetlb folios.
* This function returns values like below:
*
* -ENOMEM: failed to allocate vmemmap pages to free the freed hugepages
@@ -2414,10 +2390,9 @@ static struct folio *remove_pool_hugetlb_folio(struct hstate *h,
* 0: successfully dissolved free hugepages or the page is not a
* hugepage (considered as already dissolved)
*/
-int dissolve_free_huge_page(struct page *page)
+int dissolve_free_hugetlb_folio(struct folio *folio)
{
int rc = -EBUSY;
- struct folio *folio = page_folio(page);
retry:
/* Not to disrupt normal path by vainly holding hugetlb_lock */
@@ -2494,13 +2469,13 @@ out:
* make specified memory blocks removable from the system.
* Note that this will dissolve a free gigantic hugepage completely, if any
* part of it lies within the given range.
- * Also note that if dissolve_free_huge_page() returns with an error, all
- * free hugepages that were dissolved before that error are lost.
+ * Also note that if dissolve_free_hugetlb_folio() returns with an error, all
+ * free hugetlb folios that were dissolved before that error are lost.
*/
-int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
+int dissolve_free_hugetlb_folios(unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long pfn;
- struct page *page;
+ struct folio *folio;
int rc = 0;
unsigned int order;
struct hstate *h;
@@ -2513,8 +2488,8 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
order = min(order, huge_page_order(h));
for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order) {
- page = pfn_to_page(pfn);
- rc = dissolve_free_huge_page(page);
+ folio = pfn_folio(pfn);
+ rc = dissolve_free_hugetlb_folio(folio);
if (rc)
break;
}
@@ -2621,7 +2596,7 @@ struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h,
/* folio migration callback function */
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask, gfp_t gfp_mask)
+ nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback)
{
spin_lock_irq(&hugetlb_lock);
if (available_huge_pages(h)) {
@@ -2636,6 +2611,10 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
}
spin_unlock_irq(&hugetlb_lock);
+ /* We cannot fallback to other nodes, as we could break the per-node pool. */
+ if (!allow_alloc_fallback)
+ gfp_mask |= __GFP_THISNODE;
+
return alloc_migrate_hugetlb_folio(h, gfp_mask, preferred_nid, nmask);
}
@@ -3268,9 +3247,12 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
rsv_adjust = hugepage_subpool_put_pages(spool, 1);
hugetlb_acct_memory(h, -rsv_adjust);
- if (deferred_reserve)
+ if (deferred_reserve) {
+ spin_lock_irq(&hugetlb_lock);
hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
pages_per_huge_page(h), folio);
+ spin_unlock_irq(&hugetlb_lock);
+ }
}
if (!memcg_charge_ret)
@@ -5045,7 +5027,6 @@ static struct ctl_table hugetlb_table[] = {
.mode = 0644,
.proc_handler = hugetlb_overcommit_handler,
},
- { }
};
static void hugetlb_sysctl_init(void)
@@ -5936,19 +5917,18 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
* cannot race with other handlers or page migration.
* Keep the pte_same checks anyway to make transition from the mutex easier.
*/
-static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep, unsigned int flags,
- struct folio *pagecache_folio, spinlock_t *ptl,
+static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
struct vm_fault *vmf)
{
- const bool unshare = flags & FAULT_FLAG_UNSHARE;
- pte_t pte = huge_ptep_get(ptep);
+ struct vm_area_struct *vma = vmf->vma;
+ struct mm_struct *mm = vma->vm_mm;
+ const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
+ pte_t pte = huge_ptep_get(vmf->pte);
struct hstate *h = hstate_vma(vma);
struct folio *old_folio;
struct folio *new_folio;
int outside_reserve = 0;
vm_fault_t ret = 0;
- unsigned long haddr = address & huge_page_mask(h);
struct mmu_notifier_range range;
/*
@@ -5971,7 +5951,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
/* Let's take out MAP_SHARED mappings first. */
if (vma->vm_flags & VM_MAYSHARE) {
- set_huge_ptep_writable(vma, haddr, ptep);
+ set_huge_ptep_writable(vma, vmf->address, vmf->pte);
return 0;
}
@@ -5990,7 +5970,7 @@ retry_avoidcopy:
SetPageAnonExclusive(&old_folio->page);
}
if (likely(!unshare))
- set_huge_ptep_writable(vma, haddr, ptep);
+ set_huge_ptep_writable(vma, vmf->address, vmf->pte);
delayacct_wpcopy_end();
return 0;
@@ -6017,8 +5997,8 @@ retry_avoidcopy:
* Drop page table lock as buddy allocator may be called. It will
* be acquired again before returning to the caller, as expected.
*/
- spin_unlock(ptl);
- new_folio = alloc_hugetlb_folio(vma, haddr, outside_reserve);
+ spin_unlock(vmf->ptl);
+ new_folio = alloc_hugetlb_folio(vma, vmf->address, outside_reserve);
if (IS_ERR(new_folio)) {
/*
@@ -6043,19 +6023,21 @@ retry_avoidcopy:
*
* Reacquire both after unmap operation.
*/
- idx = vma_hugecache_offset(h, vma, haddr);
+ idx = vma_hugecache_offset(h, vma, vmf->address);
hash = hugetlb_fault_mutex_hash(mapping, idx);
hugetlb_vma_unlock_read(vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- unmap_ref_private(mm, vma, &old_folio->page, haddr);
+ unmap_ref_private(mm, vma, &old_folio->page,
+ vmf->address);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
hugetlb_vma_lock_read(vma);
- spin_lock(ptl);
- ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
- if (likely(ptep &&
- pte_same(huge_ptep_get(ptep), pte)))
+ spin_lock(vmf->ptl);
+ vmf->pte = hugetlb_walk(vma, vmf->address,
+ huge_page_size(h));
+ if (likely(vmf->pte &&
+ pte_same(huge_ptep_get(vmf->pte), pte)))
goto retry_avoidcopy;
/*
* race occurs while re-acquiring page table
@@ -6077,37 +6059,38 @@ retry_avoidcopy:
if (unlikely(ret))
goto out_release_all;
- if (copy_user_large_folio(new_folio, old_folio, address, vma)) {
+ if (copy_user_large_folio(new_folio, old_folio, vmf->real_address, vma)) {
ret = VM_FAULT_HWPOISON_LARGE;
goto out_release_all;
}
__folio_mark_uptodate(new_folio);
- mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddr,
- haddr + huge_page_size(h));
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, vmf->address,
+ vmf->address + huge_page_size(h));
mmu_notifier_invalidate_range_start(&range);
/*
* Retake the page table lock to check for racing updates
* before the page tables are altered
*/
- spin_lock(ptl);
- ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
- if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
+ spin_lock(vmf->ptl);
+ vmf->pte = hugetlb_walk(vma, vmf->address, huge_page_size(h));
+ if (likely(vmf->pte && pte_same(huge_ptep_get(vmf->pte), pte))) {
pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
/* Break COW or unshare */
- huge_ptep_clear_flush(vma, haddr, ptep);
+ huge_ptep_clear_flush(vma, vmf->address, vmf->pte);
hugetlb_remove_rmap(old_folio);
- hugetlb_add_new_anon_rmap(new_folio, vma, haddr);
+ hugetlb_add_new_anon_rmap(new_folio, vma, vmf->address);
if (huge_pte_uffd_wp(pte))
newpte = huge_pte_mkuffd_wp(newpte);
- set_huge_pte_at(mm, haddr, ptep, newpte, huge_page_size(h));
+ set_huge_pte_at(mm, vmf->address, vmf->pte, newpte,
+ huge_page_size(h));
folio_set_hugetlb_migratable(new_folio);
/* Make the old page be freed below */
new_folio = old_folio;
}
- spin_unlock(ptl);
+ spin_unlock(vmf->ptl);
mmu_notifier_invalidate_range_end(&range);
out_release_all:
/*
@@ -6115,12 +6098,12 @@ out_release_all:
* unshare)
*/
if (new_folio != old_folio)
- restore_reserve_on_error(h, vma, haddr, new_folio);
+ restore_reserve_on_error(h, vma, vmf->address, new_folio);
folio_put(new_folio);
out_release_old:
folio_put(old_folio);
- spin_lock(ptl); /* Caller expects lock to be held */
+ spin_lock(vmf->ptl); /* Caller expects lock to be held */
delayacct_wpcopy_end();
return ret;
@@ -6129,8 +6112,8 @@ out_release_old:
/*
* Return whether there is a pagecache page to back given address within VMA.
*/
-static bool hugetlbfs_pagecache_present(struct hstate *h,
- struct vm_area_struct *vma, unsigned long address)
+bool hugetlbfs_pagecache_present(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long address)
{
struct address_space *mapping = vma->vm_file->f_mapping;
pgoff_t idx = linear_page_index(vma, address);
@@ -6206,23 +6189,19 @@ static bool hugetlb_pte_stable(struct hstate *h, struct mm_struct *mm,
return same;
}
-static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
- struct vm_area_struct *vma,
- struct address_space *mapping, pgoff_t idx,
- unsigned long address, pte_t *ptep,
- pte_t old_pte, unsigned int flags,
+static vm_fault_t hugetlb_no_page(struct address_space *mapping,
struct vm_fault *vmf)
{
+ struct vm_area_struct *vma = vmf->vma;
+ struct mm_struct *mm = vma->vm_mm;
struct hstate *h = hstate_vma(vma);
vm_fault_t ret = VM_FAULT_SIGBUS;
int anon_rmap = 0;
unsigned long size;
struct folio *folio;
pte_t new_pte;
- spinlock_t *ptl;
- unsigned long haddr = address & huge_page_mask(h);
bool new_folio, new_pagecache_folio = false;
- u32 hash = hugetlb_fault_mutex_hash(mapping, idx);
+ u32 hash = hugetlb_fault_mutex_hash(mapping, vmf->pgoff);
/*
* Currently, we are forced to kill the process in the event the
@@ -6241,10 +6220,10 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* before we get page_table_lock.
*/
new_folio = false;
- folio = filemap_lock_hugetlb_folio(h, mapping, idx);
+ folio = filemap_lock_hugetlb_folio(h, mapping, vmf->pgoff);
if (IS_ERR(folio)) {
size = i_size_read(mapping->host) >> huge_page_shift(h);
- if (idx >= size)
+ if (vmf->pgoff >= size)
goto out;
/* Check for page in userfault range */
if (userfaultfd_missing(vma)) {
@@ -6265,7 +6244,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* never happen on the page after UFFDIO_COPY has
* correctly installed the page and returned.
*/
- if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) {
+ if (!hugetlb_pte_stable(h, mm, vmf->pte, vmf->orig_pte)) {
ret = 0;
goto out;
}
@@ -6274,7 +6253,13 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
VM_UFFD_MISSING);
}
- folio = alloc_hugetlb_folio(vma, haddr, 0);
+ if (!(vma->vm_flags & VM_MAYSHARE)) {
+ ret = vmf_anon_prepare(vmf);
+ if (unlikely(ret))
+ goto out;
+ }
+
+ folio = alloc_hugetlb_folio(vma, vmf->address, 0);
if (IS_ERR(folio)) {
/*
* Returning error will result in faulting task being
@@ -6288,18 +6273,20 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* here. Before returning error, get ptl and make
* sure there really is no pte entry.
*/
- if (hugetlb_pte_stable(h, mm, ptep, old_pte))
+ if (hugetlb_pte_stable(h, mm, vmf->pte, vmf->orig_pte))
ret = vmf_error(PTR_ERR(folio));
else
ret = 0;
goto out;
}
- clear_huge_page(&folio->page, address, pages_per_huge_page(h));
+ clear_huge_page(&folio->page, vmf->real_address,
+ pages_per_huge_page(h));
__folio_mark_uptodate(folio);
new_folio = true;
if (vma->vm_flags & VM_MAYSHARE) {
- int err = hugetlb_add_to_page_cache(folio, mapping, idx);
+ int err = hugetlb_add_to_page_cache(folio, mapping,
+ vmf->pgoff);
if (err) {
/*
* err can't be -EEXIST which implies someone
@@ -6308,17 +6295,15 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* to the page cache. So it's safe to call
* restore_reserve_on_error() here.
*/
- restore_reserve_on_error(h, vma, haddr, folio);
+ restore_reserve_on_error(h, vma, vmf->address,
+ folio);
folio_put(folio);
+ ret = VM_FAULT_SIGBUS;
goto out;
}
new_pagecache_folio = true;
} else {
folio_lock(folio);
-
- ret = vmf_anon_prepare(vmf);
- if (unlikely(ret))
- goto backout_unlocked;
anon_rmap = 1;
}
} else {
@@ -6338,7 +6323,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
folio_unlock(folio);
folio_put(folio);
/* See comment in userfaultfd_missing() block above */
- if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) {
+ if (!hugetlb_pte_stable(h, mm, vmf->pte, vmf->orig_pte)) {
ret = 0;
goto out;
}
@@ -6353,23 +6338,23 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* any allocations necessary to record that reservation occur outside
* the spinlock.
*/
- if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
- if (vma_needs_reservation(h, vma, haddr) < 0) {
+ if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
+ if (vma_needs_reservation(h, vma, vmf->address) < 0) {
ret = VM_FAULT_OOM;
goto backout_unlocked;
}
/* Just decrements count, does not deallocate */
- vma_end_reservation(h, vma, haddr);
+ vma_end_reservation(h, vma, vmf->address);
}
- ptl = huge_pte_lock(h, mm, ptep);
+ vmf->ptl = huge_pte_lock(h, mm, vmf->pte);
ret = 0;
/* If pte changed from under us, retry */
- if (!pte_same(huge_ptep_get(ptep), old_pte))
+ if (!pte_same(huge_ptep_get(vmf->pte), vmf->orig_pte))
goto backout;
if (anon_rmap)
- hugetlb_add_new_anon_rmap(folio, vma, haddr);
+ hugetlb_add_new_anon_rmap(folio, vma, vmf->address);
else
hugetlb_add_file_rmap(folio);
new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE)
@@ -6378,17 +6363,17 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* If this pte was previously wr-protected, keep it wr-protected even
* if populated.
*/
- if (unlikely(pte_marker_uffd_wp(old_pte)))
+ if (unlikely(pte_marker_uffd_wp(vmf->orig_pte)))
new_pte = huge_pte_mkuffd_wp(new_pte);
- set_huge_pte_at(mm, haddr, ptep, new_pte, huge_page_size(h));
+ set_huge_pte_at(mm, vmf->address, vmf->pte, new_pte, huge_page_size(h));
hugetlb_count_add(pages_per_huge_page(h), mm);
- if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
+ if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
- ret = hugetlb_wp(mm, vma, address, ptep, flags, folio, ptl, vmf);
+ ret = hugetlb_wp(folio, vmf);
}
- spin_unlock(ptl);
+ spin_unlock(vmf->ptl);
/*
* Only set hugetlb_migratable in newly allocated pages. Existing pages
@@ -6405,10 +6390,10 @@ out:
return ret;
backout:
- spin_unlock(ptl);
+ spin_unlock(vmf->ptl);
backout_unlocked:
if (new_folio && !new_pagecache_folio)
- restore_reserve_on_error(h, vma, haddr, folio);
+ restore_reserve_on_error(h, vma, vmf->address, folio);
folio_unlock(folio);
folio_put(folio);
@@ -6442,8 +6427,6 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
- pte_t *ptep, entry;
- spinlock_t *ptl;
vm_fault_t ret;
u32 hash;
struct folio *folio = NULL;
@@ -6451,13 +6434,13 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
struct hstate *h = hstate_vma(vma);
struct address_space *mapping;
int need_wait_lock = 0;
- unsigned long haddr = address & huge_page_mask(h);
struct vm_fault vmf = {
.vma = vma,
- .address = haddr,
+ .address = address & huge_page_mask(h),
.real_address = address,
.flags = flags,
- .pgoff = vma_hugecache_offset(h, vma, haddr),
+ .pgoff = vma_hugecache_offset(h, vma,
+ address & huge_page_mask(h)),
/* TODO: Track hugetlb faults using vm_fault */
/*
@@ -6477,22 +6460,22 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
/*
* Acquire vma lock before calling huge_pte_alloc and hold
- * until finished with ptep. This prevents huge_pmd_unshare from
- * being called elsewhere and making the ptep no longer valid.
+ * until finished with vmf.pte. This prevents huge_pmd_unshare from
+ * being called elsewhere and making the vmf.pte no longer valid.
*/
hugetlb_vma_lock_read(vma);
- ptep = huge_pte_alloc(mm, vma, haddr, huge_page_size(h));
- if (!ptep) {
+ vmf.pte = huge_pte_alloc(mm, vma, vmf.address, huge_page_size(h));
+ if (!vmf.pte) {
hugetlb_vma_unlock_read(vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
return VM_FAULT_OOM;
}
- entry = huge_ptep_get(ptep);
- if (huge_pte_none_mostly(entry)) {
- if (is_pte_marker(entry)) {
+ vmf.orig_pte = huge_ptep_get(vmf.pte);
+ if (huge_pte_none_mostly(vmf.orig_pte)) {
+ if (is_pte_marker(vmf.orig_pte)) {
pte_marker marker =
- pte_marker_get(pte_to_swp_entry(entry));
+ pte_marker_get(pte_to_swp_entry(vmf.orig_pte));
if (marker & PTE_MARKER_POISONED) {
ret = VM_FAULT_HWPOISON_LARGE;
@@ -6506,21 +6489,20 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* hugetlb_no_page will drop vma lock and hugetlb fault
* mutex internally, which make us return immediately.
*/
- return hugetlb_no_page(mm, vma, mapping, vmf.pgoff, address,
- ptep, entry, flags, &vmf);
+ return hugetlb_no_page(mapping, &vmf);
}
ret = 0;
/*
- * entry could be a migration/hwpoison entry at this point, so this
- * check prevents the kernel from going below assuming that we have
- * an active hugepage in pagecache. This goto expects the 2nd page
- * fault, and is_hugetlb_entry_(migration|hwpoisoned) check will
- * properly handle it.
+ * vmf.orig_pte could be a migration/hwpoison vmf.orig_pte at this
+ * point, so this check prevents the kernel from going below assuming
+ * that we have an active hugepage in pagecache. This goto expects
+ * the 2nd page fault, and is_hugetlb_entry_(migration|hwpoisoned)
+ * check will properly handle it.
*/
- if (!pte_present(entry)) {
- if (unlikely(is_hugetlb_entry_migration(entry))) {
+ if (!pte_present(vmf.orig_pte)) {
+ if (unlikely(is_hugetlb_entry_migration(vmf.orig_pte))) {
/*
* Release the hugetlb fault lock now, but retain
* the vma lock, because it is needed to guard the
@@ -6529,9 +6511,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* be released there.
*/
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- migration_entry_wait_huge(vma, ptep);
+ migration_entry_wait_huge(vma, vmf.pte);
return 0;
- } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
+ } else if (unlikely(is_hugetlb_entry_hwpoisoned(vmf.orig_pte)))
ret = VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
goto out_mutex;
@@ -6545,13 +6527,13 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* determine if a reservation has been consumed.
*/
if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
- !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) {
- if (vma_needs_reservation(h, vma, haddr) < 0) {
+ !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(vmf.orig_pte)) {
+ if (vma_needs_reservation(h, vma, vmf.address) < 0) {
ret = VM_FAULT_OOM;
goto out_mutex;
}
/* Just decrements count, does not deallocate */
- vma_end_reservation(h, vma, haddr);
+ vma_end_reservation(h, vma, vmf.address);
pagecache_folio = filemap_lock_hugetlb_folio(h, mapping,
vmf.pgoff);
@@ -6559,17 +6541,17 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pagecache_folio = NULL;
}
- ptl = huge_pte_lock(h, mm, ptep);
+ vmf.ptl = huge_pte_lock(h, mm, vmf.pte);
/* Check for a racing update before calling hugetlb_wp() */
- if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
+ if (unlikely(!pte_same(vmf.orig_pte, huge_ptep_get(vmf.pte))))
goto out_ptl;
/* Handle userfault-wp first, before trying to lock more pages */
- if (userfaultfd_wp(vma) && huge_pte_uffd_wp(huge_ptep_get(ptep)) &&
- (flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) {
+ if (userfaultfd_wp(vma) && huge_pte_uffd_wp(huge_ptep_get(vmf.pte)) &&
+ (flags & FAULT_FLAG_WRITE) && !huge_pte_write(vmf.orig_pte)) {
if (!userfaultfd_wp_async(vma)) {
- spin_unlock(ptl);
+ spin_unlock(vmf.ptl);
if (pagecache_folio) {
folio_unlock(pagecache_folio);
folio_put(pagecache_folio);
@@ -6579,18 +6561,18 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return handle_userfault(&vmf, VM_UFFD_WP);
}
- entry = huge_pte_clear_uffd_wp(entry);
- set_huge_pte_at(mm, haddr, ptep, entry,
+ vmf.orig_pte = huge_pte_clear_uffd_wp(vmf.orig_pte);
+ set_huge_pte_at(mm, vmf.address, vmf.pte, vmf.orig_pte,
huge_page_size(hstate_vma(vma)));
/* Fallthrough to CoW */
}
/*
- * hugetlb_wp() requires page locks of pte_page(entry) and
+ * hugetlb_wp() requires page locks of pte_page(vmf.orig_pte) and
* pagecache_folio, so here we need take the former one
* when folio != pagecache_folio or !pagecache_folio.
*/
- folio = page_folio(pte_page(entry));
+ folio = page_folio(pte_page(vmf.orig_pte));
if (folio != pagecache_folio)
if (!folio_trylock(folio)) {
need_wait_lock = 1;
@@ -6600,24 +6582,23 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
folio_get(folio);
if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
- if (!huge_pte_write(entry)) {
- ret = hugetlb_wp(mm, vma, address, ptep, flags,
- pagecache_folio, ptl, &vmf);
+ if (!huge_pte_write(vmf.orig_pte)) {
+ ret = hugetlb_wp(pagecache_folio, &vmf);
goto out_put_page;
} else if (likely(flags & FAULT_FLAG_WRITE)) {
- entry = huge_pte_mkdirty(entry);
+ vmf.orig_pte = huge_pte_mkdirty(vmf.orig_pte);
}
}
- entry = pte_mkyoung(entry);
- if (huge_ptep_set_access_flags(vma, haddr, ptep, entry,
+ vmf.orig_pte = pte_mkyoung(vmf.orig_pte);
+ if (huge_ptep_set_access_flags(vma, vmf.address, vmf.pte, vmf.orig_pte,
flags & FAULT_FLAG_WRITE))
- update_mmu_cache(vma, haddr, ptep);
+ update_mmu_cache(vma, vmf.address, vmf.pte);
out_put_page:
if (folio != pagecache_folio)
folio_unlock(folio);
folio_put(folio);
out_ptl:
- spin_unlock(ptl);
+ spin_unlock(vmf.ptl);
if (pagecache_folio) {
folio_unlock(pagecache_folio);
@@ -6653,7 +6634,13 @@ static struct folio *alloc_hugetlb_folio_vma(struct hstate *h,
gfp_mask = htlb_alloc_mask(h);
node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
- folio = alloc_hugetlb_folio_nodemask(h, node, nodemask, gfp_mask);
+ /*
+ * This is used to allocate a temporary hugetlb to hold the copied
+ * content, which will then be copied again to the final hugetlb
+ * consuming a reservation. Set the alloc_fallback to false to indicate
+ * that breaking the per-node hugetlb pool is not allowed in this case.
+ */
+ folio = alloc_hugetlb_folio_nodemask(h, node, nodemask, gfp_mask, false);
mpol_cond_put(mpol);
return folio;
@@ -6883,77 +6870,6 @@ out_release_nounlock:
}
#endif /* CONFIG_USERFAULTFD */
-struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags,
- unsigned int *page_mask)
-{
- struct hstate *h = hstate_vma(vma);
- struct mm_struct *mm = vma->vm_mm;
- unsigned long haddr = address & huge_page_mask(h);
- struct page *page = NULL;
- spinlock_t *ptl;
- pte_t *pte, entry;
- int ret;
-
- hugetlb_vma_lock_read(vma);
- pte = hugetlb_walk(vma, haddr, huge_page_size(h));
- if (!pte)
- goto out_unlock;
-
- ptl = huge_pte_lock(h, mm, pte);
- entry = huge_ptep_get(pte);
- if (pte_present(entry)) {
- page = pte_page(entry);
-
- if (!huge_pte_write(entry)) {
- if (flags & FOLL_WRITE) {
- page = NULL;
- goto out;
- }
-
- if (gup_must_unshare(vma, flags, page)) {
- /* Tell the caller to do unsharing */
- page = ERR_PTR(-EMLINK);
- goto out;
- }
- }
-
- page = nth_page(page, ((address & ~huge_page_mask(h)) >> PAGE_SHIFT));
-
- /*
- * Note that page may be a sub-page, and with vmemmap
- * optimizations the page struct may be read only.
- * try_grab_page() will increase the ref count on the
- * head page, so this will be OK.
- *
- * try_grab_page() should always be able to get the page here,
- * because we hold the ptl lock and have verified pte_present().
- */
- ret = try_grab_page(page, flags);
-
- if (WARN_ON_ONCE(ret)) {
- page = ERR_PTR(ret);
- goto out;
- }
-
- *page_mask = (1U << huge_page_order(h)) - 1;
- }
-out:
- spin_unlock(ptl);
-out_unlock:
- hugetlb_vma_unlock_read(vma);
-
- /*
- * Fixup retval for dump requests: if pagecache doesn't exist,
- * don't try to allocate a new page but just skip it.
- */
- if (!page && (flags & FOLL_DUMP) &&
- !hugetlbfs_pagecache_present(h, vma, address))
- page = ERR_PTR(-EFAULT);
-
- return page;
-}
-
long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end,
pgprot_t newprot, unsigned long cp_flags)
@@ -7044,9 +6960,13 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
if (!pte_same(pte, newpte))
set_huge_pte_at(mm, address, ptep, newpte, psize);
} else if (unlikely(is_pte_marker(pte))) {
- /* No other markers apply for now. */
- WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
- if (uffd_wp_resolve)
+ /*
+ * Do nothing on a poison marker; page is
+ * corrupted, permissons do not apply. Here
+ * pte_marker_uffd_wp()==true implies !poison
+ * because they're mutual exclusive.
+ */
+ if (pte_marker_uffd_wp(pte) && uffd_wp_resolve)
/* Safe to modify directly (non-present->none). */
huge_pte_clear(mm, address, ptep, psize);
} else if (!huge_pte_none(pte)) {
@@ -7873,9 +7793,9 @@ void __init hugetlb_cma_reserve(int order)
* huge page demotion.
*/
res = cma_declare_contiguous_nid(0, size, 0,
- PAGE_SIZE << HUGETLB_PAGE_ORDER,
- 0, false, name,
- &hugetlb_cma[nid], nid);
+ PAGE_SIZE << HUGETLB_PAGE_ORDER,
+ HUGETLB_PAGE_ORDER, false, name,
+ &hugetlb_cma[nid], nid);
if (res) {
pr_warn("hugetlb_cma: reservation failed: err %d, node %d",
res, nid);
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index aa4486bd390493..e20339a346b935 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -308,7 +308,7 @@ static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
{
if (hugetlb_cgroup_disabled() || !h_cg)
return;
-
+ lockdep_assert_held(&hugetlb_lock);
__set_hugetlb_cgroup(folio, h_cg, rsvd);
if (!rsvd) {
unsigned long usage =
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index da177e49d95648..b9a55322e52ce9 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -679,7 +679,6 @@ static struct ctl_table hugetlb_vmemmap_sysctls[] = {
.mode = 0644,
.proc_handler = proc_dobool,
},
- { }
};
static int __init hugetlb_vmemmap_init(void)
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index d0548e382b6ba2..c9d653f51e45bb 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -15,7 +15,7 @@ static int hwpoison_inject(void *data, u64 val)
{
unsigned long pfn = val;
struct page *p;
- struct page *hpage;
+ struct folio *folio;
int err;
if (!capable(CAP_SYS_ADMIN))
@@ -25,16 +25,17 @@ static int hwpoison_inject(void *data, u64 val)
return -ENXIO;
p = pfn_to_page(pfn);
- hpage = compound_head(p);
+ folio = page_folio(p);
if (!hwpoison_filter_enable)
goto inject;
- shake_page(hpage);
+ shake_folio(folio);
/*
* This implies unable to support non-LRU pages except free page.
*/
- if (!PageLRU(hpage) && !PageHuge(p) && !is_free_buddy_page(p))
+ if (!folio_test_lru(folio) && !folio_test_hugetlb(folio) &&
+ !is_free_buddy_page(p))
return 0;
/*
@@ -42,7 +43,7 @@ static int hwpoison_inject(void *data, u64 val)
* the targeted owner (or on a free page).
* memory_failure() will redo the check reliably inside page lock.
*/
- err = hwpoison_filter(hpage);
+ err = hwpoison_filter(&folio->page);
if (err)
return 0;
diff --git a/mm/internal.h b/mm/internal.h
index 7e486f2c502cee..c5552d35d99551 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -11,6 +11,8 @@
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
#include <linux/tracepoint-defs.h>
struct folio_batch;
@@ -70,13 +72,30 @@ void page_writeback_init(void);
/*
* How many individual pages have an elevated _mapcount. Excludes
* the folio's entire_mapcount.
+ *
+ * Don't use this function outside of debugging code.
*/
-static inline int folio_nr_pages_mapped(struct folio *folio)
+static inline int folio_nr_pages_mapped(const struct folio *folio)
{
return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED;
}
-static inline void *folio_raw_mapping(struct folio *folio)
+/*
+ * Retrieve the first entry of a folio based on a provided entry within the
+ * folio. We cannot rely on folio->swap as there is no guarantee that it has
+ * been initialized. Used for calling arch_swap_restore()
+ */
+static inline swp_entry_t folio_swap(swp_entry_t entry,
+ const struct folio *folio)
+{
+ swp_entry_t swap = {
+ .val = ALIGN_DOWN(entry.val, folio_nr_pages(folio)),
+ };
+
+ return swap;
+}
+
+static inline void *folio_raw_mapping(const struct folio *folio)
{
unsigned long mapping = (unsigned long)folio->mapping;
@@ -113,6 +132,10 @@ static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
* @flags: Flags to modify the PTE batch semantics.
* @any_writable: Optional pointer to indicate whether any entry except the
* first one is writable.
+ * @any_young: Optional pointer to indicate whether any entry except the
+ * first one is young.
+ * @any_dirty: Optional pointer to indicate whether any entry except the
+ * first one is dirty.
*
* Detect a PTE batch: consecutive (present) PTEs that map consecutive
* pages of the same large folio.
@@ -128,16 +151,20 @@ static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
*/
static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags,
- bool *any_writable)
+ bool *any_writable, bool *any_young, bool *any_dirty)
{
unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio);
const pte_t *end_ptep = start_ptep + max_nr;
pte_t expected_pte, *ptep;
- bool writable;
+ bool writable, young, dirty;
int nr;
if (any_writable)
*any_writable = false;
+ if (any_young)
+ *any_young = false;
+ if (any_dirty)
+ *any_dirty = false;
VM_WARN_ON_FOLIO(!pte_present(pte), folio);
VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
@@ -151,6 +178,10 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
pte = ptep_get(ptep);
if (any_writable)
writable = !!pte_write(pte);
+ if (any_young)
+ young = !!pte_young(pte);
+ if (any_dirty)
+ dirty = !!pte_dirty(pte);
pte = __pte_batch_clear_ignored(pte, flags);
if (!pte_same(pte, expected_pte))
@@ -166,6 +197,10 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
if (any_writable)
*any_writable |= writable;
+ if (any_young)
+ *any_young |= young;
+ if (any_dirty)
+ *any_dirty |= dirty;
nr = pte_batch_hint(ptep, pte);
expected_pte = pte_advance_pfn(expected_pte, nr);
@@ -174,6 +209,68 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
return min(ptep - start_ptep, max_nr);
}
+
+/**
+ * pte_next_swp_offset - Increment the swap entry offset field of a swap pte.
+ * @pte: The initial pte state; is_swap_pte(pte) must be true and
+ * non_swap_entry() must be false.
+ *
+ * Increments the swap offset, while maintaining all other fields, including
+ * swap type, and any swp pte bits. The resulting pte is returned.
+ */
+static inline pte_t pte_next_swp_offset(pte_t pte)
+{
+ swp_entry_t entry = pte_to_swp_entry(pte);
+ pte_t new = __swp_entry_to_pte(__swp_entry(swp_type(entry),
+ (swp_offset(entry) + 1)));
+
+ if (pte_swp_soft_dirty(pte))
+ new = pte_swp_mksoft_dirty(new);
+ if (pte_swp_exclusive(pte))
+ new = pte_swp_mkexclusive(new);
+ if (pte_swp_uffd_wp(pte))
+ new = pte_swp_mkuffd_wp(new);
+
+ return new;
+}
+
+/**
+ * swap_pte_batch - detect a PTE batch for a set of contiguous swap entries
+ * @start_ptep: Page table pointer for the first entry.
+ * @max_nr: The maximum number of table entries to consider.
+ * @pte: Page table entry for the first entry.
+ *
+ * Detect a batch of contiguous swap entries: consecutive (non-present) PTEs
+ * containing swap entries all with consecutive offsets and targeting the same
+ * swap type, all with matching swp pte bits.
+ *
+ * max_nr must be at least one and must be limited by the caller so scanning
+ * cannot exceed a single page table.
+ *
+ * Return: the number of table entries in the batch.
+ */
+static inline int swap_pte_batch(pte_t *start_ptep, int max_nr, pte_t pte)
+{
+ pte_t expected_pte = pte_next_swp_offset(pte);
+ const pte_t *end_ptep = start_ptep + max_nr;
+ pte_t *ptep = start_ptep + 1;
+
+ VM_WARN_ON(max_nr < 1);
+ VM_WARN_ON(!is_swap_pte(pte));
+ VM_WARN_ON(non_swap_entry(pte_to_swp_entry(pte)));
+
+ while (ptep < end_ptep) {
+ pte = ptep_get(ptep);
+
+ if (!pte_same(pte, expected_pte))
+ break;
+
+ expected_pte = pte_next_swp_offset(expected_pte);
+ ptep++;
+ }
+
+ return ptep - start_ptep;
+}
#endif /* CONFIG_MMU */
void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
@@ -491,6 +588,7 @@ extern void __putback_isolated_page(struct page *page, unsigned int order,
extern void memblock_free_pages(struct page *page, unsigned long pfn,
unsigned int order);
extern void __free_pages_core(struct page *page, unsigned int order);
+extern void kernel_init_pages(struct page *page, int numpages);
/*
* This will have no effect, other than possibly generating a warning, if the
@@ -513,7 +611,8 @@ static inline struct folio *page_rmappable_folio(struct page *page)
{
struct folio *folio = (struct folio *)page;
- folio_prep_large_rmappable(folio);
+ if (folio && folio_test_large(folio))
+ folio_set_large_rmappable(folio);
return folio;
}
@@ -522,9 +621,12 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
struct folio *folio = (struct folio *)page;
folio_set_order(folio, order);
+ atomic_set(&folio->_large_mapcount, -1);
atomic_set(&folio->_entire_mapcount, -1);
atomic_set(&folio->_nr_pages_mapped, 0);
atomic_set(&folio->_pincount, 0);
+ if (order > 1)
+ INIT_LIST_HEAD(&folio->_deferred_list);
}
static inline void prep_compound_tail(struct page *head, int tail_idx)
@@ -559,10 +661,6 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
unsigned long, enum meminit_context, struct vmem_altmap *, int);
-
-int split_free_page(struct page *free_page,
- unsigned int order, unsigned long split_pfn_offset);
-
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
/*
@@ -686,9 +784,8 @@ struct anon_vma *folio_anon_vma(struct folio *folio);
void unmap_mapping_folio(struct folio *folio);
extern long populate_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *locked);
-extern long faultin_vma_page_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- bool write, int *locked);
+extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool write, int *locked);
extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
unsigned long bytes);
@@ -790,13 +887,17 @@ void mlock_drain_remote(int cpu);
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
-/*
- * Return the start of user virtual address at the specific offset within
- * a vma.
+/**
+ * vma_address - Find the virtual address a page range is mapped at
+ * @vma: The vma which maps this object.
+ * @pgoff: The page offset within its object.
+ * @nr_pages: The number of pages to consider.
+ *
+ * If any page in this range is mapped by this VMA, return the first address
+ * where any of these pages appear. Otherwise, return -EFAULT.
*/
-static inline unsigned long
-vma_pgoff_address(pgoff_t pgoff, unsigned long nr_pages,
- struct vm_area_struct *vma)
+static inline unsigned long vma_address(struct vm_area_struct *vma,
+ pgoff_t pgoff, unsigned long nr_pages)
{
unsigned long address;
@@ -816,18 +917,6 @@ vma_pgoff_address(pgoff_t pgoff, unsigned long nr_pages,
}
/*
- * Return the start of user virtual address of a page within a vma.
- * Returns -EFAULT if all of the page is outside the range of vma.
- * If page is a compound head, the entire compound page is considered.
- */
-static inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
-{
- VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
- return vma_pgoff_address(page_to_pgoff(page), compound_nr(page), vma);
-}
-
-/*
* Then at what user virtual address will none of the range be found in vma?
* Assumes that vma_address() already returned a good starting address.
*/
@@ -948,6 +1037,7 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
/*
* mm/memory-failure.c
*/
+void shake_folio(struct folio *folio);
extern int hwpoison_filter(struct page *p);
extern u32 hwpoison_filter_dev_major;
@@ -1041,17 +1131,13 @@ static inline bool is_migrate_highatomic(enum migratetype migratetype)
return migratetype == MIGRATE_HIGHATOMIC;
}
-static inline bool is_migrate_highatomic_page(struct page *page)
-{
- return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC;
-}
-
void setup_zone_pageset(struct zone *zone);
struct migration_target_control {
int nid; /* preferred node id */
nodemask_t *nmask;
gfp_t gfp_mask;
+ enum migrate_reason reason;
};
/*
@@ -1088,10 +1174,10 @@ void vunmap_range_noflush(unsigned long start, unsigned long end);
void __vunmap_range_noflush(unsigned long start, unsigned long end);
-int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
+int numa_migrate_prep(struct folio *folio, struct vm_fault *vmf,
unsigned long addr, int page_nid, int *flags);
-void free_zone_device_page(struct page *page);
+void free_zone_device_folio(struct folio *folio);
int migrate_device_coherent_page(struct page *page);
/*
@@ -1103,9 +1189,10 @@ int __must_check try_grab_page(struct page *page, unsigned int flags);
/*
* mm/huge_memory.c
*/
-struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
- unsigned long addr, pmd_t *pmd,
- unsigned int flags);
+void touch_pud(struct vm_area_struct *vma, unsigned long addr,
+ pud_t *pud, bool write);
+void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, bool write);
/*
* mm/mmap.c
@@ -1127,10 +1214,13 @@ enum {
FOLL_FAST_ONLY = 1 << 20,
/* allow unlocking the mmap lock */
FOLL_UNLOCKABLE = 1 << 21,
+ /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
+ FOLL_MADV_POPULATE = 1 << 22,
};
#define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
- FOLL_FAST_ONLY | FOLL_UNLOCKABLE)
+ FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
+ FOLL_MADV_POPULATE)
/*
* Indicates for which pages that are write-protected in the page table,
@@ -1187,20 +1277,10 @@ static inline bool gup_must_unshare(struct vm_area_struct *vma,
}
/* Paired with a memory barrier in folio_try_share_anon_rmap_*(). */
- if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+ if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
smp_rmb();
/*
- * During GUP-fast we might not get called on the head page for a
- * hugetlb page that is mapped using cont-PTE, because GUP-fast does
- * not work with the abstracted hugetlb PTEs that always point at the
- * head page. For hugetlb, PageAnonExclusive only applies on the head
- * page (as it cannot be partially COW-shared), so lookup the head page.
- */
- if (unlikely(!PageHead(page) && PageHuge(page)))
- page = compound_head(page);
-
- /*
* Note that PageKsm() pages cannot be exclusive, and consequently,
* cannot get pinned.
*/
@@ -1243,6 +1323,35 @@ static inline void vma_iter_config(struct vma_iterator *vmi,
__mas_set_range(&vmi->mas, index, last - 1);
}
+static inline void vma_iter_reset(struct vma_iterator *vmi)
+{
+ mas_reset(&vmi->mas);
+}
+
+static inline
+struct vm_area_struct *vma_iter_prev_range_limit(struct vma_iterator *vmi, unsigned long min)
+{
+ return mas_prev_range(&vmi->mas, min);
+}
+
+static inline
+struct vm_area_struct *vma_iter_next_range_limit(struct vma_iterator *vmi, unsigned long max)
+{
+ return mas_next_range(&vmi->mas, max);
+}
+
+static inline int vma_iter_area_lowest(struct vma_iterator *vmi, unsigned long min,
+ unsigned long max, unsigned long size)
+{
+ return mas_empty_area(&vmi->mas, min, max - 1, size);
+}
+
+static inline int vma_iter_area_highest(struct vma_iterator *vmi, unsigned long min,
+ unsigned long max, unsigned long size)
+{
+ return mas_empty_area_rev(&vmi->mas, min, max - 1, size);
+}
+
/*
* VMA Iterator functions shared between nommu and mmap
*/
@@ -1326,6 +1435,43 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn,
unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
int priority);
+#ifdef CONFIG_64BIT
+/* VM is sealed, in vm_flags */
+#define VM_SEALED _BITUL(63)
+#endif
+
+#ifdef CONFIG_64BIT
+static inline int can_do_mseal(unsigned long flags)
+{
+ if (flags)
+ return -EINVAL;
+
+ return 0;
+}
+
+bool can_modify_mm(struct mm_struct *mm, unsigned long start,
+ unsigned long end);
+bool can_modify_mm_madv(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int behavior);
+#else
+static inline int can_do_mseal(unsigned long flags)
+{
+ return -EPERM;
+}
+
+static inline bool can_modify_mm(struct mm_struct *mm, unsigned long start,
+ unsigned long end)
+{
+ return true;
+}
+
+static inline bool can_modify_mm_madv(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int behavior)
+{
+ return true;
+}
+#endif
+
#ifdef CONFIG_SHRINKER_DEBUG
static inline __printf(2, 0) int shrinker_debugfs_name_alloc(
struct shrinker *shrinker, const char *fmt, va_list ap)
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 2b994092a2d430..9958ebc15d3830 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -16,6 +16,7 @@
#include <linux/static_key.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/vmalloc.h>
#include "kasan.h"
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 8350f5c06f2e0a..964b8482275ba0 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -595,9 +595,9 @@ static unsigned long kfence_init_pool(void)
continue;
__folio_set_slab(slab_folio(slab));
-#ifdef CONFIG_MEMCG
- slab->memcg_data = (unsigned long)&kfence_metadata_init[i / 2 - 1].objcg |
- MEMCG_DATA_OBJCGS;
+#ifdef CONFIG_MEMCG_KMEM
+ slab->obj_exts = (unsigned long)&kfence_metadata_init[i / 2 - 1].obj_exts |
+ MEMCG_DATA_OBJEXTS;
#endif
}
@@ -645,8 +645,8 @@ reset_slab:
if (!i || (i % 2))
continue;
-#ifdef CONFIG_MEMCG
- slab->memcg_data = 0;
+#ifdef CONFIG_MEMCG_KMEM
+ slab->obj_exts = 0;
#endif
__folio_clear_slab(slab_folio(slab));
}
@@ -1139,8 +1139,8 @@ void __kfence_free(void *addr)
{
struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
-#ifdef CONFIG_MEMCG
- KFENCE_WARN_ON(meta->objcg);
+#ifdef CONFIG_MEMCG_KMEM
+ KFENCE_WARN_ON(meta->obj_exts.objcg);
#endif
/*
* If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing
diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
index f46fbb03062b9c..084f5f36e8e76a 100644
--- a/mm/kfence/kfence.h
+++ b/mm/kfence/kfence.h
@@ -97,8 +97,8 @@ struct kfence_metadata {
struct kfence_track free_track;
/* For updating alloc_covered on frees. */
u32 alloc_stack_hash;
-#ifdef CONFIG_MEMCG
- struct obj_cgroup *objcg;
+#ifdef CONFIG_MEMCG_KMEM
+ struct slabobj_ext obj_exts;
#endif
};
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 38830174608fba..774a97e6e2da39 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -453,7 +453,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
{
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
hugepage_flags_enabled()) {
- if (thp_vma_allowable_order(vma, vm_flags, false, false, true,
+ if (thp_vma_allowable_order(vma, vm_flags, TVA_ENFORCE_SYSFS,
PMD_ORDER))
__khugepaged_enter(vma->vm_mm);
}
@@ -583,7 +583,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
folio = page_folio(page);
VM_BUG_ON_FOLIO(!folio_test_anon(folio), folio);
- if (page_mapcount(page) > 1) {
+ /* See hpage_collapse_scan_pmd(). */
+ if (folio_likely_mapped_shared(folio)) {
++shared;
if (cc->is_khugepaged &&
shared > khugepaged_max_ptes_shared) {
@@ -767,7 +768,7 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
* Returns SCAN_SUCCEED if copying succeeds, otherwise returns SCAN_COPY_MC.
*
* @pte: starting of the PTEs to copy from
- * @page: the new hugepage to copy contents to
+ * @folio: the new hugepage to copy contents to
* @pmd: pointer to the new hugepage's PMD
* @orig_pmd: the original raw pages' PMD
* @vma: the original raw pages' virtual memory area
@@ -775,33 +776,29 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
* @ptl: lock on raw pages' PTEs
* @compound_pagelist: list that stores compound pages
*/
-static int __collapse_huge_page_copy(pte_t *pte,
- struct page *page,
- pmd_t *pmd,
- pmd_t orig_pmd,
- struct vm_area_struct *vma,
- unsigned long address,
- spinlock_t *ptl,
- struct list_head *compound_pagelist)
+static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
+ pmd_t *pmd, pmd_t orig_pmd, struct vm_area_struct *vma,
+ unsigned long address, spinlock_t *ptl,
+ struct list_head *compound_pagelist)
{
- struct page *src_page;
- pte_t *_pte;
- pte_t pteval;
- unsigned long _address;
+ unsigned int i;
int result = SCAN_SUCCEED;
/*
* Copying pages' contents is subject to memory poison at any iteration.
*/
- for (_pte = pte, _address = address; _pte < pte + HPAGE_PMD_NR;
- _pte++, page++, _address += PAGE_SIZE) {
- pteval = ptep_get(_pte);
+ for (i = 0; i < HPAGE_PMD_NR; i++) {
+ pte_t pteval = ptep_get(pte + i);
+ struct page *page = folio_page(folio, i);
+ unsigned long src_addr = address + i * PAGE_SIZE;
+ struct page *src_page;
+
if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
- clear_user_highpage(page, _address);
+ clear_user_highpage(page, src_addr);
continue;
}
src_page = pte_page(pteval);
- if (copy_mc_user_highpage(page, src_page, _address, vma) > 0) {
+ if (copy_mc_user_highpage(page, src_page, src_addr, vma) > 0) {
result = SCAN_COPY_MC;
break;
}
@@ -891,20 +888,6 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc)
}
#endif
-static bool hpage_collapse_alloc_folio(struct folio **folio, gfp_t gfp, int node,
- nodemask_t *nmask)
-{
- *folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, nmask);
-
- if (unlikely(!*folio)) {
- count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
- return false;
- }
-
- count_vm_event(THP_COLLAPSE_ALLOC);
- return true;
-}
-
/*
* If mmap_lock temporarily dropped, revalidate vma
* before taking mmap_lock.
@@ -917,6 +900,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
struct collapse_control *cc)
{
struct vm_area_struct *vma;
+ unsigned long tva_flags = cc->is_khugepaged ? TVA_ENFORCE_SYSFS : 0;
if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
return SCAN_ANY_PROCESS;
@@ -927,8 +911,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
if (!thp_vma_suitable_order(vma, address, PMD_ORDER))
return SCAN_ADDRESS_RANGE;
- if (!thp_vma_allowable_order(vma, vma->vm_flags, false, false,
- cc->is_khugepaged, PMD_ORDER))
+ if (!thp_vma_allowable_order(vma, vma->vm_flags, tva_flags, PMD_ORDER))
return SCAN_VMA_CHECK;
/*
* Anon VMA expected, the address may be unmapped then
@@ -1059,7 +1042,7 @@ out:
return result;
}
-static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
+static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm,
struct collapse_control *cc)
{
gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
@@ -1067,20 +1050,23 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
int node = hpage_collapse_find_target_node(cc);
struct folio *folio;
- if (!hpage_collapse_alloc_folio(&folio, gfp, node, &cc->alloc_nmask)) {
- *hpage = NULL;
+ folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask);
+ if (!folio) {
+ *foliop = NULL;
+ count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
return SCAN_ALLOC_HUGE_PAGE_FAIL;
}
+ count_vm_event(THP_COLLAPSE_ALLOC);
if (unlikely(mem_cgroup_charge(folio, mm, gfp))) {
folio_put(folio);
- *hpage = NULL;
+ *foliop = NULL;
return SCAN_CGROUP_CHARGE_FAIL;
}
count_memcg_folio_events(folio, THP_COLLAPSE_ALLOC, 1);
- *hpage = folio_page(folio, 0);
+ *foliop = folio;
return SCAN_SUCCEED;
}
@@ -1093,7 +1079,6 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
pte_t *pte;
pgtable_t pgtable;
struct folio *folio;
- struct page *hpage;
spinlock_t *pmd_ptl, *pte_ptl;
int result = SCAN_FAIL;
struct vm_area_struct *vma;
@@ -1109,7 +1094,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
*/
mmap_read_unlock(mm);
- result = alloc_charge_hpage(&hpage, mm, cc);
+ result = alloc_charge_folio(&folio, mm, cc);
if (result != SCAN_SUCCEED)
goto out_nolock;
@@ -1169,7 +1154,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
* huge and small TLB entries for the same virtual address to
* avoid the risk of CPU bugs in that area.
*
- * Parallel fast GUP is fine since fast GUP will back off when
+ * Parallel GUP-fast is fine since GUP-fast will back off when
* it detects PMD is changed.
*/
_pmd = pmdp_collapse_flush(vma, address, pmd);
@@ -1208,14 +1193,13 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
*/
anon_vma_unlock_write(vma->anon_vma);
- result = __collapse_huge_page_copy(pte, hpage, pmd, _pmd,
+ result = __collapse_huge_page_copy(pte, folio, pmd, _pmd,
vma, address, pte_ptl,
&compound_pagelist);
pte_unmap(pte);
if (unlikely(result != SCAN_SUCCEED))
goto out_up_write;
- folio = page_folio(hpage);
/*
* The smp_wmb() inside __folio_mark_uptodate() ensures the
* copy_huge_page writes become visible before the set_pmd_at()
@@ -1224,7 +1208,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
__folio_mark_uptodate(folio);
pgtable = pmd_pgtable(_pmd);
- _pmd = mk_huge_pmd(hpage, vma->vm_page_prot);
+ _pmd = mk_huge_pmd(&folio->page, vma->vm_page_prot);
_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
spin_lock(pmd_ptl);
@@ -1236,14 +1220,14 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
update_mmu_cache_pmd(vma, address, pmd);
spin_unlock(pmd_ptl);
- hpage = NULL;
+ folio = NULL;
result = SCAN_SUCCEED;
out_up_write:
mmap_write_unlock(mm);
out_nolock:
- if (hpage)
- put_page(hpage);
+ if (folio)
+ folio_put(folio);
trace_mm_collapse_huge_page(mm, result == SCAN_SUCCEED, result);
return result;
}
@@ -1334,8 +1318,20 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
result = SCAN_PAGE_NULL;
goto out_unmap;
}
+ folio = page_folio(page);
+
+ if (!folio_test_anon(folio)) {
+ result = SCAN_PAGE_ANON;
+ goto out_unmap;
+ }
- if (page_mapcount(page) > 1) {
+ /*
+ * We treat a single page as shared if any part of the THP
+ * is shared. "False negatives" from
+ * folio_likely_mapped_shared() are not expected to matter
+ * much in practice.
+ */
+ if (folio_likely_mapped_shared(folio)) {
++shared;
if (cc->is_khugepaged &&
shared > khugepaged_max_ptes_shared) {
@@ -1345,7 +1341,6 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
}
}
- folio = page_folio(page);
/*
* Record which node the original page is from and save this
* information to cc->node_load[].
@@ -1366,16 +1361,12 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
result = SCAN_PAGE_LOCK;
goto out_unmap;
}
- if (!folio_test_anon(folio)) {
- result = SCAN_PAGE_ANON;
- goto out_unmap;
- }
/*
* Check if the page has any GUP (or other external) pins.
*
* Here the check may be racy:
- * it may see total_mapcount > refcount in some cases?
+ * it may see folio_mapcount() > folio_ref_count().
* But such case is ephemeral we could always retry collapse
* later. However it may report false positive if the page
* has excessive GUP pins (i.e. 512). Anyway the same check
@@ -1510,8 +1501,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
* and map it by a PMD, regardless of sysfs THP settings. As such, let's
* analogously elide sysfs THP settings here.
*/
- if (!thp_vma_allowable_order(vma, vma->vm_flags, false, false, false,
- PMD_ORDER))
+ if (!thp_vma_allowable_order(vma, vma->vm_flags, 0, PMD_ORDER))
return SCAN_VMA_CHECK;
/* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
@@ -1797,29 +1787,26 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
struct collapse_control *cc)
{
struct address_space *mapping = file->f_mapping;
- struct page *hpage;
- struct page *page;
- struct page *tmp;
- struct folio *folio;
+ struct page *dst;
+ struct folio *folio, *tmp, *new_folio;
pgoff_t index = 0, end = start + HPAGE_PMD_NR;
LIST_HEAD(pagelist);
XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
int nr_none = 0, result = SCAN_SUCCEED;
bool is_shmem = shmem_file(file);
- int nr = 0;
VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
- result = alloc_charge_hpage(&hpage, mm, cc);
+ result = alloc_charge_folio(&new_folio, mm, cc);
if (result != SCAN_SUCCEED)
goto out;
- __SetPageLocked(hpage);
+ __folio_set_locked(new_folio);
if (is_shmem)
- __SetPageSwapBacked(hpage);
- hpage->index = start;
- hpage->mapping = mapping;
+ __folio_set_swapbacked(new_folio);
+ new_folio->index = start;
+ new_folio->mapping = mapping;
/*
* Ensure we have slots for all the pages in the range. This is
@@ -1839,11 +1826,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
for (index = start; index < end; index++) {
xas_set(&xas, index);
- page = xas_load(&xas);
+ folio = xas_load(&xas);
VM_BUG_ON(index != xas.xa_index);
if (is_shmem) {
- if (!page) {
+ if (!folio) {
/*
* Stop if extent has been truncated or
* hole-punched, and is now completely
@@ -1859,7 +1846,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
continue;
}
- if (xa_is_value(page) || !PageUptodate(page)) {
+ if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
xas_unlock_irq(&xas);
/* swap in or instantiate fallocated page */
if (shmem_get_folio(mapping->host, index,
@@ -1869,28 +1856,27 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
}
/* drain lru cache to help isolate_lru_page() */
lru_add_drain();
- page = folio_file_page(folio, index);
- } else if (trylock_page(page)) {
- get_page(page);
+ } else if (folio_trylock(folio)) {
+ folio_get(folio);
xas_unlock_irq(&xas);
} else {
result = SCAN_PAGE_LOCK;
goto xa_locked;
}
} else { /* !is_shmem */
- if (!page || xa_is_value(page)) {
+ if (!folio || xa_is_value(folio)) {
xas_unlock_irq(&xas);
page_cache_sync_readahead(mapping, &file->f_ra,
file, index,
end - index);
/* drain lru cache to help isolate_lru_page() */
lru_add_drain();
- page = find_lock_page(mapping, index);
- if (unlikely(page == NULL)) {
+ folio = filemap_lock_folio(mapping, index);
+ if (IS_ERR(folio)) {
result = SCAN_FAIL;
goto xa_unlocked;
}
- } else if (PageDirty(page)) {
+ } else if (folio_test_dirty(folio)) {
/*
* khugepaged only works on read-only fd,
* so this page is dirty because it hasn't
@@ -1908,12 +1894,12 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
filemap_flush(mapping);
result = SCAN_FAIL;
goto xa_unlocked;
- } else if (PageWriteback(page)) {
+ } else if (folio_test_writeback(folio)) {
xas_unlock_irq(&xas);
result = SCAN_FAIL;
goto xa_unlocked;
- } else if (trylock_page(page)) {
- get_page(page);
+ } else if (folio_trylock(folio)) {
+ folio_get(folio);
xas_unlock_irq(&xas);
} else {
result = SCAN_PAGE_LOCK;
@@ -1922,35 +1908,31 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
}
/*
- * The page must be locked, so we can drop the i_pages lock
+ * The folio must be locked, so we can drop the i_pages lock
* without racing with truncate.
*/
- VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
- /* make sure the page is up to date */
- if (unlikely(!PageUptodate(page))) {
+ /* make sure the folio is up to date */
+ if (unlikely(!folio_test_uptodate(folio))) {
result = SCAN_FAIL;
goto out_unlock;
}
/*
* If file was truncated then extended, or hole-punched, before
- * we locked the first page, then a THP might be there already.
+ * we locked the first folio, then a THP might be there already.
* This will be discovered on the first iteration.
*/
- if (PageTransCompound(page)) {
- struct page *head = compound_head(page);
-
- result = compound_order(head) == HPAGE_PMD_ORDER &&
- head->index == start
+ if (folio_test_large(folio)) {
+ result = folio_order(folio) == HPAGE_PMD_ORDER &&
+ folio->index == start
/* Maybe PMD-mapped */
? SCAN_PTE_MAPPED_HUGEPAGE
: SCAN_PAGE_COMPOUND;
goto out_unlock;
}
- folio = page_folio(page);
-
if (folio_mapping(folio) != mapping) {
result = SCAN_TRUNCATED;
goto out_unlock;
@@ -1960,7 +1942,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
folio_test_writeback(folio))) {
/*
* khugepaged only works on read-only fd, so this
- * page is dirty because it hasn't been flushed
+ * folio is dirty because it hasn't been flushed
* since first write.
*/
result = SCAN_FAIL;
@@ -1984,33 +1966,34 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
xas_lock_irq(&xas);
- VM_BUG_ON_PAGE(page != xa_load(xas.xa, index), page);
+ VM_BUG_ON_FOLIO(folio != xa_load(xas.xa, index), folio);
/*
- * We control three references to the page:
+ * We control three references to the folio:
* - we hold a pin on it;
* - one reference from page cache;
- * - one from isolate_lru_page;
- * If those are the only references, then any new usage of the
- * page will have to fetch it from the page cache. That requires
- * locking the page to handle truncate, so any new usage will be
- * blocked until we unlock page after collapse/during rollback.
+ * - one from lru_isolate_folio;
+ * If those are the only references, then any new usage
+ * of the folio will have to fetch it from the page
+ * cache. That requires locking the folio to handle
+ * truncate, so any new usage will be blocked until we
+ * unlock folio after collapse/during rollback.
*/
- if (page_count(page) != 3) {
+ if (folio_ref_count(folio) != 3) {
result = SCAN_PAGE_COUNT;
xas_unlock_irq(&xas);
- putback_lru_page(page);
+ folio_putback_lru(folio);
goto out_unlock;
}
/*
- * Accumulate the pages that are being collapsed.
+ * Accumulate the folios that are being collapsed.
*/
- list_add_tail(&page->lru, &pagelist);
+ list_add_tail(&folio->lru, &pagelist);
continue;
out_unlock:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
goto xa_unlocked;
}
@@ -2049,23 +2032,27 @@ xa_unlocked:
}
/*
- * The old pages are locked, so they won't change anymore.
+ * The old folios are locked, so they won't change anymore.
*/
index = start;
- list_for_each_entry(page, &pagelist, lru) {
- while (index < page->index) {
- clear_highpage(hpage + (index % HPAGE_PMD_NR));
+ dst = folio_page(new_folio, 0);
+ list_for_each_entry(folio, &pagelist, lru) {
+ while (index < folio->index) {
+ clear_highpage(dst);
index++;
+ dst++;
}
- if (copy_mc_highpage(hpage + (page->index % HPAGE_PMD_NR), page) > 0) {
+ if (copy_mc_highpage(dst, folio_page(folio, 0)) > 0) {
result = SCAN_COPY_MC;
goto rollback;
}
index++;
+ dst++;
}
while (index < end) {
- clear_highpage(hpage + (index % HPAGE_PMD_NR));
+ clear_highpage(dst);
index++;
+ dst++;
}
if (nr_none) {
@@ -2093,16 +2080,17 @@ xa_unlocked:
}
/*
- * If userspace observed a missing page in a VMA with a MODE_MISSING
- * userfaultfd, then it might expect a UFFD_EVENT_PAGEFAULT for that
- * page. If so, we need to roll back to avoid suppressing such an
- * event. Since wp/minor userfaultfds don't give userspace any
- * guarantees that the kernel doesn't fill a missing page with a zero
- * page, so they don't matter here.
+ * If userspace observed a missing page in a VMA with
+ * a MODE_MISSING userfaultfd, then it might expect a
+ * UFFD_EVENT_PAGEFAULT for that page. If so, we need to
+ * roll back to avoid suppressing such an event. Since
+ * wp/minor userfaultfds don't give userspace any
+ * guarantees that the kernel doesn't fill a missing
+ * page with a zero page, so they don't matter here.
*
- * Any userfaultfds registered after this point will not be able to
- * observe any missing pages due to the previously inserted retry
- * entries.
+ * Any userfaultfds registered after this point will
+ * not be able to observe any missing pages due to the
+ * previously inserted retry entries.
*/
vma_interval_tree_foreach(vma, &mapping->i_mmap, start, end) {
if (userfaultfd_missing(vma)) {
@@ -2127,33 +2115,32 @@ immap_locked:
xas_lock_irq(&xas);
}
- folio = page_folio(hpage);
- nr = folio_nr_pages(folio);
if (is_shmem)
- __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr);
+ __lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR);
else
- __lruvec_stat_mod_folio(folio, NR_FILE_THPS, nr);
+ __lruvec_stat_mod_folio(new_folio, NR_FILE_THPS, HPAGE_PMD_NR);
if (nr_none) {
- __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_none);
+ __lruvec_stat_mod_folio(new_folio, NR_FILE_PAGES, nr_none);
/* nr_none is always 0 for non-shmem. */
- __lruvec_stat_mod_folio(folio, NR_SHMEM, nr_none);
+ __lruvec_stat_mod_folio(new_folio, NR_SHMEM, nr_none);
}
/*
- * Mark hpage as uptodate before inserting it into the page cache so
- * that it isn't mistaken for an fallocated but unwritten page.
+ * Mark new_folio as uptodate before inserting it into the
+ * page cache so that it isn't mistaken for an fallocated but
+ * unwritten page.
*/
- folio_mark_uptodate(folio);
- folio_ref_add(folio, HPAGE_PMD_NR - 1);
+ folio_mark_uptodate(new_folio);
+ folio_ref_add(new_folio, HPAGE_PMD_NR - 1);
if (is_shmem)
- folio_mark_dirty(folio);
- folio_add_lru(folio);
+ folio_mark_dirty(new_folio);
+ folio_add_lru(new_folio);
/* Join all the small entries into a single multi-index entry. */
xas_set_order(&xas, start, HPAGE_PMD_ORDER);
- xas_store(&xas, folio);
+ xas_store(&xas, new_folio);
WARN_ON_ONCE(xas_error(&xas));
xas_unlock_irq(&xas);
@@ -2164,18 +2151,18 @@ immap_locked:
retract_page_tables(mapping, start);
if (cc && !cc->is_khugepaged)
result = SCAN_PTE_MAPPED_HUGEPAGE;
- folio_unlock(folio);
+ folio_unlock(new_folio);
/*
- * The collapse has succeeded, so free the old pages.
+ * The collapse has succeeded, so free the old folios.
*/
- list_for_each_entry_safe(page, tmp, &pagelist, lru) {
- list_del(&page->lru);
- page->mapping = NULL;
- ClearPageActive(page);
- ClearPageUnevictable(page);
- unlock_page(page);
- folio_put_refs(page_folio(page), 3);
+ list_for_each_entry_safe(folio, tmp, &pagelist, lru) {
+ list_del(&folio->lru);
+ folio->mapping = NULL;
+ folio_clear_active(folio);
+ folio_clear_unevictable(folio);
+ folio_unlock(folio);
+ folio_put_refs(folio, 3);
}
goto out;
@@ -2189,11 +2176,11 @@ rollback:
shmem_uncharge(mapping->host, nr_none);
}
- list_for_each_entry_safe(page, tmp, &pagelist, lru) {
- list_del(&page->lru);
- unlock_page(page);
- putback_lru_page(page);
- put_page(page);
+ list_for_each_entry_safe(folio, tmp, &pagelist, lru) {
+ list_del(&folio->lru);
+ folio_unlock(folio);
+ folio_putback_lru(folio);
+ folio_put(folio);
}
/*
* Undo the updates of filemap_nr_thps_inc for non-SHMEM
@@ -2209,13 +2196,13 @@ rollback:
smp_mb();
}
- hpage->mapping = NULL;
+ new_folio->mapping = NULL;
- unlock_page(hpage);
- put_page(hpage);
+ folio_unlock(new_folio);
+ folio_put(new_folio);
out:
VM_BUG_ON(!list_empty(&pagelist));
- trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr, file, nr, result);
+ trace_mm_khugepaged_collapse_file(mm, new_folio, index, is_shmem, addr, file, HPAGE_PMD_NR, result);
return result;
}
@@ -2223,7 +2210,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
struct file *file, pgoff_t start,
struct collapse_control *cc)
{
- struct page *page = NULL;
+ struct folio *folio = NULL;
struct address_space *mapping = file->f_mapping;
XA_STATE(xas, &mapping->i_pages, start);
int present, swap;
@@ -2235,11 +2222,11 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
memset(cc->node_load, 0, sizeof(cc->node_load));
nodes_clear(cc->alloc_nmask);
rcu_read_lock();
- xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) {
- if (xas_retry(&xas, page))
+ xas_for_each(&xas, folio, start + HPAGE_PMD_NR - 1) {
+ if (xas_retry(&xas, folio))
continue;
- if (xa_is_value(page)) {
+ if (xa_is_value(folio)) {
++swap;
if (cc->is_khugepaged &&
swap > khugepaged_max_ptes_swap) {
@@ -2254,11 +2241,9 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
* TODO: khugepaged should compact smaller compound pages
* into a PMD sized page
*/
- if (PageTransCompound(page)) {
- struct page *head = compound_head(page);
-
- result = compound_order(head) == HPAGE_PMD_ORDER &&
- head->index == start
+ if (folio_test_large(folio)) {
+ result = folio_order(folio) == HPAGE_PMD_ORDER &&
+ folio->index == start
/* Maybe PMD-mapped */
? SCAN_PTE_MAPPED_HUGEPAGE
: SCAN_PAGE_COMPOUND;
@@ -2271,28 +2256,29 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
break;
}
- node = page_to_nid(page);
+ node = folio_nid(folio);
if (hpage_collapse_scan_abort(node, cc)) {
result = SCAN_SCAN_ABORT;
break;
}
cc->node_load[node]++;
- if (!PageLRU(page)) {
+ if (!folio_test_lru(folio)) {
result = SCAN_PAGE_LRU;
break;
}
- if (page_count(page) !=
- 1 + page_mapcount(page) + page_has_private(page)) {
+ if (folio_ref_count(folio) !=
+ 1 + folio_mapcount(folio) + folio_test_private(folio)) {
result = SCAN_PAGE_COUNT;
break;
}
/*
- * We probably should check if the page is referenced here, but
- * nobody would transfer pte_young() to PageReferenced() for us.
- * And rmap walk here is just too costly...
+ * We probably should check if the folio is referenced
+ * here, but nobody would transfer pte_young() to
+ * folio_test_referenced() for us. And rmap walk here
+ * is just too costly...
*/
present++;
@@ -2314,7 +2300,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr,
}
}
- trace_mm_khugepaged_scan_file(mm, page, file, present, swap, result);
+ trace_mm_khugepaged_scan_file(mm, folio, file, present, swap, result);
return result;
}
#else
@@ -2376,8 +2362,8 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
progress++;
break;
}
- if (!thp_vma_allowable_order(vma, vma->vm_flags, false, false,
- true, PMD_ORDER)) {
+ if (!thp_vma_allowable_order(vma, vma->vm_flags,
+ TVA_ENFORCE_SYSFS, PMD_ORDER)) {
skip:
progress++;
continue;
@@ -2714,8 +2700,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
*prev = vma;
- if (!thp_vma_allowable_order(vma, vma->vm_flags, false, false, false,
- PMD_ORDER))
+ if (!thp_vma_allowable_order(vma, vma->vm_flags, 0, PMD_ORDER))
return -EINVAL;
cc = kmalloc(sizeof(*cc), GFP_KERNEL);
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 6a540c2b27c524..fdcf01f622028f 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -158,9 +158,9 @@ struct kmemleak_object {
int count;
/* checksum for detecting modified objects */
u32 checksum;
+ depot_stack_handle_t trace_handle;
/* memory ranges to be scanned inside an object (empty for all) */
struct hlist_head area_list;
- depot_stack_handle_t trace_handle;
unsigned long jiffies; /* creation timestamp */
pid_t pid; /* pid of the current task */
char comm[TASK_COMM_LEN]; /* executable name */
@@ -463,7 +463,7 @@ static struct kmemleak_object *mem_pool_alloc(gfp_t gfp)
/* try the slab allocator first */
if (object_cache) {
- object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
+ object = kmem_cache_alloc_noprof(object_cache, gfp_kmemleak_mask(gfp));
if (object)
return object;
}
@@ -947,7 +947,7 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer);
if (scan_area_cache)
- area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
+ area = kmem_cache_alloc_noprof(scan_area_cache, gfp_kmemleak_mask(gfp));
raw_spin_lock_irqsave(&object->lock, flags);
if (!area) {
diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c
index 0b09daa188ef6c..22e8657800effb 100644
--- a/mm/kmsan/hooks.c
+++ b/mm/kmsan/hooks.c
@@ -285,6 +285,17 @@ void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy,
}
EXPORT_SYMBOL(kmsan_copy_to_user);
+void kmsan_memmove(void *to, const void *from, size_t size)
+{
+ if (!kmsan_enabled || kmsan_in_runtime())
+ return;
+
+ kmsan_enter_runtime();
+ kmsan_internal_memmove_metadata(to, (void *)from, size);
+ kmsan_leave_runtime();
+}
+EXPORT_SYMBOL(kmsan_memmove);
+
/* Helper function to check an URB. */
void kmsan_handle_urb(const struct urb *urb, bool is_out)
{
diff --git a/mm/ksm.c b/mm/ksm.c
index 8c001819cf10f5..e1034bf1c937e9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -890,14 +890,14 @@ static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node)
free_stable_node(stable_node);
}
-enum get_ksm_page_flags {
- GET_KSM_PAGE_NOLOCK,
- GET_KSM_PAGE_LOCK,
- GET_KSM_PAGE_TRYLOCK
+enum ksm_get_folio_flags {
+ KSM_GET_FOLIO_NOLOCK,
+ KSM_GET_FOLIO_LOCK,
+ KSM_GET_FOLIO_TRYLOCK
};
/*
- * get_ksm_page: checks if the page indicated by the stable node
+ * ksm_get_folio: checks if the page indicated by the stable node
* is still its ksm page, despite having held no reference to it.
* In which case we can trust the content of the page, and it
* returns the gotten page; but if the page has now been zapped,
@@ -915,10 +915,10 @@ enum get_ksm_page_flags {
* a page to put something that might look like our key in page->mapping.
* is on its way to being freed; but it is an anomaly to bear in mind.
*/
-static struct page *get_ksm_page(struct ksm_stable_node *stable_node,
- enum get_ksm_page_flags flags)
+static struct folio *ksm_get_folio(struct ksm_stable_node *stable_node,
+ enum ksm_get_folio_flags flags)
{
- struct page *page;
+ struct folio *folio;
void *expected_mapping;
unsigned long kpfn;
@@ -926,8 +926,8 @@ static struct page *get_ksm_page(struct ksm_stable_node *stable_node,
PAGE_MAPPING_KSM);
again:
kpfn = READ_ONCE(stable_node->kpfn); /* Address dependency. */
- page = pfn_to_page(kpfn);
- if (READ_ONCE(page->mapping) != expected_mapping)
+ folio = pfn_folio(kpfn);
+ if (READ_ONCE(folio->mapping) != expected_mapping)
goto stale;
/*
@@ -940,41 +940,41 @@ again:
* in folio_migrate_mapping(), it might still be our page,
* in which case it's essential to keep the node.
*/
- while (!get_page_unless_zero(page)) {
+ while (!folio_try_get(folio)) {
/*
* Another check for page->mapping != expected_mapping would
* work here too. We have chosen the !PageSwapCache test to
* optimize the common case, when the page is or is about to
* be freed: PageSwapCache is cleared (under spin_lock_irq)
* in the ref_freeze section of __remove_mapping(); but Anon
- * page->mapping reset to NULL later, in free_pages_prepare().
+ * folio->mapping reset to NULL later, in free_pages_prepare().
*/
- if (!PageSwapCache(page))
+ if (!folio_test_swapcache(folio))
goto stale;
cpu_relax();
}
- if (READ_ONCE(page->mapping) != expected_mapping) {
- put_page(page);
+ if (READ_ONCE(folio->mapping) != expected_mapping) {
+ folio_put(folio);
goto stale;
}
- if (flags == GET_KSM_PAGE_TRYLOCK) {
- if (!trylock_page(page)) {
- put_page(page);
+ if (flags == KSM_GET_FOLIO_TRYLOCK) {
+ if (!folio_trylock(folio)) {
+ folio_put(folio);
return ERR_PTR(-EBUSY);
}
- } else if (flags == GET_KSM_PAGE_LOCK)
- lock_page(page);
+ } else if (flags == KSM_GET_FOLIO_LOCK)
+ folio_lock(folio);
- if (flags != GET_KSM_PAGE_NOLOCK) {
- if (READ_ONCE(page->mapping) != expected_mapping) {
- unlock_page(page);
- put_page(page);
+ if (flags != KSM_GET_FOLIO_NOLOCK) {
+ if (READ_ONCE(folio->mapping) != expected_mapping) {
+ folio_unlock(folio);
+ folio_put(folio);
goto stale;
}
}
- return page;
+ return folio;
stale:
/*
@@ -998,16 +998,16 @@ static void remove_rmap_item_from_tree(struct ksm_rmap_item *rmap_item)
{
if (rmap_item->address & STABLE_FLAG) {
struct ksm_stable_node *stable_node;
- struct page *page;
+ struct folio *folio;
stable_node = rmap_item->head;
- page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK);
- if (!page)
+ folio = ksm_get_folio(stable_node, KSM_GET_FOLIO_LOCK);
+ if (!folio)
goto out;
hlist_del(&rmap_item->hlist);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
if (!hlist_empty(&stable_node->hlist))
ksm_pages_sharing--;
@@ -1094,11 +1094,11 @@ static inline struct ksm_stable_node *page_stable_node(struct page *page)
return folio_stable_node(page_folio(page));
}
-static inline void set_page_stable_node(struct page *page,
- struct ksm_stable_node *stable_node)
+static inline void folio_set_stable_node(struct folio *folio,
+ struct ksm_stable_node *stable_node)
{
- VM_BUG_ON_PAGE(PageAnon(page) && PageAnonExclusive(page), page);
- page->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM);
+ VM_WARN_ON_FOLIO(folio_test_anon(folio) && PageAnonExclusive(&folio->page), folio);
+ folio->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM);
}
#ifdef CONFIG_SYSFS
@@ -1107,13 +1107,13 @@ static inline void set_page_stable_node(struct page *page,
*/
static int remove_stable_node(struct ksm_stable_node *stable_node)
{
- struct page *page;
+ struct folio *folio;
int err;
- page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK);
- if (!page) {
+ folio = ksm_get_folio(stable_node, KSM_GET_FOLIO_LOCK);
+ if (!folio) {
/*
- * get_ksm_page did remove_node_from_stable_tree itself.
+ * ksm_get_folio did remove_node_from_stable_tree itself.
*/
return 0;
}
@@ -1124,22 +1124,22 @@ static int remove_stable_node(struct ksm_stable_node *stable_node)
* merge_across_nodes/max_page_sharing be switched.
*/
err = -EBUSY;
- if (!page_mapped(page)) {
+ if (!folio_mapped(folio)) {
/*
- * The stable node did not yet appear stale to get_ksm_page(),
- * since that allows for an unmapped ksm page to be recognized
+ * The stable node did not yet appear stale to ksm_get_folio(),
+ * since that allows for an unmapped ksm folio to be recognized
* right up until it is freed; but the node is safe to remove.
- * This page might be in an LRU cache waiting to be freed,
- * or it might be PageSwapCache (perhaps under writeback),
+ * This folio might be in an LRU cache waiting to be freed,
+ * or it might be in the swapcache (perhaps under writeback),
* or it might have been removed from swapcache a moment ago.
*/
- set_page_stable_node(page, NULL);
+ folio_set_stable_node(folio, NULL);
remove_node_from_stable_tree(stable_node);
err = 0;
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return err;
}
@@ -1275,23 +1275,24 @@ static u32 calc_checksum(struct page *page)
return checksum;
}
-static int write_protect_page(struct vm_area_struct *vma, struct page *page,
+static int write_protect_page(struct vm_area_struct *vma, struct folio *folio,
pte_t *orig_pte)
{
struct mm_struct *mm = vma->vm_mm;
- DEFINE_PAGE_VMA_WALK(pvmw, page, vma, 0, 0);
+ DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, 0, 0);
int swapped;
int err = -EFAULT;
struct mmu_notifier_range range;
bool anon_exclusive;
pte_t entry;
- pvmw.address = page_address_in_vma(page, vma);
+ if (WARN_ON_ONCE(folio_test_large(folio)))
+ return err;
+
+ pvmw.address = page_address_in_vma(&folio->page, vma);
if (pvmw.address == -EFAULT)
goto out;
- BUG_ON(PageTransCompound(page));
-
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, pvmw.address,
pvmw.address + PAGE_SIZE);
mmu_notifier_invalidate_range_start(&range);
@@ -1301,12 +1302,12 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?"))
goto out_unlock;
- anon_exclusive = PageAnonExclusive(page);
+ anon_exclusive = PageAnonExclusive(&folio->page);
entry = ptep_get(pvmw.pte);
if (pte_write(entry) || pte_dirty(entry) ||
anon_exclusive || mm_tlb_flush_pending(mm)) {
- swapped = PageSwapCache(page);
- flush_cache_page(vma, pvmw.address, page_to_pfn(page));
+ swapped = folio_test_swapcache(folio);
+ flush_cache_page(vma, pvmw.address, folio_pfn(folio));
/*
* Ok this is tricky, when get_user_pages_fast() run it doesn't
* take any lock, therefore the check that we are going to make
@@ -1326,20 +1327,20 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
* Check that no O_DIRECT or similar I/O is in progress on the
* page
*/
- if (page_mapcount(page) + 1 + swapped != page_count(page)) {
+ if (folio_mapcount(folio) + 1 + swapped != folio_ref_count(folio)) {
set_pte_at(mm, pvmw.address, pvmw.pte, entry);
goto out_unlock;
}
/* See folio_try_share_anon_rmap_pte(): clear PTE first. */
if (anon_exclusive &&
- folio_try_share_anon_rmap_pte(page_folio(page), page)) {
+ folio_try_share_anon_rmap_pte(folio, &folio->page)) {
set_pte_at(mm, pvmw.address, pvmw.pte, entry);
goto out_unlock;
}
if (pte_dirty(entry))
- set_page_dirty(page);
+ folio_mark_dirty(folio);
entry = pte_mkclean(entry);
if (pte_write(entry))
@@ -1505,14 +1506,14 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
* ptes are necessarily already write-protected. But in either
* case, we need to lock and check page_count is not raised.
*/
- if (write_protect_page(vma, page, &orig_pte) == 0) {
+ if (write_protect_page(vma, page_folio(page), &orig_pte) == 0) {
if (!kpage) {
/*
* While we hold page lock, upgrade page from
* PageAnon+anon_vma to PageKsm+NULL stable_node:
* stable_tree_insert() will update stable_node.
*/
- set_page_stable_node(page, NULL);
+ folio_set_stable_node(page_folio(page), NULL);
mark_page_accessed(page);
/*
* Page reclaim just frees a clean page with no dirty
@@ -1617,14 +1618,14 @@ bool is_page_sharing_candidate(struct ksm_stable_node *stable_node)
return __is_page_sharing_candidate(stable_node, 0);
}
-static struct page *stable_node_dup(struct ksm_stable_node **_stable_node_dup,
- struct ksm_stable_node **_stable_node,
- struct rb_root *root,
- bool prune_stale_stable_nodes)
+static struct folio *stable_node_dup(struct ksm_stable_node **_stable_node_dup,
+ struct ksm_stable_node **_stable_node,
+ struct rb_root *root,
+ bool prune_stale_stable_nodes)
{
struct ksm_stable_node *dup, *found = NULL, *stable_node = *_stable_node;
struct hlist_node *hlist_safe;
- struct page *_tree_page, *tree_page = NULL;
+ struct folio *folio, *tree_folio = NULL;
int nr = 0;
int found_rmap_hlist_len;
@@ -1643,24 +1644,24 @@ static struct page *stable_node_dup(struct ksm_stable_node **_stable_node_dup,
* We must walk all stable_node_dup to prune the stale
* stable nodes during lookup.
*
- * get_ksm_page can drop the nodes from the
+ * ksm_get_folio can drop the nodes from the
* stable_node->hlist if they point to freed pages
* (that's why we do a _safe walk). The "dup"
* stable_node parameter itself will be freed from
* under us if it returns NULL.
*/
- _tree_page = get_ksm_page(dup, GET_KSM_PAGE_NOLOCK);
- if (!_tree_page)
+ folio = ksm_get_folio(dup, KSM_GET_FOLIO_NOLOCK);
+ if (!folio)
continue;
nr += 1;
if (is_page_sharing_candidate(dup)) {
if (!found ||
dup->rmap_hlist_len > found_rmap_hlist_len) {
if (found)
- put_page(tree_page);
+ folio_put(tree_folio);
found = dup;
found_rmap_hlist_len = found->rmap_hlist_len;
- tree_page = _tree_page;
+ tree_folio = folio;
/* skip put_page for found dup */
if (!prune_stale_stable_nodes)
@@ -1668,7 +1669,7 @@ static struct page *stable_node_dup(struct ksm_stable_node **_stable_node_dup,
continue;
}
}
- put_page(_tree_page);
+ folio_put(folio);
}
if (found) {
@@ -1733,7 +1734,7 @@ static struct page *stable_node_dup(struct ksm_stable_node **_stable_node_dup,
}
*_stable_node_dup = found;
- return tree_page;
+ return tree_folio;
}
static struct ksm_stable_node *stable_node_dup_any(struct ksm_stable_node *stable_node,
@@ -1750,7 +1751,7 @@ static struct ksm_stable_node *stable_node_dup_any(struct ksm_stable_node *stabl
}
/*
- * Like for get_ksm_page, this function can free the *_stable_node and
+ * Like for ksm_get_folio, this function can free the *_stable_node and
* *_stable_node_dup if the returned tree_page is NULL.
*
* It can also free and overwrite *_stable_node with the found
@@ -1763,16 +1764,16 @@ static struct ksm_stable_node *stable_node_dup_any(struct ksm_stable_node *stabl
* function and will be overwritten in all cases, the caller doesn't
* need to initialize it.
*/
-static struct page *__stable_node_chain(struct ksm_stable_node **_stable_node_dup,
- struct ksm_stable_node **_stable_node,
- struct rb_root *root,
- bool prune_stale_stable_nodes)
+static struct folio *__stable_node_chain(struct ksm_stable_node **_stable_node_dup,
+ struct ksm_stable_node **_stable_node,
+ struct rb_root *root,
+ bool prune_stale_stable_nodes)
{
struct ksm_stable_node *stable_node = *_stable_node;
if (!is_stable_node_chain(stable_node)) {
if (is_page_sharing_candidate(stable_node)) {
*_stable_node_dup = stable_node;
- return get_ksm_page(stable_node, GET_KSM_PAGE_NOLOCK);
+ return ksm_get_folio(stable_node, KSM_GET_FOLIO_NOLOCK);
}
/*
* _stable_node_dup set to NULL means the stable_node
@@ -1785,24 +1786,24 @@ static struct page *__stable_node_chain(struct ksm_stable_node **_stable_node_du
prune_stale_stable_nodes);
}
-static __always_inline struct page *chain_prune(struct ksm_stable_node **s_n_d,
- struct ksm_stable_node **s_n,
- struct rb_root *root)
+static __always_inline struct folio *chain_prune(struct ksm_stable_node **s_n_d,
+ struct ksm_stable_node **s_n,
+ struct rb_root *root)
{
return __stable_node_chain(s_n_d, s_n, root, true);
}
-static __always_inline struct page *chain(struct ksm_stable_node **s_n_d,
- struct ksm_stable_node *s_n,
- struct rb_root *root)
+static __always_inline struct folio *chain(struct ksm_stable_node **s_n_d,
+ struct ksm_stable_node *s_n,
+ struct rb_root *root)
{
struct ksm_stable_node *old_stable_node = s_n;
- struct page *tree_page;
+ struct folio *tree_folio;
- tree_page = __stable_node_chain(s_n_d, &s_n, root, false);
+ tree_folio = __stable_node_chain(s_n_d, &s_n, root, false);
/* not pruning dups so s_n cannot have changed */
VM_BUG_ON(s_n != old_stable_node);
- return tree_page;
+ return tree_folio;
}
/*
@@ -1822,28 +1823,30 @@ static struct page *stable_tree_search(struct page *page)
struct rb_node *parent;
struct ksm_stable_node *stable_node, *stable_node_dup, *stable_node_any;
struct ksm_stable_node *page_node;
+ struct folio *folio;
- page_node = page_stable_node(page);
+ folio = page_folio(page);
+ page_node = folio_stable_node(folio);
if (page_node && page_node->head != &migrate_nodes) {
/* ksm page forked */
- get_page(page);
- return page;
+ folio_get(folio);
+ return &folio->page;
}
- nid = get_kpfn_nid(page_to_pfn(page));
+ nid = get_kpfn_nid(folio_pfn(folio));
root = root_stable_tree + nid;
again:
new = &root->rb_node;
parent = NULL;
while (*new) {
- struct page *tree_page;
+ struct folio *tree_folio;
int ret;
cond_resched();
stable_node = rb_entry(*new, struct ksm_stable_node, node);
stable_node_any = NULL;
- tree_page = chain_prune(&stable_node_dup, &stable_node, root);
+ tree_folio = chain_prune(&stable_node_dup, &stable_node, root);
/*
* NOTE: stable_node may have been freed by
* chain_prune() if the returned stable_node_dup is
@@ -1877,14 +1880,14 @@ again:
* write protected at all times. Any will work
* fine to continue the walk.
*/
- tree_page = get_ksm_page(stable_node_any,
- GET_KSM_PAGE_NOLOCK);
+ tree_folio = ksm_get_folio(stable_node_any,
+ KSM_GET_FOLIO_NOLOCK);
}
VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
- if (!tree_page) {
+ if (!tree_folio) {
/*
* If we walked over a stale stable_node,
- * get_ksm_page() will call rb_erase() and it
+ * ksm_get_folio() will call rb_erase() and it
* may rebalance the tree from under us. So
* restart the search from scratch. Returning
* NULL would be safe too, but we'd generate
@@ -1894,8 +1897,8 @@ again:
goto again;
}
- ret = memcmp_pages(page, tree_page);
- put_page(tree_page);
+ ret = memcmp_pages(page, &tree_folio->page);
+ folio_put(tree_folio);
parent = *new;
if (ret < 0)
@@ -1906,12 +1909,15 @@ again:
if (page_node) {
VM_BUG_ON(page_node->head != &migrate_nodes);
/*
- * Test if the migrated page should be merged
- * into a stable node dup. If the mapcount is
- * 1 we can migrate it with another KSM page
- * without adding it to the chain.
+ * If the mapcount of our migrated KSM folio is
+ * at most 1, we can merge it with another
+ * KSM folio where we know that we have space
+ * for one more mapping without exceeding the
+ * ksm_max_page_sharing limit: see
+ * chain_prune(). This way, we can avoid adding
+ * this stable node to the chain.
*/
- if (page_mapcount(page) > 1)
+ if (folio_mapcount(folio) > 1)
goto chain_append;
}
@@ -1938,26 +1944,26 @@ again:
* It would be more elegant to return stable_node
* than kpage, but that involves more changes.
*/
- tree_page = get_ksm_page(stable_node_dup,
- GET_KSM_PAGE_TRYLOCK);
+ tree_folio = ksm_get_folio(stable_node_dup,
+ KSM_GET_FOLIO_TRYLOCK);
- if (PTR_ERR(tree_page) == -EBUSY)
+ if (PTR_ERR(tree_folio) == -EBUSY)
return ERR_PTR(-EBUSY);
- if (unlikely(!tree_page))
+ if (unlikely(!tree_folio))
/*
* The tree may have been rebalanced,
* so re-evaluate parent and new.
*/
goto again;
- unlock_page(tree_page);
+ folio_unlock(tree_folio);
if (get_kpfn_nid(stable_node_dup->kpfn) !=
NUMA(stable_node_dup->nid)) {
- put_page(tree_page);
+ folio_put(tree_folio);
goto replace;
}
- return tree_page;
+ return &tree_folio->page;
}
}
@@ -1970,8 +1976,8 @@ again:
rb_insert_color(&page_node->node, root);
out:
if (is_page_sharing_candidate(page_node)) {
- get_page(page);
- return page;
+ folio_get(folio);
+ return &folio->page;
} else
return NULL;
@@ -1996,12 +2002,12 @@ replace:
&page_node->node,
root);
if (is_page_sharing_candidate(page_node))
- get_page(page);
+ folio_get(folio);
else
- page = NULL;
+ folio = NULL;
} else {
rb_erase(&stable_node_dup->node, root);
- page = NULL;
+ folio = NULL;
}
} else {
VM_BUG_ON(!is_stable_node_chain(stable_node));
@@ -2012,16 +2018,16 @@ replace:
DO_NUMA(page_node->nid = nid);
stable_node_chain_add_dup(page_node, stable_node);
if (is_page_sharing_candidate(page_node))
- get_page(page);
+ folio_get(folio);
else
- page = NULL;
+ folio = NULL;
} else {
- page = NULL;
+ folio = NULL;
}
}
stable_node_dup->head = &migrate_nodes;
list_add(&stable_node_dup->list, stable_node_dup->head);
- return page;
+ return &folio->page;
chain_append:
/* stable_node_dup could be null if it reached the limit */
@@ -2064,7 +2070,7 @@ chain_append:
* This function returns the stable tree node just allocated on success,
* NULL otherwise.
*/
-static struct ksm_stable_node *stable_tree_insert(struct page *kpage)
+static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio)
{
int nid;
unsigned long kpfn;
@@ -2074,7 +2080,7 @@ static struct ksm_stable_node *stable_tree_insert(struct page *kpage)
struct ksm_stable_node *stable_node, *stable_node_dup, *stable_node_any;
bool need_chain = false;
- kpfn = page_to_pfn(kpage);
+ kpfn = folio_pfn(kfolio);
nid = get_kpfn_nid(kpfn);
root = root_stable_tree + nid;
again:
@@ -2082,13 +2088,13 @@ again:
new = &root->rb_node;
while (*new) {
- struct page *tree_page;
+ struct folio *tree_folio;
int ret;
cond_resched();
stable_node = rb_entry(*new, struct ksm_stable_node, node);
stable_node_any = NULL;
- tree_page = chain(&stable_node_dup, stable_node, root);
+ tree_folio = chain(&stable_node_dup, stable_node, root);
if (!stable_node_dup) {
/*
* Either all stable_node dups were full in
@@ -2110,14 +2116,14 @@ again:
* write protected at all times. Any will work
* fine to continue the walk.
*/
- tree_page = get_ksm_page(stable_node_any,
- GET_KSM_PAGE_NOLOCK);
+ tree_folio = ksm_get_folio(stable_node_any,
+ KSM_GET_FOLIO_NOLOCK);
}
VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
- if (!tree_page) {
+ if (!tree_folio) {
/*
* If we walked over a stale stable_node,
- * get_ksm_page() will call rb_erase() and it
+ * ksm_get_folio() will call rb_erase() and it
* may rebalance the tree from under us. So
* restart the search from scratch. Returning
* NULL would be safe too, but we'd generate
@@ -2127,8 +2133,8 @@ again:
goto again;
}
- ret = memcmp_pages(kpage, tree_page);
- put_page(tree_page);
+ ret = memcmp_pages(&kfolio->page, &tree_folio->page);
+ folio_put(tree_folio);
parent = *new;
if (ret < 0)
@@ -2147,7 +2153,7 @@ again:
INIT_HLIST_HEAD(&stable_node_dup->hlist);
stable_node_dup->kpfn = kpfn;
- set_page_stable_node(kpage, stable_node_dup);
+ folio_set_stable_node(kfolio, stable_node_dup);
stable_node_dup->rmap_hlist_len = 0;
DO_NUMA(stable_node_dup->nid = nid);
if (!need_chain) {
@@ -2425,7 +2431,7 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
* node in the stable tree and add both rmap_items.
*/
lock_page(kpage);
- stable_node = stable_tree_insert(kpage);
+ stable_node = stable_tree_insert(page_folio(kpage));
if (stable_node) {
stable_tree_append(tree_rmap_item, stable_node,
false);
@@ -2597,14 +2603,14 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
*/
if (!ksm_merge_across_nodes) {
struct ksm_stable_node *stable_node, *next;
- struct page *page;
+ struct folio *folio;
list_for_each_entry_safe(stable_node, next,
&migrate_nodes, list) {
- page = get_ksm_page(stable_node,
- GET_KSM_PAGE_NOLOCK);
- if (page)
- put_page(page);
+ folio = ksm_get_folio(stable_node,
+ KSM_GET_FOLIO_NOLOCK);
+ if (folio)
+ folio_put(folio);
cond_resched();
}
}
@@ -3172,12 +3178,11 @@ again:
/*
* Collect processes when the error hit an ksm page.
*/
-void collect_procs_ksm(struct page *page, struct list_head *to_kill,
- int force_early)
+void collect_procs_ksm(struct folio *folio, struct page *page,
+ struct list_head *to_kill, int force_early)
{
struct ksm_stable_node *stable_node;
struct ksm_rmap_item *rmap_item;
- struct folio *folio = page_folio(page);
struct vm_area_struct *vma;
struct task_struct *tsk;
@@ -3229,11 +3234,11 @@ void folio_migrate_ksm(struct folio *newfolio, struct folio *folio)
/*
* newfolio->mapping was set in advance; now we need smp_wmb()
* to make sure that the new stable_node->kpfn is visible
- * to get_ksm_page() before it can see that folio->mapping
+ * to ksm_get_folio() before it can see that folio->mapping
* has gone stale (or that folio_test_swapcache has been cleared).
*/
smp_wmb();
- set_page_stable_node(&folio->page, NULL);
+ folio_set_stable_node(folio, NULL);
}
}
#endif /* CONFIG_MIGRATION */
@@ -3256,7 +3261,7 @@ static bool stable_node_dup_remove_range(struct ksm_stable_node *stable_node,
if (stable_node->kpfn >= start_pfn &&
stable_node->kpfn < end_pfn) {
/*
- * Don't get_ksm_page, page has already gone:
+ * Don't ksm_get_folio, page has already gone:
* which is why we keep kpfn instead of page*
*/
remove_node_from_stable_tree(stable_node);
@@ -3344,7 +3349,7 @@ static int ksm_memory_callback(struct notifier_block *self,
* Most of the work is done by page migration; but there might
* be a few stable_nodes left over, still pointing to struct
* pages which have been offlined: prune those from the tree,
- * otherwise get_ksm_page() might later try to access a
+ * otherwise ksm_get_folio() might later try to access a
* non-existent struct page.
*/
ksm_check_stable_tree(mn->start_pfn,
diff --git a/mm/madvise.c b/mm/madvise.c
index 44a498c94158c8..c49fef453491d0 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -321,6 +321,18 @@ static inline bool can_do_file_pageout(struct vm_area_struct *vma)
file_permission(vma->vm_file, MAY_WRITE) == 0;
}
+static inline int madvise_folio_pte_batch(unsigned long addr, unsigned long end,
+ struct folio *folio, pte_t *ptep,
+ pte_t pte, bool *any_young,
+ bool *any_dirty)
+{
+ const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
+ int max_nr = (end - addr) / PAGE_SIZE;
+
+ return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
+ any_young, any_dirty);
+}
+
static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
@@ -336,6 +348,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
LIST_HEAD(folio_list);
bool pageout_anon_only_filter;
unsigned int batch_count = 0;
+ int nr;
if (fatal_signal_pending(current))
return -EINTR;
@@ -363,10 +376,10 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
goto huge_unlock;
}
- folio = pfn_folio(pmd_pfn(orig_pmd));
+ folio = pmd_folio(orig_pmd);
/* Do not interfere with other mappings of this folio */
- if (folio_estimated_sharers(folio) != 1)
+ if (folio_likely_mapped_shared(folio))
goto huge_unlock;
if (pageout_anon_only_filter && !folio_test_anon(folio))
@@ -423,7 +436,8 @@ restart:
return 0;
flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
- for (; addr < end; pte++, addr += PAGE_SIZE) {
+ for (; addr < end; pte += nr, addr += nr * PAGE_SIZE) {
+ nr = 1;
ptent = ptep_get(pte);
if (++batch_count == SWAP_CLUSTER_MAX) {
@@ -447,55 +461,64 @@ restart:
continue;
/*
- * Creating a THP page is expensive so split it only if we
- * are sure it's worth. Split it if we are only owner.
+ * If we encounter a large folio, only split it if it is not
+ * fully mapped within the range we are operating on. Otherwise
+ * leave it as is so that it can be swapped out whole. If we
+ * fail to split a folio, leave it in place and advance to the
+ * next pte in the range.
*/
if (folio_test_large(folio)) {
- int err;
-
- if (folio_estimated_sharers(folio) > 1)
- break;
- if (pageout_anon_only_filter && !folio_test_anon(folio))
- break;
- if (!folio_trylock(folio))
- break;
- folio_get(folio);
- arch_leave_lazy_mmu_mode();
- pte_unmap_unlock(start_pte, ptl);
- start_pte = NULL;
- err = split_folio(folio);
- folio_unlock(folio);
- folio_put(folio);
- if (err)
- break;
- start_pte = pte =
- pte_offset_map_lock(mm, pmd, addr, &ptl);
- if (!start_pte)
- break;
- arch_enter_lazy_mmu_mode();
- pte--;
- addr -= PAGE_SIZE;
- continue;
+ bool any_young;
+
+ nr = madvise_folio_pte_batch(addr, end, folio, pte,
+ ptent, &any_young, NULL);
+ if (any_young)
+ ptent = pte_mkyoung(ptent);
+
+ if (nr < folio_nr_pages(folio)) {
+ int err;
+
+ if (folio_likely_mapped_shared(folio))
+ continue;
+ if (pageout_anon_only_filter && !folio_test_anon(folio))
+ continue;
+ if (!folio_trylock(folio))
+ continue;
+ folio_get(folio);
+ arch_leave_lazy_mmu_mode();
+ pte_unmap_unlock(start_pte, ptl);
+ start_pte = NULL;
+ err = split_folio(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+ start_pte = pte =
+ pte_offset_map_lock(mm, pmd, addr, &ptl);
+ if (!start_pte)
+ break;
+ arch_enter_lazy_mmu_mode();
+ if (!err)
+ nr = 0;
+ continue;
+ }
}
/*
* Do not interfere with other mappings of this folio and
- * non-LRU folio.
+ * non-LRU folio. If we have a large folio at this point, we
+ * know it is fully mapped so if its mapcount is the same as its
+ * number of pages, it must be exclusive.
*/
- if (!folio_test_lru(folio) || folio_mapcount(folio) != 1)
+ if (!folio_test_lru(folio) ||
+ folio_mapcount(folio) != folio_nr_pages(folio))
continue;
if (pageout_anon_only_filter && !folio_test_anon(folio))
continue;
- VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
-
if (!pageout && pte_young(ptent)) {
- ptent = ptep_get_and_clear_full(mm, addr, pte,
- tlb->fullmm);
- ptent = pte_mkold(ptent);
- set_pte_at(mm, addr, pte, ptent);
- tlb_remove_tlb_entry(tlb, pte, addr);
+ clear_young_dirty_ptes(vma, addr, pte, nr,
+ CYDP_CLEAR_YOUNG);
+ tlb_remove_tlb_entries(tlb, pte, nr, addr);
}
/*
@@ -620,6 +643,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
+ const cydp_t cydp_flags = CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY;
struct mmu_gather *tlb = walk->private;
struct mm_struct *mm = tlb->mm;
struct vm_area_struct *vma = walk->vma;
@@ -628,6 +652,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
struct folio *folio;
int nr_swap = 0;
unsigned long next;
+ int nr, max_nr;
next = pmd_addr_end(addr, end);
if (pmd_trans_huge(*pmd))
@@ -640,7 +665,8 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
return 0;
flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
- for (; addr != end; pte++, addr += PAGE_SIZE) {
+ for (; addr != end; pte += nr, addr += PAGE_SIZE * nr) {
+ nr = 1;
ptent = ptep_get(pte);
if (pte_none(ptent))
@@ -655,9 +681,11 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
entry = pte_to_swp_entry(ptent);
if (!non_swap_entry(entry)) {
- nr_swap--;
- free_swap_and_cache(entry);
- pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+ max_nr = (end - addr) / PAGE_SIZE;
+ nr = swap_pte_batch(pte, max_nr, ptent);
+ nr_swap -= nr;
+ free_swap_and_cache_nr(entry, nr);
+ clear_not_present_full_ptes(mm, addr, pte, nr, tlb->fullmm);
} else if (is_hwpoison_entry(entry) ||
is_poisoned_swp_entry(entry)) {
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
@@ -670,44 +698,57 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
continue;
/*
- * If pmd isn't transhuge but the folio is large and
- * is owned by only this process, split it and
- * deactivate all pages.
+ * If we encounter a large folio, only split it if it is not
+ * fully mapped within the range we are operating on. Otherwise
+ * leave it as is so that it can be marked as lazyfree. If we
+ * fail to split a folio, leave it in place and advance to the
+ * next pte in the range.
*/
if (folio_test_large(folio)) {
- int err;
+ bool any_young, any_dirty;
- if (folio_estimated_sharers(folio) != 1)
- break;
- if (!folio_trylock(folio))
- break;
- folio_get(folio);
- arch_leave_lazy_mmu_mode();
- pte_unmap_unlock(start_pte, ptl);
- start_pte = NULL;
- err = split_folio(folio);
- folio_unlock(folio);
- folio_put(folio);
- if (err)
- break;
- start_pte = pte =
- pte_offset_map_lock(mm, pmd, addr, &ptl);
- if (!start_pte)
- break;
- arch_enter_lazy_mmu_mode();
- pte--;
- addr -= PAGE_SIZE;
- continue;
+ nr = madvise_folio_pte_batch(addr, end, folio, pte,
+ ptent, &any_young, &any_dirty);
+
+ if (nr < folio_nr_pages(folio)) {
+ int err;
+
+ if (folio_likely_mapped_shared(folio))
+ continue;
+ if (!folio_trylock(folio))
+ continue;
+ folio_get(folio);
+ arch_leave_lazy_mmu_mode();
+ pte_unmap_unlock(start_pte, ptl);
+ start_pte = NULL;
+ err = split_folio(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+ pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ start_pte = pte;
+ if (!start_pte)
+ break;
+ arch_enter_lazy_mmu_mode();
+ if (!err)
+ nr = 0;
+ continue;
+ }
+
+ if (any_young)
+ ptent = pte_mkyoung(ptent);
+ if (any_dirty)
+ ptent = pte_mkdirty(ptent);
}
if (folio_test_swapcache(folio) || folio_test_dirty(folio)) {
if (!folio_trylock(folio))
continue;
/*
- * If folio is shared with others, we mustn't clear
- * the folio's dirty flag.
+ * If we have a large folio at this point, we know it is
+ * fully mapped so if its mapcount is the same as its
+ * number of pages, it must be exclusive.
*/
- if (folio_mapcount(folio) != 1) {
+ if (folio_mapcount(folio) != folio_nr_pages(folio)) {
folio_unlock(folio);
continue;
}
@@ -723,19 +764,8 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
}
if (pte_young(ptent) || pte_dirty(ptent)) {
- /*
- * Some of architecture(ex, PPC) don't update TLB
- * with set_pte_at and tlb_remove_tlb_entry so for
- * the portability, remap the pte with old|clean
- * after pte clearing.
- */
- ptent = ptep_get_and_clear_full(mm, addr, pte,
- tlb->fullmm);
-
- ptent = pte_mkold(ptent);
- ptent = pte_mkclean(ptent);
- set_pte_at(mm, addr, pte, ptent);
- tlb_remove_tlb_entry(tlb, pte, addr);
+ clear_young_dirty_ptes(vma, addr, pte, nr, cydp_flags);
+ tlb_remove_tlb_entries(tlb, pte, nr, addr);
}
folio_mark_lazyfree(folio);
}
@@ -901,39 +931,19 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
return -EINVAL;
}
-static long madvise_populate(struct vm_area_struct *vma,
- struct vm_area_struct **prev,
- unsigned long start, unsigned long end,
- int behavior)
+static long madvise_populate(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int behavior)
{
const bool write = behavior == MADV_POPULATE_WRITE;
- struct mm_struct *mm = vma->vm_mm;
- unsigned long tmp_end;
int locked = 1;
long pages;
- *prev = vma;
-
while (start < end) {
- /*
- * We might have temporarily dropped the lock. For example,
- * our VMA might have been split.
- */
- if (!vma || start >= vma->vm_end) {
- vma = vma_lookup(mm, start);
- if (!vma)
- return -ENOMEM;
- }
-
- tmp_end = min_t(unsigned long, end, vma->vm_end);
/* Populate (prefault) page tables readable/writable. */
- pages = faultin_vma_page_range(vma, start, tmp_end, write,
- &locked);
+ pages = faultin_page_range(mm, start, end, write, &locked);
if (!locked) {
mmap_read_lock(mm);
locked = 1;
- *prev = NULL;
- vma = NULL;
}
if (pages < 0) {
switch (pages) {
@@ -949,7 +959,7 @@ static long madvise_populate(struct vm_area_struct *vma,
pr_warn_once("%s: unhandled return value: %ld\n",
__func__, pages);
fallthrough;
- case -ENOMEM:
+ case -ENOMEM: /* No VMA or out of memory. */
return -ENOMEM;
}
}
@@ -1034,9 +1044,6 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
case MADV_DONTNEED:
case MADV_DONTNEED_LOCKED:
return madvise_dontneed_free(vma, prev, start, end, behavior);
- case MADV_POPULATE_READ:
- case MADV_POPULATE_WRITE:
- return madvise_populate(vma, prev, start, end, behavior);
case MADV_NORMAL:
new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
break;
@@ -1394,6 +1401,7 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
* -EIO - an I/O error occurred while paging in data.
* -EBADF - map exists, but area maps something that isn't a file.
* -EAGAIN - a kernel resource was temporarily unavailable.
+ * -EPERM - memory is sealed.
*/
int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior)
{
@@ -1437,10 +1445,29 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
start = untagged_addr_remote(mm, start);
end = start + len;
+ /*
+ * Check if the address range is sealed for do_madvise().
+ * can_modify_mm_madv assumes we have acquired the lock on MM.
+ */
+ if (unlikely(!can_modify_mm_madv(mm, start, end, behavior))) {
+ error = -EPERM;
+ goto out;
+ }
+
blk_start_plug(&plug);
- error = madvise_walk_vmas(mm, start, end, behavior,
- madvise_vma_behavior);
+ switch (behavior) {
+ case MADV_POPULATE_READ:
+ case MADV_POPULATE_WRITE:
+ error = madvise_populate(mm, start, end, behavior);
+ break;
+ default:
+ error = madvise_walk_vmas(mm, start, end, behavior,
+ madvise_vma_behavior);
+ break;
+ }
blk_finish_plug(&plug);
+
+out:
if (write)
mmap_write_unlock(mm);
else
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fabce2b50c6955..602ad5faad4dd1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -53,6 +53,7 @@
#include <linux/sort.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
+#include <linux/parser.h>
#include <linux/vmpressure.h>
#include <linux/memremap.h>
#include <linux/mm_inline.h>
@@ -350,7 +351,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
/*
* A lot of the calls to the cache allocation functions are expected to be
- * inlined by the compiler. Since the calls to memcg_slab_pre_alloc_hook() are
+ * inlined by the compiler. Since the calls to memcg_slab_post_alloc_hook() are
* conditional to this static branch, we'll have to allow modules that does
* kmem_cache_alloc and the such to see this symbol as well
*/
@@ -715,6 +716,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
{
struct memcg_vmstats_percpu *statc;
int cpu = smp_processor_id();
+ unsigned int stats_updates;
if (!val)
return;
@@ -722,8 +724,9 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
cgroup_rstat_updated(memcg->css.cgroup, cpu);
statc = this_cpu_ptr(memcg->vmstats_percpu);
for (; statc; statc = statc->parent) {
- statc->stats_updates += abs(val);
- if (statc->stats_updates < MEMCG_CHARGE_BATCH)
+ stats_updates = READ_ONCE(statc->stats_updates) + abs(val);
+ WRITE_ONCE(statc->stats_updates, stats_updates);
+ if (stats_updates < MEMCG_CHARGE_BATCH)
continue;
/*
@@ -731,9 +734,9 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
* redundant. Avoid the overhead of the atomic update.
*/
if (!memcg_vmstats_needs_flush(statc->vmstats))
- atomic64_add(statc->stats_updates,
+ atomic64_add(stats_updates,
&statc->vmstats->stats_updates);
- statc->stats_updates = 0;
+ WRITE_ONCE(statc->stats_updates, 0);
}
}
@@ -836,8 +839,9 @@ static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
return x;
}
-void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
- int val)
+static void __mod_memcg_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx,
+ int val)
{
struct mem_cgroup_per_node *pn;
struct mem_cgroup *memcg;
@@ -859,10 +863,13 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
case NR_ANON_THPS:
case NR_SHMEM_PMDMAPPED:
case NR_FILE_PMDMAPPED:
- WARN_ON_ONCE(!in_task());
+ if (WARN_ON_ONCE(!in_task()))
+ pr_warn("stat item index: %d\n", idx);
break;
default:
VM_WARN_ON_IRQS_ENABLED();
+ if (!irqs_disabled())
+ pr_warn("stat item index: %d\n", idx);
}
}
@@ -2369,8 +2376,7 @@ static void drain_local_stock(struct work_struct *dummy)
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
- if (old)
- obj_cgroup_put(old);
+ obj_cgroup_put(old);
}
/*
@@ -2475,7 +2481,8 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
psi_memstall_enter(&pflags);
nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
gfp_mask,
- MEMCG_RECLAIM_MAY_SWAP);
+ MEMCG_RECLAIM_MAY_SWAP,
+ NULL);
psi_memstall_leave(&pflags);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
@@ -2781,7 +2788,7 @@ retry:
psi_memstall_enter(&pflags);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
- gfp_mask, reclaim_options);
+ gfp_mask, reclaim_options, NULL);
psi_memstall_leave(&pflags);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
@@ -2978,88 +2985,44 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
}
#ifdef CONFIG_MEMCG_KMEM
-/*
- * The allocated objcg pointers array is not accounted directly.
- * Moreover, it should not come from DMA buffer and is not readily
- * reclaimable. So those GFP bits should be masked off.
- */
-#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \
- __GFP_ACCOUNT | __GFP_NOFAIL)
-/*
- * mod_objcg_mlstate() may be called with irq enabled, so
- * mod_memcg_lruvec_state() should be used.
- */
-static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
- struct pglist_data *pgdat,
- enum node_stat_item idx, int nr)
+static inline void __mod_objcg_mlstate(struct obj_cgroup *objcg,
+ struct pglist_data *pgdat,
+ enum node_stat_item idx, int nr)
{
struct mem_cgroup *memcg;
struct lruvec *lruvec;
+ lockdep_assert_irqs_disabled();
+
rcu_read_lock();
memcg = obj_cgroup_memcg(objcg);
lruvec = mem_cgroup_lruvec(memcg, pgdat);
- mod_memcg_lruvec_state(lruvec, idx, nr);
+ __mod_memcg_lruvec_state(lruvec, idx, nr);
rcu_read_unlock();
}
-int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
- gfp_t gfp, bool new_slab)
-{
- unsigned int objects = objs_per_slab(s, slab);
- unsigned long memcg_data;
- void *vec;
-
- gfp &= ~OBJCGS_CLEAR_MASK;
- vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
- slab_nid(slab));
- if (!vec)
- return -ENOMEM;
-
- memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS;
- if (new_slab) {
- /*
- * If the slab is brand new and nobody can yet access its
- * memcg_data, no synchronization is required and memcg_data can
- * be simply assigned.
- */
- slab->memcg_data = memcg_data;
- } else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) {
- /*
- * If the slab is already in use, somebody can allocate and
- * assign obj_cgroups in parallel. In this case the existing
- * objcg vector should be reused.
- */
- kfree(vec);
- return 0;
- }
-
- kmemleak_not_leak(vec);
- return 0;
-}
-
static __always_inline
struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p)
{
/*
* Slab objects are accounted individually, not per-page.
* Memcg membership data for each individual object is saved in
- * slab->memcg_data.
+ * slab->obj_exts.
*/
if (folio_test_slab(folio)) {
- struct obj_cgroup **objcgs;
+ struct slabobj_ext *obj_exts;
struct slab *slab;
unsigned int off;
slab = folio_slab(folio);
- objcgs = slab_objcgs(slab);
- if (!objcgs)
+ obj_exts = slab_obj_exts(slab);
+ if (!obj_exts)
return NULL;
off = obj_to_index(slab->slab_cache, slab, p);
- if (objcgs[off])
- return obj_cgroup_memcg(objcgs[off]);
+ if (obj_exts[off].objcg)
+ return obj_cgroup_memcg(obj_exts[off].objcg);
return NULL;
}
@@ -3067,7 +3030,7 @@ struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p)
/*
* folio_memcg_check() is used here, because in theory we can encounter
* a folio where the slab flag has been cleared already, but
- * slab->memcg_data has not been freed yet
+ * slab->obj_exts has not been freed yet
* folio_memcg_check() will guarantee that a proper memory
* cgroup pointer or NULL will be returned.
*/
@@ -3145,8 +3108,7 @@ static struct obj_cgroup *current_objcg_update(void)
if (old) {
old = (struct obj_cgroup *)
((unsigned long)old & ~CURRENT_OBJCG_UPDATE_FLAG);
- if (old)
- obj_cgroup_put(old);
+ obj_cgroup_put(old);
old = NULL;
}
@@ -3356,7 +3318,7 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
obj_cgroup_put(objcg);
}
-void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
+static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr)
{
struct memcg_stock_pcp *stock;
@@ -3384,12 +3346,12 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
struct pglist_data *oldpg = stock->cached_pgdat;
if (stock->nr_slab_reclaimable_b) {
- mod_objcg_mlstate(objcg, oldpg, NR_SLAB_RECLAIMABLE_B,
+ __mod_objcg_mlstate(objcg, oldpg, NR_SLAB_RECLAIMABLE_B,
stock->nr_slab_reclaimable_b);
stock->nr_slab_reclaimable_b = 0;
}
if (stock->nr_slab_unreclaimable_b) {
- mod_objcg_mlstate(objcg, oldpg, NR_SLAB_UNRECLAIMABLE_B,
+ __mod_objcg_mlstate(objcg, oldpg, NR_SLAB_UNRECLAIMABLE_B,
stock->nr_slab_unreclaimable_b);
stock->nr_slab_unreclaimable_b = 0;
}
@@ -3415,11 +3377,10 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
}
}
if (nr)
- mod_objcg_mlstate(objcg, pgdat, idx, nr);
+ __mod_objcg_mlstate(objcg, pgdat, idx, nr);
local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
- if (old)
- obj_cgroup_put(old);
+ obj_cgroup_put(old);
}
static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
@@ -3482,13 +3443,13 @@ static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
*/
if (stock->nr_slab_reclaimable_b || stock->nr_slab_unreclaimable_b) {
if (stock->nr_slab_reclaimable_b) {
- mod_objcg_mlstate(old, stock->cached_pgdat,
+ __mod_objcg_mlstate(old, stock->cached_pgdat,
NR_SLAB_RECLAIMABLE_B,
stock->nr_slab_reclaimable_b);
stock->nr_slab_reclaimable_b = 0;
}
if (stock->nr_slab_unreclaimable_b) {
- mod_objcg_mlstate(old, stock->cached_pgdat,
+ __mod_objcg_mlstate(old, stock->cached_pgdat,
NR_SLAB_UNRECLAIMABLE_B,
stock->nr_slab_unreclaimable_b);
stock->nr_slab_unreclaimable_b = 0;
@@ -3546,8 +3507,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
}
local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
- if (old)
- obj_cgroup_put(old);
+ obj_cgroup_put(old);
if (nr_pages)
obj_cgroup_uncharge_pages(objcg, nr_pages);
@@ -3602,6 +3562,96 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
refill_obj_stock(objcg, size, true);
}
+static inline size_t obj_full_size(struct kmem_cache *s)
+{
+ /*
+ * For each accounted object there is an extra space which is used
+ * to store obj_cgroup membership. Charge it too.
+ */
+ return s->size + sizeof(struct obj_cgroup *);
+}
+
+bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+ gfp_t flags, size_t size, void **p)
+{
+ struct obj_cgroup *objcg;
+ struct slab *slab;
+ unsigned long off;
+ size_t i;
+
+ /*
+ * The obtained objcg pointer is safe to use within the current scope,
+ * defined by current task or set_active_memcg() pair.
+ * obj_cgroup_get() is used to get a permanent reference.
+ */
+ objcg = current_obj_cgroup();
+ if (!objcg)
+ return true;
+
+ /*
+ * slab_alloc_node() avoids the NULL check, so we might be called with a
+ * single NULL object. kmem_cache_alloc_bulk() aborts if it can't fill
+ * the whole requested size.
+ * return success as there's nothing to free back
+ */
+ if (unlikely(*p == NULL))
+ return true;
+
+ flags &= gfp_allowed_mask;
+
+ if (lru) {
+ int ret;
+ struct mem_cgroup *memcg;
+
+ memcg = get_mem_cgroup_from_objcg(objcg);
+ ret = memcg_list_lru_alloc(memcg, lru, flags);
+ css_put(&memcg->css);
+
+ if (ret)
+ return false;
+ }
+
+ if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
+ return false;
+
+ for (i = 0; i < size; i++) {
+ slab = virt_to_slab(p[i]);
+
+ if (!slab_obj_exts(slab) &&
+ alloc_slab_obj_exts(slab, s, flags, false)) {
+ obj_cgroup_uncharge(objcg, obj_full_size(s));
+ continue;
+ }
+
+ off = obj_to_index(s, slab, p[i]);
+ obj_cgroup_get(objcg);
+ slab_obj_exts(slab)[off].objcg = objcg;
+ mod_objcg_state(objcg, slab_pgdat(slab),
+ cache_vmstat_idx(s), obj_full_size(s));
+ }
+
+ return true;
+}
+
+void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+ void **p, int objects, struct slabobj_ext *obj_exts)
+{
+ for (int i = 0; i < objects; i++) {
+ struct obj_cgroup *objcg;
+ unsigned int off;
+
+ off = obj_to_index(s, slab, p[i]);
+ objcg = obj_exts[off].objcg;
+ if (!objcg)
+ continue;
+
+ obj_exts[off].objcg = NULL;
+ obj_cgroup_uncharge(objcg, obj_full_size(s));
+ mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
+ -obj_full_size(s));
+ obj_cgroup_put(objcg);
+ }
+}
#endif /* CONFIG_MEMCG_KMEM */
/*
@@ -3709,7 +3759,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
}
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
- memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
+ memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP, NULL)) {
ret = -EBUSY;
break;
}
@@ -3823,7 +3873,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
return -EINTR;
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
- MEMCG_RECLAIM_MAY_SWAP))
+ MEMCG_RECLAIM_MAY_SWAP, NULL))
nr_retries--;
}
@@ -4386,7 +4436,7 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
- if (val > 200)
+ if (val > MAX_SWAPPINESS)
return -EINVAL;
if (!mem_cgroup_is_root(memcg))
@@ -5468,8 +5518,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
{
int node;
- if (memcg->orig_objcg)
- obj_cgroup_put(memcg->orig_objcg);
+ obj_cgroup_put(memcg->orig_objcg);
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
@@ -5845,7 +5894,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
}
}
}
- statc->stats_updates = 0;
+ WRITE_ONCE(statc->stats_updates, 0);
/* We are in a per-cpu loop here, only do the atomic write once */
if (atomic64_read(&memcg->vmstats->stats_updates))
atomic64_set(&memcg->vmstats->stats_updates, 0);
@@ -6620,8 +6669,7 @@ static void mem_cgroup_exit(struct task_struct *task)
objcg = (struct obj_cgroup *)
((unsigned long)objcg & ~CURRENT_OBJCG_UPDATE_FLAG);
- if (objcg)
- obj_cgroup_put(objcg);
+ obj_cgroup_put(objcg);
/*
* Some kernel allocations can happen after this point,
@@ -6789,7 +6837,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
}
reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
- GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
+ GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP, NULL);
if (!reclaimed && !nr_retries--)
break;
@@ -6838,7 +6886,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
if (nr_reclaims) {
if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
- GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
+ GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP, NULL))
nr_reclaims--;
continue;
}
@@ -6968,19 +7016,50 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}
+enum {
+ MEMORY_RECLAIM_SWAPPINESS = 0,
+ MEMORY_RECLAIM_NULL,
+};
+
+static const match_table_t tokens = {
+ { MEMORY_RECLAIM_SWAPPINESS, "swappiness=%d"},
+ { MEMORY_RECLAIM_NULL, NULL },
+};
+
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
unsigned long nr_to_reclaim, nr_reclaimed = 0;
+ int swappiness = -1;
unsigned int reclaim_options;
- int err;
+ char *old_buf, *start;
+ substring_t args[MAX_OPT_ARGS];
buf = strstrip(buf);
- err = page_counter_memparse(buf, "", &nr_to_reclaim);
- if (err)
- return err;
+
+ old_buf = buf;
+ nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
+ if (buf == old_buf)
+ return -EINVAL;
+
+ buf = strstrip(buf);
+
+ while ((start = strsep(&buf, " ")) != NULL) {
+ if (!strlen(start))
+ continue;
+ switch (match_token(start, tokens, args)) {
+ case MEMORY_RECLAIM_SWAPPINESS:
+ if (match_int(&args[0], &swappiness))
+ return -EINVAL;
+ if (swappiness < MIN_SWAPPINESS || swappiness > MAX_SWAPPINESS)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
while (nr_reclaimed < nr_to_reclaim) {
@@ -7000,7 +7079,9 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
lru_add_drain_all();
reclaimed = try_to_free_mem_cgroup_pages(memcg,
- batch_size, GFP_KERNEL, reclaim_options);
+ batch_size, GFP_KERNEL,
+ reclaim_options,
+ swappiness == -1 ? NULL : &swappiness);
if (!reclaimed && !nr_retries--)
return -EAGAIN;
@@ -7448,6 +7529,9 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
struct obj_cgroup *objcg;
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
+ VM_BUG_ON_FOLIO(folio_order(folio) > 1 &&
+ !folio_test_hugetlb(folio) &&
+ !list_empty(&folio->_deferred_list), folio);
/*
* Nobody should be changing or seriously looking at
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9349948f1abfd1..16ada4fb02b799 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -141,7 +141,6 @@ static struct ctl_table memory_failure_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { }
};
/*
@@ -154,11 +153,23 @@ static int __page_handle_poison(struct page *page)
{
int ret;
- zone_pcp_disable(page_zone(page));
- ret = dissolve_free_huge_page(page);
- if (!ret)
+ /*
+ * zone_pcp_disable() can't be used here. It will
+ * hold pcp_batch_high_lock and dissolve_free_hugetlb_folio() might hold
+ * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap
+ * optimization is enabled. This will break current lock dependency
+ * chain and leads to deadlock.
+ * Disabling pcp before dissolving the page was a deterministic
+ * approach because we made sure that those pages cannot end up in any
+ * PCP list. Draining PCP lists expels those pages to the buddy system,
+ * but nothing guarantees that those pages do not get back to a PCP
+ * queue if we need to refill those.
+ */
+ ret = dissolve_free_hugetlb_folio(page_folio(page));
+ if (!ret) {
+ drain_all_pages(page_zone(page));
ret = take_page_off_buddy(page);
- zone_pcp_enable(page_zone(page));
+ }
return ret;
}
@@ -167,8 +178,8 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo
{
if (hugepage_or_freepage) {
/*
- * Doing this check for free pages is also fine since dissolve_free_huge_page
- * returns 0 for non-hugetlb pages as well.
+ * Doing this check for free pages is also fine since
+ * dissolve_free_hugetlb_folio() returns 0 for non-hugetlb folios as well.
*/
if (__page_handle_poison(page) <= 0)
/*
@@ -205,6 +216,7 @@ EXPORT_SYMBOL_GPL(hwpoison_filter_flags_value);
static int hwpoison_filter_dev(struct page *p)
{
+ struct folio *folio = page_folio(p);
struct address_space *mapping;
dev_t dev;
@@ -212,7 +224,7 @@ static int hwpoison_filter_dev(struct page *p)
hwpoison_filter_dev_minor == ~0U)
return 0;
- mapping = page_mapping(p);
+ mapping = folio_mapping(folio);
if (mapping == NULL || mapping->host == NULL)
return -EINVAL;
@@ -358,20 +370,25 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
* Unknown page type encountered. Try to check whether it can turn PageLRU by
* lru_add_drain_all.
*/
-void shake_page(struct page *p)
+void shake_folio(struct folio *folio)
{
- if (PageHuge(p))
+ if (folio_test_hugetlb(folio))
return;
/*
* TODO: Could shrink slab caches here if a lightweight range-based
* shrinker will be available.
*/
- if (PageSlab(p))
+ if (folio_test_slab(folio))
return;
lru_add_drain_all();
}
-EXPORT_SYMBOL_GPL(shake_page);
+EXPORT_SYMBOL_GPL(shake_folio);
+
+static void shake_page(struct page *page)
+{
+ shake_folio(page_folio(page));
+}
static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
unsigned long address)
@@ -416,21 +433,13 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
* not much we can do. We just print a message and ignore otherwise.
*/
-#define FSDAX_INVALID_PGOFF ULONG_MAX
-
/*
* Schedule a process for later kill.
* Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
- *
- * Note: @fsdax_pgoff is used only when @p is a fsdax page and a
- * filesystem with a memory failure handler has claimed the
- * memory_failure event. In all other cases, page->index and
- * page->mapping are sufficient for mapping the page back to its
- * corresponding user virtual address.
*/
static void __add_to_kill(struct task_struct *tsk, struct page *p,
struct vm_area_struct *vma, struct list_head *to_kill,
- unsigned long ksm_addr, pgoff_t fsdax_pgoff)
+ unsigned long addr)
{
struct to_kill *tk;
@@ -440,12 +449,10 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
return;
}
- tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
- if (is_zone_device_page(p)) {
- if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
- tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
+ tk->addr = addr;
+ if (is_zone_device_page(p))
tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
- } else
+ else
tk->size_shift = page_shift(compound_head(p));
/*
@@ -472,10 +479,12 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
}
static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
- struct vm_area_struct *vma,
- struct list_head *to_kill)
+ struct vm_area_struct *vma, struct list_head *to_kill,
+ unsigned long addr)
{
- __add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
+ if (addr == -EFAULT)
+ return;
+ __add_to_kill(tsk, p, vma, to_kill, addr);
}
#ifdef CONFIG_KSM
@@ -491,12 +500,13 @@ static bool task_in_to_kill_list(struct list_head *to_kill,
return false;
}
+
void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
struct vm_area_struct *vma, struct list_head *to_kill,
- unsigned long ksm_addr)
+ unsigned long addr)
{
if (!task_in_to_kill_list(to_kill, tsk))
- __add_to_kill(tsk, p, vma, to_kill, ksm_addr, FSDAX_INVALID_PGOFF);
+ __add_to_kill(tsk, p, vma, to_kill, addr);
}
#endif
/*
@@ -598,7 +608,6 @@ struct task_struct *task_early_kill(struct task_struct *tsk, int force_early)
static void collect_procs_anon(struct folio *folio, struct page *page,
struct list_head *to_kill, int force_early)
{
- struct vm_area_struct *vma;
struct task_struct *tsk;
struct anon_vma *av;
pgoff_t pgoff;
@@ -610,8 +619,10 @@ static void collect_procs_anon(struct folio *folio, struct page *page,
pgoff = page_to_pgoff(page);
rcu_read_lock();
for_each_process(tsk) {
+ struct vm_area_struct *vma;
struct anon_vma_chain *vmac;
struct task_struct *t = task_early_kill(tsk, force_early);
+ unsigned long addr;
if (!t)
continue;
@@ -620,9 +631,8 @@ static void collect_procs_anon(struct folio *folio, struct page *page,
vma = vmac->vma;
if (vma->vm_mm != t->mm)
continue;
- if (!page_mapped_in_vma(page, vma))
- continue;
- add_to_kill_anon_file(t, page, vma, to_kill);
+ addr = page_mapped_in_vma(page, vma);
+ add_to_kill_anon_file(t, page, vma, to_kill, addr);
}
}
rcu_read_unlock();
@@ -645,6 +655,7 @@ static void collect_procs_file(struct folio *folio, struct page *page,
pgoff = page_to_pgoff(page);
for_each_process(tsk) {
struct task_struct *t = task_early_kill(tsk, force_early);
+ unsigned long addr;
if (!t)
continue;
@@ -657,8 +668,10 @@ static void collect_procs_file(struct folio *folio, struct page *page,
* Assume applications who requested early kill want
* to be informed of all such data corruptions.
*/
- if (vma->vm_mm == t->mm)
- add_to_kill_anon_file(t, page, vma, to_kill);
+ if (vma->vm_mm != t->mm)
+ continue;
+ addr = page_address_in_vma(page, vma);
+ add_to_kill_anon_file(t, page, vma, to_kill, addr);
}
}
rcu_read_unlock();
@@ -670,7 +683,8 @@ static void add_to_kill_fsdax(struct task_struct *tsk, struct page *p,
struct vm_area_struct *vma,
struct list_head *to_kill, pgoff_t pgoff)
{
- __add_to_kill(tsk, p, vma, to_kill, 0, pgoff);
+ unsigned long addr = vma_address(vma, pgoff, 1);
+ __add_to_kill(tsk, p, vma, to_kill, addr);
}
/*
@@ -715,9 +729,9 @@ static void collect_procs(struct folio *folio, struct page *page,
{
if (!folio->mapping)
return;
- if (unlikely(PageKsm(page)))
- collect_procs_ksm(page, tokill, force_early);
- else if (PageAnon(page))
+ if (unlikely(folio_test_ksm(folio)))
+ collect_procs_ksm(folio, page, tokill, force_early);
+ else if (folio_test_anon(folio))
collect_procs_anon(folio, page, tokill, force_early);
else
collect_procs_file(folio, page, tokill, force_early);
@@ -1077,7 +1091,8 @@ out:
*/
static int me_pagecache_dirty(struct page_state *ps, struct page *p)
{
- struct address_space *mapping = page_mapping(p);
+ struct folio *folio = page_folio(p);
+ struct address_space *mapping = folio_mapping(folio);
SetPageError(p);
/* TBD: print more information about the file. */
@@ -1239,7 +1254,6 @@ static int me_huge_page(struct page_state *ps, struct page *p)
#define mlock (1UL << PG_mlocked)
#define lru (1UL << PG_lru)
#define head (1UL << PG_head)
-#define slab (1UL << PG_slab)
#define reserved (1UL << PG_reserved)
static struct page_state error_states[] = {
@@ -1249,13 +1263,6 @@ static struct page_state error_states[] = {
* PG_buddy pages only make a small fraction of all free pages.
*/
- /*
- * Could in theory check if slab page is free or if we can drop
- * currently unused objects without touching them. But just
- * treat it as standard kernel for now.
- */
- { slab, slab, MF_MSG_SLAB, me_kernel },
-
{ head, head, MF_MSG_HUGE, me_huge_page },
{ sc|dirty, sc|dirty, MF_MSG_DIRTY_SWAPCACHE, me_swapcache_dirty },
@@ -1282,7 +1289,6 @@ static struct page_state error_states[] = {
#undef mlock
#undef lru
#undef head
-#undef slab
#undef reserved
static void update_per_node_mf_stats(unsigned long pfn,
@@ -1555,24 +1561,24 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
* Do all that is necessary to remove user space mappings. Unmap
* the pages and send SIGBUS to the processes if the data was dirty.
*/
-static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
- int flags, struct page *hpage)
+static bool hwpoison_user_mappings(struct folio *folio, struct page *p,
+ unsigned long pfn, int flags)
{
- struct folio *folio = page_folio(hpage);
enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
struct address_space *mapping;
LIST_HEAD(tokill);
bool unmap_success;
int forcekill;
- bool mlocked = PageMlocked(hpage);
+ bool mlocked = folio_test_mlocked(folio);
/*
* Here we are interested only in user-mapped pages, so skip any
* other types of pages.
*/
- if (PageReserved(p) || PageSlab(p) || PageTable(p) || PageOffline(p))
+ if (folio_test_reserved(folio) || folio_test_slab(folio) ||
+ folio_test_pgtable(folio) || folio_test_offline(folio))
return true;
- if (!(PageLRU(hpage) || PageHuge(p)))
+ if (!(folio_test_lru(folio) || folio_test_hugetlb(folio)))
return true;
/*
@@ -1582,7 +1588,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
if (!page_mapped(p))
return true;
- if (PageSwapCache(p)) {
+ if (folio_test_swapcache(folio)) {
pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
ttu &= ~TTU_HWPOISON;
}
@@ -1593,11 +1599,11 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
* XXX: the dirty test could be racy: set_page_dirty() may not always
* be called inside page lock (it's recommended but not enforced).
*/
- mapping = page_mapping(hpage);
- if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
+ mapping = folio_mapping(folio);
+ if (!(flags & MF_MUST_KILL) && !folio_test_dirty(folio) && mapping &&
mapping_can_writeback(mapping)) {
- if (page_mkclean(hpage)) {
- SetPageDirty(hpage);
+ if (folio_mkclean(folio)) {
+ folio_set_dirty(folio);
} else {
ttu &= ~TTU_HWPOISON;
pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
@@ -1612,7 +1618,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
*/
collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED);
- if (PageHuge(hpage) && !PageAnon(hpage)) {
+ if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
/*
* For hugetlb pages in shared mappings, try_to_unmap
* could potentially call huge_pmd_unshare. Because of
@@ -1620,7 +1626,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
* TTU_RMAP_LOCKED to indicate we have taken the lock
* at this higher level.
*/
- mapping = hugetlb_page_mapping_lock_write(hpage);
+ mapping = hugetlb_folio_mapping_lock_write(folio);
if (mapping) {
try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
i_mmap_unlock_write(mapping);
@@ -1632,15 +1638,15 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
unmap_success = !page_mapped(p);
if (!unmap_success)
- pr_err("%#lx: failed to unmap page (mapcount=%d)\n",
- pfn, page_mapcount(p));
+ pr_err("%#lx: failed to unmap page (folio mapcount=%d)\n",
+ pfn, folio_mapcount(page_folio(p)));
/*
* try_to_unmap() might put mlocked page in lru cache, so call
* shake_page() again to ensure that it's flushed.
*/
if (mlocked)
- shake_page(hpage);
+ shake_folio(folio);
/*
* Now that the dirty bit has been propagated to the
@@ -1652,7 +1658,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
* use a more force-full uncatchable kill to prevent
* any accesses to the poisoned memory.
*/
- forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL) ||
+ forcekill = folio_test_dirty(folio) || (flags & MF_MUST_KILL) ||
!unmap_success;
kill_procs(&tokill, forcekill, !unmap_success, pfn, flags);
@@ -2096,7 +2102,7 @@ retry:
page_flags = folio->flags;
- if (!hwpoison_user_mappings(p, pfn, flags, &folio->page)) {
+ if (!hwpoison_user_mappings(folio, p, pfn, flags)) {
folio_unlock(folio);
return action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
}
@@ -2185,7 +2191,7 @@ out:
int memory_failure(unsigned long pfn, int flags)
{
struct page *p;
- struct page *hpage;
+ struct folio *folio;
struct dev_pagemap *pgmap;
int res = 0;
unsigned long page_flags;
@@ -2273,8 +2279,8 @@ try_again:
}
}
- hpage = compound_head(p);
- if (PageTransHuge(hpage)) {
+ folio = page_folio(p);
+ if (folio_test_large(folio)) {
/*
* The flag must be set after the refcount is bumped
* otherwise it may race with THP split.
@@ -2288,12 +2294,13 @@ try_again:
* or unhandlable page. The refcount is bumped iff the
* page is a valid handlable page.
*/
- SetPageHasHWPoisoned(hpage);
+ folio_set_has_hwpoisoned(folio);
if (try_to_split_thp_page(p) < 0) {
res = action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
goto unlock_mutex;
}
VM_BUG_ON_PAGE(!page_count(p), p);
+ folio = page_folio(p);
}
/*
@@ -2304,9 +2311,9 @@ try_again:
* The check (unnecessarily) ignores LRU pages being isolated and
* walked by the page reclaim code, however that's not a big loss.
*/
- shake_page(p);
+ shake_folio(folio);
- lock_page(p);
+ folio_lock(folio);
/*
* We're only intended to deal with the non-Compound page here.
@@ -2314,11 +2321,11 @@ try_again:
* race window. If this happens, we could try again to hopefully
* handle the page next round.
*/
- if (PageCompound(p)) {
+ if (folio_test_large(folio)) {
if (retry) {
ClearPageHWPoison(p);
- unlock_page(p);
- put_page(p);
+ folio_unlock(folio);
+ folio_put(folio);
flags &= ~MF_COUNT_INCREASED;
retry = false;
goto try_again;
@@ -2334,35 +2341,35 @@ try_again:
* folio_remove_rmap_*() in try_to_unmap_one(). So to determine page
* status correctly, we save a copy of the page flags at this time.
*/
- page_flags = p->flags;
+ page_flags = folio->flags;
if (hwpoison_filter(p)) {
ClearPageHWPoison(p);
- unlock_page(p);
- put_page(p);
+ folio_unlock(folio);
+ folio_put(folio);
res = -EOPNOTSUPP;
goto unlock_mutex;
}
/*
- * __munlock_folio() may clear a writeback page's LRU flag without
- * page_lock. We need wait writeback completion for this page or it
- * may trigger vfs BUG while evict inode.
+ * __munlock_folio() may clear a writeback folio's LRU flag without
+ * the folio lock. We need to wait for writeback completion for this
+ * folio or it may trigger a vfs BUG while evicting inode.
*/
- if (!PageLRU(p) && !PageWriteback(p))
+ if (!folio_test_lru(folio) && !folio_test_writeback(folio))
goto identify_page_state;
/*
* It's very difficult to mess with pages currently under IO
* and in many cases impossible, so we just avoid it here.
*/
- wait_on_page_writeback(p);
+ folio_wait_writeback(folio);
/*
* Now take care of user space mappings.
* Abort on fail: __filemap_remove_folio() assumes unmapped page.
*/
- if (!hwpoison_user_mappings(p, pfn, flags, p)) {
+ if (!hwpoison_user_mappings(folio, p, pfn, flags)) {
res = action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
goto unlock_page;
}
@@ -2370,7 +2377,8 @@ try_again:
/*
* Torn down by someone else?
*/
- if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
+ if (folio_test_lru(folio) && !folio_test_swapcache(folio) &&
+ folio->mapping == NULL) {
res = action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
goto unlock_page;
}
@@ -2380,7 +2388,7 @@ identify_page_state:
mutex_unlock(&mf_mutex);
return res;
unlock_page:
- unlock_page(p);
+ folio_unlock(folio);
unlock_mutex:
mutex_unlock(&mf_mutex);
return res;
@@ -2550,8 +2558,8 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
- if (folio_test_slab(folio) || PageTable(&folio->page) ||
- folio_test_reserved(folio) || PageOffline(&folio->page))
+ if (folio_test_slab(folio) || folio_test_pgtable(folio) ||
+ folio_test_reserved(folio) || folio_test_offline(folio))
goto unlock_mutex;
/*
@@ -2572,7 +2580,7 @@ int unpoison_memory(unsigned long pfn)
ghp = get_hwpoison_page(p, MF_UNPOISON);
if (!ghp) {
- if (PageHuge(p)) {
+ if (folio_test_hugetlb(folio)) {
huge = true;
count = folio_free_raw_hwp(folio, false);
if (count == 0)
@@ -2588,7 +2596,7 @@ int unpoison_memory(unsigned long pfn)
pfn, &unpoison_rs);
}
} else {
- if (PageHuge(p)) {
+ if (folio_test_hugetlb(folio)) {
huge = true;
count = folio_free_raw_hwp(folio, false);
if (count == 0) {
@@ -2666,6 +2674,7 @@ static int soft_offline_in_use_page(struct page *page)
struct migration_target_control mtc = {
.nid = NUMA_NO_NODE,
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+ .reason = MR_MEMORY_FAILURE,
};
if (!huge && folio_test_large(folio)) {
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 0537664620e5f5..6632102bd5c99d 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -36,6 +36,11 @@ struct node_memory_type_map {
static DEFINE_MUTEX(memory_tier_lock);
static LIST_HEAD(memory_tiers);
+/*
+ * The list is used to store all memory types that are not created
+ * by a device driver.
+ */
+static LIST_HEAD(default_memory_types);
static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
struct memory_dev_type *default_dram_type;
@@ -108,6 +113,8 @@ static struct demotion_nodes *node_demotion __read_mostly;
static BLOCKING_NOTIFIER_HEAD(mt_adistance_algorithms);
+/* The lock is used to protect `default_dram_perf*` info and nid. */
+static DEFINE_MUTEX(default_dram_perf_lock);
static bool default_dram_perf_error;
static struct access_coordinate default_dram_perf;
static int default_dram_perf_ref_nid = NUMA_NO_NODE;
@@ -505,7 +512,8 @@ static inline void __init_node_memory_type(int node, struct memory_dev_type *mem
static struct memory_tier *set_node_memory_tier(int node)
{
struct memory_tier *memtier;
- struct memory_dev_type *memtype;
+ struct memory_dev_type *memtype = default_dram_type;
+ int adist = MEMTIER_ADISTANCE_DRAM;
pg_data_t *pgdat = NODE_DATA(node);
@@ -514,7 +522,16 @@ static struct memory_tier *set_node_memory_tier(int node)
if (!node_state(node, N_MEMORY))
return ERR_PTR(-EINVAL);
- __init_node_memory_type(node, default_dram_type);
+ mt_calc_adistance(node, &adist);
+ if (!node_memory_types[node].memtype) {
+ memtype = mt_find_alloc_memory_type(adist, &default_memory_types);
+ if (IS_ERR(memtype)) {
+ memtype = default_dram_type;
+ pr_info("Failed to allocate a memory type. Fall back.\n");
+ }
+ }
+
+ __init_node_memory_type(node, memtype);
memtype = node_memory_types[node].memtype;
node_set(node, memtype->nodes);
@@ -623,6 +640,64 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype)
}
EXPORT_SYMBOL_GPL(clear_node_memory_type);
+struct memory_dev_type *mt_find_alloc_memory_type(int adist, struct list_head *memory_types)
+{
+ struct memory_dev_type *mtype;
+
+ list_for_each_entry(mtype, memory_types, list)
+ if (mtype->adistance == adist)
+ return mtype;
+
+ mtype = alloc_memory_type(adist);
+ if (IS_ERR(mtype))
+ return mtype;
+
+ list_add(&mtype->list, memory_types);
+
+ return mtype;
+}
+EXPORT_SYMBOL_GPL(mt_find_alloc_memory_type);
+
+void mt_put_memory_types(struct list_head *memory_types)
+{
+ struct memory_dev_type *mtype, *mtn;
+
+ list_for_each_entry_safe(mtype, mtn, memory_types, list) {
+ list_del(&mtype->list);
+ put_memory_type(mtype);
+ }
+}
+EXPORT_SYMBOL_GPL(mt_put_memory_types);
+
+/*
+ * This is invoked via `late_initcall()` to initialize memory tiers for
+ * CPU-less memory nodes after driver initialization, which is
+ * expected to provide `adistance` algorithms.
+ */
+static int __init memory_tier_late_init(void)
+{
+ int nid;
+
+ guard(mutex)(&memory_tier_lock);
+ for_each_node_state(nid, N_MEMORY) {
+ /*
+ * Some device drivers may have initialized memory tiers
+ * between `memory_tier_init()` and `memory_tier_late_init()`,
+ * potentially bringing online memory nodes and
+ * configuring memory tiers. Exclude them here.
+ */
+ if (node_memory_types[nid].memtype)
+ continue;
+
+ set_node_memory_tier(nid);
+ }
+
+ establish_demotion_targets();
+
+ return 0;
+}
+late_initcall(memory_tier_late_init);
+
static void dump_hmem_attrs(struct access_coordinate *coord, const char *prefix)
{
pr_info(
@@ -634,25 +709,19 @@ static void dump_hmem_attrs(struct access_coordinate *coord, const char *prefix)
int mt_set_default_dram_perf(int nid, struct access_coordinate *perf,
const char *source)
{
- int rc = 0;
-
- mutex_lock(&memory_tier_lock);
- if (default_dram_perf_error) {
- rc = -EIO;
- goto out;
- }
+ guard(mutex)(&default_dram_perf_lock);
+ if (default_dram_perf_error)
+ return -EIO;
if (perf->read_latency + perf->write_latency == 0 ||
- perf->read_bandwidth + perf->write_bandwidth == 0) {
- rc = -EINVAL;
- goto out;
- }
+ perf->read_bandwidth + perf->write_bandwidth == 0)
+ return -EINVAL;
if (default_dram_perf_ref_nid == NUMA_NO_NODE) {
default_dram_perf = *perf;
default_dram_perf_ref_nid = nid;
default_dram_perf_ref_source = kstrdup(source, GFP_KERNEL);
- goto out;
+ return 0;
}
/*
@@ -680,27 +749,25 @@ int mt_set_default_dram_perf(int nid, struct access_coordinate *perf,
pr_info(
" disable default DRAM node performance based abstract distance algorithm.\n");
default_dram_perf_error = true;
- rc = -EINVAL;
+ return -EINVAL;
}
-out:
- mutex_unlock(&memory_tier_lock);
- return rc;
+ return 0;
}
int mt_perf_to_adistance(struct access_coordinate *perf, int *adist)
{
+ guard(mutex)(&default_dram_perf_lock);
if (default_dram_perf_error)
return -EIO;
- if (default_dram_perf_ref_nid == NUMA_NO_NODE)
- return -ENOENT;
-
if (perf->read_latency + perf->write_latency == 0 ||
perf->read_bandwidth + perf->write_bandwidth == 0)
return -EINVAL;
- mutex_lock(&memory_tier_lock);
+ if (default_dram_perf_ref_nid == NUMA_NO_NODE)
+ return -ENOENT;
+
/*
* The abstract distance of a memory node is in direct proportion to
* its memory latency (read + write) and inversely proportional to its
@@ -713,7 +780,6 @@ int mt_perf_to_adistance(struct access_coordinate *perf, int *adist)
(default_dram_perf.read_latency + default_dram_perf.write_latency) *
(default_dram_perf.read_bandwidth + default_dram_perf.write_bandwidth) /
(perf->read_bandwidth + perf->write_bandwidth);
- mutex_unlock(&memory_tier_lock);
return 0;
}
@@ -826,7 +892,8 @@ static int __init memory_tier_init(void)
* For now we can have 4 faster memory tiers with smaller adistance
* than default DRAM tier.
*/
- default_dram_type = alloc_memory_type(MEMTIER_ADISTANCE_DRAM);
+ default_dram_type = mt_find_alloc_memory_type(MEMTIER_ADISTANCE_DRAM,
+ &default_memory_types);
if (IS_ERR(default_dram_type))
panic("%s() failed to allocate default DRAM tier\n", __func__);
@@ -836,6 +903,14 @@ static int __init memory_tier_init(void)
* types assigned.
*/
for_each_node_state(node, N_MEMORY) {
+ if (!node_state(node, N_CPU))
+ /*
+ * Defer memory tier initialization on
+ * CPUless numa nodes. These will be initialized
+ * after firmware and devices are initialized.
+ */
+ continue;
+
memtier = set_node_memory_tier(node);
if (IS_ERR(memtier))
/*
diff --git a/mm/memory.c b/mm/memory.c
index f2bc6dd15eb830..eea6e4984eaefa 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -112,8 +112,10 @@ static bool vmf_pte_changed(struct vm_fault *vmf);
* Return true if the original pte was a uffd-wp pte marker (so the pte was
* wr-protected).
*/
-static bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf)
+static __always_inline bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf)
{
+ if (!userfaultfd_wp(vmf->vma))
+ return false;
if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
return false;
@@ -989,7 +991,7 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
flags |= FPB_IGNORE_SOFT_DIRTY;
nr = folio_pte_batch(folio, addr, src_pte, pte, max_nr, flags,
- &any_writable);
+ &any_writable, NULL, NULL);
folio_ref_add(folio, nr);
if (folio_test_anon(folio)) {
if (unlikely(folio_try_dup_anon_rmap_ptes(folio, page,
@@ -1502,10 +1504,15 @@ static __always_inline void zap_present_folio_ptes(struct mmu_gather *tlb,
if (!delay_rmap) {
folio_remove_rmap_ptes(folio, page, nr, vma);
- /* Only sanity-check the first page in a batch. */
- if (unlikely(page_mapcount(page) < 0))
+ if (unlikely(folio_mapcount(folio) < 0))
print_bad_pte(vma, addr, ptent, page);
}
+
+ if (want_init_mlocked_on_free() && folio_test_mlocked(folio) &&
+ !delay_rmap && folio_test_anon(folio)) {
+ kernel_init_pages(page, folio_nr_pages(folio));
+ }
+
if (unlikely(__tlb_remove_folio_pages(tlb, page, nr, delay_rmap))) {
*force_flush = true;
*force_break = true;
@@ -1536,7 +1543,9 @@ static inline int zap_present_ptes(struct mmu_gather *tlb,
ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
arch_check_zapped_pte(vma, ptent);
tlb_remove_tlb_entry(tlb, pte, addr);
- VM_WARN_ON_ONCE(userfaultfd_wp(vma));
+ if (userfaultfd_pte_wp(vma, ptent))
+ zap_install_uffd_wp_if_needed(vma, addr, pte, 1,
+ details, ptent);
ksm_might_unmap_zero_page(mm, ptent);
return 1;
}
@@ -1551,7 +1560,7 @@ static inline int zap_present_ptes(struct mmu_gather *tlb,
*/
if (unlikely(folio_test_large(folio) && max_nr != 1)) {
nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, fpb_flags,
- NULL);
+ NULL, NULL, NULL);
zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, nr,
addr, details, rss, force_flush,
@@ -1629,12 +1638,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
folio_remove_rmap_pte(folio, page, vma);
folio_put(folio);
} else if (!non_swap_entry(entry)) {
- /* Genuine swap entry, hence a private anon page */
+ max_nr = (end - addr) / PAGE_SIZE;
+ nr = swap_pte_batch(pte, max_nr, ptent);
+ /* Genuine swap entries, hence a private anon pages */
if (!should_zap_cows(details))
continue;
- rss[MM_SWAPENTS]--;
- if (unlikely(!free_swap_and_cache(entry)))
- print_bad_pte(vma, addr, ptent, NULL);
+ rss[MM_SWAPENTS] -= nr;
+ free_swap_and_cache_nr(entry, nr);
} else if (is_migration_entry(entry)) {
folio = pfn_swap_entry_folio(entry);
if (!should_zap_folio(details, folio))
@@ -1657,8 +1667,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
pr_alert("unrecognized swap entry 0x%lx\n", entry.val);
WARN_ON_ONCE(1);
}
- pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
- zap_install_uffd_wp_if_needed(vma, addr, pte, 1, details, ptent);
+ clear_not_present_full_ptes(mm, addr, pte, nr, tlb->fullmm);
+ zap_install_uffd_wp_if_needed(vma, addr, pte, nr, details, ptent);
} while (pte += nr, addr += PAGE_SIZE * nr, addr != end);
add_mm_rss_vec(mm, rss);
@@ -2763,7 +2773,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
unsigned long next;
int err = 0;
- BUG_ON(pud_huge(*pud));
+ BUG_ON(pud_leaf(*pud));
if (create) {
pmd = pmd_alloc_track(mm, pud, addr, mask);
@@ -3204,19 +3214,39 @@ static inline vm_fault_t vmf_can_call_fault(const struct vm_fault *vmf)
return VM_FAULT_RETRY;
}
+/**
+ * vmf_anon_prepare - Prepare to handle an anonymous fault.
+ * @vmf: The vm_fault descriptor passed from the fault handler.
+ *
+ * When preparing to insert an anonymous page into a VMA from a
+ * fault handler, call this function rather than anon_vma_prepare().
+ * If this vma does not already have an associated anon_vma and we are
+ * only protected by the per-VMA lock, the caller must retry with the
+ * mmap_lock held. __anon_vma_prepare() will look at adjacent VMAs to
+ * determine if this VMA can share its anon_vma, and that's not safe to
+ * do with only the per-VMA lock held for this VMA.
+ *
+ * Return: 0 if fault handling can proceed. Any other value should be
+ * returned to the caller.
+ */
vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
+ vm_fault_t ret = 0;
if (likely(vma->anon_vma))
return 0;
if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
- vma_end_read(vma);
- return VM_FAULT_RETRY;
+ if (!mmap_read_trylock(vma->vm_mm)) {
+ vma_end_read(vma);
+ return VM_FAULT_RETRY;
+ }
}
if (__anon_vma_prepare(vma))
- return VM_FAULT_OOM;
- return 0;
+ ret = VM_FAULT_OOM;
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK)
+ mmap_read_unlock(vma->vm_mm);
+ return ret;
}
/*
@@ -4188,7 +4218,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* when reading from swap. This metadata may be indexed by swap entry
* so this must be called before swap_free().
*/
- arch_swap_restore(entry, folio);
+ arch_swap_restore(folio_swap(entry, folio), folio);
/*
* Remove the swap entry and conditionally try to free up the swapcache.
@@ -4324,8 +4354,8 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
* for this vma. Then filter out the orders that can't be allocated over
* the faulting address and still be fully contained in the vma.
*/
- orders = thp_vma_allowable_orders(vma, vma->vm_flags, false, true, true,
- BIT(PMD_ORDER) - 1);
+ orders = thp_vma_allowable_orders(vma, vma->vm_flags,
+ TVA_IN_PF | TVA_ENFORCE_SYSFS, BIT(PMD_ORDER) - 1);
orders = thp_vma_suitable_orders(vma, vmf->address, orders);
if (!orders)
@@ -4350,6 +4380,9 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
pte_unmap(pte);
+ if (!orders)
+ goto fallback;
+
/* Try allocating the highest of the remaining orders. */
gfp = vma_thp_gfp_mask(vma);
while (orders) {
@@ -4357,6 +4390,7 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
folio = vma_alloc_folio(gfp, order, vma, addr, true);
if (folio) {
if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) {
+ count_mthp_stat(order, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
folio_put(folio);
goto next;
}
@@ -4365,6 +4399,7 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
return folio;
}
next:
+ count_mthp_stat(order, MTHP_STAT_ANON_FAULT_FALLBACK);
order = next_order(&orders, order);
}
@@ -4380,7 +4415,6 @@ fallback:
*/
static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
{
- bool uffd_wp = vmf_orig_pte_uffd_wp(vmf);
struct vm_area_struct *vma = vmf->vma;
unsigned long addr = vmf->address;
struct folio *folio;
@@ -4425,8 +4459,9 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
}
/* Allocate our own private page. */
- if (unlikely(anon_vma_prepare(vma)))
- goto oom;
+ ret = vmf_anon_prepare(vmf);
+ if (ret)
+ return ret;
/* Returns NULL on OOM or ERR_PTR(-EAGAIN) if we must retry the fault */
folio = alloc_anon_folio(vmf);
if (IS_ERR(folio))
@@ -4474,10 +4509,13 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
folio_ref_add(folio, nr_pages - 1);
add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_FAULT_ALLOC);
+#endif
folio_add_new_anon_rmap(folio, vma, addr);
folio_add_lru_vma(folio, vma);
setpte:
- if (uffd_wp)
+ if (vmf_orig_pte_uffd_wp(vmf))
entry = pte_mkuffd_wp(entry);
set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr_pages);
@@ -4652,7 +4690,6 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
struct page *page, unsigned int nr, unsigned long addr)
{
struct vm_area_struct *vma = vmf->vma;
- bool uffd_wp = vmf_orig_pte_uffd_wp(vmf);
bool write = vmf->flags & FAULT_FLAG_WRITE;
bool prefault = in_range(vmf->address, addr, nr * PAGE_SIZE);
pte_t entry;
@@ -4667,16 +4704,14 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- if (unlikely(uffd_wp))
+ if (unlikely(vmf_orig_pte_uffd_wp(vmf)))
entry = pte_mkuffd_wp(entry);
/* copy-on-write page */
if (write && !(vma->vm_flags & VM_SHARED)) {
- add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr);
VM_BUG_ON_FOLIO(nr != 1, folio);
folio_add_new_anon_rmap(folio, vma, addr);
folio_add_lru_vma(folio, vma);
} else {
- add_mm_counter(vma->vm_mm, mm_counter_file(folio), nr);
folio_add_file_rmap_ptes(folio, page, nr, vma);
}
set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr);
@@ -4713,9 +4748,11 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
struct page *page;
vm_fault_t ret;
+ bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) &&
+ !(vma->vm_flags & VM_SHARED);
/* Did we COW the page? */
- if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED))
+ if (is_cow)
page = vmf->cow_page;
else
page = vmf->page;
@@ -4751,8 +4788,10 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
/* Re-check under ptl */
if (likely(!vmf_pte_changed(vmf))) {
struct folio *folio = page_folio(page);
+ int type = is_cow ? MM_ANONPAGES : mm_counter_file(folio);
set_pte_range(vmf, folio, page, 1, vmf->address);
+ add_mm_counter(vma->vm_mm, type, 1);
ret = 0;
} else {
update_mmu_tlb(vma, vmf->address, vmf->pte);
@@ -5033,9 +5072,11 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
return ret;
}
-int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
+int numa_migrate_prep(struct folio *folio, struct vm_fault *vmf,
unsigned long addr, int page_nid, int *flags)
{
+ struct vm_area_struct *vma = vmf->vma;
+
folio_get(folio);
/* Record the current PID acceesing VMA */
@@ -5047,7 +5088,55 @@ int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
*flags |= TNF_FAULT_LOCAL;
}
- return mpol_misplaced(folio, vma, addr);
+ return mpol_misplaced(folio, vmf, addr);
+}
+
+static void numa_rebuild_single_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
+ unsigned long fault_addr, pte_t *fault_pte,
+ bool writable)
+{
+ pte_t pte, old_pte;
+
+ old_pte = ptep_modify_prot_start(vma, fault_addr, fault_pte);
+ pte = pte_modify(old_pte, vma->vm_page_prot);
+ pte = pte_mkyoung(pte);
+ if (writable)
+ pte = pte_mkwrite(pte, vma);
+ ptep_modify_prot_commit(vma, fault_addr, fault_pte, old_pte, pte);
+ update_mmu_cache_range(vmf, vma, fault_addr, fault_pte, 1);
+}
+
+static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
+ struct folio *folio, pte_t fault_pte,
+ bool ignore_writable, bool pte_write_upgrade)
+{
+ int nr = pte_pfn(fault_pte) - folio_pfn(folio);
+ unsigned long start = max(vmf->address - nr * PAGE_SIZE, vma->vm_start);
+ unsigned long end = min(vmf->address + (folio_nr_pages(folio) - nr) * PAGE_SIZE, vma->vm_end);
+ pte_t *start_ptep = vmf->pte - (vmf->address - start) / PAGE_SIZE;
+ unsigned long addr;
+
+ /* Restore all PTEs' mapping of the large folio */
+ for (addr = start; addr != end; start_ptep++, addr += PAGE_SIZE) {
+ pte_t ptent = ptep_get(start_ptep);
+ bool writable = false;
+
+ if (!pte_present(ptent) || !pte_protnone(ptent))
+ continue;
+
+ if (pfn_folio(pte_pfn(ptent)) != folio)
+ continue;
+
+ if (!ignore_writable) {
+ ptent = pte_modify(ptent, vma->vm_page_prot);
+ writable = pte_write(ptent);
+ if (!writable && pte_write_upgrade &&
+ can_change_pte_writable(vma, addr, ptent))
+ writable = true;
+ }
+
+ numa_rebuild_single_mapping(vmf, vma, addr, start_ptep, writable);
+ }
}
static vm_fault_t do_numa_page(struct vm_fault *vmf)
@@ -5055,11 +5144,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
struct folio *folio = NULL;
int nid = NUMA_NO_NODE;
- bool writable = false;
+ bool writable = false, ignore_writable = false;
+ bool pte_write_upgrade = vma_wants_manual_pte_write_upgrade(vma);
int last_cpupid;
int target_nid;
pte_t pte, old_pte;
- int flags = 0;
+ int flags = 0, nr_pages;
/*
* The pte cannot be used safely until we verify, while holding the page
@@ -5081,7 +5171,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
* is only valid while holding the PT lock.
*/
writable = pte_write(pte);
- if (!writable && vma_wants_manual_pte_write_upgrade(vma) &&
+ if (!writable && pte_write_upgrade &&
can_change_pte_writable(vma, vmf->address, pte))
writable = true;
@@ -5089,10 +5179,6 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
if (!folio || folio_is_zone_device(folio))
goto out_map;
- /* TODO: handle PTE-mapped THP */
- if (folio_test_large(folio))
- goto out_map;
-
/*
* Avoid grouping on RO pages in general. RO pages shouldn't hurt as
* much anyway since they can be in shared cache state. This misses
@@ -5108,10 +5194,11 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
* Flag if the folio is shared between multiple address spaces. This
* is later used when determining whether to group tasks together
*/
- if (folio_estimated_sharers(folio) > 1 && (vma->vm_flags & VM_SHARED))
+ if (folio_likely_mapped_shared(folio) && (vma->vm_flags & VM_SHARED))
flags |= TNF_SHARED;
nid = folio_nid(folio);
+ nr_pages = folio_nr_pages(folio);
/*
* For memory tiering mode, cpupid of slow memory page is used
* to record page access time. So use default value.
@@ -5121,13 +5208,14 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
last_cpupid = (-1 & LAST_CPUPID_MASK);
else
last_cpupid = folio_last_cpupid(folio);
- target_nid = numa_migrate_prep(folio, vma, vmf->address, nid, &flags);
+ target_nid = numa_migrate_prep(folio, vmf, vmf->address, nid, &flags);
if (target_nid == NUMA_NO_NODE) {
folio_put(folio);
goto out_map;
}
pte_unmap_unlock(vmf->pte, vmf->ptl);
writable = false;
+ ignore_writable = true;
/* Migrate to the requested node */
if (migrate_misplaced_folio(folio, vma, target_nid)) {
@@ -5148,20 +5236,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
out:
if (nid != NUMA_NO_NODE)
- task_numa_fault(last_cpupid, nid, 1, flags);
+ task_numa_fault(last_cpupid, nid, nr_pages, flags);
return 0;
out_map:
/*
* Make it present again, depending on how arch implements
* non-accessible ptes, some can allow access by kernel mode.
*/
- old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
- pte = pte_modify(old_pte, vma->vm_page_prot);
- pte = pte_mkyoung(pte);
- if (writable)
- pte = pte_mkwrite(pte, vma);
- ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
- update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
+ if (folio && folio_test_large(folio))
+ numa_rebuild_large_mapping(vmf, vma, folio, pte, ignore_writable,
+ pte_write_upgrade);
+ else
+ numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
+ writable);
pte_unmap_unlock(vmf->pte, vmf->ptl);
goto out;
}
@@ -5372,7 +5459,8 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
return VM_FAULT_OOM;
retry_pud:
if (pud_none(*vmf.pud) &&
- thp_vma_allowable_order(vma, vm_flags, false, true, true, PUD_ORDER)) {
+ thp_vma_allowable_order(vma, vm_flags,
+ TVA_IN_PF | TVA_ENFORCE_SYSFS, PUD_ORDER)) {
ret = create_huge_pud(&vmf);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
@@ -5406,7 +5494,8 @@ retry_pud:
goto retry_pud;
if (pmd_none(*vmf.pmd) &&
- thp_vma_allowable_order(vma, vm_flags, false, true, true, PMD_ORDER)) {
+ thp_vma_allowable_order(vma, vm_flags,
+ TVA_IN_PF | TVA_ENFORCE_SYSFS, PMD_ORDER)) {
ret = create_huge_pmd(&vmf);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
@@ -5760,15 +5849,6 @@ retry:
if (!vma_start_read(vma))
goto inval;
- /*
- * find_mergeable_anon_vma uses adjacent vmas which are not locked.
- * This check must happen after vma_start_read(); otherwise, a
- * concurrent mremap() with MREMAP_DONTUNMAP could dissociate the VMA
- * from its anon_vma.
- */
- if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma))
- goto inval_end_read;
-
/* Check since vm_start/vm_end might change before we lock the VMA */
if (unlikely(address < vma->vm_start || address >= vma->vm_end))
goto inval_end_read;
@@ -5866,34 +5946,48 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
/**
* follow_pte - look up PTE at a user virtual address
- * @mm: the mm_struct of the target address space
+ * @vma: the memory mapping
* @address: user virtual address
* @ptepp: location to store found PTE
* @ptlp: location to store the lock for the PTE
*
* On a successful return, the pointer to the PTE is stored in @ptepp;
* the corresponding lock is taken and its location is stored in @ptlp.
- * The contents of the PTE are only stable until @ptlp is released;
- * any further use, if any, must be protected against invalidation
- * with MMU notifiers.
+ *
+ * The contents of the PTE are only stable until @ptlp is released using
+ * pte_unmap_unlock(). This function will fail if the PTE is non-present.
+ * Present PTEs may include PTEs that map refcounted pages, such as
+ * anonymous folios in COW mappings.
+ *
+ * Callers must be careful when relying on PTE content after
+ * pte_unmap_unlock(). Especially if the PTE maps a refcounted page,
+ * callers must protect against invalidation with MMU notifiers; otherwise
+ * access to the PFN at a later point in time can trigger use-after-free.
*
* Only IO mappings and raw PFN mappings are allowed. The mmap semaphore
* should be taken for read.
*
- * KVM uses this function. While it is arguably less bad than ``follow_pfn``,
- * it is not a good general-purpose API.
+ * This function must not be used to modify PTE content.
*
* Return: zero on success, -ve otherwise.
*/
-int follow_pte(struct mm_struct *mm, unsigned long address,
+int follow_pte(struct vm_area_struct *vma, unsigned long address,
pte_t **ptepp, spinlock_t **ptlp)
{
+ struct mm_struct *mm = vma->vm_mm;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *ptep;
+ mmap_assert_locked(mm);
+ if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+ goto out;
+
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+ goto out;
+
pgd = pgd_offset(mm, address);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
goto out;
@@ -5923,67 +6017,7 @@ out:
}
EXPORT_SYMBOL_GPL(follow_pte);
-/**
- * follow_pfn - look up PFN at a user virtual address
- * @vma: memory mapping
- * @address: user virtual address
- * @pfn: location to store found PFN
- *
- * Only IO mappings and raw PFN mappings are allowed.
- *
- * This function does not allow the caller to read the permissions
- * of the PTE. Do not use it.
- *
- * Return: zero and the pfn at @pfn on success, -ve otherwise.
- */
-int follow_pfn(struct vm_area_struct *vma, unsigned long address,
- unsigned long *pfn)
-{
- int ret = -EINVAL;
- spinlock_t *ptl;
- pte_t *ptep;
-
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
- return ret;
-
- ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
- if (ret)
- return ret;
- *pfn = pte_pfn(ptep_get(ptep));
- pte_unmap_unlock(ptep, ptl);
- return 0;
-}
-EXPORT_SYMBOL(follow_pfn);
-
#ifdef CONFIG_HAVE_IOREMAP_PROT
-int follow_phys(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags,
- unsigned long *prot, resource_size_t *phys)
-{
- int ret = -EINVAL;
- pte_t *ptep, pte;
- spinlock_t *ptl;
-
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
- goto out;
-
- if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
- goto out;
- pte = ptep_get(ptep);
-
- if ((flags & FOLL_WRITE) && !pte_write(pte))
- goto unlock;
-
- *prot = pgprot_val(pte_pgprot(pte));
- *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
-
- ret = 0;
-unlock:
- pte_unmap_unlock(ptep, ptl);
-out:
- return ret;
-}
-
/**
* generic_access_phys - generic implementation for iomem mmap access
* @vma: the vma to access
@@ -6007,11 +6041,8 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
int offset = offset_in_page(addr);
int ret = -EINVAL;
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
- return -EINVAL;
-
retry:
- if (follow_pte(vma->vm_mm, addr, &ptep, &ptl))
+ if (follow_pte(vma, addr, &ptep, &ptl))
return -EINVAL;
pte = ptep_get(ptep);
pte_unmap_unlock(ptep, ptl);
@@ -6026,7 +6057,7 @@ retry:
if (!maddr)
return -ENOMEM;
- if (follow_pte(vma->vm_mm, addr, &ptep, &ptl))
+ if (follow_pte(vma, addr, &ptep, &ptl))
goto out_unmap;
if (!pte_same(pte, ptep_get(ptep))) {
@@ -6434,3 +6465,15 @@ void ptlock_free(struct ptdesc *ptdesc)
kmem_cache_free(page_ptl_cachep, ptdesc->ptl);
}
#endif
+
+void vma_pgtable_walk_begin(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_vma_lock_read(vma);
+}
+
+void vma_pgtable_walk_end(struct vm_area_struct *vma)
+{
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_vma_unlock_read(vma);
+}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a444e2d7dd2bff..431b1f6753c0b9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1841,6 +1841,7 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
struct migration_target_control mtc = {
.nmask = &nmask,
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+ .reason = MR_MEMORY_HOTPLUG,
};
int ret;
@@ -2050,11 +2051,11 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
}
/*
- * Dissolve free hugepages in the memory block before doing
+ * Dissolve free hugetlb folios in the memory block before doing
* offlining actually in order to make hugetlbfs's object
* counting consistent.
*/
- ret = dissolve_free_huge_pages(start_pfn, end_pfn);
+ ret = dissolve_free_hugetlb_folios(start_pfn, end_pfn);
if (ret) {
reason = "failure to dissolve huge pages";
goto failed_removal_isolated;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0fe77738d971de..aec756ae563779 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -509,8 +509,8 @@ static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk)
qp->nr_failed++;
return;
}
- folio = pfn_folio(pmd_pfn(*pmd));
- if (is_huge_zero_page(&folio->page)) {
+ folio = pmd_folio(*pmd);
+ if (is_huge_zero_folio(folio)) {
walk->action = ACTION_CONTINUE;
return;
}
@@ -642,12 +642,11 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
* Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio.
* Choosing not to migrate a shared folio is not counted as a failure.
*
- * To check if the folio is shared, ideally we want to make sure
- * every page is mapped to the same process. Doing that is very
- * expensive, so check the estimated sharers of the folio instead.
+ * See folio_likely_mapped_shared() on possible imprecision when we
+ * cannot easily detect if a folio is shared.
*/
if ((flags & MPOL_MF_MOVE_ALL) ||
- (folio_estimated_sharers(folio) == 1 && !hugetlb_pmd_shared(pte)))
+ (!folio_likely_mapped_shared(folio) && !hugetlb_pmd_shared(pte)))
if (!isolate_hugetlb(folio, qp->pagelist))
qp->nr_failed++;
unlock:
@@ -1032,11 +1031,10 @@ static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
* Unless MPOL_MF_MOVE_ALL, we try to avoid migrating a shared folio.
* Choosing not to migrate a shared folio is not counted as a failure.
*
- * To check if the folio is shared, ideally we want to make sure
- * every page is mapped to the same process. Doing that is very
- * expensive, so check the estimated sharers of the folio instead.
+ * See folio_likely_mapped_shared() on possible imprecision when we
+ * cannot easily detect if a folio is shared.
*/
- if ((flags & MPOL_MF_MOVE_ALL) || folio_estimated_sharers(folio) == 1) {
+ if ((flags & MPOL_MF_MOVE_ALL) || !folio_likely_mapped_shared(folio)) {
if (folio_isolate_lru(folio)) {
list_add_tail(&folio->lru, foliolist);
node_stat_mod_folio(folio,
@@ -1070,6 +1068,7 @@ static long migrate_to_node(struct mm_struct *mm, int source, int dest,
struct migration_target_control mtc = {
.nid = dest,
.gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
+ .reason = MR_SYSCALL,
};
nodes_clear(nmask);
@@ -1227,7 +1226,8 @@ static struct folio *alloc_migration_target_by_mpol(struct folio *src,
h = folio_hstate(src);
gfp = htlb_alloc_mask(h);
nodemask = policy_nodemask(gfp, pol, ilx, &nid);
- return alloc_hugetlb_folio_nodemask(h, nid, nodemask, gfp);
+ return alloc_hugetlb_folio_nodemask(h, nid, nodemask, gfp,
+ htlb_allow_alloc_fallback(MR_MEMPOLICY_MBIND));
}
if (folio_test_large(src))
@@ -1504,9 +1504,10 @@ static inline int sanitize_mpol_flags(int *mode, unsigned short *flags)
if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES))
return -EINVAL;
if (*flags & MPOL_F_NUMA_BALANCING) {
- if (*mode != MPOL_BIND)
+ if (*mode == MPOL_BIND || *mode == MPOL_PREFERRED_MANY)
+ *flags |= (MPOL_F_MOF | MPOL_F_MORON);
+ else
return -EINVAL;
- *flags |= (MPOL_F_MOF | MPOL_F_MORON);
}
return 0;
}
@@ -2200,9 +2201,9 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
*/
preferred_gfp = gfp | __GFP_NOWARN;
preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
- page = __alloc_pages(preferred_gfp, order, nid, nodemask);
+ page = __alloc_pages_noprof(preferred_gfp, order, nid, nodemask);
if (!page)
- page = __alloc_pages(gfp, order, nid, NULL);
+ page = __alloc_pages_noprof(gfp, order, nid, NULL);
return page;
}
@@ -2217,7 +2218,7 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
*
* Return: The page on success or NULL if allocation fails.
*/
-struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
+struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order,
struct mempolicy *pol, pgoff_t ilx, int nid)
{
nodemask_t *nodemask;
@@ -2248,7 +2249,7 @@ struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
* First, try to allocate THP only on local node, but
* don't reclaim unnecessarily, just compact.
*/
- page = __alloc_pages_node(nid,
+ page = __alloc_pages_node_noprof(nid,
gfp | __GFP_THISNODE | __GFP_NORETRY, order);
if (page || !(gfp & __GFP_DIRECT_RECLAIM))
return page;
@@ -2261,7 +2262,7 @@ struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
}
}
- page = __alloc_pages(gfp, order, nid, nodemask);
+ page = __alloc_pages_noprof(gfp, order, nid, nodemask);
if (unlikely(pol->mode == MPOL_INTERLEAVE) && page) {
/* skip NUMA_INTERLEAVE_HIT update if numa stats is disabled */
@@ -2292,7 +2293,7 @@ struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
*
* Return: The folio on success or NULL if allocation fails.
*/
-struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
+struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned long addr, bool hugepage)
{
struct mempolicy *pol;
@@ -2300,12 +2301,12 @@ struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
struct page *page;
pol = get_vma_policy(vma, addr, order, &ilx);
- page = alloc_pages_mpol(gfp | __GFP_COMP, order,
- pol, ilx, numa_node_id());
+ page = alloc_pages_mpol_noprof(gfp | __GFP_COMP, order,
+ pol, ilx, numa_node_id());
mpol_cond_put(pol);
return page_rmappable_folio(page);
}
-EXPORT_SYMBOL(vma_alloc_folio);
+EXPORT_SYMBOL(vma_alloc_folio_noprof);
/**
* alloc_pages - Allocate pages.
@@ -2321,7 +2322,7 @@ EXPORT_SYMBOL(vma_alloc_folio);
* flags are used.
* Return: The page on success or NULL if allocation fails.
*/
-struct page *alloc_pages(gfp_t gfp, unsigned int order)
+struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order)
{
struct mempolicy *pol = &default_policy;
@@ -2332,16 +2333,16 @@ struct page *alloc_pages(gfp_t gfp, unsigned int order)
if (!in_interrupt() && !(gfp & __GFP_THISNODE))
pol = get_task_policy(current);
- return alloc_pages_mpol(gfp, order,
- pol, NO_INTERLEAVE_INDEX, numa_node_id());
+ return alloc_pages_mpol_noprof(gfp, order, pol, NO_INTERLEAVE_INDEX,
+ numa_node_id());
}
-EXPORT_SYMBOL(alloc_pages);
+EXPORT_SYMBOL(alloc_pages_noprof);
-struct folio *folio_alloc(gfp_t gfp, unsigned int order)
+struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order)
{
- return page_rmappable_folio(alloc_pages(gfp | __GFP_COMP, order));
+ return page_rmappable_folio(alloc_pages_noprof(gfp | __GFP_COMP, order));
}
-EXPORT_SYMBOL(folio_alloc);
+EXPORT_SYMBOL(folio_alloc_noprof);
static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
struct mempolicy *pol, unsigned long nr_pages,
@@ -2360,13 +2361,13 @@ static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
for (i = 0; i < nodes; i++) {
if (delta) {
- nr_allocated = __alloc_pages_bulk(gfp,
+ nr_allocated = alloc_pages_bulk_noprof(gfp,
interleave_nodes(pol), NULL,
nr_pages_per_node + 1, NULL,
page_array);
delta--;
} else {
- nr_allocated = __alloc_pages_bulk(gfp,
+ nr_allocated = alloc_pages_bulk_noprof(gfp,
interleave_nodes(pol), NULL,
nr_pages_per_node, NULL, page_array);
}
@@ -2503,11 +2504,11 @@ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
preferred_gfp = gfp | __GFP_NOWARN;
preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
- nr_allocated = __alloc_pages_bulk(preferred_gfp, nid, &pol->nodes,
+ nr_allocated = alloc_pages_bulk_noprof(preferred_gfp, nid, &pol->nodes,
nr_pages, NULL, page_array);
if (nr_allocated < nr_pages)
- nr_allocated += __alloc_pages_bulk(gfp, numa_node_id(), NULL,
+ nr_allocated += alloc_pages_bulk_noprof(gfp, numa_node_id(), NULL,
nr_pages - nr_allocated, NULL,
page_array + nr_allocated);
return nr_allocated;
@@ -2519,7 +2520,7 @@ static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
* It can accelerate memory allocation especially interleaving
* allocate memory.
*/
-unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
+unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp,
unsigned long nr_pages, struct page **page_array)
{
struct mempolicy *pol = &default_policy;
@@ -2543,8 +2544,8 @@ unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
nid = numa_node_id();
nodemask = policy_nodemask(gfp, pol, NO_INTERLEAVE_INDEX, &nid);
- return __alloc_pages_bulk(gfp, nid, nodemask,
- nr_pages, NULL, page_array);
+ return alloc_pages_bulk_noprof(gfp, nid, nodemask,
+ nr_pages, NULL, page_array);
}
int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
@@ -2718,7 +2719,7 @@ static void sp_free(struct sp_node *n)
* mpol_misplaced - check whether current folio node is valid in policy
*
* @folio: folio to be checked
- * @vma: vm area where folio mapped
+ * @vmf: structure describing the fault
* @addr: virtual address in @vma for shared policy lookup and interleave policy
*
* Lookup current policy node id for vma,addr and "compare to" folio's
@@ -2728,18 +2729,24 @@ static void sp_free(struct sp_node *n)
* Return: NUMA_NO_NODE if the page is in a node that is valid for this
* policy, or a suitable node ID to allocate a replacement folio from.
*/
-int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma,
+int mpol_misplaced(struct folio *folio, struct vm_fault *vmf,
unsigned long addr)
{
struct mempolicy *pol;
pgoff_t ilx;
struct zoneref *z;
int curnid = folio_nid(folio);
+ struct vm_area_struct *vma = vmf->vma;
int thiscpu = raw_smp_processor_id();
- int thisnid = cpu_to_node(thiscpu);
+ int thisnid = numa_node_id();
int polnid = NUMA_NO_NODE;
int ret = NUMA_NO_NODE;
+ /*
+ * Make sure ptl is held so that we don't preempt and we
+ * have a stable smp processor id
+ */
+ lockdep_assert_held(vmf->ptl);
pol = get_vma_policy(vma, addr, folio_order(folio), &ilx);
if (!(pol->flags & MPOL_F_MOF))
goto out;
@@ -2764,15 +2771,26 @@ int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma,
break;
case MPOL_BIND:
- /* Optimize placement among multiple nodes via NUMA balancing */
+ case MPOL_PREFERRED_MANY:
+ /*
+ * Even though MPOL_PREFERRED_MANY can allocate pages outside
+ * policy nodemask we don't allow numa migration to nodes
+ * outside policy nodemask for now. This is done so that if we
+ * want demotion to slow memory to happen, before allocating
+ * from some DRAM node say 'x', we will end up using a
+ * MPOL_PREFERRED_MANY mask excluding node 'x'. In such scenario
+ * we should not promote to node 'x' from slow memory node.
+ */
if (pol->flags & MPOL_F_MORON) {
+ /*
+ * Optimize placement among multiple nodes
+ * via NUMA balancing
+ */
if (node_isset(thisnid, pol->nodes))
break;
goto out;
}
- fallthrough;
- case MPOL_PREFERRED_MANY:
/*
* use current page if in policy nodemask,
* else select nearest allowed node, if any.
@@ -2781,7 +2799,7 @@ int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma,
if (node_isset(curnid, pol->nodes))
goto out;
z = first_zones_zonelist(
- node_zonelist(numa_node_id(), GFP_HIGHUSER),
+ node_zonelist(thisnid, GFP_HIGHUSER),
gfp_zone(GFP_HIGHUSER),
&pol->nodes);
polnid = zone_to_nid(z->zone);
diff --git a/mm/mempool.c b/mm/mempool.c
index 076c736f5f1ff8..6ece63a00acf58 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -240,22 +240,24 @@ EXPORT_SYMBOL(mempool_init_node);
*
* Return: %0 on success, negative error code otherwise.
*/
-int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
- mempool_free_t *free_fn, void *pool_data)
+int mempool_init_noprof(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data)
{
return mempool_init_node(pool, min_nr, alloc_fn, free_fn,
pool_data, GFP_KERNEL, NUMA_NO_NODE);
}
-EXPORT_SYMBOL(mempool_init);
+EXPORT_SYMBOL(mempool_init_noprof);
/**
- * mempool_create - create a memory pool
+ * mempool_create_node - create a memory pool
* @min_nr: the minimum number of elements guaranteed to be
* allocated for this pool.
* @alloc_fn: user-defined element-allocation function.
* @free_fn: user-defined element-freeing function.
* @pool_data: optional private data available to the user-defined functions.
+ * @gfp_mask: memory allocation flags
+ * @node_id: numa node to allocate on
*
* this function creates and allocates a guaranteed size, preallocated
* memory pool. The pool can be used from the mempool_alloc() and mempool_free()
@@ -265,17 +267,9 @@ EXPORT_SYMBOL(mempool_init);
*
* Return: pointer to the created memory pool object or %NULL on error.
*/
-mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
- mempool_free_t *free_fn, void *pool_data)
-{
- return mempool_create_node(min_nr, alloc_fn, free_fn, pool_data,
- GFP_KERNEL, NUMA_NO_NODE);
-}
-EXPORT_SYMBOL(mempool_create);
-
-mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
- mempool_free_t *free_fn, void *pool_data,
- gfp_t gfp_mask, int node_id)
+mempool_t *mempool_create_node_noprof(int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data,
+ gfp_t gfp_mask, int node_id)
{
mempool_t *pool;
@@ -291,7 +285,7 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
return pool;
}
-EXPORT_SYMBOL(mempool_create_node);
+EXPORT_SYMBOL(mempool_create_node_noprof);
/**
* mempool_resize - resize an existing memory pool
@@ -387,7 +381,7 @@ EXPORT_SYMBOL(mempool_resize);
*
* Return: pointer to the allocated element or %NULL on error.
*/
-void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
+void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
{
void *element;
unsigned long flags;
@@ -454,7 +448,7 @@ repeat_alloc:
finish_wait(&pool->wait, &wait);
goto repeat_alloc;
}
-EXPORT_SYMBOL(mempool_alloc);
+EXPORT_SYMBOL(mempool_alloc_noprof);
/**
* mempool_alloc_preallocated - allocate an element from preallocated elements
@@ -562,7 +556,7 @@ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
{
struct kmem_cache *mem = pool_data;
VM_BUG_ON(mem->ctor);
- return kmem_cache_alloc(mem, gfp_mask);
+ return kmem_cache_alloc_noprof(mem, gfp_mask);
}
EXPORT_SYMBOL(mempool_alloc_slab);
@@ -580,7 +574,7 @@ EXPORT_SYMBOL(mempool_free_slab);
void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
{
size_t size = (size_t)pool_data;
- return kmalloc(size, gfp_mask);
+ return kmalloc_noprof(size, gfp_mask);
}
EXPORT_SYMBOL(mempool_kmalloc);
@@ -610,7 +604,7 @@ EXPORT_SYMBOL(mempool_kvfree);
void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data)
{
int order = (int)(long)pool_data;
- return alloc_pages(gfp_mask, order);
+ return alloc_pages_noprof(gfp_mask, order);
}
EXPORT_SYMBOL(mempool_alloc_pages);
diff --git a/mm/memremap.c b/mm/memremap.c
index 9e9fb1972fff0a..40d4547ce5144e 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -456,21 +456,23 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
}
EXPORT_SYMBOL_GPL(get_dev_pagemap);
-void free_zone_device_page(struct page *page)
+void free_zone_device_folio(struct folio *folio)
{
- if (WARN_ON_ONCE(!page->pgmap->ops || !page->pgmap->ops->page_free))
+ if (WARN_ON_ONCE(!folio->page.pgmap->ops ||
+ !folio->page.pgmap->ops->page_free))
return;
- mem_cgroup_uncharge(page_folio(page));
+ mem_cgroup_uncharge(folio);
/*
* Note: we don't expect anonymous compound pages yet. Once supported
* and we could PTE-map them similar to THP, we'd have to clear
* PG_anon_exclusive on all tail pages.
*/
- VM_BUG_ON_PAGE(PageAnon(page) && PageCompound(page), page);
- if (PageAnon(page))
- __ClearPageAnonExclusive(page);
+ if (folio_test_anon(folio)) {
+ VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
+ __ClearPageAnonExclusive(folio_page(folio, 0));
+ }
/*
* When a device managed page is freed, the folio->mapping field
@@ -481,20 +483,20 @@ void free_zone_device_page(struct page *page)
*
* For other types of ZONE_DEVICE pages, migration is either
* handled differently or not done at all, so there is no need
- * to clear page->mapping.
+ * to clear folio->mapping.
*/
- page->mapping = NULL;
- page->pgmap->ops->page_free(page);
+ folio->mapping = NULL;
+ folio->page.pgmap->ops->page_free(folio_page(folio, 0));
- if (page->pgmap->type != MEMORY_DEVICE_PRIVATE &&
- page->pgmap->type != MEMORY_DEVICE_COHERENT)
+ if (folio->page.pgmap->type != MEMORY_DEVICE_PRIVATE &&
+ folio->page.pgmap->type != MEMORY_DEVICE_COHERENT)
/*
- * Reset the page count to 1 to prepare for handing out the page
+ * Reset the refcount to 1 to prepare for handing out the page
* again.
*/
- set_page_count(page, 1);
+ folio_set_count(folio, 1);
else
- put_dev_pagemap(page->pgmap);
+ put_dev_pagemap(folio->page.pgmap);
}
void zone_device_page_init(struct page *page)
@@ -510,9 +512,9 @@ void zone_device_page_init(struct page *page)
EXPORT_SYMBOL_GPL(zone_device_page_init);
#ifdef CONFIG_FS_DAX
-bool __put_devmap_managed_page_refs(struct page *page, int refs)
+bool __put_devmap_managed_folio_refs(struct folio *folio, int refs)
{
- if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
+ if (folio->page.pgmap->type != MEMORY_DEVICE_FS_DAX)
return false;
/*
@@ -520,9 +522,9 @@ bool __put_devmap_managed_page_refs(struct page *page, int refs)
* refcount is 1, then the page is free and the refcount is
* stable because nobody holds a reference on the page.
*/
- if (page_ref_sub_return(page, refs) == 1)
- wake_up_var(&page->_refcount);
+ if (folio_ref_sub_return(folio, refs) == 1)
+ wake_up_var(&folio->_refcount);
return true;
}
-EXPORT_SYMBOL(__put_devmap_managed_page_refs);
+EXPORT_SYMBOL(__put_devmap_managed_folio_refs);
#endif /* CONFIG_FS_DAX */
diff --git a/mm/migrate.c b/mm/migrate.c
index 73a052a382f13a..dd04f578c19c3e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -113,7 +113,7 @@ bool isolate_movable_page(struct page *page, isolate_mode_t mode)
if (!mops->isolate_page(&folio->page, mode))
goto out_no_isolated;
- /* Driver shouldn't use PG_isolated bit of page->flags */
+ /* Driver shouldn't use the isolated flag */
WARN_ON_ONCE(folio_test_isolated(folio));
folio_set_isolated(folio);
folio_unlock(folio);
@@ -616,7 +616,7 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
folio_migrate_ksm(newfolio, folio);
/*
* Please do not reorder this without considering how mm/ksm.c's
- * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
+ * ksm_get_folio() depends upon ksm_migrate_page() and PageSwapCache().
*/
if (folio_test_swapcache(folio))
folio_clear_swapcache(folio);
@@ -1425,7 +1425,7 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
* semaphore in write mode here and set TTU_RMAP_LOCKED
* to let lower levels know we have taken the lock.
*/
- mapping = hugetlb_page_mapping_lock_write(&src->page);
+ mapping = hugetlb_folio_mapping_lock_write(src);
if (unlikely(!mapping))
goto unlock_put_anon;
@@ -1653,6 +1653,29 @@ static int migrate_pages_batch(struct list_head *from,
cond_resched();
/*
+ * The rare folio on the deferred split list should
+ * be split now. It should not count as a failure.
+ * Only check it without removing it from the list.
+ * Since the folio can be on deferred_split_scan()
+ * local list and removing it can cause the local list
+ * corruption. Folio split process below can handle it
+ * with the help of folio_ref_freeze().
+ *
+ * nr_pages > 2 is needed to avoid checking order-1
+ * page cache folios. They exist, in contrast to
+ * non-existent order-1 anonymous folios, and do not
+ * use _deferred_list.
+ */
+ if (nr_pages > 2 &&
+ !list_empty(&folio->_deferred_list)) {
+ if (try_split_folio(folio, split_folios) == 0) {
+ stats->nr_thp_split += is_thp;
+ stats->nr_split++;
+ continue;
+ }
+ }
+
+ /*
* Large folio migration might be unsupported or
* the allocation might be failed so we should retry
* on the same folio with the large folio split
@@ -2022,7 +2045,8 @@ struct folio *alloc_migration_target(struct folio *src, unsigned long private)
gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
return alloc_hugetlb_folio_nodemask(h, nid,
- mtc->nmask, gfp_mask);
+ mtc->nmask, gfp_mask,
+ htlb_allow_alloc_fallback(mtc->reason));
}
if (folio_test_large(src)) {
@@ -2060,6 +2084,7 @@ static int do_move_pages_to_node(struct list_head *pagelist, int node)
struct migration_target_control mtc = {
.nid = node,
.gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
+ .reason = MR_SYSCALL,
};
err = migrate_pages(pagelist, alloc_migration_target, NULL,
@@ -2115,7 +2140,7 @@ static int add_page_for_migration(struct mm_struct *mm, const void __user *p,
goto out_putfolio;
err = -EACCES;
- if (page_mapcount(page) > 1 && !migrate_all)
+ if (folio_likely_mapped_shared(folio) && !migrate_all)
goto out_putfolio;
err = -EBUSY;
@@ -2568,11 +2593,11 @@ int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma,
/*
* Don't migrate file folios that are mapped in multiple processes
* with execute permissions as they are probably shared libraries.
- * To check if the folio is shared, ideally we want to make sure
- * every page is mapped to the same process. Doing that is very
- * expensive, so check the estimated mapcount of the folio instead.
+ *
+ * See folio_likely_mapped_shared() on possible imprecision when we
+ * cannot easily detect if a folio is shared.
*/
- if (folio_estimated_sharers(folio) != 1 && folio_is_file_lru(folio) &&
+ if (folio_likely_mapped_shared(folio) && folio_is_file_lru(folio) &&
(vma->vm_flags & VM_EXEC))
goto out;
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index b6c27c76e1a0b2..7e0712493d1f48 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -71,7 +71,7 @@ again:
return migrate_vma_collect_hole(start, end, -1, walk);
if (pmd_trans_huge(*pmdp)) {
- struct page *page;
+ struct folio *folio;
ptl = pmd_lock(mm, pmdp);
if (unlikely(!pmd_trans_huge(*pmdp))) {
@@ -79,21 +79,21 @@ again:
goto again;
}
- page = pmd_page(*pmdp);
- if (is_huge_zero_page(page)) {
+ folio = pmd_folio(*pmdp);
+ if (is_huge_zero_folio(folio)) {
spin_unlock(ptl);
split_huge_pmd(vma, pmdp, addr);
} else {
int ret;
- get_page(page);
+ folio_get(folio);
spin_unlock(ptl);
- if (unlikely(!trylock_page(page)))
+ if (unlikely(!folio_trylock(folio)))
return migrate_vma_collect_skip(start, end,
walk);
- ret = split_huge_page(page);
- unlock_page(page);
- put_page(page);
+ ret = split_folio(folio);
+ folio_unlock(folio);
+ folio_put(folio);
if (ret)
return migrate_vma_collect_skip(start, end,
walk);
@@ -324,6 +324,8 @@ static void migrate_vma_collect(struct migrate_vma *migrate)
*/
static bool migrate_vma_check_page(struct page *page, struct page *fault_page)
{
+ struct folio *folio = page_folio(page);
+
/*
* One extra ref because caller holds an extra reference, either from
* isolate_lru_page() for a regular page, or migrate_vma_collect() for
@@ -336,18 +338,18 @@ static bool migrate_vma_check_page(struct page *page, struct page *fault_page)
* check them than regular pages, because they can be mapped with a pmd
* or with a pte (split pte mapping).
*/
- if (PageCompound(page))
+ if (folio_test_large(folio))
return false;
/* Page from ZONE_DEVICE have one extra reference */
- if (is_zone_device_page(page))
+ if (folio_is_zone_device(folio))
extra++;
/* For file back page */
- if (page_mapping(page))
- extra += 1 + page_has_private(page);
+ if (folio_mapping(folio))
+ extra += 1 + folio_has_private(folio);
- if ((page_count(page) - extra) > page_mapcount(page))
+ if ((folio_ref_count(folio) - extra) > folio_mapcount(folio))
return false;
return true;
@@ -694,6 +696,7 @@ static void __migrate_device_pages(unsigned long *src_pfns,
struct page *newpage = migrate_pfn_to_page(dst_pfns[i]);
struct page *page = migrate_pfn_to_page(src_pfns[i]);
struct address_space *mapping;
+ struct folio *folio;
int r;
if (!newpage) {
@@ -728,15 +731,12 @@ static void __migrate_device_pages(unsigned long *src_pfns,
continue;
}
- mapping = page_mapping(page);
+ folio = page_folio(page);
+ mapping = folio_mapping(folio);
if (is_device_private_page(newpage) ||
is_device_coherent_page(newpage)) {
if (mapping) {
- struct folio *folio;
-
- folio = page_folio(page);
-
/*
* For now only support anonymous memory migrating to
* device private or coherent memory.
@@ -759,11 +759,10 @@ static void __migrate_device_pages(unsigned long *src_pfns,
if (migrate && migrate->fault_page == page)
r = migrate_folio_extra(mapping, page_folio(newpage),
- page_folio(page),
- MIGRATE_SYNC_NO_COPY, 1);
+ folio, MIGRATE_SYNC_NO_COPY, 1);
else
r = migrate_folio(mapping, page_folio(newpage),
- page_folio(page), MIGRATE_SYNC_NO_COPY);
+ folio, MIGRATE_SYNC_NO_COPY);
if (r != MIGRATEPAGE_SUCCESS)
src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
}
diff --git a/mm/mlock.c b/mm/mlock.c
index 1ed2f2ab37cd18..30b51cdea89dec 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -378,7 +378,7 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
goto out;
if (is_huge_zero_pmd(*pmd))
goto out;
- folio = page_folio(pmd_page(*pmd));
+ folio = pmd_folio(*pmd);
if (vma->vm_flags & VM_LOCKED)
mlock_folio(folio);
else
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 549e76af8f82a8..2c8f3af4430f5c 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -24,6 +24,7 @@
#include <linux/page_ext.h>
#include <linux/pti.h>
#include <linux/pgtable.h>
+#include <linux/stackdepot.h>
#include <linux/swap.h>
#include <linux/cma.h>
#include <linux/crash_dump.h>
@@ -226,7 +227,6 @@ static unsigned long required_movablecore_percent __initdata;
static unsigned long nr_kernel_pages __initdata;
static unsigned long nr_all_pages __initdata;
-static unsigned long dma_reserve __initdata;
static bool deferred_struct_pages __meminitdata;
@@ -1144,7 +1144,7 @@ static void __init adjust_zone_range_for_zone_movable(int nid,
* Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
* then all holes in the requested range will be accounted for.
*/
-unsigned long __init __absent_pages_in_range(int nid,
+static unsigned long __init __absent_pages_in_range(int nid,
unsigned long range_start_pfn,
unsigned long range_end_pfn)
{
@@ -1265,6 +1265,30 @@ static void __init reset_memoryless_node_totalpages(struct pglist_data *pgdat)
pr_debug("On node %d totalpages: 0\n", pgdat->node_id);
}
+static void __init calc_nr_kernel_pages(void)
+{
+ unsigned long start_pfn, end_pfn;
+ phys_addr_t start_addr, end_addr;
+ u64 u;
+#ifdef CONFIG_HIGHMEM
+ unsigned long high_zone_low = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM];
+#endif
+
+ for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start_addr, &end_addr, NULL) {
+ start_pfn = PFN_UP(start_addr);
+ end_pfn = PFN_DOWN(end_addr);
+
+ if (start_pfn < end_pfn) {
+ nr_all_pages += end_pfn - start_pfn;
+#ifdef CONFIG_HIGHMEM
+ start_pfn = clamp(start_pfn, 0, high_zone_low);
+ end_pfn = clamp(end_pfn, 0, high_zone_low);
+#endif
+ nr_kernel_pages += end_pfn - start_pfn;
+ }
+ }
+}
+
static void __init calculate_node_totalpages(struct pglist_data *pgdat,
unsigned long node_start_pfn,
unsigned long node_end_pfn)
@@ -1308,26 +1332,6 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
pr_debug("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
}
-static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
- unsigned long present_pages)
-{
- unsigned long pages = spanned_pages;
-
- /*
- * Provide a more accurate estimation if there are holes within
- * the zone and SPARSEMEM is in use. If there are holes within the
- * zone, each populated memory region may cost us one or two extra
- * memmap pages due to alignment because memmap pages for each
- * populated regions may not be naturally aligned on page boundary.
- * So the (present_pages >> 4) heuristic is a tradeoff for that.
- */
- if (spanned_pages > present_pages + (present_pages >> 4) &&
- IS_ENABLED(CONFIG_SPARSEMEM))
- pages = present_pages;
-
- return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
-}
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void pgdat_init_split_queue(struct pglist_data *pgdat)
{
@@ -1542,15 +1546,6 @@ void __ref free_area_init_core_hotplug(struct pglist_data *pgdat)
}
#endif
-/*
- * Set up the zone data structures:
- * - mark all pages reserved
- * - mark all memory queues empty
- * - clear the memory bitmaps
- *
- * NOTE: pgdat should get zeroed by caller.
- * NOTE: this function is only called during early init.
- */
static void __init free_area_init_core(struct pglist_data *pgdat)
{
enum zone_type j;
@@ -1561,47 +1556,13 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
- unsigned long size, freesize, memmap_pages;
-
- size = zone->spanned_pages;
- freesize = zone->present_pages;
-
- /*
- * Adjust freesize so that it accounts for how much memory
- * is used by this zone for memmap. This affects the watermark
- * and per-cpu initialisations
- */
- memmap_pages = calc_memmap_size(size, freesize);
- if (!is_highmem_idx(j)) {
- if (freesize >= memmap_pages) {
- freesize -= memmap_pages;
- if (memmap_pages)
- pr_debug(" %s zone: %lu pages used for memmap\n",
- zone_names[j], memmap_pages);
- } else
- pr_warn(" %s zone: %lu memmap pages exceeds freesize %lu\n",
- zone_names[j], memmap_pages, freesize);
- }
-
- /* Account for reserved pages */
- if (j == 0 && freesize > dma_reserve) {
- freesize -= dma_reserve;
- pr_debug(" %s zone: %lu pages reserved\n", zone_names[0], dma_reserve);
- }
-
- if (!is_highmem_idx(j))
- nr_kernel_pages += freesize;
- /* Charge for highmem memmap if there are enough kernel pages */
- else if (nr_kernel_pages > memmap_pages * 2)
- nr_kernel_pages -= memmap_pages;
- nr_all_pages += freesize;
+ unsigned long size = zone->spanned_pages;
/*
- * Set an approximate value for lowmem here, it will be adjusted
- * when the bootmem allocator frees pages into the buddy system.
- * And all highmem pages will be managed by the buddy system.
+ * Initialize zone->managed_pages as 0 , it will be reset
+ * when memblock allocator frees pages into buddy system.
*/
- zone_init_internals(zone, j, nid, freesize);
+ zone_init_internals(zone, j, nid, zone->present_pages);
if (!size)
continue;
@@ -1874,30 +1835,26 @@ void __init free_area_init(unsigned long *max_zone_pfn)
panic("Cannot allocate %zuB for node %d.\n",
sizeof(*pgdat), nid);
arch_refresh_nodedata(nid, pgdat);
- free_area_init_node(nid);
-
- /*
- * We do not want to confuse userspace by sysfs
- * files/directories for node without any memory
- * attached to it, so this node is not marked as
- * N_MEMORY and not marked online so that no sysfs
- * hierarchy will be created via register_one_node for
- * it. The pgdat will get fully initialized by
- * hotadd_init_pgdat() when memory is hotplugged into
- * this node.
- */
- continue;
}
pgdat = NODE_DATA(nid);
free_area_init_node(nid);
- /* Any memory on that node */
- if (pgdat->node_present_pages)
+ /*
+ * No sysfs hierarcy will be created via register_one_node()
+ *for memory-less node because here it's not marked as N_MEMORY
+ *and won't be set online later. The benefit is userspace
+ *program won't be confused by sysfs files/directories of
+ *memory-less node. The pgdat will get fully initialized by
+ *hotadd_init_pgdat() when memory is hotplugged into this node.
+ */
+ if (pgdat->node_present_pages) {
node_set_state(nid, N_MEMORY);
- check_for_memory(pgdat);
+ check_for_memory(pgdat);
+ }
}
+ calc_nr_kernel_pages();
memmap_init();
/* disable hash distribution for systems with a single node */
@@ -2057,7 +2014,7 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
__init_single_page(page, pfn, zid, nid);
nr_pages++;
}
- return (nr_pages);
+ return nr_pages;
}
/*
@@ -2259,10 +2216,6 @@ zone_empty:
* Return true when zone was grown, otherwise return false. We return true even
* when we grow less than requested, to let the caller decide if there are
* enough pages to satisfy the allocation.
- *
- * Note: We use noinline because this function is needed only during boot, and
- * it is called from a __ref function _deferred_grow_zone. This way we are
- * making sure that it is not inlined into permanent text section.
*/
bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
{
@@ -2412,17 +2365,6 @@ void __init page_alloc_init_late(void)
page_alloc_sysctl_init();
}
-#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
-/*
- * Returns the number of pages that arch has reserved but
- * is not known to alloc_large_system_hash().
- */
-static unsigned long __init arch_reserved_kernel_pages(void)
-{
- return 0;
-}
-#endif
-
/*
* Adaptive scale is meant to reduce sizes of hash tables on large memory
* machines. As memory size is increased the scale is also increased but at
@@ -2465,7 +2407,6 @@ void *__init alloc_large_system_hash(const char *tablename,
if (!numentries) {
/* round applicable memory size up to nearest megabyte */
numentries = nr_kernel_pages;
- numentries -= arch_reserved_kernel_pages();
/* It isn't necessary when PAGE_SIZE >= 1MB */
if (PAGE_SIZE < SZ_1M)
@@ -2547,26 +2488,9 @@ void *__init alloc_large_system_hash(const char *tablename,
return table;
}
-/**
- * set_dma_reserve - set the specified number of pages reserved in the first zone
- * @new_dma_reserve: The number of pages to mark reserved
- *
- * The per-cpu batchsize and zone watermarks are determined by managed_pages.
- * In the DMA zone, a significant percentage may be consumed by kernel image
- * and other unfreeable allocations which can skew the watermarks badly. This
- * function may optionally be used to account for unfreeable pages in the
- * first zone (e.g., ZONE_DMA). The effect will be lower watermarks and
- * smaller per-cpu batchsize.
- */
-void __init set_dma_reserve(unsigned long new_dma_reserve)
-{
- dma_reserve = new_dma_reserve;
-}
-
void __init memblock_free_pages(struct page *page, unsigned long pfn,
unsigned int order)
{
-
if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
int nid = early_pfn_to_nid(pfn);
@@ -2578,6 +2502,17 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
/* KMSAN will take care of these pages. */
return;
}
+
+ /* pages were reserved and not allocated */
+ if (mem_alloc_profiling_enabled()) {
+ union codetag_ref *ref = get_page_tag_ref(page);
+
+ if (ref) {
+ set_codetag_empty(ref);
+ put_page_tag_ref(ref);
+ }
+ }
+
__free_pages_core(page, order);
}
@@ -2587,6 +2522,9 @@ EXPORT_SYMBOL(init_on_alloc);
DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
EXPORT_SYMBOL(init_on_free);
+DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free);
+EXPORT_SYMBOL(init_mlocked_on_free);
+
static bool _init_on_alloc_enabled_early __read_mostly
= IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON);
static int __init early_init_on_alloc(char *buf)
@@ -2604,6 +2542,14 @@ static int __init early_init_on_free(char *buf)
}
early_param("init_on_free", early_init_on_free);
+static bool _init_mlocked_on_free_enabled_early __read_mostly
+ = IS_ENABLED(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON);
+static int __init early_init_mlocked_on_free(char *buf)
+{
+ return kstrtobool(buf, &_init_mlocked_on_free_enabled_early);
+}
+early_param("init_mlocked_on_free", early_init_mlocked_on_free);
+
DEFINE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled);
/*
@@ -2631,12 +2577,21 @@ static void __init mem_debugging_and_hardening_init(void)
}
#endif
- if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) &&
+ if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early ||
+ _init_mlocked_on_free_enabled_early) &&
page_poisoning_requested) {
pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
- "will take precedence over init_on_alloc and init_on_free\n");
+ "will take precedence over init_on_alloc, init_on_free "
+ "and init_mlocked_on_free\n");
_init_on_alloc_enabled_early = false;
_init_on_free_enabled_early = false;
+ _init_mlocked_on_free_enabled_early = false;
+ }
+
+ if (_init_mlocked_on_free_enabled_early && _init_on_free_enabled_early) {
+ pr_info("mem auto-init: init_on_free is on, "
+ "will take precedence over init_mlocked_on_free\n");
+ _init_mlocked_on_free_enabled_early = false;
}
if (_init_on_alloc_enabled_early) {
@@ -2653,9 +2608,17 @@ static void __init mem_debugging_and_hardening_init(void)
static_branch_disable(&init_on_free);
}
- if (IS_ENABLED(CONFIG_KMSAN) &&
- (_init_on_alloc_enabled_early || _init_on_free_enabled_early))
- pr_info("mem auto-init: please make sure init_on_alloc and init_on_free are disabled when running KMSAN\n");
+ if (_init_mlocked_on_free_enabled_early) {
+ want_check_pages = true;
+ static_branch_enable(&init_mlocked_on_free);
+ } else {
+ static_branch_disable(&init_mlocked_on_free);
+ }
+
+ if (IS_ENABLED(CONFIG_KMSAN) && (_init_on_alloc_enabled_early ||
+ _init_on_free_enabled_early || _init_mlocked_on_free_enabled_early))
+ pr_info("mem auto-init: please make sure init_on_alloc, init_on_free and "
+ "init_mlocked_on_free are disabled when running KMSAN\n");
#ifdef CONFIG_DEBUG_PAGEALLOC
if (debug_pagealloc_enabled()) {
@@ -2694,9 +2657,10 @@ static void __init report_meminit(void)
else
stack = "off";
- pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s\n",
+ pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s, mlocked free:%s\n",
stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off",
- want_init_on_free() ? "on" : "off");
+ want_init_on_free() ? "on" : "off",
+ want_init_mlocked_on_free() ? "on" : "off");
if (want_init_on_free())
pr_info("mem auto-init: clearing system memory may take some time...\n");
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 6dbda99a47da1c..057270dbe3aa12 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1114,21 +1114,21 @@ static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_
*/
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
{
- MA_STATE(mas, &vma->vm_mm->mm_mt, vma->vm_end, vma->vm_end);
struct anon_vma *anon_vma = NULL;
struct vm_area_struct *prev, *next;
+ VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_end);
/* Try next first. */
- next = mas_walk(&mas);
+ next = vma_iter_load(&vmi);
if (next) {
anon_vma = reusable_anon_vma(next, vma, next);
if (anon_vma)
return anon_vma;
}
- prev = mas_prev(&mas, 0);
+ prev = vma_prev(&vmi);
VM_BUG_ON_VMA(prev != vma, vma);
- prev = mas_prev(&mas, 0);
+ prev = vma_prev(&vmi);
/* Try prev next. */
if (prev)
anon_vma = reusable_anon_vma(prev, prev, vma);
@@ -1255,17 +1255,15 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
- /* Obtain the address to map to. we verify (or select) it and ensure
- * that it represents a valid section of the address space.
+ /*
+ * addr is returned from get_unmapped_area,
+ * There are two cases:
+ * 1> MAP_FIXED == false
+ * unallocated memory, no need to check sealing.
+ * 1> MAP_FIXED == true
+ * sealing is checked inside mmap_region when
+ * do_vmi_munmap is called.
*/
- addr = get_unmapped_area(file, addr, len, pgoff, flags);
- if (IS_ERR_VALUE(addr))
- return addr;
-
- if (flags & MAP_FIXED_NOREPLACE) {
- if (find_vma_intersection(mm, addr, addr + len))
- return -EEXIST;
- }
if (prot == PROT_EXEC) {
pkey = execute_only_pkey(mm);
@@ -1280,6 +1278,18 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+ /* Obtain the address to map to. we verify (or select) it and ensure
+ * that it represents a valid section of the address space.
+ */
+ addr = __get_unmapped_area(file, addr, len, pgoff, flags, vm_flags);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
+ if (flags & MAP_FIXED_NOREPLACE) {
+ if (find_vma_intersection(mm, addr, addr + len))
+ return -EEXIST;
+ }
+
if (flags & MAP_LOCKED)
if (!can_do_mlock())
return -EPERM;
@@ -1514,32 +1524,32 @@ bool vma_needs_dirty_tracking(struct vm_area_struct *vma)
* to the private version (using protection_map[] without the
* VM_SHARED bit).
*/
-int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
+bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
{
/* If it was private or non-writable, the write bit is already clear */
if (!vma_is_shared_writable(vma))
- return 0;
+ return false;
/* The backer wishes to know when pages are first written to? */
if (vm_ops_needs_writenotify(vma->vm_ops))
- return 1;
+ return true;
/* The open routine did something to the protections that pgprot_modify
* won't preserve? */
if (pgprot_val(vm_page_prot) !=
pgprot_val(vm_pgprot_modify(vm_page_prot, vma->vm_flags)))
- return 0;
+ return false;
/*
* Do we need to track softdirty? hugetlb does not support softdirty
* tracking yet.
*/
if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma))
- return 1;
+ return true;
/* Do we need write faults for uffd-wp tracking? */
if (userfaultfd_wp(vma))
- return 1;
+ return true;
/* Can the mapping track the dirty pages? */
return vma_fs_can_writeback(vma);
@@ -1549,14 +1559,14 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
* We account for memory if it's a private writeable mapping,
* not hugepages and VM_NORESERVE wasn't set.
*/
-static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
+static inline bool accountable_mapping(struct file *file, vm_flags_t vm_flags)
{
/*
* hugetlb has its own accounting separate from the core VM
* VM_HUGETLB may not be set yet so we cannot check for that flag.
*/
if (file && is_file_hugepages(file))
- return 0;
+ return false;
return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
}
@@ -1576,11 +1586,10 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
unsigned long length, gap;
unsigned long low_limit, high_limit;
struct vm_area_struct *tmp;
-
- MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+ VMA_ITERATOR(vmi, current->mm, 0);
/* Adjust search length to account for worst case alignment overhead */
- length = info->length + info->align_mask;
+ length = info->length + info->align_mask + info->start_gap;
if (length < info->length)
return -ENOMEM;
@@ -1589,23 +1598,29 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
low_limit = mmap_min_addr;
high_limit = info->high_limit;
retry:
- if (mas_empty_area(&mas, low_limit, high_limit - 1, length))
+ if (vma_iter_area_lowest(&vmi, low_limit, high_limit, length))
return -ENOMEM;
- gap = mas.index;
+ /*
+ * Adjust for the gap first so it doesn't interfere with the
+ * later alignment. The first step is the minimum needed to
+ * fulill the start gap, the next steps is the minimum to align
+ * that. It is the minimum needed to fulill both.
+ */
+ gap = vma_iter_addr(&vmi) + info->start_gap;
gap += (info->align_offset - gap) & info->align_mask;
- tmp = mas_next(&mas, ULONG_MAX);
+ tmp = vma_next(&vmi);
if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
if (vm_start_gap(tmp) < gap + length - 1) {
low_limit = tmp->vm_end;
- mas_reset(&mas);
+ vma_iter_reset(&vmi);
goto retry;
}
} else {
- tmp = mas_prev(&mas, 0);
+ tmp = vma_prev(&vmi);
if (tmp && vm_end_gap(tmp) > gap) {
low_limit = vm_end_gap(tmp);
- mas_reset(&mas);
+ vma_iter_reset(&vmi);
goto retry;
}
}
@@ -1628,10 +1643,10 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
unsigned long length, gap, gap_end;
unsigned long low_limit, high_limit;
struct vm_area_struct *tmp;
+ VMA_ITERATOR(vmi, current->mm, 0);
- MA_STATE(mas, &current->mm->mm_mt, 0, 0);
/* Adjust search length to account for worst case alignment overhead */
- length = info->length + info->align_mask;
+ length = info->length + info->align_mask + info->start_gap;
if (length < info->length)
return -ENOMEM;
@@ -1640,24 +1655,24 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
low_limit = mmap_min_addr;
high_limit = info->high_limit;
retry:
- if (mas_empty_area_rev(&mas, low_limit, high_limit - 1, length))
+ if (vma_iter_area_highest(&vmi, low_limit, high_limit, length))
return -ENOMEM;
- gap = mas.last + 1 - info->length;
+ gap = vma_iter_end(&vmi) - info->length;
gap -= (gap - info->align_offset) & info->align_mask;
- gap_end = mas.last;
- tmp = mas_next(&mas, ULONG_MAX);
+ gap_end = vma_iter_end(&vmi);
+ tmp = vma_next(&vmi);
if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) { /* Avoid prev check if possible */
- if (vm_start_gap(tmp) <= gap_end) {
+ if (vm_start_gap(tmp) < gap_end) {
high_limit = vm_start_gap(tmp);
- mas_reset(&mas);
+ vma_iter_reset(&vmi);
goto retry;
}
} else {
- tmp = mas_prev(&mas, 0);
+ tmp = vma_prev(&vmi);
if (tmp && vm_end_gap(tmp) > gap) {
high_limit = tmp->vm_start;
- mas_reset(&mas);
+ vma_iter_reset(&vmi);
goto retry;
}
}
@@ -1705,7 +1720,7 @@ generic_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
if (len > mmap_end - mmap_min_addr)
@@ -1723,12 +1738,9 @@ generic_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
- info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = mmap_end;
- info.align_mask = 0;
- info.align_offset = 0;
return vm_unmapped_area(&info);
}
@@ -1753,7 +1765,7 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
{
struct vm_area_struct *vma, *prev;
struct mm_struct *mm = current->mm;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {};
const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
/* requested length too big for entire address space */
@@ -1777,8 +1789,6 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = arch_get_mmap_base(addr, mm->mmap_base);
- info.align_mask = 0;
- info.align_offset = 0;
addr = vm_unmapped_area(&info);
/*
@@ -1808,12 +1818,41 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
}
#endif
+#ifndef HAVE_ARCH_UNMAPPED_AREA_VMFLAGS
+unsigned long
+arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
+{
+ return arch_get_unmapped_area(filp, addr, len, pgoff, flags);
+}
+
unsigned long
-get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
- unsigned long pgoff, unsigned long flags)
+arch_get_unmapped_area_topdown_vmflags(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags, vm_flags_t vm_flags)
+{
+ return arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
+}
+#endif
+
+unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, struct file *filp,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags,
+ vm_flags_t vm_flags)
+{
+ if (test_bit(MMF_TOPDOWN, &mm->flags))
+ return arch_get_unmapped_area_topdown_vmflags(filp, addr, len, pgoff,
+ flags, vm_flags);
+ return arch_get_unmapped_area_vmflags(filp, addr, len, pgoff, flags, vm_flags);
+}
+
+unsigned long
+__get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags)
{
unsigned long (*get_area)(struct file *, unsigned long,
- unsigned long, unsigned long, unsigned long);
+ unsigned long, unsigned long, unsigned long)
+ = NULL;
unsigned long error = arch_mmap_check(addr, len, flags);
if (error)
@@ -1823,7 +1862,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
if (len > TASK_SIZE)
return -ENOMEM;
- get_area = current->mm->get_unmapped_area;
if (file) {
if (file->f_op->get_unmapped_area)
get_area = file->f_op->get_unmapped_area;
@@ -1833,16 +1871,22 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
* so use shmem's get_unmapped_area in case it can be huge.
*/
get_area = shmem_get_unmapped_area;
- } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
- /* Ensures that larger anonymous mappings are THP aligned. */
- get_area = thp_get_unmapped_area;
}
/* Always treat pgoff as zero for anonymous memory. */
if (!file)
pgoff = 0;
- addr = get_area(file, addr, len, pgoff, flags);
+ if (get_area) {
+ addr = get_area(file, addr, len, pgoff, flags);
+ } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+ /* Ensures that larger anonymous mappings are THP aligned. */
+ addr = thp_get_unmapped_area_vmflags(file, addr, len,
+ pgoff, flags, vm_flags);
+ } else {
+ addr = mm_get_unmapped_area_vmflags(current->mm, file, addr, len,
+ pgoff, flags, vm_flags);
+ }
if (IS_ERR_VALUE(addr))
return addr;
@@ -1855,7 +1899,16 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
return error ? error : addr;
}
-EXPORT_SYMBOL(get_unmapped_area);
+unsigned long
+mm_get_unmapped_area(struct mm_struct *mm, struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ if (test_bit(MMF_TOPDOWN, &mm->flags))
+ return arch_get_unmapped_area_topdown(file, addr, len, pgoff, flags);
+ return arch_get_unmapped_area(file, addr, len, pgoff, flags);
+}
+EXPORT_SYMBOL(mm_get_unmapped_area);
/**
* find_vma_intersection() - Look up the first VMA which intersects the interval
@@ -1912,12 +1965,12 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
struct vm_area_struct **pprev)
{
struct vm_area_struct *vma;
- MA_STATE(mas, &mm->mm_mt, addr, addr);
+ VMA_ITERATOR(vmi, mm, addr);
- vma = mas_walk(&mas);
- *pprev = mas_prev(&mas, 0);
+ vma = vma_iter_load(&vmi);
+ *pprev = vma_prev(&vmi);
if (!vma)
- vma = mas_next(&mas, ULONG_MAX);
+ vma = vma_next(&vmi);
return vma;
}
@@ -1971,7 +2024,7 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
struct vm_area_struct *next;
unsigned long gap_addr;
int error = 0;
- MA_STATE(mas, &mm->mm_mt, vma->vm_start, address);
+ VMA_ITERATOR(vmi, mm, vma->vm_start);
if (!(vma->vm_flags & VM_GROWSUP))
return -EFAULT;
@@ -1997,15 +2050,15 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
}
if (next)
- mas_prev_range(&mas, address);
+ vma_iter_prev_range_limit(&vmi, address);
- __mas_set_range(&mas, vma->vm_start, address - 1);
- if (mas_preallocate(&mas, vma, GFP_KERNEL))
+ vma_iter_config(&vmi, vma->vm_start, address);
+ if (vma_iter_prealloc(&vmi, vma))
return -ENOMEM;
/* We must make sure the anon_vma is allocated. */
if (unlikely(anon_vma_prepare(vma))) {
- mas_destroy(&mas);
+ vma_iter_free(&vmi);
return -ENOMEM;
}
@@ -2045,7 +2098,7 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
anon_vma_interval_tree_pre_update_vma(vma);
vma->vm_end = address;
/* Overwrite old entry in mtree. */
- mas_store_prealloc(&mas, vma);
+ vma_iter_store(&vmi, vma);
anon_vma_interval_tree_post_update_vma(vma);
spin_unlock(&mm->page_table_lock);
@@ -2054,7 +2107,7 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
}
}
anon_vma_unlock_write(vma->anon_vma);
- mas_destroy(&mas);
+ vma_iter_free(&vmi);
validate_mm(mm);
return error;
}
@@ -2067,9 +2120,9 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
int expand_downwards(struct vm_area_struct *vma, unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
- MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_start);
struct vm_area_struct *prev;
int error = 0;
+ VMA_ITERATOR(vmi, mm, vma->vm_start);
if (!(vma->vm_flags & VM_GROWSDOWN))
return -EFAULT;
@@ -2079,7 +2132,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
return -EPERM;
/* Enforce stack_guard_gap */
- prev = mas_prev(&mas, 0);
+ prev = vma_prev(&vmi);
/* Check that both stack segments have the same anon_vma? */
if (prev) {
if (!(prev->vm_flags & VM_GROWSDOWN) &&
@@ -2089,15 +2142,15 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
}
if (prev)
- mas_next_range(&mas, vma->vm_start);
+ vma_iter_next_range_limit(&vmi, vma->vm_start);
- __mas_set_range(&mas, address, vma->vm_end - 1);
- if (mas_preallocate(&mas, vma, GFP_KERNEL))
+ vma_iter_config(&vmi, address, vma->vm_end);
+ if (vma_iter_prealloc(&vmi, vma))
return -ENOMEM;
/* We must make sure the anon_vma is allocated. */
if (unlikely(anon_vma_prepare(vma))) {
- mas_destroy(&mas);
+ vma_iter_free(&vmi);
return -ENOMEM;
}
@@ -2138,7 +2191,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
vma->vm_start = address;
vma->vm_pgoff -= grow;
/* Overwrite old entry in mtree. */
- mas_store_prealloc(&mas, vma);
+ vma_iter_store(&vmi, vma);
anon_vma_interval_tree_post_update_vma(vma);
spin_unlock(&mm->page_table_lock);
@@ -2147,7 +2200,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
}
}
anon_vma_unlock_write(vma->anon_vma);
- mas_destroy(&mas);
+ vma_iter_free(&vmi);
validate_mm(mm);
return error;
}
@@ -2682,6 +2735,14 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
if (end == start)
return -EINVAL;
+ /*
+ * Check if memory is sealed before arch_unmap.
+ * Prevent unmapping a sealed VMA.
+ * can_modify_mm assumes we have acquired the lock on MM.
+ */
+ if (unlikely(!can_modify_mm(mm, start, end)))
+ return -EPERM;
+
/* arch_unmap() might do unmaps itself. */
arch_unmap(mm, start, end);
@@ -2744,7 +2805,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
}
/* Unmap any existing mapping in the area */
- if (do_vmi_munmap(&vmi, mm, addr, len, uf, false))
+ error = do_vmi_munmap(&vmi, mm, addr, len, uf, false);
+ if (error == -EPERM)
+ return error;
+ else if (error)
return -ENOMEM;
/*
@@ -3094,6 +3158,14 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
{
struct mm_struct *mm = vma->vm_mm;
+ /*
+ * Check if memory is sealed before arch_unmap.
+ * Prevent unmapping a sealed VMA.
+ * can_modify_mm assumes we have acquired the lock on MM.
+ */
+ if (unlikely(!can_modify_mm(mm, start, end)))
+ return -EPERM;
+
arch_unmap(mm, start, end);
return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock);
}
@@ -3242,7 +3314,7 @@ void exit_mmap(struct mm_struct *mm)
struct mmu_gather tlb;
struct vm_area_struct *vma;
unsigned long nr_accounted = 0;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ VMA_ITERATOR(vmi, mm, 0);
int count = 0;
/* mm's last user has gone, and its about to be pulled down */
@@ -3251,7 +3323,7 @@ void exit_mmap(struct mm_struct *mm)
mmap_read_lock(mm);
arch_exit_mmap(mm);
- vma = mas_find(&mas, ULONG_MAX);
+ vma = vma_next(&vmi);
if (!vma || unlikely(xa_is_zero(vma))) {
/* Can happen if dup_mmap() received an OOM */
mmap_read_unlock(mm);
@@ -3264,7 +3336,7 @@ void exit_mmap(struct mm_struct *mm)
tlb_gather_mmu_fullmm(&tlb, mm);
/* update_hiwater_rss(mm) here? but nobody should be looking */
/* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
- unmap_vmas(&tlb, &mas, vma, 0, ULONG_MAX, ULONG_MAX, false);
+ unmap_vmas(&tlb, &vmi.mas, vma, 0, ULONG_MAX, ULONG_MAX, false);
mmap_read_unlock(mm);
/*
@@ -3274,8 +3346,8 @@ void exit_mmap(struct mm_struct *mm)
set_bit(MMF_OOM_SKIP, &mm->flags);
mmap_write_lock(mm);
mt_clear_in_rcu(&mm->mm_mt);
- mas_set(&mas, vma->vm_end);
- free_pgtables(&tlb, &mas, vma, FIRST_USER_ADDRESS,
+ vma_iter_set(&vmi, vma->vm_end);
+ free_pgtables(&tlb, &vmi.mas, vma, FIRST_USER_ADDRESS,
USER_PGTABLES_CEILING, true);
tlb_finish_mmu(&tlb);
@@ -3284,14 +3356,14 @@ void exit_mmap(struct mm_struct *mm)
* enabled, without holding any MM locks besides the unreachable
* mmap_write_lock.
*/
- mas_set(&mas, vma->vm_end);
+ vma_iter_set(&vmi, vma->vm_end);
do {
if (vma->vm_flags & VM_ACCOUNT)
nr_accounted += vma_pages(vma);
remove_vma(vma, true);
count++;
cond_resched();
- vma = mas_find(&mas, ULONG_MAX);
+ vma = vma_next(&vmi);
} while (vma && likely(!xa_is_zero(vma)));
BUG_ON(count != mm->map_count);
@@ -3713,7 +3785,7 @@ int mm_take_all_locks(struct mm_struct *mm)
{
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ VMA_ITERATOR(vmi, mm, 0);
mmap_assert_write_locked(mm);
@@ -3725,14 +3797,14 @@ int mm_take_all_locks(struct mm_struct *mm)
* being written to until mmap_write_unlock() or mmap_write_downgrade()
* is reached.
*/
- mas_for_each(&mas, vma, ULONG_MAX) {
+ for_each_vma(vmi, vma) {
if (signal_pending(current))
goto out_unlock;
vma_start_write(vma);
}
- mas_set(&mas, 0);
- mas_for_each(&mas, vma, ULONG_MAX) {
+ vma_iter_init(&vmi, mm, 0);
+ for_each_vma(vmi, vma) {
if (signal_pending(current))
goto out_unlock;
if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3740,8 +3812,8 @@ int mm_take_all_locks(struct mm_struct *mm)
vm_lock_mapping(mm, vma->vm_file->f_mapping);
}
- mas_set(&mas, 0);
- mas_for_each(&mas, vma, ULONG_MAX) {
+ vma_iter_init(&vmi, mm, 0);
+ for_each_vma(vmi, vma) {
if (signal_pending(current))
goto out_unlock;
if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3749,8 +3821,8 @@ int mm_take_all_locks(struct mm_struct *mm)
vm_lock_mapping(mm, vma->vm_file->f_mapping);
}
- mas_set(&mas, 0);
- mas_for_each(&mas, vma, ULONG_MAX) {
+ vma_iter_init(&vmi, mm, 0);
+ for_each_vma(vmi, vma) {
if (signal_pending(current))
goto out_unlock;
if (vma->anon_vma)
@@ -3809,12 +3881,12 @@ void mm_drop_all_locks(struct mm_struct *mm)
{
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ VMA_ITERATOR(vmi, mm, 0);
mmap_assert_write_locked(mm);
BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
- mas_for_each(&mas, vma, ULONG_MAX) {
+ for_each_vma(vmi, vma) {
if (vma->anon_vma)
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
vm_unlock_anon_vma(avc->anon_vma);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index f8a4544b4601db..8c6cd88252738d 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -32,6 +32,7 @@
#include <linux/sched/sysctl.h>
#include <linux/userfaultfd_k.h>
#include <linux/memory-tiers.h>
+#include <uapi/linux/mman.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -129,7 +130,8 @@ static long change_pte_range(struct mmu_gather *tlb,
/* Also skip shared copy-on-write pages */
if (is_cow_mapping(vma->vm_flags) &&
- folio_ref_count(folio) != 1)
+ (folio_maybe_dma_pinned(folio) ||
+ folio_likely_mapped_shared(folio)))
continue;
/*
@@ -743,6 +745,15 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
}
}
+ /*
+ * checking if memory is sealed.
+ * can_modify_mm assumes we have acquired the lock on MM.
+ */
+ if (unlikely(!can_modify_mm(current->mm, start, end))) {
+ error = -EPERM;
+ goto out;
+ }
+
prev = vma_prev(&vmi);
if (start > vma->vm_start)
prev = vma;
diff --git a/mm/mremap.c b/mm/mremap.c
index 38d98465f3d825..5f96bc5ee91827 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -205,7 +205,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
*/
if (pte_present(pte))
force_flush = true;
- pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
+ pte = move_pte(pte, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
set_pte_at(mm, new_addr, new_pte, pte);
}
@@ -902,7 +902,25 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
return -ENOMEM;
+ /*
+ * In mremap_to().
+ * Move a VMA to another location, check if src addr is sealed.
+ *
+ * Place can_modify_mm here because mremap_to()
+ * does its own checking for address range, and we only
+ * check the sealing after passing those checks.
+ *
+ * can_modify_mm assumes we have acquired the lock on MM.
+ */
+ if (unlikely(!can_modify_mm(mm, addr, addr + old_len)))
+ return -EPERM;
+
if (flags & MREMAP_FIXED) {
+ /*
+ * In mremap_to().
+ * VMA is moved to dst address, and munmap dst first.
+ * do_munmap will check if dst is sealed.
+ */
ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
if (ret)
goto out;
@@ -1062,6 +1080,19 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
}
/*
+ * Below is shrink/expand case (not mremap_to())
+ * Check if src address is sealed, if so, reject.
+ * In other words, prevent shrinking or expanding a sealed VMA.
+ *
+ * Place can_modify_mm here so we can keep the logic related to
+ * shrink/expand together.
+ */
+ if (unlikely(!can_modify_mm(mm, addr, addr + old_len))) {
+ ret = -EPERM;
+ goto out;
+ }
+
+ /*
* Always allow a shrinking remap: that just unmaps
* the unnecessary pages..
* do_vmi_munmap does all the needed commit accounting, and
diff --git a/mm/mseal.c b/mm/mseal.c
new file mode 100644
index 00000000000000..bf783bba8ed0b9
--- /dev/null
+++ b/mm/mseal.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implement mseal() syscall.
+ *
+ * Copyright (c) 2023,2024 Google, Inc.
+ *
+ * Author: Jeff Xu <jeffxu@chromium.org>
+ */
+
+#include <linux/mempolicy.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/mm_inline.h>
+#include <linux/mmu_context.h>
+#include <linux/syscalls.h>
+#include <linux/sched.h>
+#include "internal.h"
+
+static inline bool vma_is_sealed(struct vm_area_struct *vma)
+{
+ return (vma->vm_flags & VM_SEALED);
+}
+
+static inline void set_vma_sealed(struct vm_area_struct *vma)
+{
+ vm_flags_set(vma, VM_SEALED);
+}
+
+/*
+ * check if a vma is sealed for modification.
+ * return true, if modification is allowed.
+ */
+static bool can_modify_vma(struct vm_area_struct *vma)
+{
+ if (unlikely(vma_is_sealed(vma)))
+ return false;
+
+ return true;
+}
+
+static bool is_madv_discard(int behavior)
+{
+ return behavior &
+ (MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED |
+ MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK);
+}
+
+static bool is_ro_anon(struct vm_area_struct *vma)
+{
+ /* check anonymous mapping. */
+ if (vma->vm_file || vma->vm_flags & VM_SHARED)
+ return false;
+
+ /*
+ * check for non-writable:
+ * PROT=RO or PKRU is not writeable.
+ */
+ if (!(vma->vm_flags & VM_WRITE) ||
+ !arch_vma_access_permitted(vma, true, false, false))
+ return true;
+
+ return false;
+}
+
+/*
+ * Check if the vmas of a memory range are allowed to be modified.
+ * the memory ranger can have a gap (unallocated memory).
+ * return true, if it is allowed.
+ */
+bool can_modify_mm(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ struct vm_area_struct *vma;
+
+ VMA_ITERATOR(vmi, mm, start);
+
+ /* going through each vma to check. */
+ for_each_vma_range(vmi, vma, end) {
+ if (unlikely(!can_modify_vma(vma)))
+ return false;
+ }
+
+ /* Allow by default. */
+ return true;
+}
+
+/*
+ * Check if the vmas of a memory range are allowed to be modified by madvise.
+ * the memory ranger can have a gap (unallocated memory).
+ * return true, if it is allowed.
+ */
+bool can_modify_mm_madv(struct mm_struct *mm, unsigned long start, unsigned long end,
+ int behavior)
+{
+ struct vm_area_struct *vma;
+
+ VMA_ITERATOR(vmi, mm, start);
+
+ if (!is_madv_discard(behavior))
+ return true;
+
+ /* going through each vma to check. */
+ for_each_vma_range(vmi, vma, end)
+ if (unlikely(is_ro_anon(vma) && !can_modify_vma(vma)))
+ return false;
+
+ /* Allow by default. */
+ return true;
+}
+
+static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ struct vm_area_struct **prev, unsigned long start,
+ unsigned long end, vm_flags_t newflags)
+{
+ int ret = 0;
+ vm_flags_t oldflags = vma->vm_flags;
+
+ if (newflags == oldflags)
+ goto out;
+
+ vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto out;
+ }
+
+ set_vma_sealed(vma);
+out:
+ *prev = vma;
+ return ret;
+}
+
+/*
+ * Check for do_mseal:
+ * 1> start is part of a valid vma.
+ * 2> end is part of a valid vma.
+ * 3> No gap (unallocated address) between start and end.
+ * 4> map is sealable.
+ */
+static int check_mm_seal(unsigned long start, unsigned long end)
+{
+ struct vm_area_struct *vma;
+ unsigned long nstart = start;
+
+ VMA_ITERATOR(vmi, current->mm, start);
+
+ /* going through each vma to check. */
+ for_each_vma_range(vmi, vma, end) {
+ if (vma->vm_start > nstart)
+ /* unallocated memory found. */
+ return -ENOMEM;
+
+ if (vma->vm_end >= end)
+ return 0;
+
+ nstart = vma->vm_end;
+ }
+
+ return -ENOMEM;
+}
+
+/*
+ * Apply sealing.
+ */
+static int apply_mm_seal(unsigned long start, unsigned long end)
+{
+ unsigned long nstart;
+ struct vm_area_struct *vma, *prev;
+
+ VMA_ITERATOR(vmi, current->mm, start);
+
+ vma = vma_iter_load(&vmi);
+ /*
+ * Note: check_mm_seal should already checked ENOMEM case.
+ * so vma should not be null, same for the other ENOMEM cases.
+ */
+ prev = vma_prev(&vmi);
+ if (start > vma->vm_start)
+ prev = vma;
+
+ nstart = start;
+ for_each_vma_range(vmi, vma, end) {
+ int error;
+ unsigned long tmp;
+ vm_flags_t newflags;
+
+ newflags = vma->vm_flags | VM_SEALED;
+ tmp = vma->vm_end;
+ if (tmp > end)
+ tmp = end;
+ error = mseal_fixup(&vmi, vma, &prev, nstart, tmp, newflags);
+ if (error)
+ return error;
+ nstart = vma_iter_end(&vmi);
+ }
+
+ return 0;
+}
+
+/*
+ * mseal(2) seals the VM's meta data from
+ * selected syscalls.
+ *
+ * addr/len: VM address range.
+ *
+ * The address range by addr/len must meet:
+ * start (addr) must be in a valid VMA.
+ * end (addr + len) must be in a valid VMA.
+ * no gap (unallocated memory) between start and end.
+ * start (addr) must be page aligned.
+ *
+ * len: len will be page aligned implicitly.
+ *
+ * Below VMA operations are blocked after sealing.
+ * 1> Unmapping, moving to another location, and shrinking
+ * the size, via munmap() and mremap(), can leave an empty
+ * space, therefore can be replaced with a VMA with a new
+ * set of attributes.
+ * 2> Moving or expanding a different vma into the current location,
+ * via mremap().
+ * 3> Modifying a VMA via mmap(MAP_FIXED).
+ * 4> Size expansion, via mremap(), does not appear to pose any
+ * specific risks to sealed VMAs. It is included anyway because
+ * the use case is unclear. In any case, users can rely on
+ * merging to expand a sealed VMA.
+ * 5> mprotect and pkey_mprotect.
+ * 6> Some destructive madvice() behavior (e.g. MADV_DONTNEED)
+ * for anonymous memory, when users don't have write permission to the
+ * memory. Those behaviors can alter region contents by discarding pages,
+ * effectively a memset(0) for anonymous memory.
+ *
+ * flags: reserved.
+ *
+ * return values:
+ * zero: success.
+ * -EINVAL:
+ * invalid input flags.
+ * start address is not page aligned.
+ * Address arange (start + len) overflow.
+ * -ENOMEM:
+ * addr is not a valid address (not allocated).
+ * end (start + len) is not a valid address.
+ * a gap (unallocated memory) between start and end.
+ * -EPERM:
+ * - In 32 bit architecture, sealing is not supported.
+ * Note:
+ * user can call mseal(2) multiple times, adding a seal on an
+ * already sealed memory is a no-action (no error).
+ *
+ * unseal() is not supported.
+ */
+static int do_mseal(unsigned long start, size_t len_in, unsigned long flags)
+{
+ size_t len;
+ int ret = 0;
+ unsigned long end;
+ struct mm_struct *mm = current->mm;
+
+ ret = can_do_mseal(flags);
+ if (ret)
+ return ret;
+
+ start = untagged_addr(start);
+ if (!PAGE_ALIGNED(start))
+ return -EINVAL;
+
+ len = PAGE_ALIGN(len_in);
+ /* Check to see whether len was rounded up from small -ve to zero. */
+ if (len_in && !len)
+ return -EINVAL;
+
+ end = start + len;
+ if (end < start)
+ return -EINVAL;
+
+ if (end == start)
+ return 0;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+ /*
+ * First pass, this helps to avoid
+ * partial sealing in case of error in input address range,
+ * e.g. ENOMEM error.
+ */
+ ret = check_mm_seal(start, end);
+ if (ret)
+ goto out;
+
+ /*
+ * Second pass, this should success, unless there are errors
+ * from vma_modify_flags, e.g. merge/split error, or process
+ * reaching the max supported VMAs, however, those cases shall
+ * be rare.
+ */
+ ret = apply_mm_seal(start, end);
+
+out:
+ mmap_write_unlock(current->mm);
+ return ret;
+}
+
+SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long,
+ flags)
+{
+ return do_mseal(start, len, flags);
+}
diff --git a/mm/nommu.c b/mm/nommu.c
index 5ec8f44e7ce976..331d2f778695c9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -110,55 +110,34 @@ unsigned int kobjsize(const void *objp)
return page_size(page);
}
-/**
- * follow_pfn - look up PFN at a user virtual address
- * @vma: memory mapping
- * @address: user virtual address
- * @pfn: location to store found PFN
- *
- * Only IO mappings and raw PFN mappings are allowed.
- *
- * Returns zero and the pfn at @pfn on success, -ve otherwise.
- */
-int follow_pfn(struct vm_area_struct *vma, unsigned long address,
- unsigned long *pfn)
-{
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
- return -EINVAL;
-
- *pfn = address >> PAGE_SHIFT;
- return 0;
-}
-EXPORT_SYMBOL(follow_pfn);
-
void vfree(const void *addr)
{
kfree(addr);
}
EXPORT_SYMBOL(vfree);
-void *__vmalloc(unsigned long size, gfp_t gfp_mask)
+void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask)
{
/*
* You can't specify __GFP_HIGHMEM with kmalloc() since kmalloc()
* returns only a logical address.
*/
- return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
+ return kmalloc_noprof(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
}
-EXPORT_SYMBOL(__vmalloc);
+EXPORT_SYMBOL(__vmalloc_noprof);
-void *__vmalloc_node_range(unsigned long size, unsigned long align,
+void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller)
{
- return __vmalloc(size, gfp_mask);
+ return __vmalloc_noprof(size, gfp_mask);
}
-void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
+void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask,
int node, const void *caller)
{
- return __vmalloc(size, gfp_mask);
+ return __vmalloc_noprof(size, gfp_mask);
}
static void *__vmalloc_user_flags(unsigned long size, gfp_t flags)
@@ -179,11 +158,11 @@ static void *__vmalloc_user_flags(unsigned long size, gfp_t flags)
return ret;
}
-void *vmalloc_user(unsigned long size)
+void *vmalloc_user_noprof(unsigned long size)
{
return __vmalloc_user_flags(size, GFP_KERNEL | __GFP_ZERO);
}
-EXPORT_SYMBOL(vmalloc_user);
+EXPORT_SYMBOL(vmalloc_user_noprof);
struct page *vmalloc_to_page(const void *addr)
{
@@ -217,13 +196,13 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vmalloc(unsigned long size)
+void *vmalloc_noprof(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL);
+ return __vmalloc_noprof(size, GFP_KERNEL);
}
-EXPORT_SYMBOL(vmalloc);
+EXPORT_SYMBOL(vmalloc_noprof);
-void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc);
+void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc_noprof);
/*
* vzalloc - allocate virtually contiguous memory with zero fill
@@ -237,11 +216,11 @@ void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc)
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vzalloc(unsigned long size)
+void *vzalloc_noprof(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ return __vmalloc_noprof(size, GFP_KERNEL | __GFP_ZERO);
}
-EXPORT_SYMBOL(vzalloc);
+EXPORT_SYMBOL(vzalloc_noprof);
/**
* vmalloc_node - allocate memory on a specific node
@@ -254,11 +233,11 @@ EXPORT_SYMBOL(vzalloc);
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vmalloc_node(unsigned long size, int node)
+void *vmalloc_node_noprof(unsigned long size, int node)
{
- return vmalloc(size);
+ return vmalloc_noprof(size);
}
-EXPORT_SYMBOL(vmalloc_node);
+EXPORT_SYMBOL(vmalloc_node_noprof);
/**
* vzalloc_node - allocate memory on a specific node with zero fill
@@ -272,11 +251,11 @@ EXPORT_SYMBOL(vmalloc_node);
* For tight control over page level allocator and protection flags
* use __vmalloc() instead.
*/
-void *vzalloc_node(unsigned long size, int node)
+void *vzalloc_node_noprof(unsigned long size, int node)
{
- return vzalloc(size);
+ return vzalloc_noprof(size);
}
-EXPORT_SYMBOL(vzalloc_node);
+EXPORT_SYMBOL(vzalloc_node_noprof);
/**
* vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
@@ -285,11 +264,11 @@ EXPORT_SYMBOL(vzalloc_node);
* Allocate enough 32bit PA addressable pages to cover @size from the
* page level allocator and map them into contiguous kernel virtual space.
*/
-void *vmalloc_32(unsigned long size)
+void *vmalloc_32_noprof(unsigned long size)
{
- return __vmalloc(size, GFP_KERNEL);
+ return __vmalloc_noprof(size, GFP_KERNEL);
}
-EXPORT_SYMBOL(vmalloc_32);
+EXPORT_SYMBOL(vmalloc_32_noprof);
/**
* vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
@@ -301,15 +280,15 @@ EXPORT_SYMBOL(vmalloc_32);
* VM_USERMAP is set on the corresponding VMA so that subsequent calls to
* remap_vmalloc_range() are permissible.
*/
-void *vmalloc_32_user(unsigned long size)
+void *vmalloc_32_user_noprof(unsigned long size)
{
/*
* We'll have to sort out the ZONE_DMA bits for 64-bit,
* but for now this can simply use vmalloc_user() directly.
*/
- return vmalloc_user(size);
+ return vmalloc_user_noprof(size);
}
-EXPORT_SYMBOL(vmalloc_32_user);
+EXPORT_SYMBOL(vmalloc_32_user_noprof);
void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 8d6a207c3c5905..4d7a0004df2cac 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -724,7 +724,6 @@ static struct ctl_table vm_oom_kill_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {}
};
#endif
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 3e19b87049db17..692c0da04cbd03 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -838,13 +838,15 @@ static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
}
/**
- * __wb_calc_thresh - @wb's share of dirty throttling threshold
+ * __wb_calc_thresh - @wb's share of dirty threshold
* @dtc: dirty_throttle_context of interest
+ * @thresh: dirty throttling or dirty background threshold of wb_domain in @dtc
*
- * Note that balance_dirty_pages() will only seriously take it as a hard limit
- * when sleeping max_pause per page is not enough to keep the dirty pages under
- * control. For example, when the device is completely stalled due to some error
- * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
+ * Note that balance_dirty_pages() will only seriously take dirty throttling
+ * threshold as a hard limit when sleeping max_pause per page is not enough
+ * to keep the dirty pages under control. For example, when the device is
+ * completely stalled due to some error conditions, or when there are 1000
+ * dd tasks writing to a slow 10MB/s USB key.
* In the other normal situations, it acts more gently by throttling the tasks
* more (rather than completely block them) when the wb dirty pages go high.
*
@@ -855,19 +857,20 @@ static void mdtc_calc_avail(struct dirty_throttle_control *mdtc,
* The wb's share of dirty limit will be adapting to its throughput and
* bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
*
- * Return: @wb's dirty limit in pages. The term "dirty" in the context of
- * dirty balancing includes all PG_dirty and PG_writeback pages.
+ * Return: @wb's dirty limit in pages. For dirty throttling limit, the term
+ * "dirty" in the context of dirty balancing includes all PG_dirty and
+ * PG_writeback pages.
*/
-static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
+static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc,
+ unsigned long thresh)
{
struct wb_domain *dom = dtc_dom(dtc);
- unsigned long thresh = dtc->thresh;
u64 wb_thresh;
unsigned long numerator, denominator;
unsigned long wb_min_ratio, wb_max_ratio;
/*
- * Calculate this BDI's share of the thresh ratio.
+ * Calculate this wb's share of the thresh ratio.
*/
fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
&numerator, &denominator);
@@ -887,9 +890,28 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
{
- struct dirty_throttle_control gdtc = { GDTC_INIT(wb),
- .thresh = thresh };
- return __wb_calc_thresh(&gdtc);
+ struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
+
+ return __wb_calc_thresh(&gdtc, thresh);
+}
+
+unsigned long cgwb_calc_thresh(struct bdi_writeback *wb)
+{
+ struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
+ struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) };
+ unsigned long filepages = 0, headroom = 0, writeback = 0;
+
+ gdtc.avail = global_dirtyable_memory();
+ gdtc.dirty = global_node_page_state(NR_FILE_DIRTY) +
+ global_node_page_state(NR_WRITEBACK);
+
+ mem_cgroup_wb_stats(wb, &filepages, &headroom,
+ &mdtc.dirty, &writeback);
+ mdtc.dirty += writeback;
+ mdtc_calc_avail(&mdtc, filepages, headroom);
+ domain_dirty_limits(&mdtc);
+
+ return __wb_calc_thresh(&mdtc, mdtc.thresh);
}
/*
@@ -1636,7 +1658,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
* wb_position_ratio() will let the dirtier task progress
* at some rate <= (write_bw / 2) for bringing down wb_dirty.
*/
- dtc->wb_thresh = __wb_calc_thresh(dtc);
+ dtc->wb_thresh = __wb_calc_thresh(dtc, dtc->thresh);
dtc->wb_bg_thresh = dtc->thresh ?
div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0;
@@ -1675,7 +1697,7 @@ static int balance_dirty_pages(struct bdi_writeback *wb,
struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
&mdtc_stor : NULL;
struct dirty_throttle_control *sdtc;
- unsigned long nr_reclaimable; /* = file_dirty */
+ unsigned long nr_dirty;
long period;
long pause;
long max_pause;
@@ -1696,9 +1718,9 @@ static int balance_dirty_pages(struct bdi_writeback *wb,
unsigned long m_thresh = 0;
unsigned long m_bg_thresh = 0;
- nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
+ nr_dirty = global_node_page_state(NR_FILE_DIRTY);
gdtc->avail = global_dirtyable_memory();
- gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);
+ gdtc->dirty = nr_dirty + global_node_page_state(NR_WRITEBACK);
domain_dirty_limits(gdtc);
@@ -1749,7 +1771,7 @@ static int balance_dirty_pages(struct bdi_writeback *wb,
* In normal mode, we start background writeout at the lower
* background_thresh, to keep the amount of dirty memory low.
*/
- if (!laptop_mode && nr_reclaimable > gdtc->bg_thresh &&
+ if (!laptop_mode && nr_dirty > gdtc->bg_thresh &&
!writeback_in_progress(wb))
wb_start_background_writeback(wb);
@@ -2095,7 +2117,7 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
if (gdtc->dirty > gdtc->bg_thresh)
return true;
- thresh = wb_calc_thresh(gdtc->wb, gdtc->bg_thresh);
+ thresh = __wb_calc_thresh(gdtc, gdtc->bg_thresh);
if (thresh < 2 * wb_stat_error())
reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
else
@@ -2115,7 +2137,7 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
if (mdtc->dirty > mdtc->bg_thresh)
return true;
- thresh = wb_calc_thresh(mdtc->wb, mdtc->bg_thresh);
+ thresh = __wb_calc_thresh(mdtc, mdtc->bg_thresh);
if (thresh < 2 * wb_stat_error())
reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
else
@@ -2291,7 +2313,6 @@ static struct ctl_table vm_page_writeback_sysctls[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {}
};
#endif
@@ -2700,17 +2721,20 @@ void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb)
}
/*
- * Mark the folio dirty, and set it dirty in the page cache, and mark
- * the inode dirty.
+ * Mark the folio dirty, and set it dirty in the page cache.
*
* If warn is true, then emit a warning if the folio is not uptodate and has
* not been truncated.
*
- * The caller must hold folio_memcg_lock(). Most callers have the folio
- * locked. A few have the folio blocked from truncation through other
- * means (eg zap_vma_pages() has it mapped and is holding the page table
- * lock). This can also be called from mark_buffer_dirty(), which I
- * cannot prove is always protected against truncate.
+ * The caller must hold folio_memcg_lock(). It is the caller's
+ * responsibility to prevent the folio from being truncated while
+ * this function is in progress, although it may have been truncated
+ * before this function is called. Most callers have the folio locked.
+ * A few have the folio blocked from truncation through other means (e.g.
+ * zap_vma_pages() has it mapped and is holding the page table lock).
+ * When called from mark_buffer_dirty(), the filesystem should hold a
+ * reference to the buffer_head that is being marked dirty, which causes
+ * try_to_free_buffers() to fail.
*/
void __folio_mark_dirty(struct folio *folio, struct address_space *mapping,
int warn)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 14d39f34d3367f..cd584aace6bfde 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -54,6 +54,7 @@
#include <linux/khugepaged.h>
#include <linux/delayacct.h>
#include <linux/cacheinfo.h>
+#include <linux/pgalloc_tag.h>
#include <asm/div64.h>
#include "internal.h"
#include "shuffle.h"
@@ -206,24 +207,6 @@ EXPORT_SYMBOL(node_states);
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
-/*
- * A cached value of the page's pageblock's migratetype, used when the page is
- * put on a pcplist. Used to avoid the pageblock migratetype lookup when
- * freeing from pcplists in most cases, at the cost of possibly becoming stale.
- * Also the migratetype set in the page does not necessarily match the pcplist
- * index, e.g. page might have MIGRATE_CMA set but be on a pcplist with any
- * other index - this ensures that it will be put on the correct CMA freelist.
- */
-static inline int get_pcppage_migratetype(struct page *page)
-{
- return page->index;
-}
-
-static inline void set_pcppage_migratetype(struct page *page, int migratetype)
-{
- page->index = migratetype;
-}
-
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
unsigned int pageblock_order __read_mostly;
#endif
@@ -332,7 +315,7 @@ static inline bool deferred_pages_enabled(void)
static bool __ref
_deferred_grow_zone(struct zone *zone, unsigned int order)
{
- return deferred_grow_zone(zone, order);
+ return deferred_grow_zone(zone, order);
}
#else
static inline bool deferred_pages_enabled(void)
@@ -523,7 +506,7 @@ static inline unsigned int order_to_pindex(int migratetype, int order)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (order > PAGE_ALLOC_COSTLY_ORDER) {
- VM_BUG_ON(order != pageblock_order);
+ VM_BUG_ON(order != HPAGE_PMD_ORDER);
return NR_LOWORDER_PCP_LISTS;
}
#else
@@ -539,7 +522,7 @@ static inline int pindex_to_order(unsigned int pindex)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pindex == NR_LOWORDER_PCP_LISTS)
- order = pageblock_order;
+ order = HPAGE_PMD_ORDER;
#else
VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
#endif
@@ -552,20 +535,12 @@ static inline bool pcp_allowed_order(unsigned int order)
if (order <= PAGE_ALLOC_COSTLY_ORDER)
return true;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (order == pageblock_order)
+ if (order == HPAGE_PMD_ORDER)
return true;
#endif
return false;
}
-static inline void free_the_page(struct page *page, unsigned int order)
-{
- if (pcp_allowed_order(order)) /* Via pcp? */
- free_unref_page(page, order);
- else
- __free_pages_ok(page, order, FPI_NONE);
-}
-
/*
* Higher-order pages are called "compound pages". They are structured thusly:
*
@@ -590,20 +565,6 @@ void prep_compound_page(struct page *page, unsigned int order)
prep_compound_head(page, order);
}
-void destroy_large_folio(struct folio *folio)
-{
- if (folio_test_hugetlb(folio)) {
- free_huge_folio(folio);
- return;
- }
-
- if (folio_test_large_rmappable(folio))
- folio_undo_large_rmappable(folio);
-
- mem_cgroup_uncharge(folio);
- free_the_page(&folio->page, folio_order(folio));
-}
-
static inline void set_buddy_order(struct page *page, unsigned int order)
{
set_page_private(page, order);
@@ -634,12 +595,14 @@ compaction_capture(struct capture_control *capc, struct page *page,
return false;
/*
- * Do not let lower order allocations pollute a movable pageblock.
+ * Do not let lower order allocations pollute a movable pageblock
+ * unless compaction is also requesting movable pages.
* This might let an unmovable request use a reclaimable pageblock
* and vice-versa but no more than normal fallback logic which can
* have trouble finding a high-order free page.
*/
- if (order < pageblock_order && migratetype == MIGRATE_MOVABLE)
+ if (order < pageblock_order && migratetype == MIGRATE_MOVABLE &&
+ capc->cc->migratetype != MIGRATE_MOVABLE)
return false;
capc->page = page;
@@ -660,23 +623,33 @@ compaction_capture(struct capture_control *capc, struct page *page,
}
#endif /* CONFIG_COMPACTION */
-/* Used for pages not on another list */
-static inline void add_to_free_list(struct page *page, struct zone *zone,
- unsigned int order, int migratetype)
+static inline void account_freepages(struct zone *zone, int nr_pages,
+ int migratetype)
{
- struct free_area *area = &zone->free_area[order];
+ if (is_migrate_isolate(migratetype))
+ return;
- list_add(&page->buddy_list, &area->free_list[migratetype]);
- area->nr_free++;
+ __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
+
+ if (is_migrate_cma(migratetype))
+ __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
}
/* Used for pages not on another list */
-static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
- unsigned int order, int migratetype)
+static inline void __add_to_free_list(struct page *page, struct zone *zone,
+ unsigned int order, int migratetype,
+ bool tail)
{
struct free_area *area = &zone->free_area[order];
- list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
+ VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
+ "page type is %lu, passed migratetype is %d (nr=%d)\n",
+ get_pageblock_migratetype(page), migratetype, 1 << order);
+
+ if (tail)
+ list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
+ else
+ list_add(&page->buddy_list, &area->free_list[migratetype]);
area->nr_free++;
}
@@ -686,16 +659,28 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
* allocation again (e.g., optimization for memory onlining).
*/
static inline void move_to_free_list(struct page *page, struct zone *zone,
- unsigned int order, int migratetype)
+ unsigned int order, int old_mt, int new_mt)
{
struct free_area *area = &zone->free_area[order];
- list_move_tail(&page->buddy_list, &area->free_list[migratetype]);
+ /* Free page moving can fail, so it happens before the type update */
+ VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt,
+ "page type is %lu, passed migratetype is %d (nr=%d)\n",
+ get_pageblock_migratetype(page), old_mt, 1 << order);
+
+ list_move_tail(&page->buddy_list, &area->free_list[new_mt]);
+
+ account_freepages(zone, -(1 << order), old_mt);
+ account_freepages(zone, 1 << order, new_mt);
}
-static inline void del_page_from_free_list(struct page *page, struct zone *zone,
- unsigned int order)
+static inline void __del_page_from_free_list(struct page *page, struct zone *zone,
+ unsigned int order, int migratetype)
{
+ VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
+ "page type is %lu, passed migratetype is %d (nr=%d)\n",
+ get_pageblock_migratetype(page), migratetype, 1 << order);
+
/* clear reported state and update reported page count */
if (page_reported(page))
__ClearPageReported(page);
@@ -706,6 +691,13 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone,
zone->free_area[order].nr_free--;
}
+static inline void del_page_from_free_list(struct page *page, struct zone *zone,
+ unsigned int order, int migratetype)
+{
+ __del_page_from_free_list(page, zone, order, migratetype);
+ account_freepages(zone, -(1 << order), migratetype);
+}
+
static inline struct page *get_page_from_free_area(struct free_area *area,
int migratetype)
{
@@ -777,16 +769,16 @@ static inline void __free_one_page(struct page *page,
VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
VM_BUG_ON(migratetype == -1);
- if (likely(!is_migrate_isolate(migratetype)))
- __mod_zone_freepage_state(zone, 1 << order, migratetype);
-
VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
VM_BUG_ON_PAGE(bad_range(zone, page), page);
+ account_freepages(zone, 1 << order, migratetype);
+
while (order < MAX_PAGE_ORDER) {
+ int buddy_mt = migratetype;
+
if (compaction_capture(capc, page, order, migratetype)) {
- __mod_zone_freepage_state(zone, -(1 << order),
- migratetype);
+ account_freepages(zone, -(1 << order), migratetype);
return;
}
@@ -801,11 +793,11 @@ static inline void __free_one_page(struct page *page,
* pageblock isolation could cause incorrect freepage or CMA
* accounting or HIGHATOMIC accounting.
*/
- int buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn);
+ buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn);
- if (migratetype != buddy_mt
- && (!migratetype_is_mergeable(migratetype) ||
- !migratetype_is_mergeable(buddy_mt)))
+ if (migratetype != buddy_mt &&
+ (!migratetype_is_mergeable(migratetype) ||
+ !migratetype_is_mergeable(buddy_mt)))
goto done_merging;
}
@@ -814,9 +806,19 @@ static inline void __free_one_page(struct page *page,
* merge with it and move up one order.
*/
if (page_is_guard(buddy))
- clear_page_guard(zone, buddy, order, migratetype);
+ clear_page_guard(zone, buddy, order);
else
- del_page_from_free_list(buddy, zone, order);
+ __del_page_from_free_list(buddy, zone, order, buddy_mt);
+
+ if (unlikely(buddy_mt != migratetype)) {
+ /*
+ * Match buddy type. This ensures that an
+ * expand() down the line puts the sub-blocks
+ * on the right freelists.
+ */
+ set_pageblock_migratetype(buddy, migratetype);
+ }
+
combined_pfn = buddy_pfn & pfn;
page = page + (combined_pfn - pfn);
pfn = combined_pfn;
@@ -833,74 +835,13 @@ done_merging:
else
to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
- if (to_tail)
- add_to_free_list_tail(page, zone, order, migratetype);
- else
- add_to_free_list(page, zone, order, migratetype);
+ __add_to_free_list(page, zone, order, migratetype, to_tail);
/* Notify page reporting subsystem of freed page */
if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY))
page_reporting_notify_free(order);
}
-/**
- * split_free_page() -- split a free page at split_pfn_offset
- * @free_page: the original free page
- * @order: the order of the page
- * @split_pfn_offset: split offset within the page
- *
- * Return -ENOENT if the free page is changed, otherwise 0
- *
- * It is used when the free page crosses two pageblocks with different migratetypes
- * at split_pfn_offset within the page. The split free page will be put into
- * separate migratetype lists afterwards. Otherwise, the function achieves
- * nothing.
- */
-int split_free_page(struct page *free_page,
- unsigned int order, unsigned long split_pfn_offset)
-{
- struct zone *zone = page_zone(free_page);
- unsigned long free_page_pfn = page_to_pfn(free_page);
- unsigned long pfn;
- unsigned long flags;
- int free_page_order;
- int mt;
- int ret = 0;
-
- if (split_pfn_offset == 0)
- return ret;
-
- spin_lock_irqsave(&zone->lock, flags);
-
- if (!PageBuddy(free_page) || buddy_order(free_page) != order) {
- ret = -ENOENT;
- goto out;
- }
-
- mt = get_pfnblock_migratetype(free_page, free_page_pfn);
- if (likely(!is_migrate_isolate(mt)))
- __mod_zone_freepage_state(zone, -(1UL << order), mt);
-
- del_page_from_free_list(free_page, zone, order);
- for (pfn = free_page_pfn;
- pfn < free_page_pfn + (1UL << order);) {
- int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
-
- free_page_order = min_t(unsigned int,
- pfn ? __ffs(pfn) : order,
- __fls(split_pfn_offset));
- __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
- mt, FPI_NONE);
- pfn += 1UL << free_page_order;
- split_pfn_offset -= (1UL << free_page_order);
- /* we have done the first part, now switch to second part */
- if (split_pfn_offset == 0)
- split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
- }
-out:
- spin_unlock_irqrestore(&zone->lock, flags);
- return ret;
-}
/*
* A bad page could be due to a number of fields. Instead of multiple branches,
* try and check multiple fields with one check. The caller must do a detailed
@@ -996,6 +937,10 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
bad_page(page, "nonzero entire_mapcount");
goto out;
}
+ if (unlikely(folio_large_mapcount(folio))) {
+ bad_page(page, "nonzero large_mapcount");
+ goto out;
+ }
if (unlikely(atomic_read(&folio->_nr_pages_mapped))) {
bad_page(page, "nonzero nr_pages_mapped");
goto out;
@@ -1006,10 +951,11 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
}
break;
case 2:
- /*
- * the second tail page: ->mapping is
- * deferred_list.next -- ignore value.
- */
+ /* the second tail page: deferred_list overlaps ->mapping */
+ if (unlikely(!list_empty(&folio->_deferred_list))) {
+ bad_page(page, "on deferred list");
+ goto out;
+ }
break;
default:
if (page->mapping != TAIL_MAPPING) {
@@ -1070,7 +1016,7 @@ static inline bool should_skip_kasan_poison(struct page *page)
return page_kasan_tag(page) == KASAN_TAG_KERNEL;
}
-static void kernel_init_pages(struct page *page, int numpages)
+void kernel_init_pages(struct page *page, int numpages)
{
int i;
@@ -1101,6 +1047,7 @@ __always_inline bool free_pages_prepare(struct page *page,
/* Do not let hwpoison pages hit pcplists/buddy */
reset_page_owner(page, order);
page_table_check_free(page, order);
+ pgalloc_tag_sub(page, 1 << order);
return false;
}
@@ -1140,6 +1087,7 @@ __always_inline bool free_pages_prepare(struct page *page,
page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
reset_page_owner(page, order);
page_table_check_free(page, order);
+ pgalloc_tag_sub(page, 1 << order);
if (!PageHighMem(page)) {
debug_check_no_locks_freed(page_address(page),
@@ -1191,7 +1139,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
{
unsigned long flags;
unsigned int order;
- bool isolated_pageblocks;
struct page *page;
/*
@@ -1204,7 +1151,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
pindex = pindex - 1;
spin_lock_irqsave(&zone->lock, flags);
- isolated_pageblocks = has_isolate_pageblock(zone);
while (count > 0) {
struct list_head *list;
@@ -1220,23 +1166,19 @@ static void free_pcppages_bulk(struct zone *zone, int count,
order = pindex_to_order(pindex);
nr_pages = 1 << order;
do {
+ unsigned long pfn;
int mt;
page = list_last_entry(list, struct page, pcp_list);
- mt = get_pcppage_migratetype(page);
+ pfn = page_to_pfn(page);
+ mt = get_pfnblock_migratetype(page, pfn);
/* must delete to avoid corrupting pcp list */
list_del(&page->pcp_list);
count -= nr_pages;
pcp->count -= nr_pages;
- /* MIGRATE_ISOLATE page should not go to pcplists */
- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
- /* Pageblock could have been isolated meanwhile */
- if (unlikely(isolated_pageblocks))
- mt = get_pageblock_migratetype(page);
-
- __free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE);
+ __free_one_page(page, pfn, zone, order, mt, FPI_NONE);
trace_mm_page_pcpu_drain(page, order, mt);
} while (count > 0 && !list_empty(list));
}
@@ -1244,18 +1186,15 @@ static void free_pcppages_bulk(struct zone *zone, int count,
spin_unlock_irqrestore(&zone->lock, flags);
}
-static void free_one_page(struct zone *zone,
- struct page *page, unsigned long pfn,
- unsigned int order,
- int migratetype, fpi_t fpi_flags)
+static void free_one_page(struct zone *zone, struct page *page,
+ unsigned long pfn, unsigned int order,
+ fpi_t fpi_flags)
{
unsigned long flags;
+ int migratetype;
spin_lock_irqsave(&zone->lock, flags);
- if (unlikely(has_isolate_pageblock(zone) ||
- is_migrate_isolate(migratetype))) {
- migratetype = get_pfnblock_migratetype(page, pfn);
- }
+ migratetype = get_pfnblock_migratetype(page, pfn);
__free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
spin_unlock_irqrestore(&zone->lock, flags);
}
@@ -1263,21 +1202,13 @@ static void free_one_page(struct zone *zone,
static void __free_pages_ok(struct page *page, unsigned int order,
fpi_t fpi_flags)
{
- int migratetype;
unsigned long pfn = page_to_pfn(page);
struct zone *zone = page_zone(page);
if (!free_pages_prepare(page, order))
return;
- /*
- * Calling get_pfnblock_migratetype() without spin_lock_irqsave() here
- * is used to avoid calling get_pfnblock_migratetype() under the lock.
- * This will reduce the lock holding time.
- */
- migratetype = get_pfnblock_migratetype(page, pfn);
-
- free_one_page(zone, page, pfn, order, migratetype, fpi_flags);
+ free_one_page(zone, page, pfn, order, fpi_flags);
__count_vm_events(PGFREE, 1 << order);
}
@@ -1388,6 +1319,7 @@ static inline void expand(struct zone *zone, struct page *page,
int low, int high, int migratetype)
{
unsigned long size = 1 << high;
+ unsigned long nr_added = 0;
while (high > low) {
high--;
@@ -1400,12 +1332,14 @@ static inline void expand(struct zone *zone, struct page *page,
* Corresponding page table entries will not be touched,
* pages will stay not present in virtual address space
*/
- if (set_page_guard(zone, &page[size], high, migratetype))
+ if (set_page_guard(zone, &page[size], high))
continue;
- add_to_free_list(&page[size], zone, high, migratetype);
+ __add_to_free_list(&page[size], zone, high, migratetype, false);
set_buddy_order(&page[size], high);
+ nr_added += size;
}
+ account_freepages(zone, nr_added, migratetype);
}
static void check_new_page_bad(struct page *page)
@@ -1533,6 +1467,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
set_page_owner(page, order, gfp_flags);
page_table_check_alloc(page, order);
+ pgalloc_tag_add(page, current, 1 << order);
}
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
@@ -1573,9 +1508,8 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
page = get_page_from_free_area(area, migratetype);
if (!page)
continue;
- del_page_from_free_list(page, zone, current_order);
+ del_page_from_free_list(page, zone, current_order, migratetype);
expand(zone, page, order, current_order, migratetype);
- set_pcppage_migratetype(page, migratetype);
trace_mm_page_alloc_zone_locked(page, order, migratetype,
pcp_allowed_order(order) &&
migratetype < MIGRATE_PCPTYPES);
@@ -1592,7 +1526,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
*
* The other migratetypes do not have fallbacks.
*/
-static int fallbacks[MIGRATE_TYPES][MIGRATE_PCPTYPES - 1] = {
+static int fallbacks[MIGRATE_PCPTYPES][MIGRATE_PCPTYPES - 1] = {
[MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE },
[MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE },
[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE },
@@ -1610,30 +1544,23 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
#endif
/*
- * Move the free pages in a range to the freelist tail of the requested type.
- * Note that start_page and end_pages are not aligned on a pageblock
- * boundary. If alignment is required, use move_freepages_block()
+ * Change the type of a block and move all its free pages to that
+ * type's freelist.
*/
-static int move_freepages(struct zone *zone,
- unsigned long start_pfn, unsigned long end_pfn,
- int migratetype, int *num_movable)
+static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
+ int old_mt, int new_mt)
{
struct page *page;
- unsigned long pfn;
+ unsigned long pfn, end_pfn;
unsigned int order;
int pages_moved = 0;
- for (pfn = start_pfn; pfn <= end_pfn;) {
+ VM_WARN_ON(start_pfn & (pageblock_nr_pages - 1));
+ end_pfn = pageblock_end_pfn(start_pfn);
+
+ for (pfn = start_pfn; pfn < end_pfn;) {
page = pfn_to_page(pfn);
if (!PageBuddy(page)) {
- /*
- * We assume that pages that could be isolated for
- * migration are movable. But we don't actually try
- * isolating, as that would be expensive.
- */
- if (num_movable &&
- (PageLRU(page) || __PageMovable(page)))
- (*num_movable)++;
pfn++;
continue;
}
@@ -1643,35 +1570,186 @@ static int move_freepages(struct zone *zone,
VM_BUG_ON_PAGE(page_zone(page) != zone, page);
order = buddy_order(page);
- move_to_free_list(page, zone, order, migratetype);
+
+ move_to_free_list(page, zone, order, old_mt, new_mt);
+
pfn += 1 << order;
pages_moved += 1 << order;
}
+ set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
+
return pages_moved;
}
-int move_freepages_block(struct zone *zone, struct page *page,
- int migratetype, int *num_movable)
+static bool prep_move_freepages_block(struct zone *zone, struct page *page,
+ unsigned long *start_pfn,
+ int *num_free, int *num_movable)
{
- unsigned long start_pfn, end_pfn, pfn;
+ unsigned long pfn, start, end;
- if (num_movable)
+ pfn = page_to_pfn(page);
+ start = pageblock_start_pfn(pfn);
+ end = pageblock_end_pfn(pfn);
+
+ /*
+ * The caller only has the lock for @zone, don't touch ranges
+ * that straddle into other zones. While we could move part of
+ * the range that's inside the zone, this call is usually
+ * accompanied by other operations such as migratetype updates
+ * which also should be locked.
+ */
+ if (!zone_spans_pfn(zone, start))
+ return false;
+ if (!zone_spans_pfn(zone, end - 1))
+ return false;
+
+ *start_pfn = start;
+
+ if (num_free) {
+ *num_free = 0;
*num_movable = 0;
+ for (pfn = start; pfn < end;) {
+ page = pfn_to_page(pfn);
+ if (PageBuddy(page)) {
+ int nr = 1 << buddy_order(page);
- pfn = page_to_pfn(page);
- start_pfn = pageblock_start_pfn(pfn);
- end_pfn = pageblock_end_pfn(pfn) - 1;
+ *num_free += nr;
+ pfn += nr;
+ continue;
+ }
+ /*
+ * We assume that pages that could be isolated for
+ * migration are movable. But we don't actually try
+ * isolating, as that would be expensive.
+ */
+ if (PageLRU(page) || __PageMovable(page))
+ (*num_movable)++;
+ pfn++;
+ }
+ }
- /* Do not cross zone boundaries */
- if (!zone_spans_pfn(zone, start_pfn))
- start_pfn = pfn;
- if (!zone_spans_pfn(zone, end_pfn))
- return 0;
+ return true;
+}
+
+static int move_freepages_block(struct zone *zone, struct page *page,
+ int old_mt, int new_mt)
+{
+ unsigned long start_pfn;
+
+ if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
+ return -1;
+
+ return __move_freepages_block(zone, start_pfn, old_mt, new_mt);
+}
+
+#ifdef CONFIG_MEMORY_ISOLATION
+/* Look for a buddy that straddles start_pfn */
+static unsigned long find_large_buddy(unsigned long start_pfn)
+{
+ int order = 0;
+ struct page *page;
+ unsigned long pfn = start_pfn;
+
+ while (!PageBuddy(page = pfn_to_page(pfn))) {
+ /* Nothing found */
+ if (++order > MAX_PAGE_ORDER)
+ return start_pfn;
+ pfn &= ~0UL << order;
+ }
+
+ /*
+ * Found a preceding buddy, but does it straddle?
+ */
+ if (pfn + (1 << buddy_order(page)) > start_pfn)
+ return pfn;
+
+ /* Nothing found */
+ return start_pfn;
+}
+
+/* Split a multi-block free page into its individual pageblocks */
+static void split_large_buddy(struct zone *zone, struct page *page,
+ unsigned long pfn, int order)
+{
+ unsigned long end_pfn = pfn + (1 << order);
+
+ VM_WARN_ON_ONCE(order <= pageblock_order);
+ VM_WARN_ON_ONCE(pfn & (pageblock_nr_pages - 1));
+
+ /* Caller removed page from freelist, buddy info cleared! */
+ VM_WARN_ON_ONCE(PageBuddy(page));
+
+ while (pfn != end_pfn) {
+ int mt = get_pfnblock_migratetype(page, pfn);
+
+ __free_one_page(page, pfn, zone, pageblock_order, mt, FPI_NONE);
+ pfn += pageblock_nr_pages;
+ page = pfn_to_page(pfn);
+ }
+}
+
+/**
+ * move_freepages_block_isolate - move free pages in block for page isolation
+ * @zone: the zone
+ * @page: the pageblock page
+ * @migratetype: migratetype to set on the pageblock
+ *
+ * This is similar to move_freepages_block(), but handles the special
+ * case encountered in page isolation, where the block of interest
+ * might be part of a larger buddy spanning multiple pageblocks.
+ *
+ * Unlike the regular page allocator path, which moves pages while
+ * stealing buddies off the freelist, page isolation is interested in
+ * arbitrary pfn ranges that may have overlapping buddies on both ends.
+ *
+ * This function handles that. Straddling buddies are split into
+ * individual pageblocks. Only the block of interest is moved.
+ *
+ * Returns %true if pages could be moved, %false otherwise.
+ */
+bool move_freepages_block_isolate(struct zone *zone, struct page *page,
+ int migratetype)
+{
+ unsigned long start_pfn, pfn;
+
+ if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
+ return false;
+
+ /* No splits needed if buddies can't span multiple blocks */
+ if (pageblock_order == MAX_PAGE_ORDER)
+ goto move;
+
+ /* We're a tail block in a larger buddy */
+ pfn = find_large_buddy(start_pfn);
+ if (pfn != start_pfn) {
+ struct page *buddy = pfn_to_page(pfn);
+ int order = buddy_order(buddy);
- return move_freepages(zone, start_pfn, end_pfn, migratetype,
- num_movable);
+ del_page_from_free_list(buddy, zone, order,
+ get_pfnblock_migratetype(buddy, pfn));
+ set_pageblock_migratetype(page, migratetype);
+ split_large_buddy(zone, buddy, pfn, order);
+ return true;
+ }
+
+ /* We're the starting block of a larger buddy */
+ if (PageBuddy(page) && buddy_order(page) > pageblock_order) {
+ int order = buddy_order(page);
+
+ del_page_from_free_list(page, zone, order,
+ get_pfnblock_migratetype(page, pfn));
+ set_pageblock_migratetype(page, migratetype);
+ split_large_buddy(zone, page, pfn, order);
+ return true;
+ }
+move:
+ __move_freepages_block(zone, start_pfn,
+ get_pfnblock_migratetype(page, start_pfn),
+ migratetype);
+ return true;
}
+#endif /* CONFIG_MEMORY_ISOLATION */
static void change_pageblock_range(struct page *pageblock_page,
int start_order, int migratetype)
@@ -1755,33 +1833,37 @@ static inline bool boost_watermark(struct zone *zone)
}
/*
- * This function implements actual steal behaviour. If order is large enough,
- * we can steal whole pageblock. If not, we first move freepages in this
- * pageblock to our migratetype and determine how many already-allocated pages
- * are there in the pageblock with a compatible migratetype. If at least half
- * of pages are free or compatible, we can change migratetype of the pageblock
- * itself, so pages freed in the future will be put on the correct free list.
+ * This function implements actual steal behaviour. If order is large enough, we
+ * can claim the whole pageblock for the requested migratetype. If not, we check
+ * the pageblock for constituent pages; if at least half of the pages are free
+ * or compatible, we can still claim the whole block, so pages freed in the
+ * future will be put on the correct free list. Otherwise, we isolate exactly
+ * the order we need from the fallback block and leave its migratetype alone.
*/
-static void steal_suitable_fallback(struct zone *zone, struct page *page,
- unsigned int alloc_flags, int start_type, bool whole_block)
+static struct page *
+steal_suitable_fallback(struct zone *zone, struct page *page,
+ int current_order, int order, int start_type,
+ unsigned int alloc_flags, bool whole_block)
{
- unsigned int current_order = buddy_order(page);
int free_pages, movable_pages, alike_pages;
- int old_block_type;
+ unsigned long start_pfn;
+ int block_type;
- old_block_type = get_pageblock_migratetype(page);
+ block_type = get_pageblock_migratetype(page);
/*
* This can happen due to races and we want to prevent broken
* highatomic accounting.
*/
- if (is_migrate_highatomic(old_block_type))
+ if (is_migrate_highatomic(block_type))
goto single_page;
/* Take ownership for orders >= pageblock_order */
if (current_order >= pageblock_order) {
+ del_page_from_free_list(page, zone, current_order, block_type);
change_pageblock_range(page, current_order, start_type);
- goto single_page;
+ expand(zone, page, order, current_order, start_type);
+ return page;
}
/*
@@ -1796,10 +1878,9 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
if (!whole_block)
goto single_page;
- free_pages = move_freepages_block(zone, page, start_type,
- &movable_pages);
/* moving whole block can fail due to zone boundary conditions */
- if (!free_pages)
+ if (!prep_move_freepages_block(zone, page, &start_pfn, &free_pages,
+ &movable_pages))
goto single_page;
/*
@@ -1817,7 +1898,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
* vice versa, be conservative since we can't distinguish the
* exact migratetype of non-movable pages.
*/
- if (old_block_type == MIGRATE_MOVABLE)
+ if (block_type == MIGRATE_MOVABLE)
alike_pages = pageblock_nr_pages
- (free_pages + movable_pages);
else
@@ -1828,13 +1909,15 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
* compatible migratability as our allocation, claim the whole block.
*/
if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
- page_group_by_mobility_disabled)
- set_pageblock_migratetype(page, start_type);
-
- return;
+ page_group_by_mobility_disabled) {
+ __move_freepages_block(zone, start_pfn, block_type, start_type);
+ return __rmqueue_smallest(zone, order, start_type);
+ }
single_page:
- move_to_free_list(page, zone, current_order, start_type);
+ del_page_from_free_list(page, zone, current_order, block_type);
+ expand(zone, page, order, current_order, block_type);
+ return page;
}
/*
@@ -1901,11 +1984,10 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone)
/* Yoink! */
mt = get_pageblock_migratetype(page);
/* Only reserve normal pageblocks (i.e., they can merge with others) */
- if (migratetype_is_mergeable(mt)) {
- zone->nr_reserved_highatomic += pageblock_nr_pages;
- set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
- move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL);
- }
+ if (migratetype_is_mergeable(mt))
+ if (move_freepages_block(zone, page, mt,
+ MIGRATE_HIGHATOMIC) != -1)
+ zone->nr_reserved_highatomic += pageblock_nr_pages;
out_unlock:
spin_unlock_irqrestore(&zone->lock, flags);
@@ -1929,7 +2011,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
struct zone *zone;
struct page *page;
int order;
- bool ret;
+ int ret;
for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
ac->nodemask) {
@@ -1944,11 +2026,13 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < NR_PAGE_ORDERS; order++) {
struct free_area *area = &(zone->free_area[order]);
+ int mt;
page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
if (!page)
continue;
+ mt = get_pageblock_migratetype(page);
/*
* In page freeing path, migratetype change is racy so
* we can counter several free pages in a pageblock
@@ -1956,7 +2040,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
* from highatomic to ac->migratetype. So we should
* adjust the count once.
*/
- if (is_migrate_highatomic_page(page)) {
+ if (is_migrate_highatomic(mt)) {
/*
* It should never happen but changes to
* locking could inadvertently allow a per-cpu
@@ -1978,10 +2062,14 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
* of pageblocks that cannot be completely freed
* may increase.
*/
- set_pageblock_migratetype(page, ac->migratetype);
- ret = move_freepages_block(zone, page, ac->migratetype,
- NULL);
- if (ret) {
+ ret = move_freepages_block(zone, page, mt,
+ ac->migratetype);
+ /*
+ * Reserving this block already succeeded, so this should
+ * not fail on zone boundaries.
+ */
+ WARN_ON_ONCE(ret == -1);
+ if (ret > 0) {
spin_unlock_irqrestore(&zone->lock, flags);
return ret;
}
@@ -2002,7 +2090,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
* deviation from the rest of this file, to make the for loop
* condition simpler.
*/
-static __always_inline bool
+static __always_inline struct page *
__rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
unsigned int alloc_flags)
{
@@ -2049,7 +2137,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
goto do_steal;
}
- return false;
+ return NULL;
find_smallest:
for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
@@ -2069,16 +2157,53 @@ find_smallest:
do_steal:
page = get_page_from_free_area(area, fallback_mt);
- steal_suitable_fallback(zone, page, alloc_flags, start_migratetype,
- can_steal);
+ /* take off list, maybe claim block, expand remainder */
+ page = steal_suitable_fallback(zone, page, current_order, order,
+ start_migratetype, alloc_flags, can_steal);
trace_mm_page_alloc_extfrag(page, order, current_order,
start_migratetype, fallback_mt);
- return true;
-
+ return page;
}
+#ifdef CONFIG_CMA
+/*
+ * GFP_MOVABLE allocation could drain UNMOVABLE & RECLAIMABLE page blocks via
+ * the help of CMA which makes GFP_KERNEL failed. Checking if zone_watermark_ok
+ * again without ALLOC_CMA to see if to use CMA first.
+ */
+static bool use_cma_first(struct zone *zone, unsigned int order, unsigned int alloc_flags)
+{
+ unsigned long watermark;
+ bool cma_first = false;
+
+ watermark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
+ /* check if GFP_MOVABLE pass previous zone_watermark_ok via the help of CMA */
+ if (zone_watermark_ok(zone, order, watermark, 0, alloc_flags & (~ALLOC_CMA))) {
+ /*
+ * Balance movable allocations between regular and CMA areas by
+ * allocating from CMA when over half of the zone's free memory
+ * is in the CMA area.
+ */
+ cma_first = (zone_page_state(zone, NR_FREE_CMA_PAGES) >
+ zone_page_state(zone, NR_FREE_PAGES) / 2);
+ } else {
+ /*
+ * watermark failed means UNMOVABLE & RECLAIMBLE is not enough
+ * now, we should use cma first to keep them stay around the
+ * corresponding watermark
+ */
+ cma_first = true;
+ }
+ return cma_first;
+}
+#else
+static bool use_cma_first(struct zone *zone, unsigned int order, unsigned int alloc_flags)
+{
+ return false;
+}
+#endif
/*
* Do the hard work of removing an element from the buddy allocator.
* Call me with the zone->lock already held.
@@ -2092,26 +2217,25 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
if (IS_ENABLED(CONFIG_CMA)) {
/*
* Balance movable allocations between regular and CMA areas by
- * allocating from CMA when over half of the zone's free memory
- * is in the CMA area.
+ * allocating from CMA base on judging zone_watermark_ok again
+ * to see if the latest check got pass via the help of CMA
*/
if (alloc_flags & ALLOC_CMA &&
- zone_page_state(zone, NR_FREE_CMA_PAGES) >
- zone_page_state(zone, NR_FREE_PAGES) / 2) {
+ use_cma_first(zone, order, alloc_flags)) {
page = __rmqueue_cma_fallback(zone, order);
if (page)
return page;
}
}
-retry:
+
page = __rmqueue_smallest(zone, order, migratetype);
if (unlikely(!page)) {
if (alloc_flags & ALLOC_CMA)
page = __rmqueue_cma_fallback(zone, order);
- if (!page && __rmqueue_fallback(zone, order, migratetype,
- alloc_flags))
- goto retry;
+ if (!page)
+ page = __rmqueue_fallback(zone, order, migratetype,
+ alloc_flags);
}
return page;
}
@@ -2146,12 +2270,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* pages are ordered properly.
*/
list_add_tail(&page->pcp_list, list);
- if (is_migrate_cma(get_pcppage_migratetype(page)))
- __mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
- -(1 << order));
}
-
- __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
spin_unlock_irqrestore(&zone->lock, flags);
return i;
@@ -2216,12 +2335,15 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
*/
static void drain_pages_zone(unsigned int cpu, struct zone *zone)
{
- struct per_cpu_pages *pcp;
+ struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+ int count = READ_ONCE(pcp->count);
+
+ while (count) {
+ int to_drain = min(count, pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX);
+ count -= to_drain;
- pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
- if (pcp->count) {
spin_lock(&pcp->lock);
- free_pcppages_bulk(zone, pcp->count, pcp, 0);
+ free_pcppages_bulk(zone, to_drain, pcp, 0);
spin_unlock(&pcp->lock);
}
}
@@ -2339,19 +2461,6 @@ void drain_all_pages(struct zone *zone)
__drain_all_pages(zone, false);
}
-static bool free_unref_page_prepare(struct page *page, unsigned long pfn,
- unsigned int order)
-{
- int migratetype;
-
- if (!free_pages_prepare(page, order))
- return false;
-
- migratetype = get_pfnblock_migratetype(page, pfn);
- set_pcppage_migratetype(page, migratetype);
- return true;
-}
-
static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free_high)
{
int min_nr_free, max_nr_free;
@@ -2482,9 +2591,14 @@ void free_unref_page(struct page *page, unsigned int order)
struct per_cpu_pages *pcp;
struct zone *zone;
unsigned long pfn = page_to_pfn(page);
- int migratetype, pcpmigratetype;
+ int migratetype;
- if (!free_unref_page_prepare(page, pfn, order))
+ if (!pcp_allowed_order(order)) {
+ __free_pages_ok(page, order, FPI_NONE);
+ return;
+ }
+
+ if (!free_pages_prepare(page, order))
return;
/*
@@ -2494,23 +2608,23 @@ void free_unref_page(struct page *page, unsigned int order)
* get those areas back if necessary. Otherwise, we may have to free
* excessively into the page allocator
*/
- migratetype = pcpmigratetype = get_pcppage_migratetype(page);
+ migratetype = get_pfnblock_migratetype(page, pfn);
if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
if (unlikely(is_migrate_isolate(migratetype))) {
- free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
+ free_one_page(page_zone(page), page, pfn, order, FPI_NONE);
return;
}
- pcpmigratetype = MIGRATE_MOVABLE;
+ migratetype = MIGRATE_MOVABLE;
}
zone = page_zone(page);
pcp_trylock_prepare(UP_flags);
pcp = pcp_spin_trylock(zone->per_cpu_pageset);
if (pcp) {
- free_unref_page_commit(zone, pcp, page, pcpmigratetype, order);
+ free_unref_page_commit(zone, pcp, page, migratetype, order);
pcp_spin_unlock(pcp);
} else {
- free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);
+ free_one_page(zone, page, pfn, order, FPI_NONE);
}
pcp_trylock_finish(UP_flags);
}
@@ -2523,7 +2637,7 @@ void free_unref_folios(struct folio_batch *folios)
unsigned long __maybe_unused UP_flags;
struct per_cpu_pages *pcp = NULL;
struct zone *locked_zone = NULL;
- int i, j, migratetype;
+ int i, j;
/* Prepare folios for freeing */
for (i = 0, j = 0; i < folios->nr; i++) {
@@ -2533,18 +2647,15 @@ void free_unref_folios(struct folio_batch *folios)
if (order > 0 && folio_test_large_rmappable(folio))
folio_undo_large_rmappable(folio);
- if (!free_unref_page_prepare(&folio->page, pfn, order))
+ if (!free_pages_prepare(&folio->page, order))
continue;
-
/*
- * Free isolated folios and orders not handled on the PCP
- * directly to the allocator, see comment in free_unref_page.
+ * Free orders not handled on the PCP directly to the
+ * allocator.
*/
- migratetype = get_pcppage_migratetype(&folio->page);
- if (!pcp_allowed_order(order) ||
- is_migrate_isolate(migratetype)) {
- free_one_page(folio_zone(folio), &folio->page, pfn,
- order, migratetype, FPI_NONE);
+ if (!pcp_allowed_order(order)) {
+ free_one_page(folio_zone(folio), &folio->page,
+ pfn, order, FPI_NONE);
continue;
}
folio->private = (void *)(unsigned long)order;
@@ -2557,16 +2668,31 @@ void free_unref_folios(struct folio_batch *folios)
for (i = 0; i < folios->nr; i++) {
struct folio *folio = folios->folios[i];
struct zone *zone = folio_zone(folio);
+ unsigned long pfn = folio_pfn(folio);
unsigned int order = (unsigned long)folio->private;
+ int migratetype;
folio->private = NULL;
- migratetype = get_pcppage_migratetype(&folio->page);
+ migratetype = get_pfnblock_migratetype(&folio->page, pfn);
/* Different zone requires a different pcp lock */
- if (zone != locked_zone) {
+ if (zone != locked_zone ||
+ is_migrate_isolate(migratetype)) {
if (pcp) {
pcp_spin_unlock(pcp);
pcp_trylock_finish(UP_flags);
+ locked_zone = NULL;
+ pcp = NULL;
+ }
+
+ /*
+ * Free isolated pages directly to the
+ * allocator, see comment in free_unref_page.
+ */
+ if (is_migrate_isolate(migratetype)) {
+ free_one_page(zone, &folio->page, pfn,
+ order, FPI_NONE);
+ continue;
}
/*
@@ -2577,10 +2703,8 @@ void free_unref_folios(struct folio_batch *folios)
pcp = pcp_spin_trylock(zone->per_cpu_pageset);
if (unlikely(!pcp)) {
pcp_trylock_finish(UP_flags);
- free_one_page(zone, &folio->page,
- folio_pfn(folio), order,
- migratetype, FPI_NONE);
- locked_zone = NULL;
+ free_one_page(zone, &folio->page, pfn,
+ order, FPI_NONE);
continue;
}
locked_zone = zone;
@@ -2623,6 +2747,7 @@ void split_page(struct page *page, unsigned int order)
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
split_page_owner(page, order, 0);
+ pgalloc_tag_split(page, 1 << order);
split_page_memcg(page, order, 0);
}
EXPORT_SYMBOL_GPL(split_page);
@@ -2643,11 +2768,9 @@ int __isolate_free_page(struct page *page, unsigned int order)
watermark = zone->_watermark[WMARK_MIN] + (1UL << order);
if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
return 0;
-
- __mod_zone_freepage_state(zone, -(1UL << order), mt);
}
- del_page_from_free_list(page, zone, order);
+ del_page_from_free_list(page, zone, order, mt);
/*
* Set the pageblock if the isolated page is at least half of a
@@ -2662,8 +2785,8 @@ int __isolate_free_page(struct page *page, unsigned int order)
* with others)
*/
if (migratetype_is_mergeable(mt))
- set_pageblock_migratetype(page,
- MIGRATE_MOVABLE);
+ move_freepages_block(zone, page, mt,
+ MIGRATE_MOVABLE);
}
}
@@ -2747,8 +2870,6 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
return NULL;
}
}
- __mod_zone_freepage_state(zone, -(1 << order),
- get_pcppage_migratetype(page));
spin_unlock_irqrestore(&zone->lock, flags);
} while (check_new_pages(page, order));
@@ -4384,7 +4505,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
*
* Returns the number of pages on the list or array.
*/
-unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
nodemask_t *nodemask, int nr_pages,
struct list_head *page_list,
struct page **page_array)
@@ -4520,7 +4641,7 @@ failed_irq:
pcp_trylock_finish(UP_flags);
failed:
- page = __alloc_pages(gfp, 0, preferred_nid, nodemask);
+ page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask);
if (page) {
if (page_list)
list_add(&page->lru, page_list);
@@ -4531,13 +4652,13 @@ failed:
goto out;
}
-EXPORT_SYMBOL_GPL(__alloc_pages_bulk);
+EXPORT_SYMBOL_GPL(alloc_pages_bulk_noprof);
/*
* This is the 'heart' of the zoned buddy allocator.
*/
-struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
- nodemask_t *nodemask)
+struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
+ int preferred_nid, nodemask_t *nodemask)
{
struct page *page;
unsigned int alloc_flags = ALLOC_WMARK_LOW;
@@ -4599,38 +4720,38 @@ out:
return page;
}
-EXPORT_SYMBOL(__alloc_pages);
+EXPORT_SYMBOL(__alloc_pages_noprof);
-struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid,
+struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
nodemask_t *nodemask)
{
- struct page *page = __alloc_pages(gfp | __GFP_COMP, order,
+ struct page *page = __alloc_pages_noprof(gfp | __GFP_COMP, order,
preferred_nid, nodemask);
return page_rmappable_folio(page);
}
-EXPORT_SYMBOL(__folio_alloc);
+EXPORT_SYMBOL(__folio_alloc_noprof);
/*
* Common helper functions. Never use with __GFP_HIGHMEM because the returned
* address cannot represent highmem pages. Use alloc_pages and then kmap if
* you need to access high mem.
*/
-unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
+unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order)
{
struct page *page;
- page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order);
+ page = alloc_pages_noprof(gfp_mask & ~__GFP_HIGHMEM, order);
if (!page)
return 0;
return (unsigned long) page_address(page);
}
-EXPORT_SYMBOL(__get_free_pages);
+EXPORT_SYMBOL(get_free_pages_noprof);
-unsigned long get_zeroed_page(gfp_t gfp_mask)
+unsigned long get_zeroed_page_noprof(gfp_t gfp_mask)
{
- return __get_free_page(gfp_mask | __GFP_ZERO);
+ return get_free_pages_noprof(gfp_mask | __GFP_ZERO, 0);
}
-EXPORT_SYMBOL(get_zeroed_page);
+EXPORT_SYMBOL(get_zeroed_page_noprof);
/**
* __free_pages - Free pages allocated with alloc_pages().
@@ -4656,12 +4777,15 @@ void __free_pages(struct page *page, unsigned int order)
{
/* get PageHead before we drop reference */
int head = PageHead(page);
+ struct alloc_tag *tag = pgalloc_tag_get(page);
if (put_page_testzero(page))
- free_the_page(page, order);
- else if (!head)
+ free_unref_page(page, order);
+ else if (!head) {
+ pgalloc_tag_sub_pages(tag, (1 << order) - 1);
while (order-- > 0)
- free_the_page(page + (1 << order), order);
+ free_unref_page(page + (1 << order), order);
+ }
}
EXPORT_SYMBOL(__free_pages);
@@ -4722,7 +4846,7 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
if (page_ref_sub_and_test(page, count))
- free_the_page(page, compound_order(page));
+ free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(__page_frag_cache_drain);
@@ -4763,7 +4887,7 @@ refill:
goto refill;
if (unlikely(nc->pfmemalloc)) {
- free_the_page(page, compound_order(page));
+ free_unref_page(page, compound_order(page));
goto refill;
}
@@ -4807,7 +4931,7 @@ void page_frag_free(void *addr)
struct page *page = virt_to_head_page(addr);
if (unlikely(put_page_testzero(page)))
- free_the_page(page, compound_order(page));
+ free_unref_page(page, compound_order(page));
}
EXPORT_SYMBOL(page_frag_free);
@@ -4820,6 +4944,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
struct page *last = page + nr;
split_page_owner(page, order, 0);
+ pgalloc_tag_split(page, 1 << order);
split_page_memcg(page, order, 0);
while (page < --last)
set_page_refcounted(last);
@@ -4846,7 +4971,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
*
* Return: pointer to the allocated area or %NULL in case of error.
*/
-void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
+void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask)
{
unsigned int order = get_order(size);
unsigned long addr;
@@ -4854,10 +4979,10 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
- addr = __get_free_pages(gfp_mask, order);
+ addr = get_free_pages_noprof(gfp_mask, order);
return make_alloc_exact(addr, order, size);
}
-EXPORT_SYMBOL(alloc_pages_exact);
+EXPORT_SYMBOL(alloc_pages_exact_noprof);
/**
* alloc_pages_exact_nid - allocate an exact number of physically-contiguous
@@ -4871,7 +4996,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
*
* Return: pointer to the allocated area or %NULL in case of error.
*/
-void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+void * __meminit alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask)
{
unsigned int order = get_order(size);
struct page *p;
@@ -4879,7 +5004,7 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
- p = alloc_pages_node(nid, gfp_mask, order);
+ p = alloc_pages_node_noprof(nid, gfp_mask, order);
if (!p)
return NULL;
return make_alloc_exact((unsigned long)page_address(p), order, size);
@@ -5180,37 +5305,13 @@ static void setup_min_slab_ratio(void);
static void build_zonelists(pg_data_t *pgdat)
{
- int node, local_node;
struct zoneref *zonerefs;
int nr_zones;
- local_node = pgdat->node_id;
-
zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs;
nr_zones = build_zonerefs_node(pgdat, zonerefs);
zonerefs += nr_zones;
- /*
- * Now we build the zonelist so that it contains the zones
- * of all the other nodes.
- * We don't want to pressure a particular node, so when
- * building the zones for node N, we make sure that the
- * zones coming right after the local ones are those from
- * node N+1 (modulo N)
- */
- for (node = local_node + 1; node < MAX_NUMNODES; node++) {
- if (!node_online(node))
- continue;
- nr_zones = build_zonerefs_node(NODE_DATA(node), zonerefs);
- zonerefs += nr_zones;
- }
- for (node = 0; node < local_node; node++) {
- if (!node_online(node))
- continue;
- nr_zones = build_zonerefs_node(NODE_DATA(node), zonerefs);
- zonerefs += nr_zones;
- }
-
zonerefs->zone = NULL;
zonerefs->zone_idx = 0;
}
@@ -5827,10 +5928,11 @@ static void setup_per_zone_lowmem_reserve(void)
for (j = i + 1; j < MAX_NR_ZONES; j++) {
struct zone *upper_zone = &pgdat->node_zones[j];
+ bool empty = !zone_managed_pages(upper_zone);
managed_pages += zone_managed_pages(upper_zone);
- if (clear)
+ if (clear || empty)
zone->lowmem_reserve[j] = 0;
else
zone->lowmem_reserve[j] = managed_pages / ratio;
@@ -6211,7 +6313,6 @@ static struct ctl_table page_alloc_sysctl_table[] = {
.extra2 = SYSCTL_ONE_HUNDRED,
},
#endif
- {}
};
void __init page_alloc_sysctl_init(void)
@@ -6251,6 +6352,7 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
struct migration_target_control mtc = {
.nid = zone_to_nid(cc->zone),
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+ .reason = MR_CONTIG_RANGE,
};
struct page *page;
unsigned long total_mapped = 0;
@@ -6283,8 +6385,12 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
if (trace_mm_alloc_contig_migrate_range_info_enabled()) {
total_reclaimed += nr_reclaimed;
- list_for_each_entry(page, &cc->migratepages, lru)
- total_mapped += page_mapcount(page);
+ list_for_each_entry(page, &cc->migratepages, lru) {
+ struct folio *folio = page_folio(page);
+
+ total_mapped += folio_mapped(folio) *
+ folio_nr_pages(folio);
+ }
}
ret = migrate_pages(&cc->migratepages, alloc_migration_target,
@@ -6336,11 +6442,10 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
* pages which PFN is in [start, end) are allocated for the caller and
* need to be freed with free_contig_range().
*/
-int alloc_contig_range(unsigned long start, unsigned long end,
+int alloc_contig_range_noprof(unsigned long start, unsigned long end,
unsigned migratetype, gfp_t gfp_mask)
{
unsigned long outer_start, outer_end;
- int order;
int ret = 0;
struct compact_control cc = {
@@ -6413,29 +6518,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* We don't have to hold zone->lock here because the pages are
* isolated thus they won't get removed from buddy.
*/
-
- order = 0;
- outer_start = start;
- while (!PageBuddy(pfn_to_page(outer_start))) {
- if (++order > MAX_PAGE_ORDER) {
- outer_start = start;
- break;
- }
- outer_start &= ~0UL << order;
- }
-
- if (outer_start != start) {
- order = buddy_order(pfn_to_page(outer_start));
-
- /*
- * outer_start page could be small order buddy page and
- * it doesn't include start page. Adjust outer_start
- * in this case to report failed page properly
- * on tracepoint in test_pages_isolated()
- */
- if (outer_start + (1UL << order) <= start)
- outer_start = start;
- }
+ outer_start = find_large_buddy(start);
/* Make sure the range is really isolated. */
if (test_pages_isolated(outer_start, end, 0)) {
@@ -6460,15 +6543,15 @@ done:
undo_isolate_page_range(start, end, migratetype);
return ret;
}
-EXPORT_SYMBOL(alloc_contig_range);
+EXPORT_SYMBOL(alloc_contig_range_noprof);
static int __alloc_contig_pages(unsigned long start_pfn,
unsigned long nr_pages, gfp_t gfp_mask)
{
unsigned long end_pfn = start_pfn + nr_pages;
- return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
- gfp_mask);
+ return alloc_contig_range_noprof(start_pfn, end_pfn, MIGRATE_MOVABLE,
+ gfp_mask);
}
static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
@@ -6523,8 +6606,8 @@ static bool zone_spans_last_pfn(const struct zone *zone,
*
* Return: pointer to contiguous pages on success, or NULL if not successful.
*/
-struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
- int nid, nodemask_t *nodemask)
+struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
+ int nid, nodemask_t *nodemask)
{
unsigned long ret, pfn, flags;
struct zonelist *zonelist;
@@ -6655,8 +6738,9 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
BUG_ON(page_count(page));
BUG_ON(!PageBuddy(page));
+ VM_WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE);
order = buddy_order(page);
- del_page_from_free_list(page, zone, order);
+ del_page_from_free_list(page, zone, order, MIGRATE_ISOLATE);
pfn += (1 << order);
}
spin_unlock_irqrestore(&zone->lock, flags);
@@ -6666,16 +6750,16 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
/*
* This function returns a stable result only if called under zone lock.
*/
-bool is_free_buddy_page(struct page *page)
+bool is_free_buddy_page(const struct page *page)
{
unsigned long pfn = page_to_pfn(page);
unsigned int order;
for (order = 0; order < NR_PAGE_ORDERS; order++) {
- struct page *page_head = page - (pfn & ((1 << order) - 1));
+ const struct page *head = page - (pfn & ((1 << order) - 1));
- if (PageBuddy(page_head) &&
- buddy_order_unsafe(page_head) >= order)
+ if (PageBuddy(head) &&
+ buddy_order_unsafe(head) >= order)
break;
}
@@ -6684,6 +6768,14 @@ bool is_free_buddy_page(struct page *page)
EXPORT_SYMBOL(is_free_buddy_page);
#ifdef CONFIG_MEMORY_FAILURE
+static inline void add_to_free_list(struct page *page, struct zone *zone,
+ unsigned int order, int migratetype,
+ bool tail)
+{
+ __add_to_free_list(page, zone, order, migratetype, tail);
+ account_freepages(zone, 1 << order, migratetype);
+}
+
/*
* Break down a higher-order page in sub-pages, and keep our target out of
* buddy allocator.
@@ -6706,10 +6798,10 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page,
current_buddy = page + size;
}
- if (set_page_guard(zone, current_buddy, high, migratetype))
+ if (set_page_guard(zone, current_buddy, high))
continue;
- add_to_free_list(current_buddy, zone, high, migratetype);
+ add_to_free_list(current_buddy, zone, high, migratetype, false);
set_buddy_order(current_buddy, high);
}
}
@@ -6735,12 +6827,11 @@ bool take_page_off_buddy(struct page *page)
int migratetype = get_pfnblock_migratetype(page_head,
pfn_head);
- del_page_from_free_list(page_head, zone, page_order);
+ del_page_from_free_list(page_head, zone, page_order,
+ migratetype);
break_down_buddy_pages(zone, page_head, page, 0,
page_order, migratetype);
SetPageHWPoisonTakenOff(page);
- if (!is_migrate_isolate(migratetype))
- __mod_zone_freepage_state(zone, -1, migratetype);
ret = true;
break;
}
@@ -6757,13 +6848,14 @@ bool take_page_off_buddy(struct page *page)
bool put_page_back_buddy(struct page *page)
{
struct zone *zone = page_zone(page);
- unsigned long pfn = page_to_pfn(page);
unsigned long flags;
- int migratetype = get_pfnblock_migratetype(page, pfn);
bool ret = false;
spin_lock_irqsave(&zone->lock, flags);
if (put_page_testzero(page)) {
+ unsigned long pfn = page_to_pfn(page);
+ int migratetype = get_pfnblock_migratetype(page, pfn);
+
ClearPageHWPoisonTakenOff(page);
__free_one_page(page, pfn, zone, 0, migratetype, FPI_NONE);
if (TestClearPageHWPoison(page)) {
@@ -6847,7 +6939,7 @@ static bool try_to_accept_memory_one(struct zone *zone)
list_del(&page->lru);
last = list_empty(&zone->unaccepted_pages);
- __mod_zone_freepage_state(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
+ account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
spin_unlock_irqrestore(&zone->lock, flags);
@@ -6899,7 +6991,7 @@ static bool __free_unaccepted(struct page *page)
spin_lock_irqsave(&zone->lock, flags);
first = list_empty(&zone->unaccepted_pages);
list_add_tail(&page->lru, &zone->unaccepted_pages);
- __mod_zone_freepage_state(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
+ account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 4548fcc66d74d0..95dd8ffeaf811a 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -10,6 +10,7 @@
#include <linux/page_idle.h>
#include <linux/page_table_check.h>
#include <linux/rcupdate.h>
+#include <linux/pgalloc_tag.h>
/*
* struct page extension
@@ -82,6 +83,9 @@ static struct page_ext_operations *page_ext_ops[] __initdata = {
#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
&page_idle_ops,
#endif
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+ &page_alloc_tagging_ops,
+#endif
#ifdef CONFIG_PAGE_TABLE_CHECK
&page_table_check_ops,
#endif
@@ -91,7 +95,16 @@ unsigned long page_ext_size;
static unsigned long total_usage;
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+/*
+ * To ensure correct allocation tagging for pages, page_ext should be available
+ * before the first page allocation. Otherwise early task stacks will be
+ * allocated before page_ext initialization and missing tags will be flagged.
+ */
+bool early_page_ext __meminitdata = true;
+#else
bool early_page_ext __meminitdata;
+#endif
static int __init setup_early_page_ext(char *str)
{
early_page_ext = true;
@@ -501,7 +514,7 @@ void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
* Context: Any context. Caller may not sleep until they have called
* page_ext_put().
*/
-struct page_ext *page_ext_get(struct page *page)
+struct page_ext *page_ext_get(const struct page *page)
{
struct page_ext *page_ext;
diff --git a/mm/page_io.c b/mm/page_io.c
index ae2b49055e4357..46c603dddf043d 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -189,7 +189,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
* Arch code may have to preserve more data than just the page
* contents, e.g. memory tags.
*/
- ret = arch_prepare_to_swap(&folio->page);
+ ret = arch_prepare_to_swap(folio);
if (ret) {
folio_mark_dirty(folio);
folio_unlock(folio);
@@ -217,6 +217,7 @@ static inline void count_swpout_vm_event(struct folio *folio)
count_memcg_folio_events(folio, THP_SWPOUT, 1);
count_vm_event(THP_SWPOUT);
}
+ count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_SWPOUT);
#endif
count_vm_events(PSWPOUT, folio_nr_pages(folio));
}
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index a5c8fa4c2a75c3..042937d5abe4b8 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -178,15 +178,11 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
migratetype, isol_flags);
if (!unmovable) {
- unsigned long nr_pages;
- int mt = get_pageblock_migratetype(page);
-
- set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+ if (!move_freepages_block_isolate(zone, page, MIGRATE_ISOLATE)) {
+ spin_unlock_irqrestore(&zone->lock, flags);
+ return -EBUSY;
+ }
zone->nr_isolate_pageblock++;
- nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE,
- NULL);
-
- __mod_zone_freepage_state(zone, -nr_pages, mt);
spin_unlock_irqrestore(&zone->lock, flags);
return 0;
}
@@ -206,7 +202,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
static void unset_migratetype_isolate(struct page *page, int migratetype)
{
struct zone *zone;
- unsigned long flags, nr_pages;
+ unsigned long flags;
bool isolated_page = false;
unsigned int order;
struct page *buddy;
@@ -252,12 +248,15 @@ static void unset_migratetype_isolate(struct page *page, int migratetype)
* allocation.
*/
if (!isolated_page) {
- nr_pages = move_freepages_block(zone, page, migratetype, NULL);
- __mod_zone_freepage_state(zone, nr_pages, migratetype);
- }
- set_pageblock_migratetype(page, migratetype);
- if (isolated_page)
+ /*
+ * Isolating this block already succeeded, so this
+ * should not fail on zone boundaries.
+ */
+ WARN_ON_ONCE(!move_freepages_block_isolate(zone, page, migratetype));
+ } else {
+ set_pageblock_migratetype(page, migratetype);
__putback_isolated_page(page, order, migratetype);
+ }
zone->nr_isolate_pageblock--;
out:
spin_unlock_irqrestore(&zone->lock, flags);
@@ -367,26 +366,29 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
VM_BUG_ON(!page);
pfn = page_to_pfn(page);
- /*
- * start_pfn is MAX_ORDER_NR_PAGES aligned, if there is any
- * free pages in [start_pfn, boundary_pfn), its head page will
- * always be in the range.
- */
+
if (PageBuddy(page)) {
int order = buddy_order(page);
- if (pfn + (1UL << order) > boundary_pfn) {
- /* free page changed before split, check it again */
- if (split_free_page(page, order, boundary_pfn - pfn))
- continue;
- }
+ /* move_freepages_block_isolate() handled this */
+ VM_WARN_ON_ONCE(pfn + (1 << order) > boundary_pfn);
pfn += 1UL << order;
continue;
}
+
/*
- * migrate compound pages then let the free page handling code
- * above do the rest. If migration is not possible, just fail.
+ * If a compound page is straddling our block, attempt
+ * to migrate it out of the way.
+ *
+ * We don't have to worry about this creating a large
+ * free page that straddles into our block: gigantic
+ * pages are freed as order-0 chunks, and LRU pages
+ * (currently) do not exceed pageblock_order.
+ *
+ * The block of interest has already been marked
+ * MIGRATE_ISOLATE above, so when migration is done it
+ * will free its pages onto the correct freelists.
*/
if (PageCompound(page)) {
struct page *head = compound_head(page);
@@ -397,16 +399,10 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
pfn = head_pfn + nr_pages;
continue;
}
+
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
- /*
- * hugetlb, lru compound (THP), and movable compound pages
- * can be migrated. Otherwise, fail the isolation.
- */
- if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) {
- int order;
- unsigned long outer_pfn;
+ if (PageHuge(page)) {
int page_mt = get_pageblock_migratetype(page);
- bool isolate_page = !is_migrate_isolate_page(page);
struct compact_control cc = {
.nr_migratepages = 0,
.order = -1,
@@ -419,56 +415,25 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
};
INIT_LIST_HEAD(&cc.migratepages);
- /*
- * XXX: mark the page as MIGRATE_ISOLATE so that
- * no one else can grab the freed page after migration.
- * Ideally, the page should be freed as two separate
- * pages to be added into separate migratetype free
- * lists.
- */
- if (isolate_page) {
- ret = set_migratetype_isolate(page, page_mt,
- flags, head_pfn, head_pfn + nr_pages);
- if (ret)
- goto failed;
- }
-
ret = __alloc_contig_migrate_range(&cc, head_pfn,
head_pfn + nr_pages, page_mt);
-
- /*
- * restore the page's migratetype so that it can
- * be split into separate migratetype free lists
- * later.
- */
- if (isolate_page)
- unset_migratetype_isolate(page, page_mt);
-
if (ret)
goto failed;
- /*
- * reset pfn to the head of the free page, so
- * that the free page handling code above can split
- * the free page to the right migratetype list.
- *
- * head_pfn is not used here as a hugetlb page order
- * can be bigger than MAX_PAGE_ORDER, but after it is
- * freed, the free page order is not. Use pfn within
- * the range to find the head of the free page.
- */
- order = 0;
- outer_pfn = pfn;
- while (!PageBuddy(pfn_to_page(outer_pfn))) {
- /* stop if we cannot find the free page */
- if (++order > MAX_PAGE_ORDER)
- goto failed;
- outer_pfn &= ~0UL << order;
- }
- pfn = outer_pfn;
+ pfn = head_pfn + nr_pages;
continue;
- } else
+ }
+
+ /*
+ * These pages are movable too, but they're
+ * not expected to exceed pageblock_order.
+ *
+ * Let us know when they do, so we can add
+ * proper free and split handling for them.
+ */
+ VM_WARN_ON_ONCE_PAGE(PageLRU(page), page);
+ VM_WARN_ON_ONCE_PAGE(__PageMovable(page), page);
#endif
- goto failed;
+ goto failed;
}
pfn++;
diff --git a/mm/page_owner.c b/mm/page_owner.c
index e7139952ffd9de..6e65dc88c1b811 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -54,6 +54,22 @@ static depot_stack_handle_t early_handle;
static void init_early_allocated_pages(void);
+static inline void set_current_in_page_owner(void)
+{
+ /*
+ * Avoid recursion.
+ *
+ * We might need to allocate more memory from page_owner code, so make
+ * sure to signal it in order to avoid recursion.
+ */
+ current->in_page_owner = 1;
+}
+
+static inline void unset_current_in_page_owner(void)
+{
+ current->in_page_owner = 0;
+}
+
static int __init early_page_owner_param(char *buf)
{
int ret = kstrtobool(buf, &page_owner_enabled);
@@ -102,7 +118,6 @@ static __init void init_page_owner(void)
register_dummy_stack();
register_failure_stack();
register_early_stack();
- static_branch_enable(&page_owner_inited);
init_early_allocated_pages();
/* Initialize dummy and failure stacks and link them to stack_list */
dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle);
@@ -113,6 +128,7 @@ static __init void init_page_owner(void)
refcount_set(&failure_stack.stack_record->count, 1);
dummy_stack.next = &failure_stack;
stack_list = &dummy_stack;
+ static_branch_enable(&page_owner_inited);
}
struct page_ext_operations page_owner_ops = {
@@ -133,23 +149,16 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
depot_stack_handle_t handle;
unsigned int nr_entries;
- /*
- * Avoid recursion.
- *
- * Sometimes page metadata allocation tracking requires more
- * memory to be allocated:
- * - when new stack trace is saved to stack depot
- */
if (current->in_page_owner)
return dummy_handle;
- current->in_page_owner = 1;
+ set_current_in_page_owner();
nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
handle = stack_depot_save(entries, nr_entries, flags);
if (!handle)
handle = failure_handle;
+ unset_current_in_page_owner();
- current->in_page_owner = 0;
return handle;
}
@@ -164,9 +173,13 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
gfp_mask &= (GFP_ATOMIC | GFP_KERNEL);
gfp_mask |= __GFP_NOWARN;
+ set_current_in_page_owner();
stack = kmalloc(sizeof(*stack), gfp_mask);
- if (!stack)
+ if (!stack) {
+ unset_current_in_page_owner();
return;
+ }
+ unset_current_in_page_owner();
stack->stack_record = stack_record;
stack->next = NULL;
@@ -183,7 +196,8 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
spin_unlock_irqrestore(&stack_list_lock, flags);
}
-static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
+static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
+ int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
@@ -204,20 +218,74 @@ static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
/* Add the new stack_record to our list */
add_stack_record_to_list(stack_record, gfp_mask);
}
- refcount_inc(&stack_record->count);
+ refcount_add(nr_base_pages, &stack_record->count);
}
-static void dec_stack_record_count(depot_stack_handle_t handle)
+static void dec_stack_record_count(depot_stack_handle_t handle,
+ int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
- if (stack_record)
- refcount_dec(&stack_record->count);
+ if (!stack_record)
+ return;
+
+ if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
+ pr_warn("%s: refcount went to 0 for %u handle\n", __func__,
+ handle);
}
-void __reset_page_owner(struct page *page, unsigned short order)
+static inline void __update_page_owner_handle(struct page_ext *page_ext,
+ depot_stack_handle_t handle,
+ unsigned short order,
+ gfp_t gfp_mask,
+ short last_migrate_reason, u64 ts_nsec,
+ pid_t pid, pid_t tgid, char *comm)
{
int i;
+ struct page_owner *page_owner;
+
+ for (i = 0; i < (1 << order); i++) {
+ page_owner = get_page_owner(page_ext);
+ page_owner->handle = handle;
+ page_owner->order = order;
+ page_owner->gfp_mask = gfp_mask;
+ page_owner->last_migrate_reason = last_migrate_reason;
+ page_owner->pid = pid;
+ page_owner->tgid = tgid;
+ page_owner->ts_nsec = ts_nsec;
+ strscpy(page_owner->comm, comm,
+ sizeof(page_owner->comm));
+ __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+static inline void __update_page_owner_free_handle(struct page_ext *page_ext,
+ depot_stack_handle_t handle,
+ unsigned short order,
+ pid_t pid, pid_t tgid,
+ u64 free_ts_nsec)
+{
+ int i;
+ struct page_owner *page_owner;
+
+ for (i = 0; i < (1 << order); i++) {
+ page_owner = get_page_owner(page_ext);
+ /* Only __reset_page_owner() wants to clear the bit */
+ if (handle) {
+ __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+ page_owner->free_handle = handle;
+ }
+ page_owner->free_ts_nsec = free_ts_nsec;
+ page_owner->free_pid = current->pid;
+ page_owner->free_tgid = current->tgid;
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+void __reset_page_owner(struct page *page, unsigned short order)
+{
struct page_ext *page_ext;
depot_stack_handle_t handle;
depot_stack_handle_t alloc_handle;
@@ -232,16 +300,10 @@ void __reset_page_owner(struct page *page, unsigned short order)
alloc_handle = page_owner->handle;
handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
- for (i = 0; i < (1 << order); i++) {
- __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
- page_owner->free_handle = handle;
- page_owner->free_ts_nsec = free_ts_nsec;
- page_owner->free_pid = current->pid;
- page_owner->free_tgid = current->tgid;
- page_ext = page_ext_next(page_ext);
- page_owner = get_page_owner(page_ext);
- }
+ __update_page_owner_free_handle(page_ext, handle, order, current->pid,
+ current->tgid, free_ts_nsec);
page_ext_put(page_ext);
+
if (alloc_handle != early_handle)
/*
* early_handle is being set as a handle for all those
@@ -250,39 +312,14 @@ void __reset_page_owner(struct page *page, unsigned short order)
* the machinery is not ready yet, we cannot decrement
* their refcount either.
*/
- dec_stack_record_count(alloc_handle);
-}
-
-static inline void __set_page_owner_handle(struct page_ext *page_ext,
- depot_stack_handle_t handle,
- unsigned short order, gfp_t gfp_mask)
-{
- struct page_owner *page_owner;
- int i;
- u64 ts_nsec = local_clock();
-
- for (i = 0; i < (1 << order); i++) {
- page_owner = get_page_owner(page_ext);
- page_owner->handle = handle;
- page_owner->order = order;
- page_owner->gfp_mask = gfp_mask;
- page_owner->last_migrate_reason = -1;
- page_owner->pid = current->pid;
- page_owner->tgid = current->tgid;
- page_owner->ts_nsec = ts_nsec;
- strscpy(page_owner->comm, current->comm,
- sizeof(page_owner->comm));
- __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
-
- page_ext = page_ext_next(page_ext);
- }
+ dec_stack_record_count(alloc_handle, 1 << order);
}
noinline void __set_page_owner(struct page *page, unsigned short order,
gfp_t gfp_mask)
{
struct page_ext *page_ext;
+ u64 ts_nsec = local_clock();
depot_stack_handle_t handle;
handle = save_stack(gfp_mask);
@@ -290,9 +327,11 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
page_ext = page_ext_get(page);
if (unlikely(!page_ext))
return;
- __set_page_owner_handle(page_ext, handle, order, gfp_mask);
+ __update_page_owner_handle(page_ext, handle, order, gfp_mask, -1,
+ ts_nsec, current->pid, current->tgid,
+ current->comm);
page_ext_put(page_ext);
- inc_stack_record_count(handle, gfp_mask);
+ inc_stack_record_count(handle, gfp_mask, 1 << order);
}
void __set_page_owner_migrate_reason(struct page *page, int reason)
@@ -327,9 +366,12 @@ void __split_page_owner(struct page *page, int old_order, int new_order)
void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
+ int i;
struct page_ext *old_ext;
struct page_ext *new_ext;
- struct page_owner *old_page_owner, *new_page_owner;
+ struct page_owner *old_page_owner;
+ struct page_owner *new_page_owner;
+ depot_stack_handle_t migrate_handle;
old_ext = page_ext_get(&old->page);
if (unlikely(!old_ext))
@@ -343,30 +385,32 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
old_page_owner = get_page_owner(old_ext);
new_page_owner = get_page_owner(new_ext);
- new_page_owner->order = old_page_owner->order;
- new_page_owner->gfp_mask = old_page_owner->gfp_mask;
- new_page_owner->last_migrate_reason =
- old_page_owner->last_migrate_reason;
- new_page_owner->handle = old_page_owner->handle;
- new_page_owner->pid = old_page_owner->pid;
- new_page_owner->tgid = old_page_owner->tgid;
- new_page_owner->free_pid = old_page_owner->free_pid;
- new_page_owner->free_tgid = old_page_owner->free_tgid;
- new_page_owner->ts_nsec = old_page_owner->ts_nsec;
- new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
- strcpy(new_page_owner->comm, old_page_owner->comm);
-
+ migrate_handle = new_page_owner->handle;
+ __update_page_owner_handle(new_ext, old_page_owner->handle,
+ old_page_owner->order, old_page_owner->gfp_mask,
+ old_page_owner->last_migrate_reason,
+ old_page_owner->ts_nsec, old_page_owner->pid,
+ old_page_owner->tgid, old_page_owner->comm);
/*
- * We don't clear the bit on the old folio as it's going to be freed
- * after migration. Until then, the info can be useful in case of
- * a bug, and the overall stats will be off a bit only temporarily.
- * Also, migrate_misplaced_transhuge_page() can still fail the
- * migration and then we want the old folio to retain the info. But
- * in that case we also don't need to explicitly clear the info from
- * the new page, which will be freed.
+ * Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio
+ * will be freed after migration. Keep them until then as they may be
+ * useful.
*/
- __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
+ __update_page_owner_free_handle(new_ext, 0, old_page_owner->order,
+ old_page_owner->free_pid,
+ old_page_owner->free_tgid,
+ old_page_owner->free_ts_nsec);
+ /*
+ * We linked the original stack to the new folio, we need to do the same
+ * for the new one and the old folio otherwise there will be an imbalance
+ * when subtracting those pages from the stack.
+ */
+ for (i = 0; i < (1 << new_page_owner->order); i++) {
+ old_page_owner->handle = migrate_handle;
+ old_ext = page_ext_next(old_ext);
+ old_page_owner = get_page_owner(old_ext);
+ }
+
page_ext_put(new_ext);
page_ext_put(old_ext);
}
@@ -471,7 +515,7 @@ static inline int print_page_owner_memcg(char *kbuf, size_t count, int ret,
if (!memcg_data)
goto out_unlock;
- if (memcg_data & MEMCG_DATA_OBJCGS)
+ if (memcg_data & MEMCG_DATA_OBJEXTS)
ret += scnprintf(kbuf + ret, count - ret,
"Slab cache page\n");
@@ -774,8 +818,9 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
goto ext_put_continue;
/* Found early allocated page */
- __set_page_owner_handle(page_ext, early_handle,
- 0, 0);
+ __update_page_owner_handle(page_ext, early_handle, 0, 0,
+ -1, local_clock(), current->pid,
+ current->tgid, current->comm);
count++;
ext_put_continue:
page_ext_put(page_ext);
@@ -827,13 +872,11 @@ static void *stack_start(struct seq_file *m, loff_t *ppos)
* value of stack_list.
*/
stack = smp_load_acquire(&stack_list);
+ m->private = stack;
} else {
stack = m->private;
- stack = stack->next;
}
- m->private = stack;
-
return stack;
}
@@ -848,11 +891,11 @@ static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
return stack;
}
-static unsigned long page_owner_stack_threshold;
+static unsigned long page_owner_pages_threshold;
static int stack_print(struct seq_file *m, void *v)
{
- int i, stack_count;
+ int i, nr_base_pages;
struct stack *stack = v;
unsigned long *entries;
unsigned long nr_entries;
@@ -863,14 +906,14 @@ static int stack_print(struct seq_file *m, void *v)
nr_entries = stack_record->size;
entries = stack_record->entries;
- stack_count = refcount_read(&stack_record->count) - 1;
+ nr_base_pages = refcount_read(&stack_record->count) - 1;
- if (stack_count < 1 || stack_count < page_owner_stack_threshold)
+ if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)
return 0;
for (i = 0; i < nr_entries; i++)
seq_printf(m, " %pS\n", (void *)entries[i]);
- seq_printf(m, "stack_count: %d\n\n", stack_count);
+ seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages);
return 0;
}
@@ -900,13 +943,13 @@ static const struct file_operations page_owner_stack_operations = {
static int page_owner_threshold_get(void *data, u64 *val)
{
- *val = READ_ONCE(page_owner_stack_threshold);
+ *val = READ_ONCE(page_owner_pages_threshold);
return 0;
}
static int page_owner_threshold_set(void *data, u64 val)
{
- WRITE_ONCE(page_owner_stack_threshold, val);
+ WRITE_ONCE(page_owner_pages_threshold, val);
return 0;
}
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index af69c3c8f7c2d5..4169576bed7298 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -7,6 +7,8 @@
#include <linux/kstrtox.h>
#include <linux/mm.h>
#include <linux/page_table_check.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
#undef pr_fmt
#define pr_fmt(fmt) "page_table_check: " fmt
@@ -182,6 +184,22 @@ void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud)
}
EXPORT_SYMBOL(__page_table_check_pud_clear);
+/* Whether the swap entry cached writable information */
+static inline bool swap_cached_writable(swp_entry_t entry)
+{
+ return is_writable_device_exclusive_entry(entry) ||
+ is_writable_device_private_entry(entry) ||
+ is_writable_migration_entry(entry);
+}
+
+static inline void page_table_check_pte_flags(pte_t pte)
+{
+ if (pte_present(pte) && pte_uffd_wp(pte))
+ WARN_ON_ONCE(pte_write(pte));
+ else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte))
+ WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte)));
+}
+
void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
unsigned int nr)
{
@@ -190,6 +208,8 @@ void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
if (&init_mm == mm)
return;
+ page_table_check_pte_flags(pte);
+
for (i = 0; i < nr; i++)
__page_table_check_pte_clear(mm, ptep_get(ptep + i));
if (pte_user_accessible_page(pte))
@@ -197,11 +217,21 @@ void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
}
EXPORT_SYMBOL(__page_table_check_ptes_set);
+static inline void page_table_check_pmd_flags(pmd_t pmd)
+{
+ if (pmd_present(pmd) && pmd_uffd_wp(pmd))
+ WARN_ON_ONCE(pmd_write(pmd));
+ else if (is_swap_pmd(pmd) && pmd_swp_uffd_wp(pmd))
+ WARN_ON_ONCE(swap_cached_writable(pmd_to_swp_entry(pmd)));
+}
+
void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd)
{
if (&init_mm == mm)
return;
+ page_table_check_pmd_flags(pmd);
+
__page_table_check_pmd_clear(mm, *pmdp);
if (pmd_user_accessible_page(pmd)) {
page_table_check_set(pmd_pfn(pmd), PMD_SIZE >> PAGE_SHIFT,
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 74d2de15fb5e09..ae5cc42aa2087a 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -314,17 +314,21 @@ next_pte:
return false;
}
+#ifdef CONFIG_MEMORY_FAILURE
/**
* page_mapped_in_vma - check whether a page is really mapped in a VMA
* @page: the page to test
* @vma: the VMA to test
*
- * Returns 1 if the page is mapped into the page tables of the VMA, 0
- * if the page is not mapped into the page tables of this VMA. Only
- * valid for normal file or anonymous VMAs.
+ * Return: The address the page is mapped at if the page is in the range
+ * covered by the VMA and present in the page table. If the page is
+ * outside the VMA or not present, returns -EFAULT.
+ * Only valid for normal file or anonymous VMAs.
*/
-int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
+ struct folio *folio = page_folio(page);
+ pgoff_t pgoff = folio->index + folio_page_idx(folio, page);
struct page_vma_mapped_walk pvmw = {
.pfn = page_to_pfn(page),
.nr_pages = 1,
@@ -332,11 +336,13 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
.flags = PVMW_SYNC,
};
- pvmw.address = vma_address(page, vma);
+ pvmw.address = vma_address(vma, pgoff, 1);
if (pvmw.address == -EFAULT)
- return 0;
+ goto out;
if (!page_vma_mapped_walk(&pvmw))
- return 0;
+ return -EFAULT;
page_vma_mapped_walk_done(&pvmw);
- return 1;
+out:
+ return pvmw.address;
}
+#endif
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index cdd0aa597a8196..7e42f0ca3b7bb3 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -32,6 +32,19 @@ struct pcpu_block_md {
int nr_bits; /* total bits responsible for */
};
+struct pcpuobj_ext {
+#ifdef CONFIG_MEMCG_KMEM
+ struct obj_cgroup *cgroup;
+#endif
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+ union codetag_ref tag;
+#endif
+};
+
+#if defined(CONFIG_MEMCG_KMEM) || defined(CONFIG_MEM_ALLOC_PROFILING)
+#define NEED_PCPUOBJ_EXT
+#endif
+
struct pcpu_chunk {
#ifdef CONFIG_PERCPU_STATS
int nr_alloc; /* # of allocations */
@@ -64,8 +77,8 @@ struct pcpu_chunk {
int end_offset; /* additional area required to
have the region end page
aligned */
-#ifdef CONFIG_MEMCG_KMEM
- struct obj_cgroup **obj_cgroups; /* vector of object cgroups */
+#ifdef NEED_PCPUOBJ_EXT
+ struct pcpuobj_ext *obj_exts; /* vector of object cgroups */
#endif
int nr_pages; /* # of pages served by this chunk */
@@ -74,6 +87,15 @@ struct pcpu_chunk {
unsigned long populated[]; /* populated bitmap */
};
+static inline bool need_pcpuobj_ext(void)
+{
+ if (IS_ENABLED(CONFIG_MEM_ALLOC_PROFILING))
+ return true;
+ if (!mem_cgroup_kmem_disabled())
+ return true;
+ return false;
+}
+
extern spinlock_t pcpu_lock;
extern struct list_head *pcpu_chunk_lists;
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 2054c9213c4339..cd69caf6aa8d8e 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -231,10 +231,10 @@ static int pcpu_map_pages(struct pcpu_chunk *chunk,
return 0;
err:
for_each_possible_cpu(tcpu) {
- if (tcpu == cpu)
- break;
__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
page_end - page_start);
+ if (tcpu == cpu)
+ break;
}
pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
return err;
diff --git a/mm/percpu.c b/mm/percpu.c
index 4e11fc1e6deff0..474e3683b74d17 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1392,9 +1392,9 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
panic("%s: Failed to allocate %zu bytes\n", __func__,
alloc_size);
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef NEED_PCPUOBJ_EXT
/* first chunk is free to use */
- chunk->obj_cgroups = NULL;
+ chunk->obj_exts = NULL;
#endif
pcpu_init_md_blocks(chunk);
@@ -1463,12 +1463,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
if (!chunk->md_blocks)
goto md_blocks_fail;
-#ifdef CONFIG_MEMCG_KMEM
- if (!mem_cgroup_kmem_disabled()) {
- chunk->obj_cgroups =
+#ifdef NEED_PCPUOBJ_EXT
+ if (need_pcpuobj_ext()) {
+ chunk->obj_exts =
pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) *
- sizeof(struct obj_cgroup *), gfp);
- if (!chunk->obj_cgroups)
+ sizeof(struct pcpuobj_ext), gfp);
+ if (!chunk->obj_exts)
goto objcg_fail;
}
#endif
@@ -1480,7 +1480,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
return chunk;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef NEED_PCPUOBJ_EXT
objcg_fail:
pcpu_mem_free(chunk->md_blocks);
#endif
@@ -1498,8 +1498,8 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
{
if (!chunk)
return;
-#ifdef CONFIG_MEMCG_KMEM
- pcpu_mem_free(chunk->obj_cgroups);
+#ifdef NEED_PCPUOBJ_EXT
+ pcpu_mem_free(chunk->obj_exts);
#endif
pcpu_mem_free(chunk->md_blocks);
pcpu_mem_free(chunk->bound_map);
@@ -1646,9 +1646,9 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
if (!objcg)
return;
- if (likely(chunk && chunk->obj_cgroups)) {
+ if (likely(chunk && chunk->obj_exts)) {
obj_cgroup_get(objcg);
- chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;
+ chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = objcg;
rcu_read_lock();
mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
@@ -1663,13 +1663,13 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
{
struct obj_cgroup *objcg;
- if (unlikely(!chunk->obj_cgroups))
+ if (unlikely(!chunk->obj_exts))
return;
- objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT];
+ objcg = chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup;
if (!objcg)
return;
- chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;
+ chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = NULL;
obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size));
@@ -1699,6 +1699,32 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
}
#endif /* CONFIG_MEMCG_KMEM */
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+static void pcpu_alloc_tag_alloc_hook(struct pcpu_chunk *chunk, int off,
+ size_t size)
+{
+ if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts)) {
+ alloc_tag_add(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag,
+ current->alloc_tag, size);
+ }
+}
+
+static void pcpu_alloc_tag_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
+{
+ if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts))
+ alloc_tag_sub(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag, size);
+}
+#else
+static void pcpu_alloc_tag_alloc_hook(struct pcpu_chunk *chunk, int off,
+ size_t size)
+{
+}
+
+static void pcpu_alloc_tag_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
+{
+}
+#endif
+
/**
* pcpu_alloc - the percpu allocator
* @size: size of area to allocate in bytes
@@ -1714,7 +1740,7 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
* RETURNS:
* Percpu pointer to the allocated area on success, NULL on failure.
*/
-static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
+void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
gfp_t gfp)
{
gfp_t pcpu_gfp;
@@ -1881,6 +1907,8 @@ area_found:
pcpu_memcg_post_alloc_hook(objcg, chunk, off, size);
+ pcpu_alloc_tag_alloc_hook(chunk, off, size);
+
return ptr;
fail_unlock:
@@ -1909,61 +1937,7 @@ fail:
return NULL;
}
-
-/**
- * __alloc_percpu_gfp - allocate dynamic percpu area
- * @size: size of area to allocate in bytes
- * @align: alignment of area (max PAGE_SIZE)
- * @gfp: allocation flags
- *
- * Allocate zero-filled percpu area of @size bytes aligned at @align. If
- * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can
- * be called from any context but is a lot more likely to fail. If @gfp
- * has __GFP_NOWARN then no warning will be triggered on invalid or failed
- * allocation requests.
- *
- * RETURNS:
- * Percpu pointer to the allocated area on success, NULL on failure.
- */
-void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp)
-{
- return pcpu_alloc(size, align, false, gfp);
-}
-EXPORT_SYMBOL_GPL(__alloc_percpu_gfp);
-
-/**
- * __alloc_percpu - allocate dynamic percpu area
- * @size: size of area to allocate in bytes
- * @align: alignment of area (max PAGE_SIZE)
- *
- * Equivalent to __alloc_percpu_gfp(size, align, %GFP_KERNEL).
- */
-void __percpu *__alloc_percpu(size_t size, size_t align)
-{
- return pcpu_alloc(size, align, false, GFP_KERNEL);
-}
-EXPORT_SYMBOL_GPL(__alloc_percpu);
-
-/**
- * __alloc_reserved_percpu - allocate reserved percpu area
- * @size: size of area to allocate in bytes
- * @align: alignment of area (max PAGE_SIZE)
- *
- * Allocate zero-filled percpu area of @size bytes aligned at @align
- * from reserved percpu area if arch has set it up; otherwise,
- * allocation is served from the same dynamic area. Might sleep.
- * Might trigger writeouts.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
- *
- * RETURNS:
- * Percpu pointer to the allocated area on success, NULL on failure.
- */
-void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
-{
- return pcpu_alloc(size, align, true, GFP_KERNEL);
-}
+EXPORT_SYMBOL_GPL(pcpu_alloc_noprof);
/**
* pcpu_balance_free - manage the amount of free chunks
@@ -2302,6 +2276,8 @@ void free_percpu(void __percpu *ptr)
spin_lock_irqsave(&pcpu_lock, flags);
size = pcpu_free_area(chunk, off);
+ pcpu_alloc_tag_free_hook(chunk, off, size);
+
pcpu_memcg_free_hook(chunk, off, size);
/*
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 4fcd959dcc4d02..74e34ea90656a5 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -198,6 +198,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
+ VM_WARN_ON(!pmd_present(*pmdp));
pmd_t old = pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp));
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return old;
@@ -208,6 +209,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
+ VM_WARN_ON(!pmd_present(*pmdp));
return pmdp_invalidate(vma, address, pmdp);
}
#endif
diff --git a/mm/readahead.c b/mm/readahead.c
index 130c0e7df99f58..c1b23989d9caf3 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -228,6 +228,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
*/
for (i = 0; i < nr_to_read; i++) {
struct folio *folio = xa_load(&mapping->i_pages, index + i);
+ int ret;
if (folio && !xa_is_value(folio)) {
/*
@@ -247,9 +248,12 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
folio = filemap_alloc_folio(gfp_mask, 0);
if (!folio)
break;
- if (filemap_add_folio(mapping, folio, index + i,
- gfp_mask) < 0) {
+
+ ret = filemap_add_folio(mapping, folio, index + i, gfp_mask);
+ if (ret < 0) {
folio_put(folio);
+ if (ret == -ENOMEM)
+ break;
read_pages(ractl);
ractl->_index++;
i = ractl->_index + ractl->_nr_pages - index - 1;
@@ -490,6 +494,7 @@ void page_cache_ra_order(struct readahead_control *ractl,
pgoff_t index = readahead_index(ractl);
pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
pgoff_t mark = index + ra->size - ra->async_size;
+ unsigned int nofs;
int err = 0;
gfp_t gfp = readahead_gfp_mask(mapping);
@@ -504,6 +509,8 @@ void page_cache_ra_order(struct readahead_control *ractl,
new_order = min_t(unsigned int, new_order, ilog2(ra->size));
}
+ /* See comment in page_cache_ra_unbounded() */
+ nofs = memalloc_nofs_save();
filemap_invalidate_lock_shared(mapping);
while (index <= limit) {
unsigned int order = new_order;
@@ -527,6 +534,7 @@ void page_cache_ra_order(struct readahead_control *ractl,
read_pages(ractl);
filemap_invalidate_unlock_shared(mapping);
+ memalloc_nofs_restore(nofs);
/*
* If there were already pages in the page cache, then we may have
diff --git a/mm/rmap.c b/mm/rmap.c
index 3746a553101832..7faa60bc3e4db6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -23,7 +23,7 @@
* inode->i_rwsem (while writing or truncating, not reading or faulting)
* mm->mmap_lock
* mapping->invalidate_lock (in filemap_fault)
- * page->flags PG_locked (lock_page)
+ * folio_lock
* hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs below)
* vma_start_write
* mapping->i_mmap_rwsem
@@ -50,7 +50,7 @@
* hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
* vma_lock (hugetlb specific lock for pmd_sharing)
* mapping->i_mmap_rwsem (also used for hugetlb pmd sharing)
- * page->flags PG_locked (lock_page)
+ * folio_lock
*/
#include <linux/mm.h>
@@ -182,8 +182,6 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
* for the new allocation. At the same time, we do not want
* to do any locking for the common case of already having
* an anon_vma.
- *
- * This must be called with the mmap_lock held for reading.
*/
int __anon_vma_prepare(struct vm_area_struct *vma)
{
@@ -191,6 +189,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
struct anon_vma *anon_vma, *allocated;
struct anon_vma_chain *avc;
+ mmap_assert_locked(mm);
might_sleep();
avc = anon_vma_chain_alloc(GFP_KERNEL);
@@ -775,6 +774,8 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
struct folio *folio = page_folio(page);
+ pgoff_t pgoff;
+
if (folio_test_anon(folio)) {
struct anon_vma *page__anon_vma = folio_anon_vma(folio);
/*
@@ -790,7 +791,9 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
return -EFAULT;
}
- return vma_address(page, vma);
+ /* The !page__anon_vma above handles KSM folios */
+ pgoff = folio->index + folio_page_idx(folio, page);
+ return vma_address(vma, pgoff, 1);
}
/*
@@ -1128,56 +1131,38 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
if (invalid_mkclean_vma(vma, NULL))
return 0;
- pvmw.address = vma_pgoff_address(pgoff, nr_pages, vma);
+ pvmw.address = vma_address(vma, pgoff, nr_pages);
VM_BUG_ON_VMA(pvmw.address == -EFAULT, vma);
return page_vma_mkclean_one(&pvmw);
}
-int folio_total_mapcount(struct folio *folio)
-{
- int mapcount = folio_entire_mapcount(folio);
- int nr_pages;
- int i;
-
- /* In the common case, avoid the loop when no pages mapped by PTE */
- if (folio_nr_pages_mapped(folio) == 0)
- return mapcount;
- /*
- * Add all the PTE mappings of those pages mapped by PTE.
- * Limit the loop to folio_nr_pages_mapped()?
- * Perhaps: given all the raciness, that may be a good or a bad idea.
- */
- nr_pages = folio_nr_pages(folio);
- for (i = 0; i < nr_pages; i++)
- mapcount += atomic_read(&folio_page(folio, i)->_mapcount);
-
- /* But each of those _mapcounts was based on -1 */
- mapcount += nr_pages;
- return mapcount;
-}
-
static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
struct page *page, int nr_pages, enum rmap_level level,
int *nr_pmdmapped)
{
atomic_t *mapped = &folio->_nr_pages_mapped;
+ const int orig_nr_pages = nr_pages;
int first, nr = 0;
__folio_rmap_sanity_checks(folio, page, nr_pages, level);
switch (level) {
case RMAP_LEVEL_PTE:
+ if (!folio_test_large(folio)) {
+ nr = atomic_inc_and_test(&page->_mapcount);
+ break;
+ }
+
do {
first = atomic_inc_and_test(&page->_mapcount);
- if (first && folio_test_large(folio)) {
+ if (first) {
first = atomic_inc_return_relaxed(mapped);
- first = (first < ENTIRELY_MAPPED);
+ if (first < ENTIRELY_MAPPED)
+ nr++;
}
-
- if (first)
- nr++;
} while (page++, --nr_pages > 0);
+ atomic_add(orig_nr_pages, &folio->_large_mapcount);
break;
case RMAP_LEVEL_PMD:
first = atomic_inc_and_test(&folio->_entire_mapcount);
@@ -1194,6 +1179,7 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
nr = 0;
}
}
+ atomic_inc(&folio->_large_mapcount);
break;
}
return nr;
@@ -1429,10 +1415,14 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
SetPageAnonExclusive(page);
}
+ /* increment count (starts at -1) */
+ atomic_set(&folio->_large_mapcount, nr - 1);
atomic_set(&folio->_nr_pages_mapped, nr);
} else {
/* increment count (starts at -1) */
atomic_set(&folio->_entire_mapcount, 0);
+ /* increment count (starts at -1) */
+ atomic_set(&folio->_large_mapcount, 0);
atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
SetPageAnonExclusive(&folio->page);
__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr);
@@ -1504,24 +1494,32 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
{
atomic_t *mapped = &folio->_nr_pages_mapped;
int last, nr = 0, nr_pmdmapped = 0;
+ bool partially_mapped = false;
enum node_stat_item idx;
__folio_rmap_sanity_checks(folio, page, nr_pages, level);
switch (level) {
case RMAP_LEVEL_PTE:
+ if (!folio_test_large(folio)) {
+ nr = atomic_add_negative(-1, &page->_mapcount);
+ break;
+ }
+
+ atomic_sub(nr_pages, &folio->_large_mapcount);
do {
last = atomic_add_negative(-1, &page->_mapcount);
- if (last && folio_test_large(folio)) {
+ if (last) {
last = atomic_dec_return_relaxed(mapped);
- last = (last < ENTIRELY_MAPPED);
+ if (last < ENTIRELY_MAPPED)
+ nr++;
}
-
- if (last)
- nr++;
} while (page++, --nr_pages > 0);
+
+ partially_mapped = nr && atomic_read(mapped);
break;
case RMAP_LEVEL_PMD:
+ atomic_dec(&folio->_large_mapcount);
last = atomic_add_negative(-1, &folio->_entire_mapcount);
if (last) {
nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped);
@@ -1536,6 +1534,8 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
nr = 0;
}
}
+
+ partially_mapped = nr < nr_pmdmapped;
break;
}
@@ -1557,9 +1557,10 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
* page of the folio is unmapped and at least one page
* is still mapped.
*/
- if (folio_test_large(folio) && folio_test_anon(folio))
- if (level == RMAP_LEVEL_PTE || nr < nr_pmdmapped)
- deferred_split_folio(folio);
+ if (folio_test_anon(folio) &&
+ list_empty(&folio->_deferred_list) &&
+ partially_mapped)
+ deferred_split_folio(folio);
}
/*
@@ -2588,7 +2589,8 @@ static void rmap_walk_anon(struct folio *folio,
anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
pgoff_start, pgoff_end) {
struct vm_area_struct *vma = avc->vma;
- unsigned long address = vma_address(&folio->page, vma);
+ unsigned long address = vma_address(vma, pgoff_start,
+ folio_nr_pages(folio));
VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
@@ -2649,7 +2651,8 @@ static void rmap_walk_file(struct folio *folio,
lookup:
vma_interval_tree_foreach(vma, &mapping->i_mmap,
pgoff_start, pgoff_end) {
- unsigned long address = vma_address(&folio->page, vma);
+ unsigned long address = vma_address(vma, pgoff_start,
+ folio_nr_pages(folio));
VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
@@ -2702,6 +2705,7 @@ void hugetlb_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
atomic_inc(&folio->_entire_mapcount);
+ atomic_inc(&folio->_large_mapcount);
if (flags & RMAP_EXCLUSIVE)
SetPageAnonExclusive(&folio->page);
VM_WARN_ON_FOLIO(folio_entire_mapcount(folio) > 1 &&
@@ -2716,6 +2720,7 @@ void hugetlb_add_new_anon_rmap(struct folio *folio,
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
/* increment count (starts at -1) */
atomic_set(&folio->_entire_mapcount, 0);
+ atomic_set(&folio->_large_mapcount, 0);
folio_clear_hugetlb_restore_reserve(folio);
__folio_set_anon(folio, vma, address, true);
SetPageAnonExclusive(&folio->page);
diff --git a/mm/shmem.c b/mm/shmem.c
index 0aad0d9a621b80..fa2a0ed97507d1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -748,12 +748,6 @@ static long shmem_unused_huge_count(struct super_block *sb,
#define shmem_huge SHMEM_HUGE_DENY
-bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
- struct mm_struct *mm, unsigned long vm_flags)
-{
- return false;
-}
-
static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
struct shrink_control *sc, unsigned long nr_to_split)
{
@@ -1913,7 +1907,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
* Some architectures may have to restore extra metadata to the
* folio after reading from swap.
*/
- arch_swap_restore(swap, folio);
+ arch_swap_restore(folio_swap(swap, folio), folio);
if (shmem_should_replace_folio(folio, gfp)) {
error = shmem_replace_folio(&folio, gfp, info, index);
@@ -2273,8 +2267,6 @@ unsigned long shmem_get_unmapped_area(struct file *file,
unsigned long uaddr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
- unsigned long (*get_area)(struct file *,
- unsigned long, unsigned long, unsigned long, unsigned long);
unsigned long addr;
unsigned long offset;
unsigned long inflated_len;
@@ -2284,8 +2276,8 @@ unsigned long shmem_get_unmapped_area(struct file *file,
if (len > TASK_SIZE)
return -ENOMEM;
- get_area = current->mm->get_unmapped_area;
- addr = get_area(file, uaddr, len, pgoff, flags);
+ addr = mm_get_unmapped_area(current->mm, file, uaddr, len, pgoff,
+ flags);
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
return addr;
@@ -2342,7 +2334,8 @@ unsigned long shmem_get_unmapped_area(struct file *file,
if (inflated_len < len)
return addr;
- inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags);
+ inflated_addr = mm_get_unmapped_area(current->mm, NULL, uaddr,
+ inflated_len, 0, flags);
if (IS_ERR_VALUE(inflated_addr))
return addr;
if (inflated_addr & ~PAGE_MASK)
@@ -4807,7 +4800,7 @@ unsigned long shmem_get_unmapped_area(struct file *file,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
- return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags);
}
#endif
diff --git a/mm/shmem_quota.c b/mm/shmem_quota.c
index 062d1c1097ae35..ce514e700d2f65 100644
--- a/mm/shmem_quota.c
+++ b/mm/shmem_quota.c
@@ -116,7 +116,7 @@ static int shmem_free_file_info(struct super_block *sb, int type)
static int shmem_get_next_id(struct super_block *sb, struct kqid *qid)
{
struct mem_dqinfo *info = sb_dqinfo(sb, qid->type);
- struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+ struct rb_node *node;
qid_t id = from_kqid(&init_user_ns, *qid);
struct quota_info *dqopt = sb_dqopt(sb);
struct quota_id *entry = NULL;
@@ -126,6 +126,7 @@ static int shmem_get_next_id(struct super_block *sb, struct kqid *qid)
return -ESRCH;
down_read(&dqopt->dqio_sem);
+ node = ((struct rb_root *)info->dqi_priv)->rb_node;
while (node) {
entry = rb_entry(node, struct quota_id, node);
@@ -165,7 +166,7 @@ out_unlock:
static int shmem_acquire_dquot(struct dquot *dquot)
{
struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
- struct rb_node **n = &((struct rb_root *)info->dqi_priv)->rb_node;
+ struct rb_node **n;
struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info;
struct rb_node *parent = NULL, *new_node = NULL;
struct quota_id *new_entry, *entry;
@@ -176,6 +177,8 @@ static int shmem_acquire_dquot(struct dquot *dquot)
mutex_lock(&dquot->dq_lock);
down_write(&dqopt->dqio_sem);
+ n = &((struct rb_root *)info->dqi_priv)->rb_node;
+
while (*n) {
parent = *n;
entry = rb_entry(parent, struct quota_id, node);
@@ -264,7 +267,7 @@ static bool shmem_is_empty_dquot(struct dquot *dquot)
static int shmem_release_dquot(struct dquot *dquot)
{
struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
- struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+ struct rb_node *node;
qid_t id = from_kqid(&init_user_ns, dquot->dq_id);
struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
struct quota_id *entry = NULL;
@@ -275,6 +278,7 @@ static int shmem_release_dquot(struct dquot *dquot)
goto out_dqlock;
down_write(&dqopt->dqio_sem);
+ node = ((struct rb_root *)info->dqi_priv)->rb_node;
while (node) {
entry = rb_entry(node, struct quota_id, node);
diff --git a/mm/show_mem.c b/mm/show_mem.c
index 8dcfafbd283c12..bdb439551eefcb 100644
--- a/mm/show_mem.c
+++ b/mm/show_mem.c
@@ -423,4 +423,30 @@ void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
#ifdef CONFIG_MEMORY_FAILURE
printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
#endif
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+ {
+ struct codetag_bytes tags[10];
+ size_t i, nr;
+
+ nr = alloc_tag_top_users(tags, ARRAY_SIZE(tags), false);
+ if (nr) {
+ pr_notice("Memory allocations:\n");
+ for (i = 0; i < nr; i++) {
+ struct codetag *ct = tags[i].ct;
+ struct alloc_tag *tag = ct_to_alloc_tag(ct);
+ struct alloc_tag_counters counter = alloc_tag_read(tag);
+
+ /* Same as alloc_tag_to_text() but w/o intermediate buffer */
+ if (ct->modname)
+ pr_notice("%12lli %8llu %s:%u [%s] func:%s\n",
+ counter.bytes, counter.calls, ct->filename,
+ ct->lineno, ct->modname, ct->function);
+ else
+ pr_notice("%12lli %8llu %s:%u func:%s\n",
+ counter.bytes, counter.calls, ct->filename,
+ ct->lineno, ct->function);
+ }
+ }
+ }
+#endif
}
diff --git a/mm/slab.h b/mm/slab.h
index d2bc9b19122292..d12fb4392e3539 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -84,11 +84,11 @@ struct slab {
};
struct rcu_head rcu_head;
};
- unsigned int __unused;
+ unsigned int __page_type;
atomic_t __page_refcount;
-#ifdef CONFIG_MEMCG
- unsigned long memcg_data;
+#ifdef CONFIG_SLAB_OBJ_EXT
+ unsigned long obj_exts;
#endif
};
@@ -97,8 +97,8 @@ struct slab {
SLAB_MATCH(flags, __page_flags);
SLAB_MATCH(compound_head, slab_cache); /* Ensure bit 0 is clear */
SLAB_MATCH(_refcount, __page_refcount);
-#ifdef CONFIG_MEMCG
-SLAB_MATCH(memcg_data, memcg_data);
+#ifdef CONFIG_SLAB_OBJ_EXT
+SLAB_MATCH(memcg_data, obj_exts);
#endif
#undef SLAB_MATCH
static_assert(sizeof(struct slab) <= sizeof(struct page));
@@ -536,42 +536,52 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
return false;
}
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_SLAB_OBJ_EXT
+
/*
- * slab_objcgs - get the object cgroups vector associated with a slab
+ * slab_obj_exts - get the pointer to the slab object extension vector
+ * associated with a slab.
* @slab: a pointer to the slab struct
*
- * Returns a pointer to the object cgroups vector associated with the slab,
+ * Returns a pointer to the object extension vector associated with the slab,
* or NULL if no such vector has been associated yet.
*/
-static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
+static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
{
- unsigned long memcg_data = READ_ONCE(slab->memcg_data);
+ unsigned long obj_exts = READ_ONCE(slab->obj_exts);
- VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS),
+#ifdef CONFIG_MEMCG
+ VM_BUG_ON_PAGE(obj_exts && !(obj_exts & MEMCG_DATA_OBJEXTS),
slab_page(slab));
- VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab));
-
- return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+ VM_BUG_ON_PAGE(obj_exts & MEMCG_DATA_KMEM, slab_page(slab));
+#endif
+ return (struct slabobj_ext *)(obj_exts & ~OBJEXTS_FLAGS_MASK);
}
-int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
- gfp_t gfp, bool new_slab);
-void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
- enum node_stat_item idx, int nr);
-#else /* CONFIG_MEMCG_KMEM */
-static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
+int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab);
+
+#else /* CONFIG_SLAB_OBJ_EXT */
+
+static inline struct slabobj_ext *slab_obj_exts(struct slab *slab)
{
return NULL;
}
-static inline int memcg_alloc_slab_cgroups(struct slab *slab,
- struct kmem_cache *s, gfp_t gfp,
- bool new_slab)
+#endif /* CONFIG_SLAB_OBJ_EXT */
+
+static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
{
- return 0;
+ return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
+ NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
}
-#endif /* CONFIG_MEMCG_KMEM */
+
+#ifdef CONFIG_MEMCG_KMEM
+bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
+ gfp_t flags, size_t size, void **p);
+void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
+ void **p, int objects, struct slabobj_ext *obj_exts);
+#endif
size_t __ksize(const void *objp);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f5234672f03cea..3179a6aeffc56e 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1189,7 +1189,7 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
return (void *)p;
}
- ret = kmalloc_track_caller(new_size, flags);
+ ret = kmalloc_node_track_caller_noprof(new_size, flags, NUMA_NO_NODE, _RET_IP_);
if (ret && p) {
/* Disable KASAN checks as the object's redzone is accessed. */
kasan_disable_current();
@@ -1213,7 +1213,7 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
*
* Return: pointer to the allocated memory or %NULL in case of error
*/
-void *krealloc(const void *p, size_t new_size, gfp_t flags)
+void *krealloc_noprof(const void *p, size_t new_size, gfp_t flags)
{
void *ret;
@@ -1228,7 +1228,7 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags)
return ret;
}
-EXPORT_SYMBOL(krealloc);
+EXPORT_SYMBOL(krealloc_noprof);
/**
* kfree_sensitive - Clear sensitive information in memory before freeing
diff --git a/mm/slub.c b/mm/slub.c
index 1bb2a93cf7b6a4..3e33ff900d35ea 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -616,18 +616,12 @@ slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
*/
static __always_inline void slab_lock(struct slab *slab)
{
- struct page *page = slab_page(slab);
-
- VM_BUG_ON_PAGE(PageTail(page), page);
- bit_spin_lock(PG_locked, &page->flags);
+ bit_spin_lock(PG_locked, &slab->__page_flags);
}
static __always_inline void slab_unlock(struct slab *slab)
{
- struct page *page = slab_page(slab);
-
- VM_BUG_ON_PAGE(PageTail(page), page);
- bit_spin_unlock(PG_locked, &page->flags);
+ bit_spin_unlock(PG_locked, &slab->__page_flags);
}
static inline bool
@@ -1865,198 +1859,278 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
#endif
#endif /* CONFIG_SLUB_DEBUG */
-static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
+#ifdef CONFIG_SLAB_OBJ_EXT
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+
+static inline void mark_objexts_empty(struct slabobj_ext *obj_exts)
{
- return (s->flags & SLAB_RECLAIM_ACCOUNT) ?
- NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B;
+ struct slabobj_ext *slab_exts;
+ struct slab *obj_exts_slab;
+
+ obj_exts_slab = virt_to_slab(obj_exts);
+ slab_exts = slab_obj_exts(obj_exts_slab);
+ if (slab_exts) {
+ unsigned int offs = obj_to_index(obj_exts_slab->slab_cache,
+ obj_exts_slab, obj_exts);
+ /* codetag should be NULL */
+ WARN_ON(slab_exts[offs].ref.ct);
+ set_codetag_empty(&slab_exts[offs].ref);
+ }
}
-#ifdef CONFIG_MEMCG_KMEM
-static inline void memcg_free_slab_cgroups(struct slab *slab)
+static inline void mark_failed_objexts_alloc(struct slab *slab)
{
- kfree(slab_objcgs(slab));
- slab->memcg_data = 0;
+ slab->obj_exts = OBJEXTS_ALLOC_FAIL;
}
-static inline size_t obj_full_size(struct kmem_cache *s)
+static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
+ struct slabobj_ext *vec, unsigned int objects)
{
/*
- * For each accounted object there is an extra space which is used
- * to store obj_cgroup membership. Charge it too.
+ * If vector previously failed to allocate then we have live
+ * objects with no tag reference. Mark all references in this
+ * vector as empty to avoid warnings later on.
*/
- return s->size + sizeof(struct obj_cgroup *);
+ if (obj_exts & OBJEXTS_ALLOC_FAIL) {
+ unsigned int i;
+
+ for (i = 0; i < objects; i++)
+ set_codetag_empty(&vec[i].ref);
+ }
}
+#else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+
+static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {}
+static inline void mark_failed_objexts_alloc(struct slab *slab) {}
+static inline void handle_failed_objexts_alloc(unsigned long obj_exts,
+ struct slabobj_ext *vec, unsigned int objects) {}
+
+#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+
/*
- * Returns false if the allocation should fail.
+ * The allocated objcg pointers array is not accounted directly.
+ * Moreover, it should not come from DMA buffer and is not readily
+ * reclaimable. So those GFP bits should be masked off.
*/
-static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
- struct list_lru *lru,
- struct obj_cgroup **objcgp,
- size_t objects, gfp_t flags)
+#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \
+ __GFP_ACCOUNT | __GFP_NOFAIL)
+
+int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab)
+{
+ unsigned int objects = objs_per_slab(s, slab);
+ unsigned long new_exts;
+ unsigned long old_exts;
+ struct slabobj_ext *vec;
+
+ gfp &= ~OBJCGS_CLEAR_MASK;
+ /* Prevent recursive extension vector allocation */
+ gfp |= __GFP_NO_OBJ_EXT;
+ vec = kcalloc_node(objects, sizeof(struct slabobj_ext), gfp,
+ slab_nid(slab));
+ if (!vec) {
+ /* Mark vectors which failed to allocate */
+ if (new_slab)
+ mark_failed_objexts_alloc(slab);
+
+ return -ENOMEM;
+ }
+
+ new_exts = (unsigned long)vec;
+#ifdef CONFIG_MEMCG
+ new_exts |= MEMCG_DATA_OBJEXTS;
+#endif
+ old_exts = slab->obj_exts;
+ handle_failed_objexts_alloc(old_exts, vec, objects);
+ if (new_slab) {
+ /*
+ * If the slab is brand new and nobody can yet access its
+ * obj_exts, no synchronization is required and obj_exts can
+ * be simply assigned.
+ */
+ slab->obj_exts = new_exts;
+ } else if (cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) {
+ /*
+ * If the slab is already in use, somebody can allocate and
+ * assign slabobj_exts in parallel. In this case the existing
+ * objcg vector should be reused.
+ */
+ mark_objexts_empty(vec);
+ kfree(vec);
+ return 0;
+ }
+
+ kmemleak_not_leak(vec);
+ return 0;
+}
+
+static inline void free_slab_obj_exts(struct slab *slab)
{
+ struct slabobj_ext *obj_exts;
+
+ obj_exts = slab_obj_exts(slab);
+ if (!obj_exts)
+ return;
+
/*
- * The obtained objcg pointer is safe to use within the current scope,
- * defined by current task or set_active_memcg() pair.
- * obj_cgroup_get() is used to get a permanent reference.
+ * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its
+ * corresponding extension will be NULL. alloc_tag_sub() will throw a
+ * warning if slab has extensions but the extension of an object is
+ * NULL, therefore replace NULL with CODETAG_EMPTY to indicate that
+ * the extension for obj_exts is expected to be NULL.
*/
- struct obj_cgroup *objcg = current_obj_cgroup();
- if (!objcg)
+ mark_objexts_empty(obj_exts);
+ kfree(obj_exts);
+ slab->obj_exts = 0;
+}
+
+static inline bool need_slab_obj_ext(void)
+{
+ if (mem_alloc_profiling_enabled())
return true;
- if (lru) {
- int ret;
- struct mem_cgroup *memcg;
+ /*
+ * CONFIG_MEMCG_KMEM creates vector of obj_cgroup objects conditionally
+ * inside memcg_slab_post_alloc_hook. No other users for now.
+ */
+ return false;
+}
- memcg = get_mem_cgroup_from_objcg(objcg);
- ret = memcg_list_lru_alloc(memcg, lru, flags);
- css_put(&memcg->css);
+static inline struct slabobj_ext *
+prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p)
+{
+ struct slab *slab;
- if (ret)
- return false;
- }
+ if (!p)
+ return NULL;
- if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s)))
- return false;
+ if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE))
+ return NULL;
- *objcgp = objcg;
- return true;
+ if (flags & __GFP_NO_OBJ_EXT)
+ return NULL;
+
+ slab = virt_to_slab(p);
+ if (!slab_obj_exts(slab) &&
+ WARN(alloc_slab_obj_exts(slab, s, flags, false),
+ "%s, %s: Failed to create slab extension vector!\n",
+ __func__, s->name))
+ return NULL;
+
+ return slab_obj_exts(slab) + obj_to_index(s, slab, p);
}
-/*
- * Returns false if the allocation should fail.
- */
-static __fastpath_inline
-bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
- struct obj_cgroup **objcgp, size_t objects,
- gfp_t flags)
+static inline void
+alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
+ int objects)
{
- if (!memcg_kmem_online())
- return true;
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+ struct slabobj_ext *obj_exts;
+ int i;
- if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
- return true;
+ if (!mem_alloc_profiling_enabled())
+ return;
- return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects,
- flags));
+ obj_exts = slab_obj_exts(slab);
+ if (!obj_exts)
+ return;
+
+ for (i = 0; i < objects; i++) {
+ unsigned int off = obj_to_index(s, slab, p[i]);
+
+ alloc_tag_sub(&obj_exts[off].ref, s->size);
+ }
+#endif
}
-static void __memcg_slab_post_alloc_hook(struct kmem_cache *s,
- struct obj_cgroup *objcg,
- gfp_t flags, size_t size,
- void **p)
+#else /* CONFIG_SLAB_OBJ_EXT */
+
+static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab)
{
- struct slab *slab;
- unsigned long off;
- size_t i;
+ return 0;
+}
- flags &= gfp_allowed_mask;
+static inline void free_slab_obj_exts(struct slab *slab)
+{
+}
- for (i = 0; i < size; i++) {
- if (likely(p[i])) {
- slab = virt_to_slab(p[i]);
+static inline bool need_slab_obj_ext(void)
+{
+ return false;
+}
- if (!slab_objcgs(slab) &&
- memcg_alloc_slab_cgroups(slab, s, flags, false)) {
- obj_cgroup_uncharge(objcg, obj_full_size(s));
- continue;
- }
+static inline struct slabobj_ext *
+prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p)
+{
+ return NULL;
+}
- off = obj_to_index(s, slab, p[i]);
- obj_cgroup_get(objcg);
- slab_objcgs(slab)[off] = objcg;
- mod_objcg_state(objcg, slab_pgdat(slab),
- cache_vmstat_idx(s), obj_full_size(s));
- } else {
- obj_cgroup_uncharge(objcg, obj_full_size(s));
- }
- }
+static inline void
+alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
+ int objects)
+{
}
+#endif /* CONFIG_SLAB_OBJ_EXT */
+
+#ifdef CONFIG_MEMCG_KMEM
+
+static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);
+
static __fastpath_inline
-void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
+bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
gfp_t flags, size_t size, void **p)
{
- if (likely(!memcg_kmem_online() || !objcg))
- return;
-
- return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
-}
+ if (likely(!memcg_kmem_online()))
+ return true;
-static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
- void **p, int objects,
- struct obj_cgroup **objcgs)
-{
- for (int i = 0; i < objects; i++) {
- struct obj_cgroup *objcg;
- unsigned int off;
+ if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
+ return true;
- off = obj_to_index(s, slab, p[i]);
- objcg = objcgs[off];
- if (!objcg)
- continue;
+ if (likely(__memcg_slab_post_alloc_hook(s, lru, flags, size, p)))
+ return true;
- objcgs[off] = NULL;
- obj_cgroup_uncharge(objcg, obj_full_size(s));
- mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
- -obj_full_size(s));
- obj_cgroup_put(objcg);
+ if (likely(size == 1)) {
+ memcg_alloc_abort_single(s, *p);
+ *p = NULL;
+ } else {
+ kmem_cache_free_bulk(s, size, p);
}
+
+ return false;
}
static __fastpath_inline
void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
int objects)
{
- struct obj_cgroup **objcgs;
+ struct slabobj_ext *obj_exts;
if (!memcg_kmem_online())
return;
- objcgs = slab_objcgs(slab);
- if (likely(!objcgs))
+ obj_exts = slab_obj_exts(slab);
+ if (likely(!obj_exts))
return;
- __memcg_slab_free_hook(s, slab, p, objects, objcgs);
-}
-
-static inline
-void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
- struct obj_cgroup *objcg)
-{
- if (objcg)
- obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
+ __memcg_slab_free_hook(s, slab, p, objects, obj_exts);
}
#else /* CONFIG_MEMCG_KMEM */
-static inline void memcg_free_slab_cgroups(struct slab *slab)
-{
-}
-
-static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
- struct list_lru *lru,
- struct obj_cgroup **objcgp,
- size_t objects, gfp_t flags)
-{
- return true;
-}
-
-static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
- struct obj_cgroup *objcg,
+static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s,
+ struct list_lru *lru,
gfp_t flags, size_t size,
void **p)
{
+ return true;
}
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects)
{
}
-
-static inline
-void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
- struct obj_cgroup *objcg)
-{
-}
#endif /* CONFIG_MEMCG_KMEM */
/*
@@ -2106,9 +2180,9 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
return !kasan_slab_free(s, x, init);
}
-static inline bool slab_free_freelist_hook(struct kmem_cache *s,
- void **head, void **tail,
- int *cnt)
+static __fastpath_inline
+bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
+ int *cnt)
{
void *object;
@@ -2298,7 +2372,7 @@ static __always_inline void account_slab(struct slab *slab, int order,
struct kmem_cache *s, gfp_t gfp)
{
if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
- memcg_alloc_slab_cgroups(slab, s, gfp, true);
+ alloc_slab_obj_exts(slab, s, gfp, true);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
PAGE_SIZE << order);
@@ -2307,8 +2381,8 @@ static __always_inline void account_slab(struct slab *slab, int order,
static __always_inline void unaccount_slab(struct slab *slab, int order,
struct kmem_cache *s)
{
- if (memcg_kmem_online())
- memcg_free_slab_cgroups(slab);
+ if (memcg_kmem_online() || need_slab_obj_ext())
+ free_slab_obj_exts(slab);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
@@ -3736,10 +3810,7 @@ noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
static __fastpath_inline
-struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
- struct list_lru *lru,
- struct obj_cgroup **objcgp,
- size_t size, gfp_t flags)
+struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
{
flags &= gfp_allowed_mask;
@@ -3748,18 +3819,16 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
if (unlikely(should_failslab(s, flags)))
return NULL;
- if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)))
- return NULL;
-
return s;
}
static __fastpath_inline
-void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
+bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
gfp_t flags, size_t size, void **p, bool init,
unsigned int orig_size)
{
unsigned int zero_size = s->object_size;
+ struct slabobj_ext *obj_exts;
bool kasan_init = init;
size_t i;
gfp_t init_flags = flags & gfp_allowed_mask;
@@ -3802,9 +3871,21 @@ void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, init_flags);
kmsan_slab_alloc(s, p[i], init_flags);
+ if (need_slab_obj_ext()) {
+ obj_exts = prepare_slab_obj_exts_hook(s, flags, p[i]);
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+ /*
+ * Currently obj_exts is used only for allocation profiling.
+ * If other users appear then mem_alloc_profiling_enabled()
+ * check should be added before alloc_tag_add().
+ */
+ if (likely(obj_exts))
+ alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size);
+#endif
+ }
}
- memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
+ return memcg_slab_post_alloc_hook(s, lru, flags, size, p);
}
/*
@@ -3821,10 +3902,9 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
void *object;
- struct obj_cgroup *objcg = NULL;
bool init = false;
- s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
+ s = slab_pre_alloc_hook(s, gfpflags);
if (unlikely(!s))
return NULL;
@@ -3841,13 +3921,15 @@ out:
/*
* When init equals 'true', like for kzalloc() family, only
* @orig_size bytes might be zeroed instead of s->object_size
+ * In case this fails due to memcg_slab_post_alloc_hook(),
+ * object is set to NULL
*/
- slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init, orig_size);
+ slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size);
return object;
}
-void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+void *kmem_cache_alloc_noprof(struct kmem_cache *s, gfp_t gfpflags)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_,
s->object_size);
@@ -3856,9 +3938,9 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
return ret;
}
-EXPORT_SYMBOL(kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_alloc_noprof);
-void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
+void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
gfp_t gfpflags)
{
void *ret = slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, _RET_IP_,
@@ -3868,7 +3950,7 @@ void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
return ret;
}
-EXPORT_SYMBOL(kmem_cache_alloc_lru);
+EXPORT_SYMBOL(kmem_cache_alloc_lru_noprof);
/**
* kmem_cache_alloc_node - Allocate an object on the specified node
@@ -3883,7 +3965,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_lru);
*
* Return: pointer to the new object or %NULL in case of error
*/
-void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
+void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t gfpflags, int node)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
@@ -3891,7 +3973,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
return ret;
}
-EXPORT_SYMBOL(kmem_cache_alloc_node);
+EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
/*
* To avoid unnecessary overhead, we pass through large allocation requests
@@ -3908,7 +3990,7 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
flags = kmalloc_fix_flags(flags);
flags |= __GFP_COMP;
- folio = (struct folio *)alloc_pages_node(node, flags, order);
+ folio = (struct folio *)alloc_pages_node_noprof(node, flags, order);
if (folio) {
ptr = folio_address(folio);
lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
@@ -3923,7 +4005,7 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node)
return ptr;
}
-void *kmalloc_large(size_t size, gfp_t flags)
+void *kmalloc_large_noprof(size_t size, gfp_t flags)
{
void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE);
@@ -3931,9 +4013,9 @@ void *kmalloc_large(size_t size, gfp_t flags)
flags, NUMA_NO_NODE);
return ret;
}
-EXPORT_SYMBOL(kmalloc_large);
+EXPORT_SYMBOL(kmalloc_large_noprof);
-void *kmalloc_large_node(size_t size, gfp_t flags, int node)
+void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node)
{
void *ret = __kmalloc_large_node(size, flags, node);
@@ -3941,7 +4023,7 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node)
flags, node);
return ret;
}
-EXPORT_SYMBOL(kmalloc_large_node);
+EXPORT_SYMBOL(kmalloc_large_node_noprof);
static __always_inline
void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
@@ -3968,26 +4050,26 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node,
return ret;
}
-void *__kmalloc_node(size_t size, gfp_t flags, int node)
+void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node)
{
return __do_kmalloc_node(size, flags, node, _RET_IP_);
}
-EXPORT_SYMBOL(__kmalloc_node);
+EXPORT_SYMBOL(__kmalloc_node_noprof);
-void *__kmalloc(size_t size, gfp_t flags)
+void *__kmalloc_noprof(size_t size, gfp_t flags)
{
return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_);
}
-EXPORT_SYMBOL(__kmalloc);
+EXPORT_SYMBOL(__kmalloc_noprof);
-void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
- int node, unsigned long caller)
+void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags,
+ int node, unsigned long caller)
{
return __do_kmalloc_node(size, flags, node, caller);
}
-EXPORT_SYMBOL(__kmalloc_node_track_caller);
+EXPORT_SYMBOL(kmalloc_node_track_caller_noprof);
-void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
+void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE,
_RET_IP_, size);
@@ -3997,9 +4079,9 @@ void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
-EXPORT_SYMBOL(kmalloc_trace);
+EXPORT_SYMBOL(kmalloc_trace_noprof);
-void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
+void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t size)
{
void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
@@ -4009,7 +4091,7 @@ void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
}
-EXPORT_SYMBOL(kmalloc_node_trace);
+EXPORT_SYMBOL(kmalloc_node_trace_noprof);
static noinline void free_to_partial_list(
struct kmem_cache *s, struct slab *slab,
@@ -4276,16 +4358,28 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
unsigned long addr)
{
memcg_slab_free_hook(s, slab, &object, 1);
+ alloc_tagging_slab_free_hook(s, slab, &object, 1);
if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
do_slab_free(s, slab, object, object, 1, addr);
}
+#ifdef CONFIG_MEMCG_KMEM
+/* Do not inline the rare memcg charging failed path into the allocation path */
+static noinline
+void memcg_alloc_abort_single(struct kmem_cache *s, void *object)
+{
+ if (likely(slab_free_hook(s, object, slab_want_init_on_free(s))))
+ do_slab_free(s, virt_to_slab(object), object, object, 1, _RET_IP_);
+}
+#endif
+
static __fastpath_inline
void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
void *tail, void **p, int cnt, unsigned long addr)
{
memcg_slab_free_hook(s, slab, p, cnt);
+ alloc_tagging_slab_free_hook(s, slab, p, cnt);
/*
* With KASAN enabled slab_free_freelist_hook modifies the freelist
* to remove objects, whose reuse must be delayed.
@@ -4612,36 +4706,33 @@ error:
#endif /* CONFIG_SLUB_TINY */
/* Note that interrupts must be enabled when calling this function. */
-int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
- void **p)
+int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
+ void **p)
{
int i;
- struct obj_cgroup *objcg = NULL;
if (!size)
return 0;
- /* memcg and kmem_cache debug support */
- s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
+ s = slab_pre_alloc_hook(s, flags);
if (unlikely(!s))
return 0;
i = __kmem_cache_alloc_bulk(s, flags, size, p);
+ if (unlikely(i == 0))
+ return 0;
/*
* memcg and kmem_cache debug support and memory initialization.
* Done outside of the IRQ disabled fastpath loop.
*/
- if (likely(i != 0)) {
- slab_post_alloc_hook(s, objcg, flags, size, p,
- slab_want_init_on_alloc(flags, s), s->object_size);
- } else {
- memcg_slab_alloc_error_hook(s, size, objcg);
+ if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p,
+ slab_want_init_on_alloc(flags, s), s->object_size))) {
+ return 0;
}
-
return i;
}
-EXPORT_SYMBOL(kmem_cache_alloc_bulk);
+EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof);
/*
@@ -5630,7 +5721,8 @@ void __init kmem_cache_init(void)
node_set(node, slab_nodes);
create_boot_cache(kmem_cache_node, "kmem_cache_node",
- sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
+ sizeof(struct kmem_cache_node),
+ SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
@@ -5640,7 +5732,7 @@ void __init kmem_cache_init(void)
create_boot_cache(kmem_cache, "kmem_cache",
offsetof(struct kmem_cache, node) +
nr_node_ids * sizeof(struct kmem_cache_node *),
- SLAB_HWCACHE_ALIGN, 0, 0);
+ SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
kmem_cache = bootstrap(&boot_kmem_cache);
kmem_cache_node = bootstrap(&boot_kmem_cache_node);
diff --git a/mm/sparse.c b/mm/sparse.c
index aed0951b87fa04..de40b2c7340673 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -226,19 +226,6 @@ static void __init memory_present(int nid, unsigned long start, unsigned long en
{
unsigned long pfn;
-#ifdef CONFIG_SPARSEMEM_EXTREME
- if (unlikely(!mem_section)) {
- unsigned long size, align;
-
- size = sizeof(struct mem_section *) * NR_SECTION_ROOTS;
- align = 1 << (INTERNODE_CACHE_SHIFT);
- mem_section = memblock_alloc(size, align);
- if (!mem_section)
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, size, align);
- }
-#endif
-
start &= PAGE_SECTION_MASK;
mminit_validate_memmodel_limits(&start, &end);
for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
@@ -267,6 +254,19 @@ static void __init memblocks_present(void)
unsigned long start, end;
int i, nid;
+#ifdef CONFIG_SPARSEMEM_EXTREME
+ if (unlikely(!mem_section)) {
+ unsigned long size, align;
+
+ size = sizeof(struct mem_section *) * NR_SECTION_ROOTS;
+ align = 1 << (INTERNODE_CACHE_SHIFT);
+ mem_section = memblock_alloc(size, align);
+ if (!mem_section)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, size, align);
+ }
+#endif
+
for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid)
memory_present(nid, start, end);
}
@@ -560,6 +560,8 @@ void __init sparse_init(void)
unsigned long pnum_end, pnum_begin, map_count = 1;
int nid_begin;
+ /* see include/linux/mmzone.h 'struct mem_section' definition */
+ BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section)));
memblocks_present();
pnum_begin = first_present_section_nr();
diff --git a/mm/swap.c b/mm/swap.c
index 500a09a48dfd3a..67786cb771305c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -112,34 +112,21 @@ static void page_cache_release(struct folio *folio)
unlock_page_lruvec_irqrestore(lruvec, flags);
}
-static void __folio_put_small(struct folio *folio)
+void __folio_put(struct folio *folio)
{
+ if (unlikely(folio_is_zone_device(folio))) {
+ free_zone_device_folio(folio);
+ return;
+ } else if (folio_test_hugetlb(folio)) {
+ free_huge_folio(folio);
+ return;
+ }
+
page_cache_release(folio);
+ if (folio_test_large(folio) && folio_test_large_rmappable(folio))
+ folio_undo_large_rmappable(folio);
mem_cgroup_uncharge(folio);
- free_unref_page(&folio->page, 0);
-}
-
-static void __folio_put_large(struct folio *folio)
-{
- /*
- * __page_cache_release() is supposed to be called for thp, not for
- * hugetlb. This is because hugetlb page does never have PageLRU set
- * (it's never listed to any LRU lists) and no memcg routines should
- * be called for hugetlb (it has a separate hugetlb_cgroup.)
- */
- if (!folio_test_hugetlb(folio))
- page_cache_release(folio);
- destroy_large_folio(folio);
-}
-
-void __folio_put(struct folio *folio)
-{
- if (unlikely(folio_is_zone_device(folio)))
- free_zone_device_page(&folio->page);
- else if (unlikely(folio_test_large(folio)))
- __folio_put_large(folio);
- else
- __folio_put_small(folio);
+ free_unref_page(&folio->page, folio_order(folio));
}
EXPORT_SYMBOL(__folio_put);
@@ -158,8 +145,8 @@ void put_pages_list(struct list_head *pages)
list_for_each_entry_safe(folio, next, pages, lru) {
if (!folio_put_testzero(folio))
continue;
- if (folio_test_large(folio)) {
- __folio_put_large(folio);
+ if (folio_test_hugetlb(folio)) {
+ free_huge_folio(folio);
continue;
}
/* LRU flag must be clear because it's passed using the lru */
@@ -460,15 +447,18 @@ static void folio_inc_refs(struct folio *folio)
}
#endif /* CONFIG_LRU_GEN */
-/*
- * Mark a page as having seen activity.
+/**
+ * folio_mark_accessed - Mark a folio as having seen activity.
+ * @folio: The folio to mark.
+ *
+ * This function will perform one of the following transitions:
*
- * inactive,unreferenced -> inactive,referenced
- * inactive,referenced -> active,unreferenced
- * active,unreferenced -> active,referenced
+ * * inactive,unreferenced -> inactive,referenced
+ * * inactive,referenced -> active,unreferenced
+ * * active,unreferenced -> active,referenced
*
- * When a newly allocated page is not yet visible, so safe for non-atomic ops,
- * __SetPageReferenced(page) may be substituted for mark_page_accessed(page).
+ * When a newly allocated folio is not yet visible, so safe for non-atomic ops,
+ * __folio_set_referenced() may be substituted for folio_mark_accessed().
*/
void folio_mark_accessed(struct folio *folio)
{
@@ -985,7 +975,7 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
struct folio *folio = folios->folios[i];
unsigned int nr_refs = refs ? refs[i] : 1;
- if (is_huge_zero_page(&folio->page))
+ if (is_huge_zero_folio(folio))
continue;
if (folio_is_zone_device(folio)) {
@@ -993,10 +983,10 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
unlock_page_lruvec_irqrestore(lruvec, flags);
lruvec = NULL;
}
- if (put_devmap_managed_page_refs(&folio->page, nr_refs))
+ if (put_devmap_managed_folio_refs(folio, nr_refs))
continue;
if (folio_ref_sub_and_test(folio, nr_refs))
- free_zone_device_page(&folio->page);
+ free_zone_device_folio(folio);
continue;
}
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 90973ce7881db2..13ab3b771409c3 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -264,7 +264,7 @@ static int refill_swap_slots_cache(struct swap_slots_cache *cache)
cache->cur = 0;
if (swap_slot_cache_active)
cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
- cache->slots, 1);
+ cache->slots, 0);
return cache->nr;
}
@@ -310,8 +310,8 @@ swp_entry_t folio_alloc_swap(struct folio *folio)
entry.val = 0;
if (folio_test_large(folio)) {
- if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported())
- get_swap_pages(1, &entry, folio_nr_pages(folio));
+ if (IS_ENABLED(CONFIG_THP_SWAP))
+ get_swap_pages(1, &entry, folio_order(folio));
goto out;
}
@@ -343,7 +343,7 @@ repeat:
goto out;
}
- get_swap_pages(1, &entry, 1);
+ get_swap_pages(1, &entry, 0);
out:
if (mem_cgroup_try_charge_swap(folio, entry)) {
put_swap_folio(folio, entry);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index bfc7e8c58a6d34..642c30d8376c60 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -73,11 +73,11 @@ void *get_shadow_from_swap_cache(swp_entry_t entry)
{
struct address_space *address_space = swap_address_space(entry);
pgoff_t idx = swp_offset(entry);
- struct page *page;
+ void *shadow;
- page = xa_load(&address_space->i_pages, idx);
- if (xa_is_value(page))
- return page;
+ shadow = xa_load(&address_space->i_pages, idx);
+ if (xa_is_value(shadow))
+ return shadow;
return NULL;
}
@@ -301,7 +301,7 @@ void free_page_and_swap_cache(struct page *page)
struct folio *folio = page_folio(page);
free_swap_cache(folio);
- if (!is_huge_zero_page(page))
+ if (!is_huge_zero_folio(folio))
folio_put(folio);
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4919423cce76a3..96606580ee091d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -130,7 +130,11 @@ static inline unsigned char swap_count(unsigned char ent)
/* Reclaim the swap entry if swap is getting full*/
#define TTRS_FULL 0x4
-/* returns 1 if swap entry is freed */
+/*
+ * returns number of pages in the folio that backs the swap entry. If positive,
+ * the folio was reclaimed. If negative, the folio was not reclaimed. If 0, no
+ * folio was associated with the swap entry.
+ */
static int __try_to_reclaim_swap(struct swap_info_struct *si,
unsigned long offset, unsigned long flags)
{
@@ -155,6 +159,7 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
ret = folio_free_swap(folio);
folio_unlock(folio);
}
+ ret = ret ? folio_nr_pages(folio) : -folio_nr_pages(folio);
folio_put(folio);
return ret;
}
@@ -273,15 +278,15 @@ static void discard_swap_cluster(struct swap_info_struct *si,
#ifdef CONFIG_THP_SWAP
#define SWAPFILE_CLUSTER HPAGE_PMD_NR
-#define swap_entry_size(size) (size)
+#define swap_entry_order(order) (order)
#else
#define SWAPFILE_CLUSTER 256
/*
- * Define swap_entry_size() as constant to let compiler to optimize
+ * Define swap_entry_order() as constant to let compiler to optimize
* out some code if !CONFIG_THP_SWAP
*/
-#define swap_entry_size(size) 1
+#define swap_entry_order(order) 0
#endif
#define LATENCY_LIMIT 256
@@ -343,18 +348,6 @@ static inline void cluster_set_null(struct swap_cluster_info *info)
info->data = 0;
}
-static inline bool cluster_is_huge(struct swap_cluster_info *info)
-{
- if (IS_ENABLED(CONFIG_THP_SWAP))
- return info->flags & CLUSTER_FLAG_HUGE;
- return false;
-}
-
-static inline void cluster_clear_huge(struct swap_cluster_info *info)
-{
- info->flags &= ~CLUSTER_FLAG_HUGE;
-}
-
static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
unsigned long offset)
{
@@ -558,10 +551,12 @@ static void free_cluster(struct swap_info_struct *si, unsigned long idx)
/*
* The cluster corresponding to page_nr will be used. The cluster will be
- * removed from free cluster list and its usage counter will be increased.
+ * removed from free cluster list and its usage counter will be increased by
+ * count.
*/
-static void inc_cluster_info_page(struct swap_info_struct *p,
- struct swap_cluster_info *cluster_info, unsigned long page_nr)
+static void add_cluster_info_page(struct swap_info_struct *p,
+ struct swap_cluster_info *cluster_info, unsigned long page_nr,
+ unsigned long count)
{
unsigned long idx = page_nr / SWAPFILE_CLUSTER;
@@ -570,9 +565,19 @@ static void inc_cluster_info_page(struct swap_info_struct *p,
if (cluster_is_free(&cluster_info[idx]))
alloc_cluster(p, idx);
- VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER);
+ VM_BUG_ON(cluster_count(&cluster_info[idx]) + count > SWAPFILE_CLUSTER);
cluster_set_count(&cluster_info[idx],
- cluster_count(&cluster_info[idx]) + 1);
+ cluster_count(&cluster_info[idx]) + count);
+}
+
+/*
+ * The cluster corresponding to page_nr will be used. The cluster will be
+ * removed from free cluster list and its usage counter will be increased by 1.
+ */
+static void inc_cluster_info_page(struct swap_info_struct *p,
+ struct swap_cluster_info *cluster_info, unsigned long page_nr)
+{
+ add_cluster_info_page(p, cluster_info, page_nr, 1);
}
/*
@@ -602,7 +607,7 @@ static void dec_cluster_info_page(struct swap_info_struct *p,
*/
static bool
scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
- unsigned long offset)
+ unsigned long offset, int order)
{
struct percpu_cluster *percpu_cluster;
bool conflict;
@@ -616,27 +621,42 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
return false;
percpu_cluster = this_cpu_ptr(si->percpu_cluster);
- cluster_set_null(&percpu_cluster->index);
+ percpu_cluster->next[order] = SWAP_NEXT_INVALID;
+ return true;
+}
+
+static inline bool swap_range_empty(char *swap_map, unsigned int start,
+ unsigned int nr_pages)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (swap_map[start + i])
+ return false;
+ }
+
return true;
}
/*
- * Try to get a swap entry from current cpu's swap entry pool (a cluster). This
- * might involve allocating a new cluster for current CPU too.
+ * Try to get swap entries with specified order from current cpu's swap entry
+ * pool (a cluster). This might involve allocating a new cluster for current CPU
+ * too.
*/
static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
- unsigned long *offset, unsigned long *scan_base)
+ unsigned long *offset, unsigned long *scan_base, int order)
{
+ unsigned int nr_pages = 1 << order;
struct percpu_cluster *cluster;
struct swap_cluster_info *ci;
- unsigned long tmp, max;
+ unsigned int tmp, max;
new_cluster:
cluster = this_cpu_ptr(si->percpu_cluster);
- if (cluster_is_null(&cluster->index)) {
+ tmp = cluster->next[order];
+ if (tmp == SWAP_NEXT_INVALID) {
if (!cluster_list_empty(&si->free_clusters)) {
- cluster->index = si->free_clusters.head;
- cluster->next = cluster_next(&cluster->index) *
+ tmp = cluster_next(&si->free_clusters.head) *
SWAPFILE_CLUSTER;
} else if (!cluster_list_empty(&si->discard_clusters)) {
/*
@@ -654,27 +674,27 @@ new_cluster:
/*
* Other CPUs can use our cluster if they can't find a free cluster,
- * check if there is still free entry in the cluster
+ * check if there is still free entry in the cluster, maintaining
+ * natural alignment.
*/
- tmp = cluster->next;
- max = min_t(unsigned long, si->max,
- (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER);
+ max = min_t(unsigned long, si->max, ALIGN(tmp + 1, SWAPFILE_CLUSTER));
if (tmp < max) {
ci = lock_cluster(si, tmp);
while (tmp < max) {
- if (!si->swap_map[tmp])
+ if (swap_range_empty(si->swap_map, tmp, nr_pages))
break;
- tmp++;
+ tmp += nr_pages;
}
unlock_cluster(ci);
}
if (tmp >= max) {
- cluster_set_null(&cluster->index);
+ cluster->next[order] = SWAP_NEXT_INVALID;
goto new_cluster;
}
- cluster->next = tmp + 1;
*offset = tmp;
*scan_base = tmp;
+ tmp += nr_pages;
+ cluster->next[order] = tmp < max ? tmp : SWAP_NEXT_INVALID;
return true;
}
@@ -804,13 +824,14 @@ static bool swap_offset_available_and_locked(struct swap_info_struct *si,
static int scan_swap_map_slots(struct swap_info_struct *si,
unsigned char usage, int nr,
- swp_entry_t slots[])
+ swp_entry_t slots[], int order)
{
struct swap_cluster_info *ci;
unsigned long offset;
unsigned long scan_base;
unsigned long last_in_cluster = 0;
int latency_ration = LATENCY_LIMIT;
+ unsigned int nr_pages = 1 << order;
int n_ret = 0;
bool scanned_many = false;
@@ -825,6 +846,25 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
* And we let swap pages go all over an SSD partition. Hugh
*/
+ if (order > 0) {
+ /*
+ * Should not even be attempting large allocations when huge
+ * page swap is disabled. Warn and fail the allocation.
+ */
+ if (!IS_ENABLED(CONFIG_THP_SWAP) ||
+ nr_pages > SWAPFILE_CLUSTER) {
+ VM_WARN_ON_ONCE(1);
+ return 0;
+ }
+
+ /*
+ * Swapfile is not block device or not using clusters so unable
+ * to allocate large entries.
+ */
+ if (!(si->flags & SWP_BLKDEV) || !si->cluster_info)
+ return 0;
+ }
+
si->flags += SWP_SCANNING;
/*
* Use percpu scan base for SSD to reduce lock contention on
@@ -839,8 +879,11 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
/* SSD algorithm */
if (si->cluster_info) {
- if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+ if (!scan_swap_map_try_ssd_cluster(si, &offset, &scan_base, order)) {
+ if (order > 0)
+ goto no_page;
goto scan;
+ }
} else if (unlikely(!si->cluster_nr--)) {
if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
si->cluster_nr = SWAPFILE_CLUSTER - 1;
@@ -882,13 +925,16 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
checks:
if (si->cluster_info) {
- while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
+ while (scan_swap_map_ssd_cluster_conflict(si, offset, order)) {
/* take a break if we already got some slots */
if (n_ret)
goto done;
if (!scan_swap_map_try_ssd_cluster(si, &offset,
- &scan_base))
+ &scan_base, order)) {
+ if (order > 0)
+ goto no_page;
goto scan;
+ }
}
}
if (!(si->flags & SWP_WRITEOK))
@@ -907,7 +953,7 @@ checks:
swap_was_freed = __try_to_reclaim_swap(si, offset, TTRS_ANYWAY);
spin_lock(&si->lock);
/* entry was freed successfully, try to use this again */
- if (swap_was_freed)
+ if (swap_was_freed > 0)
goto checks;
goto scan; /* check next one */
}
@@ -919,11 +965,11 @@ checks:
else
goto done;
}
- WRITE_ONCE(si->swap_map[offset], usage);
- inc_cluster_info_page(si, si->cluster_info, offset);
+ memset(si->swap_map + offset, usage, nr_pages);
+ add_cluster_info_page(si, si->cluster_info, offset, nr_pages);
unlock_cluster(ci);
- swap_range_alloc(si, offset, 1);
+ swap_range_alloc(si, offset, nr_pages);
slots[n_ret++] = swp_entry(si->type, offset);
/* got enough slots or reach max slots? */
@@ -944,8 +990,10 @@ checks:
/* try to get more slots in cluster */
if (si->cluster_info) {
- if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+ if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base, order))
goto checks;
+ if (order > 0)
+ goto done;
} else if (si->cluster_nr && !si->swap_map[++offset]) {
/* non-ssd case, still more slots in cluster? */
--si->cluster_nr;
@@ -972,11 +1020,13 @@ checks:
}
done:
- set_cluster_next(si, offset + 1);
+ if (order == 0)
+ set_cluster_next(si, offset + 1);
si->flags -= SWP_SCANNING;
return n_ret;
scan:
+ VM_WARN_ON(order > 0);
spin_unlock(&si->lock);
while (++offset <= READ_ONCE(si->highest_bit)) {
if (unlikely(--latency_ration < 0)) {
@@ -1005,38 +1055,6 @@ no_page:
return n_ret;
}
-static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
-{
- unsigned long idx;
- struct swap_cluster_info *ci;
- unsigned long offset;
-
- /*
- * Should not even be attempting cluster allocations when huge
- * page swap is disabled. Warn and fail the allocation.
- */
- if (!IS_ENABLED(CONFIG_THP_SWAP)) {
- VM_WARN_ON_ONCE(1);
- return 0;
- }
-
- if (cluster_list_empty(&si->free_clusters))
- return 0;
-
- idx = cluster_list_first(&si->free_clusters);
- offset = idx * SWAPFILE_CLUSTER;
- ci = lock_cluster(si, offset);
- alloc_cluster(si, idx);
- cluster_set_count_flag(ci, SWAPFILE_CLUSTER, CLUSTER_FLAG_HUGE);
-
- memset(si->swap_map + offset, SWAP_HAS_CACHE, SWAPFILE_CLUSTER);
- unlock_cluster(ci);
- swap_range_alloc(si, offset, SWAPFILE_CLUSTER);
- *slot = swp_entry(si->type, offset);
-
- return 1;
-}
-
static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
{
unsigned long offset = idx * SWAPFILE_CLUSTER;
@@ -1050,17 +1068,15 @@ static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
swap_range_free(si, offset, SWAPFILE_CLUSTER);
}
-int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
+int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
{
- unsigned long size = swap_entry_size(entry_size);
+ int order = swap_entry_order(entry_order);
+ unsigned long size = 1 << order;
struct swap_info_struct *si, *next;
long avail_pgs;
int n_ret = 0;
int node;
- /* Only single cluster request supported */
- WARN_ON_ONCE(n_goal > 1 && size == SWAPFILE_CLUSTER);
-
spin_lock(&swap_avail_lock);
avail_pgs = atomic_long_read(&nr_swap_pages) / size;
@@ -1096,14 +1112,10 @@ start_over:
spin_unlock(&si->lock);
goto nextsi;
}
- if (size == SWAPFILE_CLUSTER) {
- if (si->flags & SWP_BLKDEV)
- n_ret = swap_alloc_cluster(si, swp_entries);
- } else
- n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
- n_goal, swp_entries);
+ n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
+ n_goal, swp_entries, order);
spin_unlock(&si->lock);
- if (n_ret || size == SWAPFILE_CLUSTER)
+ if (n_ret || size > 1)
goto check_out;
cond_resched();
@@ -1226,16 +1238,15 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
/*
* When we get a swap entry, if there aren't some other ways to
- * prevent swapoff, such as the folio in swap cache is locked, page
- * table lock is held, etc., the swap entry may become invalid because
- * of swapoff. Then, we need to enclose all swap related functions
- * with get_swap_device() and put_swap_device(), unless the swap
- * functions call get/put_swap_device() by themselves.
+ * prevent swapoff, such as the folio in swap cache is locked, RCU
+ * reader side is locked, etc., the swap entry may become invalid
+ * because of swapoff. Then, we need to enclose all swap related
+ * functions with get_swap_device() and put_swap_device(), unless the
+ * swap functions call get/put_swap_device() by themselves.
*
- * Note that when only holding the PTL, swapoff might succeed immediately
- * after freeing a swap entry. Therefore, immediately after
- * __swap_entry_free(), the swap info might become stale and should not
- * be touched without a prior get_swap_device().
+ * RCU reader side lock (including any spinlock) is sufficient to
+ * prevent swapoff, because synchronize_rcu() is called in swapoff()
+ * before freeing data structures.
*
* Check whether swap entry is valid in the swap device. If so,
* return pointer to swap_info_struct, and keep the swap entry valid
@@ -1357,7 +1368,7 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
unsigned char *map;
unsigned int i, free_entries = 0;
unsigned char val;
- int size = swap_entry_size(folio_nr_pages(folio));
+ int size = 1 << swap_entry_order(folio_order(folio));
si = _swap_info_get(entry);
if (!si)
@@ -1365,7 +1376,6 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
ci = lock_cluster_or_swap_info(si, offset);
if (size == SWAPFILE_CLUSTER) {
- VM_BUG_ON(!cluster_is_huge(ci));
map = si->swap_map + offset;
for (i = 0; i < SWAPFILE_CLUSTER; i++) {
val = map[i];
@@ -1373,7 +1383,6 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
if (val == SWAP_HAS_CACHE)
free_entries++;
}
- cluster_clear_huge(ci);
if (free_entries == SWAPFILE_CLUSTER) {
unlock_cluster_or_swap_info(si, ci);
spin_lock(&si->lock);
@@ -1395,23 +1404,6 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
unlock_cluster_or_swap_info(si, ci);
}
-#ifdef CONFIG_THP_SWAP
-int split_swap_cluster(swp_entry_t entry)
-{
- struct swap_info_struct *si;
- struct swap_cluster_info *ci;
- unsigned long offset = swp_offset(entry);
-
- si = _swap_info_get(entry);
- if (!si)
- return -EBUSY;
- ci = lock_cluster(si, offset);
- cluster_clear_huge(ci);
- unlock_cluster(ci);
- return 0;
-}
-#endif
-
static int swp_entry_cmp(const void *ent1, const void *ent2)
{
const swp_entry_t *e1 = ent1, *e2 = ent2;
@@ -1519,22 +1511,23 @@ out:
}
static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
- swp_entry_t entry)
+ swp_entry_t entry, int order)
{
struct swap_cluster_info *ci;
unsigned char *map = si->swap_map;
+ unsigned int nr_pages = 1 << order;
unsigned long roffset = swp_offset(entry);
- unsigned long offset = round_down(roffset, SWAPFILE_CLUSTER);
+ unsigned long offset = round_down(roffset, nr_pages);
int i;
bool ret = false;
ci = lock_cluster_or_swap_info(si, offset);
- if (!ci || !cluster_is_huge(ci)) {
+ if (!ci || nr_pages == 1) {
if (swap_count(map[roffset]))
ret = true;
goto unlock_out;
}
- for (i = 0; i < SWAPFILE_CLUSTER; i++) {
+ for (i = 0; i < nr_pages; i++) {
if (swap_count(map[offset + i])) {
ret = true;
break;
@@ -1556,7 +1549,7 @@ static bool folio_swapped(struct folio *folio)
if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!folio_test_large(folio)))
return swap_swapcount(si, entry) != 0;
- return swap_page_trans_huge_swapped(si, entry);
+ return swap_page_trans_huge_swapped(si, entry, folio_order(folio));
}
/**
@@ -1602,33 +1595,88 @@ bool folio_free_swap(struct folio *folio)
return true;
}
-/*
- * Free the swap entry like above, but also try to
- * free the page cache entry if it is the last user.
+/**
+ * free_swap_and_cache_nr() - Release reference on range of swap entries and
+ * reclaim their cache if no more references remain.
+ * @entry: First entry of range.
+ * @nr: Number of entries in range.
+ *
+ * For each swap entry in the contiguous range, release a reference. If any swap
+ * entries become free, try to reclaim their underlying folios, if present. The
+ * offset range is defined by [entry.offset, entry.offset + nr).
*/
-int free_swap_and_cache(swp_entry_t entry)
+void free_swap_and_cache_nr(swp_entry_t entry, int nr)
{
- struct swap_info_struct *p;
+ const unsigned long start_offset = swp_offset(entry);
+ const unsigned long end_offset = start_offset + nr;
+ unsigned int type = swp_type(entry);
+ struct swap_info_struct *si;
+ bool any_only_cache = false;
+ unsigned long offset;
unsigned char count;
if (non_swap_entry(entry))
- return 1;
+ return;
- p = get_swap_device(entry);
- if (p) {
- if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) {
- put_swap_device(p);
- return 0;
+ si = get_swap_device(entry);
+ if (!si)
+ return;
+
+ if (WARN_ON(end_offset > si->max))
+ goto out;
+
+ /*
+ * First free all entries in the range.
+ */
+ for (offset = start_offset; offset < end_offset; offset++) {
+ if (data_race(si->swap_map[offset])) {
+ count = __swap_entry_free(si, swp_entry(type, offset));
+ if (count == SWAP_HAS_CACHE)
+ any_only_cache = true;
+ } else {
+ WARN_ON_ONCE(1);
}
+ }
- count = __swap_entry_free(p, entry);
- if (count == SWAP_HAS_CACHE &&
- !swap_page_trans_huge_swapped(p, entry))
- __try_to_reclaim_swap(p, swp_offset(entry),
+ /*
+ * Short-circuit the below loop if none of the entries had their
+ * reference drop to zero.
+ */
+ if (!any_only_cache)
+ goto out;
+
+ /*
+ * Now go back over the range trying to reclaim the swap cache. This is
+ * more efficient for large folios because we will only try to reclaim
+ * the swap once per folio in the common case. If we do
+ * __swap_entry_free() and __try_to_reclaim_swap() in the same loop, the
+ * latter will get a reference and lock the folio for every individual
+ * page but will only succeed once the swap slot for every subpage is
+ * zero.
+ */
+ for (offset = start_offset; offset < end_offset; offset += nr) {
+ nr = 1;
+ if (READ_ONCE(si->swap_map[offset]) == SWAP_HAS_CACHE) {
+ /*
+ * Folios are always naturally aligned in swap so
+ * advance forward to the next boundary. Zero means no
+ * folio was found for the swap entry, so advance by 1
+ * in this case. Negative value means folio was found
+ * but could not be reclaimed. Here we can still advance
+ * to the next boundary.
+ */
+ nr = __try_to_reclaim_swap(si, offset,
TTRS_UNMAPPED | TTRS_FULL);
- put_swap_device(p);
+ if (nr == 0)
+ nr = 1;
+ else if (nr < 0)
+ nr = -nr;
+ nr = ALIGN(offset + 1, nr) - offset;
+ }
}
- return p != NULL;
+
+out:
+ put_swap_device(si);
}
#ifdef CONFIG_HIBERNATION
@@ -1643,7 +1691,7 @@ swp_entry_t get_swap_page_of_type(int type)
/* This is called for allocating swap entry, not cache */
spin_lock(&si->lock);
- if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry))
+ if ((si->flags & SWP_WRITEOK) && scan_swap_map_slots(si, 1, 1, &entry, 0))
atomic_long_dec(&nr_swap_pages);
spin_unlock(&si->lock);
fail:
@@ -1806,7 +1854,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
* when reading from swap. This metadata may be indexed by swap entry
* so this must be called before swap_free().
*/
- arch_swap_restore(entry, folio);
+ arch_swap_restore(folio_swap(entry, folio), folio);
dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
@@ -2396,13 +2444,17 @@ static void reinsert_swap_info(struct swap_info_struct *p)
spin_unlock(&swap_lock);
}
+static bool __has_usable_swap(void)
+{
+ return !plist_head_empty(&swap_active_head);
+}
+
bool has_usable_swap(void)
{
- bool ret = true;
+ bool ret;
spin_lock(&swap_lock);
- if (plist_head_empty(&swap_active_head))
- ret = false;
+ ret = __has_usable_swap();
spin_unlock(&swap_lock);
return ret;
}
@@ -2495,10 +2547,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
/*
* Wait for swap operations protected by get/put_swap_device()
- * to complete.
- *
- * We need synchronize_rcu() here to protect the accessing to
- * the swap cache data structure.
+ * to complete. Because of synchronize_rcu() here, all swap
+ * operations protected by RCU reader side lock (including any
+ * spinlock) will be waited too. This makes it easy to
+ * prevent folio_test_swapcache() and the following swap cache
+ * operations from racing with swapoff.
*/
percpu_ref_kill(&p->users);
synchronize_rcu();
@@ -3097,7 +3150,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
p->flags |= SWP_SYNCHRONOUS_IO;
if (p->bdev && bdev_nonrot(p->bdev)) {
- int cpu;
+ int cpu, i;
unsigned long ci, nr_cluster;
p->flags |= SWP_SOLIDSTATE;
@@ -3133,8 +3186,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
for_each_possible_cpu(cpu) {
struct percpu_cluster *cluster;
+
cluster = per_cpu_ptr(p->percpu_cluster, cpu);
- cluster_set_null(&cluster->index);
+ for (i = 0; i < SWAP_NR_ORDERS; i++)
+ cluster->next[i] = SWAP_NEXT_INVALID;
}
} else {
atomic_inc(&nr_rotate_swap);
@@ -3659,6 +3714,9 @@ void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
if (!(gfp & __GFP_IO))
return;
+ if (!__has_usable_swap())
+ return;
+
if (!blk_cgroup_congested())
return;
diff --git a/mm/truncate.c b/mm/truncate.c
index 725b150e47ac4c..e99085bf3d34d9 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -764,15 +764,15 @@ EXPORT_SYMBOL(truncate_setsize);
* @from: original inode size
* @to: new inode size
*
- * Handle extension of inode size either caused by extending truncate or by
- * write starting after current i_size. We mark the page straddling current
- * i_size RO so that page_mkwrite() is called on the nearest write access to
- * the page. This way filesystem can be sure that page_mkwrite() is called on
- * the page before user writes to the page via mmap after the i_size has been
- * changed.
+ * Handle extension of inode size either caused by extending truncate or
+ * by write starting after current i_size. We mark the page straddling
+ * current i_size RO so that page_mkwrite() is called on the first
+ * write access to the page. The filesystem will update its per-block
+ * information before user writes to the page via mmap after the i_size
+ * has been changed.
*
* The function must be called after i_size is updated so that page fault
- * coming after we unlock the page will already see the new i_size.
+ * coming after we unlock the folio will already see the new i_size.
* The function must be called while we still hold i_rwsem - this not only
* makes sure i_size is stable but also that userspace cannot observe new
* i_size value before we are prepared to store mmap writes at new inode size.
@@ -781,31 +781,29 @@ void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
{
int bsize = i_blocksize(inode);
loff_t rounded_from;
- struct page *page;
- pgoff_t index;
+ struct folio *folio;
WARN_ON(to > inode->i_size);
- if (from >= to || bsize == PAGE_SIZE)
+ if (from >= to || bsize >= PAGE_SIZE)
return;
/* Page straddling @from will not have any hole block created? */
rounded_from = round_up(from, bsize);
if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1)))
return;
- index = from >> PAGE_SHIFT;
- page = find_lock_page(inode->i_mapping, index);
- /* Page not cached? Nothing to do */
- if (!page)
+ folio = filemap_lock_folio(inode->i_mapping, from / PAGE_SIZE);
+ /* Folio not cached? Nothing to do */
+ if (IS_ERR(folio))
return;
/*
- * See clear_page_dirty_for_io() for details why set_page_dirty()
+ * See folio_clear_dirty_for_io() for details why folio_mark_dirty()
* is needed.
*/
- if (page_mkclean(page))
- set_page_dirty(page);
- unlock_page(page);
- put_page(page);
+ if (folio_mkclean(folio))
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
}
EXPORT_SYMBOL(pagecache_isize_extended);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 712160cd41ecac..d9e82ae68244e6 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -56,17 +56,16 @@ struct vm_area_struct *find_vma_and_prepare_anon(struct mm_struct *mm,
#ifdef CONFIG_PER_VMA_LOCK
/*
- * lock_vma() - Lookup and lock vma corresponding to @address.
+ * uffd_lock_vma() - Lookup and lock vma corresponding to @address.
* @mm: mm to search vma in.
* @address: address that the vma should contain.
*
- * Should be called without holding mmap_lock. vma should be unlocked after use
- * with unlock_vma().
+ * Should be called without holding mmap_lock.
*
* Return: A locked vma containing @address, -ENOENT if no vma is found, or
* -ENOMEM if anon_vma couldn't be allocated.
*/
-static struct vm_area_struct *lock_vma(struct mm_struct *mm,
+static struct vm_area_struct *uffd_lock_vma(struct mm_struct *mm,
unsigned long address)
{
struct vm_area_struct *vma;
@@ -74,9 +73,8 @@ static struct vm_area_struct *lock_vma(struct mm_struct *mm,
vma = lock_vma_under_rcu(mm, address);
if (vma) {
/*
- * lock_vma_under_rcu() only checks anon_vma for private
- * anonymous mappings. But we need to ensure it is assigned in
- * private file-backed vmas as well.
+ * We know we're going to need to use anon_vma, so check
+ * that early.
*/
if (!(vma->vm_flags & VM_SHARED) && unlikely(!vma->anon_vma))
vma_end_read(vma);
@@ -107,7 +105,7 @@ static struct vm_area_struct *uffd_mfill_lock(struct mm_struct *dst_mm,
{
struct vm_area_struct *dst_vma;
- dst_vma = lock_vma(dst_mm, dst_start);
+ dst_vma = uffd_lock_vma(dst_mm, dst_start);
if (IS_ERR(dst_vma) || validate_dst_vma(dst_vma, dst_start + len))
return dst_vma;
@@ -180,9 +178,9 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
pte_t _dst_pte, *dst_pte;
bool writable = dst_vma->vm_flags & VM_WRITE;
bool vm_shared = dst_vma->vm_flags & VM_SHARED;
- bool page_in_cache = page_mapping(page);
spinlock_t *ptl;
- struct folio *folio;
+ struct folio *folio = page_folio(page);
+ bool page_in_cache = folio_mapping(folio);
_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
_dst_pte = pte_mkdirty(_dst_pte);
@@ -212,7 +210,6 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
if (!pte_none_mostly(ptep_get(dst_pte)))
goto out_unlock;
- folio = page_folio(page);
if (page_in_cache) {
/* Usually, cache pages are already added to LRU */
if (newly_allocated)
@@ -1026,7 +1023,7 @@ static int move_present_pte(struct mm_struct *mm,
}
folio_move_anon_rmap(src_folio, dst_vma);
- WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+ src_folio->index = linear_page_index(dst_vma, dst_addr);
orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
@@ -1402,7 +1399,7 @@ static int uffd_move_lock(struct mm_struct *mm,
struct vm_area_struct *vma;
int err;
- vma = lock_vma(mm, dst_start);
+ vma = uffd_lock_vma(mm, dst_start);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -1417,7 +1414,7 @@ static int uffd_move_lock(struct mm_struct *mm,
}
/*
- * Using lock_vma() to get src_vma can lead to following deadlock:
+ * Using uffd_lock_vma() to get src_vma can lead to following deadlock:
*
* Thread1 Thread2
* ------- -------
@@ -1439,12 +1436,13 @@ static int uffd_move_lock(struct mm_struct *mm,
err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
if (!err) {
/*
- * See comment in lock_vma() as to why not using
+ * See comment in uffd_lock_vma() as to why not using
* vma_start_read() here.
*/
down_read(&(*dst_vmap)->vm_lock->lock);
if (*dst_vmap != *src_vmap)
- down_read(&(*src_vmap)->vm_lock->lock);
+ down_read_nested(&(*src_vmap)->vm_lock->lock,
+ SINGLE_DEPTH_NESTING);
}
mmap_read_unlock(mm);
return err;
@@ -1661,9 +1659,9 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
/* Check if we can move the pmd without splitting it. */
if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) ||
!pmd_none(dst_pmdval)) {
- struct folio *folio = pfn_folio(pmd_pfn(*src_pmd));
+ struct folio *folio = pmd_folio(*src_pmd);
- if (!folio || (!is_huge_zero_page(&folio->page) &&
+ if (!folio || (!is_huge_zero_folio(folio) &&
!PageAnonExclusive(&folio->page))) {
spin_unlock(ptl);
err = -EBUSY;
diff --git a/mm/util.c b/mm/util.c
index 669397235787b9..c9e519e6811f55 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -124,16 +124,16 @@ EXPORT_SYMBOL(kstrndup);
* Return: newly allocated copy of @src or %NULL in case of error,
* result is physically contiguous. Use kfree() to free.
*/
-void *kmemdup(const void *src, size_t len, gfp_t gfp)
+void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp)
{
void *p;
- p = kmalloc_track_caller(len, gfp);
+ p = kmalloc_node_track_caller_noprof(len, gfp, NUMA_NO_NODE, _RET_IP_);
if (p)
memcpy(p, src, len);
return p;
}
-EXPORT_SYMBOL(kmemdup);
+EXPORT_SYMBOL(kmemdup_noprof);
/**
* kmemdup_array - duplicate a given array.
@@ -469,17 +469,17 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
- mm->get_unmapped_area = arch_get_unmapped_area;
+ clear_bit(MMF_TOPDOWN, &mm->flags);
} else {
mm->mmap_base = mmap_base(random_factor, rlim_stack);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ set_bit(MMF_TOPDOWN, &mm->flags);
}
}
#elif defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
mm->mmap_base = TASK_UNMAPPED_BASE;
- mm->get_unmapped_area = arch_get_unmapped_area;
+ clear_bit(MMF_TOPDOWN, &mm->flags);
}
#endif
@@ -609,7 +609,7 @@ EXPORT_SYMBOL(vm_mmap);
*
* Return: pointer to the allocated memory of %NULL in case of failure
*/
-void *kvmalloc_node(size_t size, gfp_t flags, int node)
+void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node)
{
gfp_t kmalloc_flags = flags;
void *ret;
@@ -631,7 +631,7 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
kmalloc_flags &= ~__GFP_NOFAIL;
}
- ret = kmalloc_node(size, kmalloc_flags, node);
+ ret = kmalloc_node_noprof(size, kmalloc_flags, node);
/*
* It doesn't really make sense to fallback to vmalloc for sub page
@@ -656,11 +656,11 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
* about the resulting pointer, and cannot play
* protection games.
*/
- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+ return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
node, __builtin_return_address(0));
}
-EXPORT_SYMBOL(kvmalloc_node);
+EXPORT_SYMBOL(kvmalloc_node_noprof);
/**
* kvfree() - Free memory.
@@ -699,7 +699,7 @@ void kvfree_sensitive(const void *addr, size_t len)
}
EXPORT_SYMBOL(kvfree_sensitive);
-void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
+void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
{
void *newp;
@@ -712,7 +712,7 @@ void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
kvfree(p);
return newp;
}
-EXPORT_SYMBOL(kvrealloc);
+EXPORT_SYMBOL(kvrealloc_noprof);
/**
* __vmalloc_array - allocate memory for a virtually contiguous array.
@@ -720,7 +720,7 @@ EXPORT_SYMBOL(kvrealloc);
* @size: element size.
* @flags: the type of memory to allocate (see kmalloc).
*/
-void *__vmalloc_array(size_t n, size_t size, gfp_t flags)
+void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
{
size_t bytes;
@@ -728,18 +728,18 @@ void *__vmalloc_array(size_t n, size_t size, gfp_t flags)
return NULL;
return __vmalloc(bytes, flags);
}
-EXPORT_SYMBOL(__vmalloc_array);
+EXPORT_SYMBOL(__vmalloc_array_noprof);
/**
* vmalloc_array - allocate memory for a virtually contiguous array.
* @n: number of elements.
* @size: element size.
*/
-void *vmalloc_array(size_t n, size_t size)
+void *vmalloc_array_noprof(size_t n, size_t size)
{
return __vmalloc_array(n, size, GFP_KERNEL);
}
-EXPORT_SYMBOL(vmalloc_array);
+EXPORT_SYMBOL(vmalloc_array_noprof);
/**
* __vcalloc - allocate and zero memory for a virtually contiguous array.
@@ -747,22 +747,22 @@ EXPORT_SYMBOL(vmalloc_array);
* @size: element size.
* @flags: the type of memory to allocate (see kmalloc).
*/
-void *__vcalloc(size_t n, size_t size, gfp_t flags)
+void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags)
{
return __vmalloc_array(n, size, flags | __GFP_ZERO);
}
-EXPORT_SYMBOL(__vcalloc);
+EXPORT_SYMBOL(__vcalloc_noprof);
/**
* vcalloc - allocate and zero memory for a virtually contiguous array.
* @n: number of elements.
* @size: element size.
*/
-void *vcalloc(size_t n, size_t size)
+void *vcalloc_noprof(size_t n, size_t size)
{
return __vmalloc_array(n, size, GFP_KERNEL | __GFP_ZERO);
}
-EXPORT_SYMBOL(vcalloc);
+EXPORT_SYMBOL(vcalloc_noprof);
struct anon_vma *folio_anon_vma(struct folio *folio)
{
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 22aa63f4ef6322..6641be0ca80b4d 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -42,6 +42,7 @@
#include <linux/sched/mm.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
+#include <linux/page_owner.h>
#define CREATE_TRACE_POINTS
#include <trace/events/vmalloc.h>
@@ -96,6 +97,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
{
pte_t *pte;
u64 pfn;
+ struct page *page;
unsigned long size = PAGE_SIZE;
pfn = phys_addr >> PAGE_SHIFT;
@@ -103,7 +105,13 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (!pte)
return -ENOMEM;
do {
- BUG_ON(!pte_none(ptep_get(pte)));
+ if (!pte_none(ptep_get(pte))) {
+ if (pfn_valid(pfn)) {
+ page = pfn_to_page(pfn);
+ dump_page(page, "remapping already mapped page");
+ }
+ BUG();
+ }
#ifdef CONFIG_HUGETLB_PAGE
size = arch_vmap_pte_range_map_size(addr, end, pfn, max_page_shift);
@@ -989,6 +997,27 @@ unsigned long vmalloc_nr_pages(void)
return atomic_long_read(&nr_vmalloc_pages);
}
+static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
+{
+ struct rb_node *n = root->rb_node;
+
+ addr = (unsigned long)kasan_reset_tag((void *)addr);
+
+ while (n) {
+ struct vmap_area *va;
+
+ va = rb_entry(n, struct vmap_area, rb_node);
+ if (addr < va->va_start)
+ n = n->rb_left;
+ else if (addr >= va->va_end)
+ n = n->rb_right;
+ else
+ return va;
+ }
+
+ return NULL;
+}
+
/* Look up the first VA which satisfies addr < va_end, NULL if none. */
static struct vmap_area *
__find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root)
@@ -1025,47 +1054,39 @@ __find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root)
static struct vmap_node *
find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va)
{
- struct vmap_node *vn, *va_node = NULL;
- struct vmap_area *va_lowest;
+ unsigned long va_start_lowest;
+ struct vmap_node *vn;
int i;
- for (i = 0; i < nr_vmap_nodes; i++) {
+repeat:
+ for (i = 0, va_start_lowest = 0; i < nr_vmap_nodes; i++) {
vn = &vmap_nodes[i];
spin_lock(&vn->busy.lock);
- va_lowest = __find_vmap_area_exceed_addr(addr, &vn->busy.root);
- if (va_lowest) {
- if (!va_node || va_lowest->va_start < (*va)->va_start) {
- if (va_node)
- spin_unlock(&va_node->busy.lock);
-
- *va = va_lowest;
- va_node = vn;
- continue;
- }
- }
+ *va = __find_vmap_area_exceed_addr(addr, &vn->busy.root);
+
+ if (*va)
+ if (!va_start_lowest || (*va)->va_start < va_start_lowest)
+ va_start_lowest = (*va)->va_start;
spin_unlock(&vn->busy.lock);
}
- return va_node;
-}
-
-static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
-{
- struct rb_node *n = root->rb_node;
+ /*
+ * Check if found VA exists, it might have gone away. In this case we
+ * repeat the search because a VA has been removed concurrently and we
+ * need to proceed to the next one, which is a rare case.
+ */
+ if (va_start_lowest) {
+ vn = addr_to_node(va_start_lowest);
- addr = (unsigned long)kasan_reset_tag((void *)addr);
+ spin_lock(&vn->busy.lock);
+ *va = __find_vmap_area(va_start_lowest, &vn->busy.root);
- while (n) {
- struct vmap_area *va;
+ if (*va)
+ return vn;
- va = rb_entry(n, struct vmap_area, rb_node);
- if (addr < va->va_start)
- n = n->rb_left;
- else if (addr >= va->va_end)
- n = n->rb_right;
- else
- return va;
+ spin_unlock(&vn->busy.lock);
+ goto repeat;
}
return NULL;
@@ -1913,15 +1934,25 @@ node_alloc(unsigned long size, unsigned long align,
return va;
}
+static inline void setup_vmalloc_vm(struct vm_struct *vm,
+ struct vmap_area *va, unsigned long flags, const void *caller)
+{
+ vm->flags = flags;
+ vm->addr = (void *)va->va_start;
+ vm->size = va->va_end - va->va_start;
+ vm->caller = caller;
+ va->vm = vm;
+}
+
/*
* Allocate a region of KVA of the specified size and alignment, within the
- * vstart and vend.
+ * vstart and vend. If vm is passed in, the two will also be bound.
*/
static struct vmap_area *alloc_vmap_area(unsigned long size,
unsigned long align,
unsigned long vstart, unsigned long vend,
int node, gfp_t gfp_mask,
- unsigned long va_flags)
+ unsigned long va_flags, struct vm_struct *vm)
{
struct vmap_node *vn;
struct vmap_area *va;
@@ -1984,6 +2015,12 @@ retry:
va->vm = NULL;
va->flags = (va_flags | vn_id);
+ if (vm) {
+ vm->addr = (void *)va->va_start;
+ vm->size = va->va_end - va->va_start;
+ va->vm = vm;
+ }
+
vn = addr_to_node(va->va_start);
spin_lock(&vn->busy.lock);
@@ -2343,6 +2380,9 @@ struct vmap_area *find_vmap_area(unsigned long addr)
struct vmap_area *va;
int i, j;
+ if (unlikely(!vmap_initialized))
+ return NULL;
+
/*
* An addr_to_node_id(addr) converts an address to a node index
* where a VA is located. If VA spans several zones and passed
@@ -2558,7 +2598,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
VMALLOC_START, VMALLOC_END,
node, gfp_mask,
- VMAP_RAM|VMAP_BLOCK);
+ VMAP_RAM|VMAP_BLOCK, NULL);
if (IS_ERR(va)) {
kfree(vb);
return ERR_CAST(va);
@@ -2694,7 +2734,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
* get_order(0) returns funny result. Just warn and terminate
* early.
*/
- return NULL;
+ return ERR_PTR(-EINVAL);
}
order = get_order(size);
@@ -2915,7 +2955,8 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node)
struct vmap_area *va;
va = alloc_vmap_area(size, PAGE_SIZE,
VMALLOC_START, VMALLOC_END,
- node, GFP_KERNEL, VMAP_RAM);
+ node, GFP_KERNEL, VMAP_RAM,
+ NULL);
if (IS_ERR(va))
return NULL;
@@ -3018,26 +3059,6 @@ void __init vm_area_register_early(struct vm_struct *vm, size_t align)
kasan_populate_early_vm_area_shadow(vm->addr, vm->size);
}
-static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
- struct vmap_area *va, unsigned long flags, const void *caller)
-{
- vm->flags = flags;
- vm->addr = (void *)va->va_start;
- vm->size = va->va_end - va->va_start;
- vm->caller = caller;
- va->vm = vm;
-}
-
-static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
- unsigned long flags, const void *caller)
-{
- struct vmap_node *vn = addr_to_node(va->va_start);
-
- spin_lock(&vn->busy.lock);
- setup_vmalloc_vm_locked(vm, va, flags, caller);
- spin_unlock(&vn->busy.lock);
-}
-
static void clear_vm_uninitialized_flag(struct vm_struct *vm)
{
/*
@@ -3074,14 +3095,15 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
if (!(flags & VM_NO_GUARD))
size += PAGE_SIZE;
- va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0);
+ area->flags = flags;
+ area->caller = caller;
+
+ va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area);
if (IS_ERR(va)) {
kfree(area);
return NULL;
}
- setup_vmalloc_vm(area, va, flags, caller);
-
/*
* Mark pages for non-VM_ALLOC mappings as accessible. Do it now as a
* best-effort approach, as they can be mapped outside of vmalloc code.
@@ -3507,12 +3529,12 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
* but mempolicy wants to alloc memory by interleaving.
*/
if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE)
- nr = alloc_pages_bulk_array_mempolicy(bulk_gfp,
+ nr = alloc_pages_bulk_array_mempolicy_noprof(bulk_gfp,
nr_pages_request,
pages + nr_allocated);
else
- nr = alloc_pages_bulk_array_node(bulk_gfp, nid,
+ nr = alloc_pages_bulk_array_node_noprof(bulk_gfp, nid,
nr_pages_request,
pages + nr_allocated);
@@ -3542,9 +3564,9 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
break;
if (nid == NUMA_NO_NODE)
- page = alloc_pages(alloc_gfp, order);
+ page = alloc_pages_noprof(alloc_gfp, order);
else
- page = alloc_pages_node(nid, alloc_gfp, order);
+ page = alloc_pages_node_noprof(nid, alloc_gfp, order);
if (unlikely(!page)) {
if (!nofail)
break;
@@ -3601,10 +3623,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {
- area->pages = __vmalloc_node(array_size, 1, nested_gfp, node,
+ area->pages = __vmalloc_node_noprof(array_size, 1, nested_gfp, node,
area->caller);
} else {
- area->pages = kmalloc_node(array_size, nested_gfp, node);
+ area->pages = kmalloc_node_noprof(array_size, nested_gfp, node);
}
if (!area->pages) {
@@ -3714,7 +3736,7 @@ fail:
*
* Return: the address of the area or %NULL on failure
*/
-void *__vmalloc_node_range(unsigned long size, unsigned long align,
+void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller)
@@ -3861,10 +3883,10 @@ fail:
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *__vmalloc_node(unsigned long size, unsigned long align,
+void *__vmalloc_node_noprof(unsigned long size, unsigned long align,
gfp_t gfp_mask, int node, const void *caller)
{
- return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+ return __vmalloc_node_range_noprof(size, align, VMALLOC_START, VMALLOC_END,
gfp_mask, PAGE_KERNEL, 0, node, caller);
}
/*
@@ -3873,15 +3895,15 @@ void *__vmalloc_node(unsigned long size, unsigned long align,
* than that.
*/
#ifdef CONFIG_TEST_VMALLOC_MODULE
-EXPORT_SYMBOL_GPL(__vmalloc_node);
+EXPORT_SYMBOL_GPL(__vmalloc_node_noprof);
#endif
-void *__vmalloc(unsigned long size, gfp_t gfp_mask)
+void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask)
{
- return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE,
+ return __vmalloc_node_noprof(size, 1, gfp_mask, NUMA_NO_NODE,
__builtin_return_address(0));
}
-EXPORT_SYMBOL(__vmalloc);
+EXPORT_SYMBOL(__vmalloc_noprof);
/**
* vmalloc - allocate virtually contiguous memory
@@ -3895,12 +3917,12 @@ EXPORT_SYMBOL(__vmalloc);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vmalloc(unsigned long size)
+void *vmalloc_noprof(unsigned long size)
{
- return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
+ return __vmalloc_node_noprof(size, 1, GFP_KERNEL, NUMA_NO_NODE,
__builtin_return_address(0));
}
-EXPORT_SYMBOL(vmalloc);
+EXPORT_SYMBOL(vmalloc_noprof);
/**
* vmalloc_huge - allocate virtually contiguous memory, allow huge pages
@@ -3914,13 +3936,13 @@ EXPORT_SYMBOL(vmalloc);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
+void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask)
{
- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+ return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
NUMA_NO_NODE, __builtin_return_address(0));
}
-EXPORT_SYMBOL_GPL(vmalloc_huge);
+EXPORT_SYMBOL_GPL(vmalloc_huge_noprof);
/**
* vzalloc - allocate virtually contiguous memory with zero fill
@@ -3935,12 +3957,12 @@ EXPORT_SYMBOL_GPL(vmalloc_huge);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vzalloc(unsigned long size)
+void *vzalloc_noprof(unsigned long size)
{
- return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
+ return __vmalloc_node_noprof(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
__builtin_return_address(0));
}
-EXPORT_SYMBOL(vzalloc);
+EXPORT_SYMBOL(vzalloc_noprof);
/**
* vmalloc_user - allocate zeroed virtually contiguous memory for userspace
@@ -3951,14 +3973,14 @@ EXPORT_SYMBOL(vzalloc);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vmalloc_user(unsigned long size)
+void *vmalloc_user_noprof(unsigned long size)
{
- return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
+ return __vmalloc_node_range_noprof(size, SHMLBA, VMALLOC_START, VMALLOC_END,
GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
VM_USERMAP, NUMA_NO_NODE,
__builtin_return_address(0));
}
-EXPORT_SYMBOL(vmalloc_user);
+EXPORT_SYMBOL(vmalloc_user_noprof);
/**
* vmalloc_node - allocate memory on a specific node
@@ -3973,12 +3995,12 @@ EXPORT_SYMBOL(vmalloc_user);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vmalloc_node(unsigned long size, int node)
+void *vmalloc_node_noprof(unsigned long size, int node)
{
- return __vmalloc_node(size, 1, GFP_KERNEL, node,
+ return __vmalloc_node_noprof(size, 1, GFP_KERNEL, node,
__builtin_return_address(0));
}
-EXPORT_SYMBOL(vmalloc_node);
+EXPORT_SYMBOL(vmalloc_node_noprof);
/**
* vzalloc_node - allocate memory on a specific node with zero fill
@@ -3991,12 +4013,12 @@ EXPORT_SYMBOL(vmalloc_node);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vzalloc_node(unsigned long size, int node)
+void *vzalloc_node_noprof(unsigned long size, int node)
{
- return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
+ return __vmalloc_node_noprof(size, 1, GFP_KERNEL | __GFP_ZERO, node,
__builtin_return_address(0));
}
-EXPORT_SYMBOL(vzalloc_node);
+EXPORT_SYMBOL(vzalloc_node_noprof);
#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
@@ -4019,12 +4041,12 @@ EXPORT_SYMBOL(vzalloc_node);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vmalloc_32(unsigned long size)
+void *vmalloc_32_noprof(unsigned long size)
{
- return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
+ return __vmalloc_node_noprof(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
__builtin_return_address(0));
}
-EXPORT_SYMBOL(vmalloc_32);
+EXPORT_SYMBOL(vmalloc_32_noprof);
/**
* vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
@@ -4035,14 +4057,14 @@ EXPORT_SYMBOL(vmalloc_32);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vmalloc_32_user(unsigned long size)
+void *vmalloc_32_user_noprof(unsigned long size)
{
- return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
+ return __vmalloc_node_range_noprof(size, SHMLBA, VMALLOC_START, VMALLOC_END,
GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
VM_USERMAP, NUMA_NO_NODE,
__builtin_return_address(0));
}
-EXPORT_SYMBOL(vmalloc_32_user);
+EXPORT_SYMBOL(vmalloc_32_user_noprof);
/*
* Atomically zero bytes in the iterator.
@@ -4656,7 +4678,7 @@ retry:
spin_lock(&vn->busy.lock);
insert_vmap_area(vas[area], &vn->busy.root, &vn->busy.head);
- setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
+ setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
pcpu_get_vm_areas);
spin_unlock(&vn->busy.lock);
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3ef654addd44c2..49bd944239613d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -92,6 +92,11 @@ struct scan_control {
unsigned long anon_cost;
unsigned long file_cost;
+#ifdef CONFIG_MEMCG
+ /* Swappiness value for proactive reclaim. Always use sc_swappiness()! */
+ int *proactive_swappiness;
+#endif
+
/* Can active folios be deactivated as part of reclaim? */
#define DEACTIVATE_ANON 1
#define DEACTIVATE_FILE 2
@@ -189,7 +194,7 @@ struct scan_control {
#endif
/*
- * From 0 .. 200. Higher means more swappy.
+ * From 0 .. MAX_SWAPPINESS. Higher means more swappy.
*/
int vm_swappiness = 60;
@@ -233,6 +238,13 @@ static bool writeback_throttling_sane(struct scan_control *sc)
#endif
return false;
}
+
+static int sc_swappiness(struct scan_control *sc, struct mem_cgroup *memcg)
+{
+ if (sc->proactive && sc->proactive_swappiness)
+ return *sc->proactive_swappiness;
+ return mem_cgroup_swappiness(memcg);
+}
#else
static bool cgroup_reclaim(struct scan_control *sc)
{
@@ -248,6 +260,11 @@ static bool writeback_throttling_sane(struct scan_control *sc)
{
return true;
}
+
+static int sc_swappiness(struct scan_control *sc, struct mem_cgroup *memcg)
+{
+ return READ_ONCE(vm_swappiness);
+}
#endif
static void set_task_reclaim_state(struct task_struct *task,
@@ -967,7 +984,8 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
.gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) | __GFP_NOWARN |
__GFP_NOMEMALLOC | GFP_NOWAIT,
.nid = target_nid,
- .nmask = &allowed_mask
+ .nmask = &allowed_mask,
+ .reason = MR_DEMOTION,
};
if (list_empty(demote_folios))
@@ -1205,25 +1223,28 @@ retry:
if (!can_split_folio(folio, NULL))
goto activate_locked;
/*
- * Split folios without a PMD map right
- * away. Chances are some or all of the
- * tail pages can be freed without IO.
+ * Split partially mapped folios right away.
+ * We can free the unmapped pages without IO.
*/
- if (!folio_entire_mapcount(folio) &&
- split_folio_to_list(folio,
- folio_list))
+ if (data_race(!list_empty(&folio->_deferred_list)) &&
+ split_folio_to_list(folio, folio_list))
goto activate_locked;
}
if (!add_to_swap(folio)) {
+ int __maybe_unused order = folio_order(folio);
+
if (!folio_test_large(folio))
goto activate_locked_split;
/* Fallback to swap normal pages */
- if (split_folio_to_list(folio,
- folio_list))
+ if (split_folio_to_list(folio, folio_list))
goto activate_locked;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- count_memcg_folio_events(folio, THP_SWPOUT_FALLBACK, 1);
- count_vm_event(THP_SWPOUT_FALLBACK);
+ if (nr_pages >= HPAGE_PMD_NR) {
+ count_memcg_folio_events(folio,
+ THP_SWPOUT_FALLBACK, 1);
+ count_vm_event(THP_SWPOUT_FALLBACK);
+ }
+ count_mthp_stat(order, MTHP_STAT_ANON_SWPOUT_FALLBACK);
#endif
if (!add_to_swap(folio))
goto activate_locked_split;
@@ -1256,6 +1277,20 @@ retry:
if (folio_test_pmd_mappable(folio))
flags |= TTU_SPLIT_HUGE_PMD;
+ /*
+ * Without TTU_SYNC, try_to_unmap will only begin to
+ * hold PTL from the first present PTE within a large
+ * folio. Some initial PTEs might be skipped due to
+ * races with parallel PTE writes in which PTEs can be
+ * cleared temporarily before being written new present
+ * values. This will lead to a large folio is still
+ * mapped while some subpages have been partially
+ * unmapped after try_to_unmap; TTU_SYNC helps
+ * try_to_unmap acquire PTL from the first PTE,
+ * eliminating the influence of temporary PTE values.
+ */
+ if (folio_test_large(folio) && list_empty(&folio->_deferred_list))
+ flags |= TTU_SYNC;
try_to_unmap(folio, flags);
if (folio_mapped(folio)) {
@@ -2337,7 +2372,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
unsigned long anon_cost, file_cost, total_cost;
- int swappiness = mem_cgroup_swappiness(memcg);
+ int swappiness = sc_swappiness(sc, memcg);
u64 fraction[ANON_AND_FILE];
u64 denominator = 0; /* gcc */
enum scan_balance scan_balance;
@@ -2413,7 +2448,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
ap = swappiness * (total_cost + 1);
ap /= anon_cost + 1;
- fp = (200 - swappiness) * (total_cost + 1);
+ fp = (MAX_SWAPPINESS - swappiness) * (total_cost + 1);
fp /= file_cost + 1;
fraction[0] = ap;
@@ -2618,7 +2653,7 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
return 0;
- return mem_cgroup_swappiness(memcg);
+ return sc_swappiness(sc, memcg);
}
static int get_nr_gens(struct lruvec *lruvec, int type)
@@ -4433,7 +4468,7 @@ static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_idx
{
int type, tier;
struct ctrl_pos sp, pv;
- int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness };
+ int gain[ANON_AND_FILE] = { swappiness, MAX_SWAPPINESS - swappiness };
/*
* Compare the first tier of anon with that of file to determine which
@@ -4480,7 +4515,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw
type = LRU_GEN_ANON;
else if (swappiness == 1)
type = LRU_GEN_FILE;
- else if (swappiness == 200)
+ else if (swappiness == MAX_SWAPPINESS)
type = LRU_GEN_ANON;
else if (!(sc->gfp_mask & __GFP_IO))
type = LRU_GEN_FILE;
@@ -5414,9 +5449,9 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
lruvec = get_lruvec(memcg, nid);
- if (swappiness < 0)
+ if (swappiness < MIN_SWAPPINESS)
swappiness = get_swappiness(lruvec, sc);
- else if (swappiness > 200)
+ else if (swappiness > MAX_SWAPPINESS)
goto done;
switch (cmd) {
@@ -6499,12 +6534,14 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
- unsigned int reclaim_options)
+ unsigned int reclaim_options,
+ int *swappiness)
{
unsigned long nr_reclaimed;
unsigned int noreclaim_flag;
struct scan_control sc = {
.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
+ .proactive_swappiness = swappiness,
.gfp_mask = (current_gfp_context(gfp_mask) & GFP_RECLAIM_MASK) |
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
.reclaim_idx = MAX_NR_ZONES - 1,
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 7ab05621052dc5..2ebfed32871bf3 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -1237,12 +1237,12 @@ static void z3fold_unmap(struct z3fold_pool *pool, unsigned long handle)
}
/**
- * z3fold_get_pool_size() - gets the z3fold pool size in pages
+ * z3fold_get_pool_pages() - gets the z3fold pool size in pages
* @pool: pool whose size is being queried
*
* Returns: size in pages of the given pool.
*/
-static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
+static u64 z3fold_get_pool_pages(struct z3fold_pool *pool)
{
return atomic64_read(&pool->pages_nr);
}
@@ -1402,9 +1402,9 @@ static void z3fold_zpool_unmap(void *pool, unsigned long handle)
z3fold_unmap(pool, handle);
}
-static u64 z3fold_zpool_total_size(void *pool)
+static u64 z3fold_zpool_total_pages(void *pool)
{
- return z3fold_get_pool_size(pool) * PAGE_SIZE;
+ return z3fold_get_pool_pages(pool);
}
static struct zpool_driver z3fold_zpool_driver = {
@@ -1417,7 +1417,7 @@ static struct zpool_driver z3fold_zpool_driver = {
.free = z3fold_zpool_free,
.map = z3fold_zpool_map,
.unmap = z3fold_zpool_unmap,
- .total_size = z3fold_zpool_total_size,
+ .total_pages = z3fold_zpool_total_pages,
};
MODULE_ALIAS("zpool-z3fold");
diff --git a/mm/zbud.c b/mm/zbud.c
index 2190cc1f37b3dc..e9836fff943811 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -365,13 +365,13 @@ static void zbud_unmap(struct zbud_pool *pool, unsigned long handle)
}
/**
- * zbud_get_pool_size() - gets the zbud pool size in pages
+ * zbud_get_pool_pages() - gets the zbud pool size in pages
* @pool: pool whose size is being queried
*
* Returns: size in pages of the given pool. The pool lock need not be
* taken to access pages_nr.
*/
-static u64 zbud_get_pool_size(struct zbud_pool *pool)
+static u64 zbud_get_pool_pages(struct zbud_pool *pool)
{
return pool->pages_nr;
}
@@ -410,9 +410,9 @@ static void zbud_zpool_unmap(void *pool, unsigned long handle)
zbud_unmap(pool, handle);
}
-static u64 zbud_zpool_total_size(void *pool)
+static u64 zbud_zpool_total_pages(void *pool)
{
- return zbud_get_pool_size(pool) * PAGE_SIZE;
+ return zbud_get_pool_pages(pool);
}
static struct zpool_driver zbud_zpool_driver = {
@@ -425,7 +425,7 @@ static struct zpool_driver zbud_zpool_driver = {
.free = zbud_zpool_free,
.map = zbud_zpool_map,
.unmap = zbud_zpool_unmap,
- .total_size = zbud_zpool_total_size,
+ .total_pages = zbud_zpool_total_pages,
};
MODULE_ALIAS("zpool-zbud");
diff --git a/mm/zpool.c b/mm/zpool.c
index 846410479c2f41..b9fda1fa857da7 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -321,16 +321,16 @@ void zpool_unmap_handle(struct zpool *zpool, unsigned long handle)
}
/**
- * zpool_get_total_size() - The total size of the pool
+ * zpool_get_total_pages() - The total size of the pool
* @zpool: The zpool to check
*
- * This returns the total size in bytes of the pool.
+ * This returns the total size in pages of the pool.
*
- * Returns: Total size of the zpool in bytes.
+ * Returns: Total size of the zpool in pages.
*/
-u64 zpool_get_total_size(struct zpool *zpool)
+u64 zpool_get_total_pages(struct zpool *zpool)
{
- return zpool->driver->total_size(zpool->pool);
+ return zpool->driver->total_pages(zpool->pool);
}
/**
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 7d7cb3eaabe029..b42d3545ca856f 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -399,9 +399,9 @@ static void zs_zpool_unmap(void *pool, unsigned long handle)
zs_unmap_object(pool, handle);
}
-static u64 zs_zpool_total_size(void *pool)
+static u64 zs_zpool_total_pages(void *pool)
{
- return zs_get_total_pages(pool) << PAGE_SHIFT;
+ return zs_get_total_pages(pool);
}
static struct zpool_driver zs_zpool_driver = {
@@ -414,7 +414,7 @@ static struct zpool_driver zs_zpool_driver = {
.free = zs_zpool_free,
.map = zs_zpool_map,
.unmap = zs_zpool_unmap,
- .total_size = zs_zpool_total_size,
+ .total_pages = zs_zpool_total_pages,
};
MODULE_ALIAS("zpool-zsmalloc");
diff --git a/mm/zswap.c b/mm/zswap.c
index 9dec853647c8e4..a50e2986cd2fab 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -20,7 +20,6 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
-#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
@@ -43,8 +42,6 @@
/*********************************
* statistics
**********************************/
-/* Total bytes used by the compressed storage */
-u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
@@ -126,19 +123,6 @@ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
uint, 0644);
-/*
- * Enable/disable handling same-value filled pages (enabled by default).
- * If disabled every page is considered non-same-value filled.
- */
-static bool zswap_same_filled_pages_enabled = true;
-module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
- bool, 0644);
-
-/* Enable/disable handling non-same-value filled pages (enabled by default) */
-static bool zswap_non_same_filled_pages_enabled = true;
-module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
- bool, 0644);
-
/* Number of zpools in zswap_pool (empirically determined for scalability) */
#define ZSWAP_NR_ZPOOLS 32
@@ -183,8 +167,6 @@ struct zswap_pool {
/* Global LRU lists shared by all zswap pools. */
static struct list_lru zswap_list_lru;
-/* counter of pages stored in all zswap pools. */
-static atomic_t zswap_nr_stored = ATOMIC_INIT(0);
/* The lock protects zswap_next_shrink updates. */
static DEFINE_SPINLOCK(zswap_shrink_lock);
@@ -198,7 +180,6 @@ static struct shrinker *zswap_shrinker;
* This structure contains the metadata for tracking a single compressed
* page within zswap.
*
- * rbnode - links the entry into red-black tree for the appropriate swap type
* swpentry - associated swap entry, the offset indexes into the red-black tree
* length - the length in bytes of the compressed page data. Needed during
* decompression. For a same value filled page length is 0, and both
@@ -210,7 +191,6 @@ static struct shrinker *zswap_shrinker;
* lru - handle to the pool's lru used to evict pages.
*/
struct zswap_entry {
- struct rb_node rbnode;
swp_entry_t swpentry;
unsigned int length;
struct zswap_pool *pool;
@@ -222,12 +202,7 @@ struct zswap_entry {
struct list_head lru;
};
-struct zswap_tree {
- struct rb_root rbroot;
- spinlock_t lock;
-};
-
-static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
+static struct xarray *zswap_trees[MAX_SWAPFILES];
static unsigned int nr_zswap_trees[MAX_SWAPFILES];
/* RCU-protected iteration */
@@ -255,7 +230,7 @@ static bool zswap_has_pool;
* helpers and fwd declarations
**********************************/
-static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
+static inline struct xarray *swap_zswap_tree(swp_entry_t swp)
{
return &zswap_trees[swp_type(swp)][swp_offset(swp)
>> SWAP_ADDRESS_SPACE_SHIFT];
@@ -265,45 +240,6 @@ static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
zpool_get_type((p)->zpools[0]))
-static bool zswap_is_full(void)
-{
- return totalram_pages() * zswap_max_pool_percent / 100 <
- DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
-}
-
-static bool zswap_can_accept(void)
-{
- return totalram_pages() * zswap_accept_thr_percent / 100 *
- zswap_max_pool_percent / 100 >
- DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
-}
-
-static u64 get_zswap_pool_size(struct zswap_pool *pool)
-{
- u64 pool_size = 0;
- int i;
-
- for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
- pool_size += zpool_get_total_size(pool->zpools[i]);
-
- return pool_size;
-}
-
-static void zswap_update_total_size(void)
-{
- struct zswap_pool *pool;
- u64 total = 0;
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(pool, &zswap_pools, list)
- total += get_zswap_pool_size(pool);
-
- rcu_read_unlock();
-
- zswap_pool_total_size = total;
-}
-
/*********************************
* pool functions
**********************************/
@@ -541,6 +477,48 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
return NULL;
}
+static unsigned long zswap_max_pages(void)
+{
+ return totalram_pages() * zswap_max_pool_percent / 100;
+}
+
+static unsigned long zswap_accept_thr_pages(void)
+{
+ return zswap_max_pages() * zswap_accept_thr_percent / 100;
+}
+
+unsigned long zswap_total_pages(void)
+{
+ struct zswap_pool *pool;
+ unsigned long total = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pool, &zswap_pools, list) {
+ int i;
+
+ for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
+ total += zpool_get_total_pages(pool->zpools[i]);
+ }
+ rcu_read_unlock();
+
+ return total;
+}
+
+static bool zswap_check_limits(void)
+{
+ unsigned long cur_pages = zswap_total_pages();
+ unsigned long max_pages = zswap_max_pages();
+
+ if (cur_pages >= max_pages) {
+ zswap_pool_limit_hit++;
+ zswap_pool_reached_full = true;
+ } else if (zswap_pool_reached_full &&
+ cur_pages <= zswap_accept_thr_pages()) {
+ zswap_pool_reached_full = false;
+ }
+ return zswap_pool_reached_full;
+}
+
/*********************************
* param callbacks
**********************************/
@@ -807,63 +785,6 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
}
/*********************************
-* rbtree functions
-**********************************/
-static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
-{
- struct rb_node *node = root->rb_node;
- struct zswap_entry *entry;
- pgoff_t entry_offset;
-
- while (node) {
- entry = rb_entry(node, struct zswap_entry, rbnode);
- entry_offset = swp_offset(entry->swpentry);
- if (entry_offset > offset)
- node = node->rb_left;
- else if (entry_offset < offset)
- node = node->rb_right;
- else
- return entry;
- }
- return NULL;
-}
-
-/*
- * In the case that a entry with the same offset is found, a pointer to
- * the existing entry is stored in dupentry and the function returns -EEXIST
- */
-static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
- struct zswap_entry **dupentry)
-{
- struct rb_node **link = &root->rb_node, *parent = NULL;
- struct zswap_entry *myentry;
- pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry);
-
- while (*link) {
- parent = *link;
- myentry = rb_entry(parent, struct zswap_entry, rbnode);
- myentry_offset = swp_offset(myentry->swpentry);
- if (myentry_offset > entry_offset)
- link = &(*link)->rb_left;
- else if (myentry_offset < entry_offset)
- link = &(*link)->rb_right;
- else {
- *dupentry = myentry;
- return -EEXIST;
- }
- }
- rb_link_node(&entry->rbnode, parent, link);
- rb_insert_color(&entry->rbnode, root);
- return 0;
-}
-
-static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
-{
- rb_erase(&entry->rbnode, root);
- RB_CLEAR_NODE(&entry->rbnode);
-}
-
-/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;
@@ -874,7 +795,6 @@ static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
if (!entry)
return NULL;
- RB_CLEAR_NODE(&entry->rbnode);
return entry;
}
@@ -885,12 +805,7 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
{
- int i = 0;
-
- if (ZSWAP_NR_ZPOOLS > 1)
- i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS));
-
- return entry->pool->zpools[i];
+ return entry->pool->zpools[hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS))];
}
/*
@@ -904,7 +819,6 @@ static void zswap_entry_free(struct zswap_entry *entry)
else {
zswap_lru_del(&zswap_list_lru, entry);
zpool_free(zswap_find_zpool(entry), entry->handle);
- atomic_dec(&zswap_nr_stored);
zswap_pool_put(entry->pool);
}
if (entry->objcg) {
@@ -913,18 +827,6 @@ static void zswap_entry_free(struct zswap_entry *entry)
}
zswap_entry_cache_free(entry);
atomic_dec(&zswap_stored_pages);
- zswap_update_total_size();
-}
-
-/*
- * The caller hold the tree lock and search the entry from the tree,
- * so it must be on the tree, remove it from the tree and free it.
- */
-static void zswap_invalidate_entry(struct zswap_tree *tree,
- struct zswap_entry *entry)
-{
- zswap_rb_erase(&tree->rbroot, entry);
- zswap_entry_free(entry);
}
/*********************************
@@ -1080,7 +982,17 @@ static void zswap_decompress(struct zswap_entry *entry, struct page *page)
mutex_lock(&acomp_ctx->mutex);
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
- if (acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) {
+ /*
+ * If zpool_map_handle is atomic, we cannot reliably utilize its mapped buffer
+ * to do crypto_acomp_decompress() which might sleep. In such cases, we must
+ * resort to copying the buffer to a temporary one.
+ * Meanwhile, zpool_map_handle() might return a non-linearly mapped buffer,
+ * such as a kmap address of high memory or even ever a vmap address.
+ * However, sg_init_one is only equipped to handle linearly mapped low memory.
+ * In such cases, we also must copy the buffer to a temporary and lowmem one.
+ */
+ if ((acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) ||
+ !virt_addr_valid(src)) {
memcpy(acomp_ctx->buffer, src, entry->length);
src = acomp_ctx->buffer;
zpool_unmap_handle(zpool, entry->handle);
@@ -1094,7 +1006,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct page *page)
BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
mutex_unlock(&acomp_ctx->mutex);
- if (!acomp_ctx->is_sleepable || zpool_can_sleep_mapped(zpool))
+ if (src != acomp_ctx->buffer)
zpool_unmap_handle(zpool, entry->handle);
}
@@ -1116,7 +1028,8 @@ static void zswap_decompress(struct zswap_entry *entry, struct page *page)
static int zswap_writeback_entry(struct zswap_entry *entry,
swp_entry_t swpentry)
{
- struct zswap_tree *tree;
+ struct xarray *tree;
+ pgoff_t offset = swp_offset(swpentry);
struct folio *folio;
struct mempolicy *mpol;
bool folio_was_allocated;
@@ -1153,19 +1066,13 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
* be dereferenced.
*/
tree = swap_zswap_tree(swpentry);
- spin_lock(&tree->lock);
- if (zswap_rb_search(&tree->rbroot, swp_offset(swpentry)) != entry) {
- spin_unlock(&tree->lock);
+ if (entry != xa_cmpxchg(tree, offset, entry, NULL, GFP_KERNEL)) {
delete_from_swap_cache(folio);
folio_unlock(folio);
folio_put(folio);
return -ENOMEM;
}
- /* Safe to deref entry after the entry is verified above. */
- zswap_rb_erase(&tree->rbroot, entry);
- spin_unlock(&tree->lock);
-
zswap_decompress(entry, &folio->page);
count_vm_event(ZSWPWB);
@@ -1313,15 +1220,30 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg))
return 0;
-#ifdef CONFIG_MEMCG_KMEM
- mem_cgroup_flush_stats(memcg);
- nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
- nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
-#else
- /* use pool stats instead of memcg stats */
- nr_backing = zswap_pool_total_size >> PAGE_SHIFT;
- nr_stored = atomic_read(&zswap_nr_stored);
-#endif
+ /*
+ * The shrinker resumes swap writeback, which will enter block
+ * and may enter fs. XXX: Harmonize with vmscan.c __GFP_FS
+ * rules (may_enter_fs()), which apply on a per-folio basis.
+ */
+ if (!gfp_has_io_fs(sc->gfp_mask))
+ return 0;
+
+ /*
+ * For memcg, use the cgroup-wide ZSWAP stats since we don't
+ * have them per-node and thus per-lruvec. Careful if memcg is
+ * runtime-disabled: we can get sc->memcg == NULL, which is ok
+ * for the lruvec, but not for memcg_page_state().
+ *
+ * Without memcg, use the zswap pool-wide metrics.
+ */
+ if (!mem_cgroup_disabled()) {
+ mem_cgroup_flush_stats(memcg);
+ nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
+ nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
+ } else {
+ nr_backing = zswap_total_pages();
+ nr_stored = atomic_read(&zswap_stored_pages);
+ }
if (!nr_stored)
return 0;
@@ -1340,6 +1262,11 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
* This ensures that the better zswap compresses memory, the fewer
* pages we will evict to swap (as it will otherwise incur IO for
* relatively small memory saving).
+ *
+ * The memory saving factor calculated here takes same-filled pages into
+ * account, but those are not freeable since they almost occupy no
+ * space. Hence, we may scale nr_freeable down a little bit more than we
+ * should if we have a lot of same-filled pages.
*/
return mult_frac(nr_freeable, nr_backing, nr_stored);
}
@@ -1387,6 +1314,10 @@ static void shrink_worker(struct work_struct *w)
{
struct mem_cgroup *memcg;
int ret, failures = 0;
+ unsigned long thr;
+
+ /* Reclaim down to the accept threshold */
+ thr = zswap_accept_thr_pages();
/* global reclaim will select cgroup in a round-robin fashion. */
do {
@@ -1434,32 +1365,37 @@ static void shrink_worker(struct work_struct *w)
break;
if (ret && ++failures == MAX_RECLAIM_RETRIES)
break;
-
resched:
cond_resched();
- } while (!zswap_can_accept());
+ } while (zswap_total_pages() > thr);
}
-static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
+/*********************************
+* same-filled functions
+**********************************/
+static bool zswap_is_folio_same_filled(struct folio *folio, unsigned long *value)
{
unsigned long *page;
unsigned long val;
unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
+ bool ret = false;
- page = (unsigned long *)ptr;
+ page = kmap_local_folio(folio, 0);
val = page[0];
if (val != page[last_pos])
- return 0;
+ goto out;
for (pos = 1; pos < last_pos; pos++) {
if (val != page[pos])
- return 0;
+ goto out;
}
*value = val;
-
- return 1;
+ ret = true;
+out:
+ kunmap_local(page);
+ return ret;
}
static void zswap_fill_page(void *ptr, unsigned long value)
@@ -1470,14 +1406,18 @@ static void zswap_fill_page(void *ptr, unsigned long value)
memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
}
+/*********************************
+* main API
+**********************************/
bool zswap_store(struct folio *folio)
{
swp_entry_t swp = folio->swap;
pgoff_t offset = swp_offset(swp);
- struct zswap_tree *tree = swap_zswap_tree(swp);
- struct zswap_entry *entry, *dupentry;
+ struct xarray *tree = swap_zswap_tree(swp);
+ struct zswap_entry *entry, *old;
struct obj_cgroup *objcg = NULL;
struct mem_cgroup *memcg = NULL;
+ unsigned long value;
VM_WARN_ON_ONCE(!folio_test_locked(folio));
VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
@@ -1489,6 +1429,7 @@ bool zswap_store(struct folio *folio)
if (!zswap_enabled)
goto check_old;
+ /* Check cgroup limits */
objcg = get_obj_cgroup_from_folio(folio);
if (objcg && !obj_cgroup_may_zswap(objcg)) {
memcg = get_mem_cgroup_from_objcg(objcg);
@@ -1499,19 +1440,8 @@ bool zswap_store(struct folio *folio)
mem_cgroup_put(memcg);
}
- /* reclaim space if needed */
- if (zswap_is_full()) {
- zswap_pool_limit_hit++;
- zswap_pool_reached_full = true;
- goto shrink;
- }
-
- if (zswap_pool_reached_full) {
- if (!zswap_can_accept())
- goto shrink;
- else
- zswap_pool_reached_full = false;
- }
+ if (zswap_check_limits())
+ goto reject;
/* allocate entry */
entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
@@ -1520,24 +1450,13 @@ bool zswap_store(struct folio *folio)
goto reject;
}
- if (zswap_same_filled_pages_enabled) {
- unsigned long value;
- u8 *src;
-
- src = kmap_local_folio(folio, 0);
- if (zswap_is_page_same_filled(src, &value)) {
- kunmap_local(src);
- entry->length = 0;
- entry->value = value;
- atomic_inc(&zswap_same_filled_pages);
- goto insert_entry;
- }
- kunmap_local(src);
+ if (zswap_is_folio_same_filled(folio, &value)) {
+ entry->length = 0;
+ entry->value = value;
+ atomic_inc(&zswap_same_filled_pages);
+ goto store_entry;
}
- if (!zswap_non_same_filled_pages_enabled)
- goto freepage;
-
/* if entry is successfully added, it keeps the reference */
entry->pool = zswap_pool_current_get();
if (!entry->pool)
@@ -1555,62 +1474,77 @@ bool zswap_store(struct folio *folio)
if (!zswap_compress(folio, entry))
goto put_pool;
-insert_entry:
+store_entry:
entry->swpentry = swp;
entry->objcg = objcg;
+
+ old = xa_store(tree, offset, entry, GFP_KERNEL);
+ if (xa_is_err(old)) {
+ int err = xa_err(old);
+
+ WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
+ zswap_reject_alloc_fail++;
+ goto store_failed;
+ }
+
+ /*
+ * We may have had an existing entry that became stale when
+ * the folio was redirtied and now the new version is being
+ * swapped out. Get rid of the old.
+ */
+ if (old)
+ zswap_entry_free(old);
+
if (objcg) {
obj_cgroup_charge_zswap(objcg, entry->length);
- /* Account before objcg ref is moved to tree */
count_objcg_event(objcg, ZSWPOUT);
}
- /* map */
- spin_lock(&tree->lock);
/*
- * The folio may have been dirtied again, invalidate the
- * possibly stale entry before inserting the new entry.
+ * We finish initializing the entry while it's already in xarray.
+ * This is safe because:
+ *
+ * 1. Concurrent stores and invalidations are excluded by folio lock.
+ *
+ * 2. Writeback is excluded by the entry not being on the LRU yet.
+ * The publishing order matters to prevent writeback from seeing
+ * an incoherent entry.
*/
- if (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
- zswap_invalidate_entry(tree, dupentry);
- WARN_ON(zswap_rb_insert(&tree->rbroot, entry, &dupentry));
- }
if (entry->length) {
INIT_LIST_HEAD(&entry->lru);
zswap_lru_add(&zswap_list_lru, entry);
- atomic_inc(&zswap_nr_stored);
}
- spin_unlock(&tree->lock);
/* update stats */
atomic_inc(&zswap_stored_pages);
- zswap_update_total_size();
count_vm_event(ZSWPOUT);
return true;
+store_failed:
+ if (!entry->length)
+ atomic_dec(&zswap_same_filled_pages);
+ else {
+ zpool_free(zswap_find_zpool(entry), entry->handle);
put_pool:
- zswap_pool_put(entry->pool);
+ zswap_pool_put(entry->pool);
+ }
freepage:
zswap_entry_cache_free(entry);
reject:
- if (objcg)
- obj_cgroup_put(objcg);
+ obj_cgroup_put(objcg);
+ if (zswap_pool_reached_full)
+ queue_work(shrink_wq, &zswap_shrink_work);
check_old:
/*
* If the zswap store fails or zswap is disabled, we must invalidate the
* possibly stale entry which was previously stored at this offset.
* Otherwise, writeback could overwrite the new data in the swapfile.
*/
- spin_lock(&tree->lock);
- entry = zswap_rb_search(&tree->rbroot, offset);
+ entry = xa_erase(tree, offset);
if (entry)
- zswap_invalidate_entry(tree, entry);
- spin_unlock(&tree->lock);
+ zswap_entry_free(entry);
return false;
-
-shrink:
- queue_work(shrink_wq, &zswap_shrink_work);
- goto reject;
}
bool zswap_load(struct folio *folio)
@@ -1618,20 +1552,32 @@ bool zswap_load(struct folio *folio)
swp_entry_t swp = folio->swap;
pgoff_t offset = swp_offset(swp);
struct page *page = &folio->page;
- struct zswap_tree *tree = swap_zswap_tree(swp);
+ bool swapcache = folio_test_swapcache(folio);
+ struct xarray *tree = swap_zswap_tree(swp);
struct zswap_entry *entry;
u8 *dst;
VM_WARN_ON_ONCE(!folio_test_locked(folio));
- spin_lock(&tree->lock);
- entry = zswap_rb_search(&tree->rbroot, offset);
- if (!entry) {
- spin_unlock(&tree->lock);
+ /*
+ * When reading into the swapcache, invalidate our entry. The
+ * swapcache can be the authoritative owner of the page and
+ * its mappings, and the pressure that results from having two
+ * in-memory copies outweighs any benefits of caching the
+ * compression work.
+ *
+ * (Most swapins go through the swapcache. The notable
+ * exception is the singleton fault on SWP_SYNCHRONOUS_IO
+ * files, which reads into a private page and may free it if
+ * the fault fails. We remain the primary owner of the entry.)
+ */
+ if (swapcache)
+ entry = xa_erase(tree, offset);
+ else
+ entry = xa_load(tree, offset);
+
+ if (!entry)
return false;
- }
- zswap_rb_erase(&tree->rbroot, entry);
- spin_unlock(&tree->lock);
if (entry->length)
zswap_decompress(entry, page);
@@ -1645,9 +1591,10 @@ bool zswap_load(struct folio *folio)
if (entry->objcg)
count_objcg_event(entry->objcg, ZSWPIN);
- zswap_entry_free(entry);
-
- folio_mark_dirty(folio);
+ if (swapcache) {
+ zswap_entry_free(entry);
+ folio_mark_dirty(folio);
+ }
return true;
}
@@ -1655,19 +1602,17 @@ bool zswap_load(struct folio *folio)
void zswap_invalidate(swp_entry_t swp)
{
pgoff_t offset = swp_offset(swp);
- struct zswap_tree *tree = swap_zswap_tree(swp);
+ struct xarray *tree = swap_zswap_tree(swp);
struct zswap_entry *entry;
- spin_lock(&tree->lock);
- entry = zswap_rb_search(&tree->rbroot, offset);
+ entry = xa_erase(tree, offset);
if (entry)
- zswap_invalidate_entry(tree, entry);
- spin_unlock(&tree->lock);
+ zswap_entry_free(entry);
}
int zswap_swapon(int type, unsigned long nr_pages)
{
- struct zswap_tree *trees, *tree;
+ struct xarray *trees, *tree;
unsigned int nr, i;
nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
@@ -1677,11 +1622,8 @@ int zswap_swapon(int type, unsigned long nr_pages)
return -ENOMEM;
}
- for (i = 0; i < nr; i++) {
- tree = trees + i;
- tree->rbroot = RB_ROOT;
- spin_lock_init(&tree->lock);
- }
+ for (i = 0; i < nr; i++)
+ xa_init(trees + i);
nr_zswap_trees[type] = nr;
zswap_trees[type] = trees;
@@ -1690,7 +1632,7 @@ int zswap_swapon(int type, unsigned long nr_pages)
void zswap_swapoff(int type)
{
- struct zswap_tree *trees = zswap_trees[type];
+ struct xarray *trees = zswap_trees[type];
unsigned int i;
if (!trees)
@@ -1698,7 +1640,7 @@ void zswap_swapoff(int type)
/* try_to_unuse() invalidated all the entries already */
for (i = 0; i < nr_zswap_trees[type]; i++)
- WARN_ON_ONCE(!RB_EMPTY_ROOT(&trees[i].rbroot));
+ WARN_ON_ONCE(!xa_empty(trees + i));
kvfree(trees);
nr_zswap_trees[type] = 0;
@@ -1713,6 +1655,13 @@ void zswap_swapoff(int type)
static struct dentry *zswap_debugfs_root;
+static int debugfs_get_total_size(void *data, u64 *val)
+{
+ *val = zswap_total_pages() * PAGE_SIZE;
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(total_size_fops, debugfs_get_total_size, NULL, "%llu\n");
+
static int zswap_debugfs_init(void)
{
if (!debugfs_initialized())
@@ -1734,8 +1683,8 @@ static int zswap_debugfs_init(void)
zswap_debugfs_root, &zswap_reject_compress_poor);
debugfs_create_u64("written_back_pages", 0444,
zswap_debugfs_root, &zswap_written_back_pages);
- debugfs_create_u64("pool_total_size", 0444,
- zswap_debugfs_root, &zswap_pool_total_size);
+ debugfs_create_file("pool_total_size", 0444,
+ zswap_debugfs_root, NULL, &total_size_fops);
debugfs_create_atomic_t("stored_pages", 0444,
zswap_debugfs_root, &zswap_stored_pages);
debugfs_create_atomic_t("same_filled_pages", 0444,
diff --git a/net/9p/client.c b/net/9p/client.c
index e265a0ca6bddd4..f7e90b4769bba9 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1583,7 +1583,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
received = rsize;
}
- p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
+ p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received);
if (non_zc) {
int n = copy_to_iter(dataptr, received, to);
@@ -1609,9 +1609,6 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
int total = 0;
*err = 0;
- p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n",
- fid->fid, offset, iov_iter_count(from));
-
while (iov_iter_count(from)) {
int count = iov_iter_count(from);
int rsize = fid->iounit;
@@ -1623,6 +1620,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
if (count < rsize)
rsize = count;
+ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n",
+ fid->fid, offset, rsize, count);
+
/* Don't bother zerocopy for small IO (< 1024) */
if (clnt->trans_mod->zc_request && rsize > 1024) {
req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0,
@@ -1650,7 +1650,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
written = rsize;
}
- p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
+ p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written);
p9_req_put(clnt, req);
iov_iter_revert(from, count - written - iov_iter_count(from));
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 1a3948b8c493ed..196060dc6138af 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -95,7 +95,6 @@ struct p9_poll_wait {
* @unsent_req_list: accounting for requests that haven't been sent
* @rreq: read request
* @wreq: write request
- * @req: current request being processed (if any)
* @tmp_buf: temporary buffer to read in header
* @rc: temporary fcall for reading current frame
* @wpos: write position for current frame
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 558e158c98d010..9169efb2f43aa9 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -103,7 +103,7 @@ again:
s->ax25_dev = NULL;
if (sk->sk_socket) {
netdev_put(ax25_dev->dev,
- &ax25_dev->dev_tracker);
+ &s->dev_tracker);
ax25_dev_put(ax25_dev);
}
ax25_cb_del(s);
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index c5462486dbca10..282ec581c07201 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev)
spin_lock_bh(&ax25_dev_lock);
#ifdef CONFIG_AX25_DAMA_SLAVE
- ax25_ds_del_timer(ax25_dev);
+ timer_shutdown_sync(&ax25_dev->dama.slave_timer);
#endif
/*
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index b95c36765d045c..2243cec18ecc86 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -3948,7 +3948,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)
spin_lock_bh(&bat_priv->tt.commit_lock);
- while (true) {
+ while (timeout) {
table_size = batadv_tt_local_table_transmit_size(bat_priv);
if (packet_size_max >= table_size)
break;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 3ad74f76983b24..05346250f7195b 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1263,7 +1263,7 @@ u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle)
struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
u8 dst_type, bool dst_resolved, u8 sec_level,
- u16 conn_timeout, u8 role)
+ u16 conn_timeout, u8 role, u8 phy, u8 sec_phy)
{
struct hci_conn *conn;
struct smp_irk *irk;
@@ -1326,6 +1326,8 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
conn->dst_type = dst_type;
conn->sec_level = BT_SECURITY_LOW;
conn->conn_timeout = conn_timeout;
+ conn->le_adv_phy = phy;
+ conn->le_adv_sec_phy = sec_phy;
err = hci_connect_le_sync(hdev, conn);
if (err) {
@@ -2273,7 +2275,7 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
le = hci_connect_le(hdev, dst, dst_type, false,
BT_SECURITY_LOW,
HCI_LE_CONN_TIMEOUT,
- HCI_ROLE_SLAVE);
+ HCI_ROLE_SLAVE, 0, 0);
else
le = hci_connect_le_scan(hdev, dst, dst_type,
BT_SECURITY_LOW,
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 1690ae57a09dbb..a7028d38c1f5cc 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2874,7 +2874,7 @@ static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err)
cancel_delayed_work_sync(&hdev->ncmd_timer);
atomic_set(&hdev->cmd_cnt, 1);
- hci_cmd_sync_cancel_sync(hdev, -err);
+ hci_cmd_sync_cancel_sync(hdev, err);
}
/* Suspend HCI device */
@@ -2894,7 +2894,7 @@ int hci_suspend_dev(struct hci_dev *hdev)
return 0;
/* Cancel potentially blocking sync operation before suspend */
- hci_cancel_cmd_sync(hdev, -EHOSTDOWN);
+ hci_cancel_cmd_sync(hdev, EHOSTDOWN);
hci_req_sync_lock(hdev);
ret = hci_suspend_sync(hdev);
@@ -4210,7 +4210,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb)
err = hci_send_frame(hdev, skb);
if (err < 0) {
- hci_cmd_sync_cancel_sync(hdev, err);
+ hci_cmd_sync_cancel_sync(hdev, -err);
return;
}
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 233453807b5099..ce3ff2fa72e58a 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -218,10 +218,12 @@ static int conn_info_min_age_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val > hdev->conn_info_max_age)
+ hci_dev_lock(hdev);
+ if (val == 0 || val > hdev->conn_info_max_age) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->conn_info_min_age = val;
hci_dev_unlock(hdev);
@@ -246,10 +248,12 @@ static int conn_info_max_age_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val < hdev->conn_info_min_age)
+ hci_dev_lock(hdev);
+ if (val == 0 || val < hdev->conn_info_min_age) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->conn_info_max_age = val;
hci_dev_unlock(hdev);
@@ -567,10 +571,12 @@ static int sniff_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val % 2 || val > hdev->sniff_max_interval)
+ hci_dev_lock(hdev);
+ if (val == 0 || val % 2 || val > hdev->sniff_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->sniff_min_interval = val;
hci_dev_unlock(hdev);
@@ -595,10 +601,12 @@ static int sniff_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val % 2 || val < hdev->sniff_min_interval)
+ hci_dev_lock(hdev);
+ if (val == 0 || val % 2 || val < hdev->sniff_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->sniff_max_interval = val;
hci_dev_unlock(hdev);
@@ -850,10 +858,12 @@ static int conn_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_conn_min_interval = val;
hci_dev_unlock(hdev);
@@ -878,10 +888,12 @@ static int conn_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_conn_max_interval = val;
hci_dev_unlock(hdev);
@@ -990,10 +1002,12 @@ static int adv_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_adv_min_interval = val;
hci_dev_unlock(hdev);
@@ -1018,10 +1032,12 @@ static int adv_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_adv_max_interval = val;
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4ae2248240121c..4a27e4a17a6744 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3208,6 +3208,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
if (test_bit(HCI_ENCRYPT, &hdev->flags))
set_bit(HCI_CONN_ENCRYPT, &conn->flags);
+ /* "Link key request" completed ahead of "connect request" completes */
+ if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) &&
+ ev->link_type == ACL_LINK) {
+ struct link_key *key;
+ struct hci_cp_read_enc_key_size cp;
+
+ key = hci_find_link_key(hdev, &ev->bdaddr);
+ if (key) {
+ set_bit(HCI_CONN_ENCRYPT, &conn->flags);
+
+ if (!read_key_size_capable(hdev)) {
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ } else {
+ cp.handle = cpu_to_le16(conn->handle);
+ if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE,
+ sizeof(cp), &cp)) {
+ bt_dev_err(hdev, "sending read key size failed");
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ }
+ }
+
+ hci_encrypt_cfm(conn, ev->status);
+ }
+ }
+
/* Get remote features */
if (conn->type == ACL_LINK) {
struct hci_cp_read_remote_features cp;
@@ -3641,8 +3666,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data,
* controller really supports it. If it doesn't, assume
* the default size (16).
*/
- if (!(hdev->commands[20] & 0x10) ||
- test_bit(HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, &hdev->quirks)) {
+ if (!read_key_size_capable(hdev)) {
conn->enc_key_size = HCI_LINK_KEY_SIZE;
goto notify;
}
@@ -6013,7 +6037,7 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev, void *data,
static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
bdaddr_t *addr,
u8 addr_type, bool addr_resolved,
- u8 adv_type)
+ u8 adv_type, u8 phy, u8 sec_phy)
{
struct hci_conn *conn;
struct hci_conn_params *params;
@@ -6068,7 +6092,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
conn = hci_connect_le(hdev, addr, addr_type, addr_resolved,
BT_SECURITY_LOW, hdev->def_le_autoconnect_timeout,
- HCI_ROLE_MASTER);
+ HCI_ROLE_MASTER, phy, sec_phy);
if (!IS_ERR(conn)) {
/* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned
* by higher layer that tried to connect, if no then
@@ -6103,8 +6127,9 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
u8 bdaddr_type, bdaddr_t *direct_addr,
- u8 direct_addr_type, s8 rssi, u8 *data, u8 len,
- bool ext_adv, bool ctl_time, u64 instant)
+ u8 direct_addr_type, u8 phy, u8 sec_phy, s8 rssi,
+ u8 *data, u8 len, bool ext_adv, bool ctl_time,
+ u64 instant)
{
struct discovery_state *d = &hdev->discovery;
struct smp_irk *irk;
@@ -6192,7 +6217,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
* for advertising reports) and is already verified to be RPA above.
*/
conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, bdaddr_resolved,
- type);
+ type, phy, sec_phy);
if (!ext_adv && conn && type == LE_ADV_IND &&
len <= max_adv_len(hdev)) {
/* Store report for later inclusion by
@@ -6338,7 +6363,8 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data,
if (info->length <= max_adv_len(hdev)) {
rssi = info->data[info->length];
process_adv_report(hdev, info->type, &info->bdaddr,
- info->bdaddr_type, NULL, 0, rssi,
+ info->bdaddr_type, NULL, 0,
+ HCI_ADV_PHY_1M, 0, rssi,
info->data, info->length, false,
false, instant);
} else {
@@ -6423,6 +6449,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data,
if (legacy_evt_type != LE_ADV_INVALID) {
process_adv_report(hdev, legacy_evt_type, &info->bdaddr,
info->bdaddr_type, NULL, 0,
+ info->primary_phy,
+ info->secondary_phy,
info->rssi, info->data, info->length,
!(evt_type & LE_EXT_ADV_LEGACY_PDU),
false, instant);
@@ -6705,8 +6733,8 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data,
process_adv_report(hdev, info->type, &info->bdaddr,
info->bdaddr_type, &info->direct_addr,
- info->direct_addr_type, info->rssi, NULL, 0,
- false, false, instant);
+ info->direct_addr_type, HCI_ADV_PHY_1M, 0,
+ info->rssi, NULL, 0, false, false, instant);
}
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 00e02138003ece..efea25eb56ce03 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -105,8 +105,10 @@ void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
if (hdev->req_status == HCI_REQ_PEND) {
hdev->req_result = result;
hdev->req_status = HCI_REQ_DONE;
- if (skb)
+ if (skb) {
+ kfree_skb(hdev->req_skb);
hdev->req_skb = skb_get(skb);
+ }
wake_up_interruptible(&hdev->req_wait_q);
}
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 4ee1b976678b25..703b84bd48d5be 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1946,10 +1946,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
switch (optname) {
case HCI_DATA_DIR:
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
if (opt)
hci_pi(sk)->cmsg_mask |= HCI_CMSG_DIR;
@@ -1958,10 +1957,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
break;
case HCI_TIME_STAMP:
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
if (opt)
hci_pi(sk)->cmsg_mask |= HCI_CMSG_TSTAMP;
@@ -1979,11 +1977,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
uf.event_mask[1] = *((u32 *) f->event_mask + 1);
}
- len = min_t(unsigned int, len, sizeof(uf));
- if (copy_from_sockptr(&uf, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&uf, sizeof(uf), optval, len);
+ if (err)
break;
- }
if (!capable(CAP_NET_RAW)) {
uf.type_mask &= hci_sec_filter.type_mask;
@@ -2042,10 +2038,9 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
goto done;
}
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
hci_pi(sk)->mtu = opt;
break;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index f6b662369322b3..4c707eb64e6f63 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -617,7 +617,10 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err)
bt_dev_dbg(hdev, "err 0x%2.2x", err);
if (hdev->req_status == HCI_REQ_PEND) {
- hdev->req_result = err;
+ /* req_result is __u32 so error must be positive to be properly
+ * propagated.
+ */
+ hdev->req_result = err < 0 ? -err : err;
hdev->req_status = HCI_REQ_CANCELED;
wake_up_interruptible(&hdev->req_wait_q);
@@ -2811,8 +2814,8 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
if (qos->bcast.in.phy & BT_ISO_PHY_CODED) {
cp->scanning_phys |= LE_SCAN_PHY_CODED;
hci_le_scan_phy_params(phy, type,
- interval,
- window);
+ interval * 3,
+ window * 3);
num_phy++;
phy++;
}
@@ -2832,7 +2835,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
if (scan_coded(hdev)) {
cp->scanning_phys |= LE_SCAN_PHY_CODED;
- hci_le_scan_phy_params(phy, type, interval, window);
+ hci_le_scan_phy_params(phy, type, interval * 3, window * 3);
num_phy++;
phy++;
}
@@ -3416,7 +3419,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev)
if (ret < 0 || !bacmp(&ba, BDADDR_ANY))
return;
- bacpy(&hdev->public_addr, &ba);
+ if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks))
+ baswap(&hdev->public_addr, &ba);
+ else
+ bacpy(&hdev->public_addr, &ba);
}
struct hci_init_stage {
@@ -6340,7 +6346,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev,
plen = sizeof(*cp);
- if (scan_1m(hdev)) {
+ if (scan_1m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_1M ||
+ conn->le_adv_sec_phy == HCI_ADV_PHY_1M)) {
cp->phys |= LE_SCAN_PHY_1M;
set_ext_conn_params(conn, p);
@@ -6348,7 +6355,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev,
plen += sizeof(*p);
}
- if (scan_2m(hdev)) {
+ if (scan_2m(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_2M ||
+ conn->le_adv_sec_phy == HCI_ADV_PHY_2M)) {
cp->phys |= LE_SCAN_PHY_2M;
set_ext_conn_params(conn, p);
@@ -6356,7 +6364,8 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev,
plen += sizeof(*p);
}
- if (scan_coded(hdev)) {
+ if (scan_coded(hdev) && (conn->le_adv_phy == HCI_ADV_PHY_CODED ||
+ conn->le_adv_sec_phy == HCI_ADV_PHY_CODED)) {
cp->phys |= LE_SCAN_PHY_CODED;
set_ext_conn_params(conn, p);
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index c8793e57f4b547..ef0cc80b4c0cc1 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -1451,8 +1451,8 @@ static bool check_ucast_qos(struct bt_iso_qos *qos)
static bool check_bcast_qos(struct bt_iso_qos *qos)
{
- if (qos->bcast.sync_factor == 0x00)
- return false;
+ if (!qos->bcast.sync_factor)
+ qos->bcast.sync_factor = 0x01;
if (qos->bcast.packing > 0x01)
return false;
@@ -1475,6 +1475,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
if (qos->bcast.skip > 0x01f3)
return false;
+ if (!qos->bcast.sync_timeout)
+ qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
if (qos->bcast.sync_timeout < 0x000a || qos->bcast.sync_timeout > 0x4000)
return false;
@@ -1484,6 +1487,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
if (qos->bcast.mse > 0x1f)
return false;
+ if (!qos->bcast.timeout)
+ qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
if (qos->bcast.timeout < 0x000a || qos->bcast.timeout > 0x4000)
return false;
@@ -1494,7 +1500,7 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
- int len, err = 0;
+ int err = 0;
struct bt_iso_qos qos = default_qos;
u32 opt;
@@ -1509,10 +1515,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -1521,10 +1526,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_PKT_STATUS:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
@@ -1539,17 +1543,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- len = min_t(unsigned int, sizeof(qos), optlen);
-
- if (copy_from_sockptr(&qos, optval, len)) {
- err = -EFAULT;
- break;
- }
-
- if (len == sizeof(qos.ucast) && !check_ucast_qos(&qos)) {
- err = -EINVAL;
+ err = bt_copy_from_sockptr(&qos, sizeof(qos), optval, optlen);
+ if (err)
break;
- }
iso_pi(sk)->qos = qos;
iso_pi(sk)->qos_user_set = true;
@@ -1564,18 +1560,16 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
}
if (optlen > sizeof(iso_pi(sk)->base)) {
- err = -EOVERFLOW;
+ err = -EINVAL;
break;
}
- len = min_t(unsigned int, sizeof(iso_pi(sk)->base), optlen);
-
- if (copy_from_sockptr(iso_pi(sk)->base, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(iso_pi(sk)->base, optlen, optval,
+ optlen);
+ if (err)
break;
- }
- iso_pi(sk)->base_len = len;
+ iso_pi(sk)->base_len = optlen;
break;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 467b242d8be071..84fc70862d78ae 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4054,8 +4054,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
return -EPROTO;
hci_dev_lock(hdev);
- if (hci_dev_test_flag(hdev, HCI_MGMT) &&
- !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
+ if (hci_dev_test_flag(hdev, HCI_MGMT))
mgmt_device_connected(hdev, hcon, NULL, 0);
hci_dev_unlock(hdev);
@@ -7019,7 +7018,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
hcon = hci_connect_le(hdev, dst, dst_type, false,
chan->sec_level, timeout,
- HCI_ROLE_SLAVE);
+ HCI_ROLE_SLAVE, 0, 0);
else
hcon = hci_connect_le_scan(hdev, dst, dst_type,
chan->sec_level, timeout,
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 4287aa6cc988e3..5cc83f906c123f 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -439,7 +439,8 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
struct l2cap_options opts;
struct l2cap_conninfo cinfo;
- int len, err = 0;
+ int err = 0;
+ size_t len;
u32 opt;
BT_DBG("sk %p", sk);
@@ -486,7 +487,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
BT_DBG("mode 0x%2.2x", chan->mode);
- len = min_t(unsigned int, len, sizeof(opts));
+ len = min(len, sizeof(opts));
if (copy_to_user(optval, (char *) &opts, len))
err = -EFAULT;
@@ -536,7 +537,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
cinfo.hci_handle = chan->conn->hcon->handle;
memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3);
- len = min_t(unsigned int, len, sizeof(cinfo));
+ len = min(len, sizeof(cinfo));
if (copy_to_user(optval, (char *) &cinfo, len))
err = -EFAULT;
@@ -727,7 +728,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
struct sock *sk = sock->sk;
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
struct l2cap_options opts;
- int len, err = 0;
+ int err = 0;
u32 opt;
BT_DBG("sk %p", sk);
@@ -754,11 +755,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
opts.max_tx = chan->max_tx;
opts.txwin_size = chan->tx_win;
- len = min_t(unsigned int, sizeof(opts), optlen);
- if (copy_from_sockptr(&opts, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen);
+ if (err)
break;
- }
if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) {
err = -EINVAL;
@@ -801,10 +800,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
break;
case L2CAP_LM:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt & L2CAP_LM_FIPS) {
err = -EINVAL;
@@ -885,7 +883,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
struct bt_security sec;
struct bt_power pwr;
struct l2cap_conn *conn;
- int len, err = 0;
+ int err = 0;
u32 opt;
u16 mtu;
u8 mode;
@@ -911,11 +909,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
sec.level = BT_SECURITY_LOW;
- len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_sockptr(&sec, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+ if (err)
break;
- }
if (sec.level < BT_SECURITY_LOW ||
sec.level > BT_SECURITY_FIPS) {
@@ -960,10 +956,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt) {
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -975,10 +970,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_FLUSHABLE:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt > BT_FLUSHABLE_ON) {
err = -EINVAL;
@@ -1010,11 +1004,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
pwr.force_active = BT_POWER_FORCE_ACTIVE_ON;
- len = min_t(unsigned int, sizeof(pwr), optlen);
- if (copy_from_sockptr(&pwr, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen);
+ if (err)
break;
- }
if (pwr.force_active)
set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
@@ -1023,10 +1015,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_CHANNEL_POLICY:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
err = -EOPNOTSUPP;
break;
@@ -1055,10 +1046,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&mtu, optval, sizeof(u16))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen);
+ if (err)
break;
- }
if (chan->mode == L2CAP_MODE_EXT_FLOWCTL &&
sk->sk_state == BT_CONNECTED)
@@ -1086,10 +1076,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&mode, optval, sizeof(u8))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen);
+ if (err)
break;
- }
BT_DBG("mode %u", mode);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 32ed6e9245a307..965f621ef865ad 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2623,7 +2623,11 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
goto failed;
}
- err = hci_cmd_sync_queue(hdev, add_uuid_sync, cmd, mgmt_class_complete);
+ /* MGMT_OP_ADD_UUID don't require adapter the UP/Running so use
+ * hci_cmd_sync_submit instead of hci_cmd_sync_queue.
+ */
+ err = hci_cmd_sync_submit(hdev, add_uuid_sync, cmd,
+ mgmt_class_complete);
if (err < 0) {
mgmt_pending_free(cmd);
goto failed;
@@ -2717,8 +2721,11 @@ update_class:
goto unlock;
}
- err = hci_cmd_sync_queue(hdev, remove_uuid_sync, cmd,
- mgmt_class_complete);
+ /* MGMT_OP_REMOVE_UUID don't require adapter the UP/Running so use
+ * hci_cmd_sync_submit instead of hci_cmd_sync_queue.
+ */
+ err = hci_cmd_sync_submit(hdev, remove_uuid_sync, cmd,
+ mgmt_class_complete);
if (err < 0)
mgmt_pending_free(cmd);
@@ -2784,8 +2791,11 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- err = hci_cmd_sync_queue(hdev, set_class_sync, cmd,
- mgmt_class_complete);
+ /* MGMT_OP_SET_DEV_CLASS don't require adapter the UP/Running so use
+ * hci_cmd_sync_submit instead of hci_cmd_sync_queue.
+ */
+ err = hci_cmd_sync_submit(hdev, set_class_sync, cmd,
+ mgmt_class_complete);
if (err < 0)
mgmt_pending_free(cmd);
@@ -5475,8 +5485,8 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- err = hci_cmd_sync_queue(hdev, mgmt_remove_adv_monitor_sync, cmd,
- mgmt_remove_adv_monitor_complete);
+ err = hci_cmd_sync_submit(hdev, mgmt_remove_adv_monitor_sync, cmd,
+ mgmt_remove_adv_monitor_complete);
if (err) {
mgmt_pending_remove(cmd);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index b54e8a530f55a1..29aa07e9db9d71 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -629,7 +629,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname,
switch (optname) {
case RFCOMM_LM:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+ if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) {
err = -EFAULT;
break;
}
@@ -664,7 +664,6 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
struct sock *sk = sock->sk;
struct bt_security sec;
int err = 0;
- size_t len;
u32 opt;
BT_DBG("sk %p", sk);
@@ -686,11 +685,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
sec.level = BT_SECURITY_LOW;
- len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_sockptr(&sec, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+ if (err)
break;
- }
if (sec.level > BT_SECURITY_HIGH) {
err = -EINVAL;
@@ -706,10 +703,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 43daf965a01e4a..5d03c5440b06f8 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -824,7 +824,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
- int len, err = 0;
+ int err = 0;
struct bt_voice voice;
u32 opt;
struct bt_codecs *codecs;
@@ -843,10 +843,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -863,11 +862,10 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
voice.setting = sco_pi(sk)->setting;
- len = min_t(unsigned int, sizeof(voice), optlen);
- if (copy_from_sockptr(&voice, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&voice, sizeof(voice), optval,
+ optlen);
+ if (err)
break;
- }
/* Explicitly check for these values */
if (voice.setting != BT_VOICE_TRANSPARENT &&
@@ -890,10 +888,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_PKT_STATUS:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
@@ -934,9 +931,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(buffer, optval, optlen)) {
+ err = bt_copy_from_sockptr(buffer, optlen, optval, optlen);
+ if (err) {
hci_dev_put(hdev);
- err = -EFAULT;
break;
}
@@ -967,7 +964,8 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname,
struct sock *sk = sock->sk;
struct sco_options opts;
struct sco_conninfo cinfo;
- int len, err = 0;
+ int err = 0;
+ size_t len;
BT_DBG("sk %p", sk);
@@ -989,7 +987,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname,
BT_DBG("mtu %u", opts.mtu);
- len = min_t(unsigned int, len, sizeof(opts));
+ len = min(len, sizeof(opts));
if (copy_to_user(optval, (char *)&opts, len))
err = -EFAULT;
@@ -1007,7 +1005,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname,
cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle;
memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3);
- len = min_t(unsigned int, len, sizeof(cinfo));
+ len = min(len, sizeof(cinfo));
if (copy_to_user(optval, (char *)&cinfo, len))
err = -EFAULT;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index f21097e7348278..ceaa5a89b947fc 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
return netif_receive_skb(skb);
}
-static int br_pass_frame_up(struct sk_buff *skb)
+static int br_pass_frame_up(struct sk_buff *skb, bool promisc)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
struct net_bridge *br = netdev_priv(brdev);
@@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb)
br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
BR_MCAST_DIR_TX);
+ BR_INPUT_SKB_CB(skb)->promisc = promisc;
+
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
dev_net(indev), NULL, skb, indev, NULL,
br_netif_receive_skb);
@@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
struct net_bridge_mcast *brmctx;
struct net_bridge_vlan *vlan;
struct net_bridge *br;
+ bool promisc;
u16 vid = 0;
u8 state;
@@ -137,7 +140,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (p->flags & BR_LEARNING)
br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
- local_rcv = !!(br->dev->flags & IFF_PROMISC);
+ promisc = !!(br->dev->flags & IFF_PROMISC);
+ local_rcv = promisc;
+
if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
/* by definition the broadcast is also a multicast address */
if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) {
@@ -200,7 +205,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
unsigned long now = jiffies;
if (test_bit(BR_FDB_LOCAL, &dst->flags))
- return br_pass_frame_up(skb);
+ return br_pass_frame_up(skb, false);
if (now != dst->used)
dst->used = now;
@@ -213,7 +218,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
}
if (local_rcv)
- return br_pass_frame_up(skb);
+ return br_pass_frame_up(skb, promisc);
out:
return 0;
@@ -386,6 +391,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
goto forward;
}
+ BR_INPUT_SKB_CB(skb)->promisc = false;
+
/* The else clause should be hit when nf_hook():
* - returns < 0 (drop/error)
* - returns = 0 (stolen/nf_queue)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 35e10c5a766d55..22e35623c148ac 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -600,11 +600,17 @@ static unsigned int br_nf_local_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
+ bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
struct nf_conntrack *nfct = skb_nfct(skb);
const struct nf_ct_hook *ct_hook;
struct nf_conn *ct;
int ret;
+ if (promisc) {
+ nf_reset_ct(skb);
+ return NF_ACCEPT;
+ }
+
if (!nfct || skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 2cf4fc75626399..f17dbac7d82843 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -667,7 +667,7 @@ void br_ifinfo_notify(int event, const struct net_bridge *br,
{
u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED;
- return br_info_notify(event, br, port, filter);
+ br_info_notify(event, br, port, filter);
}
/*
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 86ea5e6689b5ce..d4bedc87b1d8f1 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -589,6 +589,7 @@ struct br_input_skb_cb {
#endif
u8 proxyarp_replied:1;
u8 src_port_isolated:1;
+ u8 promisc:1;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_filtered:1;
#endif
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 99d82676f780ac..cbd0e3586c3f61 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
struct ebt_table_info *newinfo;
struct ebt_replace tmp;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1423,6 +1425,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len)
{
struct ebt_replace hlp;
+ if (len < sizeof(hlp))
+ return -EINVAL;
if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
@@ -2352,6 +2356,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg,
{
struct compat_ebt_replace hlp;
+ if (len < sizeof(hlp))
+ return -EINVAL;
if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 6f877e31709bad..c3c51b9a68265b 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -294,18 +294,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- enum ip_conntrack_info ctinfo;
+ bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
+ struct nf_conntrack *nfct = skb_nfct(skb);
struct nf_conn *ct;
- if (skb->pkt_type == PACKET_HOST)
+ if (promisc) {
+ nf_reset_ct(skb);
+ return NF_ACCEPT;
+ }
+
+ if (!nfct || skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;
/* nf_conntrack_confirm() cannot handle concurrent clones,
* this happens for broad/multicast frames with e.g. macvlan on top
* of the bridge device.
*/
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
return NF_ACCEPT;
/* let inet prerouting call conntrack again */
diff --git a/net/core/dev.c b/net/core/dev.c
index 9a67003e49db87..331848eca7d310 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -429,7 +429,7 @@ EXPORT_PER_CPU_SYMBOL(softnet_data);
* PP consumers must pay attention to run APIs in the appropriate context
* (e.g. NAPI context).
*/
-static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool);
+static DEFINE_PER_CPU(struct page_pool *, system_page_pool);
#ifdef CONFIG_LOCKDEP
/*
@@ -3775,6 +3775,10 @@ no_lock_out:
return rc;
}
+ if (unlikely(READ_ONCE(q->owner) == smp_processor_id())) {
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_RECLASSIFY_LOOP);
+ return NET_XMIT_DROP;
+ }
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
@@ -3814,7 +3818,9 @@ no_lock_out:
qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
+ WRITE_ONCE(q->owner, smp_processor_id());
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
+ WRITE_ONCE(q->owner, -1);
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
diff --git a/net/core/filter.c b/net/core/filter.c
index 8adf95765cdd96..ae5254f712c94b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4360,10 +4360,12 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
+ u32 flags = ri->flags;
struct bpf_map *map;
int err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->flags = 0;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
if (unlikely(!xdpf)) {
@@ -4375,11 +4377,20 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
- map = READ_ONCE(ri->map);
- if (unlikely(map)) {
+ if (unlikely(flags & BPF_F_BROADCAST)) {
+ map = READ_ONCE(ri->map);
+
+ /* The map pointer is cleared when the map is being torn
+ * down by bpf_clear_redirect_map()
+ */
+ if (unlikely(!map)) {
+ err = -ENOENT;
+ break;
+ }
+
WRITE_ONCE(ri->map, NULL);
err = dev_map_enqueue_multi(xdpf, dev, map,
- ri->flags & BPF_F_EXCLUDE_INGRESS);
+ flags & BPF_F_EXCLUDE_INGRESS);
} else {
err = dev_map_enqueue(fwd, xdpf, dev);
}
@@ -4442,9 +4453,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog,
- void *fwd,
- enum bpf_map_type map_type, u32 map_id)
+ struct bpf_prog *xdp_prog, void *fwd,
+ enum bpf_map_type map_type, u32 map_id,
+ u32 flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map;
@@ -4454,11 +4465,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
- map = READ_ONCE(ri->map);
- if (unlikely(map)) {
+ if (unlikely(flags & BPF_F_BROADCAST)) {
+ map = READ_ONCE(ri->map);
+
+ /* The map pointer is cleared when the map is being torn
+ * down by bpf_clear_redirect_map()
+ */
+ if (unlikely(!map)) {
+ err = -ENOENT;
+ break;
+ }
+
WRITE_ONCE(ri->map, NULL);
err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
- ri->flags & BPF_F_EXCLUDE_INGRESS);
+ flags & BPF_F_EXCLUDE_INGRESS);
} else {
err = dev_map_generic_redirect(fwd, skb, xdp_prog);
}
@@ -4495,9 +4515,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
+ u32 flags = ri->flags;
int err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->flags = 0;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
@@ -4517,7 +4539,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
return 0;
}
- return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
+ return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags);
err:
_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
return err;
diff --git a/net/core/gro.c b/net/core/gro.c
index ee30d4f0c03876..83f35d99a682c2 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -192,8 +192,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
}
merge:
- /* sk owenrship - if any - completely transferred to the aggregated packet */
+ /* sk ownership - if any - completely transferred to the aggregated packet */
skb->destructor = NULL;
+ skb->sk = NULL;
delta_truesize = skb->truesize;
if (offset > headlen) {
unsigned int eat = offset - headlen;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 4d75ef9d24bfa7..fd20aae30be23c 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1226,11 +1226,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
- if (psock) {
- read_lock_bh(&sk->sk_callback_lock);
+ if (psock)
sk_psock_data_ready(sk, psock);
- read_unlock_bh(&sk->sk_callback_lock);
- }
rcu_read_unlock();
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 43bf3818c19e82..0963689a59506a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -482,7 +482,7 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
unsigned long flags;
struct sk_buff_head *list = &sk->sk_receive_queue;
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
+ if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
atomic_inc(&sk->sk_drops);
trace_sock_rcvqueue_full(sk, skb);
return -ENOMEM;
@@ -552,7 +552,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
skb->dev = NULL;
- if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
+ if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
atomic_inc(&sk->sk_drops);
goto discard_and_relse;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 27d733c0f65e16..8598466a380578 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -411,6 +411,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock *sk;
int err = 0;
+ if (irqs_disabled())
+ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
@@ -933,6 +936,9 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
struct bpf_shtab_elem *elem;
int ret = -ENOENT;
+ if (irqs_disabled())
+ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 2edc8b796a4e73..049c3adeb85044 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -164,17 +164,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
eth = (struct ethhdr *)skb->data;
skb_pull_inline(skb, ETH_HLEN);
- if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
- dev->dev_addr))) {
- if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
- if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
- skb->pkt_type = PACKET_BROADCAST;
- else
- skb->pkt_type = PACKET_MULTICAST;
- } else {
- skb->pkt_type = PACKET_OTHERHOST;
- }
- }
+ eth_skb_pkt_type(skb, dev);
/*
* Some variants of DSA tagging don't have an ethertype field
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index c98b5b71ad7c32..e9d45133d6412e 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -132,30 +132,29 @@ static int hsr_dev_open(struct net_device *dev)
{
struct hsr_priv *hsr;
struct hsr_port *port;
- char designation;
+ const char *designation = NULL;
hsr = netdev_priv(dev);
- designation = '\0';
hsr_for_each_port(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
switch (port->type) {
case HSR_PT_SLAVE_A:
- designation = 'A';
+ designation = "Slave A";
break;
case HSR_PT_SLAVE_B:
- designation = 'B';
+ designation = "Slave B";
break;
default:
- designation = '?';
+ designation = "Unknown";
}
if (!is_slave_up(port->dev))
- netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n",
+ netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n",
designation, port->dev->name);
}
- if (designation == '\0')
+ if (!designation)
netdev_warn(dev, "No slave devices configured\n");
return 0;
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index e5742f2a2d522a..1b6457f357bdb2 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -220,7 +220,8 @@ void hsr_del_port(struct hsr_port *port)
netdev_update_features(master->dev);
dev_set_mtu(master->dev, hsr_get_max_mtu(hsr));
netdev_rx_handler_unregister(port->dev);
- dev_set_promiscuity(port->dev, -1);
+ if (!port->hsr->fwd_offloaded)
+ dev_set_promiscuity(port->dev, -1);
netdev_upper_dev_unlink(port->dev, master->dev);
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 48741352a88a72..c484b1c0fc00a7 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1050,6 +1050,11 @@ next:
e++;
}
}
+
+ /* Don't let NLM_DONE coalesce into a message, even if it could.
+ * Some user space expects NLM_DONE in a separate recv().
+ */
+ err = skb->len;
out:
cb->args[1] = e;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e63a3bf9961762..437e782b9663bb 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -92,6 +92,7 @@
#include <net/inet_common.h>
#include <net/ip_fib.h>
#include <net/l3mdev.h>
+#include <net/addrconf.h>
/*
* Build xmit assembly blocks
@@ -1032,6 +1033,8 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
struct icmp_ext_hdr *ext_hdr, _ext_hdr;
struct icmp_ext_echo_iio *iio, _iio;
struct net *net = dev_net(skb->dev);
+ struct inet6_dev *in6_dev;
+ struct in_device *in_dev;
struct net_device *dev;
char buff[IFNAMSIZ];
u16 ident_len;
@@ -1115,10 +1118,15 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
/* Fill bits in reply message */
if (dev->flags & IFF_UP)
status |= ICMP_EXT_ECHOREPLY_ACTIVE;
- if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list)
+
+ in_dev = __in_dev_get_rcu(dev);
+ if (in_dev && rcu_access_pointer(in_dev->ifa_list))
status |= ICMP_EXT_ECHOREPLY_IPV4;
- if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list))
+
+ in6_dev = __in6_dev_get(dev);
+ if (in6_dev && !list_empty(&in6_dev->addr_list))
status |= ICMP_EXT_ECHOREPLY_IPV6;
+
dev_put(dev);
icmphdr->un.echo.sequence |= htons(status);
return true;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7d8090f109ef4e..3b38610958ee4b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -203,8 +203,15 @@ static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
kuid_t sk_uid, bool relax,
bool reuseport_cb_ok, bool reuseport_ok)
{
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- return false;
+ if (ipv6_only_sock(sk2)) {
+ if (sk->sk_family == AF_INET)
+ return false;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ return false;
+#endif
+ }
return inet_bind_conflict(sk, sk2, sk_uid, relax,
reuseport_cb_ok, reuseport_ok);
@@ -287,6 +294,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l
struct sock_reuseport *reuseport_cb;
struct inet_bind_hashbucket *head2;
struct inet_bind2_bucket *tb2;
+ bool conflict = false;
bool reuseport_cb_ok;
rcu_read_lock();
@@ -299,18 +307,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l
spin_lock(&head2->lock);
- inet_bind_bucket_for_each(tb2, &head2->chain)
- if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
- break;
+ inet_bind_bucket_for_each(tb2, &head2->chain) {
+ if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
+ continue;
- if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok)) {
- spin_unlock(&head2->lock);
- return true;
+ if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok))
+ continue;
+
+ conflict = true;
+ break;
}
spin_unlock(&head2->lock);
- return false;
+
+ return conflict;
}
/*
@@ -771,6 +781,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ /* ongoing timer handlers need to acquire socket lock. */
+ sock_not_owned_by_me(sk);
+
+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+
+ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
+ sk_stop_timer_sync(sk, &sk->sk_timer);
+}
+
void inet_csk_delete_keepalive_timer(struct sock *sk)
{
sk_stop_timer(sk, &sk->sk_timer);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 7072fc0783ef56..c88c9034d63004 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -24,6 +24,8 @@
#include <net/ip.h>
#include <net/ipv6.h>
+#include "../core/sock_destructor.h"
+
/* Use skb->cb to track consecutive/adjacent fragments coming at
* the end of the queue. Nodes in the rb-tree queue will
* contain "runs" of one or more adjacent fragments.
@@ -39,6 +41,7 @@ struct ipfrag_skb_cb {
};
struct sk_buff *next_frag;
int frag_run_len;
+ int ip_defrag_offset;
};
#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
@@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
*/
if (!last)
fragrun_create(q, skb); /* First fragment. */
- else if (last->ip_defrag_offset + last->len < end) {
+ else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) {
/* This is the common case: skb goes to the end. */
/* Detect and discard overlaps. */
- if (offset < last->ip_defrag_offset + last->len)
+ if (offset < FRAG_CB(last)->ip_defrag_offset + last->len)
return IPFRAG_OVERLAP;
- if (offset == last->ip_defrag_offset + last->len)
+ if (offset == FRAG_CB(last)->ip_defrag_offset + last->len)
fragrun_append_to_last(q, skb);
else
fragrun_create(q, skb);
@@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
parent = *rbn;
curr = rb_to_skb(parent);
- curr_run_end = curr->ip_defrag_offset +
+ curr_run_end = FRAG_CB(curr)->ip_defrag_offset +
FRAG_CB(curr)->frag_run_len;
- if (end <= curr->ip_defrag_offset)
+ if (end <= FRAG_CB(curr)->ip_defrag_offset)
rbn = &parent->rb_left;
else if (offset >= curr_run_end)
rbn = &parent->rb_right;
- else if (offset >= curr->ip_defrag_offset &&
+ else if (offset >= FRAG_CB(curr)->ip_defrag_offset &&
end <= curr_run_end)
return IPFRAG_DUP;
else
@@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
rb_insert_color(&skb->rbnode, &q->rb_fragments);
}
- skb->ip_defrag_offset = offset;
+ FRAG_CB(skb)->ip_defrag_offset = offset;
return IPFRAG_OK;
}
@@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
struct sk_buff *parent)
{
struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
- struct sk_buff **nextp;
+ void (*destructor)(struct sk_buff *);
+ unsigned int orig_truesize = 0;
+ struct sk_buff **nextp = NULL;
+ struct sock *sk = skb->sk;
int delta;
+ if (sk && is_skb_wmem(skb)) {
+ /* TX: skb->sk might have been passed as argument to
+ * dst->output and must remain valid until tx completes.
+ *
+ * Move sk to reassembled skb and fix up wmem accounting.
+ */
+ orig_truesize = skb->truesize;
+ destructor = skb->destructor;
+ }
+
if (head != skb) {
fp = skb_clone(skb, GFP_ATOMIC);
- if (!fp)
- return NULL;
+ if (!fp) {
+ head = skb;
+ goto out_restore_sk;
+ }
FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
if (RB_EMPTY_NODE(&skb->rbnode))
FRAG_CB(parent)->next_frag = fp;
@@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
&q->rb_fragments);
if (q->fragments_tail == skb)
q->fragments_tail = fp;
+
+ if (orig_truesize) {
+ /* prevent skb_morph from releasing sk */
+ skb->sk = NULL;
+ skb->destructor = NULL;
+ }
skb_morph(skb, head);
FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
rb_replace_node(&head->rbnode, &skb->rbnode,
@@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
consume_skb(head);
head = skb;
}
- WARN_ON(head->ip_defrag_offset != 0);
+ WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0);
delta = -head->truesize;
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
- return NULL;
+ goto out_restore_sk;
delta += head->truesize;
if (delta)
@@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
clone = alloc_skb(0, GFP_ATOMIC);
if (!clone)
- return NULL;
+ goto out_restore_sk;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_frag_list_init(head);
for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
@@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
nextp = &skb_shinfo(head)->frag_list;
}
+out_restore_sk:
+ if (orig_truesize) {
+ int ts_delta = head->truesize - orig_truesize;
+
+ /* if this reassembled skb is fragmented later,
+ * fraglist skbs will get skb->sk assigned from head->sk,
+ * and each frag skb will be released via sock_wfree.
+ *
+ * Update sk_wmem_alloc.
+ */
+ head->sk = sk;
+ head->destructor = destructor;
+ refcount_add(ts_delta, &sk->sk_wmem_alloc);
+ }
+
return nextp;
}
EXPORT_SYMBOL(inet_frag_reasm_prepare);
@@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare);
void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
void *reasm_data, bool try_coalesce)
{
+ struct sock *sk = is_skb_wmem(head) ? head->sk : NULL;
+ const unsigned int head_truesize = head->truesize;
struct sk_buff **nextp = reasm_data;
struct rb_node *rbn;
struct sk_buff *fp;
@@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
head->prev = NULL;
head->tstamp = q->stamp;
head->mono_delivery_time = q->mono_delivery_time;
+
+ if (sk)
+ refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(inet_frag_reasm_finish);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a4941f53b52372..fb947d1613fe2b 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -384,6 +384,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
}
skb_dst_drop(skb);
+ skb_orphan(skb);
return -EINPROGRESS;
insert_error:
@@ -487,7 +488,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
struct ipq *qp;
__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
- skb_orphan(skb);
/* Lookup (or create) queue header */
qp = ip_find(net, ip_hdr(skb), user, vif);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7b16c211b90447..57ddcd8c62f67e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
tpi->flags | TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
} else {
+ if (unlikely(!pskb_may_pull(skb,
+ gre_hdr_len + sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
ver = ershdr->ver;
+ iph = ip_hdr(skb);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 8f6e950163a792..1b991b889506a9 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -329,6 +329,7 @@ config NFT_COMPAT_ARP
config IP_NF_ARPFILTER
tristate "arptables-legacy packet filtering support"
select IP_NF_ARPTABLES
+ select NETFILTER_FAMILY_ARP
depends on NETFILTER_XTABLES
help
ARP packet filtering defines a table `filter', which has a series of
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2407066b0fec11..14365b20f1c5c0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -964,6 +966,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1254,6 +1258,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1262,6 +1268,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 7da1df4997d057..fe89a056eb06c4 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1108,6 +1108,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1116,6 +1118,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1492,6 +1496,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1500,6 +1506,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 74928a9d1aa48b..535856b0f0edce 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -768,8 +768,10 @@ static int nh_grp_hw_stats_update(struct nexthop *nh, bool *hw_stats_used)
struct net *net = nh->net;
int err;
- if (nexthop_notifiers_is_empty(net))
+ if (nexthop_notifiers_is_empty(net)) {
+ *hw_stats_used = false;
return 0;
+ }
err = nh_notifier_grp_hw_stats_init(&info, nh);
if (err)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c8f76f56dc1653..b814fdab19f710 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -926,13 +926,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
peer->rate_last = jiffies;
++peer->n_redirects;
-#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (log_martians &&
+ if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians &&
peer->n_redirects == ip_rt_redirect_number)
net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
&ip_hdr(skb)->saddr, inet_iif(skb),
&ip_hdr(skb)->daddr, &gw);
-#endif
}
out_put_peer:
inet_putpeer(peer);
@@ -2168,6 +2166,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
int err = -EINVAL;
u32 tag = 0;
+ if (!in_dev)
+ return -EINVAL;
+
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d20b62d521712a..e767721b3a588b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout)
lock_sock(sk);
__tcp_close(sk, timeout);
release_sock(sk);
+ if (!sk->sk_net_refcnt)
+ inet_csk_clear_xmit_timers_sync(sk);
sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 3afeeb68e8a7e2..781b67a525719a 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -1068,6 +1068,7 @@ void tcp_ao_connect_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_ao_info *ao_info;
+ struct hlist_node *next;
union tcp_ao_addr *addr;
struct tcp_ao_key *key;
int family, l3index;
@@ -1090,7 +1091,7 @@ void tcp_ao_connect_init(struct sock *sk)
l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
sk->sk_bound_dev_if);
- hlist_for_each_entry_rcu(key, &ao_info->head, node) {
+ hlist_for_each_entry_safe(key, next, &ao_info->head, node) {
if (!tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1))
continue;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 661d0e0d273f61..420905be5f30c9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -582,6 +582,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk,
}
DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+EXPORT_SYMBOL(udp_encap_needed_key);
+
+#if IS_ENABLED(CONFIG_IPV6)
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+EXPORT_SYMBOL(udpv6_encap_needed_key);
+#endif
+
void udp_encap_enable(void)
{
static_branch_inc(&udp_encap_needed_key);
@@ -1116,16 +1123,17 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = udp_cmsg_send(sk, msg, &ipc.gso_size);
- if (err > 0)
+ if (err > 0) {
err = ip_cmsg_send(sk, msg, &ipc,
sk->sk_family == AF_INET6);
+ connected = 0;
+ }
if (unlikely(err < 0)) {
kfree(ipc.opt);
return err;
}
if (ipc.opt)
free = 1;
- connected = 0;
}
if (!ipc.opt) {
struct ip_options_rcu *inet_opt;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index b9880743765c6c..3498dd1d0694dc 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -449,8 +449,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
NAPI_GRO_CB(p)->count++;
p->data_len += skb->len;
- /* sk owenrship - if any - completely transferred to the aggregated packet */
+ /* sk ownership - if any - completely transferred to the aggregated packet */
skb->destructor = NULL;
+ skb->sk = NULL;
p->truesize += skb->truesize;
p->len += skb->len;
@@ -551,11 +552,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
unsigned int off = skb_gro_offset(skb);
int flush = 1;
- /* we can do L4 aggregation only if the packet can't land in a tunnel
- * otherwise we could corrupt the inner stream
+ /* We can do L4 aggregation only if the packet can't land in a tunnel
+ * otherwise we could corrupt the inner stream. Detecting such packets
+ * cannot be foolproof and the aggregation might still happen in some
+ * cases. Such packets should be caught in udp_unexpected_gso later.
*/
NAPI_GRO_CB(skb)->is_flist = 0;
if (!sk || !udp_sk(sk)->gro_receive) {
+ /* If the packet was locally encapsulated in a UDP tunnel that
+ * wasn't detected above, do not GRO.
+ */
+ if (skb->encapsulation)
+ goto out;
+
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;
@@ -719,13 +728,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);
return 0;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 247bd4d8ee45a6..779aa6ecdd499b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2091,9 +2091,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
- result = ifp;
- in6_ifa_hold(ifp);
- break;
+ if (in6_ifa_hold_safe(ifp)) {
+ result = ifp;
+ break;
+ }
}
}
}
@@ -5416,10 +5417,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
err = 0;
if (fillargs.ifindex) {
- err = -ENODEV;
dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
- if (!dev)
+ if (!dev) {
+ err = -ENODEV;
goto done;
+ }
idev = __in6_dev_get(dev);
if (idev)
err = in6_dump_addrs(idev, skb, cb,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5c558dc1c68386..c1f62352a48145 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -651,19 +651,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (!w) {
/* New dump:
*
- * 1. hook callback destructor.
- */
- cb->args[3] = (long)cb->done;
- cb->done = fib6_dump_done;
-
- /*
- * 2. allocate and initialize walker.
+ * 1. allocate and initialize walker.
*/
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (!w)
return -ENOMEM;
w->func = fib6_dump_node;
cb->args[2] = (long)w;
+
+ /* 2. hook callback destructor.
+ */
+ cb->args[3] = (long)cb->done;
+ cb->done = fib6_dump_done;
+
}
arg.skb = skb;
@@ -1385,7 +1385,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
struct nl_info *info, struct netlink_ext_ack *extack)
{
struct fib6_table *table = rt->fib6_table;
- struct fib6_node *fn, *pn = NULL;
+ struct fib6_node *fn;
+#ifdef CONFIG_IPV6_SUBTREES
+ struct fib6_node *pn = NULL;
+#endif
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
@@ -1409,9 +1412,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
goto out;
}
+#ifdef CONFIG_IPV6_SUBTREES
pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
if (rt->fib6_src.plen) {
struct fib6_node *sn;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index ca7e77e842835a..c89aef524df9a2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb,
struct ip6_tnl *tunnel;
u8 ver;
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ipv6h = ipv6_hdr(skb);
ershdr = (struct erspan_base_hdr *)skb->data;
ver = ershdr->ver;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index fd9f049d6d41e7..131f7bb2110d3a 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1125,6 +1125,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1133,6 +1135,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1501,6 +1505,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1509,6 +1515,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 1a51a44571c372..d0dcbaca19943a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -294,6 +294,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
}
skb_dst_drop(skb);
+ skb_orphan(skb);
return -EINPROGRESS;
insert_error:
@@ -469,7 +470,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
- skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, hdr,
skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7c1e6469d091d2..1a4cccdd40c9ca 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -447,7 +447,7 @@ csum_copy_err:
goto try_again;
}
-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
static_branch_inc(&udpv6_encap_needed_key);
@@ -1474,9 +1474,11 @@ do_udp_sendmsg:
ipc6.opt = opt;
err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
- if (err > 0)
+ if (err > 0) {
err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6,
&ipc6);
+ connected = false;
+ }
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -1488,7 +1490,6 @@ do_udp_sendmsg:
}
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
- connected = false;
}
if (!opt) {
opt = txopt_get(np);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 312bcaeea96fb7..bbd347de00b450 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -174,13 +174,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);
return 0;
}
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 39e487ccc46881..8ba00ad433c21b 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -127,6 +127,9 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
/* checksums verified by L2TP */
skb->ip_summed = CHECKSUM_NONE;
+ /* drop outer flow-hash */
+ skb_clear_hash(skb);
+
skb_dst_drop(skb);
nf_reset_ct(skb);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f03452dc716d5d..f67c1d0218121d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2199,15 +2199,14 @@ static int ieee80211_change_station(struct wiphy *wiphy,
}
if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
- sta->sdata->u.vlan.sta) {
- ieee80211_clear_fast_rx(sta);
+ sta->sdata->u.vlan.sta)
RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL);
- }
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
ieee80211_vif_dec_num_mcast(sta->sdata);
sta->sdata = vlansdata;
+ ieee80211_check_fast_rx(sta);
ieee80211_check_fast_xmit(sta);
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 80e4b9784131d1..ccacaed32817ae 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -797,6 +797,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *curr_ctx = NULL;
+ bool new_idle;
int ret = 0;
if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN))
@@ -829,8 +830,6 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
out:
rcu_assign_pointer(link->conf->chanctx_conf, conf);
- sdata->vif.cfg.idle = !conf;
-
if (curr_ctx && ieee80211_chanctx_num_assigned(local, curr_ctx) > 0) {
ieee80211_recalc_chanctx_chantype(local, curr_ctx);
ieee80211_recalc_smps_chanctx(local, curr_ctx);
@@ -843,9 +842,27 @@ out:
ieee80211_recalc_chanctx_min_def(local, new_ctx, NULL);
}
- if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
- sdata->vif.type != NL80211_IFTYPE_MONITOR)
- ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_IDLE);
+ if (conf) {
+ new_idle = false;
+ } else {
+ struct ieee80211_link_data *tmp;
+
+ new_idle = true;
+ for_each_sdata_link(local, tmp) {
+ if (rcu_access_pointer(tmp->conf->chanctx_conf)) {
+ new_idle = false;
+ break;
+ }
+ }
+ }
+
+ if (new_idle != sdata->vif.cfg.idle) {
+ sdata->vif.cfg.idle = new_idle;
+
+ if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
+ sdata->vif.type != NL80211_IFTYPE_MONITOR)
+ ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_IDLE);
+ }
ieee80211_check_fast_xmit_iface(sdata);
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 49da401c53408b..35a8ba25fa57fd 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -158,7 +158,7 @@ do { \
_sdata_dbg(print, sdata, "[link %d] " fmt, \
link_id, ##__VA_ARGS__); \
else \
- _sdata_dbg(1, sdata, fmt, ##__VA_ARGS__); \
+ _sdata_dbg(print, sdata, fmt, ##__VA_ARGS__); \
} while (0)
#define link_dbg(link, fmt, ...) \
_link_id_dbg(1, (link)->sdata, (link)->link_id, \
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b6fead612b66b5..bd507d6b65e3f6 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -131,7 +131,7 @@ struct ieee80211_bss {
};
/**
- * enum ieee80211_corrupt_data_flags - BSS data corruption flags
+ * enum ieee80211_bss_corrupt_data_flags - BSS data corruption flags
* @IEEE80211_BSS_CORRUPT_BEACON: last beacon frame received was corrupted
* @IEEE80211_BSS_CORRUPT_PROBE_RESP: last probe response received was corrupted
*
@@ -144,7 +144,7 @@ enum ieee80211_bss_corrupt_data_flags {
};
/**
- * enum ieee80211_valid_data_flags - BSS valid data flags
+ * enum ieee80211_bss_valid_data_flags - BSS valid data flags
* @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE
* @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE
* @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 32475da98d739c..cbc9b5e40cb35e 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -747,6 +747,9 @@ bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 ctrl_flags)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+ struct ieee80211_mesh_fast_tx_key key = {
+ .type = MESH_FAST_TX_TYPE_LOCAL
+ };
struct ieee80211_mesh_fast_tx *entry;
struct ieee80211s_hdr *meshhdr;
u8 sa[ETH_ALEN] __aligned(2);
@@ -782,7 +785,10 @@ bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata,
return false;
}
- entry = mesh_fast_tx_get(sdata, skb->data);
+ ether_addr_copy(key.addr, skb->data);
+ if (!ether_addr_equal(skb->data + ETH_ALEN, sdata->vif.addr))
+ key.type = MESH_FAST_TX_TYPE_PROXIED;
+ entry = mesh_fast_tx_get(sdata, &key);
if (!entry)
return false;
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index d913ce7ba72ef8..3f9664e4e00c6c 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -135,9 +135,38 @@ struct mesh_path {
#define MESH_FAST_TX_CACHE_TIMEOUT 8000 /* msecs */
/**
+ * enum ieee80211_mesh_fast_tx_type - cached mesh fast tx entry type
+ *
+ * @MESH_FAST_TX_TYPE_LOCAL: tx from the local vif address as SA
+ * @MESH_FAST_TX_TYPE_PROXIED: local tx with a different SA (e.g. bridged)
+ * @MESH_FAST_TX_TYPE_FORWARDED: forwarded from a different mesh point
+ * @NUM_MESH_FAST_TX_TYPE: number of entry types
+ */
+enum ieee80211_mesh_fast_tx_type {
+ MESH_FAST_TX_TYPE_LOCAL,
+ MESH_FAST_TX_TYPE_PROXIED,
+ MESH_FAST_TX_TYPE_FORWARDED,
+
+ /* must be last */
+ NUM_MESH_FAST_TX_TYPE
+};
+
+
+/**
+ * struct ieee80211_mesh_fast_tx_key - cached mesh fast tx entry key
+ *
+ * @addr: The Ethernet DA for this entry
+ * @type: cache entry type
+ */
+struct ieee80211_mesh_fast_tx_key {
+ u8 addr[ETH_ALEN] __aligned(2);
+ u16 type;
+};
+
+/**
* struct ieee80211_mesh_fast_tx - cached mesh fast tx entry
* @rhash: rhashtable pointer
- * @addr_key: The Ethernet DA which is the key for this entry
+ * @key: the lookup key for this cache entry
* @fast_tx: base fast_tx data
* @hdr: cached mesh and rfc1042 headers
* @hdrlen: length of mesh + rfc1042
@@ -148,7 +177,7 @@ struct mesh_path {
*/
struct ieee80211_mesh_fast_tx {
struct rhash_head rhash;
- u8 addr_key[ETH_ALEN] __aligned(2);
+ struct ieee80211_mesh_fast_tx_key key;
struct ieee80211_fast_tx fast_tx;
u8 hdr[sizeof(struct ieee80211s_hdr) + sizeof(rfc1042_header)];
@@ -334,7 +363,8 @@ void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata);
bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt);
struct ieee80211_mesh_fast_tx *
-mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, const u8 *addr);
+mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mesh_fast_tx_key *key);
bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 ctrl_flags);
void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 91b55d6a68b973..a6b62169f08483 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -37,8 +37,8 @@ static const struct rhashtable_params mesh_rht_params = {
static const struct rhashtable_params fast_tx_rht_params = {
.nelem_hint = 10,
.automatic_shrinking = true,
- .key_len = ETH_ALEN,
- .key_offset = offsetof(struct ieee80211_mesh_fast_tx, addr_key),
+ .key_len = sizeof_field(struct ieee80211_mesh_fast_tx, key),
+ .key_offset = offsetof(struct ieee80211_mesh_fast_tx, key),
.head_offset = offsetof(struct ieee80211_mesh_fast_tx, rhash),
.hashfn = mesh_table_hash,
};
@@ -431,20 +431,21 @@ static void mesh_fast_tx_entry_free(struct mesh_tx_cache *cache,
}
struct ieee80211_mesh_fast_tx *
-mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, const u8 *addr)
+mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mesh_fast_tx_key *key)
{
struct ieee80211_mesh_fast_tx *entry;
struct mesh_tx_cache *cache;
cache = &sdata->u.mesh.tx_cache;
- entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params);
+ entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params);
if (!entry)
return NULL;
if (!(entry->mpath->flags & MESH_PATH_ACTIVE) ||
mpath_expired(entry->mpath)) {
spin_lock_bh(&cache->walk_lock);
- entry = rhashtable_lookup(&cache->rht, addr, fast_tx_rht_params);
+ entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params);
if (entry)
mesh_fast_tx_entry_free(cache, entry);
spin_unlock_bh(&cache->walk_lock);
@@ -489,18 +490,24 @@ void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata,
if (!sta)
return;
+ build.key.type = MESH_FAST_TX_TYPE_LOCAL;
if ((meshhdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) {
/* This is required to keep the mppath alive */
mppath = mpp_path_lookup(sdata, meshhdr->eaddr1);
if (!mppath)
return;
build.mppath = mppath;
+ if (!ether_addr_equal(meshhdr->eaddr2, sdata->vif.addr))
+ build.key.type = MESH_FAST_TX_TYPE_PROXIED;
} else if (ieee80211_has_a4(hdr->frame_control)) {
mppath = mpath;
} else {
return;
}
+ if (!ether_addr_equal(hdr->addr4, sdata->vif.addr))
+ build.key.type = MESH_FAST_TX_TYPE_FORWARDED;
+
/* rate limit, in case fast xmit can't be enabled */
if (mppath->fast_tx_check == jiffies)
return;
@@ -547,7 +554,7 @@ void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata,
}
}
- memcpy(build.addr_key, mppath->dst, ETH_ALEN);
+ memcpy(build.key.addr, mppath->dst, ETH_ALEN);
build.timestamp = jiffies;
build.fast_tx.band = info->band;
build.fast_tx.da_offs = offsetof(struct ieee80211_hdr, addr3);
@@ -646,12 +653,18 @@ void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata,
const u8 *addr)
{
struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache;
+ struct ieee80211_mesh_fast_tx_key key = {};
struct ieee80211_mesh_fast_tx *entry;
+ int i;
+ ether_addr_copy(key.addr, addr);
spin_lock_bh(&cache->walk_lock);
- entry = rhashtable_lookup_fast(&cache->rht, addr, fast_tx_rht_params);
- if (entry)
- mesh_fast_tx_entry_free(cache, entry);
+ for (i = 0; i < NUM_MESH_FAST_TX_TYPE; i++) {
+ key.type = i;
+ entry = rhashtable_lookup_fast(&cache->rht, &key, fast_tx_rht_params);
+ if (entry)
+ mesh_fast_tx_entry_free(cache, entry);
+ }
spin_unlock_bh(&cache->walk_lock);
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 47a2cba8313f04..3bbb216a0fc8ce 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -616,7 +616,6 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata,
.from_ap = true,
.start = ies->data,
.len = ies->len,
- .mode = conn->mode,
};
struct ieee802_11_elems *elems;
struct ieee80211_supported_band *sband;
@@ -625,6 +624,7 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata,
int ret;
again:
+ parse_params.mode = conn->mode;
elems = ieee802_11_parse_elems_full(&parse_params);
if (!elems)
return ERR_PTR(-ENOMEM);
@@ -632,15 +632,21 @@ again:
ap_mode = ieee80211_determine_ap_chan(sdata, channel, bss->vht_cap_info,
elems, false, conn, &ap_chandef);
- mlme_link_id_dbg(sdata, link_id, "determined AP %pM to be %s\n",
- cbss->bssid, ieee80211_conn_mode_str(ap_mode));
-
/* this should be impossible since parsing depends on our mode */
if (WARN_ON(ap_mode > conn->mode)) {
ret = -EINVAL;
goto free;
}
+ if (conn->mode != ap_mode) {
+ conn->mode = ap_mode;
+ kfree(elems);
+ goto again;
+ }
+
+ mlme_link_id_dbg(sdata, link_id, "determined AP %pM to be %s\n",
+ cbss->bssid, ieee80211_conn_mode_str(ap_mode));
+
sband = sdata->local->hw.wiphy->bands[channel->band];
switch (channel->band) {
@@ -691,7 +697,6 @@ again:
break;
}
- conn->mode = ap_mode;
chanreq->oper = ap_chandef;
/* wider-bandwidth OFDMA is only done in EHT */
@@ -753,8 +758,10 @@ again:
}
/* the mode can only decrease, so this must terminate */
- if (ap_mode != conn->mode)
+ if (ap_mode != conn->mode) {
+ kfree(elems);
goto again;
+ }
mlme_link_id_dbg(sdata, link_id,
"connecting with %s mode, max bandwidth %d MHz\n",
@@ -5812,7 +5819,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
*/
if (control &
IEEE80211_MLE_STA_RECONF_CONTROL_AP_REM_TIMER_PRESENT)
- link_removal_timeout[link_id] = le16_to_cpu(*(__le16 *)pos);
+ link_removal_timeout[link_id] = get_unaligned_le16(pos);
}
removed_links &= sdata->vif.valid_links;
@@ -5837,8 +5844,11 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata,
continue;
}
- link_delay = link_conf->beacon_int *
- link_removal_timeout[link_id];
+ if (link_removal_timeout[link_id] < 1)
+ link_delay = 0;
+ else
+ link_delay = link_conf->beacon_int *
+ (link_removal_timeout[link_id] - 1);
if (!delay)
delay = link_delay;
@@ -5874,6 +5884,15 @@ static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata,
}
if (sdata->vif.active_links != active_links) {
+ /* usable links are affected when active_links are changed,
+ * so notify the driver about the status change
+ */
+ changed |= BSS_CHANGED_MLD_VALID_LINKS;
+ active_links &= sdata->vif.active_links;
+ if (!active_links)
+ active_links =
+ BIT(__ffs(sdata->vif.valid_links &
+ ~dormant_links));
ret = ieee80211_set_active_links(&sdata->vif, active_links);
if (ret) {
sdata_info(sdata, "Failed to set TTLM active links\n");
@@ -5888,7 +5907,6 @@ static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata,
goto out;
}
- changed |= BSS_CHANGED_MLD_VALID_LINKS;
sdata->vif.suspended_links = suspended_links;
if (sdata->vif.suspended_links)
changed |= BSS_CHANGED_MLD_TTLM;
@@ -6185,7 +6203,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
link->u.mgd.dtim_period = elems->dtim_period;
link->u.mgd.have_beacon = true;
ifmgd->assoc_data->need_beacon = false;
- if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
+ if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY) &&
+ !ieee80211_is_s1g_beacon(hdr->frame_control)) {
link->conf->sync_tsf =
le64_to_cpu(mgmt->u.beacon.timestamp);
link->conf->sync_device_ts =
@@ -7652,7 +7671,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
sdata_info(sdata,
"failed to insert STA entry for the AP (error %d)\n",
err);
- goto out_err;
+ goto out_release_chan;
}
} else
WARN_ON_ONCE(!ether_addr_equal(link->u.mgd.bssid, cbss->bssid));
@@ -7663,8 +7682,9 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
return 0;
+out_release_chan:
+ ieee80211_link_release_channel(link);
out_err:
- ieee80211_link_release_channel(&sdata->deflink);
ieee80211_vif_set_links(sdata, 0, 0);
return err;
}
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 23404b275457a7..4dc1def6954865 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -877,6 +877,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
struct ieee80211_sub_if_data *sdata;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_supported_band *sband;
+ u32 mask = ~0;
rate_control_fill_sta_table(sta, info, dest, max_rates);
@@ -889,9 +890,12 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
if (ieee80211_is_tx_data(skb))
rate_control_apply_mask(sdata, sta, sband, dest, max_rates);
+ if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX))
+ mask = sdata->rc_rateidx_mask[info->band];
+
if (dest[0].idx < 0)
__rate_control_send_low(&sdata->local->hw, sband, sta, info,
- sdata->rc_rateidx_mask[info->band]);
+ mask);
if (sta)
rate_fixup_ratelist(vif, sband, info, dest, max_rates);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c1f8501384056d..6e24864f9a40ba 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2763,7 +2763,10 @@ ieee80211_rx_mesh_fast_forward(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, int hdrlen)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- struct ieee80211_mesh_fast_tx *entry = NULL;
+ struct ieee80211_mesh_fast_tx_key key = {
+ .type = MESH_FAST_TX_TYPE_FORWARDED
+ };
+ struct ieee80211_mesh_fast_tx *entry;
struct ieee80211s_hdr *mesh_hdr;
struct tid_ampdu_tx *tid_tx;
struct sta_info *sta;
@@ -2772,9 +2775,13 @@ ieee80211_rx_mesh_fast_forward(struct ieee80211_sub_if_data *sdata,
mesh_hdr = (struct ieee80211s_hdr *)(skb->data + sizeof(eth));
if ((mesh_hdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6)
- entry = mesh_fast_tx_get(sdata, mesh_hdr->eaddr1);
+ ether_addr_copy(key.addr, mesh_hdr->eaddr1);
else if (!(mesh_hdr->flags & MESH_FLAGS_AE))
- entry = mesh_fast_tx_get(sdata, skb->data);
+ ether_addr_copy(key.addr, skb->data);
+ else
+ return false;
+
+ entry = mesh_fast_tx_get(sdata, &key);
if (!entry)
return false;
@@ -3780,6 +3787,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
}
break;
case WLAN_CATEGORY_PROTECTED_EHT:
+ if (len < offsetofend(typeof(*mgmt),
+ u.action.u.ttlm_req.action_code))
+ break;
+
switch (mgmt->u.action.u.ttlm_req.action_code) {
case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ:
if (sdata->vif.type != NL80211_IFTYPE_STATION)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 0429e59ba387c9..73850312580f70 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -648,6 +648,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata,
cpu_to_le16(IEEE80211_SN_TO_SEQ(sn));
}
IEEE80211_SKB_CB(skb)->flags |= tx_flags;
+ IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX;
ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band);
}
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 6bf223e6cd1a54..cfd0a62d0152bd 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -698,11 +698,16 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
txrc.bss_conf = &tx->sdata->vif.bss_conf;
txrc.skb = tx->skb;
txrc.reported_rate.idx = -1;
- txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
- if (tx->sdata->rc_has_mcs_mask[info->band])
- txrc.rate_idx_mcs_mask =
- tx->sdata->rc_rateidx_mcs_mask[info->band];
+ if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) {
+ txrc.rate_idx_mask = ~0;
+ } else {
+ txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
+
+ if (tx->sdata->rc_has_mcs_mask[info->band])
+ txrc.rate_idx_mcs_mask =
+ tx->sdata->rc_rateidx_mcs_mask[info->band];
+ }
txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP ||
tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3a1967bc7bad63..7e74b812e366ae 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3937,8 +3937,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
mptcp_set_state(newsk, TCP_CLOSE);
}
} else {
- MPTCP_INC_STATS(sock_net(ssk),
- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
tcpfallback:
newsk->sk_kern_sock = kern;
lock_sock(newsk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index dcd1c76d2a3ba1..73fdf423de44ee 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1493,6 +1493,10 @@ int mptcp_set_rcvlowat(struct sock *sk, int val)
struct mptcp_subflow_context *subflow;
int space, cap;
+ /* bpf can land here with a wrong sk type */
+ if (sk->sk_protocol == IPPROTO_TCP)
+ return -EINVAL;
+
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
else
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 1626dd20c68f1f..6042a47da61be8 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -905,6 +905,8 @@ dispose_child:
return child;
fallback:
+ if (fallback)
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
mptcp_subflow_drop_ctx(child);
return child;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index a0921adc31a9ff..1e689c71412716 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -126,7 +126,8 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
if (sctph->source != cp->vport || payload_csum ||
skb->ip_summed == CHECKSUM_PARTIAL) {
sctph->source = cp->vport;
- sctp_nat_csum(skb, sctph, sctphoff);
+ if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ sctp_nat_csum(skb, sctph, sctphoff);
} else {
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -174,7 +175,8 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
(skb->ip_summed == CHECKSUM_PARTIAL &&
!(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
sctph->dest = cp->dport;
- sctp_nat_csum(skb, sctph, sctphoff);
+ if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ sctp_nat_csum(skb, sctph, sctphoff);
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 9505f9d188ff25..6eef15648b7b08 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
proto = veth->h_vlan_encapsulated_proto;
break;
case htons(ETH_P_PPP_SES):
- proto = nf_flow_pppoe_proto(skb);
+ if (!nf_flow_pppoe_proto(skb, &proto))
+ return NF_ACCEPT;
break;
default:
proto = skb->protocol;
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index e45fade7640961..5383bed3d3e002 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -157,7 +157,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
tuple->encap[i].proto = skb->protocol;
break;
case htons(ETH_P_PPP_SES):
- phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+ phdr = (struct pppoe_hdr *)skb_network_header(skb);
tuple->encap[i].id = ntohs(phdr->sid);
tuple->encap[i].proto = skb->protocol;
break;
@@ -273,10 +273,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
-static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
u32 *offset)
{
struct vlan_ethhdr *veth;
+ __be16 inner_proto;
switch (skb->protocol) {
case htons(ETH_P_8021Q):
@@ -287,7 +288,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
}
break;
case htons(ETH_P_PPP_SES):
- if (nf_flow_pppoe_proto(skb) == proto) {
+ if (nf_flow_pppoe_proto(skb, &inner_proto) &&
+ inner_proto == proto) {
*offset += PPPOE_SES_HLEN;
return true;
}
@@ -316,7 +318,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
skb_reset_network_header(skb);
break;
case htons(ETH_P_PPP_SES):
- skb->protocol = nf_flow_pppoe_proto(skb);
+ skb->protocol = __nf_flow_pppoe_proto(skb);
skb_pull(skb, PPPOE_SES_HLEN);
skb_reset_network_header(skb);
break;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5fa3d3540c93c0..167074283ea91d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -594,6 +594,12 @@ static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
+ nft_set_elem_change_active(ctx->net, set, ext);
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
return 0;
@@ -617,6 +623,7 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
+ nft_set_elem_change_active(ctx->net, set, ext);
nft_setelem_data_deactivate(ctx->net, set, catchall->elem);
break;
}
@@ -626,6 +633,7 @@ static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
{
struct nft_set_iter iter = {
.genmask = nft_genmask_next(ctx->net),
+ .type = NFT_ITER_UPDATE,
.fn = nft_mapelem_deactivate,
};
@@ -1200,6 +1208,26 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
__NFT_TABLE_F_WAS_AWAKEN | \
__NFT_TABLE_F_WAS_ORPHAN)
+static bool nft_table_pending_update(const struct nft_ctx *ctx)
+{
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ struct nft_trans *trans;
+
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return true;
+
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
+ if (trans->ctx.table == ctx->table &&
+ ((trans->msg_type == NFT_MSG_NEWCHAIN &&
+ nft_trans_chain_update(trans)) ||
+ (trans->msg_type == NFT_MSG_DELCHAIN &&
+ nft_is_base_chain(trans->ctx.chain))))
+ return true;
+ }
+
+ return false;
+}
+
static int nf_tables_updtable(struct nft_ctx *ctx)
{
struct nft_trans *trans;
@@ -1226,7 +1254,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
return -EOPNOTSUPP;
/* No dormant off/on/off/on games in single transaction */
- if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ if (nft_table_pending_update(ctx))
return -EINVAL;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
@@ -2430,6 +2458,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_stats __percpu *stats = NULL;
struct nft_chain_hook hook = {};
+ if (table->flags & __NFT_TABLE_F_UPDATE)
+ return -EINVAL;
+
if (flags & NFT_CHAIN_BINDING)
return -EOPNOTSUPP;
@@ -2631,6 +2662,13 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
}
}
+ if (table->flags & __NFT_TABLE_F_UPDATE &&
+ !list_empty(&hook.list)) {
+ NL_SET_BAD_ATTR(extack, attr);
+ err = -EOPNOTSUPP;
+ goto err_hooks;
+ }
+
if (!(table->flags & NFT_TABLE_F_DORMANT) &&
nft_is_base_chain(chain) &&
!list_empty(&hook.list)) {
@@ -2860,6 +2898,9 @@ static int nft_delchain_hook(struct nft_ctx *ctx,
struct nft_trans *trans;
int err;
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return -EOPNOTSUPP;
+
err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook,
ctx->family, chain->flags, extack);
if (err < 0)
@@ -2944,7 +2985,8 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (nla[NFTA_CHAIN_HOOK]) {
- if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN ||
+ chain->flags & NFT_CHAIN_HW_OFFLOAD)
return -EOPNOTSUPP;
if (nft_is_base_chain(chain)) {
@@ -3026,7 +3068,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
{
const struct nft_expr_type *type, *candidate = NULL;
- list_for_each_entry(type, &nf_tables_expressions, list) {
+ list_for_each_entry_rcu(type, &nf_tables_expressions, list) {
if (!nla_strcmp(nla, type->name)) {
if (!type->family && !candidate)
candidate = type;
@@ -3058,9 +3100,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net,
if (nla == NULL)
return ERR_PTR(-EINVAL);
+ rcu_read_lock();
type = __nft_expr_type_get(family, nla);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
@@ -3841,6 +3887,9 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_data *data;
int err;
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
@@ -3864,17 +3913,20 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
{
- u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_iter dummy_iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ };
struct nft_set_elem_catchall *catchall;
+
struct nft_set_ext *ext;
int ret = 0;
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask))
+ if (!nft_set_elem_active(ext, dummy_iter.genmask))
continue;
- ret = nft_setelem_validate(ctx, set, NULL, catchall->elem);
+ ret = nft_setelem_validate(ctx, set, &dummy_iter, catchall->elem);
if (ret < 0)
return ret;
}
@@ -5363,6 +5415,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
return nft_setelem_data_validate(ctx, set, elem_priv);
}
@@ -5407,6 +5464,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
iter.genmask = nft_genmask_next(ctx->net);
+ iter.type = NFT_ITER_UPDATE;
iter.skip = 0;
iter.count = 0;
iter.err = 0;
@@ -5454,6 +5512,13 @@ static int nft_mapelem_activate(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+ /* called from abort path, reverse check to undo changes. */
+ if (nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
+ nft_clear(ctx->net, ext);
nft_setelem_data_activate(ctx->net, set, elem_priv);
return 0;
@@ -5471,6 +5536,7 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
+ nft_clear(ctx->net, ext);
nft_setelem_data_activate(ctx->net, set, catchall->elem);
break;
}
@@ -5480,6 +5546,7 @@ static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
{
struct nft_set_iter iter = {
.genmask = nft_genmask_next(ctx->net),
+ .type = NFT_ITER_UPDATE,
.fn = nft_mapelem_activate,
};
@@ -5744,6 +5811,9 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_set_dump_args *args;
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext))
return 0;
@@ -5854,6 +5924,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
args.skb = skb;
args.reset = dump_ctx->reset;
args.iter.genmask = nft_genmask_cur(net);
+ args.iter.type = NFT_ITER_READ;
args.iter.skip = cb->args[0];
args.iter.count = 0;
args.iter.err = 0;
@@ -6593,7 +6664,7 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_setelem_is_catchall(set, elem_priv)) {
- nft_set_elem_change_active(net, set, ext);
+ nft_clear(net, ext);
} else {
set->ops->activate(net, set, elem_priv);
}
@@ -7152,6 +7223,16 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
}
}
+static int nft_setelem_active_next(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+ u8 genmask = nft_genmask_next(net);
+
+ return nft_set_elem_active(ext, genmask);
+}
+
static void nft_setelem_data_activate(const struct net *net,
const struct nft_set *set,
struct nft_elem_priv *elem_priv)
@@ -7275,8 +7356,12 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_trans *trans;
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
sizeof(struct nft_trans_elem), GFP_ATOMIC);
if (!trans)
@@ -7338,6 +7423,7 @@ static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
{
struct nft_set_iter iter = {
.genmask = genmask,
+ .type = NFT_ITER_UPDATE,
.fn = nft_setelem_flush,
};
@@ -7573,7 +7659,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
{
const struct nft_object_type *type;
- list_for_each_entry(type, &nf_tables_objects, list) {
+ list_for_each_entry_rcu(type, &nf_tables_objects, list) {
if (type->family != NFPROTO_UNSPEC &&
type->family != family)
continue;
@@ -7589,9 +7675,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family)
{
const struct nft_object_type *type;
+ rcu_read_lock();
type = __nft_obj_type_get(objtype, family);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
@@ -8263,11 +8353,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
return err;
}
+/* call under rcu_read_lock */
static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
{
const struct nf_flowtable_type *type;
- list_for_each_entry(type, &nf_tables_flowtables, list) {
+ list_for_each_entry_rcu(type, &nf_tables_flowtables, list) {
if (family == type->family)
return type;
}
@@ -8279,9 +8370,13 @@ nft_flowtable_type_get(struct net *net, u8 family)
{
const struct nf_flowtable_type *type;
+ rcu_read_lock();
type = __nft_flowtable_type_get(family);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
@@ -10182,9 +10277,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
if (nft_trans_chain_update(trans)) {
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
&nft_trans_chain_hooks(trans));
- nft_netdev_unregister_hooks(net,
- &nft_trans_chain_hooks(trans),
- true);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_netdev_unregister_hooks(net,
+ &nft_trans_chain_hooks(trans),
+ true);
+ }
} else {
nft_chain_del(trans->ctx.chain);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
@@ -10423,10 +10520,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
struct nft_trans *trans, *next;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
+ int err = 0;
if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
- return -EAGAIN;
+ err = -EAGAIN;
list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
list) {
@@ -10460,9 +10558,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
- nft_netdev_unregister_hooks(net,
- &nft_trans_chain_hooks(trans),
- true);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_netdev_unregister_hooks(net,
+ &nft_trans_chain_hooks(trans),
+ true);
+ }
free_percpu(nft_trans_chain_stats(trans));
kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
@@ -10554,8 +10654,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSETELEM:
te = (struct nft_trans_elem *)trans->data;
- nft_setelem_data_activate(net, te->set, te->elem_priv);
- nft_setelem_activate(net, te->set, te->elem_priv);
+ if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
+ nft_setelem_data_activate(net, te->set, te->elem_priv);
+ nft_setelem_activate(net, te->set, te->elem_priv);
+ }
if (!nft_setelem_is_catchall(te->set, te->elem_priv))
te->set->ndeact--;
@@ -10616,12 +10718,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nf_tables_abort_release(trans);
}
- if (action == NFNL_ABORT_AUTOLOAD)
- nf_tables_module_autoload(net);
- else
- nf_tables_module_autoload_cleanup(net);
-
- return 0;
+ return err;
}
static int nf_tables_abort(struct net *net, struct sk_buff *skb,
@@ -10634,6 +10731,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
gc_seq = nft_gc_seq_begin(nft_net);
ret = __nf_tables_abort(net, action);
nft_gc_seq_end(nft_net, gc_seq);
+
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+ /* module autoload needs to happen after GC sequence update because it
+ * temporarily releases and grabs mutex again.
+ */
+ if (action == NFNL_ABORT_AUTOLOAD)
+ nf_tables_module_autoload(net);
+ else
+ nf_tables_module_autoload_cleanup(net);
+
mutex_unlock(&nft_net->commit_mutex);
return ret;
@@ -10737,6 +10845,9 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
@@ -10821,6 +10932,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
continue;
iter.genmask = nft_genmask_next(ctx->net);
+ iter.type = NFT_ITER_UPDATE;
iter.skip = 0;
iter.count = 0;
iter.err = 0;
@@ -11439,9 +11551,10 @@ static void __net_exit nf_tables_exit_net(struct net *net)
gc_seq = nft_gc_seq_begin(nft_net);
- if (!list_empty(&nft_net->commit_list) ||
- !list_empty(&nft_net->module_list))
- __nf_tables_abort(net, NFNL_ABORT_NONE);
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+ if (!list_empty(&nft_net->module_list))
+ nf_tables_module_autoload_cleanup(net);
__nft_release_tables(net);
@@ -11533,6 +11646,7 @@ static void __exit nf_tables_module_exit(void)
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
nft_chain_route_fini();
+ nf_tables_trans_destroy_flush_work();
unregister_pernet_subsys(&nf_tables_net_ops);
cancel_work_sync(&trans_gc_work);
cancel_work_sync(&trans_destroy_work);
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 274b6f7e6bb57e..d170758a1eb5d0 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -338,7 +338,9 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
return;
if (n > 1) {
- nf_unregister_net_hook(ctx->net, &found->ops);
+ if (!(ctx->chain->table->flags & NFT_TABLE_F_DORMANT))
+ nf_unregister_net_hook(ctx->net, &found->ops);
+
list_del_rcu(&found->list);
kfree_rcu(found, rcu);
return;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index a0055f510e31e9..b314ca728a2912 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -216,6 +216,7 @@ static int nft_lookup_validate(const struct nft_ctx *ctx,
return 0;
iter.genmask = nft_genmask_next(ctx->net);
+ iter.type = NFT_ITER_UPDATE;
iter.skip = 0;
iter.count = 0;
iter.err = 0;
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 32df7a16835da3..1caa04619dc6da 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -172,7 +172,7 @@ static void nft_bitmap_activate(const struct net *net,
nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
/* Enter 11 state. */
priv->bitmap[idx] |= (genmask << off);
- nft_set_elem_change_active(net, set, &be->ext);
+ nft_clear(net, &be->ext);
}
static void nft_bitmap_flush(const struct net *net,
@@ -222,8 +222,6 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
list_for_each_entry_rcu(be, &priv->list, head) {
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&be->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &be->priv);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 6968a3b342367c..daa56dda737ae2 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -199,7 +199,7 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
{
struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &he->ext);
+ nft_clear(net, &he->ext);
}
static void nft_rhash_flush(const struct net *net,
@@ -286,8 +286,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&he->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
@@ -599,7 +597,7 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set,
{
struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &he->ext);
+ nft_clear(net, &he->ext);
}
static void nft_hash_flush(const struct net *net,
@@ -652,8 +650,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
hlist_for_each_entry_rcu(he, &priv->table[i], node) {
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&he->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index df8de509024637..187138afac45d4 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1847,7 +1847,7 @@ static void nft_pipapo_activate(const struct net *net,
{
struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &e->ext);
+ nft_clear(net, &e->ext);
}
/**
@@ -2077,6 +2077,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
rules_fx = rules_f0;
nft_pipapo_for_each_field(f, i, m) {
+ bool last = i == m->field_count - 1;
+
if (!pipapo_match_field(f, start, rules_fx,
match_start, match_end))
break;
@@ -2089,16 +2091,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
- }
- if (i == m->field_count) {
- priv->dirty = true;
- pipapo_drop(m, rulemap);
- return;
+ if (last && f->mt[rulemap[i].to].e == e) {
+ priv->dirty = true;
+ pipapo_drop(m, rulemap);
+ return;
+ }
}
first_rule += rules_f0;
}
+
+ WARN_ON_ONCE(1); /* elem_priv not found */
}
/**
@@ -2115,13 +2119,15 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_iter *iter)
{
struct nft_pipapo *priv = nft_set_priv(set);
- struct net *net = read_pnet(&set->net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
unsigned int i, r;
+ WARN_ON_ONCE(iter->type != NFT_ITER_READ &&
+ iter->type != NFT_ITER_UPDATE);
+
rcu_read_lock();
- if (iter->genmask == nft_genmask_cur(net))
+ if (iter->type == NFT_ITER_READ)
m = rcu_dereference(priv->match);
else
m = priv->clone;
@@ -2143,9 +2149,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
e = f->mt[r].e;
- if (!nft_set_elem_active(&e->ext, iter->genmask))
- goto cont;
-
iter->err = iter->fn(ctx, set, iter, &e->priv);
if (iter->err < 0)
goto out;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 9944fe479e5361..b7ea21327549b3 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -532,7 +532,7 @@ static void nft_rbtree_activate(const struct net *net,
{
struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &rbe->ext);
+ nft_clear(net, &rbe->ext);
}
static void nft_rbtree_flush(const struct net *net,
@@ -600,8 +600,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&rbe->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &rbe->priv);
if (iter->err < 0) {
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 819157bbb5a2c6..d5344563e525c9 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -252,10 +252,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt > LLCP_MAX_RW) {
err = -EINVAL;
@@ -274,10 +274,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt > LLCP_MAX_MIUX) {
err = -EINVAL;
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index cdad47b140fa4b..0d26c8ec9993ea 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1516,6 +1516,11 @@ static void nci_rx_work(struct work_struct *work)
nfc_send_to_raw_sock(ndev->nfc_dev, skb,
RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);
+ if (!nci_plen(skb->data)) {
+ kfree_skb(skb);
+ break;
+ }
+
/* Process frame */
switch (nci_mt(skb->data)) {
case NCI_MT_RSP_PKT:
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index f4a38bd6a7e04f..bfb7758063f315 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -77,13 +77,15 @@ EXPORT_SYMBOL_GPL(nsh_pop);
static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
+ unsigned int outer_hlen, mac_len, nsh_len;
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
- unsigned int nsh_len, mac_len;
- __be16 proto;
+ __be16 outer_proto, proto;
skb_reset_network_header(skb);
+ outer_proto = skb->protocol;
+ outer_hlen = skb_mac_header_len(skb);
mac_len = skb->mac_len;
if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
@@ -113,10 +115,10 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
}
for (skb = segs; skb; skb = skb->next) {
- skb->protocol = htons(ETH_P_NSH);
- __skb_push(skb, nsh_len);
- skb->mac_header = mac_offset;
- skb->network_header = skb->mac_header + mac_len;
+ skb->protocol = outer_proto;
+ __skb_push(skb, nsh_len + outer_hlen);
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, outer_hlen);
skb->mac_len = mac_len;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3019a4406ca4f7..2928c142a2ddb3 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1380,8 +1380,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
- pr_info_ratelimited("Failed to associated timeout "
- "policy `%s'\n", ct_info.timeout);
+ OVS_NLERR(log,
+ "Failed to associated timeout policy '%s'",
+ ct_info.timeout);
else
ct_info.nf_ct_timeout = rcu_dereference(
nf_ct_timeout_find(ct_info.ct)->timeout);
@@ -1592,9 +1593,9 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
struct hlist_head *head = &info->limits[i];
struct ovs_ct_limit *ct_limit;
+ struct hlist_node *next;
- hlist_for_each_entry_rcu(ct_limit, head, hlist_node,
- lockdep_ovsl_is_held())
+ hlist_for_each_entry_safe(ct_limit, next, head, hlist_node)
kfree_rcu(ct_limit, rcu);
}
kfree(info->limits);
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index a4e3c5de998be4..00dbcd4d28e680 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
}
ret = PTR_ERR(trans_private);
/* Trigger connection so that its ready for the next retry */
- if (ret == -ENODEV)
+ if (ret == -ENODEV && cp)
rds_conn_connect_if_down(cp->cp_conn);
goto out;
}
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 39945b139c4817..cd0accaf844a18 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -241,13 +241,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_skbmod *d = to_skbmod(a);
unsigned char *b = skb_tail_pointer(skb);
struct tcf_skbmod_params *p;
- struct tc_skbmod opt = {
- .index = d->tcf_index,
- .refcnt = refcount_read(&d->tcf_refcnt) - ref,
- .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
- };
+ struct tc_skbmod opt;
struct tcf_t t;
+ memset(&opt, 0, sizeof(opt));
+ opt.index = d->tcf_index;
+ opt.refcnt = refcount_read(&d->tcf_refcnt) - ref,
+ opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind;
spin_lock_bh(&d->tcf_lock);
opt.action = d->tcf_action;
p = rcu_dereference_protected(d->skbmod_p,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 65e05b0c98e461..60239378d43fb7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -809,7 +809,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
!qdisc_is_offloaded);
/* TODO: perform the search on a per txq basis */
- sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
+ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
WARN_ON_ONCE(parentid != TC_H_ROOT);
break;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ff533649377750..4a2c763e2d1166 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -974,6 +974,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
+ sch->owner = -1;
netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL);
refcount_set(&sch->refcnt, 1);
diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h
index c53b329092d405..4ebc1b7043d917 100644
--- a/net/sunrpc/auth_gss/auth_gss_internal.h
+++ b/net/sunrpc/auth_gss/auth_gss_internal.h
@@ -23,7 +23,7 @@ simple_get_bytes(const void *p, const void *end, void *res, size_t len)
}
static inline const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
+simple_get_netobj_noprof(const void *p, const void *end, struct xdr_netobj *dest)
{
const void *q;
unsigned int len;
@@ -35,7 +35,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
if (len) {
- dest->data = kmemdup(p, len, GFP_KERNEL);
+ dest->data = kmemdup_noprof(p, len, GFP_KERNEL);
if (unlikely(dest->data == NULL))
return ERR_PTR(-ENOMEM);
} else
@@ -43,3 +43,5 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
dest->len = len;
return q;
}
+
+#define simple_get_netobj(...) alloc_hooks(simple_get_netobj_noprof(__VA_ARGS__))
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index b2c1b683a88ee2..d2b02710ab0709 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -921,8 +921,6 @@ out_err:
* Caller provides the truncation length of the output token (h) in
* cksumout.len.
*
- * Note that for RPCSEC, the "initial cipher state" is always all zeroes.
- *
* Return values:
* %GSS_S_COMPLETE: Digest computed, @cksumout filled in
* %GSS_S_FAILURE: Call failed
@@ -933,19 +931,22 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
int body_offset, struct xdr_netobj *cksumout)
{
unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher);
- static const u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
struct ahash_request *req;
struct scatterlist sg[1];
+ u8 *iv, *checksumdata;
int err = -ENOMEM;
- u8 *checksumdata;
checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL);
if (!checksumdata)
return GSS_S_FAILURE;
+ /* For RPCSEC, the "initial cipher state" is always all zeroes. */
+ iv = kzalloc(ivsize, GFP_KERNEL);
+ if (!iv)
+ goto out_free_mem;
req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
- goto out_free_cksumdata;
+ goto out_free_mem;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
err = crypto_ahash_init(req);
if (err)
@@ -969,7 +970,8 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
out_free_ahash:
ahash_request_free(req);
-out_free_cksumdata:
+out_free_mem:
+ kfree(iv);
kfree_sensitive(checksumdata);
return err ? GSS_S_FAILURE : GSS_S_COMPLETE;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 545017a3daa4d6..6b3f01beb294b9 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1206,15 +1206,6 @@ err_noclose:
* MSG_SPLICE_PAGES is used exclusively to reduce the number of
* copy operations in this path. Therefore the caller must ensure
* that the pages backing @xdr are unchanging.
- *
- * Note that the send is non-blocking. The caller has incremented
- * the reference count on each page backing the RPC message, and
- * the network layer will "put" these pages when transmission is
- * complete.
- *
- * This is safe for our RPC services because the memory backing
- * the head and tail components is never kmalloc'd. These always
- * come from pages in the svc_rqst::rq_pages array.
*/
static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
rpc_fraghdr marker, unsigned int *sentp)
@@ -1244,6 +1235,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
1 + count, sizeof(marker) + rqstp->rq_res.len);
ret = sock_sendmsg(svsk->sk_sock, &msg);
+ page_frag_free(buf);
if (ret < 0)
return ret;
*sentp += ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index f2a100c4c81f12..40797114d50a49 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -231,28 +231,6 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
}
/**
- * svc_rdma_write_chunk_release - Release Write chunk I/O resources
- * @rdma: controlling transport
- * @ctxt: Send context that is being released
- */
-void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt)
-{
- struct svc_rdma_write_info *info;
- struct svc_rdma_chunk_ctxt *cc;
-
- while (!list_empty(&ctxt->sc_write_info_list)) {
- info = list_first_entry(&ctxt->sc_write_info_list,
- struct svc_rdma_write_info, wi_list);
- list_del(&info->wi_list);
-
- cc = &info->wi_cc;
- svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
- svc_rdma_write_info_free(info);
- }
-}
-
-/**
* svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
* @rdma: controlling transport
* @ctxt: Send context that is being released
@@ -308,11 +286,13 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+ struct svc_rdma_write_info *info =
+ container_of(cc, struct svc_rdma_write_info, wi_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
trace_svcrdma_wc_write(&cc->cc_cid);
- return;
+ break;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
break;
@@ -320,11 +300,12 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
}
- /* The RDMA Write has flushed, so the client won't get
- * some of the outgoing RPC message. Signal the loss
- * to the client by closing the connection.
- */
- svc_xprt_deferred_close(&rdma->sc_xprt);
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
+
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ svc_xprt_deferred_close(&rdma->sc_xprt);
+
+ svc_rdma_write_info_free(info);
}
/**
@@ -620,19 +601,13 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
return xdr->len;
}
-/* Link Write WRs for @chunk onto @sctxt's WR chain.
- */
-static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_chunk *chunk,
- const struct xdr_buf *xdr)
+static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_chunk *chunk,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc;
- struct ib_send_wr *first_wr;
struct xdr_buf payload;
- struct list_head *pos;
- struct ib_cqe *cqe;
int ret;
if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
@@ -648,25 +623,10 @@ static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
if (ret != payload.len)
goto out_err;
- ret = -EINVAL;
- if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth))
- goto out_err;
-
- first_wr = sctxt->sc_wr_chain;
- cqe = &cc->cc_cqe;
- list_for_each(pos, &cc->cc_rwctxts) {
- struct svc_rdma_rw_ctxt *rwc;
-
- rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
- first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
- rdma->sc_port_num, cqe, first_wr);
- cqe = NULL;
- }
- sctxt->sc_wr_chain = first_wr;
- sctxt->sc_sqecount += cc->cc_sqecount;
- list_add(&info->wi_list, &sctxt->sc_write_info_list);
-
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
+ ret = svc_rdma_post_chunk_ctxt(rdma, cc);
+ if (ret < 0)
+ goto out_err;
return 0;
out_err:
@@ -675,27 +635,25 @@ out_err:
}
/**
- * svc_rdma_prepare_write_list - Construct WR chain for sending Write list
+ * svc_rdma_send_write_list - Send all chunks on the Write list
* @rdma: controlling RDMA transport
- * @write_pcl: Write list provisioned by the client
- * @sctxt: Send WR resources
+ * @rctxt: Write list provisioned by the client
* @xdr: xdr_buf containing an RPC Reply message
*
* Returns zero on success, or a negative errno if one or more
* Write chunks could not be sent.
*/
-int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
- const struct svc_rdma_pcl *write_pcl,
- struct svc_rdma_send_ctxt *sctxt,
- const struct xdr_buf *xdr)
+int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_recv_ctxt *rctxt,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_chunk *chunk;
int ret;
- pcl_for_each_chunk(chunk, write_pcl) {
+ pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
if (!chunk->ch_payload_length)
break;
- ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr);
+ ret = svc_rdma_send_write_chunk(rdma, chunk, xdr);
if (ret < 0)
return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index dfca39abd16c88..bb5436b719e051 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -142,7 +142,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
ctxt->sc_cqe.done = svc_rdma_wc_send;
- INIT_LIST_HEAD(&ctxt->sc_write_info_list);
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
rdma->sc_max_req_size);
@@ -228,7 +227,6 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
- svc_rdma_write_chunk_release(rdma, ctxt);
svc_rdma_reply_chunk_release(rdma, ctxt);
if (ctxt->sc_page_count)
@@ -1015,8 +1013,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!p)
goto put_ctxt;
- ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt,
- &rqstp->rq_res);
+ ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res);
if (ret < 0)
goto put_ctxt;
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 762f424ff2d59c..e5e47452308ab7 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -215,7 +215,7 @@ static inline struct sk_buff *tls_strp_msg(struct tls_sw_context_rx *ctx)
static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx)
{
- return ctx->strp.msg_ready;
+ return READ_ONCE(ctx->strp.msg_ready);
}
static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx)
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index ca1e0e198ceb45..5df08d848b5c9c 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -360,7 +360,7 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
if (strp->stm.full_len && strp->stm.full_len == skb->len) {
desc->count = 0;
- strp->msg_ready = 1;
+ WRITE_ONCE(strp->msg_ready, 1);
tls_rx_msg_ready(strp);
}
@@ -528,7 +528,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
if (!tls_strp_check_queue_ok(strp))
return tls_strp_read_copy(strp, false);
- strp->msg_ready = 1;
+ WRITE_ONCE(strp->msg_ready, 1);
tls_rx_msg_ready(strp);
return 0;
@@ -580,7 +580,7 @@ void tls_strp_msg_done(struct tls_strparser *strp)
else
tls_strp_flush_anchor_copy(strp);
- strp->msg_ready = 0;
+ WRITE_ONCE(strp->msg_ready, 0);
memset(&strp->stm, 0, sizeof(strp->stm));
tls_strp_check_rcv(strp);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 211f57164cb611..b783231668c651 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1976,10 +1976,10 @@ int tls_sw_recvmsg(struct sock *sk,
if (unlikely(flags & MSG_ERRQUEUE))
return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
- psock = sk_psock_get(sk);
err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT);
if (err < 0)
return err;
+ psock = sk_psock_get(sk);
bpf_strp_enabled = sk_psock_strp_enabled(psock);
/* If crypto failed the connection is broken */
@@ -2152,12 +2152,15 @@ recv_end:
}
/* Drain records from the rx_list & copy if required */
- if (is_peek || is_kvec)
+ if (is_peek)
err = process_rx_list(ctx, msg, &control, copied + peeked,
decrypted - peeked, is_peek, NULL);
else
err = process_rx_list(ctx, msg, &control, 0,
async_copy_bytes, is_peek, NULL);
+
+ /* we could have copied less than we wanted, and possibly nothing */
+ decrypted += max(err, 0) - async_copy_bytes;
}
copied += decrypted;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5b41e2321209ae..9a6ad5974dff5e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2663,9 +2663,13 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
WRITE_ONCE(u->oob_skb, NULL);
consume_skb(skb);
}
- } else if (!(flags & MSG_PEEK)) {
+ } else if (flags & MSG_PEEK) {
+ skb = NULL;
+ } else {
skb_unlink(skb, &sk->sk_receive_queue);
- consume_skb(skb);
+ WRITE_ONCE(u->oob_skb, NULL);
+ if (!WARN_ON_ONCE(skb_unref(skb)))
+ kfree_skb(skb);
skb = skb_peek(&sk->sk_receive_queue);
}
}
@@ -2739,18 +2743,16 @@ redo:
last = skb = skb_peek(&sk->sk_receive_queue);
last_len = last ? last->len : 0;
+again:
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
if (skb) {
skb = manage_oob(skb, sk, flags, copied);
- if (!skb) {
+ if (!skb && copied) {
unix_state_unlock(sk);
- if (copied)
- break;
- goto redo;
+ break;
}
}
#endif
-again:
if (skb == NULL) {
if (copied >= target)
goto unlock;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index fa39b626523851..0104be9d470456 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -274,11 +274,22 @@ static void __unix_gc(struct work_struct *work)
* receive queues. Other, non candidate sockets _can_ be
* added to queue, so we must make sure only to touch
* candidates.
+ *
+ * Embryos, though never candidates themselves, affect which
+ * candidates are reachable by the garbage collector. Before
+ * being added to a listener's queue, an embryo may already
+ * receive data carrying SCM_RIGHTS, potentially making the
+ * passed socket a candidate that is not yet reachable by the
+ * collector. It becomes reachable once the embryo is
+ * enqueued. Therefore, we must ensure that no SCM-laden
+ * embryo appears in a (candidate) listener's queue between
+ * consecutive scan_children() calls.
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+ struct sock *sk = &u->sk;
long total_refs;
- total_refs = file_count(u->sk.sk_socket->file);
+ total_refs = file_count(sk->sk_socket->file);
WARN_ON_ONCE(!u->inflight);
WARN_ON_ONCE(total_refs < u->inflight);
@@ -286,6 +297,11 @@ static void __unix_gc(struct work_struct *work)
list_move_tail(&u->link, &gc_candidates);
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+
+ if (sk->sk_state == TCP_LISTEN) {
+ unix_state_lock_nested(sk, U_LOCK_GC_LISTENER);
+ unix_state_unlock(sk);
+ }
}
}
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 1748268e0694f2..ee5d306a96d0f8 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -120,7 +120,6 @@ virtio_transport_send_pkt_work(struct work_struct *work)
if (!skb)
break;
- virtio_transport_deliver_tap_pkt(skb);
reply = virtio_vsock_skb_reply(skb);
sgs = vsock->out_sgs;
sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
@@ -170,6 +169,8 @@ virtio_transport_send_pkt_work(struct work_struct *work)
break;
}
+ virtio_transport_deliver_tap_pkt(skb);
+
if (reply) {
struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
int val;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b4edba6b0b7ba0..30ff9a47081348 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -14030,6 +14030,8 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info)
error:
for (i = 0; i < new_coalesce.n_rules; i++) {
tmp_rule = &new_coalesce.rules[i];
+ if (!tmp_rule)
+ continue;
for (j = 0; j < tmp_rule->n_patterns; j++)
kfree(tmp_rule->patterns[j].mask);
kfree(tmp_rule->patterns);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index e039e66ab37774..df013c98b80dfb 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1024,7 +1024,7 @@ TRACE_EVENT(rdev_get_mpp,
TRACE_EVENT(rdev_dump_mpp,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx,
u8 *dst, u8 *mpp),
- TP_ARGS(wiphy, netdev, _idx, mpp, dst),
+ TP_ARGS(wiphy, netdev, _idx, dst, mpp),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
@@ -1758,7 +1758,7 @@ TRACE_EVENT(rdev_return_void_tx_rx,
DECLARE_EVENT_CLASS(tx_rx_evt,
TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx),
- TP_ARGS(wiphy, rx, tx),
+ TP_ARGS(wiphy, tx, rx),
TP_STRUCT__entry(
WIPHY_ENTRY
__field(u32, tx)
@@ -1775,7 +1775,7 @@ DECLARE_EVENT_CLASS(tx_rx_evt,
DEFINE_EVENT(tx_rx_evt, rdev_set_antenna,
TP_PROTO(struct wiphy *wiphy, u32 tx, u32 rx),
- TP_ARGS(wiphy, rx, tx)
+ TP_ARGS(wiphy, tx, rx)
);
DECLARE_EVENT_CLASS(wiphy_netdev_id_evt,
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index a161c64d1765e6..838ad6541a17d8 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -4,6 +4,7 @@
* Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com>
* Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2024 Intel Corporation
*
* (As all part of the Linux kernel, this file is GPL)
*/
@@ -662,7 +663,8 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev)
dev->ieee80211_ptr->wiphy->wext &&
dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) {
wireless_warn_cfg80211_wext();
- if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)
+ if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO |
+ WIPHY_FLAG_DISABLE_WEXT))
return NULL;
return dev->ieee80211_ptr->wiphy->wext->get_wireless_stats(dev);
}
@@ -704,7 +706,8 @@ static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
#ifdef CONFIG_CFG80211_WEXT
if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy) {
wireless_warn_cfg80211_wext();
- if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)
+ if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO |
+ WIPHY_FLAG_DISABLE_WEXT))
return NULL;
handlers = dev->ieee80211_ptr->wiphy->wext;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3404d076a8a3e6..727aa20be4bde8 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1417,6 +1417,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;
+ if (optlen < sizeof(entries))
+ return -EINVAL;
if (copy_from_sockptr(&entries, optval, sizeof(entries)))
return -EFAULT;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6affe5cd85d8f1..53d8fabfa68580 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3593,6 +3593,8 @@ xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
return pol;
pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+ if (IS_ERR(pol))
+ pol = NULL;
}
return pol;
diff --git a/rust/Makefile b/rust/Makefile
index 846e6ab9d5a9b6..86a125c4243cb2 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -175,7 +175,6 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $<
mkdir -p $(objtree)/$(obj)/test/doctests/kernel; \
OBJTREE=$(abspath $(objtree)) \
$(RUSTDOC) --test $(rust_flags) \
- @$(objtree)/include/generated/rustc_cfg \
-L$(objtree)/$(obj) --extern alloc --extern kernel \
--extern build_error --extern macros \
--extern bindings --extern uapi \
diff --git a/rust/helpers.c b/rust/helpers.c
index 70e59efd92bc43..858d802abd115e 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -28,6 +28,7 @@
#include <linux/mutex.h>
#include <linux/refcount.h>
#include <linux/sched/signal.h>
+#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
@@ -157,6 +158,13 @@ void rust_helper_init_work_with_key(struct work_struct *work, work_func_t func,
}
EXPORT_SYMBOL_GPL(rust_helper_init_work_with_key);
+void * __must_check __realloc_size(2)
+rust_helper_krealloc(const void *objp, size_t new_size, gfp_t flags)
+{
+ return krealloc(objp, new_size, flags);
+}
+EXPORT_SYMBOL_GPL(rust_helper_krealloc);
+
/*
* `bindgen` binds the C `size_t` type as the Rust `usize` type, so we can
* use it in contexts where Rust expects a `usize` like slice (array) indices.
diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs
index 424257284d167b..09004b56fb65c7 100644
--- a/rust/kernel/init.rs
+++ b/rust/kernel/init.rs
@@ -1292,8 +1292,15 @@ impl_zeroable! {
i8, i16, i32, i64, i128, isize,
f32, f64,
- // SAFETY: These are ZSTs, there is nothing to zero.
- {<T: ?Sized>} PhantomData<T>, core::marker::PhantomPinned, Infallible, (),
+ // Note: do not add uninhabited types (such as `!` or `core::convert::Infallible`) to this list;
+ // creating an instance of an uninhabited type is immediate undefined behavior. For more on
+ // uninhabited/empty types, consult The Rustonomicon:
+ // <https://doc.rust-lang.org/stable/nomicon/exotic-sizes.html#empty-types>. The Rust Reference
+ // also has information on undefined behavior:
+ // <https://doc.rust-lang.org/stable/reference/behavior-considered-undefined.html>.
+ //
+ // SAFETY: These are inhabited ZSTs; there is nothing to zero and a valid value exists.
+ {<T: ?Sized>} PhantomData<T>, core::marker::PhantomPinned, (),
// SAFETY: Type is allowed to take any value, including all zeros.
{<T>} MaybeUninit<T>,
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index be68d5e567b1a1..6858e2f8a3ed97 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -65,7 +65,7 @@ const __LOG_PREFIX: &[u8] = b"rust_kernel\0";
/// The top level entrypoint to implementing a kernel module.
///
/// For any teardown or cleanup operations, your type may implement [`Drop`].
-pub trait Module: Sized + Sync {
+pub trait Module: Sized + Sync + Send {
/// Called at module initialization time.
///
/// Use this method to perform whatever setup or registration your module
diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs
index 96e09c6e8530b4..265d0e1c13710a 100644
--- a/rust/kernel/net/phy.rs
+++ b/rust/kernel/net/phy.rs
@@ -640,6 +640,10 @@ pub struct Registration {
drivers: Pin<&'static mut [DriverVTable]>,
}
+// SAFETY: The only action allowed in a `Registration` instance is dropping it, which is safe to do
+// from any thread because `phy_drivers_unregister` can be called from any thread context.
+unsafe impl Send for Registration {}
+
impl Registration {
/// Registers a PHY driver.
pub fn register(
diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs
index f489f315738323..520eae5fd79281 100644
--- a/rust/macros/lib.rs
+++ b/rust/macros/lib.rs
@@ -35,18 +35,6 @@ use proc_macro::TokenStream;
/// author: "Rust for Linux Contributors",
/// description: "My very own kernel module!",
/// license: "GPL",
-/// params: {
-/// my_i32: i32 {
-/// default: 42,
-/// permissions: 0o000,
-/// description: "Example of i32",
-/// },
-/// writeable_i32: i32 {
-/// default: 42,
-/// permissions: 0o644,
-/// description: "Example of i32",
-/// },
-/// },
/// }
///
/// struct MyModule;
diff --git a/rust/macros/module.rs b/rust/macros/module.rs
index 27979e582e4b94..acd0393b509574 100644
--- a/rust/macros/module.rs
+++ b/rust/macros/module.rs
@@ -199,17 +199,6 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
/// Used by the printing macros, e.g. [`info!`].
const __LOG_PREFIX: &[u8] = b\"{name}\\0\";
- /// The \"Rust loadable module\" mark.
- //
- // This may be best done another way later on, e.g. as a new modinfo
- // key or a new section. For the moment, keep it simple.
- #[cfg(MODULE)]
- #[doc(hidden)]
- #[used]
- static __IS_RUST_MODULE: () = ();
-
- static mut __MOD: Option<{type_}> = None;
-
// SAFETY: `__this_module` is constructed by the kernel at load time and will not be
// freed until the module is unloaded.
#[cfg(MODULE)]
@@ -221,81 +210,132 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
kernel::ThisModule::from_ptr(core::ptr::null_mut())
}};
- // Loadable modules need to export the `{{init,cleanup}}_module` identifiers.
- /// # Safety
- ///
- /// This function must not be called after module initialization, because it may be
- /// freed after that completes.
- #[cfg(MODULE)]
- #[doc(hidden)]
- #[no_mangle]
- #[link_section = \".init.text\"]
- pub unsafe extern \"C\" fn init_module() -> core::ffi::c_int {{
- __init()
- }}
-
- #[cfg(MODULE)]
- #[doc(hidden)]
- #[no_mangle]
- pub extern \"C\" fn cleanup_module() {{
- __exit()
- }}
+ // Double nested modules, since then nobody can access the public items inside.
+ mod __module_init {{
+ mod __module_init {{
+ use super::super::{type_};
+
+ /// The \"Rust loadable module\" mark.
+ //
+ // This may be best done another way later on, e.g. as a new modinfo
+ // key or a new section. For the moment, keep it simple.
+ #[cfg(MODULE)]
+ #[doc(hidden)]
+ #[used]
+ static __IS_RUST_MODULE: () = ();
+
+ static mut __MOD: Option<{type_}> = None;
+
+ // Loadable modules need to export the `{{init,cleanup}}_module` identifiers.
+ /// # Safety
+ ///
+ /// This function must not be called after module initialization, because it may be
+ /// freed after that completes.
+ #[cfg(MODULE)]
+ #[doc(hidden)]
+ #[no_mangle]
+ #[link_section = \".init.text\"]
+ pub unsafe extern \"C\" fn init_module() -> core::ffi::c_int {{
+ // SAFETY: This function is inaccessible to the outside due to the double
+ // module wrapping it. It is called exactly once by the C side via its
+ // unique name.
+ unsafe {{ __init() }}
+ }}
- // Built-in modules are initialized through an initcall pointer
- // and the identifiers need to be unique.
- #[cfg(not(MODULE))]
- #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))]
- #[doc(hidden)]
- #[link_section = \"{initcall_section}\"]
- #[used]
- pub static __{name}_initcall: extern \"C\" fn() -> core::ffi::c_int = __{name}_init;
+ #[cfg(MODULE)]
+ #[doc(hidden)]
+ #[no_mangle]
+ pub extern \"C\" fn cleanup_module() {{
+ // SAFETY:
+ // - This function is inaccessible to the outside due to the double
+ // module wrapping it. It is called exactly once by the C side via its
+ // unique name,
+ // - furthermore it is only called after `init_module` has returned `0`
+ // (which delegates to `__init`).
+ unsafe {{ __exit() }}
+ }}
- #[cfg(not(MODULE))]
- #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)]
- core::arch::global_asm!(
- r#\".section \"{initcall_section}\", \"a\"
- __{name}_initcall:
- .long __{name}_init - .
- .previous
- \"#
- );
+ // Built-in modules are initialized through an initcall pointer
+ // and the identifiers need to be unique.
+ #[cfg(not(MODULE))]
+ #[cfg(not(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS))]
+ #[doc(hidden)]
+ #[link_section = \"{initcall_section}\"]
+ #[used]
+ pub static __{name}_initcall: extern \"C\" fn() -> core::ffi::c_int = __{name}_init;
+
+ #[cfg(not(MODULE))]
+ #[cfg(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)]
+ core::arch::global_asm!(
+ r#\".section \"{initcall_section}\", \"a\"
+ __{name}_initcall:
+ .long __{name}_init - .
+ .previous
+ \"#
+ );
+
+ #[cfg(not(MODULE))]
+ #[doc(hidden)]
+ #[no_mangle]
+ pub extern \"C\" fn __{name}_init() -> core::ffi::c_int {{
+ // SAFETY: This function is inaccessible to the outside due to the double
+ // module wrapping it. It is called exactly once by the C side via its
+ // placement above in the initcall section.
+ unsafe {{ __init() }}
+ }}
- #[cfg(not(MODULE))]
- #[doc(hidden)]
- #[no_mangle]
- pub extern \"C\" fn __{name}_init() -> core::ffi::c_int {{
- __init()
- }}
+ #[cfg(not(MODULE))]
+ #[doc(hidden)]
+ #[no_mangle]
+ pub extern \"C\" fn __{name}_exit() {{
+ // SAFETY:
+ // - This function is inaccessible to the outside due to the double
+ // module wrapping it. It is called exactly once by the C side via its
+ // unique name,
+ // - furthermore it is only called after `__{name}_init` has returned `0`
+ // (which delegates to `__init`).
+ unsafe {{ __exit() }}
+ }}
- #[cfg(not(MODULE))]
- #[doc(hidden)]
- #[no_mangle]
- pub extern \"C\" fn __{name}_exit() {{
- __exit()
- }}
+ /// # Safety
+ ///
+ /// This function must only be called once.
+ unsafe fn __init() -> core::ffi::c_int {{
+ match <{type_} as kernel::Module>::init(&super::super::THIS_MODULE) {{
+ Ok(m) => {{
+ // SAFETY: No data race, since `__MOD` can only be accessed by this
+ // module and there only `__init` and `__exit` access it. These
+ // functions are only called once and `__exit` cannot be called
+ // before or during `__init`.
+ unsafe {{
+ __MOD = Some(m);
+ }}
+ return 0;
+ }}
+ Err(e) => {{
+ return e.to_errno();
+ }}
+ }}
+ }}
- fn __init() -> core::ffi::c_int {{
- match <{type_} as kernel::Module>::init(&THIS_MODULE) {{
- Ok(m) => {{
+ /// # Safety
+ ///
+ /// This function must
+ /// - only be called once,
+ /// - be called after `__init` has been called and returned `0`.
+ unsafe fn __exit() {{
+ // SAFETY: No data race, since `__MOD` can only be accessed by this module
+ // and there only `__init` and `__exit` access it. These functions are only
+ // called once and `__init` was already called.
unsafe {{
- __MOD = Some(m);
+ // Invokes `drop()` on `__MOD`, which should be used for cleanup.
+ __MOD = None;
}}
- return 0;
- }}
- Err(e) => {{
- return e.to_errno();
}}
- }}
- }}
- fn __exit() {{
- unsafe {{
- // Invokes `drop()` on `__MOD`, which should be used for cleanup.
- __MOD = None;
+ {modinfo}
}}
}}
-
- {modinfo}
",
type_ = info.type_,
name = info.name,
diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c
index 0cf27483cb3613..74fe915b7ffe8c 100644
--- a/samples/kfifo/dma-example.c
+++ b/samples/kfifo/dma-example.c
@@ -6,8 +6,9 @@
*/
#include <linux/init.h>
-#include <linux/module.h>
#include <linux/kfifo.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
/*
* This module shows how to handle fifo dma operations.
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index baf86c0880b6d7..533a7799fdfe6d 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -273,7 +273,7 @@ rust_common_cmd = \
-Zallow-features=$(rust_allowed_features) \
-Zcrate-attr=no_std \
-Zcrate-attr='feature($(rust_allowed_features))' \
- --extern alloc --extern kernel \
+ -Zunstable-options --extern force:alloc --extern kernel \
--crate-type rlib -L $(objtree)/rust/ \
--crate-name $(basename $(notdir $@)) \
--sysroot=/dev/null \
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 3ce5d503a6da98..1d13cecc7cc780 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -37,11 +37,6 @@ else
KBUILD_CFLAGS += -Wno-main
endif
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
-
# These result in bogus false positives
KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer)
@@ -82,22 +77,17 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
# Warn if there is an enum types mismatch
KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion)
+KBUILD_CFLAGS += -Wextra
+KBUILD_CFLAGS += -Wunused
+
#
# W=1 - warnings which may be relevant and do not occur too often
#
ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),)
-KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
-KBUILD_CFLAGS += $(call cc-option, -Wrestrict)
KBUILD_CFLAGS += -Wmissing-format-attribute
-KBUILD_CFLAGS += -Wold-style-definition
KBUILD_CFLAGS += -Wmissing-include-dirs
-KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
-KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
-KBUILD_CFLAGS += $(call cc-option, -Wformat-overflow)
-KBUILD_CFLAGS += $(call cc-option, -Wformat-truncation)
-KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
KBUILD_CPPFLAGS += -Wundef
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
@@ -108,12 +98,20 @@ else
# Suppress them by using -Wno... except for W=1.
KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned)
KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
+ifdef CONFIG_CC_IS_GCC
KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
+else
+# Clang checks for overflow/truncation with '%p', while GCC does not:
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219
+KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf)
+endif
KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
+KBUILD_CFLAGS += -Wno-override-init # alias for -Wno-initializer-overrides in clang
+
ifdef CONFIG_CC_IS_CLANG
# Clang before clang-16 would warn on default argument promotions.
ifneq ($(call clang-min-version, 160000),y)
@@ -131,7 +129,6 @@ endif
KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare
KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access)
-KBUILD_CFLAGS += $(call cc-disable-warning, cast-function-type-strict)
KBUILD_CFLAGS += -Wno-enum-compare-conditional
KBUILD_CFLAGS += -Wno-enum-enum-conversion
endif
@@ -146,15 +143,8 @@ ifneq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
KBUILD_CFLAGS += -Wdisabled-optimization
KBUILD_CFLAGS += -Wshadow
KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
-KBUILD_CFLAGS += -Wmissing-field-initializers
-KBUILD_CFLAGS += -Wtype-limits
-KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
-ifdef CONFIG_CC_IS_CLANG
-KBUILD_CFLAGS += -Winitializer-overrides
-endif
-
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
else
@@ -164,9 +154,7 @@ KBUILD_CFLAGS += -Wno-missing-field-initializers
KBUILD_CFLAGS += -Wno-type-limits
KBUILD_CFLAGS += -Wno-shift-negative-value
-ifdef CONFIG_CC_IS_CLANG
-KBUILD_CFLAGS += -Wno-initializer-overrides
-else
+ifdef CONFIG_CC_IS_GCC
KBUILD_CFLAGS += -Wno-maybe-uninitialized
endif
@@ -194,6 +182,7 @@ else
# The following turn off the warnings enabled by -Wextra
KBUILD_CFLAGS += -Wno-sign-compare
+KBUILD_CFLAGS += -Wno-unused-parameter
endif
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 3179747cbd2cc0..20e76d89c1864d 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -410,7 +410,7 @@ $(multi-dtb-y): FORCE
$(call if_changed,fdtoverlay)
$(call multi_depend, $(multi-dtb-y), .dtb, -dtbs)
-ifneq ($(CHECK_DTBS)$(CHECK_DT_BINDING),)
+ifneq ($(CHECK_DTBS),)
DT_CHECKER ?= dt-validate
DT_CHECKER_FLAGS ?= $(if $(DT_SCHEMA_FILES),-l $(DT_SCHEMA_FILES),-m)
DT_BINDING_DIR := Documentation/devicetree/bindings
@@ -504,6 +504,22 @@ quiet_cmd_uimage = UIMAGE $@
-a $(UIMAGE_LOADADDR) -e $(UIMAGE_ENTRYADDR) \
-n '$(UIMAGE_NAME)' -d $< $@
+# Flat Image Tree (FIT)
+# This allows for packaging of a kernel and all devicetrees files, using
+# compression.
+# ---------------------------------------------------------------------------
+
+MAKE_FIT := $(srctree)/scripts/make_fit.py
+
+# Use this to override the compression algorithm
+FIT_COMPRESSION ?= gzip
+
+quiet_cmd_fit = FIT $@
+ cmd_fit = $(MAKE_FIT) -o $@ --arch $(UIMAGE_ARCH) --os linux \
+ --name '$(UIMAGE_NAME)' \
+ $(if $(findstring 1,$(KBUILD_VERBOSE)),-v) \
+ --compress $(FIT_COMPRESSION) -k $< @$(word 2,$^)
+
# XZ
# ---------------------------------------------------------------------------
# Use xzkern to compress the kernel image and xzmisc to compress other things.
diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index 8568d256d6fbff..79fcf27316864f 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -23,7 +23,7 @@ modname = $(notdir $(@:.mod.o=))
part-of-module = y
quiet_cmd_cc_o_c = CC [M] $@
- cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV), $(c_flags)) -c -o $@ $<
+ cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV) $(CFLAGS_KCSAN), $(c_flags)) -c -o $@ $<
%.mod.o: %.mod.c FORCE
$(call if_changed_dep,cc_o_c)
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 4606944984ee2e..c55878bddfddc4 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -414,8 +414,8 @@ class PrinterRST(Printer):
version = version.stdout.decode().rstrip()
except:
try:
- version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot,
- capture_output=True, check=True)
+ version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'],
+ cwd=linuxRoot, capture_output=True, check=True)
version = version.stdout.decode().rstrip()
except:
return 'Linux'
diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
index c5c2ce113c9232..d20c47d21ad835 100644
--- a/scripts/gcc-plugins/stackleak_plugin.c
+++ b/scripts/gcc-plugins/stackleak_plugin.c
@@ -467,6 +467,8 @@ static bool stackleak_gate(void)
return false;
if (STRING_EQUAL(section, ".entry.text"))
return false;
+ if (STRING_EQUAL(section, ".head.text"))
+ return false;
}
return track_frame_size >= 0;
diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py
index cba589e5b57d61..2f11c4f9c345a0 100644
--- a/scripts/gdb/linux/cpus.py
+++ b/scripts/gdb/linux/cpus.py
@@ -26,11 +26,7 @@ def get_current_cpu():
if utils.get_gdbserver_type() == utils.GDBSERVER_QEMU:
return gdb.selected_thread().num - 1
elif utils.get_gdbserver_type() == utils.GDBSERVER_KGDB:
- tid = gdb.selected_thread().ptid[2]
- if tid > (0x100000000 - MAX_CPUS - 2):
- return 0x100000000 - tid - 2
- else:
- return tasks.get_thread_info(tasks.get_task_by_pid(tid))['cpu']
+ return gdb.parse_and_eval("kgdb_active.counter")
else:
raise gdb.GdbError("Sorry, obtaining the current CPU is not yet "
"supported with this gdb server.")
@@ -152,9 +148,8 @@ Note that VAR has to be quoted as string."""
def __init__(self):
super(PerCpu, self).__init__("lx_per_cpu")
- def invoke(self, var_name, cpu=-1):
- var_ptr = gdb.parse_and_eval("&" + var_name.string())
- return per_cpu(var_ptr, cpu)
+ def invoke(self, var, cpu=-1):
+ return per_cpu(var.address, cpu)
PerCpu()
diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py
index 6793d6e86e777b..62348397c1f5c8 100644
--- a/scripts/gdb/linux/tasks.py
+++ b/scripts/gdb/linux/tasks.py
@@ -85,7 +85,7 @@ thread_info_type = utils.CachedType("struct thread_info")
def get_thread_info(task):
thread_info_ptr_type = thread_info_type.get_type().pointer()
- if task.type.fields()[0].type == thread_info_type.get_type():
+ if task_type.get_type().fields()[0].type == thread_info_type.get_type():
return task['thread_info']
thread_info = task['stack'].cast(thread_info_ptr_type)
return thread_info.dereference()
diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py
index 7d5278d815fa15..245ab297ea84a0 100644
--- a/scripts/gdb/linux/utils.py
+++ b/scripts/gdb/linux/utils.py
@@ -196,7 +196,7 @@ def get_gdbserver_type():
def probe_kgdb():
try:
thread_info = gdb.execute("info thread 2", to_string=True)
- return "shadowCPU0" in thread_info
+ return "shadowCPU" in thread_info
except gdb.error:
return False
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 653b92f6d4c8f4..47978efe4797c2 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -204,6 +204,11 @@ static int symbol_in_range(const struct sym_entry *s,
return 0;
}
+static bool string_starts_with(const char *s, const char *prefix)
+{
+ return strncmp(s, prefix, strlen(prefix)) == 0;
+}
+
static int symbol_valid(const struct sym_entry *s)
{
const char *name = sym_name(s);
@@ -211,6 +216,14 @@ static int symbol_valid(const struct sym_entry *s)
/* if --all-symbols is not specified, then symbols outside the text
* and inittext sections are discarded */
if (!all_symbols) {
+ /*
+ * Symbols starting with __start and __stop are used to denote
+ * section boundaries, and should always be included:
+ */
+ if (string_starts_with(name, "__start_") ||
+ string_starts_with(name, "__stop_"))
+ return 1;
+
if (symbol_in_range(s, text_ranges,
ARRAY_SIZE(text_ranges)) == 0)
return 0;
diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index b5730061872bae..0156ca13f949c5 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -497,9 +497,8 @@ static int conf_choice(struct menu *menu)
printf("%*c", indent, '>');
} else
printf("%*c", indent, ' ');
- printf(" %d. %s", cnt, menu_get_prompt(child));
- if (child->sym->name)
- printf(" (%s)", child->sym->name);
+ printf(" %d. %s (%s)", cnt, menu_get_prompt(child),
+ child->sym->name);
if (!sym_has_value(child->sym))
printf(" (NEW)");
printf("\n");
@@ -552,11 +551,6 @@ static int conf_choice(struct menu *menu)
continue;
}
sym_set_tristate_value(child->sym, yes);
- for (child = child->list; child; child = child->next) {
- indent += 2;
- conf(child);
- indent -= 2;
- }
return 1;
}
}
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 0e35c4819cf1d2..bcce87658998c4 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -793,30 +793,23 @@ int conf_write_defconfig(const char *filename)
sym_clear_all_valid();
- /* Traverse all menus to find all relevant symbols */
- menu = rootmenu.list;
-
- while (menu != NULL)
- {
+ menu_for_each_entry(menu) {
sym = menu->sym;
if (sym && !sym_is_choice(sym)) {
sym_calc_value(sym);
if (!(sym->flags & SYMBOL_WRITE))
- goto next_menu;
+ continue;
sym->flags &= ~SYMBOL_WRITE;
/* If we cannot change the symbol - skip */
if (!sym_is_changeable(sym))
- goto next_menu;
+ continue;
/* If symbol equals to default value - skip */
if (strcmp(sym_get_string_value(sym), sym_get_string_default(sym)) == 0)
- goto next_menu;
+ continue;
/*
* If symbol is a choice value and equals to the
* default for a choice - skip.
- * But only if value is bool and equal to "y" and
- * choice is not "optional".
- * (If choice is "optional" then all values can be "n")
*/
if (sym_is_choice_value(sym)) {
struct symbol *cs;
@@ -824,28 +817,14 @@ int conf_write_defconfig(const char *filename)
cs = prop_get_symbol(sym_get_choice_prop(sym));
ds = sym_choice_default(cs);
- if (!sym_is_optional(cs) && sym == ds) {
+ if (sym == ds) {
if ((sym->type == S_BOOLEAN) &&
sym_get_tristate_value(sym) == yes)
- goto next_menu;
+ continue;
}
}
print_symbol_for_dotconfig(out, sym);
}
-next_menu:
- if (menu->list != NULL) {
- menu = menu->list;
- }
- else if (menu->next != NULL) {
- menu = menu->next;
- } else {
- while ((menu = menu->parent)) {
- if (menu->next != NULL) {
- menu = menu->next;
- break;
- }
- }
- }
}
fclose(out);
return 0;
@@ -906,7 +885,7 @@ int conf_write(const char *name)
"# %s\n"
"#\n", str);
need_newline = false;
- } else if (!(sym->flags & SYMBOL_CHOICE) &&
+ } else if (!sym_is_choice(sym) &&
!(sym->flags & SYMBOL_WRITTEN)) {
sym_calc_value(sym);
if (!(sym->flags & SYMBOL_WRITE))
diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index 0158f5eac45428..f646a98de0063a 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -72,8 +72,7 @@ enum {
/*
* Represents a configuration symbol.
*
- * Choices are represented as a special kind of symbol and have the
- * SYMBOL_CHOICE bit set in 'flags'.
+ * Choices are represented as a special kind of symbol with null name.
*/
struct symbol {
/* link node for the hash table */
@@ -131,10 +130,8 @@ struct symbol {
#define SYMBOL_CONST 0x0001 /* symbol is const */
#define SYMBOL_CHECK 0x0008 /* used during dependency checking */
-#define SYMBOL_CHOICE 0x0010 /* start of a choice block (null name) */
#define SYMBOL_CHOICEVAL 0x0020 /* used as a value in a choice block */
#define SYMBOL_VALID 0x0080 /* set when symbol.curr is calculated */
-#define SYMBOL_OPTIONAL 0x0100 /* choice is optional - values can be 'n' */
#define SYMBOL_WRITE 0x0200 /* write symbol to file (KCONFIG_CONFIG) */
#define SYMBOL_CHANGED 0x0400 /* ? */
#define SYMBOL_WRITTEN 0x0800 /* track info to avoid double-write to .config */
diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c
index 9709aca3a30fe9..13e2449ac83fad 100644
--- a/scripts/kconfig/gconf.c
+++ b/scripts/kconfig/gconf.c
@@ -83,14 +83,10 @@ static const char *dbg_sym_flags(int val)
strcat(buf, "const/");
if (val & SYMBOL_CHECK)
strcat(buf, "check/");
- if (val & SYMBOL_CHOICE)
- strcat(buf, "choice/");
if (val & SYMBOL_CHOICEVAL)
strcat(buf, "choiceval/");
if (val & SYMBOL_VALID)
strcat(buf, "valid/");
- if (val & SYMBOL_OPTIONAL)
- strcat(buf, "optional/");
if (val & SYMBOL_WRITE)
strcat(buf, "write/");
if (val & SYMBOL_CHANGED)
diff --git a/scripts/kconfig/lexer.l b/scripts/kconfig/lexer.l
index 89544c3a1a2995..8dd597c4710dcb 100644
--- a/scripts/kconfig/lexer.l
+++ b/scripts/kconfig/lexer.l
@@ -120,7 +120,6 @@ n [A-Za-z0-9_-]
"menuconfig" return T_MENUCONFIG;
"modules" return T_MODULES;
"on" return T_ON;
-"optional" return T_OPTIONAL;
"prompt" return T_PROMPT;
"range" return T_RANGE;
"select" return T_SELECT;
diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
index e69d7c59d93027..64dfc354dd5c17 100644
--- a/scripts/kconfig/lkc.h
+++ b/scripts/kconfig/lkc.h
@@ -79,6 +79,11 @@ void str_printf(struct gstr *gs, const char *fmt, ...);
char *str_get(struct gstr *gs);
/* menu.c */
+struct menu *menu_next(struct menu *menu, struct menu *root);
+#define menu_for_each_sub_entry(menu, root) \
+ for (menu = menu_next(root, root); menu; menu = menu_next(menu, root))
+#define menu_for_each_entry(menu) \
+ menu_for_each_sub_entry(menu, &rootmenu)
void _menu_init(void);
void menu_warn(struct menu *menu, const char *fmt, ...);
struct menu *menu_add_menu(void);
@@ -89,7 +94,7 @@ void menu_add_visibility(struct expr *dep);
struct property *menu_add_prompt(enum prop_type type, char *prompt, struct expr *dep);
void menu_add_expr(enum prop_type type, struct expr *expr, struct expr *dep);
void menu_add_symbol(enum prop_type type, struct symbol *sym, struct expr *dep);
-void menu_finalize(struct menu *parent);
+void menu_finalize(void);
void menu_set_type(int type);
extern struct menu rootmenu;
@@ -124,7 +129,8 @@ static inline struct symbol *sym_get_choice_value(struct symbol *sym)
static inline bool sym_is_choice(struct symbol *sym)
{
- return sym->flags & SYMBOL_CHOICE ? true : false;
+ /* A choice is a symbol with no name */
+ return sym->name == NULL;
}
static inline bool sym_is_choice_value(struct symbol *sym)
@@ -132,11 +138,6 @@ static inline bool sym_is_choice_value(struct symbol *sym)
return sym->flags & SYMBOL_CHOICEVAL ? true : false;
}
-static inline bool sym_is_optional(struct symbol *sym)
-{
- return sym->flags & SYMBOL_OPTIONAL ? true : false;
-}
-
static inline bool sym_has_value(struct symbol *sym)
{
return sym->flags & SYMBOL_DEF_USER ? true : false;
diff --git a/scripts/kconfig/lxdialog/checklist.c b/scripts/kconfig/lxdialog/checklist.c
index 31d0a89fbeb7ac..75493302fb8570 100644
--- a/scripts/kconfig/lxdialog/checklist.c
+++ b/scripts/kconfig/lxdialog/checklist.c
@@ -119,7 +119,7 @@ int dialog_checklist(const char *title, const char *prompt, int height,
}
do_resize:
- if (getmaxy(stdscr) < (height + CHECKLIST_HEIGTH_MIN))
+ if (getmaxy(stdscr) < (height + CHECKLIST_HEIGHT_MIN))
return -ERRDISPLAYTOOSMALL;
if (getmaxx(stdscr) < (width + CHECKLIST_WIDTH_MIN))
return -ERRDISPLAYTOOSMALL;
diff --git a/scripts/kconfig/lxdialog/dialog.h b/scripts/kconfig/lxdialog/dialog.h
index 2d15ba893fbf89..f6c2ebe6d1f91d 100644
--- a/scripts/kconfig/lxdialog/dialog.h
+++ b/scripts/kconfig/lxdialog/dialog.h
@@ -162,17 +162,17 @@ int on_key_esc(WINDOW *win);
int on_key_resize(void);
/* minimum (re)size values */
-#define CHECKLIST_HEIGTH_MIN 6 /* For dialog_checklist() */
+#define CHECKLIST_HEIGHT_MIN 6 /* For dialog_checklist() */
#define CHECKLIST_WIDTH_MIN 6
-#define INPUTBOX_HEIGTH_MIN 2 /* For dialog_inputbox() */
+#define INPUTBOX_HEIGHT_MIN 2 /* For dialog_inputbox() */
#define INPUTBOX_WIDTH_MIN 2
-#define MENUBOX_HEIGTH_MIN 15 /* For dialog_menu() */
+#define MENUBOX_HEIGHT_MIN 15 /* For dialog_menu() */
#define MENUBOX_WIDTH_MIN 65
-#define TEXTBOX_HEIGTH_MIN 8 /* For dialog_textbox() */
+#define TEXTBOX_HEIGHT_MIN 8 /* For dialog_textbox() */
#define TEXTBOX_WIDTH_MIN 8
-#define YESNO_HEIGTH_MIN 4 /* For dialog_yesno() */
+#define YESNO_HEIGHT_MIN 4 /* For dialog_yesno() */
#define YESNO_WIDTH_MIN 4
-#define WINDOW_HEIGTH_MIN 19 /* For init_dialog() */
+#define WINDOW_HEIGHT_MIN 19 /* For init_dialog() */
#define WINDOW_WIDTH_MIN 80
int init_dialog(const char *backtitle);
diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c
index 1dcfb288ee6363..3c6e24b20f5be6 100644
--- a/scripts/kconfig/lxdialog/inputbox.c
+++ b/scripts/kconfig/lxdialog/inputbox.c
@@ -43,7 +43,7 @@ int dialog_inputbox(const char *title, const char *prompt, int height, int width
strcpy(instr, init);
do_resize:
- if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGTH_MIN))
+ if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGHT_MIN))
return -ERRDISPLAYTOOSMALL;
if (getmaxx(stdscr) <= (width - INPUTBOX_WIDTH_MIN))
return -ERRDISPLAYTOOSMALL;
diff --git a/scripts/kconfig/lxdialog/menubox.c b/scripts/kconfig/lxdialog/menubox.c
index 0e333284e947bc..6e6244df0c56e3 100644
--- a/scripts/kconfig/lxdialog/menubox.c
+++ b/scripts/kconfig/lxdialog/menubox.c
@@ -172,7 +172,7 @@ int dialog_menu(const char *title, const char *prompt,
do_resize:
height = getmaxy(stdscr);
width = getmaxx(stdscr);
- if (height < MENUBOX_HEIGTH_MIN || width < MENUBOX_WIDTH_MIN)
+ if (height < MENUBOX_HEIGHT_MIN || width < MENUBOX_WIDTH_MIN)
return -ERRDISPLAYTOOSMALL;
height -= 4;
diff --git a/scripts/kconfig/lxdialog/textbox.c b/scripts/kconfig/lxdialog/textbox.c
index 058ed0e5bbd545..0abaf635978f98 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -175,7 +175,7 @@ int dialog_textbox(const char *title, const char *tbuf, int initial_height,
do_resize:
getmaxyx(stdscr, height, width);
- if (height < TEXTBOX_HEIGTH_MIN || width < TEXTBOX_WIDTH_MIN)
+ if (height < TEXTBOX_HEIGHT_MIN || width < TEXTBOX_WIDTH_MIN)
return -ERRDISPLAYTOOSMALL;
if (initial_height != 0)
height = initial_height;
diff --git a/scripts/kconfig/lxdialog/util.c b/scripts/kconfig/lxdialog/util.c
index 3fb7508b68a240..f18e2a89f6135d 100644
--- a/scripts/kconfig/lxdialog/util.c
+++ b/scripts/kconfig/lxdialog/util.c
@@ -291,7 +291,7 @@ int init_dialog(const char *backtitle)
getyx(stdscr, saved_y, saved_x);
getmaxyx(stdscr, height, width);
- if (height < WINDOW_HEIGTH_MIN || width < WINDOW_WIDTH_MIN) {
+ if (height < WINDOW_HEIGHT_MIN || width < WINDOW_WIDTH_MIN) {
endwin();
return -ERRDISPLAYTOOSMALL;
}
diff --git a/scripts/kconfig/lxdialog/yesno.c b/scripts/kconfig/lxdialog/yesno.c
index bcaac9b7bab2ca..b57d25e1549fe4 100644
--- a/scripts/kconfig/lxdialog/yesno.c
+++ b/scripts/kconfig/lxdialog/yesno.c
@@ -32,7 +32,7 @@ int dialog_yesno(const char *title, const char *prompt, int height, int width)
WINDOW *dialog;
do_resize:
- if (getmaxy(stdscr) < (height + YESNO_HEIGTH_MIN))
+ if (getmaxy(stdscr) < (height + YESNO_HEIGHT_MIN))
return -ERRDISPLAYTOOSMALL;
if (getmaxx(stdscr) < (width + YESNO_WIDTH_MIN))
return -ERRDISPLAYTOOSMALL;
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index f4bb391d50cf99..c0969097447da5 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -659,9 +659,9 @@ static void conf_choice(struct menu *menu)
dialog_clear();
res = dialog_checklist(prompt ? prompt : "Main Menu",
radiolist_instructions,
- MENUBOX_HEIGTH_MIN,
+ MENUBOX_HEIGHT_MIN,
MENUBOX_WIDTH_MIN,
- CHECKLIST_HEIGTH_MIN);
+ CHECKLIST_HEIGHT_MIN);
selected = item_activate_selected();
switch (res) {
case 0:
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index 8498481e6afe36..e01b9ee87c05b3 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -17,6 +17,27 @@ static const char nohelp_text[] = "There is no help available for this option.";
struct menu rootmenu;
static struct menu **last_entry_ptr;
+/**
+ * menu_next - return the next menu entry with depth-first traversal
+ * @menu: pointer to the current menu
+ * @root: root of the sub-tree to traverse. If NULL is given, the traveral
+ * continues until it reaches the end of the entire menu tree.
+ * return: the menu to visit next, or NULL when it reaches the end.
+ */
+struct menu *menu_next(struct menu *menu, struct menu *root)
+{
+ if (menu->list)
+ return menu->list;
+
+ while (menu != root && !menu->next)
+ menu = menu->parent;
+
+ if (menu == root)
+ return NULL;
+
+ return menu->next;
+}
+
void menu_warn(struct menu *menu, const char *fmt, ...)
{
va_list ap;
@@ -282,7 +303,7 @@ static void sym_check_prop(struct symbol *sym)
}
}
-void menu_finalize(struct menu *parent)
+static void _menu_finalize(struct menu *parent, bool inside_choice)
{
struct menu *menu, *last_menu;
struct symbol *sym;
@@ -296,7 +317,12 @@ void menu_finalize(struct menu *parent)
* and propagate parent dependencies before moving on.
*/
- if (sym && sym_is_choice(sym)) {
+ bool is_choice = false;
+
+ if (sym && sym_is_choice(sym))
+ is_choice = true;
+
+ if (is_choice) {
if (sym->type == S_UNKNOWN) {
/* find the first choice value to find out choice type */
current_entry = parent;
@@ -394,7 +420,7 @@ void menu_finalize(struct menu *parent)
}
}
- if (sym && sym_is_choice(sym))
+ if (is_choice)
expr_free(parentdep);
/*
@@ -402,8 +428,8 @@ void menu_finalize(struct menu *parent)
* moving on
*/
for (menu = parent->list; menu; menu = menu->next)
- menu_finalize(menu);
- } else if (sym) {
+ _menu_finalize(menu, is_choice);
+ } else if (!inside_choice && sym) {
/*
* Automatic submenu creation. If sym is a symbol and A, B, C,
* ... are consecutive items (symbols, menus, ifs, etc.) that
@@ -463,7 +489,7 @@ void menu_finalize(struct menu *parent)
/* Superset, put in submenu */
expr_free(dep2);
next:
- menu_finalize(menu);
+ _menu_finalize(menu, false);
menu->parent = parent;
last_menu = menu;
}
@@ -567,21 +593,22 @@ void menu_finalize(struct menu *parent)
}
/*
- * For non-optional choices, add a reverse dependency (corresponding to
- * a select) of '<visibility> && m'. This prevents the user from
- * setting the choice mode to 'n' when the choice is visible.
- *
- * This would also work for non-choice symbols, but only non-optional
- * choices clear SYMBOL_OPTIONAL as of writing. Choices are implemented
- * as a type of symbol.
+ * For choices, add a reverse dependency (corresponding to a select) of
+ * '<visibility> && m'. This prevents the user from setting the choice
+ * mode to 'n' when the choice is visible.
*/
- if (sym && !sym_is_optional(sym) && parent->prompt) {
+ if (sym && sym_is_choice(sym) && parent->prompt) {
sym->rev_dep.expr = expr_alloc_or(sym->rev_dep.expr,
expr_alloc_and(parent->prompt->visible.expr,
expr_alloc_symbol(&symbol_mod)));
}
}
+void menu_finalize(void)
+{
+ _menu_finalize(&rootmenu, false);
+}
+
bool menu_has_prompt(struct menu *menu)
{
if (!menu->prompt)
diff --git a/scripts/kconfig/parser.y b/scripts/kconfig/parser.y
index b45bfaf0a02b12..69dc0c098acbdb 100644
--- a/scripts/kconfig/parser.y
+++ b/scripts/kconfig/parser.y
@@ -69,7 +69,6 @@ struct menu *current_menu, *current_entry;
%token T_MODULES
%token T_ON
%token T_OPEN_PAREN
-%token T_OPTIONAL
%token T_PLUS_EQUAL
%token T_PROMPT
%token T_RANGE
@@ -140,7 +139,6 @@ stmt_list_in_choice:
config_entry_start: T_CONFIG nonconst_symbol T_EOL
{
- $2->flags |= SYMBOL_OPTIONAL;
menu_add_entry($2);
printd(DEBUG_PARSE, "%s:%d:config %s\n", cur_filename, cur_lineno, $2->name);
};
@@ -152,7 +150,6 @@ config_stmt: config_entry_start config_option_list
menuconfig_entry_start: T_MENUCONFIG nonconst_symbol T_EOL
{
- $2->flags |= SYMBOL_OPTIONAL;
menu_add_entry($2);
printd(DEBUG_PARSE, "%s:%d:menuconfig %s\n", cur_filename, cur_lineno, $2->name);
};
@@ -224,7 +221,7 @@ config_option: T_MODULES T_EOL
choice: T_CHOICE T_EOL
{
- struct symbol *sym = sym_lookup(NULL, SYMBOL_CHOICE);
+ struct symbol *sym = sym_lookup(NULL, 0);
sym->flags |= SYMBOL_NO_WRITE;
menu_add_entry(sym);
menu_add_expr(P_CHOICE, NULL, NULL);
@@ -272,12 +269,6 @@ choice_option: logic_type prompt_stmt_opt T_EOL
printd(DEBUG_PARSE, "%s:%d:type(%u)\n", cur_filename, cur_lineno, $1);
};
-choice_option: T_OPTIONAL T_EOL
-{
- current_entry->sym->flags |= SYMBOL_OPTIONAL;
- printd(DEBUG_PARSE, "%s:%d:optional\n", cur_filename, cur_lineno);
-};
-
choice_option: T_DEFAULT nonconst_symbol if_expr T_EOL
{
menu_add_symbol(P_DEFAULT, $2, $3);
@@ -515,22 +506,11 @@ void conf_parse(const char *name)
menu_add_prompt(P_MENU, "Main menu", NULL);
}
- menu_finalize(&rootmenu);
+ menu_finalize();
- menu = &rootmenu;
- while (menu) {
+ menu_for_each_entry(menu) {
if (menu->sym && sym_check_deps(menu->sym))
yynerrs++;
-
- if (menu->list) {
- menu = menu->list;
- continue;
- }
-
- while (!menu->next && menu->parent)
- menu = menu->parent;
-
- menu = menu->next;
}
if (yynerrs)
diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
index 81fe1884ef8ae3..8b34992ba5ed98 100644
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -827,7 +827,7 @@ struct symbol *sym_lookup(const char *name, int flags)
if (symbol->name &&
!strcmp(symbol->name, name) &&
(flags ? symbol->flags & flags
- : !(symbol->flags & (SYMBOL_CONST|SYMBOL_CHOICE))))
+ : !(symbol->flags & SYMBOL_CONST)))
return symbol;
}
new_name = xstrdup(name);
diff --git a/scripts/kconfig/tests/choice/Kconfig b/scripts/kconfig/tests/choice/Kconfig
index 0930eb65e93283..8cdda40868a134 100644
--- a/scripts/kconfig/tests/choice/Kconfig
+++ b/scripts/kconfig/tests/choice/Kconfig
@@ -18,19 +18,6 @@ config BOOL_CHOICE1
endchoice
choice
- prompt "optional boolean choice"
- optional
- default OPT_BOOL_CHOICE1
-
-config OPT_BOOL_CHOICE0
- bool "choice 0"
-
-config OPT_BOOL_CHOICE1
- bool "choice 1"
-
-endchoice
-
-choice
prompt "tristate choice"
default TRI_CHOICE1
@@ -41,16 +28,3 @@ config TRI_CHOICE1
tristate "choice 1"
endchoice
-
-choice
- prompt "optional tristate choice"
- optional
- default OPT_TRI_CHOICE1
-
-config OPT_TRI_CHOICE0
- tristate "choice 0"
-
-config OPT_TRI_CHOICE1
- tristate "choice 1"
-
-endchoice
diff --git a/scripts/kconfig/tests/choice/__init__.py b/scripts/kconfig/tests/choice/__init__.py
index 4318fce05912f4..05e162220085c7 100644
--- a/scripts/kconfig/tests/choice/__init__.py
+++ b/scripts/kconfig/tests/choice/__init__.py
@@ -6,8 +6,6 @@ The handling of 'choice' is a bit complicated part in Kconfig.
The behavior of 'y' choice is intuitive. If choice values are tristate,
the choice can be 'm' where each value can be enabled independently.
-Also, if a choice is marked as 'optional', the whole choice can be
-invisible.
"""
diff --git a/scripts/kconfig/tests/choice/allmod_expected_config b/scripts/kconfig/tests/choice/allmod_expected_config
index f1f5dcdb792304..d1f51651740c4f 100644
--- a/scripts/kconfig/tests/choice/allmod_expected_config
+++ b/scripts/kconfig/tests/choice/allmod_expected_config
@@ -1,9 +1,5 @@
CONFIG_MODULES=y
# CONFIG_BOOL_CHOICE0 is not set
CONFIG_BOOL_CHOICE1=y
-# CONFIG_OPT_BOOL_CHOICE0 is not set
-CONFIG_OPT_BOOL_CHOICE1=y
CONFIG_TRI_CHOICE0=m
CONFIG_TRI_CHOICE1=m
-CONFIG_OPT_TRI_CHOICE0=m
-CONFIG_OPT_TRI_CHOICE1=m
diff --git a/scripts/kconfig/tests/choice/allyes_expected_config b/scripts/kconfig/tests/choice/allyes_expected_config
index e5a062a1157cd4..8a76c1816893b0 100644
--- a/scripts/kconfig/tests/choice/allyes_expected_config
+++ b/scripts/kconfig/tests/choice/allyes_expected_config
@@ -1,9 +1,5 @@
CONFIG_MODULES=y
# CONFIG_BOOL_CHOICE0 is not set
CONFIG_BOOL_CHOICE1=y
-# CONFIG_OPT_BOOL_CHOICE0 is not set
-CONFIG_OPT_BOOL_CHOICE1=y
# CONFIG_TRI_CHOICE0 is not set
CONFIG_TRI_CHOICE1=y
-# CONFIG_OPT_TRI_CHOICE0 is not set
-CONFIG_OPT_TRI_CHOICE1=y
diff --git a/scripts/kconfig/tests/choice/oldask0_expected_stdout b/scripts/kconfig/tests/choice/oldask0_expected_stdout
index b251bba9698b29..d2257db4642308 100644
--- a/scripts/kconfig/tests/choice/oldask0_expected_stdout
+++ b/scripts/kconfig/tests/choice/oldask0_expected_stdout
@@ -3,8 +3,6 @@ boolean choice
1. choice 0 (BOOL_CHOICE0) (NEW)
> 2. choice 1 (BOOL_CHOICE1) (NEW)
choice[1-2?]:
-optional boolean choice [N/y/?] (NEW)
tristate choice [M/y/?] (NEW)
choice 0 (TRI_CHOICE0) [N/m/?] (NEW)
choice 1 (TRI_CHOICE1) [N/m/?] (NEW)
-optional tristate choice [N/m/y/?] (NEW)
diff --git a/scripts/kconfig/tests/choice/oldask1_config b/scripts/kconfig/tests/choice/oldask1_config
index b67bfe3c641fa1..0f417856c81c1b 100644
--- a/scripts/kconfig/tests/choice/oldask1_config
+++ b/scripts/kconfig/tests/choice/oldask1_config
@@ -1,2 +1 @@
# CONFIG_MODULES is not set
-CONFIG_OPT_BOOL_CHOICE0=y
diff --git a/scripts/kconfig/tests/choice/oldask1_expected_stdout b/scripts/kconfig/tests/choice/oldask1_expected_stdout
index c2125e9bf96a76..ffa20ad7f38eb9 100644
--- a/scripts/kconfig/tests/choice/oldask1_expected_stdout
+++ b/scripts/kconfig/tests/choice/oldask1_expected_stdout
@@ -3,13 +3,7 @@ boolean choice
1. choice 0 (BOOL_CHOICE0) (NEW)
> 2. choice 1 (BOOL_CHOICE1) (NEW)
choice[1-2?]:
-optional boolean choice [Y/n/?] (NEW)
-optional boolean choice
-> 1. choice 0 (OPT_BOOL_CHOICE0)
- 2. choice 1 (OPT_BOOL_CHOICE1) (NEW)
-choice[1-2?]:
tristate choice
1. choice 0 (TRI_CHOICE0) (NEW)
> 2. choice 1 (TRI_CHOICE1) (NEW)
choice[1-2?]:
-optional tristate choice [N/y/?]
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 967f1abb0edbd8..b463acecad401b 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1541,7 +1541,7 @@ sub create_parameterlist($$$$) {
save_struct_actual($2);
push_parameter($2, "$type $1", $arg, $file, $declaration_name);
- } elsif ($param =~ m/(.*?):(\d+)/) {
+ } elsif ($param =~ m/(.*?):(\w+)/) {
if ($type ne "") { # skip unnamed bit-fields
save_struct_actual($1);
push_parameter($1, "$type:$2", $arg, $file, $declaration_name)
@@ -1723,6 +1723,7 @@ sub dump_function($$) {
$prototype =~ s/__must_check +//;
$prototype =~ s/__weak +//;
$prototype =~ s/__sched +//;
+ $prototype =~ s/_noprof//;
$prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//;
$prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//;
$prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//;
diff --git a/scripts/make_fit.py b/scripts/make_fit.py
new file mode 100755
index 00000000000000..3de90c5a094b7a
--- /dev/null
+++ b/scripts/make_fit.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright 2024 Google LLC
+# Written by Simon Glass <sjg@chromium.org>
+#
+
+"""Build a FIT containing a lot of devicetree files
+
+Usage:
+ make_fit.py -A arm64 -n 'Linux-6.6' -O linux
+ -o arch/arm64/boot/image.fit -k /tmp/kern/arch/arm64/boot/image.itk
+ @arch/arm64/boot/dts/dtbs-list -E -c gzip
+
+Creates a FIT containing the supplied kernel and a set of devicetree files,
+either specified individually or listed in a file (with an '@' prefix).
+
+Use -E to generate an external FIT (where the data is placed after the
+FIT data structure). This allows parsing of the data without loading
+the entire FIT.
+
+Use -c to compress the data, using bzip2, gzip, lz4, lzma, lzo and
+zstd algorithms.
+
+The resulting FIT can be booted by bootloaders which support FIT, such
+as U-Boot, Linuxboot, Tianocore, etc.
+
+Note that this tool does not yet support adding a ramdisk / initrd.
+"""
+
+import argparse
+import collections
+import os
+import subprocess
+import sys
+import tempfile
+import time
+
+import libfdt
+
+
+# Tool extension and the name of the command-line tools
+CompTool = collections.namedtuple('CompTool', 'ext,tools')
+
+COMP_TOOLS = {
+ 'bzip2': CompTool('.bz2', 'bzip2'),
+ 'gzip': CompTool('.gz', 'pigz,gzip'),
+ 'lz4': CompTool('.lz4', 'lz4'),
+ 'lzma': CompTool('.lzma', 'lzma'),
+ 'lzo': CompTool('.lzo', 'lzop'),
+ 'zstd': CompTool('.zstd', 'zstd'),
+}
+
+
+def parse_args():
+ """Parse the program ArgumentParser
+
+ Returns:
+ Namespace object containing the arguments
+ """
+ epilog = 'Build a FIT from a directory tree containing .dtb files'
+ parser = argparse.ArgumentParser(epilog=epilog, fromfile_prefix_chars='@')
+ parser.add_argument('-A', '--arch', type=str, required=True,
+ help='Specifies the architecture')
+ parser.add_argument('-c', '--compress', type=str, default='none',
+ help='Specifies the compression')
+ parser.add_argument('-E', '--external', action='store_true',
+ help='Convert the FIT to use external data')
+ parser.add_argument('-n', '--name', type=str, required=True,
+ help='Specifies the name')
+ parser.add_argument('-o', '--output', type=str, required=True,
+ help='Specifies the output file (.fit)')
+ parser.add_argument('-O', '--os', type=str, required=True,
+ help='Specifies the operating system')
+ parser.add_argument('-k', '--kernel', type=str, required=True,
+ help='Specifies the (uncompressed) kernel input file (.itk)')
+ parser.add_argument('-v', '--verbose', action='store_true',
+ help='Enable verbose output')
+ parser.add_argument('dtbs', type=str, nargs='*',
+ help='Specifies the devicetree files to process')
+
+ return parser.parse_args()
+
+
+def setup_fit(fsw, name):
+ """Make a start on writing the FIT
+
+ Outputs the root properties and the 'images' node
+
+ Args:
+ fsw (libfdt.FdtSw): Object to use for writing
+ name (str): Name of kernel image
+ """
+ fsw.INC_SIZE = 65536
+ fsw.finish_reservemap()
+ fsw.begin_node('')
+ fsw.property_string('description', f'{name} with devicetree set')
+ fsw.property_u32('#address-cells', 1)
+
+ fsw.property_u32('timestamp', int(time.time()))
+ fsw.begin_node('images')
+
+
+def write_kernel(fsw, data, args):
+ """Write out the kernel image
+
+ Writes a kernel node along with the required properties
+
+ Args:
+ fsw (libfdt.FdtSw): Object to use for writing
+ data (bytes): Data to write (possibly compressed)
+ args (Namespace): Contains necessary strings:
+ arch: FIT architecture, e.g. 'arm64'
+ fit_os: Operating Systems, e.g. 'linux'
+ name: Name of OS, e.g. 'Linux-6.6.0-rc7'
+ compress: Compression algorithm to use, e.g. 'gzip'
+ """
+ with fsw.add_node('kernel'):
+ fsw.property_string('description', args.name)
+ fsw.property_string('type', 'kernel_noload')
+ fsw.property_string('arch', args.arch)
+ fsw.property_string('os', args.os)
+ fsw.property_string('compression', args.compress)
+ fsw.property('data', data)
+ fsw.property_u32('load', 0)
+ fsw.property_u32('entry', 0)
+
+
+def finish_fit(fsw, entries):
+ """Finish the FIT ready for use
+
+ Writes the /configurations node and subnodes
+
+ Args:
+ fsw (libfdt.FdtSw): Object to use for writing
+ entries (list of tuple): List of configurations:
+ str: Description of model
+ str: Compatible stringlist
+ """
+ fsw.end_node()
+ seq = 0
+ with fsw.add_node('configurations'):
+ for model, compat in entries:
+ seq += 1
+ with fsw.add_node(f'conf-{seq}'):
+ fsw.property('compatible', bytes(compat))
+ fsw.property_string('description', model)
+ fsw.property_string('fdt', f'fdt-{seq}')
+ fsw.property_string('kernel', 'kernel')
+ fsw.end_node()
+
+
+def compress_data(inf, compress):
+ """Compress data using a selected algorithm
+
+ Args:
+ inf (IOBase): Filename containing the data to compress
+ compress (str): Compression algorithm, e.g. 'gzip'
+
+ Return:
+ bytes: Compressed data
+ """
+ if compress == 'none':
+ return inf.read()
+
+ comp = COMP_TOOLS.get(compress)
+ if not comp:
+ raise ValueError(f"Unknown compression algorithm '{compress}'")
+
+ with tempfile.NamedTemporaryFile() as comp_fname:
+ with open(comp_fname.name, 'wb') as outf:
+ done = False
+ for tool in comp.tools.split(','):
+ try:
+ subprocess.call([tool, '-c'], stdin=inf, stdout=outf)
+ done = True
+ break
+ except FileNotFoundError:
+ pass
+ if not done:
+ raise ValueError(f'Missing tool(s): {comp.tools}\n')
+ with open(comp_fname.name, 'rb') as compf:
+ comp_data = compf.read()
+ return comp_data
+
+
+def output_dtb(fsw, seq, fname, arch, compress):
+ """Write out a single devicetree to the FIT
+
+ Args:
+ fsw (libfdt.FdtSw): Object to use for writing
+ seq (int): Sequence number (1 for first)
+ fmame (str): Filename containing the DTB
+ arch: FIT architecture, e.g. 'arm64'
+ compress (str): Compressed algorithm, e.g. 'gzip'
+
+ Returns:
+ tuple:
+ str: Model name
+ bytes: Compatible stringlist
+ """
+ with fsw.add_node(f'fdt-{seq}'):
+ # Get the compatible / model information
+ with open(fname, 'rb') as inf:
+ data = inf.read()
+ fdt = libfdt.FdtRo(data)
+ model = fdt.getprop(0, 'model').as_str()
+ compat = fdt.getprop(0, 'compatible')
+
+ fsw.property_string('description', model)
+ fsw.property_string('type', 'flat_dt')
+ fsw.property_string('arch', arch)
+ fsw.property_string('compression', compress)
+ fsw.property('compatible', bytes(compat))
+
+ with open(fname, 'rb') as inf:
+ compressed = compress_data(inf, compress)
+ fsw.property('data', compressed)
+ return model, compat
+
+
+def build_fit(args):
+ """Build the FIT from the provided files and arguments
+
+ Args:
+ args (Namespace): Program arguments
+
+ Returns:
+ tuple:
+ bytes: FIT data
+ int: Number of configurations generated
+ size: Total uncompressed size of data
+ """
+ seq = 0
+ size = 0
+ fsw = libfdt.FdtSw()
+ setup_fit(fsw, args.name)
+ entries = []
+
+ # Handle the kernel
+ with open(args.kernel, 'rb') as inf:
+ comp_data = compress_data(inf, args.compress)
+ size += os.path.getsize(args.kernel)
+ write_kernel(fsw, comp_data, args)
+
+ for fname in args.dtbs:
+ # Ignore overlay (.dtbo) files
+ if os.path.splitext(fname)[1] == '.dtb':
+ seq += 1
+ size += os.path.getsize(fname)
+ model, compat = output_dtb(fsw, seq, fname, args.arch, args.compress)
+ entries.append([model, compat])
+
+ finish_fit(fsw, entries)
+
+ # Include the kernel itself in the returned file count
+ return fsw.as_fdt().as_bytearray(), seq + 1, size
+
+
+def run_make_fit():
+ """Run the tool's main logic"""
+ args = parse_args()
+
+ out_data, count, size = build_fit(args)
+ with open(args.output, 'wb') as outf:
+ outf.write(out_data)
+
+ ext_fit_size = None
+ if args.external:
+ mkimage = os.environ.get('MKIMAGE', 'mkimage')
+ subprocess.check_call([mkimage, '-E', '-F', args.output],
+ stdout=subprocess.DEVNULL)
+
+ with open(args.output, 'rb') as inf:
+ data = inf.read()
+ ext_fit = libfdt.FdtRo(data)
+ ext_fit_size = ext_fit.totalsize()
+
+ if args.verbose:
+ comp_size = len(out_data)
+ print(f'FIT size {comp_size:#x}/{comp_size / 1024 / 1024:.1f} MB',
+ end='')
+ if ext_fit_size:
+ print(f', header {ext_fit_size:#x}/{ext_fit_size / 1024:.1f} KB',
+ end='')
+ print(f', {count} files, uncompressed {size / 1024 / 1024:.1f} MB')
+
+
+if __name__ == "__main__":
+ sys.exit(run_make_fit())
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 6b37039c9e927b..2f5b91da5afa9e 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1007,6 +1007,8 @@ static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
{
+ Elf_Sym *new_sym;
+
/* If the supplied symbol has a valid name, return it */
if (is_valid_name(elf, sym))
return sym;
@@ -1015,8 +1017,9 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
* Strive to find a better symbol name, but the resulting name may not
* match the symbol referenced in the original code.
*/
- return symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
- true, 20);
+ new_sym = symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
+ true, 20);
+ return new_sym ? new_sym : sym;
}
static bool is_executable_section(struct elf_info *elf, unsigned int secndx)
diff --git a/scripts/module.lds.S b/scripts/module.lds.S
index bf5bcf2836d815..45c67a0994f3e3 100644
--- a/scripts/module.lds.S
+++ b/scripts/module.lds.S
@@ -9,6 +9,8 @@
#define DISCARD_EH_FRAME *(.eh_frame)
#endif
+#include <asm-generic/codetag.lds.h>
+
SECTIONS {
/DISCARD/ : {
*(.discard)
@@ -47,12 +49,17 @@ SECTIONS {
.data : {
*(.data .data.[0-9a-zA-Z_]*)
*(.data..L*)
+ CODETAG_SECTIONS()
}
.rodata : {
*(.rodata .rodata.[0-9a-zA-Z_]*)
*(.rodata..L*)
}
+#else
+ .data : {
+ CODETAG_SECTIONS()
+ }
#endif
}
diff --git a/scripts/package/buildtar b/scripts/package/buildtar
index 72c91a1b832f93..fe816f62a290c9 100755
--- a/scripts/package/buildtar
+++ b/scripts/package/buildtar
@@ -53,18 +53,21 @@ cp -v -- "${objtree}/vmlinux" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
#
# Install arch-specific kernel image(s)
#
+# Note:
+# mips, arm64, and riscv copy the first image found. This may not produce
+# the desired outcome because it may pick up a stale file remaining in the
+# build tree.
+#
case "${ARCH}" in
- x86|i386|x86_64)
- [ -f "${objtree}/arch/x86/boot/bzImage" ] && cp -v -- "${objtree}/arch/x86/boot/bzImage" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
- ;;
alpha)
- [ -f "${objtree}/arch/alpha/boot/vmlinux.gz" ] && cp -v -- "${objtree}/arch/alpha/boot/vmlinux.gz" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
+ cp -v -- "${objtree}/arch/alpha/boot/vmlinux.gz" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
;;
parisc*)
- [ -f "${KBUILD_IMAGE}" ] && cp -v -- "${KBUILD_IMAGE}" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
+ cp -v -- "${KBUILD_IMAGE}" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
[ -f "${objtree}/lifimage" ] && cp -v -- "${objtree}/lifimage" "${tmpdir}/boot/lifimage-${KERNELRELEASE}"
;;
mips)
+ # Please note the following code may copy a stale file.
if [ -f "${objtree}/arch/mips/boot/compressed/vmlinux.bin" ]; then
cp -v -- "${objtree}/arch/mips/boot/compressed/vmlinux.bin" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
elif [ -f "${objtree}/arch/mips/boot/compressed/vmlinux.ecoff" ]; then
@@ -86,6 +89,7 @@ case "${ARCH}" in
fi
;;
arm64)
+ # Please note the following code may copy a stale file.
for i in Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo vmlinuz.efi ; do
if [ -f "${objtree}/arch/arm64/boot/${i}" ] ; then
cp -v -- "${objtree}/arch/arm64/boot/${i}" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
@@ -94,6 +98,7 @@ case "${ARCH}" in
done
;;
riscv)
+ # Please note the following code may copy a stale file.
for i in Image.bz2 Image.gz Image; do
if [ -f "${objtree}/arch/riscv/boot/${i}" ] ; then
cp -v -- "${objtree}/arch/riscv/boot/${i}" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
@@ -102,13 +107,6 @@ case "${ARCH}" in
done
;;
*)
- [ -f "${KBUILD_IMAGE}" ] && cp -v -- "${KBUILD_IMAGE}" "${tmpdir}/boot/vmlinux-kbuild-${KERNELRELEASE}"
- echo "" >&2
- echo '** ** ** WARNING ** ** **' >&2
- echo "" >&2
- echo "Your architecture did not define any architecture-dependent files" >&2
- echo "to be placed into the tarball. Please add those to ${0} ..." >&2
- echo "" >&2
- sleep 5
+ cp -v -- "${KBUILD_IMAGE}" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
;;
esac
diff --git a/scripts/unifdef.c b/scripts/unifdef.c
index db00e3e30a59d7..ff15efd6e7d749 100644
--- a/scripts/unifdef.c
+++ b/scripts/unifdef.c
@@ -203,7 +203,7 @@ static int depth; /* current #if nesting */
static int delcount; /* count of deleted lines */
static unsigned blankcount; /* count of blank lines */
static unsigned blankmax; /* maximum recent blankcount */
-static bool constexpr; /* constant #if expression */
+static bool constexpression; /* constant #if expression */
static bool zerosyms = true; /* to format symdepth output */
static bool firstsym; /* ditto */
@@ -819,7 +819,7 @@ static const struct ops {
/*
* Function for evaluating the innermost parts of expressions,
* viz. !expr (expr) number defined(symbol) symbol
- * We reset the constexpr flag in the last two cases.
+ * We reset the constexpression flag in the last two cases.
*/
static Linetype
eval_unary(const struct ops *ops, int *valp, const char **cpp)
@@ -877,7 +877,7 @@ eval_unary(const struct ops *ops, int *valp, const char **cpp)
cp = skipcomment(cp);
if (defparen && *cp++ != ')')
return (LT_ERROR);
- constexpr = false;
+ constexpression = false;
} else if (!endsym(*cp)) {
debug("eval%d symbol", ops - eval_ops);
sym = findsym(cp);
@@ -895,7 +895,7 @@ eval_unary(const struct ops *ops, int *valp, const char **cpp)
lt = *valp ? LT_TRUE : LT_FALSE;
cp = skipargs(cp);
}
- constexpr = false;
+ constexpression = false;
} else {
debug("eval%d bad expr", ops - eval_ops);
return (LT_ERROR);
@@ -955,10 +955,10 @@ ifeval(const char **cpp)
int val = 0;
debug("eval %s", *cpp);
- constexpr = killconsts ? false : true;
+ constexpression = killconsts ? false : true;
ret = eval_table(eval_ops, &val, cpp);
debug("eval = %d", val);
- return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
+ return (constexpression ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
}
/*
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index 2cff851ebfd7e1..effbf5982be10f 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -255,6 +255,21 @@ config INIT_ON_FREE_DEFAULT_ON
touching "cold" memory areas. Most cases see 3-5% impact. Some
synthetic workloads have measured as high as 8%.
+config INIT_MLOCKED_ON_FREE_DEFAULT_ON
+ bool "Enable mlocked memory zeroing on free"
+ depends on !KMSAN
+ help
+ This config has the effect of setting "init_mlocked_on_free=1"
+ on the kernel command line. If it is enabled, all mlocked process
+ memory is zeroed when freed. This restriction to mlocked memory
+ improves performance over "init_on_free" but can still be used to
+ protect confidential data like key material from content exposures
+ to other processes, as well as live forensics and cold boot attacks.
+ Any non-mlocked memory is not cleared before it is reassigned. This
+ configuration can be overwritten by setting "init_mlocked_on_free=0"
+ on the command line. The "init_on_free" boot option takes
+ precedence over "init_mlocked_on_free".
+
config CC_HAS_ZERO_CALL_USED_REGS
def_bool $(cc-option,-fzero-call-used-regs=used-gpr)
# https://github.com/ClangBuiltLinux/linux/issues/1766
diff --git a/security/security.c b/security/security.c
index 7e118858b545c1..0a9a0ac3f26624 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1793,11 +1793,11 @@ int security_path_mknod(const struct path *dir, struct dentry *dentry,
EXPORT_SYMBOL(security_path_mknod);
/**
- * security_path_post_mknod() - Update inode security field after file creation
+ * security_path_post_mknod() - Update inode security after reg file creation
* @idmap: idmap of the mount
* @dentry: new file
*
- * Update inode security field after a file has been created.
+ * Update inode security field after a regular file has been created.
*/
void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry)
{
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 0619a1cbbfbe41..074d6c2714eb55 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -2123,7 +2123,6 @@ static struct file_system_type sel_fs_type = {
.kill_sb = sel_kill_sb,
};
-static struct vfsmount *selinuxfs_mount __ro_after_init;
struct path selinux_null __ro_after_init;
static int __init init_sel_fs(void)
@@ -2145,18 +2144,21 @@ static int __init init_sel_fs(void)
return err;
}
- selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type);
- if (IS_ERR(selinuxfs_mount)) {
+ selinux_null.mnt = kern_mount(&sel_fs_type);
+ if (IS_ERR(selinux_null.mnt)) {
pr_err("selinuxfs: could not mount!\n");
- err = PTR_ERR(selinuxfs_mount);
- selinuxfs_mount = NULL;
+ err = PTR_ERR(selinux_null.mnt);
+ selinux_null.mnt = NULL;
+ return err;
}
+
selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root,
&null_name);
if (IS_ERR(selinux_null.dentry)) {
pr_err("selinuxfs: could not lookup null!\n");
err = PTR_ERR(selinux_null.dentry);
selinux_null.dentry = NULL;
+ return err;
}
return err;
diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c
index b8ff5cccd0c811..5431d2c4942106 100644
--- a/sound/aoa/soundbus/i2sbus/core.c
+++ b/sound/aoa/soundbus/i2sbus/core.c
@@ -158,7 +158,7 @@ static int i2sbus_add_dev(struct macio_dev *macio,
struct device_node *child, *sound = NULL;
struct resource *r;
int i, layout = 0, rlen, ok = force;
- char node_name[6];
+ char node_name[8];
static const char *rnames[] = { "i2sbus: %pOFn (control)",
"i2sbus: %pOFn (tx)",
"i2sbus: %pOFn (rx)" };
diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
index b141024830ecc8..ee6ac649df836d 100644
--- a/sound/core/seq/seq_ump_convert.c
+++ b/sound/core/seq/seq_ump_convert.c
@@ -428,7 +428,7 @@ static int cvt_ump_midi2_to_midi1(struct snd_seq_client *dest,
midi1->note.group = midi2->note.group;
midi1->note.status = midi2->note.status;
midi1->note.channel = midi2->note.channel;
- switch (midi2->note.status << 4) {
+ switch (midi2->note.status) {
case UMP_MSG_STATUS_NOTE_ON:
case UMP_MSG_STATUS_NOTE_OFF:
midi1->note.note = midi2->note.note;
diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c
index 696a958d93e9c3..088cff799e0bee 100644
--- a/sound/hda/intel-nhlt.c
+++ b/sound/hda/intel-nhlt.c
@@ -343,3 +343,29 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt,
return NULL;
}
EXPORT_SYMBOL(intel_nhlt_get_endpoint_blob);
+
+int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt,
+ u8 virtual_bus_id)
+{
+ struct nhlt_endpoint *epnt;
+ int i;
+
+ if (!nhlt)
+ return -EINVAL;
+
+ epnt = (struct nhlt_endpoint *)nhlt->desc;
+ for (i = 0; i < nhlt->endpoint_count; i++) {
+ /* for SSP link the virtual bus id is the SSP port number */
+ if (epnt->linktype == NHLT_LINK_SSP &&
+ epnt->virtual_bus_id == virtual_bus_id) {
+ dev_dbg(dev, "SSP%d: dev_type=%d\n", virtual_bus_id,
+ epnt->device_type);
+ return epnt->device_type;
+ }
+
+ epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length);
+ }
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL(intel_nhlt_ssp_device_type);
diff --git a/sound/oss/dmasound/dmasound_paula.c b/sound/oss/dmasound/dmasound_paula.c
index 0ba8f0c4cd99a2..3a593da09280dc 100644
--- a/sound/oss/dmasound/dmasound_paula.c
+++ b/sound/oss/dmasound/dmasound_paula.c
@@ -725,7 +725,13 @@ static void __exit amiga_audio_remove(struct platform_device *pdev)
dmasound_deinit();
}
-static struct platform_driver amiga_audio_driver = {
+/*
+ * amiga_audio_remove() lives in .exit.text. For drivers registered via
+ * module_platform_driver_probe() this is ok because they cannot get unbound at
+ * runtime. So mark the driver struct with __refdata to prevent modpost
+ * triggering a section mismatch warning.
+ */
+static struct platform_driver amiga_audio_driver __refdata = {
.remove_new = __exit_p(amiga_audio_remove),
.driver = {
.name = "amiga-audio",
diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c
index fe72e7d7724127..dadeda7758ceeb 100644
--- a/sound/pci/emu10k1/emu10k1.c
+++ b/sound/pci/emu10k1/emu10k1.c
@@ -189,8 +189,7 @@ static int snd_emu10k1_suspend(struct device *dev)
emu->suspend = 1;
- cancel_work_sync(&emu->emu1010.firmware_work);
- cancel_work_sync(&emu->emu1010.clock_work);
+ cancel_work_sync(&emu->emu1010.work);
snd_ac97_suspend(emu->ac97);
diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c
index d36234b88fb421..941bfbf812ed30 100644
--- a/sound/pci/emu10k1/emu10k1_callback.c
+++ b/sound/pci/emu10k1/emu10k1_callback.c
@@ -255,7 +255,7 @@ lookup_voices(struct snd_emux *emu, struct snd_emu10k1 *hw,
/* check if sample is finished playing (non-looping only) */
if (bp != best + V_OFF && bp != best + V_FREE &&
(vp->reg.sample_mode & SNDRV_SFNT_SAMPLE_SINGLESHOT)) {
- val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch) - 64;
+ val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch);
if (val >= vp->reg.loopstart)
bp = best + V_OFF;
}
@@ -362,7 +362,7 @@ start_voice(struct snd_emux_voice *vp)
map = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0);
- addr = vp->reg.start + 64;
+ addr = vp->reg.start;
temp = vp->reg.parm.filterQ;
ccca = (temp << 28) | addr;
if (vp->apitch < 0xe400)
@@ -430,9 +430,6 @@ start_voice(struct snd_emux_voice *vp)
/* Q & current address (Q 4bit value, MSB) */
CCCA, ccca,
- /* cache */
- CCR, REG_VAL_PUT(CCR_CACHEINVALIDSIZE, 64),
-
/* reset volume */
VTFT, vtarget | vp->ftarget,
CVCF, vtarget | CVCF_CURRENTFILTER_MASK,
diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c
index de5c41e578e1f4..8ccc0178360ce8 100644
--- a/sound/pci/emu10k1/emu10k1_main.c
+++ b/sound/pci/emu10k1/emu10k1_main.c
@@ -732,69 +732,67 @@ static int snd_emu1010_load_firmware(struct snd_emu10k1 *emu, int dock,
return snd_emu1010_load_firmware_entry(emu, *fw);
}
-static void emu1010_firmware_work(struct work_struct *work)
+static void snd_emu1010_load_dock_firmware(struct snd_emu10k1 *emu)
{
- struct snd_emu10k1 *emu;
- u32 tmp, tmp2, reg;
+ u32 tmp, tmp2;
int err;
- emu = container_of(work, struct snd_emu10k1,
- emu1010.firmware_work);
- if (emu->card->shutdown)
+ // The docking events clearly arrive prematurely - while the
+ // Dock's FPGA seems to be successfully programmed, the Dock
+ // fails to initialize subsequently if we don't give it some
+ // time to "warm up" here.
+ msleep(200);
+
+ dev_info(emu->card->dev, "emu1010: Loading Audio Dock Firmware\n");
+ /* Return to Audio Dock programming mode */
+ snd_emu1010_fpga_write(emu, EMU_HANA_FPGA_CONFIG,
+ EMU_HANA_FPGA_CONFIG_AUDIODOCK);
+ err = snd_emu1010_load_firmware(emu, 1, &emu->dock_fw);
+ if (err < 0)
return;
-#ifdef CONFIG_PM_SLEEP
- if (emu->suspend)
+ snd_emu1010_fpga_write(emu, EMU_HANA_FPGA_CONFIG, 0);
+
+ snd_emu1010_fpga_read(emu, EMU_HANA_ID, &tmp);
+ dev_dbg(emu->card->dev, "emu1010: EMU_HANA+DOCK_ID = 0x%x\n", tmp);
+ if ((tmp & 0x1f) != 0x15) {
+ /* FPGA failed to be programmed */
+ dev_err(emu->card->dev,
+ "emu1010: Loading Audio Dock Firmware failed, reg = 0x%x\n",
+ tmp);
return;
-#endif
+ }
+ dev_info(emu->card->dev, "emu1010: Audio Dock Firmware loaded\n");
+
+ snd_emu1010_fpga_read(emu, EMU_DOCK_MAJOR_REV, &tmp);
+ snd_emu1010_fpga_read(emu, EMU_DOCK_MINOR_REV, &tmp2);
+ dev_info(emu->card->dev, "Audio Dock ver: %u.%u\n", tmp, tmp2);
+
+ /* Allow DLL to settle, to sync clocking between 1010 and Dock */
+ msleep(10);
+}
+
+static void emu1010_dock_event(struct snd_emu10k1 *emu)
+{
+ u32 reg;
+
snd_emu1010_fpga_read(emu, EMU_HANA_OPTION_CARDS, &reg); /* OPTIONS: Which cards are attached to the EMU */
if (reg & EMU_HANA_OPTION_DOCK_OFFLINE) {
/* Audio Dock attached */
- /* Return to Audio Dock programming mode */
- dev_info(emu->card->dev,
- "emu1010: Loading Audio Dock Firmware\n");
- snd_emu1010_fpga_write(emu, EMU_HANA_FPGA_CONFIG,
- EMU_HANA_FPGA_CONFIG_AUDIODOCK);
- err = snd_emu1010_load_firmware(emu, 1, &emu->dock_fw);
- if (err < 0)
- return;
- snd_emu1010_fpga_write(emu, EMU_HANA_FPGA_CONFIG, 0);
- snd_emu1010_fpga_read(emu, EMU_HANA_ID, &tmp);
- dev_info(emu->card->dev,
- "emu1010: EMU_HANA+DOCK_ID = 0x%x\n", tmp);
- if ((tmp & 0x1f) != 0x15) {
- /* FPGA failed to be programmed */
- dev_info(emu->card->dev,
- "emu1010: Loading Audio Dock Firmware file failed, reg = 0x%x\n",
- tmp);
- return;
- }
- dev_info(emu->card->dev,
- "emu1010: Audio Dock Firmware loaded\n");
- snd_emu1010_fpga_read(emu, EMU_DOCK_MAJOR_REV, &tmp);
- snd_emu1010_fpga_read(emu, EMU_DOCK_MINOR_REV, &tmp2);
- dev_info(emu->card->dev, "Audio Dock ver: %u.%u\n", tmp, tmp2);
- /* Sync clocking between 1010 and Dock */
- /* Allow DLL to settle */
- msleep(10);
+ snd_emu1010_load_dock_firmware(emu);
/* Unmute all. Default is muted after a firmware load */
snd_emu1010_fpga_write(emu, EMU_HANA_UNMUTE, EMU_UNMUTE);
+ } else if (!(reg & EMU_HANA_OPTION_DOCK_ONLINE)) {
+ /* Audio Dock removed */
+ dev_info(emu->card->dev, "emu1010: Audio Dock detached\n");
+ /* The hardware auto-mutes all, so we unmute again */
+ snd_emu1010_fpga_write(emu, EMU_HANA_UNMUTE, EMU_UNMUTE);
}
}
-static void emu1010_clock_work(struct work_struct *work)
+static void emu1010_clock_event(struct snd_emu10k1 *emu)
{
- struct snd_emu10k1 *emu;
struct snd_ctl_elem_id id;
- emu = container_of(work, struct snd_emu10k1,
- emu1010.clock_work);
- if (emu->card->shutdown)
- return;
-#ifdef CONFIG_PM_SLEEP
- if (emu->suspend)
- return;
-#endif
-
spin_lock_irq(&emu->reg_lock);
// This is the only thing that can actually happen.
emu->emu1010.clock_source = emu->emu1010.clock_fallback;
@@ -805,21 +803,44 @@ static void emu1010_clock_work(struct work_struct *work)
snd_ctl_notify(emu->card, SNDRV_CTL_EVENT_MASK_VALUE, &id);
}
-static void emu1010_interrupt(struct snd_emu10k1 *emu)
+static void emu1010_work(struct work_struct *work)
{
+ struct snd_emu10k1 *emu;
u32 sts;
+ emu = container_of(work, struct snd_emu10k1, emu1010.work);
+ if (emu->card->shutdown)
+ return;
+#ifdef CONFIG_PM_SLEEP
+ if (emu->suspend)
+ return;
+#endif
+
+ snd_emu1010_fpga_lock(emu);
+
snd_emu1010_fpga_read(emu, EMU_HANA_IRQ_STATUS, &sts);
- if (sts & EMU_HANA_IRQ_DOCK_LOST) {
- /* Audio Dock removed */
- dev_info(emu->card->dev, "emu1010: Audio Dock detached\n");
- /* The hardware auto-mutes all, so we unmute again */
- snd_emu1010_fpga_write(emu, EMU_HANA_UNMUTE, EMU_UNMUTE);
- } else if (sts & EMU_HANA_IRQ_DOCK) {
- schedule_work(&emu->emu1010.firmware_work);
- }
+
+ // The distinction of the IRQ status bits is unreliable,
+ // so we dispatch later based on option card status.
+ if (sts & (EMU_HANA_IRQ_DOCK | EMU_HANA_IRQ_DOCK_LOST))
+ emu1010_dock_event(emu);
+
if (sts & EMU_HANA_IRQ_WCLK_CHANGED)
- schedule_work(&emu->emu1010.clock_work);
+ emu1010_clock_event(emu);
+
+ snd_emu1010_fpga_unlock(emu);
+}
+
+static void emu1010_interrupt(struct snd_emu10k1 *emu)
+{
+ // We get an interrupt on each GPIO input pin change, but we
+ // care only about the ones triggered by the dedicated pin.
+ u16 sts = inw(emu->port + A_GPIO);
+ u16 bit = emu->card_capabilities->ca0108_chip ? 0x2000 : 0x8000;
+ if (!(sts & bit))
+ return;
+
+ schedule_work(&emu->emu1010.work);
}
/*
@@ -841,6 +862,8 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu)
* Proper init follows in snd_emu10k1_init(). */
outl(HCFG_LOCKSOUNDCACHE | HCFG_LOCKTANKCACHE_MASK, emu->port + HCFG);
+ snd_emu1010_fpga_lock(emu);
+
/* Disable 48Volt power to Audio Dock */
snd_emu1010_fpga_write(emu, EMU_HANA_DOCK_PWR, 0);
@@ -866,7 +889,7 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu)
err = snd_emu1010_load_firmware(emu, 0, &emu->firmware);
if (err < 0) {
dev_info(emu->card->dev, "emu1010: Loading Firmware failed\n");
- return err;
+ goto fail;
}
/* ID, should read & 0x7f = 0x55 when FPGA programmed. */
@@ -876,7 +899,8 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu)
dev_info(emu->card->dev,
"emu1010: Loading Hana Firmware file failed, reg = 0x%x\n",
reg);
- return -ENODEV;
+ err = -ENODEV;
+ goto fail;
}
dev_info(emu->card->dev, "emu1010: Hana Firmware loaded\n");
@@ -889,7 +913,7 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu)
snd_emu1010_fpga_read(emu, EMU_HANA_OPTION_CARDS, &reg);
dev_info(emu->card->dev, "emu1010: Card options = 0x%x\n", reg);
if (reg & EMU_HANA_OPTION_DOCK_OFFLINE)
- schedule_work(&emu->emu1010.firmware_work);
+ snd_emu1010_load_dock_firmware(emu);
if (emu->card_capabilities->no_adat) {
emu->emu1010.optical_in = 0; /* IN_SPDIF */
emu->emu1010.optical_out = 0; /* OUT_SPDIF */
@@ -936,7 +960,9 @@ static int snd_emu10k1_emu1010_init(struct snd_emu10k1 *emu)
// so it is safe to simply enable the outputs.
snd_emu1010_fpga_write(emu, EMU_HANA_UNMUTE, EMU_UNMUTE);
- return 0;
+fail:
+ snd_emu1010_fpga_unlock(emu);
+ return err;
}
/*
* Create the EMU10K1 instance
@@ -958,10 +984,10 @@ static void snd_emu10k1_free(struct snd_card *card)
}
if (emu->card_capabilities->emu_model == EMU_MODEL_EMU1010) {
/* Disable 48Volt power to Audio Dock */
- snd_emu1010_fpga_write(emu, EMU_HANA_DOCK_PWR, 0);
+ snd_emu1010_fpga_write_lock(emu, EMU_HANA_DOCK_PWR, 0);
}
- cancel_work_sync(&emu->emu1010.firmware_work);
- cancel_work_sync(&emu->emu1010.clock_work);
+ cancel_work_sync(&emu->emu1010.work);
+ mutex_destroy(&emu->emu1010.lock);
release_firmware(emu->firmware);
release_firmware(emu->dock_fw);
snd_util_memhdr_free(emu->memhdr);
@@ -1540,8 +1566,8 @@ int snd_emu10k1_create(struct snd_card *card,
emu->irq = -1;
emu->synth = NULL;
emu->get_synth_voice = NULL;
- INIT_WORK(&emu->emu1010.firmware_work, emu1010_firmware_work);
- INIT_WORK(&emu->emu1010.clock_work, emu1010_clock_work);
+ INIT_WORK(&emu->emu1010.work, emu1010_work);
+ mutex_init(&emu->emu1010.lock);
/* read revision & serial */
emu->revision = pci->revision;
pci_read_config_dword(pci, PCI_SUBSYSTEM_VENDOR_ID, &emu->serial);
diff --git a/sound/pci/emu10k1/emumixer.c b/sound/pci/emu10k1/emumixer.c
index 0a32ea53d8c648..05b98d9b547b2a 100644
--- a/sound/pci/emu10k1/emumixer.c
+++ b/sound/pci/emu10k1/emumixer.c
@@ -661,7 +661,9 @@ static int snd_emu1010_output_source_put(struct snd_kcontrol *kcontrol,
change = (emu->emu1010.output_source[channel] != val);
if (change) {
emu->emu1010.output_source[channel] = val;
+ snd_emu1010_fpga_lock(emu);
snd_emu1010_output_source_apply(emu, channel, val);
+ snd_emu1010_fpga_unlock(emu);
}
return change;
}
@@ -705,7 +707,9 @@ static int snd_emu1010_input_source_put(struct snd_kcontrol *kcontrol,
change = (emu->emu1010.input_source[channel] != val);
if (change) {
emu->emu1010.input_source[channel] = val;
+ snd_emu1010_fpga_lock(emu);
snd_emu1010_input_source_apply(emu, channel, val);
+ snd_emu1010_fpga_unlock(emu);
}
return change;
}
@@ -774,7 +778,7 @@ static int snd_emu1010_adc_pads_put(struct snd_kcontrol *kcontrol, struct snd_ct
cache = cache & ~mask;
change = (cache != emu->emu1010.adc_pads);
if (change) {
- snd_emu1010_fpga_write(emu, EMU_HANA_ADC_PADS, cache );
+ snd_emu1010_fpga_write_lock(emu, EMU_HANA_ADC_PADS, cache );
emu->emu1010.adc_pads = cache;
}
@@ -832,7 +836,7 @@ static int snd_emu1010_dac_pads_put(struct snd_kcontrol *kcontrol, struct snd_ct
cache = cache & ~mask;
change = (cache != emu->emu1010.dac_pads);
if (change) {
- snd_emu1010_fpga_write(emu, EMU_HANA_DAC_PADS, cache );
+ snd_emu1010_fpga_write_lock(emu, EMU_HANA_DAC_PADS, cache );
emu->emu1010.dac_pads = cache;
}
@@ -980,6 +984,7 @@ static int snd_emu1010_clock_source_put(struct snd_kcontrol *kcontrol,
val = ucontrol->value.enumerated.item[0] ;
if (val >= emu_ci->num)
return -EINVAL;
+ snd_emu1010_fpga_lock(emu);
spin_lock_irq(&emu->reg_lock);
change = (emu->emu1010.clock_source != val);
if (change) {
@@ -996,6 +1001,7 @@ static int snd_emu1010_clock_source_put(struct snd_kcontrol *kcontrol,
} else {
spin_unlock_irq(&emu->reg_lock);
}
+ snd_emu1010_fpga_unlock(emu);
return change;
}
@@ -1041,7 +1047,7 @@ static int snd_emu1010_clock_fallback_put(struct snd_kcontrol *kcontrol,
change = (emu->emu1010.clock_fallback != val);
if (change) {
emu->emu1010.clock_fallback = val;
- snd_emu1010_fpga_write(emu, EMU_HANA_DEFCLOCK, 1 - val);
+ snd_emu1010_fpga_write_lock(emu, EMU_HANA_DEFCLOCK, 1 - val);
}
return change;
}
@@ -1093,7 +1099,7 @@ static int snd_emu1010_optical_out_put(struct snd_kcontrol *kcontrol,
emu->emu1010.optical_out = val;
tmp = (emu->emu1010.optical_in ? EMU_HANA_OPTICAL_IN_ADAT : EMU_HANA_OPTICAL_IN_SPDIF) |
(emu->emu1010.optical_out ? EMU_HANA_OPTICAL_OUT_ADAT : EMU_HANA_OPTICAL_OUT_SPDIF);
- snd_emu1010_fpga_write(emu, EMU_HANA_OPTICAL_TYPE, tmp);
+ snd_emu1010_fpga_write_lock(emu, EMU_HANA_OPTICAL_TYPE, tmp);
}
return change;
}
@@ -1144,7 +1150,7 @@ static int snd_emu1010_optical_in_put(struct snd_kcontrol *kcontrol,
emu->emu1010.optical_in = val;
tmp = (emu->emu1010.optical_in ? EMU_HANA_OPTICAL_IN_ADAT : EMU_HANA_OPTICAL_IN_SPDIF) |
(emu->emu1010.optical_out ? EMU_HANA_OPTICAL_OUT_ADAT : EMU_HANA_OPTICAL_OUT_SPDIF);
- snd_emu1010_fpga_write(emu, EMU_HANA_OPTICAL_TYPE, tmp);
+ snd_emu1010_fpga_write_lock(emu, EMU_HANA_OPTICAL_TYPE, tmp);
}
return change;
}
@@ -2323,7 +2329,9 @@ int snd_emu10k1_mixer(struct snd_emu10k1 *emu,
for (i = 0; i < emu_ri->n_outs; i++)
emu->emu1010.output_source[i] =
emu1010_map_source(emu_ri, emu_ri->out_dflts[i]);
+ snd_emu1010_fpga_lock(emu);
snd_emu1010_apply_sources(emu);
+ snd_emu1010_fpga_unlock(emu);
kctl = emu->ctl_clock_source = snd_ctl_new1(&snd_emu1010_clock_source, emu);
err = snd_ctl_add(card, kctl);
diff --git a/sound/pci/emu10k1/emuproc.c b/sound/pci/emu10k1/emuproc.c
index 2f80fd91017c39..737c28d31b41a3 100644
--- a/sound/pci/emu10k1/emuproc.c
+++ b/sound/pci/emu10k1/emuproc.c
@@ -165,6 +165,8 @@ static void snd_emu10k1_proc_spdif_read(struct snd_info_entry *entry,
u32 value2;
if (emu->card_capabilities->emu_model) {
+ snd_emu1010_fpga_lock(emu);
+
// This represents the S/PDIF lock status on 0404b, which is
// kinda weird and unhelpful, because monitoring it via IRQ is
// impractical (one gets an IRQ flood as long as it is desynced).
@@ -197,6 +199,8 @@ static void snd_emu10k1_proc_spdif_read(struct snd_info_entry *entry,
snd_iprintf(buffer, "\nS/PDIF mode: %s%s\n",
value & EMU_HANA_SPDIF_MODE_RX_PRO ? "professional" : "consumer",
value & EMU_HANA_SPDIF_MODE_RX_NOCOPY ? ", no copy" : "");
+
+ snd_emu1010_fpga_unlock(emu);
} else {
snd_emu10k1_proc_spdif_status(emu, buffer, "CD-ROM S/PDIF In", CDCS, CDSRCS);
snd_emu10k1_proc_spdif_status(emu, buffer, "Optical or Coax S/PDIF In", GPSCS, GPSRCS);
@@ -458,6 +462,9 @@ static void snd_emu_proc_emu1010_reg_read(struct snd_info_entry *entry,
struct snd_emu10k1 *emu = entry->private_data;
u32 value;
int i;
+
+ snd_emu1010_fpga_lock(emu);
+
snd_iprintf(buffer, "EMU1010 Registers:\n\n");
for(i = 0; i < 0x40; i+=1) {
@@ -496,6 +503,8 @@ static void snd_emu_proc_emu1010_reg_read(struct snd_info_entry *entry,
snd_emu_proc_emu1010_link_read(emu, buffer, 0x701);
}
}
+
+ snd_emu1010_fpga_unlock(emu);
}
static void snd_emu_proc_io_reg_read(struct snd_info_entry *entry,
diff --git a/sound/pci/emu10k1/io.c b/sound/pci/emu10k1/io.c
index 74df2330015f66..f4a1c2d4b07875 100644
--- a/sound/pci/emu10k1/io.c
+++ b/sound/pci/emu10k1/io.c
@@ -285,24 +285,33 @@ static void snd_emu1010_fpga_write_locked(struct snd_emu10k1 *emu, u32 reg, u32
outw(value, emu->port + A_GPIO);
udelay(10);
outw(value | 0x80 , emu->port + A_GPIO); /* High bit clocks the value into the fpga. */
+ udelay(10);
}
void snd_emu1010_fpga_write(struct snd_emu10k1 *emu, u32 reg, u32 value)
{
- unsigned long flags;
+ if (snd_BUG_ON(!mutex_is_locked(&emu->emu1010.lock)))
+ return;
+ snd_emu1010_fpga_write_locked(emu, reg, value);
+}
- spin_lock_irqsave(&emu->emu_lock, flags);
+void snd_emu1010_fpga_write_lock(struct snd_emu10k1 *emu, u32 reg, u32 value)
+{
+ snd_emu1010_fpga_lock(emu);
snd_emu1010_fpga_write_locked(emu, reg, value);
- spin_unlock_irqrestore(&emu->emu_lock, flags);
+ snd_emu1010_fpga_unlock(emu);
}
-static void snd_emu1010_fpga_read_locked(struct snd_emu10k1 *emu, u32 reg, u32 *value)
+void snd_emu1010_fpga_read(struct snd_emu10k1 *emu, u32 reg, u32 *value)
{
// The higest input pin is used as the designated interrupt trigger,
// so it needs to be masked out.
// But note that any other input pin change will also cause an IRQ,
// so using this function often causes an IRQ as a side effect.
u32 mask = emu->card_capabilities->ca0108_chip ? 0x1f : 0x7f;
+
+ if (snd_BUG_ON(!mutex_is_locked(&emu->emu1010.lock)))
+ return;
if (snd_BUG_ON(reg > 0x3f))
return;
reg += 0x40; /* 0x40 upwards are registers. */
@@ -313,47 +322,31 @@ static void snd_emu1010_fpga_read_locked(struct snd_emu10k1 *emu, u32 reg, u32 *
*value = ((inw(emu->port + A_GPIO) >> 8) & mask);
}
-void snd_emu1010_fpga_read(struct snd_emu10k1 *emu, u32 reg, u32 *value)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&emu->emu_lock, flags);
- snd_emu1010_fpga_read_locked(emu, reg, value);
- spin_unlock_irqrestore(&emu->emu_lock, flags);
-}
-
/* Each Destination has one and only one Source,
* but one Source can feed any number of Destinations simultaneously.
*/
void snd_emu1010_fpga_link_dst_src_write(struct snd_emu10k1 *emu, u32 dst, u32 src)
{
- unsigned long flags;
-
if (snd_BUG_ON(dst & ~0x71f))
return;
if (snd_BUG_ON(src & ~0x71f))
return;
- spin_lock_irqsave(&emu->emu_lock, flags);
- snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTHI, dst >> 8);
- snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTLO, dst & 0x1f);
- snd_emu1010_fpga_write_locked(emu, EMU_HANA_SRCHI, src >> 8);
- snd_emu1010_fpga_write_locked(emu, EMU_HANA_SRCLO, src & 0x1f);
- spin_unlock_irqrestore(&emu->emu_lock, flags);
+ snd_emu1010_fpga_write(emu, EMU_HANA_DESTHI, dst >> 8);
+ snd_emu1010_fpga_write(emu, EMU_HANA_DESTLO, dst & 0x1f);
+ snd_emu1010_fpga_write(emu, EMU_HANA_SRCHI, src >> 8);
+ snd_emu1010_fpga_write(emu, EMU_HANA_SRCLO, src & 0x1f);
}
u32 snd_emu1010_fpga_link_dst_src_read(struct snd_emu10k1 *emu, u32 dst)
{
- unsigned long flags;
u32 hi, lo;
if (snd_BUG_ON(dst & ~0x71f))
return 0;
- spin_lock_irqsave(&emu->emu_lock, flags);
- snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTHI, dst >> 8);
- snd_emu1010_fpga_write_locked(emu, EMU_HANA_DESTLO, dst & 0x1f);
- snd_emu1010_fpga_read_locked(emu, EMU_HANA_SRCHI, &hi);
- snd_emu1010_fpga_read_locked(emu, EMU_HANA_SRCLO, &lo);
- spin_unlock_irqrestore(&emu->emu_lock, flags);
+ snd_emu1010_fpga_write(emu, EMU_HANA_DESTHI, dst >> 8);
+ snd_emu1010_fpga_write(emu, EMU_HANA_DESTLO, dst & 0x1f);
+ snd_emu1010_fpga_read(emu, EMU_HANA_SRCHI, &hi);
+ snd_emu1010_fpga_read(emu, EMU_HANA_SRCLO, &lo);
return (hi << 8) | lo;
}
diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c
index d3fa6e136744de..990b5bd717a1cb 100644
--- a/sound/pci/hda/cs35l41_hda.c
+++ b/sound/pci/hda/cs35l41_hda.c
@@ -13,6 +13,7 @@
#include <sound/soc.h>
#include <linux/pm_runtime.h>
#include <linux/spi/spi.h>
+#include <linux/vmalloc.h>
#include "hda_local.h"
#include "hda_auto_parser.h"
#include "hda_jack.h"
diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c
index 72ec872afb8d27..8fb688e4141485 100644
--- a/sound/pci/hda/cs35l41_hda_property.c
+++ b/sound/pci/hda/cs35l41_hda_property.c
@@ -108,7 +108,10 @@ static const struct cs35l41_config cs35l41_config_table[] = {
{ "10431F12", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
{ "10431F1F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 },
{ "10431F62", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
+ { "10433A60", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
{ "17AA386F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
+ { "17AA3877", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
+ { "17AA3878", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
{ "17AA38A9", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
{ "17AA38AB", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
{ "17AA38B4", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
@@ -496,7 +499,10 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = {
{ "CSC3551", "10431F12", generic_dsd_config },
{ "CSC3551", "10431F1F", generic_dsd_config },
{ "CSC3551", "10431F62", generic_dsd_config },
+ { "CSC3551", "10433A60", generic_dsd_config },
{ "CSC3551", "17AA386F", generic_dsd_config },
+ { "CSC3551", "17AA3877", generic_dsd_config },
+ { "CSC3551", "17AA3878", generic_dsd_config },
{ "CSC3551", "17AA38A9", generic_dsd_config },
{ "CSC3551", "17AA38AB", generic_dsd_config },
{ "CSC3551", "17AA38B4", generic_dsd_config },
diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c
index 41974b3897a723..558c1f38fe9718 100644
--- a/sound/pci/hda/cs35l56_hda.c
+++ b/sound/pci/hda/cs35l56_hda.c
@@ -644,6 +644,8 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
ret = cs35l56_wait_for_firmware_boot(&cs35l56->base);
if (ret)
goto err_powered_up;
+
+ regcache_cache_only(cs35l56->base.regmap, false);
}
/* Disable auto-hibernate so that runtime_pm has control */
@@ -1002,6 +1004,8 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id)
if (ret)
goto err;
+ regcache_cache_only(cs35l56->base.regmap, false);
+
ret = cs35l56_set_patch(&cs35l56->base);
if (ret)
goto err;
@@ -1024,8 +1028,8 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id)
goto err;
}
- dev_dbg(cs35l56->base.dev, "DSP system name: '%s', amp name: '%s'\n",
- cs35l56->system_name, cs35l56->amp_name);
+ dev_info(cs35l56->base.dev, "DSP system name: '%s', amp name: '%s'\n",
+ cs35l56->system_name, cs35l56->amp_name);
regmap_multi_reg_write(cs35l56->base.regmap, cs35l56_hda_dai_config,
ARRAY_SIZE(cs35l56_hda_dai_config));
@@ -1045,14 +1049,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id)
pm_runtime_mark_last_busy(cs35l56->base.dev);
pm_runtime_enable(cs35l56->base.dev);
+ cs35l56->base.init_done = true;
+
ret = component_add(cs35l56->base.dev, &cs35l56_hda_comp_ops);
if (ret) {
dev_err(cs35l56->base.dev, "Register component failed: %d\n", ret);
goto pm_err;
}
- cs35l56->base.init_done = true;
-
return 0;
pm_err:
diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c
index 13beee807308f1..40f2f97944d54c 100644
--- a/sound/pci/hda/cs35l56_hda_i2c.c
+++ b/sound/pci/hda/cs35l56_hda_i2c.c
@@ -56,10 +56,19 @@ static const struct i2c_device_id cs35l56_hda_i2c_id[] = {
{}
};
+static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
+ { "CSC3554", 0 },
+ { "CSC3556", 0 },
+ { "CSC3557", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
+
static struct i2c_driver cs35l56_hda_i2c_driver = {
.driver = {
- .name = "cs35l56-hda",
- .pm = &cs35l56_hda_pm_ops,
+ .name = "cs35l56-hda",
+ .acpi_match_table = cs35l56_acpi_hda_match,
+ .pm = &cs35l56_hda_pm_ops,
},
.id_table = cs35l56_hda_i2c_id,
.probe = cs35l56_hda_i2c_probe,
diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c
index a3b2fa76663d36..7f02155fe61e3c 100644
--- a/sound/pci/hda/cs35l56_hda_spi.c
+++ b/sound/pci/hda/cs35l56_hda_spi.c
@@ -56,10 +56,19 @@ static const struct spi_device_id cs35l56_hda_spi_id[] = {
{}
};
+static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
+ { "CSC3554", 0 },
+ { "CSC3556", 0 },
+ { "CSC3557", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
+
static struct spi_driver cs35l56_hda_spi_driver = {
.driver = {
- .name = "cs35l56-hda",
- .pm = &cs35l56_hda_pm_ops,
+ .name = "cs35l56-hda",
+ .acpi_match_table = cs35l56_acpi_hda_match,
+ .pm = &cs35l56_hda_pm_ops,
},
.id_table = cs35l56_hda_spi_id,
.probe = cs35l56_hda_spi_probe,
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a17c36a36aa537..70d80b6af3fe37 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6875,11 +6875,38 @@ static void alc287_fixup_legion_16ithg6_speakers(struct hda_codec *cdc, const st
comp_generic_fixup(cdc, action, "i2c", "CLSA0101", "-%s:00-cs35l41-hda.%d", 2);
}
+static void cs35l56_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ comp_generic_fixup(cdc, action, "i2c", "CSC3556", "-%s:00-cs35l56-hda.%d", 2);
+}
+
+static void cs35l56_fixup_i2c_four(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ comp_generic_fixup(cdc, action, "i2c", "CSC3556", "-%s:00-cs35l56-hda.%d", 4);
+}
+
+static void cs35l56_fixup_spi_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ comp_generic_fixup(cdc, action, "spi", "CSC3556", "-%s:00-cs35l56-hda.%d", 2);
+}
+
static void cs35l56_fixup_spi_four(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
{
comp_generic_fixup(cdc, action, "spi", "CSC3556", "-%s:00-cs35l56-hda.%d", 4);
}
+static void alc285_fixup_asus_ga403u(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ /*
+ * The same SSID has been re-used in different hardware, they have
+ * different codecs and the newer GA403U has a ALC285.
+ */
+ if (cdc->core.vendor_id == 0x10ec0285)
+ cs35l56_fixup_i2c_two(cdc, fix, action);
+ else
+ alc_fixup_inv_dmic(cdc, fix, action);
+}
+
static void tas2781_fixup_i2c(struct hda_codec *cdc,
const struct hda_fixup *fix, int action)
{
@@ -7436,6 +7463,14 @@ enum {
ALC256_FIXUP_ACER_SFG16_MICMUTE_LED,
ALC256_FIXUP_HEADPHONE_AMP_VOL,
ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX,
+ ALC285_FIXUP_CS35L56_SPI_2,
+ ALC285_FIXUP_CS35L56_I2C_2,
+ ALC285_FIXUP_CS35L56_I2C_4,
+ ALC285_FIXUP_ASUS_GA403U,
+ ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC,
+ ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1,
+ ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC,
+ ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1
};
/* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -9643,6 +9678,54 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc245_fixup_hp_spectre_x360_eu0xxx,
},
+ [ALC285_FIXUP_CS35L56_SPI_2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l56_fixup_spi_two,
+ },
+ [ALC285_FIXUP_CS35L56_I2C_2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l56_fixup_i2c_two,
+ },
+ [ALC285_FIXUP_CS35L56_I2C_4] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l56_fixup_i2c_four,
+ },
+ [ALC285_FIXUP_ASUS_GA403U] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_asus_ga403u,
+ },
+ [ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x03a11050 },
+ { 0x1b, 0x03a11c30 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1
+ },
+ [ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_speaker2_to_dac1,
+ .chained = true,
+ .chain_id = ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC,
+ },
+ [ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x03a11050 },
+ { 0x1b, 0x03a11c30 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC285_FIXUP_CS35L56_SPI_2
+ },
+ [ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_speaker2_to_dac1,
+ .chained = true,
+ .chain_id = ALC285_FIXUP_ASUS_GA403U,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -10037,6 +10120,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8cde, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
@@ -10096,7 +10181,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
- SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
+ SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10104,6 +10189,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
+ SND_PCI_QUIRK(0x1043, 0x1c63, "ASUS GU605M", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1),
SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
@@ -10115,11 +10201,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
SND_PCI_QUIRK(0x1043, 0x1da2, "ASUS UP6502ZA/ZD", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x1df3, "ASUS UM5606", ALC285_FIXUP_CS35L56_I2C_4),
SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS),
+ SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_CS35L56_I2C_2),
+ SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_CS35L56_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401),
@@ -10133,7 +10222,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC),
@@ -10159,7 +10248,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x12f6, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
- SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE),
+ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP),
@@ -10177,6 +10266,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x152d, 0x1082, "Quanta NL3", ALC269_FIXUP_LIFEBOOK),
+ SND_PCI_QUIRK(0x152d, 0x1262, "Huawei NBLB-WAX9N", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1558, 0x0353, "Clevo V35[05]SN[CDE]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x1325, "Clevo N15[01][CW]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -10282,6 +10372,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460),
SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
+ SND_PCI_QUIRK(0x17aa, 0x2234, "Thinkpad ICE-1", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
@@ -10333,6 +10424,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
SND_PCI_QUIRK(0x17aa, 0x386f, "Legion 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C),
+ SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x3878, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x3881, "YB9 dual power mode2 YC", ALC287_FIXUP_TAS2781_I2C),
@@ -10341,8 +10434,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C),
- SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
+ SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10403,6 +10496,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c
index 4475cea8e9f703..75f7674c66ee7a 100644
--- a/sound/pci/hda/tas2781_hda_i2c.c
+++ b/sound/pci/hda/tas2781_hda_i2c.c
@@ -89,7 +89,7 @@ struct tas2781_hda {
struct snd_kcontrol *dsp_prog_ctl;
struct snd_kcontrol *dsp_conf_ctl;
struct snd_kcontrol *prof_ctl;
- struct snd_kcontrol *snd_ctls[3];
+ struct snd_kcontrol *snd_ctls[2];
};
static int tas2781_get_i2c_res(struct acpi_resource *ares, void *data)
@@ -161,8 +161,6 @@ static void tas2781_hda_playback_hook(struct device *dev, int action)
pm_runtime_put_autosuspend(dev);
break;
default:
- dev_dbg(tas_hda->dev, "Playback action not supported: %d\n",
- action);
break;
}
}
@@ -185,8 +183,15 @@ static int tasdevice_get_profile_id(struct snd_kcontrol *kcontrol,
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = tas_priv->rcabin.profile_cfg_id;
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->rcabin.profile_cfg_id);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
return 0;
}
@@ -200,11 +205,19 @@ static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
val = clamp(nr_profile, 0, max);
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name,
+ tas_priv->rcabin.profile_cfg_id, val);
+
if (tas_priv->rcabin.profile_cfg_id != val) {
tas_priv->rcabin.profile_cfg_id = val;
ret = 1;
}
+ mutex_unlock(&tas_priv->codec_lock);
+
return ret;
}
@@ -241,8 +254,15 @@ static int tasdevice_program_get(struct snd_kcontrol *kcontrol,
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = tas_priv->cur_prog;
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_prog);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
return 0;
}
@@ -257,11 +277,18 @@ static int tasdevice_program_put(struct snd_kcontrol *kcontrol,
val = clamp(nr_program, 0, max);
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_prog, val);
+
if (tas_priv->cur_prog != val) {
tas_priv->cur_prog = val;
ret = 1;
}
+ mutex_unlock(&tas_priv->codec_lock);
+
return ret;
}
@@ -270,8 +297,15 @@ static int tasdevice_config_get(struct snd_kcontrol *kcontrol,
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = tas_priv->cur_conf;
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_conf);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
return 0;
}
@@ -286,54 +320,39 @@ static int tasdevice_config_put(struct snd_kcontrol *kcontrol,
val = clamp(nr_config, 0, max);
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_conf, val);
+
if (tas_priv->cur_conf != val) {
tas_priv->cur_conf = val;
ret = 1;
}
+ mutex_unlock(&tas_priv->codec_lock);
+
return ret;
}
-/*
- * tas2781_digital_getvol - get the volum control
- * @kcontrol: control pointer
- * @ucontrol: User data
- * Customer Kcontrol for tas2781 is primarily for regmap booking, paging
- * depends on internal regmap mechanism.
- * tas2781 contains book and page two-level register map, especially
- * book switching will set the register BXXP00R7F, after switching to the
- * correct book, then leverage the mechanism for paging to access the
- * register.
- */
-static int tas2781_digital_getvol(struct snd_kcontrol *kcontrol,
+static int tas2781_amp_getvol(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
+ int ret;
- return tasdevice_digital_getvol(tas_priv, ucontrol, mc);
-}
+ mutex_lock(&tas_priv->codec_lock);
-static int tas2781_amp_getvol(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- struct soc_mixer_control *mc =
- (struct soc_mixer_control *)kcontrol->private_value;
+ ret = tasdevice_amp_getvol(tas_priv, ucontrol, mc);
- return tasdevice_amp_getvol(tas_priv, ucontrol, mc);
-}
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %ld\n",
+ __func__, kcontrol->id.name, ucontrol->value.integer.value[0]);
-static int tas2781_digital_putvol(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- struct soc_mixer_control *mc =
- (struct soc_mixer_control *)kcontrol->private_value;
+ mutex_unlock(&tas_priv->codec_lock);
- /* The check of the given value is in tasdevice_digital_putvol. */
- return tasdevice_digital_putvol(tas_priv, ucontrol, mc);
+ return ret;
}
static int tas2781_amp_putvol(struct snd_kcontrol *kcontrol,
@@ -342,9 +361,19 @@ static int tas2781_amp_putvol(struct snd_kcontrol *kcontrol,
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
+ int ret;
+
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: -> %ld\n",
+ __func__, kcontrol->id.name, ucontrol->value.integer.value[0]);
/* The check of the given value is in tasdevice_amp_putvol. */
- return tasdevice_amp_putvol(tas_priv, ucontrol, mc);
+ ret = tasdevice_amp_putvol(tas_priv, ucontrol, mc);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
+ return ret;
}
static int tas2781_force_fwload_get(struct snd_kcontrol *kcontrol,
@@ -352,9 +381,13 @@ static int tas2781_force_fwload_get(struct snd_kcontrol *kcontrol,
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = (int)tas_priv->force_fwload_status;
- dev_dbg(tas_priv->dev, "%s : Force FWload %s\n", __func__,
- tas_priv->force_fwload_status ? "ON" : "OFF");
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->force_fwload_status);
+
+ mutex_unlock(&tas_priv->codec_lock);
return 0;
}
@@ -365,14 +398,20 @@ static int tas2781_force_fwload_put(struct snd_kcontrol *kcontrol,
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
bool change, val = (bool)ucontrol->value.integer.value[0];
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name,
+ tas_priv->force_fwload_status, val);
+
if (tas_priv->force_fwload_status == val)
change = false;
else {
change = true;
tas_priv->force_fwload_status = val;
}
- dev_dbg(tas_priv->dev, "%s : Force FWload %s\n", __func__,
- tas_priv->force_fwload_status ? "ON" : "OFF");
+
+ mutex_unlock(&tas_priv->codec_lock);
return change;
}
@@ -381,9 +420,6 @@ static const struct snd_kcontrol_new tas2781_snd_controls[] = {
ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Gain", TAS2781_AMP_LEVEL,
1, 0, 20, 0, tas2781_amp_getvol,
tas2781_amp_putvol, amp_vol_tlv),
- ACARD_SINGLE_RANGE_EXT_TLV("Speaker Digital Gain", TAS2781_DVC_LVL,
- 0, 0, 200, 1, tas2781_digital_getvol,
- tas2781_digital_putvol, dvc_tlv),
ACARD_SINGLE_BOOL_EXT("Speaker Force Firmware Load", 0,
tas2781_force_fwload_get, tas2781_force_fwload_put),
};
@@ -478,10 +514,10 @@ static int tas2563_save_calibration(struct tasdevice_priv *tas_priv)
static void tas2781_apply_calib(struct tasdevice_priv *tas_priv)
{
static const unsigned char page_array[CALIB_MAX] = {
- 0x17, 0x18, 0x18, 0x0d, 0x18
+ 0x17, 0x18, 0x18, 0x13, 0x18,
};
static const unsigned char rgno_array[CALIB_MAX] = {
- 0x74, 0x0c, 0x14, 0x3c, 0x7c
+ 0x74, 0x0c, 0x14, 0x70, 0x7c,
};
unsigned char *data;
int i, j, rc;
diff --git a/sound/sh/aica.c b/sound/sh/aica.c
index 320ac792c7fe24..3182c634464d42 100644
--- a/sound/sh/aica.c
+++ b/sound/sh/aica.c
@@ -278,7 +278,8 @@ static void run_spu_dma(struct work_struct *work)
dreamcastcard->clicks++;
if (unlikely(dreamcastcard->clicks >= AICA_PERIOD_NUMBER))
dreamcastcard->clicks %= AICA_PERIOD_NUMBER;
- mod_timer(&dreamcastcard->timer, jiffies + 1);
+ if (snd_pcm_running(dreamcastcard->substream))
+ mod_timer(&dreamcastcard->timer, jiffies + 1);
}
}
@@ -290,6 +291,8 @@ static void aica_period_elapsed(struct timer_list *t)
/*timer function - so cannot sleep */
int play_period;
struct snd_pcm_runtime *runtime;
+ if (!snd_pcm_running(substream))
+ return;
runtime = substream->runtime;
dreamcastcard = substream->pcm->private_data;
/* Have we played out an additional period? */
@@ -350,12 +353,19 @@ static int snd_aicapcm_pcm_open(struct snd_pcm_substream
return 0;
}
+static int snd_aicapcm_pcm_sync_stop(struct snd_pcm_substream *substream)
+{
+ struct snd_card_aica *dreamcastcard = substream->pcm->private_data;
+
+ del_timer_sync(&dreamcastcard->timer);
+ cancel_work_sync(&dreamcastcard->spu_dma_work);
+ return 0;
+}
+
static int snd_aicapcm_pcm_close(struct snd_pcm_substream
*substream)
{
struct snd_card_aica *dreamcastcard = substream->pcm->private_data;
- flush_work(&(dreamcastcard->spu_dma_work));
- del_timer(&dreamcastcard->timer);
dreamcastcard->substream = NULL;
kfree(dreamcastcard->channel);
spu_disable();
@@ -401,6 +411,7 @@ static const struct snd_pcm_ops snd_aicapcm_playback_ops = {
.prepare = snd_aicapcm_pcm_prepare,
.trigger = snd_aicapcm_pcm_trigger,
.pointer = snd_aicapcm_pcm_pointer,
+ .sync_stop = snd_aicapcm_pcm_sync_stop,
};
/* TO DO: set up to handle more than one pcm instance */
diff --git a/sound/soc/amd/acp/acp-pci.c b/sound/soc/amd/acp/acp-pci.c
index 8c8b1dcac6281c..5f35b90eab8d3f 100644
--- a/sound/soc/amd/acp/acp-pci.c
+++ b/sound/soc/amd/acp/acp-pci.c
@@ -115,7 +115,10 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id
goto unregister_dmic_dev;
}
- acp_init(chip);
+ ret = acp_init(chip);
+ if (ret)
+ goto unregister_dmic_dev;
+
res = devm_kcalloc(&pci->dev, num_res, sizeof(struct resource), GFP_KERNEL);
if (!res) {
ret = -ENOMEM;
@@ -133,11 +136,9 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id
}
}
- if (flag == FLAG_AMD_LEGACY_ONLY_DMIC) {
- ret = check_acp_pdm(pci, chip);
- if (ret < 0)
- goto skip_pdev_creation;
- }
+ ret = check_acp_pdm(pci, chip);
+ if (ret < 0)
+ goto skip_pdev_creation;
chip->flag = flag;
memset(&pdevinfo, 0, sizeof(pdevinfo));
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index 69c68d8e7a6b54..1760b5d42460af 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -433,6 +433,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
{
.driver_data = &acp6x_card,
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "MDC"),
+ DMI_MATCH(DMI_BOARD_NAME, "Herbag_MDU"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "System76"),
DMI_MATCH(DMI_PRODUCT_VERSION, "pang12"),
}
diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c
index 01ef4db5407da5..287ac01a387357 100644
--- a/sound/soc/codecs/cs-amp-lib.c
+++ b/sound/soc/codecs/cs-amp-lib.c
@@ -56,6 +56,11 @@ static int _cs_amp_write_cal_coeffs(struct cs_dsp *dsp,
dev_dbg(dsp->dev, "Calibration: Ambient=%#x, Status=%#x, CalR=%d\n",
data->calAmbient, data->calStatus, data->calR);
+ if (list_empty(&dsp->ctl_list)) {
+ dev_info(dsp->dev, "Calibration disabled due to missing firmware controls\n");
+ return -ENOENT;
+ }
+
ret = cs_amp_write_cal_coeff(dsp, controls, controls->ambient, data->calAmbient);
if (ret)
return ret;
diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c
index dfb4ce53491bba..f8e57a2fc3e324 100644
--- a/sound/soc/codecs/cs35l41.c
+++ b/sound/soc/codecs/cs35l41.c
@@ -1094,6 +1094,7 @@ static int cs35l41_handle_pdata(struct device *dev, struct cs35l41_hw_cfg *hw_cf
static int cs35l41_dsp_init(struct cs35l41_private *cs35l41)
{
struct wm_adsp *dsp;
+ uint32_t dsp1rx5_src;
int ret;
dsp = &cs35l41->dsp;
@@ -1113,16 +1114,29 @@ static int cs35l41_dsp_init(struct cs35l41_private *cs35l41)
return ret;
}
- ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX5_SRC,
- CS35L41_INPUT_SRC_VPMON);
+ switch (cs35l41->hw_cfg.bst_type) {
+ case CS35L41_INT_BOOST:
+ case CS35L41_SHD_BOOST_ACTV:
+ dsp1rx5_src = CS35L41_INPUT_SRC_VPMON;
+ break;
+ case CS35L41_EXT_BOOST:
+ case CS35L41_SHD_BOOST_PASS:
+ dsp1rx5_src = CS35L41_INPUT_SRC_VBSTMON;
+ break;
+ default:
+ dev_err(cs35l41->dev, "wm_halo_init failed - Invalid Boost Type: %d\n",
+ cs35l41->hw_cfg.bst_type);
+ goto err_dsp;
+ }
+
+ ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX5_SRC, dsp1rx5_src);
if (ret < 0) {
- dev_err(cs35l41->dev, "Write INPUT_SRC_VPMON failed: %d\n", ret);
+ dev_err(cs35l41->dev, "Write DSP1RX5_SRC: %d failed: %d\n", dsp1rx5_src, ret);
goto err_dsp;
}
- ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX6_SRC,
- CS35L41_INPUT_SRC_CLASSH);
+ ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX6_SRC, CS35L41_INPUT_SRC_VBSTMON);
if (ret < 0) {
- dev_err(cs35l41->dev, "Write INPUT_SRC_CLASSH failed: %d\n", ret);
+ dev_err(cs35l41->dev, "Write CS35L41_INPUT_SRC_VBSTMON failed: %d\n", ret);
goto err_dsp;
}
ret = regmap_write(cs35l41->regmap, CS35L41_DSP1_RX7_SRC,
diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c
index 14a5f86019aad4..70ff55c1517fe0 100644
--- a/sound/soc/codecs/cs35l56-sdw.c
+++ b/sound/soc/codecs/cs35l56-sdw.c
@@ -188,8 +188,6 @@ static void cs35l56_sdw_init(struct sdw_slave *peripheral)
goto out;
}
- regcache_cache_only(cs35l56->base.regmap, false);
-
ret = cs35l56_init(cs35l56);
if (ret < 0) {
regcache_cache_only(cs35l56->base.regmap, true);
diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c
index 08cac58e3ab222..fd02b621da52c0 100644
--- a/sound/soc/codecs/cs35l56-shared.c
+++ b/sound/soc/codecs/cs35l56-shared.c
@@ -40,16 +40,11 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_set_patch, SND_SOC_CS35L56_SHARED);
static const struct reg_default cs35l56_reg_defaults[] = {
/* no defaults for OTP_MEM - first read populates cache */
- { CS35L56_ASP1_ENABLES1, 0x00000000 },
- { CS35L56_ASP1_CONTROL1, 0x00000028 },
- { CS35L56_ASP1_CONTROL2, 0x18180200 },
- { CS35L56_ASP1_CONTROL3, 0x00000002 },
- { CS35L56_ASP1_FRAME_CONTROL1, 0x03020100 },
- { CS35L56_ASP1_FRAME_CONTROL5, 0x00020100 },
- { CS35L56_ASP1_DATA_CONTROL1, 0x00000018 },
- { CS35L56_ASP1_DATA_CONTROL5, 0x00000018 },
-
- /* no defaults for ASP1TX mixer */
+ /*
+ * No defaults for ASP1 control or ASP1TX mixer. See
+ * cs35l56_populate_asp1_register_defaults() and
+ * cs35l56_sync_asp1_mixer_widgets_with_firmware().
+ */
{ CS35L56_SWIRE_DP3_CH1_INPUT, 0x00000018 },
{ CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 },
@@ -210,6 +205,36 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg)
}
}
+static const struct reg_sequence cs35l56_asp1_defaults[] = {
+ REG_SEQ0(CS35L56_ASP1_ENABLES1, 0x00000000),
+ REG_SEQ0(CS35L56_ASP1_CONTROL1, 0x00000028),
+ REG_SEQ0(CS35L56_ASP1_CONTROL2, 0x18180200),
+ REG_SEQ0(CS35L56_ASP1_CONTROL3, 0x00000002),
+ REG_SEQ0(CS35L56_ASP1_FRAME_CONTROL1, 0x03020100),
+ REG_SEQ0(CS35L56_ASP1_FRAME_CONTROL5, 0x00020100),
+ REG_SEQ0(CS35L56_ASP1_DATA_CONTROL1, 0x00000018),
+ REG_SEQ0(CS35L56_ASP1_DATA_CONTROL5, 0x00000018),
+};
+
+/*
+ * The firmware can have control of the ASP so we don't provide regmap
+ * with defaults for these registers, to prevent a regcache_sync() from
+ * overwriting the firmware settings. But if the machine driver hooks up
+ * the ASP it means the driver is taking control of the ASP, so then the
+ * registers are populated with the defaults.
+ */
+int cs35l56_init_asp1_regs_for_driver_control(struct cs35l56_base *cs35l56_base)
+{
+ if (!cs35l56_base->fw_owns_asp1)
+ return 0;
+
+ cs35l56_base->fw_owns_asp1 = false;
+
+ return regmap_multi_reg_write(cs35l56_base->regmap, cs35l56_asp1_defaults,
+ ARRAY_SIZE(cs35l56_asp1_defaults));
+}
+EXPORT_SYMBOL_NS_GPL(cs35l56_init_asp1_regs_for_driver_control, SND_SOC_CS35L56_SHARED);
+
/*
* The firmware boot sequence can overwrite the ASP1 config registers so that
* they don't match regmap's view of their values. Rewrite the values from the
@@ -217,19 +242,15 @@ static bool cs35l56_volatile_reg(struct device *dev, unsigned int reg)
*/
int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base)
{
- struct reg_sequence asp1_regs[] = {
- { .reg = CS35L56_ASP1_ENABLES1 },
- { .reg = CS35L56_ASP1_CONTROL1 },
- { .reg = CS35L56_ASP1_CONTROL2 },
- { .reg = CS35L56_ASP1_CONTROL3 },
- { .reg = CS35L56_ASP1_FRAME_CONTROL1 },
- { .reg = CS35L56_ASP1_FRAME_CONTROL5 },
- { .reg = CS35L56_ASP1_DATA_CONTROL1 },
- { .reg = CS35L56_ASP1_DATA_CONTROL5 },
- };
+ struct reg_sequence asp1_regs[ARRAY_SIZE(cs35l56_asp1_defaults)];
int i, ret;
- /* Read values from regmap cache into a write sequence */
+ if (cs35l56_base->fw_owns_asp1)
+ return 0;
+
+ memcpy(asp1_regs, cs35l56_asp1_defaults, sizeof(asp1_regs));
+
+ /* Read current values from regmap cache into the write sequence */
for (i = 0; i < ARRAY_SIZE(asp1_regs); ++i) {
ret = regmap_read(cs35l56_base->regmap, asp1_regs[i].reg, &asp1_regs[i].def);
if (ret)
@@ -307,10 +328,10 @@ int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base)
reg = CS35L56_DSP1_HALO_STATE;
/*
- * This can't be a regmap_read_poll_timeout() because cs35l56 will NAK
- * I2C until it has booted which would terminate the poll
+ * The regmap must remain in cache-only until the chip has
+ * booted, so use a bypassed read of the status register.
*/
- poll_ret = read_poll_timeout(regmap_read, read_ret,
+ poll_ret = read_poll_timeout(regmap_read_bypassed, read_ret,
(val < 0xFFFF) && (val >= CS35L56_HALO_STATE_BOOT_DONE),
CS35L56_HALO_STATE_POLL_US,
CS35L56_HALO_STATE_TIMEOUT_US,
@@ -362,7 +383,8 @@ void cs35l56_system_reset(struct cs35l56_base *cs35l56_base, bool is_soundwire)
return;
cs35l56_wait_control_port_ready();
- regcache_cache_only(cs35l56_base->regmap, false);
+
+ /* Leave in cache-only. This will be revoked when the chip has rebooted. */
}
EXPORT_SYMBOL_NS_GPL(cs35l56_system_reset, SND_SOC_CS35L56_SHARED);
@@ -577,14 +599,14 @@ int cs35l56_runtime_resume_common(struct cs35l56_base *cs35l56_base, bool is_sou
cs35l56_issue_wake_event(cs35l56_base);
out_sync:
- regcache_cache_only(cs35l56_base->regmap, false);
-
ret = cs35l56_wait_for_firmware_boot(cs35l56_base);
if (ret) {
dev_err(cs35l56_base->dev, "Hibernate wake failed: %d\n", ret);
goto err;
}
+ regcache_cache_only(cs35l56_base->regmap, false);
+
ret = cs35l56_mbox_send(cs35l56_base, CS35L56_MBOX_CMD_PREVENT_AUTO_HIBERNATE);
if (ret)
goto err;
@@ -684,7 +706,7 @@ EXPORT_SYMBOL_NS_GPL(cs35l56_calibration_controls, SND_SOC_CS35L56_SHARED);
int cs35l56_get_calibration(struct cs35l56_base *cs35l56_base)
{
- u64 silicon_uid;
+ u64 silicon_uid = 0;
int ret;
/* Driver can't apply calibration to a secured part, so skip */
@@ -757,7 +779,7 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
* devices so the REVID needs to be determined before waiting for the
* firmware to boot.
*/
- ret = regmap_read(cs35l56_base->regmap, CS35L56_REVID, &revid);
+ ret = regmap_read_bypassed(cs35l56_base->regmap, CS35L56_REVID, &revid);
if (ret < 0) {
dev_err(cs35l56_base->dev, "Get Revision ID failed\n");
return ret;
@@ -768,7 +790,7 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
if (ret)
return ret;
- ret = regmap_read(cs35l56_base->regmap, CS35L56_DEVID, &devid);
+ ret = regmap_read_bypassed(cs35l56_base->regmap, CS35L56_DEVID, &devid);
if (ret < 0) {
dev_err(cs35l56_base->dev, "Get Device ID failed\n");
return ret;
@@ -787,6 +809,9 @@ int cs35l56_hw_init(struct cs35l56_base *cs35l56_base)
cs35l56_base->type = devid & 0xFF;
+ /* Silicon is now identified and booted so exit cache-only */
+ regcache_cache_only(cs35l56_base->regmap, false);
+
ret = regmap_read(cs35l56_base->regmap, CS35L56_DSP_RESTRICT_STS1, &secured);
if (ret) {
dev_err(cs35l56_base->dev, "Get Secure status failed\n");
diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c
index 8d2f021fb36281..6331b8c6136eaf 100644
--- a/sound/soc/codecs/cs35l56.c
+++ b/sound/soc/codecs/cs35l56.c
@@ -454,9 +454,14 @@ static int cs35l56_asp_dai_set_fmt(struct snd_soc_dai *codec_dai, unsigned int f
{
struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(codec_dai->component);
unsigned int val;
+ int ret;
dev_dbg(cs35l56->base.dev, "%s: %#x\n", __func__, fmt);
+ ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base);
+ if (ret)
+ return ret;
+
switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) {
case SND_SOC_DAIFMT_CBC_CFC:
break;
@@ -530,6 +535,11 @@ static int cs35l56_asp_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx
unsigned int rx_mask, int slots, int slot_width)
{
struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component);
+ int ret;
+
+ ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base);
+ if (ret)
+ return ret;
if ((slots == 0) || (slot_width == 0)) {
dev_dbg(cs35l56->base.dev, "tdm config cleared\n");
@@ -578,6 +588,11 @@ static int cs35l56_asp_dai_hw_params(struct snd_pcm_substream *substream,
struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component);
unsigned int rate = params_rate(params);
u8 asp_width, asp_wl;
+ int ret;
+
+ ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base);
+ if (ret)
+ return ret;
asp_wl = params_width(params);
if (cs35l56->asp_slot_width)
@@ -634,7 +649,11 @@ static int cs35l56_asp_dai_set_sysclk(struct snd_soc_dai *dai,
int clk_id, unsigned int freq, int dir)
{
struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(dai->component);
- int freq_id;
+ int freq_id, ret;
+
+ ret = cs35l56_init_asp1_regs_for_driver_control(&cs35l56->base);
+ if (ret)
+ return ret;
if (freq == 0) {
cs35l56->sysclk_set = false;
@@ -1403,6 +1422,9 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56)
cs35l56->base.cal_index = -1;
cs35l56->speaker_id = -ENOENT;
+ /* Assume that the firmware owns ASP1 until we know different */
+ cs35l56->base.fw_owns_asp1 = true;
+
dev_set_drvdata(cs35l56->base.dev, cs35l56);
cs35l56_fill_supply_names(cs35l56->supplies);
@@ -1531,6 +1553,8 @@ post_soft_reset:
return ret;
dev_dbg(cs35l56->base.dev, "Firmware rebooted after soft reset\n");
+
+ regcache_cache_only(cs35l56->base.regmap, false);
}
/* Disable auto-hibernate so that runtime_pm has control */
diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c
index 860d5cda67bffe..94685449f0f48c 100644
--- a/sound/soc/codecs/cs42l43.c
+++ b/sound/soc/codecs/cs42l43.c
@@ -2364,7 +2364,8 @@ static int cs42l43_codec_runtime_resume(struct device *dev)
static int cs42l43_codec_suspend(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
disable_irq(cs42l43->irq);
@@ -2373,7 +2374,8 @@ static int cs42l43_codec_suspend(struct device *dev)
static int cs42l43_codec_suspend_noirq(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
enable_irq(cs42l43->irq);
@@ -2382,7 +2384,8 @@ static int cs42l43_codec_suspend_noirq(struct device *dev)
static int cs42l43_codec_resume(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
enable_irq(cs42l43->irq);
@@ -2391,7 +2394,8 @@ static int cs42l43_codec_resume(struct device *dev)
static int cs42l43_codec_resume_noirq(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
disable_irq(cs42l43->irq);
diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c
index 15289dadafea09..17bd6b5160772e 100644
--- a/sound/soc/codecs/es8326.c
+++ b/sound/soc/codecs/es8326.c
@@ -412,9 +412,9 @@ static const struct _coeff_div coeff_div_v3[] = {
{125, 48000, 6000000, 0x04, 0x04, 0x1F, 0x2D, 0x8A, 0x0A, 0x27, 0x27},
{128, 8000, 1024000, 0x60, 0x00, 0x05, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
- {128, 16000, 2048000, 0x20, 0x00, 0x31, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
- {128, 44100, 5644800, 0xE0, 0x00, 0x01, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
- {128, 48000, 6144000, 0xE0, 0x00, 0x01, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+ {128, 16000, 2048000, 0x20, 0x00, 0x31, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+ {128, 44100, 5644800, 0xE0, 0x00, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+ {128, 48000, 6144000, 0xE0, 0x00, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
{144, 8000, 1152000, 0x20, 0x00, 0x03, 0x35, 0x8A, 0x1B, 0x23, 0x47},
{144, 16000, 2304000, 0x20, 0x00, 0x11, 0x35, 0x8A, 0x1B, 0x23, 0x47},
{192, 8000, 1536000, 0x60, 0x02, 0x0D, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
@@ -423,10 +423,10 @@ static const struct _coeff_div coeff_div_v3[] = {
{200, 48000, 9600000, 0x04, 0x04, 0x0F, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
{250, 48000, 12000000, 0x04, 0x04, 0x0F, 0x2D, 0xCA, 0x0A, 0x27, 0x27},
- {256, 8000, 2048000, 0x60, 0x00, 0x31, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
- {256, 16000, 4096000, 0x20, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
- {256, 44100, 11289600, 0xE0, 0x00, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
- {256, 48000, 12288000, 0xE0, 0x00, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+ {256, 8000, 2048000, 0x60, 0x00, 0x31, 0x35, 0x08, 0x19, 0x1F, 0x7F},
+ {256, 16000, 4096000, 0x20, 0x00, 0x01, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+ {256, 44100, 11289600, 0xE0, 0x01, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+ {256, 48000, 12288000, 0xE0, 0x01, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
{288, 8000, 2304000, 0x20, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x23, 0x47},
{384, 8000, 3072000, 0x60, 0x02, 0x05, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
{384, 16000, 6144000, 0x20, 0x02, 0x03, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
@@ -435,10 +435,10 @@ static const struct _coeff_div coeff_div_v3[] = {
{400, 48000, 19200000, 0xE4, 0x04, 0x35, 0x6d, 0xCA, 0x0A, 0x1F, 0x1F},
{500, 48000, 24000000, 0xF8, 0x04, 0x3F, 0x6D, 0xCA, 0x0A, 0x1F, 0x1F},
- {512, 8000, 4096000, 0x60, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
- {512, 16000, 8192000, 0x20, 0x00, 0x30, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
- {512, 44100, 22579200, 0xE0, 0x00, 0x00, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
- {512, 48000, 24576000, 0xE0, 0x00, 0x00, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+ {512, 8000, 4096000, 0x60, 0x00, 0x01, 0x08, 0x19, 0x1B, 0x1F, 0x7F},
+ {512, 16000, 8192000, 0x20, 0x00, 0x30, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+ {512, 44100, 22579200, 0xE0, 0x00, 0x00, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+ {512, 48000, 24576000, 0xE0, 0x00, 0x00, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
{768, 8000, 6144000, 0x60, 0x02, 0x11, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
{768, 16000, 12288000, 0x20, 0x02, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
{768, 32000, 24576000, 0xE0, 0x02, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
@@ -835,7 +835,6 @@ static void es8326_jack_detect_handler(struct work_struct *work)
dev_dbg(comp->dev, "Report hp remove event\n");
snd_soc_jack_report(es8326->jack, 0, SND_JACK_HEADSET);
/* mute adc when mic path switch */
- regmap_write(es8326->regmap, ES8326_ADC_SCALE, 0x33);
regmap_write(es8326->regmap, ES8326_ADC1_SRC, 0x44);
regmap_write(es8326->regmap, ES8326_ADC2_SRC, 0x66);
es8326->hp = 0;
@@ -843,6 +842,7 @@ static void es8326_jack_detect_handler(struct work_struct *work)
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x0a);
regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x03);
+ regmap_write(es8326->regmap, ES8326_INT_SOURCE, ES8326_INT_SRC_PIN9);
/*
* Inverted HPJACK_POL bit to trigger one IRQ to double check HP Removal event
*/
@@ -865,6 +865,8 @@ static void es8326_jack_detect_handler(struct work_struct *work)
* set auto-check mode, then restart jack_detect_work after 400ms.
* Don't report jack status.
*/
+ regmap_write(es8326->regmap, ES8326_INT_SOURCE,
+ (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
es8326_enable_micbias(es8326->component);
usleep_range(50000, 70000);
@@ -891,7 +893,6 @@ static void es8326_jack_detect_handler(struct work_struct *work)
snd_soc_jack_report(es8326->jack,
SND_JACK_HEADSET, SND_JACK_HEADSET);
- regmap_write(es8326->regmap, ES8326_ADC_SCALE, 0x33);
regmap_update_bits(es8326->regmap, ES8326_PGA_PDN,
0x08, 0x08);
regmap_update_bits(es8326->regmap, ES8326_PGAGAIN,
@@ -987,7 +988,7 @@ static int es8326_resume(struct snd_soc_component *component)
regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E);
regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0);
usleep_range(10000, 15000);
- regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xe9);
+ regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xd9);
regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0xcb);
/* set headphone default type and detect pin */
regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x83);
@@ -1038,8 +1039,7 @@ static int es8326_resume(struct snd_soc_component *component)
es8326_enable_micbias(es8326->component);
usleep_range(50000, 70000);
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00);
- regmap_write(es8326->regmap, ES8326_INT_SOURCE,
- (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
+ regmap_write(es8326->regmap, ES8326_INT_SOURCE, ES8326_INT_SRC_PIN9);
regmap_write(es8326->regmap, ES8326_INTOUT_IO,
es8326->interrupt_clk);
regmap_write(es8326->regmap, ES8326_SDINOUT1_IO,
@@ -1060,6 +1060,8 @@ static int es8326_resume(struct snd_soc_component *component)
es8326->hp = 0;
es8326->hpl_vol = 0x03;
es8326->hpr_vol = 0x03;
+
+ es8326_irq(es8326->irq, es8326);
return 0;
}
@@ -1070,6 +1072,9 @@ static int es8326_suspend(struct snd_soc_component *component)
cancel_delayed_work_sync(&es8326->jack_detect_work);
es8326_disable_micbias(component);
es8326->calibrated = false;
+ regmap_write(es8326->regmap, ES8326_CLK_MUX, 0x2d);
+ regmap_write(es8326->regmap, ES8326_DAC2HPMIX, 0x00);
+ regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b);
regmap_write(es8326->regmap, ES8326_CLK_CTL, ES8326_CLK_OFF);
regcache_cache_only(es8326->regmap, true);
regcache_mark_dirty(es8326->regmap);
diff --git a/sound/soc/codecs/es8326.h b/sound/soc/codecs/es8326.h
index ee12caef810532..c3e52e7bdef57d 100644
--- a/sound/soc/codecs/es8326.h
+++ b/sound/soc/codecs/es8326.h
@@ -104,7 +104,7 @@
#define ES8326_MUTE (3 << 0)
/* ES8326_CLK_CTL */
-#define ES8326_CLK_ON (0x7e << 0)
+#define ES8326_CLK_ON (0x7f << 0)
#define ES8326_CLK_OFF (0 << 0)
/* ES8326_CLK_INV */
diff --git a/sound/soc/codecs/rt1316-sdw.c b/sound/soc/codecs/rt1316-sdw.c
index 47511f70119ae3..0b3bf920bcab23 100644
--- a/sound/soc/codecs/rt1316-sdw.c
+++ b/sound/soc/codecs/rt1316-sdw.c
@@ -537,7 +537,7 @@ static int rt1316_sdw_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt1316->sdw_slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -577,12 +577,12 @@ static int rt1316_sdw_parse_dt(struct rt1316_sdw_priv *rt1316, struct device *de
if (rt1316->bq_params_cnt) {
rt1316->bq_params = devm_kzalloc(dev, rt1316->bq_params_cnt, GFP_KERNEL);
if (!rt1316->bq_params) {
- dev_err(dev, "Could not allocate bq_params memory\n");
+ dev_err(dev, "%s: Could not allocate bq_params memory\n", __func__);
ret = -ENOMEM;
} else {
ret = device_property_read_u8_array(dev, "realtek,bq-params", rt1316->bq_params, rt1316->bq_params_cnt);
if (ret < 0)
- dev_err(dev, "Could not read list of realtek,bq-params\n");
+ dev_err(dev, "%s: Could not read list of realtek,bq-params\n", __func__);
}
}
@@ -759,7 +759,7 @@ static int __maybe_unused rt1316_dev_resume(struct device *dev)
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT1316_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt1318-sdw.c b/sound/soc/codecs/rt1318-sdw.c
index ff364bde4a0849..462c9a4b1be5dd 100644
--- a/sound/soc/codecs/rt1318-sdw.c
+++ b/sound/soc/codecs/rt1318-sdw.c
@@ -606,7 +606,7 @@ static int rt1318_sdw_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt1318->sdw_slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -631,8 +631,8 @@ static int rt1318_sdw_hw_params(struct snd_pcm_substream *substream,
sampling_rate = RT1318_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
@@ -835,7 +835,7 @@ static int __maybe_unused rt1318_dev_resume(struct device *dev)
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT1318_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
return -ETIMEDOUT;
}
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index e3ba04484813a2..d0d24a53df7462 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -444,6 +444,7 @@ struct rt5645_priv {
struct regmap *regmap;
struct i2c_client *i2c;
struct gpio_desc *gpiod_hp_det;
+ struct gpio_desc *gpiod_cbj_sleeve;
struct snd_soc_jack *hp_jack;
struct snd_soc_jack *mic_jack;
struct snd_soc_jack *btn_jack;
@@ -3186,6 +3187,9 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse
regmap_update_bits(rt5645->regmap, RT5645_IN1_CTRL2,
RT5645_CBJ_MN_JD, 0);
+ if (rt5645->gpiod_cbj_sleeve)
+ gpiod_set_value(rt5645->gpiod_cbj_sleeve, 1);
+
msleep(600);
regmap_read(rt5645->regmap, RT5645_IN1_CTRL3, &val);
val &= 0x7;
@@ -3202,6 +3206,8 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse
snd_soc_dapm_disable_pin(dapm, "Mic Det Power");
snd_soc_dapm_sync(dapm);
rt5645->jack_type = SND_JACK_HEADPHONE;
+ if (rt5645->gpiod_cbj_sleeve)
+ gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0);
}
if (rt5645->pdata.level_trigger_irq)
regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2,
@@ -3229,6 +3235,9 @@ static int rt5645_jack_detect(struct snd_soc_component *component, int jack_inse
if (rt5645->pdata.level_trigger_irq)
regmap_update_bits(rt5645->regmap, RT5645_IRQ_CTRL2,
RT5645_JD_1_1_MASK, RT5645_JD_1_1_INV);
+
+ if (rt5645->gpiod_cbj_sleeve)
+ gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0);
}
return rt5645->jack_type;
@@ -4012,6 +4021,16 @@ static int rt5645_i2c_probe(struct i2c_client *i2c)
return ret;
}
+ rt5645->gpiod_cbj_sleeve = devm_gpiod_get_optional(&i2c->dev, "cbj-sleeve",
+ GPIOD_OUT_LOW);
+
+ if (IS_ERR(rt5645->gpiod_cbj_sleeve)) {
+ ret = PTR_ERR(rt5645->gpiod_cbj_sleeve);
+ dev_info(&i2c->dev, "failed to initialize gpiod, ret=%d\n", ret);
+ if (ret != -ENOENT)
+ return ret;
+ }
+
for (i = 0; i < ARRAY_SIZE(rt5645->supplies); i++)
rt5645->supplies[i].supply = rt5645_supply_names[i];
@@ -4259,6 +4278,9 @@ static void rt5645_i2c_remove(struct i2c_client *i2c)
cancel_delayed_work_sync(&rt5645->jack_detect_work);
cancel_delayed_work_sync(&rt5645->rcclock_work);
+ if (rt5645->gpiod_cbj_sleeve)
+ gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0);
+
regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies);
}
@@ -4274,6 +4296,9 @@ static void rt5645_i2c_shutdown(struct i2c_client *i2c)
0);
msleep(20);
regmap_write(rt5645->regmap, RT5645_RESET, 0);
+
+ if (rt5645->gpiod_cbj_sleeve)
+ gpiod_set_value(rt5645->gpiod_cbj_sleeve, 0);
}
static int __maybe_unused rt5645_sys_suspend(struct device *dev)
diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c
index e67c2e19cb1a72..f9ee42c13dbac3 100644
--- a/sound/soc/codecs/rt5682-sdw.c
+++ b/sound/soc/codecs/rt5682-sdw.c
@@ -132,7 +132,7 @@ static int rt5682_sdw_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt5682->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -315,8 +315,8 @@ static int rt5682_sdw_init(struct device *dev, struct regmap *regmap,
&rt5682_sdw_indirect_regmap);
if (IS_ERR(rt5682->regmap)) {
ret = PTR_ERR(rt5682->regmap);
- dev_err(dev, "Failed to allocate register map: %d\n",
- ret);
+ dev_err(dev, "%s: Failed to allocate register map: %d\n",
+ __func__, ret);
return ret;
}
@@ -400,7 +400,7 @@ static int rt5682_io_init(struct device *dev, struct sdw_slave *slave)
}
if (val != DEVICE_ID) {
- dev_err(dev, "Device with ID register %x is not rt5682\n", val);
+ dev_err(dev, "%s: Device with ID register %x is not rt5682\n", __func__, val);
ret = -ENODEV;
goto err_nodev;
}
@@ -648,7 +648,7 @@ static int rt5682_bus_config(struct sdw_slave *slave,
ret = rt5682_clock_config(&slave->dev);
if (ret < 0)
- dev_err(&slave->dev, "Invalid clk config");
+ dev_err(&slave->dev, "%s: Invalid clk config", __func__);
return ret;
}
@@ -763,19 +763,19 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev)
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt5682->disable_irq_lock);
if (rt5682->disable_irq == true) {
- mutex_lock(&rt5682->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
rt5682->disable_irq = false;
- mutex_unlock(&rt5682->disable_irq_lock);
}
+ mutex_unlock(&rt5682->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT5682_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt700.c b/sound/soc/codecs/rt700.c
index 0ebf344a1b6094..434b926f96c837 100644
--- a/sound/soc/codecs/rt700.c
+++ b/sound/soc/codecs/rt700.c
@@ -37,8 +37,8 @@ static int rt700_index_write(struct regmap *regmap,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -52,8 +52,8 @@ static int rt700_index_read(struct regmap *regmap,
*value = 0;
ret = regmap_read(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -930,14 +930,14 @@ static int rt700_pcm_hw_params(struct snd_pcm_substream *substream,
port_config.num += 2;
break;
default:
- dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+ dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
return -EINVAL;
}
retval = sdw_stream_add_slave(rt700->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -945,8 +945,8 @@ static int rt700_pcm_hw_params(struct snd_pcm_substream *substream,
/* bit 3:0 Number of Channel */
val |= (params_channels(params) - 1);
} else {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c
index 935e597022d324..2636c2eea4bc8b 100644
--- a/sound/soc/codecs/rt711-sdca-sdw.c
+++ b/sound/soc/codecs/rt711-sdca-sdw.c
@@ -438,20 +438,20 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev)
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt711->disable_irq_lock);
if (rt711->disable_irq == true) {
- mutex_lock(&rt711->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt711->disable_irq = false;
- mutex_unlock(&rt711->disable_irq_lock);
}
+ mutex_unlock(&rt711->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT711_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c
index 447154cb60104d..1e8dbfc3ecd969 100644
--- a/sound/soc/codecs/rt711-sdca.c
+++ b/sound/soc/codecs/rt711-sdca.c
@@ -36,8 +36,8 @@ static int rt711_sdca_index_write(struct rt711_sdca_priv *rt711,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt711->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -52,8 +52,8 @@ static int rt711_sdca_index_read(struct rt711_sdca_priv *rt711,
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt711->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -1293,13 +1293,13 @@ static int rt711_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt711->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 16) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
@@ -1318,8 +1318,8 @@ static int rt711_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
sampling_rate = RT711_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c
index 3f5773310ae8cc..0d3b43dd22e63d 100644
--- a/sound/soc/codecs/rt711-sdw.c
+++ b/sound/soc/codecs/rt711-sdw.c
@@ -408,7 +408,7 @@ static int rt711_bus_config(struct sdw_slave *slave,
ret = rt711_clock_config(&slave->dev);
if (ret < 0)
- dev_err(&slave->dev, "Invalid clk config");
+ dev_err(&slave->dev, "%s: Invalid clk config", __func__);
return ret;
}
@@ -536,19 +536,19 @@ static int __maybe_unused rt711_dev_resume(struct device *dev)
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt711->disable_irq_lock);
if (rt711->disable_irq == true) {
- mutex_lock(&rt711->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
rt711->disable_irq = false;
- mutex_unlock(&rt711->disable_irq_lock);
}
+ mutex_unlock(&rt711->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT711_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
return -ETIMEDOUT;
}
diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c
index 66eaed13b0d6a0..5446f9506a1672 100644
--- a/sound/soc/codecs/rt711.c
+++ b/sound/soc/codecs/rt711.c
@@ -37,8 +37,8 @@ static int rt711_index_write(struct regmap *regmap,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -52,8 +52,8 @@ static int rt711_index_read(struct regmap *regmap,
*value = 0;
ret = regmap_read(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -428,7 +428,7 @@ static void rt711_jack_init(struct rt711_priv *rt711)
RT711_HP_JD_FINAL_RESULT_CTL_JD12);
break;
default:
- dev_warn(rt711->component->dev, "Wrong JD source\n");
+ dev_warn(rt711->component->dev, "%s: Wrong JD source\n", __func__);
break;
}
@@ -1020,7 +1020,7 @@ static int rt711_pcm_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt711->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -1028,8 +1028,8 @@ static int rt711_pcm_hw_params(struct snd_pcm_substream *substream,
/* bit 3:0 Number of Channel */
val |= (params_channels(params) - 1);
} else {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt712-sdca-dmic.c b/sound/soc/codecs/rt712-sdca-dmic.c
index 0926b26619bd45..012b79e72cf6b6 100644
--- a/sound/soc/codecs/rt712-sdca-dmic.c
+++ b/sound/soc/codecs/rt712-sdca-dmic.c
@@ -139,8 +139,8 @@ static int rt712_sdca_dmic_index_write(struct rt712_sdca_dmic_priv *rt712,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -155,8 +155,8 @@ static int rt712_sdca_dmic_index_read(struct rt712_sdca_dmic_priv *rt712,
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -317,7 +317,8 @@ static int rt712_sdca_dmic_set_gain_put(struct snd_kcontrol *kcontrol,
for (i = 0; i < p->count; i++) {
err = regmap_write(rt712->mbq_regmap, p->reg_base + i, gain_val[i]);
if (err < 0)
- dev_err(&rt712->slave->dev, "0x%08x can't be set\n", p->reg_base + i);
+ dev_err(&rt712->slave->dev, "%s: 0x%08x can't be set\n",
+ __func__, p->reg_base + i);
}
return changed;
@@ -667,13 +668,13 @@ static int rt712_sdca_dmic_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt712->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 4) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
@@ -698,8 +699,8 @@ static int rt712_sdca_dmic_hw_params(struct snd_pcm_substream *substream,
sampling_rate = RT712_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
@@ -923,7 +924,8 @@ static int __maybe_unused rt712_sdca_dmic_dev_resume(struct device *dev)
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT712_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n",
+ __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt712-sdca-sdw.c b/sound/soc/codecs/rt712-sdca-sdw.c
index 01ac555cd79b84..4e9ab3ef135b34 100644
--- a/sound/soc/codecs/rt712-sdca-sdw.c
+++ b/sound/soc/codecs/rt712-sdca-sdw.c
@@ -438,20 +438,21 @@ static int __maybe_unused rt712_sdca_dev_resume(struct device *dev)
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt712->disable_irq_lock);
if (rt712->disable_irq == true) {
- mutex_lock(&rt712->disable_irq_lock);
+
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt712->disable_irq = false;
- mutex_unlock(&rt712->disable_irq_lock);
}
+ mutex_unlock(&rt712->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT712_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt712-sdca.c b/sound/soc/codecs/rt712-sdca.c
index 6954fbe7ec5f3b..b503de9fda80e7 100644
--- a/sound/soc/codecs/rt712-sdca.c
+++ b/sound/soc/codecs/rt712-sdca.c
@@ -34,8 +34,8 @@ static int rt712_sdca_index_write(struct rt712_sdca_priv *rt712,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -50,8 +50,8 @@ static int rt712_sdca_index_read(struct rt712_sdca_priv *rt712,
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -1060,13 +1060,13 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt712->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 16) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
@@ -1085,8 +1085,8 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
sampling_rate = RT712_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
@@ -1106,7 +1106,7 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
sampling_rate);
break;
default:
- dev_err(component->dev, "Wrong DAI id\n");
+ dev_err(component->dev, "%s: Wrong DAI id\n", __func__);
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt715-sdca-sdw.c b/sound/soc/codecs/rt715-sdca-sdw.c
index ab54a67a27ebbf..ee450126106f96 100644
--- a/sound/soc/codecs/rt715-sdca-sdw.c
+++ b/sound/soc/codecs/rt715-sdca-sdw.c
@@ -237,7 +237,7 @@ static int __maybe_unused rt715_dev_resume(struct device *dev)
time = wait_for_completion_timeout(&slave->enumeration_complete,
msecs_to_jiffies(RT715_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Enumeration not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Enumeration not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt715-sdca.c b/sound/soc/codecs/rt715-sdca.c
index 4533eedd7e189f..bc3579203c7a47 100644
--- a/sound/soc/codecs/rt715-sdca.c
+++ b/sound/soc/codecs/rt715-sdca.c
@@ -41,8 +41,8 @@ static int rt715_sdca_index_write(struct rt715_sdca_priv *rt715,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt715->slave->dev,
- "Failed to set private value: %08x <= %04x %d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %08x <= %04x %d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -59,8 +59,8 @@ static int rt715_sdca_index_read(struct rt715_sdca_priv *rt715,
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt715->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -152,8 +152,8 @@ static int rt715_sdca_set_amp_gain_put(struct snd_kcontrol *kcontrol,
mc->shift);
ret = regmap_write(rt715->mbq_regmap, mc->reg + i, gain_val);
if (ret != 0) {
- dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
- mc->reg + i, gain_val);
+ dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+ __func__, mc->reg + i, gain_val);
return ret;
}
}
@@ -188,8 +188,8 @@ static int rt715_sdca_set_amp_gain_4ch_put(struct snd_kcontrol *kcontrol,
ret = regmap_write(rt715->mbq_regmap, reg_base + i,
gain_val);
if (ret != 0) {
- dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
- reg_base + i, gain_val);
+ dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+ __func__, reg_base + i, gain_val);
return ret;
}
}
@@ -224,8 +224,8 @@ static int rt715_sdca_set_amp_gain_8ch_put(struct snd_kcontrol *kcontrol,
reg = i < 7 ? reg_base + i : (reg_base - 1) | BIT(15);
ret = regmap_write(rt715->mbq_regmap, reg, gain_val);
if (ret != 0) {
- dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
- reg, gain_val);
+ dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+ __func__, reg, gain_val);
return ret;
}
}
@@ -246,8 +246,8 @@ static int rt715_sdca_set_amp_gain_get(struct snd_kcontrol *kcontrol,
for (i = 0; i < 2; i++) {
ret = regmap_read(rt715->mbq_regmap, mc->reg + i, &val);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- mc->reg + i, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, mc->reg + i, ret);
return ret;
}
ucontrol->value.integer.value[i] = rt715_sdca_get_gain(val, mc->shift);
@@ -271,8 +271,8 @@ static int rt715_sdca_set_amp_gain_4ch_get(struct snd_kcontrol *kcontrol,
for (i = 0; i < 4; i++) {
ret = regmap_read(rt715->mbq_regmap, reg_base + i, &val);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- reg_base + i, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, reg_base + i, ret);
return ret;
}
ucontrol->value.integer.value[i] = rt715_sdca_get_gain(val, gain_sft);
@@ -297,8 +297,8 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol,
for (i = 0; i < 8; i += 2) {
ret = regmap_read(rt715->mbq_regmap, reg_base + i, &val_l);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- reg_base + i, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, reg_base + i, ret);
return ret;
}
ucontrol->value.integer.value[i] = (val_l >> gain_sft) / 10;
@@ -306,8 +306,8 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol,
reg = (i == 6) ? (reg_base - 1) | BIT(15) : reg_base + 1 + i;
ret = regmap_read(rt715->mbq_regmap, reg, &val_r);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- reg, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, reg, ret);
return ret;
}
ucontrol->value.integer.value[i + 1] = (val_r >> gain_sft) / 10;
@@ -316,7 +316,7 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol,
return 0;
}
-static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -17625, 375, 0);
+static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -1725, 75, 0);
static const DECLARE_TLV_DB_SCALE(mic_vol_tlv, 0, 1000, 0);
static int rt715_sdca_get_volsw(struct snd_kcontrol *kcontrol,
@@ -477,7 +477,7 @@ static const struct snd_kcontrol_new rt715_sdca_snd_controls[] = {
RT715_SDCA_FU_VOL_CTRL, CH_01),
SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_ADC7_27_VOL,
RT715_SDCA_FU_VOL_CTRL, CH_02),
- 0x2f, 0x7f, 0,
+ 0x2f, 0x3f, 0,
rt715_sdca_set_amp_gain_get, rt715_sdca_set_amp_gain_put,
in_vol_tlv),
RT715_SDCA_EXT_TLV("FU02 Capture Volume",
@@ -485,13 +485,13 @@ static const struct snd_kcontrol_new rt715_sdca_snd_controls[] = {
RT715_SDCA_FU_VOL_CTRL, CH_01),
rt715_sdca_set_amp_gain_4ch_get,
rt715_sdca_set_amp_gain_4ch_put,
- in_vol_tlv, 4, 0x7f),
+ in_vol_tlv, 4, 0x3f),
RT715_SDCA_EXT_TLV("FU06 Capture Volume",
SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_ADC10_11_VOL,
RT715_SDCA_FU_VOL_CTRL, CH_01),
rt715_sdca_set_amp_gain_4ch_get,
rt715_sdca_set_amp_gain_4ch_put,
- in_vol_tlv, 4, 0x7f),
+ in_vol_tlv, 4, 0x3f),
/* MIC Boost Control */
RT715_SDCA_BOOST_EXT_TLV("FU0E Boost",
SDW_SDCA_CTL(FUN_MIC_ARRAY, RT715_SDCA_FU_DMIC_GAIN_EN,
@@ -834,15 +834,15 @@ static int rt715_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
0xaf00);
break;
default:
- dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+ dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
return -EINVAL;
}
retval = sdw_stream_add_slave(rt715->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(component->dev, "Unable to configure port, retval:%d\n",
- retval);
+ dev_err(component->dev, "%s: Unable to configure port, retval:%d\n",
+ __func__, retval);
return retval;
}
@@ -893,8 +893,8 @@ static int rt715_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
val = 0xf;
break;
default:
- dev_err(component->dev, "Unsupported sample rate %d\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Unsupported sample rate %d\n",
+ __func__, params_rate(params));
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c
index 21f37babd148a4..f012fe0ded6d28 100644
--- a/sound/soc/codecs/rt715-sdw.c
+++ b/sound/soc/codecs/rt715-sdw.c
@@ -111,6 +111,7 @@ static bool rt715_readable_register(struct device *dev, unsigned int reg)
case 0x839d:
case 0x83a7:
case 0x83a9:
+ case 0x752001:
case 0x752039:
return true;
default:
@@ -482,7 +483,7 @@ static int rt715_bus_config(struct sdw_slave *slave,
ret = rt715_clock_config(&slave->dev);
if (ret < 0)
- dev_err(&slave->dev, "Invalid clk config");
+ dev_err(&slave->dev, "%s: Invalid clk config", __func__);
return 0;
}
@@ -554,7 +555,7 @@ static int __maybe_unused rt715_dev_resume(struct device *dev)
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT715_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt715.c b/sound/soc/codecs/rt715.c
index 9f732a5abd53f3..299c9b12377c6a 100644
--- a/sound/soc/codecs/rt715.c
+++ b/sound/soc/codecs/rt715.c
@@ -40,8 +40,8 @@ static int rt715_index_write(struct regmap *regmap, unsigned int reg,
ret = regmap_write(regmap, addr, value);
if (ret < 0) {
- pr_err("Failed to set private value: %08x <= %04x %d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %08x <= %04x %d\n",
+ __func__, addr, value, ret);
}
return ret;
@@ -55,8 +55,8 @@ static int rt715_index_write_nid(struct regmap *regmap,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -70,8 +70,8 @@ static int rt715_index_read_nid(struct regmap *regmap,
*value = 0;
ret = regmap_read(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -862,14 +862,14 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream,
rt715_index_write(rt715->regmap, RT715_SDW_INPUT_SEL, 0xa000);
break;
default:
- dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+ dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
return -EINVAL;
}
retval = sdw_stream_add_slave(rt715->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -883,8 +883,8 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream,
val |= 0x0 << 8;
break;
default:
- dev_err(component->dev, "Unsupported sample rate %d\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Unsupported sample rate %d\n",
+ __func__, params_rate(params));
return -EINVAL;
}
@@ -892,8 +892,8 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream,
/* bit 3:0 Number of Channel */
val |= (params_channels(params) - 1);
} else {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c
index eb76f4c675b67f..65d584c1886e81 100644
--- a/sound/soc/codecs/rt722-sdca-sdw.c
+++ b/sound/soc/codecs/rt722-sdca-sdw.c
@@ -467,13 +467,13 @@ static int __maybe_unused rt722_sdca_dev_resume(struct device *dev)
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt722->disable_irq_lock);
if (rt722->disable_irq == true) {
- mutex_lock(&rt722->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_6);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt722->disable_irq = false;
- mutex_unlock(&rt722->disable_irq_lock);
}
+ mutex_unlock(&rt722->disable_irq_lock);
goto regmap_sync;
}
diff --git a/sound/soc/codecs/rt722-sdca.c b/sound/soc/codecs/rt722-sdca.c
index 0e1c65a20392ad..e5bd9ef812de13 100644
--- a/sound/soc/codecs/rt722-sdca.c
+++ b/sound/soc/codecs/rt722-sdca.c
@@ -35,8 +35,8 @@ int rt722_sdca_index_write(struct rt722_sdca_priv *rt722,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt722->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -51,8 +51,8 @@ int rt722_sdca_index_read(struct rt722_sdca_priv *rt722,
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt722->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -663,7 +663,8 @@ static int rt722_sdca_dmic_set_gain_put(struct snd_kcontrol *kcontrol,
for (i = 0; i < p->count; i++) {
err = regmap_write(rt722->mbq_regmap, p->reg_base + i, gain_val[i]);
if (err < 0)
- dev_err(&rt722->slave->dev, "%#08x can't be set\n", p->reg_base + i);
+ dev_err(&rt722->slave->dev, "%s: %#08x can't be set\n",
+ __func__, p->reg_base + i);
}
return changed;
@@ -1211,13 +1212,13 @@ static int rt722_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt722->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 16) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
@@ -1236,8 +1237,8 @@ static int rt722_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
sampling_rate = RT722_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
@@ -1329,7 +1330,7 @@ static struct snd_soc_dai_driver rt722_sdca_dai[] = {
.capture = {
.stream_name = "DP6 DMic Capture",
.channels_min = 1,
- .channels_max = 2,
+ .channels_max = 4,
.rates = RT722_STEREO_RATES,
.formats = RT722_FORMATS,
},
@@ -1438,9 +1439,12 @@ static void rt722_sdca_jack_preset(struct rt722_sdca_priv *rt722)
int loop_check, chk_cnt = 100, ret;
unsigned int calib_status = 0;
- /* Read eFuse */
- rt722_sdca_index_write(rt722, RT722_VENDOR_SPK_EFUSE, RT722_DC_CALIB_CTRL,
- 0x4808);
+ /* Config analog bias */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_ANALOG_BIAS_CTL3,
+ 0xa081);
+ /* GE related settings */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_GE_RELATED_CTL2,
+ 0xa009);
/* Button A, B, C, D bypass mode */
rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_UMP_HID_CTL4,
0xcf00);
@@ -1474,9 +1478,6 @@ static void rt722_sdca_jack_preset(struct rt722_sdca_priv *rt722)
if ((calib_status & 0x0040) == 0x0)
break;
}
- /* Release HP-JD, EN_CBJ_TIE_GL/R open, en_osw gating auto done bit */
- rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_DIGITAL_MISC_CTRL4,
- 0x0010);
/* Set ADC09 power entity floating control */
rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ADC0A_08_PDE_FLOAT_CTL,
0x2a12);
@@ -1489,8 +1490,21 @@ static void rt722_sdca_jack_preset(struct rt722_sdca_priv *rt722)
/* Set DAC03 and HP power entity floating control */
rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_DAC03_HP_PDE_FLOAT_CTL,
0x4040);
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_ENT_FLOAT_CTRL_1,
+ 0x4141);
+ rt722_sdca_index_write(rt722, RT722_VENDOR_HDA_CTL, RT722_FLOAT_CTRL_1,
+ 0x0101);
/* Fine tune PDE40 latency */
regmap_write(rt722->regmap, 0x2f58, 0x07);
+ regmap_write(rt722->regmap, 0x2f03, 0x06);
+ /* MIC VRefo */
+ rt722_sdca_index_update_bits(rt722, RT722_VENDOR_REG,
+ RT722_COMBO_JACK_AUTO_CTL1, 0x0200, 0x0200);
+ rt722_sdca_index_update_bits(rt722, RT722_VENDOR_REG,
+ RT722_VREFO_GAT, 0x4000, 0x4000);
+ /* Release HP-JD, EN_CBJ_TIE_GL/R open, en_osw gating auto done bit */
+ rt722_sdca_index_write(rt722, RT722_VENDOR_REG, RT722_DIGITAL_MISC_CTRL4,
+ 0x0010);
}
int rt722_sdca_io_init(struct device *dev, struct sdw_slave *slave)
diff --git a/sound/soc/codecs/rt722-sdca.h b/sound/soc/codecs/rt722-sdca.h
index 44af8901352eb6..2464361a7958c6 100644
--- a/sound/soc/codecs/rt722-sdca.h
+++ b/sound/soc/codecs/rt722-sdca.h
@@ -69,6 +69,7 @@ struct rt722_sdca_dmic_kctrl_priv {
#define RT722_COMBO_JACK_AUTO_CTL2 0x46
#define RT722_COMBO_JACK_AUTO_CTL3 0x47
#define RT722_DIGITAL_MISC_CTRL4 0x4a
+#define RT722_VREFO_GAT 0x63
#define RT722_FSM_CTL 0x67
#define RT722_SDCA_INTR_REC 0x82
#define RT722_SW_CONFIG1 0x8a
@@ -127,6 +128,8 @@ struct rt722_sdca_dmic_kctrl_priv {
#define RT722_UMP_HID_CTL6 0x66
#define RT722_UMP_HID_CTL7 0x67
#define RT722_UMP_HID_CTL8 0x68
+#define RT722_FLOAT_CTRL_1 0x70
+#define RT722_ENT_FLOAT_CTRL_1 0x76
/* Parameter & Verb control 01 (0x1a)(NID:20h) */
#define RT722_HIDDEN_REG_SW_RESET (0x1 << 14)
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index e451c009f2d999..7d5c096e06cd32 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -683,11 +683,12 @@ static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl)
int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type,
unsigned int alg, void *buf, size_t len)
{
- struct cs_dsp_coeff_ctl *cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
+ struct cs_dsp_coeff_ctl *cs_ctl;
struct wm_coeff_ctl *ctl;
int ret;
mutex_lock(&dsp->cs_dsp.pwr_lock);
+ cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len);
mutex_unlock(&dsp->cs_dsp.pwr_lock);
diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c
index 3c025dabaf7a47..1253695bebd863 100644
--- a/sound/soc/codecs/wsa881x.c
+++ b/sound/soc/codecs/wsa881x.c
@@ -1155,6 +1155,7 @@ static int wsa881x_probe(struct sdw_slave *pdev,
pdev->prop.sink_ports = GENMASK(WSA881X_MAX_SWR_PORTS, 0);
pdev->prop.sink_dpn_prop = wsa_sink_dpn_prop;
pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY;
+ pdev->prop.clk_stop_mode1 = true;
gpiod_direction_output(wsa881x->sd_n, !wsa881x->sd_n_val);
wsa881x->regmap = devm_regmap_init_sdw(pdev, &wsa881x_regmap_config);
diff --git a/sound/soc/intel/avs/boards/da7219.c b/sound/soc/intel/avs/boards/da7219.c
index c018f84fe02529..fc072dc58968cb 100644
--- a/sound/soc/intel/avs/boards/da7219.c
+++ b/sound/soc/intel/avs/boards/da7219.c
@@ -296,5 +296,6 @@ static struct platform_driver avs_da7219_driver = {
module_platform_driver(avs_da7219_driver);
+MODULE_DESCRIPTION("Intel da7219 machine driver");
MODULE_AUTHOR("Cezary Rojewski <cezary.rojewski@intel.com>");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/dmic.c b/sound/soc/intel/avs/boards/dmic.c
index ba2bc7f689eb60..d9e5e85f523358 100644
--- a/sound/soc/intel/avs/boards/dmic.c
+++ b/sound/soc/intel/avs/boards/dmic.c
@@ -96,4 +96,5 @@ static struct platform_driver avs_dmic_driver = {
module_platform_driver(avs_dmic_driver);
+MODULE_DESCRIPTION("Intel DMIC machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/es8336.c b/sound/soc/intel/avs/boards/es8336.c
index 1090082e7d5bfc..5c90a600757734 100644
--- a/sound/soc/intel/avs/boards/es8336.c
+++ b/sound/soc/intel/avs/boards/es8336.c
@@ -326,4 +326,5 @@ static struct platform_driver avs_es8336_driver = {
module_platform_driver(avs_es8336_driver);
+MODULE_DESCRIPTION("Intel es8336 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/i2s_test.c b/sound/soc/intel/avs/boards/i2s_test.c
index 28f254eb0d03fc..027373d6a16d60 100644
--- a/sound/soc/intel/avs/boards/i2s_test.c
+++ b/sound/soc/intel/avs/boards/i2s_test.c
@@ -204,4 +204,5 @@ static struct platform_driver avs_i2s_test_driver = {
module_platform_driver(avs_i2s_test_driver);
+MODULE_DESCRIPTION("Intel i2s test machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/max98357a.c b/sound/soc/intel/avs/boards/max98357a.c
index a83b95f25129f9..1ff85e4d8e160b 100644
--- a/sound/soc/intel/avs/boards/max98357a.c
+++ b/sound/soc/intel/avs/boards/max98357a.c
@@ -154,4 +154,5 @@ static struct platform_driver avs_max98357a_driver = {
module_platform_driver(avs_max98357a_driver)
+MODULE_DESCRIPTION("Intel max98357a machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/max98373.c b/sound/soc/intel/avs/boards/max98373.c
index 3b980a025e6f69..8d31586b73eaec 100644
--- a/sound/soc/intel/avs/boards/max98373.c
+++ b/sound/soc/intel/avs/boards/max98373.c
@@ -211,4 +211,5 @@ static struct platform_driver avs_max98373_driver = {
module_platform_driver(avs_max98373_driver)
+MODULE_DESCRIPTION("Intel max98373 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/max98927.c b/sound/soc/intel/avs/boards/max98927.c
index 86dd2b228df3a5..572ec58073d06b 100644
--- a/sound/soc/intel/avs/boards/max98927.c
+++ b/sound/soc/intel/avs/boards/max98927.c
@@ -208,4 +208,5 @@ static struct platform_driver avs_max98927_driver = {
module_platform_driver(avs_max98927_driver)
+MODULE_DESCRIPTION("Intel max98927 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/nau8825.c b/sound/soc/intel/avs/boards/nau8825.c
index 1c1e2083f474df..55db75efae4142 100644
--- a/sound/soc/intel/avs/boards/nau8825.c
+++ b/sound/soc/intel/avs/boards/nau8825.c
@@ -313,4 +313,5 @@ static struct platform_driver avs_nau8825_driver = {
module_platform_driver(avs_nau8825_driver)
+MODULE_DESCRIPTION("Intel nau8825 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/probe.c b/sound/soc/intel/avs/boards/probe.c
index a9469b5ecb402f..8be6887bbc6e81 100644
--- a/sound/soc/intel/avs/boards/probe.c
+++ b/sound/soc/intel/avs/boards/probe.c
@@ -69,4 +69,5 @@ static struct platform_driver avs_probe_mb_driver = {
module_platform_driver(avs_probe_mb_driver);
+MODULE_DESCRIPTION("Intel probe machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt274.c b/sound/soc/intel/avs/boards/rt274.c
index bfcb8845fd15d0..1cf52421608753 100644
--- a/sound/soc/intel/avs/boards/rt274.c
+++ b/sound/soc/intel/avs/boards/rt274.c
@@ -276,4 +276,5 @@ static struct platform_driver avs_rt274_driver = {
module_platform_driver(avs_rt274_driver);
+MODULE_DESCRIPTION("Intel rt274 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt286.c b/sound/soc/intel/avs/boards/rt286.c
index 28d7d86b1cc99d..4740bba1057032 100644
--- a/sound/soc/intel/avs/boards/rt286.c
+++ b/sound/soc/intel/avs/boards/rt286.c
@@ -247,4 +247,5 @@ static struct platform_driver avs_rt286_driver = {
module_platform_driver(avs_rt286_driver);
+MODULE_DESCRIPTION("Intel rt286 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt298.c b/sound/soc/intel/avs/boards/rt298.c
index 80f490b9e11842..6e409e29f69746 100644
--- a/sound/soc/intel/avs/boards/rt298.c
+++ b/sound/soc/intel/avs/boards/rt298.c
@@ -266,4 +266,5 @@ static struct platform_driver avs_rt298_driver = {
module_platform_driver(avs_rt298_driver);
+MODULE_DESCRIPTION("Intel rt298 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt5514.c b/sound/soc/intel/avs/boards/rt5514.c
index 60105f453ae235..097ae5f73241ef 100644
--- a/sound/soc/intel/avs/boards/rt5514.c
+++ b/sound/soc/intel/avs/boards/rt5514.c
@@ -192,4 +192,5 @@ static struct platform_driver avs_rt5514_driver = {
module_platform_driver(avs_rt5514_driver);
+MODULE_DESCRIPTION("Intel rt5514 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt5663.c b/sound/soc/intel/avs/boards/rt5663.c
index b4762c2a7bf2d1..1880c315cc4d1f 100644
--- a/sound/soc/intel/avs/boards/rt5663.c
+++ b/sound/soc/intel/avs/boards/rt5663.c
@@ -265,4 +265,5 @@ static struct platform_driver avs_rt5663_driver = {
module_platform_driver(avs_rt5663_driver);
+MODULE_DESCRIPTION("Intel rt5663 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt5682.c b/sound/soc/intel/avs/boards/rt5682.c
index 243f979fda98a4..594a971ded9eb2 100644
--- a/sound/soc/intel/avs/boards/rt5682.c
+++ b/sound/soc/intel/avs/boards/rt5682.c
@@ -341,5 +341,6 @@ static struct platform_driver avs_rt5682_driver = {
module_platform_driver(avs_rt5682_driver)
+MODULE_DESCRIPTION("Intel rt5682 machine driver");
MODULE_AUTHOR("Cezary Rojewski <cezary.rojewski@intel.com>");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/ssm4567.c b/sound/soc/intel/avs/boards/ssm4567.c
index 4a0e136835ff5d..d6f7f046c24e5d 100644
--- a/sound/soc/intel/avs/boards/ssm4567.c
+++ b/sound/soc/intel/avs/boards/ssm4567.c
@@ -200,4 +200,5 @@ static struct platform_driver avs_ssm4567_driver = {
module_platform_driver(avs_ssm4567_driver)
+MODULE_DESCRIPTION("Intel ssm4567 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/icl.c b/sound/soc/intel/avs/icl.c
index 9d9921e1cd4ded..d2554c8577326b 100644
--- a/sound/soc/intel/avs/icl.c
+++ b/sound/soc/intel/avs/icl.c
@@ -64,7 +64,7 @@ struct avs_icl_memwnd2_desc {
struct avs_icl_memwnd2 {
union {
struct avs_icl_memwnd2_desc slot_desc[AVS_ICL_MEMWND2_SLOTS_COUNT];
- u8 rsvd[PAGE_SIZE];
+ u8 rsvd[SZ_4K];
};
u8 slot_array[AVS_ICL_MEMWND2_SLOTS_COUNT][PAGE_SIZE];
} __packed;
diff --git a/sound/soc/intel/avs/topology.c b/sound/soc/intel/avs/topology.c
index 13061bd1488bb4..42b42903ae9de7 100644
--- a/sound/soc/intel/avs/topology.c
+++ b/sound/soc/intel/avs/topology.c
@@ -1582,6 +1582,8 @@ static int avs_widget_load(struct snd_soc_component *comp, int index,
if (!le32_to_cpu(dw->priv.size))
return 0;
+ w->no_wname_in_kcontrol_name = true;
+
if (w->ignore_suspend && !AVS_S0IX_SUPPORTED) {
dev_info_once(comp->dev, "Device does not support S0IX, check BIOS settings\n");
w->ignore_suspend = false;
diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index 05f38d1f7d824d..b41a1147f1c345 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -636,28 +636,30 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
BYT_RT5640_USE_AMCR0F28),
},
{
+ /* Asus T100TAF, unlike other T100TA* models this one has a mono speaker */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TAF"),
},
.driver_data = (void *)(BYT_RT5640_IN1_MAP |
BYT_RT5640_JD_SRC_JD2_IN4N |
BYT_RT5640_OVCD_TH_2000UA |
BYT_RT5640_OVCD_SF_0P75 |
+ BYT_RT5640_MONO_SPEAKER |
+ BYT_RT5640_DIFF_MIC |
+ BYT_RT5640_SSP0_AIF2 |
BYT_RT5640_MCLK_EN),
},
{
+ /* Asus T100TA and T100TAM, must come after T100TAF (mono spk) match */
.matches = {
- DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TAF"),
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "T100TA"),
},
.driver_data = (void *)(BYT_RT5640_IN1_MAP |
BYT_RT5640_JD_SRC_JD2_IN4N |
BYT_RT5640_OVCD_TH_2000UA |
BYT_RT5640_OVCD_SF_0P75 |
- BYT_RT5640_MONO_SPEAKER |
- BYT_RT5640_DIFF_MIC |
- BYT_RT5640_SSP0_AIF2 |
BYT_RT5640_MCLK_EN),
},
{
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index 2d25748ca70662..b27e89ff6a1673 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -263,7 +263,7 @@ int snd_soc_get_volsw(struct snd_kcontrol *kcontrol,
int max = mc->max;
int min = mc->min;
int sign_bit = mc->sign_bit;
- unsigned int mask = (1 << fls(max)) - 1;
+ unsigned int mask = (1ULL << fls(max)) - 1;
unsigned int invert = mc->invert;
int val;
int ret;
diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c
index be7dc1e02284ab..c12c7f82052947 100644
--- a/sound/soc/sof/amd/acp.c
+++ b/sound/soc/sof/amd/acp.c
@@ -704,6 +704,10 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev)
goto unregister_dev;
}
+ ret = acp_init(sdev);
+ if (ret < 0)
+ goto free_smn_dev;
+
sdev->ipc_irq = pci->irq;
ret = request_threaded_irq(sdev->ipc_irq, acp_irq_handler, acp_irq_thread,
IRQF_SHARED, "AudioDSP", sdev);
@@ -713,10 +717,6 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev)
goto free_smn_dev;
}
- ret = acp_init(sdev);
- if (ret < 0)
- goto free_ipc_irq;
-
/* scan SoundWire capabilities exposed by DSDT */
ret = acp_sof_scan_sdw_devices(sdev, chip->sdw_acpi_dev_addr);
if (ret < 0) {
diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
index 9b00ede2a486a2..238bda5f6b76fa 100644
--- a/sound/soc/sof/core.c
+++ b/sound/soc/sof/core.c
@@ -339,8 +339,7 @@ static int sof_init_environment(struct snd_sof_dev *sdev)
ret = snd_sof_probe(sdev);
if (ret < 0) {
dev_err(sdev->dev, "failed to probe DSP %d\n", ret);
- sof_ops_free(sdev);
- return ret;
+ goto err_sof_probe;
}
/* check machine info */
@@ -351,22 +350,27 @@ static int sof_init_environment(struct snd_sof_dev *sdev)
}
ret = sof_select_ipc_and_paths(sdev);
- if (!ret && plat_data->ipc_type != base_profile->ipc_type) {
+ if (ret) {
+ goto err_machine_check;
+ } else if (plat_data->ipc_type != base_profile->ipc_type) {
/* IPC type changed, re-initialize the ops */
sof_ops_free(sdev);
ret = validate_sof_ops(sdev);
if (ret < 0) {
snd_sof_remove(sdev);
+ snd_sof_remove_late(sdev);
return ret;
}
}
+ return 0;
+
err_machine_check:
- if (ret) {
- snd_sof_remove(sdev);
- sof_ops_free(sdev);
- }
+ snd_sof_remove(sdev);
+err_sof_probe:
+ snd_sof_remove_late(sdev);
+ sof_ops_free(sdev);
return ret;
}
diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c
index 7c8aafca8fdef8..7275437ea8d8a5 100644
--- a/sound/soc/sof/debug.c
+++ b/sound/soc/sof/debug.c
@@ -330,14 +330,32 @@ EXPORT_SYMBOL_GPL(snd_sof_dbg_memory_info_init);
int snd_sof_dbg_init(struct snd_sof_dev *sdev)
{
+ struct snd_sof_pdata *plat_data = sdev->pdata;
struct snd_sof_dsp_ops *ops = sof_ops(sdev);
const struct snd_sof_debugfs_map *map;
+ struct dentry *fw_profile;
int i;
int err;
/* use "sof" as top level debugFS dir */
sdev->debugfs_root = debugfs_create_dir("sof", NULL);
+ /* expose firmware/topology prefix/names for test purposes */
+ fw_profile = debugfs_create_dir("fw_profile", sdev->debugfs_root);
+
+ debugfs_create_str("fw_path", 0444, fw_profile,
+ (char **)&plat_data->fw_filename_prefix);
+ debugfs_create_str("fw_lib_path", 0444, fw_profile,
+ (char **)&plat_data->fw_lib_prefix);
+ debugfs_create_str("tplg_path", 0444, fw_profile,
+ (char **)&plat_data->tplg_filename_prefix);
+ debugfs_create_str("fw_name", 0444, fw_profile,
+ (char **)&plat_data->fw_filename);
+ debugfs_create_str("tplg_name", 0444, fw_profile,
+ (char **)&plat_data->tplg_filename);
+ debugfs_create_u32("ipc_type", 0444, fw_profile,
+ (u32 *)&plat_data->ipc_type);
+
/* init dfsentry list */
INIT_LIST_HEAD(&sdev->dfsentry_list);
diff --git a/sound/soc/sof/intel/hda-common-ops.c b/sound/soc/sof/intel/hda-common-ops.c
index 2b385cddc385c5..d71bb66b999116 100644
--- a/sound/soc/sof/intel/hda-common-ops.c
+++ b/sound/soc/sof/intel/hda-common-ops.c
@@ -57,6 +57,9 @@ struct snd_sof_dsp_ops sof_hda_common_ops = {
.pcm_pointer = hda_dsp_pcm_pointer,
.pcm_ack = hda_dsp_pcm_ack,
+ .get_dai_frame_counter = hda_dsp_get_stream_llp,
+ .get_host_byte_counter = hda_dsp_get_stream_ldp,
+
/* firmware loading */
.load_firmware = snd_sof_load_firmware_raw,
diff --git a/sound/soc/sof/intel/hda-dai-ops.c b/sound/soc/sof/intel/hda-dai-ops.c
index c50ca9e72d3738..b073720b4cf432 100644
--- a/sound/soc/sof/intel/hda-dai-ops.c
+++ b/sound/soc/sof/intel/hda-dai-ops.c
@@ -7,6 +7,7 @@
#include <sound/pcm_params.h>
#include <sound/hdaudio_ext.h>
+#include <sound/hda_register.h>
#include <sound/hda-mlink.h>
#include <sound/sof/ipc4/header.h>
#include <uapi/sound/sof/header.h>
@@ -362,6 +363,16 @@ static int hda_trigger(struct snd_sof_dev *sdev, struct snd_soc_dai *cpu_dai,
case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
snd_hdac_ext_stream_clear(hext_stream);
+
+ /*
+ * Save the LLP registers in case the stream is
+ * restarting due PAUSE_RELEASE, or START without a pcm
+ * close/open since in this case the LLP register is not reset
+ * to 0 and the delay calculation will return with invalid
+ * results.
+ */
+ hext_stream->pplcllpl = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPL);
+ hext_stream->pplcllpu = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPU);
break;
default:
dev_err(sdev->dev, "unknown trigger command %d\n", cmd);
diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c
index 31ffa1a8f2ac04..ef5c915db8ffb4 100644
--- a/sound/soc/sof/intel/hda-dsp.c
+++ b/sound/soc/sof/intel/hda-dsp.c
@@ -681,17 +681,27 @@ static int hda_suspend(struct snd_sof_dev *sdev, bool runtime_suspend)
struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata;
const struct sof_intel_dsp_desc *chip = hda->desc;
struct hdac_bus *bus = sof_to_bus(sdev);
+ bool imr_lost = false;
int ret, j;
/*
- * The memory used for IMR boot loses its content in deeper than S3 state
- * We must not try IMR boot on next power up (as it will fail).
- *
+ * The memory used for IMR boot loses its content in deeper than S3
+ * state on CAVS platforms.
+ * On ACE platforms due to the system architecture the IMR content is
+ * lost at S3 state already, they are tailored for s2idle use.
+ * We must not try IMR boot on next power up in these cases as it will
+ * fail.
+ */
+ if (sdev->system_suspend_target > SOF_SUSPEND_S3 ||
+ (chip->hw_ip_version >= SOF_INTEL_ACE_1_0 &&
+ sdev->system_suspend_target == SOF_SUSPEND_S3))
+ imr_lost = true;
+
+ /*
* In case of firmware crash or boot failure set the skip_imr_boot to true
* as well in order to try to re-load the firmware to do a 'cold' boot.
*/
- if (sdev->system_suspend_target > SOF_SUSPEND_S3 ||
- sdev->fw_state == SOF_FW_CRASHED ||
+ if (imr_lost || sdev->fw_state == SOF_FW_CRASHED ||
sdev->fw_state == SOF_FW_BOOT_FAILED)
hda->skip_imr_boot = true;
diff --git a/sound/soc/sof/intel/hda-pcm.c b/sound/soc/sof/intel/hda-pcm.c
index 18f07364d21984..d7b446f3f973e3 100644
--- a/sound/soc/sof/intel/hda-pcm.c
+++ b/sound/soc/sof/intel/hda-pcm.c
@@ -259,8 +259,37 @@ int hda_dsp_pcm_open(struct snd_sof_dev *sdev,
snd_pcm_hw_constraint_mask64(substream->runtime, SNDRV_PCM_HW_PARAM_FORMAT,
SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S32);
+ /*
+ * The dsp_max_burst_size_in_ms is the length of the maximum burst size
+ * of the host DMA in the ALSA buffer.
+ *
+ * On playback start the DMA will transfer dsp_max_burst_size_in_ms
+ * amount of data in one initial burst to fill up the host DMA buffer.
+ * Consequent DMA burst sizes are shorter and their length can vary.
+ * To make sure that userspace allocate large enough ALSA buffer we need
+ * to place a constraint on the buffer time.
+ *
+ * On capture the DMA will transfer 1ms chunks.
+ *
+ * Exact dsp_max_burst_size_in_ms constraint is racy, so set the
+ * constraint to a minimum of 2x dsp_max_burst_size_in_ms.
+ */
+ if (spcm->stream[direction].dsp_max_burst_size_in_ms)
+ snd_pcm_hw_constraint_minmax(substream->runtime,
+ SNDRV_PCM_HW_PARAM_BUFFER_TIME,
+ spcm->stream[direction].dsp_max_burst_size_in_ms * USEC_PER_MSEC * 2,
+ UINT_MAX);
+
/* binding pcm substream to hda stream */
substream->runtime->private_data = &dsp_stream->hstream;
+
+ /*
+ * Reset the llp cache values (they are used for LLP compensation in
+ * case the counter is not reset)
+ */
+ dsp_stream->pplcllpl = 0;
+ dsp_stream->pplcllpu = 0;
+
return 0;
}
diff --git a/sound/soc/sof/intel/hda-stream.c b/sound/soc/sof/intel/hda-stream.c
index b387b1a69d7ea3..0c189d3b19c1af 100644
--- a/sound/soc/sof/intel/hda-stream.c
+++ b/sound/soc/sof/intel/hda-stream.c
@@ -1063,3 +1063,73 @@ snd_pcm_uframes_t hda_dsp_stream_get_position(struct hdac_stream *hstream,
return pos;
}
+
+#define merge_u64(u32_u, u32_l) (((u64)(u32_u) << 32) | (u32_l))
+
+/**
+ * hda_dsp_get_stream_llp - Retrieve the LLP (Linear Link Position) of the stream
+ * @sdev: SOF device
+ * @component: ASoC component
+ * @substream: PCM substream
+ *
+ * Returns the raw Linear Link Position value
+ */
+u64 hda_dsp_get_stream_llp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ struct hdac_stream *hstream = substream->runtime->private_data;
+ struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream);
+ u32 llp_l, llp_u;
+
+ /*
+ * The pplc_addr have been calculated during probe in
+ * hda_dsp_stream_init():
+ * pplc_addr = sdev->bar[HDA_DSP_PP_BAR] +
+ * SOF_HDA_PPLC_BASE +
+ * SOF_HDA_PPLC_MULTI * total_stream +
+ * SOF_HDA_PPLC_INTERVAL * stream_index
+ *
+ * Use this pre-calculated address to avoid repeated re-calculation.
+ */
+ llp_l = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPL);
+ llp_u = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPU);
+
+ /* Compensate the LLP counter with the saved offset */
+ if (hext_stream->pplcllpl || hext_stream->pplcllpu)
+ return merge_u64(llp_u, llp_l) -
+ merge_u64(hext_stream->pplcllpu, hext_stream->pplcllpl);
+
+ return merge_u64(llp_u, llp_l);
+}
+
+/**
+ * hda_dsp_get_stream_ldp - Retrieve the LDP (Linear DMA Position) of the stream
+ * @sdev: SOF device
+ * @component: ASoC component
+ * @substream: PCM substream
+ *
+ * Returns the raw Linear Link Position value
+ */
+u64 hda_dsp_get_stream_ldp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ struct hdac_stream *hstream = substream->runtime->private_data;
+ struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream);
+ u32 ldp_l, ldp_u;
+
+ /*
+ * The pphc_addr have been calculated during probe in
+ * hda_dsp_stream_init():
+ * pphc_addr = sdev->bar[HDA_DSP_PP_BAR] +
+ * SOF_HDA_PPHC_BASE +
+ * SOF_HDA_PPHC_INTERVAL * stream_index
+ *
+ * Use this pre-calculated address to avoid repeated re-calculation.
+ */
+ ldp_l = readl(hext_stream->pphc_addr + AZX_REG_PPHCLDPL);
+ ldp_u = readl(hext_stream->pphc_addr + AZX_REG_PPHCLDPU);
+
+ return ((u64)ldp_u << 32) | ldp_l;
+}
diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h
index b36eb7c7891335..81a1d4606d3cde 100644
--- a/sound/soc/sof/intel/hda.h
+++ b/sound/soc/sof/intel/hda.h
@@ -662,6 +662,12 @@ bool hda_dsp_check_stream_irq(struct snd_sof_dev *sdev);
snd_pcm_uframes_t hda_dsp_stream_get_position(struct hdac_stream *hstream,
int direction, bool can_sleep);
+u64 hda_dsp_get_stream_llp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream);
+u64 hda_dsp_get_stream_ldp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream);
struct hdac_ext_stream *
hda_dsp_stream_get(struct snd_sof_dev *sdev, int direction, u32 flags);
diff --git a/sound/soc/sof/intel/lnl.c b/sound/soc/sof/intel/lnl.c
index 7ae017a00184e5..aeb4350cce6bba 100644
--- a/sound/soc/sof/intel/lnl.c
+++ b/sound/soc/sof/intel/lnl.c
@@ -29,15 +29,17 @@ static const struct snd_sof_debugfs_map lnl_dsp_debugfs[] = {
};
/* this helps allows the DSP to setup DMIC/SSP */
-static int hdac_bus_offload_dmic_ssp(struct hdac_bus *bus)
+static int hdac_bus_offload_dmic_ssp(struct hdac_bus *bus, bool enable)
{
int ret;
- ret = hdac_bus_eml_enable_offload(bus, true, AZX_REG_ML_LEPTR_ID_INTEL_SSP, true);
+ ret = hdac_bus_eml_enable_offload(bus, true,
+ AZX_REG_ML_LEPTR_ID_INTEL_SSP, enable);
if (ret < 0)
return ret;
- ret = hdac_bus_eml_enable_offload(bus, true, AZX_REG_ML_LEPTR_ID_INTEL_DMIC, true);
+ ret = hdac_bus_eml_enable_offload(bus, true,
+ AZX_REG_ML_LEPTR_ID_INTEL_DMIC, enable);
if (ret < 0)
return ret;
@@ -52,7 +54,19 @@ static int lnl_hda_dsp_probe(struct snd_sof_dev *sdev)
if (ret < 0)
return ret;
- return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+ return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
+}
+
+static void lnl_hda_dsp_remove(struct snd_sof_dev *sdev)
+{
+ int ret;
+
+ ret = hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), false);
+ if (ret < 0)
+ dev_warn(sdev->dev,
+ "Failed to disable offload for DMIC/SSP: %d\n", ret);
+
+ hda_dsp_remove(sdev);
}
static int lnl_hda_dsp_resume(struct snd_sof_dev *sdev)
@@ -63,7 +77,7 @@ static int lnl_hda_dsp_resume(struct snd_sof_dev *sdev)
if (ret < 0)
return ret;
- return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+ return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
}
static int lnl_hda_dsp_runtime_resume(struct snd_sof_dev *sdev)
@@ -74,7 +88,7 @@ static int lnl_hda_dsp_runtime_resume(struct snd_sof_dev *sdev)
if (ret < 0)
return ret;
- return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+ return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
}
static int lnl_dsp_post_fw_run(struct snd_sof_dev *sdev)
@@ -97,9 +111,11 @@ int sof_lnl_ops_init(struct snd_sof_dev *sdev)
/* common defaults */
memcpy(&sof_lnl_ops, &sof_hda_common_ops, sizeof(struct snd_sof_dsp_ops));
- /* probe */
- if (!sdev->dspless_mode_selected)
+ /* probe/remove */
+ if (!sdev->dspless_mode_selected) {
sof_lnl_ops.probe = lnl_hda_dsp_probe;
+ sof_lnl_ops.remove = lnl_hda_dsp_remove;
+ }
/* shutdown */
sof_lnl_ops.shutdown = hda_dsp_shutdown;
@@ -134,8 +150,6 @@ int sof_lnl_ops_init(struct snd_sof_dev *sdev)
sof_lnl_ops.runtime_resume = lnl_hda_dsp_runtime_resume;
}
- sof_lnl_ops.get_stream_position = mtl_dsp_get_stream_hda_link_position;
-
/* dsp core get/put */
sof_lnl_ops.core_get = mtl_dsp_core_get;
sof_lnl_ops.core_put = mtl_dsp_core_put;
diff --git a/sound/soc/sof/intel/mtl.c b/sound/soc/sof/intel/mtl.c
index df05dc77b8d5e3..060c34988e90d1 100644
--- a/sound/soc/sof/intel/mtl.c
+++ b/sound/soc/sof/intel/mtl.c
@@ -626,18 +626,6 @@ static int mtl_dsp_disable_interrupts(struct snd_sof_dev *sdev)
return mtl_enable_interrupts(sdev, false);
}
-u64 mtl_dsp_get_stream_hda_link_position(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream)
-{
- struct hdac_stream *hstream = substream->runtime->private_data;
- u32 llp_l, llp_u;
-
- llp_l = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, MTL_PPLCLLPL(hstream->index));
- llp_u = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, MTL_PPLCLLPU(hstream->index));
- return ((u64)llp_u << 32) | llp_l;
-}
-
int mtl_dsp_core_get(struct snd_sof_dev *sdev, int core)
{
const struct sof_ipc_pm_ops *pm_ops = sdev->ipc->ops->pm;
@@ -707,8 +695,6 @@ int sof_mtl_ops_init(struct snd_sof_dev *sdev)
sof_mtl_ops.core_get = mtl_dsp_core_get;
sof_mtl_ops.core_put = mtl_dsp_core_put;
- sof_mtl_ops.get_stream_position = mtl_dsp_get_stream_hda_link_position;
-
sdev->private = kzalloc(sizeof(struct sof_ipc4_fw_data), GFP_KERNEL);
if (!sdev->private)
return -ENOMEM;
diff --git a/sound/soc/sof/intel/mtl.h b/sound/soc/sof/intel/mtl.h
index cc5a1f46fd0956..ea8c1b83f7127d 100644
--- a/sound/soc/sof/intel/mtl.h
+++ b/sound/soc/sof/intel/mtl.h
@@ -6,12 +6,6 @@
* Copyright(c) 2020-2022 Intel Corporation. All rights reserved.
*/
-/* HDA Registers */
-#define MTL_PPLCLLPL_BASE 0x948
-#define MTL_PPLCLLPU_STRIDE 0x10
-#define MTL_PPLCLLPL(x) (MTL_PPLCLLPL_BASE + (x) * MTL_PPLCLLPU_STRIDE)
-#define MTL_PPLCLLPU(x) (MTL_PPLCLLPL_BASE + 0x4 + (x) * MTL_PPLCLLPU_STRIDE)
-
/* DSP Registers */
#define MTL_HFDSSCS 0x1000
#define MTL_HFDSSCS_SPA_MASK BIT(16)
@@ -103,9 +97,5 @@ int mtl_dsp_ipc_get_window_offset(struct snd_sof_dev *sdev, u32 id);
void mtl_ipc_dump(struct snd_sof_dev *sdev);
-u64 mtl_dsp_get_stream_hda_link_position(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream);
-
int mtl_dsp_core_get(struct snd_sof_dev *sdev, int core);
int mtl_dsp_core_put(struct snd_sof_dev *sdev, int core);
diff --git a/sound/soc/sof/intel/pci-lnl.c b/sound/soc/sof/intel/pci-lnl.c
index b26ffe767fab55..b14e508f1f315b 100644
--- a/sound/soc/sof/intel/pci-lnl.c
+++ b/sound/soc/sof/intel/pci-lnl.c
@@ -35,6 +35,9 @@ static const struct sof_dev_desc lnl_desc = {
.default_fw_path = {
[SOF_IPC_TYPE_4] = "intel/sof-ipc4/lnl",
},
+ .default_lib_path = {
+ [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/lnl",
+ },
.default_tplg_path = {
[SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg",
},
diff --git a/sound/soc/sof/ipc3-pcm.c b/sound/soc/sof/ipc3-pcm.c
index 35769dd7905ebe..af0bf354cb209e 100644
--- a/sound/soc/sof/ipc3-pcm.c
+++ b/sound/soc/sof/ipc3-pcm.c
@@ -434,4 +434,5 @@ const struct sof_ipc_pcm_ops ipc3_pcm_ops = {
.trigger = sof_ipc3_pcm_trigger,
.dai_link_fixup = sof_ipc3_pcm_dai_link_fixup,
.reset_hw_params_during_stop = true,
+ .d0i3_supported_in_s0ix = true,
};
diff --git a/sound/soc/sof/ipc4-mtrace.c b/sound/soc/sof/ipc4-mtrace.c
index 9f1e33ee882612..0e04bea9432dda 100644
--- a/sound/soc/sof/ipc4-mtrace.c
+++ b/sound/soc/sof/ipc4-mtrace.c
@@ -4,6 +4,7 @@
#include <linux/debugfs.h>
#include <linux/sched/signal.h>
+#include <linux/sched/clock.h>
#include <sound/sof/ipc4/header.h>
#include "sof-priv.h"
#include "ipc4-priv.h"
@@ -412,7 +413,6 @@ static int ipc4_mtrace_enable(struct snd_sof_dev *sdev)
const struct sof_ipc_ops *iops = sdev->ipc->ops;
struct sof_ipc4_msg msg;
u64 system_time;
- ktime_t kt;
int ret;
if (priv->mtrace_state != SOF_MTRACE_DISABLED)
@@ -424,9 +424,12 @@ static int ipc4_mtrace_enable(struct snd_sof_dev *sdev)
msg.primary |= SOF_IPC4_MOD_INSTANCE(SOF_IPC4_MOD_INIT_BASEFW_INSTANCE_ID);
msg.extension = SOF_IPC4_MOD_EXT_MSG_PARAM_ID(SOF_IPC4_FW_PARAM_SYSTEM_TIME);
- /* The system time is in usec, UTC, epoch is 1601-01-01 00:00:00 */
- kt = ktime_add_us(ktime_get_real(), FW_EPOCH_DELTA * USEC_PER_SEC);
- system_time = ktime_to_us(kt);
+ /*
+ * local_clock() is used to align with dmesg, so both kernel and firmware logs have
+ * the same base and a minor delta due to the IPC. system time is in us format but
+ * local_clock() returns the time in ns, so convert to ns.
+ */
+ system_time = div64_u64(local_clock(), NSEC_PER_USEC);
msg.data_size = sizeof(system_time);
msg.data_ptr = &system_time;
ret = iops->set_get_data(sdev, &msg, msg.data_size, true);
diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c
index 0f332c8cdbe6af..4594470ed08b1e 100644
--- a/sound/soc/sof/ipc4-pcm.c
+++ b/sound/soc/sof/ipc4-pcm.c
@@ -15,6 +15,47 @@
#include "ipc4-topology.h"
#include "ipc4-fw-reg.h"
+/**
+ * struct sof_ipc4_timestamp_info - IPC4 timestamp info
+ * @host_copier: the host copier of the pcm stream
+ * @dai_copier: the dai copier of the pcm stream
+ * @stream_start_offset: reported by fw in memory window (converted to frames)
+ * @stream_end_offset: reported by fw in memory window (converted to frames)
+ * @llp_offset: llp offset in memory window
+ * @boundary: wrap boundary should be used for the LLP frame counter
+ * @delay: Calculated and stored in pointer callback. The stored value is
+ * returned in the delay callback.
+ */
+struct sof_ipc4_timestamp_info {
+ struct sof_ipc4_copier *host_copier;
+ struct sof_ipc4_copier *dai_copier;
+ u64 stream_start_offset;
+ u64 stream_end_offset;
+ u32 llp_offset;
+
+ u64 boundary;
+ snd_pcm_sframes_t delay;
+};
+
+/**
+ * struct sof_ipc4_pcm_stream_priv - IPC4 specific private data
+ * @time_info: pointer to time info struct if it is supported, otherwise NULL
+ * @chain_dma_allocated: indicates the ChainDMA allocation state
+ */
+struct sof_ipc4_pcm_stream_priv {
+ struct sof_ipc4_timestamp_info *time_info;
+
+ bool chain_dma_allocated;
+};
+
+static inline struct sof_ipc4_timestamp_info *
+sof_ipc4_sps_to_time_info(struct snd_sof_pcm_stream *sps)
+{
+ struct sof_ipc4_pcm_stream_priv *stream_priv = sps->private;
+
+ return stream_priv->time_info;
+}
+
static int sof_ipc4_set_multi_pipeline_state(struct snd_sof_dev *sdev, u32 state,
struct ipc4_pipeline_set_state_data *trigger_list)
{
@@ -231,14 +272,17 @@ sof_ipc4_update_pipeline_state(struct snd_sof_dev *sdev, int state, int cmd,
*/
static int sof_ipc4_chain_dma_trigger(struct snd_sof_dev *sdev,
- int direction,
+ struct snd_sof_pcm *spcm, int direction,
struct snd_sof_pcm_stream_pipeline_list *pipeline_list,
int state, int cmd)
{
struct sof_ipc4_fw_data *ipc4_data = sdev->private;
+ struct sof_ipc4_pcm_stream_priv *stream_priv;
bool allocate, enable, set_fifo_size;
struct sof_ipc4_msg msg = {{ 0 }};
- int i;
+ int ret, i;
+
+ stream_priv = spcm->stream[direction].private;
switch (state) {
case SOF_IPC4_PIPE_RUNNING: /* Allocate and start chained dma */
@@ -259,6 +303,11 @@ static int sof_ipc4_chain_dma_trigger(struct snd_sof_dev *sdev,
set_fifo_size = false;
break;
case SOF_IPC4_PIPE_RESET: /* Disable and free chained DMA. */
+
+ /* ChainDMA can only be reset if it has been allocated */
+ if (!stream_priv->chain_dma_allocated)
+ return 0;
+
allocate = false;
enable = false;
set_fifo_size = false;
@@ -316,7 +365,12 @@ static int sof_ipc4_chain_dma_trigger(struct snd_sof_dev *sdev,
if (enable)
msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_ENABLE_MASK;
- return sof_ipc_tx_message_no_reply(sdev->ipc, &msg, 0);
+ ret = sof_ipc_tx_message_no_reply(sdev->ipc, &msg, 0);
+ /* Update the ChainDMA allocation state */
+ if (!ret)
+ stream_priv->chain_dma_allocated = allocate;
+
+ return ret;
}
static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component,
@@ -356,7 +410,7 @@ static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component,
* trigger function that handles the rest for the substream.
*/
if (pipeline->use_chain_dma)
- return sof_ipc4_chain_dma_trigger(sdev, substream->stream,
+ return sof_ipc4_chain_dma_trigger(sdev, spcm, substream->stream,
pipeline_list, state, cmd);
/* allocate memory for the pipeline data */
@@ -423,8 +477,19 @@ static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component,
}
/* return if this is the final state */
- if (state == SOF_IPC4_PIPE_PAUSED)
+ if (state == SOF_IPC4_PIPE_PAUSED) {
+ struct sof_ipc4_timestamp_info *time_info;
+
+ /*
+ * Invalidate the stream_start_offset to make sure that it is
+ * going to be updated if the stream resumes
+ */
+ time_info = sof_ipc4_sps_to_time_info(&spcm->stream[substream->stream]);
+ if (time_info)
+ time_info->stream_start_offset = SOF_IPC4_INVALID_STREAM_POSITION;
+
goto free;
+ }
skip_pause_transition:
/* else set the RUNNING/RESET state in the DSP */
ret = sof_ipc4_set_multi_pipeline_state(sdev, state, trigger_list);
@@ -464,14 +529,12 @@ static int sof_ipc4_pcm_trigger(struct snd_soc_component *component,
/* determine the pipeline state */
switch (cmd) {
- case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
- state = SOF_IPC4_PIPE_PAUSED;
- break;
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
case SNDRV_PCM_TRIGGER_RESUME:
case SNDRV_PCM_TRIGGER_START:
state = SOF_IPC4_PIPE_RUNNING;
break;
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_STOP:
state = SOF_IPC4_PIPE_PAUSED;
@@ -675,12 +738,16 @@ static int sof_ipc4_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd,
static void sof_ipc4_pcm_free(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm)
{
struct snd_sof_pcm_stream_pipeline_list *pipeline_list;
+ struct sof_ipc4_pcm_stream_priv *stream_priv;
int stream;
for_each_pcm_streams(stream) {
pipeline_list = &spcm->stream[stream].pipeline_list;
kfree(pipeline_list->pipelines);
pipeline_list->pipelines = NULL;
+
+ stream_priv = spcm->stream[stream].private;
+ kfree(stream_priv->time_info);
kfree(spcm->stream[stream].private);
spcm->stream[stream].private = NULL;
}
@@ -690,7 +757,8 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm
{
struct snd_sof_pcm_stream_pipeline_list *pipeline_list;
struct sof_ipc4_fw_data *ipc4_data = sdev->private;
- struct sof_ipc4_timestamp_info *stream_info;
+ struct sof_ipc4_pcm_stream_priv *stream_priv;
+ struct sof_ipc4_timestamp_info *time_info;
bool support_info = true;
u32 abi_version;
u32 abi_offset;
@@ -703,6 +771,10 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm
if (abi_version < SOF_IPC4_FW_REGS_ABI_VER)
support_info = false;
+ /* For delay reporting the get_host_byte_counter callback is needed */
+ if (!sof_ops(sdev) || !sof_ops(sdev)->get_host_byte_counter)
+ support_info = false;
+
for_each_pcm_streams(stream) {
pipeline_list = &spcm->stream[stream].pipeline_list;
@@ -714,33 +786,41 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm
return -ENOMEM;
}
+ stream_priv = kzalloc(sizeof(*stream_priv), GFP_KERNEL);
+ if (!stream_priv) {
+ sof_ipc4_pcm_free(sdev, spcm);
+ return -ENOMEM;
+ }
+
+ spcm->stream[stream].private = stream_priv;
+
if (!support_info)
continue;
- stream_info = kzalloc(sizeof(*stream_info), GFP_KERNEL);
- if (!stream_info) {
+ time_info = kzalloc(sizeof(*time_info), GFP_KERNEL);
+ if (!time_info) {
sof_ipc4_pcm_free(sdev, spcm);
return -ENOMEM;
}
- spcm->stream[stream].private = stream_info;
+ stream_priv->time_info = time_info;
}
return 0;
}
-static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pcm_stream *spcm)
+static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pcm_stream *sps)
{
struct sof_ipc4_copier *host_copier = NULL;
struct sof_ipc4_copier *dai_copier = NULL;
struct sof_ipc4_llp_reading_slot llp_slot;
- struct sof_ipc4_timestamp_info *info;
+ struct sof_ipc4_timestamp_info *time_info;
struct snd_soc_dapm_widget *widget;
struct snd_sof_dai *dai;
int i;
/* find host & dai to locate info in memory window */
- for_each_dapm_widgets(spcm->list, i, widget) {
+ for_each_dapm_widgets(sps->list, i, widget) {
struct snd_sof_widget *swidget = widget->dobj.private;
if (!swidget)
@@ -760,44 +840,44 @@ static void sof_ipc4_build_time_info(struct snd_sof_dev *sdev, struct snd_sof_pc
return;
}
- info = spcm->private;
- info->host_copier = host_copier;
- info->dai_copier = dai_copier;
- info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_gpdma_reading_slots) +
- sdev->fw_info_box.offset;
+ time_info = sof_ipc4_sps_to_time_info(sps);
+ time_info->host_copier = host_copier;
+ time_info->dai_copier = dai_copier;
+ time_info->llp_offset = offsetof(struct sof_ipc4_fw_registers,
+ llp_gpdma_reading_slots) + sdev->fw_info_box.offset;
/* find llp slot used by current dai */
for (i = 0; i < SOF_IPC4_MAX_LLP_GPDMA_READING_SLOTS; i++) {
- sof_mailbox_read(sdev, info->llp_offset, &llp_slot, sizeof(llp_slot));
+ sof_mailbox_read(sdev, time_info->llp_offset, &llp_slot, sizeof(llp_slot));
if (llp_slot.node_id == dai_copier->data.gtw_cfg.node_id)
break;
- info->llp_offset += sizeof(llp_slot);
+ time_info->llp_offset += sizeof(llp_slot);
}
if (i < SOF_IPC4_MAX_LLP_GPDMA_READING_SLOTS)
return;
/* if no llp gpdma slot is used, check aggregated sdw slot */
- info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_sndw_reading_slots) +
- sdev->fw_info_box.offset;
+ time_info->llp_offset = offsetof(struct sof_ipc4_fw_registers,
+ llp_sndw_reading_slots) + sdev->fw_info_box.offset;
for (i = 0; i < SOF_IPC4_MAX_LLP_SNDW_READING_SLOTS; i++) {
- sof_mailbox_read(sdev, info->llp_offset, &llp_slot, sizeof(llp_slot));
+ sof_mailbox_read(sdev, time_info->llp_offset, &llp_slot, sizeof(llp_slot));
if (llp_slot.node_id == dai_copier->data.gtw_cfg.node_id)
break;
- info->llp_offset += sizeof(llp_slot);
+ time_info->llp_offset += sizeof(llp_slot);
}
if (i < SOF_IPC4_MAX_LLP_SNDW_READING_SLOTS)
return;
/* check EVAD slot */
- info->llp_offset = offsetof(struct sof_ipc4_fw_registers, llp_evad_reading_slot) +
- sdev->fw_info_box.offset;
- sof_mailbox_read(sdev, info->llp_offset, &llp_slot, sizeof(llp_slot));
+ time_info->llp_offset = offsetof(struct sof_ipc4_fw_registers,
+ llp_evad_reading_slot) + sdev->fw_info_box.offset;
+ sof_mailbox_read(sdev, time_info->llp_offset, &llp_slot, sizeof(llp_slot));
if (llp_slot.node_id != dai_copier->data.gtw_cfg.node_id)
- info->llp_offset = 0;
+ time_info->llp_offset = 0;
}
static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component,
@@ -814,7 +894,7 @@ static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component,
if (!spcm)
return -EINVAL;
- time_info = spcm->stream[substream->stream].private;
+ time_info = sof_ipc4_sps_to_time_info(&spcm->stream[substream->stream]);
/* delay calculation is not supported by current fw_reg ABI */
if (!time_info)
return 0;
@@ -829,13 +909,12 @@ static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component,
static int sof_ipc4_get_stream_start_offset(struct snd_sof_dev *sdev,
struct snd_pcm_substream *substream,
- struct snd_sof_pcm_stream *stream,
+ struct snd_sof_pcm_stream *sps,
struct sof_ipc4_timestamp_info *time_info)
{
struct sof_ipc4_copier *host_copier = time_info->host_copier;
struct sof_ipc4_copier *dai_copier = time_info->dai_copier;
struct sof_ipc4_pipeline_registers ppl_reg;
- u64 stream_start_position;
u32 dai_sample_size;
u32 ch, node_index;
u32 offset;
@@ -852,38 +931,51 @@ static int sof_ipc4_get_stream_start_offset(struct snd_sof_dev *sdev,
if (ppl_reg.stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION)
return -EINVAL;
- stream_start_position = ppl_reg.stream_start_offset;
ch = dai_copier->data.out_format.fmt_cfg;
ch = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(ch);
dai_sample_size = (dai_copier->data.out_format.bit_depth >> 3) * ch;
- /* convert offset to sample count */
- do_div(stream_start_position, dai_sample_size);
- time_info->stream_start_offset = stream_start_position;
+
+ /* convert offsets to frame count */
+ time_info->stream_start_offset = ppl_reg.stream_start_offset;
+ do_div(time_info->stream_start_offset, dai_sample_size);
+ time_info->stream_end_offset = ppl_reg.stream_end_offset;
+ do_div(time_info->stream_end_offset, dai_sample_size);
+
+ /*
+ * Calculate the wrap boundary need to be used for delay calculation
+ * The host counter is in bytes, it will wrap earlier than the frames
+ * based link counter.
+ */
+ time_info->boundary = div64_u64(~((u64)0),
+ frames_to_bytes(substream->runtime, 1));
+ /* Initialize the delay value to 0 (no delay) */
+ time_info->delay = 0;
return 0;
}
-static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component,
- struct snd_pcm_substream *substream)
+static int sof_ipc4_pcm_pointer(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream,
+ snd_pcm_uframes_t *pointer)
{
struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component);
struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
struct sof_ipc4_timestamp_info *time_info;
struct sof_ipc4_llp_reading_slot llp;
- snd_pcm_uframes_t head_ptr, tail_ptr;
- struct snd_sof_pcm_stream *stream;
+ snd_pcm_uframes_t head_cnt, tail_cnt;
+ struct snd_sof_pcm_stream *sps;
+ u64 dai_cnt, host_cnt, host_ptr;
struct snd_sof_pcm *spcm;
- u64 tmp_ptr;
int ret;
spcm = snd_sof_find_spcm_dai(component, rtd);
if (!spcm)
- return 0;
+ return -EOPNOTSUPP;
- stream = &spcm->stream[substream->stream];
- time_info = stream->private;
+ sps = &spcm->stream[substream->stream];
+ time_info = sof_ipc4_sps_to_time_info(sps);
if (!time_info)
- return 0;
+ return -EOPNOTSUPP;
/*
* stream_start_offset is updated to memory window by FW based on
@@ -891,47 +983,116 @@ static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component,
* the statistics is complete. And it will not change after the first initiailization.
*/
if (time_info->stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION) {
- ret = sof_ipc4_get_stream_start_offset(sdev, substream, stream, time_info);
+ ret = sof_ipc4_get_stream_start_offset(sdev, substream, sps, time_info);
if (ret < 0)
- return 0;
+ return -EOPNOTSUPP;
}
+ /* For delay calculation we need the host counter */
+ host_cnt = snd_sof_pcm_get_host_byte_counter(sdev, component, substream);
+ host_ptr = host_cnt;
+
+ /* convert the host_cnt to frames */
+ host_cnt = div64_u64(host_cnt, frames_to_bytes(substream->runtime, 1));
+
/*
- * HDaudio links don't support the LLP counter reported by firmware
- * the link position is read directly from hardware registers.
+ * If the LLP counter is not reported by firmware in the SRAM window
+ * then read the dai (link) counter via host accessible means if
+ * available.
*/
if (!time_info->llp_offset) {
- tmp_ptr = snd_sof_pcm_get_stream_position(sdev, component, substream);
- if (!tmp_ptr)
- return 0;
+ dai_cnt = snd_sof_pcm_get_dai_frame_counter(sdev, component, substream);
+ if (!dai_cnt)
+ return -EOPNOTSUPP;
} else {
sof_mailbox_read(sdev, time_info->llp_offset, &llp, sizeof(llp));
- tmp_ptr = ((u64)llp.reading.llp_u << 32) | llp.reading.llp_l;
+ dai_cnt = ((u64)llp.reading.llp_u << 32) | llp.reading.llp_l;
}
+ dai_cnt += time_info->stream_end_offset;
- /* In two cases dai dma position is not accurate
+ /* In two cases dai dma counter is not accurate
* (1) dai pipeline is started before host pipeline
- * (2) multiple streams mixed into one. Each stream has the same dai dma position
+ * (2) multiple streams mixed into one. Each stream has the same dai dma
+ * counter
*
- * Firmware calculates correct stream_start_offset for all cases including above two.
- * Driver subtracts stream_start_offset from dai dma position to get accurate one
+ * Firmware calculates correct stream_start_offset for all cases
+ * including above two.
+ * Driver subtracts stream_start_offset from dai dma counter to get
+ * accurate one
*/
- tmp_ptr -= time_info->stream_start_offset;
- /* Calculate the delay taking into account that both pointer can wrap */
- div64_u64_rem(tmp_ptr, substream->runtime->boundary, &tmp_ptr);
+ /*
+ * On stream start the dai counter might not yet have reached the
+ * stream_start_offset value which means that no frames have left the
+ * DSP yet from the audio stream (on playback, capture streams have
+ * offset of 0 as we start capturing right away).
+ * In this case we need to adjust the distance between the counters by
+ * increasing the host counter by (offset - dai_counter).
+ * Otherwise the dai_counter needs to be adjusted to reflect the number
+ * of valid frames passed on the DAI side.
+ *
+ * The delay is the difference between the counters on the two
+ * sides of the DSP.
+ */
+ if (dai_cnt < time_info->stream_start_offset) {
+ host_cnt += time_info->stream_start_offset - dai_cnt;
+ dai_cnt = 0;
+ } else {
+ dai_cnt -= time_info->stream_start_offset;
+ }
+
+ /* Wrap the dai counter at the boundary where the host counter wraps */
+ div64_u64_rem(dai_cnt, time_info->boundary, &dai_cnt);
+
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
- head_ptr = substream->runtime->status->hw_ptr;
- tail_ptr = tmp_ptr;
+ head_cnt = host_cnt;
+ tail_cnt = dai_cnt;
} else {
- head_ptr = tmp_ptr;
- tail_ptr = substream->runtime->status->hw_ptr;
+ head_cnt = dai_cnt;
+ tail_cnt = host_cnt;
+ }
+
+ if (head_cnt < tail_cnt) {
+ time_info->delay = time_info->boundary - tail_cnt + head_cnt;
+ goto out;
}
- if (head_ptr < tail_ptr)
- return substream->runtime->boundary - tail_ptr + head_ptr;
+ time_info->delay = head_cnt - tail_cnt;
+
+out:
+ /*
+ * Convert the host byte counter to PCM pointer which wraps in buffer
+ * and it is in frames
+ */
+ div64_u64_rem(host_ptr, snd_pcm_lib_buffer_bytes(substream), &host_ptr);
+ *pointer = bytes_to_frames(substream->runtime, host_ptr);
+
+ return 0;
+}
+
+static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
+ struct sof_ipc4_timestamp_info *time_info;
+ struct snd_sof_pcm *spcm;
+
+ spcm = snd_sof_find_spcm_dai(component, rtd);
+ if (!spcm)
+ return 0;
+
+ time_info = sof_ipc4_sps_to_time_info(&spcm->stream[substream->stream]);
+ /*
+ * Report the stored delay value calculated in the pointer callback.
+ * In the unlikely event that the calculation was skipped/aborted, the
+ * default 0 delay returned.
+ */
+ if (time_info)
+ return time_info->delay;
+
+ /* No delay information available, report 0 as delay */
+ return 0;
- return head_ptr - tail_ptr;
}
const struct sof_ipc_pcm_ops ipc4_pcm_ops = {
@@ -941,6 +1102,7 @@ const struct sof_ipc_pcm_ops ipc4_pcm_ops = {
.dai_link_fixup = sof_ipc4_pcm_dai_link_fixup,
.pcm_setup = sof_ipc4_pcm_setup,
.pcm_free = sof_ipc4_pcm_free,
+ .pointer = sof_ipc4_pcm_pointer,
.delay = sof_ipc4_pcm_delay,
.ipc_first_on_start = true,
.platform_stop_during_hw_free = true,
diff --git a/sound/soc/sof/ipc4-priv.h b/sound/soc/sof/ipc4-priv.h
index f3b908b093f956..afed618a15f061 100644
--- a/sound/soc/sof/ipc4-priv.h
+++ b/sound/soc/sof/ipc4-priv.h
@@ -92,20 +92,6 @@ struct sof_ipc4_fw_data {
struct mutex pipeline_state_mutex; /* protect pipeline triggers, ref counts and states */
};
-/**
- * struct sof_ipc4_timestamp_info - IPC4 timestamp info
- * @host_copier: the host copier of the pcm stream
- * @dai_copier: the dai copier of the pcm stream
- * @stream_start_offset: reported by fw in memory window
- * @llp_offset: llp offset in memory window
- */
-struct sof_ipc4_timestamp_info {
- struct sof_ipc4_copier *host_copier;
- struct sof_ipc4_copier *dai_copier;
- u64 stream_start_offset;
- u32 llp_offset;
-};
-
extern const struct sof_ipc_fw_loader_ops ipc4_loader_ops;
extern const struct sof_ipc_tplg_ops ipc4_tplg_ops;
extern const struct sof_ipc_tplg_control_ops tplg_ipc4_control_ops;
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index da4a83afb87a8a..5cca0584212609 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -412,8 +412,9 @@ static int sof_ipc4_widget_setup_pcm(struct snd_sof_widget *swidget)
struct sof_ipc4_available_audio_format *available_fmt;
struct snd_soc_component *scomp = swidget->scomp;
struct sof_ipc4_copier *ipc4_copier;
+ struct snd_sof_pcm *spcm;
int node_type = 0;
- int ret;
+ int ret, dir;
ipc4_copier = kzalloc(sizeof(*ipc4_copier), GFP_KERNEL);
if (!ipc4_copier)
@@ -447,6 +448,25 @@ static int sof_ipc4_widget_setup_pcm(struct snd_sof_widget *swidget)
}
dev_dbg(scomp->dev, "host copier '%s' node_type %u\n", swidget->widget->name, node_type);
+ spcm = snd_sof_find_spcm_comp(scomp, swidget->comp_id, &dir);
+ if (!spcm)
+ goto skip_gtw_cfg;
+
+ if (dir == SNDRV_PCM_STREAM_PLAYBACK) {
+ struct snd_sof_pcm_stream *sps = &spcm->stream[dir];
+
+ sof_update_ipc_object(scomp, &sps->dsp_max_burst_size_in_ms,
+ SOF_COPIER_DEEP_BUFFER_TOKENS,
+ swidget->tuples,
+ swidget->num_tuples, sizeof(u32), 1);
+ /* Set default DMA buffer size if it is not specified in topology */
+ if (!sps->dsp_max_burst_size_in_ms)
+ sps->dsp_max_burst_size_in_ms = SOF_IPC4_MIN_DMA_BUFFER_SIZE;
+ } else {
+ /* Capture data is copied from DSP to host in 1ms bursts */
+ spcm->stream[dir].dsp_max_burst_size_in_ms = 1;
+ }
+
skip_gtw_cfg:
ipc4_copier->gtw_attr = kzalloc(sizeof(*ipc4_copier->gtw_attr), GFP_KERNEL);
if (!ipc4_copier->gtw_attr) {
@@ -1356,6 +1376,7 @@ static int snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_s
int sample_rate, channel_count;
int bit_depth, ret;
u32 nhlt_type;
+ int dev_type = 0;
/* convert to NHLT type */
switch (linktype) {
@@ -1371,18 +1392,30 @@ static int snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_s
&bit_depth);
if (ret < 0)
return ret;
+
+ /*
+ * We need to know the type of the external device attached to a SSP
+ * port to retrieve the blob from NHLT. However, device type is not
+ * specified in topology.
+ * Query the type for the port and then pass that information back
+ * to the blob lookup function.
+ */
+ dev_type = intel_nhlt_ssp_device_type(sdev->dev, ipc4_data->nhlt,
+ dai_index);
+ if (dev_type < 0)
+ return dev_type;
break;
default:
return 0;
}
- dev_dbg(sdev->dev, "dai index %d nhlt type %d direction %d\n",
- dai_index, nhlt_type, dir);
+ dev_dbg(sdev->dev, "dai index %d nhlt type %d direction %d dev type %d\n",
+ dai_index, nhlt_type, dir, dev_type);
/* find NHLT blob with matching params */
cfg = intel_nhlt_get_endpoint_blob(sdev->dev, ipc4_data->nhlt, dai_index, nhlt_type,
bit_depth, bit_depth, channel_count, sample_rate,
- dir, 0);
+ dir, dev_type);
if (!cfg) {
dev_err(sdev->dev,
diff --git a/sound/soc/sof/ops.h b/sound/soc/sof/ops.h
index 6cf21e829e0727..3cd748e1346091 100644
--- a/sound/soc/sof/ops.h
+++ b/sound/soc/sof/ops.h
@@ -523,12 +523,26 @@ static inline int snd_sof_pcm_platform_ack(struct snd_sof_dev *sdev,
return 0;
}
-static inline u64 snd_sof_pcm_get_stream_position(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream)
+static inline u64
+snd_sof_pcm_get_dai_frame_counter(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
{
- if (sof_ops(sdev) && sof_ops(sdev)->get_stream_position)
- return sof_ops(sdev)->get_stream_position(sdev, component, substream);
+ if (sof_ops(sdev) && sof_ops(sdev)->get_dai_frame_counter)
+ return sof_ops(sdev)->get_dai_frame_counter(sdev, component,
+ substream);
+
+ return 0;
+}
+
+static inline u64
+snd_sof_pcm_get_host_byte_counter(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ if (sof_ops(sdev) && sof_ops(sdev)->get_host_byte_counter)
+ return sof_ops(sdev)->get_host_byte_counter(sdev, component,
+ substream);
return 0;
}
diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c
index 33d576b1764783..8804e00e7251b9 100644
--- a/sound/soc/sof/pcm.c
+++ b/sound/soc/sof/pcm.c
@@ -325,14 +325,13 @@ static int sof_pcm_trigger(struct snd_soc_component *component,
ipc_first = true;
break;
case SNDRV_PCM_TRIGGER_SUSPEND:
- if (sdev->system_suspend_target == SOF_SUSPEND_S0IX &&
+ /*
+ * If DSP D0I3 is allowed during S0iX, set the suspend_ignored flag for
+ * D0I3-compatible streams to keep the firmware pipeline running
+ */
+ if (pcm_ops && pcm_ops->d0i3_supported_in_s0ix &&
+ sdev->system_suspend_target == SOF_SUSPEND_S0IX &&
spcm->stream[substream->stream].d0i3_compatible) {
- /*
- * trap the event, not sending trigger stop to
- * prevent the FW pipelines from being stopped,
- * and mark the flag to ignore the upcoming DAPM
- * PM events.
- */
spcm->stream[substream->stream].suspend_ignored = true;
return 0;
}
@@ -388,13 +387,21 @@ static snd_pcm_uframes_t sof_pcm_pointer(struct snd_soc_component *component,
{
struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component);
+ const struct sof_ipc_pcm_ops *pcm_ops = sof_ipc_get_ops(sdev, pcm);
struct snd_sof_pcm *spcm;
snd_pcm_uframes_t host, dai;
+ int ret = -EOPNOTSUPP;
/* nothing to do for BE */
if (rtd->dai_link->no_pcm)
return 0;
+ if (pcm_ops && pcm_ops->pointer)
+ ret = pcm_ops->pointer(component, substream, &host);
+
+ if (ret != -EOPNOTSUPP)
+ return ret ? ret : host;
+
/* use dsp ops pointer callback directly if set */
if (sof_ops(sdev)->pcm_pointer)
return sof_ops(sdev)->pcm_pointer(sdev, substream);
diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h
index 9ea2ac5adac79e..499b6084b52637 100644
--- a/sound/soc/sof/sof-audio.h
+++ b/sound/soc/sof/sof-audio.h
@@ -103,7 +103,10 @@ struct snd_sof_dai_config_data {
* additional memory in the SOF PCM stream structure
* @pcm_free: Function pointer for PCM free that can be used for freeing any
* additional memory in the SOF PCM stream structure
- * @delay: Function pointer for pcm delay calculation
+ * @pointer: Function pointer for pcm pointer
+ * Note: the @pointer callback may return -EOPNOTSUPP which should be
+ * handled in a same way as if the callback is not provided
+ * @delay: Function pointer for pcm delay reporting
* @reset_hw_params_during_stop: Flag indicating whether the hw_params should be reset during the
* STOP pcm trigger
* @ipc_first_on_start: Send IPC before invoking platform trigger during
@@ -113,6 +116,7 @@ struct snd_sof_dai_config_data {
* triggers. The FW keeps the host DMA running in this case and
* therefore the host must do the same and should stop the DMA during
* hw_free.
+ * @d0i3_supported_in_s0ix: Allow DSP D0I3 during S0iX
*/
struct sof_ipc_pcm_ops {
int (*hw_params)(struct snd_soc_component *component, struct snd_pcm_substream *substream,
@@ -124,11 +128,15 @@ struct sof_ipc_pcm_ops {
int (*dai_link_fixup)(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params);
int (*pcm_setup)(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm);
void (*pcm_free)(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm);
+ int (*pointer)(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream,
+ snd_pcm_uframes_t *pointer);
snd_pcm_sframes_t (*delay)(struct snd_soc_component *component,
struct snd_pcm_substream *substream);
bool reset_hw_params_during_stop;
bool ipc_first_on_start;
bool platform_stop_during_hw_free;
+ bool d0i3_supported_in_s0ix;
};
/**
@@ -322,6 +330,7 @@ struct snd_sof_pcm_stream {
struct work_struct period_elapsed_work;
struct snd_soc_dapm_widget_list *list; /* list of connected DAPM widgets */
bool d0i3_compatible; /* DSP can be in D0I3 when this pcm is opened */
+ unsigned int dsp_max_burst_size_in_ms; /* The maximum size of the host DMA burst in ms */
/*
* flag to indicate that the DSP pipelines should be kept
* active or not while suspending the stream
diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h
index d453a4ce3b219d..d3c436f826046b 100644
--- a/sound/soc/sof/sof-priv.h
+++ b/sound/soc/sof/sof-priv.h
@@ -262,13 +262,25 @@ struct snd_sof_dsp_ops {
int (*pcm_ack)(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream); /* optional */
/*
- * optional callback to retrieve the link DMA position for the substream
- * when the position is not reported in the shared SRAM windows but
- * instead from a host-accessible hardware counter.
+ * optional callback to retrieve the number of frames left/arrived from/to
+ * the DSP on the DAI side (link/codec/DMIC/etc).
+ *
+ * The callback is used when the firmware does not provide this information
+ * via the shared SRAM window and it can be retrieved by host.
*/
- u64 (*get_stream_position)(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream); /* optional */
+ u64 (*get_dai_frame_counter)(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream); /* optional */
+
+ /*
+ * Optional callback to retrieve the number of bytes left/arrived from/to
+ * the DSP on the host side (bytes between host ALSA buffer and DSP).
+ *
+ * The callback is needed for ALSA delay reporting.
+ */
+ u64 (*get_host_byte_counter)(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream); /* optional */
/* host read DSP stream data */
int (*ipc_msg_data)(struct snd_sof_dev *sdev,
diff --git a/sound/soc/tegra/tegra186_dspk.c b/sound/soc/tegra/tegra186_dspk.c
index aa37c4ab0adbd9..21cd41fec7a9cb 100644
--- a/sound/soc/tegra/tegra186_dspk.c
+++ b/sound/soc/tegra/tegra186_dspk.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// tegra186_dspk.c - Tegra186 DSPK driver
-//
-// Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
#include <linux/clk.h>
#include <linux/device.h>
@@ -241,14 +240,14 @@ static int tegra186_dspk_hw_params(struct snd_pcm_substream *substream,
return -EINVAL;
}
- cif_conf.client_bits = TEGRA_ACIF_BITS_24;
-
switch (params_format(params)) {
case SNDRV_PCM_FORMAT_S16_LE:
cif_conf.audio_bits = TEGRA_ACIF_BITS_16;
+ cif_conf.client_bits = TEGRA_ACIF_BITS_16;
break;
case SNDRV_PCM_FORMAT_S32_LE:
cif_conf.audio_bits = TEGRA_ACIF_BITS_32;
+ cif_conf.client_bits = TEGRA_ACIF_BITS_24;
break;
default:
dev_err(dev, "unsupported format!\n");
diff --git a/sound/soc/ti/davinci-mcasp.c b/sound/soc/ti/davinci-mcasp.c
index b892d66f78470a..1e760c3155213d 100644
--- a/sound/soc/ti/davinci-mcasp.c
+++ b/sound/soc/ti/davinci-mcasp.c
@@ -2417,12 +2417,6 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
mcasp_reparent_fck(pdev);
- ret = devm_snd_soc_register_component(&pdev->dev, &davinci_mcasp_component,
- &davinci_mcasp_dai[mcasp->op_mode], 1);
-
- if (ret != 0)
- goto err;
-
ret = davinci_mcasp_get_dma_type(mcasp);
switch (ret) {
case PCM_EDMA:
@@ -2449,6 +2443,12 @@ static int davinci_mcasp_probe(struct platform_device *pdev)
goto err;
}
+ ret = devm_snd_soc_register_component(&pdev->dev, &davinci_mcasp_component,
+ &davinci_mcasp_dai[mcasp->op_mode], 1);
+
+ if (ret != 0)
+ goto err;
+
no_audio:
ret = davinci_mcasp_init_gpiochip(mcasp);
if (ret) {
diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c
index b67617b68e509d..f4437015d43a75 100644
--- a/sound/usb/line6/driver.c
+++ b/sound/usb/line6/driver.c
@@ -202,7 +202,7 @@ int line6_send_raw_message_async(struct usb_line6 *line6, const char *buffer,
struct urb *urb;
/* create message: */
- msg = kmalloc(sizeof(struct message), GFP_ATOMIC);
+ msg = kzalloc(sizeof(struct message), GFP_ATOMIC);
if (msg == NULL)
return -ENOMEM;
@@ -688,7 +688,7 @@ static int line6_init_cap_control(struct usb_line6 *line6)
int ret;
/* initialize USB buffers: */
- line6->buffer_listen = kmalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL);
+ line6->buffer_listen = kzalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL);
if (!line6->buffer_listen)
return -ENOMEM;
@@ -697,7 +697,7 @@ static int line6_init_cap_control(struct usb_line6 *line6)
return -ENOMEM;
if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) {
- line6->buffer_message = kmalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL);
+ line6->buffer_message = kzalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL);
if (!line6->buffer_message)
return -ENOMEM;
diff --git a/tools/Makefile b/tools/Makefile
index 37e9f680483264..276f5d0d53a447 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -11,7 +11,6 @@ help:
@echo ''
@echo ' acpi - ACPI tools'
@echo ' bpf - misc BPF tools'
- @echo ' cgroup - cgroup tools'
@echo ' counter - counter tools'
@echo ' cpupower - a tool for all things x86 CPU power'
@echo ' debugging - tools for debugging'
@@ -69,7 +68,7 @@ acpi: FORCE
cpupower: FORCE
$(call descend,power/$@)
-cgroup counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
+counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
$(call descend,$@)
bpf/%: FORCE
@@ -116,7 +115,7 @@ freefall: FORCE
kvm_stat: FORCE
$(call descend,kvm/$@)
-all: acpi cgroup counter cpupower gpio hv firewire \
+all: acpi counter cpupower gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
virtio mm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
@@ -128,7 +127,7 @@ acpi_install:
cpupower_install:
$(call descend,power/$(@:_install=),install)
-cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
+counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
$(call descend,$(@:_install=),install)
selftests_install:
@@ -155,7 +154,7 @@ freefall_install:
kvm_stat_install:
$(call descend,kvm/$(@:_install=),install)
-install: acpi_install cgroup_install counter_install cpupower_install gpio_install \
+install: acpi_install counter_install cpupower_install gpio_install \
hv_install firewire_install iio_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install mm_install bpf_install x86_energy_perf_policy_install \
@@ -169,7 +168,7 @@ acpi_clean:
cpupower_clean:
$(call descend,power/cpupower,clean)
-cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
+counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
$(call descend,$(@:_clean=),clean)
libapi_clean:
@@ -209,7 +208,7 @@ freefall_clean:
build_clean:
$(call descend,build,clean)
-clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_clean \
+clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 7c7493cb571f97..52f076afeb9600 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -61,6 +61,7 @@
#define ARM_CPU_IMP_HISI 0x48
#define ARM_CPU_IMP_APPLE 0x61
#define ARM_CPU_IMP_AMPERE 0xC0
+#define ARM_CPU_IMP_MICROSOFT 0x6D
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
@@ -135,6 +136,8 @@
#define AMPERE_CPU_PART_AMPERE1 0xAC3
+#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */
+
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
@@ -193,6 +196,7 @@
#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
+#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 89d2fc872d9f5e..964df31da9751c 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -37,9 +37,7 @@
#include <asm/ptrace.h>
#include <asm/sve_context.h>
-#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_READONLY_MEM
#define __KVM_HAVE_VCPU_EVENTS
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -76,11 +74,11 @@ struct kvm_regs {
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
-#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
- KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
+ KVM_ARM_DEVICE_TYPE_SHIFT)
#define KVM_ARM_DEVICE_ID_SHIFT 16
-#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
- KVM_ARM_DEVICE_ID_SHIFT)
+#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
+ KVM_ARM_DEVICE_ID_SHIFT)
/* Supported device IDs */
#define KVM_ARM_DEVICE_VGIC_V2 0
@@ -162,6 +160,11 @@ struct kvm_sync_regs {
__u64 device_irq_level;
};
+/* Bits for run->s.regs.device_irq_level */
+#define KVM_ARM_DEV_EL1_VTIMER (1 << 0)
+#define KVM_ARM_DEV_EL1_PTIMER (1 << 1)
+#define KVM_ARM_DEV_PMU (1 << 2)
+
/*
* PMU filter structure. Describe a range of events with a particular
* action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 9f18fa090f1f1d..1691297a766a9c 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -28,7 +28,6 @@
#define __KVM_HAVE_PPC_SMT
#define __KVM_HAVE_IRQCHIP
#define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_GUEST_DEBUG
/* Not always available, but if it is, this is the correct offset. */
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -733,4 +732,48 @@ struct kvm_ppc_xive_eq {
#define KVM_XIVE_TIMA_PAGE_OFFSET 0
#define KVM_XIVE_ESB_PAGE_OFFSET 4
+/* for KVM_PPC_GET_PVINFO */
+
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
+
+struct kvm_ppc_pvinfo {
+ /* out */
+ __u32 flags;
+ __u32 hcall[4];
+ __u8 pad[108];
+};
+
+/* for KVM_PPC_GET_SMMU_INFO */
+#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
+
+struct kvm_ppc_one_page_size {
+ __u32 page_shift; /* Page shift (or 0) */
+ __u32 pte_enc; /* Encoding in the HPTE (>>12) */
+};
+
+struct kvm_ppc_one_seg_page_size {
+ __u32 page_shift; /* Base page shift of segment (or 0) */
+ __u32 slb_enc; /* SLB encoding for BookS */
+ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
+#define KVM_PPC_1T_SEGMENTS 0x00000002
+#define KVM_PPC_NO_HASH 0x00000004
+
+struct kvm_ppc_smmu_info {
+ __u64 flags;
+ __u32 slb_size;
+ __u16 data_keys; /* # storage keys supported for data */
+ __u16 instr_keys; /* # storage keys supported for instructions */
+ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
+struct kvm_ppc_resize_hpt {
+ __u64 flags;
+ __u32 shift;
+ __u32 pad;
+};
+
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index abe926d43cbe0a..05eaf6db3ad4cb 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -12,7 +12,320 @@
#include <linux/types.h>
#define __KVM_S390
-#define __KVM_HAVE_GUEST_DEBUG
+
+struct kvm_s390_skeys {
+ __u64 start_gfn;
+ __u64 count;
+ __u64 skeydata_addr;
+ __u32 flags;
+ __u32 reserved[9];
+};
+
+#define KVM_S390_CMMA_PEEK (1 << 0)
+
+/**
+ * kvm_s390_cmma_log - Used for CMMA migration.
+ *
+ * Used both for input and output.
+ *
+ * @start_gfn: Guest page number to start from.
+ * @count: Size of the result buffer.
+ * @flags: Control operation mode via KVM_S390_CMMA_* flags
+ * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
+ * pages are still remaining.
+ * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
+ * in the PGSTE.
+ * @values: Pointer to the values buffer.
+ *
+ * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
+ */
+struct kvm_s390_cmma_log {
+ __u64 start_gfn;
+ __u32 count;
+ __u32 flags;
+ union {
+ __u64 remaining;
+ __u64 mask;
+ };
+ __u64 values;
+};
+
+#define KVM_S390_RESET_POR 1
+#define KVM_S390_RESET_CLEAR 2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT 8
+#define KVM_S390_RESET_IPL 16
+
+/* for KVM_S390_MEM_OP */
+struct kvm_s390_mem_op {
+ /* in */
+ __u64 gaddr; /* the guest address */
+ __u64 flags; /* flags */
+ __u32 size; /* amount of bytes */
+ __u32 op; /* type of operation */
+ __u64 buf; /* buffer in userspace */
+ union {
+ struct {
+ __u8 ar; /* the access register number */
+ __u8 key; /* access key, ignored if flag unset */
+ __u8 pad1[6]; /* ignored */
+ __u64 old_addr; /* ignored if cmpxchg flag unset */
+ };
+ __u32 sida_offset; /* offset into the sida */
+ __u8 reserved[32]; /* ignored */
+ };
+};
+/* types for kvm_s390_mem_op->op */
+#define KVM_S390_MEMOP_LOGICAL_READ 0
+#define KVM_S390_MEMOP_LOGICAL_WRITE 1
+#define KVM_S390_MEMOP_SIDA_READ 2
+#define KVM_S390_MEMOP_SIDA_WRITE 3
+#define KVM_S390_MEMOP_ABSOLUTE_READ 4
+#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
+#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6
+
+/* flags for kvm_s390_mem_op->flags */
+#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
+#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
+#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2)
+
+/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */
+#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0)
+#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1)
+
+struct kvm_s390_psw {
+ __u64 mask;
+ __u64 addr;
+};
+
+/* valid values for type in kvm_s390_interrupt */
+#define KVM_S390_SIGP_STOP 0xfffe0000u
+#define KVM_S390_PROGRAM_INT 0xfffe0001u
+#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u
+#define KVM_S390_RESTART 0xfffe0003u
+#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u
+#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u
+#define KVM_S390_MCHK 0xfffe1000u
+#define KVM_S390_INT_CLOCK_COMP 0xffff1004u
+#define KVM_S390_INT_CPU_TIMER 0xffff1005u
+#define KVM_S390_INT_VIRTIO 0xffff2603u
+#define KVM_S390_INT_SERVICE 0xffff2401u
+#define KVM_S390_INT_EMERGENCY 0xffff1201u
+#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u
+/* Anything below 0xfffe0000u is taken by INT_IO */
+#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \
+ (((schid)) | \
+ ((ssid) << 16) | \
+ ((cssid) << 18) | \
+ ((ai) << 26))
+#define KVM_S390_INT_IO_MIN 0x00000000u
+#define KVM_S390_INT_IO_MAX 0xfffdffffu
+#define KVM_S390_INT_IO_AI_MASK 0x04000000u
+
+
+struct kvm_s390_interrupt {
+ __u32 type;
+ __u32 parm;
+ __u64 parm64;
+};
+
+struct kvm_s390_io_info {
+ __u16 subchannel_id;
+ __u16 subchannel_nr;
+ __u32 io_int_parm;
+ __u32 io_int_word;
+};
+
+struct kvm_s390_ext_info {
+ __u32 ext_params;
+ __u32 pad;
+ __u64 ext_params2;
+};
+
+struct kvm_s390_pgm_info {
+ __u64 trans_exc_code;
+ __u64 mon_code;
+ __u64 per_address;
+ __u32 data_exc_code;
+ __u16 code;
+ __u16 mon_class_nr;
+ __u8 per_code;
+ __u8 per_atmid;
+ __u8 exc_access_id;
+ __u8 per_access_id;
+ __u8 op_access_id;
+#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01
+#define KVM_S390_PGM_FLAGS_ILC_0 0x02
+#define KVM_S390_PGM_FLAGS_ILC_1 0x04
+#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06
+#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08
+ __u8 flags;
+ __u8 pad[2];
+};
+
+struct kvm_s390_prefix_info {
+ __u32 address;
+};
+
+struct kvm_s390_extcall_info {
+ __u16 code;
+};
+
+struct kvm_s390_emerg_info {
+ __u16 code;
+};
+
+#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01
+struct kvm_s390_stop_info {
+ __u32 flags;
+};
+
+struct kvm_s390_mchk_info {
+ __u64 cr14;
+ __u64 mcic;
+ __u64 failing_storage_address;
+ __u32 ext_damage_code;
+ __u32 pad;
+ __u8 fixed_logout[16];
+};
+
+struct kvm_s390_irq {
+ __u64 type;
+ union {
+ struct kvm_s390_io_info io;
+ struct kvm_s390_ext_info ext;
+ struct kvm_s390_pgm_info pgm;
+ struct kvm_s390_emerg_info emerg;
+ struct kvm_s390_extcall_info extcall;
+ struct kvm_s390_prefix_info prefix;
+ struct kvm_s390_stop_info stop;
+ struct kvm_s390_mchk_info mchk;
+ char reserved[64];
+ } u;
+};
+
+struct kvm_s390_irq_state {
+ __u64 buf;
+ __u32 flags; /* will stay unused for compatibility reasons */
+ __u32 len;
+ __u32 reserved[4]; /* will stay unused for compatibility reasons */
+};
+
+struct kvm_s390_ucas_mapping {
+ __u64 user_addr;
+ __u64 vcpu_addr;
+ __u64 length;
+};
+
+struct kvm_s390_pv_sec_parm {
+ __u64 origin;
+ __u64 length;
+};
+
+struct kvm_s390_pv_unp {
+ __u64 addr;
+ __u64 size;
+ __u64 tweak;
+};
+
+enum pv_cmd_dmp_id {
+ KVM_PV_DUMP_INIT,
+ KVM_PV_DUMP_CONFIG_STOR_STATE,
+ KVM_PV_DUMP_COMPLETE,
+ KVM_PV_DUMP_CPU,
+};
+
+struct kvm_s390_pv_dmp {
+ __u64 subcmd;
+ __u64 buff_addr;
+ __u64 buff_len;
+ __u64 gaddr; /* For dump storage state */
+ __u64 reserved[4];
+};
+
+enum pv_cmd_info_id {
+ KVM_PV_INFO_VM,
+ KVM_PV_INFO_DUMP,
+};
+
+struct kvm_s390_pv_info_dump {
+ __u64 dump_cpu_buffer_len;
+ __u64 dump_config_mem_buffer_per_1m;
+ __u64 dump_config_finalize_len;
+};
+
+struct kvm_s390_pv_info_vm {
+ __u64 inst_calls_list[4];
+ __u64 max_cpus;
+ __u64 max_guests;
+ __u64 max_guest_addr;
+ __u64 feature_indication;
+};
+
+struct kvm_s390_pv_info_header {
+ __u32 id;
+ __u32 len_max;
+ __u32 len_written;
+ __u32 reserved;
+};
+
+struct kvm_s390_pv_info {
+ struct kvm_s390_pv_info_header header;
+ union {
+ struct kvm_s390_pv_info_dump dump;
+ struct kvm_s390_pv_info_vm vm;
+ };
+};
+
+enum pv_cmd_id {
+ KVM_PV_ENABLE,
+ KVM_PV_DISABLE,
+ KVM_PV_SET_SEC_PARMS,
+ KVM_PV_UNPACK,
+ KVM_PV_VERIFY,
+ KVM_PV_PREP_RESET,
+ KVM_PV_UNSHARE_ALL,
+ KVM_PV_INFO,
+ KVM_PV_DUMP,
+ KVM_PV_ASYNC_CLEANUP_PREPARE,
+ KVM_PV_ASYNC_CLEANUP_PERFORM,
+};
+
+struct kvm_pv_cmd {
+ __u32 cmd; /* Command to be executed */
+ __u16 rc; /* Ultravisor return code */
+ __u16 rrc; /* Ultravisor return reason code */
+ __u64 data; /* Data or address */
+ __u32 flags; /* flags for future extensions. Must be 0 for now */
+ __u32 reserved[3];
+};
+
+struct kvm_s390_zpci_op {
+ /* in */
+ __u32 fh; /* target device */
+ __u8 op; /* operation to perform */
+ __u8 pad[3];
+ union {
+ /* for KVM_S390_ZPCIOP_REG_AEN */
+ struct {
+ __u64 ibv; /* Guest addr of interrupt bit vector */
+ __u64 sb; /* Guest addr of summary bit */
+ __u32 flags;
+ __u32 noi; /* Number of interrupts */
+ __u8 isc; /* Guest interrupt subclass */
+ __u8 sbo; /* Offset of guest summary bit vector */
+ __u16 pad;
+ } reg_aen;
+ __u64 reserved[8];
+ } u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN 0
+#define KVM_S390_ZPCIOP_DEREG_AEN 1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
/* Device control API: s390-specific devices */
#define KVM_DEV_FLIC_GET_ALL_IRQS 1
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 25160d26764b5e..3c7434329661c6 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 21 /* N 32-bit words worth of info */
+#define NCAPINTS 22 /* N 32-bit words worth of info */
#define NBUGINTS 2 /* N 32-bit bug flags */
/*
@@ -81,10 +81,8 @@
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-
-/* CPU types for specific tunings: */
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */
+#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
@@ -97,7 +95,7 @@
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
@@ -462,6 +460,18 @@
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc and Linux defined features.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
+
+/*
* BUG word(s)
*/
#define X86_BUG(x) (NCAPINTS*32 + (x))
@@ -508,4 +518,6 @@
/* BUG word 2 */
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 1f23960d2b06e7..c492bdc97b0595 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -123,6 +123,12 @@
# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31))
#endif
+#ifdef CONFIG_KVM_AMD_SEV
+#define DISABLE_SEV_SNP 0
+#else
+#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -147,8 +153,9 @@
DISABLE_ENQCMD)
#define DISABLED_MASK17 0
#define DISABLED_MASK18 (DISABLE_IBT)
-#define DISABLED_MASK19 0
+#define DISABLED_MASK19 (DISABLE_SEV_SNP)
#define DISABLED_MASK20 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define DISABLED_MASK21 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 1f9dc9bd13eb7e..e72c2b87295799 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -61,10 +61,13 @@
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
/* A mask for bits which the kernel toggles when controlling mitigations */
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
- | SPEC_CTRL_RRSBA_DIS_S)
+ | SPEC_CTRL_RRSBA_DIS_S \
+ | SPEC_CTRL_BHI_DIS_S)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -163,6 +166,10 @@
* are restricted to targets in
* kernel.
*/
+#define ARCH_CAP_BHI_NO BIT(20) /*
+ * CPU is not affected by Branch
+ * History Injection.
+ */
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
@@ -176,6 +183,14 @@
* CPU is not vulnerable to Gather
* Data Sampling (GDS).
*/
+#define ARCH_CAP_RFDS_NO BIT(27) /*
+ * Not susceptible to Register
+ * File Data Sampling.
+ */
+#define ARCH_CAP_RFDS_CLEAR BIT(28) /*
+ * VERW clears CPU Register
+ * File.
+ */
#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
* IA32_XAPIC_DISABLE_STATUS MSR
@@ -605,34 +620,47 @@
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
-#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
-#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
-
-/* SNP feature bits enabled by the hypervisor */
-#define MSR_AMD64_SNP_VTOM BIT_ULL(3)
-#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4)
-#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5)
-#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6)
-#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7)
-#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8)
-#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9)
-#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10)
-#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11)
-#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12)
-#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14)
-#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16)
-#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17)
-
-/* SNP feature bits reserved for future use. */
-#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
-#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
-#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18)
+#define MSR_AMD64_SNP_VTOM_BIT 3
+#define MSR_AMD64_SNP_VTOM BIT_ULL(MSR_AMD64_SNP_VTOM_BIT)
+#define MSR_AMD64_SNP_REFLECT_VC_BIT 4
+#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(MSR_AMD64_SNP_REFLECT_VC_BIT)
+#define MSR_AMD64_SNP_RESTRICTED_INJ_BIT 5
+#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(MSR_AMD64_SNP_RESTRICTED_INJ_BIT)
+#define MSR_AMD64_SNP_ALT_INJ_BIT 6
+#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(MSR_AMD64_SNP_ALT_INJ_BIT)
+#define MSR_AMD64_SNP_DEBUG_SWAP_BIT 7
+#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(MSR_AMD64_SNP_DEBUG_SWAP_BIT)
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT 8
+#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT)
+#define MSR_AMD64_SNP_BTB_ISOLATION_BIT 9
+#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(MSR_AMD64_SNP_BTB_ISOLATION_BIT)
+#define MSR_AMD64_SNP_VMPL_SSS_BIT 10
+#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(MSR_AMD64_SNP_VMPL_SSS_BIT)
+#define MSR_AMD64_SNP_SECURE_TSC_BIT 11
+#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(MSR_AMD64_SNP_SECURE_TSC_BIT)
+#define MSR_AMD64_SNP_VMGEXIT_PARAM_BIT 12
+#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(MSR_AMD64_SNP_VMGEXIT_PARAM_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13)
+#define MSR_AMD64_SNP_IBS_VIRT_BIT 14
+#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(MSR_AMD64_SNP_IBS_VIRT_BIT)
+#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15)
+#define MSR_AMD64_SNP_VMSA_REG_PROT_BIT 16
+#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
+#define MSR_AMD64_SNP_SMT_PROT_BIT 17
+#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 18
+#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+#define MSR_AMD64_RMP_BASE 0xc0010132
+#define MSR_AMD64_RMP_END 0xc0010133
+
/* AMD Collaborative Processor Performance Control MSRs */
#define MSR_AMD_CPPC_CAP1 0xc00102b0
#define MSR_AMD_CPPC_ENABLE 0xc00102b1
@@ -719,8 +747,15 @@
#define MSR_K8_TOP_MEM1 0xc001001a
#define MSR_K8_TOP_MEM2 0xc001001d
#define MSR_AMD64_SYSCFG 0xc0010010
-#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23
#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG_SNP_EN_BIT 24
+#define MSR_AMD64_SYSCFG_SNP_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_EN_BIT)
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT 25
+#define MSR_AMD64_SYSCFG_SNP_VMPL_EN BIT_ULL(MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT)
+#define MSR_AMD64_SYSCFG_MFDM_BIT 19
+#define MSR_AMD64_SYSCFG_MFDM BIT_ULL(MSR_AMD64_SYSCFG_MFDM_BIT)
+
#define MSR_K8_INT_PENDING_MSG 0xc0010055
/* C1E active bits in int pending message */
#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index 7ba1726b71c7b8..e9187ddd3d1fdc 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -99,6 +99,7 @@
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
#define REQUIRED_MASK20 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define REQUIRED_MASK21 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index a448d0964fc06e..ef11aa4cab4253 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -7,6 +7,8 @@
*
*/
+#include <linux/const.h>
+#include <linux/bits.h>
#include <linux/types.h>
#include <linux/ioctl.h>
#include <linux/stddef.h>
@@ -40,7 +42,6 @@
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
-#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_MSIX
#define __KVM_HAVE_MCE
#define __KVM_HAVE_PIT_STATE2
@@ -49,7 +50,6 @@
#define __KVM_HAVE_DEBUGREGS
#define __KVM_HAVE_XSAVE
#define __KVM_HAVE_XCRS
-#define __KVM_HAVE_READONLY_MEM
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
@@ -526,9 +526,301 @@ struct kvm_pmu_event_filter {
#define KVM_PMU_EVENT_ALLOW 0
#define KVM_PMU_EVENT_DENY 1
-#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0)
+#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0)
#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS)
+/* for KVM_CAP_MCE */
+struct kvm_x86_mce {
+ __u64 status;
+ __u64 addr;
+ __u64 misc;
+ __u64 mcg_status;
+ __u8 bank;
+ __u8 pad1[7];
+ __u64 pad2[3];
+};
+
+/* for KVM_CAP_XEN_HVM */
+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8)
+
+struct kvm_xen_hvm_config {
+ __u32 flags;
+ __u32 msr;
+ __u64 blob_addr_32;
+ __u64 blob_addr_64;
+ __u8 blob_size_32;
+ __u8 blob_size_64;
+ __u8 pad2[30];
+};
+
+struct kvm_xen_hvm_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u8 long_mode;
+ __u8 vector;
+ __u8 runstate_update_flag;
+ union {
+ __u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
+ __u64 hva;
+ } shared_info;
+ struct {
+ __u32 send_port;
+ __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+ __u32 flags;
+#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0)
+#define KVM_XEN_EVTCHN_UPDATE (1 << 1)
+#define KVM_XEN_EVTCHN_RESET (1 << 2)
+ /*
+ * Events sent by the guest are either looped back to
+ * the guest itself (potentially on a different port#)
+ * or signalled via an eventfd.
+ */
+ union {
+ struct {
+ __u32 port;
+ __u32 vcpu;
+ __u32 priority;
+ } port;
+ struct {
+ __u32 port; /* Zero for eventfd */
+ __s32 fd;
+ } eventfd;
+ __u32 padding[4];
+ } deliver;
+ } evtchn;
+ __u32 xen_version;
+ __u64 pad[8];
+ } u;
+};
+
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
+#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
+#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6
+
+struct kvm_xen_vcpu_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
+ __u64 hva;
+ __u64 pad[8];
+ struct {
+ __u64 state;
+ __u64 state_entry_time;
+ __u64 time_running;
+ __u64 time_runnable;
+ __u64 time_blocked;
+ __u64 time_offline;
+ } runstate;
+ __u32 vcpu_id;
+ struct {
+ __u32 port;
+ __u32 priority;
+ __u64 expires_ns;
+ } timer;
+ __u8 vector;
+ } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6
+#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7
+#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+ /* Guest initialization commands */
+ KVM_SEV_INIT = 0,
+ KVM_SEV_ES_INIT,
+ /* Guest launch commands */
+ KVM_SEV_LAUNCH_START,
+ KVM_SEV_LAUNCH_UPDATE_DATA,
+ KVM_SEV_LAUNCH_UPDATE_VMSA,
+ KVM_SEV_LAUNCH_SECRET,
+ KVM_SEV_LAUNCH_MEASURE,
+ KVM_SEV_LAUNCH_FINISH,
+ /* Guest migration commands (outgoing) */
+ KVM_SEV_SEND_START,
+ KVM_SEV_SEND_UPDATE_DATA,
+ KVM_SEV_SEND_UPDATE_VMSA,
+ KVM_SEV_SEND_FINISH,
+ /* Guest migration commands (incoming) */
+ KVM_SEV_RECEIVE_START,
+ KVM_SEV_RECEIVE_UPDATE_DATA,
+ KVM_SEV_RECEIVE_UPDATE_VMSA,
+ KVM_SEV_RECEIVE_FINISH,
+ /* Guest status and debug commands */
+ KVM_SEV_GUEST_STATUS,
+ KVM_SEV_DBG_DECRYPT,
+ KVM_SEV_DBG_ENCRYPT,
+ /* Guest certificates commands */
+ KVM_SEV_CERT_EXPORT,
+ /* Attestation report */
+ KVM_SEV_GET_ATTESTATION_REPORT,
+ /* Guest Migration Extension */
+ KVM_SEV_SEND_CANCEL,
+
+ KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+ __u32 id;
+ __u32 pad0;
+ __u64 data;
+ __u32 error;
+ __u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 dh_uaddr;
+ __u32 dh_len;
+ __u32 pad0;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad1;
+};
+
+struct kvm_sev_launch_update_data {
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+
+struct kvm_sev_launch_secret {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+struct kvm_sev_launch_measure {
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_guest_status {
+ __u32 handle;
+ __u32 policy;
+ __u32 state;
+};
+
+struct kvm_sev_dbg {
+ __u64 src_uaddr;
+ __u64 dst_uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_attestation_report {
+ __u8 mnonce[16];
+ __u64 uaddr;
+ __u32 len;
+ __u32 pad0;
+};
+
+struct kvm_sev_send_start {
+ __u32 policy;
+ __u32 pad0;
+ __u64 pdh_cert_uaddr;
+ __u32 pdh_cert_len;
+ __u32 pad1;
+ __u64 plat_certs_uaddr;
+ __u32 plat_certs_len;
+ __u32 pad2;
+ __u64 amd_certs_uaddr;
+ __u32 amd_certs_len;
+ __u32 pad3;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad4;
+};
+
+struct kvm_sev_send_update_data {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+struct kvm_sev_receive_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 pdh_uaddr;
+ __u32 pdh_len;
+ __u32 pad0;
+ __u64 session_uaddr;
+ __u32 session_len;
+ __u32 pad1;
+};
+
+struct kvm_sev_receive_update_data {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u32 pad0;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u32 pad1;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+ __u32 pad2;
+};
+
+#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+
+struct kvm_hyperv_eventfd {
+ __u32 conn_id;
+ __s32 fd;
+ __u32 flags;
+ __u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
+#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
+
/*
* Masked event layout.
* Bits Description
@@ -549,10 +841,10 @@ struct kvm_pmu_event_filter {
((__u64)(!!(exclude)) << 55))
#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \
- (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8))
-#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55))
+ (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8))
+#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55))
#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56)
/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
@@ -560,7 +852,7 @@ struct kvm_pmu_event_filter {
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
/* x86-specific KVM_EXIT_HYPERCALL flags. */
-#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
+#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0)
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 4fa4ade1ce7445..540c0f2c4fda07 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -121,7 +121,7 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
int i, n;
/* recognize hard coded LLVM section name */
- if (strcmp(sec_name, ".arena.1") == 0) {
+ if (strcmp(sec_name, ".addr_space.1") == 0) {
/* this is the name to use in skeleton */
snprintf(buf, buf_sz, "arena");
return true;
diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py
index 1d3a90d93fe267..270c28a0d09801 100644
--- a/tools/cgroup/memcg_slabinfo.py
+++ b/tools/cgroup/memcg_slabinfo.py
@@ -146,12 +146,11 @@ def detect_kernel_config():
def for_each_slab(prog):
- PGSlab = 1 << prog.constant('PG_slab')
- PGHead = 1 << prog.constant('PG_head')
+ PGSlab = ~prog.constant('PG_slab')
for page in for_each_page(prog):
try:
- if page.flags.value_() & PGSlab:
+ if page.page_type.value_() == PGSlab:
yield cast('struct slab *', page)
except FaultError:
pass
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 318e2dad27e048..ae695911641b3b 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -76,6 +76,12 @@ enum {
DNS
};
+enum {
+ IPV4 = 1,
+ IPV6,
+ IP_TYPE_MAX
+};
+
static int in_hand_shake;
static char *os_name = "";
@@ -88,6 +94,8 @@ static char *lic_version = "Unknown version";
static char full_domain_name[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
static struct utsname uts_buf;
+#define CFG_HEADER "# Generated by Hyper-V key-value pair daemon. Please do not modify.\n"
+
/*
* The location of the interface configuration file.
*/
@@ -102,6 +110,11 @@ static struct utsname uts_buf;
#define MAX_FILE_NAME 100
#define ENTRIES_PER_BLOCK 50
+/*
+ * Change this entry if the number of addresses increases in future
+ */
+#define MAX_IP_ENTRIES 64
+#define OUTSTR_BUF_SIZE ((INET6_ADDRSTRLEN + 1) * MAX_IP_ENTRIES)
struct kvp_record {
char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
@@ -1171,6 +1184,18 @@ static int process_ip_string(FILE *f, char *ip_string, int type)
return 0;
}
+int ip_version_check(const char *input_addr)
+{
+ struct in6_addr addr;
+
+ if (inet_pton(AF_INET, input_addr, &addr))
+ return IPV4;
+ else if (inet_pton(AF_INET6, input_addr, &addr))
+ return IPV6;
+
+ return -EINVAL;
+}
+
/*
* Only IPv4 subnet strings needs to be converted to plen
* For IPv6 the subnet is already privided in plen format
@@ -1197,14 +1222,75 @@ static int kvp_subnet_to_plen(char *subnet_addr_str)
return plen;
}
+static int process_dns_gateway_nm(FILE *f, char *ip_string, int type,
+ int ip_sec)
+{
+ char addr[INET6_ADDRSTRLEN], *output_str;
+ int ip_offset = 0, error = 0, ip_ver;
+ char *param_name;
+
+ if (type == DNS)
+ param_name = "dns";
+ else if (type == GATEWAY)
+ param_name = "gateway";
+ else
+ return -EINVAL;
+
+ output_str = (char *)calloc(OUTSTR_BUF_SIZE, sizeof(char));
+ if (!output_str)
+ return -ENOMEM;
+
+ while (1) {
+ memset(addr, 0, sizeof(addr));
+
+ if (!parse_ip_val_buffer(ip_string, &ip_offset, addr,
+ (MAX_IP_ADDR_SIZE * 2)))
+ break;
+
+ ip_ver = ip_version_check(addr);
+ if (ip_ver < 0)
+ continue;
+
+ if ((ip_ver == IPV4 && ip_sec == IPV4) ||
+ (ip_ver == IPV6 && ip_sec == IPV6)) {
+ /*
+ * do a bound check to avoid out-of bound writes
+ */
+ if ((OUTSTR_BUF_SIZE - strlen(output_str)) >
+ (strlen(addr) + 1)) {
+ strncat(output_str, addr,
+ OUTSTR_BUF_SIZE -
+ strlen(output_str) - 1);
+ strncat(output_str, ",",
+ OUTSTR_BUF_SIZE -
+ strlen(output_str) - 1);
+ }
+ } else {
+ continue;
+ }
+ }
+
+ if (strlen(output_str)) {
+ /*
+ * This is to get rid of that extra comma character
+ * in the end of the string
+ */
+ output_str[strlen(output_str) - 1] = '\0';
+ error = fprintf(f, "%s=%s\n", param_name, output_str);
+ }
+
+ free(output_str);
+ return error;
+}
+
static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
- int is_ipv6)
+ int ip_sec)
{
char addr[INET6_ADDRSTRLEN];
char subnet_addr[INET6_ADDRSTRLEN];
- int error, i = 0;
+ int error = 0, i = 0;
int ip_offset = 0, subnet_offset = 0;
- int plen;
+ int plen, ip_ver;
memset(addr, 0, sizeof(addr));
memset(subnet_addr, 0, sizeof(subnet_addr));
@@ -1216,10 +1302,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
subnet_addr,
(MAX_IP_ADDR_SIZE *
2))) {
- if (!is_ipv6)
+ ip_ver = ip_version_check(addr);
+ if (ip_ver < 0)
+ continue;
+
+ if (ip_ver == IPV4 && ip_sec == IPV4)
plen = kvp_subnet_to_plen((char *)subnet_addr);
- else
+ else if (ip_ver == IPV6 && ip_sec == IPV6)
plen = atoi(subnet_addr);
+ else
+ continue;
if (plen < 0)
return plen;
@@ -1233,17 +1325,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
memset(subnet_addr, 0, sizeof(subnet_addr));
}
- return 0;
+ return error;
}
static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
{
- int error = 0;
+ int error = 0, ip_ver;
char if_filename[PATH_MAX];
char nm_filename[PATH_MAX];
FILE *ifcfg_file, *nmfile;
char cmd[PATH_MAX];
- int is_ipv6 = 0;
char *mac_addr;
int str_len;
@@ -1346,6 +1437,18 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
return HV_E_FAIL;
}
+ /* Write the config file headers */
+ error = fprintf(ifcfg_file, CFG_HEADER);
+ if (error < 0) {
+ error = HV_E_FAIL;
+ goto setval_error;
+ }
+ error = fprintf(nmfile, CFG_HEADER);
+ if (error < 0) {
+ error = HV_E_FAIL;
+ goto setval_error;
+ }
+
/*
* First write out the MAC address.
*/
@@ -1421,52 +1524,94 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
if (error)
goto setval_error;
- if (new_val->addr_family & ADDR_FAMILY_IPV6) {
- error = fprintf(nmfile, "\n[ipv6]\n");
- if (error < 0)
- goto setval_error;
- is_ipv6 = 1;
- } else {
- error = fprintf(nmfile, "\n[ipv4]\n");
- if (error < 0)
- goto setval_error;
- }
-
/*
* Now we populate the keyfile format
+ *
+ * The keyfile format expects the IPv6 and IPv4 configuration in
+ * different sections. Therefore we iterate through the list twice,
+ * once to populate the IPv4 section and the next time for IPv6
*/
+ ip_ver = IPV4;
+ do {
+ if (ip_ver == IPV4) {
+ error = fprintf(nmfile, "\n[ipv4]\n");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = fprintf(nmfile, "\n[ipv6]\n");
+ if (error < 0)
+ goto setval_error;
+ }
- if (new_val->dhcp_enabled) {
- error = kvp_write_file(nmfile, "method", "", "auto");
- if (error < 0)
- goto setval_error;
- } else {
- error = kvp_write_file(nmfile, "method", "", "manual");
+ /*
+ * Write the configuration for ipaddress, netmask, gateway and
+ * name services
+ */
+ error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
+ (char *)new_val->sub_net,
+ ip_ver);
if (error < 0)
goto setval_error;
- }
- /*
- * Write the configuration for ipaddress, netmask, gateway and
- * name services
- */
- error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
- (char *)new_val->sub_net, is_ipv6);
- if (error < 0)
- goto setval_error;
+ /*
+ * As dhcp_enabled is only valid for ipv4, we do not set dhcp
+ * methods for ipv6 based on dhcp_enabled flag.
+ *
+ * For ipv4, set method to manual only when dhcp_enabled is
+ * false and specific ipv4 addresses are configured. If neither
+ * dhcp_enabled is true and no ipv4 addresses are configured,
+ * set method to 'disabled'.
+ *
+ * For ipv6, set method to manual when we configure ipv6
+ * addresses. Otherwise set method to 'auto' so that SLAAC from
+ * RA may be used.
+ */
+ if (ip_ver == IPV4) {
+ if (new_val->dhcp_enabled) {
+ error = kvp_write_file(nmfile, "method", "",
+ "auto");
+ if (error < 0)
+ goto setval_error;
+ } else if (error) {
+ error = kvp_write_file(nmfile, "method", "",
+ "manual");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = kvp_write_file(nmfile, "method", "",
+ "disabled");
+ if (error < 0)
+ goto setval_error;
+ }
+ } else if (ip_ver == IPV6) {
+ if (error) {
+ error = kvp_write_file(nmfile, "method", "",
+ "manual");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = kvp_write_file(nmfile, "method", "",
+ "auto");
+ if (error < 0)
+ goto setval_error;
+ }
+ }
- /* we do not want ipv4 addresses in ipv6 section and vice versa */
- if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) {
- error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
+ error = process_dns_gateway_nm(nmfile,
+ (char *)new_val->gate_way,
+ GATEWAY, ip_ver);
if (error < 0)
goto setval_error;
- }
- if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) {
- error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
+ error = process_dns_gateway_nm(nmfile,
+ (char *)new_val->dns_addr, DNS,
+ ip_ver);
if (error < 0)
goto setval_error;
- }
+
+ ip_ver++;
+ } while (ip_ver < IP_TYPE_MAX);
+
fclose(nmfile);
fclose(ifcfg_file);
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h
index 03f721a8a2b199..54ccccf96e21ea 100644
--- a/tools/include/asm-generic/bitops/__fls.h
+++ b/tools/include/asm-generic/bitops/__fls.h
@@ -5,12 +5,12 @@
#include <asm/types.h>
/**
- * __fls - find last (most-significant) set bit in a long word
+ * generic___fls - find last (most-significant) set bit in a long word
* @word: the word to search
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long __fls(unsigned long word)
+static __always_inline unsigned long generic___fls(unsigned long word)
{
int num = BITS_PER_LONG - 1;
@@ -41,4 +41,8 @@ static __always_inline unsigned long __fls(unsigned long word)
return num;
}
+#ifndef __HAVE_ARCH___FLS
+#define __fls(word) generic___fls(word)
+#endif
+
#endif /* _ASM_GENERIC_BITOPS___FLS_H_ */
diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h
index b168bb10e1be17..26f3ce1dd6e448 100644
--- a/tools/include/asm-generic/bitops/fls.h
+++ b/tools/include/asm-generic/bitops/fls.h
@@ -3,14 +3,14 @@
#define _ASM_GENERIC_BITOPS_FLS_H_
/**
- * fls - find last (most-significant) bit set
+ * generic_fls - find last (most-significant) bit set
* @x: the word to search
*
* This is defined the same way as ffs.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static __always_inline int fls(unsigned int x)
+static __always_inline int generic_fls(unsigned int x)
{
int r = 32;
@@ -39,4 +39,8 @@ static __always_inline int fls(unsigned int x)
return r;
}
+#ifndef __HAVE_ARCH_FLS
+#define fls(x) generic_fls(x)
+#endif
+
#endif /* _ASM_GENERIC_BITOPS_FLS_H_ */
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 7c0cf5031abe87..0eb24d21aac214 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -4,6 +4,7 @@
#include <linux/const.h>
#include <vdso/bits.h>
+#include <uapi/linux/bits.h>
#include <asm/bitsperlong.h>
#define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG))
@@ -30,15 +31,8 @@
#define GENMASK_INPUT_CHECK(h, l) 0
#endif
-#define __GENMASK(h, l) \
- (((~UL(0)) - (UL(1) << (l)) + 1) & \
- (~UL(0) >> (BITS_PER_LONG - 1 - (h))))
#define GENMASK(h, l) \
(GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
-
-#define __GENMASK_ULL(h, l) \
- (((~ULL(0)) - (ULL(1) << (l)) + 1) & \
- (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
#define GENMASK_ULL(h, l) \
(GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 72535f00572f6e..72ea363d434db0 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -3,6 +3,8 @@
#ifndef _LINUX_BTF_IDS_H
#define _LINUX_BTF_IDS_H
+#include <linux/types.h> /* for u32 */
+
struct btf_id_set {
u32 cnt;
u32 ids[];
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 4b0673bf52c2e6..07cfad817d5390 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -8,6 +8,7 @@
#include <linux/build_bug.h>
#include <linux/compiler.h>
#include <linux/math.h>
+#include <linux/panic.h>
#include <endian.h>
#include <byteswap.h>
diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h
index f3c82ab5b14cd7..7d73da0980473f 100644
--- a/tools/include/linux/mm.h
+++ b/tools/include/linux/mm.h
@@ -37,4 +37,9 @@ static inline void totalram_pages_add(long count)
{
}
+static inline int early_pfn_to_nid(unsigned long pfn)
+{
+ return 0;
+}
+
#endif
diff --git a/tools/include/linux/panic.h b/tools/include/linux/panic.h
new file mode 100644
index 00000000000000..9c8f17a41ce8ed
--- /dev/null
+++ b/tools/include/linux/panic.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_PANIC_H
+#define _TOOLS_LINUX_PANIC_H
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static inline void panic(const char *fmt, ...)
+{
+ va_list argp;
+
+ va_start(argp, fmt);
+ vfprintf(stderr, fmt, argp);
+ va_end(argp);
+ exit(-1);
+}
+
+#endif
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
index 570bb9794421b9..95483c7d81df74 100644
--- a/tools/include/linux/rbtree_augmented.h
+++ b/tools/include/linux/rbtree_augmented.h
@@ -158,13 +158,13 @@ RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \
static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
{
- rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
+ rb->__rb_parent_color = rb_color(rb) + (unsigned long)p;
}
static inline void rb_set_parent_color(struct rb_node *rb,
struct rb_node *p, int color)
{
- rb->__rb_parent_color = (unsigned long)p | color;
+ rb->__rb_parent_color = (unsigned long)p + color;
}
static inline void
diff --git a/tools/include/uapi/asm-generic/bitsperlong.h b/tools/include/uapi/asm-generic/bitsperlong.h
index 352cb81947b876..fadb3f857f2855 100644
--- a/tools/include/uapi/asm-generic/bitsperlong.h
+++ b/tools/include/uapi/asm-generic/bitsperlong.h
@@ -24,4 +24,8 @@
#endif
#endif
+#ifndef __BITS_PER_LONG_LONG
+#define __BITS_PER_LONG_LONG 64
+#endif
+
#endif /* _UAPI__ASM_GENERIC_BITS_PER_LONG */
diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h
deleted file mode 100644
index 1c7a0f6632c09e..00000000000000
--- a/tools/include/uapi/asm-generic/fcntl.h
+++ /dev/null
@@ -1,221 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_GENERIC_FCNTL_H
-#define _ASM_GENERIC_FCNTL_H
-
-#include <linux/types.h>
-
-/*
- * FMODE_EXEC is 0x20
- * FMODE_NONOTIFY is 0x4000000
- * These cannot be used by userspace O_* until internal and external open
- * flags are split.
- * -Eric Paris
- */
-
-/*
- * When introducing new O_* bits, please check its uniqueness in fcntl_init().
- */
-
-#define O_ACCMODE 00000003
-#define O_RDONLY 00000000
-#define O_WRONLY 00000001
-#define O_RDWR 00000002
-#ifndef O_CREAT
-#define O_CREAT 00000100 /* not fcntl */
-#endif
-#ifndef O_EXCL
-#define O_EXCL 00000200 /* not fcntl */
-#endif
-#ifndef O_NOCTTY
-#define O_NOCTTY 00000400 /* not fcntl */
-#endif
-#ifndef O_TRUNC
-#define O_TRUNC 00001000 /* not fcntl */
-#endif
-#ifndef O_APPEND
-#define O_APPEND 00002000
-#endif
-#ifndef O_NONBLOCK
-#define O_NONBLOCK 00004000
-#endif
-#ifndef O_DSYNC
-#define O_DSYNC 00010000 /* used to be O_SYNC, see below */
-#endif
-#ifndef FASYNC
-#define FASYNC 00020000 /* fcntl, for BSD compatibility */
-#endif
-#ifndef O_DIRECT
-#define O_DIRECT 00040000 /* direct disk access hint */
-#endif
-#ifndef O_LARGEFILE
-#define O_LARGEFILE 00100000
-#endif
-#ifndef O_DIRECTORY
-#define O_DIRECTORY 00200000 /* must be a directory */
-#endif
-#ifndef O_NOFOLLOW
-#define O_NOFOLLOW 00400000 /* don't follow links */
-#endif
-#ifndef O_NOATIME
-#define O_NOATIME 01000000
-#endif
-#ifndef O_CLOEXEC
-#define O_CLOEXEC 02000000 /* set close_on_exec */
-#endif
-
-/*
- * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
- * the O_SYNC flag. We continue to use the existing numerical value
- * for O_DSYNC semantics now, but using the correct symbolic name for it.
- * This new value is used to request true Posix O_SYNC semantics. It is
- * defined in this strange way to make sure applications compiled against
- * new headers get at least O_DSYNC semantics on older kernels.
- *
- * This has the nice side-effect that we can simply test for O_DSYNC
- * wherever we do not care if O_DSYNC or O_SYNC is used.
- *
- * Note: __O_SYNC must never be used directly.
- */
-#ifndef O_SYNC
-#define __O_SYNC 04000000
-#define O_SYNC (__O_SYNC|O_DSYNC)
-#endif
-
-#ifndef O_PATH
-#define O_PATH 010000000
-#endif
-
-#ifndef __O_TMPFILE
-#define __O_TMPFILE 020000000
-#endif
-
-/* a horrid kludge trying to make sure that this will fail on old kernels */
-#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
-
-#ifndef O_NDELAY
-#define O_NDELAY O_NONBLOCK
-#endif
-
-#define F_DUPFD 0 /* dup */
-#define F_GETFD 1 /* get close_on_exec */
-#define F_SETFD 2 /* set/clear close_on_exec */
-#define F_GETFL 3 /* get file->f_flags */
-#define F_SETFL 4 /* set file->f_flags */
-#ifndef F_GETLK
-#define F_GETLK 5
-#define F_SETLK 6
-#define F_SETLKW 7
-#endif
-#ifndef F_SETOWN
-#define F_SETOWN 8 /* for sockets. */
-#define F_GETOWN 9 /* for sockets. */
-#endif
-#ifndef F_SETSIG
-#define F_SETSIG 10 /* for sockets. */
-#define F_GETSIG 11 /* for sockets. */
-#endif
-
-#if __BITS_PER_LONG == 32 || defined(__KERNEL__)
-#ifndef F_GETLK64
-#define F_GETLK64 12 /* using 'struct flock64' */
-#define F_SETLK64 13
-#define F_SETLKW64 14
-#endif
-#endif /* __BITS_PER_LONG == 32 || defined(__KERNEL__) */
-
-#ifndef F_SETOWN_EX
-#define F_SETOWN_EX 15
-#define F_GETOWN_EX 16
-#endif
-
-#ifndef F_GETOWNER_UIDS
-#define F_GETOWNER_UIDS 17
-#endif
-
-/*
- * Open File Description Locks
- *
- * Usually record locks held by a process are released on *any* close and are
- * not inherited across a fork().
- *
- * These cmd values will set locks that conflict with process-associated
- * record locks, but are "owned" by the open file description, not the
- * process. This means that they are inherited across fork() like BSD (flock)
- * locks, and they are only released automatically when the last reference to
- * the open file against which they were acquired is put.
- */
-#define F_OFD_GETLK 36
-#define F_OFD_SETLK 37
-#define F_OFD_SETLKW 38
-
-#define F_OWNER_TID 0
-#define F_OWNER_PID 1
-#define F_OWNER_PGRP 2
-
-struct f_owner_ex {
- int type;
- __kernel_pid_t pid;
-};
-
-/* for F_[GET|SET]FL */
-#define FD_CLOEXEC 1 /* actually anything with low bit set goes */
-
-/* for posix fcntl() and lockf() */
-#ifndef F_RDLCK
-#define F_RDLCK 0
-#define F_WRLCK 1
-#define F_UNLCK 2
-#endif
-
-/* for old implementation of bsd flock () */
-#ifndef F_EXLCK
-#define F_EXLCK 4 /* or 3 */
-#define F_SHLCK 8 /* or 4 */
-#endif
-
-/* operations for bsd flock(), also used by the kernel implementation */
-#define LOCK_SH 1 /* shared lock */
-#define LOCK_EX 2 /* exclusive lock */
-#define LOCK_NB 4 /* or'd with one of the above to prevent
- blocking */
-#define LOCK_UN 8 /* remove lock */
-
-/*
- * LOCK_MAND support has been removed from the kernel. We leave the symbols
- * here to not break legacy builds, but these should not be used in new code.
- */
-#define LOCK_MAND 32 /* This is a mandatory flock ... */
-#define LOCK_READ 64 /* which allows concurrent read operations */
-#define LOCK_WRITE 128 /* which allows concurrent write operations */
-#define LOCK_RW 192 /* which allows concurrent read & write ops */
-
-#define F_LINUX_SPECIFIC_BASE 1024
-
-#ifndef HAVE_ARCH_STRUCT_FLOCK
-struct flock {
- short l_type;
- short l_whence;
- __kernel_off_t l_start;
- __kernel_off_t l_len;
- __kernel_pid_t l_pid;
-#ifdef __ARCH_FLOCK_EXTRA_SYSID
- __ARCH_FLOCK_EXTRA_SYSID
-#endif
-#ifdef __ARCH_FLOCK_PAD
- __ARCH_FLOCK_PAD
-#endif
-};
-
-struct flock64 {
- short l_type;
- short l_whence;
- __kernel_loff_t l_start;
- __kernel_loff_t l_len;
- __kernel_pid_t l_pid;
-#ifdef __ARCH_FLOCK64_PAD
- __ARCH_FLOCK64_PAD
-#endif
-};
-#endif /* HAVE_ARCH_STRUCT_FLOCK */
-
-#endif /* _ASM_GENERIC_FCNTL_H */
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index fd4f9574d177a2..2ee338860b7e08 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -3013,6 +3013,7 @@ struct drm_i915_query_item {
* - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
* - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
* - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
+ * - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct drm_i915_query_guc_submission_version)
*/
__u64 query_id;
#define DRM_I915_QUERY_TOPOLOGY_INFO 1
@@ -3021,6 +3022,7 @@ struct drm_i915_query_item {
#define DRM_I915_QUERY_MEMORY_REGIONS 4
#define DRM_I915_QUERY_HWCONFIG_BLOB 5
#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6
+#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION 7
/* Must be kept compact -- no holes and well documented */
/**
@@ -3567,6 +3569,20 @@ struct drm_i915_query_memory_regions {
};
/**
+ * struct drm_i915_query_guc_submission_version - query GuC submission interface version
+ */
+struct drm_i915_query_guc_submission_version {
+ /** @branch: Firmware branch version. */
+ __u32 branch;
+ /** @major: Firmware major version. */
+ __u32 major;
+ /** @minor: Firmware minor version. */
+ __u32 minor;
+ /** @patch: Firmware patch version. */
+ __u32 patch;
+};
+
+/**
* DOC: GuC HWCONFIG blob uAPI
*
* The GuC produces a blob with information about the current device.
diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h
new file mode 100644
index 00000000000000..3c2a101986a314
--- /dev/null
+++ b/tools/include/uapi/linux/bits.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* bits.h: Macros for dealing with bitmasks. */
+
+#ifndef _UAPI_LINUX_BITS_H
+#define _UAPI_LINUX_BITS_H
+
+#define __GENMASK(h, l) \
+ (((~_UL(0)) - (_UL(1) << (l)) + 1) & \
+ (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
+
+#define __GENMASK_ULL(h, l) \
+ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \
+ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+
+#endif /* _UAPI_LINUX_BITS_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index c3308536482bdb..2190adbe30027c 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -16,6 +16,11 @@
#define KVM_API_VERSION 12
+/*
+ * Backwards-compatible definitions.
+ */
+#define __KVM_HAVE_GUEST_DEBUG
+
/* for KVM_SET_USER_MEMORY_REGION */
struct kvm_userspace_memory_region {
__u32 slot;
@@ -85,43 +90,6 @@ struct kvm_pit_config {
#define KVM_PIT_SPEAKER_DUMMY 1
-struct kvm_s390_skeys {
- __u64 start_gfn;
- __u64 count;
- __u64 skeydata_addr;
- __u32 flags;
- __u32 reserved[9];
-};
-
-#define KVM_S390_CMMA_PEEK (1 << 0)
-
-/**
- * kvm_s390_cmma_log - Used for CMMA migration.
- *
- * Used both for input and output.
- *
- * @start_gfn: Guest page number to start from.
- * @count: Size of the result buffer.
- * @flags: Control operation mode via KVM_S390_CMMA_* flags
- * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
- * pages are still remaining.
- * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
- * in the PGSTE.
- * @values: Pointer to the values buffer.
- *
- * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
- */
-struct kvm_s390_cmma_log {
- __u64 start_gfn;
- __u32 count;
- __u32 flags;
- union {
- __u64 remaining;
- __u64 mask;
- };
- __u64 values;
-};
-
struct kvm_hyperv_exit {
#define KVM_EXIT_HYPERV_SYNIC 1
#define KVM_EXIT_HYPERV_HCALL 2
@@ -315,11 +283,6 @@ struct kvm_run {
__u32 ipb;
} s390_sieic;
/* KVM_EXIT_S390_RESET */
-#define KVM_S390_RESET_POR 1
-#define KVM_S390_RESET_CLEAR 2
-#define KVM_S390_RESET_SUBSYSTEM 4
-#define KVM_S390_RESET_CPU_INIT 8
-#define KVM_S390_RESET_IPL 16
__u64 s390_reset_flags;
/* KVM_EXIT_S390_UCONTROL */
struct {
@@ -536,43 +499,6 @@ struct kvm_translation {
__u8 pad[5];
};
-/* for KVM_S390_MEM_OP */
-struct kvm_s390_mem_op {
- /* in */
- __u64 gaddr; /* the guest address */
- __u64 flags; /* flags */
- __u32 size; /* amount of bytes */
- __u32 op; /* type of operation */
- __u64 buf; /* buffer in userspace */
- union {
- struct {
- __u8 ar; /* the access register number */
- __u8 key; /* access key, ignored if flag unset */
- __u8 pad1[6]; /* ignored */
- __u64 old_addr; /* ignored if cmpxchg flag unset */
- };
- __u32 sida_offset; /* offset into the sida */
- __u8 reserved[32]; /* ignored */
- };
-};
-/* types for kvm_s390_mem_op->op */
-#define KVM_S390_MEMOP_LOGICAL_READ 0
-#define KVM_S390_MEMOP_LOGICAL_WRITE 1
-#define KVM_S390_MEMOP_SIDA_READ 2
-#define KVM_S390_MEMOP_SIDA_WRITE 3
-#define KVM_S390_MEMOP_ABSOLUTE_READ 4
-#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
-#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6
-
-/* flags for kvm_s390_mem_op->flags */
-#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
-#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
-#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2)
-
-/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */
-#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0)
-#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1)
-
/* for KVM_INTERRUPT */
struct kvm_interrupt {
/* in */
@@ -637,124 +563,6 @@ struct kvm_mp_state {
__u32 mp_state;
};
-struct kvm_s390_psw {
- __u64 mask;
- __u64 addr;
-};
-
-/* valid values for type in kvm_s390_interrupt */
-#define KVM_S390_SIGP_STOP 0xfffe0000u
-#define KVM_S390_PROGRAM_INT 0xfffe0001u
-#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u
-#define KVM_S390_RESTART 0xfffe0003u
-#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u
-#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u
-#define KVM_S390_MCHK 0xfffe1000u
-#define KVM_S390_INT_CLOCK_COMP 0xffff1004u
-#define KVM_S390_INT_CPU_TIMER 0xffff1005u
-#define KVM_S390_INT_VIRTIO 0xffff2603u
-#define KVM_S390_INT_SERVICE 0xffff2401u
-#define KVM_S390_INT_EMERGENCY 0xffff1201u
-#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u
-/* Anything below 0xfffe0000u is taken by INT_IO */
-#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \
- (((schid)) | \
- ((ssid) << 16) | \
- ((cssid) << 18) | \
- ((ai) << 26))
-#define KVM_S390_INT_IO_MIN 0x00000000u
-#define KVM_S390_INT_IO_MAX 0xfffdffffu
-#define KVM_S390_INT_IO_AI_MASK 0x04000000u
-
-
-struct kvm_s390_interrupt {
- __u32 type;
- __u32 parm;
- __u64 parm64;
-};
-
-struct kvm_s390_io_info {
- __u16 subchannel_id;
- __u16 subchannel_nr;
- __u32 io_int_parm;
- __u32 io_int_word;
-};
-
-struct kvm_s390_ext_info {
- __u32 ext_params;
- __u32 pad;
- __u64 ext_params2;
-};
-
-struct kvm_s390_pgm_info {
- __u64 trans_exc_code;
- __u64 mon_code;
- __u64 per_address;
- __u32 data_exc_code;
- __u16 code;
- __u16 mon_class_nr;
- __u8 per_code;
- __u8 per_atmid;
- __u8 exc_access_id;
- __u8 per_access_id;
- __u8 op_access_id;
-#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01
-#define KVM_S390_PGM_FLAGS_ILC_0 0x02
-#define KVM_S390_PGM_FLAGS_ILC_1 0x04
-#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06
-#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08
- __u8 flags;
- __u8 pad[2];
-};
-
-struct kvm_s390_prefix_info {
- __u32 address;
-};
-
-struct kvm_s390_extcall_info {
- __u16 code;
-};
-
-struct kvm_s390_emerg_info {
- __u16 code;
-};
-
-#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01
-struct kvm_s390_stop_info {
- __u32 flags;
-};
-
-struct kvm_s390_mchk_info {
- __u64 cr14;
- __u64 mcic;
- __u64 failing_storage_address;
- __u32 ext_damage_code;
- __u32 pad;
- __u8 fixed_logout[16];
-};
-
-struct kvm_s390_irq {
- __u64 type;
- union {
- struct kvm_s390_io_info io;
- struct kvm_s390_ext_info ext;
- struct kvm_s390_pgm_info pgm;
- struct kvm_s390_emerg_info emerg;
- struct kvm_s390_extcall_info extcall;
- struct kvm_s390_prefix_info prefix;
- struct kvm_s390_stop_info stop;
- struct kvm_s390_mchk_info mchk;
- char reserved[64];
- } u;
-};
-
-struct kvm_s390_irq_state {
- __u64 buf;
- __u32 flags; /* will stay unused for compatibility reasons */
- __u32 len;
- __u32 reserved[4]; /* will stay unused for compatibility reasons */
-};
-
/* for KVM_SET_GUEST_DEBUG */
#define KVM_GUESTDBG_ENABLE 0x00000001
@@ -810,50 +618,6 @@ struct kvm_enable_cap {
__u8 pad[64];
};
-/* for KVM_PPC_GET_PVINFO */
-
-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
-
-struct kvm_ppc_pvinfo {
- /* out */
- __u32 flags;
- __u32 hcall[4];
- __u8 pad[108];
-};
-
-/* for KVM_PPC_GET_SMMU_INFO */
-#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
-
-struct kvm_ppc_one_page_size {
- __u32 page_shift; /* Page shift (or 0) */
- __u32 pte_enc; /* Encoding in the HPTE (>>12) */
-};
-
-struct kvm_ppc_one_seg_page_size {
- __u32 page_shift; /* Base page shift of segment (or 0) */
- __u32 slb_enc; /* SLB encoding for BookS */
- struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
-#define KVM_PPC_1T_SEGMENTS 0x00000002
-#define KVM_PPC_NO_HASH 0x00000004
-
-struct kvm_ppc_smmu_info {
- __u64 flags;
- __u32 slb_size;
- __u16 data_keys; /* # storage keys supported for data */
- __u16 instr_keys; /* # storage keys supported for instructions */
- struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
-struct kvm_ppc_resize_hpt {
- __u64 flags;
- __u32 shift;
- __u32 pad;
-};
-
#define KVMIO 0xAE
/* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -923,9 +687,7 @@ struct kvm_ppc_resize_hpt {
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
#define KVM_CAP_USER_NMI 22
-#ifdef __KVM_HAVE_GUEST_DEBUG
#define KVM_CAP_SET_GUEST_DEBUG 23
-#endif
#ifdef __KVM_HAVE_PIT
#define KVM_CAP_REINJECT_CONTROL 24
#endif
@@ -1156,8 +918,6 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_GUEST_MEMFD 234
#define KVM_CAP_VM_TYPES 235
-#ifdef KVM_CAP_IRQ_ROUTING
-
struct kvm_irq_routing_irqchip {
__u32 irqchip;
__u32 pin;
@@ -1222,42 +982,6 @@ struct kvm_irq_routing {
struct kvm_irq_routing_entry entries[];
};
-#endif
-
-#ifdef KVM_CAP_MCE
-/* x86 MCE */
-struct kvm_x86_mce {
- __u64 status;
- __u64 addr;
- __u64 misc;
- __u64 mcg_status;
- __u8 bank;
- __u8 pad1[7];
- __u64 pad2[3];
-};
-#endif
-
-#ifdef KVM_CAP_XEN_HVM
-#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
-#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
-#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
-#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
-
-struct kvm_xen_hvm_config {
- __u32 flags;
- __u32 msr;
- __u64 blob_addr_32;
- __u64 blob_addr_64;
- __u8 blob_size_32;
- __u8 blob_size_64;
- __u8 pad2[30];
-};
-#endif
-
#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
/*
* Available with KVM_CAP_IRQFD_RESAMPLE
@@ -1442,11 +1166,6 @@ struct kvm_vfio_spapr_tce {
struct kvm_userspace_memory_region2)
/* enable ucontrol for s390 */
-struct kvm_s390_ucas_mapping {
- __u64 user_addr;
- __u64 vcpu_addr;
- __u64 length;
-};
#define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping)
#define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping)
#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long)
@@ -1641,89 +1360,6 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
-struct kvm_s390_pv_sec_parm {
- __u64 origin;
- __u64 length;
-};
-
-struct kvm_s390_pv_unp {
- __u64 addr;
- __u64 size;
- __u64 tweak;
-};
-
-enum pv_cmd_dmp_id {
- KVM_PV_DUMP_INIT,
- KVM_PV_DUMP_CONFIG_STOR_STATE,
- KVM_PV_DUMP_COMPLETE,
- KVM_PV_DUMP_CPU,
-};
-
-struct kvm_s390_pv_dmp {
- __u64 subcmd;
- __u64 buff_addr;
- __u64 buff_len;
- __u64 gaddr; /* For dump storage state */
- __u64 reserved[4];
-};
-
-enum pv_cmd_info_id {
- KVM_PV_INFO_VM,
- KVM_PV_INFO_DUMP,
-};
-
-struct kvm_s390_pv_info_dump {
- __u64 dump_cpu_buffer_len;
- __u64 dump_config_mem_buffer_per_1m;
- __u64 dump_config_finalize_len;
-};
-
-struct kvm_s390_pv_info_vm {
- __u64 inst_calls_list[4];
- __u64 max_cpus;
- __u64 max_guests;
- __u64 max_guest_addr;
- __u64 feature_indication;
-};
-
-struct kvm_s390_pv_info_header {
- __u32 id;
- __u32 len_max;
- __u32 len_written;
- __u32 reserved;
-};
-
-struct kvm_s390_pv_info {
- struct kvm_s390_pv_info_header header;
- union {
- struct kvm_s390_pv_info_dump dump;
- struct kvm_s390_pv_info_vm vm;
- };
-};
-
-enum pv_cmd_id {
- KVM_PV_ENABLE,
- KVM_PV_DISABLE,
- KVM_PV_SET_SEC_PARMS,
- KVM_PV_UNPACK,
- KVM_PV_VERIFY,
- KVM_PV_PREP_RESET,
- KVM_PV_UNSHARE_ALL,
- KVM_PV_INFO,
- KVM_PV_DUMP,
- KVM_PV_ASYNC_CLEANUP_PREPARE,
- KVM_PV_ASYNC_CLEANUP_PERFORM,
-};
-
-struct kvm_pv_cmd {
- __u32 cmd; /* Command to be executed */
- __u16 rc; /* Ultravisor return code */
- __u16 rrc; /* Ultravisor return reason code */
- __u64 data; /* Data or address */
- __u32 flags; /* flags for future extensions. Must be 0 for now */
- __u32 reserved[3];
-};
-
/* Available with KVM_CAP_S390_PROTECTED */
#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
@@ -1737,58 +1373,6 @@ struct kvm_pv_cmd {
#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr)
#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr)
-struct kvm_xen_hvm_attr {
- __u16 type;
- __u16 pad[3];
- union {
- __u8 long_mode;
- __u8 vector;
- __u8 runstate_update_flag;
- struct {
- __u64 gfn;
-#define KVM_XEN_INVALID_GFN ((__u64)-1)
- } shared_info;
- struct {
- __u32 send_port;
- __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
- __u32 flags;
-#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0)
-#define KVM_XEN_EVTCHN_UPDATE (1 << 1)
-#define KVM_XEN_EVTCHN_RESET (1 << 2)
- /*
- * Events sent by the guest are either looped back to
- * the guest itself (potentially on a different port#)
- * or signalled via an eventfd.
- */
- union {
- struct {
- __u32 port;
- __u32 vcpu;
- __u32 priority;
- } port;
- struct {
- __u32 port; /* Zero for eventfd */
- __s32 fd;
- } eventfd;
- __u32 padding[4];
- } deliver;
- } evtchn;
- __u32 xen_version;
- __u64 pad[8];
- } u;
-};
-
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
-#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
-#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
-#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
-#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
-
/* Per-vCPU Xen attributes */
#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr)
@@ -1799,242 +1383,6 @@ struct kvm_xen_hvm_attr {
#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2)
#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2)
-struct kvm_xen_vcpu_attr {
- __u16 type;
- __u16 pad[3];
- union {
- __u64 gpa;
-#define KVM_XEN_INVALID_GPA ((__u64)-1)
- __u64 pad[8];
- struct {
- __u64 state;
- __u64 state_entry_time;
- __u64 time_running;
- __u64 time_runnable;
- __u64 time_blocked;
- __u64 time_offline;
- } runstate;
- __u32 vcpu_id;
- struct {
- __u32 port;
- __u32 priority;
- __u64 expires_ns;
- } timer;
- __u8 vector;
- } u;
-};
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6
-#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7
-#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
-
-/* Secure Encrypted Virtualization command */
-enum sev_cmd_id {
- /* Guest initialization commands */
- KVM_SEV_INIT = 0,
- KVM_SEV_ES_INIT,
- /* Guest launch commands */
- KVM_SEV_LAUNCH_START,
- KVM_SEV_LAUNCH_UPDATE_DATA,
- KVM_SEV_LAUNCH_UPDATE_VMSA,
- KVM_SEV_LAUNCH_SECRET,
- KVM_SEV_LAUNCH_MEASURE,
- KVM_SEV_LAUNCH_FINISH,
- /* Guest migration commands (outgoing) */
- KVM_SEV_SEND_START,
- KVM_SEV_SEND_UPDATE_DATA,
- KVM_SEV_SEND_UPDATE_VMSA,
- KVM_SEV_SEND_FINISH,
- /* Guest migration commands (incoming) */
- KVM_SEV_RECEIVE_START,
- KVM_SEV_RECEIVE_UPDATE_DATA,
- KVM_SEV_RECEIVE_UPDATE_VMSA,
- KVM_SEV_RECEIVE_FINISH,
- /* Guest status and debug commands */
- KVM_SEV_GUEST_STATUS,
- KVM_SEV_DBG_DECRYPT,
- KVM_SEV_DBG_ENCRYPT,
- /* Guest certificates commands */
- KVM_SEV_CERT_EXPORT,
- /* Attestation report */
- KVM_SEV_GET_ATTESTATION_REPORT,
- /* Guest Migration Extension */
- KVM_SEV_SEND_CANCEL,
-
- KVM_SEV_NR_MAX,
-};
-
-struct kvm_sev_cmd {
- __u32 id;
- __u64 data;
- __u32 error;
- __u32 sev_fd;
-};
-
-struct kvm_sev_launch_start {
- __u32 handle;
- __u32 policy;
- __u64 dh_uaddr;
- __u32 dh_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_launch_update_data {
- __u64 uaddr;
- __u32 len;
-};
-
-
-struct kvm_sev_launch_secret {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-struct kvm_sev_launch_measure {
- __u64 uaddr;
- __u32 len;
-};
-
-struct kvm_sev_guest_status {
- __u32 handle;
- __u32 policy;
- __u32 state;
-};
-
-struct kvm_sev_dbg {
- __u64 src_uaddr;
- __u64 dst_uaddr;
- __u32 len;
-};
-
-struct kvm_sev_attestation_report {
- __u8 mnonce[16];
- __u64 uaddr;
- __u32 len;
-};
-
-struct kvm_sev_send_start {
- __u32 policy;
- __u64 pdh_cert_uaddr;
- __u32 pdh_cert_len;
- __u64 plat_certs_uaddr;
- __u32 plat_certs_len;
- __u64 amd_certs_uaddr;
- __u32 amd_certs_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_send_update_data {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-struct kvm_sev_receive_start {
- __u32 handle;
- __u32 policy;
- __u64 pdh_uaddr;
- __u32 pdh_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_receive_update_data {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
-#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
-#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
-
-struct kvm_assigned_pci_dev {
- __u32 assigned_dev_id;
- __u32 busnr;
- __u32 devfn;
- __u32 flags;
- __u32 segnr;
- union {
- __u32 reserved[11];
- };
-};
-
-#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
-#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
-#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
-
-#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
-#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
-#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
-
-#define KVM_DEV_IRQ_HOST_MASK 0x00ff
-#define KVM_DEV_IRQ_GUEST_MASK 0xff00
-
-struct kvm_assigned_irq {
- __u32 assigned_dev_id;
- __u32 host_irq; /* ignored (legacy field) */
- __u32 guest_irq;
- __u32 flags;
- union {
- __u32 reserved[12];
- };
-};
-
-struct kvm_assigned_msix_nr {
- __u32 assigned_dev_id;
- __u16 entry_nr;
- __u16 padding;
-};
-
-#define KVM_MAX_MSIX_PER_DEV 256
-struct kvm_assigned_msix_entry {
- __u32 assigned_dev_id;
- __u32 gsi;
- __u16 entry; /* The index of entry in the MSI-X table */
- __u16 padding[3];
-};
-
-#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
-
-/* Available with KVM_CAP_ARM_USER_IRQ */
-
-/* Bits for run->s.regs.device_irq_level */
-#define KVM_ARM_DEV_EL1_VTIMER (1 << 0)
-#define KVM_ARM_DEV_EL1_PTIMER (1 << 1)
-#define KVM_ARM_DEV_PMU (1 << 2)
-
-struct kvm_hyperv_eventfd {
- __u32 conn_id;
- __s32 fd;
- __u32 flags;
- __u32 padding[3];
-};
-
-#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
-#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
-
#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
@@ -2180,33 +1528,6 @@ struct kvm_stats_desc {
/* Available with KVM_CAP_S390_ZPCI_OP */
#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op)
-struct kvm_s390_zpci_op {
- /* in */
- __u32 fh; /* target device */
- __u8 op; /* operation to perform */
- __u8 pad[3];
- union {
- /* for KVM_S390_ZPCIOP_REG_AEN */
- struct {
- __u64 ibv; /* Guest addr of interrupt bit vector */
- __u64 sb; /* Guest addr of summary bit */
- __u32 flags;
- __u32 noi; /* Number of interrupts */
- __u8 isc; /* Guest interrupt subclass */
- __u8 sbo; /* Offset of guest summary bit vector */
- __u16 pad;
- } reg_aen;
- __u64 reserved[8];
- } u;
-};
-
-/* types for kvm_s390_zpci_op->op */
-#define KVM_S390_ZPCIOP_REG_AEN 0
-#define KVM_S390_ZPCIOP_DEREG_AEN 1
-
-/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
-#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
-
/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)
diff --git a/tools/include/uapi/linux/memfd.h b/tools/include/uapi/linux/memfd.h
new file mode 100644
index 00000000000000..01c0324e773316
--- /dev/null
+++ b/tools/include/uapi/linux/memfd.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_MEMFD_H
+#define _LINUX_MEMFD_H
+
+#include <asm-generic/hugetlb_encode.h>
+
+/* flags for memfd_create(2) (unsigned int) */
+#define MFD_CLOEXEC 0x0001U
+#define MFD_ALLOW_SEALING 0x0002U
+#define MFD_HUGETLB 0x0004U
+/* not executable and sealed to prevent changing to executable. */
+#define MFD_NOEXEC_SEAL 0x0008U
+/* executable */
+#define MFD_EXEC 0x0010U
+
+/*
+ * Huge page size encoding when MFD_HUGETLB is specified, and a huge page
+ * size other than the default is desired. See hugetlb_encode.h.
+ * All known huge page size encodings are provided here. It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system. See mmap(2) man page for details.
+ */
+#define MFD_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+#define MFD_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
+
+#define MFD_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
+#define MFD_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
+#define MFD_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
+#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
+#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
+#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
+#define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
+#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
+#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
+#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
+#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
+
+#endif /* _LINUX_MEMFD_H */
diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h
deleted file mode 100644
index a5feb760494879..00000000000000
--- a/tools/include/uapi/linux/openat2.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_OPENAT2_H
-#define _UAPI_LINUX_OPENAT2_H
-
-#include <linux/types.h>
-
-/*
- * Arguments for how openat2(2) should open the target path. If only @flags and
- * @mode are non-zero, then openat2(2) operates very similarly to openat(2).
- *
- * However, unlike openat(2), unknown or invalid bits in @flags result in
- * -EINVAL rather than being silently ignored. @mode must be zero unless one of
- * {O_CREAT, O_TMPFILE} are set.
- *
- * @flags: O_* flags.
- * @mode: O_CREAT/O_TMPFILE file mode.
- * @resolve: RESOLVE_* flags.
- */
-struct open_how {
- __u64 flags;
- __u64 mode;
- __u64 resolve;
-};
-
-/* how->resolve flags for openat2(2). */
-#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings
- (includes bind-mounts). */
-#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style
- "magic-links". */
-#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks
- (implies OEXT_NO_MAGICLINKS) */
-#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like
- "..", symlinks, and absolute
- paths which escape the dirfd. */
-#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
- be scoped inside the dirfd
- (similar to chroot(2)). */
-#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be
- completed through cached lookup. May
- return -EAGAIN if that's not
- possible. */
-
-#endif /* _UAPI_LINUX_OPENAT2_H */
diff --git a/tools/include/uapi/linux/userfaultfd.h b/tools/include/uapi/linux/userfaultfd.h
new file mode 100644
index 00000000000000..4283de22d5b659
--- /dev/null
+++ b/tools/include/uapi/linux/userfaultfd.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * include/linux/userfaultfd.h
+ *
+ * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ */
+
+#ifndef _LINUX_USERFAULTFD_H
+#define _LINUX_USERFAULTFD_H
+
+#include <linux/types.h>
+
+/* ioctls for /dev/userfaultfd */
+#define USERFAULTFD_IOC 0xAA
+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00)
+
+/*
+ * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
+ * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In
+ * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ
+ * means the userland is reading).
+ */
+#define UFFD_API ((__u64)0xAA)
+#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \
+ UFFDIO_REGISTER_MODE_WP | \
+ UFFDIO_REGISTER_MODE_MINOR)
+#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
+ UFFD_FEATURE_EVENT_FORK | \
+ UFFD_FEATURE_EVENT_REMAP | \
+ UFFD_FEATURE_EVENT_REMOVE | \
+ UFFD_FEATURE_EVENT_UNMAP | \
+ UFFD_FEATURE_MISSING_HUGETLBFS | \
+ UFFD_FEATURE_MISSING_SHMEM | \
+ UFFD_FEATURE_SIGBUS | \
+ UFFD_FEATURE_THREAD_ID | \
+ UFFD_FEATURE_MINOR_HUGETLBFS | \
+ UFFD_FEATURE_MINOR_SHMEM | \
+ UFFD_FEATURE_EXACT_ADDRESS | \
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \
+ UFFD_FEATURE_WP_UNPOPULATED | \
+ UFFD_FEATURE_POISON | \
+ UFFD_FEATURE_WP_ASYNC | \
+ UFFD_FEATURE_MOVE)
+#define UFFD_API_IOCTLS \
+ ((__u64)1 << _UFFDIO_REGISTER | \
+ (__u64)1 << _UFFDIO_UNREGISTER | \
+ (__u64)1 << _UFFDIO_API)
+#define UFFD_API_RANGE_IOCTLS \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY | \
+ (__u64)1 << _UFFDIO_ZEROPAGE | \
+ (__u64)1 << _UFFDIO_MOVE | \
+ (__u64)1 << _UFFDIO_WRITEPROTECT | \
+ (__u64)1 << _UFFDIO_CONTINUE | \
+ (__u64)1 << _UFFDIO_POISON)
+#define UFFD_API_RANGE_IOCTLS_BASIC \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY | \
+ (__u64)1 << _UFFDIO_WRITEPROTECT | \
+ (__u64)1 << _UFFDIO_CONTINUE | \
+ (__u64)1 << _UFFDIO_POISON)
+
+/*
+ * Valid ioctl command number range with this API is from 0x00 to
+ * 0x3F. UFFDIO_API is the fixed number, everything else can be
+ * changed by implementing a different UFFD_API. If sticking to the
+ * same UFFD_API more ioctl can be added and userland will be aware of
+ * which ioctl the running kernel implements through the ioctl command
+ * bitmask written by the UFFDIO_API.
+ */
+#define _UFFDIO_REGISTER (0x00)
+#define _UFFDIO_UNREGISTER (0x01)
+#define _UFFDIO_WAKE (0x02)
+#define _UFFDIO_COPY (0x03)
+#define _UFFDIO_ZEROPAGE (0x04)
+#define _UFFDIO_MOVE (0x05)
+#define _UFFDIO_WRITEPROTECT (0x06)
+#define _UFFDIO_CONTINUE (0x07)
+#define _UFFDIO_POISON (0x08)
+#define _UFFDIO_API (0x3F)
+
+/* userfaultfd ioctl ids */
+#define UFFDIO 0xAA
+#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \
+ struct uffdio_api)
+#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \
+ struct uffdio_register)
+#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \
+ struct uffdio_range)
+#define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \
+ struct uffdio_range)
+#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \
+ struct uffdio_copy)
+#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
+ struct uffdio_zeropage)
+#define UFFDIO_MOVE _IOWR(UFFDIO, _UFFDIO_MOVE, \
+ struct uffdio_move)
+#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
+ struct uffdio_writeprotect)
+#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \
+ struct uffdio_continue)
+#define UFFDIO_POISON _IOWR(UFFDIO, _UFFDIO_POISON, \
+ struct uffdio_poison)
+
+/* read() structure */
+struct uffd_msg {
+ __u8 event;
+
+ __u8 reserved1;
+ __u16 reserved2;
+ __u32 reserved3;
+
+ union {
+ struct {
+ __u64 flags;
+ __u64 address;
+ union {
+ __u32 ptid;
+ } feat;
+ } pagefault;
+
+ struct {
+ __u32 ufd;
+ } fork;
+
+ struct {
+ __u64 from;
+ __u64 to;
+ __u64 len;
+ } remap;
+
+ struct {
+ __u64 start;
+ __u64 end;
+ } remove;
+
+ struct {
+ /* unused reserved fields */
+ __u64 reserved1;
+ __u64 reserved2;
+ __u64 reserved3;
+ } reserved;
+ } arg;
+} __attribute__((packed));
+
+/*
+ * Start at 0x12 and not at 0 to be more strict against bugs.
+ */
+#define UFFD_EVENT_PAGEFAULT 0x12
+#define UFFD_EVENT_FORK 0x13
+#define UFFD_EVENT_REMAP 0x14
+#define UFFD_EVENT_REMOVE 0x15
+#define UFFD_EVENT_UNMAP 0x16
+
+/* flags for UFFD_EVENT_PAGEFAULT */
+#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
+#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */
+#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */
+
+struct uffdio_api {
+ /* userland asks for an API number and the features to enable */
+ __u64 api;
+ /*
+ * Kernel answers below with the all available features for
+ * the API, this notifies userland of which events and/or
+ * which flags for each event are enabled in the current
+ * kernel.
+ *
+ * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
+ * are to be considered implicitly always enabled in all kernels as
+ * long as the uffdio_api.api requested matches UFFD_API.
+ *
+ * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER
+ * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on
+ * hugetlbfs virtual memory ranges. Adding or not adding
+ * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has
+ * no real functional effect after UFFDIO_API returns, but
+ * it's only useful for an initial feature set probe at
+ * UFFDIO_API time. There are two ways to use it:
+ *
+ * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the
+ * uffdio_api.features before calling UFFDIO_API, an error
+ * will be returned by UFFDIO_API on a kernel without
+ * hugetlbfs missing support
+ *
+ * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in
+ * uffdio_api.features and instead it will be set by the
+ * kernel in the uffdio_api.features if the kernel supports
+ * it, so userland can later check if the feature flag is
+ * present in uffdio_api.features after UFFDIO_API
+ * succeeded.
+ *
+ * UFFD_FEATURE_MISSING_SHMEM works the same as
+ * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
+ * (i.e. tmpfs and other shmem based APIs).
+ *
+ * UFFD_FEATURE_SIGBUS feature means no page-fault
+ * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
+ * a SIGBUS signal will be sent to the faulting process.
+ *
+ * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
+ * be returned, if feature is not requested 0 will be returned.
+ *
+ * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults
+ * can be intercepted (via REGISTER_MODE_MINOR) for
+ * hugetlbfs-backed pages.
+ *
+ * UFFD_FEATURE_MINOR_SHMEM indicates the same support as
+ * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead.
+ *
+ * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page
+ * faults would be provided and the offset within the page would not be
+ * masked.
+ *
+ * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd
+ * write-protection mode is supported on both shmem and hugetlbfs.
+ *
+ * UFFD_FEATURE_WP_UNPOPULATED indicates that userfaultfd
+ * write-protection mode will always apply to unpopulated pages
+ * (i.e. empty ptes). This will be the default behavior for shmem
+ * & hugetlbfs, so this flag only affects anonymous memory behavior
+ * when userfault write-protection mode is registered.
+ *
+ * UFFD_FEATURE_WP_ASYNC indicates that userfaultfd write-protection
+ * asynchronous mode is supported in which the write fault is
+ * automatically resolved and write-protection is un-set.
+ * It implies UFFD_FEATURE_WP_UNPOPULATED.
+ *
+ * UFFD_FEATURE_MOVE indicates that the kernel supports moving an
+ * existing page contents from userspace.
+ */
+#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
+#define UFFD_FEATURE_EVENT_FORK (1<<1)
+#define UFFD_FEATURE_EVENT_REMAP (1<<2)
+#define UFFD_FEATURE_EVENT_REMOVE (1<<3)
+#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
+#define UFFD_FEATURE_MISSING_SHMEM (1<<5)
+#define UFFD_FEATURE_EVENT_UNMAP (1<<6)
+#define UFFD_FEATURE_SIGBUS (1<<7)
+#define UFFD_FEATURE_THREAD_ID (1<<8)
+#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9)
+#define UFFD_FEATURE_MINOR_SHMEM (1<<10)
+#define UFFD_FEATURE_EXACT_ADDRESS (1<<11)
+#define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12)
+#define UFFD_FEATURE_WP_UNPOPULATED (1<<13)
+#define UFFD_FEATURE_POISON (1<<14)
+#define UFFD_FEATURE_WP_ASYNC (1<<15)
+#define UFFD_FEATURE_MOVE (1<<16)
+ __u64 features;
+
+ __u64 ioctls;
+};
+
+struct uffdio_range {
+ __u64 start;
+ __u64 len;
+};
+
+struct uffdio_register {
+ struct uffdio_range range;
+#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0)
+#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1)
+#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2)
+ __u64 mode;
+
+ /*
+ * kernel answers which ioctl commands are available for the
+ * range, keep at the end as the last 8 bytes aren't read.
+ */
+ __u64 ioctls;
+};
+
+struct uffdio_copy {
+ __u64 dst;
+ __u64 src;
+ __u64 len;
+#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0)
+ /*
+ * UFFDIO_COPY_MODE_WP will map the page write protected on
+ * the fly. UFFDIO_COPY_MODE_WP is available only if the
+ * write protected ioctl is implemented for the range
+ * according to the uffdio_register.ioctls.
+ */
+#define UFFDIO_COPY_MODE_WP ((__u64)1<<1)
+ __u64 mode;
+
+ /*
+ * "copy" is written by the ioctl and must be at the end: the
+ * copy_from_user will not read the last 8 bytes.
+ */
+ __s64 copy;
+};
+
+struct uffdio_zeropage {
+ struct uffdio_range range;
+#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * "zeropage" is written by the ioctl and must be at the end:
+ * the copy_from_user will not read the last 8 bytes.
+ */
+ __s64 zeropage;
+};
+
+struct uffdio_writeprotect {
+ struct uffdio_range range;
+/*
+ * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range,
+ * unset the flag to undo protection of a range which was previously
+ * write protected.
+ *
+ * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up
+ * any wait thread after the operation succeeds.
+ *
+ * NOTE: Write protecting a region (WP=1) is unrelated to page faults,
+ * therefore DONTWAKE flag is meaningless with WP=1. Removing write
+ * protection (WP=0) in response to a page fault wakes the faulting
+ * task unless DONTWAKE is set.
+ */
+#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0)
+#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1)
+ __u64 mode;
+};
+
+struct uffdio_continue {
+ struct uffdio_range range;
+#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0)
+ /*
+ * UFFDIO_CONTINUE_MODE_WP will map the page write protected on
+ * the fly. UFFDIO_CONTINUE_MODE_WP is available only if the
+ * write protected ioctl is implemented for the range
+ * according to the uffdio_register.ioctls.
+ */
+#define UFFDIO_CONTINUE_MODE_WP ((__u64)1<<1)
+ __u64 mode;
+
+ /*
+ * Fields below here are written by the ioctl and must be at the end:
+ * the copy_from_user will not read past here.
+ */
+ __s64 mapped;
+};
+
+struct uffdio_poison {
+ struct uffdio_range range;
+#define UFFDIO_POISON_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * Fields below here are written by the ioctl and must be at the end:
+ * the copy_from_user will not read past here.
+ */
+ __s64 updated;
+};
+
+struct uffdio_move {
+ __u64 dst;
+ __u64 src;
+ __u64 len;
+ /*
+ * Especially if used to atomically remove memory from the
+ * address space the wake on the dst range is not needed.
+ */
+#define UFFDIO_MOVE_MODE_DONTWAKE ((__u64)1<<0)
+#define UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES ((__u64)1<<1)
+ __u64 mode;
+ /*
+ * "move" is written by the ioctl and must be at the end: the
+ * copy_from_user will not read the last 8 bytes.
+ */
+ __s64 move;
+};
+
+/*
+ * Flags for the userfaultfd(2) system call itself.
+ */
+
+/*
+ * Create a userfaultfd that can handle page faults only in user mode.
+ */
+#define UFFD_USER_MODE_ONLY 1
+
+#endif /* _LINUX_USERFAULTFD_H */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index efab29b8935bd9..a2061fcd612d7f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -498,7 +498,7 @@ struct bpf_struct_ops {
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
-#define ARENA_SEC ".arena.1"
+#define ARENA_SEC ".addr_space.1"
enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
@@ -1650,6 +1650,10 @@ static int sys_memfd_create(const char *name, unsigned flags)
return syscall(__NR_memfd_create, name, flags);
}
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
+
static int create_placeholder_fd(void)
{
int fd;
@@ -5352,8 +5356,8 @@ retry:
goto err_out;
}
if (map->def.type == BPF_MAP_TYPE_ARENA) {
- map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
- PROT_READ | PROT_WRITE,
+ map->mmaped = mmap((void *)(long)map->map_extra,
+ bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
map->fd, 0);
if (map->mmaped == MAP_FAILED) {
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index 4adcd7920d033d..cae799ad44e136 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -18,9 +18,13 @@ void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)
struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
{
- RC_STRUCT(perf_cpu_map) *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
+ RC_STRUCT(perf_cpu_map) *cpus;
struct perf_cpu_map *result;
+ if (nr_cpus == 0)
+ return NULL;
+
+ cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
if (ADD_RC_CHK(result, cpus)) {
cpus->nr = nr_cpus;
refcount_set(&cpus->refcnt, 1);
@@ -316,6 +320,19 @@ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
}
+bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map)
+{
+ if (!map)
+ return true;
+
+ return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1;
+}
+
+bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
+{
+ return map == NULL;
+}
+
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
int low, high;
@@ -372,6 +389,20 @@ bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
}
+struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
+{
+ struct perf_cpu cpu, result = {
+ .cpu = -1
+ };
+ int idx;
+
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
+ result = cpu;
+ break;
+ }
+ return result;
+}
+
struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
{
struct perf_cpu result = {
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 228c6c629b0ce1..90457d17fb2f51 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -61,6 +61,22 @@ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
* perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
*/
LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_empty - does the map contain no values and it doesn't
+ * contain the special "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
+LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 10b3f372264264..2aa79b69603268 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -10,6 +10,10 @@ LIBPERF_0.0.1 {
perf_cpu_map__nr;
perf_cpu_map__cpu;
perf_cpu_map__has_any_cpu_or_is_empty;
+ perf_cpu_map__is_any_cpu_or_is_empty;
+ perf_cpu_map__is_empty;
+ perf_cpu_map__has_any_cpu;
+ perf_cpu_map__min;
perf_cpu_map__max;
perf_cpu_map__has;
perf_thread_map__new_array;
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index 0c903c2372c978..c1a51d925e0e02 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -279,7 +279,7 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map)
if (!refcount_read(&map->refcnt))
return NULL;
- /* non-overwirte doesn't pause the ringbuffer */
+ /* non-overwrite doesn't pause the ringbuffer */
if (!map->overwrite)
map->end = perf_mmap__read_head(map);
diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c
index 727396de6be54f..9e7307186b7f41 100644
--- a/tools/lib/rbtree.c
+++ b/tools/lib/rbtree.c
@@ -58,7 +58,7 @@
static inline void rb_set_black(struct rb_node *rb)
{
- rb->__rb_parent_color |= RB_BLACK;
+ rb->__rb_parent_color += RB_BLACK;
}
static inline struct rb_node *rb_red_parent(struct rb_node *red)
diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c
index d435eb42354bfe..4e3a557a2f3741 100644
--- a/tools/lib/subcmd/run-command.c
+++ b/tools/lib/subcmd/run-command.c
@@ -165,43 +165,65 @@ int start_command(struct child_process *cmd)
return 0;
}
-static int wait_or_whine(pid_t pid)
+static int wait_or_whine(struct child_process *cmd, bool block)
{
- char sbuf[STRERR_BUFSIZE];
+ bool finished = cmd->finished;
+ int result = cmd->finish_result;
- for (;;) {
+ while (!finished) {
int status, code;
- pid_t waiting = waitpid(pid, &status, 0);
+ pid_t waiting = waitpid(cmd->pid, &status, block ? 0 : WNOHANG);
+
+ if (!block && waiting == 0)
+ break;
+
+ if (waiting < 0 && errno == EINTR)
+ continue;
+ finished = true;
if (waiting < 0) {
- if (errno == EINTR)
- continue;
+ char sbuf[STRERR_BUFSIZE];
+
fprintf(stderr, " Error: waitpid failed (%s)",
str_error_r(errno, sbuf, sizeof(sbuf)));
- return -ERR_RUN_COMMAND_WAITPID;
- }
- if (waiting != pid)
- return -ERR_RUN_COMMAND_WAITPID_WRONG_PID;
- if (WIFSIGNALED(status))
- return -ERR_RUN_COMMAND_WAITPID_SIGNAL;
-
- if (!WIFEXITED(status))
- return -ERR_RUN_COMMAND_WAITPID_NOEXIT;
- code = WEXITSTATUS(status);
- switch (code) {
- case 127:
- return -ERR_RUN_COMMAND_EXEC;
- case 0:
- return 0;
- default:
- return -code;
+ result = -ERR_RUN_COMMAND_WAITPID;
+ } else if (waiting != cmd->pid) {
+ result = -ERR_RUN_COMMAND_WAITPID_WRONG_PID;
+ } else if (WIFSIGNALED(status)) {
+ result = -ERR_RUN_COMMAND_WAITPID_SIGNAL;
+ } else if (!WIFEXITED(status)) {
+ result = -ERR_RUN_COMMAND_WAITPID_NOEXIT;
+ } else {
+ code = WEXITSTATUS(status);
+ switch (code) {
+ case 127:
+ result = -ERR_RUN_COMMAND_EXEC;
+ break;
+ case 0:
+ result = 0;
+ break;
+ default:
+ result = -code;
+ break;
+ }
}
}
+ if (finished) {
+ cmd->finished = 1;
+ cmd->finish_result = result;
+ }
+ return result;
+}
+
+int check_if_command_finished(struct child_process *cmd)
+{
+ wait_or_whine(cmd, /*block=*/false);
+ return cmd->finished;
}
int finish_command(struct child_process *cmd)
{
- return wait_or_whine(cmd->pid);
+ return wait_or_whine(cmd, /*block=*/true);
}
int run_command(struct child_process *cmd)
diff --git a/tools/lib/subcmd/run-command.h b/tools/lib/subcmd/run-command.h
index d794138a797f4a..b2d39de6e690b5 100644
--- a/tools/lib/subcmd/run-command.h
+++ b/tools/lib/subcmd/run-command.h
@@ -41,17 +41,20 @@ struct child_process {
int err;
const char *dir;
const char *const *env;
+ int finish_result;
unsigned no_stdin:1;
unsigned no_stdout:1;
unsigned no_stderr:1;
unsigned exec_cmd:1; /* if this is to be external sub-command */
unsigned stdout_to_stderr:1;
+ unsigned finished:1;
void (*preexec_cb)(void);
/* If set, call function in child rather than doing an exec. */
int (*no_exec_cmd)(struct child_process *process);
};
int start_command(struct child_process *);
+int check_if_command_finished(struct child_process *);
int finish_command(struct child_process *);
int run_command(struct child_process *);
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 5fa7957f6e0f56..25810e18b0a732 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -182,6 +182,7 @@ class NlMsg:
self.done = 1
extack_off = 20
elif self.nl_type == Netlink.NLMSG_DONE:
+ self.error = struct.unpack("i", self.raw[0:4])[0]
self.done = 1
extack_off = 4
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 6b7eb2d2aaf188..a451cbfbd781d9 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -228,8 +228,11 @@ class Type(SpecAttr):
presence = ''
for i in range(0, len(ref)):
presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}"
- if self.presence_type() == 'bit':
- code.append(presence + ' = 1;')
+ # Every layer below last is a nest, so we know it uses bit presence
+ # last layer is "self" and may be a complex type
+ if i == len(ref) - 1 and self.presence_type() != 'bit':
+ continue
+ code.append(presence + ' = 1;')
code += self._setter_lines(ri, member, presence)
func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}"
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 0b10ad00866867..0a33d9195b7a91 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -585,7 +585,7 @@ static int add_dead_ends(struct objtool_file *file)
struct section *rsec;
struct reloc *reloc;
struct instruction *insn;
- unsigned long offset;
+ uint64_t offset;
/*
* Check for manually annotated dead ends.
diff --git a/tools/perf/Build b/tools/perf/Build
index aa762362283492..b0cb7ad8e6ac24 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -59,3 +59,17 @@ perf-y += ui/
perf-y += scripts/
gtk-y += ui/gtk/
+
+ifdef SHELLCHECK
+ SHELL_TESTS := $(wildcard *.sh)
+ TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
+else
+ SHELL_TESTS :=
+ TEST_LOGS :=
+endif
+
+$(OUTPUT)%.shellcheck_log: %
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)shellcheck -s bash -a -S warning "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-y += $(TEST_LOGS)
diff --git a/tools/perf/Documentation/perf-arm-spe.txt b/tools/perf/Documentation/perf-arm-spe.txt
index bf03222e9a68e4..0a3eda482307a3 100644
--- a/tools/perf/Documentation/perf-arm-spe.txt
+++ b/tools/perf/Documentation/perf-arm-spe.txt
@@ -116,6 +116,15 @@ Depending on CPU model, the kernel may need to be booted with page table isolati
(kpti=off). If KPTI needs to be disabled, this will fail with a console message "profiling buffer
inaccessible. Try passing 'kpti=off' on the kernel command line".
+For the full criteria that determine whether KPTI needs to be forced off or not, see function
+unmap_kernel_at_el0() in the kernel sources. Common cases where it's not required
+are on the CPUs in kpti_safe_list, or on Arm v8.5+ where FEAT_E0PD is mandatory.
+
+The SPE interrupt must also be described by the firmware. If the module is loaded and KPTI is
+disabled (or isn't required to be disabled) but the SPE PMU still doesn't show in
+/sys/bus/event_source/devices/, then it's possible that the SPE interrupt isn't described by
+ACPI or DT. In this case no warning will be printed by the driver.
+
Capturing SPE with perf command-line tools
------------------------------------------
@@ -199,7 +208,8 @@ Common errors
- "Cannot find PMU `arm_spe'. Missing kernel support?"
- Module not built or loaded, KPTI not disabled (see above), or running on a VM
+ Module not built or loaded, KPTI not disabled, interrupt not described by firmware,
+ or running on a VM. See 'Kernel Requirements' above.
- "Arm SPE CONTEXT packets not found in the traces."
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 3b12595193c9f4..6bf2468f59d31d 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -71,6 +71,7 @@ counted. The following modifiers exist:
D - pin the event to the PMU
W - group is weak and will fallback to non-group if not schedulable,
e - group or event are exclusive and do not share the PMU
+ b - use BPF aggregration (see perf stat --bpf-counters)
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier can be specified multiple times:
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index d8b863e01fe082..d2b1593ef7006c 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -121,6 +121,9 @@ OPTIONS
- type: Data type of sample memory access.
- typeoff: Offset in the data type of sample memory access.
- symoff: Offset in the symbol.
+ - weight1: Average value of event specific weight (1st field of weight_struct).
+ - weight2: Average value of event specific weight (2nd field of weight_struct).
+ - weight3: Average value of event specific weight (3rd field of weight_struct).
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
@@ -198,7 +201,11 @@ OPTIONS
--fields=::
Specify output field - multiple keys can be specified in CSV format.
Following fields are available:
- overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+ overhead, overhead_sys, overhead_us, overhead_children, sample, period,
+ weight1, weight2, weight3, ins_lat, p_stage_cyc and retire_lat. The
+ last 3 names are alias for the corresponding weights. When the weight
+ fields are used, they will show the average value of the weight.
+
Also it can contain any sort key(s).
By default, every sort keys not specified in -F will be appended
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 005e51df855e7c..ff086ef05a0c50 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -132,9 +132,9 @@ OPTIONS
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff,
srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
- brstackinsn, brstackinsnlen, brstackoff, callindent, insn, disasm,
+ brstackinsn, brstackinsnlen, brstackdisasm, brstackoff, callindent, insn, disasm,
insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size,
- code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat.
+ code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat,
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
@@ -257,6 +257,9 @@ OPTIONS
can’t know the next sequential instruction after an unconditional branch unless
you calculate that based on its length.
+ brstackdisasm acts like brstackinsn, but will print disassembled instructions if
+ perf is built with the capstone library.
+
The brstackoff field will print an offset into a specific dso/binary.
With the metric option perf script can compute metrics for
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index 951a2f2628727a..9acb8d1f658890 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -31,9 +31,20 @@ OPTIONS
--verbose::
Be more verbose.
+-S::
+--sequential::
+ Run tests one after the other, this is the default mode.
+
+-p::
+--parallel::
+ Run tests in parallel, speeds up the whole process but is not safe with
+ the current infrastructure, where some tests that compete for some resources,
+ for instance, 'perf probe' tests that add/remove probes or clean all probes, etc.
+
-F::
--dont-fork::
- Do not fork child for each test, run all tests within single process.
+ Do not fork child for each test, run all tests within single process, this
+ sets sequential mode.
--dso::
Specify a DSO for the "Symbols" test.
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 1fe8df97fe8840..7f1e016a92536c 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -182,6 +182,16 @@ endif
FEATURE_CHECK_CFLAGS-libzstd := $(LIBZSTD_CFLAGS)
FEATURE_CHECK_LDFLAGS-libzstd := $(LIBZSTD_LDFLAGS)
+# for linking with debug library, run like:
+# make DEBUG=1 LIBTRACEEVENT_DIR=/opt/libtraceevent/
+TRACEEVENTLIBS := -ltraceevent
+ifdef LIBTRACEEVENT_DIR
+ LIBTRACEEVENT_CFLAGS := -I$(LIBTRACEEVENT_DIR)/include
+ LIBTRACEEVENT_LDFLAGS := -L$(LIBTRACEEVENT_DIR)/lib
+endif
+FEATURE_CHECK_CFLAGS-libtraceevent := $(LIBTRACEEVENT_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libtraceevent := $(LIBTRACEEVENT_LDFLAGS) $(TRACEEVENTLIBS)
+
FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
# include ARCH specific config
-include $(src-perf)/arch/$(SRCARCH)/Makefile
@@ -486,7 +496,10 @@ ifdef NO_DWARF
endif
ifeq ($(feature-scandirat), 1)
- CFLAGS += -DHAVE_SCANDIRAT_SUPPORT
+ # Ignore having scandirat with memory sanitizer that lacks an interceptor.
+ ifeq ($(filter s% -fsanitize=memory%,$(EXTRA_CFLAGS),),)
+ CFLAGS += -DHAVE_SCANDIRAT_SUPPORT
+ endif
endif
ifeq ($(feature-sched_getcpu), 1)
@@ -1165,9 +1178,10 @@ endif
ifneq ($(NO_LIBTRACEEVENT),1)
$(call feature_check,libtraceevent)
ifeq ($(feature-libtraceevent), 1)
- CFLAGS += -DHAVE_LIBTRACEEVENT
- EXTLIBS += -ltraceevent
- LIBTRACEEVENT_VERSION := $(shell $(PKG_CONFIG) --modversion libtraceevent)
+ CFLAGS += -DHAVE_LIBTRACEEVENT $(LIBTRACEEVENT_CFLAGS)
+ LDFLAGS += $(LIBTRACEEVENT_LDFLAGS)
+ EXTLIBS += ${TRACEEVENTLIBS}
+ LIBTRACEEVENT_VERSION := $(shell PKG_CONFIG_PATH=$(LIBTRACEEVENT_DIR) $(PKG_CONFIG) --modversion libtraceevent)
LIBTRACEEVENT_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEEVENT_VERSION)))
LIBTRACEEVENT_VERSION_2 := $(word 2, $(subst ., ,$(LIBTRACEEVENT_VERSION)))
LIBTRACEEVENT_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEEVENT_VERSION)))
@@ -1175,7 +1189,7 @@ ifneq ($(NO_LIBTRACEEVENT),1)
CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP)
$(call detected,CONFIG_LIBTRACEEVENT)
else
- $(error ERROR: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel or build with NO_LIBTRACEEVENT=1)
+ $(error ERROR: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel and/or set LIBTRACEEVENT_DIR or build with NO_LIBTRACEEVENT=1)
endif
$(call feature_check,libtracefs)
@@ -1301,6 +1315,7 @@ ifeq ($(VF),1)
$(call print_var,LIBUNWIND_DIR)
$(call print_var,LIBDW_DIR)
$(call print_var,JDIR)
+ $(call print_var,LIBTRACEEVENT_DIR)
ifeq ($(dwarf-post-unwind),1)
$(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) $(info $(MSG))
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 04d89d2ed209b4..5c35c0d8930696 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -458,35 +458,53 @@ SHELL = $(SHELL_PATH)
arm64_gen_sysreg_dir := $(srctree)/tools/arch/arm64/tools
ifneq ($(OUTPUT),)
- arm64_gen_sysreg_outdir := $(OUTPUT)
+ arm64_gen_sysreg_outdir := $(abspath $(OUTPUT))
else
arm64_gen_sysreg_outdir := $(CURDIR)
endif
arm64-sysreg-defs: FORCE
- $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir)
+ $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) \
+ prefix= subdir=
arm64-sysreg-defs-clean:
$(call QUIET_CLEAN,arm64-sysreg-defs)
$(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) \
- clean > /dev/null
+ prefix= subdir= clean > /dev/null
beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/
+beauty_uapi_linux_dir := $(srctree)/tools/perf/trace/beauty/include/uapi/linux/
+beauty_uapi_sound_dir := $(srctree)/tools/perf/trace/beauty/include/uapi/sound/
+beauty_arch_asm_dir := $(srctree)/tools/perf/trace/beauty/arch/x86/include/asm/
+beauty_x86_arch_asm_uapi_dir := $(srctree)/tools/perf/trace/beauty/arch/x86/include/uapi/asm/
+
linux_uapi_dir := $(srctree)/tools/include/uapi/linux
asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
-x86_arch_asm_uapi_dir := $(srctree)/tools/arch/x86/include/uapi/asm/
x86_arch_asm_dir := $(srctree)/tools/arch/x86/include/asm/
beauty_outdir := $(OUTPUT)trace/beauty/generated
beauty_ioctl_outdir := $(beauty_outdir)/ioctl
-drm_ioctl_array := $(beauty_ioctl_outdir)/drm_ioctl_array.c
-drm_hdr_dir := $(srctree)/tools/include/uapi/drm
-drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh
# Create output directory if not already present
$(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_outdir)')
+fs_at_flags_array := $(beauty_outdir)/fs_at_flags_array.c
+fs_at_flags_tbl := $(srctree)/tools/perf/trace/beauty/fs_at_flags.sh
+
+$(fs_at_flags_array): $(beauty_uapi_linux_dir)/fcntl.h $(fs_at_flags_tbl)
+ $(Q)$(SHELL) '$(fs_at_flags_tbl)' $(beauty_uapi_linux_dir) > $@
+
+clone_flags_array := $(beauty_outdir)/clone_flags_array.c
+clone_flags_tbl := $(srctree)/tools/perf/trace/beauty/clone.sh
+
+$(clone_flags_array): $(beauty_uapi_linux_dir)/sched.h $(clone_flags_tbl)
+ $(Q)$(SHELL) '$(clone_flags_tbl)' $(beauty_uapi_linux_dir) > $@
+
+drm_ioctl_array := $(beauty_ioctl_outdir)/drm_ioctl_array.c
+drm_hdr_dir := $(srctree)/tools/include/uapi/drm
+drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh
+
$(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl)
$(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@
@@ -499,20 +517,20 @@ $(fadvise_advice_array): $(linux_uapi_dir)/in.h $(fadvise_advice_tbl)
fsmount_arrays := $(beauty_outdir)/fsmount_arrays.c
fsmount_tbls := $(srctree)/tools/perf/trace/beauty/fsmount.sh
-$(fsmount_arrays): $(linux_uapi_dir)/fs.h $(fsmount_tbls)
- $(Q)$(SHELL) '$(fsmount_tbls)' $(linux_uapi_dir) > $@
+$(fsmount_arrays): $(beauty_uapi_linux_dir)/mount.h $(fsmount_tbls)
+ $(Q)$(SHELL) '$(fsmount_tbls)' $(beauty_uapi_linux_dir) > $@
fspick_arrays := $(beauty_outdir)/fspick_arrays.c
fspick_tbls := $(srctree)/tools/perf/trace/beauty/fspick.sh
-$(fspick_arrays): $(linux_uapi_dir)/fs.h $(fspick_tbls)
- $(Q)$(SHELL) '$(fspick_tbls)' $(linux_uapi_dir) > $@
+$(fspick_arrays): $(beauty_uapi_linux_dir)/mount.h $(fspick_tbls)
+ $(Q)$(SHELL) '$(fspick_tbls)' $(beauty_uapi_linux_dir) > $@
fsconfig_arrays := $(beauty_outdir)/fsconfig_arrays.c
fsconfig_tbls := $(srctree)/tools/perf/trace/beauty/fsconfig.sh
-$(fsconfig_arrays): $(linux_uapi_dir)/fs.h $(fsconfig_tbls)
- $(Q)$(SHELL) '$(fsconfig_tbls)' $(linux_uapi_dir) > $@
+$(fsconfig_arrays): $(beauty_uapi_linux_dir)/mount.h $(fsconfig_tbls)
+ $(Q)$(SHELL) '$(fsconfig_tbls)' $(beauty_uapi_linux_dir) > $@
pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c
asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
@@ -525,15 +543,15 @@ sndrv_ctl_ioctl_array := $(beauty_ioctl_outdir)/sndrv_ctl_ioctl_array.c
sndrv_ctl_hdr_dir := $(srctree)/tools/include/uapi/sound
sndrv_ctl_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
-$(sndrv_ctl_ioctl_array): $(sndrv_ctl_hdr_dir)/asound.h $(sndrv_ctl_ioctl_tbl)
- $(Q)$(SHELL) '$(sndrv_ctl_ioctl_tbl)' $(sndrv_ctl_hdr_dir) > $@
+$(sndrv_ctl_ioctl_array): $(beauty_uapi_sound_dir)/asound.h $(sndrv_ctl_ioctl_tbl)
+ $(Q)$(SHELL) '$(sndrv_ctl_ioctl_tbl)' $(beauty_uapi_sound_dir) > $@
sndrv_pcm_ioctl_array := $(beauty_ioctl_outdir)/sndrv_pcm_ioctl_array.c
sndrv_pcm_hdr_dir := $(srctree)/tools/include/uapi/sound
sndrv_pcm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
-$(sndrv_pcm_ioctl_array): $(sndrv_pcm_hdr_dir)/asound.h $(sndrv_pcm_ioctl_tbl)
- $(Q)$(SHELL) '$(sndrv_pcm_ioctl_tbl)' $(sndrv_pcm_hdr_dir) > $@
+$(sndrv_pcm_ioctl_array): $(beauty_uapi_sound_dir)/asound.h $(sndrv_pcm_ioctl_tbl)
+ $(Q)$(SHELL) '$(sndrv_pcm_ioctl_tbl)' $(beauty_uapi_sound_dir) > $@
kcmp_type_array := $(beauty_outdir)/kcmp_type_array.c
kcmp_hdr_dir := $(srctree)/tools/include/uapi/linux/
@@ -562,11 +580,10 @@ $(sockaddr_arrays): $(beauty_linux_dir)/socket.h $(sockaddr_tbl)
$(Q)$(SHELL) '$(sockaddr_tbl)' $(beauty_linux_dir) > $@
vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
-vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
-$(vhost_virtio_ioctl_array): $(vhost_virtio_hdr_dir)/vhost.h $(vhost_virtio_ioctl_tbl)
- $(Q)$(SHELL) '$(vhost_virtio_ioctl_tbl)' $(vhost_virtio_hdr_dir) > $@
+$(vhost_virtio_ioctl_array): $(beauty_uapi_linux_dir)/vhost.h $(vhost_virtio_ioctl_tbl)
+ $(Q)$(SHELL) '$(vhost_virtio_ioctl_tbl)' $(beauty_uapi_linux_dir) > $@
perf_ioctl_array := $(beauty_ioctl_outdir)/perf_ioctl_array.c
perf_hdr_dir := $(srctree)/tools/include/uapi/linux
@@ -597,15 +614,14 @@ $(mremap_flags_array): $(linux_uapi_dir)/mman.h $(mremap_flags_tbl)
mount_flags_array := $(beauty_outdir)/mount_flags_array.c
mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
-$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl)
- $(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@
+$(mount_flags_array): $(beauty_uapi_linux_dir)/mount.h $(mount_flags_tbl)
+ $(Q)$(SHELL) '$(mount_flags_tbl)' $(beauty_uapi_linux_dir) > $@
move_mount_flags_array := $(beauty_outdir)/move_mount_flags_array.c
move_mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/move_mount_flags.sh
-$(move_mount_flags_array): $(linux_uapi_dir)/fs.h $(move_mount_flags_tbl)
- $(Q)$(SHELL) '$(move_mount_flags_tbl)' $(linux_uapi_dir) > $@
-
+$(move_mount_flags_array): $(beauty_uapi_linux_dir)/mount.h $(move_mount_flags_tbl)
+ $(Q)$(SHELL) '$(move_mount_flags_tbl)' $(beauty_uapi_linux_dir) > $@
mmap_prot_array := $(beauty_outdir)/mmap_prot_array.c
mmap_prot_tbl := $(srctree)/tools/perf/trace/beauty/mmap_prot.sh
@@ -614,29 +630,28 @@ $(mmap_prot_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-
$(Q)$(SHELL) '$(mmap_prot_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
prctl_option_array := $(beauty_outdir)/prctl_option_array.c
-prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
-$(prctl_option_array): $(prctl_hdr_dir)/prctl.h $(prctl_option_tbl)
- $(Q)$(SHELL) '$(prctl_option_tbl)' $(prctl_hdr_dir) > $@
+$(prctl_option_array): $(beauty_uapi_linux_dir)/prctl.h $(prctl_option_tbl)
+ $(Q)$(SHELL) '$(prctl_option_tbl)' $(beauty_uapi_linux_dir) > $@
usbdevfs_ioctl_array := $(beauty_ioctl_outdir)/usbdevfs_ioctl_array.c
usbdevfs_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/usbdevfs_ioctl.sh
-$(usbdevfs_ioctl_array): $(linux_uapi_dir)/usbdevice_fs.h $(usbdevfs_ioctl_tbl)
- $(Q)$(SHELL) '$(usbdevfs_ioctl_tbl)' $(linux_uapi_dir) > $@
+$(usbdevfs_ioctl_array): $(beauty_uapi_linux_dir)/usbdevice_fs.h $(usbdevfs_ioctl_tbl)
+ $(Q)$(SHELL) '$(usbdevfs_ioctl_tbl)' $(beauty_uapi_linux_dir) > $@
x86_arch_prctl_code_array := $(beauty_outdir)/x86_arch_prctl_code_array.c
x86_arch_prctl_code_tbl := $(srctree)/tools/perf/trace/beauty/x86_arch_prctl.sh
-$(x86_arch_prctl_code_array): $(x86_arch_asm_uapi_dir)/prctl.h $(x86_arch_prctl_code_tbl)
- $(Q)$(SHELL) '$(x86_arch_prctl_code_tbl)' $(x86_arch_asm_uapi_dir) > $@
+$(x86_arch_prctl_code_array): $(beauty_x86_arch_asm_uapi_dir)/prctl.h $(x86_arch_prctl_code_tbl)
+ $(Q)$(SHELL) '$(x86_arch_prctl_code_tbl)' $(beauty_x86_arch_asm_uapi_dir) > $@
x86_arch_irq_vectors_array := $(beauty_outdir)/x86_arch_irq_vectors_array.c
x86_arch_irq_vectors_tbl := $(srctree)/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh
-$(x86_arch_irq_vectors_array): $(x86_arch_asm_dir)/irq_vectors.h $(x86_arch_irq_vectors_tbl)
- $(Q)$(SHELL) '$(x86_arch_irq_vectors_tbl)' $(x86_arch_asm_dir) > $@
+$(x86_arch_irq_vectors_array): $(beauty_arch_asm_dir)/irq_vectors.h $(x86_arch_irq_vectors_tbl)
+ $(Q)$(SHELL) '$(x86_arch_irq_vectors_tbl)' $(beauty_arch_asm_dir) > $@
x86_arch_MSRs_array := $(beauty_outdir)/x86_arch_MSRs_array.c
x86_arch_MSRs_tbl := $(srctree)/tools/perf/trace/beauty/tracepoints/x86_msr.sh
@@ -647,8 +662,8 @@ $(x86_arch_MSRs_array): $(x86_arch_asm_dir)/msr-index.h $(x86_arch_MSRs_tbl)
rename_flags_array := $(beauty_outdir)/rename_flags_array.c
rename_flags_tbl := $(srctree)/tools/perf/trace/beauty/rename_flags.sh
-$(rename_flags_array): $(linux_uapi_dir)/fs.h $(rename_flags_tbl)
- $(Q)$(SHELL) '$(rename_flags_tbl)' $(linux_uapi_dir) > $@
+$(rename_flags_array): $(beauty_uapi_linux_dir)/fs.h $(rename_flags_tbl)
+ $(Q)$(SHELL) '$(rename_flags_tbl)' $(beauty_uapi_linux_dir) > $@
arch_errno_name_array := $(beauty_outdir)/arch_errno_name_array.c
arch_errno_hdr_dir := $(srctree)/tools
@@ -657,11 +672,17 @@ arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
$(arch_errno_name_array): $(arch_errno_tbl)
$(Q)$(SHELL) '$(arch_errno_tbl)' '$(patsubst -%,,$(CC))' $(arch_errno_hdr_dir) > $@
+statx_mask_array := $(beauty_outdir)/statx_mask_array.c
+statx_mask_tbl := $(srctree)/tools/perf/trace/beauty/statx_mask.sh
+
+$(statx_mask_array): $(beauty_uapi_linux_dir)/stat.h $(statx_mask_tbl)
+ $(Q)$(SHELL) '$(statx_mask_tbl)' $(beauty_uapi_linux_dir) > $@
+
sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
-$(sync_file_range_arrays): $(linux_uapi_dir)/fs.h $(sync_file_range_tbls)
- $(Q)$(SHELL) '$(sync_file_range_tbls)' $(linux_uapi_dir) > $@
+$(sync_file_range_arrays): $(beauty_uapi_linux_dir)/fs.h $(sync_file_range_tbls)
+ $(Q)$(SHELL) '$(sync_file_range_tbls)' $(beauty_uapi_linux_dir) > $@
TESTS_CORESIGHT_DIR := $(srctree)/tools/perf/tests/shell/coresight
@@ -762,6 +783,8 @@ build-dir = $(or $(__build-dir),.)
prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders \
arm64-sysreg-defs \
+ $(fs_at_flags_array) \
+ $(clone_flags_array) \
$(drm_ioctl_array) \
$(fadvise_advice_array) \
$(fsconfig_arrays) \
@@ -789,6 +812,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders \
$(x86_arch_prctl_code_array) \
$(rename_flags_array) \
$(arch_errno_name_array) \
+ $(statx_mask_array) \
$(sync_file_range_arrays) \
$(LIBAPI) \
$(LIBPERF) \
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 77e6663c1703b8..07be32d99805c3 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -197,38 +197,37 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
static int cs_etm_validate_config(struct auxtrace_record *itr,
struct evsel *evsel)
{
- int i, err = -EINVAL;
+ int idx, err = 0;
struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus;
- struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
-
- /* Set option of each CPU we have */
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
+ struct perf_cpu_map *intersect_cpus;
+ struct perf_cpu cpu;
- /*
- * In per-cpu case, do the validation for CPUs to work with.
- * In per-thread case, the CPU map is empty. Since the traced
- * program can run on any CPUs in this case, thus don't skip
- * validation.
- */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) &&
- !perf_cpu_map__has(event_cpus, cpu))
- continue;
+ /*
+ * Set option of each CPU we have. In per-cpu case, do the validation
+ * for CPUs to work with. In per-thread case, the CPU map has the "any"
+ * CPU value. Since the traced program can run on any CPUs in this case,
+ * thus don't skip validation.
+ */
+ if (!perf_cpu_map__has_any_cpu(event_cpus)) {
+ struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
- if (!perf_cpu_map__has(online_cpus, cpu))
- continue;
+ intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
+ perf_cpu_map__put(online_cpus);
+ } else {
+ intersect_cpus = perf_cpu_map__new_online_cpus();
+ }
- err = cs_etm_validate_context_id(itr, evsel, i);
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
+ err = cs_etm_validate_context_id(itr, evsel, cpu.cpu);
if (err)
- goto out;
- err = cs_etm_validate_timestamp(itr, evsel, i);
+ break;
+
+ err = cs_etm_validate_timestamp(itr, evsel, cpu.cpu);
if (err)
- goto out;
+ break;
}
- err = 0;
-out:
- perf_cpu_map__put(online_cpus);
+ perf_cpu_map__put(intersect_cpus);
return err;
}
@@ -435,7 +434,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
* Also the case of per-cpu mmaps, need the contextID in order to be notified
* when a context switch happened.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
"timestamp", 1);
evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel,
@@ -461,7 +460,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
evsel->core.attr.sample_period = 1;
/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(evsel, TIME);
err = cs_etm_validate_config(itr, cs_etm_evsel);
@@ -533,45 +532,31 @@ static size_t
cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
struct evlist *evlist __maybe_unused)
{
- int i;
+ int idx;
int etmv3 = 0, etmv4 = 0, ete = 0;
struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus;
- struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
-
- /* cpu map is not empty, we have specific CPUs to work with */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
+ struct perf_cpu_map *intersect_cpus;
+ struct perf_cpu cpu;
- if (!perf_cpu_map__has(event_cpus, cpu) ||
- !perf_cpu_map__has(online_cpus, cpu))
- continue;
+ if (!perf_cpu_map__has_any_cpu(event_cpus)) {
+ /* cpu map is not "any" CPU , we have specific CPUs to work with */
+ struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus();
- if (cs_etm_is_ete(itr, i))
- ete++;
- else if (cs_etm_is_etmv4(itr, i))
- etmv4++;
- else
- etmv3++;
- }
+ intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus);
+ perf_cpu_map__put(online_cpus);
} else {
- /* get configuration for all CPUs in the system */
- for (i = 0; i < cpu__max_cpu().cpu; i++) {
- struct perf_cpu cpu = { .cpu = i, };
-
- if (!perf_cpu_map__has(online_cpus, cpu))
- continue;
-
- if (cs_etm_is_ete(itr, i))
- ete++;
- else if (cs_etm_is_etmv4(itr, i))
- etmv4++;
- else
- etmv3++;
- }
+ /* Event can be "any" CPU so count all online CPUs. */
+ intersect_cpus = perf_cpu_map__new_online_cpus();
}
-
- perf_cpu_map__put(online_cpus);
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, intersect_cpus) {
+ if (cs_etm_is_ete(itr, cpu.cpu))
+ ete++;
+ else if (cs_etm_is_etmv4(itr, cpu.cpu))
+ etmv4++;
+ else
+ etmv3++;
+ }
+ perf_cpu_map__put(intersect_cpus);
return (CS_ETM_HEADER_SIZE +
(ete * CS_ETE_PRIV_SIZE) +
@@ -813,16 +798,15 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
if (!session->evlist->core.nr_mmaps)
return -EINVAL;
- /* If the cpu_map is empty all online CPUs are involved */
- if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) {
+ /* If the cpu_map has the "any" CPU all online CPUs are involved */
+ if (perf_cpu_map__has_any_cpu(event_cpus)) {
cpu_map = online_cpus;
} else {
/* Make sure all specified CPUs are online */
- for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
- struct perf_cpu cpu = { .cpu = i, };
+ struct perf_cpu cpu;
- if (perf_cpu_map__has(event_cpus, cpu) &&
- !perf_cpu_map__has(online_cpus, cpu))
+ perf_cpu_map__for_each_cpu(cpu, i, event_cpus) {
+ if (!perf_cpu_map__has(online_cpus, cpu))
return -EINVAL;
}
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 51ccbfd3d246d4..0b52e67edb3b47 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, sample CPU for AUX event;
* also enable the timestamp tracing for samples correlation.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(arm_spe_evsel, CPU);
evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
"ts_enable", 1);
@@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
tracking_evsel->core.attr.sample_period = 1;
/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(tracking_evsel, TIME);
evsel__set_sample_bit(tracking_evsel, CPU);
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
index 97037499152ef7..741df3614a09ac 100644
--- a/tools/perf/arch/arm64/util/header.c
+++ b/tools/perf/arch/arm64/util/header.c
@@ -4,8 +4,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <perf/cpumap.h>
-#include <util/cpumap.h>
-#include <internal/cpumap.h>
#include <api/fs/fs.h>
#include <errno.h>
#include "debug.h"
@@ -19,20 +17,18 @@
static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
{
const char *sysfs = sysfs__mountpoint();
- int cpu;
- int ret = EINVAL;
+ struct perf_cpu cpu;
+ int idx, ret = EINVAL;
if (!sysfs || sz < MIDR_SIZE)
return EINVAL;
- cpus = perf_cpu_map__get(cpus);
-
- for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
char path[PATH_MAX];
FILE *file;
scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR,
- sysfs, RC_CHK_ACCESS(cpus)->map[cpu].cpu);
+ sysfs, cpu.cpu);
file = fopen(path, "r");
if (!file) {
@@ -51,7 +47,6 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
break;
}
- perf_cpu_map__put(cpus);
return ret;
}
diff --git a/tools/perf/arch/riscv/util/header.c b/tools/perf/arch/riscv/util/header.c
index 4a41856938a888..1b29030021eeba 100644
--- a/tools/perf/arch/riscv/util/header.c
+++ b/tools/perf/arch/riscv/util/header.c
@@ -41,7 +41,7 @@ static char *_get_cpuid(void)
char *mimpid = NULL;
char *cpuid = NULL;
int read;
- unsigned long line_sz;
+ size_t line_sz;
FILE *cpuinfo;
cpuinfo = fopen(CPUINFO, "r");
diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build
index a7dd46a5b67850..ed37013b4289bb 100644
--- a/tools/perf/arch/x86/Build
+++ b/tools/perf/arch/x86/Build
@@ -1,2 +1,16 @@
perf-y += util/
perf-y += tests/
+
+ifdef SHELLCHECK
+ SHELL_TESTS := entry/syscalls/syscalltbl.sh
+ TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
+else
+ SHELL_TESTS :=
+ TEST_LOGS :=
+endif
+
+$(OUTPUT)%.shellcheck_log: %
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-y += $(TEST_LOGS)
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index b87f46e5feea37..c1e3b7d3955440 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -10,3 +10,17 @@ perf-$(CONFIG_AUXTRACE) += insn-x86.o
endif
perf-$(CONFIG_X86_64) += bp-modify.o
perf-y += amd-ibs-via-core-pmu.o
+
+ifdef SHELLCHECK
+ SHELL_TESTS := gen-insn-x86-dat.sh
+ TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
+else
+ SHELL_TESTS :=
+ TEST_LOGS :=
+endif
+
+$(OUTPUT)%.shellcheck_log: %
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-y += $(TEST_LOGS)
diff --git a/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh b/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh
index 0d0a003a9c5eac..89c46532cd5c9d 100755
--- a/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh
+++ b/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh
@@ -11,7 +11,7 @@ if [ "$(uname -m)" != "x86_64" ]; then
exit 1
fi
-cd $(dirname $0)
+cd "$(dirname $0)"
trap 'echo "Might need a more recent version of binutils"' EXIT
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index af8ae4647585b4..34696f3d3d5dfc 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -143,7 +143,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
if (!opts->full_auxtrace)
return 0;
- if (opts->full_auxtrace && !perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (opts->full_auxtrace && !perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
return -EINVAL;
}
@@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(intel_bts_evsel, CPU);
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index d199619df3abe1..6de7e2d2107562 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -369,7 +369,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
ui__warning("Intel Processor Trace: TSC not available\n");
}
- per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);
+ per_cpu_mmaps = !perf_cpu_map__is_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus);
auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
@@ -774,7 +774,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Per-cpu recording needs sched_switch events to distinguish different
* threads.
*/
- if (have_timing_info && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
+ if (have_timing_info && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
!record_opts__no_switch_events(opts)) {
if (perf_can_record_switch_events()) {
bool cpu_wide = !target__none(&opts->target) &&
@@ -832,7 +832,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* In the case of per-cpu mmaps, we need the CPU on the
* AUX event.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus))
evsel__set_sample_bit(intel_pt_evsel, CPU);
}
@@ -858,7 +858,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
tracking_evsel->immediate = true;
/* In per-cpu case, always need the time of mmap events etc */
- if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) {
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
evsel__set_sample_bit(tracking_evsel, TIME);
/* And the CPU for switch events */
evsel__set_sample_bit(tracking_evsel, CPU);
@@ -870,7 +870,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Warn the user when we do not have enough information to decode i.e.
* per-cpu with no sched_switch (except workload-only).
*/
- if (!ptr->have_sched_switch && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) &&
+ if (!ptr->have_sched_switch && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) &&
!target__none(&opts->target) &&
!intel_pt_evsel->core.attr.exclude_user)
ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index faa18e6d24671d..9f736423af53c3 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -46,6 +46,8 @@ int bench_breakpoint_enable(int argc, const char **argv);
int bench_uprobe_baseline(int argc, const char **argv);
int bench_uprobe_empty(int argc, const char **argv);
int bench_uprobe_trace_printk(int argc, const char **argv);
+int bench_uprobe_empty_ret(int argc, const char **argv);
+int bench_uprobe_trace_printk_ret(int argc, const char **argv);
int bench_pmu_scan(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c
index 5c71fdc419dd7f..0b90275862e19f 100644
--- a/tools/perf/bench/uprobe.c
+++ b/tools/perf/bench/uprobe.c
@@ -26,9 +26,11 @@
static int loops = LOOPS_DEFAULT;
enum bench_uprobe {
- BENCH_UPROBE__BASELINE,
- BENCH_UPROBE__EMPTY,
- BENCH_UPROBE__TRACE_PRINTK,
+ BENCH_UPROBE__BASELINE,
+ BENCH_UPROBE__EMPTY,
+ BENCH_UPROBE__TRACE_PRINTK,
+ BENCH_UPROBE__EMPTY_RET,
+ BENCH_UPROBE__TRACE_PRINTK_RET,
};
static const struct option options[] = {
@@ -47,7 +49,7 @@ static const char * const bench_uprobe_usage[] = {
#define bench_uprobe__attach_uprobe(prog) \
skel->links.prog = bpf_program__attach_uprobe_opts(/*prog=*/skel->progs.prog, \
/*pid=*/-1, \
- /*binary_path=*/"/lib64/libc.so.6", \
+ /*binary_path=*/"libc.so.6", \
/*func_offset=*/0, \
/*opts=*/&uprobe_opts); \
if (!skel->links.prog) { \
@@ -81,6 +83,8 @@ static int bench_uprobe__setup_bpf_skel(enum bench_uprobe bench)
case BENCH_UPROBE__BASELINE: break;
case BENCH_UPROBE__EMPTY: bench_uprobe__attach_uprobe(empty); break;
case BENCH_UPROBE__TRACE_PRINTK: bench_uprobe__attach_uprobe(trace_printk); break;
+ case BENCH_UPROBE__EMPTY_RET: bench_uprobe__attach_uprobe(empty_ret); break;
+ case BENCH_UPROBE__TRACE_PRINTK_RET: bench_uprobe__attach_uprobe(trace_printk_ret); break;
default:
fprintf(stderr, "Invalid bench: %d\n", bench);
goto cleanup;
@@ -197,3 +201,13 @@ int bench_uprobe_trace_printk(int argc, const char **argv)
{
return bench_uprobe(argc, argv, BENCH_UPROBE__TRACE_PRINTK);
}
+
+int bench_uprobe_empty_ret(int argc, const char **argv)
+{
+ return bench_uprobe(argc, argv, BENCH_UPROBE__EMPTY_RET);
+}
+
+int bench_uprobe_trace_printk_ret(int argc, const char **argv)
+{
+ return bench_uprobe(argc, argv, BENCH_UPROBE__TRACE_PRINTK_RET);
+}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6c1cc797692d94..83812b9d53638c 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -37,11 +37,13 @@
#include "util/map_symbol.h"
#include "util/branch.h"
#include "util/util.h"
+#include "ui/progress.h"
#include <dlfcn.h>
#include <errno.h>
#include <linux/bitmap.h>
#include <linux/err.h>
+#include <inttypes.h>
struct perf_annotate {
struct perf_tool tool;
@@ -327,77 +329,6 @@ static int hist_entry__tty_annotate(struct hist_entry *he,
return symbol__tty_annotate2(&he->ms, evsel);
}
-static void print_annotated_data_header(struct hist_entry *he, struct evsel *evsel)
-{
- struct dso *dso = map__dso(he->ms.map);
- int nr_members = 1;
- int nr_samples = he->stat.nr_events;
-
- if (evsel__is_group_event(evsel)) {
- struct hist_entry *pair;
-
- list_for_each_entry(pair, &he->pairs.head, pairs.node)
- nr_samples += pair->stat.nr_events;
- }
-
- printf("Annotate type: '%s' in %s (%d samples):\n",
- he->mem_type->self.type_name, dso->name, nr_samples);
-
- if (evsel__is_group_event(evsel)) {
- struct evsel *pos;
- int i = 0;
-
- for_each_group_evsel(pos, evsel)
- printf(" event[%d] = %s\n", i++, pos->name);
-
- nr_members = evsel->core.nr_members;
- }
-
- printf("============================================================================\n");
- printf("%*s %10s %10s %s\n", 11 * nr_members, "samples", "offset", "size", "field");
-}
-
-static void print_annotated_data_type(struct annotated_data_type *mem_type,
- struct annotated_member *member,
- struct evsel *evsel, int indent)
-{
- struct annotated_member *child;
- struct type_hist *h = mem_type->histograms[evsel->core.idx];
- int i, nr_events = 1, samples = 0;
-
- for (i = 0; i < member->size; i++)
- samples += h->addr[member->offset + i].nr_samples;
- printf(" %10d", samples);
-
- if (evsel__is_group_event(evsel)) {
- struct evsel *pos;
-
- for_each_group_member(pos, evsel) {
- h = mem_type->histograms[pos->core.idx];
-
- samples = 0;
- for (i = 0; i < member->size; i++)
- samples += h->addr[member->offset + i].nr_samples;
- printf(" %10d", samples);
- }
- nr_events = evsel->core.nr_members;
- }
-
- printf(" %10d %10d %*s%s\t%s",
- member->offset, member->size, indent, "", member->type_name,
- member->var_name ?: "");
-
- if (!list_empty(&member->children))
- printf(" {\n");
-
- list_for_each_entry(child, &member->children, node)
- print_annotated_data_type(mem_type, child, evsel, indent + 4);
-
- if (!list_empty(&member->children))
- printf("%*s}", 11 * nr_events + 24 + indent, "");
- printf(";\n");
-}
-
static void print_annotate_data_stat(struct annotated_data_stat *s)
{
#define PRINT_STAT(fld) if (s->fld) printf("%10d : %s\n", s->fld, #fld)
@@ -430,6 +361,7 @@ static void print_annotate_data_stat(struct annotated_data_stat *s)
PRINT_STAT(no_typeinfo);
PRINT_STAT(invalid_size);
PRINT_STAT(bad_offset);
+ PRINT_STAT(insn_track);
printf("\n");
#undef PRINT_STAT
@@ -537,10 +469,32 @@ find_next:
goto find_next;
}
- print_annotated_data_header(he, evsel);
- print_annotated_data_type(he->mem_type, &he->mem_type->self, evsel, 0);
- printf("\n");
- goto find_next;
+ if (use_browser == 1)
+ key = hist_entry__annotate_data_tui(he, evsel, NULL);
+ else
+ key = hist_entry__annotate_data_tty(he, evsel);
+
+ switch (key) {
+ case -1:
+ if (!ann->skip_missing)
+ return;
+ /* fall through */
+ case K_RIGHT:
+ case '>':
+ next = rb_next(nd);
+ break;
+ case K_LEFT:
+ case '<':
+ next = rb_prev(nd);
+ break;
+ default:
+ return;
+ }
+
+ if (use_browser == 0 || next != NULL)
+ nd = next;
+
+ continue;
}
if (use_browser == 2) {
@@ -632,13 +586,23 @@ static int __cmd_annotate(struct perf_annotate *ann)
evlist__for_each_entry(session->evlist, pos) {
struct hists *hists = evsel__hists(pos);
u32 nr_samples = hists->stats.nr_samples;
+ struct ui_progress prog;
if (nr_samples > 0) {
total_nr_samples += nr_samples;
- hists__collapse_resort(hists, NULL);
+
+ ui_progress__init(&prog, nr_samples,
+ "Merging related events...");
+ hists__collapse_resort(hists, &prog);
+ ui_progress__finish();
+
/* Don't sort callchain */
evsel__reset_sample_bit(pos, CALLCHAIN);
- evsel__output_resort(pos, NULL);
+
+ ui_progress__init(&prog, nr_samples,
+ "Sorting events for output...");
+ evsel__output_resort(pos, &prog);
+ ui_progress__finish();
/*
* An event group needs to display other events too.
@@ -809,8 +773,6 @@ int cmd_annotate(int argc, const char **argv)
"Enable symbol demangling"),
OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
"Enable kernel symbol demangling"),
- OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
- "Show event group information together"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
"Show a column with the sum of periods"),
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
@@ -935,9 +897,7 @@ int cmd_annotate(int argc, const char **argv)
use_browser = 2;
#endif
- /* FIXME: only support stdio for now */
if (annotate.data_type) {
- use_browser = 0;
annotate_opts.annotate_src = false;
symbol_conf.annotate_data_member = true;
symbol_conf.annotate_data_sample = true;
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 1a8898d5b560f3..2c1a9f3d847a21 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -109,6 +109,8 @@ static struct bench uprobe_benchmarks[] = {
{ "baseline", "Baseline libc usleep(1000) call", bench_uprobe_baseline, },
{ "empty", "Attach empty BPF prog to uprobe on usleep, system wide", bench_uprobe_empty, },
{ "trace_printk", "Attach trace_printk BPF prog to uprobe on usleep syswide", bench_uprobe_trace_printk, },
+ { "empty_ret", "Attach empty BPF prog to uretprobe on usleep, system wide", bench_uprobe_empty_ret, },
+ { "trace_printk_ret", "Attach trace_printk BPF prog to uretprobe on usleep syswide", bench_uprobe_trace_printk_ret,},
{ NULL, NULL, NULL },
};
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 16b40f5d43db06..1f1d17df9b9ac5 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2050,7 +2050,7 @@ static int hpp_list__parse(struct perf_hpp_list *hpp_list,
perf_hpp__setup_output_field(hpp_list);
/*
- * We dont need other sorting keys other than those
+ * We don't need other sorting keys other than those
* we already specified. It also really slows down
* the processing a lot with big number of output
* fields, so switching this off for c2c.
@@ -2319,11 +2319,7 @@ static int setup_nodes(struct perf_session *session)
nodes[node] = set;
- /* empty node, skip */
- if (perf_cpu_map__has_any_cpu_or_is_empty(map))
- continue;
-
- perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
__set_bit(cpu.cpu, set);
if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index eb3ef5c24b6625..ce5e28eaad9041 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -1187,23 +1187,28 @@ static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_
process_build_id, machine);
}
+static int guest_session__add_build_ids_cb(struct dso *dso, void *data)
+{
+ struct guest_session *gs = data;
+ struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
+
+ if (!dso->has_build_id)
+ return 0;
+
+ return synthesize_build_id(inject, dso, gs->machine_pid);
+
+}
+
static int guest_session__add_build_ids(struct guest_session *gs)
{
struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
- struct machine *machine = &gs->session->machines.host;
- struct dso *dso;
- int ret;
/* Build IDs will be put in the Build ID feature section */
perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);
- dsos__for_each_with_build_id(dso, &machine->dsos.head) {
- ret = synthesize_build_id(inject, dso, gs->machine_pid);
- if (ret)
- return ret;
- }
-
- return 0;
+ return dsos__for_each_dso(&gs->session->machines.host.dsos,
+ guest_session__add_build_ids_cb,
+ gs);
}
static int guest_session__ksymbol_event(struct perf_tool *tool,
@@ -2122,7 +2127,7 @@ static int __cmd_inject(struct perf_inject *inject)
*/
if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
inject->have_auxtrace && !inject->itrace_synth_opts.set)
- dsos__hit_all(session);
+ perf_session__dsos_hit_all(session);
/*
* The AUX areas have been removed and replaced with
* synthesized hardware events, so clear the feature flag.
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 9714327fd0eadd..6fd95be5032b0e 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1408,7 +1408,7 @@ static int __cmd_kmem(struct perf_session *session)
}
evlist__for_each_entry(session->evlist, evsel) {
- if (!strcmp(evsel__name(evsel), "kmem:mm_page_alloc") &&
+ if (evsel__name_is(evsel, "kmem:mm_page_alloc") &&
evsel__field(evsel, "pfn")) {
use_pfn = true;
break;
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 02bf608d585ee6..5cab31231551da 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -22,6 +22,7 @@
#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include <linux/zalloc.h>
+#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
@@ -76,26 +77,38 @@ static void default_print_start(void *ps)
static void default_print_end(void *print_state __maybe_unused) {}
+static const char *skip_spaces_or_commas(const char *str)
+{
+ while (isspace(*str) || *str == ',')
+ ++str;
+ return str;
+}
+
static void wordwrap(FILE *fp, const char *s, int start, int max, int corr)
{
int column = start;
int n;
bool saw_newline = false;
+ bool comma = false;
while (*s) {
- int wlen = strcspn(s, " \t\n");
+ int wlen = strcspn(s, " ,\t\n");
+ const char *sep = comma ? "," : " ";
if ((column + wlen >= max && column > start) || saw_newline) {
- fprintf(fp, "\n%*s", start, "");
+ fprintf(fp, comma ? ",\n%*s" : "\n%*s", start, "");
column = start + corr;
}
- n = fprintf(fp, "%s%.*s", column > start ? " " : "", wlen, s);
+ if (column <= start)
+ sep = "";
+ n = fprintf(fp, "%s%.*s", sep, wlen, s);
if (n <= 0)
break;
saw_newline = s[wlen] == '\n';
s += wlen;
+ comma = s[0] == ',';
column += n;
- s = skip_spaces(s);
+ s = skip_spaces_or_commas(s);
}
}
@@ -313,6 +326,9 @@ static void fix_escape_fprintf(FILE *fp, struct strbuf *buf, const char *fmt, ..
case '\n':
strbuf_addstr(buf, "\\n");
break;
+ case '\r':
+ strbuf_addstr(buf, "\\r");
+ break;
case '\\':
fallthrough;
case '\"':
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ff7e1d6cfcd2e7..66a3de8ac66186 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -332,7 +332,7 @@ static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
} else {
/*
* aio write request may require restart with the
- * reminder if the kernel didn't write whole
+ * remainder if the kernel didn't write whole
* chunk at once.
*/
rem_off = cblock->aio_offset + written;
@@ -400,7 +400,7 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size
*
* Coping can be done in two steps in case the chunk of profiling data
* crosses the upper bound of the kernel buffer. In this case we first move
- * part of data from map->start till the upper bound and then the reminder
+ * part of data from map->start till the upper bound and then the remainder
* from the beginning of the kernel buffer till the end of the data chunk.
*/
@@ -1355,8 +1355,6 @@ static int record__open(struct record *rec)
struct record_opts *opts = &rec->opts;
int rc = 0;
- evlist__config(evlist, opts, &callchain_param);
-
evlist__for_each_entry(evlist, pos) {
try_again:
if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
@@ -1790,7 +1788,7 @@ record__finish_output(struct record *rec)
process_buildids(rec);
if (rec->buildid_all)
- dsos__hit_all(rec->session);
+ perf_session__dsos_hit_all(rec->session);
}
perf_session__write_header(rec->session, rec->evlist, fd, true);
@@ -2483,6 +2481,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
evlist__uniquify_name(rec->evlist);
+ evlist__config(rec->evlist, opts, &callchain_param);
+
/* Debug message used by test scripts */
pr_debug3("perf record opening and mmapping events\n");
if (record__open(rec) != 0) {
@@ -2881,10 +2881,10 @@ out_delete_session:
}
#endif
zstd_fini(&session->zstd_data);
- perf_session__delete(session);
-
if (!opts->no_bpf_event)
evlist__stop_sb_thread(rec->sb_evlist);
+
+ perf_session__delete(session);
return status;
}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index dcd93ee5fc24e3..dafba6e030ef70 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -172,7 +172,7 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
struct mem_info *mi;
struct branch_info *bi;
- if (!ui__has_annotation() && !rep->symbol_ipc && !rep->data_type)
+ if (!ui__has_annotation() && !rep->symbol_ipc)
return 0;
if (sort__mode == SORT_MODE__BRANCH) {
@@ -1694,6 +1694,11 @@ repeat:
else
use_browser = 0;
+ if (report.data_type && use_browser == 1) {
+ symbol_conf.annotate_data_member = true;
+ symbol_conf.annotate_data_sample = true;
+ }
+
if (sort_order && strstr(sort_order, "ipc")) {
parse_options_usage(report_usage, options, "s", 1);
goto error;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index b248c433529a8f..0fce7d8986c074 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2148,7 +2148,7 @@ static bool is_idle_sample(struct perf_sample *sample,
struct evsel *evsel)
{
/* pid 0 == swapper == idle task */
- if (strcmp(evsel__name(evsel), "sched:sched_switch") == 0)
+ if (evsel__name_is(evsel, "sched:sched_switch"))
return evsel__intval(evsel, sample, "prev_pid") == 0;
return sample->pid == 0;
@@ -2375,7 +2375,7 @@ static bool timehist_skip_sample(struct perf_sched *sched,
}
if (sched->idle_hist) {
- if (strcmp(evsel__name(evsel), "sched:sched_switch"))
+ if (!evsel__name_is(evsel, "sched:sched_switch"))
rc = true;
else if (evsel__intval(evsel, sample, "prev_pid") != 0 &&
evsel__intval(evsel, sample, "next_pid") != 0)
@@ -2963,8 +2963,11 @@ static int timehist_check_attr(struct perf_sched *sched,
return -1;
}
- if (sched->show_callchain && !evsel__has_callchain(evsel)) {
- pr_info("Samples do not have callchains.\n");
+ /* only need to save callchain related to sched_switch event */
+ if (sched->show_callchain &&
+ evsel__name_is(evsel, "sched:sched_switch") &&
+ !evsel__has_callchain(evsel)) {
+ pr_info("Samples of sched_switch event do not have callchains.\n");
sched->show_callchain = 0;
symbol_conf.use_callchain = 0;
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 37088cc0ff1b5f..647cb31a47c8f1 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -136,6 +136,7 @@ enum perf_output_field {
PERF_OUTPUT_RETIRE_LAT = 1ULL << 40,
PERF_OUTPUT_DSOFF = 1ULL << 41,
PERF_OUTPUT_DISASM = 1ULL << 42,
+ PERF_OUTPUT_BRSTACKDISASM = 1ULL << 43,
};
struct perf_script {
@@ -210,6 +211,7 @@ struct output_option {
{.str = "vcpu", .field = PERF_OUTPUT_VCPU},
{.str = "cgroup", .field = PERF_OUTPUT_CGROUP},
{.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT},
+ {.str = "brstackdisasm", .field = PERF_OUTPUT_BRSTACKDISASM},
};
enum {
@@ -510,7 +512,8 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
"selected. Hence, no address to lookup the source line number.\n");
return -EINVAL;
}
- if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) && !allow_user_set &&
+ if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM))
+ && !allow_user_set &&
!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) {
pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
"Hint: run 'perf record -b ...'\n");
@@ -1162,6 +1165,31 @@ out:
return ret;
}
+static int any_dump_insn(struct perf_event_attr *attr __maybe_unused,
+ struct perf_insn *x, uint64_t ip,
+ u8 *inbuf, int inlen, int *lenp,
+ FILE *fp)
+{
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+ if (PRINT_FIELD(BRSTACKDISASM)) {
+ int printed = fprintf_insn_asm(x->machine, x->thread, x->cpumode, x->is64bit,
+ (uint8_t *)inbuf, inlen, ip, lenp,
+ PRINT_INSN_IMM_HEX, fp);
+
+ if (printed > 0)
+ return printed;
+ }
+#endif
+ return fprintf(fp, "%s", dump_insn(x, ip, inbuf, inlen, lenp));
+}
+
+static int add_padding(FILE *fp, int printed, int padding)
+{
+ if (printed >= 0 && printed < padding)
+ printed += fprintf(fp, "%*s", padding - printed, "");
+ return printed;
+}
+
static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
struct perf_insn *x, u8 *inbuf, int len,
int insn, FILE *fp, int *total_cycles,
@@ -1169,8 +1197,10 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
struct thread *thread)
{
int ilen = 0;
- int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip,
- dump_insn(x, ip, inbuf, len, &ilen));
+ int printed = fprintf(fp, "\t%016" PRIx64 "\t", ip);
+
+ printed += add_padding(fp, any_dump_insn(attr, x, ip, inbuf, len, &ilen, fp), 30);
+ printed += fprintf(fp, "\t");
if (PRINT_FIELD(BRSTACKINSNLEN))
printed += fprintf(fp, "ilen: %d\t", ilen);
@@ -1262,6 +1292,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
nr = max_blocks + 1;
x.thread = thread;
+ x.machine = machine;
x.cpu = sample->cpu;
printed += fprintf(fp, "%c", '\n');
@@ -1312,8 +1343,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
break;
} else {
ilen = 0;
- printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip,
- dump_insn(&x, ip, buffer + off, len - off, &ilen));
+ printed += fprintf(fp, "\t%016" PRIx64 "\t", ip);
+ printed += any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen, fp);
if (PRINT_FIELD(BRSTACKINSNLEN))
printed += fprintf(fp, "\tilen: %d", ilen);
printed += fprintf(fp, "\n");
@@ -1360,8 +1391,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
if (len <= 0)
goto out;
ilen = 0;
- printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip,
- dump_insn(&x, sample->ip, buffer, len, &ilen));
+ printed += fprintf(fp, "\t%016" PRIx64 "\t", sample->ip);
+ printed += any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen, fp);
if (PRINT_FIELD(BRSTACKINSNLEN))
printed += fprintf(fp, "\tilen: %d", ilen);
printed += fprintf(fp, "\n");
@@ -1371,8 +1402,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
}
for (off = 0; off <= end - start; off += ilen) {
ilen = 0;
- printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off,
- dump_insn(&x, start + off, buffer + off, len - off, &ilen));
+ printed += fprintf(fp, "\t%016" PRIx64 "\t", start + off);
+ printed += any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen, fp);
if (PRINT_FIELD(BRSTACKINSNLEN))
printed += fprintf(fp, "\tilen: %d", ilen);
printed += fprintf(fp, "\n");
@@ -1517,7 +1548,8 @@ void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
static int perf_sample__fprintf_insn(struct perf_sample *sample,
struct perf_event_attr *attr,
struct thread *thread,
- struct machine *machine, FILE *fp)
+ struct machine *machine, FILE *fp,
+ struct addr_location *al)
{
int printed = 0;
@@ -1531,9 +1563,9 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
}
if (PRINT_FIELD(DISASM) && sample->insn_len) {
printed += fprintf(fp, "\t\t");
- printed += sample__fprintf_insn_asm(sample, thread, machine, fp);
+ printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al);
}
- if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN))
+ if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM))
printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp);
return printed;
@@ -1606,7 +1638,7 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
if (print_srcline_last)
printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
- printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
+ printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al);
printed += fprintf(fp, "\n");
if (PRINT_FIELD(SRCCODE)) {
int ret = map__fprintf_srccode(al->map, al->addr, stdout,
@@ -2259,7 +2291,7 @@ static void process_event(struct perf_script *script,
if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
perf_sample__fprintf_bpf_output(sample, fp);
- perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
+ perf_sample__fprintf_insn(sample, attr, thread, machine, fp, al);
if (PRINT_FIELD(PHYS_ADDR))
fprintf(fp, "%16" PRIx64, sample->phys_addr);
@@ -3471,7 +3503,7 @@ static int check_ev_match(char *dir_name, char *scriptname,
match = 0;
evlist__for_each_entry(session->evlist, pos) {
- if (!strcmp(evsel__name(pos), evname)) {
+ if (evsel__name_is(pos, evname)) {
match = 1;
break;
}
@@ -3806,7 +3838,7 @@ static int parse_insn_trace(const struct option *opt __maybe_unused,
if (ret < 0)
return ret;
- itrace_parse_synth_opts(opt, "i0ns", 0);
+ itrace_parse_synth_opts(opt, "i0nse", 0);
symbol_conf.nanosecs = true;
return 0;
}
@@ -3939,7 +3971,7 @@ int cmd_script(int argc, const char **argv)
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff,"
"addr,symoff,srcline,period,iregs,uregs,brstack,"
"brstacksym,flags,data_src,weight,bpf-output,brstackinsn,"
- "brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth,"
+ "brstackinsnlen,brstackdisasm,brstackoff,callindent,insn,disasm,insnlen,synth,"
"phys_addr,metric,misc,srccode,ipc,tod,data_page_size,"
"code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat",
parse_output_fields),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6bba1a89d03015..65a3dd7ffac30c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -164,26 +164,6 @@ static struct perf_stat_config stat_config = {
.iostat_run = false,
};
-static bool cpus_map_matched(struct evsel *a, struct evsel *b)
-{
- if (!a->core.cpus && !b->core.cpus)
- return true;
-
- if (!a->core.cpus || !b->core.cpus)
- return false;
-
- if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus))
- return false;
-
- for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
- if (perf_cpu_map__cpu(a->core.cpus, i).cpu !=
- perf_cpu_map__cpu(b->core.cpus, i).cpu)
- return false;
- }
-
- return true;
-}
-
static void evlist__check_cpu_maps(struct evlist *evlist)
{
struct evsel *evsel, *warned_leader = NULL;
@@ -194,7 +174,7 @@ static void evlist__check_cpu_maps(struct evlist *evlist)
/* Check that leader matches cpus with each member. */
if (leader == evsel)
continue;
- if (cpus_map_matched(leader, evsel))
+ if (perf_cpu_map__equal(leader->core.cpus, evsel->core.cpus))
continue;
/* If there's mismatch disable the group and warn user. */
@@ -1319,10 +1299,9 @@ static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
* be the first online CPU in the cache domain else use the
* first online CPU of the cache domain as the ID.
*/
- if (perf_cpu_map__has_any_cpu_or_is_empty(cpu_map))
+ id = perf_cpu_map__min(cpu_map).cpu;
+ if (id == -1)
id = cpu.cpu;
- else
- id = perf_cpu_map__cpu(cpu_map, 0).cpu;
/* Free the perf_cpu_map used to find the cache ID */
perf_cpu_map__put(cpu_map);
@@ -1642,7 +1621,7 @@ static int perf_stat_init_aggr_mode(void)
* taking the highest cpu number to be the size of
* the aggregation translate cpumap.
*/
- if (!perf_cpu_map__has_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus))
nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu;
else
nr = 0;
@@ -2106,6 +2085,7 @@ static int add_default_attributes(void)
stat_config.metric_no_threshold,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
+ stat_config.hardware_aware_grouping,
&stat_config.metric_events);
}
@@ -2139,6 +2119,7 @@ static int add_default_attributes(void)
stat_config.metric_no_threshold,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
+ stat_config.hardware_aware_grouping,
&stat_config.metric_events);
}
@@ -2173,6 +2154,7 @@ static int add_default_attributes(void)
/*metric_no_threshold=*/true,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
+ stat_config.hardware_aware_grouping,
&stat_config.metric_events) < 0)
return -1;
}
@@ -2214,6 +2196,7 @@ static int add_default_attributes(void)
/*metric_no_threshold=*/true,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
+ stat_config.hardware_aware_grouping,
&stat_config.metric_events) < 0)
return -1;
@@ -2334,7 +2317,7 @@ int process_stat_config_event(struct perf_session *session,
perf_event__read_stat_config(&stat_config, &event->stat_config);
- if (perf_cpu_map__has_any_cpu_or_is_empty(st->cpus)) {
+ if (perf_cpu_map__is_empty(st->cpus)) {
if (st->aggr_mode != AGGR_UNSET)
pr_warning("warning: processing task data, aggregation mode not set\n");
} else if (st->aggr_mode != AGGR_UNSET) {
@@ -2748,6 +2731,7 @@ int cmd_stat(int argc, const char **argv)
stat_config.metric_no_threshold,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
+ stat_config.hardware_aware_grouping,
&stat_config.metric_events);
zfree(&metrics);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 90eaff8c0f6e3a..e5fef39c34bf40 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -947,6 +947,15 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
{ .name = "eventfd2",
.arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
+ { .name = "faccessat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ },
+ [1] = { .scnprintf = SCA_FILENAME, /* pathname */ },
+ [2] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
+ { .name = "faccessat2",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ },
+ [1] = { .scnprintf = SCA_FILENAME, /* pathname */ },
+ [2] = { .scnprintf = SCA_ACCMODE, /* mode */ },
+ [3] = { .scnprintf = SCA_FACCESSAT2_FLAGS, /* flags */ }, }, },
{ .name = "fchmodat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
{ .name = "fchownat",
@@ -969,7 +978,6 @@ static const struct syscall_fmt syscall_fmts[] = {
[1] = { .scnprintf = SCA_FILENAME, /* path */ },
[2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, },
{ .name = "fstat", .alias = "newfstat", },
- { .name = "fstatat", .alias = "newfstatat", },
{ .name = "futex",
.arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
[5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
@@ -1049,8 +1057,12 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, },
{ .name = "name_to_handle_at",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
- { .name = "newfstatat",
- .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+ { .name = "nanosleep",
+ .arg = { [0] = { .scnprintf = SCA_TIMESPEC, /* req */ }, }, },
+ { .name = "newfstatat", .alias = "fstatat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ },
+ [1] = { .scnprintf = SCA_FILENAME, /* pathname */ },
+ [3] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ }, }, },
{ .name = "open",
.arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
{ .name = "open_by_handle_at",
@@ -1142,7 +1154,7 @@ static const struct syscall_fmt syscall_fmts[] = {
{ .name = "stat", .alias = "newstat", },
{ .name = "statx",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
- [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
+ [2] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ } ,
[3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
{ .name = "swapoff",
.arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
@@ -1160,7 +1172,9 @@ static const struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
{ .name = "uname", .alias = "newuname", },
{ .name = "unlinkat",
- .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
+ [1] = { .scnprintf = SCA_FILENAME, /* pathname */ },
+ [2] = { .scnprintf = SCA_FS_AT_FLAGS, /* flags */ }, }, },
{ .name = "utimensat",
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
{ .name = "wait4", .errpid = true,
@@ -4902,7 +4916,7 @@ int cmd_trace(int argc, const char **argv)
goto out;
}
trace.syscalls.events.bpf_output = evlist__last(trace.evlist);
- assert(!strcmp(evsel__name(trace.syscalls.events.bpf_output), "__augmented_syscalls__"));
+ assert(evsel__name_is(trace.syscalls.events.bpf_output, "__augmented_syscalls__"));
skip_augmentation:
#endif
err = -1;
@@ -4959,7 +4973,7 @@ skip_augmentation:
*/
if (trace.syscalls.events.bpf_output) {
evlist__for_each_entry(trace.evlist, evsel) {
- bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
+ bool raw_syscalls_sys_exit = evsel__name_is(evsel, "raw_syscalls:sys_exit");
if (raw_syscalls_sys_exit) {
trace.raw_augmented_syscalls = true;
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index f2ab5bae2150be..f4375deabfa395 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -2,8 +2,10 @@
#ifndef BUILTIN_H
#define BUILTIN_H
+struct cmdnames;
+
void list_common_cmds_help(void);
-const char *help_unknown_cmd(const char *cmd);
+const char *help_unknown_cmd(const char *cmd, struct cmdnames *main_cmds);
int cmd_annotate(int argc, const char **argv);
int cmd_bench(int argc, const char **argv);
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 66ba33dbcef22b..672421b858ac1a 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -9,23 +9,15 @@ FILES=(
"include/uapi/linux/const.h"
"include/uapi/drm/drm.h"
"include/uapi/drm/i915_drm.h"
+ "include/uapi/linux/bits.h"
"include/uapi/linux/fadvise.h"
- "include/uapi/linux/fcntl.h"
- "include/uapi/linux/fs.h"
"include/uapi/linux/fscrypt.h"
"include/uapi/linux/kcmp.h"
"include/uapi/linux/kvm.h"
"include/uapi/linux/in.h"
- "include/uapi/linux/mount.h"
- "include/uapi/linux/openat2.h"
"include/uapi/linux/perf_event.h"
- "include/uapi/linux/prctl.h"
- "include/uapi/linux/sched.h"
"include/uapi/linux/seccomp.h"
"include/uapi/linux/stat.h"
- "include/uapi/linux/usbdevice_fs.h"
- "include/uapi/linux/vhost.h"
- "include/uapi/sound/asound.h"
"include/linux/bits.h"
"include/vdso/bits.h"
"include/linux/const.h"
@@ -38,9 +30,7 @@ FILES=(
"arch/x86/include/asm/cpufeatures.h"
"arch/x86/include/asm/inat_types.h"
"arch/x86/include/asm/emulate_prefix.h"
- "arch/x86/include/asm/irq_vectors.h"
"arch/x86/include/asm/msr-index.h"
- "arch/x86/include/uapi/asm/prctl.h"
"arch/x86/lib/x86-opcode-map.txt"
"arch/x86/tools/gen-insn-attr-x86.awk"
"arch/arm/include/uapi/asm/perf_regs.h"
@@ -97,7 +87,18 @@ SYNC_CHECK_FILES=(
declare -a BEAUTY_FILES
BEAUTY_FILES=(
+ "arch/x86/include/asm/irq_vectors.h"
+ "arch/x86/include/uapi/asm/prctl.h"
"include/linux/socket.h"
+ "include/uapi/linux/fcntl.h"
+ "include/uapi/linux/fs.h"
+ "include/uapi/linux/mount.h"
+ "include/uapi/linux/prctl.h"
+ "include/uapi/linux/sched.h"
+ "include/uapi/linux/stat.h"
+ "include/uapi/linux/usbdevice_fs.h"
+ "include/uapi/linux/vhost.h"
+ "include/uapi/sound/asound.h"
)
declare -a FAILURES
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index f94795794b3614..6ed7e52ab88175 100755
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -34,7 +34,7 @@ if [ $UNPACK -eq 1 ]; then
TARGET=`find . -regex "\./perf.*\.tar\.bz2"`
TARGET_NUM=`echo -n "$TARGET" | grep -c '^'`
- if [ -z "$TARGET" -o $TARGET_NUM -gt 1 ]; then
+ if [ -z "$TARGET" ] || [ $TARGET_NUM -gt 1 ]; then
echo -e "Error: $TARGET_NUM files found for unpacking:\n$TARGET"
echo "Provide the requested file as an argument"
exit 1
diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh
index f224d79b89e69f..69cba3c170d5ad 100644
--- a/tools/perf/perf-completion.sh
+++ b/tools/perf/perf-completion.sh
@@ -108,6 +108,8 @@ __perf__ltrim_colon_completions()
__perfcomp ()
{
+ # Expansion of spaces to array is deliberate.
+ # shellcheck disable=SC2207
COMPREPLY=( $( compgen -W "$1" -- "$2" ) )
}
@@ -127,13 +129,13 @@ __perf_prev_skip_opts ()
let i=cword-1
cmds_=$($cmd $1 --list-cmds)
- prev_skip_opts=()
+ prev_skip_opts=""
while [ $i -ge 0 ]; do
- if [[ ${words[i]} == $1 ]]; then
+ if [[ ${words[i]} == "$1" ]]; then
return
fi
for cmd_ in $cmds_; do
- if [[ ${words[i]} == $cmd_ ]]; then
+ if [[ ${words[i]} == "$cmd_" ]]; then
prev_skip_opts=${words[i]}
return
fi
@@ -164,9 +166,10 @@ __perf_main ()
$prev_skip_opts == @(record|stat|top) ]]; then
local cur1=${COMP_WORDS[COMP_CWORD]}
- local raw_evts=$($cmd list --raw-dump hw sw cache tracepoint pmu sdt)
+ local raw_evts
local arr s tmp result cpu_evts
+ raw_evts=$($cmd list --raw-dump hw sw cache tracepoint pmu sdt)
# aarch64 doesn't have /sys/bus/event_source/devices/cpu/events
if [[ `uname -m` != aarch64 ]]; then
cpu_evts=$(ls /sys/bus/event_source/devices/cpu/events)
@@ -175,10 +178,12 @@ __perf_main ()
if [[ "$cur1" == */* && ${cur1#*/} =~ ^[A-Z] ]]; then
OLD_IFS="$IFS"
IFS=" "
+ # Expansion of spaces to array is deliberate.
+ # shellcheck disable=SC2206
arr=($raw_evts)
IFS="$OLD_IFS"
- for s in ${arr[@]}
+ for s in "${arr[@]}"
do
if [[ "$s" == *cpu/* ]]; then
tmp=${s#*cpu/}
@@ -200,11 +205,13 @@ __perf_main ()
fi
elif [[ $prev == @("--pfm-events") &&
$prev_skip_opts == @(record|stat|top) ]]; then
- local evts=$($cmd list --raw-dump pfm)
+ local evts
+ evts=$($cmd list --raw-dump pfm)
__perfcomp "$evts" "$cur"
elif [[ $prev == @("-M"|"--metrics") &&
$prev_skip_opts == @(stat) ]]; then
- local metrics=$($cmd list --raw-dump metric metricgroup)
+ local metrics
+ metrics=$($cmd list --raw-dump metric metricgroup)
__perfcomp "$metrics" "$cur"
else
# List subcommands for perf commands
@@ -278,6 +285,8 @@ if [[ -n ${ZSH_VERSION-} ]]; then
let cword=CURRENT-1
emulate ksh -c __perf_main
let _ret && _default && _ret=0
+ # _ret is only assigned 0 or 1, disable inaccurate analysis.
+ # shellcheck disable=SC2152
return _ret
}
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 921bee0a643707..bd3f80b5bb4623 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -18,6 +18,7 @@
#include <subcmd/run-command.h>
#include "util/parse-events.h"
#include <subcmd/parse-options.h>
+#include <subcmd/help.h>
#include "util/debug.h"
#include "util/event.h"
#include "util/util.h" // usage()
@@ -458,7 +459,7 @@ static int libperf_print(enum libperf_print_level level,
int main(int argc, const char **argv)
{
- int err;
+ int err, done_help = 0;
const char *cmd;
char sbuf[STRERR_BUFSIZE];
@@ -557,22 +558,32 @@ int main(int argc, const char **argv)
pthread__block_sigwinch();
while (1) {
- static int done_help;
-
run_argv(&argc, &argv);
if (errno != ENOENT)
break;
if (!done_help) {
- cmd = argv[0] = help_unknown_cmd(cmd);
+ struct cmdnames main_cmds = {};
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(commands); i++) {
+ add_cmdname(&main_cmds,
+ commands[i].cmd,
+ strlen(commands[i].cmd));
+ }
+ cmd = argv[0] = help_unknown_cmd(cmd, &main_cmds);
+ clean_cmdnames(&main_cmds);
done_help = 1;
+ if (!cmd)
+ break;
} else
break;
}
- fprintf(stderr, "Failed to run command '%s': %s\n",
- cmd, str_error_r(errno, sbuf, sizeof(sbuf)));
+ if (cmd) {
+ fprintf(stderr, "Failed to run command '%s': %s\n",
+ cmd, str_error_r(errno, sbuf, sizeof(sbuf)));
+ }
out:
if (debug_fp)
fclose(debug_fp);
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
index 7a2b7b200f1449..ac75f12e27bf5a 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
@@ -9,7 +9,9 @@
"ArchStdEvent": "L1D_CACHE_REFILL_RD"
},
{
- "ArchStdEvent": "L1D_CACHE_INVAL"
+ "ArchStdEvent": "L1D_CACHE_INVAL",
+ "Errata": "Errata AC03_CPU_41",
+ "BriefDescription": "L1D cache invalidate. Impacted by errata -"
},
{
"ArchStdEvent": "L1D_TLB_REFILL_RD"
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json
index c50d8e930b05ee..f4bfe7083a6bad 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json
@@ -9,7 +9,9 @@
"ArchStdEvent": "L1D_CACHE_REFILL_RD"
},
{
- "ArchStdEvent": "L1D_CACHE_INVAL"
+ "ArchStdEvent": "L1D_CACHE_INVAL",
+ "Errata": "Errata AC04_CPU_1",
+ "BriefDescription": "L1D cache invalidate. Impacted by errata -"
},
{
"ArchStdEvent": "L1D_TLB_REFILL_RD"
diff --git a/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json
index ec2ff78e2b5f2c..3ab1d3a6638c46 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json
@@ -2,71 +2,71 @@
{
"BriefDescription": "Transaction count",
"MetricName": "transaction",
- "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL"
+ "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL if has_event(TX_C_TEND) else 0"
},
{
"BriefDescription": "Cycles per Instruction",
"MetricName": "cpi",
- "MetricExpr": "CPU_CYCLES / INSTRUCTIONS"
+ "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(INSTRUCTIONS) else 0"
},
{
"BriefDescription": "Problem State Instruction Ratio",
"MetricName": "prbstate",
- "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100"
+ "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0"
},
{
"BriefDescription": "Level One Miss per 100 Instructions",
"MetricName": "l1mp",
- "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100"
+ "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0"
},
{
"BriefDescription": "Percentage sourced from Level 2 cache",
"MetricName": "l2p",
- "MetricExpr": "((DCW_REQ + DCW_REQ_IV + ICW_REQ + ICW_REQ_IV) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100"
+ "MetricExpr": "((DCW_REQ + DCW_REQ_IV + ICW_REQ + ICW_REQ_IV) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ) else 0"
},
{
"BriefDescription": "Percentage sourced from Level 3 on same chip cache",
"MetricName": "l3p",
- "MetricExpr": "((DCW_REQ_CHIP_HIT + DCW_ON_CHIP + DCW_ON_CHIP_IV + DCW_ON_CHIP_CHIP_HIT + ICW_REQ_CHIP_HIT + ICW_ON_CHIP + ICW_ON_CHIP_IV + ICW_ON_CHIP_CHIP_HIT) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100"
+ "MetricExpr": "((DCW_REQ_CHIP_HIT + DCW_ON_CHIP + DCW_ON_CHIP_IV + DCW_ON_CHIP_CHIP_HIT + ICW_REQ_CHIP_HIT + ICW_ON_CHIP + ICW_ON_CHIP_IV + ICW_ON_CHIP_CHIP_HIT) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ_CHIP_HIT) else 0"
},
{
"BriefDescription": "Percentage sourced from Level 4 Local cache on same book",
"MetricName": "l4lp",
- "MetricExpr": "((DCW_REQ_DRAWER_HIT + DCW_ON_CHIP_DRAWER_HIT + DCW_ON_MODULE + DCW_ON_DRAWER + IDCW_ON_MODULE_IV + IDCW_ON_MODULE_CHIP_HIT + IDCW_ON_MODULE_DRAWER_HIT + IDCW_ON_DRAWER_IV + IDCW_ON_DRAWER_CHIP_HIT + IDCW_ON_DRAWER_DRAWER_HIT + ICW_REQ_DRAWER_HIT + ICW_ON_CHIP_DRAWER_HIT + ICW_ON_MODULE + ICW_ON_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100"
+ "MetricExpr": "((DCW_REQ_DRAWER_HIT + DCW_ON_CHIP_DRAWER_HIT + DCW_ON_MODULE + DCW_ON_DRAWER + IDCW_ON_MODULE_IV + IDCW_ON_MODULE_CHIP_HIT + IDCW_ON_MODULE_DRAWER_HIT + IDCW_ON_DRAWER_IV + IDCW_ON_DRAWER_CHIP_HIT + IDCW_ON_DRAWER_DRAWER_HIT + ICW_REQ_DRAWER_HIT + ICW_ON_CHIP_DRAWER_HIT + ICW_ON_MODULE + ICW_ON_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_REQ_DRAWER_HIT) else 0"
},
{
"BriefDescription": "Percentage sourced from Level 4 Remote cache on different book",
"MetricName": "l4rp",
- "MetricExpr": "((DCW_OFF_DRAWER + IDCW_OFF_DRAWER_IV + IDCW_OFF_DRAWER_CHIP_HIT + IDCW_OFF_DRAWER_DRAWER_HIT + ICW_OFF_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100"
+ "MetricExpr": "((DCW_OFF_DRAWER + IDCW_OFF_DRAWER_IV + IDCW_OFF_DRAWER_CHIP_HIT + IDCW_OFF_DRAWER_DRAWER_HIT + ICW_OFF_DRAWER) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_OFF_DRAWER) else 0"
},
{
"BriefDescription": "Percentage sourced from memory",
"MetricName": "memp",
- "MetricExpr": "((DCW_ON_CHIP_MEMORY + DCW_ON_MODULE_MEMORY + DCW_ON_DRAWER_MEMORY + DCW_OFF_DRAWER_MEMORY + ICW_ON_CHIP_MEMORY + ICW_ON_MODULE_MEMORY + ICW_ON_DRAWER_MEMORY + ICW_OFF_DRAWER_MEMORY) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100"
+ "MetricExpr": "((DCW_ON_CHIP_MEMORY + DCW_ON_MODULE_MEMORY + DCW_ON_DRAWER_MEMORY + DCW_OFF_DRAWER_MEMORY + ICW_ON_CHIP_MEMORY + ICW_ON_MODULE_MEMORY + ICW_ON_DRAWER_MEMORY + ICW_OFF_DRAWER_MEMORY) / (L1I_DIR_WRITES + L1D_DIR_WRITES)) * 100 if has_event(DCW_ON_CHIP_MEMORY) else 0"
},
{
"BriefDescription": "Cycles per Instructions from Finite cache/memory",
"MetricName": "finite_cpi",
- "MetricExpr": "L1C_TLB2_MISSES / INSTRUCTIONS"
+ "MetricExpr": "L1C_TLB2_MISSES / INSTRUCTIONS if has_event(L1C_TLB2_MISSES) else 0"
},
{
"BriefDescription": "Estimated Instruction Complexity CPI infinite Level 1",
"MetricName": "est_cpi",
- "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS)"
+ "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(INSTRUCTIONS) else 0"
},
{
"BriefDescription": "Estimated Sourcing Cycles per Level 1 Miss",
"MetricName": "scpl1m",
- "MetricExpr": "L1C_TLB2_MISSES / (L1I_DIR_WRITES + L1D_DIR_WRITES)"
+ "MetricExpr": "L1C_TLB2_MISSES / (L1I_DIR_WRITES + L1D_DIR_WRITES) if has_event(L1C_TLB2_MISSES) else 0"
},
{
"BriefDescription": "Estimated TLB CPU percentage of Total CPU",
"MetricName": "tlb_percent",
- "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / CPU_CYCLES) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES)) * 100"
+ "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / CPU_CYCLES) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES)) * 100 if has_event(CPU_CYCLES) else 0"
},
{
"BriefDescription": "Estimated Cycles per TLB Miss",
"MetricName": "tlb_miss",
- "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / (DTLB2_WRITES + ITLB2_WRITES)) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES))"
+ "MetricExpr": "((DTLB2_MISSES + ITLB2_MISSES) / (DTLB2_WRITES + ITLB2_WRITES)) * (L1C_TLB2_MISSES / (L1I_PENALTY_CYCLES + L1D_PENALTY_CYCLES)) if has_event(DTLB2_MISSES) else 0"
}
]
diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv
index a918e1af77a573..b22648d127517a 100644
--- a/tools/perf/pmu-events/arch/s390/mapfile.csv
+++ b/tools/perf/pmu-events/arch/s390/mapfile.csv
@@ -5,4 +5,4 @@ Family-model,Version,Filename,EventType
^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core
^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core
^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_z15,core
-^IBM.393[12].*3\.7.[[:xdigit:]]+$,3,cf_z16,core
+^IBM.393[12].*$,3,cf_z16,core
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
index f2d378c9d68f7e..0aed533da88241 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -732,9 +732,8 @@
{
"BriefDescription": "Average Parallel L2 cache miss data reads",
"MetricExpr": "tma_info_memory_latency_data_l2_mlp",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_data_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_data_l2_mlp"
},
{
"BriefDescription": "",
@@ -745,9 +744,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
@@ -764,9 +762,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l2_cache_fill_bw_2t"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
@@ -807,9 +804,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l3_cache_fill_bw_2t"
},
{
"BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
@@ -838,16 +834,14 @@
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_miss_latency",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_load_l2_mlp"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
@@ -867,9 +861,8 @@
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "tma_info_memory_tlb_page_walks_utilization",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_page_walks_utilization",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_page_walks_utilization"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 7f88b156f73b6f..297046818efe26 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -670,23 +670,20 @@
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
"MetricExpr": "(100 * (1 - tma_core_bound / (((EXE_ACTIVITY.EXE_BOUND_0_PORTS + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) if tma_core_bound < (((EXE_ACTIVITY.EXE_BOUND_0_PORTS + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
- "MetricGroup": "Cor;SMT;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_core_bound_likely",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_core_bound_likely"
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck.",
"MetricExpr": "100 * (100 * (tma_fetch_latency * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(2 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) + tma_fetch_bandwidth * tma_mite / (tma_mite + tma_dsb)))",
- "MetricGroup": "DSBmiss;Fed;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_dsb_misses",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_botlnk_dsb_misses"
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck.",
"MetricExpr": "100 * (100 * (tma_fetch_latency * ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD) / ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(2 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD)))",
- "MetricGroup": "Fed;FetchLat;IcMiss;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_ic_misses",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_botlnk_ic_misses"
},
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
@@ -1045,9 +1042,8 @@
{
"BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_code_stlb_mpki",
- "MetricGroup": "Fed;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_code_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_code_stlb_mpki"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
@@ -1088,9 +1084,8 @@
{
"BriefDescription": "Average Parallel L2 cache miss data reads",
"MetricExpr": "tma_info_memory_latency_data_l2_mlp",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_data_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_data_l2_mlp"
},
{
"BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
@@ -1107,9 +1102,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
@@ -1132,23 +1126,20 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l2_cache_fill_bw_2t"
},
{
"BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
"MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki"
},
{
"BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
"MetricExpr": "1e3 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_evictions_silent_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_l2_evictions_silent_pki"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
@@ -1189,9 +1180,8 @@
{
"BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l3_cache_access_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_l3_cache_access_bw_2t"
},
{
"BriefDescription": "",
@@ -1202,9 +1192,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l3_cache_fill_bw_2t"
},
{
"BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
@@ -1233,16 +1222,14 @@
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_miss_latency",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_load_l2_mlp"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
@@ -1253,9 +1240,8 @@
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_load_stlb_mpki",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_load_stlb_mpki"
},
{
"BriefDescription": "Un-cacheable retired load per kilo instruction",
@@ -1273,16 +1259,14 @@
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "tma_info_memory_tlb_page_walks_utilization",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_page_walks_utilization",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_page_walks_utilization"
},
{
"BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_store_stlb_mpki",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_store_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_store_stlb_mpki"
},
{
"BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
@@ -1313,9 +1297,8 @@
{
"BriefDescription": "Un-cacheable retired load per kilo instruction",
"MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
- "MetricGroup": "Mem;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_uc_load_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem",
+ "MetricName": "tma_info_memory_uc_load_pki"
},
{
"BriefDescription": "",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
index 095904c7700198..d6f543471b2436 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json
@@ -19,7 +19,7 @@
"BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
"EventCode": "0xAB",
"EventName": "DSB2MITE_SWITCHES.COUNT",
- "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
+ "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses. Note: Invoking MITE requires two or three cycles delay.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -267,11 +267,11 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.DSB_CYCLES_OK]",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 or more Uops [This event is alias to IDQ.DSB_CYCLES_OK]",
"CounterMask": "4",
"EventCode": "0x79",
"EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]",
+ "PublicDescription": "Counts the number of cycles 4 or more uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
@@ -321,11 +321,11 @@
"UMask": "0x18"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 or more Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
"CounterMask": "4",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "PublicDescription": "Counts the number of cycles 4 or more uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
index a00ad0aaf1bad7..c69b2c33334b5e 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json
@@ -6866,7 +6866,7 @@
"BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
"EventCode": "0xC9",
"EventName": "RTM_RETIRED.ABORTED",
- "PEBS": "1",
+ "PEBS": "2",
"PublicDescription": "Number of times RTM abort was triggered.",
"SampleAfterValue": "2000003",
"UMask": "0x4"
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
index 3ab5e91a4c1c72..95d42ac3671787 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json
@@ -19,7 +19,7 @@
"BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
"EventCode": "0x28",
"EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
- "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture). This includes high current AVX 512-bit instructions.",
"SampleAfterValue": "200003",
"UMask": "0x20"
},
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
index 66d686cc933e35..c50ddf5b40dd09 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json
@@ -396,7 +396,7 @@
"Errata": "SKL091, SKL044",
"EventCode": "0xC0",
"EventName": "INST_RETIRED.NOP",
- "PEBS": "1",
+ "PEBS": "2",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json
index 1a342dff1503ac..3fe9ce483bbef3 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-interconnect.json
@@ -38,7 +38,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.CLFLUSH",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x80",
"Unit": "IRP"
},
@@ -47,7 +47,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.CRD",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x2",
"Unit": "IRP"
},
@@ -56,7 +56,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.DRD",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x4",
"Unit": "IRP"
},
@@ -65,7 +65,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.PCIDCAHINT",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x20",
"Unit": "IRP"
},
@@ -74,7 +74,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.PCIRDCUR",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x1",
"Unit": "IRP"
},
@@ -101,7 +101,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.WBMTOI",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x40",
"Unit": "IRP"
},
@@ -500,7 +500,7 @@
"EventCode": "0x11",
"EventName": "UNC_I_TRANSACTIONS.WRITES",
"PerPkg": "1",
- "PublicDescription": "Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID.; Trackes only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.",
+ "PublicDescription": "Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID.; Tracks only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.",
"UMask": "0x2",
"Unit": "IRP"
},
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/virtual-memory.json
index f59405877ae8bc..73feadaf767406 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/virtual-memory.json
@@ -205,7 +205,7 @@
"BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
"EventCode": "0x85",
"EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
+ "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake microarchitecture.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json
index 9e53da55d0c118..93d99318a62390 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json
@@ -267,7 +267,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json
index e8bf7c9c44e1a3..5420f529f49185 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json
@@ -264,6 +264,7 @@
"BriefDescription": "Number of times an RTM execution aborted.",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED",
+ "PEBS": "1",
"PublicDescription": "Counts the number of times RTM abort was triggered.",
"SampleAfterValue": "100003",
"UMask": "0x4"
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
index 1f8200fb896476..e2086bedeca809 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
@@ -428,6 +428,7 @@
"BriefDescription": "INST_RETIRED.MACRO_FUSED",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
+ "PEBS": "1",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -435,6 +436,7 @@
"BriefDescription": "Retired NOP instructions.",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
+ "PEBS": "1",
"PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions",
"SampleAfterValue": "2000003",
"UMask": "0x2"
@@ -451,6 +453,7 @@
"BriefDescription": "Iterations of Repeat string retired instructions.",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
+ "PEBS": "1",
"PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json
index 86a8f3b7fe1d6a..141dab46682e36 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json
@@ -4198,6 +4198,42 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "ItoMCacheNear (partial write) transactions from an IO device that addresses memory on the local socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcd42ff04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "ItoMCacheNear (partial write) transactions from an IO device that addresses memory on a remote socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcd437f04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "ItoM (write) transactions from an IO device that addresses memory on the local socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcc42ff04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "ItoM (write) transactions from an IO device that addresses memory on a remote socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOM_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcc437f04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts; Misses from local IO",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS",
@@ -4207,7 +4243,7 @@
"Unit": "CHA"
},
{
- "BriefDescription": "TOR Inserts; ItoM misses from local IO",
+ "BriefDescription": "TOR Inserts : ItoM, indicating a full cacheline write request, from IO Devices that missed the LLC",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM",
"PerPkg": "1",
@@ -4225,7 +4261,7 @@
"Unit": "CHA"
},
{
- "BriefDescription": "TOR Inserts; RdCur and FsRdCur misses from local IO",
+ "BriefDescription": "TOR Inserts; RdCur and FsRdCur requests from local IO that miss LLC",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR",
"PerPkg": "1",
@@ -4252,6 +4288,24 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "PCIRDCUR (read) transactions from an IO device that addresses memory on a remote socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xc8f2ff04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "PCIRDCUR (read) transactions from an IO device that addresses memory on the local socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xc8f37f04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts; RFO from local IO",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_RFO",
@@ -5714,6 +5768,42 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC and targets local memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOMCACHENEAR_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcd42fe04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC and targets remote memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOMCACHENEAR_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcd437e04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy; ITOM misses from local IO and targets local memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOM_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcc42fe04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy; ITOM misses from local IO and targets remote memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOM_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcc437e04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy; RdCur and FsRdCur misses from local IO",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_PCIRDCUR",
@@ -5723,6 +5813,24 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy; RdCur and FsRdCur misses from local IO and targets local memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_PCIRDCUR_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xc8f2fe04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy; RdCur and FsRdCur misses from local IO and targets remote memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_PCIRDCUR_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xc8f37e04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy; RFO misses from local IO",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_RFO",
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json
index 65d088556bae8d..22bb490e96662d 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-interconnect.json
@@ -4888,7 +4888,7 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO\r\nIngress (V-EMIB) (AD)",
+ "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO Ingress (V-EMIB) (AD)",
"EventCode": "0x4B",
"EventName": "UNC_MDF_CRS_TxR_V_BOUNCES.AD",
"PerPkg": "1",
@@ -4897,7 +4897,7 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO\r\nIngress (V-EMIB) (AK)",
+ "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO Ingress (V-EMIB) (AK)",
"EventCode": "0x4B",
"EventName": "UNC_MDF_CRS_TxR_V_BOUNCES.AK",
"PerPkg": "1",
@@ -4906,7 +4906,7 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO\r\nIngress (V-EMIB) (AKC)",
+ "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO Ingress (V-EMIB) (AKC)",
"EventCode": "0x4B",
"EventName": "UNC_MDF_CRS_TxR_V_BOUNCES.AKC",
"PerPkg": "1",
@@ -4915,7 +4915,7 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO\r\nIngress (V-EMIB) (BL)",
+ "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO Ingress (V-EMIB) (BL)",
"EventCode": "0x4B",
"EventName": "UNC_MDF_CRS_TxR_V_BOUNCES.BL",
"PerPkg": "1",
@@ -4924,7 +4924,7 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO\r\nIngress (V-EMIB) (IV)",
+ "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO Ingress (V-EMIB) (IV)",
"EventCode": "0x4B",
"EventName": "UNC_MDF_CRS_TxR_V_BOUNCES.IV",
"PerPkg": "1",
@@ -5291,7 +5291,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCB",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xe",
"Unit": "UPI"
},
@@ -5300,7 +5300,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCB_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10e",
"Unit": "UPI"
},
@@ -5309,7 +5309,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCS",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xf",
"Unit": "UPI"
},
@@ -5318,7 +5318,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCS_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10f",
"Unit": "UPI"
},
@@ -5763,7 +5763,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCB",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xe",
"Unit": "UPI"
},
@@ -5772,7 +5772,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCB_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10e",
"Unit": "UPI"
},
@@ -5781,7 +5781,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCS",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xf",
"Unit": "UPI"
},
@@ -5790,7 +5790,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCS_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10f",
"Unit": "UPI"
},
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/pipeline.json b/tools/perf/pmu-events/arch/x86/grandridge/pipeline.json
index daa0639bb1cabe..90292dc03d33e2 100644
--- a/tools/perf/pmu-events/arch/x86/grandridge/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/grandridge/pipeline.json
@@ -249,10 +249,17 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL_P]",
"EventCode": "0x73",
"EventName": "TOPDOWN_BAD_SPECULATION.ALL",
- "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL_P]",
+ "SampleAfterValue": "1000003"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL]",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.ALL_P",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL]",
"SampleAfterValue": "1000003"
},
{
@@ -284,7 +291,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls",
+ "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls [This event is alias to TOPDOWN_BE_BOUND.ALL_P]",
"EventCode": "0x74",
"EventName": "TOPDOWN_BE_BOUND.ALL",
"SampleAfterValue": "1000003"
@@ -297,6 +304,12 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls [This event is alias to TOPDOWN_BE_BOUND.ALL]",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.ALL_P",
+ "SampleAfterValue": "1000003"
+ },
+ {
"BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stall (scheduler not being able to accept another uop). This could be caused by RSV full or load/store buffer block.",
"EventCode": "0x74",
"EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
@@ -318,6 +331,13 @@
"UMask": "0x20"
},
{
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to ROB full",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40"
+ },
+ {
"BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to iq/jeu scoreboards or ms scb",
"EventCode": "0x74",
"EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
@@ -325,12 +345,18 @@
"UMask": "0x10"
},
{
- "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls",
+ "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls [This event is alias to TOPDOWN_FE_BOUND.ALL_P]",
"EventCode": "0x71",
"EventName": "TOPDOWN_FE_BOUND.ALL",
"SampleAfterValue": "1000003"
},
{
+ "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls [This event is alias to TOPDOWN_FE_BOUND.ALL]",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ALL_P",
+ "SampleAfterValue": "1000003"
+ },
+ {
"BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BAClear",
"EventCode": "0x71",
"EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
@@ -402,13 +428,20 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Counts the number of consumed retirement slots. Similar to UOPS_RETIRED.ALL",
+ "BriefDescription": "Counts the number of consumed retirement slots. Similar to UOPS_RETIRED.ALL [This event is alias to TOPDOWN_RETIRING.ALL_P]",
"EventCode": "0x72",
"EventName": "TOPDOWN_RETIRING.ALL",
"PEBS": "1",
"SampleAfterValue": "1000003"
},
{
+ "BriefDescription": "Counts the number of consumed retirement slots. Similar to UOPS_RETIRED.ALL [This event is alias to TOPDOWN_RETIRING.ALL]",
+ "EventCode": "0x72",
+ "EventName": "TOPDOWN_RETIRING.ALL_P",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003"
+ },
+ {
"BriefDescription": "Counts the number of uops issued by the front end every cycle.",
"EventCode": "0x0e",
"EventName": "UOPS_ISSUED.ANY",
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/uncore-cache.json b/tools/perf/pmu-events/arch/x86/grandridge/uncore-cache.json
index 74dfd9272cef87..36614429dd720a 100644
--- a/tools/perf/pmu-events/arch/x86/grandridge/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/grandridge/uncore-cache.json
@@ -5,7 +5,6 @@
"EventName": "UNC_CHACMS_CLOCKTICKS",
"PerPkg": "1",
"PortMask": "0x000",
- "PublicDescription": "UNC_CHACMS_CLOCKTICKS",
"Unit": "CHACMS"
},
{
@@ -1217,6 +1216,15 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy for Data read opt from local IA that miss the cache",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_DRD_OPT",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Occupancy : DRd_Opts issued by iA Cores",
+ "UMask": "0xc827ff01",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy for Data read opt prefetch from local IA that miss the cache",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_DRD_OPT_PREF",
@@ -1253,6 +1261,15 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy for Data read opt from local IA that hit the cache",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_DRD_OPT",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Occupancy : DRd_Opts issued by iA Cores that hit the LLC",
+ "UMask": "0xc827fd01",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy for Data read opt prefetch from local IA that hit the cache",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_DRD_OPT_PREF",
@@ -1406,6 +1423,15 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy for Data read opt from local IA that miss the cache",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Occupancy : DRd_Opt issued by iA Cores that missed the LLC",
+ "UMask": "0xc827fe01",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy for Data read opt prefetch from local IA that miss the cache",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_PREF",
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
index 21e2cb5e31783d..83d50d80a148e3 100644
--- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -618,9 +618,8 @@
{
"BriefDescription": "Average Parallel L2 cache miss data reads",
"MetricExpr": "tma_info_memory_latency_data_l2_mlp",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_data_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_data_l2_mlp"
},
{
"BriefDescription": "",
@@ -631,9 +630,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
@@ -650,9 +648,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l2_cache_fill_bw_2t"
},
{
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
@@ -669,9 +666,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l3_cache_fill_bw_2t"
},
{
"BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
@@ -700,16 +696,14 @@
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_miss_latency",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_load_l2_mlp"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
@@ -729,9 +723,8 @@
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "tma_info_memory_tlb_page_walks_utilization",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_page_walks_utilization",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_page_walks_utilization"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/frontend.json b/tools/perf/pmu-events/arch/x86/icelakex/frontend.json
index f6edc4222f424f..66669d062e68a0 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/frontend.json
@@ -282,7 +282,7 @@
"CounterMask": "5",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
index c015b8277dc76d..769ba12bef876c 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
@@ -667,23 +667,20 @@
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
"MetricExpr": "(100 * (1 - max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots - (CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES) * (topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots)) / (((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots - (CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES) * (topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots)) * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) if max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots - (CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES) * (topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots)) < (((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots - (CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES) * (topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots)) * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
- "MetricGroup": "Cor;SMT;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_core_bound_likely",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_core_bound_likely"
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck.",
"MetricExpr": "100 * (100 * ((5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / slots * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 10 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(3 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) + max(0, topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / slots - (5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / slots) * ((IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2) / ((IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2 + (IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2)))",
- "MetricGroup": "DSBmiss;Fed;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_dsb_misses",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_botlnk_dsb_misses"
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck.",
"MetricExpr": "100 * (100 * ((5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / slots * (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD) / (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 10 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(3 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD)))",
- "MetricGroup": "Fed;FetchLat;IcMiss;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_ic_misses",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_botlnk_ic_misses"
},
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
@@ -1045,16 +1042,14 @@
{
"BriefDescription": "\"Bus lock\" per kilo instruction",
"MetricExpr": "tma_info_memory_mix_bus_lock_pki",
- "MetricGroup": "Mem;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_bus_lock_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem",
+ "MetricName": "tma_info_memory_bus_lock_pki"
},
{
"BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_code_stlb_mpki",
- "MetricGroup": "Fed;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_code_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_code_stlb_mpki"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
@@ -1095,9 +1090,8 @@
{
"BriefDescription": "Average Parallel L2 cache miss data reads",
"MetricExpr": "tma_info_memory_latency_data_l2_mlp",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_data_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_data_l2_mlp"
},
{
"BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
@@ -1114,9 +1108,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
@@ -1139,23 +1132,20 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l2_cache_fill_bw_2t"
},
{
"BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
"MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki"
},
{
"BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
"MetricExpr": "1e3 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_evictions_silent_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_l2_evictions_silent_pki"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
@@ -1190,9 +1180,8 @@
{
"BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l3_cache_access_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_l3_cache_access_bw_2t"
},
{
"BriefDescription": "",
@@ -1203,9 +1192,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l3_cache_fill_bw_2t"
},
{
"BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
@@ -1240,23 +1228,20 @@
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_miss_latency",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_load_l2_mlp"
},
{
"BriefDescription": "Average Latency for L3 cache miss demand Loads",
"MetricExpr": "cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,umask\\=0x10@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l3_miss_latency",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_load_l3_miss_latency"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
@@ -1267,9 +1252,8 @@
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_load_stlb_mpki",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_load_stlb_mpki"
},
{
"BriefDescription": "\"Bus lock\" per kilo instruction",
@@ -1293,16 +1277,14 @@
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (2 * (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD))",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_page_walks_utilization",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_page_walks_utilization"
},
{
"BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_store_stlb_mpki",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_store_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_store_stlb_mpki"
},
{
"BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
@@ -1332,9 +1314,8 @@
{
"BriefDescription": "Un-cacheable retired load per kilo instruction",
"MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
- "MetricGroup": "Mem;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_uc_load_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem",
+ "MetricName": "tma_info_memory_uc_load_pki"
},
{
"BriefDescription": "",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/memory.json b/tools/perf/pmu-events/arch/x86/icelakex/memory.json
index f36ac04f8d76a5..875b584b84431e 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/memory.json
@@ -319,6 +319,7 @@
"BriefDescription": "Number of times an RTM execution aborted.",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED",
+ "PEBS": "1",
"PublicDescription": "Counts the number of times RTM abort was triggered.",
"SampleAfterValue": "100003",
"UMask": "0x4"
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json
index b6ce14ebf84418..a950ba3ddcb483 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json
@@ -1580,7 +1580,7 @@
"Unit": "CHA"
},
{
- "BriefDescription": "This event is deprecated. Refer to new event UNC_CHA_LLC_LOOKUP.CODE_READ",
+ "BriefDescription": "This event is deprecated.",
"Deprecated": "1",
"EventCode": "0x34",
"EventName": "UNC_CHA_LLC_LOOKUP.CODE",
@@ -1677,7 +1677,7 @@
"Unit": "CHA"
},
{
- "BriefDescription": "This event is deprecated. Refer to new event UNC_CHA_LLC_LOOKUP.DATA_READ",
+ "BriefDescription": "This event is deprecated.",
"Deprecated": "1",
"EventCode": "0x34",
"EventName": "UNC_CHA_LLC_LOOKUP.DATA_READ_ALL",
@@ -6783,6 +6783,24 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "PCIRDCUR (read) transactions from an IO device that addresses memory on the local socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Inserts : PCIRdCurs issued by IO Devices and targets local memory : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xc8f2ff04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "PCIRDCUR (read) transactions from an IO device that addresses memory on a remote socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Inserts : PCIRdCurs issued by IO Devices and targets remote memory : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xc8f37f04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts : RFOs issued by IO Devices",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_RFO",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
index a066a009c51178..6997e6f7d3664d 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-interconnect.json
@@ -13523,7 +13523,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCB",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xe",
"Unit": "UPI"
},
@@ -13532,7 +13532,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCB_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10e",
"Unit": "UPI"
},
@@ -13541,7 +13541,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCS",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xf",
"Unit": "UPI"
},
@@ -13550,7 +13550,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCS_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10f",
"Unit": "UPI"
},
@@ -13559,7 +13559,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.REQ",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Request : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Request : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x8",
"Unit": "UPI"
},
@@ -13568,7 +13568,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.REQ_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Request, Match Opcode : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Request, Match Opcode : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x108",
"Unit": "UPI"
},
@@ -13577,7 +13577,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSPCNFLT",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Response - Conflict : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Response - Conflict : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x1aa",
"Unit": "UPI"
},
@@ -13586,7 +13586,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSPI",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Response - Invalid : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Response - Invalid : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x12a",
"Unit": "UPI"
},
@@ -13595,7 +13595,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSP_DATA",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Response - Data : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Response - Data : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xc",
"Unit": "UPI"
},
@@ -13604,7 +13604,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSP_DATA_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Response - Data, Match Opcode : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Response - Data, Match Opcode : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10c",
"Unit": "UPI"
},
@@ -13613,7 +13613,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSP_NODATA",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Response - No Data : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Response - No Data : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xa",
"Unit": "UPI"
},
@@ -13622,7 +13622,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSP_NODATA_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Response - No Data, Match Opcode : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Response - No Data, Match Opcode : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10a",
"Unit": "UPI"
},
@@ -13631,7 +13631,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.SNP",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Snoop : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Snoop : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x9",
"Unit": "UPI"
},
@@ -13640,7 +13640,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.SNP_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Snoop, Match Opcode : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Snoop, Match Opcode : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x109",
"Unit": "UPI"
},
@@ -13649,7 +13649,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.WB",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Writeback : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Writeback : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xd",
"Unit": "UPI"
},
@@ -13658,7 +13658,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.WB_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Writeback, Match Opcode : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Writeback, Match Opcode : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10d",
"Unit": "UPI"
},
@@ -14038,7 +14038,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCB",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xe",
"Unit": "UPI"
},
@@ -14047,7 +14047,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCB_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10e",
"Unit": "UPI"
},
@@ -14056,7 +14056,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCS",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xf",
"Unit": "UPI"
},
@@ -14065,7 +14065,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCS_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10f",
"Unit": "UPI"
},
@@ -14074,7 +14074,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.REQ",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Request : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Request : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x8",
"Unit": "UPI"
},
@@ -14083,7 +14083,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.REQ_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Request, Match Opcode : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Request, Match Opcode : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x108",
"Unit": "UPI"
},
@@ -14092,7 +14092,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSPCNFLT",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Response - Conflict : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Response - Conflict : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x1aa",
"Unit": "UPI"
},
@@ -14101,7 +14101,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSPI",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Response - Invalid : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Response - Invalid : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x12a",
"Unit": "UPI"
},
@@ -14110,7 +14110,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSP_DATA",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Response - Data : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Response - Data : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xc",
"Unit": "UPI"
},
@@ -14119,7 +14119,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSP_DATA_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Response - Data, Match Opcode : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Response - Data, Match Opcode : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10c",
"Unit": "UPI"
},
@@ -14128,7 +14128,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSP_NODATA",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Response - No Data : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Response - No Data : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xa",
"Unit": "UPI"
},
@@ -14137,7 +14137,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSP_NODATA_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Response - No Data, Match Opcode : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Response - No Data, Match Opcode : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10a",
"Unit": "UPI"
},
@@ -14146,7 +14146,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.SNP",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Snoop : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Snoop : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x9",
"Unit": "UPI"
},
@@ -14155,7 +14155,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.SNP_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Snoop, Match Opcode : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Snoop, Match Opcode : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x109",
"Unit": "UPI"
},
@@ -14164,7 +14164,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.WB",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Writeback : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Writeback : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xd",
"Unit": "UPI"
},
@@ -14173,7 +14173,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.WB_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Writeback, Match Opcode : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Writeback, Match Opcode : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10d",
"Unit": "UPI"
},
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-io.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-io.json
index 9cef8862c4283c..1b8a719b81a593 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-io.json
@@ -2477,17 +2477,6 @@
"Unit": "IIO"
},
{
- "BriefDescription": "Number requests sent to PCIe from main die : From IRP",
- "EventCode": "0xC2",
- "EventName": "UNC_IIO_NUM_REQ_FROM_CPU.IRP",
- "FCMask": "0x07",
- "PerPkg": "1",
- "PortMask": "0xFF",
- "PublicDescription": "Number requests sent to PCIe from main die : From IRP : Captures Posted/Non-posted allocations from IRP. i.e. either non-confined P2P traffic or from the CPU",
- "UMask": "0x1",
- "Unit": "IIO"
- },
- {
"BriefDescription": "Number requests sent to PCIe from main die : From ITC",
"EventCode": "0xC2",
"EventName": "UNC_IIO_NUM_REQ_FROM_CPU.ITC",
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/cache.json b/tools/perf/pmu-events/arch/x86/lunarlake/cache.json
index 1823149067b5b4..fb48be357c4e7c 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/cache.json
@@ -31,7 +31,7 @@
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
"PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x4f",
"Unit": "cpu_atom"
},
@@ -94,7 +94,7 @@
"MSRIndex": "0x3F6",
"MSRValue": "0x400",
"PEBS": "2",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
@@ -106,7 +106,7 @@
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
"PEBS": "2",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
@@ -118,7 +118,7 @@
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
"PEBS": "2",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
@@ -130,7 +130,7 @@
"MSRIndex": "0x3F6",
"MSRValue": "0x800",
"PEBS": "2",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
@@ -142,7 +142,7 @@
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
"PEBS": "2",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
@@ -154,7 +154,7 @@
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
"PEBS": "2",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
@@ -166,7 +166,7 @@
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
"PEBS": "2",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
@@ -178,7 +178,7 @@
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
"PEBS": "2",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
@@ -190,7 +190,7 @@
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
"PEBS": "2",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
@@ -202,7 +202,7 @@
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
"PEBS": "2",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x5",
"Unit": "cpu_atom"
},
@@ -212,7 +212,7 @@
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.STORE_LATENCY",
"PEBS": "2",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "200003",
"UMask": "0x6",
"Unit": "cpu_atom"
}
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json b/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json
index 5e4ef81b43d6fa..3a24934e8d6e9c 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json
@@ -19,7 +19,7 @@
"BriefDescription": "This event counts a subset of the Topdown Slots event that were no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations.",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that were no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations.\nSoftware can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that were no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/memory.json b/tools/perf/pmu-events/arch/x86/lunarlake/memory.json
index 51d70ba00bd495..9c188d80b7b90b 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/memory.json
@@ -155,7 +155,7 @@
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3FBFC00001",
+ "MSRValue": "0xFE7F8000001",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -175,7 +175,7 @@
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3FBFC00002",
+ "MSRValue": "0xFE7F8000002",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/other.json b/tools/perf/pmu-events/arch/x86/lunarlake/other.json
index 69adaed5686dd1..377f717db6cc10 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/other.json
@@ -24,7 +24,7 @@
"EventCode": "0xB7",
"EventName": "OCR.DEMAND_DATA_RD.DRAM",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x184000001",
+ "MSRValue": "0x1FBC000001",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_atom"
@@ -34,7 +34,7 @@
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.DRAM",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x184000001",
+ "MSRValue": "0x1E780000001",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json b/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json
index 2bde664fdc0f8b..2c9f85ec8c4ad8 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json
@@ -38,11 +38,19 @@
{
"BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles",
"EventName": "CPU_CLK_UNHALTED.CORE",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Core cycles when the core is not in a halt state.",
+ "EventName": "CPU_CLK_UNHALTED.CORE",
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the programmable counters available for other events.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of unhalted core clock cycles [This event is alias to CPU_CLK_UNHALTED.THREAD_P]",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.CORE_P",
@@ -50,9 +58,17 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Thread cycles when thread is not in halt state [This event is alias to CPU_CLK_UNHALTED.THREAD_P]",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.CORE_P",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. [This event is alias to CPU_CLK_UNHALTED.THREAD_P]",
+ "SampleAfterValue": "2000003",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles",
"EventName": "CPU_CLK_UNHALTED.REF_TSC",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "2000003",
"UMask": "0x3",
"Unit": "cpu_atom"
},
@@ -65,6 +81,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of unhalted reference clock cycles",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
+ "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Reference cycles when the core is not in halt state.",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
@@ -74,9 +99,16 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "Core cycles when the thread is not in halt state",
+ "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Core cycles when the thread is not in a halt state.",
"EventName": "CPU_CLK_UNHALTED.THREAD",
- "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.",
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the programmable counters available for other events.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -89,10 +121,10 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Thread cycles when thread is not in halt state",
+ "BriefDescription": "Thread cycles when thread is not in halt state [This event is alias to CPU_CLK_UNHALTED.CORE_P]",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. [This event is alias to CPU_CLK_UNHALTED.CORE_P]",
"SampleAfterValue": "2000003",
"Unit": "cpu_core"
},
@@ -100,7 +132,7 @@
"BriefDescription": "Fixed Counter: Counts the number of instructions retired",
"EventName": "INST_RETIRED.ANY",
"PEBS": "1",
- "SampleAfterValue": "1000003",
+ "SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_atom"
},
@@ -149,10 +181,28 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL.",
+ "EventCode": "0xe4",
+ "EventName": "MISC_RETIRED.LBR_INSERTS",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "LBR record is inserted",
+ "EventCode": "0xe4",
+ "EventName": "MISC_RETIRED.LBR_INSERTS",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions.",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions.\nSoftware can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions. Software can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "10000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -175,15 +225,21 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "Fixed Counter: Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL_P]",
+ "EventCode": "0x73",
"EventName": "TOPDOWN_BAD_SPECULATION.ALL",
- "PublicDescription": "Fixed Counter: Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the IQ. Also, includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
"SampleAfterValue": "1000003",
- "UMask": "0x5",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls",
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL]",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.ALL_P",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls [This event is alias to TOPDOWN_BE_BOUND.ALL_P]",
"EventCode": "0xa4",
"EventName": "TOPDOWN_BE_BOUND.ALL",
"SampleAfterValue": "1000003",
@@ -191,6 +247,14 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls [This event is alias to TOPDOWN_BE_BOUND.ALL]",
+ "EventCode": "0xa4",
+ "EventName": "TOPDOWN_BE_BOUND.ALL_P",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Fixed Counter: Counts the number of retirement slots not consumed due to front end stalls",
"EventName": "TOPDOWN_FE_BOUND.ALL",
"SampleAfterValue": "1000003",
@@ -198,7 +262,15 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Fixed Counter: Counts the number of consumed retirement slots. Similar to UOPS_RETIRED.ALL",
+ "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls",
+ "EventCode": "0x9c",
+ "EventName": "TOPDOWN_FE_BOUND.ALL_P",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Fixed Counter: Counts the number of consumed retirement slots.",
"EventName": "TOPDOWN_RETIRING.ALL",
"PEBS": "1",
"SampleAfterValue": "1000003",
@@ -206,10 +278,19 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of consumed retirement slots.",
+ "EventCode": "0xc2",
+ "EventName": "TOPDOWN_RETIRING.ALL_P",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric.",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric.\nSoftware can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric. Software can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 5297d25f4e0333..c372e3594a69a4 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -5,33 +5,33 @@ GenuineIntel-6-(1C|26|27|35|36),v5,bonnell,core
GenuineIntel-6-(3D|47),v29,broadwell,core
GenuineIntel-6-56,v11,broadwellde,core
GenuineIntel-6-4F,v22,broadwellx,core
-GenuineIntel-6-55-[56789ABCDEF],v1.20,cascadelakex,core
+GenuineIntel-6-55-[56789ABCDEF],v1.21,cascadelakex,core
GenuineIntel-6-9[6C],v1.04,elkhartlake,core
-GenuineIntel-6-CF,v1.03,emeraldrapids,core
+GenuineIntel-6-CF,v1.06,emeraldrapids,core
GenuineIntel-6-5[CF],v13,goldmont,core
GenuineIntel-6-7A,v1.01,goldmontplus,core
-GenuineIntel-6-B6,v1.01,grandridge,core
+GenuineIntel-6-B6,v1.02,grandridge,core
GenuineIntel-6-A[DE],v1.01,graniterapids,core
GenuineIntel-6-(3C|45|46),v35,haswell,core
GenuineIntel-6-3F,v28,haswellx,core
GenuineIntel-6-7[DE],v1.21,icelake,core
-GenuineIntel-6-6[AC],v1.23,icelakex,core
+GenuineIntel-6-6[AC],v1.24,icelakex,core
GenuineIntel-6-3A,v24,ivybridge,core
GenuineIntel-6-3E,v24,ivytown,core
GenuineIntel-6-2D,v24,jaketown,core
GenuineIntel-6-(57|85),v16,knightslanding,core
-GenuineIntel-6-BD,v1.00,lunarlake,core
-GenuineIntel-6-A[AC],v1.07,meteorlake,core
+GenuineIntel-6-BD,v1.01,lunarlake,core
+GenuineIntel-6-A[AC],v1.08,meteorlake,core
GenuineIntel-6-1[AEF],v4,nehalemep,core
GenuineIntel-6-2E,v4,nehalemex,core
GenuineIntel-6-A7,v1.02,rocketlake,core
GenuineIntel-6-2A,v19,sandybridge,core
-GenuineIntel-6-8F,v1.17,sapphirerapids,core
-GenuineIntel-6-AF,v1.01,sierraforest,core
+GenuineIntel-6-8F,v1.20,sapphirerapids,core
+GenuineIntel-6-AF,v1.02,sierraforest,core
GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core
GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v58,skylake,core
-GenuineIntel-6-55-[01234],v1.32,skylakex,core
-GenuineIntel-6-86,v1.21,snowridgex,core
+GenuineIntel-6-55-[01234],v1.33,skylakex,core
+GenuineIntel-6-86,v1.22,snowridgex,core
GenuineIntel-6-8[CD],v1.15,tigerlake,core
GenuineIntel-6-2C,v5,westmereep-dp,core
GenuineIntel-6-25,v4,westmereep-sp,core
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
index 47861a6dd8e9a4..af7acb15f661ee 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
@@ -967,6 +967,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
"EventCode": "0xB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
@@ -987,6 +997,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x4003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.",
"EventCode": "0xB7",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
@@ -1007,6 +1027,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
"EventCode": "0xB7",
"EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
index 9da8689eda8148..f3b7b211afb538 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
@@ -378,7 +378,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -455,7 +455,7 @@
"BriefDescription": "This event counts a subset of the Topdown Slots event that were no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations.",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that were no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations.\nThe count may be distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that were no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. The count may be distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
index a5b83293f15782..617d0e255fd5b5 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
@@ -298,6 +298,16 @@
},
{
"BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBFC00001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -307,6 +317,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3FBFC00002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_MISS",
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/other.json b/tools/perf/pmu-events/arch/x86/meteorlake/other.json
index 7effc1f271e77b..0bc2cb2eabb32c 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/other.json
@@ -19,6 +19,16 @@
},
{
"BriefDescription": "Counts demand data reads that have any type of response.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that have any type of response.",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -29,6 +39,16 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by DRAM.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were supplied by DRAM.",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.DRAM",
"MSRIndex": "0x1a6,0x1a7",
@@ -38,6 +58,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
@@ -48,6 +78,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by DRAM.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x184000002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts streaming stores that have any type of response.",
"EventCode": "0xB7",
"EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
@@ -97,7 +137,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state. For Tremont, UMWAIT and TPAUSE will only put the CPU into C0.1 activity state (not C0.2 activity state)",
+ "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state.",
"EventCode": "0x75",
"EventName": "SERIALIZATION.C01_MS_SCB",
"SampleAfterValue": "200003",
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
index 24bbfcebd2bed2..5ff4a7a322500e 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
@@ -1067,7 +1067,7 @@
"BriefDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions.",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions.\nThe count is distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "10000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1116,10 +1116,18 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL_P]",
"EventCode": "0x73",
"EventName": "TOPDOWN_BAD_SPECULATION.ALL",
- "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL_P]",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL]",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.ALL_P",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL]",
"SampleAfterValue": "1000003",
"Unit": "cpu_atom"
},
@@ -1156,7 +1164,7 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls",
+ "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls [This event is alias to TOPDOWN_BE_BOUND.ALL_P]",
"EventCode": "0x74",
"EventName": "TOPDOWN_BE_BOUND.ALL",
"SampleAfterValue": "1000003",
@@ -1171,6 +1179,13 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls [This event is alias to TOPDOWN_BE_BOUND.ALL]",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.ALL_P",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stall (scheduler not being able to accept another uop). This could be caused by RSV full or load/store buffer block.",
"EventCode": "0x74",
"EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
@@ -1211,13 +1226,20 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls",
+ "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls [This event is alias to TOPDOWN_FE_BOUND.ALL_P]",
"EventCode": "0x71",
"EventName": "TOPDOWN_FE_BOUND.ALL",
"SampleAfterValue": "1000003",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls [This event is alias to TOPDOWN_FE_BOUND.ALL]",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ALL_P",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BAClear",
"EventCode": "0x71",
"EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
@@ -1299,7 +1321,7 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of consumed retirement slots. Similar to UOPS_RETIRED.ALL",
+ "BriefDescription": "Counts the number of consumed retirement slots. Similar to UOPS_RETIRED.ALL [This event is alias to TOPDOWN_RETIRING.ALL_P]",
"EventCode": "0x72",
"EventName": "TOPDOWN_RETIRING.ALL",
"PEBS": "1",
@@ -1307,6 +1329,14 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of consumed retirement slots. Similar to UOPS_RETIRED.ALL [This event is alias to TOPDOWN_RETIRING.ALL]",
+ "EventCode": "0x72",
+ "EventName": "TOPDOWN_RETIRING.ALL_P",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Number of non dec-by-all uops decoded by decoder",
"EventCode": "0x76",
"EventName": "UOPS_DECODED.DEC0_UOPS",
@@ -1591,7 +1621,7 @@
"BriefDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric.",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric.\nSoftware can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric. Software can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json
index 08b5c7574cfcc3..901d8510f90fa5 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/uncore-interconnect.json
@@ -1,5 +1,21 @@
[
{
+ "BriefDescription": "Each cycle counts number of coherent reads pending on data return from memory controller that were issued by any core.",
+ "EventCode": "0x85",
+ "EventName": "UNC_ARB_DAT_OCCUPANCY.RD",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, etc.",
+ "EventCode": "0x84",
+ "EventName": "UNC_HAC_ARB_COH_TRK_REQUESTS.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "HAC_ARB"
+ },
+ {
"BriefDescription": "Number of all coherent Data Read entries. Doesn't include prefetches",
"EventCode": "0x81",
"EventName": "UNC_HAC_ARB_REQ_TRK_REQUEST.DRD",
@@ -9,7 +25,7 @@
},
{
"BriefDescription": "Number of all CMI transactions",
- "EventCode": "0x8a",
+ "EventCode": "0x8A",
"EventName": "UNC_HAC_ARB_TRANSACTIONS.ALL",
"PerPkg": "1",
"UMask": "0x1",
@@ -17,7 +33,7 @@
},
{
"BriefDescription": "Number of all CMI reads",
- "EventCode": "0x8a",
+ "EventCode": "0x8A",
"EventName": "UNC_HAC_ARB_TRANSACTIONS.READS",
"PerPkg": "1",
"UMask": "0x2",
@@ -25,7 +41,7 @@
},
{
"BriefDescription": "Number of all CMI writes not including Mflush",
- "EventCode": "0x8a",
+ "EventCode": "0x8A",
"EventName": "UNC_HAC_ARB_TRANSACTIONS.WRITES",
"PerPkg": "1",
"UMask": "0x4",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json
index 9606e76b98d6bb..b0447aad0dfc59 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json
@@ -432,6 +432,7 @@
"BriefDescription": "Retired load instructions with remote Intel(R) Optane(TM) DC persistent memory as the data source where the data request missed all caches.",
"EventCode": "0xd3",
"EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM",
+ "PEBS": "1",
"PublicDescription": "Counts retired load instructions with remote Intel(R) Optane(TM) DC persistent memory as the data source and the data request missed L3.",
"SampleAfterValue": "100007",
"UMask": "0x10"
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json
index 9e53da55d0c118..93d99318a62390 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json
@@ -267,7 +267,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json
index e8bf7c9c44e1a3..5420f529f49185 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json
@@ -264,6 +264,7 @@
"BriefDescription": "Number of times an RTM execution aborted.",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED",
+ "PEBS": "1",
"PublicDescription": "Counts the number of times RTM abort was triggered.",
"SampleAfterValue": "100003",
"UMask": "0x4"
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
index 2cfe814d20151c..e2086bedeca809 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json
@@ -1,21 +1,5 @@
[
{
- "BriefDescription": "AMX retired arithmetic BF16 operations.",
- "EventCode": "0xce",
- "EventName": "AMX_OPS_RETIRED.BF16",
- "PublicDescription": "Number of AMX-based retired arithmetic bfloat16 (BF16) floating-point operations. Counts TDPBF16PS FP instructions. SW to use operation multiplier of 4",
- "SampleAfterValue": "1000003",
- "UMask": "0x2"
- },
- {
- "BriefDescription": "AMX retired arithmetic integer 8-bit operations.",
- "EventCode": "0xce",
- "EventName": "AMX_OPS_RETIRED.INT8",
- "PublicDescription": "Number of AMX-based retired arithmetic integer operations of 8-bit width source operands. Counts TDPB[SS,UU,US,SU]D instructions. SW should use operation multiplier of 8.",
- "SampleAfterValue": "1000003",
- "UMask": "0x1"
- },
- {
"BriefDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE",
"CounterMask": "1",
"Deprecated": "1",
@@ -444,6 +428,7 @@
"BriefDescription": "INST_RETIRED.MACRO_FUSED",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
+ "PEBS": "1",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -451,6 +436,7 @@
"BriefDescription": "Retired NOP instructions.",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
+ "PEBS": "1",
"PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions",
"SampleAfterValue": "2000003",
"UMask": "0x2"
@@ -467,6 +453,7 @@
"BriefDescription": "Iterations of Repeat string retired instructions.",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
+ "PEBS": "1",
"PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
index 6f0e6360e989e1..f8c0eac8b8282b 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
@@ -727,23 +727,20 @@
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
"MetricExpr": "(100 * (1 - max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)) / (((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + cpu@RS.EMPTY\\,umask\\=0x1@) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / CPU_CLK_UNHALTED.THREAD) if max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)) < (((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + cpu@RS.EMPTY\\,umask\\=0x1@) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / CPU_CLK_UNHALTED.THREAD) else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0) + 0 * slots",
- "MetricGroup": "Cor;SMT;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_core_bound_likely",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_core_bound_likely"
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck.",
"MetricExpr": "100 * (100 * ((topdown\\-fetch\\-lat / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / slots) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + INT_MISC.UNKNOWN_BRANCH_CYCLES / CPU_CLK_UNHALTED.THREAD) + min(3 * cpu@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY) / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) + max(0, topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / slots - (topdown\\-fetch\\-lat / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / slots)) * ((IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2) / ((IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2 + (IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2)))",
- "MetricGroup": "DSBmiss;Fed;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_dsb_misses",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_botlnk_dsb_misses"
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck.",
"MetricExpr": "100 * (100 * ((topdown\\-fetch\\-lat / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / slots) * (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD) / (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + INT_MISC.UNKNOWN_BRANCH_CYCLES / CPU_CLK_UNHALTED.THREAD) + min(3 * cpu@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY) / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD)))",
- "MetricGroup": "Fed;FetchLat;IcMiss;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_ic_misses",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_botlnk_ic_misses"
},
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
@@ -1113,16 +1110,14 @@
{
"BriefDescription": "\"Bus lock\" per kilo instruction",
"MetricExpr": "tma_info_memory_mix_bus_lock_pki",
- "MetricGroup": "Mem;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_bus_lock_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem",
+ "MetricName": "tma_info_memory_bus_lock_pki"
},
{
"BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_code_stlb_mpki",
- "MetricGroup": "Fed;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_code_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_code_stlb_mpki"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
@@ -1163,9 +1158,8 @@
{
"BriefDescription": "Average Parallel L2 cache miss data reads",
"MetricExpr": "tma_info_memory_latency_data_l2_mlp",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_data_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_data_l2_mlp"
},
{
"BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
@@ -1182,9 +1176,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
@@ -1207,23 +1200,20 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l2_cache_fill_bw_2t"
},
{
"BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
"MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki"
},
{
"BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
"MetricExpr": "1e3 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_evictions_silent_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_l2_evictions_silent_pki"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
@@ -1264,9 +1254,8 @@
{
"BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l3_cache_access_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_l3_cache_access_bw_2t"
},
{
"BriefDescription": "",
@@ -1277,9 +1266,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l3_cache_fill_bw_2t"
},
{
"BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
@@ -1314,23 +1302,20 @@
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_miss_latency",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_load_l2_mlp"
},
{
"BriefDescription": "Average Latency for L3 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l3_miss_latency",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_load_l3_miss_latency"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
@@ -1341,9 +1326,8 @@
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_load_stlb_mpki",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_load_stlb_mpki"
},
{
"BriefDescription": "\"Bus lock\" per kilo instruction",
@@ -1385,53 +1369,46 @@
{
"BriefDescription": "Off-core accesses per kilo instruction for modified write requests",
"MetricExpr": "1e3 * OCR.MODIFIED_WRITE.ANY_RESPONSE / INST_RETIRED.ANY",
- "MetricGroup": "Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_offcore_mwrite_any_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Offcore",
+ "MetricName": "tma_info_memory_offcore_mwrite_any_pki"
},
{
"BriefDescription": "Off-core accesses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.ANY_RESPONSE / INST_RETIRED.ANY",
- "MetricGroup": "CacheHits;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_offcore_read_any_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "CacheHits;Offcore",
+ "MetricName": "tma_info_memory_offcore_read_any_pki"
},
{
"BriefDescription": "L3 cache misses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_offcore_read_l3m_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Offcore",
+ "MetricName": "tma_info_memory_offcore_read_l3m_pki"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (4 * (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD))",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_page_walks_utilization",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_page_walks_utilization"
},
{
"BriefDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket",
"MetricExpr": "64 * OCR.READS_TO_CORE.DRAM / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;TopdownL1;tma_L1_group",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC",
"MetricName": "tma_info_memory_r2c_dram_bw",
- "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average L3-cache miss BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.L3_MISS / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;TopdownL1;tma_L1_group",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC",
"MetricName": "tma_info_memory_r2c_l3m_bw",
- "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "Average L3-cache miss BW for Reads-to-Core (R2C). This covering going to DRAM or other memory off-chip memory tears. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average Off-core access BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.ANY_RESPONSE / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC;TopdownL1;tma_L1_group",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC",
"MetricName": "tma_info_memory_r2c_offcore_bw",
- "MetricgroupNoGroup": "TopdownL1",
"PublicDescription": "Average Off-core access BW for Reads-to-Core (R2C). R2C account for demand or prefetch load/RFO/code access that fill data into the Core caches."
},
{
@@ -1458,9 +1435,8 @@
{
"BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_store_stlb_mpki",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_store_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_store_stlb_mpki"
},
{
"BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
@@ -1490,9 +1466,8 @@
{
"BriefDescription": "Un-cacheable retired load per kilo instruction",
"MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
- "MetricGroup": "Mem;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_uc_load_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem",
+ "MetricName": "tma_info_memory_uc_load_pki"
},
{
"BriefDescription": "",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json
index cf6fa70f37c107..25a2b9695135b6 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json
@@ -4144,6 +4144,42 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "ItoMCacheNear (partial write) transactions from an IO device that addresses memory on the local socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcd42ff04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "ItoMCacheNear (partial write) transactions from an IO device that addresses memory on a remote socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcd437f04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "ItoM (write) transactions from an IO device that addresses memory on the local socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcc42ff04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "ItoM (write) transactions from an IO device that addresses memory on a remote socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOM_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcc437f04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts; Misses from local IO",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS",
@@ -4153,7 +4189,7 @@
"Unit": "CHA"
},
{
- "BriefDescription": "TOR Inserts; ItoM misses from local IO",
+ "BriefDescription": "TOR Inserts : ItoM, indicating a full cacheline write request, from IO Devices that missed the LLC",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM",
"PerPkg": "1",
@@ -4171,7 +4207,7 @@
"Unit": "CHA"
},
{
- "BriefDescription": "TOR Inserts; RdCur and FsRdCur misses from local IO",
+ "BriefDescription": "TOR Inserts; RdCur and FsRdCur requests from local IO that miss LLC",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR",
"PerPkg": "1",
@@ -4198,6 +4234,24 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "PCIRDCUR (read) transactions from an IO device that addresses memory on a remote socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xc8f2ff04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "PCIRDCUR (read) transactions from an IO device that addresses memory on the local socket",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xc8f37f04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Inserts; RFO from local IO",
"EventCode": "0x35",
"EventName": "UNC_CHA_TOR_INSERTS.IO_RFO",
@@ -5566,6 +5620,42 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC and targets local memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOMCACHENEAR_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcd42fe04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC and targets remote memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOMCACHENEAR_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcd437e04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy; ITOM misses from local IO and targets local memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOM_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcc42fe04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy; ITOM misses from local IO and targets remote memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOM_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xcc437e04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy; RdCur and FsRdCur misses from local IO",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_PCIRDCUR",
@@ -5575,6 +5665,24 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "TOR Occupancy; RdCur and FsRdCur misses from local IO and targets local memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_PCIRDCUR_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xc8f2fe04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "TOR Occupancy; RdCur and FsRdCur misses from local IO and targets remote memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_PCIRDCUR_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.",
+ "UMask": "0xc8f37e04",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "TOR Occupancy; RFO misses from local IO",
"EventCode": "0x36",
"EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_RFO",
diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
index 65d088556bae8d..22bb490e96662d 100644
--- a/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-interconnect.json
@@ -4888,7 +4888,7 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO\r\nIngress (V-EMIB) (AD)",
+ "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO Ingress (V-EMIB) (AD)",
"EventCode": "0x4B",
"EventName": "UNC_MDF_CRS_TxR_V_BOUNCES.AD",
"PerPkg": "1",
@@ -4897,7 +4897,7 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO\r\nIngress (V-EMIB) (AK)",
+ "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO Ingress (V-EMIB) (AK)",
"EventCode": "0x4B",
"EventName": "UNC_MDF_CRS_TxR_V_BOUNCES.AK",
"PerPkg": "1",
@@ -4906,7 +4906,7 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO\r\nIngress (V-EMIB) (AKC)",
+ "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO Ingress (V-EMIB) (AKC)",
"EventCode": "0x4B",
"EventName": "UNC_MDF_CRS_TxR_V_BOUNCES.AKC",
"PerPkg": "1",
@@ -4915,7 +4915,7 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO\r\nIngress (V-EMIB) (BL)",
+ "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO Ingress (V-EMIB) (BL)",
"EventCode": "0x4B",
"EventName": "UNC_MDF_CRS_TxR_V_BOUNCES.BL",
"PerPkg": "1",
@@ -4924,7 +4924,7 @@
"Unit": "MDF"
},
{
- "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO\r\nIngress (V-EMIB) (IV)",
+ "BriefDescription": "Number of cycles incoming messages from the vertical ring that are bounced at the SBO Ingress (V-EMIB) (IV)",
"EventCode": "0x4B",
"EventName": "UNC_MDF_CRS_TxR_V_BOUNCES.IV",
"PerPkg": "1",
@@ -5291,7 +5291,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCB",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xe",
"Unit": "UPI"
},
@@ -5300,7 +5300,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCB_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10e",
"Unit": "UPI"
},
@@ -5309,7 +5309,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCS",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xf",
"Unit": "UPI"
},
@@ -5318,7 +5318,7 @@
"EventCode": "0x05",
"EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCS_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Receive path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Receive path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10f",
"Unit": "UPI"
},
@@ -5763,7 +5763,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCB",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xe",
"Unit": "UPI"
},
@@ -5772,7 +5772,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCB_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass, Match Opcode : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10e",
"Unit": "UPI"
},
@@ -5781,7 +5781,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCS",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0xf",
"Unit": "UPI"
},
@@ -5790,7 +5790,7 @@
"EventCode": "0x04",
"EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCS_OPC",
"PerPkg": "1",
- "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Transmit path of a UPI port.\r\nMatch based on UMask specific bits:\r\nZ: Message Class (3-bit)\r\nY: Message Class Enable\r\nW: Opcode (4-bit)\r\nV: Opcode Enable\r\nU: Local Enable\r\nT: Remote Enable\r\nS: Data Hdr Enable\r\nR: Non-Data Hdr Enable\r\nQ: Dual Slot Hdr Enable\r\nP: Single Slot Hdr Enable\r\nLink Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases.\r\nNote: If Message Class is disabled, we expect opcode to also be disabled.",
+ "PublicDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard, Match Opcode : Matches on Transmit path of a UPI port. Match based on UMask specific bits: Z: Message Class (3-bit) Y: Message Class Enable W: Opcode (4-bit) V: Opcode Enable U: Local Enable T: Remote Enable S: Data Hdr Enable R: Non-Data Hdr Enable Q: Dual Slot Hdr Enable P: Single Slot Hdr Enable Link Layer control types are excluded (LL CTRL, slot NULL, LLCRD) even under specific opcode match_en cases. Note: If Message Class is disabled, we expect opcode to also be disabled.",
"UMask": "0x10f",
"Unit": "UPI"
},
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json b/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json
index ba9843110f070d..90292dc03d33e2 100644
--- a/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json
@@ -249,10 +249,17 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL_P]",
"EventCode": "0x73",
"EventName": "TOPDOWN_BAD_SPECULATION.ALL",
- "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL_P]",
+ "SampleAfterValue": "1000003"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL]",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.ALL_P",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear. [This event is alias to TOPDOWN_BAD_SPECULATION.ALL]",
"SampleAfterValue": "1000003"
},
{
@@ -284,7 +291,7 @@
"UMask": "0x1"
},
{
- "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls",
+ "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls [This event is alias to TOPDOWN_BE_BOUND.ALL_P]",
"EventCode": "0x74",
"EventName": "TOPDOWN_BE_BOUND.ALL",
"SampleAfterValue": "1000003"
@@ -297,6 +304,12 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls [This event is alias to TOPDOWN_BE_BOUND.ALL]",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.ALL_P",
+ "SampleAfterValue": "1000003"
+ },
+ {
"BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stall (scheduler not being able to accept another uop). This could be caused by RSV full or load/store buffer block.",
"EventCode": "0x74",
"EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
@@ -332,12 +345,18 @@
"UMask": "0x10"
},
{
- "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls",
+ "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls [This event is alias to TOPDOWN_FE_BOUND.ALL_P]",
"EventCode": "0x71",
"EventName": "TOPDOWN_FE_BOUND.ALL",
"SampleAfterValue": "1000003"
},
{
+ "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls [This event is alias to TOPDOWN_FE_BOUND.ALL]",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ALL_P",
+ "SampleAfterValue": "1000003"
+ },
+ {
"BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BAClear",
"EventCode": "0x71",
"EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
@@ -409,13 +428,20 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Counts the number of consumed retirement slots. Similar to UOPS_RETIRED.ALL",
+ "BriefDescription": "Counts the number of consumed retirement slots. Similar to UOPS_RETIRED.ALL [This event is alias to TOPDOWN_RETIRING.ALL_P]",
"EventCode": "0x72",
"EventName": "TOPDOWN_RETIRING.ALL",
"PEBS": "1",
"SampleAfterValue": "1000003"
},
{
+ "BriefDescription": "Counts the number of consumed retirement slots. Similar to UOPS_RETIRED.ALL [This event is alias to TOPDOWN_RETIRING.ALL]",
+ "EventCode": "0x72",
+ "EventName": "TOPDOWN_RETIRING.ALL_P",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003"
+ },
+ {
"BriefDescription": "Counts the number of uops issued by the front end every cycle.",
"EventCode": "0x0e",
"EventName": "UOPS_ISSUED.ANY",
diff --git a/tools/perf/pmu-events/arch/x86/skylake/frontend.json b/tools/perf/pmu-events/arch/x86/skylake/frontend.json
index 095904c7700198..d6f543471b2436 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/frontend.json
@@ -19,7 +19,7 @@
"BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
"EventCode": "0xAB",
"EventName": "DSB2MITE_SWITCHES.COUNT",
- "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
+ "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses. Note: Invoking MITE requires two or three cycles delay.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -267,11 +267,11 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.DSB_CYCLES_OK]",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 or more Uops [This event is alias to IDQ.DSB_CYCLES_OK]",
"CounterMask": "4",
"EventCode": "0x79",
"EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]",
+ "PublicDescription": "Counts the number of cycles 4 or more uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
@@ -321,11 +321,11 @@
"UMask": "0x18"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 or more Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
"CounterMask": "4",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "PublicDescription": "Counts the number of cycles 4 or more uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
index d28d8822a51a81..14229f4b29d834 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
@@ -764,6 +764,15 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD hit in the L3 and the snoop to one of the sibling cores hits the line in E/S/F state and the line is forwarded.",
+ "EventCode": "0xB7, 0xBB",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x8003C07F7",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts all demand & prefetch RFOs that have any response type.",
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
index 095904c7700198..d6f543471b2436 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
@@ -19,7 +19,7 @@
"BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
"EventCode": "0xAB",
"EventName": "DSB2MITE_SWITCHES.COUNT",
- "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
+ "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses. Note: Invoking MITE requires two or three cycles delay.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -267,11 +267,11 @@
"UMask": "0x4"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.DSB_CYCLES_OK]",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 or more Uops [This event is alias to IDQ.DSB_CYCLES_OK]",
"CounterMask": "4",
"EventCode": "0x79",
"EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]",
+ "PublicDescription": "Counts the number of cycles 4 or more uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
@@ -321,11 +321,11 @@
"UMask": "0x18"
},
{
- "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 or more Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
"CounterMask": "4",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+ "PublicDescription": "Counts the number of cycles 4 or more uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
"SampleAfterValue": "2000003",
"UMask": "0x18"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
index 2b797dbc75fe04..dba3cd6b3690e5 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
@@ -864,7 +864,7 @@
"BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
"EventCode": "0xC9",
"EventName": "RTM_RETIRED.ABORTED",
- "PEBS": "1",
+ "PEBS": "2",
"PublicDescription": "Number of times RTM abort was triggered.",
"SampleAfterValue": "2000003",
"UMask": "0x4"
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/other.json b/tools/perf/pmu-events/arch/x86/skylakex/other.json
index cda8a7a45f0c04..2511d722327a12 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/other.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/other.json
@@ -19,7 +19,7 @@
"BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
"EventCode": "0x28",
"EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
- "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture). This includes high current AVX 512-bit instructions.",
"SampleAfterValue": "200003",
"UMask": "0x20"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
index 66d686cc933e35..c50ddf5b40dd09 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
@@ -396,7 +396,7 @@
"Errata": "SKL091, SKL044",
"EventCode": "0xC0",
"EventName": "INST_RETIRED.NOP",
- "PEBS": "1",
+ "PEBS": "2",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 025e836a1c80dc..8126f952a30c85 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -652,23 +652,20 @@
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
"MetricExpr": "(100 * (1 - tma_core_bound / (((EXE_ACTIVITY.EXE_BOUND_0_PORTS + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) if tma_core_bound < (((EXE_ACTIVITY.EXE_BOUND_0_PORTS + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
- "MetricGroup": "Cor;SMT;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_core_bound_likely",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Cor;SMT",
+ "MetricName": "tma_info_botlnk_core_bound_likely"
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck.",
"MetricExpr": "100 * (100 * (tma_fetch_latency * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(2 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) + tma_fetch_bandwidth * tma_mite / (tma_mite + tma_dsb)))",
- "MetricGroup": "DSBmiss;Fed;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_dsb_misses",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "tma_info_botlnk_dsb_misses"
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck.",
"MetricExpr": "100 * (100 * (tma_fetch_latency * ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD) / ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(2 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD)))",
- "MetricGroup": "Fed;FetchLat;IcMiss;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_botlnk_ic_misses",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Fed;FetchLat;IcMiss",
+ "MetricName": "tma_info_botlnk_ic_misses"
},
{
"BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
@@ -1021,9 +1018,8 @@
{
"BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_code_stlb_mpki",
- "MetricGroup": "Fed;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_code_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Fed;MemoryTLB",
+ "MetricName": "tma_info_memory_code_stlb_mpki"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
@@ -1064,9 +1060,8 @@
{
"BriefDescription": "Average Parallel L2 cache miss data reads",
"MetricExpr": "tma_info_memory_latency_data_l2_mlp",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_data_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_data_l2_mlp"
},
{
"BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
@@ -1083,9 +1078,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
@@ -1108,23 +1102,20 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l2_cache_fill_bw_2t"
},
{
"BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
"MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki"
},
{
"BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
"MetricExpr": "1e3 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l2_evictions_silent_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "L2Evicts;Mem;Server",
+ "MetricName": "tma_info_memory_l2_evictions_silent_pki"
},
{
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
@@ -1165,9 +1156,8 @@
{
"BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l3_cache_access_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "tma_info_memory_l3_cache_access_bw_2t"
},
{
"BriefDescription": "",
@@ -1178,9 +1168,8 @@
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
- "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryBW",
+ "MetricName": "tma_info_memory_l3_cache_fill_bw_2t"
},
{
"BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
@@ -1209,16 +1198,14 @@
{
"BriefDescription": "Average Latency for L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_miss_latency",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_Lat;Offcore",
+ "MetricName": "tma_info_memory_load_l2_miss_latency"
},
{
"BriefDescription": "Average Parallel L2 cache miss demand Loads",
"MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_l2_mlp",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Memory_BW;Offcore",
+ "MetricName": "tma_info_memory_load_l2_mlp"
},
{
"BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
@@ -1229,9 +1216,8 @@
{
"BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_load_stlb_mpki",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_load_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_load_stlb_mpki"
},
{
"BriefDescription": "Un-cacheable retired load per kilo instruction",
@@ -1249,16 +1235,14 @@
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "tma_info_memory_tlb_page_walks_utilization",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_page_walks_utilization",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_page_walks_utilization"
},
{
"BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
"MetricExpr": "tma_info_memory_tlb_store_stlb_mpki",
- "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_store_stlb_mpki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "tma_info_memory_store_stlb_mpki"
},
{
"BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
@@ -1289,9 +1273,8 @@
{
"BriefDescription": "Un-cacheable retired load per kilo instruction",
"MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
- "MetricGroup": "Mem;TopdownL1;tma_L1_group",
- "MetricName": "tma_info_memory_uc_load_pki",
- "MetricgroupNoGroup": "TopdownL1"
+ "MetricGroup": "Mem",
+ "MetricName": "tma_info_memory_uc_load_pki"
},
{
"BriefDescription": "",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json
index 3eece8a728b56b..f32d4d9d283a3f 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-interconnect.json
@@ -38,7 +38,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.CLFLUSH",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x80",
"Unit": "IRP"
},
@@ -47,7 +47,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.CRD",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x2",
"Unit": "IRP"
},
@@ -56,7 +56,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.DRD",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x4",
"Unit": "IRP"
},
@@ -65,7 +65,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.PCIDCAHINT",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x20",
"Unit": "IRP"
},
@@ -74,7 +74,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.PCIRDCUR",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x1",
"Unit": "IRP"
},
@@ -101,7 +101,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.WBMTOI",
"PerPkg": "1",
- "PublicDescription": "Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x40",
"Unit": "IRP"
},
@@ -500,7 +500,7 @@
"EventCode": "0x11",
"EventName": "UNC_I_TRANSACTIONS.WRITES",
"PerPkg": "1",
- "PublicDescription": "Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID.; Trackes only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.",
+ "PublicDescription": "Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID.; Tracks only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.",
"UMask": "0x2",
"Unit": "IRP"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-io.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-io.json
index 2a3a709018bb6b..743c91f3d2f0b7 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-io.json
@@ -34,7 +34,7 @@
"EventCode": "0x1",
"EventName": "UNC_IIO_CLOCKTICKS",
"PerPkg": "1",
- "PublicDescription": "Counts clockticks of the 1GHz trafiic controller clock in the IIO unit.",
+ "PublicDescription": "Counts clockticks of the 1GHz traffic controller clock in the IIO unit.",
"Unit": "IIO"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
index f59405877ae8bc..73feadaf767406 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
@@ -205,7 +205,7 @@
"BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
"EventCode": "0x85",
"EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
+ "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake microarchitecture.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-cache.json b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-cache.json
index a68a5bb05c22b2..4090e4da1bd01e 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-cache.json
@@ -1444,7 +1444,7 @@
"Unit": "CHA"
},
{
- "BriefDescription": "This event is deprecated. Refer to new event UNC_CHA_LLC_LOOKUP.DATA_READ_LOCAL",
+ "BriefDescription": "This event is deprecated.",
"Deprecated": "1",
"EventCode": "0x34",
"EventName": "UNC_CHA_LLC_LOOKUP.DMND_READ_LOCAL",
@@ -1638,7 +1638,7 @@
"Unit": "CHA"
},
{
- "BriefDescription": "This event is deprecated. Refer to new event UNC_CHA_LLC_LOOKUP.RFO_LOCAL",
+ "BriefDescription": "This event is deprecated.",
"Deprecated": "1",
"EventCode": "0x34",
"EventName": "UNC_CHA_LLC_LOOKUP.RFO_PREF_LOCAL",
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-interconnect.json
index 7e2895f7fe3d4c..7cc3635b118b4e 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-interconnect.json
@@ -38,7 +38,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.CLFLUSH",
"PerPkg": "1",
- "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Coherent Ops : CLFlush : Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x80",
"Unit": "IRP"
},
@@ -65,7 +65,7 @@
"EventCode": "0x10",
"EventName": "UNC_I_COHERENT_OPS.WBMTOI",
"PerPkg": "1",
- "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations servied by the IRP",
+ "PublicDescription": "Coherent Ops : WbMtoI : Counts the number of coherency related operations serviced by the IRP",
"UMask": "0x40",
"Unit": "IRP"
},
@@ -454,7 +454,7 @@
"EventCode": "0x11",
"EventName": "UNC_I_TRANSACTIONS.WRITES",
"PerPkg": "1",
- "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Trackes only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.",
+ "PublicDescription": "Inbound Transaction Count : Writes : Counts the number of Inbound transactions from the IRP to the Uncore. This can be filtered based on request type in addition to the source queue. Note the special filtering equation. We do OR-reduction on the request type. If the SOURCE bit is set, then we also do AND qualification based on the source portID. : Tracks only write requests. Each write request should have a prefetch, so there is no need to explicitly track these requests. For writes that are tickled and have to retry, the counter will be incremented for each retry.",
"UMask": "0x2",
"Unit": "IRP"
},
diff --git a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-io.json b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-io.json
index ecdd6f0f8e8f6e..de156e499f5661 100644
--- a/tools/perf/pmu-events/arch/x86/snowridgex/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/snowridgex/uncore-io.json
@@ -2506,17 +2506,6 @@
"Unit": "IIO"
},
{
- "BriefDescription": "Number requests sent to PCIe from main die : From IRP",
- "EventCode": "0xC2",
- "EventName": "UNC_IIO_NUM_REQ_FROM_CPU.IRP",
- "FCMask": "0x07",
- "PerPkg": "1",
- "PortMask": "0xFF",
- "PublicDescription": "Number requests sent to PCIe from main die : From IRP : Captures Posted/Non-posted allocations from IRP. i.e. either non-confined P2P traffic or from the CPU",
- "UMask": "0x1",
- "Unit": "IIO"
- },
- {
"BriefDescription": "Number requests sent to PCIe from main die : From ITC",
"EventCode": "0xC2",
"EventName": "UNC_IIO_NUM_REQ_FROM_CPU.ITC",
diff --git a/tools/perf/scripts/python/parallel-perf.py b/tools/perf/scripts/python/parallel-perf.py
new file mode 100755
index 00000000000000..21f32ec5ed46df
--- /dev/null
+++ b/tools/perf/scripts/python/parallel-perf.py
@@ -0,0 +1,988 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a perf script command multiple times in parallel, using perf script
+# options --cpu and --time so that each job processes a different chunk
+# of the data.
+#
+# Copyright (c) 2024, Intel Corporation.
+
+import subprocess
+import argparse
+import pathlib
+import shlex
+import time
+import copy
+import sys
+import os
+import re
+
+glb_prog_name = "parallel-perf.py"
+glb_min_interval = 10.0
+glb_min_samples = 64
+
+class Verbosity():
+
+ def __init__(self, quiet=False, verbose=False, debug=False):
+ self.normal = True
+ self.verbose = verbose
+ self.debug = debug
+ self.self_test = True
+ if self.debug:
+ self.verbose = True
+ if self.verbose:
+ quiet = False
+ if quiet:
+ self.normal = False
+
+# Manage work (Start/Wait/Kill), as represented by a subprocess.Popen command
+class Work():
+
+ def __init__(self, cmd, pipe_to, output_dir="."):
+ self.popen = None
+ self.consumer = None
+ self.cmd = cmd
+ self.pipe_to = pipe_to
+ self.output_dir = output_dir
+ self.cmdout_name = f"{output_dir}/cmd.txt"
+ self.stdout_name = f"{output_dir}/out.txt"
+ self.stderr_name = f"{output_dir}/err.txt"
+
+ def Command(self):
+ sh_cmd = [ shlex.quote(x) for x in self.cmd ]
+ return " ".join(self.cmd)
+
+ def Stdout(self):
+ return open(self.stdout_name, "w")
+
+ def Stderr(self):
+ return open(self.stderr_name, "w")
+
+ def CreateOutputDir(self):
+ pathlib.Path(self.output_dir).mkdir(parents=True, exist_ok=True)
+
+ def Start(self):
+ if self.popen:
+ return
+ self.CreateOutputDir()
+ with open(self.cmdout_name, "w") as f:
+ f.write(self.Command())
+ f.write("\n")
+ stdout = self.Stdout()
+ stderr = self.Stderr()
+ if self.pipe_to:
+ self.popen = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=stderr)
+ args = shlex.split(self.pipe_to)
+ self.consumer = subprocess.Popen(args, stdin=self.popen.stdout, stdout=stdout, stderr=stderr)
+ else:
+ self.popen = subprocess.Popen(self.cmd, stdout=stdout, stderr=stderr)
+
+ def RemoveEmptyErrFile(self):
+ if os.path.exists(self.stderr_name):
+ if os.path.getsize(self.stderr_name) == 0:
+ os.unlink(self.stderr_name)
+
+ def Errors(self):
+ if os.path.exists(self.stderr_name):
+ if os.path.getsize(self.stderr_name) != 0:
+ return [ f"Non-empty error file {self.stderr_name}" ]
+ return []
+
+ def TidyUp(self):
+ self.RemoveEmptyErrFile()
+
+ def RawPollWait(self, p, wait):
+ if wait:
+ return p.wait()
+ return p.poll()
+
+ def Poll(self, wait=False):
+ if not self.popen:
+ return None
+ result = self.RawPollWait(self.popen, wait)
+ if self.consumer:
+ res = result
+ result = self.RawPollWait(self.consumer, wait)
+ if result != None and res == None:
+ self.popen.kill()
+ result = None
+ elif result == 0 and res != None and res != 0:
+ result = res
+ if result != None:
+ self.TidyUp()
+ return result
+
+ def Wait(self):
+ return self.Poll(wait=True)
+
+ def Kill(self):
+ if not self.popen:
+ return
+ self.popen.kill()
+ if self.consumer:
+ self.consumer.kill()
+
+def KillWork(worklist, verbosity):
+ for w in worklist:
+ w.Kill()
+ for w in worklist:
+ w.Wait()
+
+def NumberOfCPUs():
+ return os.sysconf("SC_NPROCESSORS_ONLN")
+
+def NanoSecsToSecsStr(x):
+ if x == None:
+ return ""
+ x = str(x)
+ if len(x) < 10:
+ x = "0" * (10 - len(x)) + x
+ return x[:len(x) - 9] + "." + x[-9:]
+
+def InsertOptionAfter(cmd, option, after):
+ try:
+ pos = cmd.index(after)
+ cmd.insert(pos + 1, option)
+ except:
+ cmd.append(option)
+
+def CreateWorkList(cmd, pipe_to, output_dir, cpus, time_ranges_by_cpu):
+ max_len = len(str(cpus[-1]))
+ cpu_dir_fmt = f"cpu-%.{max_len}u"
+ worklist = []
+ pos = 0
+ for cpu in cpus:
+ if cpu >= 0:
+ cpu_dir = os.path.join(output_dir, cpu_dir_fmt % cpu)
+ cpu_option = f"--cpu={cpu}"
+ else:
+ cpu_dir = output_dir
+ cpu_option = None
+
+ tr_dir_fmt = "time-range"
+
+ if len(time_ranges_by_cpu) > 1:
+ time_ranges = time_ranges_by_cpu[pos]
+ tr_dir_fmt += f"-{pos}"
+ pos += 1
+ else:
+ time_ranges = time_ranges_by_cpu[0]
+
+ max_len = len(str(len(time_ranges)))
+ tr_dir_fmt += f"-%.{max_len}u"
+
+ i = 0
+ for r in time_ranges:
+ if r == [None, None]:
+ time_option = None
+ work_output_dir = cpu_dir
+ else:
+ time_option = "--time=" + NanoSecsToSecsStr(r[0]) + "," + NanoSecsToSecsStr(r[1])
+ work_output_dir = os.path.join(cpu_dir, tr_dir_fmt % i)
+ i += 1
+ work_cmd = list(cmd)
+ if time_option != None:
+ InsertOptionAfter(work_cmd, time_option, "script")
+ if cpu_option != None:
+ InsertOptionAfter(work_cmd, cpu_option, "script")
+ w = Work(work_cmd, pipe_to, work_output_dir)
+ worklist.append(w)
+ return worklist
+
+def DoRunWork(worklist, nr_jobs, verbosity):
+ nr_to_do = len(worklist)
+ not_started = list(worklist)
+ running = []
+ done = []
+ chg = False
+ while True:
+ nr_done = len(done)
+ if chg and verbosity.normal:
+ nr_run = len(running)
+ print(f"\rThere are {nr_to_do} jobs: {nr_done} completed, {nr_run} running", flush=True, end=" ")
+ if verbosity.verbose:
+ print()
+ chg = False
+ if nr_done == nr_to_do:
+ break
+ while len(running) < nr_jobs and len(not_started):
+ w = not_started.pop(0)
+ running.append(w)
+ if verbosity.verbose:
+ print("Starting:", w.Command())
+ w.Start()
+ chg = True
+ if len(running):
+ time.sleep(0.1)
+ finished = []
+ not_finished = []
+ while len(running):
+ w = running.pop(0)
+ r = w.Poll()
+ if r == None:
+ not_finished.append(w)
+ continue
+ if r == 0:
+ if verbosity.verbose:
+ print("Finished:", w.Command())
+ finished.append(w)
+ chg = True
+ continue
+ if verbosity.normal and not verbosity.verbose:
+ print()
+ print("Job failed!\n return code:", r, "\n command: ", w.Command())
+ if w.pipe_to:
+ print(" piped to: ", w.pipe_to)
+ print("Killing outstanding jobs")
+ KillWork(not_finished, verbosity)
+ KillWork(running, verbosity)
+ return False
+ running = not_finished
+ done += finished
+ errorlist = []
+ for w in worklist:
+ errorlist += w.Errors()
+ if len(errorlist):
+ print("Errors:")
+ for e in errorlist:
+ print(e)
+ elif verbosity.normal:
+ print("\r"," "*50, "\rAll jobs finished successfully", flush=True)
+ return True
+
+def RunWork(worklist, nr_jobs=NumberOfCPUs(), verbosity=Verbosity()):
+ try:
+ return DoRunWork(worklist, nr_jobs, verbosity)
+ except:
+ for w in worklist:
+ w.Kill()
+ raise
+ return True
+
+def ReadHeader(perf, file_name):
+ return subprocess.Popen([perf, "script", "--header-only", "--input", file_name], stdout=subprocess.PIPE).stdout.read().decode("utf-8")
+
+def ParseHeader(hdr):
+ result = {}
+ lines = hdr.split("\n")
+ for line in lines:
+ if ":" in line and line[0] == "#":
+ pos = line.index(":")
+ name = line[1:pos-1].strip()
+ value = line[pos+1:].strip()
+ if name in result:
+ orig_name = name
+ nr = 2
+ while True:
+ name = f"{orig_name} {nr}"
+ if name not in result:
+ break
+ nr += 1
+ result[name] = value
+ return result
+
+def HeaderField(hdr_dict, hdr_fld):
+ if hdr_fld not in hdr_dict:
+ raise Exception(f"'{hdr_fld}' missing from header information")
+ return hdr_dict[hdr_fld]
+
+# Represent the position of an option within a command string
+# and provide the option value and/or remove the option
+class OptPos():
+
+ def Init(self, opt_element=-1, value_element=-1, opt_pos=-1, value_pos=-1, error=None):
+ self.opt_element = opt_element # list element that contains option
+ self.value_element = value_element # list element that contains option value
+ self.opt_pos = opt_pos # string position of option
+ self.value_pos = value_pos # string position of value
+ self.error = error # error message string
+
+ def __init__(self, args, short_name, long_name, default=None):
+ self.args = list(args)
+ self.default = default
+ n = 2 + len(long_name)
+ m = len(short_name)
+ pos = -1
+ for opt in args:
+ pos += 1
+ if m and opt[:2] == f"-{short_name}":
+ if len(opt) == 2:
+ if pos + 1 < len(args):
+ self.Init(pos, pos + 1, 0, 0)
+ else:
+ self.Init(error = f"-{short_name} option missing value")
+ else:
+ self.Init(pos, pos, 0, 2)
+ return
+ if opt[:n] == f"--{long_name}":
+ if len(opt) == n:
+ if pos + 1 < len(args):
+ self.Init(pos, pos + 1, 0, 0)
+ else:
+ self.Init(error = f"--{long_name} option missing value")
+ elif opt[n] == "=":
+ self.Init(pos, pos, 0, n + 1)
+ else:
+ self.Init(error = f"--{long_name} option expected '='")
+ return
+ if m and opt[:1] == "-" and opt[:2] != "--" and short_name in opt:
+ ipos = opt.index(short_name)
+ if "-" in opt[1:]:
+ hpos = opt[1:].index("-")
+ if hpos < ipos:
+ continue
+ if ipos + 1 == len(opt):
+ if pos + 1 < len(args):
+ self.Init(pos, pos + 1, ipos, 0)
+ else:
+ self.Init(error = f"-{short_name} option missing value")
+ else:
+ self.Init(pos, pos, ipos, ipos + 1)
+ return
+ self.Init()
+
+ def Value(self):
+ if self.opt_element >= 0:
+ if self.opt_element != self.value_element:
+ return self.args[self.value_element]
+ else:
+ return self.args[self.value_element][self.value_pos:]
+ return self.default
+
+ def Remove(self, args):
+ if self.opt_element == -1:
+ return
+ if self.opt_element != self.value_element:
+ del args[self.value_element]
+ if self.opt_pos:
+ args[self.opt_element] = args[self.opt_element][:self.opt_pos]
+ else:
+ del args[self.opt_element]
+
+def DetermineInputFileName(cmd):
+ p = OptPos(cmd, "i", "input", "perf.data")
+ if p.error:
+ raise Exception(f"perf command {p.error}")
+ file_name = p.Value()
+ if not os.path.exists(file_name):
+ raise Exception(f"perf command input file '{file_name}' not found")
+ return file_name
+
+def ReadOption(args, short_name, long_name, err_prefix, remove=False):
+ p = OptPos(args, short_name, long_name)
+ if p.error:
+ raise Exception(f"{err_prefix}{p.error}")
+ value = p.Value()
+ if remove:
+ p.Remove(args)
+ return value
+
+def ExtractOption(args, short_name, long_name, err_prefix):
+ return ReadOption(args, short_name, long_name, err_prefix, True)
+
+def ReadPerfOption(args, short_name, long_name):
+ return ReadOption(args, short_name, long_name, "perf command ")
+
+def ExtractPerfOption(args, short_name, long_name):
+ return ExtractOption(args, short_name, long_name, "perf command ")
+
+def PerfDoubleQuickCommands(cmd, file_name):
+ cpu_str = ReadPerfOption(cmd, "C", "cpu")
+ time_str = ReadPerfOption(cmd, "", "time")
+ # Use double-quick sampling to determine trace data density
+ times_cmd = ["perf", "script", "--ns", "--input", file_name, "--itrace=qqi"]
+ if cpu_str != None and cpu_str != "":
+ times_cmd.append(f"--cpu={cpu_str}")
+ if time_str != None and time_str != "":
+ times_cmd.append(f"--time={time_str}")
+ cnts_cmd = list(times_cmd)
+ cnts_cmd.append("-Fcpu")
+ times_cmd.append("-Fcpu,time")
+ return cnts_cmd, times_cmd
+
+class CPUTimeRange():
+ def __init__(self, cpu):
+ self.cpu = cpu
+ self.sample_cnt = 0
+ self.time_ranges = None
+ self.interval = 0
+ self.interval_remaining = 0
+ self.remaining = 0
+ self.tr_pos = 0
+
+def CalcTimeRangesByCPU(line, cpu, cpu_time_ranges, max_time):
+ cpu_time_range = cpu_time_ranges[cpu]
+ cpu_time_range.remaining -= 1
+ cpu_time_range.interval_remaining -= 1
+ if cpu_time_range.remaining == 0:
+ cpu_time_range.time_ranges[cpu_time_range.tr_pos][1] = max_time
+ return
+ if cpu_time_range.interval_remaining == 0:
+ time = TimeVal(line[1][:-1], 0)
+ time_ranges = cpu_time_range.time_ranges
+ time_ranges[cpu_time_range.tr_pos][1] = time - 1
+ time_ranges.append([time, max_time])
+ cpu_time_range.tr_pos += 1
+ cpu_time_range.interval_remaining = cpu_time_range.interval
+
+def CountSamplesByCPU(line, cpu, cpu_time_ranges):
+ try:
+ cpu_time_ranges[cpu].sample_cnt += 1
+ except:
+ print("exception")
+ print("cpu", cpu)
+ print("len(cpu_time_ranges)", len(cpu_time_ranges))
+ raise
+
+def ProcessCommandOutputLines(cmd, per_cpu, fn, *x):
+ # Assume CPU number is at beginning of line and enclosed by []
+ pat = re.compile(r"\s*\[[0-9]+\]")
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+ while True:
+ if line := p.stdout.readline():
+ line = line.decode("utf-8")
+ if pat.match(line):
+ line = line.split()
+ if per_cpu:
+ # Assumes CPU number is enclosed by []
+ cpu = int(line[0][1:-1])
+ else:
+ cpu = 0
+ fn(line, cpu, *x)
+ else:
+ break
+ p.wait()
+
+def IntersectTimeRanges(new_time_ranges, time_ranges):
+ pos = 0
+ new_pos = 0
+ # Can assume len(time_ranges) != 0 and len(new_time_ranges) != 0
+ # Note also, there *must* be at least one intersection.
+ while pos < len(time_ranges) and new_pos < len(new_time_ranges):
+ # new end < old start => no intersection, remove new
+ if new_time_ranges[new_pos][1] < time_ranges[pos][0]:
+ del new_time_ranges[new_pos]
+ continue
+ # new start > old end => no intersection, check next
+ if new_time_ranges[new_pos][0] > time_ranges[pos][1]:
+ pos += 1
+ if pos < len(time_ranges):
+ continue
+ # no next, so remove remaining
+ while new_pos < len(new_time_ranges):
+ del new_time_ranges[new_pos]
+ return
+ # Found an intersection
+ # new start < old start => adjust new start = old start
+ if new_time_ranges[new_pos][0] < time_ranges[pos][0]:
+ new_time_ranges[new_pos][0] = time_ranges[pos][0]
+ # new end > old end => keep the overlap, insert the remainder
+ if new_time_ranges[new_pos][1] > time_ranges[pos][1]:
+ r = [ time_ranges[pos][1] + 1, new_time_ranges[new_pos][1] ]
+ new_time_ranges[new_pos][1] = time_ranges[pos][1]
+ new_pos += 1
+ new_time_ranges.insert(new_pos, r)
+ continue
+ # new [start, end] is within old [start, end]
+ new_pos += 1
+
+def SplitTimeRangesByTraceDataDensity(time_ranges, cpus, nr, cmd, file_name, per_cpu, min_size, min_interval, verbosity):
+ if verbosity.normal:
+ print("\rAnalyzing...", flush=True, end=" ")
+ if verbosity.verbose:
+ print()
+ cnts_cmd, times_cmd = PerfDoubleQuickCommands(cmd, file_name)
+
+ nr_cpus = cpus[-1] + 1 if per_cpu else 1
+ if per_cpu:
+ nr_cpus = cpus[-1] + 1
+ cpu_time_ranges = [ CPUTimeRange(cpu) for cpu in range(nr_cpus) ]
+ else:
+ nr_cpus = 1
+ cpu_time_ranges = [ CPUTimeRange(-1) ]
+
+ if verbosity.debug:
+ print("nr_cpus", nr_cpus)
+ print("cnts_cmd", cnts_cmd)
+ print("times_cmd", times_cmd)
+
+ # Count the number of "double quick" samples per CPU
+ ProcessCommandOutputLines(cnts_cmd, per_cpu, CountSamplesByCPU, cpu_time_ranges)
+
+ tot = 0
+ mx = 0
+ for cpu_time_range in cpu_time_ranges:
+ cnt = cpu_time_range.sample_cnt
+ tot += cnt
+ if cnt > mx:
+ mx = cnt
+ if verbosity.debug:
+ print("cpu:", cpu_time_range.cpu, "sample_cnt", cnt)
+
+ if min_size < 1:
+ min_size = 1
+
+ if mx < min_size:
+ # Too little data to be worth splitting
+ if verbosity.debug:
+ print("Too little data to split by time")
+ if nr == 0:
+ nr = 1
+ return [ SplitTimeRangesIntoN(time_ranges, nr, min_interval) ]
+
+ if nr:
+ divisor = nr
+ min_size = 1
+ else:
+ divisor = NumberOfCPUs()
+
+ interval = int(round(tot / divisor, 0))
+ if interval < min_size:
+ interval = min_size
+
+ if verbosity.debug:
+ print("divisor", divisor)
+ print("min_size", min_size)
+ print("interval", interval)
+
+ min_time = time_ranges[0][0]
+ max_time = time_ranges[-1][1]
+
+ for cpu_time_range in cpu_time_ranges:
+ cnt = cpu_time_range.sample_cnt
+ if cnt == 0:
+ cpu_time_range.time_ranges = copy.deepcopy(time_ranges)
+ continue
+ # Adjust target interval for CPU to give approximately equal interval sizes
+ # Determine number of intervals, rounding to nearest integer
+ n = int(round(cnt / interval, 0))
+ if n < 1:
+ n = 1
+ # Determine interval size, rounding up
+ d, m = divmod(cnt, n)
+ if m:
+ d += 1
+ cpu_time_range.interval = d
+ cpu_time_range.interval_remaining = d
+ cpu_time_range.remaining = cnt
+ # Init. time ranges for each CPU with the start time
+ cpu_time_range.time_ranges = [ [min_time, max_time] ]
+
+ # Set time ranges so that the same number of "double quick" samples
+ # will fall into each time range.
+ ProcessCommandOutputLines(times_cmd, per_cpu, CalcTimeRangesByCPU, cpu_time_ranges, max_time)
+
+ for cpu_time_range in cpu_time_ranges:
+ if cpu_time_range.sample_cnt:
+ IntersectTimeRanges(cpu_time_range.time_ranges, time_ranges)
+
+ return [cpu_time_ranges[cpu].time_ranges for cpu in cpus]
+
+def SplitSingleTimeRangeIntoN(time_range, n):
+ if n <= 1:
+ return [time_range]
+ start = time_range[0]
+ end = time_range[1]
+ duration = int((end - start + 1) / n)
+ if duration < 1:
+ return [time_range]
+ time_ranges = []
+ for i in range(n):
+ time_ranges.append([start, start + duration - 1])
+ start += duration
+ time_ranges[-1][1] = end
+ return time_ranges
+
+def TimeRangeDuration(r):
+ return r[1] - r[0] + 1
+
+def TotalDuration(time_ranges):
+ duration = 0
+ for r in time_ranges:
+ duration += TimeRangeDuration(r)
+ return duration
+
+def SplitTimeRangesByInterval(time_ranges, interval):
+ new_ranges = []
+ for r in time_ranges:
+ duration = TimeRangeDuration(r)
+ n = duration / interval
+ n = int(round(n, 0))
+ new_ranges += SplitSingleTimeRangeIntoN(r, n)
+ return new_ranges
+
+def SplitTimeRangesIntoN(time_ranges, n, min_interval):
+ if n <= len(time_ranges):
+ return time_ranges
+ duration = TotalDuration(time_ranges)
+ interval = duration / n
+ if interval < min_interval:
+ interval = min_interval
+ return SplitTimeRangesByInterval(time_ranges, interval)
+
+def RecombineTimeRanges(tr):
+ new_tr = copy.deepcopy(tr)
+ n = len(new_tr)
+ i = 1
+ while i < len(new_tr):
+ # if prev end + 1 == cur start, combine them
+ if new_tr[i - 1][1] + 1 == new_tr[i][0]:
+ new_tr[i][0] = new_tr[i - 1][0]
+ del new_tr[i - 1]
+ else:
+ i += 1
+ return new_tr
+
+def OpenTimeRangeEnds(time_ranges, min_time, max_time):
+ if time_ranges[0][0] <= min_time:
+ time_ranges[0][0] = None
+ if time_ranges[-1][1] >= max_time:
+ time_ranges[-1][1] = None
+
+def BadTimeStr(time_str):
+ raise Exception(f"perf command bad time option: '{time_str}'\nCheck also 'time of first sample' and 'time of last sample' in perf script --header-only")
+
+def ValidateTimeRanges(time_ranges, time_str):
+ n = len(time_ranges)
+ for i in range(n):
+ start = time_ranges[i][0]
+ end = time_ranges[i][1]
+ if i != 0 and start <= time_ranges[i - 1][1]:
+ BadTimeStr(time_str)
+ if start > end:
+ BadTimeStr(time_str)
+
+def TimeVal(s, dflt):
+ s = s.strip()
+ if s == "":
+ return dflt
+ a = s.split(".")
+ if len(a) > 2:
+ raise Exception(f"Bad time value'{s}'")
+ x = int(a[0])
+ if x < 0:
+ raise Exception("Negative time not allowed")
+ x *= 1000000000
+ if len(a) > 1:
+ x += int((a[1] + "000000000")[:9])
+ return x
+
+def BadCPUStr(cpu_str):
+ raise Exception(f"perf command bad cpu option: '{cpu_str}'\nCheck also 'nrcpus avail' in perf script --header-only")
+
+def ParseTimeStr(time_str, min_time, max_time):
+ if time_str == None or time_str == "":
+ return [[min_time, max_time]]
+ time_ranges = []
+ for r in time_str.split():
+ a = r.split(",")
+ if len(a) != 2:
+ BadTimeStr(time_str)
+ try:
+ start = TimeVal(a[0], min_time)
+ end = TimeVal(a[1], max_time)
+ except:
+ BadTimeStr(time_str)
+ time_ranges.append([start, end])
+ ValidateTimeRanges(time_ranges, time_str)
+ return time_ranges
+
+def ParseCPUStr(cpu_str, nr_cpus):
+ if cpu_str == None or cpu_str == "":
+ return [-1]
+ cpus = []
+ for r in cpu_str.split(","):
+ a = r.split("-")
+ if len(a) < 1 or len(a) > 2:
+ BadCPUStr(cpu_str)
+ try:
+ start = int(a[0].strip())
+ if len(a) > 1:
+ end = int(a[1].strip())
+ else:
+ end = start
+ except:
+ BadCPUStr(cpu_str)
+ if start < 0 or end < 0 or end < start or end >= nr_cpus:
+ BadCPUStr(cpu_str)
+ cpus.extend(range(start, end + 1))
+ cpus = list(set(cpus)) # Remove duplicates
+ cpus.sort()
+ return cpus
+
+class ParallelPerf():
+
+ def __init__(self, a):
+ for arg_name in vars(a):
+ setattr(self, arg_name, getattr(a, arg_name))
+ self.orig_nr = self.nr
+ self.orig_cmd = list(self.cmd)
+ self.perf = self.cmd[0]
+ if os.path.exists(self.output_dir):
+ raise Exception(f"Output '{self.output_dir}' already exists")
+ if self.jobs < 0 or self.nr < 0 or self.interval < 0:
+ raise Exception("Bad options (negative values): try -h option for help")
+ if self.nr != 0 and self.interval != 0:
+ raise Exception("Cannot specify number of time subdivisions and time interval")
+ if self.jobs == 0:
+ self.jobs = NumberOfCPUs()
+ if self.nr == 0 and self.interval == 0:
+ if self.per_cpu:
+ self.nr = 1
+ else:
+ self.nr = self.jobs
+
+ def Init(self):
+ if self.verbosity.debug:
+ print("cmd", self.cmd)
+ self.file_name = DetermineInputFileName(self.cmd)
+ self.hdr = ReadHeader(self.perf, self.file_name)
+ self.hdr_dict = ParseHeader(self.hdr)
+ self.cmd_line = HeaderField(self.hdr_dict, "cmdline")
+
+ def ExtractTimeInfo(self):
+ self.min_time = TimeVal(HeaderField(self.hdr_dict, "time of first sample"), 0)
+ self.max_time = TimeVal(HeaderField(self.hdr_dict, "time of last sample"), 0)
+ self.time_str = ExtractPerfOption(self.cmd, "", "time")
+ self.time_ranges = ParseTimeStr(self.time_str, self.min_time, self.max_time)
+ if self.verbosity.debug:
+ print("time_ranges", self.time_ranges)
+
+ def ExtractCPUInfo(self):
+ if self.per_cpu:
+ nr_cpus = int(HeaderField(self.hdr_dict, "nrcpus avail"))
+ self.cpu_str = ExtractPerfOption(self.cmd, "C", "cpu")
+ if self.cpu_str == None or self.cpu_str == "":
+ self.cpus = [ x for x in range(nr_cpus) ]
+ else:
+ self.cpus = ParseCPUStr(self.cpu_str, nr_cpus)
+ else:
+ self.cpu_str = None
+ self.cpus = [-1]
+ if self.verbosity.debug:
+ print("cpus", self.cpus)
+
+ def IsIntelPT(self):
+ return self.cmd_line.find("intel_pt") >= 0
+
+ def SplitTimeRanges(self):
+ if self.IsIntelPT() and self.interval == 0:
+ self.split_time_ranges_for_each_cpu = \
+ SplitTimeRangesByTraceDataDensity(self.time_ranges, self.cpus, self.orig_nr,
+ self.orig_cmd, self.file_name, self.per_cpu,
+ self.min_size, self.min_interval, self.verbosity)
+ elif self.nr:
+ self.split_time_ranges_for_each_cpu = [ SplitTimeRangesIntoN(self.time_ranges, self.nr, self.min_interval) ]
+ else:
+ self.split_time_ranges_for_each_cpu = [ SplitTimeRangesByInterval(self.time_ranges, self.interval) ]
+
+ def CheckTimeRanges(self):
+ for tr in self.split_time_ranges_for_each_cpu:
+ # Re-combined time ranges should be the same
+ new_tr = RecombineTimeRanges(tr)
+ if new_tr != self.time_ranges:
+ if self.verbosity.debug:
+ print("tr", tr)
+ print("new_tr", new_tr)
+ raise Exception("Self test failed!")
+
+ def OpenTimeRangeEnds(self):
+ for time_ranges in self.split_time_ranges_for_each_cpu:
+ OpenTimeRangeEnds(time_ranges, self.min_time, self.max_time)
+
+ def CreateWorkList(self):
+ self.worklist = CreateWorkList(self.cmd, self.pipe_to, self.output_dir, self.cpus, self.split_time_ranges_for_each_cpu)
+
+ def PerfDataRecordedPerCPU(self):
+ if "--per-thread" in self.cmd_line.split():
+ return False
+ return True
+
+ def DefaultToPerCPU(self):
+ # --no-per-cpu option takes precedence
+ if self.no_per_cpu:
+ return False
+ if not self.PerfDataRecordedPerCPU():
+ return False
+ # Default to per-cpu for Intel PT data that was recorded per-cpu,
+ # because decoding can be done for each CPU separately.
+ if self.IsIntelPT():
+ return True
+ return False
+
+ def Config(self):
+ self.Init()
+ self.ExtractTimeInfo()
+ if not self.per_cpu:
+ self.per_cpu = self.DefaultToPerCPU()
+ if self.verbosity.debug:
+ print("per_cpu", self.per_cpu)
+ self.ExtractCPUInfo()
+ self.SplitTimeRanges()
+ if self.verbosity.self_test:
+ self.CheckTimeRanges()
+ # Prefer open-ended time range to starting / ending with min_time / max_time resp.
+ self.OpenTimeRangeEnds()
+ self.CreateWorkList()
+
+ def Run(self):
+ if self.dry_run:
+ print(len(self.worklist),"jobs:")
+ for w in self.worklist:
+ print(w.Command())
+ return True
+ result = RunWork(self.worklist, self.jobs, verbosity=self.verbosity)
+ if self.verbosity.verbose:
+ print(glb_prog_name, "done")
+ return result
+
+def RunParallelPerf(a):
+ pp = ParallelPerf(a)
+ pp.Config()
+ return pp.Run()
+
+def Main(args):
+ ap = argparse.ArgumentParser(
+ prog=glb_prog_name, formatter_class = argparse.RawDescriptionHelpFormatter,
+ description =
+"""
+Run a perf script command multiple times in parallel, using perf script options
+--cpu and --time so that each job processes a different chunk of the data.
+""",
+ epilog =
+"""
+Follow the options by '--' and then the perf script command e.g.
+
+ $ perf record -a -- sleep 10
+ $ parallel-perf.py --nr=4 -- perf script --ns
+ All jobs finished successfully
+ $ tree parallel-perf-output/
+ parallel-perf-output/
+ ├── time-range-0
+ │   ├── cmd.txt
+ │   └── out.txt
+ ├── time-range-1
+ │   ├── cmd.txt
+ │   └── out.txt
+ ├── time-range-2
+ │   ├── cmd.txt
+ │   └── out.txt
+ └── time-range-3
+ ├── cmd.txt
+ └── out.txt
+ $ find parallel-perf-output -name cmd.txt | sort | xargs grep -H .
+ parallel-perf-output/time-range-0/cmd.txt:perf script --time=,9466.504461499 --ns
+ parallel-perf-output/time-range-1/cmd.txt:perf script --time=9466.504461500,9469.005396999 --ns
+ parallel-perf-output/time-range-2/cmd.txt:perf script --time=9469.005397000,9471.506332499 --ns
+ parallel-perf-output/time-range-3/cmd.txt:perf script --time=9471.506332500, --ns
+
+Any perf script command can be used, including the use of perf script options
+--dlfilter and --script, so that the benefit of running parallel jobs
+naturally extends to them also.
+
+If option --pipe-to is used, standard output is first piped through that
+command. Beware, if the command fails (e.g. grep with no matches), it will be
+considered a fatal error.
+
+Final standard output is redirected to files named out.txt in separate
+subdirectories under the output directory. Similarly, standard error is
+written to files named err.txt. In addition, files named cmd.txt contain the
+corresponding perf script command. After processing, err.txt files are removed
+if they are empty.
+
+If any job exits with a non-zero exit code, then all jobs are killed and no
+more are started. A message is printed if any job results in a non-empty
+err.txt file.
+
+There is a separate output subdirectory for each time range. If the --per-cpu
+option is used, these are further grouped under cpu-n subdirectories, e.g.
+
+ $ parallel-perf.py --per-cpu --nr=2 -- perf script --ns --cpu=0,1
+ All jobs finished successfully
+ $ tree parallel-perf-output
+ parallel-perf-output/
+ ├── cpu-0
+ │   ├── time-range-0
+ │   │   ├── cmd.txt
+ │   │   └── out.txt
+ │   └── time-range-1
+ │   ├── cmd.txt
+ │   └── out.txt
+ └── cpu-1
+ ├── time-range-0
+ │   ├── cmd.txt
+ │   └── out.txt
+ └── time-range-1
+ ├── cmd.txt
+ └── out.txt
+ $ find parallel-perf-output -name cmd.txt | sort | xargs grep -H .
+ parallel-perf-output/cpu-0/time-range-0/cmd.txt:perf script --cpu=0 --time=,9469.005396999 --ns
+ parallel-perf-output/cpu-0/time-range-1/cmd.txt:perf script --cpu=0 --time=9469.005397000, --ns
+ parallel-perf-output/cpu-1/time-range-0/cmd.txt:perf script --cpu=1 --time=,9469.005396999 --ns
+ parallel-perf-output/cpu-1/time-range-1/cmd.txt:perf script --cpu=1 --time=9469.005397000, --ns
+
+Subdivisions of time range, and cpus if the --per-cpu option is used, are
+expressed by the --time and --cpu perf script options respectively. If the
+supplied perf script command has a --time option, then that time range is
+subdivided, otherwise the time range given by 'time of first sample' to
+'time of last sample' is used (refer perf script --header-only). Similarly, the
+supplied perf script command may provide a --cpu option, and only those CPUs
+will be processed.
+
+To prevent time intervals becoming too small, the --min-interval option can
+be used.
+
+Note there is special handling for processing Intel PT traces. If an interval is
+not specified and the perf record command contained the intel_pt event, then the
+time range will be subdivided in order to produce subdivisions that contain
+approximately the same amount of trace data. That is accomplished by counting
+double-quick (--itrace=qqi) samples, and choosing time ranges that encompass
+approximately the same number of samples. In that case, time ranges may not be
+the same for each CPU processed. For Intel PT, --per-cpu is the default, but
+that can be overridden by --no-per-cpu. Note, for Intel PT, double-quick
+decoding produces 1 sample for each PSB synchronization packet, which in turn
+come after a certain number of bytes output, determined by psb_period (refer
+perf Intel PT documentation). The minimum number of double-quick samples that
+will define a time range can be set by the --min_size option, which defaults to
+64.
+""")
+ ap.add_argument("-o", "--output-dir", default="parallel-perf-output", help="output directory (default 'parallel-perf-output')")
+ ap.add_argument("-j", "--jobs", type=int, default=0, help="maximum number of jobs to run in parallel at one time (default is the number of CPUs)")
+ ap.add_argument("-n", "--nr", type=int, default=0, help="number of time subdivisions (default is the number of jobs)")
+ ap.add_argument("-i", "--interval", type=float, default=0, help="subdivide the time range using this time interval (in seconds e.g. 0.1 for a tenth of a second)")
+ ap.add_argument("-c", "--per-cpu", action="store_true", help="process data for each CPU in parallel")
+ ap.add_argument("-m", "--min-interval", type=float, default=glb_min_interval, help=f"minimum interval (default {glb_min_interval} seconds)")
+ ap.add_argument("-p", "--pipe-to", help="command to pipe output to (optional)")
+ ap.add_argument("-N", "--no-per-cpu", action="store_true", help="do not process data for each CPU in parallel")
+ ap.add_argument("-b", "--min_size", type=int, default=glb_min_samples, help="minimum data size (for Intel PT in PSBs)")
+ ap.add_argument("-D", "--dry-run", action="store_true", help="do not run any jobs, just show the perf script commands")
+ ap.add_argument("-q", "--quiet", action="store_true", help="do not print any messages except errors")
+ ap.add_argument("-v", "--verbose", action="store_true", help="print more messages")
+ ap.add_argument("-d", "--debug", action="store_true", help="print debugging messages")
+ cmd_line = list(args)
+ try:
+ split_pos = cmd_line.index("--")
+ cmd = cmd_line[split_pos + 1:]
+ args = cmd_line[:split_pos]
+ except:
+ cmd = None
+ args = cmd_line
+ a = ap.parse_args(args=args[1:])
+ a.cmd = cmd
+ a.verbosity = Verbosity(a.quiet, a.verbose, a.debug)
+ try:
+ if a.cmd == None:
+ if len(args) <= 1:
+ ap.print_help()
+ return True
+ raise Exception("Command line must contain '--' before perf command")
+ return RunParallelPerf(a)
+ except Exception as e:
+ print("Fatal error: ", str(e))
+ if a.debug:
+ raise
+ return False
+
+if __name__ == "__main__":
+ if not Main(sys.argv):
+ sys.exit(1)
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 0173f5402a35b9..98956e0e076559 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -11,18 +11,19 @@
static unsigned long *get_bitmap(const char *str, int nbits)
{
struct perf_cpu_map *map = perf_cpu_map__new(str);
- unsigned long *bm = NULL;
- int i;
+ unsigned long *bm;
bm = bitmap_zalloc(nbits);
if (map && bm) {
- for (i = 0; i < perf_cpu_map__nr(map); i++)
- __set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
+ int i;
+ struct perf_cpu cpu;
+
+ perf_cpu_map__for_each_cpu(cpu, i, map)
+ __set_bit(cpu.cpu, bm);
}
- if (map)
- perf_cpu_map__put(map);
+ perf_cpu_map__put(map);
return bm;
}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index d13ee7683d9d83..c3d84b67ca8e77 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -39,7 +39,10 @@
* making them easier to debug.
*/
static bool dont_fork;
-/* Fork the tests in parallel and then wait for their completion. */
+/* Don't fork the tests in parallel and wait for their completion. */
+static bool sequential = true;
+/* Do it in parallel, lacks infrastructure to avoid running tests that clash for resources,
+ * So leave it as the developers choice to enable while working on the needed infra */
static bool parallel;
const char *dso_to_test;
const char *test_objdump_path = "objdump";
@@ -274,11 +277,8 @@ static int finish_test(struct child_test *child_test, int width)
struct test_suite *t = child_test->test;
int i = child_test->test_num;
int subi = child_test->subtest;
- int out = child_test->process.out;
int err = child_test->process.err;
- bool out_done = out <= 0;
bool err_done = err <= 0;
- struct strbuf out_output = STRBUF_INIT;
struct strbuf err_output = STRBUF_INIT;
int ret;
@@ -290,11 +290,9 @@ static int finish_test(struct child_test *child_test, int width)
pr_info("%3d: %-*s:\n", i + 1, width, test_description(t, -1));
/*
- * Busy loop reading from the child's stdout and stderr that are set to
- * be non-blocking until EOF.
+ * Busy loop reading from the child's stdout/stderr that are set to be
+ * non-blocking until EOF.
*/
- if (!out_done)
- fcntl(out, F_SETFL, O_NONBLOCK);
if (!err_done)
fcntl(err, F_SETFL, O_NONBLOCK);
if (verbose > 1) {
@@ -303,11 +301,8 @@ static int finish_test(struct child_test *child_test, int width)
else
pr_info("%3d: %s:\n", i + 1, test_description(t, -1));
}
- while (!out_done || !err_done) {
- struct pollfd pfds[2] = {
- { .fd = out,
- .events = POLLIN | POLLERR | POLLHUP | POLLNVAL,
- },
+ while (!err_done) {
+ struct pollfd pfds[1] = {
{ .fd = err,
.events = POLLIN | POLLERR | POLLHUP | POLLNVAL,
},
@@ -315,23 +310,9 @@ static int finish_test(struct child_test *child_test, int width)
char buf[512];
ssize_t len;
- /* Poll to avoid excessive spinning, timeout set for 1000ms. */
- poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/1000);
- if (!out_done && pfds[0].revents) {
- errno = 0;
- len = read(out, buf, sizeof(buf) - 1);
-
- if (len <= 0) {
- out_done = errno != EAGAIN;
- } else {
- buf[len] = '\0';
- if (verbose > 1)
- fprintf(stdout, "%s", buf);
- else
- strbuf_addstr(&out_output, buf);
- }
- }
- if (!err_done && pfds[1].revents) {
+ /* Poll to avoid excessive spinning, timeout set for 100ms. */
+ poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/100);
+ if (!err_done && pfds[0].revents) {
errno = 0;
len = read(err, buf, sizeof(buf) - 1);
@@ -354,14 +335,10 @@ static int finish_test(struct child_test *child_test, int width)
pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi));
else
pr_info("%3d: %s:\n", i + 1, test_description(t, -1));
- fprintf(stdout, "%s", out_output.buf);
fprintf(stderr, "%s", err_output.buf);
}
- strbuf_release(&out_output);
strbuf_release(&err_output);
print_test_result(t, i, subi, ret, width);
- if (out > 0)
- close(out);
if (err > 0)
close(err);
return 0;
@@ -394,12 +371,13 @@ static int start_test(struct test_suite *test, int i, int subi, struct child_tes
(*child)->process.no_stdout = 1;
(*child)->process.no_stderr = 1;
} else {
+ (*child)->process.stdout_to_stderr = 1;
(*child)->process.out = -1;
(*child)->process.err = -1;
}
(*child)->process.no_exec_cmd = run_test_child;
err = start_command(&(*child)->process);
- if (err || parallel)
+ if (err || !sequential)
return err;
return finish_test(*child, width);
}
@@ -465,7 +443,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
int err = start_test(t, curr, -1, &child_tests[child_test_num++], width);
if (err) {
- /* TODO: if parallel waitpid the already forked children. */
+ /* TODO: if !sequential waitpid the already forked children. */
free(child_tests);
return err;
}
@@ -485,7 +463,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
}
}
for (i = 0; i < child_test_num; i++) {
- if (parallel) {
+ if (!sequential) {
int ret = finish_test(child_tests[i], width);
if (ret)
@@ -561,8 +539,9 @@ int cmd_test(int argc, const char **argv)
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('F', "dont-fork", &dont_fork,
"Do not fork for testcase"),
- OPT_BOOLEAN('p', "parallel", &parallel,
- "Run the tests altogether in parallel"),
+ OPT_BOOLEAN('p', "parallel", &parallel, "Run the tests in parallel"),
+ OPT_BOOLEAN('S', "sequential", &sequential,
+ "Run the tests one after another rather than in parallel"),
OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"),
OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"),
OPT_STRING(0, "objdump", &test_objdump_path, "path",
@@ -589,6 +568,11 @@ int cmd_test(int argc, const char **argv)
if (workload)
return run_workload(workload, argc, argv);
+ if (dont_fork)
+ sequential = true;
+ else if (parallel)
+ sequential = false;
+
symbol_conf.priv_size = sizeof(int);
symbol_conf.try_vmlinux_path = true;
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 7a3a7bbbec7146..29d2f3ee4e10fb 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -637,11 +637,11 @@ static int do_test_code_reading(bool try_kcore)
evlist__config(evlist, &opts, NULL);
- evsel = evlist__first(evlist);
-
- evsel->core.attr.comm = 1;
- evsel->core.attr.disabled = 1;
- evsel->core.attr.enable_on_exec = 0;
+ evlist__for_each_entry(evlist, evsel) {
+ evsel->core.attr.comm = 1;
+ evsel->core.attr.disabled = 1;
+ evsel->core.attr.enable_on_exec = 0;
+ }
ret = evlist__open(evlist);
if (ret < 0) {
diff --git a/tools/perf/tests/config-fragments/config b/tools/perf/tests/config-fragments/config
index c340b3195fcaa3..4fca12851016d1 100644
--- a/tools/perf/tests/config-fragments/config
+++ b/tools/perf/tests/config-fragments/config
@@ -9,3 +9,6 @@ CONFIG_GENERIC_TRACER=y
CONFIG_FTRACE=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BRANCH_PROFILE_NONE=y
+CONFIG_KPROBES=y
+CONFIG_KPROBE_EVENTS=y
+CONFIG_UPROBE_EVENTS=y
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index 15ff86f9da0b1d..1922cac13a2453 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -37,7 +37,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
continue;
}
evlist__for_each_entry(evlist, evsel) {
- if (strcmp(evsel__name(evsel), name)) {
+ if (!evsel__name_is(evsel, name)) {
pr_debug("%s != %s\n", evsel__name(evsel), name);
ret = TEST_FAIL;
}
@@ -71,7 +71,7 @@ static int perf_evsel__name_array_test(const char *const names[], int nr_names)
continue;
}
evlist__for_each_entry(evlist, evsel) {
- if (strcmp(evsel__name(evsel), names[i])) {
+ if (!evsel__name_is(evsel, names[i])) {
pr_debug("%s != %s\n", evsel__name(evsel), names[i]);
ret = TEST_FAIL;
}
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index feb5727584d141..993e482f094c2d 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -470,8 +470,7 @@ static int test__checkevent_breakpoint_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name",
- !strcmp(evsel__name(evsel), "mem:0:u"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "mem:0:u"));
return test__checkevent_breakpoint(evlist);
}
@@ -484,8 +483,7 @@ static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name",
- !strcmp(evsel__name(evsel), "mem:0:x:k"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "mem:0:x:k"));
return test__checkevent_breakpoint_x(evlist);
}
@@ -498,8 +496,7 @@ static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name",
- !strcmp(evsel__name(evsel), "mem:0:r:hp"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "mem:0:r:hp"));
return test__checkevent_breakpoint_r(evlist);
}
@@ -512,8 +509,7 @@ static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name",
- !strcmp(evsel__name(evsel), "mem:0:w:up"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "mem:0:w:up"));
return test__checkevent_breakpoint_w(evlist);
}
@@ -526,8 +522,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name",
- !strcmp(evsel__name(evsel), "mem:0:rw:kp"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "mem:0:rw:kp"));
return test__checkevent_breakpoint_rw(evlist);
}
@@ -540,8 +535,7 @@ static int test__checkevent_breakpoint_modifier_name(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name",
- !strcmp(evsel__name(evsel), "breakpoint"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint"));
return test__checkevent_breakpoint(evlist);
}
@@ -554,8 +548,7 @@ static int test__checkevent_breakpoint_x_modifier_name(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name",
- !strcmp(evsel__name(evsel), "breakpoint"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint"));
return test__checkevent_breakpoint_x(evlist);
}
@@ -568,8 +561,7 @@ static int test__checkevent_breakpoint_r_modifier_name(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name",
- !strcmp(evsel__name(evsel), "breakpoint"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint"));
return test__checkevent_breakpoint_r(evlist);
}
@@ -582,8 +574,7 @@ static int test__checkevent_breakpoint_w_modifier_name(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name",
- !strcmp(evsel__name(evsel), "breakpoint"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint"));
return test__checkevent_breakpoint_w(evlist);
}
@@ -596,8 +587,7 @@ static int test__checkevent_breakpoint_rw_modifier_name(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong name",
- !strcmp(evsel__name(evsel), "breakpoint"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint"));
return test__checkevent_breakpoint_rw(evlist);
}
@@ -609,12 +599,12 @@ static int test__checkevent_breakpoint_2_events(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "breakpoint1"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint1"));
evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->core.attr.type);
- TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "breakpoint2"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "breakpoint2"));
return TEST_OK;
}
@@ -691,15 +681,14 @@ static int test__checkevent_pmu_name(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", test_config(evsel, 1));
- TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "krava"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "krava"));
/* cpu/config=2/u" */
evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", test_config(evsel, 2));
- TEST_ASSERT_VAL("wrong name",
- !strcmp(evsel__name(evsel), "cpu/config=2/u"));
+ TEST_ASSERT_VAL("wrong name", evsel__name_is(evsel, "cpu/config=2/u"));
return TEST_OK;
}
@@ -953,8 +942,8 @@ static int test__group2(struct evlist *evlist)
continue;
}
if (evsel->core.attr.type == PERF_TYPE_HARDWARE &&
- test_config(evsel, PERF_COUNT_HW_CACHE_REFERENCES)) {
- /* cache-references + :u modifier */
+ test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) {
+ /* branches + :u modifier */
TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
@@ -2043,7 +2032,7 @@ static const struct evlist_test test__events[] = {
/* 8 */
},
{
- .name = "{faults:k,cache-references}:u,cycles:k",
+ .name = "{faults:k,branches}:u,cycles:k",
.check = test__group2,
/* 9 */
},
diff --git a/tools/perf/tests/shell/annotate.sh b/tools/perf/tests/shell/annotate.sh
new file mode 100755
index 00000000000000..1db1e8113d9943
--- /dev/null
+++ b/tools/perf/tests/shell/annotate.sh
@@ -0,0 +1,83 @@
+#!/bin/sh
+# perf annotate basic tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+shelldir=$(dirname "$0")
+
+# shellcheck source=lib/perf_has_symbol.sh
+. "${shelldir}"/lib/perf_has_symbol.sh
+
+testsym="noploop"
+
+skip_test_missing_symbol ${testsym}
+
+err=0
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+testprog="perf test -w noploop"
+# disassembly format: "percent : offset: instruction (operands ...)"
+disasm_regex="[0-9]*\.[0-9]* *: *\w*: *\w*"
+
+cleanup() {
+ rm -rf "${perfdata}"
+ rm -rf "${perfdata}".old
+
+ trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+ cleanup
+ exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+test_basic() {
+ echo "Basic perf annotate test"
+ if ! perf record -o "${perfdata}" ${testprog} 2> /dev/null
+ then
+ echo "Basic annotate [Failed: perf record]"
+ err=1
+ return
+ fi
+
+ # check if it has the target symbol
+ if ! perf annotate -i "${perfdata}" 2> /dev/null | grep "${testsym}"
+ then
+ echo "Basic annotate [Failed: missing target symbol]"
+ err=1
+ return
+ fi
+
+ # check if it has the disassembly lines
+ if ! perf annotate -i "${perfdata}" 2> /dev/null | grep "${disasm_regex}"
+ then
+ echo "Basic annotate [Failed: missing disasm output from default disassembler]"
+ err=1
+ return
+ fi
+
+ # check again with a target symbol name
+ if ! perf annotate -i "${perfdata}" "${testsym}" 2> /dev/null | \
+ grep -m 3 "${disasm_regex}"
+ then
+ echo "Basic annotate [Failed: missing disasm output when specifying the target symbol]"
+ err=1
+ return
+ fi
+
+ # check one more with external objdump tool (forced by --objdump option)
+ if ! perf annotate -i "${perfdata}" --objdump=objdump 2> /dev/null | \
+ grep -m 3 "${disasm_regex}"
+ then
+ echo "Basic annotate [Failed: missing disasm output from non default disassembler (using --objdump)]"
+ err=1
+ return
+ fi
+ echo "Basic annotate test [Success]"
+}
+
+test_basic
+
+cleanup
+exit $err
diff --git a/tools/perf/tests/shell/base_probe/test_adding_kernel.sh b/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
index a5d707efad85cb..63bb8974b38e0b 100755
--- a/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
+++ b/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# Add 'perf probe's, list and remove them
# SPDX-License-Identifier: GPL-2.0
#
diff --git a/tools/perf/tests/shell/lib/stat_output.sh b/tools/perf/tests/shell/lib/stat_output.sh
index c81d6a9f7983d3..9a176ceae4a3c3 100644
--- a/tools/perf/tests/shell/lib/stat_output.sh
+++ b/tools/perf/tests/shell/lib/stat_output.sh
@@ -79,7 +79,7 @@ check_per_thread()
echo "[Skip] paranoid and not root"
return
fi
- perf stat --per-thread -a $2 true
+ perf stat --per-thread -p $$ $2 true
commachecker --per-thread
echo "[Success]"
}
diff --git a/tools/perf/tests/shell/script.sh b/tools/perf/tests/shell/script.sh
index fa4d71e2e72a61..c1a60365366201 100755
--- a/tools/perf/tests/shell/script.sh
+++ b/tools/perf/tests/shell/script.sh
@@ -17,7 +17,7 @@ cleanup()
sane=$(echo "${temp_dir}" | cut -b 1-21)
if [ "${sane}" = "/tmp/perf-test-script" ] ; then
echo "--- Cleaning up ---"
- rm -f "${temp_dir}/"*
+ rm -rf "${temp_dir:?}/"*
rmdir "${temp_dir}"
fi
}
@@ -65,7 +65,31 @@ _end_of_file_
echo "DB test [Success]"
}
+test_parallel_perf()
+{
+ echo "parallel-perf test"
+ if ! python3 --version >/dev/null 2>&1 ; then
+ echo "SKIP: no python3"
+ err=2
+ return
+ fi
+ pp=$(dirname "$0")/../../scripts/python/parallel-perf.py
+ if [ ! -f "${pp}" ] ; then
+ echo "SKIP: parallel-perf.py script not found "
+ err=2
+ return
+ fi
+ perf_data="${temp_dir}/pp-perf.data"
+ output1_dir="${temp_dir}/output1"
+ output2_dir="${temp_dir}/output2"
+ perf record -o "${perf_data}" --sample-cpu uname
+ python3 "${pp}" -o "${output1_dir}" --jobs 4 --verbose -- perf script -i "${perf_data}"
+ python3 "${pp}" -o "${output2_dir}" --jobs 4 --verbose --per-cpu -- perf script -i "${perf_data}"
+ echo "parallel-perf test [Success]"
+}
+
test_db
+test_parallel_perf
cleanup
diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh
index 2b9c6212dffc6f..6b630d33c32878 100755
--- a/tools/perf/tests/shell/stat+json_output.sh
+++ b/tools/perf/tests/shell/stat+json_output.sh
@@ -105,7 +105,7 @@ check_per_thread()
echo "[Skip] paranoia and not root"
return
fi
- perf stat -j --per-thread -a -o "${stat_output}" true
+ perf stat -j --per-thread -p $$ -o "${stat_output}" true
$PYTHON $pythonchecker --per-thread --file "${stat_output}"
echo "[Success]"
}
diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh
index 2d920987477468..61f8149d854e12 100755
--- a/tools/perf/tests/shell/stat_bpf_counters.sh
+++ b/tools/perf/tests/shell/stat_bpf_counters.sh
@@ -4,21 +4,59 @@
set -e
+workload="perf bench sched messaging -g 1 -l 100 -t"
+
# check whether $2 is within +/- 20% of $1
compare_number()
{
- first_num=$1
- second_num=$2
-
- # upper bound is first_num * 120%
- upper=$(expr $first_num + $first_num / 5 )
- # lower bound is first_num * 80%
- lower=$(expr $first_num - $first_num / 5 )
-
- if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
- echo "The difference between $first_num and $second_num are greater than 20%."
- exit 1
- fi
+ first_num=$1
+ second_num=$2
+
+ # upper bound is first_num * 120%
+ upper=$(expr $first_num + $first_num / 5 )
+ # lower bound is first_num * 80%
+ lower=$(expr $first_num - $first_num / 5 )
+
+ if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
+ echo "The difference between $first_num and $second_num are greater than 20%."
+ exit 1
+ fi
+}
+
+check_counts()
+{
+ base_cycles=$1
+ bpf_cycles=$2
+
+ if [ "$base_cycles" = "<not" ]; then
+ echo "Skipping: cycles event not counted"
+ exit 2
+ fi
+ if [ "$bpf_cycles" = "<not" ]; then
+ echo "Failed: cycles not counted with --bpf-counters"
+ exit 1
+ fi
+}
+
+test_bpf_counters()
+{
+ printf "Testing --bpf-counters "
+ base_cycles=$(perf stat --no-big-num -e cycles -- $workload 2>&1 | awk '/cycles/ {print $1}')
+ bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- $workload 2>&1 | awk '/cycles/ {print $1}')
+ check_counts $base_cycles $bpf_cycles
+ compare_number $base_cycles $bpf_cycles
+ echo "[Success]"
+}
+
+test_bpf_modifier()
+{
+ printf "Testing bpf event modifier "
+ stat_output=$(perf stat --no-big-num -e cycles/name=base_cycles/,cycles/name=bpf_cycles/b -- $workload 2>&1)
+ base_cycles=$(echo "$stat_output"| awk '/base_cycles/ {print $1}')
+ bpf_cycles=$(echo "$stat_output"| awk '/bpf_cycles/ {print $1}')
+ check_counts $base_cycles $bpf_cycles
+ compare_number $base_cycles $bpf_cycles
+ echo "[Success]"
}
# skip if --bpf-counters is not supported
@@ -30,16 +68,7 @@ if ! perf stat -e cycles --bpf-counters true > /dev/null 2>&1; then
exit 2
fi
-base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
-if [ "$base_cycles" = "<not" ]; then
- echo "Skipping: cycles event not counted"
- exit 2
-fi
-bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
-if [ "$bpf_cycles" = "<not" ]; then
- echo "Failed: cycles not counted with --bpf-counters"
- exit 1
-fi
+test_bpf_counters
+test_bpf_modifier
-compare_number $base_cycles $bpf_cycles
exit 0
diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
index 83b53591b1eacc..61898e25661607 100755
--- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh
+++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
@@ -6,7 +6,9 @@ shelldir=$(dirname "$0")
# shellcheck source=lib/perf_has_symbol.sh
. "${shelldir}"/lib/perf_has_symbol.sh
-lscpu | grep -q "aarch64" || exit 2
+if [ "$(uname -m)" != "aarch64" ]; then
+ exit 2
+fi
if perf version --build-options | grep HAVE_DWARF_UNWIND_SUPPORT | grep -q OFF
then
diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh
index 65dd852071250a..3302ea0b96723b 100755
--- a/tools/perf/tests/shell/test_arm_coresight.sh
+++ b/tools/perf/tests/shell/test_arm_coresight.sh
@@ -188,7 +188,7 @@ arm_cs_etm_snapshot_test() {
arm_cs_etm_basic_test() {
echo "Recording trace with '$*'"
- perf record -o ${perfdata} "$@" -- ls > /dev/null 2>&1
+ perf record -o ${perfdata} "$@" -m,8M -- ls > /dev/null 2>&1
perf_script_branch_samples ls &&
perf_report_branch_samples ls &&
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 2a842f53fbb575..a8cb5ba898ab3a 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -68,6 +68,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
};
int i;
struct aggr_cpu_id id;
+ struct perf_cpu cpu;
session = perf_session__new(&data, NULL);
TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
@@ -113,8 +114,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);
for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
- struct perf_cpu cpu = { .cpu = i };
-
+ cpu.cpu = i;
if (!perf_cpu_map__has(map, cpu))
continue;
pr_debug("CPU %d, core %d, socket %d\n", i,
@@ -123,48 +123,48 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}
// Test that CPU ID contains socket, die, core and CPU
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__cpu(cpu, NULL);
TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match",
- perf_cpu_map__cpu(map, i).cpu == id.cpu.cpu);
+ cpu.cpu == id.cpu.cpu);
TEST_ASSERT_VAL("Cpu map - Core ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
+ session->header.env.cpu[cpu.cpu].core_id == id.core);
TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);
TEST_ASSERT_VAL("Cpu map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);
TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Cpu map - Thread IDX is set", id.thread_idx == -1);
}
// Test that core ID contains socket, die and core
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__core(cpu, NULL);
TEST_ASSERT_VAL("Core map - Core ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core);
+ session->header.env.cpu[cpu.cpu].core_id == id.core);
TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);
TEST_ASSERT_VAL("Core map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);
TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Core map - Thread IDX is set", id.thread_idx == -1);
}
// Test that die ID contains socket and die
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__die(cpu, NULL);
TEST_ASSERT_VAL("Die map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);
TEST_ASSERT_VAL("Die map - Die ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
+ session->header.env.cpu[cpu.cpu].die_id == id.die);
TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Die map - Core is set", id.core == -1);
@@ -173,10 +173,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}
// Test that socket ID contains only socket
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__socket(cpu, NULL);
TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
- session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id ==
+ session->header.env.cpu[cpu.cpu].socket_id ==
id.socket);
TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
@@ -187,10 +187,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
}
// Test that node ID contains only node
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL);
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
+ id = aggr_cpu_id__node(cpu, NULL);
TEST_ASSERT_VAL("Node map - Node ID doesn't match",
- cpu__get_node(perf_cpu_map__cpu(map, i)) == id.node);
+ cpu__get_node(cpu) == id.node);
TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
TEST_ASSERT_VAL("Node map - Core is set", id.core == -1);
diff --git a/tools/perf/tests/workloads/datasym.c b/tools/perf/tests/workloads/datasym.c
index ddd40bc63448ae..8e08fc75a973e5 100644
--- a/tools/perf/tests/workloads/datasym.c
+++ b/tools/perf/tests/workloads/datasym.c
@@ -16,6 +16,22 @@ static int datasym(int argc __maybe_unused, const char **argv __maybe_unused)
{
for (;;) {
buf1.data1++;
+ if (buf1.data1 == 123) {
+ /*
+ * Add some 'noise' in the loop to work around errata
+ * 1694299 on Arm N1.
+ *
+ * Bias exists in SPE sampling which can cause the load
+ * and store instructions to be skipped entirely. This
+ * comes and goes randomly depending on the offset the
+ * linker places the datasym loop at in the Perf binary.
+ * With an extra branch in the middle of the loop that
+ * isn't always taken, the instruction stream is no
+ * longer a continuous repeating pattern that interacts
+ * badly with the bias.
+ */
+ buf1.data1++;
+ }
buf1.data2 += buf1.data1;
}
return 0;
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index d11ce256f51140..cb3c1399ff4091 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -1,6 +1,7 @@
perf-y += clone.o
perf-y += fcntl.o
perf-y += flock.o
+perf-y += fs_at_flags.o
perf-y += fsmount.o
perf-y += fspick.o
ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
@@ -19,3 +20,17 @@ perf-y += statx.o
perf-y += sync_file_range.o
perf-y += timespec.o
perf-y += tracepoints/
+
+ifdef SHELLCHECK
+ SHELL_TESTS := $(wildcard trace/beauty/*.sh)
+ TEST_LOGS := $(SHELL_TESTS:trace/beauty/%=%.shellcheck_log)
+else
+ SHELL_TESTS :=
+ TEST_LOGS :=
+endif
+
+$(OUTPUT)%.shellcheck_log: %
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)shellcheck -s bash -a -S warning "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-y += $(TEST_LOGS)
diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
index 3f73ac3ed3a070..d18bfb238f660f 100644
--- a/tools/arch/x86/include/asm/irq_vectors.h
+++ b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
@@ -84,11 +84,9 @@
#define HYPERVISOR_CALLBACK_VECTOR 0xf3
/* Vector for KVM to deliver posted interrupt IPI */
-#if IS_ENABLED(CONFIG_KVM)
#define POSTED_INTR_VECTOR 0xf2
#define POSTED_INTR_WAKEUP_VECTOR 0xf1
#define POSTED_INTR_NESTED_VECTOR 0xf0
-#endif
#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/perf/trace/beauty/arch/x86/include/uapi/asm/prctl.h
index 384e2cc6ac190d..384e2cc6ac190d 100644
--- a/tools/arch/x86/include/uapi/asm/prctl.h
+++ b/tools/perf/trace/beauty/arch/x86/include/uapi/asm/prctl.h
diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh
index 7df4bf5b55a3cc..30d3889b295771 100755
--- a/tools/perf/trace/beauty/arch_errno_names.sh
+++ b/tools/perf/trace/beauty/arch_errno_names.sh
@@ -60,10 +60,12 @@ create_arch_errno_table_func()
printf 'arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch)\n'
printf '{\n'
for arch in $archlist; do
- printf '\tif (!strcmp(arch, "%s"))\n' $(arch_string "$arch")
- printf '\t\treturn errno_to_name__%s;\n' $(arch_string "$arch")
+ arch_str=$(arch_string "$arch")
+ printf '\tif (!strcmp(arch, "%s"))\n' "$arch_str"
+ printf '\t\treturn errno_to_name__%s;\n' "$arch_str"
done
- printf '\treturn errno_to_name__%s;\n' $(arch_string "$default")
+ arch_str=$(arch_string "$default")
+ printf '\treturn errno_to_name__%s;\n' "$arch_str"
printf '}\n'
}
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 9feb794f5c6e15..78d10d92d351f8 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -234,8 +234,11 @@ size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct sysc
size_t syscall_arg__scnprintf_socket_level(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_SK_LEVEL syscall_arg__scnprintf_socket_level
-size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg);
-#define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags
+size_t syscall_arg__scnprintf_fs_at_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FS_AT_FLAGS syscall_arg__scnprintf_fs_at_flags
+
+size_t syscall_arg__scnprintf_faccessat2_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FACCESSAT2_FLAGS syscall_arg__scnprintf_faccessat2_flags
size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask
diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c
index f4db894e0af6d1..c9fa8f7e82b909 100644
--- a/tools/perf/trace/beauty/clone.c
+++ b/tools/perf/trace/beauty/clone.c
@@ -7,52 +7,16 @@
#include "trace/beauty/beauty.h"
#include <linux/kernel.h>
+#include <linux/log2.h>
#include <sys/types.h>
-#include <uapi/linux/sched.h>
+#include <sched.h>
static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
{
- const char *prefix = "CLONE_";
- int printed = 0;
+#include "trace/beauty/generated/clone_flags_array.c"
+ static DEFINE_STRARRAY(clone_flags, "CLONE_");
-#define P_FLAG(n) \
- if (flags & CLONE_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
- flags &= ~CLONE_##n; \
- }
-
- P_FLAG(VM);
- P_FLAG(FS);
- P_FLAG(FILES);
- P_FLAG(SIGHAND);
- P_FLAG(PIDFD);
- P_FLAG(PTRACE);
- P_FLAG(VFORK);
- P_FLAG(PARENT);
- P_FLAG(THREAD);
- P_FLAG(NEWNS);
- P_FLAG(SYSVSEM);
- P_FLAG(SETTLS);
- P_FLAG(PARENT_SETTID);
- P_FLAG(CHILD_CLEARTID);
- P_FLAG(DETACHED);
- P_FLAG(UNTRACED);
- P_FLAG(CHILD_SETTID);
- P_FLAG(NEWCGROUP);
- P_FLAG(NEWUTS);
- P_FLAG(NEWIPC);
- P_FLAG(NEWUSER);
- P_FLAG(NEWPID);
- P_FLAG(NEWNET);
- P_FLAG(IO);
- P_FLAG(CLEAR_SIGHAND);
- P_FLAG(INTO_CGROUP);
-#undef P_FLAG
-
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
- return printed;
+ return strarray__scnprintf_flags(&strarray__clone_flags, bf, size, show_prefix, flags);
}
size_t syscall_arg__scnprintf_clone_flags(char *bf, size_t size, struct syscall_arg *arg)
diff --git a/tools/perf/trace/beauty/clone.sh b/tools/perf/trace/beauty/clone.sh
new file mode 100755
index 00000000000000..18b6c0d7569372
--- /dev/null
+++ b/tools/perf/trace/beauty/clone.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 1 ] ; then
+ beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/
+else
+ beauty_uapi_linux_dir=$1
+fi
+
+linux_sched=${beauty_uapi_linux_dir}/sched.h
+
+printf "static const char *clone_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+CLONE_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+grep -E $regex ${linux_sched} | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c
index 56ef83b3d130b5..d075904dcccedc 100644
--- a/tools/perf/trace/beauty/fcntl.c
+++ b/tools/perf/trace/beauty/fcntl.c
@@ -7,7 +7,7 @@
#include "trace/beauty/beauty.h"
#include <linux/kernel.h>
-#include <uapi/linux/fcntl.h>
+#include <linux/fcntl.h>
static size_t fcntl__scnprintf_getfd(unsigned long val, char *bf, size_t size, bool show_prefix)
{
diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c
index c14274edd6d9de..a6514a6f07cfa0 100644
--- a/tools/perf/trace/beauty/flock.c
+++ b/tools/perf/trace/beauty/flock.c
@@ -2,7 +2,7 @@
#include "trace/beauty/beauty.h"
#include <linux/kernel.h>
-#include <uapi/linux/fcntl.h>
+#include <linux/fcntl.h>
#ifndef LOCK_MAND
#define LOCK_MAND 32
diff --git a/tools/perf/trace/beauty/fs_at_flags.c b/tools/perf/trace/beauty/fs_at_flags.c
new file mode 100644
index 00000000000000..c200669cb944a4
--- /dev/null
+++ b/tools/perf/trace/beauty/fs_at_flags.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/fs_at_flags.c
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <sys/types.h>
+#include <linux/fcntl.h>
+#include <linux/log2.h>
+
+/*
+ * uapi/linux/fcntl.h does not keep a copy in tools headers directory,
+ * for system with kernel versions before v5.8, need to sync AT_EACCESS macro.
+ */
+#ifndef AT_EACCESS
+#define AT_EACCESS 0x200
+#endif
+
+#include "trace/beauty/generated/fs_at_flags_array.c"
+static DEFINE_STRARRAY(fs_at_flags, "AT_");
+
+static size_t fs_at__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
+{
+ return strarray__scnprintf_flags(&strarray__fs_at_flags, bf, size, show_prefix, flags);
+}
+
+size_t syscall_arg__scnprintf_fs_at_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ bool show_prefix = arg->show_string_prefix;
+ int flags = arg->val;
+
+ return fs_at__scnprintf_flags(flags, bf, size, show_prefix);
+}
+
+static size_t faccessat2__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
+{
+ int printed = 0;
+
+ // AT_EACCESS is the same as AT_REMOVEDIR, that is in fs_at_flags_array,
+ // special case it here.
+ if (flags & AT_EACCESS) {
+ flags &= ~AT_EACCESS;
+ printed += scnprintf(bf + printed, size - printed, "%sEACCESS%s",
+ show_prefix ? strarray__fs_at_flags.prefix : "", flags ? "|" : "");
+ }
+
+ return strarray__scnprintf_flags(&strarray__fs_at_flags, bf + printed, size - printed, show_prefix, flags);
+}
+
+size_t syscall_arg__scnprintf_faccessat2_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ bool show_prefix = arg->show_string_prefix;
+ int flags = arg->val;
+
+ return faccessat2__scnprintf_flags(flags, bf, size, show_prefix);
+}
diff --git a/tools/perf/trace/beauty/fs_at_flags.sh b/tools/perf/trace/beauty/fs_at_flags.sh
new file mode 100755
index 00000000000000..456f59addf7410
--- /dev/null
+++ b/tools/perf/trace/beauty/fs_at_flags.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 1 ] ; then
+ beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/
+else
+ beauty_uapi_linux_dir=$1
+fi
+
+linux_fcntl=${beauty_uapi_linux_dir}/fcntl.h
+
+printf "static const char *fs_at_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+AT_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+# AT_EACCESS is only meaningful to faccessat, so we will special case it there...
+# AT_STATX_SYNC_TYPE is not a bit, its a mask of AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC and AT_STATX_DONT_SYNC
+grep -E $regex ${linux_fcntl} | \
+ grep -v AT_EACCESS | \
+ grep -v AT_STATX_SYNC_TYPE | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/fsconfig.sh b/tools/perf/trace/beauty/fsconfig.sh
index bc6ef7bb7a5f93..09cee79de00ca3 100755
--- a/tools/perf/trace/beauty/fsconfig.sh
+++ b/tools/perf/trace/beauty/fsconfig.sh
@@ -2,12 +2,12 @@
# SPDX-License-Identifier: LGPL-2.1
if [ $# -ne 1 ] ; then
- linux_header_dir=tools/include/uapi/linux
+ beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/
else
- linux_header_dir=$1
+ beauty_uapi_linux_dir=$1
fi
-linux_mount=${linux_header_dir}/mount.h
+linux_mount=${beauty_uapi_linux_dir}/mount.h
printf "static const char *fsconfig_cmds[] = {\n"
ms='[[:space:]]*'
diff --git a/tools/perf/trace/beauty/fsmount.c b/tools/perf/trace/beauty/fsmount.c
index 30c8c082a3c3b3..28c2c16fc1a80d 100644
--- a/tools/perf/trace/beauty/fsmount.c
+++ b/tools/perf/trace/beauty/fsmount.c
@@ -7,7 +7,14 @@
#include "trace/beauty/beauty.h"
#include <linux/log2.h>
-#include <uapi/linux/mount.h>
+#include <sys/mount.h>
+
+#ifndef MOUNT_ATTR__ATIME
+#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */
+#endif
+#ifndef MOUNT_ATTR_RELATIME
+#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */
+#endif
static size_t fsmount__scnprintf_attr_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
{
diff --git a/tools/perf/trace/beauty/fsmount.sh b/tools/perf/trace/beauty/fsmount.sh
index cba8897a751fd4..6b67a54cdeee64 100755
--- a/tools/perf/trace/beauty/fsmount.sh
+++ b/tools/perf/trace/beauty/fsmount.sh
@@ -2,12 +2,12 @@
# SPDX-License-Identifier: LGPL-2.1
if [ $# -ne 1 ] ; then
- linux_header_dir=tools/include/uapi/linux
+ beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/
else
- linux_header_dir=$1
+ beauty_uapi_linux_dir=$1
fi
-linux_mount=${linux_header_dir}/mount.h
+linux_mount=${beauty_uapi_linux_dir}/mount.h
# Remove MOUNT_ATTR_RELATIME as it is zeros, handle it a special way in the beautifier
# Only handle MOUNT_ATTR_ followed by a capital letter/num as __ is special case
diff --git a/tools/perf/trace/beauty/fspick.sh b/tools/perf/trace/beauty/fspick.sh
index 1f088329b96ef0..0d9951c22b952e 100755
--- a/tools/perf/trace/beauty/fspick.sh
+++ b/tools/perf/trace/beauty/fspick.sh
@@ -2,12 +2,12 @@
# SPDX-License-Identifier: LGPL-2.1
if [ $# -ne 1 ] ; then
- linux_header_dir=tools/include/uapi/linux
+ beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/
else
- linux_header_dir=$1
+ beauty_uapi_linux_dir=$1
fi
-linux_mount=${linux_header_dir}/mount.h
+linux_mount=${beauty_uapi_linux_dir}/mount.h
printf "static const char *fspick_flags[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+FSPICK_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
index 282e90aeb163c0..282e90aeb163c0 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
diff --git a/tools/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index 48ad69f7722e1a..45e4e64fd6643c 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -64,6 +64,24 @@ struct fstrim_range {
__u64 minlen;
};
+/*
+ * We include a length field because some filesystems (vfat) have an identifier
+ * that we do want to expose as a UUID, but doesn't have the standard length.
+ *
+ * We use a fixed size buffer beacuse this interface will, by fiat, never
+ * support "UUIDs" longer than 16 bytes; we don't want to force all downstream
+ * users to have to deal with that.
+ */
+struct fsuuid2 {
+ __u8 len;
+ __u8 uuid[16];
+};
+
+struct fs_sysfs_path {
+ __u8 len;
+ __u8 name[128];
+};
+
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1
@@ -215,6 +233,13 @@ struct fsxattr {
#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX])
#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX])
+/* Returns the external filesystem UUID, the same one blkid returns */
+#define FS_IOC_GETFSUUID _IOR(0x15, 0, struct fsuuid2)
+/*
+ * Returns the path component under /sys/fs/ that refers to this filesystem;
+ * also /sys/kernel/debug/ for filesystems with debugfs exports
+ */
+#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path)
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
@@ -301,9 +326,12 @@ typedef int __bitwise __kernel_rwf_t;
/* per-IO O_APPEND */
#define RWF_APPEND ((__force __kernel_rwf_t)0x00000010)
+/* per-IO negation of O_APPEND */
+#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND)
+ RWF_APPEND | RWF_NOAPPEND)
/* Pagemap ioctl */
#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
diff --git a/tools/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h
index ad5478dbad0073..ad5478dbad0073 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h
diff --git a/tools/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 370ed14b1ae092..370ed14b1ae092 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
diff --git a/tools/include/uapi/linux/sched.h b/tools/perf/trace/beauty/include/uapi/linux/sched.h
index 3bac0a8ceab26e..3bac0a8ceab26e 100644
--- a/tools/include/uapi/linux/sched.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/sched.h
diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h
new file mode 100644
index 00000000000000..2f2ee82d55175d
--- /dev/null
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_STAT_H
+#define _UAPI_LINUX_STAT_H
+
+#include <linux/types.h>
+
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+
+#define S_IFMT 00170000
+#define S_IFSOCK 0140000
+#define S_IFLNK 0120000
+#define S_IFREG 0100000
+#define S_IFBLK 0060000
+#define S_IFDIR 0040000
+#define S_IFCHR 0020000
+#define S_IFIFO 0010000
+#define S_ISUID 0004000
+#define S_ISGID 0002000
+#define S_ISVTX 0001000
+
+#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
+#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
+#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
+#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)
+
+#define S_IRWXU 00700
+#define S_IRUSR 00400
+#define S_IWUSR 00200
+#define S_IXUSR 00100
+
+#define S_IRWXG 00070
+#define S_IRGRP 00040
+#define S_IWGRP 00020
+#define S_IXGRP 00010
+
+#define S_IRWXO 00007
+#define S_IROTH 00004
+#define S_IWOTH 00002
+#define S_IXOTH 00001
+
+#endif
+
+/*
+ * Timestamp structure for the timestamps in struct statx.
+ *
+ * tv_sec holds the number of seconds before (negative) or after (positive)
+ * 00:00:00 1st January 1970 UTC.
+ *
+ * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time.
+ *
+ * __reserved is held in case we need a yet finer resolution.
+ */
+struct statx_timestamp {
+ __s64 tv_sec;
+ __u32 tv_nsec;
+ __s32 __reserved;
+};
+
+/*
+ * Structures for the extended file attribute retrieval system call
+ * (statx()).
+ *
+ * The caller passes a mask of what they're specifically interested in as a
+ * parameter to statx(). What statx() actually got will be indicated in
+ * st_mask upon return.
+ *
+ * For each bit in the mask argument:
+ *
+ * - if the datum is not supported:
+ *
+ * - the bit will be cleared, and
+ *
+ * - the datum will be set to an appropriate fabricated value if one is
+ * available (eg. CIFS can take a default uid and gid), otherwise
+ *
+ * - the field will be cleared;
+ *
+ * - otherwise, if explicitly requested:
+ *
+ * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
+ * set or if the datum is considered out of date, and
+ *
+ * - the field will be filled in and the bit will be set;
+ *
+ * - otherwise, if not requested, but available in approximate form without any
+ * effort, it will be filled in anyway, and the bit will be set upon return
+ * (it might not be up to date, however, and no attempt will be made to
+ * synchronise the internal state first);
+ *
+ * - otherwise the field and the bit will be cleared before returning.
+ *
+ * Items in STATX_BASIC_STATS may be marked unavailable on return, but they
+ * will have values installed for compatibility purposes so that stat() and
+ * co. can be emulated in userspace.
+ */
+struct statx {
+ /* 0x00 */
+ __u32 stx_mask; /* What results were written [uncond] */
+ __u32 stx_blksize; /* Preferred general I/O size [uncond] */
+ __u64 stx_attributes; /* Flags conveying information about the file [uncond] */
+ /* 0x10 */
+ __u32 stx_nlink; /* Number of hard links */
+ __u32 stx_uid; /* User ID of owner */
+ __u32 stx_gid; /* Group ID of owner */
+ __u16 stx_mode; /* File mode */
+ __u16 __spare0[1];
+ /* 0x20 */
+ __u64 stx_ino; /* Inode number */
+ __u64 stx_size; /* File size */
+ __u64 stx_blocks; /* Number of 512-byte blocks allocated */
+ __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+ /* 0x40 */
+ struct statx_timestamp stx_atime; /* Last access time */
+ struct statx_timestamp stx_btime; /* File creation time */
+ struct statx_timestamp stx_ctime; /* Last attribute change time */
+ struct statx_timestamp stx_mtime; /* Last data modification time */
+ /* 0x80 */
+ __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ __u32 stx_rdev_minor;
+ __u32 stx_dev_major; /* ID of device containing file [uncond] */
+ __u32 stx_dev_minor;
+ /* 0x90 */
+ __u64 stx_mnt_id;
+ __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
+ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
+ /* 0xa0 */
+ __u64 __spare3[12]; /* Spare space for future expansion */
+ /* 0x100 */
+};
+
+/*
+ * Flags to be stx_mask
+ *
+ * Query request/result mask for statx() and struct statx::stx_mask.
+ *
+ * These bits should be set in the mask argument of statx() to request
+ * particular items when calling statx().
+ */
+#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */
+#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */
+#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */
+#define STATX_UID 0x00000008U /* Want/got stx_uid */
+#define STATX_GID 0x00000010U /* Want/got stx_gid */
+#define STATX_ATIME 0x00000020U /* Want/got stx_atime */
+#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */
+#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */
+#define STATX_INO 0x00000100U /* Want/got stx_ino */
+#define STATX_SIZE 0x00000200U /* Want/got stx_size */
+#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
+#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
+#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
+#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
+#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
+#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
+
+#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
+
+#ifndef __KERNEL__
+/*
+ * This is deprecated, and shall remain the same value in the future. To avoid
+ * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME)
+ * instead.
+ */
+#define STATX_ALL 0x00000fffU
+#endif
+
+/*
+ * Attributes to be found in stx_attributes and masked in stx_attributes_mask.
+ *
+ * These give information about the features or the state of a file that might
+ * be of use to ordinary userspace programs such as GUIs or ls rather than
+ * specialised tools.
+ *
+ * Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags
+ * semantically. Where possible, the numerical value is picked to correspond
+ * also. Note that the DAX attribute indicates that the file is in the CPU
+ * direct access state. It does not correspond to the per-inode flag that
+ * some filesystems support.
+ *
+ */
+#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */
+#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */
+#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
+#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
+#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
+#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
+#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
+#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
+#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+
+
+#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/include/uapi/linux/usbdevice_fs.h b/tools/perf/trace/beauty/include/uapi/linux/usbdevice_fs.h
index 74a84e02422ab8..74a84e02422ab8 100644
--- a/tools/include/uapi/linux/usbdevice_fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/usbdevice_fs.h
diff --git a/tools/include/uapi/linux/vhost.h b/tools/perf/trace/beauty/include/uapi/linux/vhost.h
index 649560c685f13b..b95dd84eef2db2 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/vhost.h
@@ -179,12 +179,6 @@
/* Get the config size */
#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
-/* Get the count of all virtqueues */
-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
-
-/* Get the number of virtqueue groups. */
-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
-
/* Get the number of address spaces. */
#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
@@ -227,4 +221,18 @@
*/
#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \
struct vhost_vring_state)
+
+
+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
+
+/* Get the number of virtqueue groups. */
+#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
+
+/* Get the queue size of a specific virtqueue.
+ * userspace set the vring index in vhost_vring_state.index
+ * kernel set the queue size in vhost_vring_state.num
+ */
+#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \
+ struct vhost_vring_state)
#endif
diff --git a/tools/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h
index d5b9cfbd9ceac6..628d46a0da92eb 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h
@@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image {
* *
*****************************************************************************/
-#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 16)
+#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17)
typedef unsigned long snd_pcm_uframes_t;
typedef signed long snd_pcm_sframes_t;
@@ -416,7 +416,7 @@ struct snd_pcm_hw_params {
unsigned int rmask; /* W: requested masks */
unsigned int cmask; /* R: changed masks */
unsigned int info; /* R: Info flags for returned setup */
- unsigned int msbits; /* R: used most significant bits */
+ unsigned int msbits; /* R: used most significant bits (in sample bit-width) */
unsigned int rate_num; /* R: rate numerator */
unsigned int rate_den; /* R: rate denominator */
snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */
diff --git a/tools/perf/trace/beauty/mount_flags.sh b/tools/perf/trace/beauty/mount_flags.sh
index 730099a9a67c1e..ff578f7b451b5a 100755
--- a/tools/perf/trace/beauty/mount_flags.sh
+++ b/tools/perf/trace/beauty/mount_flags.sh
@@ -1,15 +1,15 @@
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1
-[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+[ $# -eq 1 ] && beauty_uapi_linux_dir=$1 || beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/
printf "static const char *mount_flags[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
-grep -E $regex ${header_dir}/mount.h | grep -E -v '(MSK|VERBOSE|MGC_VAL)\>' | \
+grep -E $regex ${beauty_uapi_linux_dir}/mount.h | grep -E -v '(MSK|VERBOSE|MGC_VAL)\>' | \
sed -r "s/$regex/\2 \2 \1/g" | sort -n | \
xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+\(1<<([[:digit:]]+)\)[[:space:]]*.*'
-grep -E $regex ${header_dir}/mount.h | \
+grep -E $regex ${beauty_uapi_linux_dir}/mount.h | \
sed -r "s/$regex/\2 \1/g" | \
xargs printf "\t[%s + 1] = \"%s\",\n"
printf "};\n"
diff --git a/tools/perf/trace/beauty/move_mount_flags.sh b/tools/perf/trace/beauty/move_mount_flags.sh
index ce5e632d14484b..c0dde9020bc38c 100755
--- a/tools/perf/trace/beauty/move_mount_flags.sh
+++ b/tools/perf/trace/beauty/move_mount_flags.sh
@@ -2,12 +2,12 @@
# SPDX-License-Identifier: LGPL-2.1
if [ $# -ne 1 ] ; then
- linux_header_dir=tools/include/uapi/linux
+ beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/
else
- linux_header_dir=$1
+ beauty_uapi_linux_dir=$1
fi
-linux_mount=${linux_header_dir}/mount.h
+linux_mount=${beauty_uapi_linux_dir}/mount.h
printf "static const char *move_mount_flags[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c
index 6fe5ad5f5d3a4e..7d1aa9fd03da52 100644
--- a/tools/perf/trace/beauty/prctl.c
+++ b/tools/perf/trace/beauty/prctl.c
@@ -7,7 +7,7 @@
#include "trace/beauty/beauty.h"
#include <linux/kernel.h>
-#include <uapi/linux/prctl.h>
+#include <linux/prctl.h>
#include "trace/beauty/generated/prctl_option_array.c"
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index 9455d9672f140d..e049f5e9c01116 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -1,18 +1,18 @@
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1
-[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+[ $# -eq 1 ] && beauty_uapi_linux_dir=$1 || beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/
printf "static const char *prctl_options[] = {\n"
regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*/.*)?$'
-grep -E $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
+grep -E $regex ${beauty_uapi_linux_dir}/prctl.h | grep -v PR_SET_PTRACER | \
sed -E "s%$regex%\2 \1%g" | \
sort -n | xargs printf "\t[%s] = \"%s\",\n"
printf "};\n"
printf "static const char *prctl_set_mm_options[] = {\n"
regex='^#[[:space:]]+define[[:space:]]+PR_SET_MM_(\w+)[[:space:]]*([[:digit:]]+).*'
-grep -E $regex ${header_dir}/prctl.h | \
+grep -E $regex ${beauty_uapi_linux_dir}/prctl.h | \
sed -r "s/$regex/\2 \1/g" | \
sort -n | xargs printf "\t[%s] = \"%s\",\n"
printf "};\n"
diff --git a/tools/perf/trace/beauty/rename_flags.sh b/tools/perf/trace/beauty/rename_flags.sh
index 94bf7f45d28e6b..702411dd7a1c2f 100755
--- a/tools/perf/trace/beauty/rename_flags.sh
+++ b/tools/perf/trace/beauty/rename_flags.sh
@@ -2,7 +2,7 @@
# Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
# SPDX-License-Identifier: LGPL-2.1
-[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/uapi/linux/
fs_header=${header_dir}/fs.h
diff --git a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
index e0803b95759324..572939a1288455 100755
--- a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
+++ b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
@@ -1,9 +1,9 @@
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1
-[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
+[ $# -eq 1 ] && beauty_uapi_sound_dir=$1 || beauty_uapi_sound_dir=tools/perf/trace/beauty/include/uapi/sound/
printf "static const char *sndrv_ctl_ioctl_cmds[] = {\n"
-grep "^#define[\t ]\+SNDRV_CTL_IOCTL_" $header_dir/asound.h | \
+grep "^#define[\t ]\+SNDRV_CTL_IOCTL_" $beauty_uapi_sound_dir/asound.h | \
sed -r 's/^#define +SNDRV_CTL_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.U., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g'
printf "};\n"
diff --git a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
index 7a464a7bf91399..33afae9a1c07ca 100755
--- a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
+++ b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
@@ -1,9 +1,9 @@
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1
-[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
+[ $# -eq 1 ] && beauty_uapi_sound_dir=$1 || beauty_uapi_sound_dir=tools/perf/trace/beauty/include/uapi/sound/
printf "static const char *sndrv_pcm_ioctl_cmds[] = {\n"
-grep "^#define[\t ]\+SNDRV_PCM_IOCTL_" $header_dir/asound.h | \
+grep "^#define[\t ]\+SNDRV_PCM_IOCTL_" $beauty_uapi_sound_dir/asound.h | \
sed -r 's/^#define +SNDRV_PCM_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.A., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g'
printf "};\n"
diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c
index dc5943a6352d91..24843e614b935f 100644
--- a/tools/perf/trace/beauty/statx.c
+++ b/tools/perf/trace/beauty/statx.c
@@ -6,73 +6,20 @@
*/
#include "trace/beauty/beauty.h"
-#include <linux/kernel.h>
#include <sys/types.h>
-#include <uapi/linux/fcntl.h>
-#include <uapi/linux/stat.h>
+#include <linux/log2.h>
-size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg)
+static size_t statx__scnprintf_mask(unsigned long mask, char *bf, size_t size, bool show_prefix)
{
- bool show_prefix = arg->show_string_prefix;
- const char *prefix = "AT_";
- int printed = 0, flags = arg->val;
-
- if (flags == 0)
- return scnprintf(bf, size, "%s%s", show_prefix ? "AT_STATX_" : "", "SYNC_AS_STAT");
-#define P_FLAG(n) \
- if (flags & AT_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
- flags &= ~AT_##n; \
- }
-
- P_FLAG(SYMLINK_NOFOLLOW);
- P_FLAG(REMOVEDIR);
- P_FLAG(SYMLINK_FOLLOW);
- P_FLAG(NO_AUTOMOUNT);
- P_FLAG(EMPTY_PATH);
- P_FLAG(STATX_FORCE_SYNC);
- P_FLAG(STATX_DONT_SYNC);
-
-#undef P_FLAG
-
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
- return printed;
+ #include "trace/beauty/generated/statx_mask_array.c"
+ static DEFINE_STRARRAY(statx_mask, "STATX_");
+ return strarray__scnprintf_flags(&strarray__statx_mask, bf, size, show_prefix, mask);
}
size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg)
{
bool show_prefix = arg->show_string_prefix;
- const char *prefix = "STATX_";
- int printed = 0, flags = arg->val;
-
-#define P_FLAG(n) \
- if (flags & STATX_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
- flags &= ~STATX_##n; \
- }
-
- P_FLAG(TYPE);
- P_FLAG(MODE);
- P_FLAG(NLINK);
- P_FLAG(UID);
- P_FLAG(GID);
- P_FLAG(ATIME);
- P_FLAG(MTIME);
- P_FLAG(CTIME);
- P_FLAG(INO);
- P_FLAG(SIZE);
- P_FLAG(BLOCKS);
- P_FLAG(BTIME);
- P_FLAG(MNT_ID);
- P_FLAG(DIOALIGN);
- P_FLAG(MNT_ID_UNIQUE);
-
-#undef P_FLAG
-
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+ int mask = arg->val;
- return printed;
+ return statx__scnprintf_mask(mask, bf, size, show_prefix);
}
diff --git a/tools/perf/trace/beauty/statx_mask.sh b/tools/perf/trace/beauty/statx_mask.sh
new file mode 100755
index 00000000000000..18c802ed0c7157
--- /dev/null
+++ b/tools/perf/trace/beauty/statx_mask.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 1 ] ; then
+ beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/
+else
+ beauty_uapi_linux_dir=$1
+fi
+
+linux_stat=${beauty_uapi_linux_dir}/stat.h
+
+printf "static const char *statx_mask[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+STATX_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+# STATX_BASIC_STATS its a bitmask formed by the mask in the normal stat struct
+# STATX_ALL is another bitmask and deprecated
+# STATX_ATTR_*: Attributes to be found in stx_attributes and masked in stx_attributes_mask
+grep -E $regex ${linux_stat} | \
+ grep -v STATX_ALL | \
+ grep -v STATX_BASIC_STATS | \
+ grep -v '\<STATX_ATTR_' | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/sync_file_range.c b/tools/perf/trace/beauty/sync_file_range.c
index 1c425f04047dbb..3e8f50ff4fc701 100644
--- a/tools/perf/trace/beauty/sync_file_range.c
+++ b/tools/perf/trace/beauty/sync_file_range.c
@@ -7,7 +7,16 @@
#include "trace/beauty/beauty.h"
#include <linux/log2.h>
-#include <uapi/linux/fs.h>
+#include <linux/fs.h>
+
+#ifndef SYNC_FILE_RANGE_WRITE_AND_WAIT
+#define SYNC_FILE_RANGE_WAIT_BEFORE 1
+#define SYNC_FILE_RANGE_WRITE 2
+#define SYNC_FILE_RANGE_WAIT_AFTER 4
+#define SYNC_FILE_RANGE_WRITE_AND_WAIT (SYNC_FILE_RANGE_WRITE | \
+ SYNC_FILE_RANGE_WAIT_BEFORE | \
+ SYNC_FILE_RANGE_WAIT_AFTER)
+#endif
static size_t sync_file_range__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
{
diff --git a/tools/perf/trace/beauty/sync_file_range.sh b/tools/perf/trace/beauty/sync_file_range.sh
index 90bf633be87990..b1084c4cab636b 100755
--- a/tools/perf/trace/beauty/sync_file_range.sh
+++ b/tools/perf/trace/beauty/sync_file_range.sh
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: LGPL-2.1
if [ $# -ne 1 ] ; then
- linux_header_dir=tools/include/uapi/linux
+ linux_header_dir=tools/perf/trace/beauty/include/uapi/linux/
else
linux_header_dir=$1
fi
diff --git a/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh b/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh
index 87dc68c7de0c29..d8e927dd2bb75c 100755
--- a/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh
+++ b/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh
@@ -3,12 +3,12 @@
# (C) 2019, Arnaldo Carvalho de Melo <acme@redhat.com>
if [ $# -ne 1 ] ; then
- arch_x86_header_dir=tools/arch/x86/include/asm/
+ beauty_arch_asm_dir=tools/perf/trace/beauty/arch/x86/include/asm/
else
- arch_x86_header_dir=$1
+ beauty_arch_asm_dir=$1
fi
-x86_irq_vectors=${arch_x86_header_dir}/irq_vectors.h
+x86_irq_vectors=${beauty_arch_asm_dir}/irq_vectors.h
# FIRST_EXTERNAL_VECTOR is not that useful, find what is its number
# and then replace whatever is using it and that is useful, which at
diff --git a/tools/perf/trace/beauty/usbdevfs_ioctl.sh b/tools/perf/trace/beauty/usbdevfs_ioctl.sh
index b39cfb3720b806..12a30a9a8e0c8f 100755
--- a/tools/perf/trace/beauty/usbdevfs_ioctl.sh
+++ b/tools/perf/trace/beauty/usbdevfs_ioctl.sh
@@ -1,21 +1,21 @@
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1
-[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+[ $# -eq 1 ] && beauty_uapi_linux_dir=$1 || beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux/
# also as:
# #define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len)
printf "static const char *usbdevfs_ioctl_cmds[] = {\n"
regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)(\(\w+\))?[[:space:]]+_IO[CWR]{0,2}\([[:space:]]*(_IOC_\w+,[[:space:]]*)?'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*"
-grep -E "$regex" ${header_dir}/usbdevice_fs.h | grep -E -v 'USBDEVFS_\w+32[[:space:]]' | \
+grep -E "$regex" ${beauty_uapi_linux_dir}/usbdevice_fs.h | grep -E -v 'USBDEVFS_\w+32[[:space:]]' | \
sed -r "s/$regex/\4 \1/g" | \
sort | xargs printf "\t[%s] = \"%s\",\n"
printf "};\n\n"
printf "#if 0\n"
printf "static const char *usbdevfs_ioctl_32_cmds[] = {\n"
regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)[[:space:]]+_IO[WR]{0,2}\([[:space:]]*'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*"
-grep -E $regex ${header_dir}/usbdevice_fs.h | grep -E 'USBDEVFS_\w+32[[:space:]]' | \
+grep -E $regex ${beauty_uapi_linux_dir}/usbdevice_fs.h | grep -E 'USBDEVFS_\w+32[[:space:]]' | \
sed -r "s/$regex/\2 \1/g" | \
sort | xargs printf "\t[%s] = \"%s\",\n"
printf "};\n"
diff --git a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
index 2dd0a3b1f55aac..e4f395e7650a0b 100755
--- a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
+++ b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -1,18 +1,18 @@
#!/bin/sh
# SPDX-License-Identifier: LGPL-2.1
-[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+[ $# -eq 1 ] && beauty_uapi_linux_dir=$1 || beauty_uapi_linux_dir=tools/perf/trace/beauty/include/uapi/linux
printf "static const char *vhost_virtio_ioctl_cmds[] = {\n"
regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
-grep -E $regex ${header_dir}/vhost.h | \
+grep -E $regex ${beauty_uapi_linux_dir}/vhost.h | \
sed -r "s/$regex/\2 \1/g" | \
sort | xargs printf "\t[%s] = \"%s\",\n"
printf "};\n"
printf "static const char *vhost_virtio_ioctl_read_cmds[] = {\n"
regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?R\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
-grep -E $regex ${header_dir}/vhost.h | \
+grep -E $regex ${beauty_uapi_linux_dir}/vhost.h | \
sed -r "s/$regex/\2 \1/g" | \
sort | xargs printf "\t[%s] = \"%s\",\n"
printf "};\n"
diff --git a/tools/perf/trace/beauty/x86_arch_prctl.sh b/tools/perf/trace/beauty/x86_arch_prctl.sh
index b1596df251f0aa..b714ffa3cb7a4e 100755
--- a/tools/perf/trace/beauty/x86_arch_prctl.sh
+++ b/tools/perf/trace/beauty/x86_arch_prctl.sh
@@ -2,9 +2,9 @@
# Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
# SPDX-License-Identifier: LGPL-2.1
-[ $# -eq 1 ] && x86_header_dir=$1 || x86_header_dir=tools/arch/x86/include/uapi/asm/
+[ $# -eq 1 ] && beauty_x86_arch_asm_uapi_dir=$1 || beauty_x86_arch_asm_uapi_dir=tools/perf/trace/beauty/arch/x86/include/uapi/asm/
-prctl_arch_header=${x86_header_dir}/prctl.h
+prctl_arch_header=${beauty_x86_arch_asm_uapi_dir}/prctl.h
print_range () {
idx=$1
diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build
index 7a1d5ddaf6888c..2608b5da316768 100644
--- a/tools/perf/ui/browsers/Build
+++ b/tools/perf/ui/browsers/Build
@@ -1,4 +1,5 @@
perf-y += annotate.o
+perf-y += annotate-data.o
perf-y += hists.o
perf-y += map.o
perf-y += scripts.o
diff --git a/tools/perf/ui/browsers/annotate-data.c b/tools/perf/ui/browsers/annotate-data.c
new file mode 100644
index 00000000000000..a4a0f042f201a3
--- /dev/null
+++ b/tools/perf/ui/browsers/annotate-data.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ttydefaults.h>
+
+#include "ui/browser.h"
+#include "ui/helpline.h"
+#include "ui/keysyms.h"
+#include "ui/ui.h"
+#include "util/annotate.h"
+#include "util/annotate-data.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/sort.h"
+
+struct annotated_data_browser {
+ struct ui_browser b;
+ struct list_head entries;
+ int nr_events;
+};
+
+struct browser_entry {
+ struct list_head node;
+ struct annotated_member *data;
+ struct type_hist_entry *hists;
+ int indent;
+};
+
+static struct annotated_data_browser *get_browser(struct ui_browser *uib)
+{
+ return container_of(uib, struct annotated_data_browser, b);
+}
+
+static void update_hist_entry(struct type_hist_entry *dst,
+ struct type_hist_entry *src)
+{
+ dst->nr_samples += src->nr_samples;
+ dst->period += src->period;
+}
+
+static int get_member_overhead(struct annotated_data_type *adt,
+ struct browser_entry *entry,
+ struct evsel *leader)
+{
+ struct annotated_member *member = entry->data;
+ int i, k;
+
+ for (i = 0; i < member->size; i++) {
+ struct type_hist *h;
+ struct evsel *evsel;
+ int offset = member->offset + i;
+
+ for_each_group_evsel(evsel, leader) {
+ h = adt->histograms[evsel->core.idx];
+ k = evsel__group_idx(evsel);
+ update_hist_entry(&entry->hists[k], &h->addr[offset]);
+ }
+ }
+ return 0;
+}
+
+static int add_child_entries(struct annotated_data_browser *browser,
+ struct annotated_data_type *adt,
+ struct annotated_member *member,
+ struct evsel *evsel, int indent)
+{
+ struct annotated_member *pos;
+ struct browser_entry *entry;
+ int nr_entries = 0;
+
+ entry = zalloc(sizeof(*entry));
+ if (entry == NULL)
+ return -1;
+
+ entry->hists = calloc(browser->nr_events, sizeof(*entry->hists));
+ if (entry->hists == NULL) {
+ free(entry);
+ return -1;
+ }
+
+ entry->data = member;
+ entry->indent = indent;
+ if (get_member_overhead(adt, entry, evsel) < 0) {
+ free(entry);
+ return -1;
+ }
+
+ list_add_tail(&entry->node, &browser->entries);
+ nr_entries++;
+
+ list_for_each_entry(pos, &member->children, node) {
+ int nr = add_child_entries(browser, adt, pos, evsel, indent + 1);
+
+ if (nr < 0)
+ return nr;
+
+ nr_entries += nr;
+ }
+
+ /* add an entry for the closing bracket ("}") */
+ if (!list_empty(&member->children)) {
+ entry = zalloc(sizeof(*entry));
+ if (entry == NULL)
+ return -1;
+
+ entry->indent = indent;
+ list_add_tail(&entry->node, &browser->entries);
+ nr_entries++;
+ }
+
+ return nr_entries;
+}
+
+static int annotated_data_browser__collect_entries(struct annotated_data_browser *browser)
+{
+ struct hist_entry *he = browser->b.priv;
+ struct annotated_data_type *adt = he->mem_type;
+ struct evsel *evsel = hists_to_evsel(he->hists);
+
+ INIT_LIST_HEAD(&browser->entries);
+ browser->b.entries = &browser->entries;
+ browser->b.nr_entries = add_child_entries(browser, adt, &adt->self,
+ evsel, /*indent=*/0);
+ return 0;
+}
+
+static void annotated_data_browser__delete_entries(struct annotated_data_browser *browser)
+{
+ struct browser_entry *pos, *tmp;
+
+ list_for_each_entry_safe(pos, tmp, &browser->entries, node) {
+ list_del_init(&pos->node);
+ free(pos->hists);
+ free(pos);
+ }
+}
+
+static unsigned int browser__refresh(struct ui_browser *uib)
+{
+ return ui_browser__list_head_refresh(uib);
+}
+
+static int browser__show(struct ui_browser *uib)
+{
+ struct hist_entry *he = uib->priv;
+ struct annotated_data_type *adt = he->mem_type;
+ struct annotated_data_browser *browser = get_browser(uib);
+ const char *help = "Press 'h' for help on key bindings";
+ char title[256];
+
+ snprintf(title, sizeof(title), "Annotate type: '%s' (%d samples)",
+ adt->self.type_name, he->stat.nr_events);
+
+ if (ui_browser__show(uib, title, help) < 0)
+ return -1;
+
+ /* second line header */
+ ui_browser__gotorc_title(uib, 0, 0);
+ ui_browser__set_color(uib, HE_COLORSET_ROOT);
+
+ if (symbol_conf.show_total_period)
+ strcpy(title, "Period");
+ else if (symbol_conf.show_nr_samples)
+ strcpy(title, "Samples");
+ else
+ strcpy(title, "Percent");
+
+ ui_browser__printf(uib, "%*s %10s %10s %10s %s",
+ 11 * (browser->nr_events - 1), "",
+ title, "Offset", "Size", "Field");
+ ui_browser__write_nstring(uib, "", uib->width);
+ return 0;
+}
+
+static void browser__write_overhead(struct ui_browser *uib,
+ struct type_hist *total,
+ struct type_hist_entry *hist, int row)
+{
+ u64 period = hist->period;
+ double percent = total->period ? (100.0 * period / total->period) : 0;
+ bool current = ui_browser__is_current_entry(uib, row);
+ int nr_samples = 0;
+
+ ui_browser__set_percent_color(uib, percent, current);
+
+ if (symbol_conf.show_total_period)
+ ui_browser__printf(uib, " %10" PRIu64, period);
+ else if (symbol_conf.show_nr_samples)
+ ui_browser__printf(uib, " %10d", nr_samples);
+ else
+ ui_browser__printf(uib, " %10.2f", percent);
+
+ ui_browser__set_percent_color(uib, 0, current);
+}
+
+static void browser__write(struct ui_browser *uib, void *entry, int row)
+{
+ struct annotated_data_browser *browser = get_browser(uib);
+ struct browser_entry *be = entry;
+ struct annotated_member *member = be->data;
+ struct hist_entry *he = uib->priv;
+ struct annotated_data_type *adt = he->mem_type;
+ struct evsel *leader = hists_to_evsel(he->hists);
+ struct evsel *evsel;
+
+ if (member == NULL) {
+ bool current = ui_browser__is_current_entry(uib, row);
+
+ /* print the closing bracket */
+ ui_browser__set_percent_color(uib, 0, current);
+ ui_browser__write_nstring(uib, "", 11 * browser->nr_events);
+ ui_browser__printf(uib, " %10s %10s %*s};",
+ "", "", be->indent * 4, "");
+ ui_browser__write_nstring(uib, "", uib->width);
+ return;
+ }
+
+ /* print the number */
+ for_each_group_evsel(evsel, leader) {
+ struct type_hist *h = adt->histograms[evsel->core.idx];
+ int idx = evsel__group_idx(evsel);
+
+ browser__write_overhead(uib, h, &be->hists[idx], row);
+ }
+
+ /* print type info */
+ if (be->indent == 0 && !member->var_name) {
+ ui_browser__printf(uib, " %10d %10d %s%s",
+ member->offset, member->size,
+ member->type_name,
+ list_empty(&member->children) ? ";" : " {");
+ } else {
+ ui_browser__printf(uib, " %10d %10d %*s%s\t%s%s",
+ member->offset, member->size,
+ be->indent * 4, "", member->type_name,
+ member->var_name ?: "",
+ list_empty(&member->children) ? ";" : " {");
+ }
+ /* fill the rest */
+ ui_browser__write_nstring(uib, "", uib->width);
+}
+
+static int annotated_data_browser__run(struct annotated_data_browser *browser,
+ struct evsel *evsel __maybe_unused,
+ struct hist_browser_timer *hbt)
+{
+ int delay_secs = hbt ? hbt->refresh : 0;
+ int key;
+
+ if (browser__show(&browser->b) < 0)
+ return -1;
+
+ while (1) {
+ key = ui_browser__run(&browser->b, delay_secs);
+
+ switch (key) {
+ case K_TIMER:
+ if (hbt)
+ hbt->timer(hbt->arg);
+ continue;
+ case K_F1:
+ case 'h':
+ ui_browser__help_window(&browser->b,
+ "UP/DOWN/PGUP\n"
+ "PGDN/SPACE Navigate\n"
+ "</> Move to prev/next symbol\n"
+ "q/ESC/CTRL+C Exit\n\n");
+ continue;
+ case K_LEFT:
+ case '<':
+ case '>':
+ case K_ESC:
+ case 'q':
+ case CTRL('c'):
+ goto out;
+ default:
+ continue;
+ }
+ }
+out:
+ ui_browser__hide(&browser->b);
+ return key;
+}
+
+int hist_entry__annotate_data_tui(struct hist_entry *he, struct evsel *evsel,
+ struct hist_browser_timer *hbt)
+{
+ struct annotated_data_browser browser = {
+ .b = {
+ .refresh = browser__refresh,
+ .seek = ui_browser__list_head_seek,
+ .write = browser__write,
+ .priv = he,
+ .extra_title_lines = 1,
+ },
+ .nr_events = 1,
+ };
+ int ret;
+
+ ui_helpline__push("Press ESC to exit");
+
+ if (evsel__is_group_event(evsel))
+ browser.nr_events = evsel->core.nr_members;
+
+ ret = annotated_data_browser__collect_entries(&browser);
+ if (ret == 0)
+ ret = annotated_data_browser__run(&browser, evsel, hbt);
+
+ annotated_data_browser__delete_entries(&browser);
+
+ return ret;
+}
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index ec5e2193287603..0dd429cf612c04 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -49,7 +49,7 @@ static int ui_browser__jumps_percent_color(struct ui_browser *browser, int nr, b
if (current && (!browser->use_navkeypressed || browser->navkeypressed))
return HE_COLORSET_SELECTED;
- if (nr == notes->max_jump_sources)
+ if (nr == notes->src->max_jump_sources)
return HE_COLORSET_TOP;
if (nr > 1)
return HE_COLORSET_MEDIUM;
@@ -186,7 +186,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
* name right after the '<' token and probably treating this like a
* 'call' instruction.
*/
- target = notes->src->offsets[cursor->ops.target.offset];
+ target = annotated_source__get_line(notes->src, cursor->ops.target.offset);
if (target == NULL) {
ui_helpline__printf("WARN: jump target inconsistency, press 'o', notes->offsets[%#x] = NULL\n",
cursor->ops.target.offset);
@@ -205,13 +205,13 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
__ui_browser__line_arrow(browser,
- pcnt_width + 2 + notes->widths.addr + width,
+ pcnt_width + 2 + notes->src->widths.addr + width,
from, to);
diff = is_fused(ab, cursor);
if (diff > 0) {
ui_browser__mark_fused(browser,
- pcnt_width + 3 + notes->widths.addr + width,
+ pcnt_width + 3 + notes->src->widths.addr + width,
from - diff, diff, to > from);
}
}
@@ -970,20 +970,20 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
if (dso->annotate_warned)
return -1;
- if (not_annotated) {
+ if (not_annotated || !sym->annotate2) {
err = symbol__annotate2(ms, evsel, &browser.arch);
if (err) {
char msg[BUFSIZ];
dso->annotate_warned = true;
symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
- goto out_free_offsets;
+ return -1;
}
}
ui_helpline__push("Press ESC to exit");
- browser.b.width = notes->src->max_line_len;
+ browser.b.width = notes->src->widths.max_line_len;
browser.b.nr_entries = notes->src->nr_entries;
browser.b.entries = &notes->src->source,
browser.b.width += 18; /* Percentage */
@@ -996,8 +996,5 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
if(not_annotated)
annotated_source__purge(notes->src);
-out_free_offsets:
- if(not_annotated)
- zfree(&notes->src->offsets);
return ret;
}
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 0c02b3a8e121ff..71b32591d61a65 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -38,6 +38,7 @@
#include "../ui.h"
#include "map.h"
#include "annotate.h"
+#include "annotate-data.h"
#include "srcline.h"
#include "string2.h"
#include "units.h"
@@ -2506,6 +2507,32 @@ add_annotate_opt(struct hist_browser *browser __maybe_unused,
}
static int
+do_annotate_type(struct hist_browser *browser, struct popup_action *act)
+{
+ struct hist_entry *he = browser->he_selection;
+
+ hist_entry__annotate_data_tui(he, act->evsel, browser->hbt);
+ ui_browser__handle_resize(&browser->b);
+ return 0;
+}
+
+static int
+add_annotate_type_opt(struct hist_browser *browser,
+ struct popup_action *act, char **optstr,
+ struct hist_entry *he)
+{
+ if (he == NULL || he->mem_type == NULL || he->mem_type->histograms == NULL)
+ return 0;
+
+ if (asprintf(optstr, "Annotate type %s", he->mem_type->self.type_name) < 0)
+ return 0;
+
+ act->evsel = hists_to_evsel(browser->hists);
+ act->fn = do_annotate_type;
+ return 1;
+}
+
+static int
do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
{
struct thread *thread = act->thread;
@@ -3307,6 +3334,10 @@ do_hotkey: // key came straight from options ui__popup_menu()
browser->he_selection->ip);
}
skip_annotation:
+ nr_options += add_annotate_type_opt(browser,
+ &actions[nr_options],
+ &options[nr_options],
+ browser->he_selection);
nr_options += add_thread_opt(browser, &actions[nr_options],
&options[nr_options], thread);
nr_options += add_dso_opt(browser, &actions[nr_options],
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 2bf959d0835430..685ba2a54fd8e2 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -25,7 +25,7 @@
static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
hpp_field_fn get_field, const char *fmt, int len,
- hpp_snprint_fn print_fn, bool fmt_percent)
+ hpp_snprint_fn print_fn, enum perf_hpp_fmt_type fmtype)
{
int ret;
struct hists *hists = he->hists;
@@ -33,7 +33,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
char *buf = hpp->buf;
size_t size = hpp->size;
- if (fmt_percent) {
+ if (fmtype == PERF_HPP_FMT_TYPE__PERCENT) {
double percent = 0.0;
u64 total = hists__total_period(hists);
@@ -41,8 +41,16 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
percent = 100.0 * get_field(he) / total;
ret = hpp__call_print_fn(hpp, print_fn, fmt, len, percent);
- } else
+ } else if (fmtype == PERF_HPP_FMT_TYPE__AVERAGE) {
+ double average = 0;
+
+ if (he->stat.nr_events)
+ average = 1.0 * get_field(he) / he->stat.nr_events;
+
+ ret = hpp__call_print_fn(hpp, print_fn, fmt, len, average);
+ } else {
ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he));
+ }
if (evsel__is_group_event(evsel)) {
int prev_idx, idx_delta;
@@ -54,6 +62,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
list_for_each_entry(pair, &he->pairs.head, pairs.node) {
u64 period = get_field(pair);
u64 total = hists__total_period(pair->hists);
+ int nr_samples = pair->stat.nr_events;
if (!total)
continue;
@@ -66,7 +75,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
* zero-fill group members in the middle which
* have no sample
*/
- if (fmt_percent) {
+ if (fmtype != PERF_HPP_FMT_TYPE__RAW) {
ret += hpp__call_print_fn(hpp, print_fn,
fmt, len, 0.0);
} else {
@@ -75,9 +84,14 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
}
}
- if (fmt_percent) {
+ if (fmtype == PERF_HPP_FMT_TYPE__PERCENT) {
ret += hpp__call_print_fn(hpp, print_fn, fmt, len,
100.0 * period / total);
+ } else if (fmtype == PERF_HPP_FMT_TYPE__AVERAGE) {
+ double avg = nr_samples ? (period / nr_samples) : 0;
+
+ ret += hpp__call_print_fn(hpp, print_fn, fmt,
+ len, avg);
} else {
ret += hpp__call_print_fn(hpp, print_fn, fmt,
len, period);
@@ -92,7 +106,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
/*
* zero-fill group members at last which have no sample
*/
- if (fmt_percent) {
+ if (fmtype != PERF_HPP_FMT_TYPE__RAW) {
ret += hpp__call_print_fn(hpp, print_fn,
fmt, len, 0.0);
} else {
@@ -114,33 +128,35 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he, hpp_field_fn get_field,
- const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
+ const char *fmtstr, hpp_snprint_fn print_fn,
+ enum perf_hpp_fmt_type fmtype)
{
int len = fmt->user_len ?: fmt->len;
if (symbol_conf.field_sep) {
return __hpp__fmt(hpp, he, get_field, fmtstr, 1,
- print_fn, fmt_percent);
+ print_fn, fmtype);
}
- if (fmt_percent)
+ if (fmtype == PERF_HPP_FMT_TYPE__PERCENT)
len -= 2; /* 2 for a space and a % sign */
else
len -= 1;
- return __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, fmt_percent);
+ return __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, fmtype);
}
int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he, hpp_field_fn get_field,
- const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
+ const char *fmtstr, hpp_snprint_fn print_fn,
+ enum perf_hpp_fmt_type fmtype)
{
if (!symbol_conf.cumulate_callchain) {
int len = fmt->user_len ?: fmt->len;
return snprintf(hpp->buf, hpp->size, " %*s", len - 1, "N/A");
}
- return hpp__fmt(fmt, hpp, he, get_field, fmtstr, print_fn, fmt_percent);
+ return hpp__fmt(fmt, hpp, he, get_field, fmtstr, print_fn, fmtype);
}
static int field_cmp(u64 field_a, u64 field_b)
@@ -350,7 +366,7 @@ static int hpp__color_##_type(struct perf_hpp_fmt *fmt, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.2f%%", \
- hpp_color_scnprintf, true); \
+ hpp_color_scnprintf, PERF_HPP_FMT_TYPE__PERCENT); \
}
#define __HPP_ENTRY_PERCENT_FN(_type, _field) \
@@ -358,7 +374,7 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.2f%%", \
- hpp_entry_scnprintf, true); \
+ hpp_entry_scnprintf, PERF_HPP_FMT_TYPE__PERCENT); \
}
#define __HPP_SORT_FN(_type, _field) \
@@ -378,7 +394,7 @@ static int hpp__color_##_type(struct perf_hpp_fmt *fmt, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return hpp__fmt_acc(fmt, hpp, he, he_get_acc_##_field, " %*.2f%%", \
- hpp_color_scnprintf, true); \
+ hpp_color_scnprintf, PERF_HPP_FMT_TYPE__PERCENT); \
}
#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
@@ -386,7 +402,7 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return hpp__fmt_acc(fmt, hpp, he, he_get_acc_##_field, " %*.2f%%", \
- hpp_entry_scnprintf, true); \
+ hpp_entry_scnprintf, PERF_HPP_FMT_TYPE__PERCENT); \
}
#define __HPP_SORT_ACC_FN(_type, _field) \
@@ -406,7 +422,7 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \
struct perf_hpp *hpp, struct hist_entry *he) \
{ \
return hpp__fmt(fmt, hpp, he, he_get_raw_##_field, " %*"PRIu64, \
- hpp_entry_scnprintf, false); \
+ hpp_entry_scnprintf, PERF_HPP_FMT_TYPE__RAW); \
}
#define __HPP_SORT_RAW_FN(_type, _field) \
@@ -416,6 +432,26 @@ static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
return __hpp__sort(a, b, he_get_raw_##_field); \
}
+#define __HPP_ENTRY_AVERAGE_FN(_type, _field) \
+static u64 he_get_##_field(struct hist_entry *he) \
+{ \
+ return he->stat._field; \
+} \
+ \
+static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \
+ struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.1f", \
+ hpp_entry_scnprintf, PERF_HPP_FMT_TYPE__AVERAGE); \
+}
+
+#define __HPP_SORT_AVERAGE_FN(_type, _field) \
+static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
+ struct hist_entry *a, struct hist_entry *b) \
+{ \
+ return __hpp__sort(a, b, he_get_##_field); \
+}
+
#define HPP_PERCENT_FNS(_type, _field) \
__HPP_COLOR_PERCENT_FN(_type, _field) \
@@ -431,6 +467,10 @@ __HPP_SORT_ACC_FN(_type, _field)
__HPP_ENTRY_RAW_FN(_type, _field) \
__HPP_SORT_RAW_FN(_type, _field)
+#define HPP_AVERAGE_FNS(_type, _field) \
+__HPP_ENTRY_AVERAGE_FN(_type, _field) \
+__HPP_SORT_AVERAGE_FN(_type, _field)
+
HPP_PERCENT_FNS(overhead, period)
HPP_PERCENT_FNS(overhead_sys, period_sys)
HPP_PERCENT_FNS(overhead_us, period_us)
@@ -441,6 +481,10 @@ HPP_PERCENT_ACC_FNS(overhead_acc, period)
HPP_RAW_FNS(samples, nr_events)
HPP_RAW_FNS(period, period)
+HPP_AVERAGE_FNS(weight1, weight1)
+HPP_AVERAGE_FNS(weight2, weight2)
+HPP_AVERAGE_FNS(weight3, weight3)
+
static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *a __maybe_unused,
struct hist_entry *b __maybe_unused)
@@ -510,7 +554,10 @@ struct perf_hpp_fmt perf_hpp__format[] = {
HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US),
HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC),
HPP__PRINT_FNS("Samples", samples, SAMPLES),
- HPP__PRINT_FNS("Period", period, PERIOD)
+ HPP__PRINT_FNS("Period", period, PERIOD),
+ HPP__PRINT_FNS("Weight1", weight1, WEIGHT1),
+ HPP__PRINT_FNS("Weight2", weight2, WEIGHT2),
+ HPP__PRINT_FNS("Weight3", weight3, WEIGHT3),
};
struct perf_hpp_list perf_hpp_list = {
@@ -526,6 +573,7 @@ struct perf_hpp_list perf_hpp_list = {
#undef HPP_PERCENT_FNS
#undef HPP_PERCENT_ACC_FNS
#undef HPP_RAW_FNS
+#undef HPP_AVERAGE_FNS
#undef __HPP_HEADER_FN
#undef __HPP_WIDTH_FN
@@ -534,9 +582,11 @@ struct perf_hpp_list perf_hpp_list = {
#undef __HPP_COLOR_ACC_PERCENT_FN
#undef __HPP_ENTRY_ACC_PERCENT_FN
#undef __HPP_ENTRY_RAW_FN
+#undef __HPP_ENTRY_AVERAGE_FN
#undef __HPP_SORT_FN
#undef __HPP_SORT_ACC_FN
#undef __HPP_SORT_RAW_FN
+#undef __HPP_SORT_AVERAGE_FN
static void fmt_free(struct perf_hpp_fmt *fmt)
{
@@ -785,6 +835,12 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
fmt->len = 12;
break;
+ case PERF_HPP__WEIGHT1:
+ case PERF_HPP__WEIGHT2:
+ case PERF_HPP__WEIGHT3:
+ fmt->len = 8;
+ break;
+
default:
break;
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e0a723e2450386..292170a99ab6ed 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -12,6 +12,7 @@ perf-y += config.o
perf-y += copyfile.o
perf-y += ctype.o
perf-y += db-export.o
+perf-y += disasm.o
perf-y += env.o
perf-y += event.o
perf-y += evlist.o
@@ -388,3 +389,17 @@ $(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE
$(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
+
+ifdef SHELLCHECK
+ SHELL_TESTS := generate-cmdlist.sh
+ TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
+else
+ SHELL_TESTS :=
+ TEST_LOGS :=
+endif
+
+$(OUTPUT)%.shellcheck_log: %
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-y += $(TEST_LOGS)
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 30c4d19fcf112f..12d5faff3b7af5 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -19,9 +19,200 @@
#include "evlist.h"
#include "map.h"
#include "map_symbol.h"
+#include "sort.h"
#include "strbuf.h"
#include "symbol.h"
#include "symbol_conf.h"
+#include "thread.h"
+
+/* register number of the stack pointer */
+#define X86_REG_SP 7
+
+enum type_state_kind {
+ TSR_KIND_INVALID = 0,
+ TSR_KIND_TYPE,
+ TSR_KIND_PERCPU_BASE,
+ TSR_KIND_CONST,
+ TSR_KIND_POINTER,
+ TSR_KIND_CANARY,
+};
+
+#define pr_debug_dtp(fmt, ...) \
+do { \
+ if (debug_type_profile) \
+ pr_info(fmt, ##__VA_ARGS__); \
+ else \
+ pr_debug3(fmt, ##__VA_ARGS__); \
+} while (0)
+
+static void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind)
+{
+ struct strbuf sb;
+ char *str;
+ Dwarf_Word size = 0;
+
+ if (!debug_type_profile && verbose < 3)
+ return;
+
+ switch (kind) {
+ case TSR_KIND_INVALID:
+ pr_info("\n");
+ return;
+ case TSR_KIND_PERCPU_BASE:
+ pr_info(" percpu base\n");
+ return;
+ case TSR_KIND_CONST:
+ pr_info(" constant\n");
+ return;
+ case TSR_KIND_POINTER:
+ pr_info(" pointer");
+ /* it also prints the type info */
+ break;
+ case TSR_KIND_CANARY:
+ pr_info(" stack canary\n");
+ return;
+ case TSR_KIND_TYPE:
+ default:
+ break;
+ }
+
+ dwarf_aggregate_size(die, &size);
+
+ strbuf_init(&sb, 32);
+ die_get_typename_from_type(die, &sb);
+ str = strbuf_detach(&sb, NULL);
+ pr_info(" type='%s' size=%#lx (die:%#lx)\n",
+ str, (long)size, (long)dwarf_dieoffset(die));
+ free(str);
+}
+
+static void pr_debug_location(Dwarf_Die *die, u64 pc, int reg)
+{
+ ptrdiff_t off = 0;
+ Dwarf_Attribute attr;
+ Dwarf_Addr base, start, end;
+ Dwarf_Op *ops;
+ size_t nops;
+
+ if (!debug_type_profile && verbose < 3)
+ return;
+
+ if (dwarf_attr(die, DW_AT_location, &attr) == NULL)
+ return;
+
+ while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) {
+ if (reg != DWARF_REG_PC && end < pc)
+ continue;
+ if (reg != DWARF_REG_PC && start > pc)
+ break;
+
+ pr_info(" variable location: ");
+ switch (ops->atom) {
+ case DW_OP_reg0 ...DW_OP_reg31:
+ pr_info("reg%d\n", ops->atom - DW_OP_reg0);
+ break;
+ case DW_OP_breg0 ...DW_OP_breg31:
+ pr_info("base=reg%d, offset=%#lx\n",
+ ops->atom - DW_OP_breg0, (long)ops->number);
+ break;
+ case DW_OP_regx:
+ pr_info("reg%ld\n", (long)ops->number);
+ break;
+ case DW_OP_bregx:
+ pr_info("base=reg%ld, offset=%#lx\n",
+ (long)ops->number, (long)ops->number2);
+ break;
+ case DW_OP_fbreg:
+ pr_info("use frame base, offset=%#lx\n", (long)ops->number);
+ break;
+ case DW_OP_addr:
+ pr_info("address=%#lx\n", (long)ops->number);
+ break;
+ default:
+ pr_info("unknown: code=%#x, number=%#lx\n",
+ ops->atom, (long)ops->number);
+ break;
+ }
+ break;
+ }
+}
+
+/*
+ * Type information in a register, valid when @ok is true.
+ * The @caller_saved registers are invalidated after a function call.
+ */
+struct type_state_reg {
+ Dwarf_Die type;
+ u32 imm_value;
+ bool ok;
+ bool caller_saved;
+ u8 kind;
+};
+
+/* Type information in a stack location, dynamically allocated */
+struct type_state_stack {
+ struct list_head list;
+ Dwarf_Die type;
+ int offset;
+ int size;
+ bool compound;
+ u8 kind;
+};
+
+/* FIXME: This should be arch-dependent */
+#define TYPE_STATE_MAX_REGS 16
+
+/*
+ * State table to maintain type info in each register and stack location.
+ * It'll be updated when new variable is allocated or type info is moved
+ * to a new location (register or stack). As it'd be used with the
+ * shortest path of basic blocks, it only maintains a single table.
+ */
+struct type_state {
+ /* state of general purpose registers */
+ struct type_state_reg regs[TYPE_STATE_MAX_REGS];
+ /* state of stack location */
+ struct list_head stack_vars;
+ /* return value register */
+ int ret_reg;
+ /* stack pointer register */
+ int stack_reg;
+};
+
+static bool has_reg_type(struct type_state *state, int reg)
+{
+ return (unsigned)reg < ARRAY_SIZE(state->regs);
+}
+
+static void init_type_state(struct type_state *state, struct arch *arch)
+{
+ memset(state, 0, sizeof(*state));
+ INIT_LIST_HEAD(&state->stack_vars);
+
+ if (arch__is(arch, "x86")) {
+ state->regs[0].caller_saved = true;
+ state->regs[1].caller_saved = true;
+ state->regs[2].caller_saved = true;
+ state->regs[4].caller_saved = true;
+ state->regs[5].caller_saved = true;
+ state->regs[8].caller_saved = true;
+ state->regs[9].caller_saved = true;
+ state->regs[10].caller_saved = true;
+ state->regs[11].caller_saved = true;
+ state->ret_reg = 0;
+ state->stack_reg = X86_REG_SP;
+ }
+}
+
+static void exit_type_state(struct type_state *state)
+{
+ struct type_state_stack *stack, *tmp;
+
+ list_for_each_entry_safe(stack, tmp, &state->stack_vars, list) {
+ list_del(&stack->list);
+ free(stack);
+ }
+}
/*
* Compare type name and size to maintain them in a tree.
@@ -194,14 +385,22 @@ static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die)
}
/* The type info will be saved in @type_die */
-static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset,
- bool is_pointer)
+static int check_variable(struct data_loc_info *dloc, Dwarf_Die *var_die,
+ Dwarf_Die *type_die, int reg, int offset, bool is_fbreg)
{
Dwarf_Word size;
+ bool is_pointer = true;
+
+ if (reg == DWARF_REG_PC)
+ is_pointer = false;
+ else if (reg == dloc->fbreg || is_fbreg)
+ is_pointer = false;
+ else if (arch__is(dloc->arch, "x86") && reg == X86_REG_SP)
+ is_pointer = false;
/* Get the type of the variable */
if (die_get_real_type(var_die, type_die) == NULL) {
- pr_debug("variable has no type\n");
+ pr_debug_dtp("variable has no type\n");
ann_data_stat.no_typeinfo++;
return -1;
}
@@ -215,7 +414,7 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset,
if ((dwarf_tag(type_die) != DW_TAG_pointer_type &&
dwarf_tag(type_die) != DW_TAG_array_type) ||
die_get_real_type(type_die, type_die) == NULL) {
- pr_debug("no pointer or no type\n");
+ pr_debug_dtp("no pointer or no type\n");
ann_data_stat.no_typeinfo++;
return -1;
}
@@ -223,14 +422,15 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset,
/* Get the size of the actual type */
if (dwarf_aggregate_size(type_die, &size) < 0) {
- pr_debug("type size is unknown\n");
+ pr_debug_dtp("type size is unknown\n");
ann_data_stat.invalid_size++;
return -1;
}
/* Minimal sanity check */
if ((unsigned)offset >= size) {
- pr_debug("offset: %d is bigger than size: %" PRIu64 "\n", offset, size);
+ pr_debug_dtp("offset: %d is bigger than size: %"PRIu64"\n",
+ offset, size);
ann_data_stat.bad_offset++;
return -1;
}
@@ -238,23 +438,1096 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset,
return 0;
}
+static struct type_state_stack *find_stack_state(struct type_state *state,
+ int offset)
+{
+ struct type_state_stack *stack;
+
+ list_for_each_entry(stack, &state->stack_vars, list) {
+ if (offset == stack->offset)
+ return stack;
+
+ if (stack->compound && stack->offset < offset &&
+ offset < stack->offset + stack->size)
+ return stack;
+ }
+ return NULL;
+}
+
+static void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
+ Dwarf_Die *type_die)
+{
+ int tag;
+ Dwarf_Word size;
+
+ if (dwarf_aggregate_size(type_die, &size) < 0)
+ size = 0;
+
+ tag = dwarf_tag(type_die);
+
+ stack->type = *type_die;
+ stack->size = size;
+ stack->offset = offset;
+ stack->kind = kind;
+
+ switch (tag) {
+ case DW_TAG_structure_type:
+ case DW_TAG_union_type:
+ stack->compound = (kind != TSR_KIND_POINTER);
+ break;
+ default:
+ stack->compound = false;
+ break;
+ }
+}
+
+static struct type_state_stack *findnew_stack_state(struct type_state *state,
+ int offset, u8 kind,
+ Dwarf_Die *type_die)
+{
+ struct type_state_stack *stack = find_stack_state(state, offset);
+
+ if (stack) {
+ set_stack_state(stack, offset, kind, type_die);
+ return stack;
+ }
+
+ stack = malloc(sizeof(*stack));
+ if (stack) {
+ set_stack_state(stack, offset, kind, type_die);
+ list_add(&stack->list, &state->stack_vars);
+ }
+ return stack;
+}
+
+/* Maintain a cache for quick global variable lookup */
+struct global_var_entry {
+ struct rb_node node;
+ char *name;
+ u64 start;
+ u64 end;
+ u64 die_offset;
+};
+
+static int global_var_cmp(const void *_key, const struct rb_node *node)
+{
+ const u64 addr = (uintptr_t)_key;
+ struct global_var_entry *gvar;
+
+ gvar = rb_entry(node, struct global_var_entry, node);
+
+ if (gvar->start <= addr && addr < gvar->end)
+ return 0;
+ return gvar->start > addr ? -1 : 1;
+}
+
+static bool global_var_less(struct rb_node *node_a, const struct rb_node *node_b)
+{
+ struct global_var_entry *gvar_a, *gvar_b;
+
+ gvar_a = rb_entry(node_a, struct global_var_entry, node);
+ gvar_b = rb_entry(node_b, struct global_var_entry, node);
+
+ return gvar_a->start < gvar_b->start;
+}
+
+static struct global_var_entry *global_var__find(struct data_loc_info *dloc, u64 addr)
+{
+ struct dso *dso = map__dso(dloc->ms->map);
+ struct rb_node *node;
+
+ node = rb_find((void *)(uintptr_t)addr, &dso->global_vars, global_var_cmp);
+ if (node == NULL)
+ return NULL;
+
+ return rb_entry(node, struct global_var_entry, node);
+}
+
+static bool global_var__add(struct data_loc_info *dloc, u64 addr,
+ const char *name, Dwarf_Die *type_die)
+{
+ struct dso *dso = map__dso(dloc->ms->map);
+ struct global_var_entry *gvar;
+ Dwarf_Word size;
+
+ if (dwarf_aggregate_size(type_die, &size) < 0)
+ return false;
+
+ gvar = malloc(sizeof(*gvar));
+ if (gvar == NULL)
+ return false;
+
+ gvar->name = strdup(name);
+ if (gvar->name == NULL) {
+ free(gvar);
+ return false;
+ }
+
+ gvar->start = addr;
+ gvar->end = addr + size;
+ gvar->die_offset = dwarf_dieoffset(type_die);
+
+ rb_add(&gvar->node, &dso->global_vars, global_var_less);
+ return true;
+}
+
+void global_var_type__tree_delete(struct rb_root *root)
+{
+ struct global_var_entry *gvar;
+
+ while (!RB_EMPTY_ROOT(root)) {
+ struct rb_node *node = rb_first(root);
+
+ rb_erase(node, root);
+ gvar = rb_entry(node, struct global_var_entry, node);
+ free(gvar->name);
+ free(gvar);
+ }
+}
+
+static bool get_global_var_info(struct data_loc_info *dloc, u64 addr,
+ const char **var_name, int *var_offset)
+{
+ struct addr_location al;
+ struct symbol *sym;
+ u64 mem_addr;
+
+ /* Kernel symbols might be relocated */
+ mem_addr = addr + map__reloc(dloc->ms->map);
+
+ addr_location__init(&al);
+ sym = thread__find_symbol_fb(dloc->thread, dloc->cpumode,
+ mem_addr, &al);
+ if (sym) {
+ *var_name = sym->name;
+ /* Calculate type offset from the start of variable */
+ *var_offset = mem_addr - map__unmap_ip(al.map, sym->start);
+ } else {
+ *var_name = NULL;
+ }
+ addr_location__exit(&al);
+ if (*var_name == NULL)
+ return false;
+
+ return true;
+}
+
+static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
+ u64 ip, u64 var_addr, int *var_offset,
+ Dwarf_Die *type_die)
+{
+ u64 pc;
+ int offset;
+ const char *var_name = NULL;
+ struct global_var_entry *gvar;
+ Dwarf_Die var_die;
+
+ gvar = global_var__find(dloc, var_addr);
+ if (gvar) {
+ if (!dwarf_offdie(dloc->di->dbg, gvar->die_offset, type_die))
+ return false;
+
+ *var_offset = var_addr - gvar->start;
+ return true;
+ }
+
+ /* Try to get the variable by address first */
+ if (die_find_variable_by_addr(cu_die, var_addr, &var_die, &offset) &&
+ check_variable(dloc, &var_die, type_die, DWARF_REG_PC, offset,
+ /*is_fbreg=*/false) == 0) {
+ var_name = dwarf_diename(&var_die);
+ *var_offset = offset;
+ goto ok;
+ }
+
+ if (!get_global_var_info(dloc, var_addr, &var_name, var_offset))
+ return false;
+
+ pc = map__rip_2objdump(dloc->ms->map, ip);
+
+ /* Try to get the name of global variable */
+ if (die_find_variable_at(cu_die, var_name, pc, &var_die) &&
+ check_variable(dloc, &var_die, type_die, DWARF_REG_PC, *var_offset,
+ /*is_fbreg=*/false) == 0)
+ goto ok;
+
+ return false;
+
+ok:
+ /* The address should point to the start of the variable */
+ global_var__add(dloc, var_addr - *var_offset, var_name, type_die);
+ return true;
+}
+
+/**
+ * update_var_state - Update type state using given variables
+ * @state: type state table
+ * @dloc: data location info
+ * @addr: instruction address to match with variable
+ * @insn_offset: instruction offset (for debug)
+ * @var_types: list of variables with type info
+ *
+ * This function fills the @state table using @var_types info. Each variable
+ * is used only at the given location and updates an entry in the table.
+ */
+static void update_var_state(struct type_state *state, struct data_loc_info *dloc,
+ u64 addr, u64 insn_offset, struct die_var_type *var_types)
+{
+ Dwarf_Die mem_die;
+ struct die_var_type *var;
+ int fbreg = dloc->fbreg;
+ int fb_offset = 0;
+
+ if (dloc->fb_cfa) {
+ if (die_get_cfa(dloc->di->dbg, addr, &fbreg, &fb_offset) < 0)
+ fbreg = -1;
+ }
+
+ for (var = var_types; var != NULL; var = var->next) {
+ if (var->addr != addr)
+ continue;
+ /* Get the type DIE using the offset */
+ if (!dwarf_offdie(dloc->di->dbg, var->die_off, &mem_die))
+ continue;
+
+ if (var->reg == DWARF_REG_FB) {
+ findnew_stack_state(state, var->offset, TSR_KIND_TYPE,
+ &mem_die);
+
+ pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
+ insn_offset, -var->offset);
+ pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
+ } else if (var->reg == fbreg) {
+ findnew_stack_state(state, var->offset - fb_offset,
+ TSR_KIND_TYPE, &mem_die);
+
+ pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
+ insn_offset, -var->offset + fb_offset);
+ pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
+ } else if (has_reg_type(state, var->reg) && var->offset == 0) {
+ struct type_state_reg *reg;
+
+ reg = &state->regs[var->reg];
+ reg->type = mem_die;
+ reg->kind = TSR_KIND_TYPE;
+ reg->ok = true;
+
+ pr_debug_dtp("var [%"PRIx64"] reg%d",
+ insn_offset, var->reg);
+ pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
+ }
+ }
+}
+
+static void update_insn_state_x86(struct type_state *state,
+ struct data_loc_info *dloc, Dwarf_Die *cu_die,
+ struct disasm_line *dl)
+{
+ struct annotated_insn_loc loc;
+ struct annotated_op_loc *src = &loc.ops[INSN_OP_SOURCE];
+ struct annotated_op_loc *dst = &loc.ops[INSN_OP_TARGET];
+ struct type_state_reg *tsr;
+ Dwarf_Die type_die;
+ u32 insn_offset = dl->al.offset;
+ int fbreg = dloc->fbreg;
+ int fboff = 0;
+
+ if (annotate_get_insn_location(dloc->arch, dl, &loc) < 0)
+ return;
+
+ if (ins__is_call(&dl->ins)) {
+ struct symbol *func = dl->ops.target.sym;
+
+ if (func == NULL)
+ return;
+
+ /* __fentry__ will preserve all registers */
+ if (!strcmp(func->name, "__fentry__"))
+ return;
+
+ pr_debug_dtp("call [%x] %s\n", insn_offset, func->name);
+
+ /* Otherwise invalidate caller-saved registers after call */
+ for (unsigned i = 0; i < ARRAY_SIZE(state->regs); i++) {
+ if (state->regs[i].caller_saved)
+ state->regs[i].ok = false;
+ }
+
+ /* Update register with the return type (if any) */
+ if (die_find_func_rettype(cu_die, func->name, &type_die)) {
+ tsr = &state->regs[state->ret_reg];
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ pr_debug_dtp("call [%x] return -> reg%d",
+ insn_offset, state->ret_reg);
+ pr_debug_type_name(&type_die, tsr->kind);
+ }
+ return;
+ }
+
+ if (!strncmp(dl->ins.name, "add", 3)) {
+ u64 imm_value = -1ULL;
+ int offset;
+ const char *var_name = NULL;
+ struct map_symbol *ms = dloc->ms;
+ u64 ip = ms->sym->start + dl->al.offset;
+
+ if (!has_reg_type(state, dst->reg1))
+ return;
+
+ tsr = &state->regs[dst->reg1];
+
+ if (src->imm)
+ imm_value = src->offset;
+ else if (has_reg_type(state, src->reg1) &&
+ state->regs[src->reg1].kind == TSR_KIND_CONST)
+ imm_value = state->regs[src->reg1].imm_value;
+ else if (src->reg1 == DWARF_REG_PC) {
+ u64 var_addr = annotate_calc_pcrel(dloc->ms, ip,
+ src->offset, dl);
+
+ if (get_global_var_info(dloc, var_addr,
+ &var_name, &offset) &&
+ !strcmp(var_name, "this_cpu_off") &&
+ tsr->kind == TSR_KIND_CONST) {
+ tsr->kind = TSR_KIND_PERCPU_BASE;
+ imm_value = tsr->imm_value;
+ }
+ }
+ else
+ return;
+
+ if (tsr->kind != TSR_KIND_PERCPU_BASE)
+ return;
+
+ if (get_global_var_type(cu_die, dloc, ip, imm_value, &offset,
+ &type_die) && offset == 0) {
+ /*
+ * This is not a pointer type, but it should be treated
+ * as a pointer.
+ */
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_POINTER;
+ tsr->ok = true;
+
+ pr_debug_dtp("add [%x] percpu %#"PRIx64" -> reg%d",
+ insn_offset, imm_value, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ return;
+ }
+
+ if (strncmp(dl->ins.name, "mov", 3))
+ return;
+
+ if (dloc->fb_cfa) {
+ u64 ip = dloc->ms->sym->start + dl->al.offset;
+ u64 pc = map__rip_2objdump(dloc->ms->map, ip);
+
+ if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0)
+ fbreg = -1;
+ }
+
+ /* Case 1. register to register or segment:offset to register transfers */
+ if (!src->mem_ref && !dst->mem_ref) {
+ if (!has_reg_type(state, dst->reg1))
+ return;
+
+ tsr = &state->regs[dst->reg1];
+ if (map__dso(dloc->ms->map)->kernel &&
+ src->segment == INSN_SEG_X86_GS && src->imm) {
+ u64 ip = dloc->ms->sym->start + dl->al.offset;
+ u64 var_addr;
+ int offset;
+
+ /*
+ * In kernel, %gs points to a per-cpu region for the
+ * current CPU. Access with a constant offset should
+ * be treated as a global variable access.
+ */
+ var_addr = src->offset;
+
+ if (var_addr == 40) {
+ tsr->kind = TSR_KIND_CANARY;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] stack canary -> reg%d\n",
+ insn_offset, dst->reg1);
+ return;
+ }
+
+ if (!get_global_var_type(cu_die, dloc, ip, var_addr,
+ &offset, &type_die) ||
+ !die_get_member_type(&type_die, offset, &type_die)) {
+ tsr->ok = false;
+ return;
+ }
+
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] this-cpu addr=%#"PRIx64" -> reg%d",
+ insn_offset, var_addr, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ return;
+ }
+
+ if (src->imm) {
+ tsr->kind = TSR_KIND_CONST;
+ tsr->imm_value = src->offset;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] imm=%#x -> reg%d\n",
+ insn_offset, tsr->imm_value, dst->reg1);
+ return;
+ }
+
+ if (!has_reg_type(state, src->reg1) ||
+ !state->regs[src->reg1].ok) {
+ tsr->ok = false;
+ return;
+ }
+
+ tsr->type = state->regs[src->reg1].type;
+ tsr->kind = state->regs[src->reg1].kind;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] reg%d -> reg%d",
+ insn_offset, src->reg1, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /* Case 2. memory to register transers */
+ if (src->mem_ref && !dst->mem_ref) {
+ int sreg = src->reg1;
+
+ if (!has_reg_type(state, dst->reg1))
+ return;
+
+ tsr = &state->regs[dst->reg1];
+
+retry:
+ /* Check stack variables with offset */
+ if (sreg == fbreg) {
+ struct type_state_stack *stack;
+ int offset = src->offset - fboff;
+
+ stack = find_stack_state(state, offset);
+ if (stack == NULL) {
+ tsr->ok = false;
+ return;
+ } else if (!stack->compound) {
+ tsr->type = stack->type;
+ tsr->kind = stack->kind;
+ tsr->ok = true;
+ } else if (die_get_member_type(&stack->type,
+ offset - stack->offset,
+ &type_die)) {
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+ } else {
+ tsr->ok = false;
+ return;
+ }
+
+ pr_debug_dtp("mov [%x] -%#x(stack) -> reg%d",
+ insn_offset, -offset, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /* And then dereference the pointer if it has one */
+ else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
+ state->regs[sreg].kind == TSR_KIND_TYPE &&
+ die_deref_ptr_type(&state->regs[sreg].type,
+ src->offset, &type_die)) {
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] %#x(reg%d) -> reg%d",
+ insn_offset, src->offset, sreg, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /* Or check if it's a global variable */
+ else if (sreg == DWARF_REG_PC) {
+ struct map_symbol *ms = dloc->ms;
+ u64 ip = ms->sym->start + dl->al.offset;
+ u64 addr;
+ int offset;
+
+ addr = annotate_calc_pcrel(ms, ip, src->offset, dl);
+
+ if (!get_global_var_type(cu_die, dloc, ip, addr, &offset,
+ &type_die) ||
+ !die_get_member_type(&type_die, offset, &type_die)) {
+ tsr->ok = false;
+ return;
+ }
+
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] global addr=%"PRIx64" -> reg%d",
+ insn_offset, addr, dst->reg1);
+ pr_debug_type_name(&type_die, tsr->kind);
+ }
+ /* And check percpu access with base register */
+ else if (has_reg_type(state, sreg) &&
+ state->regs[sreg].kind == TSR_KIND_PERCPU_BASE) {
+ u64 ip = dloc->ms->sym->start + dl->al.offset;
+ int offset;
+
+ /*
+ * In kernel, %gs points to a per-cpu region for the
+ * current CPU. Access with a constant offset should
+ * be treated as a global variable access.
+ */
+ if (get_global_var_type(cu_die, dloc, ip, src->offset,
+ &offset, &type_die) &&
+ die_get_member_type(&type_die, offset, &type_die)) {
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] percpu %#x(reg%d) -> reg%d",
+ insn_offset, src->offset, sreg, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ } else {
+ tsr->ok = false;
+ }
+ }
+ /* And then dereference the calculated pointer if it has one */
+ else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
+ state->regs[sreg].kind == TSR_KIND_POINTER &&
+ die_get_member_type(&state->regs[sreg].type,
+ src->offset, &type_die)) {
+ tsr->type = type_die;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] pointer %#x(reg%d) -> reg%d",
+ insn_offset, src->offset, sreg, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /* Or try another register if any */
+ else if (src->multi_regs && sreg == src->reg1 &&
+ src->reg1 != src->reg2) {
+ sreg = src->reg2;
+ goto retry;
+ }
+ else {
+ int offset;
+ const char *var_name = NULL;
+
+ /* it might be per-cpu variable (in kernel) access */
+ if (src->offset < 0) {
+ if (get_global_var_info(dloc, (s64)src->offset,
+ &var_name, &offset) &&
+ !strcmp(var_name, "__per_cpu_offset")) {
+ tsr->kind = TSR_KIND_PERCPU_BASE;
+
+ pr_debug_dtp("mov [%x] percpu base reg%d\n",
+ insn_offset, dst->reg1);
+ }
+ }
+
+ tsr->ok = false;
+ }
+ }
+ /* Case 3. register to memory transfers */
+ if (!src->mem_ref && dst->mem_ref) {
+ if (!has_reg_type(state, src->reg1) ||
+ !state->regs[src->reg1].ok)
+ return;
+
+ /* Check stack variables with offset */
+ if (dst->reg1 == fbreg) {
+ struct type_state_stack *stack;
+ int offset = dst->offset - fboff;
+
+ tsr = &state->regs[src->reg1];
+
+ stack = find_stack_state(state, offset);
+ if (stack) {
+ /*
+ * The source register is likely to hold a type
+ * of member if it's a compound type. Do not
+ * update the stack variable type since we can
+ * get the member type later by using the
+ * die_get_member_type().
+ */
+ if (!stack->compound)
+ set_stack_state(stack, offset, tsr->kind,
+ &tsr->type);
+ } else {
+ findnew_stack_state(state, offset, tsr->kind,
+ &tsr->type);
+ }
+
+ pr_debug_dtp("mov [%x] reg%d -> -%#x(stack)",
+ insn_offset, src->reg1, -offset);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /*
+ * Ignore other transfers since it'd set a value in a struct
+ * and won't change the type.
+ */
+ }
+ /* Case 4. memory to memory transfers (not handled for now) */
+}
+
+/**
+ * update_insn_state - Update type state for an instruction
+ * @state: type state table
+ * @dloc: data location info
+ * @cu_die: compile unit debug entry
+ * @dl: disasm line for the instruction
+ *
+ * This function updates the @state table for the target operand of the
+ * instruction at @dl if it transfers the type like MOV on x86. Since it
+ * tracks the type, it won't care about the values like in arithmetic
+ * instructions like ADD/SUB/MUL/DIV and INC/DEC.
+ *
+ * Note that ops->reg2 is only available when both mem_ref and multi_regs
+ * are true.
+ */
+static void update_insn_state(struct type_state *state, struct data_loc_info *dloc,
+ Dwarf_Die *cu_die, struct disasm_line *dl)
+{
+ if (arch__is(dloc->arch, "x86"))
+ update_insn_state_x86(state, dloc, cu_die, dl);
+}
+
+/*
+ * Prepend this_blocks (from the outer scope) to full_blocks, removing
+ * duplicate disasm line.
+ */
+static void prepend_basic_blocks(struct list_head *this_blocks,
+ struct list_head *full_blocks)
+{
+ struct annotated_basic_block *first_bb, *last_bb;
+
+ last_bb = list_last_entry(this_blocks, typeof(*last_bb), list);
+ first_bb = list_first_entry(full_blocks, typeof(*first_bb), list);
+
+ if (list_empty(full_blocks))
+ goto out;
+
+ /* Last insn in this_blocks should be same as first insn in full_blocks */
+ if (last_bb->end != first_bb->begin) {
+ pr_debug("prepend basic blocks: mismatched disasm line %"PRIx64" -> %"PRIx64"\n",
+ last_bb->end->al.offset, first_bb->begin->al.offset);
+ goto out;
+ }
+
+ /* Is the basic block have only one disasm_line? */
+ if (last_bb->begin == last_bb->end) {
+ list_del(&last_bb->list);
+ free(last_bb);
+ goto out;
+ }
+
+ /* Point to the insn before the last when adding this block to full_blocks */
+ last_bb->end = list_prev_entry(last_bb->end, al.node);
+
+out:
+ list_splice(this_blocks, full_blocks);
+}
+
+static void delete_basic_blocks(struct list_head *basic_blocks)
+{
+ struct annotated_basic_block *bb, *tmp;
+
+ list_for_each_entry_safe(bb, tmp, basic_blocks, list) {
+ list_del(&bb->list);
+ free(bb);
+ }
+}
+
+/* Make sure all variables have a valid start address */
+static void fixup_var_address(struct die_var_type *var_types, u64 addr)
+{
+ while (var_types) {
+ /*
+ * Some variables have no address range meaning it's always
+ * available in the whole scope. Let's adjust the start
+ * address to the start of the scope.
+ */
+ if (var_types->addr == 0)
+ var_types->addr = addr;
+
+ var_types = var_types->next;
+ }
+}
+
+static void delete_var_types(struct die_var_type *var_types)
+{
+ while (var_types) {
+ struct die_var_type *next = var_types->next;
+
+ free(var_types);
+ var_types = next;
+ }
+}
+
+/* should match to is_stack_canary() in util/annotate.c */
+static void setup_stack_canary(struct data_loc_info *dloc)
+{
+ if (arch__is(dloc->arch, "x86")) {
+ dloc->op->segment = INSN_SEG_X86_GS;
+ dloc->op->imm = true;
+ dloc->op->offset = 40;
+ }
+}
+
+/*
+ * It's at the target address, check if it has a matching type.
+ * It returns 1 if found, 0 if not or -1 if not found but no need to
+ * repeat the search. The last case is for per-cpu variables which
+ * are similar to global variables and no additional info is needed.
+ */
+static int check_matching_type(struct type_state *state,
+ struct data_loc_info *dloc, int reg,
+ Dwarf_Die *cu_die, Dwarf_Die *type_die)
+{
+ Dwarf_Word size;
+ u32 insn_offset = dloc->ip - dloc->ms->sym->start;
+
+ pr_debug_dtp("chk [%x] reg%d offset=%#x ok=%d kind=%d",
+ insn_offset, reg, dloc->op->offset,
+ state->regs[reg].ok, state->regs[reg].kind);
+
+ if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_TYPE) {
+ int tag = dwarf_tag(&state->regs[reg].type);
+
+ pr_debug_dtp("\n");
+
+ /*
+ * Normal registers should hold a pointer (or array) to
+ * dereference a memory location.
+ */
+ if (tag != DW_TAG_pointer_type && tag != DW_TAG_array_type)
+ return -1;
+
+ /* Remove the pointer and get the target type */
+ if (die_get_real_type(&state->regs[reg].type, type_die) == NULL)
+ return -1;
+
+ dloc->type_offset = dloc->op->offset;
+
+ /* Get the size of the actual type */
+ if (dwarf_aggregate_size(type_die, &size) < 0 ||
+ (unsigned)dloc->type_offset >= size)
+ return -1;
+
+ return 1;
+ }
+
+ if (reg == dloc->fbreg) {
+ struct type_state_stack *stack;
+
+ pr_debug_dtp(" fbreg\n");
+
+ stack = find_stack_state(state, dloc->type_offset);
+ if (stack == NULL)
+ return 0;
+
+ if (stack->kind == TSR_KIND_CANARY) {
+ setup_stack_canary(dloc);
+ return -1;
+ }
+
+ *type_die = stack->type;
+ /* Update the type offset from the start of slot */
+ dloc->type_offset -= stack->offset;
+
+ return 1;
+ }
+
+ if (dloc->fb_cfa) {
+ struct type_state_stack *stack;
+ u64 pc = map__rip_2objdump(dloc->ms->map, dloc->ip);
+ int fbreg, fboff;
+
+ pr_debug_dtp(" cfa\n");
+
+ if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0)
+ fbreg = -1;
+
+ if (reg != fbreg)
+ return 0;
+
+ stack = find_stack_state(state, dloc->type_offset - fboff);
+ if (stack == NULL)
+ return 0;
+
+ if (stack->kind == TSR_KIND_CANARY) {
+ setup_stack_canary(dloc);
+ return -1;
+ }
+
+ *type_die = stack->type;
+ /* Update the type offset from the start of slot */
+ dloc->type_offset -= fboff + stack->offset;
+
+ return 1;
+ }
+
+ if (state->regs[reg].kind == TSR_KIND_PERCPU_BASE) {
+ u64 var_addr = dloc->op->offset;
+ int var_offset;
+
+ pr_debug_dtp(" percpu var\n");
+
+ if (get_global_var_type(cu_die, dloc, dloc->ip, var_addr,
+ &var_offset, type_die)) {
+ dloc->type_offset = var_offset;
+ return 1;
+ }
+ /* No need to retry per-cpu (global) variables */
+ return -1;
+ }
+
+ if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_POINTER) {
+ pr_debug_dtp(" percpu ptr\n");
+
+ /*
+ * It's actaully pointer but the address was calculated using
+ * some arithmetic. So it points to the actual type already.
+ */
+ *type_die = state->regs[reg].type;
+
+ dloc->type_offset = dloc->op->offset;
+
+ /* Get the size of the actual type */
+ if (dwarf_aggregate_size(type_die, &size) < 0 ||
+ (unsigned)dloc->type_offset >= size)
+ return -1;
+
+ return 1;
+ }
+
+ if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_CANARY) {
+ pr_debug_dtp(" stack canary\n");
+
+ /*
+ * This is a saved value of the stack canary which will be handled
+ * in the outer logic when it returns failure here. Pretend it's
+ * from the stack canary directly.
+ */
+ setup_stack_canary(dloc);
+
+ return -1;
+ }
+
+ if (map__dso(dloc->ms->map)->kernel && arch__is(dloc->arch, "x86")) {
+ u64 addr;
+ int offset;
+
+ /* Direct this-cpu access like "%gs:0x34740" */
+ if (dloc->op->segment == INSN_SEG_X86_GS && dloc->op->imm) {
+ pr_debug_dtp(" this-cpu var\n");
+
+ addr = dloc->op->offset;
+
+ if (get_global_var_type(cu_die, dloc, dloc->ip, addr,
+ &offset, type_die)) {
+ dloc->type_offset = offset;
+ return 1;
+ }
+ return -1;
+ }
+
+ /* Access to per-cpu base like "-0x7dcf0500(,%rdx,8)" */
+ if (dloc->op->offset < 0 && reg != state->stack_reg) {
+ const char *var_name = NULL;
+
+ addr = (s64) dloc->op->offset;
+
+ if (get_global_var_info(dloc, addr, &var_name, &offset) &&
+ !strcmp(var_name, "__per_cpu_offset") && offset == 0 &&
+ get_global_var_type(cu_die, dloc, dloc->ip, addr,
+ &offset, type_die)) {
+ pr_debug_dtp(" percpu base\n");
+
+ dloc->type_offset = offset;
+ return 1;
+ }
+ pr_debug_dtp(" negative offset\n");
+ return -1;
+ }
+ }
+
+ pr_debug_dtp("\n");
+ return 0;
+}
+
+/* Iterate instructions in basic blocks and update type table */
+static int find_data_type_insn(struct data_loc_info *dloc, int reg,
+ struct list_head *basic_blocks,
+ struct die_var_type *var_types,
+ Dwarf_Die *cu_die, Dwarf_Die *type_die)
+{
+ struct type_state state;
+ struct symbol *sym = dloc->ms->sym;
+ struct annotation *notes = symbol__annotation(sym);
+ struct annotated_basic_block *bb;
+ int ret = 0;
+
+ init_type_state(&state, dloc->arch);
+
+ list_for_each_entry(bb, basic_blocks, list) {
+ struct disasm_line *dl = bb->begin;
+
+ BUG_ON(bb->begin->al.offset == -1 || bb->end->al.offset == -1);
+
+ pr_debug_dtp("bb: [%"PRIx64" - %"PRIx64"]\n",
+ bb->begin->al.offset, bb->end->al.offset);
+
+ list_for_each_entry_from(dl, &notes->src->source, al.node) {
+ u64 this_ip = sym->start + dl->al.offset;
+ u64 addr = map__rip_2objdump(dloc->ms->map, this_ip);
+
+ /* Skip comment or debug info lines */
+ if (dl->al.offset == -1)
+ continue;
+
+ /* Update variable type at this address */
+ update_var_state(&state, dloc, addr, dl->al.offset, var_types);
+
+ if (this_ip == dloc->ip) {
+ ret = check_matching_type(&state, dloc, reg,
+ cu_die, type_die);
+ goto out;
+ }
+
+ /* Update type table after processing the instruction */
+ update_insn_state(&state, dloc, cu_die, dl);
+ if (dl == bb->end)
+ break;
+ }
+ }
+
+out:
+ exit_type_state(&state);
+ return ret;
+}
+
+/*
+ * Construct a list of basic blocks for each scope with variables and try to find
+ * the data type by updating a type state table through instructions.
+ */
+static int find_data_type_block(struct data_loc_info *dloc, int reg,
+ Dwarf_Die *cu_die, Dwarf_Die *scopes,
+ int nr_scopes, Dwarf_Die *type_die)
+{
+ LIST_HEAD(basic_blocks);
+ struct die_var_type *var_types = NULL;
+ u64 src_ip, dst_ip, prev_dst_ip;
+ int ret = -1;
+
+ /* TODO: other architecture support */
+ if (!arch__is(dloc->arch, "x86"))
+ return -1;
+
+ prev_dst_ip = dst_ip = dloc->ip;
+ for (int i = nr_scopes - 1; i >= 0; i--) {
+ Dwarf_Addr base, start, end;
+ LIST_HEAD(this_blocks);
+ int found;
+
+ if (dwarf_ranges(&scopes[i], 0, &base, &start, &end) < 0)
+ break;
+
+ pr_debug_dtp("scope: [%d/%d] (die:%lx)\n",
+ i + 1, nr_scopes, (long)dwarf_dieoffset(&scopes[i]));
+ src_ip = map__objdump_2rip(dloc->ms->map, start);
+
+again:
+ /* Get basic blocks for this scope */
+ if (annotate_get_basic_blocks(dloc->ms->sym, src_ip, dst_ip,
+ &this_blocks) < 0) {
+ /* Try previous block if they are not connected */
+ if (prev_dst_ip != dst_ip) {
+ dst_ip = prev_dst_ip;
+ goto again;
+ }
+
+ pr_debug_dtp("cannot find a basic block from %"PRIx64" to %"PRIx64"\n",
+ src_ip - dloc->ms->sym->start,
+ dst_ip - dloc->ms->sym->start);
+ continue;
+ }
+ prepend_basic_blocks(&this_blocks, &basic_blocks);
+
+ /* Get variable info for this scope and add to var_types list */
+ die_collect_vars(&scopes[i], &var_types);
+ fixup_var_address(var_types, start);
+
+ /* Find from start of this scope to the target instruction */
+ found = find_data_type_insn(dloc, reg, &basic_blocks, var_types,
+ cu_die, type_die);
+ if (found > 0) {
+ pr_debug_dtp("found by insn track: %#x(reg%d) type-offset=%#x\n",
+ dloc->op->offset, reg, dloc->type_offset);
+ pr_debug_type_name(type_die, TSR_KIND_TYPE);
+ ret = 0;
+ break;
+ }
+
+ if (found < 0)
+ break;
+
+ /* Go up to the next scope and find blocks to the start */
+ prev_dst_ip = dst_ip;
+ dst_ip = src_ip;
+ }
+
+ delete_basic_blocks(&basic_blocks);
+ delete_var_types(var_types);
+ return ret;
+}
+
/* The result will be saved in @type_die */
-static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr,
- const char *var_name, struct annotated_op_loc *loc,
- Dwarf_Die *type_die)
+static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die)
{
+ struct annotated_op_loc *loc = dloc->op;
Dwarf_Die cu_die, var_die;
Dwarf_Die *scopes = NULL;
int reg, offset;
int ret = -1;
int i, nr_scopes;
int fbreg = -1;
- bool is_fbreg = false;
int fb_offset = 0;
+ bool is_fbreg = false;
+ u64 pc;
+ char buf[64];
+
+ if (dloc->op->multi_regs)
+ snprintf(buf, sizeof(buf), "reg%d, reg%d", dloc->op->reg1, dloc->op->reg2);
+ else if (dloc->op->reg1 == DWARF_REG_PC)
+ snprintf(buf, sizeof(buf), "PC");
+ else
+ snprintf(buf, sizeof(buf), "reg%d", dloc->op->reg1);
+
+ pr_debug_dtp("-----------------------------------------------------------\n");
+ pr_debug_dtp("find data type for %#x(%s) at %s+%#"PRIx64"\n",
+ dloc->op->offset, buf, dloc->ms->sym->name,
+ dloc->ip - dloc->ms->sym->start);
+
+ /*
+ * IP is a relative instruction address from the start of the map, as
+ * it can be randomized/relocated, it needs to translate to PC which is
+ * a file address for DWARF processing.
+ */
+ pc = map__rip_2objdump(dloc->ms->map, dloc->ip);
/* Get a compile_unit for this address */
- if (!find_cu_die(di, pc, &cu_die)) {
- pr_debug("cannot find CU for address %" PRIx64 "\n", pc);
+ if (!find_cu_die(dloc->di, pc, &cu_die)) {
+ pr_debug_dtp("cannot find CU for address %"PRIx64"\n", pc);
ann_data_stat.no_cuinfo++;
return -1;
}
@@ -262,19 +1535,18 @@ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr,
reg = loc->reg1;
offset = loc->offset;
- if (reg == DWARF_REG_PC) {
- if (die_find_variable_by_addr(&cu_die, pc, addr, &var_die, &offset)) {
- ret = check_variable(&var_die, type_die, offset,
- /*is_pointer=*/false);
- loc->offset = offset;
- goto out;
- }
+ pr_debug_dtp("CU for %s (die:%#lx)\n",
+ dwarf_diename(&cu_die), (long)dwarf_dieoffset(&cu_die));
- if (var_name && die_find_variable_at(&cu_die, var_name, pc,
- &var_die)) {
- ret = check_variable(&var_die, type_die, 0,
- /*is_pointer=*/false);
- /* loc->offset will be updated by the caller */
+ if (reg == DWARF_REG_PC) {
+ if (get_global_var_type(&cu_die, dloc, dloc->ip, dloc->var_addr,
+ &offset, type_die)) {
+ dloc->type_offset = offset;
+
+ pr_debug_dtp("found by addr=%#"PRIx64" type_offset=%#x\n",
+ dloc->var_addr, offset);
+ pr_debug_type_name(type_die, TSR_KIND_TYPE);
+ ret = 0;
goto out;
}
}
@@ -291,16 +1563,20 @@ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr,
dwarf_formblock(&attr, &block) == 0 && block.length == 1) {
switch (*block.data) {
case DW_OP_reg0 ... DW_OP_reg31:
- fbreg = *block.data - DW_OP_reg0;
+ fbreg = dloc->fbreg = *block.data - DW_OP_reg0;
break;
case DW_OP_call_frame_cfa:
- if (die_get_cfa(di->dbg, pc, &fbreg,
+ dloc->fb_cfa = true;
+ if (die_get_cfa(dloc->di->dbg, pc, &fbreg,
&fb_offset) < 0)
fbreg = -1;
break;
default:
break;
}
+
+ pr_debug_dtp("frame base: cfa=%d fbreg=%d\n",
+ dloc->fb_cfa, fbreg);
}
}
@@ -312,7 +1588,7 @@ retry:
/* Search from the inner-most scope to the outer */
for (i = nr_scopes - 1; i >= 0; i--) {
if (reg == DWARF_REG_PC) {
- if (!die_find_variable_by_addr(&scopes[i], pc, addr,
+ if (!die_find_variable_by_addr(&scopes[i], dloc->var_addr,
&var_die, &offset))
continue;
} else {
@@ -323,19 +1599,51 @@ retry:
}
/* Found a variable, see if it's correct */
- ret = check_variable(&var_die, type_die, offset,
- reg != DWARF_REG_PC && !is_fbreg);
- loc->offset = offset;
+ ret = check_variable(dloc, &var_die, type_die, reg, offset, is_fbreg);
+ if (ret == 0) {
+ pr_debug_dtp("found \"%s\" in scope=%d/%d (die: %#lx) ",
+ dwarf_diename(&var_die), i+1, nr_scopes,
+ (long)dwarf_dieoffset(&scopes[i]));
+ if (reg == DWARF_REG_PC) {
+ pr_debug_dtp("addr=%#"PRIx64" type_offset=%#x\n",
+ dloc->var_addr, offset);
+ } else if (reg == DWARF_REG_FB || is_fbreg) {
+ pr_debug_dtp("stack_offset=%#x type_offset=%#x\n",
+ fb_offset, offset);
+ } else {
+ pr_debug_dtp("type_offset=%#x\n", offset);
+ }
+ pr_debug_location(&var_die, pc, reg);
+ pr_debug_type_name(type_die, TSR_KIND_TYPE);
+ } else {
+ pr_debug_dtp("check variable \"%s\" failed (die: %#lx)\n",
+ dwarf_diename(&var_die),
+ (long)dwarf_dieoffset(&var_die));
+ pr_debug_location(&var_die, pc, reg);
+ pr_debug_type_name(type_die, TSR_KIND_TYPE);
+ }
+ dloc->type_offset = offset;
goto out;
}
+ if (reg != DWARF_REG_PC) {
+ ret = find_data_type_block(dloc, reg, &cu_die, scopes,
+ nr_scopes, type_die);
+ if (ret == 0) {
+ ann_data_stat.insn_track++;
+ goto out;
+ }
+ }
+
if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) {
reg = loc->reg2;
goto retry;
}
- if (ret < 0)
+ if (ret < 0) {
+ pr_debug_dtp("no variable found\n");
ann_data_stat.no_var++;
+ }
out:
free(scopes);
@@ -344,50 +1652,45 @@ out:
/**
* find_data_type - Return a data type at the location
- * @ms: map and symbol at the location
- * @ip: instruction address of the memory access
- * @loc: instruction operand location
- * @addr: data address of the memory access
- * @var_name: global variable name
+ * @dloc: data location
*
* This functions searches the debug information of the binary to get the data
- * type it accesses. The exact location is expressed by (@ip, reg, offset)
- * for pointer variables or (@ip, @addr) for global variables. Note that global
- * variables might update the @loc->offset after finding the start of the variable.
- * If it cannot find a global variable by address, it tried to fine a declaration
- * of the variable using @var_name. In that case, @loc->offset won't be updated.
+ * type it accesses. The exact location is expressed by (ip, reg, offset)
+ * for pointer variables or (ip, addr) for global variables. Note that global
+ * variables might update the @dloc->type_offset after finding the start of the
+ * variable. If it cannot find a global variable by address, it tried to find
+ * a declaration of the variable using var_name. In that case, @dloc->offset
+ * won't be updated.
*
* It return %NULL if not found.
*/
-struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
- struct annotated_op_loc *loc, u64 addr,
- const char *var_name)
+struct annotated_data_type *find_data_type(struct data_loc_info *dloc)
{
struct annotated_data_type *result = NULL;
- struct dso *dso = map__dso(ms->map);
- struct debuginfo *di;
+ struct dso *dso = map__dso(dloc->ms->map);
Dwarf_Die type_die;
- u64 pc;
- di = debuginfo__new(dso->long_name);
- if (di == NULL) {
- pr_debug("cannot get the debug info\n");
+ dloc->di = debuginfo__new(dso->long_name);
+ if (dloc->di == NULL) {
+ pr_debug_dtp("cannot get the debug info\n");
return NULL;
}
/*
- * IP is a relative instruction address from the start of the map, as
- * it can be randomized/relocated, it needs to translate to PC which is
- * a file address for DWARF processing.
+ * The type offset is the same as instruction offset by default.
+ * But when finding a global variable, the offset won't be valid.
*/
- pc = map__rip_2objdump(ms->map, ip);
- if (find_data_type_die(di, pc, addr, var_name, loc, &type_die) < 0)
+ dloc->type_offset = dloc->op->offset;
+
+ dloc->fbreg = -1;
+
+ if (find_data_type_die(dloc, &type_die) < 0)
goto out;
result = dso__findnew_data_type(dso, &type_die);
out:
- debuginfo__delete(di);
+ debuginfo__delete(dloc->di);
return result;
}
@@ -484,3 +1787,115 @@ int annotated_data_type__update_samples(struct annotated_data_type *adt,
h->addr[offset].period += period;
return 0;
}
+
+static void print_annotated_data_header(struct hist_entry *he, struct evsel *evsel)
+{
+ struct dso *dso = map__dso(he->ms.map);
+ int nr_members = 1;
+ int nr_samples = he->stat.nr_events;
+ int width = 7;
+ const char *val_hdr = "Percent";
+
+ if (evsel__is_group_event(evsel)) {
+ struct hist_entry *pair;
+
+ list_for_each_entry(pair, &he->pairs.head, pairs.node)
+ nr_samples += pair->stat.nr_events;
+ }
+
+ printf("Annotate type: '%s' in %s (%d samples):\n",
+ he->mem_type->self.type_name, dso->name, nr_samples);
+
+ if (evsel__is_group_event(evsel)) {
+ struct evsel *pos;
+ int i = 0;
+
+ for_each_group_evsel(pos, evsel)
+ printf(" event[%d] = %s\n", i++, pos->name);
+
+ nr_members = evsel->core.nr_members;
+ }
+
+ if (symbol_conf.show_total_period) {
+ width = 11;
+ val_hdr = "Period";
+ } else if (symbol_conf.show_nr_samples) {
+ width = 7;
+ val_hdr = "Samples";
+ }
+
+ printf("============================================================================\n");
+ printf("%*s %10s %10s %s\n", (width + 1) * nr_members, val_hdr,
+ "offset", "size", "field");
+}
+
+static void print_annotated_data_value(struct type_hist *h, u64 period, int nr_samples)
+{
+ double percent = h->period ? (100.0 * period / h->period) : 0;
+ const char *color = get_percent_color(percent);
+
+ if (symbol_conf.show_total_period)
+ color_fprintf(stdout, color, " %11" PRIu64, period);
+ else if (symbol_conf.show_nr_samples)
+ color_fprintf(stdout, color, " %7d", nr_samples);
+ else
+ color_fprintf(stdout, color, " %7.2f", percent);
+}
+
+static void print_annotated_data_type(struct annotated_data_type *mem_type,
+ struct annotated_member *member,
+ struct evsel *evsel, int indent)
+{
+ struct annotated_member *child;
+ struct type_hist *h = mem_type->histograms[evsel->core.idx];
+ int i, nr_events = 1, samples = 0;
+ u64 period = 0;
+ int width = symbol_conf.show_total_period ? 11 : 7;
+
+ for (i = 0; i < member->size; i++) {
+ samples += h->addr[member->offset + i].nr_samples;
+ period += h->addr[member->offset + i].period;
+ }
+ print_annotated_data_value(h, period, samples);
+
+ if (evsel__is_group_event(evsel)) {
+ struct evsel *pos;
+
+ for_each_group_member(pos, evsel) {
+ h = mem_type->histograms[pos->core.idx];
+
+ samples = 0;
+ period = 0;
+ for (i = 0; i < member->size; i++) {
+ samples += h->addr[member->offset + i].nr_samples;
+ period += h->addr[member->offset + i].period;
+ }
+ print_annotated_data_value(h, period, samples);
+ }
+ nr_events = evsel->core.nr_members;
+ }
+
+ printf(" %10d %10d %*s%s\t%s",
+ member->offset, member->size, indent, "", member->type_name,
+ member->var_name ?: "");
+
+ if (!list_empty(&member->children))
+ printf(" {\n");
+
+ list_for_each_entry(child, &member->children, node)
+ print_annotated_data_type(mem_type, child, evsel, indent + 4);
+
+ if (!list_empty(&member->children))
+ printf("%*s}", (width + 1) * nr_events + 24 + indent, "");
+ printf(";\n");
+}
+
+int hist_entry__annotate_data_tty(struct hist_entry *he, struct evsel *evsel)
+{
+ print_annotated_data_header(he, evsel);
+ print_annotated_data_type(he->mem_type, &he->mem_type->self, evsel, 0);
+ printf("\n");
+
+ /* move to the next entry */
+ return '>';
+}
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
index 1b0db8e8c40e6b..0a57d9f5ee7812 100644
--- a/tools/perf/util/annotate-data.h
+++ b/tools/perf/util/annotate-data.h
@@ -8,8 +8,12 @@
#include <linux/types.h>
struct annotated_op_loc;
+struct debuginfo;
struct evsel;
+struct hist_browser_timer;
+struct hist_entry;
struct map_symbol;
+struct thread;
/**
* struct annotated_member - Type of member field
@@ -71,6 +75,40 @@ struct annotated_data_type {
extern struct annotated_data_type unknown_type;
extern struct annotated_data_type stackop_type;
+extern struct annotated_data_type canary_type;
+
+/**
+ * struct data_loc_info - Data location information
+ * @arch: CPU architecture info
+ * @thread: Thread info
+ * @ms: Map and Symbol info
+ * @ip: Instruction address
+ * @var_addr: Data address (for global variables)
+ * @cpumode: CPU execution mode
+ * @op: Instruction operand location (regs and offset)
+ * @di: Debug info
+ * @fbreg: Frame base register
+ * @fb_cfa: Whether the frame needs to check CFA
+ * @type_offset: Final offset in the type
+ */
+struct data_loc_info {
+ /* These are input field, should be filled by caller */
+ struct arch *arch;
+ struct thread *thread;
+ struct map_symbol *ms;
+ u64 ip;
+ u64 var_addr;
+ u8 cpumode;
+ struct annotated_op_loc *op;
+
+ /* These are used internally */
+ struct debuginfo *di;
+ int fbreg;
+ bool fb_cfa;
+
+ /* This is for the result */
+ int type_offset;
+};
/**
* struct annotated_data_stat - Debug statistics
@@ -100,15 +138,14 @@ struct annotated_data_stat {
int no_typeinfo;
int invalid_size;
int bad_offset;
+ int insn_track;
};
extern struct annotated_data_stat ann_data_stat;
#ifdef HAVE_DWARF_SUPPORT
/* Returns data type at the location (ip, reg, offset) */
-struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
- struct annotated_op_loc *loc, u64 addr,
- const char *var_name);
+struct annotated_data_type *find_data_type(struct data_loc_info *dloc);
/* Update type access histogram at the given offset */
int annotated_data_type__update_samples(struct annotated_data_type *adt,
@@ -118,12 +155,15 @@ int annotated_data_type__update_samples(struct annotated_data_type *adt,
/* Release all data type information in the tree */
void annotated_data_type__tree_delete(struct rb_root *root);
+/* Release all global variable information in the tree */
+void global_var_type__tree_delete(struct rb_root *root);
+
+int hist_entry__annotate_data_tty(struct hist_entry *he, struct evsel *evsel);
+
#else /* HAVE_DWARF_SUPPORT */
static inline struct annotated_data_type *
-find_data_type(struct map_symbol *ms __maybe_unused, u64 ip __maybe_unused,
- struct annotated_op_loc *loc __maybe_unused,
- u64 addr __maybe_unused, const char *var_name __maybe_unused)
+find_data_type(struct data_loc_info *dloc __maybe_unused)
{
return NULL;
}
@@ -142,6 +182,28 @@ static inline void annotated_data_type__tree_delete(struct rb_root *root __maybe
{
}
+static inline void global_var_type__tree_delete(struct rb_root *root __maybe_unused)
+{
+}
+
+static inline int hist_entry__annotate_data_tty(struct hist_entry *he __maybe_unused,
+ struct evsel *evsel __maybe_unused)
+{
+ return -1;
+}
+
#endif /* HAVE_DWARF_SUPPORT */
+#ifdef HAVE_SLANG_SUPPORT
+int hist_entry__annotate_data_tui(struct hist_entry *he, struct evsel *evsel,
+ struct hist_browser_timer *hbt);
+#else
+static inline int hist_entry__annotate_data_tui(struct hist_entry *he __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct hist_browser_timer *hbt __maybe_unused)
+{
+ return -1;
+}
+#endif /* HAVE_SLANG_SUPPORT */
+
#endif /* _PERF_ANNOTATE_DATA_H */
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index ac002d907d8180..f5b6b5e5e7571e 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -16,6 +16,7 @@
#include "build-id.h"
#include "color.h"
#include "config.h"
+#include "disasm.h"
#include "dso.h"
#include "env.h"
#include "map.h"
@@ -64,47 +65,6 @@
/* global annotation options */
struct annotation_options annotate_opts;
-static regex_t file_lineno;
-
-static struct ins_ops *ins__find(struct arch *arch, const char *name);
-static void ins__sort(struct arch *arch);
-static int disasm_line__parse(char *line, const char **namep, char **rawp);
-static int call__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name);
-static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name);
-
-struct arch {
- const char *name;
- struct ins *instructions;
- size_t nr_instructions;
- size_t nr_instructions_allocated;
- struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name);
- bool sorted_instructions;
- bool initialized;
- const char *insn_suffix;
- void *priv;
- unsigned int model;
- unsigned int family;
- int (*init)(struct arch *arch, char *cpuid);
- bool (*ins_is_fused)(struct arch *arch, const char *ins1,
- const char *ins2);
- struct {
- char comment_char;
- char skip_functions_char;
- char register_char;
- char memory_ref_char;
- } objdump;
-};
-
-static struct ins_ops call_ops;
-static struct ins_ops dec_ops;
-static struct ins_ops jump_ops;
-static struct ins_ops mov_ops;
-static struct ins_ops nop_ops;
-static struct ins_ops lock_ops;
-static struct ins_ops ret_ops;
-
/* Data type collection debug statistics */
struct annotated_data_stat ann_data_stat;
LIST_HEAD(ann_insn_stat);
@@ -117,753 +77,13 @@ struct annotated_data_type stackop_type = {
},
};
-static int arch__grow_instructions(struct arch *arch)
-{
- struct ins *new_instructions;
- size_t new_nr_allocated;
-
- if (arch->nr_instructions_allocated == 0 && arch->instructions)
- goto grow_from_non_allocated_table;
-
- new_nr_allocated = arch->nr_instructions_allocated + 128;
- new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins));
- if (new_instructions == NULL)
- return -1;
-
-out_update_instructions:
- arch->instructions = new_instructions;
- arch->nr_instructions_allocated = new_nr_allocated;
- return 0;
-
-grow_from_non_allocated_table:
- new_nr_allocated = arch->nr_instructions + 128;
- new_instructions = calloc(new_nr_allocated, sizeof(struct ins));
- if (new_instructions == NULL)
- return -1;
-
- memcpy(new_instructions, arch->instructions, arch->nr_instructions);
- goto out_update_instructions;
-}
-
-static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops)
-{
- struct ins *ins;
-
- if (arch->nr_instructions == arch->nr_instructions_allocated &&
- arch__grow_instructions(arch))
- return -1;
-
- ins = &arch->instructions[arch->nr_instructions];
- ins->name = strdup(name);
- if (!ins->name)
- return -1;
-
- ins->ops = ops;
- arch->nr_instructions++;
-
- ins__sort(arch);
- return 0;
-}
-
-#include "arch/arc/annotate/instructions.c"
-#include "arch/arm/annotate/instructions.c"
-#include "arch/arm64/annotate/instructions.c"
-#include "arch/csky/annotate/instructions.c"
-#include "arch/loongarch/annotate/instructions.c"
-#include "arch/mips/annotate/instructions.c"
-#include "arch/x86/annotate/instructions.c"
-#include "arch/powerpc/annotate/instructions.c"
-#include "arch/riscv64/annotate/instructions.c"
-#include "arch/s390/annotate/instructions.c"
-#include "arch/sparc/annotate/instructions.c"
-
-static struct arch architectures[] = {
- {
- .name = "arc",
- .init = arc__annotate_init,
- },
- {
- .name = "arm",
- .init = arm__annotate_init,
- },
- {
- .name = "arm64",
- .init = arm64__annotate_init,
- },
- {
- .name = "csky",
- .init = csky__annotate_init,
- },
- {
- .name = "mips",
- .init = mips__annotate_init,
- .objdump = {
- .comment_char = '#',
- },
- },
- {
- .name = "x86",
- .init = x86__annotate_init,
- .instructions = x86__instructions,
- .nr_instructions = ARRAY_SIZE(x86__instructions),
- .insn_suffix = "bwlq",
- .objdump = {
- .comment_char = '#',
- .register_char = '%',
- .memory_ref_char = '(',
- },
- },
- {
- .name = "powerpc",
- .init = powerpc__annotate_init,
- },
- {
- .name = "riscv64",
- .init = riscv64__annotate_init,
- },
- {
- .name = "s390",
- .init = s390__annotate_init,
- .objdump = {
- .comment_char = '#',
- },
- },
- {
- .name = "sparc",
- .init = sparc__annotate_init,
- .objdump = {
- .comment_char = '#',
- },
- },
- {
- .name = "loongarch",
- .init = loongarch__annotate_init,
- .objdump = {
- .comment_char = '#',
- },
+struct annotated_data_type canary_type = {
+ .self = {
+ .type_name = (char *)"(stack canary)",
+ .children = LIST_HEAD_INIT(canary_type.self.children),
},
};
-static void ins__delete(struct ins_operands *ops)
-{
- if (ops == NULL)
- return;
- zfree(&ops->source.raw);
- zfree(&ops->source.name);
- zfree(&ops->target.raw);
- zfree(&ops->target.name);
-}
-
-static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name)
-{
- return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw);
-}
-
-int ins__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name)
-{
- if (ins->ops->scnprintf)
- return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name);
-
- return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
-}
-
-bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
-{
- if (!arch || !arch->ins_is_fused)
- return false;
-
- return arch->ins_is_fused(arch, ins1, ins2);
-}
-
-static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
-{
- char *endptr, *tok, *name;
- struct map *map = ms->map;
- struct addr_map_symbol target = {
- .ms = { .map = map, },
- };
-
- ops->target.addr = strtoull(ops->raw, &endptr, 16);
-
- name = strchr(endptr, '<');
- if (name == NULL)
- goto indirect_call;
-
- name++;
-
- if (arch->objdump.skip_functions_char &&
- strchr(name, arch->objdump.skip_functions_char))
- return -1;
-
- tok = strchr(name, '>');
- if (tok == NULL)
- return -1;
-
- *tok = '\0';
- ops->target.name = strdup(name);
- *tok = '>';
-
- if (ops->target.name == NULL)
- return -1;
-find_target:
- target.addr = map__objdump_2mem(map, ops->target.addr);
-
- if (maps__find_ams(ms->maps, &target) == 0 &&
- map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
- ops->target.sym = target.ms.sym;
-
- return 0;
-
-indirect_call:
- tok = strchr(endptr, '*');
- if (tok != NULL) {
- endptr++;
-
- /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx).
- * Do not parse such instruction. */
- if (strstr(endptr, "(%r") == NULL)
- ops->target.addr = strtoull(endptr, NULL, 16);
- }
- goto find_target;
-}
-
-static int call__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name)
-{
- if (ops->target.sym)
- return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
-
- if (ops->target.addr == 0)
- return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
-
- if (ops->target.name)
- return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name);
-
- return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr);
-}
-
-static struct ins_ops call_ops = {
- .parse = call__parse,
- .scnprintf = call__scnprintf,
-};
-
-bool ins__is_call(const struct ins *ins)
-{
- return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops;
-}
-
-/*
- * Prevents from matching commas in the comment section, e.g.:
- * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast
- *
- * and skip comma as part of function arguments, e.g.:
- * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc>
- */
-static inline const char *validate_comma(const char *c, struct ins_operands *ops)
-{
- if (ops->jump.raw_comment && c > ops->jump.raw_comment)
- return NULL;
-
- if (ops->jump.raw_func_start && c > ops->jump.raw_func_start)
- return NULL;
-
- return c;
-}
-
-static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
-{
- struct map *map = ms->map;
- struct symbol *sym = ms->sym;
- struct addr_map_symbol target = {
- .ms = { .map = map, },
- };
- const char *c = strchr(ops->raw, ',');
- u64 start, end;
-
- ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char);
- ops->jump.raw_func_start = strchr(ops->raw, '<');
-
- c = validate_comma(c, ops);
-
- /*
- * Examples of lines to parse for the _cpp_lex_token@@Base
- * function:
- *
- * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92>
- * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72>
- *
- * The first is a jump to an offset inside the same function,
- * the second is to another function, i.e. that 0xa72 is an
- * offset in the cpp_named_operator2name@@base function.
- */
- /*
- * skip over possible up to 2 operands to get to address, e.g.:
- * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
- */
- if (c++ != NULL) {
- ops->target.addr = strtoull(c, NULL, 16);
- if (!ops->target.addr) {
- c = strchr(c, ',');
- c = validate_comma(c, ops);
- if (c++ != NULL)
- ops->target.addr = strtoull(c, NULL, 16);
- }
- } else {
- ops->target.addr = strtoull(ops->raw, NULL, 16);
- }
-
- target.addr = map__objdump_2mem(map, ops->target.addr);
- start = map__unmap_ip(map, sym->start);
- end = map__unmap_ip(map, sym->end);
-
- ops->target.outside = target.addr < start || target.addr > end;
-
- /*
- * FIXME: things like this in _cpp_lex_token (gcc's cc1 program):
-
- cpp_named_operator2name@@Base+0xa72
-
- * Point to a place that is after the cpp_named_operator2name
- * boundaries, i.e. in the ELF symbol table for cc1
- * cpp_named_operator2name is marked as being 32-bytes long, but it in
- * fact is much larger than that, so we seem to need a symbols__find()
- * routine that looks for >= current->start and < next_symbol->start,
- * possibly just for C++ objects?
- *
- * For now lets just make some progress by marking jumps to outside the
- * current function as call like.
- *
- * Actual navigation will come next, with further understanding of how
- * the symbol searching and disassembly should be done.
- */
- if (maps__find_ams(ms->maps, &target) == 0 &&
- map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
- ops->target.sym = target.ms.sym;
-
- if (!ops->target.outside) {
- ops->target.offset = target.addr - start;
- ops->target.offset_avail = true;
- } else {
- ops->target.offset_avail = false;
- }
-
- return 0;
-}
-
-static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name)
-{
- const char *c;
-
- if (!ops->target.addr || ops->target.offset < 0)
- return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
-
- if (ops->target.outside && ops->target.sym != NULL)
- return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
-
- c = strchr(ops->raw, ',');
- c = validate_comma(c, ops);
-
- if (c != NULL) {
- const char *c2 = strchr(c + 1, ',');
-
- c2 = validate_comma(c2, ops);
- /* check for 3-op insn */
- if (c2 != NULL)
- c = c2;
- c++;
-
- /* mirror arch objdump's space-after-comma style */
- if (*c == ' ')
- c++;
- }
-
- return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name,
- ins->name, c ? c - ops->raw : 0, ops->raw,
- ops->target.offset);
-}
-
-static void jump__delete(struct ins_operands *ops __maybe_unused)
-{
- /*
- * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the
- * raw string, don't free them.
- */
-}
-
-static struct ins_ops jump_ops = {
- .free = jump__delete,
- .parse = jump__parse,
- .scnprintf = jump__scnprintf,
-};
-
-bool ins__is_jump(const struct ins *ins)
-{
- return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops;
-}
-
-static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
-{
- char *endptr, *name, *t;
-
- if (strstr(raw, "(%rip)") == NULL)
- return 0;
-
- *addrp = strtoull(comment, &endptr, 16);
- if (endptr == comment)
- return 0;
- name = strchr(endptr, '<');
- if (name == NULL)
- return -1;
-
- name++;
-
- t = strchr(name, '>');
- if (t == NULL)
- return 0;
-
- *t = '\0';
- *namep = strdup(name);
- *t = '>';
-
- return 0;
-}
-
-static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
-{
- ops->locked.ops = zalloc(sizeof(*ops->locked.ops));
- if (ops->locked.ops == NULL)
- return 0;
-
- if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
- goto out_free_ops;
-
- ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name);
-
- if (ops->locked.ins.ops == NULL)
- goto out_free_ops;
-
- if (ops->locked.ins.ops->parse &&
- ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0)
- goto out_free_ops;
-
- return 0;
-
-out_free_ops:
- zfree(&ops->locked.ops);
- return 0;
-}
-
-static int lock__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name)
-{
- int printed;
-
- if (ops->locked.ins.ops == NULL)
- return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
-
- printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name);
- return printed + ins__scnprintf(&ops->locked.ins, bf + printed,
- size - printed, ops->locked.ops, max_ins_name);
-}
-
-static void lock__delete(struct ins_operands *ops)
-{
- struct ins *ins = &ops->locked.ins;
-
- if (ins->ops && ins->ops->free)
- ins->ops->free(ops->locked.ops);
- else
- ins__delete(ops->locked.ops);
-
- zfree(&ops->locked.ops);
- zfree(&ops->target.raw);
- zfree(&ops->target.name);
-}
-
-static struct ins_ops lock_ops = {
- .free = lock__delete,
- .parse = lock__parse,
- .scnprintf = lock__scnprintf,
-};
-
-/*
- * Check if the operand has more than one registers like x86 SIB addressing:
- * 0x1234(%rax, %rbx, 8)
- *
- * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check
- * the input string after 'memory_ref_char' if exists.
- */
-static bool check_multi_regs(struct arch *arch, const char *op)
-{
- int count = 0;
-
- if (arch->objdump.register_char == 0)
- return false;
-
- if (arch->objdump.memory_ref_char) {
- op = strchr(op, arch->objdump.memory_ref_char);
- if (op == NULL)
- return false;
- }
-
- while ((op = strchr(op, arch->objdump.register_char)) != NULL) {
- count++;
- op++;
- }
-
- return count > 1;
-}
-
-static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
-{
- char *s = strchr(ops->raw, ','), *target, *comment, prev;
-
- if (s == NULL)
- return -1;
-
- *s = '\0';
-
- /*
- * x86 SIB addressing has something like 0x8(%rax, %rcx, 1)
- * then it needs to have the closing parenthesis.
- */
- if (strchr(ops->raw, '(')) {
- *s = ',';
- s = strchr(ops->raw, ')');
- if (s == NULL || s[1] != ',')
- return -1;
- *++s = '\0';
- }
-
- ops->source.raw = strdup(ops->raw);
- *s = ',';
-
- if (ops->source.raw == NULL)
- return -1;
-
- ops->source.multi_regs = check_multi_regs(arch, ops->source.raw);
-
- target = skip_spaces(++s);
- comment = strchr(s, arch->objdump.comment_char);
-
- if (comment != NULL)
- s = comment - 1;
- else
- s = strchr(s, '\0') - 1;
-
- while (s > target && isspace(s[0]))
- --s;
- s++;
- prev = *s;
- *s = '\0';
-
- ops->target.raw = strdup(target);
- *s = prev;
-
- if (ops->target.raw == NULL)
- goto out_free_source;
-
- ops->target.multi_regs = check_multi_regs(arch, ops->target.raw);
-
- if (comment == NULL)
- return 0;
-
- comment = skip_spaces(comment);
- comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
- comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
-
- return 0;
-
-out_free_source:
- zfree(&ops->source.raw);
- return -1;
-}
-
-static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name)
-{
- return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name,
- ops->source.name ?: ops->source.raw,
- ops->target.name ?: ops->target.raw);
-}
-
-static struct ins_ops mov_ops = {
- .parse = mov__parse,
- .scnprintf = mov__scnprintf,
-};
-
-static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
-{
- char *target, *comment, *s, prev;
-
- target = s = ops->raw;
-
- while (s[0] != '\0' && !isspace(s[0]))
- ++s;
- prev = *s;
- *s = '\0';
-
- ops->target.raw = strdup(target);
- *s = prev;
-
- if (ops->target.raw == NULL)
- return -1;
-
- comment = strchr(s, arch->objdump.comment_char);
- if (comment == NULL)
- return 0;
-
- comment = skip_spaces(comment);
- comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
-
- return 0;
-}
-
-static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name)
-{
- return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
- ops->target.name ?: ops->target.raw);
-}
-
-static struct ins_ops dec_ops = {
- .parse = dec__parse,
- .scnprintf = dec__scnprintf,
-};
-
-static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
- struct ins_operands *ops __maybe_unused, int max_ins_name)
-{
- return scnprintf(bf, size, "%-*s", max_ins_name, "nop");
-}
-
-static struct ins_ops nop_ops = {
- .scnprintf = nop__scnprintf,
-};
-
-static struct ins_ops ret_ops = {
- .scnprintf = ins__raw_scnprintf,
-};
-
-bool ins__is_ret(const struct ins *ins)
-{
- return ins->ops == &ret_ops;
-}
-
-bool ins__is_lock(const struct ins *ins)
-{
- return ins->ops == &lock_ops;
-}
-
-static int ins__key_cmp(const void *name, const void *insp)
-{
- const struct ins *ins = insp;
-
- return strcmp(name, ins->name);
-}
-
-static int ins__cmp(const void *a, const void *b)
-{
- const struct ins *ia = a;
- const struct ins *ib = b;
-
- return strcmp(ia->name, ib->name);
-}
-
-static void ins__sort(struct arch *arch)
-{
- const int nmemb = arch->nr_instructions;
-
- qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
-}
-
-static struct ins_ops *__ins__find(struct arch *arch, const char *name)
-{
- struct ins *ins;
- const int nmemb = arch->nr_instructions;
-
- if (!arch->sorted_instructions) {
- ins__sort(arch);
- arch->sorted_instructions = true;
- }
-
- ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
- if (ins)
- return ins->ops;
-
- if (arch->insn_suffix) {
- char tmp[32];
- char suffix;
- size_t len = strlen(name);
-
- if (len == 0 || len >= sizeof(tmp))
- return NULL;
-
- suffix = name[len - 1];
- if (strchr(arch->insn_suffix, suffix) == NULL)
- return NULL;
-
- strcpy(tmp, name);
- tmp[len - 1] = '\0'; /* remove the suffix and check again */
-
- ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
- }
- return ins ? ins->ops : NULL;
-}
-
-static struct ins_ops *ins__find(struct arch *arch, const char *name)
-{
- struct ins_ops *ops = __ins__find(arch, name);
-
- if (!ops && arch->associate_instruction_ops)
- ops = arch->associate_instruction_ops(arch, name);
-
- return ops;
-}
-
-static int arch__key_cmp(const void *name, const void *archp)
-{
- const struct arch *arch = archp;
-
- return strcmp(name, arch->name);
-}
-
-static int arch__cmp(const void *a, const void *b)
-{
- const struct arch *aa = a;
- const struct arch *ab = b;
-
- return strcmp(aa->name, ab->name);
-}
-
-static void arch__sort(void)
-{
- const int nmemb = ARRAY_SIZE(architectures);
-
- qsort(architectures, nmemb, sizeof(struct arch), arch__cmp);
-}
-
-static struct arch *arch__find(const char *name)
-{
- const int nmemb = ARRAY_SIZE(architectures);
- static bool sorted;
-
- if (!sorted) {
- arch__sort();
- sorted = true;
- }
-
- return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
-}
-
-bool arch__is(struct arch *arch, const char *name)
-{
- return !strcmp(arch->name, name);
-}
-
/* symbol histogram: key = offset << 16 | evsel->core.idx */
static size_t sym_hist_hash(long key, void *ctx __maybe_unused)
{
@@ -1149,14 +369,33 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
return err;
}
+struct annotation_line *annotated_source__get_line(struct annotated_source *src,
+ s64 offset)
+{
+ struct annotation_line *al;
+
+ list_for_each_entry(al, &src->source, node) {
+ if (al->offset == offset)
+ return al;
+ }
+ return NULL;
+}
+
static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 end)
{
+ struct annotation_line *al;
unsigned n_insn = 0;
- u64 offset;
- for (offset = start; offset <= end; offset++) {
- if (notes->src->offsets[offset])
- n_insn++;
+ al = annotated_source__get_line(notes->src, start);
+ if (al == NULL)
+ return 0;
+
+ list_for_each_entry_from(al, &notes->src->source, node) {
+ if (al->offset == -1)
+ continue;
+ if ((u64)al->offset > end)
+ break;
+ n_insn++;
}
return n_insn;
}
@@ -1173,10 +412,10 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64
{
unsigned n_insn;
unsigned int cover_insn = 0;
- u64 offset;
n_insn = annotation__count_insn(notes, start, end);
if (n_insn && ch->num && ch->cycles) {
+ struct annotation_line *al;
struct annotated_branch *branch;
float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
@@ -1184,10 +423,16 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64
if (ch->reset >= 0x7fff)
return;
- for (offset = start; offset <= end; offset++) {
- struct annotation_line *al = notes->src->offsets[offset];
+ al = annotated_source__get_line(notes->src, start);
+ if (al == NULL)
+ return;
- if (al && al->cycles && al->cycles->ipc == 0.0) {
+ list_for_each_entry_from(al, &notes->src->source, node) {
+ if (al->offset == -1)
+ continue;
+ if ((u64)al->offset > end)
+ break;
+ if (al->cycles && al->cycles->ipc == 0.0) {
al->cycles->ipc = ipc;
cover_insn++;
}
@@ -1223,7 +468,7 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size)
if (ch && ch->cycles) {
struct annotation_line *al;
- al = notes->src->offsets[offset];
+ al = annotated_source__get_line(notes->src, offset);
if (al && al->cycles == NULL) {
al->cycles = zalloc(sizeof(*al->cycles));
if (al->cycles == NULL) {
@@ -1246,7 +491,9 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size)
struct cyc_hist *ch = &notes->branch->cycles_hist[offset];
if (ch && ch->cycles) {
- struct annotation_line *al = notes->src->offsets[offset];
+ struct annotation_line *al;
+
+ al = annotated_source__get_line(notes->src, offset);
if (al)
zfree(&al->cycles);
}
@@ -1269,142 +516,6 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *samp
return symbol__inc_addr_samples(&he->ms, evsel, ip, sample);
}
-static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
-{
- dl->ins.ops = ins__find(arch, dl->ins.name);
-
- if (!dl->ins.ops)
- return;
-
- if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0)
- dl->ins.ops = NULL;
-}
-
-static int disasm_line__parse(char *line, const char **namep, char **rawp)
-{
- char tmp, *name = skip_spaces(line);
-
- if (name[0] == '\0')
- return -1;
-
- *rawp = name + 1;
-
- while ((*rawp)[0] != '\0' && !isspace((*rawp)[0]))
- ++*rawp;
-
- tmp = (*rawp)[0];
- (*rawp)[0] = '\0';
- *namep = strdup(name);
-
- if (*namep == NULL)
- goto out;
-
- (*rawp)[0] = tmp;
- *rawp = strim(*rawp);
-
- return 0;
-
-out:
- return -1;
-}
-
-struct annotate_args {
- struct arch *arch;
- struct map_symbol ms;
- struct evsel *evsel;
- struct annotation_options *options;
- s64 offset;
- char *line;
- int line_nr;
- char *fileloc;
-};
-
-static void annotation_line__init(struct annotation_line *al,
- struct annotate_args *args,
- int nr)
-{
- al->offset = args->offset;
- al->line = strdup(args->line);
- al->line_nr = args->line_nr;
- al->fileloc = args->fileloc;
- al->data_nr = nr;
-}
-
-static void annotation_line__exit(struct annotation_line *al)
-{
- zfree_srcline(&al->path);
- zfree(&al->line);
- zfree(&al->cycles);
-}
-
-static size_t disasm_line_size(int nr)
-{
- struct annotation_line *al;
-
- return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr));
-}
-
-/*
- * Allocating the disasm annotation line data with
- * following structure:
- *
- * -------------------------------------------
- * struct disasm_line | struct annotation_line
- * -------------------------------------------
- *
- * We have 'struct annotation_line' member as last member
- * of 'struct disasm_line' to have an easy access.
- */
-static struct disasm_line *disasm_line__new(struct annotate_args *args)
-{
- struct disasm_line *dl = NULL;
- int nr = 1;
-
- if (evsel__is_group_event(args->evsel))
- nr = args->evsel->core.nr_members;
-
- dl = zalloc(disasm_line_size(nr));
- if (!dl)
- return NULL;
-
- annotation_line__init(&dl->al, args, nr);
- if (dl->al.line == NULL)
- goto out_delete;
-
- if (args->offset != -1) {
- if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
- goto out_free_line;
-
- disasm_line__init_ins(dl, args->arch, &args->ms);
- }
-
- return dl;
-
-out_free_line:
- zfree(&dl->al.line);
-out_delete:
- free(dl);
- return NULL;
-}
-
-void disasm_line__free(struct disasm_line *dl)
-{
- if (dl->ins.ops && dl->ins.ops->free)
- dl->ins.ops->free(&dl->ops);
- else
- ins__delete(&dl->ops);
- zfree(&dl->ins.name);
- annotation_line__exit(&dl->al);
- free(dl);
-}
-
-int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name)
-{
- if (raw || !dl->ins.ops)
- return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw);
-
- return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name);
-}
void annotation__exit(struct annotation *notes)
{
@@ -1464,8 +575,7 @@ bool annotation__trylock(struct annotation *notes)
return mutex_trylock(mutex);
}
-
-static void annotation_line__add(struct annotation_line *al, struct list_head *head)
+void annotation_line__add(struct annotation_line *al, struct list_head *head)
{
list_add_tail(&al->node, head);
}
@@ -1675,673 +785,6 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
return 0;
}
-/*
- * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw)
- * which looks like following
- *
- * 0000000000415500 <_init>:
- * 415500: sub $0x8,%rsp
- * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8>
- * 41550b: test %rax,%rax
- * 41550e: je 415515 <_init+0x15>
- * 415510: callq 416e70 <__gmon_start__@plt>
- * 415515: add $0x8,%rsp
- * 415519: retq
- *
- * it will be parsed and saved into struct disasm_line as
- * <offset> <name> <ops.raw>
- *
- * The offset will be a relative offset from the start of the symbol and -1
- * means that it's not a disassembly line so should be treated differently.
- * The ops.raw part will be parsed further according to type of the instruction.
- */
-static int symbol__parse_objdump_line(struct symbol *sym,
- struct annotate_args *args,
- char *parsed_line, int *line_nr, char **fileloc)
-{
- struct map *map = args->ms.map;
- struct annotation *notes = symbol__annotation(sym);
- struct disasm_line *dl;
- char *tmp;
- s64 line_ip, offset = -1;
- regmatch_t match[2];
-
- /* /filename:linenr ? Save line number and ignore. */
- if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
- *line_nr = atoi(parsed_line + match[1].rm_so);
- free(*fileloc);
- *fileloc = strdup(parsed_line);
- return 0;
- }
-
- /* Process hex address followed by ':'. */
- line_ip = strtoull(parsed_line, &tmp, 16);
- if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') {
- u64 start = map__rip_2objdump(map, sym->start),
- end = map__rip_2objdump(map, sym->end);
-
- offset = line_ip - start;
- if ((u64)line_ip < start || (u64)line_ip >= end)
- offset = -1;
- else
- parsed_line = tmp + 1;
- }
-
- args->offset = offset;
- args->line = parsed_line;
- args->line_nr = *line_nr;
- args->fileloc = *fileloc;
- args->ms.sym = sym;
-
- dl = disasm_line__new(args);
- (*line_nr)++;
-
- if (dl == NULL)
- return -1;
-
- if (!disasm_line__has_local_offset(dl)) {
- dl->ops.target.offset = dl->ops.target.addr -
- map__rip_2objdump(map, sym->start);
- dl->ops.target.offset_avail = true;
- }
-
- /* kcore has no symbols, so add the call target symbol */
- if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) {
- struct addr_map_symbol target = {
- .addr = dl->ops.target.addr,
- .ms = { .map = map, },
- };
-
- if (!maps__find_ams(args->ms.maps, &target) &&
- target.ms.sym->start == target.al_addr)
- dl->ops.target.sym = target.ms.sym;
- }
-
- annotation_line__add(&dl->al, &notes->src->source);
- return 0;
-}
-
-static __attribute__((constructor)) void symbol__init_regexpr(void)
-{
- regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED);
-}
-
-static void delete_last_nop(struct symbol *sym)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct list_head *list = &notes->src->source;
- struct disasm_line *dl;
-
- while (!list_empty(list)) {
- dl = list_entry(list->prev, struct disasm_line, al.node);
-
- if (dl->ins.ops) {
- if (dl->ins.ops != &nop_ops)
- return;
- } else {
- if (!strstr(dl->al.line, " nop ") &&
- !strstr(dl->al.line, " nopl ") &&
- !strstr(dl->al.line, " nopw "))
- return;
- }
-
- list_del_init(&dl->al.node);
- disasm_line__free(dl);
- }
-}
-
-int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen)
-{
- struct dso *dso = map__dso(ms->map);
-
- BUG_ON(buflen == 0);
-
- if (errnum >= 0) {
- str_error_r(errnum, buf, buflen);
- return 0;
- }
-
- switch (errnum) {
- case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: {
- char bf[SBUILD_ID_SIZE + 15] = " with build id ";
- char *build_id_msg = NULL;
-
- if (dso->has_build_id) {
- build_id__sprintf(&dso->bid, bf + 15);
- build_id_msg = bf;
- }
- scnprintf(buf, buflen,
- "No vmlinux file%s\nwas found in the path.\n\n"
- "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
- "Please use:\n\n"
- " perf buildid-cache -vu vmlinux\n\n"
- "or:\n\n"
- " --vmlinux vmlinux\n", build_id_msg ?: "");
- }
- break;
- case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF:
- scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation");
- break;
- case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP:
- scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions.");
- break;
- case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING:
- scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization.");
- break;
- case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE:
- scnprintf(buf, buflen, "Invalid BPF file: %s.", dso->long_name);
- break;
- case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF:
- scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
- dso->long_name);
- break;
- default:
- scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
- break;
- }
-
- return 0;
-}
-
-static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size)
-{
- char linkname[PATH_MAX];
- char *build_id_filename;
- char *build_id_path = NULL;
- char *pos;
- int len;
-
- if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
- !dso__is_kcore(dso))
- return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
-
- build_id_filename = dso__build_id_filename(dso, NULL, 0, false);
- if (build_id_filename) {
- __symbol__join_symfs(filename, filename_size, build_id_filename);
- free(build_id_filename);
- } else {
- if (dso->has_build_id)
- return ENOMEM;
- goto fallback;
- }
-
- build_id_path = strdup(filename);
- if (!build_id_path)
- return ENOMEM;
-
- /*
- * old style build-id cache has name of XX/XXXXXXX.. while
- * new style has XX/XXXXXXX../{elf,kallsyms,vdso}.
- * extract the build-id part of dirname in the new style only.
- */
- pos = strrchr(build_id_path, '/');
- if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
- dirname(build_id_path);
-
- if (dso__is_kcore(dso))
- goto fallback;
-
- len = readlink(build_id_path, linkname, sizeof(linkname) - 1);
- if (len < 0)
- goto fallback;
-
- linkname[len] = '\0';
- if (strstr(linkname, DSO__NAME_KALLSYMS) ||
- access(filename, R_OK)) {
-fallback:
- /*
- * If we don't have build-ids or the build-id file isn't in the
- * cache, or is just a kallsyms file, well, lets hope that this
- * DSO is the same as when 'perf record' ran.
- */
- if (dso->kernel && dso->long_name[0] == '/')
- snprintf(filename, filename_size, "%s", dso->long_name);
- else
- __symbol__join_symfs(filename, filename_size, dso->long_name);
-
- mutex_lock(&dso->lock);
- if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) {
- char *new_name = dso__filename_with_chroot(dso, filename);
- if (new_name) {
- strlcpy(filename, new_name, filename_size);
- free(new_name);
- }
- }
- mutex_unlock(&dso->lock);
- }
-
- free(build_id_path);
- return 0;
-}
-
-#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-#define PACKAGE "perf"
-#include <bfd.h>
-#include <dis-asm.h>
-#include <bpf/bpf.h>
-#include <bpf/btf.h>
-#include <bpf/libbpf.h>
-#include <linux/btf.h>
-#include <tools/dis-asm-compat.h>
-
-static int symbol__disassemble_bpf(struct symbol *sym,
- struct annotate_args *args)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct bpf_prog_linfo *prog_linfo = NULL;
- struct bpf_prog_info_node *info_node;
- int len = sym->end - sym->start;
- disassembler_ftype disassemble;
- struct map *map = args->ms.map;
- struct perf_bpil *info_linear;
- struct disassemble_info info;
- struct dso *dso = map__dso(map);
- int pc = 0, count, sub_id;
- struct btf *btf = NULL;
- char tpath[PATH_MAX];
- size_t buf_size;
- int nr_skip = 0;
- char *buf;
- bfd *bfdf;
- int ret;
- FILE *s;
-
- if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO)
- return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
-
- pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
- sym->name, sym->start, sym->end - sym->start);
-
- memset(tpath, 0, sizeof(tpath));
- perf_exe(tpath, sizeof(tpath));
-
- bfdf = bfd_openr(tpath, NULL);
- if (bfdf == NULL)
- abort();
-
- if (!bfd_check_format(bfdf, bfd_object))
- abort();
-
- s = open_memstream(&buf, &buf_size);
- if (!s) {
- ret = errno;
- goto out;
- }
- init_disassemble_info_compat(&info, s,
- (fprintf_ftype) fprintf,
- fprintf_styled);
- info.arch = bfd_get_arch(bfdf);
- info.mach = bfd_get_mach(bfdf);
-
- info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env,
- dso->bpf_prog.id);
- if (!info_node) {
- ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
- goto out;
- }
- info_linear = info_node->info_linear;
- sub_id = dso->bpf_prog.sub_id;
-
- info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
- info.buffer_length = info_linear->info.jited_prog_len;
-
- if (info_linear->info.nr_line_info)
- prog_linfo = bpf_prog_linfo__new(&info_linear->info);
-
- if (info_linear->info.btf_id) {
- struct btf_node *node;
-
- node = perf_env__find_btf(dso->bpf_prog.env,
- info_linear->info.btf_id);
- if (node)
- btf = btf__new((__u8 *)(node->data),
- node->data_size);
- }
-
- disassemble_init_for_target(&info);
-
-#ifdef DISASM_FOUR_ARGS_SIGNATURE
- disassemble = disassembler(info.arch,
- bfd_big_endian(bfdf),
- info.mach,
- bfdf);
-#else
- disassemble = disassembler(bfdf);
-#endif
- if (disassemble == NULL)
- abort();
-
- fflush(s);
- do {
- const struct bpf_line_info *linfo = NULL;
- struct disasm_line *dl;
- size_t prev_buf_size;
- const char *srcline;
- u64 addr;
-
- addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
- count = disassemble(pc, &info);
-
- if (prog_linfo)
- linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo,
- addr, sub_id,
- nr_skip);
-
- if (linfo && btf) {
- srcline = btf__name_by_offset(btf, linfo->line_off);
- nr_skip++;
- } else
- srcline = NULL;
-
- fprintf(s, "\n");
- prev_buf_size = buf_size;
- fflush(s);
-
- if (!annotate_opts.hide_src_code && srcline) {
- args->offset = -1;
- args->line = strdup(srcline);
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
- dl = disasm_line__new(args);
- if (dl) {
- annotation_line__add(&dl->al,
- &notes->src->source);
- }
- }
-
- args->offset = pc;
- args->line = buf + prev_buf_size;
- args->line_nr = 0;
- args->fileloc = NULL;
- args->ms.sym = sym;
- dl = disasm_line__new(args);
- if (dl)
- annotation_line__add(&dl->al, &notes->src->source);
-
- pc += count;
- } while (count > 0 && pc < len);
-
- ret = 0;
-out:
- free(prog_linfo);
- btf__free(btf);
- fclose(s);
- bfd_close(bfdf);
- return ret;
-}
-#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
- struct annotate_args *args __maybe_unused)
-{
- return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
-}
-#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-
-static int
-symbol__disassemble_bpf_image(struct symbol *sym,
- struct annotate_args *args)
-{
- struct annotation *notes = symbol__annotation(sym);
- struct disasm_line *dl;
-
- args->offset = -1;
- args->line = strdup("to be implemented");
- args->line_nr = 0;
- args->fileloc = NULL;
- dl = disasm_line__new(args);
- if (dl)
- annotation_line__add(&dl->al, &notes->src->source);
-
- zfree(&args->line);
- return 0;
-}
-
-/*
- * Possibly create a new version of line with tabs expanded. Returns the
- * existing or new line, storage is updated if a new line is allocated. If
- * allocation fails then NULL is returned.
- */
-static char *expand_tabs(char *line, char **storage, size_t *storage_len)
-{
- size_t i, src, dst, len, new_storage_len, num_tabs;
- char *new_line;
- size_t line_len = strlen(line);
-
- for (num_tabs = 0, i = 0; i < line_len; i++)
- if (line[i] == '\t')
- num_tabs++;
-
- if (num_tabs == 0)
- return line;
-
- /*
- * Space for the line and '\0', less the leading and trailing
- * spaces. Each tab may introduce 7 additional spaces.
- */
- new_storage_len = line_len + 1 + (num_tabs * 7);
-
- new_line = malloc(new_storage_len);
- if (new_line == NULL) {
- pr_err("Failure allocating memory for tab expansion\n");
- return NULL;
- }
-
- /*
- * Copy regions starting at src and expand tabs. If there are two
- * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces
- * are inserted.
- */
- for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) {
- if (line[i] == '\t') {
- len = i - src;
- memcpy(&new_line[dst], &line[src], len);
- dst += len;
- new_line[dst++] = ' ';
- while (dst % 8 != 0)
- new_line[dst++] = ' ';
- src = i + 1;
- num_tabs--;
- }
- }
-
- /* Expand the last region. */
- len = line_len - src;
- memcpy(&new_line[dst], &line[src], len);
- dst += len;
- new_line[dst] = '\0';
-
- free(*storage);
- *storage = new_line;
- *storage_len = new_storage_len;
- return new_line;
-
-}
-
-static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
-{
- struct annotation_options *opts = &annotate_opts;
- struct map *map = args->ms.map;
- struct dso *dso = map__dso(map);
- char *command;
- FILE *file;
- char symfs_filename[PATH_MAX];
- struct kcore_extract kce;
- bool delete_extract = false;
- bool decomp = false;
- int lineno = 0;
- char *fileloc = NULL;
- int nline;
- char *line;
- size_t line_len;
- const char *objdump_argv[] = {
- "/bin/sh",
- "-c",
- NULL, /* Will be the objdump command to run. */
- "--",
- NULL, /* Will be the symfs path. */
- NULL,
- };
- struct child_process objdump_process;
- int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
-
- if (err)
- return err;
-
- pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
- symfs_filename, sym->name, map__unmap_ip(map, sym->start),
- map__unmap_ip(map, sym->end));
-
- pr_debug("annotating [%p] %30s : [%p] %30s\n",
- dso, dso->long_name, sym, sym->name);
-
- if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) {
- return symbol__disassemble_bpf(sym, args);
- } else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) {
- return symbol__disassemble_bpf_image(sym, args);
- } else if (dso__is_kcore(dso)) {
- kce.kcore_filename = symfs_filename;
- kce.addr = map__rip_2objdump(map, sym->start);
- kce.offs = sym->start;
- kce.len = sym->end - sym->start;
- if (!kcore_extract__create(&kce)) {
- delete_extract = true;
- strlcpy(symfs_filename, kce.extract_filename,
- sizeof(symfs_filename));
- }
- } else if (dso__needs_decompress(dso)) {
- char tmp[KMOD_DECOMP_LEN];
-
- if (dso__decompress_kmodule_path(dso, symfs_filename,
- tmp, sizeof(tmp)) < 0)
- return -1;
-
- decomp = true;
- strcpy(symfs_filename, tmp);
- }
-
- err = asprintf(&command,
- "%s %s%s --start-address=0x%016" PRIx64
- " --stop-address=0x%016" PRIx64
- " %s -d %s %s %s %c%s%c %s%s -C \"$1\"",
- opts->objdump_path ?: "objdump",
- opts->disassembler_style ? "-M " : "",
- opts->disassembler_style ?: "",
- map__rip_2objdump(map, sym->start),
- map__rip_2objdump(map, sym->end),
- opts->show_linenr ? "-l" : "",
- opts->show_asm_raw ? "" : "--no-show-raw-insn",
- opts->annotate_src ? "-S" : "",
- opts->prefix ? "--prefix " : "",
- opts->prefix ? '"' : ' ',
- opts->prefix ?: "",
- opts->prefix ? '"' : ' ',
- opts->prefix_strip ? "--prefix-strip=" : "",
- opts->prefix_strip ?: "");
-
- if (err < 0) {
- pr_err("Failure allocating memory for the command to run\n");
- goto out_remove_tmp;
- }
-
- pr_debug("Executing: %s\n", command);
-
- objdump_argv[2] = command;
- objdump_argv[4] = symfs_filename;
-
- /* Create a pipe to read from for stdout */
- memset(&objdump_process, 0, sizeof(objdump_process));
- objdump_process.argv = objdump_argv;
- objdump_process.out = -1;
- objdump_process.err = -1;
- objdump_process.no_stderr = 1;
- if (start_command(&objdump_process)) {
- pr_err("Failure starting to run %s\n", command);
- err = -1;
- goto out_free_command;
- }
-
- file = fdopen(objdump_process.out, "r");
- if (!file) {
- pr_err("Failure creating FILE stream for %s\n", command);
- /*
- * If we were using debug info should retry with
- * original binary.
- */
- err = -1;
- goto out_close_stdout;
- }
-
- /* Storage for getline. */
- line = NULL;
- line_len = 0;
-
- nline = 0;
- while (!feof(file)) {
- const char *match;
- char *expanded_line;
-
- if (getline(&line, &line_len, file) < 0 || !line)
- break;
-
- /* Skip lines containing "filename:" */
- match = strstr(line, symfs_filename);
- if (match && match[strlen(symfs_filename)] == ':')
- continue;
-
- expanded_line = strim(line);
- expanded_line = expand_tabs(expanded_line, &line, &line_len);
- if (!expanded_line)
- break;
-
- /*
- * The source code line number (lineno) needs to be kept in
- * across calls to symbol__parse_objdump_line(), so that it
- * can associate it with the instructions till the next one.
- * See disasm_line__new() and struct disasm_line::line_nr.
- */
- if (symbol__parse_objdump_line(sym, args, expanded_line,
- &lineno, &fileloc) < 0)
- break;
- nline++;
- }
- free(line);
- free(fileloc);
-
- err = finish_command(&objdump_process);
- if (err)
- pr_err("Error running %s\n", command);
-
- if (nline == 0) {
- err = -1;
- pr_err("No output from %s\n", command);
- }
-
- /*
- * kallsyms does not have symbol sizes so there may a nop at the end.
- * Remove it.
- */
- if (dso__is_kcore(dso))
- delete_last_nop(sym);
-
- fclose(file);
-
-out_close_stdout:
- close(objdump_process.out);
-
-out_free_command:
- free(command);
-
-out_remove_tmp:
- if (decomp)
- unlink(symfs_filename);
-
- if (delete_extract)
- kcore_extract__delete(&kce);
-
- return err;
-}
-
static void calc_percent(struct annotation *notes,
struct evsel *evsel,
struct annotation_data *data,
@@ -2422,8 +865,10 @@ static int evsel__get_arch(struct evsel *evsel, struct arch **parch)
struct arch *arch;
int err;
- if (!arch_name)
+ if (!arch_name) {
+ *parch = NULL;
return errno;
+ }
*parch = arch = arch__find(arch_name);
if (arch == NULL) {
@@ -2461,12 +906,22 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
if (parch)
*parch = arch;
+ if (notes->src && !list_empty(&notes->src->source))
+ return 0;
+
args.arch = arch;
args.ms = *ms;
+
+ if (notes->src == NULL) {
+ notes->src = annotated_source__new();
+ if (notes->src == NULL)
+ return -1;
+ }
+
if (annotate_opts.full_addr)
- notes->start = map__objdump_2mem(ms->map, ms->sym->start);
+ notes->src->start = map__objdump_2mem(ms->map, ms->sym->start);
else
- notes->start = map__rip_2objdump(ms->map, ms->sym->start);
+ notes->src->start = map__rip_2objdump(ms->map, ms->sym->start);
return symbol__disassemble(sym, &args);
}
@@ -2835,13 +1290,16 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
{
struct annotation *notes = symbol__annotation(sym);
struct sym_hist *h = annotation__histogram(notes, evidx);
- int len = symbol__size(sym), offset;
+ struct annotation_line *al;
h->nr_samples = 0;
- for (offset = 0; offset < len; ++offset) {
+ list_for_each_entry(al, &notes->src->source, node) {
struct sym_hist_entry *entry;
- entry = annotated_source__hist_entry(notes->src, evidx, offset);
+ if (al->offset == -1)
+ continue;
+
+ entry = annotated_source__hist_entry(notes->src, evidx, al->offset);
if (entry == NULL)
continue;
@@ -2898,64 +1356,56 @@ bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym
return true;
}
-void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
+static void
+annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
{
- u64 offset, size = symbol__size(sym);
+ struct annotation_line *al;
/* PLT symbols contain external offsets */
if (strstr(sym->name, "@plt"))
return;
- for (offset = 0; offset < size; ++offset) {
- struct annotation_line *al = notes->src->offsets[offset];
+ list_for_each_entry(al, &notes->src->source, node) {
struct disasm_line *dl;
+ struct annotation_line *target;
dl = disasm_line(al);
if (!disasm_line__is_valid_local_jump(dl, sym))
continue;
- al = notes->src->offsets[dl->ops.target.offset];
-
+ target = annotated_source__get_line(notes->src,
+ dl->ops.target.offset);
/*
* FIXME: Oops, no jump target? Buggy disassembler? Or do we
* have to adjust to the previous offset?
*/
- if (al == NULL)
+ if (target == NULL)
continue;
- if (++al->jump_sources > notes->max_jump_sources)
- notes->max_jump_sources = al->jump_sources;
+ if (++target->jump_sources > notes->src->max_jump_sources)
+ notes->src->max_jump_sources = target->jump_sources;
}
}
-void annotation__set_offsets(struct annotation *notes, s64 size)
+static void annotation__set_index(struct annotation *notes)
{
struct annotation_line *al;
struct annotated_source *src = notes->src;
- src->max_line_len = 0;
+ src->widths.max_line_len = 0;
src->nr_entries = 0;
src->nr_asm_entries = 0;
list_for_each_entry(al, &src->source, node) {
size_t line_len = strlen(al->line);
- if (src->max_line_len < line_len)
- src->max_line_len = line_len;
+ if (src->widths.max_line_len < line_len)
+ src->widths.max_line_len = line_len;
al->idx = src->nr_entries++;
- if (al->offset != -1) {
+ if (al->offset != -1)
al->idx_asm = src->nr_asm_entries++;
- /*
- * FIXME: short term bandaid to cope with assembly
- * routines that comes with labels in the same column
- * as the address in objdump, sigh.
- *
- * E.g. copy_user_generic_unrolled
- */
- if (al->offset < size)
- notes->src->offsets[al->offset] = al;
- } else
+ else
al->idx_asm = -1;
}
}
@@ -2986,28 +1436,29 @@ static int annotation__max_ins_name(struct annotation *notes)
return max_name;
}
-void annotation__init_column_widths(struct annotation *notes, struct symbol *sym)
+static void
+annotation__init_column_widths(struct annotation *notes, struct symbol *sym)
{
- notes->widths.addr = notes->widths.target =
- notes->widths.min_addr = hex_width(symbol__size(sym));
- notes->widths.max_addr = hex_width(sym->end);
- notes->widths.jumps = width_jumps(notes->max_jump_sources);
- notes->widths.max_ins_name = annotation__max_ins_name(notes);
+ notes->src->widths.addr = notes->src->widths.target =
+ notes->src->widths.min_addr = hex_width(symbol__size(sym));
+ notes->src->widths.max_addr = hex_width(sym->end);
+ notes->src->widths.jumps = width_jumps(notes->src->max_jump_sources);
+ notes->src->widths.max_ins_name = annotation__max_ins_name(notes);
}
void annotation__update_column_widths(struct annotation *notes)
{
if (annotate_opts.use_offset)
- notes->widths.target = notes->widths.min_addr;
+ notes->src->widths.target = notes->src->widths.min_addr;
else if (annotate_opts.full_addr)
- notes->widths.target = BITS_PER_LONG / 4;
+ notes->src->widths.target = BITS_PER_LONG / 4;
else
- notes->widths.target = notes->widths.max_addr;
+ notes->src->widths.target = notes->src->widths.max_addr;
- notes->widths.addr = notes->widths.target;
+ notes->src->widths.addr = notes->src->widths.target;
if (annotate_opts.show_nr_jumps)
- notes->widths.addr += notes->widths.jumps + 1;
+ notes->src->widths.addr += notes->src->widths.jumps + 1;
}
void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms)
@@ -3015,14 +1466,14 @@ void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *m
annotate_opts.full_addr = !annotate_opts.full_addr;
if (annotate_opts.full_addr)
- notes->start = map__objdump_2mem(ms->map, ms->sym->start);
+ notes->src->start = map__objdump_2mem(ms->map, ms->sym->start);
else
- notes->start = map__rip_2objdump(ms->map, ms->sym->start);
+ notes->src->start = map__rip_2objdump(ms->map, ms->sym->start);
annotation__update_column_widths(notes);
}
-static void annotation__calc_lines(struct annotation *notes, struct map *map,
+static void annotation__calc_lines(struct annotation *notes, struct map_symbol *ms,
struct rb_root *root)
{
struct annotation_line *al;
@@ -3030,6 +1481,7 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map,
list_for_each_entry(al, &notes->src->source, node) {
double percent_max = 0.0;
+ u64 addr;
int i;
for (i = 0; i < al->data_nr; i++) {
@@ -3045,8 +1497,9 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map,
if (percent_max <= 0.5)
continue;
- al->path = get_srcline(map__dso(map), notes->start + al->offset, NULL,
- false, true, notes->start + al->offset);
+ addr = map__rip_2objdump(ms->map, ms->sym->start);
+ al->path = get_srcline(map__dso(ms->map), addr + al->offset, NULL,
+ false, true, ms->sym->start + al->offset);
insert_source_line(&tmp_root, al);
}
@@ -3057,7 +1510,7 @@ static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root)
{
struct annotation *notes = symbol__annotation(ms->sym);
- annotation__calc_lines(notes, ms->map, root);
+ annotation__calc_lines(notes, ms, root);
}
int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel)
@@ -3141,7 +1594,7 @@ static double annotation_line__max_percent(struct annotation_line *al,
double percent_max = 0.0;
int i;
- for (i = 0; i < notes->nr_events; i++) {
+ for (i = 0; i < notes->src->nr_events; i++) {
double percent;
percent = annotation_data__percent(&al->data[i],
@@ -3182,7 +1635,8 @@ call_like:
obj__printf(obj, " ");
}
- disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset, notes->widths.max_ins_name);
+ disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset,
+ notes->src->widths.max_ins_name);
}
static void ipc_coverage_string(char *bf, int size, struct annotation *notes)
@@ -3230,7 +1684,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
if (al->offset != -1 && percent_max != 0.0) {
int i;
- for (i = 0; i < notes->nr_events; i++) {
+ for (i = 0; i < notes->src->nr_events; i++) {
double percent;
percent = annotation_data__percent(&al->data[i], percent_type);
@@ -3310,9 +1764,11 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " ");
else if (al->offset == -1) {
if (al->line_nr && annotate_opts.show_linenr)
- printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr);
+ printed = scnprintf(bf, sizeof(bf), "%-*d ",
+ notes->src->widths.addr + 1, al->line_nr);
else
- printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " ");
+ printed = scnprintf(bf, sizeof(bf), "%-*s ",
+ notes->src->widths.addr, " ");
obj__printf(obj, bf);
obj__printf(obj, "%-*s", width - printed - pcnt_width - cycles_width + 1, al->line);
} else {
@@ -3320,7 +1776,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
int color = -1;
if (!annotate_opts.use_offset)
- addr += notes->start;
+ addr += notes->src->start;
if (!annotate_opts.use_offset) {
printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr);
@@ -3330,7 +1786,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
if (annotate_opts.show_nr_jumps) {
int prev;
printed = scnprintf(bf, sizeof(bf), "%*d ",
- notes->widths.jumps,
+ notes->src->widths.jumps,
al->jump_sources);
prev = obj__set_jumps_percent_color(obj, al->jump_sources,
current_entry);
@@ -3339,7 +1795,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
}
print_addr:
printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ",
- notes->widths.target, addr);
+ notes->src->widths.target, addr);
} else if (ins__is_call(&disasm_line(al)->ins) &&
annotate_opts.offset_level >= ANNOTATION__OFFSET_CALL) {
goto print_addr;
@@ -3347,7 +1803,7 @@ print_addr:
goto print_addr;
} else {
printed = scnprintf(bf, sizeof(bf), "%-*s ",
- notes->widths.addr, " ");
+ notes->src->widths.addr, " ");
}
}
@@ -3383,37 +1839,29 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
size_t size = symbol__size(sym);
int nr_pcnt = 1, err;
- notes->src->offsets = zalloc(size * sizeof(struct annotation_line *));
- if (notes->src->offsets == NULL)
- return ENOMEM;
-
if (evsel__is_group_event(evsel))
nr_pcnt = evsel->core.nr_members;
err = symbol__annotate(ms, evsel, parch);
if (err)
- goto out_free_offsets;
+ return err;
symbol__calc_percent(sym, evsel);
- annotation__set_offsets(notes, size);
+ annotation__set_index(notes);
annotation__mark_jump_targets(notes, sym);
err = annotation__compute_ipc(notes, size);
if (err)
- goto out_free_offsets;
+ return err;
annotation__init_column_widths(notes, sym);
- notes->nr_events = nr_pcnt;
+ notes->src->nr_events = nr_pcnt;
annotation__update_column_widths(notes);
sym->annotate2 = 1;
return 0;
-
-out_free_offsets:
- zfree(&notes->src->offsets);
- return err;
}
static int annotation__config(const char *var, const char *value, void *data)
@@ -3585,6 +2033,12 @@ static int extract_reg_offset(struct arch *arch, const char *str,
* %gs:0x18(%rbx). In that case it should skip the part.
*/
if (*str == arch->objdump.register_char) {
+ if (arch__is(arch, "x86")) {
+ /* FIXME: Handle other segment registers */
+ if (!strncmp(str, "%gs:", 4))
+ op_loc->segment = INSN_SEG_X86_GS;
+ }
+
while (*str && !isdigit(*str) &&
*str != arch->objdump.memory_ref_char)
str++;
@@ -3650,7 +2104,7 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl,
struct annotated_op_loc *op_loc;
int i;
- if (!strcmp(dl->ins.name, "lock"))
+ if (ins__is_lock(&dl->ins))
ops = dl->ops.locked.ops;
else
ops = &dl->ops;
@@ -3681,40 +2135,40 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl,
op_loc->multi_regs = multi_regs;
extract_reg_offset(arch, insn_str, op_loc);
} else {
- char *s = strdup(insn_str);
+ char *s, *p = NULL;
+
+ if (arch__is(arch, "x86")) {
+ /* FIXME: Handle other segment registers */
+ if (!strncmp(insn_str, "%gs:", 4)) {
+ op_loc->segment = INSN_SEG_X86_GS;
+ op_loc->offset = strtol(insn_str + 4,
+ &p, 0);
+ if (p && p != insn_str + 4)
+ op_loc->imm = true;
+ continue;
+ }
+ }
- if (s) {
+ s = strdup(insn_str);
+ if (s == NULL)
+ return -1;
+
+ if (*s == arch->objdump.register_char)
op_loc->reg1 = get_dwarf_regnum(s, 0);
- free(s);
+ else if (*s == arch->objdump.imm_char) {
+ op_loc->offset = strtol(s + 1, &p, 0);
+ if (p && p != s + 1)
+ op_loc->imm = true;
}
+ free(s);
}
}
return 0;
}
-static void symbol__ensure_annotate(struct map_symbol *ms, struct evsel *evsel)
-{
- struct disasm_line *dl, *tmp_dl;
- struct annotation *notes;
-
- notes = symbol__annotation(ms->sym);
- if (!list_empty(&notes->src->source))
- return;
-
- if (symbol__annotate(ms, evsel, NULL) < 0)
- return;
-
- /* remove non-insn disasm lines for simplicity */
- list_for_each_entry_safe(dl, tmp_dl, &notes->src->source, al.node) {
- if (dl->al.offset == -1) {
- list_del(&dl->al.node);
- free(dl);
- }
- }
-}
-
-static struct disasm_line *find_disasm_line(struct symbol *sym, u64 ip)
+static struct disasm_line *find_disasm_line(struct symbol *sym, u64 ip,
+ bool allow_update)
{
struct disasm_line *dl;
struct annotation *notes;
@@ -3722,12 +2176,16 @@ static struct disasm_line *find_disasm_line(struct symbol *sym, u64 ip)
notes = symbol__annotation(sym);
list_for_each_entry(dl, &notes->src->source, al.node) {
+ if (dl->al.offset == -1)
+ continue;
+
if (sym->start + dl->al.offset == ip) {
/*
* llvm-objdump places "lock" in a separate line and
* in that case, we want to get the next line.
*/
- if (!strcmp(dl->ins.name, "lock") && *dl->ops.raw == '\0') {
+ if (ins__is_lock(&dl->ins) &&
+ *dl->ops.raw == '\0' && allow_update) {
ip++;
continue;
}
@@ -3773,6 +2231,58 @@ static bool is_stack_operation(struct arch *arch, struct disasm_line *dl)
return false;
}
+static bool is_stack_canary(struct arch *arch, struct annotated_op_loc *loc)
+{
+ /* On x86_64, %gs:40 is used for stack canary */
+ if (arch__is(arch, "x86")) {
+ if (loc->segment == INSN_SEG_X86_GS && loc->imm &&
+ loc->offset == 40)
+ return true;
+ }
+
+ return false;
+}
+
+static struct disasm_line *
+annotation__prev_asm_line(struct annotation *notes, struct disasm_line *curr)
+{
+ struct list_head *sources = &notes->src->source;
+ struct disasm_line *prev;
+
+ if (curr == list_first_entry(sources, struct disasm_line, al.node))
+ return NULL;
+
+ prev = list_prev_entry(curr, al.node);
+ while (prev->al.offset == -1 &&
+ prev != list_first_entry(sources, struct disasm_line, al.node))
+ prev = list_prev_entry(prev, al.node);
+
+ if (prev->al.offset == -1)
+ return NULL;
+
+ return prev;
+}
+
+static struct disasm_line *
+annotation__next_asm_line(struct annotation *notes, struct disasm_line *curr)
+{
+ struct list_head *sources = &notes->src->source;
+ struct disasm_line *next;
+
+ if (curr == list_last_entry(sources, struct disasm_line, al.node))
+ return NULL;
+
+ next = list_next_entry(curr, al.node);
+ while (next->al.offset == -1 &&
+ next != list_last_entry(sources, struct disasm_line, al.node))
+ next = list_next_entry(next, al.node);
+
+ if (next->al.offset == -1)
+ return NULL;
+
+ return next;
+}
+
u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset,
struct disasm_line *dl)
{
@@ -3788,12 +2298,12 @@ u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset,
* disasm_line. If it's the last one, we can use symbol's end
* address directly.
*/
- if (&dl->al.node == notes->src->source.prev)
+ next = annotation__next_asm_line(notes, dl);
+ if (next == NULL)
addr = ms->sym->end + offset;
- else {
- next = list_next_entry(dl, al.node);
+ else
addr = ip + (next->al.offset - dl->al.offset) + offset;
- }
+
return map__rip_2objdump(ms->map, addr);
}
@@ -3816,9 +2326,7 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
struct annotated_op_loc *op_loc;
struct annotated_data_type *mem_type;
struct annotated_item_stat *istat;
- u64 ip = he->ip, addr = 0;
- const char *var_name = NULL;
- int var_offset;
+ u64 ip = he->ip;
int i;
ann_data_stat.total++;
@@ -3833,19 +2341,17 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
return NULL;
}
- if (evsel__get_arch(evsel, &arch) < 0) {
+ /* Make sure it has the disasm of the function */
+ if (symbol__annotate(ms, evsel, &arch) < 0) {
ann_data_stat.no_insn++;
return NULL;
}
- /* Make sure it runs objdump to get disasm of the function */
- symbol__ensure_annotate(ms, evsel);
-
/*
* Get a disasm to extract the location from the insn.
* This is too slow...
*/
- dl = find_disasm_line(ms->sym, ip);
+ dl = find_disasm_line(ms->sym, ip, /*allow_update=*/true);
if (dl == NULL) {
ann_data_stat.no_insn++;
return NULL;
@@ -3871,51 +2377,55 @@ retry:
}
for_each_insn_op_loc(&loc, i, op_loc) {
- if (!op_loc->mem_ref)
+ struct data_loc_info dloc = {
+ .arch = arch,
+ .thread = he->thread,
+ .ms = ms,
+ /* Recalculate IP for LOCK prefix or insn fusion */
+ .ip = ms->sym->start + dl->al.offset,
+ .cpumode = he->cpumode,
+ .op = op_loc,
+ };
+
+ if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE)
continue;
/* Recalculate IP because of LOCK prefix or insn fusion */
ip = ms->sym->start + dl->al.offset;
- var_offset = op_loc->offset;
-
/* PC-relative addressing */
if (op_loc->reg1 == DWARF_REG_PC) {
- struct addr_location al;
- struct symbol *var;
- u64 map_addr;
-
- addr = annotate_calc_pcrel(ms, ip, op_loc->offset, dl);
- /* Kernel symbols might be relocated */
- map_addr = addr + map__reloc(ms->map);
-
- addr_location__init(&al);
- var = thread__find_symbol_fb(he->thread, he->cpumode,
- map_addr, &al);
- if (var) {
- var_name = var->name;
- /* Calculate type offset from the start of variable */
- var_offset = map_addr - map__unmap_ip(al.map, var->start);
- }
- addr_location__exit(&al);
+ dloc.var_addr = annotate_calc_pcrel(ms, dloc.ip,
+ op_loc->offset, dl);
+ }
+
+ /* This CPU access in kernel - pretend PC-relative addressing */
+ if (map__dso(ms->map)->kernel && arch__is(arch, "x86") &&
+ op_loc->segment == INSN_SEG_X86_GS && op_loc->imm) {
+ dloc.var_addr = op_loc->offset;
+ op_loc->reg1 = DWARF_REG_PC;
+ }
+
+ mem_type = find_data_type(&dloc);
+
+ if (mem_type == NULL && is_stack_canary(arch, op_loc)) {
+ istat->good++;
+ he->mem_type_off = 0;
+ return &canary_type;
}
- mem_type = find_data_type(ms, ip, op_loc, addr, var_name);
if (mem_type)
istat->good++;
else
istat->bad++;
- if (mem_type && var_name)
- op_loc->offset = var_offset;
-
if (symbol_conf.annotate_data_sample) {
annotated_data_type__update_samples(mem_type, evsel,
- op_loc->offset,
+ dloc.type_offset,
he->stat.nr_events,
he->stat.period);
}
- he->mem_type_off = op_loc->offset;
+ he->mem_type_off = dloc.type_offset;
return mem_type;
}
@@ -3924,10 +2434,13 @@ retry:
* from the previous instruction.
*/
if (dl->al.offset > 0) {
+ struct annotation *notes;
struct disasm_line *prev_dl;
- prev_dl = list_prev_entry(dl, al.node);
- if (ins__is_fused(arch, prev_dl->ins.name, dl->ins.name)) {
+ notes = symbol__annotation(ms->sym);
+ prev_dl = annotation__prev_asm_line(notes, dl);
+
+ if (prev_dl && ins__is_fused(arch, prev_dl->ins.name, dl->ins.name)) {
dl = prev_dl;
goto retry;
}
@@ -3937,3 +2450,227 @@ retry:
istat->bad++;
return NULL;
}
+
+/* Basic block traversal (BFS) data structure */
+struct basic_block_data {
+ struct list_head queue;
+ struct list_head visited;
+};
+
+/*
+ * During the traversal, it needs to know the parent block where the current
+ * block block started from. Note that single basic block can be parent of
+ * two child basic blocks (in case of condition jump).
+ */
+struct basic_block_link {
+ struct list_head node;
+ struct basic_block_link *parent;
+ struct annotated_basic_block *bb;
+};
+
+/* Check any of basic block in the list already has the offset */
+static bool basic_block_has_offset(struct list_head *head, s64 offset)
+{
+ struct basic_block_link *link;
+
+ list_for_each_entry(link, head, node) {
+ s64 begin_offset = link->bb->begin->al.offset;
+ s64 end_offset = link->bb->end->al.offset;
+
+ if (begin_offset <= offset && offset <= end_offset)
+ return true;
+ }
+ return false;
+}
+
+static bool is_new_basic_block(struct basic_block_data *bb_data,
+ struct disasm_line *dl)
+{
+ s64 offset = dl->al.offset;
+
+ if (basic_block_has_offset(&bb_data->visited, offset))
+ return false;
+ if (basic_block_has_offset(&bb_data->queue, offset))
+ return false;
+ return true;
+}
+
+/* Add a basic block starting from dl and link it to the parent */
+static int add_basic_block(struct basic_block_data *bb_data,
+ struct basic_block_link *parent,
+ struct disasm_line *dl)
+{
+ struct annotated_basic_block *bb;
+ struct basic_block_link *link;
+
+ if (dl == NULL)
+ return -1;
+
+ if (!is_new_basic_block(bb_data, dl))
+ return 0;
+
+ bb = zalloc(sizeof(*bb));
+ if (bb == NULL)
+ return -1;
+
+ bb->begin = dl;
+ bb->end = dl;
+ INIT_LIST_HEAD(&bb->list);
+
+ link = malloc(sizeof(*link));
+ if (link == NULL) {
+ free(bb);
+ return -1;
+ }
+
+ link->bb = bb;
+ link->parent = parent;
+ list_add_tail(&link->node, &bb_data->queue);
+ return 0;
+}
+
+/* Returns true when it finds the target in the current basic block */
+static bool process_basic_block(struct basic_block_data *bb_data,
+ struct basic_block_link *link,
+ struct symbol *sym, u64 target)
+{
+ struct disasm_line *dl, *next_dl, *last_dl;
+ struct annotation *notes = symbol__annotation(sym);
+ bool found = false;
+
+ dl = link->bb->begin;
+ /* Check if it's already visited */
+ if (basic_block_has_offset(&bb_data->visited, dl->al.offset))
+ return false;
+
+ last_dl = list_last_entry(&notes->src->source,
+ struct disasm_line, al.node);
+ if (last_dl->al.offset == -1)
+ last_dl = annotation__prev_asm_line(notes, last_dl);
+
+ if (last_dl == NULL)
+ return false;
+
+ list_for_each_entry_from(dl, &notes->src->source, al.node) {
+ /* Skip comment or debug info line */
+ if (dl->al.offset == -1)
+ continue;
+ /* Found the target instruction */
+ if (sym->start + dl->al.offset == target) {
+ found = true;
+ break;
+ }
+ /* End of the function, finish the block */
+ if (dl == last_dl)
+ break;
+ /* 'return' instruction finishes the block */
+ if (ins__is_ret(&dl->ins))
+ break;
+ /* normal instructions are part of the basic block */
+ if (!ins__is_jump(&dl->ins))
+ continue;
+ /* jump to a different function, tail call or return */
+ if (dl->ops.target.outside)
+ break;
+ /* jump instruction creates new basic block(s) */
+ next_dl = find_disasm_line(sym, sym->start + dl->ops.target.offset,
+ /*allow_update=*/false);
+ if (next_dl)
+ add_basic_block(bb_data, link, next_dl);
+
+ /*
+ * FIXME: determine conditional jumps properly.
+ * Conditional jumps create another basic block with the
+ * next disasm line.
+ */
+ if (!strstr(dl->ins.name, "jmp")) {
+ next_dl = annotation__next_asm_line(notes, dl);
+ if (next_dl)
+ add_basic_block(bb_data, link, next_dl);
+ }
+ break;
+
+ }
+ link->bb->end = dl;
+ return found;
+}
+
+/*
+ * It founds a target basic block, build a proper linked list of basic blocks
+ * by following the link recursively.
+ */
+static void link_found_basic_blocks(struct basic_block_link *link,
+ struct list_head *head)
+{
+ while (link) {
+ struct basic_block_link *parent = link->parent;
+
+ list_move(&link->bb->list, head);
+ list_del(&link->node);
+ free(link);
+
+ link = parent;
+ }
+}
+
+static void delete_basic_blocks(struct basic_block_data *bb_data)
+{
+ struct basic_block_link *link, *tmp;
+
+ list_for_each_entry_safe(link, tmp, &bb_data->queue, node) {
+ list_del(&link->node);
+ free(link->bb);
+ free(link);
+ }
+
+ list_for_each_entry_safe(link, tmp, &bb_data->visited, node) {
+ list_del(&link->node);
+ free(link->bb);
+ free(link);
+ }
+}
+
+/**
+ * annotate_get_basic_blocks - Get basic blocks for given address range
+ * @sym: symbol to annotate
+ * @src: source address
+ * @dst: destination address
+ * @head: list head to save basic blocks
+ *
+ * This function traverses disasm_lines from @src to @dst and save them in a
+ * list of annotated_basic_block to @head. It uses BFS to find the shortest
+ * path between two. The basic_block_link is to maintain parent links so
+ * that it can build a list of blocks from the start.
+ */
+int annotate_get_basic_blocks(struct symbol *sym, s64 src, s64 dst,
+ struct list_head *head)
+{
+ struct basic_block_data bb_data = {
+ .queue = LIST_HEAD_INIT(bb_data.queue),
+ .visited = LIST_HEAD_INIT(bb_data.visited),
+ };
+ struct basic_block_link *link;
+ struct disasm_line *dl;
+ int ret = -1;
+
+ dl = find_disasm_line(sym, src, /*allow_update=*/false);
+ if (dl == NULL)
+ return -1;
+
+ if (add_basic_block(&bb_data, /*parent=*/NULL, dl) < 0)
+ return -1;
+
+ /* Find shortest path from src to dst using BFS */
+ while (!list_empty(&bb_data.queue)) {
+ link = list_first_entry(&bb_data.queue, struct basic_block_link, node);
+
+ if (process_basic_block(&bb_data, link, sym, dst)) {
+ link_found_basic_blocks(link, head);
+ ret = 0;
+ break;
+ }
+ list_move(&link->node, &bb_data.visited);
+ }
+ delete_basic_blocks(&bb_data);
+ return ret;
+}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 13cc659e508c79..d5c821c22f79e5 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -13,10 +13,10 @@
#include "mutex.h"
#include "spark.h"
#include "hashmap.h"
+#include "disasm.h"
struct hist_browser_timer;
struct hist_entry;
-struct ins_ops;
struct map;
struct map_symbol;
struct addr_map_symbol;
@@ -26,59 +26,6 @@ struct evsel;
struct symbol;
struct annotated_data_type;
-struct ins {
- const char *name;
- struct ins_ops *ops;
-};
-
-struct ins_operands {
- char *raw;
- struct {
- char *raw;
- char *name;
- struct symbol *sym;
- u64 addr;
- s64 offset;
- bool offset_avail;
- bool outside;
- bool multi_regs;
- } target;
- union {
- struct {
- char *raw;
- char *name;
- u64 addr;
- bool multi_regs;
- } source;
- struct {
- struct ins ins;
- struct ins_operands *ops;
- } locked;
- struct {
- char *raw_comment;
- char *raw_func_start;
- } jump;
- };
-};
-
-struct arch;
-
-bool arch__is(struct arch *arch, const char *name);
-
-struct ins_ops {
- void (*free)(struct ins_operands *ops);
- int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms);
- int (*scnprintf)(struct ins *ins, char *bf, size_t size,
- struct ins_operands *ops, int max_ins_name);
-};
-
-bool ins__is_jump(const struct ins *ins);
-bool ins__is_call(const struct ins *ins);
-bool ins__is_ret(const struct ins *ins);
-bool ins__is_lock(const struct ins *ins);
-int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops, int max_ins_name);
-bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
-
#define ANNOTATION__IPC_WIDTH 6
#define ANNOTATION__CYCLES_WIDTH 6
#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
@@ -171,6 +118,8 @@ struct disasm_line {
struct annotation_line al;
};
+void annotation_line__add(struct annotation_line *al, struct list_head *head);
+
static inline double annotation_data__percent(struct annotation_data *data,
unsigned int which)
{
@@ -212,7 +161,6 @@ static inline bool disasm_line__has_local_offset(const struct disasm_line *dl)
*/
bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym);
-void disasm_line__free(struct disasm_line *dl);
struct annotation_line *
annotation_line__next(struct annotation_line *pos, struct list_head *head);
@@ -235,7 +183,6 @@ int __annotation__scnprintf_samples_period(struct annotation *notes,
struct evsel *evsel,
bool show_freq);
-int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name);
size_t disasm__fprintf(struct list_head *head, FILE *fp);
void symbol__calc_percent(struct symbol *sym, struct evsel *evsel);
@@ -299,12 +246,14 @@ struct cyc_hist {
* we have more than a group in a evlist, where we will want
* to see each group separately, that is why symbol__annotate2()
* sets src->nr_histograms to evsel->nr_members.
- * @offsets: Array of annotation_line to be accessed by offset.
* @samples: Hash map of sym_hist_entry. Keyed by event index and offset in symbol.
+ * @nr_events: Number of events in the current output.
* @nr_entries: Number of annotated_line in the source list.
* @nr_asm_entries: Number of annotated_line with actual asm instruction in the
* source list.
- * @max_line_len: Maximum length of objdump output in an annotated_line.
+ * @max_jump_sources: Maximum number of jump instructions targeting to the same
+ * instruction.
+ * @widths: Precalculated width of each column in the TUI output.
*
* disasm_lines are allocated, percentages calculated and all sorted by percentage
* when the annotation is about to be presented, so the percentages are for
@@ -315,14 +264,27 @@ struct cyc_hist {
struct annotated_source {
struct list_head source;
struct sym_hist *histograms;
- struct annotation_line **offsets;
struct hashmap *samples;
int nr_histograms;
+ int nr_events;
int nr_entries;
int nr_asm_entries;
- u16 max_line_len;
+ int max_jump_sources;
+ u64 start;
+ struct {
+ u8 addr;
+ u8 jumps;
+ u8 target;
+ u8 min_addr;
+ u8 max_addr;
+ u8 max_ins_name;
+ u16 max_line_len;
+ } widths;
};
+struct annotation_line *annotated_source__get_line(struct annotated_source *src,
+ s64 offset);
+
/**
* struct annotated_branch - basic block and IPC information for a symbol.
*
@@ -351,17 +313,6 @@ struct annotated_branch {
};
struct LOCKABLE annotation {
- u64 start;
- int nr_events;
- int max_jump_sources;
- struct {
- u8 addr;
- u8 jumps;
- u8 target;
- u8 min_addr;
- u8 max_addr;
- u8 max_ins_name;
- } widths;
struct annotated_source *src;
struct annotated_branch *branch;
};
@@ -385,7 +336,7 @@ static inline int annotation__cycles_width(struct annotation *notes)
static inline int annotation__pcnt_width(struct annotation *notes)
{
- return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events;
+ return (symbol_conf.show_total_period ? 12 : 7) * notes->src->nr_events;
}
static inline bool annotation_line__filter(struct annotation_line *al)
@@ -393,10 +344,7 @@ static inline bool annotation_line__filter(struct annotation_line *al)
return annotate_opts.hide_src_code && al->offset == -1;
}
-void annotation__set_offsets(struct annotation *notes, s64 size);
-void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym);
void annotation__update_column_widths(struct annotation *notes);
-void annotation__init_column_widths(struct annotation *notes, struct symbol *sym);
void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms);
static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx)
@@ -511,15 +459,19 @@ int annotate_check_args(void);
* @reg1: First register in the operand
* @reg2: Second register in the operand
* @offset: Memory access offset in the operand
+ * @segment: Segment selector register
* @mem_ref: Whether the operand accesses memory
* @multi_regs: Whether the second register is used
+ * @imm: Whether the operand is an immediate value (in offset)
*/
struct annotated_op_loc {
int reg1;
int reg2;
int offset;
+ u8 segment;
bool mem_ref;
bool multi_regs;
+ bool imm;
};
enum annotated_insn_ops {
@@ -529,6 +481,17 @@ enum annotated_insn_ops {
INSN_OP_MAX,
};
+enum annotated_x86_segment {
+ INSN_SEG_NONE = 0,
+
+ INSN_SEG_X86_CS,
+ INSN_SEG_X86_DS,
+ INSN_SEG_X86_ES,
+ INSN_SEG_X86_FS,
+ INSN_SEG_X86_GS,
+ INSN_SEG_X86_SS,
+};
+
/**
* struct annotated_insn_loc - Location info of instruction
* @ops: Array of location info for source and target operands
@@ -561,4 +524,20 @@ extern struct list_head ann_insn_stat;
u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset,
struct disasm_line *dl);
+/**
+ * struct annotated_basic_block - Basic block of instructions
+ * @list: List node
+ * @begin: start instruction in the block
+ * @end: end instruction in the block
+ */
+struct annotated_basic_block {
+ struct list_head list;
+ struct disasm_line *begin;
+ struct disasm_line *end;
+};
+
+/* Get a list of basic blocks from src to dst addresses */
+int annotate_get_basic_blocks(struct symbol *sym, s64 src, s64 dst,
+ struct list_head *head);
+
#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 3684e6009b6350..cfa2153d4611dd 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
struct evlist *evlist,
struct evsel *evsel, int idx)
{
- bool per_cpu = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
+ bool per_cpu = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus);
mp->mmap_needed = evsel->needs_auxtrace_mmap;
@@ -648,7 +648,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
{
- bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus);
+ bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus);
if (per_cpu_mmaps) {
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
@@ -1466,6 +1466,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
char *endptr;
bool period_type_set = false;
bool period_set = false;
+ bool iy = false;
synth_opts->set = true;
@@ -1484,6 +1485,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
switch (*p++) {
case 'i':
case 'y':
+ iy = true;
if (p[-1] == 'y')
synth_opts->cycles = true;
else
@@ -1649,7 +1651,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts,
}
}
out:
- if (synth_opts->instructions || synth_opts->cycles) {
+ if (iy) {
if (!period_type_set)
synth_opts->period_type =
PERF_ITRACE_DEFAULT_PERIOD_TYPE;
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
index 6eb2c78fd7f4a3..44f0f708a15d6a 100644
--- a/tools/perf/util/bpf_kwork.c
+++ b/tools/perf/util/bpf_kwork.c
@@ -147,12 +147,12 @@ static bool valid_kwork_class_type(enum kwork_class_type type)
static int setup_filters(struct perf_kwork *kwork)
{
- u8 val = 1;
- int i, nr_cpus, key, fd;
- struct perf_cpu_map *map;
-
if (kwork->cpu_list != NULL) {
- fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter);
+ int idx, nr_cpus;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
+ int fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter);
+
if (fd < 0) {
pr_debug("Invalid cpu filter fd\n");
return -1;
@@ -165,8 +165,8 @@ static int setup_filters(struct perf_kwork *kwork)
}
nr_cpus = libbpf_num_possible_cpus();
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ u8 val = 1;
if (cpu.cpu >= nr_cpus) {
perf_cpu_map__put(map);
@@ -181,6 +181,8 @@ static int setup_filters(struct perf_kwork *kwork)
}
if (kwork->profile_name != NULL) {
+ int key, fd;
+
if (strlen(kwork->profile_name) >= MAX_KWORKNAME) {
pr_err("Requested name filter %s too large, limit to %d\n",
kwork->profile_name, MAX_KWORKNAME - 1);
diff --git a/tools/perf/util/bpf_kwork_top.c b/tools/perf/util/bpf_kwork_top.c
index 035e02272790a3..22a3b00a1e23f9 100644
--- a/tools/perf/util/bpf_kwork_top.c
+++ b/tools/perf/util/bpf_kwork_top.c
@@ -122,11 +122,11 @@ static bool valid_kwork_class_type(enum kwork_class_type type)
static int setup_filters(struct perf_kwork *kwork)
{
- u8 val = 1;
- int i, nr_cpus, fd;
- struct perf_cpu_map *map;
-
if (kwork->cpu_list) {
+ int idx, nr_cpus, fd;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
+
fd = bpf_map__fd(skel->maps.kwork_top_cpu_filter);
if (fd < 0) {
pr_debug("Invalid cpu filter fd\n");
@@ -140,8 +140,8 @@ static int setup_filters(struct perf_kwork *kwork)
}
nr_cpus = libbpf_num_possible_cpus();
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ u8 val = 1;
if (cpu.cpu >= nr_cpus) {
perf_cpu_map__put(map);
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index 2872f9bc07850b..0acbd74e8c7609 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -341,6 +341,27 @@ failure:
return 1; /* Failure: don't filter */
}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int sys_enter_nanosleep(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args = augmented_args_payload();
+ const void *req_arg = (const void *)args->args[0];
+ unsigned int len = sizeof(augmented_args->args);
+ __u32 size = sizeof(struct timespec64);
+
+ if (augmented_args == NULL)
+ goto failure;
+
+ if (size > sizeof(augmented_args->__data))
+ goto failure;
+
+ bpf_probe_read_user(&augmented_args->__data, size, req_arg);
+
+ return augmented__output(args, augmented_args, len + size);
+failure:
+ return 1; /* Failure: don't filter */
+}
+
static pid_t getpid(void)
{
return bpf_get_current_pid_tgid();
diff --git a/tools/perf/util/bpf_skel/bench_uprobe.bpf.c b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c
index 2c55896bb33c31..a01c7f791fcde4 100644
--- a/tools/perf/util/bpf_skel/bench_uprobe.bpf.c
+++ b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c
@@ -4,6 +4,7 @@
#include <bpf/bpf_tracing.h>
unsigned int nr_uprobes;
+unsigned int nr_uretprobes;
SEC("uprobe")
int BPF_UPROBE(empty)
@@ -20,4 +21,19 @@ int BPF_UPROBE(trace_printk)
return 0;
}
+SEC("uretprobe")
+int BPF_URETPROBE(empty_ret)
+{
+ return 0;
+}
+
+SEC("uretprobe")
+int BPF_URETPROBE(trace_printk_ret)
+{
+ char fmt[] = "perf bench uretprobe %u";
+
+ bpf_trace_printk(fmt, sizeof(fmt), ++nr_uretprobes);
+ return 0;
+}
+
char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index fb54bd38e7d094..d931a898c434be 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -284,6 +284,7 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags)
struct task_struct *curr;
struct mm_struct___old *mm_old;
struct mm_struct___new *mm_new;
+ struct sighand_struct *sighand;
switch (flags) {
case LCB_F_READ: /* rwsem */
@@ -305,7 +306,9 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags)
break;
case LCB_F_SPIN: /* spinlock */
curr = bpf_get_current_task_btf();
- if (&curr->sighand->siglock == (void *)lock)
+ sighand = curr->sighand;
+
+ if (sighand && &sighand->siglock == (void *)lock)
return LCD_F_SIGHAND_LOCK;
break;
default:
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 03c64b85383b8b..864bc26b6b4610 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -327,48 +327,56 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid
return write_padded(fd, name, name_len + 1, len);
}
-static int machine__write_buildid_table(struct machine *machine,
- struct feat_fd *fd)
+struct machine__write_buildid_table_cb_args {
+ struct machine *machine;
+ struct feat_fd *fd;
+ u16 kmisc, umisc;
+};
+
+static int machine__write_buildid_table_cb(struct dso *dso, void *data)
{
- int err = 0;
- struct dso *pos;
- u16 kmisc = PERF_RECORD_MISC_KERNEL,
- umisc = PERF_RECORD_MISC_USER;
+ struct machine__write_buildid_table_cb_args *args = data;
+ const char *name;
+ size_t name_len;
+ bool in_kernel = false;
- if (!machine__is_host(machine)) {
- kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
- umisc = PERF_RECORD_MISC_GUEST_USER;
- }
+ if (!dso->has_build_id)
+ return 0;
- dsos__for_each_with_build_id(pos, &machine->dsos.head) {
- const char *name;
- size_t name_len;
- bool in_kernel = false;
+ if (!dso->hit && !dso__is_vdso(dso))
+ return 0;
- if (!pos->hit && !dso__is_vdso(pos))
- continue;
+ if (dso__is_vdso(dso)) {
+ name = dso->short_name;
+ name_len = dso->short_name_len;
+ } else if (dso__is_kcore(dso)) {
+ name = args->machine->mmap_name;
+ name_len = strlen(name);
+ } else {
+ name = dso->long_name;
+ name_len = dso->long_name_len;
+ }
- if (dso__is_vdso(pos)) {
- name = pos->short_name;
- name_len = pos->short_name_len;
- } else if (dso__is_kcore(pos)) {
- name = machine->mmap_name;
- name_len = strlen(name);
- } else {
- name = pos->long_name;
- name_len = pos->long_name_len;
- }
+ in_kernel = dso->kernel || is_kernel_module(name, PERF_RECORD_MISC_CPUMODE_UNKNOWN);
+ return write_buildid(name, name_len, &dso->bid, args->machine->pid,
+ in_kernel ? args->kmisc : args->umisc, args->fd);
+}
- in_kernel = pos->kernel ||
- is_kernel_module(name,
- PERF_RECORD_MISC_CPUMODE_UNKNOWN);
- err = write_buildid(name, name_len, &pos->bid, machine->pid,
- in_kernel ? kmisc : umisc, fd);
- if (err)
- break;
+static int machine__write_buildid_table(struct machine *machine, struct feat_fd *fd)
+{
+ struct machine__write_buildid_table_cb_args args = {
+ .machine = machine,
+ .fd = fd,
+ .kmisc = PERF_RECORD_MISC_KERNEL,
+ .umisc = PERF_RECORD_MISC_USER,
+ };
+
+ if (!machine__is_host(machine)) {
+ args.kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
+ args.umisc = PERF_RECORD_MISC_GUEST_USER;
}
- return err;
+ return dsos__for_each_dso(&machine->dsos, machine__write_buildid_table_cb, &args);
}
int perf_session__write_buildid_table(struct perf_session *session,
@@ -390,42 +398,6 @@ int perf_session__write_buildid_table(struct perf_session *session,
return err;
}
-static int __dsos__hit_all(struct list_head *head)
-{
- struct dso *pos;
-
- list_for_each_entry(pos, head, node)
- pos->hit = true;
-
- return 0;
-}
-
-static int machine__hit_all_dsos(struct machine *machine)
-{
- return __dsos__hit_all(&machine->dsos.head);
-}
-
-int dsos__hit_all(struct perf_session *session)
-{
- struct rb_node *nd;
- int err;
-
- err = machine__hit_all_dsos(&session->machines.host);
- if (err)
- return err;
-
- for (nd = rb_first_cached(&session->machines.guests); nd;
- nd = rb_next(nd)) {
- struct machine *pos = rb_entry(nd, struct machine, rb_node);
-
- err = machine__hit_all_dsos(pos);
- if (err)
- return err;
- }
-
- return 0;
-}
-
void disable_buildid_cache(void)
{
no_buildid_cache = true;
@@ -992,7 +964,7 @@ int perf_session__cache_build_ids(struct perf_session *session)
static bool machine__read_build_ids(struct machine *machine, bool with_hits)
{
- return __dsos__read_build_ids(&machine->dsos.head, with_hits);
+ return dsos__read_build_ids(&machine->dsos, with_hits);
}
bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 4e3a1169379b8c..3fa8bffb07cae4 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -39,8 +39,6 @@ int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct evsel *evsel,
struct machine *machine);
-int dsos__hit_all(struct perf_session *session);
-
int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
struct perf_sample *sample, struct evsel *evsel,
struct machine *machine);
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 356e30c42cd838..6a270d640acb90 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -655,10 +655,10 @@ static char hex_char(unsigned char val)
size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
{
- int i, cpu;
+ int idx;
char *ptr = buf;
unsigned char *bitmap;
- struct perf_cpu last_cpu = perf_cpu_map__cpu(map, perf_cpu_map__nr(map) - 1);
+ struct perf_cpu c, last_cpu = perf_cpu_map__max(map);
if (buf == NULL)
return 0;
@@ -669,12 +669,10 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
return 0;
}
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- cpu = perf_cpu_map__cpu(map, i).cpu;
- bitmap[cpu / 8] |= 1 << (cpu % 8);
- }
+ perf_cpu_map__for_each_cpu(c, idx, map)
+ bitmap[c.cpu / 8] |= 1 << (c.cpu % 8);
- for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
+ for (int cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
unsigned char bits = bitmap[cpu / 8];
if (cpu % 8)
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index c39ee0fcb8cf1d..d633d15329fa09 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -41,6 +41,7 @@ static int redirect_to_stderr;
int debug_data_convert;
static FILE *_debug_file;
bool debug_display_time;
+int debug_type_profile;
FILE *debug_file(void)
{
@@ -231,6 +232,7 @@ static struct sublevel_option debug_opts[] = {
{ .name = "data-convert", .value_ptr = &debug_data_convert },
{ .name = "perf-event-open", .value_ptr = &debug_peo_args },
{ .name = "kmaps", .value_ptr = &debug_kmaps },
+ { .name = "type-profile", .value_ptr = &debug_type_profile },
{ .name = NULL, }
};
@@ -270,6 +272,7 @@ int perf_quiet_option(void)
redirect_to_stderr = 0;
debug_peo_args = 0;
debug_kmaps = 0;
+ debug_type_profile = 0;
return 0;
}
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 35a7a5ae762e6f..a4026d1fd6a311 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -14,6 +14,7 @@ extern int debug_peo_args;
extern bool quiet, dump_trace;
extern int debug_ordered_events;
extern int debug_data_convert;
+extern int debug_type_profile;
#ifndef pr_fmt
#define pr_fmt(fmt) fmt
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
new file mode 100644
index 00000000000000..6d1125e687b71e
--- /dev/null
+++ b/tools/perf/util/disasm.c
@@ -0,0 +1,1837 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/string.h>
+#include <subcmd/run-command.h>
+
+#include "annotate.h"
+#include "build-id.h"
+#include "debug.h"
+#include "disasm.h"
+#include "dso.h"
+#include "env.h"
+#include "evsel.h"
+#include "map.h"
+#include "maps.h"
+#include "namespaces.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "util.h"
+
+static regex_t file_lineno;
+
+/* These can be referred from the arch-dependent code */
+static struct ins_ops call_ops;
+static struct ins_ops dec_ops;
+static struct ins_ops jump_ops;
+static struct ins_ops mov_ops;
+static struct ins_ops nop_ops;
+static struct ins_ops lock_ops;
+static struct ins_ops ret_ops;
+
+static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name);
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name);
+
+static void ins__sort(struct arch *arch);
+static int disasm_line__parse(char *line, const char **namep, char **rawp);
+
+static __attribute__((constructor)) void symbol__init_regexpr(void)
+{
+ regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED);
+}
+
+static int arch__grow_instructions(struct arch *arch)
+{
+ struct ins *new_instructions;
+ size_t new_nr_allocated;
+
+ if (arch->nr_instructions_allocated == 0 && arch->instructions)
+ goto grow_from_non_allocated_table;
+
+ new_nr_allocated = arch->nr_instructions_allocated + 128;
+ new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins));
+ if (new_instructions == NULL)
+ return -1;
+
+out_update_instructions:
+ arch->instructions = new_instructions;
+ arch->nr_instructions_allocated = new_nr_allocated;
+ return 0;
+
+grow_from_non_allocated_table:
+ new_nr_allocated = arch->nr_instructions + 128;
+ new_instructions = calloc(new_nr_allocated, sizeof(struct ins));
+ if (new_instructions == NULL)
+ return -1;
+
+ memcpy(new_instructions, arch->instructions, arch->nr_instructions);
+ goto out_update_instructions;
+}
+
+static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops)
+{
+ struct ins *ins;
+
+ if (arch->nr_instructions == arch->nr_instructions_allocated &&
+ arch__grow_instructions(arch))
+ return -1;
+
+ ins = &arch->instructions[arch->nr_instructions];
+ ins->name = strdup(name);
+ if (!ins->name)
+ return -1;
+
+ ins->ops = ops;
+ arch->nr_instructions++;
+
+ ins__sort(arch);
+ return 0;
+}
+
+#include "arch/arc/annotate/instructions.c"
+#include "arch/arm/annotate/instructions.c"
+#include "arch/arm64/annotate/instructions.c"
+#include "arch/csky/annotate/instructions.c"
+#include "arch/loongarch/annotate/instructions.c"
+#include "arch/mips/annotate/instructions.c"
+#include "arch/x86/annotate/instructions.c"
+#include "arch/powerpc/annotate/instructions.c"
+#include "arch/riscv64/annotate/instructions.c"
+#include "arch/s390/annotate/instructions.c"
+#include "arch/sparc/annotate/instructions.c"
+
+static struct arch architectures[] = {
+ {
+ .name = "arc",
+ .init = arc__annotate_init,
+ },
+ {
+ .name = "arm",
+ .init = arm__annotate_init,
+ },
+ {
+ .name = "arm64",
+ .init = arm64__annotate_init,
+ },
+ {
+ .name = "csky",
+ .init = csky__annotate_init,
+ },
+ {
+ .name = "mips",
+ .init = mips__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
+ {
+ .name = "x86",
+ .init = x86__annotate_init,
+ .instructions = x86__instructions,
+ .nr_instructions = ARRAY_SIZE(x86__instructions),
+ .insn_suffix = "bwlq",
+ .objdump = {
+ .comment_char = '#',
+ .register_char = '%',
+ .memory_ref_char = '(',
+ .imm_char = '$',
+ },
+ },
+ {
+ .name = "powerpc",
+ .init = powerpc__annotate_init,
+ },
+ {
+ .name = "riscv64",
+ .init = riscv64__annotate_init,
+ },
+ {
+ .name = "s390",
+ .init = s390__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
+ {
+ .name = "sparc",
+ .init = sparc__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
+ {
+ .name = "loongarch",
+ .init = loongarch__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
+};
+
+static int arch__key_cmp(const void *name, const void *archp)
+{
+ const struct arch *arch = archp;
+
+ return strcmp(name, arch->name);
+}
+
+static int arch__cmp(const void *a, const void *b)
+{
+ const struct arch *aa = a;
+ const struct arch *ab = b;
+
+ return strcmp(aa->name, ab->name);
+}
+
+static void arch__sort(void)
+{
+ const int nmemb = ARRAY_SIZE(architectures);
+
+ qsort(architectures, nmemb, sizeof(struct arch), arch__cmp);
+}
+
+struct arch *arch__find(const char *name)
+{
+ const int nmemb = ARRAY_SIZE(architectures);
+ static bool sorted;
+
+ if (!sorted) {
+ arch__sort();
+ sorted = true;
+ }
+
+ return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
+}
+
+bool arch__is(struct arch *arch, const char *name)
+{
+ return !strcmp(arch->name, name);
+}
+
+static void ins_ops__delete(struct ins_operands *ops)
+{
+ if (ops == NULL)
+ return;
+ zfree(&ops->source.raw);
+ zfree(&ops->source.name);
+ zfree(&ops->target.raw);
+ zfree(&ops->target.name);
+}
+
+static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw);
+}
+
+int ins__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ if (ins->ops->scnprintf)
+ return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name);
+
+ return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+}
+
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
+{
+ if (!arch || !arch->ins_is_fused)
+ return false;
+
+ return arch->ins_is_fused(arch, ins1, ins2);
+}
+
+static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+ char *endptr, *tok, *name;
+ struct map *map = ms->map;
+ struct addr_map_symbol target = {
+ .ms = { .map = map, },
+ };
+
+ ops->target.addr = strtoull(ops->raw, &endptr, 16);
+
+ name = strchr(endptr, '<');
+ if (name == NULL)
+ goto indirect_call;
+
+ name++;
+
+ if (arch->objdump.skip_functions_char &&
+ strchr(name, arch->objdump.skip_functions_char))
+ return -1;
+
+ tok = strchr(name, '>');
+ if (tok == NULL)
+ return -1;
+
+ *tok = '\0';
+ ops->target.name = strdup(name);
+ *tok = '>';
+
+ if (ops->target.name == NULL)
+ return -1;
+find_target:
+ target.addr = map__objdump_2mem(map, ops->target.addr);
+
+ if (maps__find_ams(ms->maps, &target) == 0 &&
+ map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
+ ops->target.sym = target.ms.sym;
+
+ return 0;
+
+indirect_call:
+ tok = strchr(endptr, '*');
+ if (tok != NULL) {
+ endptr++;
+
+ /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx).
+ * Do not parse such instruction. */
+ if (strstr(endptr, "(%r") == NULL)
+ ops->target.addr = strtoull(endptr, NULL, 16);
+ }
+ goto find_target;
+}
+
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ if (ops->target.sym)
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
+
+ if (ops->target.addr == 0)
+ return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+
+ if (ops->target.name)
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name);
+
+ return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr);
+}
+
+static struct ins_ops call_ops = {
+ .parse = call__parse,
+ .scnprintf = call__scnprintf,
+};
+
+bool ins__is_call(const struct ins *ins)
+{
+ return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops;
+}
+
+/*
+ * Prevents from matching commas in the comment section, e.g.:
+ * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast
+ *
+ * and skip comma as part of function arguments, e.g.:
+ * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc>
+ */
+static inline const char *validate_comma(const char *c, struct ins_operands *ops)
+{
+ if (ops->jump.raw_comment && c > ops->jump.raw_comment)
+ return NULL;
+
+ if (ops->jump.raw_func_start && c > ops->jump.raw_func_start)
+ return NULL;
+
+ return c;
+}
+
+static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+ struct map *map = ms->map;
+ struct symbol *sym = ms->sym;
+ struct addr_map_symbol target = {
+ .ms = { .map = map, },
+ };
+ const char *c = strchr(ops->raw, ',');
+ u64 start, end;
+
+ ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char);
+ ops->jump.raw_func_start = strchr(ops->raw, '<');
+
+ c = validate_comma(c, ops);
+
+ /*
+ * Examples of lines to parse for the _cpp_lex_token@@Base
+ * function:
+ *
+ * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92>
+ * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72>
+ *
+ * The first is a jump to an offset inside the same function,
+ * the second is to another function, i.e. that 0xa72 is an
+ * offset in the cpp_named_operator2name@@base function.
+ */
+ /*
+ * skip over possible up to 2 operands to get to address, e.g.:
+ * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
+ */
+ if (c++ != NULL) {
+ ops->target.addr = strtoull(c, NULL, 16);
+ if (!ops->target.addr) {
+ c = strchr(c, ',');
+ c = validate_comma(c, ops);
+ if (c++ != NULL)
+ ops->target.addr = strtoull(c, NULL, 16);
+ }
+ } else {
+ ops->target.addr = strtoull(ops->raw, NULL, 16);
+ }
+
+ target.addr = map__objdump_2mem(map, ops->target.addr);
+ start = map__unmap_ip(map, sym->start);
+ end = map__unmap_ip(map, sym->end);
+
+ ops->target.outside = target.addr < start || target.addr > end;
+
+ /*
+ * FIXME: things like this in _cpp_lex_token (gcc's cc1 program):
+
+ cpp_named_operator2name@@Base+0xa72
+
+ * Point to a place that is after the cpp_named_operator2name
+ * boundaries, i.e. in the ELF symbol table for cc1
+ * cpp_named_operator2name is marked as being 32-bytes long, but it in
+ * fact is much larger than that, so we seem to need a symbols__find()
+ * routine that looks for >= current->start and < next_symbol->start,
+ * possibly just for C++ objects?
+ *
+ * For now lets just make some progress by marking jumps to outside the
+ * current function as call like.
+ *
+ * Actual navigation will come next, with further understanding of how
+ * the symbol searching and disassembly should be done.
+ */
+ if (maps__find_ams(ms->maps, &target) == 0 &&
+ map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
+ ops->target.sym = target.ms.sym;
+
+ if (!ops->target.outside) {
+ ops->target.offset = target.addr - start;
+ ops->target.offset_avail = true;
+ } else {
+ ops->target.offset_avail = false;
+ }
+
+ return 0;
+}
+
+static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ const char *c;
+
+ if (!ops->target.addr || ops->target.offset < 0)
+ return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+
+ if (ops->target.outside && ops->target.sym != NULL)
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
+
+ c = strchr(ops->raw, ',');
+ c = validate_comma(c, ops);
+
+ if (c != NULL) {
+ const char *c2 = strchr(c + 1, ',');
+
+ c2 = validate_comma(c2, ops);
+ /* check for 3-op insn */
+ if (c2 != NULL)
+ c = c2;
+ c++;
+
+ /* mirror arch objdump's space-after-comma style */
+ if (*c == ' ')
+ c++;
+ }
+
+ return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name,
+ ins->name, c ? c - ops->raw : 0, ops->raw,
+ ops->target.offset);
+}
+
+static void jump__delete(struct ins_operands *ops __maybe_unused)
+{
+ /*
+ * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the
+ * raw string, don't free them.
+ */
+}
+
+static struct ins_ops jump_ops = {
+ .free = jump__delete,
+ .parse = jump__parse,
+ .scnprintf = jump__scnprintf,
+};
+
+bool ins__is_jump(const struct ins *ins)
+{
+ return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops;
+}
+
+static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
+{
+ char *endptr, *name, *t;
+
+ if (strstr(raw, "(%rip)") == NULL)
+ return 0;
+
+ *addrp = strtoull(comment, &endptr, 16);
+ if (endptr == comment)
+ return 0;
+ name = strchr(endptr, '<');
+ if (name == NULL)
+ return -1;
+
+ name++;
+
+ t = strchr(name, '>');
+ if (t == NULL)
+ return 0;
+
+ *t = '\0';
+ *namep = strdup(name);
+ *t = '>';
+
+ return 0;
+}
+
+static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+ ops->locked.ops = zalloc(sizeof(*ops->locked.ops));
+ if (ops->locked.ops == NULL)
+ return 0;
+
+ if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
+ goto out_free_ops;
+
+ ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name);
+
+ if (ops->locked.ins.ops == NULL)
+ goto out_free_ops;
+
+ if (ops->locked.ins.ops->parse &&
+ ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0)
+ goto out_free_ops;
+
+ return 0;
+
+out_free_ops:
+ zfree(&ops->locked.ops);
+ return 0;
+}
+
+static int lock__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ int printed;
+
+ if (ops->locked.ins.ops == NULL)
+ return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
+
+ printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name);
+ return printed + ins__scnprintf(&ops->locked.ins, bf + printed,
+ size - printed, ops->locked.ops, max_ins_name);
+}
+
+static void lock__delete(struct ins_operands *ops)
+{
+ struct ins *ins = &ops->locked.ins;
+
+ if (ins->ops && ins->ops->free)
+ ins->ops->free(ops->locked.ops);
+ else
+ ins_ops__delete(ops->locked.ops);
+
+ zfree(&ops->locked.ops);
+ zfree(&ops->target.raw);
+ zfree(&ops->target.name);
+}
+
+static struct ins_ops lock_ops = {
+ .free = lock__delete,
+ .parse = lock__parse,
+ .scnprintf = lock__scnprintf,
+};
+
+/*
+ * Check if the operand has more than one registers like x86 SIB addressing:
+ * 0x1234(%rax, %rbx, 8)
+ *
+ * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check
+ * the input string after 'memory_ref_char' if exists.
+ */
+static bool check_multi_regs(struct arch *arch, const char *op)
+{
+ int count = 0;
+
+ if (arch->objdump.register_char == 0)
+ return false;
+
+ if (arch->objdump.memory_ref_char) {
+ op = strchr(op, arch->objdump.memory_ref_char);
+ if (op == NULL)
+ return false;
+ }
+
+ while ((op = strchr(op, arch->objdump.register_char)) != NULL) {
+ count++;
+ op++;
+ }
+
+ return count > 1;
+}
+
+static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+{
+ char *s = strchr(ops->raw, ','), *target, *comment, prev;
+
+ if (s == NULL)
+ return -1;
+
+ *s = '\0';
+
+ /*
+ * x86 SIB addressing has something like 0x8(%rax, %rcx, 1)
+ * then it needs to have the closing parenthesis.
+ */
+ if (strchr(ops->raw, '(')) {
+ *s = ',';
+ s = strchr(ops->raw, ')');
+ if (s == NULL || s[1] != ',')
+ return -1;
+ *++s = '\0';
+ }
+
+ ops->source.raw = strdup(ops->raw);
+ *s = ',';
+
+ if (ops->source.raw == NULL)
+ return -1;
+
+ ops->source.multi_regs = check_multi_regs(arch, ops->source.raw);
+
+ target = skip_spaces(++s);
+ comment = strchr(s, arch->objdump.comment_char);
+
+ if (comment != NULL)
+ s = comment - 1;
+ else
+ s = strchr(s, '\0') - 1;
+
+ while (s > target && isspace(s[0]))
+ --s;
+ s++;
+ prev = *s;
+ *s = '\0';
+
+ ops->target.raw = strdup(target);
+ *s = prev;
+
+ if (ops->target.raw == NULL)
+ goto out_free_source;
+
+ ops->target.multi_regs = check_multi_regs(arch, ops->target.raw);
+
+ if (comment == NULL)
+ return 0;
+
+ comment = skip_spaces(comment);
+ comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
+ comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
+
+ return 0;
+
+out_free_source:
+ zfree(&ops->source.raw);
+ return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name,
+ ops->source.name ?: ops->source.raw,
+ ops->target.name ?: ops->target.raw);
+}
+
+static struct ins_ops mov_ops = {
+ .parse = mov__parse,
+ .scnprintf = mov__scnprintf,
+};
+
+static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+{
+ char *target, *comment, *s, prev;
+
+ target = s = ops->raw;
+
+ while (s[0] != '\0' && !isspace(s[0]))
+ ++s;
+ prev = *s;
+ *s = '\0';
+
+ ops->target.raw = strdup(target);
+ *s = prev;
+
+ if (ops->target.raw == NULL)
+ return -1;
+
+ comment = strchr(s, arch->objdump.comment_char);
+ if (comment == NULL)
+ return 0;
+
+ comment = skip_spaces(comment);
+ comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
+
+ return 0;
+}
+
+static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
+ ops->target.name ?: ops->target.raw);
+}
+
+static struct ins_ops dec_ops = {
+ .parse = dec__parse,
+ .scnprintf = dec__scnprintf,
+};
+
+static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
+ struct ins_operands *ops __maybe_unused, int max_ins_name)
+{
+ return scnprintf(bf, size, "%-*s", max_ins_name, "nop");
+}
+
+static struct ins_ops nop_ops = {
+ .scnprintf = nop__scnprintf,
+};
+
+static struct ins_ops ret_ops = {
+ .scnprintf = ins__raw_scnprintf,
+};
+
+bool ins__is_nop(const struct ins *ins)
+{
+ return ins->ops == &nop_ops;
+}
+
+bool ins__is_ret(const struct ins *ins)
+{
+ return ins->ops == &ret_ops;
+}
+
+bool ins__is_lock(const struct ins *ins)
+{
+ return ins->ops == &lock_ops;
+}
+
+static int ins__key_cmp(const void *name, const void *insp)
+{
+ const struct ins *ins = insp;
+
+ return strcmp(name, ins->name);
+}
+
+static int ins__cmp(const void *a, const void *b)
+{
+ const struct ins *ia = a;
+ const struct ins *ib = b;
+
+ return strcmp(ia->name, ib->name);
+}
+
+static void ins__sort(struct arch *arch)
+{
+ const int nmemb = arch->nr_instructions;
+
+ qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
+}
+
+static struct ins_ops *__ins__find(struct arch *arch, const char *name)
+{
+ struct ins *ins;
+ const int nmemb = arch->nr_instructions;
+
+ if (!arch->sorted_instructions) {
+ ins__sort(arch);
+ arch->sorted_instructions = true;
+ }
+
+ ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
+ if (ins)
+ return ins->ops;
+
+ if (arch->insn_suffix) {
+ char tmp[32];
+ char suffix;
+ size_t len = strlen(name);
+
+ if (len == 0 || len >= sizeof(tmp))
+ return NULL;
+
+ suffix = name[len - 1];
+ if (strchr(arch->insn_suffix, suffix) == NULL)
+ return NULL;
+
+ strcpy(tmp, name);
+ tmp[len - 1] = '\0'; /* remove the suffix and check again */
+
+ ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
+ }
+ return ins ? ins->ops : NULL;
+}
+
+struct ins_ops *ins__find(struct arch *arch, const char *name)
+{
+ struct ins_ops *ops = __ins__find(arch, name);
+
+ if (!ops && arch->associate_instruction_ops)
+ ops = arch->associate_instruction_ops(arch, name);
+
+ return ops;
+}
+
+static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
+{
+ dl->ins.ops = ins__find(arch, dl->ins.name);
+
+ if (!dl->ins.ops)
+ return;
+
+ if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0)
+ dl->ins.ops = NULL;
+}
+
+static int disasm_line__parse(char *line, const char **namep, char **rawp)
+{
+ char tmp, *name = skip_spaces(line);
+
+ if (name[0] == '\0')
+ return -1;
+
+ *rawp = name + 1;
+
+ while ((*rawp)[0] != '\0' && !isspace((*rawp)[0]))
+ ++*rawp;
+
+ tmp = (*rawp)[0];
+ (*rawp)[0] = '\0';
+ *namep = strdup(name);
+
+ if (*namep == NULL)
+ goto out;
+
+ (*rawp)[0] = tmp;
+ *rawp = strim(*rawp);
+
+ return 0;
+
+out:
+ return -1;
+}
+
+static void annotation_line__init(struct annotation_line *al,
+ struct annotate_args *args,
+ int nr)
+{
+ al->offset = args->offset;
+ al->line = strdup(args->line);
+ al->line_nr = args->line_nr;
+ al->fileloc = args->fileloc;
+ al->data_nr = nr;
+}
+
+static void annotation_line__exit(struct annotation_line *al)
+{
+ zfree_srcline(&al->path);
+ zfree(&al->line);
+ zfree(&al->cycles);
+}
+
+static size_t disasm_line_size(int nr)
+{
+ struct annotation_line *al;
+
+ return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr));
+}
+
+/*
+ * Allocating the disasm annotation line data with
+ * following structure:
+ *
+ * -------------------------------------------
+ * struct disasm_line | struct annotation_line
+ * -------------------------------------------
+ *
+ * We have 'struct annotation_line' member as last member
+ * of 'struct disasm_line' to have an easy access.
+ */
+struct disasm_line *disasm_line__new(struct annotate_args *args)
+{
+ struct disasm_line *dl = NULL;
+ int nr = 1;
+
+ if (evsel__is_group_event(args->evsel))
+ nr = args->evsel->core.nr_members;
+
+ dl = zalloc(disasm_line_size(nr));
+ if (!dl)
+ return NULL;
+
+ annotation_line__init(&dl->al, args, nr);
+ if (dl->al.line == NULL)
+ goto out_delete;
+
+ if (args->offset != -1) {
+ if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+ goto out_free_line;
+
+ disasm_line__init_ins(dl, args->arch, &args->ms);
+ }
+
+ return dl;
+
+out_free_line:
+ zfree(&dl->al.line);
+out_delete:
+ free(dl);
+ return NULL;
+}
+
+void disasm_line__free(struct disasm_line *dl)
+{
+ if (dl->ins.ops && dl->ins.ops->free)
+ dl->ins.ops->free(&dl->ops);
+ else
+ ins_ops__delete(&dl->ops);
+ zfree(&dl->ins.name);
+ annotation_line__exit(&dl->al);
+ free(dl);
+}
+
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name)
+{
+ if (raw || !dl->ins.ops)
+ return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw);
+
+ return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name);
+}
+
+/*
+ * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw)
+ * which looks like following
+ *
+ * 0000000000415500 <_init>:
+ * 415500: sub $0x8,%rsp
+ * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8>
+ * 41550b: test %rax,%rax
+ * 41550e: je 415515 <_init+0x15>
+ * 415510: callq 416e70 <__gmon_start__@plt>
+ * 415515: add $0x8,%rsp
+ * 415519: retq
+ *
+ * it will be parsed and saved into struct disasm_line as
+ * <offset> <name> <ops.raw>
+ *
+ * The offset will be a relative offset from the start of the symbol and -1
+ * means that it's not a disassembly line so should be treated differently.
+ * The ops.raw part will be parsed further according to type of the instruction.
+ */
+static int symbol__parse_objdump_line(struct symbol *sym,
+ struct annotate_args *args,
+ char *parsed_line, int *line_nr, char **fileloc)
+{
+ struct map *map = args->ms.map;
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *dl;
+ char *tmp;
+ s64 line_ip, offset = -1;
+ regmatch_t match[2];
+
+ /* /filename:linenr ? Save line number and ignore. */
+ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
+ *line_nr = atoi(parsed_line + match[1].rm_so);
+ free(*fileloc);
+ *fileloc = strdup(parsed_line);
+ return 0;
+ }
+
+ /* Process hex address followed by ':'. */
+ line_ip = strtoull(parsed_line, &tmp, 16);
+ if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') {
+ u64 start = map__rip_2objdump(map, sym->start),
+ end = map__rip_2objdump(map, sym->end);
+
+ offset = line_ip - start;
+ if ((u64)line_ip < start || (u64)line_ip >= end)
+ offset = -1;
+ else
+ parsed_line = tmp + 1;
+ }
+
+ args->offset = offset;
+ args->line = parsed_line;
+ args->line_nr = *line_nr;
+ args->fileloc = *fileloc;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ (*line_nr)++;
+
+ if (dl == NULL)
+ return -1;
+
+ if (!disasm_line__has_local_offset(dl)) {
+ dl->ops.target.offset = dl->ops.target.addr -
+ map__rip_2objdump(map, sym->start);
+ dl->ops.target.offset_avail = true;
+ }
+
+ /* kcore has no symbols, so add the call target symbol */
+ if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) {
+ struct addr_map_symbol target = {
+ .addr = dl->ops.target.addr,
+ .ms = { .map = map, },
+ };
+
+ if (!maps__find_ams(args->ms.maps, &target) &&
+ target.ms.sym->start == target.al_addr)
+ dl->ops.target.sym = target.ms.sym;
+ }
+
+ annotation_line__add(&dl->al, &notes->src->source);
+ return 0;
+}
+
+static void delete_last_nop(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct list_head *list = &notes->src->source;
+ struct disasm_line *dl;
+
+ while (!list_empty(list)) {
+ dl = list_entry(list->prev, struct disasm_line, al.node);
+
+ if (dl->ins.ops) {
+ if (!ins__is_nop(&dl->ins))
+ return;
+ } else {
+ if (!strstr(dl->al.line, " nop ") &&
+ !strstr(dl->al.line, " nopl ") &&
+ !strstr(dl->al.line, " nopw "))
+ return;
+ }
+
+ list_del_init(&dl->al.node);
+ disasm_line__free(dl);
+ }
+}
+
+int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen)
+{
+ struct dso *dso = map__dso(ms->map);
+
+ BUG_ON(buflen == 0);
+
+ if (errnum >= 0) {
+ str_error_r(errnum, buf, buflen);
+ return 0;
+ }
+
+ switch (errnum) {
+ case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: {
+ char bf[SBUILD_ID_SIZE + 15] = " with build id ";
+ char *build_id_msg = NULL;
+
+ if (dso->has_build_id) {
+ build_id__sprintf(&dso->bid, bf + 15);
+ build_id_msg = bf;
+ }
+ scnprintf(buf, buflen,
+ "No vmlinux file%s\nwas found in the path.\n\n"
+ "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
+ "Please use:\n\n"
+ " perf buildid-cache -vu vmlinux\n\n"
+ "or:\n\n"
+ " --vmlinux vmlinux\n", build_id_msg ?: "");
+ }
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF:
+ scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation");
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP:
+ scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions.");
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING:
+ scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization.");
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE:
+ scnprintf(buf, buflen, "Invalid BPF file: %s.", dso->long_name);
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF:
+ scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
+ dso->long_name);
+ break;
+ default:
+ scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
+ break;
+ }
+
+ return 0;
+}
+
+static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size)
+{
+ char linkname[PATH_MAX];
+ char *build_id_filename;
+ char *build_id_path = NULL;
+ char *pos;
+ int len;
+
+ if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+ !dso__is_kcore(dso))
+ return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
+
+ build_id_filename = dso__build_id_filename(dso, NULL, 0, false);
+ if (build_id_filename) {
+ __symbol__join_symfs(filename, filename_size, build_id_filename);
+ free(build_id_filename);
+ } else {
+ if (dso->has_build_id)
+ return ENOMEM;
+ goto fallback;
+ }
+
+ build_id_path = strdup(filename);
+ if (!build_id_path)
+ return ENOMEM;
+
+ /*
+ * old style build-id cache has name of XX/XXXXXXX.. while
+ * new style has XX/XXXXXXX../{elf,kallsyms,vdso}.
+ * extract the build-id part of dirname in the new style only.
+ */
+ pos = strrchr(build_id_path, '/');
+ if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
+ dirname(build_id_path);
+
+ if (dso__is_kcore(dso))
+ goto fallback;
+
+ len = readlink(build_id_path, linkname, sizeof(linkname) - 1);
+ if (len < 0)
+ goto fallback;
+
+ linkname[len] = '\0';
+ if (strstr(linkname, DSO__NAME_KALLSYMS) ||
+ access(filename, R_OK)) {
+fallback:
+ /*
+ * If we don't have build-ids or the build-id file isn't in the
+ * cache, or is just a kallsyms file, well, lets hope that this
+ * DSO is the same as when 'perf record' ran.
+ */
+ if (dso->kernel && dso->long_name[0] == '/')
+ snprintf(filename, filename_size, "%s", dso->long_name);
+ else
+ __symbol__join_symfs(filename, filename_size, dso->long_name);
+
+ mutex_lock(&dso->lock);
+ if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) {
+ char *new_name = dso__filename_with_chroot(dso, filename);
+ if (new_name) {
+ strlcpy(filename, new_name, filename_size);
+ free(new_name);
+ }
+ }
+ mutex_unlock(&dso->lock);
+ } else if (dso->binary_type == DSO_BINARY_TYPE__NOT_FOUND) {
+ dso->binary_type = DSO_BINARY_TYPE__BUILD_ID_CACHE;
+ }
+
+ free(build_id_path);
+ return 0;
+}
+
+#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+#define PACKAGE "perf"
+#include <bfd.h>
+#include <dis-asm.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+#include <tools/dis-asm-compat.h>
+
+#include "bpf-event.h"
+#include "bpf-utils.h"
+
+static int symbol__disassemble_bpf(struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct bpf_prog_linfo *prog_linfo = NULL;
+ struct bpf_prog_info_node *info_node;
+ int len = sym->end - sym->start;
+ disassembler_ftype disassemble;
+ struct map *map = args->ms.map;
+ struct perf_bpil *info_linear;
+ struct disassemble_info info;
+ struct dso *dso = map__dso(map);
+ int pc = 0, count, sub_id;
+ struct btf *btf = NULL;
+ char tpath[PATH_MAX];
+ size_t buf_size;
+ int nr_skip = 0;
+ char *buf;
+ bfd *bfdf;
+ int ret;
+ FILE *s;
+
+ if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
+
+ pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
+ sym->name, sym->start, sym->end - sym->start);
+
+ memset(tpath, 0, sizeof(tpath));
+ perf_exe(tpath, sizeof(tpath));
+
+ bfdf = bfd_openr(tpath, NULL);
+ if (bfdf == NULL)
+ abort();
+
+ if (!bfd_check_format(bfdf, bfd_object))
+ abort();
+
+ s = open_memstream(&buf, &buf_size);
+ if (!s) {
+ ret = errno;
+ goto out;
+ }
+ init_disassemble_info_compat(&info, s,
+ (fprintf_ftype) fprintf,
+ fprintf_styled);
+ info.arch = bfd_get_arch(bfdf);
+ info.mach = bfd_get_mach(bfdf);
+
+ info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env,
+ dso->bpf_prog.id);
+ if (!info_node) {
+ ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
+ goto out;
+ }
+ info_linear = info_node->info_linear;
+ sub_id = dso->bpf_prog.sub_id;
+
+ info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns);
+ info.buffer_length = info_linear->info.jited_prog_len;
+
+ if (info_linear->info.nr_line_info)
+ prog_linfo = bpf_prog_linfo__new(&info_linear->info);
+
+ if (info_linear->info.btf_id) {
+ struct btf_node *node;
+
+ node = perf_env__find_btf(dso->bpf_prog.env,
+ info_linear->info.btf_id);
+ if (node)
+ btf = btf__new((__u8 *)(node->data),
+ node->data_size);
+ }
+
+ disassemble_init_for_target(&info);
+
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+ disassemble = disassembler(info.arch,
+ bfd_big_endian(bfdf),
+ info.mach,
+ bfdf);
+#else
+ disassemble = disassembler(bfdf);
+#endif
+ if (disassemble == NULL)
+ abort();
+
+ fflush(s);
+ do {
+ const struct bpf_line_info *linfo = NULL;
+ struct disasm_line *dl;
+ size_t prev_buf_size;
+ const char *srcline;
+ u64 addr;
+
+ addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id];
+ count = disassemble(pc, &info);
+
+ if (prog_linfo)
+ linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo,
+ addr, sub_id,
+ nr_skip);
+
+ if (linfo && btf) {
+ srcline = btf__name_by_offset(btf, linfo->line_off);
+ nr_skip++;
+ } else
+ srcline = NULL;
+
+ fprintf(s, "\n");
+ prev_buf_size = buf_size;
+ fflush(s);
+
+ if (!annotate_opts.hide_src_code && srcline) {
+ args->offset = -1;
+ args->line = strdup(srcline);
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+ dl = disasm_line__new(args);
+ if (dl) {
+ annotation_line__add(&dl->al,
+ &notes->src->source);
+ }
+ }
+
+ args->offset = pc;
+ args->line = buf + prev_buf_size;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+ dl = disasm_line__new(args);
+ if (dl)
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ pc += count;
+ } while (count > 0 && pc < len);
+
+ ret = 0;
+out:
+ free(prog_linfo);
+ btf__free(btf);
+ fclose(s);
+ bfd_close(bfdf);
+ return ret;
+}
+#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
+ struct annotate_args *args __maybe_unused)
+{
+ return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
+}
+#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+
+static int
+symbol__disassemble_bpf_image(struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *dl;
+
+ args->offset = -1;
+ args->line = strdup("to be implemented");
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ dl = disasm_line__new(args);
+ if (dl)
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ zfree(&args->line);
+ return 0;
+}
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+#include <capstone/capstone.h>
+
+static int open_capstone_handle(struct annotate_args *args, bool is_64bit,
+ csh *handle)
+{
+ struct annotation_options *opt = args->options;
+ cs_mode mode = is_64bit ? CS_MODE_64 : CS_MODE_32;
+
+ /* TODO: support more architectures */
+ if (!arch__is(args->arch, "x86"))
+ return -1;
+
+ if (cs_open(CS_ARCH_X86, mode, handle) != CS_ERR_OK)
+ return -1;
+
+ if (!opt->disassembler_style ||
+ !strcmp(opt->disassembler_style, "att"))
+ cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
+
+ /*
+ * Resolving address operands to symbols is implemented
+ * on x86 by investigating instruction details.
+ */
+ cs_option(*handle, CS_OPT_DETAIL, CS_OPT_ON);
+
+ return 0;
+}
+
+struct find_file_offset_data {
+ u64 ip;
+ u64 offset;
+};
+
+/* This will be called for each PHDR in an ELF binary */
+static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
+{
+ struct find_file_offset_data *data = arg;
+
+ if (start <= data->ip && data->ip < start + len) {
+ data->offset = pgoff + data->ip - start;
+ return 1;
+ }
+ return 0;
+}
+
+static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
+ struct annotate_args *args, u64 addr)
+{
+ int i;
+ struct map *map = args->ms.map;
+ struct symbol *sym;
+
+ /* TODO: support more architectures */
+ if (!arch__is(args->arch, "x86"))
+ return;
+
+ if (insn->detail == NULL)
+ return;
+
+ for (i = 0; i < insn->detail->x86.op_count; i++) {
+ cs_x86_op *op = &insn->detail->x86.operands[i];
+ u64 orig_addr;
+
+ if (op->type != X86_OP_MEM)
+ continue;
+
+ /* only print RIP-based global symbols for now */
+ if (op->mem.base != X86_REG_RIP)
+ continue;
+
+ /* get the target address */
+ orig_addr = addr + insn->size + op->mem.disp;
+ addr = map__objdump_2mem(map, orig_addr);
+
+ if (map__dso(map)->kernel) {
+ /*
+ * The kernel maps can be splitted into sections,
+ * let's find the map first and the search the symbol.
+ */
+ map = maps__find(map__kmaps(map), addr);
+ if (map == NULL)
+ continue;
+ }
+
+ /* convert it to map-relative address for search */
+ addr = map__map_ip(map, addr);
+
+ sym = map__find_symbol(map, addr);
+ if (sym == NULL)
+ continue;
+
+ if (addr == sym->start) {
+ scnprintf(buf, len, "\t# %"PRIx64" <%s>",
+ orig_addr, sym->name);
+ } else {
+ scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">",
+ orig_addr, sym->name, addr - sym->start);
+ }
+ break;
+ }
+}
+
+static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct map *map = args->ms.map;
+ struct dso *dso = map__dso(map);
+ struct nscookie nsc;
+ u64 start = map__rip_2objdump(map, sym->start);
+ u64 end = map__rip_2objdump(map, sym->end);
+ u64 len = end - start;
+ u64 offset;
+ int i, fd, count;
+ bool is_64bit = false;
+ bool needs_cs_close = false;
+ u8 *buf = NULL;
+ struct find_file_offset_data data = {
+ .ip = start,
+ };
+ csh handle;
+ cs_insn *insn;
+ char disasm_buf[512];
+ struct disasm_line *dl;
+
+ if (args->options->objdump_path)
+ return -1;
+
+ nsinfo__mountns_enter(dso->nsinfo, &nsc);
+ fd = open(filename, O_RDONLY);
+ nsinfo__mountns_exit(&nsc);
+ if (fd < 0)
+ return -1;
+
+ if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
+ &is_64bit) == 0)
+ goto err;
+
+ if (open_capstone_handle(args, is_64bit, &handle) < 0)
+ goto err;
+
+ needs_cs_close = true;
+
+ buf = malloc(len);
+ if (buf == NULL)
+ goto err;
+
+ count = pread(fd, buf, len, data.offset);
+ close(fd);
+ fd = -1;
+
+ if ((u64)count != len)
+ goto err;
+
+ /* add the function address and name */
+ scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
+ start, sym->name);
+
+ args->offset = -1;
+ args->line = disasm_buf;
+ args->line_nr = 0;
+ args->fileloc = NULL;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ count = cs_disasm(handle, buf, len, start, len, &insn);
+ for (i = 0, offset = 0; i < count; i++) {
+ int printed;
+
+ printed = scnprintf(disasm_buf, sizeof(disasm_buf),
+ " %-7s %s",
+ insn[i].mnemonic, insn[i].op_str);
+ print_capstone_detail(&insn[i], disasm_buf + printed,
+ sizeof(disasm_buf) - printed, args,
+ start + offset);
+
+ args->offset = offset;
+ args->line = disasm_buf;
+
+ dl = disasm_line__new(args);
+ if (dl == NULL)
+ goto err;
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ offset += insn[i].size;
+ }
+
+ /* It failed in the middle: probably due to unknown instructions */
+ if (offset != len) {
+ struct list_head *list = &notes->src->source;
+
+ /* Discard all lines and fallback to objdump */
+ while (!list_empty(list)) {
+ dl = list_first_entry(list, struct disasm_line, al.node);
+
+ list_del_init(&dl->al.node);
+ disasm_line__free(dl);
+ }
+ count = -1;
+ }
+
+out:
+ if (needs_cs_close)
+ cs_close(&handle);
+ free(buf);
+ return count < 0 ? count : 0;
+
+err:
+ if (fd >= 0)
+ close(fd);
+ if (needs_cs_close) {
+ struct disasm_line *tmp;
+
+ /*
+ * It probably failed in the middle of the above loop.
+ * Release any resources it might add.
+ */
+ list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
+ list_del(&dl->al.node);
+ free(dl);
+ }
+ }
+ count = -1;
+ goto out;
+}
+#endif
+
+/*
+ * Possibly create a new version of line with tabs expanded. Returns the
+ * existing or new line, storage is updated if a new line is allocated. If
+ * allocation fails then NULL is returned.
+ */
+static char *expand_tabs(char *line, char **storage, size_t *storage_len)
+{
+ size_t i, src, dst, len, new_storage_len, num_tabs;
+ char *new_line;
+ size_t line_len = strlen(line);
+
+ for (num_tabs = 0, i = 0; i < line_len; i++)
+ if (line[i] == '\t')
+ num_tabs++;
+
+ if (num_tabs == 0)
+ return line;
+
+ /*
+ * Space for the line and '\0', less the leading and trailing
+ * spaces. Each tab may introduce 7 additional spaces.
+ */
+ new_storage_len = line_len + 1 + (num_tabs * 7);
+
+ new_line = malloc(new_storage_len);
+ if (new_line == NULL) {
+ pr_err("Failure allocating memory for tab expansion\n");
+ return NULL;
+ }
+
+ /*
+ * Copy regions starting at src and expand tabs. If there are two
+ * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces
+ * are inserted.
+ */
+ for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) {
+ if (line[i] == '\t') {
+ len = i - src;
+ memcpy(&new_line[dst], &line[src], len);
+ dst += len;
+ new_line[dst++] = ' ';
+ while (dst % 8 != 0)
+ new_line[dst++] = ' ';
+ src = i + 1;
+ num_tabs--;
+ }
+ }
+
+ /* Expand the last region. */
+ len = line_len - src;
+ memcpy(&new_line[dst], &line[src], len);
+ dst += len;
+ new_line[dst] = '\0';
+
+ free(*storage);
+ *storage = new_line;
+ *storage_len = new_storage_len;
+ return new_line;
+}
+
+int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
+{
+ struct annotation_options *opts = &annotate_opts;
+ struct map *map = args->ms.map;
+ struct dso *dso = map__dso(map);
+ char *command;
+ FILE *file;
+ char symfs_filename[PATH_MAX];
+ struct kcore_extract kce;
+ bool delete_extract = false;
+ bool decomp = false;
+ int lineno = 0;
+ char *fileloc = NULL;
+ int nline;
+ char *line;
+ size_t line_len;
+ const char *objdump_argv[] = {
+ "/bin/sh",
+ "-c",
+ NULL, /* Will be the objdump command to run. */
+ "--",
+ NULL, /* Will be the symfs path. */
+ NULL,
+ };
+ struct child_process objdump_process;
+ int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
+
+ if (err)
+ return err;
+
+ pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
+ symfs_filename, sym->name, map__unmap_ip(map, sym->start),
+ map__unmap_ip(map, sym->end));
+
+ pr_debug("annotating [%p] %30s : [%p] %30s\n",
+ dso, dso->long_name, sym, sym->name);
+
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) {
+ return symbol__disassemble_bpf(sym, args);
+ } else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) {
+ return symbol__disassemble_bpf_image(sym, args);
+ } else if (dso->binary_type == DSO_BINARY_TYPE__NOT_FOUND) {
+ return -1;
+ } else if (dso__is_kcore(dso)) {
+ kce.kcore_filename = symfs_filename;
+ kce.addr = map__rip_2objdump(map, sym->start);
+ kce.offs = sym->start;
+ kce.len = sym->end - sym->start;
+ if (!kcore_extract__create(&kce)) {
+ delete_extract = true;
+ strlcpy(symfs_filename, kce.extract_filename,
+ sizeof(symfs_filename));
+ }
+ } else if (dso__needs_decompress(dso)) {
+ char tmp[KMOD_DECOMP_LEN];
+
+ if (dso__decompress_kmodule_path(dso, symfs_filename,
+ tmp, sizeof(tmp)) < 0)
+ return -1;
+
+ decomp = true;
+ strcpy(symfs_filename, tmp);
+ }
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+ err = symbol__disassemble_capstone(symfs_filename, sym, args);
+ if (err == 0)
+ goto out_remove_tmp;
+#endif
+
+ err = asprintf(&command,
+ "%s %s%s --start-address=0x%016" PRIx64
+ " --stop-address=0x%016" PRIx64
+ " %s -d %s %s %s %c%s%c %s%s -C \"$1\"",
+ opts->objdump_path ?: "objdump",
+ opts->disassembler_style ? "-M " : "",
+ opts->disassembler_style ?: "",
+ map__rip_2objdump(map, sym->start),
+ map__rip_2objdump(map, sym->end),
+ opts->show_linenr ? "-l" : "",
+ opts->show_asm_raw ? "" : "--no-show-raw-insn",
+ opts->annotate_src ? "-S" : "",
+ opts->prefix ? "--prefix " : "",
+ opts->prefix ? '"' : ' ',
+ opts->prefix ?: "",
+ opts->prefix ? '"' : ' ',
+ opts->prefix_strip ? "--prefix-strip=" : "",
+ opts->prefix_strip ?: "");
+
+ if (err < 0) {
+ pr_err("Failure allocating memory for the command to run\n");
+ goto out_remove_tmp;
+ }
+
+ pr_debug("Executing: %s\n", command);
+
+ objdump_argv[2] = command;
+ objdump_argv[4] = symfs_filename;
+
+ /* Create a pipe to read from for stdout */
+ memset(&objdump_process, 0, sizeof(objdump_process));
+ objdump_process.argv = objdump_argv;
+ objdump_process.out = -1;
+ objdump_process.err = -1;
+ objdump_process.no_stderr = 1;
+ if (start_command(&objdump_process)) {
+ pr_err("Failure starting to run %s\n", command);
+ err = -1;
+ goto out_free_command;
+ }
+
+ file = fdopen(objdump_process.out, "r");
+ if (!file) {
+ pr_err("Failure creating FILE stream for %s\n", command);
+ /*
+ * If we were using debug info should retry with
+ * original binary.
+ */
+ err = -1;
+ goto out_close_stdout;
+ }
+
+ /* Storage for getline. */
+ line = NULL;
+ line_len = 0;
+
+ nline = 0;
+ while (!feof(file)) {
+ const char *match;
+ char *expanded_line;
+
+ if (getline(&line, &line_len, file) < 0 || !line)
+ break;
+
+ /* Skip lines containing "filename:" */
+ match = strstr(line, symfs_filename);
+ if (match && match[strlen(symfs_filename)] == ':')
+ continue;
+
+ expanded_line = strim(line);
+ expanded_line = expand_tabs(expanded_line, &line, &line_len);
+ if (!expanded_line)
+ break;
+
+ /*
+ * The source code line number (lineno) needs to be kept in
+ * across calls to symbol__parse_objdump_line(), so that it
+ * can associate it with the instructions till the next one.
+ * See disasm_line__new() and struct disasm_line::line_nr.
+ */
+ if (symbol__parse_objdump_line(sym, args, expanded_line,
+ &lineno, &fileloc) < 0)
+ break;
+ nline++;
+ }
+ free(line);
+ free(fileloc);
+
+ err = finish_command(&objdump_process);
+ if (err)
+ pr_err("Error running %s\n", command);
+
+ if (nline == 0) {
+ err = -1;
+ pr_err("No output from %s\n", command);
+ }
+
+ /*
+ * kallsyms does not have symbol sizes so there may a nop at the end.
+ * Remove it.
+ */
+ if (dso__is_kcore(dso))
+ delete_last_nop(sym);
+
+ fclose(file);
+
+out_close_stdout:
+ close(objdump_process.out);
+
+out_free_command:
+ free(command);
+
+out_remove_tmp:
+ if (decomp)
+ unlink(symfs_filename);
+
+ if (delete_extract)
+ kcore_extract__delete(&kce);
+
+ return err;
+}
diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h
new file mode 100644
index 00000000000000..3d381a0435201a
--- /dev/null
+++ b/tools/perf/util/disasm.h
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __PERF_UTIL_DISASM_H
+#define __PERF_UTIL_DISASM_H
+
+#include "map_symbol.h"
+
+struct annotation_options;
+struct disasm_line;
+struct ins;
+struct evsel;
+struct symbol;
+
+struct arch {
+ const char *name;
+ struct ins *instructions;
+ size_t nr_instructions;
+ size_t nr_instructions_allocated;
+ struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name);
+ bool sorted_instructions;
+ bool initialized;
+ const char *insn_suffix;
+ void *priv;
+ unsigned int model;
+ unsigned int family;
+ int (*init)(struct arch *arch, char *cpuid);
+ bool (*ins_is_fused)(struct arch *arch, const char *ins1,
+ const char *ins2);
+ struct {
+ char comment_char;
+ char skip_functions_char;
+ char register_char;
+ char memory_ref_char;
+ char imm_char;
+ } objdump;
+};
+
+struct ins {
+ const char *name;
+ struct ins_ops *ops;
+};
+
+struct ins_operands {
+ char *raw;
+ struct {
+ char *raw;
+ char *name;
+ struct symbol *sym;
+ u64 addr;
+ s64 offset;
+ bool offset_avail;
+ bool outside;
+ bool multi_regs;
+ } target;
+ union {
+ struct {
+ char *raw;
+ char *name;
+ u64 addr;
+ bool multi_regs;
+ } source;
+ struct {
+ struct ins ins;
+ struct ins_operands *ops;
+ } locked;
+ struct {
+ char *raw_comment;
+ char *raw_func_start;
+ } jump;
+ };
+};
+
+struct ins_ops {
+ void (*free)(struct ins_operands *ops);
+ int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms);
+ int (*scnprintf)(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name);
+};
+
+struct annotate_args {
+ struct arch *arch;
+ struct map_symbol ms;
+ struct evsel *evsel;
+ struct annotation_options *options;
+ s64 offset;
+ char *line;
+ int line_nr;
+ char *fileloc;
+};
+
+struct arch *arch__find(const char *name);
+bool arch__is(struct arch *arch, const char *name);
+
+struct ins_ops *ins__find(struct arch *arch, const char *name);
+int ins__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name);
+
+bool ins__is_call(const struct ins *ins);
+bool ins__is_jump(const struct ins *ins);
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
+bool ins__is_nop(const struct ins *ins);
+bool ins__is_ret(const struct ins *ins);
+bool ins__is_lock(const struct ins *ins);
+
+struct disasm_line *disasm_line__new(struct annotate_args *args);
+void disasm_line__free(struct disasm_line *dl);
+
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size,
+ bool raw, int max_ins_name);
+
+int symbol__disassemble(struct symbol *sym, struct annotate_args *args);
+
+#endif /* __PERF_UTIL_DISASM_H */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 22fd5fa806ed8f..ad562743d769eb 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1269,6 +1269,67 @@ static void dso__set_long_name_id(struct dso *dso, const char *name, struct dso_
__dsos__findnew_link_by_longname_id(root, dso, NULL, id);
}
+static int __dso_id__cmp(struct dso_id *a, struct dso_id *b)
+{
+ if (a->maj > b->maj) return -1;
+ if (a->maj < b->maj) return 1;
+
+ if (a->min > b->min) return -1;
+ if (a->min < b->min) return 1;
+
+ if (a->ino > b->ino) return -1;
+ if (a->ino < b->ino) return 1;
+
+ /*
+ * Synthesized MMAP events have zero ino_generation, avoid comparing
+ * them with MMAP events with actual ino_generation.
+ *
+ * I found it harmful because the mismatch resulted in a new
+ * dso that did not have a build ID whereas the original dso did have a
+ * build ID. The build ID was essential because the object was not found
+ * otherwise. - Adrian
+ */
+ if (a->ino_generation && b->ino_generation) {
+ if (a->ino_generation > b->ino_generation) return -1;
+ if (a->ino_generation < b->ino_generation) return 1;
+ }
+
+ return 0;
+}
+
+bool dso_id__empty(struct dso_id *id)
+{
+ if (!id)
+ return true;
+
+ return !id->maj && !id->min && !id->ino && !id->ino_generation;
+}
+
+void dso__inject_id(struct dso *dso, struct dso_id *id)
+{
+ dso->id.maj = id->maj;
+ dso->id.min = id->min;
+ dso->id.ino = id->ino;
+ dso->id.ino_generation = id->ino_generation;
+}
+
+int dso_id__cmp(struct dso_id *a, struct dso_id *b)
+{
+ /*
+ * The second is always dso->id, so zeroes if not set, assume passing
+ * NULL for a means a zeroed id
+ */
+ if (dso_id__empty(a) || dso_id__empty(b))
+ return 0;
+
+ return __dso_id__cmp(a, b);
+}
+
+int dso__cmp_id(struct dso *a, struct dso *b)
+{
+ return __dso_id__cmp(&a->id, &b->id);
+}
+
void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
{
dso__set_long_name_id(dso, name, NULL, name_allocated);
@@ -1329,6 +1390,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id)
dso->inlined_nodes = RB_ROOT_CACHED;
dso->srclines = RB_ROOT_CACHED;
dso->data_types = RB_ROOT;
+ dso->global_vars = RB_ROOT;
dso->data.fd = -1;
dso->data.status = DSO_DATA_STATUS_UNKNOWN;
dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
@@ -1373,6 +1435,7 @@ void dso__delete(struct dso *dso)
dso->symbol_names_len = 0;
zfree(&dso->symbol_names);
annotated_data_type__tree_delete(&dso->data_types);
+ global_var_type__tree_delete(&dso->global_vars);
if (dso->short_name_allocated) {
zfree((char **)&dso->short_name);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index ce9f3849a773cc..2c295438226d7a 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -154,72 +154,73 @@ struct dso {
size_t symbol_names_len;
struct rb_root_cached inlined_nodes;
struct rb_root_cached srclines;
- struct rb_root data_types;
+ struct rb_root data_types;
+ struct rb_root global_vars;
struct {
u64 addr;
struct symbol *symbol;
} last_find_result;
- void *a2l;
- char *symsrc_filename;
- unsigned int a2l_fails;
- enum dso_space_type kernel;
- bool is_kmod;
- enum dso_swap_type needs_swap;
- enum dso_binary_type symtab_type;
- enum dso_binary_type binary_type;
- enum dso_load_errno load_errno;
- u8 adjust_symbols:1;
- u8 has_build_id:1;
- u8 header_build_id:1;
- u8 has_srcline:1;
- u8 hit:1;
- u8 annotate_warned:1;
- u8 auxtrace_warned:1;
- u8 short_name_allocated:1;
- u8 long_name_allocated:1;
- u8 is_64_bit:1;
- bool sorted_by_name;
- bool loaded;
- u8 rel;
struct build_id bid;
u64 text_offset;
u64 text_end;
const char *short_name;
const char *long_name;
- u16 long_name_len;
- u16 short_name_len;
+ void *a2l;
+ char *symsrc_filename;
+#if defined(__powerpc__)
void *dwfl; /* DWARF debug info */
+#endif
+ struct nsinfo *nsinfo;
struct auxtrace_cache *auxtrace_cache;
- int comp;
-
+ union { /* Tool specific area */
+ void *priv;
+ u64 db_id;
+ };
+ /* bpf prog information */
+ struct {
+ struct perf_env *env;
+ u32 id;
+ u32 sub_id;
+ } bpf_prog;
/* dso data file */
struct {
struct rb_root cache;
- int fd;
- int status;
- u32 status_seen;
- u64 file_size;
struct list_head open_entry;
+ u64 file_size;
u64 elf_base_addr;
u64 debug_frame_offset;
u64 eh_frame_hdr_addr;
u64 eh_frame_hdr_offset;
+ int fd;
+ int status;
+ u32 status_seen;
} data;
- /* bpf prog information */
- struct {
- u32 id;
- u32 sub_id;
- struct perf_env *env;
- } bpf_prog;
-
- union { /* Tool specific area */
- void *priv;
- u64 db_id;
- };
- struct nsinfo *nsinfo;
struct dso_id id;
+ unsigned int a2l_fails;
+ int comp;
refcount_t refcnt;
+ enum dso_load_errno load_errno;
+ u16 long_name_len;
+ u16 short_name_len;
+ enum dso_binary_type symtab_type:8;
+ enum dso_binary_type binary_type:8;
+ enum dso_space_type kernel:2;
+ enum dso_swap_type needs_swap:2;
+ bool is_kmod:1;
+ u8 adjust_symbols:1;
+ u8 has_build_id:1;
+ u8 header_build_id:1;
+ u8 has_srcline:1;
+ u8 hit:1;
+ u8 annotate_warned:1;
+ u8 auxtrace_warned:1;
+ u8 short_name_allocated:1;
+ u8 long_name_allocated:1;
+ u8 is_64_bit:1;
+ bool sorted_by_name;
+ bool loaded;
+ u8 rel;
char name[];
};
@@ -232,17 +233,14 @@ struct dso {
#define dso__for_each_symbol(dso, pos, n) \
symbols__for_each_entry(&(dso)->symbols, pos, n)
-#define dsos__for_each_with_build_id(pos, head) \
- list_for_each_entry(pos, head, node) \
- if (!pos->has_build_id) \
- continue; \
- else
-
static inline void dso__set_loaded(struct dso *dso)
{
dso->loaded = true;
}
+int dso_id__cmp(struct dso_id *a, struct dso_id *b);
+bool dso_id__empty(struct dso_id *id);
+
struct dso *dso__new_id(const char *name, struct dso_id *id);
struct dso *dso__new(const char *name);
void dso__delete(struct dso *dso);
@@ -250,6 +248,7 @@ void dso__delete(struct dso *dso);
int dso__cmp_id(struct dso *a, struct dso *b);
void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated);
void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
+void dso__inject_id(struct dso *dso, struct dso_id *id);
int dso__name_len(const struct dso *dso);
@@ -411,4 +410,7 @@ int dso__strerror_load(struct dso *dso, char *buf, size_t buflen);
void reset_fd_limit(void);
+u64 dso__find_global_type(struct dso *dso, u64 addr);
+u64 dso__findnew_global_type(struct dso *dso, u64 addr, u64 offset);
+
#endif /* __PERF_DSO */
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index cf80aa42dd07b0..b7fbfb877ae365 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -12,98 +12,94 @@
#include <symbol.h> // filename__read_build_id
#include <unistd.h>
-static int __dso_id__cmp(struct dso_id *a, struct dso_id *b)
+void dsos__init(struct dsos *dsos)
{
- if (a->maj > b->maj) return -1;
- if (a->maj < b->maj) return 1;
+ INIT_LIST_HEAD(&dsos->head);
+ dsos->root = RB_ROOT;
+ init_rwsem(&dsos->lock);
+}
- if (a->min > b->min) return -1;
- if (a->min < b->min) return 1;
+static void dsos__purge(struct dsos *dsos)
+{
+ struct dso *pos, *n;
- if (a->ino > b->ino) return -1;
- if (a->ino < b->ino) return 1;
+ down_write(&dsos->lock);
- /*
- * Synthesized MMAP events have zero ino_generation, avoid comparing
- * them with MMAP events with actual ino_generation.
- *
- * I found it harmful because the mismatch resulted in a new
- * dso that did not have a build ID whereas the original dso did have a
- * build ID. The build ID was essential because the object was not found
- * otherwise. - Adrian
- */
- if (a->ino_generation && b->ino_generation) {
- if (a->ino_generation > b->ino_generation) return -1;
- if (a->ino_generation < b->ino_generation) return 1;
+ list_for_each_entry_safe(pos, n, &dsos->head, node) {
+ RB_CLEAR_NODE(&pos->rb_node);
+ pos->root = NULL;
+ list_del_init(&pos->node);
+ dso__put(pos);
}
- return 0;
+ up_write(&dsos->lock);
}
-static bool dso_id__empty(struct dso_id *id)
+void dsos__exit(struct dsos *dsos)
{
- if (!id)
- return true;
-
- return !id->maj && !id->min && !id->ino && !id->ino_generation;
+ dsos__purge(dsos);
+ exit_rwsem(&dsos->lock);
}
-static void dso__inject_id(struct dso *dso, struct dso_id *id)
-{
- dso->id.maj = id->maj;
- dso->id.min = id->min;
- dso->id.ino = id->ino;
- dso->id.ino_generation = id->ino_generation;
-}
-static int dso_id__cmp(struct dso_id *a, struct dso_id *b)
+static int __dsos__for_each_dso(struct dsos *dsos,
+ int (*cb)(struct dso *dso, void *data),
+ void *data)
{
- /*
- * The second is always dso->id, so zeroes if not set, assume passing
- * NULL for a means a zeroed id
- */
- if (dso_id__empty(a) || dso_id__empty(b))
- return 0;
+ struct dso *dso;
- return __dso_id__cmp(a, b);
-}
+ list_for_each_entry(dso, &dsos->head, node) {
+ int err;
-int dso__cmp_id(struct dso *a, struct dso *b)
-{
- return __dso_id__cmp(&a->id, &b->id);
+ err = cb(dso, data);
+ if (err)
+ return err;
+ }
+ return 0;
}
-bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
+struct dsos__read_build_ids_cb_args {
+ bool with_hits;
+ bool have_build_id;
+};
+
+static int dsos__read_build_ids_cb(struct dso *dso, void *data)
{
- bool have_build_id = false;
- struct dso *pos;
+ struct dsos__read_build_ids_cb_args *args = data;
struct nscookie nsc;
- list_for_each_entry(pos, head, node) {
- if (with_hits && !pos->hit && !dso__is_vdso(pos))
- continue;
- if (pos->has_build_id) {
- have_build_id = true;
- continue;
- }
- nsinfo__mountns_enter(pos->nsinfo, &nsc);
- if (filename__read_build_id(pos->long_name, &pos->bid) > 0) {
- have_build_id = true;
- pos->has_build_id = true;
- } else if (errno == ENOENT && pos->nsinfo) {
- char *new_name = dso__filename_with_chroot(pos, pos->long_name);
-
- if (new_name && filename__read_build_id(new_name,
- &pos->bid) > 0) {
- have_build_id = true;
- pos->has_build_id = true;
- }
- free(new_name);
+ if (args->with_hits && !dso->hit && !dso__is_vdso(dso))
+ return 0;
+ if (dso->has_build_id) {
+ args->have_build_id = true;
+ return 0;
+ }
+ nsinfo__mountns_enter(dso->nsinfo, &nsc);
+ if (filename__read_build_id(dso->long_name, &dso->bid) > 0) {
+ args->have_build_id = true;
+ dso->has_build_id = true;
+ } else if (errno == ENOENT && dso->nsinfo) {
+ char *new_name = dso__filename_with_chroot(dso, dso->long_name);
+
+ if (new_name && filename__read_build_id(new_name, &dso->bid) > 0) {
+ args->have_build_id = true;
+ dso->has_build_id = true;
}
- nsinfo__mountns_exit(&nsc);
+ free(new_name);
}
+ nsinfo__mountns_exit(&nsc);
+ return 0;
+}
- return have_build_id;
+bool dsos__read_build_ids(struct dsos *dsos, bool with_hits)
+{
+ struct dsos__read_build_ids_cb_args args = {
+ .with_hits = with_hits,
+ .have_build_id = false,
+ };
+
+ dsos__for_each_dso(dsos, dsos__read_build_ids_cb, &args);
+ return args.have_build_id;
}
static int __dso__cmp_long_name(const char *long_name, struct dso_id *id, struct dso *b)
@@ -136,6 +132,7 @@ struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso
if (!name)
name = dso->long_name;
+
/*
* Find node with the matching name
*/
@@ -151,7 +148,7 @@ struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso
* at the end of the list of duplicates.
*/
if (!dso || (dso == this))
- return this; /* Find matching dso */
+ return dso__get(this); /* Find matching dso */
/*
* The core kernel DSOs may have duplicated long name.
* In this case, the short name should be different.
@@ -216,22 +213,50 @@ static struct dso *__dsos__findnew_by_longname_id(struct rb_root *root, const ch
return __dsos__findnew_link_by_longname_id(root, NULL, name, id);
}
+struct dsos__find_id_cb_args {
+ const char *name;
+ struct dso_id *id;
+ struct dso *res;
+};
+
+static int dsos__find_id_cb(struct dso *dso, void *data)
+{
+ struct dsos__find_id_cb_args *args = data;
+
+ if (__dso__cmp_short_name(args->name, args->id, dso) == 0) {
+ args->res = dso__get(dso);
+ return 1;
+ }
+ return 0;
+
+}
+
static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, struct dso_id *id, bool cmp_short)
{
- struct dso *pos;
+ struct dso *res;
if (cmp_short) {
- list_for_each_entry(pos, &dsos->head, node)
- if (__dso__cmp_short_name(name, id, pos) == 0)
- return pos;
- return NULL;
+ struct dsos__find_id_cb_args args = {
+ .name = name,
+ .id = id,
+ .res = NULL,
+ };
+
+ __dsos__for_each_dso(dsos, dsos__find_id_cb, &args);
+ return args.res;
}
- return __dsos__findnew_by_longname_id(&dsos->root, name, id);
+ res = __dsos__findnew_by_longname_id(&dsos->root, name, id);
+ return res;
}
-struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
{
- return __dsos__find_id(dsos, name, NULL, cmp_short);
+ struct dso *res;
+
+ down_read(&dsos->lock);
+ res = __dsos__find_id(dsos, name, NULL, cmp_short);
+ up_read(&dsos->lock);
+ return res;
}
static void dso__set_basename(struct dso *dso)
@@ -273,8 +298,6 @@ static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct
if (dso != NULL) {
__dsos__add(dsos, dso);
dso__set_basename(dso);
- /* Put dso here because __dsos_add already got it */
- dso__put(dso);
}
return dso;
}
@@ -298,36 +321,142 @@ struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id
{
struct dso *dso;
down_write(&dsos->lock);
- dso = dso__get(__dsos__findnew_id(dsos, name, id));
+ dso = __dsos__findnew_id(dsos, name, id);
up_write(&dsos->lock);
return dso;
}
-size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
- bool (skip)(struct dso *dso, int parm), int parm)
+struct dsos__fprintf_buildid_cb_args {
+ FILE *fp;
+ bool (*skip)(struct dso *dso, int parm);
+ int parm;
+ size_t ret;
+};
+
+static int dsos__fprintf_buildid_cb(struct dso *dso, void *data)
{
- struct dso *pos;
- size_t ret = 0;
+ struct dsos__fprintf_buildid_cb_args *args = data;
+ char sbuild_id[SBUILD_ID_SIZE];
- list_for_each_entry(pos, head, node) {
- char sbuild_id[SBUILD_ID_SIZE];
+ if (args->skip && args->skip(dso, args->parm))
+ return 0;
+ build_id__sprintf(&dso->bid, sbuild_id);
+ args->ret += fprintf(args->fp, "%-40s %s\n", sbuild_id, dso->long_name);
+ return 0;
+}
- if (skip && skip(pos, parm))
- continue;
- build_id__sprintf(&pos->bid, sbuild_id);
- ret += fprintf(fp, "%-40s %s\n", sbuild_id, pos->long_name);
- }
- return ret;
+size_t dsos__fprintf_buildid(struct dsos *dsos, FILE *fp,
+ bool (*skip)(struct dso *dso, int parm), int parm)
+{
+ struct dsos__fprintf_buildid_cb_args args = {
+ .fp = fp,
+ .skip = skip,
+ .parm = parm,
+ .ret = 0,
+ };
+
+ dsos__for_each_dso(dsos, dsos__fprintf_buildid_cb, &args);
+ return args.ret;
}
-size_t __dsos__fprintf(struct list_head *head, FILE *fp)
+struct dsos__fprintf_cb_args {
+ FILE *fp;
+ size_t ret;
+};
+
+static int dsos__fprintf_cb(struct dso *dso, void *data)
{
- struct dso *pos;
- size_t ret = 0;
+ struct dsos__fprintf_cb_args *args = data;
+
+ args->ret += dso__fprintf(dso, args->fp);
+ return 0;
+}
+
+size_t dsos__fprintf(struct dsos *dsos, FILE *fp)
+{
+ struct dsos__fprintf_cb_args args = {
+ .fp = fp,
+ .ret = 0,
+ };
+
+ dsos__for_each_dso(dsos, dsos__fprintf_cb, &args);
+ return args.ret;
+}
+
+static int dsos__hit_all_cb(struct dso *dso, void *data __maybe_unused)
+{
+ dso->hit = true;
+ return 0;
+}
+
+int dsos__hit_all(struct dsos *dsos)
+{
+ return dsos__for_each_dso(dsos, dsos__hit_all_cb, NULL);
+}
+
+struct dso *dsos__findnew_module_dso(struct dsos *dsos,
+ struct machine *machine,
+ struct kmod_path *m,
+ const char *filename)
+{
+ struct dso *dso;
+
+ down_write(&dsos->lock);
+
+ dso = __dsos__find_id(dsos, m->name, NULL, /*cmp_short=*/true);
+ if (!dso) {
+ dso = __dsos__addnew(dsos, m->name);
+ if (dso == NULL)
+ goto out_unlock;
- list_for_each_entry(pos, head, node) {
- ret += dso__fprintf(pos, fp);
+ dso__set_module_info(dso, m, machine);
+ dso__set_long_name(dso, strdup(filename), true);
+ dso->kernel = DSO_SPACE__KERNEL;
}
- return ret;
+out_unlock:
+ up_write(&dsos->lock);
+ return dso;
+}
+
+static int dsos__find_kernel_dso_cb(struct dso *dso, void *data)
+{
+ struct dso **res = data;
+ /*
+ * The cpumode passed to is_kernel_module is not the cpumode of *this*
+ * event. If we insist on passing correct cpumode to is_kernel_module,
+ * we should record the cpumode when we adding this dso to the linked
+ * list.
+ *
+ * However we don't really need passing correct cpumode. We know the
+ * correct cpumode must be kernel mode (if not, we should not link it
+ * onto kernel_dsos list).
+ *
+ * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN.
+ * is_kernel_module() treats it as a kernel cpumode.
+ */
+ if (!dso->kernel ||
+ is_kernel_module(dso->long_name, PERF_RECORD_MISC_CPUMODE_UNKNOWN))
+ return 0;
+
+ *res = dso__get(dso);
+ return 1;
+}
+
+struct dso *dsos__find_kernel_dso(struct dsos *dsos)
+{
+ struct dso *res = NULL;
+
+ dsos__for_each_dso(dsos, dsos__find_kernel_dso_cb, &res);
+ return res;
+}
+
+int dsos__for_each_dso(struct dsos *dsos, int (*cb)(struct dso *dso, void *data), void *data)
+{
+ int err;
+
+ down_read(&dsos->lock);
+ err = __dsos__for_each_dso(dsos, cb, data);
+ up_read(&dsos->lock);
+ return err;
}
diff --git a/tools/perf/util/dsos.h b/tools/perf/util/dsos.h
index 5dbec2bc6966d4..50bd5152347552 100644
--- a/tools/perf/util/dsos.h
+++ b/tools/perf/util/dsos.h
@@ -10,6 +10,8 @@
struct dso;
struct dso_id;
+struct kmod_path;
+struct machine;
/*
* DSOs are put into both a list for fast iteration and rbtree for fast
@@ -21,20 +23,32 @@ struct dsos {
struct rw_semaphore lock;
};
+void dsos__init(struct dsos *dsos);
+void dsos__exit(struct dsos *dsos);
+
void __dsos__add(struct dsos *dsos, struct dso *dso);
void dsos__add(struct dsos *dsos, struct dso *dso);
struct dso *__dsos__addnew(struct dsos *dsos, const char *name);
-struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id);
+bool dsos__read_build_ids(struct dsos *dsos, bool with_hits);
+
struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso,
const char *name, struct dso_id *id);
-bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
-
-size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+size_t dsos__fprintf_buildid(struct dsos *dsos, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm);
-size_t __dsos__fprintf(struct list_head *head, FILE *fp);
+size_t dsos__fprintf(struct dsos *dsos, FILE *fp);
+
+int dsos__hit_all(struct dsos *dsos);
+
+struct dso *dsos__findnew_module_dso(struct dsos *dsos, struct machine *machine,
+ struct kmod_path *m, const char *filename);
+
+struct dso *dsos__find_kernel_dso(struct dsos *dsos);
+
+int dsos__for_each_dso(struct dsos *dsos, int (*cb)(struct dso *dso, void *data), void *data);
#endif /* __PERF_DSOS */
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
index 65012506153093..4a7797dd6d0924 100644
--- a/tools/perf/util/dump-insn.h
+++ b/tools/perf/util/dump-insn.h
@@ -11,6 +11,7 @@ struct thread;
struct perf_insn {
/* Initialized by callers: */
struct thread *thread;
+ struct machine *machine;
u8 cpumode;
bool is64bit;
int cpu;
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 2791126069b4f8..40cfbdfe2d752e 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -9,6 +9,7 @@
#include <stdlib.h>
#include "debug.h"
#include "dwarf-aux.h"
+#include "dwarf-regs.h"
#include "strbuf.h"
#include "string2.h"
@@ -696,6 +697,49 @@ Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
return die_mem;
}
+static int __die_find_func_rettype_cb(Dwarf_Die *die_mem, void *data)
+{
+ const char *func_name;
+
+ if (dwarf_tag(die_mem) != DW_TAG_subprogram)
+ return DIE_FIND_CB_SIBLING;
+
+ func_name = dwarf_diename(die_mem);
+ if (func_name && !strcmp(func_name, data))
+ return DIE_FIND_CB_END;
+
+ return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_func_rettype - Search a return type of function
+ * @cu_die: a CU DIE
+ * @name: target function name
+ * @die_mem: a buffer for result DIE
+ *
+ * Search a non-inlined function which matches to @name and stores the
+ * return type of the function to @die_mem and returns it if found.
+ * Returns NULL if failed. Note that it doesn't needs to find a
+ * definition of the function, so it doesn't match with address.
+ * Most likely, it can find a declaration at the top level. Thus the
+ * callback function continues to sibling entries only.
+ */
+Dwarf_Die *die_find_func_rettype(Dwarf_Die *cu_die, const char *name,
+ Dwarf_Die *die_mem)
+{
+ Dwarf_Die tmp_die;
+
+ cu_die = die_find_child(cu_die, __die_find_func_rettype_cb,
+ (void *)name, &tmp_die);
+ if (!cu_die)
+ return NULL;
+
+ if (die_get_real_type(&tmp_die, die_mem) == NULL)
+ return NULL;
+
+ return die_mem;
+}
+
struct __instance_walk_param {
void *addr;
int (*callback)(Dwarf_Die *, void *);
@@ -1136,6 +1180,43 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
}
+#if defined(HAVE_DWARF_GETLOCATIONS_SUPPORT) || defined(HAVE_DWARF_CFI_SUPPORT)
+static int reg_from_dwarf_op(Dwarf_Op *op)
+{
+ switch (op->atom) {
+ case DW_OP_reg0 ... DW_OP_reg31:
+ return op->atom - DW_OP_reg0;
+ case DW_OP_breg0 ... DW_OP_breg31:
+ return op->atom - DW_OP_breg0;
+ case DW_OP_regx:
+ case DW_OP_bregx:
+ return op->number;
+ case DW_OP_fbreg:
+ return DWARF_REG_FB;
+ default:
+ break;
+ }
+ return -1;
+}
+
+static int offset_from_dwarf_op(Dwarf_Op *op)
+{
+ switch (op->atom) {
+ case DW_OP_reg0 ... DW_OP_reg31:
+ case DW_OP_regx:
+ return 0;
+ case DW_OP_breg0 ... DW_OP_breg31:
+ case DW_OP_fbreg:
+ return op->number;
+ case DW_OP_bregx:
+ return op->number2;
+ default:
+ break;
+ }
+ return -1;
+}
+#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT || HAVE_DWARF_CFI_SUPPORT */
+
#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
/**
* die_get_var_innermost_scope - Get innermost scope range of given variable DIE
@@ -1280,7 +1361,7 @@ struct find_var_data {
#define DWARF_OP_DIRECT_REGS 32
static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data,
- u64 addr_offset, u64 addr_type)
+ u64 addr_offset, u64 addr_type, bool is_pointer)
{
Dwarf_Die type_die;
Dwarf_Word size;
@@ -1291,9 +1372,18 @@ static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data,
return true;
}
+ if (addr_offset < addr_type)
+ return false;
+
if (die_get_real_type(die_mem, &type_die) == NULL)
return false;
+ if (is_pointer && dwarf_tag(&type_die) == DW_TAG_pointer_type) {
+ /* Get the target type of the pointer */
+ if (die_get_real_type(&type_die, &type_die) == NULL)
+ return false;
+ }
+
if (dwarf_aggregate_size(&type_die, &size) < 0)
return false;
@@ -1359,33 +1449,39 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
/* Local variables accessed using frame base register */
if (data->is_fbreg && ops->atom == DW_OP_fbreg &&
- data->offset >= (int)ops->number &&
check_allowed_ops(ops, nops) &&
- match_var_offset(die_mem, data, data->offset, ops->number))
+ match_var_offset(die_mem, data, data->offset, ops->number,
+ /*is_pointer=*/false))
return DIE_FIND_CB_END;
/* Only match with a simple case */
if (data->reg < DWARF_OP_DIRECT_REGS) {
/* pointer variables saved in a register 0 to 31 */
if (ops->atom == (DW_OP_reg0 + data->reg) &&
- check_allowed_ops(ops, nops))
+ check_allowed_ops(ops, nops) &&
+ match_var_offset(die_mem, data, data->offset, 0,
+ /*is_pointer=*/true))
return DIE_FIND_CB_END;
/* Local variables accessed by a register + offset */
if (ops->atom == (DW_OP_breg0 + data->reg) &&
check_allowed_ops(ops, nops) &&
- match_var_offset(die_mem, data, data->offset, ops->number))
+ match_var_offset(die_mem, data, data->offset, ops->number,
+ /*is_pointer=*/false))
return DIE_FIND_CB_END;
} else {
/* pointer variables saved in a register 32 or above */
if (ops->atom == DW_OP_regx && ops->number == data->reg &&
- check_allowed_ops(ops, nops))
+ check_allowed_ops(ops, nops) &&
+ match_var_offset(die_mem, data, data->offset, 0,
+ /*is_pointer=*/true))
return DIE_FIND_CB_END;
/* Local variables accessed by a register + offset */
if (ops->atom == DW_OP_bregx && data->reg == ops->number &&
check_allowed_ops(ops, nops) &&
- match_var_offset(die_mem, data, data->offset, ops->number2))
+ match_var_offset(die_mem, data, data->offset, ops->number2,
+ /*is_poitner=*/false))
return DIE_FIND_CB_END;
}
}
@@ -1443,11 +1539,9 @@ static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg)
if (ops->atom != DW_OP_addr)
continue;
- if (data->addr < ops->number)
- continue;
-
if (check_allowed_ops(ops, nops) &&
- match_var_offset(die_mem, data, data->addr, ops->number))
+ match_var_offset(die_mem, data, data->addr, ops->number,
+ /*is_pointer=*/false))
return DIE_FIND_CB_END;
}
return DIE_FIND_CB_SIBLING;
@@ -1456,7 +1550,6 @@ static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg)
/**
* die_find_variable_by_addr - Find variable located at given address
* @sc_die: a scope DIE
- * @pc: the program address to find
* @addr: the data address to find
* @die_mem: a buffer to save the resulting DIE
* @offset: the offset in the resulting type
@@ -1464,12 +1557,10 @@ static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg)
* Find the variable DIE located at the given address (in PC-relative mode).
* This is usually for global variables.
*/
-Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc,
- Dwarf_Addr addr, Dwarf_Die *die_mem,
- int *offset)
+Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem, int *offset)
{
struct find_var_data data = {
- .pc = pc,
.addr = addr,
};
Dwarf_Die *result;
@@ -1479,41 +1570,69 @@ Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc,
*offset = data.offset;
return result;
}
-#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
-#ifdef HAVE_DWARF_CFI_SUPPORT
-static int reg_from_dwarf_op(Dwarf_Op *op)
+static int __die_collect_vars_cb(Dwarf_Die *die_mem, void *arg)
{
- switch (op->atom) {
- case DW_OP_reg0 ... DW_OP_reg31:
- return op->atom - DW_OP_reg0;
- case DW_OP_breg0 ... DW_OP_breg31:
- return op->atom - DW_OP_breg0;
- case DW_OP_regx:
- case DW_OP_bregx:
- return op->number;
- default:
- break;
- }
- return -1;
+ struct die_var_type **var_types = arg;
+ Dwarf_Die type_die;
+ int tag = dwarf_tag(die_mem);
+ Dwarf_Attribute attr;
+ Dwarf_Addr base, start, end;
+ Dwarf_Op *ops;
+ size_t nops;
+ struct die_var_type *vt;
+
+ if (tag != DW_TAG_variable && tag != DW_TAG_formal_parameter)
+ return DIE_FIND_CB_SIBLING;
+
+ if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL)
+ return DIE_FIND_CB_SIBLING;
+
+ /*
+ * Only collect the first location as it can reconstruct the
+ * remaining state by following the instructions.
+ * start = 0 means it covers the whole range.
+ */
+ if (dwarf_getlocations(&attr, 0, &base, &start, &end, &ops, &nops) <= 0)
+ return DIE_FIND_CB_SIBLING;
+
+ if (die_get_real_type(die_mem, &type_die) == NULL)
+ return DIE_FIND_CB_SIBLING;
+
+ vt = malloc(sizeof(*vt));
+ if (vt == NULL)
+ return DIE_FIND_CB_END;
+
+ vt->die_off = dwarf_dieoffset(&type_die);
+ vt->addr = start;
+ vt->reg = reg_from_dwarf_op(ops);
+ vt->offset = offset_from_dwarf_op(ops);
+ vt->next = *var_types;
+ *var_types = vt;
+
+ return DIE_FIND_CB_SIBLING;
}
-static int offset_from_dwarf_op(Dwarf_Op *op)
+/**
+ * die_collect_vars - Save all variables and parameters
+ * @sc_die: a scope DIE
+ * @var_types: a pointer to save the resulting list
+ *
+ * Save all variables and parameters in the @sc_die and save them to @var_types.
+ * The @var_types is a singly-linked list containing type and location info.
+ * Actual type can be retrieved using dwarf_offdie() with 'die_off' later.
+ *
+ * Callers should free @var_types.
+ */
+void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types)
{
- switch (op->atom) {
- case DW_OP_reg0 ... DW_OP_reg31:
- case DW_OP_regx:
- return 0;
- case DW_OP_breg0 ... DW_OP_breg31:
- return op->number;
- case DW_OP_bregx:
- return op->number2;
- default:
- break;
- }
- return -1;
+ Dwarf_Die die_mem;
+
+ die_find_child(sc_die, __die_collect_vars_cb, (void *)var_types, &die_mem);
}
+#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
+#ifdef HAVE_DWARF_CFI_SUPPORT
/**
* die_get_cfa - Get frame base information
* @dwarf: a Dwarf info
@@ -1779,3 +1898,116 @@ int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes)
*scopes = data.scopes;
return data.nr;
}
+
+static int __die_find_member_offset_cb(Dwarf_Die *die_mem, void *arg)
+{
+ Dwarf_Die type_die;
+ Dwarf_Word size, loc;
+ Dwarf_Word offset = (long)arg;
+ int tag = dwarf_tag(die_mem);
+
+ if (tag != DW_TAG_member)
+ return DIE_FIND_CB_SIBLING;
+
+ /* Unions might not have location */
+ if (die_get_data_member_location(die_mem, &loc) < 0)
+ loc = 0;
+
+ if (offset == loc)
+ return DIE_FIND_CB_END;
+
+ if (die_get_real_type(die_mem, &type_die) == NULL) {
+ // TODO: add a pr_debug_dtp() later for this unlikely failure
+ return DIE_FIND_CB_SIBLING;
+ }
+
+ if (dwarf_aggregate_size(&type_die, &size) < 0)
+ size = 0;
+
+ if (loc < offset && offset < (loc + size))
+ return DIE_FIND_CB_END;
+
+ return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_get_member_type - Return type info of struct member
+ * @type_die: a type DIE
+ * @offset: offset in the type
+ * @die_mem: a buffer to save the resulting DIE
+ *
+ * This function returns a type of a member in @type_die where it's located at
+ * @offset if it's a struct. For now, it just returns the first matching
+ * member in a union. For other types, it'd return the given type directly
+ * if it's within the size of the type or NULL otherwise.
+ */
+Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset,
+ Dwarf_Die *die_mem)
+{
+ Dwarf_Die *member;
+ Dwarf_Die mb_type;
+ int tag;
+
+ tag = dwarf_tag(type_die);
+ /* If it's not a compound type, return the type directly */
+ if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+ Dwarf_Word size;
+
+ if (dwarf_aggregate_size(type_die, &size) < 0)
+ size = 0;
+
+ if ((unsigned)offset >= size)
+ return NULL;
+
+ *die_mem = *type_die;
+ return die_mem;
+ }
+
+ mb_type = *type_die;
+ /* TODO: Handle union types better? */
+ while (tag == DW_TAG_structure_type || tag == DW_TAG_union_type) {
+ member = die_find_child(&mb_type, __die_find_member_offset_cb,
+ (void *)(long)offset, die_mem);
+ if (member == NULL)
+ return NULL;
+
+ if (die_get_real_type(member, &mb_type) == NULL)
+ return NULL;
+
+ tag = dwarf_tag(&mb_type);
+
+ if (tag == DW_TAG_structure_type || tag == DW_TAG_union_type) {
+ Dwarf_Word loc;
+
+ /* Update offset for the start of the member struct */
+ if (die_get_data_member_location(member, &loc) == 0)
+ offset -= loc;
+ }
+ }
+ *die_mem = mb_type;
+ return die_mem;
+}
+
+/**
+ * die_deref_ptr_type - Return type info for pointer access
+ * @ptr_die: a pointer type DIE
+ * @offset: access offset for the pointer
+ * @die_mem: a buffer to save the resulting DIE
+ *
+ * This function follows the pointer in @ptr_die with given @offset
+ * and saves the resulting type in @die_mem. If the pointer points
+ * a struct type, actual member at the offset would be returned.
+ */
+Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset,
+ Dwarf_Die *die_mem)
+{
+ Dwarf_Die type_die;
+
+ if (dwarf_tag(ptr_die) != DW_TAG_pointer_type)
+ return NULL;
+
+ if (die_get_real_type(ptr_die, &type_die) == NULL)
+ return NULL;
+
+ return die_get_member_type(&type_die, offset, die_mem);
+}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 85dd527ae1f70d..b0f25fbf966828 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -94,6 +94,10 @@ Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
Dwarf_Die *die_mem);
+/* Search a non-inlined function by name and returns its return type */
+Dwarf_Die *die_find_func_rettype(Dwarf_Die *sp_die, const char *name,
+ Dwarf_Die *die_mem);
+
/* Walk on the instances of given DIE */
int die_walk_instances(Dwarf_Die *in_die,
int (*callback)(Dwarf_Die *, void *), void *data);
@@ -135,6 +139,21 @@ void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
/* Get the list of including scopes */
int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes);
+/* Variable type information */
+struct die_var_type {
+ struct die_var_type *next;
+ u64 die_off;
+ u64 addr;
+ int reg;
+ int offset;
+};
+
+/* Return type info of a member at offset */
+Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset, Dwarf_Die *die_mem);
+
+/* Return type info where the pointer and offset point to */
+Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset, Dwarf_Die *die_mem);
+
#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
/* Get byte offset range of given variable DIE */
@@ -146,9 +165,11 @@ Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg,
Dwarf_Die *die_mem);
/* Find a (global) variable located in the 'addr' */
-Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc,
- Dwarf_Addr addr, Dwarf_Die *die_mem,
- int *offset);
+Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem, int *offset);
+
+/* Save all variables and parameters in this scope */
+void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types);
#else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
@@ -170,7 +191,6 @@ static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unus
}
static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused,
- Dwarf_Addr pc __maybe_unused,
Dwarf_Addr addr __maybe_unused,
Dwarf_Die *die_mem __maybe_unused,
int *offset __maybe_unused)
@@ -178,6 +198,11 @@ static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unu
return NULL;
}
+static inline void die_collect_vars(Dwarf_Die *sc_die __maybe_unused,
+ struct die_var_type **var_types __maybe_unused)
+{
+}
+
#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
#ifdef HAVE_DWARF_CFI_SUPPORT
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 5f18d20ea903a2..4e2e4f40e134fd 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -43,6 +43,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
#elif defined(__riscv) && __riscv_xlen == 64
#define GEN_ELF_ARCH EM_RISCV
#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__riscv) && __riscv_xlen == 32
+#define GEN_ELF_ARCH EM_RISCV
+#define GEN_ELF_CLASS ELFCLASS32
#elif defined(__loongarch__)
#define GEN_ELF_ARCH EM_LOONGARCH
#define GEN_ELF_CLASS ELFCLASS64
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
index eab99ea6ac01e6..a0a46e34f8d142 100644
--- a/tools/perf/util/help-unknown-cmd.c
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -52,46 +52,48 @@ static int add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
return 0;
}
-const char *help_unknown_cmd(const char *cmd)
+const char *help_unknown_cmd(const char *cmd, struct cmdnames *main_cmds)
{
unsigned int i, n = 0, best_similarity = 0;
- struct cmdnames main_cmds, other_cmds;
+ struct cmdnames other_cmds;
- memset(&main_cmds, 0, sizeof(main_cmds));
- memset(&other_cmds, 0, sizeof(main_cmds));
+ memset(&other_cmds, 0, sizeof(other_cmds));
perf_config(perf_unknown_cmd_config, NULL);
- load_command_list("perf-", &main_cmds, &other_cmds);
+ load_command_list("perf-", main_cmds, &other_cmds);
- if (add_cmd_list(&main_cmds, &other_cmds) < 0) {
+ if (add_cmd_list(main_cmds, &other_cmds) < 0) {
fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n");
goto end;
}
- qsort(main_cmds.names, main_cmds.cnt,
- sizeof(main_cmds.names), cmdname_compare);
- uniq(&main_cmds);
+ qsort(main_cmds->names, main_cmds->cnt,
+ sizeof(main_cmds->names), cmdname_compare);
+ uniq(main_cmds);
- if (main_cmds.cnt) {
+ if (main_cmds->cnt) {
/* This reuses cmdname->len for similarity index */
- for (i = 0; i < main_cmds.cnt; ++i)
- main_cmds.names[i]->len =
- levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
-
- qsort(main_cmds.names, main_cmds.cnt,
- sizeof(*main_cmds.names), levenshtein_compare);
+ for (i = 0; i < main_cmds->cnt; ++i) {
+ main_cmds->names[i]->len =
+ levenshtein(cmd, main_cmds->names[i]->name,
+ /*swap_penalty=*/0,
+ /*substition_penality=*/2,
+ /*insertion_penality=*/1,
+ /*deletion_penalty=*/1);
+ }
+ qsort(main_cmds->names, main_cmds->cnt,
+ sizeof(*main_cmds->names), levenshtein_compare);
- best_similarity = main_cmds.names[0]->len;
+ best_similarity = main_cmds->names[0]->len;
n = 1;
- while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+ while (n < main_cmds->cnt && best_similarity == main_cmds->names[n]->len)
++n;
}
if (autocorrect && n == 1) {
- const char *assumed = main_cmds.names[0]->name;
+ const char *assumed = main_cmds->names[0]->name;
- main_cmds.names[0] = NULL;
- clean_cmdnames(&main_cmds);
+ main_cmds->names[0] = NULL;
clean_cmdnames(&other_cmds);
fprintf(stderr, "WARNING: You called a perf program named '%s', "
"which does not exist.\n"
@@ -107,15 +109,14 @@ const char *help_unknown_cmd(const char *cmd)
fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
- if (main_cmds.cnt && best_similarity < 6) {
+ if (main_cmds->cnt && best_similarity < 6) {
fprintf(stderr, "\nDid you mean %s?\n",
n < 2 ? "this": "one of these");
for (i = 0; i < n; i++)
- fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+ fprintf(stderr, "\t%s\n", main_cmds->names[i]->name);
}
end:
- clean_cmdnames(&main_cmds);
clean_cmdnames(&other_cmds);
- exit(1);
+ return NULL;
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index fa359180ebf8fc..9d43f8ae412d85 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -308,6 +308,9 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
dest->period_us += src->period_us;
dest->period_guest_sys += src->period_guest_sys;
dest->period_guest_us += src->period_guest_us;
+ dest->weight1 += src->weight1;
+ dest->weight2 += src->weight2;
+ dest->weight3 += src->weight3;
dest->nr_events += src->nr_events;
}
@@ -315,7 +318,9 @@ static void he_stat__decay(struct he_stat *he_stat)
{
he_stat->period = (he_stat->period * 7) / 8;
he_stat->nr_events = (he_stat->nr_events * 7) / 8;
- /* XXX need decay for weight too? */
+ he_stat->weight1 = (he_stat->weight1 * 7) / 8;
+ he_stat->weight2 = (he_stat->weight2 * 7) / 8;
+ he_stat->weight3 = (he_stat->weight3 * 7) / 8;
}
static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
@@ -614,7 +619,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
cmp = hist_entry__cmp(he, entry);
if (!cmp) {
if (sample_self) {
- he_stat__add_period(&he->stat, period);
+ he_stat__add_stat(&he->stat, &entry->stat);
hist_entry__add_callchain_period(he, period);
}
if (symbol_conf.cumulate_callchain)
@@ -731,6 +736,9 @@ __hists__add_entry(struct hists *hists,
.stat = {
.nr_events = 1,
.period = sample->period,
+ .weight1 = sample->weight,
+ .weight2 = sample->ins_lat,
+ .weight3 = sample->p_stage_cyc,
},
.parent = sym_parent,
.filtered = symbol__parent_filter(sym_parent) | al->filtered,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 4a0aea0c9e00e0..5260822b9773e1 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -4,21 +4,22 @@
#include <linux/rbtree.h>
#include <linux/types.h>
-#include "evsel.h"
+#include "callchain.h"
#include "color.h"
#include "events_stats.h"
+#include "evsel.h"
+#include "map_symbol.h"
#include "mutex.h"
+#include "sample.h"
+#include "spark.h"
+#include "stat.h"
-struct hist_entry;
-struct hist_entry_ops;
struct addr_location;
-struct map_symbol;
struct mem_info;
struct kvm_info;
struct branch_info;
struct branch_stack;
struct block_info;
-struct symbol;
struct ui_progress;
enum hist_filter {
@@ -150,6 +151,162 @@ extern const struct hist_iter_ops hist_iter_branch;
extern const struct hist_iter_ops hist_iter_mem;
extern const struct hist_iter_ops hist_iter_cumulative;
+struct res_sample {
+ u64 time;
+ int cpu;
+ int tid;
+};
+
+struct he_stat {
+ u64 period;
+ u64 period_sys;
+ u64 period_us;
+ u64 period_guest_sys;
+ u64 period_guest_us;
+ u64 weight1;
+ u64 weight2;
+ u64 weight3;
+ u32 nr_events;
+};
+
+struct namespace_id {
+ u64 dev;
+ u64 ino;
+};
+
+struct hist_entry_diff {
+ bool computed;
+ union {
+ /* PERF_HPP__DELTA */
+ double period_ratio_delta;
+
+ /* PERF_HPP__RATIO */
+ double period_ratio;
+
+ /* HISTC_WEIGHTED_DIFF */
+ s64 wdiff;
+
+ /* PERF_HPP_DIFF__CYCLES */
+ s64 cycles;
+ };
+ struct stats stats;
+ unsigned long svals[NUM_SPARKS];
+};
+
+struct hist_entry_ops {
+ void *(*new)(size_t size);
+ void (*free)(void *ptr);
+};
+
+/**
+ * struct hist_entry - histogram entry
+ *
+ * @row_offset - offset from the first callchain expanded to appear on screen
+ * @nr_rows - rows expanded in callchain, recalculated on folding/unfolding
+ */
+struct hist_entry {
+ struct rb_node rb_node_in;
+ struct rb_node rb_node;
+ union {
+ struct list_head node;
+ struct list_head head;
+ } pairs;
+ struct he_stat stat;
+ struct he_stat *stat_acc;
+ struct map_symbol ms;
+ struct thread *thread;
+ struct comm *comm;
+ struct namespace_id cgroup_id;
+ u64 cgroup;
+ u64 ip;
+ u64 transaction;
+ s32 socket;
+ s32 cpu;
+ u64 code_page_size;
+ u64 weight;
+ u64 ins_lat;
+ u64 p_stage_cyc;
+ u8 cpumode;
+ u8 depth;
+ int mem_type_off;
+ struct simd_flags simd_flags;
+
+ /* We are added by hists__add_dummy_entry. */
+ bool dummy;
+ bool leaf;
+
+ char level;
+ u8 filtered;
+
+ u16 callchain_size;
+ union {
+ /*
+ * Since perf diff only supports the stdio output, TUI
+ * fields are only accessed from perf report (or perf
+ * top). So make it a union to reduce memory usage.
+ */
+ struct hist_entry_diff diff;
+ struct /* for TUI */ {
+ u16 row_offset;
+ u16 nr_rows;
+ bool init_have_children;
+ bool unfolded;
+ bool has_children;
+ bool has_no_entry;
+ };
+ };
+ char *srcline;
+ char *srcfile;
+ struct symbol *parent;
+ struct branch_info *branch_info;
+ long time;
+ struct hists *hists;
+ struct mem_info *mem_info;
+ struct block_info *block_info;
+ struct kvm_info *kvm_info;
+ void *raw_data;
+ u32 raw_size;
+ int num_res;
+ struct res_sample *res_samples;
+ void *trace_output;
+ struct perf_hpp_list *hpp_list;
+ struct hist_entry *parent_he;
+ struct hist_entry_ops *ops;
+ struct annotated_data_type *mem_type;
+ union {
+ /* this is for hierarchical entry structure */
+ struct {
+ struct rb_root_cached hroot_in;
+ struct rb_root_cached hroot_out;
+ }; /* non-leaf entries */
+ struct rb_root sorted_chain; /* leaf entry has callchains */
+ };
+ struct callchain_root callchain[0]; /* must be last member */
+};
+
+static __pure inline bool hist_entry__has_callchains(struct hist_entry *he)
+{
+ return he->callchain_size != 0;
+}
+
+static inline bool hist_entry__has_pairs(struct hist_entry *he)
+{
+ return !list_empty(&he->pairs.node);
+}
+
+static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he)
+{
+ if (hist_entry__has_pairs(he))
+ return list_entry(he->pairs.node.next, struct hist_entry, pairs.node);
+ return NULL;
+}
+
+static inline void hist_entry__add_pair(struct hist_entry *pair,
+ struct hist_entry *he)
+{
+ list_add_tail(&pair->pairs.node, &he->pairs.head);
+}
+
struct hist_entry *hists__add_entry(struct hists *hists,
struct addr_location *al,
struct symbol *parent,
@@ -186,6 +343,8 @@ int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
struct hists *hists);
int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
struct perf_hpp_fmt *fmt, int printed);
+int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size,
+ unsigned int width);
void hist_entry__delete(struct hist_entry *he);
typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg);
@@ -238,6 +397,20 @@ void hists__match(struct hists *leader, struct hists *other);
int hists__link(struct hists *leader, struct hists *other);
int hists__unlink(struct hists *hists);
+static inline float hist_entry__get_percent_limit(struct hist_entry *he)
+{
+ u64 period = he->stat.period;
+ u64 total_period = hists__total_period(he->hists);
+
+ if (unlikely(total_period == 0))
+ return 0;
+
+ if (symbol_conf.cumulate_callchain)
+ period = he->stat_acc->period;
+
+ return period * 100.0 / total_period;
+}
+
struct hists_evsel {
struct evsel evsel;
struct hists hists;
@@ -377,6 +550,9 @@ enum {
PERF_HPP__OVERHEAD_ACC,
PERF_HPP__SAMPLES,
PERF_HPP__PERIOD,
+ PERF_HPP__WEIGHT1,
+ PERF_HPP__WEIGHT2,
+ PERF_HPP__WEIGHT3,
PERF_HPP__MAX_INDEX
};
@@ -423,16 +599,24 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists);
void perf_hpp__set_user_width(const char *width_list_str);
void hists__reset_column_width(struct hists *hists);
+enum perf_hpp_fmt_type {
+ PERF_HPP_FMT_TYPE__RAW,
+ PERF_HPP_FMT_TYPE__PERCENT,
+ PERF_HPP_FMT_TYPE__AVERAGE,
+};
+
typedef u64 (*hpp_field_fn)(struct hist_entry *he);
typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front);
typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...);
int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he, hpp_field_fn get_field,
- const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+ const char *fmtstr, hpp_snprint_fn print_fn,
+ enum perf_hpp_fmt_type fmtype);
int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he, hpp_field_fn get_field,
- const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+ const char *fmtstr, hpp_snprint_fn print_fn,
+ enum perf_hpp_fmt_type fmtype);
static inline void advance_hpp(struct perf_hpp *hpp, int inc)
{
@@ -460,15 +644,20 @@ struct hist_browser_timer {
int refresh;
};
-struct res_sample;
-
enum rstype {
A_NORMAL,
A_ASM,
A_SOURCE
};
-struct block_hist;
+struct block_hist {
+ struct hists block_hists;
+ struct perf_hpp_list block_list;
+ struct perf_hpp_fmt block_fmt;
+ int block_idx;
+ bool valid;
+ struct hist_entry he;
+};
#ifdef HAVE_SLANG_SUPPORT
#include "../ui/keysyms.h"
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index b450178e3420ba..e733f6b1f7ac58 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1319,6 +1319,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder, bool no_tip)
bool ret = false;
decoder->state.type &= ~INTEL_PT_BRANCH;
+ decoder->state.insn_op = INTEL_PT_OP_OTHER;
+ decoder->state.insn_len = 0;
if (decoder->set_fup_cfe_ip || decoder->set_fup_cfe) {
bool ip = decoder->set_fup_cfe_ip;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index f38893e0b03692..4db9a098f59262 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -764,6 +764,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
addr_location__init(&al);
intel_pt_insn->length = 0;
+ intel_pt_insn->op = INTEL_PT_OP_OTHER;
if (to_ip && *ip == to_ip)
goto out_no_cache;
@@ -898,6 +899,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
if (to_ip && *ip == to_ip) {
intel_pt_insn->length = 0;
+ intel_pt_insn->op = INTEL_PT_OP_OTHER;
goto out_no_cache;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 527517db31821f..c5c895131bca90 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -48,13 +48,6 @@ static struct dso *machine__kernel_dso(struct machine *machine)
return map__dso(machine->vmlinux_map);
}
-static void dsos__init(struct dsos *dsos)
-{
- INIT_LIST_HEAD(&dsos->head);
- dsos->root = RB_ROOT;
- init_rwsem(&dsos->lock);
-}
-
static int machine__set_mmap_name(struct machine *machine)
{
if (machine__is_host(machine))
@@ -165,28 +158,6 @@ struct machine *machine__new_kallsyms(void)
return machine;
}
-static void dsos__purge(struct dsos *dsos)
-{
- struct dso *pos, *n;
-
- down_write(&dsos->lock);
-
- list_for_each_entry_safe(pos, n, &dsos->head, node) {
- RB_CLEAR_NODE(&pos->rb_node);
- pos->root = NULL;
- list_del_init(&pos->node);
- dso__put(pos);
- }
-
- up_write(&dsos->lock);
-}
-
-static void dsos__exit(struct dsos *dsos)
-{
- dsos__purge(dsos);
- exit_rwsem(&dsos->lock);
-}
-
void machine__delete_threads(struct machine *machine)
{
threads__remove_all_threads(&machine->threads);
@@ -675,31 +646,6 @@ int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
return 0;
}
-static struct dso *machine__findnew_module_dso(struct machine *machine,
- struct kmod_path *m,
- const char *filename)
-{
- struct dso *dso;
-
- down_write(&machine->dsos.lock);
-
- dso = __dsos__find(&machine->dsos, m->name, true);
- if (!dso) {
- dso = __dsos__addnew(&machine->dsos, m->name);
- if (dso == NULL)
- goto out_unlock;
-
- dso__set_module_info(dso, m, machine);
- dso__set_long_name(dso, strdup(filename), true);
- dso->kernel = DSO_SPACE__KERNEL;
- }
-
- dso__get(dso);
-out_unlock:
- up_write(&machine->dsos.lock);
- return dso;
-}
-
int machine__process_aux_event(struct machine *machine __maybe_unused,
union perf_event *event)
{
@@ -883,7 +829,7 @@ static struct map *machine__addnew_module_map(struct machine *machine, u64 start
if (kmod_path__parse_name(&m, filename))
return NULL;
- dso = machine__findnew_module_dso(machine, &m, filename);
+ dso = dsos__findnew_module_dso(&machine->dsos, machine, &m, filename);
if (dso == NULL)
goto out;
@@ -907,11 +853,11 @@ out:
size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
{
struct rb_node *nd;
- size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);
+ size_t ret = dsos__fprintf(&machines->host.dsos, fp);
for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
- ret += __dsos__fprintf(&pos->dsos.head, fp);
+ ret += dsos__fprintf(&pos->dsos, fp);
}
return ret;
@@ -920,7 +866,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
bool (skip)(struct dso *dso, int parm), int parm)
{
- return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm);
+ return dsos__fprintf_buildid(&m->dsos, fp, skip, parm);
}
size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
@@ -1549,8 +1495,8 @@ static int machine__update_kernel_mmap(struct machine *machine,
updated = map__get(orig);
machine->vmlinux_map = updated;
- machine__set_kernel_mmap(machine, start, end);
maps__remove(machine__kernel_maps(machine), orig);
+ machine__set_kernel_mmap(machine, start, end);
err = maps__insert(machine__kernel_maps(machine), updated);
map__put(orig);
@@ -1616,16 +1562,14 @@ out_put:
return ret;
}
-static bool machine__uses_kcore(struct machine *machine)
+static int machine__uses_kcore_cb(struct dso *dso, void *data __maybe_unused)
{
- struct dso *dso;
-
- list_for_each_entry(dso, &machine->dsos.head, node) {
- if (dso__is_kcore(dso))
- return true;
- }
+ return dso__is_kcore(dso) ? 1 : 0;
+}
- return false;
+static bool machine__uses_kcore(struct machine *machine)
+{
+ return dsos__for_each_dso(&machine->dsos, machine__uses_kcore_cb, NULL) != 0 ? true : false;
}
static bool perf_event__is_extra_kernel_mmap(struct machine *machine,
@@ -1692,40 +1636,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
* Should be there already, from the build-id table in
* the header.
*/
- struct dso *kernel = NULL;
- struct dso *dso;
-
- down_read(&machine->dsos.lock);
-
- list_for_each_entry(dso, &machine->dsos.head, node) {
-
- /*
- * The cpumode passed to is_kernel_module is not the
- * cpumode of *this* event. If we insist on passing
- * correct cpumode to is_kernel_module, we should
- * record the cpumode when we adding this dso to the
- * linked list.
- *
- * However we don't really need passing correct
- * cpumode. We know the correct cpumode must be kernel
- * mode (if not, we should not link it onto kernel_dsos
- * list).
- *
- * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN.
- * is_kernel_module() treats it as a kernel cpumode.
- */
-
- if (!dso->kernel ||
- is_kernel_module(dso->long_name,
- PERF_RECORD_MISC_CPUMODE_UNKNOWN))
- continue;
-
-
- kernel = dso__get(dso);
- break;
- }
-
- up_read(&machine->dsos.lock);
+ struct dso *kernel = dsos__find_kernel_dso(&machine->dsos);
if (kernel == NULL)
kernel = machine__findnew_dso(machine, machine->mmap_name);
@@ -3224,16 +3135,28 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch
return sym->name;
}
+struct machine__for_each_dso_cb_args {
+ struct machine *machine;
+ machine__dso_t fn;
+ void *priv;
+};
+
+static int machine__for_each_dso_cb(struct dso *dso, void *data)
+{
+ struct machine__for_each_dso_cb_args *args = data;
+
+ return args->fn(dso, args->machine, args->priv);
+}
+
int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv)
{
- struct dso *pos;
- int err = 0;
+ struct machine__for_each_dso_cb_args args = {
+ .machine = machine,
+ .fn = fn,
+ .priv = priv,
+ };
- list_for_each_entry(pos, &machine->dsos.head, node) {
- if (fn(pos, machine, priv))
- err = -1;
- }
- return err;
+ return dsos__for_each_dso(&machine->dsos, machine__for_each_dso_cb, &args);
}
int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, void *priv)
@@ -3266,6 +3189,17 @@ bool machine__is_lock_function(struct machine *machine, u64 addr)
sym = machine__find_kernel_symbol_by_name(machine, "__lock_text_end", &kmap);
machine->lock.text_end = map__unmap_ip(kmap, sym->start);
+
+ sym = machine__find_kernel_symbol_by_name(machine, "__traceiter_contention_begin", &kmap);
+ if (sym) {
+ machine->traceiter.text_start = map__unmap_ip(kmap, sym->start);
+ machine->traceiter.text_end = map__unmap_ip(kmap, sym->end);
+ }
+ sym = machine__find_kernel_symbol_by_name(machine, "trace_contention_begin", &kmap);
+ if (sym) {
+ machine->trace.text_start = map__unmap_ip(kmap, sym->start);
+ machine->trace.text_end = map__unmap_ip(kmap, sym->end);
+ }
}
/* failed to get kernel symbols */
@@ -3280,5 +3214,23 @@ bool machine__is_lock_function(struct machine *machine, u64 addr)
if (machine->lock.text_start <= addr && addr < machine->lock.text_end)
return true;
+ /* traceiter functions currently don't have their own section
+ * but we consider them lock functions
+ */
+ if (machine->traceiter.text_start != 0) {
+ if (machine->traceiter.text_start <= addr && addr < machine->traceiter.text_end)
+ return true;
+ }
+
+ if (machine->trace.text_start != 0) {
+ if (machine->trace.text_start <= addr && addr < machine->trace.text_end)
+ return true;
+ }
+
return false;
}
+
+int machine__hit_all_dsos(struct machine *machine)
+{
+ return dsos__hit_all(&machine->dsos);
+}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index e28c787616fe4e..82a47bac80233e 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -49,7 +49,7 @@ struct machine {
struct {
u64 text_start;
u64 text_end;
- } sched, lock;
+ } sched, lock, traceiter, trace;
pid_t *current_tid;
size_t current_tid_sz;
union { /* Tool specific area */
@@ -306,4 +306,6 @@ int machine__map_x86_64_entry_trampolines(struct machine *machine,
int machine__resolve(struct machine *machine, struct addr_location *al,
struct perf_sample *sample);
+int machine__hit_all_dsos(struct machine *machine);
+
#endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 14a5ea70d81e11..cca871959f87ab 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -196,9 +196,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
* reading the header will have the build ID set and all future mmaps will
* have it missing.
*/
- down_read(&machine->dsos.lock);
- header_bid_dso = __dsos__find(&machine->dsos, filename, false);
- up_read(&machine->dsos.lock);
+ header_bid_dso = dsos__find(&machine->dsos, filename, false);
if (header_bid_dso && header_bid_dso->header_build_id) {
dso__set_build_id(dso, &header_bid_dso->bid);
dso->header_build_id = 1;
@@ -587,6 +585,23 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
return ip + map__reloc(map);
}
+/* convert objdump address to relative address. (To be removed) */
+u64 map__objdump_2rip(struct map *map, u64 ip)
+{
+ const struct dso *dso = map__dso(map);
+
+ if (!dso->adjust_symbols)
+ return ip;
+
+ if (dso->rel)
+ return ip + map__pgoff(map);
+
+ if (dso->kernel == DSO_SPACE__USER)
+ return ip - dso->text_offset;
+
+ return map__map_ip(map, ip + map__reloc(map));
+}
+
bool map__contains_symbol(const struct map *map, const struct symbol *sym)
{
u64 ip = map__unmap_ip(map, sym->start);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 49756716cb1327..65e2609fa1b1ad 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -132,6 +132,9 @@ u64 map__rip_2objdump(struct map *map, u64 rip);
/* objdump address -> memory address */
u64 map__objdump_2mem(struct map *map, u64 ip);
+/* objdump address -> rip */
+u64 map__objdump_2rip(struct map *map, u64 ip);
+
struct symbol;
struct thread;
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 79ef6095ab2891..9be40652461792 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -455,7 +455,7 @@ static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm,
const char *g;
char *omg, *mg;
- mg = strdup(pm->metric_group ?: "No_group");
+ mg = strdup(pm->metric_group ?: pm->metric_name);
if (!mg)
return -ENOMEM;
omg = mg;
@@ -466,7 +466,7 @@ static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm,
if (strlen(g))
me = mep_lookup(groups, g, pm->metric_name);
else
- me = mep_lookup(groups, "No_group", pm->metric_name);
+ me = mep_lookup(groups, pm->metric_name, pm->metric_name);
if (me) {
me->metric_desc = pm->desc;
@@ -1690,12 +1690,15 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
bool metric_no_threshold,
const char *user_requested_cpu_list,
bool system_wide,
+ bool hardware_aware_grouping,
struct rblist *metric_events)
{
const struct pmu_metrics_table *table = pmu_metrics_table__find();
if (!table)
return -EINVAL;
+ if (hardware_aware_grouping)
+ pr_debug("Use hardware aware grouping instead of traditional metric grouping method\n");
return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
metric_no_threshold, user_requested_cpu_list, system_wide,
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index d5325c6ec8e114..779f6ede1b5114 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -77,6 +77,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
bool metric_no_threshold,
const char *user_requested_cpu_list,
bool system_wide,
+ bool hardware_aware_grouping,
struct rblist *metric_events);
int metricgroup__parse_groups_test(struct evlist *evlist,
const struct pmu_metrics_table *table,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 6f8b0fa176891f..0f308b4db2b972 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -34,11 +34,12 @@
#ifdef PARSER_DEBUG
extern int parse_events_debug;
#endif
-static int get_config_terms(struct parse_events_terms *head_config, struct list_head *head_terms);
+static int get_config_terms(const struct parse_events_terms *head_config,
+ struct list_head *head_terms);
static int parse_events_terms__copy(const struct parse_events_terms *src,
struct parse_events_terms *dest);
-struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
+const struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = {
.symbol = "cpu-cycles",
.alias = "cycles",
@@ -81,7 +82,7 @@ struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
},
};
-struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
+const struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
[PERF_COUNT_SW_CPU_CLOCK] = {
.symbol = "cpu-clock",
.alias = "",
@@ -154,7 +155,7 @@ const char *event_type(int type)
return "unknown";
}
-static char *get_config_str(struct parse_events_terms *head_terms,
+static char *get_config_str(const struct parse_events_terms *head_terms,
enum parse_events__term_type type_term)
{
struct parse_events_term *term;
@@ -169,12 +170,12 @@ static char *get_config_str(struct parse_events_terms *head_terms,
return NULL;
}
-static char *get_config_metric_id(struct parse_events_terms *head_terms)
+static char *get_config_metric_id(const struct parse_events_terms *head_terms)
{
return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_METRIC_ID);
}
-static char *get_config_name(struct parse_events_terms *head_terms)
+static char *get_config_name(const struct parse_events_terms *head_terms)
{
return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME);
}
@@ -358,7 +359,7 @@ static int config_term_common(struct perf_event_attr *attr,
struct parse_events_term *term,
struct parse_events_error *err);
static int config_attr(struct perf_event_attr *attr,
- struct parse_events_terms *head,
+ const struct parse_events_terms *head,
struct parse_events_error *err,
config_term_func_t config_term);
@@ -442,17 +443,21 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
return strcmp(parse_state->pmu_filter, pmu->name) != 0;
}
+static int parse_events_add_pmu(struct parse_events_state *parse_state,
+ struct list_head *list, struct perf_pmu *pmu,
+ const struct parse_events_terms *const_parsed_terms,
+ bool auto_merge_stats);
+
int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
struct parse_events_state *parse_state,
- struct parse_events_terms *head_config)
+ struct parse_events_terms *parsed_terms)
{
struct perf_pmu *pmu = NULL;
bool found_supported = false;
- const char *config_name = get_config_name(head_config);
- const char *metric_id = get_config_metric_id(head_config);
+ const char *config_name = get_config_name(parsed_terms);
+ const char *metric_id = get_config_metric_id(parsed_terms);
- /* Legacy cache events are only supported by core PMUs. */
- while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
LIST_HEAD(config_terms);
struct perf_event_attr attr;
int ret;
@@ -460,6 +465,24 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
if (parse_events__filter_pmu(parse_state, pmu))
continue;
+ if (perf_pmu__have_event(pmu, name)) {
+ /*
+ * The PMU has the event so add as not a legacy cache
+ * event.
+ */
+ ret = parse_events_add_pmu(parse_state, list, pmu,
+ parsed_terms,
+ perf_pmu__auto_merge_stats(pmu));
+ if (ret)
+ return ret;
+ continue;
+ }
+
+ if (!pmu->is_core) {
+ /* Legacy cache events are only supported by core PMUs. */
+ continue;
+ }
+
memset(&attr, 0, sizeof(attr));
attr.type = PERF_TYPE_HW_CACHE;
@@ -469,11 +492,12 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
found_supported = true;
- if (head_config) {
- if (config_attr(&attr, head_config, parse_state->error, config_term_common))
+ if (parsed_terms) {
+ if (config_attr(&attr, parsed_terms, parse_state->error,
+ config_term_common))
return -EINVAL;
- if (get_config_terms(head_config, &config_terms))
+ if (get_config_terms(parsed_terms, &config_terms))
return -ENOMEM;
}
@@ -1085,7 +1109,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
#endif
static int config_attr(struct perf_event_attr *attr,
- struct parse_events_terms *head,
+ const struct parse_events_terms *head,
struct parse_events_error *err,
config_term_func_t config_term)
{
@@ -1098,7 +1122,8 @@ static int config_attr(struct perf_event_attr *attr,
return 0;
}
-static int get_config_terms(struct parse_events_terms *head_config, struct list_head *head_terms)
+static int get_config_terms(const struct parse_events_terms *head_config,
+ struct list_head *head_terms)
{
#define ADD_CONFIG_TERM(__type, __weak) \
struct evsel_config_term *__t; \
@@ -1302,7 +1327,7 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx,
static int __parse_events_add_numeric(struct parse_events_state *parse_state,
struct list_head *list,
struct perf_pmu *pmu, u32 type, u32 extended_type,
- u64 config, struct parse_events_terms *head_config)
+ u64 config, const struct parse_events_terms *head_config)
{
struct perf_event_attr attr;
LIST_HEAD(config_terms);
@@ -1338,7 +1363,7 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state,
int parse_events_add_numeric(struct parse_events_state *parse_state,
struct list_head *list,
u32 type, u64 config,
- struct parse_events_terms *head_config,
+ const struct parse_events_terms *head_config,
bool wildcard)
{
struct perf_pmu *pmu = NULL;
@@ -1385,56 +1410,34 @@ static bool config_term_percore(struct list_head *config_terms)
return false;
}
-int parse_events_add_pmu(struct parse_events_state *parse_state,
- struct list_head *list, const char *name,
- const struct parse_events_terms *const_parsed_terms,
- bool auto_merge_stats, void *loc_)
+static int parse_events_add_pmu(struct parse_events_state *parse_state,
+ struct list_head *list, struct perf_pmu *pmu,
+ const struct parse_events_terms *const_parsed_terms,
+ bool auto_merge_stats)
{
struct perf_event_attr attr;
struct perf_pmu_info info;
- struct perf_pmu *pmu;
struct evsel *evsel;
struct parse_events_error *err = parse_state->error;
- YYLTYPE *loc = loc_;
LIST_HEAD(config_terms);
struct parse_events_terms parsed_terms;
bool alias_rewrote_terms = false;
- pmu = parse_state->fake_pmu ?: perf_pmus__find(name);
-
- if (!pmu) {
- char *err_str;
-
- if (asprintf(&err_str,
- "Cannot find PMU `%s'. Missing kernel support?",
- name) >= 0)
- parse_events_error__handle(err, loc->first_column, err_str, NULL);
- return -EINVAL;
- }
-
- parse_events_terms__init(&parsed_terms);
- if (const_parsed_terms) {
- int ret = parse_events_terms__copy(const_parsed_terms, &parsed_terms);
-
- if (ret)
- return ret;
- }
-
if (verbose > 1) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
- if (pmu->selectable && list_empty(&parsed_terms.terms)) {
- strbuf_addf(&sb, "%s//", name);
+ if (pmu->selectable && const_parsed_terms &&
+ list_empty(&const_parsed_terms->terms)) {
+ strbuf_addf(&sb, "%s//", pmu->name);
} else {
- strbuf_addf(&sb, "%s/", name);
- parse_events_terms__to_strbuf(&parsed_terms, &sb);
+ strbuf_addf(&sb, "%s/", pmu->name);
+ parse_events_terms__to_strbuf(const_parsed_terms, &sb);
strbuf_addch(&sb, '/');
}
fprintf(stderr, "Attempt to add: %s\n", sb.buf);
strbuf_release(&sb);
}
- fix_raw(&parsed_terms, pmu);
memset(&attr, 0, sizeof(attr));
if (pmu->perf_event_attr_init_default)
@@ -1442,7 +1445,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
attr.type = pmu->type;
- if (list_empty(&parsed_terms.terms)) {
+ if (!const_parsed_terms || list_empty(&const_parsed_terms->terms)) {
evsel = __add_event(list, &parse_state->idx, &attr,
/*init_attr=*/true, /*name=*/NULL,
/*metric_id=*/NULL, pmu,
@@ -1451,6 +1454,15 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
return evsel ? 0 : -ENOMEM;
}
+ parse_events_terms__init(&parsed_terms);
+ if (const_parsed_terms) {
+ int ret = parse_events_terms__copy(const_parsed_terms, &parsed_terms);
+
+ if (ret)
+ return ret;
+ }
+ fix_raw(&parsed_terms, pmu);
+
/* Configure attr/terms with a known PMU, this will set hardcoded terms. */
if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) {
parse_events_terms__exit(&parsed_terms);
@@ -1469,7 +1481,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
strbuf_init(&sb, /*hint=*/ 0);
parse_events_terms__to_strbuf(&parsed_terms, &sb);
- fprintf(stderr, "..after resolving event: %s/%s/\n", name, sb.buf);
+ fprintf(stderr, "..after resolving event: %s/%s/\n", pmu->name, sb.buf);
strbuf_release(&sb);
}
@@ -1531,7 +1543,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
}
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
- const char *event_name,
+ const char *event_name, u64 hw_config,
const struct parse_events_terms *const_parsed_terms,
struct list_head **listp, void *loc_)
{
@@ -1539,8 +1551,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
struct list_head *list = NULL;
struct perf_pmu *pmu = NULL;
YYLTYPE *loc = loc_;
- int ok = 0;
- const char *config;
+ int ok = 0, core_ok = 0;
+ const char *tmp;
struct parse_events_terms parsed_terms;
*listp = NULL;
@@ -1553,15 +1565,15 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
return ret;
}
- config = strdup(event_name);
- if (!config)
+ tmp = strdup(event_name);
+ if (!tmp)
goto out_err;
if (parse_events_term__num(&term,
PARSE_EVENTS__TERM_TYPE_USER,
- config, /*num=*/1, /*novalue=*/true,
+ tmp, /*num=*/1, /*novalue=*/true,
loc, /*loc_val=*/NULL) < 0) {
- zfree(&config);
+ zfree(&tmp);
goto out_err;
}
list_add_tail(&term->list, &parsed_terms.terms);
@@ -1583,8 +1595,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
continue;
auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
- if (!parse_events_add_pmu(parse_state, list, pmu->name,
- &parsed_terms, auto_merge_stats, loc)) {
+ if (!parse_events_add_pmu(parse_state, list, pmu,
+ &parsed_terms, auto_merge_stats)) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
@@ -1592,12 +1604,14 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
pr_debug("%s -> %s/%s/\n", event_name, pmu->name, sb.buf);
strbuf_release(&sb);
ok++;
+ if (pmu->is_core)
+ core_ok++;
}
}
if (parse_state->fake_pmu) {
- if (!parse_events_add_pmu(parse_state, list, event_name, &parsed_terms,
- /*auto_merge_stats=*/true, loc)) {
+ if (!parse_events_add_pmu(parse_state, list, parse_state->fake_pmu, &parsed_terms,
+ /*auto_merge_stats=*/true)) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
@@ -1608,6 +1622,18 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
}
}
+ if (hw_config != PERF_COUNT_HW_MAX && !core_ok) {
+ /*
+ * The event wasn't found on core PMUs but it has a hardware
+ * config version to try.
+ */
+ if (!parse_events_add_numeric(parse_state, list,
+ PERF_TYPE_HARDWARE, hw_config,
+ const_parsed_terms,
+ /*wildcard=*/true))
+ ok++;
+ }
+
out_err:
parse_events_terms__exit(&parsed_terms);
if (ok)
@@ -1618,10 +1644,60 @@ out_err:
return ok ? 0 : -1;
}
-int parse_events__modifier_group(struct list_head *list,
- char *event_mod)
+int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state,
+ const char *event_or_pmu,
+ const struct parse_events_terms *const_parsed_terms,
+ struct list_head **listp,
+ void *loc_)
{
- return parse_events__modifier_event(list, event_mod, true);
+ YYLTYPE *loc = loc_;
+ struct perf_pmu *pmu;
+ int ok = 0;
+ char *help;
+
+ *listp = malloc(sizeof(**listp));
+ if (!*listp)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(*listp);
+
+ /* Attempt to add to list assuming event_or_pmu is a PMU name. */
+ pmu = parse_state->fake_pmu ?: perf_pmus__find(event_or_pmu);
+ if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms,
+ /*auto_merge_stats=*/false))
+ return 0;
+
+ pmu = NULL;
+ /* Failed to add, try wildcard expansion of event_or_pmu as a PMU name. */
+ while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ if (!parse_events__filter_pmu(parse_state, pmu) &&
+ perf_pmu__match(pmu, event_or_pmu)) {
+ bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
+
+ if (!parse_events_add_pmu(parse_state, *listp, pmu,
+ const_parsed_terms,
+ auto_merge_stats)) {
+ ok++;
+ parse_state->wild_card_pmus = true;
+ }
+ }
+ }
+ if (ok)
+ return 0;
+
+ /* Failure to add, assume event_or_pmu is an event name. */
+ zfree(listp);
+ if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, PERF_COUNT_HW_MAX,
+ const_parsed_terms, listp, loc))
+ return 0;
+
+ if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", event_or_pmu) < 0)
+ help = NULL;
+ parse_events_error__handle(parse_state->error, loc->first_column,
+ strdup("Bad event or PMU"),
+ help);
+ zfree(listp);
+ return -EINVAL;
}
void parse_events__set_leader(char *name, struct list_head *list)
@@ -1635,213 +1711,146 @@ void parse_events__set_leader(char *name, struct list_head *list)
leader = list_first_entry(list, struct evsel, core.node);
__perf_evlist__set_leader(list, &leader->core);
+ zfree(&leader->group_name);
leader->group_name = name;
}
-/* list_event is assumed to point to malloc'ed memory */
-void parse_events_update_lists(struct list_head *list_event,
- struct list_head *list_all)
+static int parse_events__modifier_list(struct parse_events_state *parse_state,
+ YYLTYPE *loc,
+ struct list_head *list,
+ struct parse_events_modifier mod,
+ bool group)
{
- /*
- * Called for single event definition. Update the
- * 'all event' list, and reinit the 'single event'
- * list, for next event definition.
- */
- list_splice_tail(list_event, list_all);
- free(list_event);
-}
-
-struct event_modifier {
- int eu;
- int ek;
- int eh;
- int eH;
- int eG;
- int eI;
- int precise;
- int precise_max;
- int exclude_GH;
- int sample_read;
- int pinned;
- int weak;
- int exclusive;
- int bpf_counter;
-};
+ struct evsel *evsel;
-static int get_event_modifier(struct event_modifier *mod, char *str,
- struct evsel *evsel)
-{
- int eu = evsel ? evsel->core.attr.exclude_user : 0;
- int ek = evsel ? evsel->core.attr.exclude_kernel : 0;
- int eh = evsel ? evsel->core.attr.exclude_hv : 0;
- int eH = evsel ? evsel->core.attr.exclude_host : 0;
- int eG = evsel ? evsel->core.attr.exclude_guest : 0;
- int eI = evsel ? evsel->core.attr.exclude_idle : 0;
- int precise = evsel ? evsel->core.attr.precise_ip : 0;
- int precise_max = 0;
- int sample_read = 0;
- int pinned = evsel ? evsel->core.attr.pinned : 0;
- int exclusive = evsel ? evsel->core.attr.exclusive : 0;
-
- int exclude = eu | ek | eh;
- int exclude_GH = evsel ? evsel->exclude_GH : 0;
- int weak = 0;
- int bpf_counter = 0;
-
- memset(mod, 0, sizeof(*mod));
-
- while (*str) {
- if (*str == 'u') {
+ if (!group && mod.weak) {
+ parse_events_error__handle(parse_state->error, loc->first_column,
+ strdup("Weak modifier is for use with groups"), NULL);
+ return -EINVAL;
+ }
+
+ __evlist__for_each_entry(list, evsel) {
+ /* Translate modifiers into the equivalent evsel excludes. */
+ int eu = group ? evsel->core.attr.exclude_user : 0;
+ int ek = group ? evsel->core.attr.exclude_kernel : 0;
+ int eh = group ? evsel->core.attr.exclude_hv : 0;
+ int eH = group ? evsel->core.attr.exclude_host : 0;
+ int eG = group ? evsel->core.attr.exclude_guest : 0;
+ int exclude = eu | ek | eh;
+ int exclude_GH = group ? evsel->exclude_GH : 0;
+
+ if (mod.precise) {
+ /* use of precise requires exclude_guest */
+ eG = 1;
+ }
+ if (mod.user) {
if (!exclude)
exclude = eu = ek = eh = 1;
if (!exclude_GH && !perf_guest)
eG = 1;
eu = 0;
- } else if (*str == 'k') {
+ }
+ if (mod.kernel) {
if (!exclude)
exclude = eu = ek = eh = 1;
ek = 0;
- } else if (*str == 'h') {
+ }
+ if (mod.hypervisor) {
if (!exclude)
exclude = eu = ek = eh = 1;
eh = 0;
- } else if (*str == 'G') {
+ }
+ if (mod.guest) {
if (!exclude_GH)
exclude_GH = eG = eH = 1;
eG = 0;
- } else if (*str == 'H') {
+ }
+ if (mod.host) {
if (!exclude_GH)
exclude_GH = eG = eH = 1;
eH = 0;
- } else if (*str == 'I') {
- eI = 1;
- } else if (*str == 'p') {
- precise++;
- /* use of precise requires exclude_guest */
- if (!exclude_GH)
- eG = 1;
- } else if (*str == 'P') {
- precise_max = 1;
- } else if (*str == 'S') {
- sample_read = 1;
- } else if (*str == 'D') {
- pinned = 1;
- } else if (*str == 'e') {
- exclusive = 1;
- } else if (*str == 'W') {
- weak = 1;
- } else if (*str == 'b') {
- bpf_counter = 1;
- } else
- break;
-
- ++str;
+ }
+ evsel->core.attr.exclude_user = eu;
+ evsel->core.attr.exclude_kernel = ek;
+ evsel->core.attr.exclude_hv = eh;
+ evsel->core.attr.exclude_host = eH;
+ evsel->core.attr.exclude_guest = eG;
+ evsel->exclude_GH = exclude_GH;
+
+ /* Simple modifiers copied to the evsel. */
+ if (mod.precise) {
+ u8 precise = evsel->core.attr.precise_ip + mod.precise;
+ /*
+ * precise ip:
+ *
+ * 0 - SAMPLE_IP can have arbitrary skid
+ * 1 - SAMPLE_IP must have constant skid
+ * 2 - SAMPLE_IP requested to have 0 skid
+ * 3 - SAMPLE_IP must have 0 skid
+ *
+ * See also PERF_RECORD_MISC_EXACT_IP
+ */
+ if (precise > 3) {
+ char *help;
+
+ if (asprintf(&help,
+ "Maximum combined precise value is 3, adding precision to \"%s\"",
+ evsel__name(evsel)) > 0) {
+ parse_events_error__handle(parse_state->error,
+ loc->first_column,
+ help, NULL);
+ }
+ return -EINVAL;
+ }
+ evsel->core.attr.precise_ip = precise;
+ }
+ if (mod.precise_max)
+ evsel->precise_max = 1;
+ if (mod.non_idle)
+ evsel->core.attr.exclude_idle = 1;
+ if (mod.sample_read)
+ evsel->sample_read = 1;
+ if (mod.pinned && evsel__is_group_leader(evsel))
+ evsel->core.attr.pinned = 1;
+ if (mod.exclusive && evsel__is_group_leader(evsel))
+ evsel->core.attr.exclusive = 1;
+ if (mod.weak)
+ evsel->weak_group = true;
+ if (mod.bpf)
+ evsel->bpf_counter = true;
}
-
- /*
- * precise ip:
- *
- * 0 - SAMPLE_IP can have arbitrary skid
- * 1 - SAMPLE_IP must have constant skid
- * 2 - SAMPLE_IP requested to have 0 skid
- * 3 - SAMPLE_IP must have 0 skid
- *
- * See also PERF_RECORD_MISC_EXACT_IP
- */
- if (precise > 3)
- return -EINVAL;
-
- mod->eu = eu;
- mod->ek = ek;
- mod->eh = eh;
- mod->eH = eH;
- mod->eG = eG;
- mod->eI = eI;
- mod->precise = precise;
- mod->precise_max = precise_max;
- mod->exclude_GH = exclude_GH;
- mod->sample_read = sample_read;
- mod->pinned = pinned;
- mod->weak = weak;
- mod->bpf_counter = bpf_counter;
- mod->exclusive = exclusive;
-
return 0;
}
-/*
- * Basic modifier sanity check to validate it contains only one
- * instance of any modifier (apart from 'p') present.
- */
-static int check_modifier(char *str)
+int parse_events__modifier_group(struct parse_events_state *parse_state, void *loc,
+ struct list_head *list,
+ struct parse_events_modifier mod)
{
- char *p = str;
-
- /* The sizeof includes 0 byte as well. */
- if (strlen(str) > (sizeof("ukhGHpppPSDIWeb") - 1))
- return -1;
-
- while (*p) {
- if (*p != 'p' && strchr(p + 1, *p))
- return -1;
- p++;
- }
-
- return 0;
+ return parse_events__modifier_list(parse_state, loc, list, mod, /*group=*/true);
}
-int parse_events__modifier_event(struct list_head *list, char *str, bool add)
+int parse_events__modifier_event(struct parse_events_state *parse_state, void *loc,
+ struct list_head *list,
+ struct parse_events_modifier mod)
{
- struct evsel *evsel;
- struct event_modifier mod;
-
- if (str == NULL)
- return 0;
-
- if (check_modifier(str))
- return -EINVAL;
-
- if (!add && get_event_modifier(&mod, str, NULL))
- return -EINVAL;
-
- __evlist__for_each_entry(list, evsel) {
- if (add && get_event_modifier(&mod, str, evsel))
- return -EINVAL;
-
- evsel->core.attr.exclude_user = mod.eu;
- evsel->core.attr.exclude_kernel = mod.ek;
- evsel->core.attr.exclude_hv = mod.eh;
- evsel->core.attr.precise_ip = mod.precise;
- evsel->core.attr.exclude_host = mod.eH;
- evsel->core.attr.exclude_guest = mod.eG;
- evsel->core.attr.exclude_idle = mod.eI;
- evsel->exclude_GH = mod.exclude_GH;
- evsel->sample_read = mod.sample_read;
- evsel->precise_max = mod.precise_max;
- evsel->weak_group = mod.weak;
- evsel->bpf_counter = mod.bpf_counter;
-
- if (evsel__is_group_leader(evsel)) {
- evsel->core.attr.pinned = mod.pinned;
- evsel->core.attr.exclusive = mod.exclusive;
- }
- }
-
- return 0;
+ return parse_events__modifier_list(parse_state, loc, list, mod, /*group=*/false);
}
-int parse_events_name(struct list_head *list, const char *name)
+int parse_events__set_default_name(struct list_head *list, char *name)
{
struct evsel *evsel;
+ bool used_name = false;
__evlist__for_each_entry(list, evsel) {
if (!evsel->name) {
- evsel->name = strdup(name);
+ evsel->name = used_name ? strdup(name) : name;
+ used_name = true;
if (!evsel->name)
return -ENOMEM;
}
}
-
+ if (!used_name)
+ free(name);
return 0;
}
@@ -2691,15 +2700,6 @@ int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct
return 0;
}
-void parse_events_evlist_error(struct parse_events_state *parse_state,
- int idx, const char *str)
-{
- if (!parse_state->error)
- return;
-
- parse_events_error__handle(parse_state->error, idx, strdup(str), NULL);
-}
-
static void config_terms_list(char *buf, size_t buf_sz)
{
int i;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 809359e8544ef3..5695308efab981 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -186,9 +186,28 @@ void parse_events_terms__init(struct parse_events_terms *terms);
void parse_events_terms__exit(struct parse_events_terms *terms);
int parse_events_terms(struct parse_events_terms *terms, const char *str, FILE *input);
int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb);
-int parse_events__modifier_event(struct list_head *list, char *str, bool add);
-int parse_events__modifier_group(struct list_head *list, char *event_mod);
-int parse_events_name(struct list_head *list, const char *name);
+
+struct parse_events_modifier {
+ u8 precise; /* Number of repeated 'p' for precision. */
+ bool precise_max : 1; /* 'P' */
+ bool non_idle : 1; /* 'I' */
+ bool sample_read : 1; /* 'S' */
+ bool pinned : 1; /* 'D' */
+ bool exclusive : 1; /* 'e' */
+ bool weak : 1; /* 'W' */
+ bool bpf : 1; /* 'b' */
+ bool user : 1; /* 'u' */
+ bool kernel : 1; /* 'k' */
+ bool hypervisor : 1; /* 'h' */
+ bool guest : 1; /* 'G' */
+ bool host : 1; /* 'H' */
+};
+
+int parse_events__modifier_event(struct parse_events_state *parse_state, void *loc,
+ struct list_head *list, struct parse_events_modifier mod);
+int parse_events__modifier_group(struct parse_events_state *parse_state, void *loc,
+ struct list_head *list, struct parse_events_modifier mod);
+int parse_events__set_default_name(struct list_head *list, char *name);
int parse_events_add_tracepoint(struct list_head *list, int *idx,
const char *sys, const char *event,
struct parse_events_error *error,
@@ -196,45 +215,43 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx,
int parse_events_add_numeric(struct parse_events_state *parse_state,
struct list_head *list,
u32 type, u64 config,
- struct parse_events_terms *head_config,
+ const struct parse_events_terms *head_config,
bool wildcard);
int parse_events_add_tool(struct parse_events_state *parse_state,
struct list_head *list,
int tool_event);
int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
struct parse_events_state *parse_state,
- struct parse_events_terms *head_config);
+ struct parse_events_terms *parsed_terms);
int parse_events__decode_legacy_cache(const char *name, int pmu_type, __u64 *config);
int parse_events_add_breakpoint(struct parse_events_state *parse_state,
struct list_head *list,
u64 addr, char *type, u64 len,
struct parse_events_terms *head_config);
-int parse_events_add_pmu(struct parse_events_state *parse_state,
- struct list_head *list, const char *name,
- const struct parse_events_terms *const_parsed_terms,
- bool auto_merge_stats, void *loc);
struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
const char *name, const char *metric_id,
struct perf_pmu *pmu);
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
- const char *event_name,
+ const char *event_name, u64 hw_config,
const struct parse_events_terms *const_parsed_terms,
struct list_head **listp, void *loc);
+int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state,
+ const char *event_or_pmu,
+ const struct parse_events_terms *const_parsed_terms,
+ struct list_head **listp,
+ void *loc_);
+
void parse_events__set_leader(char *name, struct list_head *list);
-void parse_events_update_lists(struct list_head *list_event,
- struct list_head *list_all);
-void parse_events_evlist_error(struct parse_events_state *parse_state,
- int idx, const char *str);
struct event_symbol {
const char *symbol;
const char *alias;
};
-extern struct event_symbol event_symbols_hw[];
-extern struct event_symbol event_symbols_sw[];
+extern const struct event_symbol event_symbols_hw[];
+extern const struct event_symbol event_symbols_sw[];
char *parse_events_formats_error_string(char *additional_terms);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index e86c45675e1d50..08ea2d845dc361 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -18,26 +18,34 @@
char *parse_events_get_text(yyscan_t yyscanner);
YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
+int parse_events_get_column(yyscan_t yyscanner);
+int parse_events_get_leng(yyscan_t yyscanner);
-static int __value(YYSTYPE *yylval, char *str, int base, int token)
+static int get_column(yyscan_t scanner)
{
- u64 num;
-
- errno = 0;
- num = strtoull(str, NULL, base);
- if (errno)
- return PE_ERROR;
-
- yylval->num = num;
- return token;
+ return parse_events_get_column(scanner) - parse_events_get_leng(scanner);
}
-static int value(yyscan_t scanner, int base)
+static int value(struct parse_events_state *parse_state, yyscan_t scanner, int base)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
char *text = parse_events_get_text(scanner);
+ u64 num;
- return __value(yylval, text, base, PE_VALUE);
+ errno = 0;
+ num = strtoull(text, NULL, base);
+ if (errno) {
+ struct parse_events_error *error = parse_state->error;
+ char *help = NULL;
+
+ if (asprintf(&help, "Bad base %d number \"%s\"", base, text) > 0)
+ parse_events_error__handle(error, get_column(scanner), help , NULL);
+
+ return PE_ERROR;
+ }
+
+ yylval->num = num;
+ return PE_VALUE;
}
static int str(yyscan_t scanner, int token)
@@ -88,6 +96,11 @@ static int drv_str(yyscan_t scanner, int token)
return token;
}
+/*
+ * Use yyless to return all the characaters to the input. Update the column for
+ * location debugging. If __alloc is non-zero set yylval to the text for the
+ * returned token's value.
+ */
#define REWIND(__alloc) \
do { \
YYSTYPE *__yylval = parse_events_get_lval(yyscanner); \
@@ -100,12 +113,12 @@ do { \
yyless(0); \
} while (0)
-static int sym(yyscan_t scanner, int type, int config)
+static int sym(yyscan_t scanner, int config)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
- yylval->num = (type << 16) + config;
- return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW;
+ yylval->num = config;
+ return PE_VALUE_SYM_SW;
}
static int tool(yyscan_t scanner, enum perf_tool_event event)
@@ -124,16 +137,87 @@ static int term(yyscan_t scanner, enum parse_events__term_type type)
return PE_TERM;
}
-static int hw_term(yyscan_t scanner, int config)
+static int hw(yyscan_t scanner, int config)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
char *text = parse_events_get_text(scanner);
- yylval->hardware_term.str = strdup(text);
- yylval->hardware_term.num = PERF_TYPE_HARDWARE + config;
+ yylval->hardware_event.str = strdup(text);
+ yylval->hardware_event.num = config;
return PE_TERM_HW;
}
+static void modifiers_error(struct parse_events_state *parse_state, yyscan_t scanner,
+ int pos, char mod_char, const char *mod_name)
+{
+ struct parse_events_error *error = parse_state->error;
+ char *help = NULL;
+
+ if (asprintf(&help, "Duplicate modifier '%c' (%s)", mod_char, mod_name) > 0)
+ parse_events_error__handle(error, get_column(scanner) + pos, help , NULL);
+}
+
+static int modifiers(struct parse_events_state *parse_state, yyscan_t scanner)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+ struct parse_events_modifier mod = { .precise = 0, };
+
+ for (size_t i = 0, n = strlen(text); i < n; i++) {
+#define CASE(c, field) \
+ case c: \
+ if (mod.field) { \
+ modifiers_error(parse_state, scanner, i, c, #field); \
+ return PE_ERROR; \
+ } \
+ mod.field = true; \
+ break
+
+ switch (text[i]) {
+ CASE('u', user);
+ CASE('k', kernel);
+ CASE('h', hypervisor);
+ CASE('I', non_idle);
+ CASE('G', guest);
+ CASE('H', host);
+ case 'p':
+ mod.precise++;
+ /*
+ * precise ip:
+ *
+ * 0 - SAMPLE_IP can have arbitrary skid
+ * 1 - SAMPLE_IP must have constant skid
+ * 2 - SAMPLE_IP requested to have 0 skid
+ * 3 - SAMPLE_IP must have 0 skid
+ *
+ * See also PERF_RECORD_MISC_EXACT_IP
+ */
+ if (mod.precise > 3) {
+ struct parse_events_error *error = parse_state->error;
+ char *help = strdup("Maximum precise value is 3");
+
+ if (help) {
+ parse_events_error__handle(error, get_column(scanner) + i,
+ help , NULL);
+ }
+ return PE_ERROR;
+ }
+ break;
+ CASE('P', precise_max);
+ CASE('S', sample_read);
+ CASE('D', pinned);
+ CASE('W', weak);
+ CASE('e', exclusive);
+ CASE('b', bpf);
+ default:
+ return PE_ERROR;
+ }
+#undef CASE
+ }
+ yylval->mod = mod;
+ return PE_MODIFIER_EVENT;
+}
+
#define YY_USER_ACTION \
do { \
yylloc->last_column = yylloc->first_column; \
@@ -166,7 +250,7 @@ drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
* If you add a modifier you need to update check_modifier().
* Also, the letters in modifier_event must not be in modifier_bp.
*/
-modifier_event [ukhpPGHSDIWeb]+
+modifier_event [ukhpPGHSDIWeb]{1,15}
modifier_bp [rwx]{1,3}
lc_type (L1-dcache|l1-d|l1d|L1-data|L1-icache|l1-i|l1i|L1-instruction|LLC|L2|dTLB|d-tlb|Data-TLB|iTLB|i-tlb|Instruction-TLB|branch|branches|bpu|btb|bpc|node)
lc_op_result (load|loads|read|store|stores|write|prefetch|prefetches|speculative-read|speculative-load|refs|Reference|ops|access|misses|miss)
@@ -246,16 +330,16 @@ percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
-cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
-stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
-stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
-instructions { return hw_term(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); }
-cache-references { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); }
-cache-misses { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_MISSES); }
-branch-instructions|branches { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
-branch-misses { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); }
-bus-cycles { return hw_term(yyscanner, PERF_COUNT_HW_BUS_CYCLES); }
-ref-cycles { return hw_term(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); }
+cpu-cycles|cycles { return hw(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
+stalled-cycles-frontend|idle-cycles-frontend { return hw(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
+stalled-cycles-backend|idle-cycles-backend { return hw(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+instructions { return hw(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); }
+cache-references { return hw(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); }
+cache-misses { return hw(yyscanner, PERF_COUNT_HW_CACHE_MISSES); }
+branch-instructions|branches { return hw(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
+branch-misses { return hw(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); }
+bus-cycles { return hw(yyscanner, PERF_COUNT_HW_BUS_CYCLES); }
+ref-cycles { return hw(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); }
r{num_raw_hex} { return str(yyscanner, PE_RAW); }
r0x{num_raw_hex} { return str(yyscanner, PE_RAW); }
, { return ','; }
@@ -283,8 +367,8 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); }
*/
"/"/{digit} { return PE_BP_SLASH; }
"/"/{non_digit} { BEGIN(config); return '/'; }
-{num_dec} { return value(yyscanner, 10); }
-{num_hex} { return value(yyscanner, 16); }
+{num_dec} { return value(_parse_state, yyscanner, 10); }
+{num_hex} { return value(_parse_state, yyscanner, 16); }
/*
* We need to separate 'mem:' scanner part, in order to get specific
* modifier bits parsed out. Otherwise we would need to handle PE_NAME
@@ -299,41 +383,41 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); }
<<EOF>> { BEGIN(INITIAL); }
}
-cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
-stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
-stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
-instructions { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); }
-cache-references { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); }
-cache-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); }
-branch-instructions|branches { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
-branch-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); }
-bus-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); }
-ref-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); }
-cpu-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); }
-task-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); }
-page-faults|faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); }
-minor-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); }
-major-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); }
-context-switches|cs { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); }
-cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); }
-alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
-emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
-dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+cpu-cycles|cycles { return hw(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
+stalled-cycles-frontend|idle-cycles-frontend { return hw(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
+stalled-cycles-backend|idle-cycles-backend { return hw(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+instructions { return hw(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); }
+cache-references { return hw(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); }
+cache-misses { return hw(yyscanner, PERF_COUNT_HW_CACHE_MISSES); }
+branch-instructions|branches { return hw(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
+branch-misses { return hw(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); }
+bus-cycles { return hw(yyscanner, PERF_COUNT_HW_BUS_CYCLES); }
+ref-cycles { return hw(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); }
+cpu-clock { return sym(yyscanner, PERF_COUNT_SW_CPU_CLOCK); }
+task-clock { return sym(yyscanner, PERF_COUNT_SW_TASK_CLOCK); }
+page-faults|faults { return sym(yyscanner, PERF_COUNT_SW_PAGE_FAULTS); }
+minor-faults { return sym(yyscanner, PERF_COUNT_SW_PAGE_FAULTS_MIN); }
+major-faults { return sym(yyscanner, PERF_COUNT_SW_PAGE_FAULTS_MAJ); }
+context-switches|cs { return sym(yyscanner, PERF_COUNT_SW_CONTEXT_SWITCHES); }
+cpu-migrations|migrations { return sym(yyscanner, PERF_COUNT_SW_CPU_MIGRATIONS); }
+alignment-faults { return sym(yyscanner, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
+emulation-faults { return sym(yyscanner, PERF_COUNT_SW_EMULATION_FAULTS); }
+dummy { return sym(yyscanner, PERF_COUNT_SW_DUMMY); }
duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); }
system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); }
-bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
-cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
+bpf-output { return sym(yyscanner, PERF_COUNT_SW_BPF_OUTPUT); }
+cgroup-switches { return sym(yyscanner, PERF_COUNT_SW_CGROUP_SWITCHES); }
{lc_type} { return str(yyscanner, PE_LEGACY_CACHE); }
{lc_type}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); }
{lc_type}-{lc_op_result}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); }
mem: { BEGIN(mem); return PE_PREFIX_MEM; }
r{num_raw_hex} { return str(yyscanner, PE_RAW); }
-{num_dec} { return value(yyscanner, 10); }
-{num_hex} { return value(yyscanner, 16); }
+{num_dec} { return value(_parse_state, yyscanner, 10); }
+{num_hex} { return value(_parse_state, yyscanner, 16); }
-{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); }
+{modifier_event} { return modifiers(_parse_state, yyscanner); }
{name} { return str(yyscanner, PE_NAME); }
{name_tag} { return str(yyscanner, PE_NAME); }
"/" { BEGIN(config); return '/'; }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index d70f5d84af92d1..68b3b06c7ff06b 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -55,7 +55,7 @@ static void free_list_evsel(struct list_head* list_evsel)
%}
%token PE_START_EVENTS PE_START_TERMS
-%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM
+%token PE_VALUE PE_VALUE_SYM_SW PE_TERM
%token PE_VALUE_SYM_TOOL
%token PE_EVENT_NAME
%token PE_RAW PE_NAME
@@ -66,15 +66,13 @@ static void free_list_evsel(struct list_head* list_evsel)
%token PE_DRV_CFG_TERM
%token PE_TERM_HW
%type <num> PE_VALUE
-%type <num> PE_VALUE_SYM_HW
%type <num> PE_VALUE_SYM_SW
%type <num> PE_VALUE_SYM_TOOL
+%type <mod> PE_MODIFIER_EVENT
%type <term_type> PE_TERM
-%type <num> value_sym
%type <str> PE_RAW
%type <str> PE_NAME
%type <str> PE_LEGACY_CACHE
-%type <str> PE_MODIFIER_EVENT
%type <str> PE_MODIFIER_BP
%type <str> PE_EVENT_NAME
%type <str> PE_DRV_CFG_TERM
@@ -87,6 +85,7 @@ static void free_list_evsel(struct list_head* list_evsel)
%type <list_terms> opt_pmu_config
%destructor { parse_events_terms__delete ($$); } <list_terms>
%type <list_evsel> event_pmu
+%type <list_evsel> event_legacy_hardware
%type <list_evsel> event_legacy_symbol
%type <list_evsel> event_legacy_cache
%type <list_evsel> event_legacy_mem
@@ -104,13 +103,14 @@ static void free_list_evsel(struct list_head* list_evsel)
%destructor { free_list_evsel ($$); } <list_evsel>
%type <tracepoint_name> tracepoint_name
%destructor { free ($$.sys); free ($$.event); } <tracepoint_name>
-%type <hardware_term> PE_TERM_HW
-%destructor { free ($$.str); } <hardware_term>
+%type <hardware_event> PE_TERM_HW
+%destructor { free ($$.str); } <hardware_event>
%union
{
char *str;
u64 num;
+ struct parse_events_modifier mod;
enum parse_events__term_type term_type;
struct list_head *list_evsel;
struct parse_events_terms *list_terms;
@@ -119,13 +119,17 @@ static void free_list_evsel(struct list_head* list_evsel)
char *sys;
char *event;
} tracepoint_name;
- struct hardware_term {
+ struct hardware_event {
char *str;
u64 num;
- } hardware_term;
+ } hardware_event;
}
%%
+ /*
+ * Entry points. We are either parsing events or terminals. Just terminal
+ * parsing is used for parsing events in sysfs.
+ */
start:
PE_START_EVENTS start_events
|
@@ -133,31 +137,36 @@ PE_START_TERMS start_terms
start_events: groups
{
+ /* Take the parsed events, groups.. and place into parse_state. */
+ struct list_head *groups = $1;
struct parse_events_state *parse_state = _parse_state;
- /* frees $1 */
- parse_events_update_lists($1, &parse_state->list);
+ list_splice_tail(groups, &parse_state->list);
+ free(groups);
}
-groups:
+groups: /* A list of groups or events. */
groups ',' group
{
- struct list_head *list = $1;
- struct list_head *group = $3;
+ /* Merge group into the list of events/groups. */
+ struct list_head *groups = $1;
+ struct list_head *group = $3;
- /* frees $3 */
- parse_events_update_lists(group, list);
- $$ = list;
+ list_splice_tail(group, groups);
+ free(group);
+ $$ = groups;
}
|
groups ',' event
{
- struct list_head *list = $1;
+ /* Merge event into the list of events/groups. */
+ struct list_head *groups = $1;
struct list_head *event = $3;
- /* frees $3 */
- parse_events_update_lists(event, list);
- $$ = list;
+
+ list_splice_tail(event, groups);
+ free(event);
+ $$ = groups;
}
|
group
@@ -167,20 +176,13 @@ event
group:
group_def ':' PE_MODIFIER_EVENT
{
+ /* Apply the modifier to the events in the group_def. */
struct list_head *list = $1;
int err;
- err = parse_events__modifier_group(list, $3);
- free($3);
- if (err) {
- struct parse_events_state *parse_state = _parse_state;
- struct parse_events_error *error = parse_state->error;
-
- parse_events_error__handle(error, @3.first_column,
- strdup("Bad modifier"), NULL);
- free_list_evsel(list);
+ err = parse_events__modifier_group(_parse_state, &@3, list, $3);
+ if (err)
YYABORT;
- }
$$ = list;
}
|
@@ -191,7 +193,10 @@ PE_NAME '{' events '}'
{
struct list_head *list = $3;
- /* Takes ownership of $1. */
+ /*
+ * Set the first entry of list to be the leader. Set the group name on
+ * the leader to $1 taking ownership.
+ */
parse_events__set_leader($1, list);
$$ = list;
}
@@ -200,6 +205,7 @@ PE_NAME '{' events '}'
{
struct list_head *list = $2;
+ /* Set the first entry of list to be the leader clearing the group name. */
parse_events__set_leader(NULL, list);
$$ = list;
}
@@ -207,12 +213,12 @@ PE_NAME '{' events '}'
events:
events ',' event
{
+ struct list_head *events = $1;
struct list_head *event = $3;
- struct list_head *list = $1;
- /* frees $3 */
- parse_events_update_lists(event, list);
- $$ = list;
+ list_splice_tail(event, events);
+ free(event);
+ $$ = events;
}
|
event
@@ -230,17 +236,9 @@ event_name PE_MODIFIER_EVENT
* (there could be more events added for multiple tracepoint
* definitions via '*?'.
*/
- err = parse_events__modifier_event(list, $2, false);
- free($2);
- if (err) {
- struct parse_events_state *parse_state = _parse_state;
- struct parse_events_error *error = parse_state->error;
-
- parse_events_error__handle(error, @2.first_column,
- strdup("Bad modifier"), NULL);
- free_list_evsel(list);
+ err = parse_events__modifier_event(_parse_state, &@2, list, $2);
+ if (err)
YYABORT;
- }
$$ = list;
}
|
@@ -249,10 +247,14 @@ event_name
event_name:
PE_EVENT_NAME event_def
{
- int err;
+ /*
+ * When an event is parsed the text is rewound and the entire text of
+ * the event is set to the str of PE_EVENT_NAME token matched here. If
+ * no name was on an event via a term, set the name to the entire text
+ * taking ownership of the allocation.
+ */
+ int err = parse_events__set_default_name($2, $1);
- err = parse_events_name($2, $1);
- free($1);
if (err) {
free_list_evsel($2);
YYNOMEM;
@@ -263,6 +265,7 @@ PE_EVENT_NAME event_def
event_def
event_def: event_pmu |
+ event_legacy_hardware |
event_legacy_symbol |
event_legacy_cache sep_dc |
event_legacy_mem sep_dc |
@@ -273,78 +276,15 @@ event_def: event_pmu |
event_pmu:
PE_NAME opt_pmu_config
{
- struct parse_events_state *parse_state = _parse_state;
/* List of created evsels. */
struct list_head *list = NULL;
- char *pattern = NULL;
+ int err = parse_events_multi_pmu_add_or_add_pmu(_parse_state, $1, $2, &list, &@1);
-#define CLEANUP \
- do { \
- parse_events_terms__delete($2); \
- free(list); \
- free($1); \
- free(pattern); \
- } while(0)
-
- list = alloc_list();
- if (!list) {
- CLEANUP;
- YYNOMEM;
- }
- /* Attempt to add to list assuming $1 is a PMU name. */
- if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false, &@1)) {
- struct perf_pmu *pmu = NULL;
- int ok = 0;
-
- /* Failure to add, try wildcard expansion of $1 as a PMU name. */
- if (asprintf(&pattern, "%s*", $1) < 0) {
- CLEANUP;
- YYNOMEM;
- }
-
- while ((pmu = perf_pmus__scan(pmu)) != NULL) {
- const char *name = pmu->name;
-
- if (parse_events__filter_pmu(parse_state, pmu))
- continue;
-
- if (!strncmp(name, "uncore_", 7) &&
- strncmp($1, "uncore_", 7))
- name += 7;
- if (!perf_pmu__match(pattern, name, $1) ||
- !perf_pmu__match(pattern, pmu->alias_name, $1)) {
- bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
-
- if (!parse_events_add_pmu(parse_state, list, pmu->name, $2,
- auto_merge_stats, &@1)) {
- ok++;
- parse_state->wild_card_pmus = true;
- }
- }
- }
-
- if (!ok) {
- /* Failure to add, assume $1 is an event name. */
- zfree(&list);
- ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list, &@1);
- }
- if (!ok) {
- struct parse_events_error *error = parse_state->error;
- char *help;
-
- if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", $1) < 0)
- help = NULL;
- parse_events_error__handle(error, @1.first_column,
- strdup("Bad event or PMU"),
- help);
- CLEANUP;
- YYABORT;
- }
- }
+ parse_events_terms__delete($2);
+ free($1);
+ if (err)
+ PE_ABORT(err);
$$ = list;
- list = NULL;
- CLEANUP;
-#undef CLEANUP
}
|
PE_NAME sep_dc
@@ -352,7 +292,7 @@ PE_NAME sep_dc
struct list_head *list;
int err;
- err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list, &@1);
+ err = parse_events_multi_pmu_add(_parse_state, $1, PERF_COUNT_HW_MAX, NULL, &list, &@1);
if (err < 0) {
struct parse_events_state *parse_state = _parse_state;
struct parse_events_error *error = parse_state->error;
@@ -368,24 +308,45 @@ PE_NAME sep_dc
$$ = list;
}
-value_sym:
-PE_VALUE_SYM_HW
+event_legacy_hardware:
+PE_TERM_HW opt_pmu_config
+{
+ /* List of created evsels. */
+ struct list_head *list = NULL;
+ int err = parse_events_multi_pmu_add(_parse_state, $1.str, $1.num, $2, &list, &@1);
+
+ free($1.str);
+ parse_events_terms__delete($2);
+ if (err)
+ PE_ABORT(err);
+
+ $$ = list;
+}
|
-PE_VALUE_SYM_SW
+PE_TERM_HW sep_dc
+{
+ struct list_head *list;
+ int err;
+
+ err = parse_events_multi_pmu_add(_parse_state, $1.str, $1.num, NULL, &list, &@1);
+ free($1.str);
+ if (err)
+ PE_ABORT(err);
+ $$ = list;
+}
event_legacy_symbol:
-value_sym '/' event_config '/'
+PE_VALUE_SYM_SW '/' event_config '/'
{
struct list_head *list;
- int type = $1 >> 16;
- int config = $1 & 255;
int err;
- bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
list = alloc_list();
if (!list)
YYNOMEM;
- err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard);
+ err = parse_events_add_numeric(_parse_state, list,
+ /*type=*/PERF_TYPE_SOFTWARE, /*config=*/$1,
+ $3, /*wildcard=*/false);
parse_events_terms__delete($3);
if (err) {
free_list_evsel(list);
@@ -394,18 +355,17 @@ value_sym '/' event_config '/'
$$ = list;
}
|
-value_sym sep_slash_slash_dc
+PE_VALUE_SYM_SW sep_slash_slash_dc
{
struct list_head *list;
- int type = $1 >> 16;
- int config = $1 & 255;
- bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
int err;
list = alloc_list();
if (!list)
YYNOMEM;
- err = parse_events_add_numeric(_parse_state, list, type, config, /*head_config=*/NULL, wildcard);
+ err = parse_events_add_numeric(_parse_state, list,
+ /*type=*/PERF_TYPE_SOFTWARE, /*config=*/$1,
+ /*head_config=*/NULL, /*wildcard=*/false);
if (err)
PE_ABORT(err);
$$ = list;
@@ -666,6 +626,11 @@ event_term
}
name_or_raw: PE_RAW | PE_NAME | PE_LEGACY_CACHE
+|
+PE_TERM_HW
+{
+ $$ = $1.str;
+}
event_term:
PE_RAW
@@ -707,20 +672,6 @@ name_or_raw '=' PE_VALUE
$$ = term;
}
|
-name_or_raw '=' PE_TERM_HW
-{
- struct parse_events_term *term;
- int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
- $1, $3.str, &@1, &@3);
-
- if (err) {
- free($1);
- free($3.str);
- PE_ABORT(err);
- }
- $$ = term;
-}
-|
PE_LEGACY_CACHE
{
struct parse_events_term *term;
@@ -773,18 +724,6 @@ PE_TERM '=' name_or_raw
$$ = term;
}
|
-PE_TERM '=' PE_TERM_HW
-{
- struct parse_events_term *term;
- int err = parse_events_term__str(&term, $1, /*config=*/NULL, $3.str, &@1, &@3);
-
- if (err) {
- free($3.str);
- PE_ABORT(err);
- }
- $$ = term;
-}
-|
PE_TERM '=' PE_TERM
{
struct parse_events_term *term;
@@ -845,9 +784,15 @@ sep_slash_slash_dc: '/' '/' | ':' |
%%
-void parse_events_error(YYLTYPE *loc, void *parse_state,
+void parse_events_error(YYLTYPE *loc, void *_parse_state,
void *scanner __maybe_unused,
char const *msg __maybe_unused)
{
- parse_events_evlist_error(parse_state, loc->last_column, "parser error");
+ struct parse_events_state *parse_state = _parse_state;
+
+ if (!parse_state->error || !list_empty(&parse_state->error->list))
+ return;
+
+ parse_events_error__handle(parse_state->error, loc->last_column,
+ strdup("Unrecognized input"), NULL);
}
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 8f04d3b7f3ec78..59fbbba7969740 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -7,6 +7,8 @@
#include <linux/types.h>
#include <linux/perf_event.h>
#include "util/evsel_fprintf.h"
+#include "util/pmu.h"
+#include "util/pmus.h"
#include "trace-event.h"
struct bit_names {
@@ -75,9 +77,12 @@ static void __p_read_format(char *buf, size_t size, u64 value)
}
#define ENUM_ID_TO_STR_CASE(x) case x: return (#x);
-static const char *stringify_perf_type_id(u64 value)
+static const char *stringify_perf_type_id(struct perf_pmu *pmu, u32 type)
{
- switch (value) {
+ if (pmu)
+ return pmu->name;
+
+ switch (type) {
ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE)
ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE)
ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT)
@@ -175,9 +180,9 @@ do { \
#define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64)
#define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64)
-static void __p_type_id(char *buf, size_t size, u64 value)
+static void __p_type_id(struct perf_pmu *pmu, char *buf, size_t size, u64 value)
{
- print_id_unsigned(stringify_perf_type_id(value));
+ print_id_unsigned(stringify_perf_type_id(pmu, value));
}
static void __p_config_hw_id(char *buf, size_t size, u64 value)
@@ -217,8 +222,14 @@ static void __p_config_tracepoint_id(char *buf, size_t size, u64 value)
}
#endif
-static void __p_config_id(char *buf, size_t size, u32 type, u64 value)
+static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 value)
{
+ const char *name = perf_pmu__name_from_config(pmu, value);
+
+ if (name) {
+ print_id_hex(name);
+ return;
+ }
switch (type) {
case PERF_TYPE_HARDWARE:
return __p_config_hw_id(buf, size, value);
@@ -246,8 +257,8 @@ static void __p_config_id(char *buf, size_t size, u32 type, u64 value)
#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val)
-#define p_type_id(val) __p_type_id(buf, BUF_SIZE, val)
-#define p_config_id(val) __p_config_id(buf, BUF_SIZE, attr->type, val)
+#define p_type_id(val) __p_type_id(pmu, buf, BUF_SIZE, val)
+#define p_config_id(val) __p_config_id(pmu, buf, BUF_SIZE, attr->type, val)
#define PRINT_ATTRn(_n, _f, _p, _a) \
do { \
@@ -262,6 +273,7 @@ do { \
int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
attr__fprintf_f attr__fprintf, void *priv)
{
+ struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
char buf[BUF_SIZE];
int ret = 0;
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index f39cbbc1a7ec1d..74dd5bd49d9a86 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -518,7 +518,8 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
unit = pe->unit;
perpkg = pe->perpkg;
deprecated = pe->deprecated;
- pmu_name = pe->pmu;
+ if (pe->pmu && strcmp(pe->pmu, "default_core"))
+ pmu_name = pe->pmu;
}
alias = zalloc(sizeof(*alias));
@@ -1602,6 +1603,62 @@ bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name)
return false;
}
+int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_callback cb)
+{
+ static const char *const terms[] = {
+ "config=0..0xffffffffffffffff",
+ "config1=0..0xffffffffffffffff",
+ "config2=0..0xffffffffffffffff",
+ "config3=0..0xffffffffffffffff",
+ "name=string",
+ "period=number",
+ "freq=number",
+ "branch_type=(u|k|hv|any|...)",
+ "time",
+ "call-graph=(fp|dwarf|lbr)",
+ "stack-size=number",
+ "max-stack=number",
+ "nr=number",
+ "inherit",
+ "no-inherit",
+ "overwrite",
+ "no-overwrite",
+ "percore",
+ "aux-output",
+ "aux-sample-size=number",
+ };
+ struct perf_pmu_format *format;
+ int ret;
+
+ /*
+ * max-events and driver-config are missing above as are the internal
+ * types user, metric-id, raw, legacy cache and hardware. Assert against
+ * the enum parse_events__term_type so they are kept in sync.
+ */
+ _Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 6,
+ "perf_pmu__for_each_format()'s terms must be kept in sync with enum parse_events__term_type");
+ list_for_each_entry(format, &pmu->format, list) {
+ perf_pmu_format__load(pmu, format);
+ ret = cb(state, format->name, (int)format->value, format->bits);
+ if (ret)
+ return ret;
+ }
+ if (!pmu->is_core)
+ return 0;
+
+ for (size_t i = 0; i < ARRAY_SIZE(terms); i++) {
+ int config = PERF_PMU_FORMAT_VALUE_CONFIG;
+
+ if (i < PERF_PMU_FORMAT_VALUE_CONFIG_END)
+ config = i;
+
+ ret = cb(state, terms[i], config, /*bits=*/NULL);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
bool is_pmu_core(const char *name)
{
return !strcmp(name, "cpu") || !strcmp(name, "cpum_cf") || is_sysfs_pmu_core(name);
@@ -1696,8 +1753,12 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
pmu_add_cpu_aliases(pmu);
list_for_each_entry(event, &pmu->aliases, list) {
size_t buf_used;
+ int pmu_name_len;
info.pmu_name = event->pmu_name ?: pmu->name;
+ pmu_name_len = skip_duplicate_pmus
+ ? pmu_name_len_no_suffix(info.pmu_name, /*num=*/NULL)
+ : (int)strlen(info.pmu_name);
info.alias = NULL;
if (event->desc) {
info.name = event->name;
@@ -1722,7 +1783,7 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
info.encoding_desc = buf + buf_used;
parse_events_terms__to_strbuf(&event->terms, &sb);
buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
- "%s/%s/", info.pmu_name, sb.buf) + 1;
+ "%.*s/%s/", pmu_name_len, info.pmu_name, sb.buf) + 1;
info.topic = event->topic;
info.str = sb.buf;
info.deprecated = event->deprecated;
@@ -2003,18 +2064,29 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
name ?: "N/A", buf, config_name, config);
}
-int perf_pmu__match(const char *pattern, const char *name, const char *tok)
+bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok)
{
- if (!name)
- return -1;
+ const char *name = pmu->name;
+ bool need_fnmatch = strchr(tok, '*') != NULL;
- if (fnmatch(pattern, name, 0))
- return -1;
+ if (!strncmp(tok, "uncore_", 7))
+ tok += 7;
+ if (!strncmp(name, "uncore_", 7))
+ name += 7;
- if (tok && !perf_pmu__match_ignoring_suffix(name, tok))
- return -1;
+ if (perf_pmu__match_ignoring_suffix(name, tok) ||
+ (need_fnmatch && !fnmatch(tok, name, 0)))
+ return true;
- return 0;
+ name = pmu->alias_name;
+ if (!name)
+ return false;
+
+ if (!strncmp(name, "uncore_", 7))
+ name += 7;
+
+ return perf_pmu__match_ignoring_suffix(name, tok) ||
+ (need_fnmatch && !fnmatch(tok, name, 0));
}
double __weak perf_pmu__cpu_slots_per_cycle(void)
@@ -2085,3 +2157,21 @@ void perf_pmu__delete(struct perf_pmu *pmu)
zfree(&pmu->id);
free(pmu);
}
+
+const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config)
+{
+ struct perf_pmu_alias *event;
+
+ if (!pmu)
+ return NULL;
+
+ pmu_add_cpu_aliases(pmu);
+ list_for_each_entry(event, &pmu->aliases, list) {
+ struct perf_event_attr attr = {.config = 0,};
+ int ret = perf_pmu__config(pmu, &attr, &event->terms, NULL);
+
+ if (ret == 0 && config == attr.config)
+ return event->name;
+ }
+ return NULL;
+}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index e35d985206db51..93d03bd3ecbeb2 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -196,6 +196,8 @@ struct pmu_event_info {
};
typedef int (*pmu_event_callback)(void *state, struct pmu_event_info *info);
+typedef int (*pmu_format_callback)(void *state, const char *name, int config,
+ const unsigned long *bits);
void pmu_add_sys_aliases(struct perf_pmu *pmu);
int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
@@ -215,6 +217,7 @@ int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, p
int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load);
void perf_pmu_format__set_value(void *format, int config, unsigned long *bits);
bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name);
+int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_callback cb);
bool is_pmu_core(const char *name);
bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu);
@@ -260,7 +263,7 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
const char *config_name);
void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
-int perf_pmu__match(const char *pattern, const char *name, const char *tok);
+bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok);
double perf_pmu__cpu_slots_per_cycle(void);
int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
@@ -273,5 +276,6 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus);
void perf_pmu__delete(struct perf_pmu *pmu);
struct perf_pmu *perf_pmus__find_core_pmu(void);
+const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config);
#endif /* __PMU_H */
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index 16505071d362f4..2fd369e45832b6 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -16,6 +16,7 @@
#include "pmus.h"
#include "pmu.h"
#include "print-events.h"
+#include "strbuf.h"
/*
* core_pmus: A PMU belongs to core_pmus if it's name is "cpu" or it's sysfs
@@ -503,6 +504,99 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
zfree(&aliases);
}
+struct build_format_string_args {
+ struct strbuf short_string;
+ struct strbuf long_string;
+ int num_formats;
+};
+
+static int build_format_string(void *state, const char *name, int config,
+ const unsigned long *bits)
+{
+ struct build_format_string_args *args = state;
+ unsigned int num_bits;
+ int ret1, ret2 = 0;
+
+ (void)config;
+ args->num_formats++;
+ if (args->num_formats > 1) {
+ strbuf_addch(&args->long_string, ',');
+ if (args->num_formats < 4)
+ strbuf_addch(&args->short_string, ',');
+ }
+ num_bits = bits ? bitmap_weight(bits, PERF_PMU_FORMAT_BITS) : 0;
+ if (num_bits <= 1) {
+ ret1 = strbuf_addf(&args->long_string, "%s", name);
+ if (args->num_formats < 4)
+ ret2 = strbuf_addf(&args->short_string, "%s", name);
+ } else if (num_bits > 8) {
+ ret1 = strbuf_addf(&args->long_string, "%s=0..0x%llx", name,
+ ULLONG_MAX >> (64 - num_bits));
+ if (args->num_formats < 4) {
+ ret2 = strbuf_addf(&args->short_string, "%s=0..0x%llx", name,
+ ULLONG_MAX >> (64 - num_bits));
+ }
+ } else {
+ ret1 = strbuf_addf(&args->long_string, "%s=0..%llu", name,
+ ULLONG_MAX >> (64 - num_bits));
+ if (args->num_formats < 4) {
+ ret2 = strbuf_addf(&args->short_string, "%s=0..%llu", name,
+ ULLONG_MAX >> (64 - num_bits));
+ }
+ }
+ return ret1 < 0 ? ret1 : (ret2 < 0 ? ret2 : 0);
+}
+
+void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, void *print_state)
+{
+ bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
+ struct perf_pmu *(*scan_fn)(struct perf_pmu *);
+ struct perf_pmu *pmu = NULL;
+
+ if (skip_duplicate_pmus)
+ scan_fn = perf_pmus__scan_skip_duplicates;
+ else
+ scan_fn = perf_pmus__scan;
+
+ while ((pmu = scan_fn(pmu)) != NULL) {
+ struct build_format_string_args format_args = {
+ .short_string = STRBUF_INIT,
+ .long_string = STRBUF_INIT,
+ .num_formats = 0,
+ };
+ int len = pmu_name_len_no_suffix(pmu->name, /*num=*/NULL);
+ const char *desc = "(see 'man perf-list' or 'man perf-record' on how to encode it)";
+
+ if (!pmu->is_core)
+ desc = NULL;
+
+ strbuf_addf(&format_args.short_string, "%.*s/", len, pmu->name);
+ strbuf_addf(&format_args.long_string, "%.*s/", len, pmu->name);
+ perf_pmu__for_each_format(pmu, &format_args, build_format_string);
+
+ if (format_args.num_formats > 3)
+ strbuf_addf(&format_args.short_string, ",.../modifier");
+ else
+ strbuf_addf(&format_args.short_string, "/modifier");
+
+ strbuf_addf(&format_args.long_string, "/modifier");
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ format_args.short_string.buf,
+ /*event_alias=*/NULL,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ "Raw event descriptor",
+ desc,
+ /*long_desc=*/NULL,
+ format_args.long_string.buf);
+
+ strbuf_release(&format_args.short_string);
+ strbuf_release(&format_args.long_string);
+ }
+}
+
bool perf_pmus__have_event(const char *pname, const char *name)
{
struct perf_pmu *pmu = perf_pmus__find(pname);
diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h
index 94d2a08d894b7d..eec599d8aebda6 100644
--- a/tools/perf/util/pmus.h
+++ b/tools/perf/util/pmus.h
@@ -18,6 +18,7 @@ struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu);
const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str);
void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state);
+void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, void *print_state);
bool perf_pmus__have_event(const char *pname, const char *name);
int perf_pmus__num_core_pmus(void);
bool perf_pmus__supports_extended_type(void);
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index 7b54e93854428a..3f38c27f015761 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -9,6 +9,7 @@
#include <unistd.h>
#include <api/fs/tracing_path.h>
+#include <api/io.h>
#include <linux/stddef.h>
#include <linux/perf_event.h>
#include <linux/zalloc.h>
@@ -38,7 +39,7 @@ static const char * const event_type_descriptors[] = {
"Software event",
"Tracepoint event",
"Hardware cache event",
- "Raw hardware event descriptor",
+ "Raw event descriptor",
"Hardware breakpoint",
};
@@ -92,34 +93,48 @@ void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unus
evt_items = scandirat(events_fd, sys_dirent->d_name, &evt_namelist, NULL, alphasort);
for (int j = 0; j < evt_items; j++) {
+ /*
+ * Buffer sized at twice the max filename length + 1
+ * separator + 1 \0 terminator.
+ */
+ char buf[NAME_MAX * 2 + 2];
+ /* 16 possible hex digits and 22 other characters and \0. */
+ char encoding[16 + 22];
struct dirent *evt_dirent = evt_namelist[j];
- char evt_path[MAXPATHLEN];
- int evt_fd;
+ struct io id;
+ __u64 config;
if (evt_dirent->d_type != DT_DIR ||
!strcmp(evt_dirent->d_name, ".") ||
!strcmp(evt_dirent->d_name, ".."))
goto next_evt;
- snprintf(evt_path, sizeof(evt_path), "%s/id", evt_dirent->d_name);
- evt_fd = openat(dir_fd, evt_path, O_RDONLY);
- if (evt_fd < 0)
+ snprintf(buf, sizeof(buf), "%s/id", evt_dirent->d_name);
+ io__init(&id, openat(dir_fd, buf, O_RDONLY), buf, sizeof(buf));
+
+ if (id.fd < 0)
+ goto next_evt;
+
+ if (io__get_dec(&id, &config) < 0) {
+ close(id.fd);
goto next_evt;
- close(evt_fd);
+ }
+ close(id.fd);
- snprintf(evt_path, MAXPATHLEN, "%s:%s",
+ snprintf(buf, sizeof(buf), "%s:%s",
sys_dirent->d_name, evt_dirent->d_name);
+ snprintf(encoding, sizeof(encoding), "tracepoint/config=0x%llx/", config);
print_cb->print_event(print_state,
/*topic=*/NULL,
- /*pmu_name=*/NULL,
- evt_path,
+ /*pmu_name=*/NULL, /* really "tracepoint" */
+ /*event_name=*/buf,
/*event_alias=*/NULL,
/*scale_unit=*/NULL,
/*deprecated=*/false,
"Tracepoint event",
/*desc=*/NULL,
/*long_desc=*/NULL,
- /*encoding_desc=*/NULL);
+ encoding);
next_evt:
free(evt_namelist[j]);
}
@@ -401,8 +416,6 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta
*/
void print_events(const struct print_callbacks *print_cb, void *print_state)
{
- char *tmp;
-
print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE,
event_symbols_hw, PERF_COUNT_HW_MAX);
print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE,
@@ -426,21 +439,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state)
/*long_desc=*/NULL,
/*encoding_desc=*/NULL);
- if (asprintf(&tmp, "%s/t1=v1[,t2=v2,t3 ...]/modifier",
- perf_pmus__scan_core(/*pmu=*/NULL)->name) > 0) {
- print_cb->print_event(print_state,
- /*topic=*/NULL,
- /*pmu_name=*/NULL,
- tmp,
- /*event_alias=*/NULL,
- /*scale_unit=*/NULL,
- /*deprecated=*/false,
- event_type_descriptors[PERF_TYPE_RAW],
- "(see 'man perf-list' on how to encode it)",
- /*long_desc=*/NULL,
- /*encoding_desc=*/NULL);
- free(tmp);
- }
+ perf_pmus__print_raw_pmu_events(print_cb, print_state);
print_cb->print_event(print_state,
/*topic=*/NULL,
diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
index 459e0e93d7b1b2..aab11d8e1b1d2a 100644
--- a/tools/perf/util/print_insn.c
+++ b/tools/perf/util/print_insn.c
@@ -4,6 +4,7 @@
*
* Author(s): Changbin Du <changbin.du@huawei.com>
*/
+#include <inttypes.h>
#include <string.h>
#include <stdbool.h>
#include "debug.h"
@@ -12,6 +13,9 @@
#include "machine.h"
#include "thread.h"
#include "print_insn.h"
+#include "dump-insn.h"
+#include "map.h"
+#include "dso.h"
size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
{
@@ -28,12 +32,12 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
#ifdef HAVE_LIBCAPSTONE_SUPPORT
#include <capstone/capstone.h>
-static int capstone_init(struct machine *machine, csh *cs_handle)
+static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
{
cs_arch arch;
cs_mode mode;
- if (machine__is(machine, "x86_64")) {
+ if (machine__is(machine, "x86_64") && is64) {
arch = CS_ARCH_X86;
mode = CS_MODE_64;
} else if (machine__normalized_is(machine, "x86")) {
@@ -69,8 +73,8 @@ static int capstone_init(struct machine *machine, csh *cs_handle)
return 0;
}
-static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread,
- cs_insn *insn, FILE *fp)
+static size_t print_insn_x86(struct thread *thread, u8 cpumode, cs_insn *insn,
+ int print_opts, FILE *fp)
{
struct addr_location al;
size_t printed = 0;
@@ -80,9 +84,11 @@ static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread,
addr_location__init(&al);
if (op->type == X86_OP_IMM &&
- thread__find_symbol(thread, sample->cpumode, op->imm, &al)) {
+ thread__find_symbol(thread, cpumode, op->imm, &al)) {
printed += fprintf(fp, "%s ", insn[0].mnemonic);
printed += symbol__fprintf_symname_offs(al.sym, &al, fp);
+ if (print_opts & PRINT_INSN_IMM_HEX)
+ printed += fprintf(fp, " [%#" PRIx64 "]", op->imm);
addr_location__exit(&al);
return printed;
}
@@ -93,42 +99,71 @@ static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread,
return printed;
}
-size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
- struct machine *machine, FILE *fp)
+static bool is64bitip(struct machine *machine, struct addr_location *al)
{
- csh cs_handle;
+ const struct dso *dso = al->map ? map__dso(al->map) : NULL;
+
+ if (dso)
+ return dso->is_64_bit;
+
+ return machine__is(machine, "x86_64") ||
+ machine__normalized_is(machine, "arm64") ||
+ machine__normalized_is(machine, "s390");
+}
+
+ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode,
+ bool is64bit, const uint8_t *code, size_t code_size,
+ uint64_t ip, int *lenp, int print_opts, FILE *fp)
+{
+ size_t printed;
cs_insn *insn;
+ csh cs_handle;
size_t count;
- size_t printed = 0;
int ret;
/* TODO: Try to initiate capstone only once but need a proper place. */
- ret = capstone_init(machine, &cs_handle);
- if (ret < 0) {
- /* fallback */
- return sample__fprintf_insn_raw(sample, fp);
- }
+ ret = capstone_init(machine, &cs_handle, is64bit);
+ if (ret < 0)
+ return ret;
- count = cs_disasm(cs_handle, (uint8_t *)sample->insn, sample->insn_len,
- sample->ip, 1, &insn);
+ count = cs_disasm(cs_handle, code, code_size, ip, 1, &insn);
if (count > 0) {
if (machine__normalized_is(machine, "x86"))
- printed += print_insn_x86(sample, thread, &insn[0], fp);
+ printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp);
else
- printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
+ printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
+ if (lenp)
+ *lenp = insn->size;
cs_free(insn, count);
} else {
- printed += fprintf(fp, "illegal instruction");
+ printed = -1;
}
cs_close(&cs_handle);
return printed;
}
+
+size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
+ struct machine *machine, FILE *fp,
+ struct addr_location *al)
+{
+ bool is64bit = is64bitip(machine, al);
+ ssize_t printed;
+
+ printed = fprintf_insn_asm(machine, thread, sample->cpumode, is64bit,
+ (uint8_t *)sample->insn, sample->insn_len,
+ sample->ip, NULL, 0, fp);
+ if (printed < 0)
+ return sample__fprintf_insn_raw(sample, fp);
+
+ return printed;
+}
#else
size_t sample__fprintf_insn_asm(struct perf_sample *sample __maybe_unused,
struct thread *thread __maybe_unused,
struct machine *machine __maybe_unused,
- FILE *fp __maybe_unused)
+ FILE *fp __maybe_unused,
+ struct addr_location *al __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h
index 465bdcfcc2fd6e..07d11af3fc1cbe 100644
--- a/tools/perf/util/print_insn.h
+++ b/tools/perf/util/print_insn.h
@@ -8,9 +8,15 @@
struct perf_sample;
struct thread;
struct machine;
+struct perf_insn;
+
+#define PRINT_INSN_IMM_HEX (1<<0)
size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
- struct machine *machine, FILE *fp);
+ struct machine *machine, FILE *fp, struct addr_location *al);
size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp);
+ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode,
+ bool is64bit, const uint8_t *code, size_t code_size,
+ uint64_t ip, int *lenp, int print_opts, FILE *fp);
#endif /* PERF_PRINT_INSN_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 2a0ad9ecf0a20e..8a73c9464b709b 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -11,6 +11,7 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
+#include <libgen.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
@@ -235,7 +236,7 @@ static int convert_exec_to_group(const char *exec, char **result)
}
}
- ret = e_snprintf(buf, 64, "%s_%s", PERFPROBE_GROUP, ptr1);
+ ret = e_snprintf(buf, sizeof(buf), "%s_%s", PERFPROBE_GROUP, ptr1);
if (ret < 0)
goto out;
@@ -2757,7 +2758,7 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
/* Try no suffix number */
ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : "");
if (ret < 0) {
- pr_debug("snprintf() failed: %d\n", ret);
+ pr_warning("snprintf() failed: %d; the event name nbase='%s' is too long\n", ret, nbase);
goto out;
}
if (!strlist__has_entry(namelist, buf))
@@ -2866,7 +2867,7 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev,
group = PERFPROBE_GROUP;
/* Get an unused new event name */
- ret = get_new_event_name(buf, 64, event, namelist,
+ ret = get_new_event_name(buf, sizeof(buf), event, namelist,
tev->point.retprobe, allow_suffix);
if (ret < 0)
return ret;
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 075c0f79b1b92d..0aeb97c11c0306 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -103,6 +103,16 @@ int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char
return EOF;
}
+const char *perf_pmu__name_from_config(struct perf_pmu *pmu __maybe_unused, u64 config __maybe_unused)
+{
+ return NULL;
+}
+
+struct perf_pmu *perf_pmus__find_by_type(unsigned int type __maybe_unused)
+{
+ return NULL;
+}
+
int perf_pmus__num_core_pmus(void)
{
return 1;
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 87e817b3cf7e9d..e867de8ddaaa41 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -237,7 +237,7 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str)
evsel = evlist__last(temp_evlist);
- if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
+ if (!evlist || perf_cpu_map__is_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) {
struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus();
if (cpus)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index b4f0f60e60a63f..8aa301948de579 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1699,13 +1699,15 @@ static void python_process_stat(struct perf_stat_config *config,
{
struct perf_thread_map *threads = counter->core.threads;
struct perf_cpu_map *cpus = counter->core.cpus;
- int cpu, thread;
- for (thread = 0; thread < perf_thread_map__nr(threads); thread++) {
- for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
- process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
+ for (int thread = 0; thread < perf_thread_map__nr(threads); thread++) {
+ int idx;
+ struct perf_cpu cpu;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
+ process_stat(counter, cpu,
perf_thread_map__pid(threads, thread), tstamp,
- perf_counts(counter->counts, cpu, thread));
+ perf_counts(counter->counts, idx, thread));
}
}
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 06d0bd7fb45999..a10343b9dcd419 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2749,6 +2749,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
int i, err = -1;
struct perf_cpu_map *map;
int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS);
+ struct perf_cpu cpu;
for (i = 0; i < PERF_TYPE_MAX; ++i) {
struct evsel *evsel;
@@ -2770,9 +2771,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
return -1;
}
- for (i = 0; i < perf_cpu_map__nr(map); i++) {
- struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
-
+ perf_cpu_map__for_each_cpu(cpu, i, map) {
if (cpu.cpu >= nr_cpus) {
pr_err("Requested CPU %d too large. "
"Consider raising MAX_NR_CPUS\n", cpu.cpu);
@@ -2917,3 +2916,24 @@ int perf_event__process_id_index(struct perf_session *session,
}
return 0;
}
+
+int perf_session__dsos_hit_all(struct perf_session *session)
+{
+ struct rb_node *nd;
+ int err;
+
+ err = machine__hit_all_dsos(&session->machines.host);
+ if (err)
+ return err;
+
+ for (nd = rb_first_cached(&session->machines.guests); nd;
+ nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+
+ err = machine__hit_all_dsos(pos);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 5064c6ec11e731..3b0256e977a6c1 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -156,6 +156,8 @@ int perf_session__deliver_synth_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample);
+int perf_session__dsos_hit_all(struct perf_session *session);
+
int perf_event__process_id_index(struct perf_session *session,
union perf_event *event);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 92a1bd695e8aa3..add8601c57fd47 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2441,6 +2441,13 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
DIM(PERF_HPP__SAMPLES, "sample"),
DIM(PERF_HPP__PERIOD, "period"),
+ DIM(PERF_HPP__WEIGHT1, "weight1"),
+ DIM(PERF_HPP__WEIGHT2, "weight2"),
+ DIM(PERF_HPP__WEIGHT3, "weight3"),
+ /* aliases for weight_struct */
+ DIM(PERF_HPP__WEIGHT2, "ins_lat"),
+ DIM(PERF_HPP__WEIGHT3, "retire_lat"),
+ DIM(PERF_HPP__WEIGHT3, "p_stage_cyc"),
};
#undef DIM
@@ -3743,26 +3750,29 @@ void sort__setup_elide(FILE *output)
}
}
-int output_field_add(struct perf_hpp_list *list, char *tok)
+int output_field_add(struct perf_hpp_list *list, const char *tok)
{
unsigned int i;
- for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
- struct sort_dimension *sd = &common_sort_dimensions[i];
+ for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+ struct hpp_dimension *hd = &hpp_sort_dimensions[i];
- if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
+ if (strncasecmp(tok, hd->name, strlen(tok)))
continue;
- return __sort_dimension__add_output(list, sd);
+ if (!strcasecmp(tok, "weight"))
+ ui__warning("--fields weight shows the average value unlike in the --sort key.\n");
+
+ return __hpp_dimension__add_output(list, hd);
}
- for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
- struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+ for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+ struct sort_dimension *sd = &common_sort_dimensions[i];
- if (strncasecmp(tok, hd->name, strlen(tok)))
+ if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
continue;
- return __hpp_dimension__add_output(list, hd);
+ return __sort_dimension__add_output(list, sd);
}
for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 6f6b4189a38978..0bd0ee3ae76bd3 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -3,19 +3,9 @@
#define __PERF_SORT_H
#include <regex.h>
#include <stdbool.h>
-#include <linux/list.h>
-#include <linux/rbtree.h>
-#include "map_symbol.h"
-#include "symbol_conf.h"
-#include "callchain.h"
-#include "values.h"
#include "hist.h"
-#include "stat.h"
-#include "spark.h"
struct option;
-struct thread;
-struct annotated_data_type;
extern regex_t parent_regex;
extern const char *sort_order;
@@ -39,175 +29,6 @@ extern struct sort_entry sort_type;
extern const char default_mem_sort_order[];
extern bool chk_double_cl;
-struct res_sample {
- u64 time;
- int cpu;
- int tid;
-};
-
-struct he_stat {
- u64 period;
- u64 period_sys;
- u64 period_us;
- u64 period_guest_sys;
- u64 period_guest_us;
- u32 nr_events;
-};
-
-struct namespace_id {
- u64 dev;
- u64 ino;
-};
-
-struct hist_entry_diff {
- bool computed;
- union {
- /* PERF_HPP__DELTA */
- double period_ratio_delta;
-
- /* PERF_HPP__RATIO */
- double period_ratio;
-
- /* HISTC_WEIGHTED_DIFF */
- s64 wdiff;
-
- /* PERF_HPP_DIFF__CYCLES */
- s64 cycles;
- };
- struct stats stats;
- unsigned long svals[NUM_SPARKS];
-};
-
-struct hist_entry_ops {
- void *(*new)(size_t size);
- void (*free)(void *ptr);
-};
-
-/**
- * struct hist_entry - histogram entry
- *
- * @row_offset - offset from the first callchain expanded to appear on screen
- * @nr_rows - rows expanded in callchain, recalculated on folding/unfolding
- */
-struct hist_entry {
- struct rb_node rb_node_in;
- struct rb_node rb_node;
- union {
- struct list_head node;
- struct list_head head;
- } pairs;
- struct he_stat stat;
- struct he_stat *stat_acc;
- struct map_symbol ms;
- struct thread *thread;
- struct comm *comm;
- struct namespace_id cgroup_id;
- u64 cgroup;
- u64 ip;
- u64 transaction;
- s32 socket;
- s32 cpu;
- u64 code_page_size;
- u64 weight;
- u64 ins_lat;
- u64 p_stage_cyc;
- u8 cpumode;
- u8 depth;
- int mem_type_off;
- struct simd_flags simd_flags;
-
- /* We are added by hists__add_dummy_entry. */
- bool dummy;
- bool leaf;
-
- char level;
- u8 filtered;
-
- u16 callchain_size;
- union {
- /*
- * Since perf diff only supports the stdio output, TUI
- * fields are only accessed from perf report (or perf
- * top). So make it a union to reduce memory usage.
- */
- struct hist_entry_diff diff;
- struct /* for TUI */ {
- u16 row_offset;
- u16 nr_rows;
- bool init_have_children;
- bool unfolded;
- bool has_children;
- bool has_no_entry;
- };
- };
- char *srcline;
- char *srcfile;
- struct symbol *parent;
- struct branch_info *branch_info;
- long time;
- struct hists *hists;
- struct mem_info *mem_info;
- struct block_info *block_info;
- struct kvm_info *kvm_info;
- void *raw_data;
- u32 raw_size;
- int num_res;
- struct res_sample *res_samples;
- void *trace_output;
- struct perf_hpp_list *hpp_list;
- struct hist_entry *parent_he;
- struct hist_entry_ops *ops;
- struct annotated_data_type *mem_type;
- union {
- /* this is for hierarchical entry structure */
- struct {
- struct rb_root_cached hroot_in;
- struct rb_root_cached hroot_out;
- }; /* non-leaf entries */
- struct rb_root sorted_chain; /* leaf entry has callchains */
- };
- struct callchain_root callchain[0]; /* must be last member */
-};
-
-static __pure inline bool hist_entry__has_callchains(struct hist_entry *he)
-{
- return he->callchain_size != 0;
-}
-
-int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width);
-
-static inline bool hist_entry__has_pairs(struct hist_entry *he)
-{
- return !list_empty(&he->pairs.node);
-}
-
-static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he)
-{
- if (hist_entry__has_pairs(he))
- return list_entry(he->pairs.node.next, struct hist_entry, pairs.node);
- return NULL;
-}
-
-static inline void hist_entry__add_pair(struct hist_entry *pair,
- struct hist_entry *he)
-{
- list_add_tail(&pair->pairs.node, &he->pairs.head);
-}
-
-static inline float hist_entry__get_percent_limit(struct hist_entry *he)
-{
- u64 period = he->stat.period;
- u64 total_period = hists__total_period(he->hists);
-
- if (unlikely(total_period == 0))
- return 0;
-
- if (symbol_conf.cumulate_callchain)
- period = he->stat_acc->period;
-
- return period * 100.0 / total_period;
-}
-
enum sort_mode {
SORT_MODE__NORMAL,
SORT_MODE__BRANCH,
@@ -299,15 +120,6 @@ struct sort_entry {
u8 se_width_idx;
};
-struct block_hist {
- struct hists block_hists;
- struct perf_hpp_list block_list;
- struct perf_hpp_fmt block_fmt;
- int block_idx;
- bool valid;
- struct hist_entry he;
-};
-
extern struct sort_entry sort_thread;
struct evlist;
@@ -329,7 +141,7 @@ void reset_dimensions(void);
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
struct evlist *evlist,
int level);
-int output_field_add(struct perf_hpp_list *list, char *tok);
+int output_field_add(struct perf_hpp_list *list, const char *tok);
int64_t
sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right);
int64_t
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index b0bcf92f0f9c37..0bd5467389e462 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -315,7 +315,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
if (!counter->per_pkg)
return 0;
- if (perf_cpu_map__has_any_cpu_or_is_empty(cpus))
+ if (perf_cpu_map__is_any_cpu_or_is_empty(cpus))
return 0;
if (!mask) {
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index d6e5c8787ba23a..fd7a187551bd1e 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -87,6 +87,7 @@ struct perf_stat_config {
bool metric_no_group;
bool metric_no_merge;
bool metric_no_threshold;
+ bool hardware_aware_grouping;
bool stop_read_counter;
bool iostat_run;
char *user_requested_cpu_list;
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 1892e9b6aa7ff5..2b04f47f4db0f0 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -725,26 +725,24 @@ static void scan_core_topology(int *map, struct topology *t, int nr_cpus)
static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
{
- int i;
- int ret = 0;
- struct perf_cpu_map *m;
- struct perf_cpu c;
+ int idx, ret = 0;
+ struct perf_cpu_map *map;
+ struct perf_cpu cpu;
- m = perf_cpu_map__new(s);
- if (!m)
+ map = perf_cpu_map__new(s);
+ if (!map)
return -1;
- for (i = 0; i < perf_cpu_map__nr(m); i++) {
- c = perf_cpu_map__cpu(m, i);
- if (c.cpu >= nr_cpus) {
+ perf_cpu_map__for_each_cpu(cpu, idx, map) {
+ if (cpu.cpu >= nr_cpus) {
ret = -1;
break;
}
- __set_bit(c.cpu, cpumask_bits(b));
+ __set_bit(cpu.cpu, cpumask_bits(b));
}
- perf_cpu_map__put(m);
+ perf_cpu_map__put(map);
return ret;
}
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h
index 8c41f22f42cfbb..791c1ad606c290 100644
--- a/tools/perf/util/values.h
+++ b/tools/perf/util/values.h
@@ -2,6 +2,7 @@
#ifndef __PERF_VALUES_H
#define __PERF_VALUES_H
+#include <stdio.h>
#include <linux/types.h>
struct perf_read_values {
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index df8963796187dc..35532dcbff743d 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -133,8 +133,6 @@ static struct dso *__machine__addnew_vdso(struct machine *machine, const char *s
if (dso != NULL) {
__dsos__add(&machine->dsos, dso);
dso__set_long_name(dso, long_name, false);
- /* Put dso here because __dsos_add already got it */
- dso__put(dso);
}
return dso;
@@ -252,17 +250,15 @@ static struct dso *__machine__findnew_compat(struct machine *machine,
const char *file_name;
struct dso *dso;
- dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true);
+ dso = dsos__find(&machine->dsos, vdso_file->dso_name, true);
if (dso)
- goto out;
+ return dso;
file_name = vdso__get_compat_file(vdso_file);
if (!file_name)
- goto out;
+ return NULL;
- dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
-out:
- return dso;
+ return __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
}
static int __machine__findnew_vdso_compat(struct machine *machine,
@@ -308,21 +304,21 @@ static struct dso *machine__find_vdso(struct machine *machine,
dso_type = machine__thread_dso_type(machine, thread);
switch (dso_type) {
case DSO__TYPE_32BIT:
- dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO32, true);
+ dso = dsos__find(&machine->dsos, DSO__NAME_VDSO32, true);
if (!dso) {
- dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO,
- true);
+ dso = dsos__find(&machine->dsos, DSO__NAME_VDSO,
+ true);
if (dso && dso_type != dso__type(dso, machine))
dso = NULL;
}
break;
case DSO__TYPE_X32BIT:
- dso = __dsos__find(&machine->dsos, DSO__NAME_VDSOX32, true);
+ dso = dsos__find(&machine->dsos, DSO__NAME_VDSOX32, true);
break;
case DSO__TYPE_64BIT:
case DSO__TYPE_UNKNOWN:
default:
- dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+ dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
break;
}
@@ -334,37 +330,33 @@ struct dso *machine__findnew_vdso(struct machine *machine,
{
struct vdso_info *vdso_info;
struct dso *dso = NULL;
+ char *file;
- down_write(&machine->dsos.lock);
if (!machine->vdso_info)
machine->vdso_info = vdso_info__new();
vdso_info = machine->vdso_info;
if (!vdso_info)
- goto out_unlock;
+ return NULL;
dso = machine__find_vdso(machine, thread);
if (dso)
- goto out_unlock;
+ return dso;
#if BITS_PER_LONG == 64
if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
- goto out_unlock;
+ return dso;
#endif
- dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
- if (!dso) {
- char *file;
+ dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+ if (dso)
+ return dso;
- file = get_file(&vdso_info->vdso);
- if (file)
- dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
- }
+ file = get_file(&vdso_info->vdso);
+ if (!file)
+ return NULL;
-out_unlock:
- dso__get(dso);
- up_write(&machine->dsos.lock);
- return dso;
+ return __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
}
bool dso__is_vdso(struct dso *dso)
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 8f08c3fd498d5b..0d3672e5d9ed15 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -67,6 +67,10 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
.PP
\fB--quiet\fP Do not decode and print the system configuration header information.
.PP
++\fB--no-msr\fP Disable all the uses of the MSR driver.
++.PP
++\fB--no-perf\fP Disable all the uses of the perf API.
++.PP
\fB--interval seconds\fP overrides the default 5.0 second measurement interval.
.PP
\fB--num_iterations num\fP number of the measurement iterations.
@@ -125,9 +129,17 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
.PP
-\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
.PP
-\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
+\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBGFXAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
+.PP
+\fBSAM%mc6\fP The percentage of time the SA Media is in the "module C6" state, mc6, during the measurement interval. From /sys/class/drm/card0/gt/gt1/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
+.PP
+\fBSAMMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBSAMAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
.PP
\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters.
.PP
@@ -370,7 +382,7 @@ below the processor's base frequency.
Busy% = MPERF_delta/TSC_delta
-Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval
+Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval
Note that these calculations depend on TSC_delta, so they
are not reliable during intervals when TSC_MHz is not running at the base frequency.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 7a334377f92b97..98256468e24806 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3,7 +3,7 @@
* turbostat -- show CPU frequency and C-state residency
* on modern Intel and AMD processors.
*
- * Copyright (c) 2023 Intel Corporation.
+ * Copyright (c) 2024 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
@@ -36,6 +36,8 @@
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <stdbool.h>
+#include <assert.h>
+#include <linux/kernel.h>
#define UNUSED(x) (void)(x)
@@ -53,9 +55,13 @@
#define NAME_BYTES 20
#define PATH_BYTES 128
+#define MAX_NOFILE 0x8000
+
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
+enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR };
+enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR };
struct msr_counter {
unsigned int msr_num;
@@ -127,6 +133,9 @@ struct msr_counter bic[] = {
{ 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
{ 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAM%mc6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAMMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAMAMHz", "", 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -185,11 +194,14 @@ struct msr_counter bic[] = {
#define BIC_IPC (1ULL << 52)
#define BIC_CORE_THROT_CNT (1ULL << 53)
#define BIC_UNCORE_MHZ (1ULL << 54)
+#define BIC_SAM_mc6 (1ULL << 55)
+#define BIC_SAMMHz (1ULL << 56)
+#define BIC_SAMACTMHz (1ULL << 57)
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
+#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
+#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6)
#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
@@ -204,10 +216,13 @@ unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
+struct amperf_group_fd;
+
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
int *fd_instr_count_percpu;
+struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. */
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };
@@ -242,11 +257,8 @@ char *output_buffer, *outp;
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
-unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
-unsigned int gfx_cur_mhz;
-unsigned int gfx_act_mhz;
unsigned int tj_max;
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
@@ -263,6 +275,28 @@ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;
int ignore_stdin;
+bool no_msr;
+bool no_perf;
+enum amperf_source amperf_source;
+
+enum gfx_sysfs_idx {
+ GFX_rc6,
+ GFX_MHz,
+ GFX_ACTMHz,
+ SAM_mc6,
+ SAM_MHz,
+ SAM_ACTMHz,
+ GFX_MAX
+};
+
+struct gfx_sysfs_info {
+ const char *path;
+ FILE *fp;
+ unsigned int val;
+ unsigned long long val_ull;
+};
+
+static struct gfx_sysfs_info gfx_info[GFX_MAX];
int get_msr(int cpu, off_t offset, unsigned long long *msr);
@@ -652,6 +686,7 @@ static const struct platform_features icx_features = {
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | PC2 | PC6,
.cst_limit = CST_LIMIT_ICX,
+ .has_msr_core_c1_res = 1,
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
@@ -948,6 +983,175 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
+/* Indexes used to map data read from perf and MSRs into global variables */
+enum rapl_rci_index {
+ RAPL_RCI_INDEX_ENERGY_PKG = 0,
+ RAPL_RCI_INDEX_ENERGY_CORES = 1,
+ RAPL_RCI_INDEX_DRAM = 2,
+ RAPL_RCI_INDEX_GFX = 3,
+ RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
+ RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
+ RAPL_RCI_INDEX_CORE_ENERGY = 6,
+ NUM_RAPL_COUNTERS,
+};
+
+enum rapl_unit {
+ RAPL_UNIT_INVALID,
+ RAPL_UNIT_JOULES,
+ RAPL_UNIT_WATTS,
+};
+
+struct rapl_counter_info_t {
+ unsigned long long data[NUM_RAPL_COUNTERS];
+ enum rapl_source source[NUM_RAPL_COUNTERS];
+ unsigned long long flags[NUM_RAPL_COUNTERS];
+ double scale[NUM_RAPL_COUNTERS];
+ enum rapl_unit unit[NUM_RAPL_COUNTERS];
+
+ union {
+ /* Active when source == RAPL_SOURCE_MSR */
+ struct {
+ unsigned long long msr[NUM_RAPL_COUNTERS];
+ unsigned long long msr_mask[NUM_RAPL_COUNTERS];
+ int msr_shift[NUM_RAPL_COUNTERS];
+ };
+ };
+
+ int fd_perf;
+};
+
+/* struct rapl_counter_info_t for each RAPL domain */
+struct rapl_counter_info_t *rapl_counter_info_perdomain;
+
+#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)
+
+struct rapl_counter_arch_info {
+ int feature_mask; /* Mask for testing if the counter is supported on host */
+ const char *perf_subsys;
+ const char *perf_name;
+ unsigned long long msr;
+ unsigned long long msr_mask;
+ int msr_shift; /* Positive mean shift right, negative mean shift left */
+ double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */
+ unsigned int rci_index; /* Maps data from perf counters to global variables */
+ unsigned long long bic;
+ double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */
+ unsigned long long flags;
+};
+
+static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
+ {
+ .feature_mask = RAPL_PKG,
+ .perf_subsys = "power",
+ .perf_name = "energy-pkg",
+ .msr = MSR_PKG_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+ .bic = BIC_PkgWatt | BIC_Pkg_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_AMD_F17H,
+ .perf_subsys = "power",
+ .perf_name = "energy-pkg",
+ .msr = MSR_PKG_ENERGY_STAT,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+ .bic = BIC_PkgWatt | BIC_Pkg_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_CORE_ENERGY_STATUS,
+ .perf_subsys = "power",
+ .perf_name = "energy-cores",
+ .msr = MSR_PP0_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
+ .bic = BIC_CorWatt | BIC_Cor_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_DRAM,
+ .perf_subsys = "power",
+ .perf_name = "energy-ram",
+ .msr = MSR_DRAM_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_dram_energy_units,
+ .rci_index = RAPL_RCI_INDEX_DRAM,
+ .bic = BIC_RAMWatt | BIC_RAM_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_GFX,
+ .perf_subsys = "power",
+ .perf_name = "energy-gpu",
+ .msr = MSR_PP1_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_GFX,
+ .bic = BIC_GFXWatt | BIC_GFX_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_PKG_PERF_STATUS,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_PKG_PERF_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_time_units,
+ .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
+ .bic = BIC_PKG__,
+ .compat_scale = 100.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_DRAM_PERF_STATUS,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_DRAM_PERF_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_time_units,
+ .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
+ .bic = BIC_RAM__,
+ .compat_scale = 100.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_AMD_F17H,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_CORE_ENERGY_STAT,
+ .msr_mask = 0xFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
+ .bic = BIC_CorWatt | BIC_Cor_J,
+ .compat_scale = 1.0,
+ .flags = 0,
+ },
+};
+
+struct rapl_counter {
+ unsigned long long raw_value;
+ enum rapl_unit unit;
+ double scale;
+};
+
struct thread_data {
struct timeval tv_begin;
struct timeval tv_end;
@@ -974,7 +1178,7 @@ struct core_data {
unsigned long long c7;
unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */
unsigned int core_temp_c;
- unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */
+ struct rapl_counter core_energy; /* MSR_CORE_ENERGY_STAT */
unsigned int core_id;
unsigned long long core_throt_cnt;
unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -989,8 +1193,8 @@ struct pkg_data {
unsigned long long pc8;
unsigned long long pc9;
unsigned long long pc10;
- unsigned long long cpu_lpi;
- unsigned long long sys_lpi;
+ long long cpu_lpi;
+ long long sys_lpi;
unsigned long long pkg_wtd_core_c0;
unsigned long long pkg_any_core_c0;
unsigned long long pkg_any_gfxe_c0;
@@ -998,13 +1202,16 @@ struct pkg_data {
long long gfx_rc6_ms;
unsigned int gfx_mhz;
unsigned int gfx_act_mhz;
+ long long sam_mc6_ms;
+ unsigned int sam_mhz;
+ unsigned int sam_act_mhz;
unsigned int package_id;
- unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */
- unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
- unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */
- unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */
- unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
- unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
+ struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */
+ struct rapl_counter energy_dram; /* MSR_DRAM_ENERGY_STATUS */
+ struct rapl_counter energy_cores; /* MSR_PP0_ENERGY_STATUS */
+ struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */
+ struct rapl_counter rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
+ struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
unsigned int pkg_temp_c;
unsigned int uncore_mhz;
unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -1150,6 +1357,38 @@ struct sys_counters {
struct msr_counter *pp;
} sys;
+void free_sys_counters(void)
+{
+ struct msr_counter *p = sys.tp, *pnext = NULL;
+
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ p = sys.cp, pnext = NULL;
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ p = sys.pp, pnext = NULL;
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ sys.added_thread_counters = 0;
+ sys.added_core_counters = 0;
+ sys.added_package_counters = 0;
+ sys.tp = NULL;
+ sys.cp = NULL;
+ sys.pp = NULL;
+}
+
struct system_summary {
struct thread_data threads;
struct core_data cores;
@@ -1280,34 +1519,60 @@ int get_msr_fd(int cpu)
sprintf(pathname, "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY);
if (fd < 0)
- err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+ err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
+ "or run with --no-msr, or run as root", pathname);
fd_percpu[cpu] = fd;
return fd;
}
+static void bic_disable_msr_access(void)
+{
+ const unsigned long bic_msrs =
+ BIC_SMI |
+ BIC_CPU_c1 |
+ BIC_CPU_c3 |
+ BIC_CPU_c6 |
+ BIC_CPU_c7 |
+ BIC_Mod_c6 |
+ BIC_CoreTmp |
+ BIC_Totl_c0 |
+ BIC_Any_c0 |
+ BIC_GFX_c0 |
+ BIC_CPUGFX |
+ BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_PkgTmp;
+
+ bic_enabled &= ~bic_msrs;
+
+ free_sys_counters();
+}
+
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
+ assert(!no_perf);
+
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
}
-static int perf_instr_count_open(int cpu_num)
+static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
{
- struct perf_event_attr pea;
- int fd;
+ struct perf_event_attr attr;
+ const pid_t pid = -1;
+ const unsigned long flags = 0;
- memset(&pea, 0, sizeof(struct perf_event_attr));
- pea.type = PERF_TYPE_HARDWARE;
- pea.size = sizeof(struct perf_event_attr);
- pea.config = PERF_COUNT_HW_INSTRUCTIONS;
+ assert(!no_perf);
- /* counter for cpu_num, including user + kernel and all processes */
- fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
- if (fd == -1) {
- warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
- BIC_NOT_PRESENT(BIC_IPC);
- }
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+
+ attr.type = type;
+ attr.size = sizeof(struct perf_event_attr);
+ attr.config = config;
+ attr.disabled = 0;
+ attr.sample_type = PERF_SAMPLE_IDENTIFIER;
+ attr.read_format = read_format;
+
+ const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
return fd;
}
@@ -1317,7 +1582,7 @@ int get_instr_count_fd(int cpu)
if (fd_instr_count_percpu[cpu])
return fd_instr_count_percpu[cpu];
- fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
+ fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
return fd_instr_count_percpu[cpu];
}
@@ -1326,6 +1591,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
ssize_t retval;
+ assert(!no_msr);
+
retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
if (retval != sizeof *msr)
@@ -1334,6 +1601,21 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
return 0;
}
+int probe_msr(int cpu, off_t offset)
+{
+ ssize_t retval;
+ unsigned long long dummy;
+
+ assert(!no_msr);
+
+ retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
+
+ if (retval != sizeof(dummy))
+ return 1;
+
+ return 0;
+}
+
#define MAX_DEFERRED 16
char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
@@ -1369,6 +1651,8 @@ void help(void)
" Override default 5-second measurement interval\n"
" -J, --Joules displays energy in Joules instead of Watts\n"
" -l, --list list column headers only\n"
+ " -M, --no-msr Disable all uses of the MSR driver\n"
+ " -P, --no-perf Disable all uses of the perf API\n"
" -n, --num_iterations num\n"
" number of the measurement iterations\n"
" -N, --header_iterations num\n"
@@ -1573,6 +1857,15 @@ void print_header(char *delim)
if (DO_BIC(BIC_GFXACTMHz))
outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_SAM_mc6))
+ outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_SAMMHz))
+ outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : ""));
+
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
if (DO_BIC(BIC_Any_c0))
@@ -1671,26 +1964,35 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
+ outp +=
+ sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ t->counter[i], mp->path);
}
}
- if (c) {
+ if (c && is_cpu_first_thread_in_core(t, c, p)) {
outp += sprintf(outp, "core: %d\n", c->core_id);
outp += sprintf(outp, "c3: %016llX\n", c->c3);
outp += sprintf(outp, "c6: %016llX\n", c->c6);
outp += sprintf(outp, "c7: %016llX\n", c->c7);
outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
- outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
+
+ const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
+ const double energy_scale = c->core_energy.scale;
+
+ if (c->core_energy.unit == RAPL_UNIT_JOULES)
+ outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
+ outp +=
+ sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ c->counter[i], mp->path);
}
outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
}
- if (p) {
+ if (p && is_cpu_first_core_in_package(t, c, p)) {
outp += sprintf(outp, "package: %d\n", p->package_id);
outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
@@ -1710,16 +2012,18 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
- outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
- outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
- outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
- outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
- outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
- outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
+ outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
+ outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
+ outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
+ outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
+ outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
+ outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
+ outp +=
+ sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ p->counter[i], mp->path);
}
}
@@ -1728,6 +2032,23 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
return 0;
}
+double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval)
+{
+ assert(desired_unit != RAPL_UNIT_INVALID);
+
+ /*
+ * For now we don't expect anything other than joules,
+ * so just simplify the logic.
+ */
+ assert(c->unit == RAPL_UNIT_JOULES);
+
+ const double scaled = c->raw_value * c->scale;
+
+ if (desired_unit == RAPL_UNIT_WATTS)
+ return scaled / interval;
+ return scaled;
+}
+
/*
* column formatting convention & formats
*/
@@ -1921,9 +2242,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));
/* print per-package data only for 1st core in package */
if (!is_cpu_first_core_in_package(t, c, p))
@@ -1951,6 +2274,24 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_GFXACTMHz))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
+ /* SAMmc6 */
+ if (DO_BIC(BIC_SAM_mc6)) {
+ if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */
+ outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
+ } else {
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ p->sam_mc6_ms / 10.0 / interval_float);
+ }
+ }
+
+ /* SAMMHz */
+ if (DO_BIC(BIC_SAMMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz);
+
+ /* SAMACTMHz */
+ if (DO_BIC(BIC_SAMACTMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz);
+
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
@@ -1976,43 +2317,59 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_Pkgpc10))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
- if (DO_BIC(BIC_CPU_LPI))
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
- if (DO_BIC(BIC_SYS_LPI))
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ if (DO_BIC(BIC_CPU_LPI)) {
+ if (p->cpu_lpi >= 0)
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+ else
+ outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+ }
+ if (DO_BIC(BIC_SYS_LPI)) {
+ if (p->sys_lpi >= 0)
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ else
+ outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+ }
if (DO_BIC(BIC_PkgWatt))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
-
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_GFXWatt))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAMWatt))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- p->energy_dram * rapl_dram_energy_units / interval_float);
+ rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Pkg_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_GFX_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_RAM_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_PKG__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+ rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAM__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+ rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
/* UncMHz */
if (DO_BIC(BIC_UNCORE_MHZ))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
@@ -2121,12 +2478,22 @@ int delta_package(struct pkg_data *new, struct pkg_data *old)
old->gfx_mhz = new->gfx_mhz;
old->gfx_act_mhz = new->gfx_act_mhz;
- old->energy_pkg = new->energy_pkg - old->energy_pkg;
- old->energy_cores = new->energy_cores - old->energy_cores;
- old->energy_gfx = new->energy_gfx - old->energy_gfx;
- old->energy_dram = new->energy_dram - old->energy_dram;
- old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
- old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
+ /* flag an error when mc6 counter resets/wraps */
+ if (old->sam_mc6_ms > new->sam_mc6_ms)
+ old->sam_mc6_ms = -1;
+ else
+ old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms;
+
+ old->sam_mhz = new->sam_mhz;
+ old->sam_act_mhz = new->sam_act_mhz;
+
+ old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value;
+ old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value;
+ old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
+ old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
+ old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
+ old->rapl_dram_perf_status.raw_value =
+ new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2150,7 +2517,7 @@ void delta_core(struct core_data *new, struct core_data *old)
old->core_throt_cnt = new->core_throt_cnt;
old->mc6_us = new->mc6_us - old->mc6_us;
- DELTA_WRAP32(new->core_energy, old->core_energy);
+ DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2277,6 +2644,13 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
return retval;
}
+void rapl_counter_clear(struct rapl_counter *c)
+{
+ c->raw_value = 0;
+ c->scale = 0.0;
+ c->unit = RAPL_UNIT_INVALID;
+}
+
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
@@ -2304,7 +2678,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
c->c7 = 0;
c->mc6_us = 0;
c->core_temp_c = 0;
- c->core_energy = 0;
+ rapl_counter_clear(&c->core_energy);
c->core_throt_cnt = 0;
p->pkg_wtd_core_c0 = 0;
@@ -2325,18 +2699,21 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->cpu_lpi = 0;
p->sys_lpi = 0;
- p->energy_pkg = 0;
- p->energy_dram = 0;
- p->energy_cores = 0;
- p->energy_gfx = 0;
- p->rapl_pkg_perf_status = 0;
- p->rapl_dram_perf_status = 0;
+ rapl_counter_clear(&p->energy_pkg);
+ rapl_counter_clear(&p->energy_dram);
+ rapl_counter_clear(&p->energy_cores);
+ rapl_counter_clear(&p->energy_gfx);
+ rapl_counter_clear(&p->rapl_pkg_perf_status);
+ rapl_counter_clear(&p->rapl_dram_perf_status);
p->pkg_temp_c = 0;
p->gfx_rc6_ms = 0;
p->uncore_mhz = 0;
p->gfx_mhz = 0;
p->gfx_act_mhz = 0;
+ p->sam_mc6_ms = 0;
+ p->sam_mhz = 0;
+ p->sam_act_mhz = 0;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
t->counter[i] = 0;
@@ -2347,6 +2724,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->counter[i] = 0;
}
+void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
+{
+ /* Copy unit and scale from src if dst is not initialized */
+ if (dst->unit == RAPL_UNIT_INVALID) {
+ dst->unit = src->unit;
+ dst->scale = src->scale;
+ }
+
+ assert(dst->unit == src->unit);
+ assert(dst->scale == src->scale);
+
+ dst->raw_value += src->raw_value;
+}
+
int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
@@ -2393,7 +2784,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
- average.cores.core_energy += c->core_energy;
+ rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2428,25 +2819,29 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.packages.cpu_lpi = p->cpu_lpi;
average.packages.sys_lpi = p->sys_lpi;
- average.packages.energy_pkg += p->energy_pkg;
- average.packages.energy_dram += p->energy_dram;
- average.packages.energy_cores += p->energy_cores;
- average.packages.energy_gfx += p->energy_gfx;
+ rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg);
+ rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram);
+ rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores);
+ rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx);
average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
average.packages.uncore_mhz = p->uncore_mhz;
average.packages.gfx_mhz = p->gfx_mhz;
average.packages.gfx_act_mhz = p->gfx_act_mhz;
+ average.packages.sam_mc6_ms = p->sam_mc6_ms;
+ average.packages.sam_mhz = p->sam_mhz;
+ average.packages.sam_act_mhz = p->sam_act_mhz;
average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
- average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
- average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
+ rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
+ rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- if (mp->format == FORMAT_RAW)
- continue;
- average.packages.counter[i] += p->counter[i];
+ if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
+ average.packages.counter[i] = p->counter[i];
+ else
+ average.packages.counter[i] += p->counter[i];
}
return 0;
}
@@ -2578,6 +2973,7 @@ unsigned long long snapshot_sysfs_counter(char *path)
int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
{
if (mp->msr_num != 0) {
+ assert(!no_msr);
if (get_msr(cpu, mp->msr_num, counterp))
return -1;
} else {
@@ -2599,7 +2995,7 @@ unsigned long long get_uncore_mhz(int package, int die)
{
char path[128];
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package,
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package,
die);
return (snapshot_sysfs_counter(path) / 1000);
@@ -2627,6 +3023,9 @@ int get_epb(int cpu)
return epb;
msr_fallback:
+ if (no_msr)
+ return -1;
+
get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
return msr & 0xf;
@@ -2700,6 +3099,351 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt)
return 0;
}
+struct amperf_group_fd {
+ int aperf; /* Also the group descriptor */
+ int mperf;
+};
+
+static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
+{
+ int fdmt;
+ int bytes_read;
+ char buf[64];
+ int ret = -1;
+
+ fdmt = open(path, O_RDONLY, 0);
+ if (fdmt == -1) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ bytes_read = read(fdmt, buf, sizeof(buf) - 1);
+ if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ buf[bytes_read] = '\0';
+
+ if (sscanf(buf, parse_format, value_ptr) != 1) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ ret = 0;
+
+cleanup_and_exit:
+ close(fdmt);
+ return ret;
+}
+
+static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
+{
+ unsigned int v;
+ int status;
+
+ status = read_perf_counter_info(path, parse_format, &v);
+ if (status)
+ v = -1;
+
+ return v;
+}
+
+static unsigned int read_msr_type(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/type";
+ const char *const format = "%u";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_aperf_config(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/events/aperf";
+ const char *const format = "event=%x";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_mperf_config(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/events/mperf";
+ const char *const format = "event=%x";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_perf_type(const char *subsys)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/type";
+ const char *const format = "%u";
+ char path[128];
+
+ snprintf(path, sizeof(path), path_format, subsys);
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_rapl_config(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s";
+ const char *const format = "event=%x";
+ char path[128];
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit";
+ const char *const format = "%s";
+ char path[128];
+ char unit_buffer[16];
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ read_perf_counter_info(path, format, &unit_buffer);
+ if (strcmp("Joules", unit_buffer) == 0)
+ return RAPL_UNIT_JOULES;
+
+ return RAPL_UNIT_INVALID;
+}
+
+static double read_perf_rapl_scale(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale";
+ const char *const format = "%lf";
+ char path[128];
+ double scale;
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ if (read_perf_counter_info(path, format, &scale))
+ return 0.0;
+
+ return scale;
+}
+
+static struct amperf_group_fd open_amperf_fd(int cpu)
+{
+ const unsigned int msr_type = read_msr_type();
+ const unsigned int aperf_config = read_aperf_config();
+ const unsigned int mperf_config = read_mperf_config();
+ struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 };
+
+ fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP);
+ fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP);
+
+ return fds;
+}
+
+static int get_amperf_fd(int cpu)
+{
+ assert(fd_amperf_percpu);
+
+ if (fd_amperf_percpu[cpu].aperf)
+ return fd_amperf_percpu[cpu].aperf;
+
+ fd_amperf_percpu[cpu] = open_amperf_fd(cpu);
+
+ return fd_amperf_percpu[cpu].aperf;
+}
+
+/* Read APERF, MPERF and TSC using the perf API. */
+static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu)
+{
+ union {
+ struct {
+ unsigned long nr_entries;
+ unsigned long aperf;
+ unsigned long mperf;
+ };
+
+ unsigned long as_array[3];
+ } cnt;
+
+ const int fd_amperf = get_amperf_fd(cpu);
+
+ /*
+ * Read the TSC with rdtsc, because we want the absolute value and not
+ * the offset from the start of the counter.
+ */
+ t->tsc = rdtsc();
+
+ const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array));
+
+ if (n != sizeof(cnt.as_array))
+ return -2;
+
+ t->aperf = cnt.aperf * aperf_mperf_multiplier;
+ t->mperf = cnt.mperf * aperf_mperf_multiplier;
+
+ return 0;
+}
+
+/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */
+static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu)
+{
+ unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+ int aperf_mperf_retry_count = 0;
+
+ /*
+ * The TSC, APERF and MPERF must be read together for
+ * APERF/MPERF and MPERF/TSC to give accurate results.
+ *
+ * Unfortunately, APERF and MPERF are read by
+ * individual system call, so delays may occur
+ * between them. If the time to read them
+ * varies by a large amount, we re-read them.
+ */
+
+ /*
+ * This initial dummy APERF read has been seen to
+ * reduce jitter in the subsequent reads.
+ */
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+retry:
+ t->tsc = rdtsc(); /* re-read close to APERF */
+
+ tsc_before = t->tsc;
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+ tsc_between = rdtsc();
+
+ if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
+ return -4;
+
+ tsc_after = rdtsc();
+
+ aperf_time = tsc_between - tsc_before;
+ mperf_time = tsc_after - tsc_between;
+
+ /*
+ * If the system call latency to read APERF and MPERF
+ * differ by more than 2x, then try again.
+ */
+ if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
+ aperf_mperf_retry_count++;
+ if (aperf_mperf_retry_count < 5)
+ goto retry;
+ else
+ warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
+ }
+ aperf_mperf_retry_count = 0;
+
+ t->aperf = t->aperf * aperf_mperf_multiplier;
+ t->mperf = t->mperf * aperf_mperf_multiplier;
+
+ return 0;
+}
+
+size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci)
+{
+ size_t ret = 0;
+
+ for (int i = 0; i < NUM_RAPL_COUNTERS; ++i)
+ if (rci->source[i] == RAPL_SOURCE_PERF)
+ ++ret;
+
+ return ret;
+}
+
+void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx)
+{
+ rc->raw_value = rci->data[idx];
+ rc->unit = rci->unit[idx];
+ rc->scale = rci->scale[idx];
+}
+
+int get_rapl_counters(int cpu, int domain, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long perf_data[NUM_RAPL_COUNTERS + 1];
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain];
+
+ if (debug)
+ fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain);
+
+ assert(rapl_counter_info_perdomain);
+
+ /*
+ * If we have any perf counters to read, read them all now, in bulk
+ */
+ if (rci->fd_perf != -1) {
+ size_t num_perf_counters = rapl_counter_info_count_perf(rci);
+ const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
+ const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));
+
+ if (actual_read_size != expected_read_size)
+ err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
+ actual_read_size);
+ }
+
+ for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
+ switch (rci->source[i]) {
+ case RAPL_SOURCE_NONE:
+ break;
+
+ case RAPL_SOURCE_PERF:
+ assert(pi < ARRAY_SIZE(perf_data));
+ assert(rci->fd_perf != -1);
+
+ if (debug)
+ fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
+ i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]);
+
+ rci->data[i] = perf_data[pi];
+
+ ++pi;
+ break;
+
+ case RAPL_SOURCE_MSR:
+ if (debug)
+ fprintf(stderr, "Reading rapl counter via msr at %u\n", i);
+
+ assert(!no_msr);
+ if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) {
+ if (get_msr_sum(cpu, rci->msr[i], &rci->data[i]))
+ return -13 - i;
+ } else {
+ if (get_msr(cpu, rci->msr[i], &rci->data[i]))
+ return -13 - i;
+ }
+
+ rci->data[i] &= rci->msr_mask[i];
+ if (rci->msr_shift[i] >= 0)
+ rci->data[i] >>= abs(rci->msr_shift[i]);
+ else
+ rci->data[i] <<= abs(rci->msr_shift[i]);
+
+ break;
+ }
+ }
+
+ _Static_assert(NUM_RAPL_COUNTERS == 7);
+ write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG);
+ write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES);
+ write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM);
+ write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX);
+ write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS);
+ write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS);
+ write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY);
+
+ return 0;
+}
+
/*
* get_counters(...)
* migrate to cpu
@@ -2709,12 +3453,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int cpu = t->cpu_id;
unsigned long long msr;
- int aperf_mperf_retry_count = 0;
struct msr_counter *mp;
int i;
+ int status;
if (cpu_migrate(cpu)) {
- fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
return -1;
}
@@ -2722,63 +3466,26 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (first_counter_read)
get_apic_id(t);
-retry:
+
t->tsc = rdtsc(); /* we are running on local CPU of interest */
if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
|| soft_c1_residency_display(BIC_Avg_MHz)) {
- unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
-
- /*
- * The TSC, APERF and MPERF must be read together for
- * APERF/MPERF and MPERF/TSC to give accurate results.
- *
- * Unfortunately, APERF and MPERF are read by
- * individual system call, so delays may occur
- * between them. If the time to read them
- * varies by a large amount, we re-read them.
- */
-
- /*
- * This initial dummy APERF read has been seen to
- * reduce jitter in the subsequent reads.
- */
-
- if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
- return -3;
-
- t->tsc = rdtsc(); /* re-read close to APERF */
-
- tsc_before = t->tsc;
+ int status = -1;
- if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
- return -3;
+ assert(!no_perf || !no_msr);
- tsc_between = rdtsc();
-
- if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
- return -4;
-
- tsc_after = rdtsc();
-
- aperf_time = tsc_between - tsc_before;
- mperf_time = tsc_after - tsc_between;
-
- /*
- * If the system call latency to read APERF and MPERF
- * differ by more than 2x, then try again.
- */
- if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
- aperf_mperf_retry_count++;
- if (aperf_mperf_retry_count < 5)
- goto retry;
- else
- warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
+ switch (amperf_source) {
+ case AMPERF_SOURCE_PERF:
+ status = read_aperf_mperf_tsc_perf(t, cpu);
+ break;
+ case AMPERF_SOURCE_MSR:
+ status = read_aperf_mperf_tsc_msr(t, cpu);
+ break;
}
- aperf_mperf_retry_count = 0;
- t->aperf = t->aperf * aperf_mperf_multiplier;
- t->mperf = t->mperf * aperf_mperf_multiplier;
+ if (status != 0)
+ return status;
}
if (DO_BIC(BIC_IPC))
@@ -2806,6 +3513,12 @@ retry:
if (!is_cpu_first_thread_in_core(t, c, p))
goto done;
+ if (platform->has_per_core_rapl) {
+ status = get_rapl_counters(cpu, c->core_id, c, p);
+ if (status != 0)
+ return status;
+ }
+
if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
@@ -2846,12 +3559,6 @@ retry:
if (DO_BIC(BIC_CORE_THROT_CNT))
get_core_throt_cnt(cpu, &c->core_throt_cnt);
- if (platform->rapl_msrs & RAPL_AMD_F17H) {
- if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
- return -14;
- c->core_energy = msr & 0xFFFFFFFF;
- }
-
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &c->counter[i]))
return -10;
@@ -2911,59 +3618,39 @@ retry:
if (DO_BIC(BIC_SYS_LPI))
p->sys_lpi = cpuidle_cur_sys_lpi_us;
- if (platform->rapl_msrs & RAPL_PKG) {
- if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
- return -13;
- p->energy_pkg = msr;
- }
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
- if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
- return -14;
- p->energy_cores = msr;
- }
- if (platform->rapl_msrs & RAPL_DRAM) {
- if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
- return -15;
- p->energy_dram = msr;
- }
- if (platform->rapl_msrs & RAPL_GFX) {
- if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
- return -16;
- p->energy_gfx = msr;
- }
- if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
- if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
- return -16;
- p->rapl_pkg_perf_status = msr;
- }
- if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
- if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
- return -16;
- p->rapl_dram_perf_status = msr;
- }
- if (platform->rapl_msrs & RAPL_AMD_F17H) {
- if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
- return -13;
- p->energy_pkg = msr;
+ if (!platform->has_per_core_rapl) {
+ status = get_rapl_counters(cpu, p->package_id, c, p);
+ if (status != 0)
+ return status;
}
+
if (DO_BIC(BIC_PkgTmp)) {
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
return -17;
p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
}
- if (DO_BIC(BIC_GFX_rc6))
- p->gfx_rc6_ms = gfx_cur_rc6_ms;
-
/* n.b. assume die0 uncore frequency applies to whole package */
if (DO_BIC(BIC_UNCORE_MHZ))
p->uncore_mhz = get_uncore_mhz(p->package_id, 0);
+ if (DO_BIC(BIC_GFX_rc6))
+ p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;
+
if (DO_BIC(BIC_GFXMHz))
- p->gfx_mhz = gfx_cur_mhz;
+ p->gfx_mhz = gfx_info[GFX_MHz].val;
if (DO_BIC(BIC_GFXACTMHz))
- p->gfx_act_mhz = gfx_act_mhz;
+ p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;
+
+ if (DO_BIC(BIC_SAM_mc6))
+ p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;
+
+ if (DO_BIC(BIC_SAMMHz))
+ p->sam_mhz = gfx_info[SAM_MHz].val;
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &p->counter[i]))
@@ -3053,7 +3740,7 @@ void probe_cst_limit(void)
unsigned long long msr;
int *pkg_cstate_limits;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
switch (platform->cst_limit) {
@@ -3097,7 +3784,7 @@ static void dump_platform_info(void)
unsigned long long msr;
unsigned int ratio;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
@@ -3115,7 +3802,7 @@ static void dump_power_ctl(void)
{
unsigned long long msr;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
@@ -3321,7 +4008,7 @@ static void dump_cst_cfg(void)
{
unsigned long long msr;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
@@ -3393,7 +4080,7 @@ void print_irtl(void)
{
unsigned long long msr;
- if (!platform->has_irtl_msrs)
+ if (!platform->has_irtl_msrs || no_msr)
return;
if (platform->supported_cstates & PC3) {
@@ -3443,12 +4130,64 @@ void free_fd_percpu(void)
{
int i;
+ if (!fd_percpu)
+ return;
+
for (i = 0; i < topo.max_cpu_num + 1; ++i) {
if (fd_percpu[i] != 0)
close(fd_percpu[i]);
}
free(fd_percpu);
+ fd_percpu = NULL;
+}
+
+void free_fd_amperf_percpu(void)
+{
+ int i;
+
+ if (!fd_amperf_percpu)
+ return;
+
+ for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_amperf_percpu[i].mperf != 0)
+ close(fd_amperf_percpu[i].mperf);
+
+ if (fd_amperf_percpu[i].aperf != 0)
+ close(fd_amperf_percpu[i].aperf);
+ }
+
+ free(fd_amperf_percpu);
+ fd_amperf_percpu = NULL;
+}
+
+void free_fd_instr_count_percpu(void)
+{
+ if (!fd_instr_count_percpu)
+ return;
+
+ for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_instr_count_percpu[i] != 0)
+ close(fd_instr_count_percpu[i]);
+ }
+
+ free(fd_instr_count_percpu);
+ fd_instr_count_percpu = NULL;
+}
+
+void free_fd_rapl_percpu(void)
+{
+ if (!rapl_counter_info_perdomain)
+ return;
+
+ const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+
+ for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+ if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
+ close(rapl_counter_info_perdomain[domain_id].fd_perf);
+ }
+
+ free(rapl_counter_info_perdomain);
}
void free_all_buffers(void)
@@ -3492,6 +4231,9 @@ void free_all_buffers(void)
outp = NULL;
free_fd_percpu();
+ free_fd_instr_count_percpu();
+ free_fd_amperf_percpu();
+ free_fd_rapl_percpu();
free(irq_column_2_cpu);
free(irqs_per_cpu);
@@ -3825,11 +4567,17 @@ static void update_effective_set(bool startup)
err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
}
+void linux_perf_init(void);
+void rapl_perf_init(void);
+
void re_initialize(void)
{
free_all_buffers();
setup_all_buffers(false);
- fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
+ linux_perf_init();
+ rapl_perf_init();
+ fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
+ topo.allowed_cpus);
}
void set_max_cpu_num(void)
@@ -3940,85 +4688,43 @@ int snapshot_proc_interrupts(void)
}
/*
- * snapshot_gfx_rc6_ms()
+ * snapshot_graphics()
*
- * record snapshot of
- * /sys/class/drm/card0/power/rc6_residency_ms
+ * record snapshot of specified graphics sysfs knob
*
* return 1 if config change requires a restart, else return 0
*/
-int snapshot_gfx_rc6_ms(void)
+int snapshot_graphics(int idx)
{
FILE *fp;
int retval;
- fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
-
- retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
- if (retval != 1)
- err(1, "GFX rc6");
-
- fclose(fp);
-
- return 0;
-}
-
-/*
- * snapshot_gfx_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
- * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_mhz(void)
-{
- static FILE *fp;
- int retval;
-
- if (fp == NULL) {
- fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
- if (!fp)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
- } else {
- rewind(fp);
- fflush(fp);
- }
-
- retval = fscanf(fp, "%d", &gfx_cur_mhz);
- if (retval != 1)
- err(1, "GFX MHz");
-
- return 0;
-}
-
-/*
- * snapshot_gfx_cur_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
- * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_act_mhz(void)
-{
- static FILE *fp;
- int retval;
-
- if (fp == NULL) {
- fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
- if (!fp)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
- } else {
- rewind(fp);
- fflush(fp);
+ switch (idx) {
+ case GFX_rc6:
+ case SAM_mc6:
+ fp = fopen_or_die(gfx_info[idx].path, "r");
+ retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull);
+ if (retval != 1)
+ err(1, "rc6");
+ fclose(fp);
+ return 0;
+ case GFX_MHz:
+ case GFX_ACTMHz:
+ case SAM_MHz:
+ case SAM_ACTMHz:
+ if (gfx_info[idx].fp == NULL) {
+ gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r");
+ } else {
+ rewind(gfx_info[idx].fp);
+ fflush(gfx_info[idx].fp);
+ }
+ retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
+ if (retval != 1)
+ err(1, "MHz");
+ return 0;
+ default:
+ return -EINVAL;
}
-
- retval = fscanf(fp, "%d", &gfx_act_mhz);
- if (retval != 1)
- err(1, "GFX ACT MHz");
-
- return 0;
}
/*
@@ -4083,13 +4789,22 @@ int snapshot_proc_sysfs_files(void)
return 1;
if (DO_BIC(BIC_GFX_rc6))
- snapshot_gfx_rc6_ms();
+ snapshot_graphics(GFX_rc6);
if (DO_BIC(BIC_GFXMHz))
- snapshot_gfx_mhz();
+ snapshot_graphics(GFX_MHz);
if (DO_BIC(BIC_GFXACTMHz))
- snapshot_gfx_act_mhz();
+ snapshot_graphics(GFX_ACTMHz);
+
+ if (DO_BIC(BIC_SAM_mc6))
+ snapshot_graphics(SAM_mc6);
+
+ if (DO_BIC(BIC_SAMMHz))
+ snapshot_graphics(SAM_MHz);
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ snapshot_graphics(SAM_ACTMHz);
if (DO_BIC(BIC_CPU_LPI))
snapshot_cpu_lpi_us();
@@ -4173,6 +4888,8 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
int ret, idx;
unsigned long long msr_cur, msr_last;
+ assert(!no_msr);
+
if (!per_cpu_msr_sum)
return 1;
@@ -4201,6 +4918,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
UNUSED(c);
UNUSED(p);
+ assert(!no_msr);
+
for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
unsigned long long msr_cur, msr_last;
off_t offset;
@@ -4280,7 +4999,8 @@ release_msr:
/*
* set_my_sched_priority(pri)
- * return previous
+ * return previous priority on success
+ * return value < -20 on failure
*/
int set_my_sched_priority(int priority)
{
@@ -4290,16 +5010,16 @@ int set_my_sched_priority(int priority)
errno = 0;
original_priority = getpriority(PRIO_PROCESS, 0);
if (errno && (original_priority == -1))
- err(errno, "getpriority");
+ return -21;
retval = setpriority(PRIO_PROCESS, 0, priority);
if (retval)
- errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname);
+ return -21;
errno = 0;
retval = getpriority(PRIO_PROCESS, 0);
if (retval != priority)
- err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
+ return -21;
return original_priority;
}
@@ -4314,6 +5034,9 @@ void turbostat_loop()
/*
* elevate own priority for interval mode
+ *
+ * ignore on error - we probably don't have permission to set it, but
+ * it's not a big deal
*/
set_my_sched_priority(-20);
@@ -4399,10 +5122,13 @@ void check_dev_msr()
struct stat sb;
char pathname[32];
+ if (no_msr)
+ return;
+
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (stat(pathname, &sb))
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
- err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+ no_msr = 1;
}
/*
@@ -4414,47 +5140,51 @@ int check_for_cap_sys_rawio(void)
{
cap_t caps;
cap_flag_value_t cap_flag_value;
+ int ret = 0;
caps = cap_get_proc();
if (caps == NULL)
- err(-6, "cap_get_proc\n");
+ return 1;
- if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
- err(-6, "cap_get\n");
+ if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
+ ret = 1;
+ goto free_and_exit;
+ }
if (cap_flag_value != CAP_SET) {
- warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
- return 1;
+ ret = 1;
+ goto free_and_exit;
}
+free_and_exit:
if (cap_free(caps) == -1)
err(-6, "cap_free\n");
- return 0;
+ return ret;
}
-void check_permissions(void)
+void check_msr_permission(void)
{
- int do_exit = 0;
+ int failed = 0;
char pathname[32];
+ if (no_msr)
+ return;
+
/* check for CAP_SYS_RAWIO */
- do_exit += check_for_cap_sys_rawio();
+ failed += check_for_cap_sys_rawio();
/* test file permissions */
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (euidaccess(pathname, R_OK)) {
- do_exit++;
- warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
+ failed++;
}
- /* if all else fails, thell them to be root */
- if (do_exit)
- if (getuid() != 0)
- warnx("... or simply run as root");
-
- if (do_exit)
- exit(-6);
+ if (failed) {
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
+ no_msr = 1;
+ }
}
void probe_bclk(void)
@@ -4462,7 +5192,7 @@ void probe_bclk(void)
unsigned long long msr;
unsigned int base_ratio;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
if (platform->bclk_freq == BCLK_100MHZ)
@@ -4502,7 +5232,7 @@ static void dump_turbo_ratio_info(void)
if (!has_turbo)
return;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
if (platform->trl_msrs & TRL_LIMIT2)
@@ -4567,20 +5297,15 @@ static void dump_sysfs_file(char *path)
static void probe_intel_uncore_frequency(void)
{
int i, j;
- char path[128];
+ char path[256];
if (!genuine_intel)
return;
- if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
- return;
-
- /* Cluster level sysfs not supported yet. */
- if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
- return;
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+ goto probe_cluster;
- if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
- BIC_PRESENT(BIC_UNCORE_MHZ);
+ BIC_PRESENT(BIC_UNCORE_MHZ);
if (quiet)
return;
@@ -4588,40 +5313,178 @@ static void probe_intel_uncore_frequency(void)
for (i = 0; i < topo.num_packages; ++i) {
for (j = 0; j < topo.num_die; ++j) {
int k, l;
+ char path_base[128];
+
+ sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
+ j);
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
- i, j);
+ sprintf(path, "%s/min_freq_khz", path_base);
k = read_sysfs_int(path);
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
- i, j);
+ sprintf(path, "%s/max_freq_khz", path_base);
l = read_sysfs_int(path);
- fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
+ fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
- sprintf(path,
- "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
- i, j);
+ sprintf(path, "%s/initial_min_freq_khz", path_base);
k = read_sysfs_int(path);
- sprintf(path,
- "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
- i, j);
+ sprintf(path, "%s/initial_max_freq_khz", path_base);
l = read_sysfs_int(path);
- fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
+ fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+ sprintf(path, "%s/current_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ fprintf(outf, " %d MHz\n", k / 1000);
}
}
+ return;
+
+probe_cluster:
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
+ return;
+
+ if (quiet)
+ return;
+
+ for (i = 0;; ++i) {
+ int k, l;
+ char path_base[128];
+ int package_id, domain_id, cluster_id;
+
+ sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);
+
+ if (access(path_base, R_OK))
+ break;
+
+ sprintf(path, "%s/package_id", path_base);
+ package_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/domain_id", path_base);
+ domain_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/fabric_cluster_id", path_base);
+ cluster_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/min_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ sprintf(path, "%s/max_freq_khz", path_base);
+ l = read_sysfs_int(path);
+ fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
+ cluster_id, k / 1000, l / 1000);
+
+ sprintf(path, "%s/initial_min_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ sprintf(path, "%s/initial_max_freq_khz", path_base);
+ l = read_sysfs_int(path);
+ fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+ sprintf(path, "%s/current_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ fprintf(outf, " %d MHz\n", k / 1000);
+ }
}
static void probe_graphics(void)
{
+ /* Xe graphics sysfs knobs */
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
+ FILE *fp;
+ char buf[8];
+ bool gt0_is_gt;
+ int idx;
+
+ fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
+ if (!fp)
+ goto next;
+
+ if (!fread(buf, sizeof(char), 7, fp)) {
+ fclose(fp);
+ goto next;
+ }
+ fclose(fp);
+
+ if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
+ gt0_is_gt = true;
+ else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
+ gt0_is_gt = false;
+ else
+ goto next;
+
+ idx = gt0_is_gt ? GFX_rc6 : SAM_mc6;
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms";
+
+ idx = gt0_is_gt ? GFX_MHz : SAM_MHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq";
+
+ idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq";
+
+ idx = gt0_is_gt ? SAM_mc6 : GFX_rc6;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms";
+
+ idx = gt0_is_gt ? SAM_MHz : GFX_MHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq";
+
+ idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq";
+
+ goto end;
+ }
+
+next:
+ /* New i915 graphics sysfs knobs */
+ if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) {
+ gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK))
+ gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK))
+ gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK))
+ gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz";
+
+ goto end;
+ }
+
+ /* Fall back to traditional i915 graphics sysfs knobs */
if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
- BIC_PRESENT(BIC_GFX_rc6);
+ gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz";
+ else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz";
- if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
- !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
- BIC_PRESENT(BIC_GFXMHz);
- if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
- !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz";
+ else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz";
+
+end:
+ if (gfx_info[GFX_rc6].path)
+ BIC_PRESENT(BIC_GFX_rc6);
+ if (gfx_info[GFX_MHz].path)
+ BIC_PRESENT(BIC_GFXMHz);
+ if (gfx_info[GFX_ACTMHz].path)
BIC_PRESENT(BIC_GFXACTMHz);
+ if (gfx_info[SAM_mc6].path)
+ BIC_PRESENT(BIC_SAM_mc6);
+ if (gfx_info[SAM_MHz].path)
+ BIC_PRESENT(BIC_SAMMHz);
+ if (gfx_info[SAM_ACTMHz].path)
+ BIC_PRESENT(BIC_SAMACTMHz);
}
static void dump_sysfs_cstate_config(void)
@@ -4783,6 +5646,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
if (!has_hwp)
return 0;
@@ -4869,6 +5735,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
cpu = t->cpu_id;
/* per-package */
@@ -4983,31 +5852,18 @@ void rapl_probe_intel(void)
unsigned long long msr;
unsigned int time_unit;
double tdp;
+ const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt;
+ const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J;
- if (rapl_joules) {
- if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
- BIC_PRESENT(BIC_Pkg_J);
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
- BIC_PRESENT(BIC_Cor_J);
- if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
- BIC_PRESENT(BIC_RAM_J);
- if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
- BIC_PRESENT(BIC_GFX_J);
- } else {
- if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
- BIC_PRESENT(BIC_PkgWatt);
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
- BIC_PRESENT(BIC_CorWatt);
- if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
- BIC_PRESENT(BIC_RAMWatt);
- if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
- BIC_PRESENT(BIC_GFXWatt);
- }
+ if (rapl_joules)
+ bic_enabled &= ~bic_watt_bits;
+ else
+ bic_enabled &= ~bic_joules_bits;
- if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
- BIC_PRESENT(BIC_PKG__);
- if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
- BIC_PRESENT(BIC_RAM__);
+ if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
+ bic_enabled &= ~BIC_PKG__;
+ if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
+ bic_enabled &= ~BIC_RAM__;
/* units on package 0, verify later other packages match */
if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
@@ -5041,14 +5897,13 @@ void rapl_probe_amd(void)
{
unsigned long long msr;
double tdp;
+ const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt;
+ const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- }
+ if (rapl_joules)
+ bic_enabled &= ~bic_watt_bits;
+ else
+ bic_enabled &= ~bic_joules_bits;
if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
return;
@@ -5202,7 +6057,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
*/
void probe_rapl(void)
{
- if (!platform->rapl_msrs)
+ if (!platform->rapl_msrs || no_msr)
return;
if (genuine_intel)
@@ -5258,7 +6113,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
}
/* Temperature Target MSR is Nehalem and newer only */
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
goto guess;
if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
@@ -5305,6 +6160,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
if (!(do_dts || do_ptm))
return 0;
@@ -5402,6 +6260,9 @@ void decode_feature_control_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
@@ -5411,6 +6272,9 @@ void decode_misc_enable_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!genuine_intel)
return;
@@ -5428,6 +6292,9 @@ void decode_misc_feature_control(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_misc_feature_control)
return;
@@ -5449,6 +6316,9 @@ void decode_misc_pwr_mgmt_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_misc_pwr_mgmt)
return;
@@ -5468,6 +6338,9 @@ void decode_c6_demotion_policy_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_c6_demotion_policy_config)
return;
@@ -5489,7 +6362,8 @@ void print_dev_latency(void)
fd = open(path, O_RDONLY);
if (fd < 0) {
- warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
+ if (debug)
+ warnx("Read %s failed", path);
return;
}
@@ -5504,23 +6378,260 @@ void print_dev_latency(void)
close(fd);
}
+static int has_instr_count_access(void)
+{
+ int fd;
+ int has_access;
+
+ if (no_perf)
+ return 0;
+
+ fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
+ has_access = fd != -1;
+
+ if (fd != -1)
+ close(fd);
+
+ if (!has_access)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "instructions retired perf counter", "--no-perf");
+
+ return has_access;
+}
+
+bool is_aperf_access_required(void)
+{
+ return BIC_IS_ENABLED(BIC_Avg_MHz)
+ || BIC_IS_ENABLED(BIC_Busy)
+ || BIC_IS_ENABLED(BIC_Bzy_MHz)
+ || BIC_IS_ENABLED(BIC_IPC);
+}
+
+int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale_, enum rapl_unit *unit_)
+{
+ if (no_perf)
+ return -1;
+
+ const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name);
+
+ if (scale == 0.0)
+ return -1;
+
+ const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);
+
+ if (unit == RAPL_UNIT_INVALID)
+ return -1;
+
+ const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
+ const unsigned int rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name);
+
+ const int fd_counter =
+ open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
+ if (fd_counter == -1)
+ return -1;
+
+ /* If it's the first counter opened, make it a group descriptor */
+ if (rci->fd_perf == -1)
+ rci->fd_perf = fd_counter;
+
+ *scale_ = scale;
+ *unit_ = unit;
+ return fd_counter;
+}
+
+int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale, enum rapl_unit *unit)
+{
+ int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);
+
+ if (debug)
+ fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
+
+ return ret;
+}
+
/*
* Linux-perf manages the HW instructions-retired counter
* by enabling when requested, and hiding rollover
*/
void linux_perf_init(void)
{
- if (!BIC_IS_ENABLED(BIC_IPC))
- return;
-
if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
return;
- fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
- if (fd_instr_count_percpu == NULL)
- err(-1, "calloc fd_instr_count_percpu");
+ if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
+ fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+ if (fd_instr_count_percpu == NULL)
+ err(-1, "calloc fd_instr_count_percpu");
+ }
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) {
+ fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu));
+ if (fd_amperf_percpu == NULL)
+ err(-1, "calloc fd_amperf_percpu");
+ }
+}
+
+void rapl_perf_init(void)
+{
+ const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+ bool *domain_visited = calloc(num_domains, sizeof(bool));
+
+ rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
+ if (rapl_counter_info_perdomain == NULL)
+ err(-1, "calloc rapl_counter_info_percpu");
+
+ /*
+ * Initialize rapl_counter_info_percpu
+ */
+ for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];
+
+ rci->fd_perf = -1;
+ for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
+ rci->data[i] = 0;
+ rci->source[i] = RAPL_SOURCE_NONE;
+ }
+ }
- BIC_PRESENT(BIC_IPC);
+ /*
+ * Open/probe the counters
+ * If can't get it via perf, fallback to MSR
+ */
+ for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {
+
+ const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
+ bool has_counter = 0;
+ double scale;
+ enum rapl_unit unit;
+ int next_domain;
+
+ memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
+
+ for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
+
+ if (cpu_is_not_allowed(cpu))
+ continue;
+
+ /* Skip already seen and handled RAPL domains */
+ next_domain =
+ platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;
+
+ if (domain_visited[next_domain])
+ continue;
+
+ domain_visited[next_domain] = 1;
+
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
+
+ /* Check if the counter is enabled and accessible */
+ if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {
+
+ /* Use perf API for this counter */
+ if (!no_perf && cai->perf_name
+ && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
+ rci->source[cai->rci_index] = RAPL_SOURCE_PERF;
+ rci->scale[cai->rci_index] = scale * cai->compat_scale;
+ rci->unit[cai->rci_index] = unit;
+ rci->flags[cai->rci_index] = cai->flags;
+
+ /* Use MSR for this counter */
+ } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+ rci->source[cai->rci_index] = RAPL_SOURCE_MSR;
+ rci->msr[cai->rci_index] = cai->msr;
+ rci->msr_mask[cai->rci_index] = cai->msr_mask;
+ rci->msr_shift[cai->rci_index] = cai->msr_shift;
+ rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
+ rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
+ rci->flags[cai->rci_index] = cai->flags;
+ }
+ }
+
+ if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE)
+ has_counter = 1;
+ }
+
+ /* If any CPU has access to the counter, make it present */
+ if (has_counter)
+ BIC_PRESENT(cai->bic);
+ }
+
+ free(domain_visited);
+}
+
+static int has_amperf_access_via_msr(void)
+{
+ if (no_msr)
+ return 0;
+
+ if (probe_msr(base_cpu, MSR_IA32_APERF))
+ return 0;
+
+ if (probe_msr(base_cpu, MSR_IA32_MPERF))
+ return 0;
+
+ return 1;
+}
+
+static int has_amperf_access_via_perf(void)
+{
+ struct amperf_group_fd fds;
+
+ /*
+ * Cache the last result, so we don't warn the user multiple times
+ *
+ * Negative means cached, no access
+ * Zero means not cached
+ * Positive means cached, has access
+ */
+ static int has_access_cached;
+
+ if (no_perf)
+ return 0;
+
+ if (has_access_cached != 0)
+ return has_access_cached > 0;
+
+ fds = open_amperf_fd(base_cpu);
+ has_access_cached = (fds.aperf != -1) && (fds.mperf != -1);
+
+ if (fds.aperf == -1)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "APERF perf counter", "--no-perf");
+ else
+ close(fds.aperf);
+
+ if (fds.mperf == -1)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "MPERF perf counter", "--no-perf");
+ else
+ close(fds.mperf);
+
+ if (has_access_cached == 0)
+ has_access_cached = -1;
+
+ return has_access_cached > 0;
+}
+
+/* Check if we can access APERF and MPERF */
+static int has_amperf_access(void)
+{
+ if (!is_aperf_access_required())
+ return 0;
+
+ if (!no_msr && has_amperf_access_via_msr())
+ return 1;
+
+ if (!no_perf && has_amperf_access_via_perf())
+ return 1;
+
+ return 0;
}
void probe_cstates(void)
@@ -5563,7 +6674,7 @@ void probe_cstates(void)
if (platform->has_msr_module_c6_res_ms)
BIC_PRESENT(BIC_Mod_c6);
- if (platform->has_ext_cst_msrs) {
+ if (platform->has_ext_cst_msrs && !no_msr) {
BIC_PRESENT(BIC_Totl_c0);
BIC_PRESENT(BIC_Any_c0);
BIC_PRESENT(BIC_GFX_c0);
@@ -5623,6 +6734,7 @@ void process_cpuid()
unsigned int eax, ebx, ecx, edx;
unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
unsigned long long ucode_patch = 0;
+ bool ucode_patch_valid = false;
eax = ebx = ecx = edx = 0;
@@ -5650,8 +6762,12 @@ void process_cpuid()
ecx_flags = ecx;
edx_flags = edx;
- if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
- warnx("get_msr(UCODE)");
+ if (!no_msr) {
+ if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
+ warnx("get_msr(UCODE)");
+ else
+ ucode_patch_valid = true;
+ }
/*
* check max extended function levels of CPUID.
@@ -5662,9 +6778,12 @@ void process_cpuid()
__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
if (!quiet) {
- fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
- family, model, stepping, family, model, stepping,
- (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+ fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
+ family, model, stepping, family, model, stepping);
+ if (ucode_patch_valid)
+ fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+ fputc('\n', outf);
+
fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
ecx_flags & (1 << 0) ? "SSE3" : "-",
@@ -5700,10 +6819,11 @@ void process_cpuid()
__cpuid(0x6, eax, ebx, ecx, edx);
has_aperf = ecx & (1 << 0);
- if (has_aperf) {
+ if (has_aperf && has_amperf_access()) {
BIC_PRESENT(BIC_Avg_MHz);
BIC_PRESENT(BIC_Busy);
BIC_PRESENT(BIC_Bzy_MHz);
+ BIC_PRESENT(BIC_IPC);
}
do_dts = eax & (1 << 0);
if (do_dts)
@@ -5786,6 +6906,15 @@ void process_cpuid()
base_mhz = max_mhz = bus_mhz = edx = 0;
__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
+
+ bclk = bus_mhz;
+
+ base_hz = base_mhz * 1000000;
+ has_base_hz = 1;
+
+ if (platform->enable_tsc_tweak)
+ tsc_tweak = base_hz / tsc_hz;
+
if (!quiet)
fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
base_mhz, max_mhz, bus_mhz);
@@ -5814,7 +6943,7 @@ void probe_pm_features(void)
probe_thermal();
- if (platform->has_nhm_msrs)
+ if (platform->has_nhm_msrs && !no_msr)
BIC_PRESENT(BIC_SMI);
if (!quiet)
@@ -6142,6 +7271,7 @@ void topology_update(void)
topo.allowed_packages = 0;
for_all_cpus(update_topo, ODD_COUNTERS);
}
+
void setup_all_buffers(bool startup)
{
topology_probe(startup);
@@ -6169,21 +7299,129 @@ void set_base_cpu(void)
err(-ENODEV, "No valid cpus found");
}
+static void set_amperf_source(void)
+{
+ amperf_source = AMPERF_SOURCE_PERF;
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (no_perf || !aperf_required || !has_amperf_access_via_perf())
+ amperf_source = AMPERF_SOURCE_MSR;
+
+ if (quiet || !debug)
+ return;
+
+ fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf");
+}
+
+bool has_added_counters(void)
+{
+ /*
+ * It only makes sense to call this after the command line is parsed,
+ * otherwise sys structure is not populated.
+ */
+
+ return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters;
+}
+
+bool is_msr_access_required(void)
+{
+ if (no_msr)
+ return false;
+
+ if (has_added_counters())
+ return true;
+
+ return BIC_IS_ENABLED(BIC_SMI)
+ || BIC_IS_ENABLED(BIC_CPU_c1)
+ || BIC_IS_ENABLED(BIC_CPU_c3)
+ || BIC_IS_ENABLED(BIC_CPU_c6)
+ || BIC_IS_ENABLED(BIC_CPU_c7)
+ || BIC_IS_ENABLED(BIC_Mod_c6)
+ || BIC_IS_ENABLED(BIC_CoreTmp)
+ || BIC_IS_ENABLED(BIC_Totl_c0)
+ || BIC_IS_ENABLED(BIC_Any_c0)
+ || BIC_IS_ENABLED(BIC_GFX_c0)
+ || BIC_IS_ENABLED(BIC_CPUGFX)
+ || BIC_IS_ENABLED(BIC_Pkgpc3)
+ || BIC_IS_ENABLED(BIC_Pkgpc6)
+ || BIC_IS_ENABLED(BIC_Pkgpc2)
+ || BIC_IS_ENABLED(BIC_Pkgpc7)
+ || BIC_IS_ENABLED(BIC_Pkgpc8)
+ || BIC_IS_ENABLED(BIC_Pkgpc9)
+ || BIC_IS_ENABLED(BIC_Pkgpc10)
+ /* TODO: Multiplex access with perf */
+ || BIC_IS_ENABLED(BIC_CorWatt)
+ || BIC_IS_ENABLED(BIC_Cor_J)
+ || BIC_IS_ENABLED(BIC_PkgWatt)
+ || BIC_IS_ENABLED(BIC_CorWatt)
+ || BIC_IS_ENABLED(BIC_GFXWatt)
+ || BIC_IS_ENABLED(BIC_RAMWatt)
+ || BIC_IS_ENABLED(BIC_Pkg_J)
+ || BIC_IS_ENABLED(BIC_Cor_J)
+ || BIC_IS_ENABLED(BIC_GFX_J)
+ || BIC_IS_ENABLED(BIC_RAM_J)
+ || BIC_IS_ENABLED(BIC_PKG__)
+ || BIC_IS_ENABLED(BIC_RAM__)
+ || BIC_IS_ENABLED(BIC_PkgTmp)
+ || (is_aperf_access_required() && !has_amperf_access_via_perf());
+}
+
+void check_msr_access(void)
+{
+ if (!is_msr_access_required())
+ no_msr = 1;
+
+ check_dev_msr();
+ check_msr_permission();
+
+ if (no_msr)
+ bic_disable_msr_access();
+}
+
+void check_perf_access(void)
+{
+ const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC);
+
+ if (no_perf || !intrcount_required || !has_instr_count_access())
+ bic_enabled &= ~BIC_IPC;
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (!aperf_required || !has_amperf_access()) {
+ bic_enabled &= ~BIC_Avg_MHz;
+ bic_enabled &= ~BIC_Busy;
+ bic_enabled &= ~BIC_Bzy_MHz;
+ bic_enabled &= ~BIC_IPC;
+ }
+}
+
void turbostat_init()
{
setup_all_buffers(true);
set_base_cpu();
- check_dev_msr();
- check_permissions();
+ check_msr_access();
+ check_perf_access();
process_cpuid();
probe_pm_features();
+ set_amperf_source();
linux_perf_init();
+ rapl_perf_init();
for_all_cpus(get_cpu_type, ODD_COUNTERS);
for_all_cpus(get_cpu_type, EVEN_COUNTERS);
if (DO_BIC(BIC_IPC))
(void)get_instr_count_fd(base_cpu);
+
+ /*
+ * If TSC tweak is needed, but couldn't get it,
+ * disable more BICs, since it can't be reported accurately.
+ */
+ if (platform->enable_tsc_tweak && !has_base_hz) {
+ bic_enabled &= ~BIC_Busy;
+ bic_enabled &= ~BIC_Bzy_MHz;
+ }
}
int fork_it(char **argv)
@@ -6259,7 +7497,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2024.04.08 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
@@ -6291,6 +7529,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
{
struct msr_counter *msrp;
+ if (no_msr && msr_num)
+ errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);
+
msrp = calloc(1, sizeof(struct msr_counter));
if (msrp == NULL) {
perror("calloc");
@@ -6595,6 +7836,8 @@ void cmdline(int argc, char **argv)
{ "list", no_argument, 0, 'l' },
{ "out", required_argument, 0, 'o' },
{ "quiet", no_argument, 0, 'q' },
+ { "no-msr", no_argument, 0, 'M' },
+ { "no-perf", no_argument, 0, 'P' },
{ "show", required_argument, 0, 's' },
{ "Summary", no_argument, 0, 'S' },
{ "TCC", required_argument, 0, 'T' },
@@ -6604,7 +7847,25 @@ void cmdline(int argc, char **argv)
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
+ /*
+ * Parse some options early, because they may make other options invalid,
+ * like adding the MSR counter with --add and at the same time using --no-msr.
+ */
+ while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 'M':
+ no_msr = 1;
+ break;
+ case 'P':
+ no_perf = 1;
+ break;
+ default:
+ break;
+ }
+ }
+ optind = 0;
+
+ while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
switch (opt) {
case 'a':
parse_add_command(optarg);
@@ -6662,6 +7923,10 @@ void cmdline(int argc, char **argv)
case 'q':
quiet = 1;
break;
+ case 'M':
+ case 'P':
+ /* Parsed earlier */
+ break;
case 'n':
num_iterations = strtod(optarg, NULL);
@@ -6704,6 +7969,22 @@ void cmdline(int argc, char **argv)
}
}
+void set_rlimit(void)
+{
+ struct rlimit limit;
+
+ if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
+ err(1, "Failed to get rlimit");
+
+ if (limit.rlim_max < MAX_NOFILE)
+ limit.rlim_max = MAX_NOFILE;
+ if (limit.rlim_cur < MAX_NOFILE)
+ limit.rlim_cur = MAX_NOFILE;
+
+ if (setrlimit(RLIMIT_NOFILE, &limit) < 0)
+ err(1, "Failed to set rlimit");
+}
+
int main(int argc, char **argv)
{
int fd, ret;
@@ -6729,9 +8010,13 @@ skip_cgroup_setting:
probe_sysfs();
+ if (!getuid())
+ set_rlimit();
+
turbostat_init();
- msr_sum_record();
+ if (!no_msr)
+ msr_sum_record();
/* dump counters and exit */
if (dump_only)
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 908e0d0839369c..61c69297e7978f 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -986,10 +986,12 @@ static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range,
{
dpa_perf->qos_class = FAKE_QTG_ID;
dpa_perf->dpa_range = *range;
- dpa_perf->coord.read_latency = 500;
- dpa_perf->coord.write_latency = 500;
- dpa_perf->coord.read_bandwidth = 1000;
- dpa_perf->coord.write_bandwidth = 1000;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ dpa_perf->coord[i].read_latency = 500;
+ dpa_perf->coord[i].write_latency = 500;
+ dpa_perf->coord[i].read_bandwidth = 1000;
+ dpa_perf->coord[i].write_bandwidth = 1000;
+ }
}
static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index aa5ec149f96c16..b3b00269a52aac 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -28,6 +28,8 @@ CONFIG_MCTP_FLOWS=y
CONFIG_INET=y
CONFIG_MPTCP=y
+CONFIG_NETDEVICES=y
+CONFIG_WLAN=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_WLAN_VENDOR_INTEL=y
@@ -38,6 +40,7 @@ CONFIG_DAMON_VADDR=y
CONFIG_DAMON_PADDR=y
CONFIG_DEBUG_FS=y
CONFIG_DAMON_DBGFS=y
+CONFIG_DAMON_DBGFS_DEPRECATED=y
CONFIG_REGMAP_BUILD=y
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index c5c9d05f29da95..c0a2bb785b92b4 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -18,6 +18,8 @@
#define pr_info printk
#define pr_debug printk
#define pr_cont printk
+#define schedule()
+#define PAGE_SHIFT 12
#define __acquires(x)
#define __releases(x)
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
index bcf195c64a45c1..567491f3e1b51b 100644
--- a/tools/testing/selftests/bpf/bpf_arena_common.h
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -32,7 +32,7 @@
*/
#endif
-#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
#define __arena __attribute__((address_space(1)))
#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 39ad96a18123f6..edcd26106557b4 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -205,6 +205,9 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg)
case 5: return (void *)~(1ull << 30); /* trigger extable */
case 6: return &f; /* valid addr */
case 7: return (void *)((long)&f | 1); /* kernel tricks */
+#ifdef CONFIG_X86_64
+ case 8: return (void *)VSYSCALL_ADDR; /* vsyscall page address */
+#endif
default: return NULL;
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_htab.c b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
index 0766702de84657..d69fd2465f5367 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_htab.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
@@ -3,12 +3,14 @@
#include <test_progs.h>
#include <sys/mman.h>
#include <network_helpers.h>
-
+#include <sys/user.h>
+#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
#include "arena_htab_asm.skel.h"
#include "arena_htab.skel.h"
-#define PAGE_SIZE 4096
-
#include "bpf_arena_htab.h"
static void test_arena_htab_common(struct htab *htab)
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c
index e61886debab127..d15867cddde06a 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c
@@ -3,8 +3,11 @@
#include <test_progs.h>
#include <sys/mman.h>
#include <network_helpers.h>
-
-#define PAGE_SIZE 4096
+#include <sys/user.h>
+#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
#include "bpf_arena_list.h"
#include "arena_list.skel.h"
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
index 053f4d6da77a48..cc184e4420f6e3 100644
--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2021 Facebook */
#include <sys/syscall.h>
+#include <limits.h>
#include <test_progs.h>
#include "bloom_filter_map.skel.h"
@@ -21,6 +22,11 @@ static void test_fail_cases(void)
if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0"))
close(fd);
+ /* Invalid value size: too big */
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, INT32_MAX, 100, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value too large"))
+ close(fd);
+
/* Invalid max entries size */
fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL);
if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size"))
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 985273832f891c..c4f9f306646ed3 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -5,6 +5,7 @@
#include "cap_helpers.h"
#include "verifier_and.skel.h"
#include "verifier_arena.skel.h"
+#include "verifier_arena_large.skel.h"
#include "verifier_array_access.skel.h"
#include "verifier_basic_stack.skel.h"
#include "verifier_bitfield_write.skel.h"
@@ -120,6 +121,7 @@ static void run_tests_aux(const char *skel_name,
void test_verifier_and(void) { RUN(verifier_and); }
void test_verifier_arena(void) { RUN(verifier_arena); }
+void test_verifier_arena_large(void) { RUN(verifier_arena_large); }
void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
void test_verifier_bounds(void) { RUN(verifier_bounds); }
diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c
index b7bb712cacfdcc..1e6ac187a6a0ce 100644
--- a/tools/testing/selftests/bpf/progs/arena_htab.c
+++ b/tools/testing/selftests/bpf/progs/arena_htab.c
@@ -22,7 +22,7 @@ int zero = 0;
SEC("syscall")
int arena_htab_llvm(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM)
struct htab __arena *htab;
__u64 i;
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
index cd35b844843560..c0422c58cee2c5 100644
--- a/tools/testing/selftests/bpf/progs/arena_list.c
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -30,13 +30,13 @@ int list_sum;
int cnt;
bool skip = false;
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
long __arena arena_sum;
int __arena test_val = 1;
struct arena_list_head __arena global_head;
#else
-long arena_sum SEC(".arena.1");
-int test_val SEC(".arena.1");
+long arena_sum SEC(".addr_space.1");
+int test_val SEC(".addr_space.1");
#endif
int zero;
@@ -44,7 +44,7 @@ int zero;
SEC("syscall")
int arena_list_add(void *ctx)
{
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
__u64 i;
list_head = &global_head;
@@ -66,7 +66,7 @@ int arena_list_add(void *ctx)
SEC("syscall")
int arena_list_del(void *ctx)
{
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
struct elem __arena *n;
int sum = 0;
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
index 5540b05ff9ee13..93144ae6df7412 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -12,14 +12,18 @@ struct {
__uint(type, BPF_MAP_TYPE_ARENA);
__uint(map_flags, BPF_F_MMAPABLE);
__uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/
- __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, (1ull << 32) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#else
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#endif
} arena SEC(".maps");
SEC("syscall")
__success __retval(0)
int basic_alloc1(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile int __arena *page1, *page2, *no_page, *page3;
page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
@@ -58,7 +62,7 @@ SEC("syscall")
__success __retval(0)
int basic_alloc2(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile char __arena *page1, *page2, *page3, *page4;
page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
new file mode 100644
index 00000000000000..ef66ea460264c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+#define ARENA_SIZE (1ull << 32)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, ARENA_SIZE / PAGE_SIZE);
+} arena SEC(".maps");
+
+SEC("syscall")
+__success __retval(0)
+int big_alloc1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page1, *page2, *no_page, *page3;
+ void __arena *base;
+
+ page1 = base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ *page1 = 1;
+ page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE,
+ 1, NUMA_NO_NODE, 0);
+ if (!page2)
+ return 2;
+ *page2 = 2;
+ no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE,
+ 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 3;
+ if (*page1 != 1)
+ return 4;
+ if (*page2 != 2)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)page1, 1);
+ if (*page2 != 2)
+ return 6;
+ if (*page1 != 0) /* use-after-free should return 0 */
+ return 7;
+ page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page3)
+ return 8;
+ *page3 = 3;
+ if (page1 != page3)
+ return 9;
+ if (*page2 != 2)
+ return 10;
+ if (*(page1 + PAGE_SIZE) != 0)
+ return 11;
+ if (*(page1 - PAGE_SIZE) != 0)
+ return 12;
+ if (*(page2 + PAGE_SIZE) != 0)
+ return 13;
+ if (*(page2 - PAGE_SIZE) != 0)
+ return 14;
+#endif
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/dmabuf-heaps/config b/tools/testing/selftests/dmabuf-heaps/config
new file mode 100644
index 00000000000000..be091f1cdfa04e
--- /dev/null
+++ b/tools/testing/selftests/dmabuf-heaps/config
@@ -0,0 +1,3 @@
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
+CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/drivers/net/netdevsim/settings b/tools/testing/selftests/drivers/net/netdevsim/settings
new file mode 100644
index 00000000000000..a62d2fa1275c6b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/settings
@@ -0,0 +1 @@
+timeout=600
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index a0b8688b083694..3c79ec9bf780f5 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -19,8 +19,8 @@ include ../lib.mk
$(OUTPUT)/subdir:
mkdir -p $@
-$(OUTPUT)/script:
- echo '#!/bin/sh' > $@
+$(OUTPUT)/script: Makefile
+ echo '#!/bin/bash' > $@
echo 'exit $$*' >> $@
chmod +x $@
$(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
@@ -29,8 +29,8 @@ $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
cp $< $@
chmod -x $@
$(OUTPUT)/load_address_4096: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -static-pie $< -o $@
$(OUTPUT)/load_address_2097152: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -static-pie $< -o $@
$(OUTPUT)/load_address_16777216: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -static-pie $< -o $@
diff --git a/tools/testing/selftests/exec/binfmt_script.py b/tools/testing/selftests/exec/binfmt_script.py
index 05f94a741c7aa0..2c575a2c0eab41 100755
--- a/tools/testing/selftests/exec/binfmt_script.py
+++ b/tools/testing/selftests/exec/binfmt_script.py
@@ -16,6 +16,8 @@ SIZE=256
NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."]))
test_num=0
+pass_num=0
+fail_num=0
code='''#!/usr/bin/perl
print "Executed interpreter! Args:\n";
@@ -42,7 +44,7 @@ foreach my $a (@ARGV) {
# ...
def test(name, size, good=True, leading="", root="./", target="/perl",
fill="A", arg="", newline="\n", hashbang="#!"):
- global test_num, tests, NAME_MAX
+ global test_num, pass_num, fail_num, tests, NAME_MAX
test_num += 1
if test_num > tests:
raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)"
@@ -80,16 +82,20 @@ def test(name, size, good=True, leading="", root="./", target="/perl",
if good:
print("ok %d - binfmt_script %s (successful good exec)"
% (test_num, name))
+ pass_num += 1
else:
print("not ok %d - binfmt_script %s succeeded when it should have failed"
% (test_num, name))
+ fail_num = 1
else:
if good:
print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)"
% (test_num, name, proc.returncode))
+ fail_num = 1
else:
print("ok %d - binfmt_script %s (correctly failed bad exec)"
% (test_num, name))
+ pass_num += 1
# Clean up crazy binaries
os.unlink(script)
@@ -166,6 +172,8 @@ test(name="two-under-trunc-arg", size=int(SIZE/2), arg=" ")
test(name="two-under-leading", size=int(SIZE/2), leading=" ")
test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ")
+print("# Totals: pass:%d fail:%d xfail:0 xpass:0 skip:0 error:0" % (pass_num, fail_num))
+
if test_num != tests:
raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d"
% (test_num, tests))
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 0546ca24f2b20c..6418ded40bdddc 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -98,10 +98,9 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags,
if (child == 0) {
/* Child: do execveat(). */
rc = execveat_(fd, path, argv, envp, flags);
- ksft_print_msg("execveat() failed, rc=%d errno=%d (%s)\n",
+ ksft_print_msg("child execveat() failed, rc=%d errno=%d (%s)\n",
rc, errno, strerror(errno));
- ksft_test_result_fail("%s\n", test_name);
- exit(1); /* should not reach here */
+ exit(errno);
}
/* Parent: wait for & check child's exit status. */
rc = waitpid(child, &status, 0);
@@ -226,11 +225,14 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
* "If the command name is found, but it is not an executable utility,
* the exit status shall be 126."), so allow either.
*/
- if (is_script)
+ if (is_script) {
+ ksft_print_msg("Invoke script via root_dfd and relative filename\n");
fail += check_execveat_invoked_rc(root_dfd, longpath + 1, 0,
127, 126);
- else
+ } else {
+ ksft_print_msg("Invoke exec via root_dfd and relative filename\n");
fail += check_execveat(root_dfd, longpath + 1, 0);
+ }
return fail;
}
diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c
index d487c2f6a61509..17e3207d34ae7e 100644
--- a/tools/testing/selftests/exec/load_address.c
+++ b/tools/testing/selftests/exec/load_address.c
@@ -5,6 +5,7 @@
#include <link.h>
#include <stdio.h>
#include <stdlib.h>
+#include "../kselftest.h"
struct Statistics {
unsigned long long load_address;
@@ -41,28 +42,23 @@ int main(int argc, char **argv)
unsigned long long misalign;
int ret;
+ ksft_print_header();
+ ksft_set_plan(1);
+
ret = dl_iterate_phdr(ExtractStatistics, &extracted);
- if (ret != 1) {
- fprintf(stderr, "FAILED\n");
- return 1;
- }
+ if (ret != 1)
+ ksft_exit_fail_msg("FAILED: dl_iterate_phdr\n");
- if (extracted.alignment == 0) {
- fprintf(stderr, "No alignment found\n");
- return 1;
- } else if (extracted.alignment & (extracted.alignment - 1)) {
- fprintf(stderr, "Alignment is not a power of 2\n");
- return 1;
- }
+ if (extracted.alignment == 0)
+ ksft_exit_fail_msg("FAILED: No alignment found\n");
+ else if (extracted.alignment & (extracted.alignment - 1))
+ ksft_exit_fail_msg("FAILED: Alignment is not a power of 2\n");
misalign = extracted.load_address & (extracted.alignment - 1);
- if (misalign) {
- printf("alignment = %llu, load_address = %llu\n",
- extracted.alignment, extracted.load_address);
- fprintf(stderr, "FAILED\n");
- return 1;
- }
+ if (misalign)
+ ksft_exit_fail_msg("FAILED: alignment = %llu, load_address = %llu\n",
+ extracted.alignment, extracted.load_address);
- fprintf(stderr, "PASS\n");
- return 0;
+ ksft_test_result_pass("Completed\n");
+ ksft_finished();
}
diff --git a/tools/testing/selftests/exec/recursion-depth.c b/tools/testing/selftests/exec/recursion-depth.c
index 2dbd5bc45b3ed0..b2f37d86a5f623 100644
--- a/tools/testing/selftests/exec/recursion-depth.c
+++ b/tools/testing/selftests/exec/recursion-depth.c
@@ -23,45 +23,44 @@
#include <fcntl.h>
#include <sys/mount.h>
#include <unistd.h>
+#include "../kselftest.h"
int main(void)
{
+ int fd, rv;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
if (unshare(CLONE_NEWNS) == -1) {
if (errno == ENOSYS || errno == EPERM) {
- fprintf(stderr, "error: unshare, errno %d\n", errno);
- return 4;
+ ksft_test_result_skip("error: unshare, errno %d\n", errno);
+ ksft_finished();
}
- fprintf(stderr, "error: unshare, errno %d\n", errno);
- return 1;
- }
- if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
- fprintf(stderr, "error: mount '/', errno %d\n", errno);
- return 1;
+ ksft_exit_fail_msg("error: unshare, errno %d\n", errno);
}
+
+ if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) == -1)
+ ksft_exit_fail_msg("error: mount '/', errno %d\n", errno);
+
/* Require "exec" filesystem. */
- if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1) {
- fprintf(stderr, "error: mount ramfs, errno %d\n", errno);
- return 1;
- }
+ if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1)
+ ksft_exit_fail_msg("error: mount ramfs, errno %d\n", errno);
#define FILENAME "/tmp/1"
- int fd = creat(FILENAME, 0700);
- if (fd == -1) {
- fprintf(stderr, "error: creat, errno %d\n", errno);
- return 1;
- }
+ fd = creat(FILENAME, 0700);
+ if (fd == -1)
+ ksft_exit_fail_msg("error: creat, errno %d\n", errno);
+
#define S "#!" FILENAME "\n"
- if (write(fd, S, strlen(S)) != strlen(S)) {
- fprintf(stderr, "error: write, errno %d\n", errno);
- return 1;
- }
+ if (write(fd, S, strlen(S)) != strlen(S))
+ ksft_exit_fail_msg("error: write, errno %d\n", errno);
+
close(fd);
- int rv = execve(FILENAME, NULL, NULL);
- if (rv == -1 && errno == ELOOP) {
- return 0;
- }
- fprintf(stderr, "error: execve, rv %d, errno %d\n", rv, errno);
- return 1;
+ rv = execve(FILENAME, NULL, NULL);
+ ksft_test_result(rv == -1 && errno == ELOOP,
+ "execve failed as expected (ret %d, errno %d)\n", rv, errno);
+ ksft_finished();
}
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
index b1ede624986676..b7c8f29c09a978 100644
--- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -18,7 +18,7 @@ echo 'sched:*' > set_event
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
@@ -29,7 +29,7 @@ echo 1 > events/sched/enable
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
@@ -40,7 +40,7 @@ echo 0 > events/sched/enable
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -ne 0 ]; then
fail "any of scheduler events should not be recorded"
fi
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index 2de7c61d1ae308..3f74c09c56b624 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -24,7 +24,7 @@ echo 0 > events/enable
echo "Get the most frequently calling function"
sample_events
-target_func=`cut -d: -f3 trace | sed 's/call_site=\([^+]*\)+0x.*/\1/' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
+target_func=`cat trace | grep -o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
if [ -z "$target_func" ]; then
exit_fail
fi
diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config
index 110d73917615d1..02a2a1b267c1ea 100644
--- a/tools/testing/selftests/iommu/config
+++ b/tools/testing/selftests/iommu/config
@@ -1,3 +1,5 @@
CONFIG_IOMMUFD=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
CONFIG_FAULT_INJECTION=y
CONFIG_IOMMUFD_TEST=y
+CONFIG_FAILSLAB=y
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 541bf192e30e6b..14bbab0cce1352 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -51,6 +51,7 @@
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
+#include <sys/utsname.h>
#endif
#ifndef ARRAY_SIZE
@@ -79,6 +80,9 @@
#define KSFT_XPASS 3
#define KSFT_SKIP 4
+#ifndef __noreturn
+#define __noreturn __attribute__((__noreturn__))
+#endif
#define __printf(a, b) __attribute__((format(printf, a, b)))
/* counters */
@@ -288,24 +292,26 @@ void ksft_test_result_code(int exit_code, const char *test_name,
}
/* Docs seem to call for double space if directive is absent */
- if (!directive[0] && msg[0])
+ if (!directive[0] && msg)
directive = " # ";
- va_start(args, msg);
printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive);
errno = saved_errno;
- vprintf(msg, args);
+ if (msg) {
+ va_start(args, msg);
+ vprintf(msg, args);
+ va_end(args);
+ }
printf("\n");
- va_end(args);
}
-static inline int ksft_exit_pass(void)
+static inline __noreturn int ksft_exit_pass(void)
{
ksft_print_cnts();
exit(KSFT_PASS);
}
-static inline int ksft_exit_fail(void)
+static inline __noreturn int ksft_exit_fail(void)
{
ksft_print_cnts();
exit(KSFT_FAIL);
@@ -332,7 +338,7 @@ static inline int ksft_exit_fail(void)
ksft_cnt.ksft_xfail + \
ksft_cnt.ksft_xskip)
-static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -347,19 +353,19 @@ static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
exit(KSFT_FAIL);
}
-static inline int ksft_exit_xfail(void)
+static inline __noreturn int ksft_exit_xfail(void)
{
ksft_print_cnts();
exit(KSFT_XFAIL);
}
-static inline int ksft_exit_xpass(void)
+static inline __noreturn int ksft_exit_xpass(void)
{
ksft_print_cnts();
exit(KSFT_XPASS);
}
-static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -388,4 +394,21 @@ static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
exit(KSFT_SKIP);
}
+static inline int ksft_min_kernel_version(unsigned int min_major,
+ unsigned int min_minor)
+{
+#ifdef NOLIBC
+ ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n");
+ return 0;
+#else
+ unsigned int major, minor;
+ struct utsname info;
+
+ if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
+ ksft_exit_fail_msg("Can't parse kernel version\n");
+
+ return major > min_major || (major == min_major && minor >= min_minor);
+#endif
+}
+
#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 4fd735e48ee7ee..d98702b6955df2 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -56,7 +56,6 @@
#include <asm/types.h>
#include <ctype.h>
#include <errno.h>
-#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -383,6 +382,7 @@
FIXTURE_DATA(fixture_name) self; \
pid_t child = 1; \
int status = 0; \
+ bool jmp = false; \
memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
if (setjmp(_metadata->env) == 0) { \
/* Use the same _metadata. */ \
@@ -399,8 +399,10 @@
_metadata->exit_code = KSFT_FAIL; \
} \
} \
+ else \
+ jmp = true; \
if (child == 0) { \
- if (_metadata->setup_completed && !_metadata->teardown_parent) \
+ if (_metadata->setup_completed && !_metadata->teardown_parent && !jmp) \
fixture_name##_teardown(_metadata, &self, variant->data); \
_exit(0); \
} \
@@ -1156,7 +1158,7 @@ void __run_test(struct __fixture_metadata *f,
struct __test_metadata *t)
{
struct __test_xfail *xfail;
- char test_name[LINE_MAX];
+ char *test_name;
const char *diagnostic;
/* reset test struct */
@@ -1164,8 +1166,12 @@ void __run_test(struct __fixture_metadata *f,
t->trigger = 0;
memset(t->results->reason, 0, sizeof(t->results->reason));
- snprintf(test_name, sizeof(test_name), "%s%s%s.%s",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ if (asprintf(&test_name, "%s%s%s.%s", f->name,
+ variant->name[0] ? "." : "", variant->name, t->name) == -1) {
+ ksft_print_msg("ERROR ALLOCATING MEMORY\n");
+ t->exit_code = KSFT_FAIL;
+ _exit(t->exit_code);
+ }
ksft_print_msg(" RUN %s ...\n", test_name);
@@ -1202,7 +1208,8 @@ void __run_test(struct __fixture_metadata *f,
diagnostic = "unknown";
ksft_test_result_code(t->exit_code, test_name,
- diagnostic ? "%s" : "", diagnostic);
+ diagnostic ? "%s" : NULL, diagnostic);
+ free(test_name);
}
static int test_harness_run(int argc, char **argv)
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index ddba2c2fb5deb1..4eaba83cdcf3d2 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -135,8 +135,8 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
irq_iter = READ_ONCE(shared_data->nr_iter);
__GUEST_ASSERT(config_iter + 1 == irq_iter,
- "config_iter + 1 = 0x%lx, irq_iter = 0x%lx.\n"
- " Guest timer interrupt was not trigged within the specified\n"
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
" interval, try to increase the error margin by [-e] option.\n",
config_iter + 1, irq_iter);
}
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 3bd03b088dda60..81ce37ec407dd1 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -1037,8 +1037,19 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_property property,
uint32_t value);
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+
+static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+ return *((&entry->eax) + feature.reg) & BIT(feature.bit);
+}
+
void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_feature feature,
bool set);
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
index 6628dc4dda89f3..1a6da7389bf1f5 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -22,10 +22,11 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
{
uint64_t gpa;
- for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
- *((volatile uint64_t *)gpa) = gpa;
-
- GUEST_DONE();
+ for (;;) {
+ for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+ *((volatile uint64_t *)gpa) = gpa;
+ GUEST_SYNC(0);
+ }
}
struct vcpu_info {
@@ -55,7 +56,7 @@ static void rendezvous_with_boss(void)
static void run_vcpu(struct kvm_vcpu *vcpu)
{
vcpu_run(vcpu);
- TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
}
static void *vcpu_worker(void *data)
@@ -64,17 +65,13 @@ static void *vcpu_worker(void *data)
struct kvm_vcpu *vcpu = info->vcpu;
struct kvm_vm *vm = vcpu->vm;
struct kvm_sregs sregs;
- struct kvm_regs regs;
vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size);
- /* Snapshot regs before the first run. */
- vcpu_regs_get(vcpu, &regs);
rendezvous_with_boss();
run_vcpu(vcpu);
rendezvous_with_boss();
- vcpu_regs_set(vcpu, &regs);
vcpu_sregs_get(vcpu, &sregs);
#ifdef __x86_64__
/* Toggle CR0.WP to trigger a MMU context reset. */
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index e22848f747c015..0f9cabd99fd451 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -60,7 +60,7 @@ static void guest_run(struct test_vcpu_shared_data *shared_data)
irq_iter = READ_ONCE(shared_data->nr_iter);
__GUEST_ASSERT(config_iter + 1 == irq_iter,
"config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
- " Guest timer interrupt was not trigged within the specified\n"
+ " Guest timer interrupt was not triggered within the specified\n"
" interval, try to increase the error margin by [-e] option.\n",
config_iter + 1, irq_iter);
}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 06b43ed23580b6..bd57d991e27d85 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -333,7 +333,7 @@ static void test_invalid_memory_region_flags(void)
struct kvm_vm *vm;
int r, i;
-#if defined __aarch64__ || defined __x86_64__
+#if defined __aarch64__ || defined __riscv || defined __x86_64__
supported_flags |= KVM_MEM_READONLY;
#endif
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 9e2879af7c201f..40cc59f4e65013 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -133,6 +133,43 @@ static void enter_guest(struct kvm_vcpu *vcpu)
}
}
+static void test_pv_unhalt(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_cpuid_entry2 *ent;
+ u32 kvm_sig_old;
+
+ pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+
+ TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS);
+
+ /* KVM_PV_UNHALT test */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+
+ TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
+
+ /* Make sure KVM clears vcpu->arch.kvm_cpuid */
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ kvm_sig_old = ent->ebx;
+ ent->ebx = 0xdeadbeef;
+ vcpu_set_cpuid(vcpu);
+
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ ent->ebx = kvm_sig_old;
+ vcpu_set_cpuid(vcpu);
+
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+
+ /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
+
+ kvm_vm_free(vm);
+}
+
int main(void)
{
struct kvm_vcpu *vcpu;
@@ -151,4 +188,6 @@ int main(void)
enter_guest(vcpu);
kvm_vm_free(vm);
+
+ test_pv_unhalt();
}
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
index 29609b52f8fa0c..26c85815f7e983 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -416,12 +416,30 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters
static void guest_test_gp_counters(void)
{
+ uint8_t pmu_version = guest_get_pmu_version();
uint8_t nr_gp_counters = 0;
uint32_t base_msr;
- if (guest_get_pmu_version())
+ if (pmu_version)
nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ /*
+ * For v2+ PMUs, PERF_GLOBAL_CTRL's architectural post-RESET value is
+ * "Sets bits n-1:0 and clears the upper bits", where 'n' is the number
+ * of GP counters. If there are no GP counters, require KVM to leave
+ * PERF_GLOBAL_CTRL '0'. This edge case isn't covered by the SDM, but
+ * follow the spirit of the architecture and only globally enable GP
+ * counters, of which there are none.
+ */
+ if (pmu_version > 1) {
+ uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
+
+ if (nr_gp_counters)
+ GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
+ else
+ GUEST_ASSERT_EQ(global_ctrl, 0);
+ }
+
if (this_cpu_has(X86_FEATURE_PDCM) &&
rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
base_msr = MSR_IA32_PMC0;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 7f6f5f23fb9b67..977948fd52e6b8 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -28,16 +28,16 @@
#define NESTED_TEST_MEM1 0xc0001000
#define NESTED_TEST_MEM2 0xc0002000
-static void l2_guest_code(void)
+static void l2_guest_code(u64 *a, u64 *b)
{
- *(volatile uint64_t *)NESTED_TEST_MEM1;
- *(volatile uint64_t *)NESTED_TEST_MEM1 = 1;
+ READ_ONCE(*a);
+ WRITE_ONCE(*a, 1);
GUEST_SYNC(true);
GUEST_SYNC(false);
- *(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+ WRITE_ONCE(*b, 1);
GUEST_SYNC(true);
- *(volatile uint64_t *)NESTED_TEST_MEM2 = 1;
+ WRITE_ONCE(*b, 1);
GUEST_SYNC(true);
GUEST_SYNC(false);
@@ -45,17 +45,33 @@ static void l2_guest_code(void)
vmcall();
}
+static void l2_guest_code_ept_enabled(void)
+{
+ l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
+}
+
+static void l2_guest_code_ept_disabled(void)
+{
+ /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
+ l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
+}
+
void l1_guest_code(struct vmx_pages *vmx)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ void *l2_rip;
GUEST_ASSERT(vmx->vmcs_gpa);
GUEST_ASSERT(prepare_for_vmx_operation(vmx));
GUEST_ASSERT(load_vmcs(vmx));
- prepare_vmcs(vmx, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ if (vmx->eptp_gpa)
+ l2_rip = l2_guest_code_ept_enabled;
+ else
+ l2_rip = l2_guest_code_ept_disabled;
+
+ prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
GUEST_SYNC(false);
GUEST_ASSERT(!vmlaunch());
@@ -64,7 +80,7 @@ void l1_guest_code(struct vmx_pages *vmx)
GUEST_DONE();
}
-int main(int argc, char *argv[])
+static void test_vmx_dirty_log(bool enable_ept)
{
vm_vaddr_t vmx_pages_gva = 0;
struct vmx_pages *vmx;
@@ -76,8 +92,7 @@ int main(int argc, char *argv[])
struct ucall uc;
bool done = false;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- TEST_REQUIRE(kvm_cpu_has_ept());
+ pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
@@ -103,11 +118,16 @@ int main(int argc, char *argv[])
*
* Note that prepare_eptp should be called only L1's GPA map is done,
* meaning after the last call to virt_map.
+ *
+ * When EPT is disabled, the L2 guest code will still access the same L1
+ * GPAs as the EPT enabled case.
*/
- prepare_eptp(vmx, vm, 0);
- nested_map_memslot(vmx, vm, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+ if (enable_ept) {
+ prepare_eptp(vmx, vm, 0);
+ nested_map_memslot(vmx, vm, 0);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
+ }
bmap = bitmap_zalloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
@@ -148,3 +168,15 @@ int main(int argc, char *argv[])
}
}
}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ test_vmx_dirty_log(/*enable_ept=*/false);
+
+ if (kvm_cpu_has_ept())
+ test_vmx_dirty_log(/*enable_ept=*/true);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index da2cade3bab0e6..1dae4a02957f9d 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -48,6 +48,15 @@ ifeq ($(KHDR_INCLUDES),)
KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include
endif
+# In order to use newer items that haven't yet been added to the user's system
+# header files, add $(TOOLS_INCLUDES) to the compiler invocation in each
+# each selftest.
+# You may need to add files to that location, or to refresh an existing file. In
+# order to do that, run "make headers" from $(top_srcdir), then copy the
+# header file that you want from $(top_srcdir)/usr/include/... , to the matching
+# subdir in $(TOOLS_INCLUDE).
+TOOLS_INCLUDES := -isystem $(top_srcdir)/tools/include/uapi
+
# The following are built by lib.mk common compile rules.
# TEST_CUSTOM_PROGS should be used by tests that require
# custom build rule and prevent common build rule use.
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index d26e962f2ac490..0b9ab987601cca 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -47,3 +47,5 @@ mkdirty
va_high_addr_switch
hugetlb_fault_after_madv
hugetlb_madv_vs_map
+mseal_test
+seal_elf
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index eb5f39a2668b44..1b07a48fc2ba4a 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -32,7 +32,7 @@ endif
# LDLIBS.
MAKEFLAGS += --no-builtin-rules
-CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
+CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
LDLIBS = -lrt -lpthread -lm
TEST_GEN_FILES = cow
@@ -59,6 +59,8 @@ TEST_GEN_FILES += mlock2-tests
TEST_GEN_FILES += mrelease_test
TEST_GEN_FILES += mremap_dontunmap
TEST_GEN_FILES += mremap_test
+TEST_GEN_FILES += mseal_test
+TEST_GEN_FILES += seal_elf
TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += pagemap_ioctl
TEST_GEN_FILES += thuge-gen
diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
index cbe99594d319b4..18a49c70d4c635 100644
--- a/tools/testing/selftests/mm/gup_test.c
+++ b/tools/testing/selftests/mm/gup_test.c
@@ -203,7 +203,7 @@ int main(int argc, char **argv)
ksft_print_header();
ksft_set_plan(nthreads);
- filed = open(file, O_RDWR|O_CREAT);
+ filed = open(file, O_RDWR|O_CREAT, 0664);
if (filed < 0)
ksft_exit_fail_msg("Unable to open %s: %s\n", file, strerror(errno));
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index d615767e396bec..db845dca8d1950 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -28,6 +28,15 @@
#define MiB (1024 * KiB)
#define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child"
+#define MAP_MERGE_FAIL ((void *)-1)
+#define MAP_MERGE_SKIP ((void *)-2)
+
+enum ksm_merge_mode {
+ KSM_MERGE_PRCTL,
+ KSM_MERGE_MADVISE,
+ KSM_MERGE_NONE, /* PRCTL already set */
+};
+
static int mem_fd;
static int ksm_fd;
static int ksm_full_scans_fd;
@@ -146,33 +155,34 @@ static int ksm_unmerge(void)
return 0;
}
-static char *mmap_and_merge_range(char val, unsigned long size, int prot,
- bool use_prctl)
+static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
+ enum ksm_merge_mode mode)
{
char *map;
+ char *err_map = MAP_MERGE_FAIL;
int ret;
/* Stabilize accounting by disabling KSM completely. */
if (ksm_unmerge()) {
- ksft_test_result_fail("Disabling (unmerging) KSM failed\n");
- return MAP_FAILED;
+ ksft_print_msg("Disabling (unmerging) KSM failed\n");
+ return err_map;
}
if (get_my_merging_pages() > 0) {
- ksft_test_result_fail("Still pages merged\n");
- return MAP_FAILED;
+ ksft_print_msg("Still pages merged\n");
+ return err_map;
}
map = mmap(NULL, size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANON, -1, 0);
if (map == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
- return MAP_FAILED;
+ ksft_print_msg("mmap() failed\n");
+ return err_map;
}
/* Don't use THP. Ignore if THP are not around on a kernel. */
if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) {
- ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
+ ksft_print_msg("MADV_NOHUGEPAGE failed\n");
goto unmap;
}
@@ -180,27 +190,36 @@ static char *mmap_and_merge_range(char val, unsigned long size, int prot,
memset(map, val, size);
if (mprotect(map, size, prot)) {
- ksft_test_result_skip("mprotect() failed\n");
+ ksft_print_msg("mprotect() failed\n");
+ err_map = MAP_MERGE_SKIP;
goto unmap;
}
- if (use_prctl) {
+ switch (mode) {
+ case KSM_MERGE_PRCTL:
ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
if (ret < 0 && errno == EINVAL) {
- ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+ ksft_print_msg("PR_SET_MEMORY_MERGE not supported\n");
+ err_map = MAP_MERGE_SKIP;
goto unmap;
} else if (ret) {
- ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+ ksft_print_msg("PR_SET_MEMORY_MERGE=1 failed\n");
goto unmap;
}
- } else if (madvise(map, size, MADV_MERGEABLE)) {
- ksft_test_result_fail("MADV_MERGEABLE failed\n");
- goto unmap;
+ break;
+ case KSM_MERGE_MADVISE:
+ if (madvise(map, size, MADV_MERGEABLE)) {
+ ksft_print_msg("MADV_MERGEABLE failed\n");
+ goto unmap;
+ }
+ break;
+ case KSM_MERGE_NONE:
+ break;
}
/* Run KSM to trigger merging and wait. */
if (ksm_merge()) {
- ksft_test_result_fail("Running KSM failed\n");
+ ksft_print_msg("Running KSM failed\n");
goto unmap;
}
@@ -209,14 +228,31 @@ static char *mmap_and_merge_range(char val, unsigned long size, int prot,
* accounted differently (depending on kernel support).
*/
if (val && !get_my_merging_pages()) {
- ksft_test_result_fail("No pages got merged\n");
+ ksft_print_msg("No pages got merged\n");
goto unmap;
}
return map;
unmap:
munmap(map, size);
- return MAP_FAILED;
+ return err_map;
+}
+
+static char *mmap_and_merge_range(char val, unsigned long size, int prot,
+ enum ksm_merge_mode mode)
+{
+ char *map;
+ char *ret = MAP_FAILED;
+
+ map = __mmap_and_merge_range(val, size, prot, mode);
+ if (map == MAP_MERGE_FAIL)
+ ksft_test_result_fail("Merging memory failed");
+ else if (map == MAP_MERGE_SKIP)
+ ksft_test_result_skip("Merging memory skipped");
+ else
+ ret = map;
+
+ return ret;
}
static void test_unmerge(void)
@@ -226,7 +262,7 @@ static void test_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -264,7 +300,7 @@ static void test_unmerge_zero_pages(void)
}
/* Let KSM deduplicate zero pages. */
- map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -312,7 +348,7 @@ static void test_unmerge_discarded(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -344,7 +380,7 @@ static void test_unmerge_uffd_wp(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -439,6 +475,36 @@ static void test_prctl(void)
ksft_test_result_pass("Setting/clearing PR_SET_MEMORY_MERGE works\n");
}
+static int test_child_ksm(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+
+ /* Test if KSM is enabled for the process. */
+ if (prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) != 1)
+ return -1;
+
+ /* Test if merge could really happen. */
+ map = __mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_NONE);
+ if (map == MAP_MERGE_FAIL)
+ return -2;
+ else if (map == MAP_MERGE_SKIP)
+ return -3;
+
+ munmap(map, size);
+ return 0;
+}
+
+static void test_child_ksm_err(int status)
+{
+ if (status == -1)
+ ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
+ else if (status == -2)
+ ksft_test_result_fail("Merge in child failed\n");
+ else if (status == -3)
+ ksft_test_result_skip("Merge in child skipped\n");
+}
+
/* Verify that prctl ksm flag is inherited. */
static void test_prctl_fork(void)
{
@@ -458,7 +524,7 @@ static void test_prctl_fork(void)
child_pid = fork();
if (!child_pid) {
- exit(prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0));
+ exit(test_child_ksm());
} else if (child_pid < 0) {
ksft_test_result_fail("fork() failed\n");
return;
@@ -467,8 +533,11 @@ static void test_prctl_fork(void)
if (waitpid(child_pid, &status, 0) < 0) {
ksft_test_result_fail("waitpid() failed\n");
return;
- } else if (WEXITSTATUS(status) != 1) {
- ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
+ }
+
+ status = WEXITSTATUS(status);
+ if (status) {
+ test_child_ksm_err(status);
return;
}
@@ -480,12 +549,6 @@ static void test_prctl_fork(void)
ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
}
-static int ksm_fork_exec_child(void)
-{
- /* Test if KSM is enabled for the process. */
- return prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) == 1;
-}
-
static void test_prctl_fork_exec(void)
{
int ret, status;
@@ -518,7 +581,7 @@ static void test_prctl_fork_exec(void)
if (WIFEXITED(status)) {
status = WEXITSTATUS(status);
if (status) {
- ksft_test_result_fail("KSM not enabled\n");
+ test_child_ksm_err(status);
return;
}
} else {
@@ -545,7 +608,7 @@ static void test_prctl_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_PRCTL);
if (map == MAP_FAILED)
return;
@@ -568,7 +631,7 @@ static void test_prot_none(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0x11, size, PROT_NONE, false);
+ map = mmap_and_merge_range(0x11, size, PROT_NONE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
goto unmap;
@@ -599,7 +662,7 @@ int main(int argc, char **argv)
int err;
if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) {
- exit(ksm_fork_exec_child() == 1 ? 0 : 1);
+ exit(test_child_ksm());
}
#ifdef __NR_userfaultfd
diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c
index 200bedcdc32e9c..1e01d3ddc11c58 100644
--- a/tools/testing/selftests/mm/mdwe_test.c
+++ b/tools/testing/selftests/mm/mdwe_test.c
@@ -7,6 +7,7 @@
#include <linux/mman.h>
#include <linux/prctl.h>
+#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/auxv.h>
diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c
index 9b298f6a04b371..9a0597310a7651 100644
--- a/tools/testing/selftests/mm/memfd_secret.c
+++ b/tools/testing/selftests/mm/memfd_secret.c
@@ -20,6 +20,7 @@
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
+#include <fcntl.h>
#include "../kselftest.h"
@@ -83,6 +84,45 @@ static void test_mlock_limit(int fd)
pass("mlock limit is respected\n");
}
+static void test_vmsplice(int fd, const char *desc)
+{
+ ssize_t transferred;
+ struct iovec iov;
+ int pipefd[2];
+ char *mem;
+
+ if (pipe(pipefd)) {
+ fail("pipe failed: %s\n", strerror(errno));
+ return;
+ }
+
+ mem = mmap(NULL, page_size, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("Unable to mmap secret memory\n");
+ goto close_pipe;
+ }
+
+ /*
+ * vmsplice() may use GUP-fast, which must also fail. Prefault the
+ * page table, so GUP-fast could find it.
+ */
+ memset(mem, PATTERN, page_size);
+
+ iov.iov_base = mem;
+ iov.iov_len = page_size;
+ transferred = vmsplice(pipefd[1], &iov, 1, 0);
+
+ if (transferred < 0 && errno == EFAULT)
+ pass("vmsplice is blocked as expected with %s\n", desc);
+ else
+ fail("vmsplice: unexpected memory access with %s\n", desc);
+
+ munmap(mem, page_size);
+close_pipe:
+ close(pipefd[0]);
+ close(pipefd[1]);
+}
+
static void try_process_vm_read(int fd, int pipefd[2])
{
struct iovec liov, riov;
@@ -187,7 +227,6 @@ static void test_remote_access(int fd, const char *name,
return;
}
- ftruncate(fd, page_size);
memset(mem, PATTERN, page_size);
if (write(pipefd[1], &mem, sizeof(mem)) < 0) {
@@ -258,7 +297,7 @@ static void prepare(void)
strerror(errno));
}
-#define NUM_TESTS 4
+#define NUM_TESTS 6
int main(int argc, char *argv[])
{
@@ -277,9 +316,17 @@ int main(int argc, char *argv[])
ksft_exit_fail_msg("memfd_secret failed: %s\n",
strerror(errno));
}
+ if (ftruncate(fd, page_size))
+ ksft_exit_fail_msg("ftruncate failed: %s\n", strerror(errno));
test_mlock_limit(fd);
test_file_apis(fd);
+ /*
+ * We have to run the first vmsplice test before any secretmem page was
+ * allocated for this fd.
+ */
+ test_vmsplice(fd, "fresh page");
+ test_vmsplice(fd, "existing page");
test_process_vm_read(fd);
test_ptrace(fd);
diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c
index 26f744188ad0c8..7f0d50fa361dcf 100644
--- a/tools/testing/selftests/mm/mlock2-tests.c
+++ b/tools/testing/selftests/mm/mlock2-tests.c
@@ -20,8 +20,6 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
FILE *file;
int ret = 1;
char line[1024] = {0};
- char *end_addr;
- char *stop;
unsigned long start;
unsigned long end;
@@ -37,21 +35,10 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
memset(area, 0, sizeof(struct vm_boundaries));
while(fgets(line, 1024, file)) {
- end_addr = strchr(line, '-');
- if (!end_addr) {
+ if (sscanf(line, "%lx-%lx", &start, &end) != 2) {
ksft_print_msg("cannot parse /proc/self/maps\n");
goto out;
}
- *end_addr = '\0';
- end_addr++;
- stop = strchr(end_addr, ' ');
- if (!stop) {
- ksft_print_msg("cannot parse /proc/self/maps\n");
- goto out;
- }
-
- sscanf(line, "%lx", &start);
- sscanf(end_addr, "%lx", &end);
if (start <= addr && end > addr) {
area->start = start;
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 2f8b991f78cb4c..1b03bcfaefdfa2 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -23,6 +23,7 @@
#define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
#define SIZE_MB(m) ((size_t)m * (1024 * 1024))
#define SIZE_KB(k) ((size_t)k * 1024)
@@ -69,6 +70,27 @@ enum {
.expect_failure = should_fail \
}
+/* compute square root using binary search */
+static unsigned long get_sqrt(unsigned long val)
+{
+ unsigned long low = 1;
+
+ /* assuming rand_size is less than 1TB */
+ unsigned long high = (1UL << 20);
+
+ while (low <= high) {
+ unsigned long mid = low + (high - low) / 2;
+ unsigned long temp = mid * mid;
+
+ if (temp == val)
+ return mid;
+ if (temp < val)
+ low = mid + 1;
+ high = mid - 1;
+ }
+ return low;
+}
+
/*
* Returns false if the requested remap region overlaps with an
* existing mapping (e.g text, stack) else returns true.
@@ -126,19 +148,21 @@ static unsigned long long get_mmap_min_addr(void)
* Using /proc/self/maps, assert that the specified address range is contained
* within a single mapping.
*/
-static bool is_range_mapped(FILE *maps_fp, void *start, void *end)
+static bool is_range_mapped(FILE *maps_fp, unsigned long start,
+ unsigned long end)
{
char *line = NULL;
size_t len = 0;
bool success = false;
+ unsigned long first_val, second_val;
rewind(maps_fp);
while (getline(&line, &len, maps_fp) != -1) {
- char *first = strtok(line, "- ");
- void *first_val = (void *)strtol(first, NULL, 16);
- char *second = strtok(NULL, "- ");
- void *second_val = (void *) strtol(second, NULL, 16);
+ if (sscanf(line, "%lx-%lx", &first_val, &second_val) != 2) {
+ ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
+ break;
+ }
if (first_val <= start && second_val >= end) {
success = true;
@@ -233,7 +257,8 @@ static void mremap_expand_merge(FILE *maps_fp, unsigned long page_size)
goto out;
}
- success = is_range_mapped(maps_fp, start, start + 3 * page_size);
+ success = is_range_mapped(maps_fp, (unsigned long)start,
+ (unsigned long)(start + 3 * page_size));
munmap(start, 3 * page_size);
out:
@@ -272,7 +297,8 @@ static void mremap_expand_merge_offset(FILE *maps_fp, unsigned long page_size)
goto out;
}
- success = is_range_mapped(maps_fp, start, start + 3 * page_size);
+ success = is_range_mapped(maps_fp, (unsigned long)start,
+ (unsigned long)(start + 3 * page_size));
munmap(start, 3 * page_size);
out:
@@ -296,7 +322,7 @@ out:
*
* |DDDDddddSSSSssss|
*/
-static void mremap_move_within_range(char pattern_seed)
+static void mremap_move_within_range(unsigned int pattern_seed, char *rand_addr)
{
char *test_name = "mremap mremap move within range";
void *src, *dest;
@@ -316,10 +342,7 @@ static void mremap_move_within_range(char pattern_seed)
src = (void *)((unsigned long)src & ~(SIZE_MB(2) - 1));
/* Set byte pattern for source block. */
- srand(pattern_seed);
- for (i = 0; i < SIZE_MB(2); i++) {
- ((char *)src)[i] = (char) rand();
- }
+ memcpy(src, rand_addr, SIZE_MB(2));
dest = src - SIZE_MB(2);
@@ -357,14 +380,14 @@ out:
/* Returns the time taken for the remap on success else returns -1. */
static long long remap_region(struct config c, unsigned int threshold_mb,
- char pattern_seed)
+ char *rand_addr)
{
void *addr, *src_addr, *dest_addr, *dest_preamble_addr;
- int d;
- unsigned long long t;
+ unsigned long long t, d;
struct timespec t_start = {0, 0}, t_end = {0, 0};
long long start_ns, end_ns, align_mask, ret, offset;
unsigned long long threshold;
+ unsigned long num_chunks;
if (threshold_mb == VALIDATION_NO_THRESHOLD)
threshold = c.region_size;
@@ -378,9 +401,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
}
/* Set byte pattern for source block. */
- srand(pattern_seed);
- for (t = 0; t < threshold; t++)
- memset((char *) src_addr + t, (char) rand(), 1);
+ memcpy(src_addr, rand_addr, threshold);
/* Mask to zero out lower bits of address for alignment */
align_mask = ~(c.dest_alignment - 1);
@@ -420,9 +441,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
}
/* Set byte pattern for the dest preamble block. */
- srand(pattern_seed);
- for (d = 0; d < c.dest_preamble_size; d++)
- memset((char *) dest_preamble_addr + d, (char) rand(), 1);
+ memcpy(dest_preamble_addr, rand_addr, c.dest_preamble_size);
}
clock_gettime(CLOCK_MONOTONIC, &t_start);
@@ -436,15 +455,42 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
goto clean_up_dest_preamble;
}
- /* Verify byte pattern after remapping */
- srand(pattern_seed);
- for (t = 0; t < threshold; t++) {
- char c = (char) rand();
+ /*
+ * Verify byte pattern after remapping. Employ an algorithm with a
+ * square root time complexity in threshold: divide the range into
+ * chunks, if memcmp() returns non-zero, only then perform an
+ * iteration in that chunk to find the mismatch index.
+ */
+ num_chunks = get_sqrt(threshold);
+ for (unsigned long i = 0; i < num_chunks; ++i) {
+ size_t chunk_size = threshold / num_chunks;
+ unsigned long shift = i * chunk_size;
+
+ if (!memcmp(dest_addr + shift, rand_addr + shift, chunk_size))
+ continue;
+
+ /* brute force iteration only over mismatch segment */
+ for (t = shift; t < shift + chunk_size; ++t) {
+ if (((char *) dest_addr)[t] != rand_addr[t]) {
+ ksft_print_msg("Data after remap doesn't match at offset %llu\n",
+ t);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff,
+ ((char *) dest_addr)[t] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+ }
- if (((char *) dest_addr)[t] != c) {
+ /*
+ * if threshold is not divisible by num_chunks, then check the
+ * last chunk
+ */
+ for (t = num_chunks * (threshold / num_chunks); t < threshold; ++t) {
+ if (((char *) dest_addr)[t] != rand_addr[t]) {
ksft_print_msg("Data after remap doesn't match at offset %llu\n",
- t);
- ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ t);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff,
((char *) dest_addr)[t] & 0xff);
ret = -1;
goto clean_up_dest;
@@ -452,22 +498,44 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
}
/* Verify the dest preamble byte pattern after remapping */
- if (c.dest_preamble_size) {
- srand(pattern_seed);
- for (d = 0; d < c.dest_preamble_size; d++) {
- char c = (char) rand();
-
- if (((char *) dest_preamble_addr)[d] != c) {
- ksft_print_msg("Preamble data after remap doesn't match at offset %d\n",
- d);
- ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
- ((char *) dest_preamble_addr)[d] & 0xff);
+ if (!c.dest_preamble_size)
+ goto no_preamble;
+
+ num_chunks = get_sqrt(c.dest_preamble_size);
+
+ for (unsigned long i = 0; i < num_chunks; ++i) {
+ size_t chunk_size = c.dest_preamble_size / num_chunks;
+ unsigned long shift = i * chunk_size;
+
+ if (!memcmp(dest_preamble_addr + shift, rand_addr + shift,
+ chunk_size))
+ continue;
+
+ /* brute force iteration only over mismatched segment */
+ for (d = shift; d < shift + chunk_size; ++d) {
+ if (((char *) dest_preamble_addr)[d] != rand_addr[d]) {
+ ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n",
+ d);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff,
+ ((char *) dest_preamble_addr)[d] & 0xff);
ret = -1;
goto clean_up_dest;
}
}
}
+ for (d = num_chunks * (c.dest_preamble_size / num_chunks); d < c.dest_preamble_size; ++d) {
+ if (((char *) dest_preamble_addr)[d] != rand_addr[d]) {
+ ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n",
+ d);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff,
+ ((char *) dest_preamble_addr)[d] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+
+no_preamble:
start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
ret = end_ns - start_ns;
@@ -494,7 +562,8 @@ out:
* the beginning of the mapping just because the aligned
* down address landed on a mapping that maybe does not exist.
*/
-static void mremap_move_1mb_from_start(char pattern_seed)
+static void mremap_move_1mb_from_start(unsigned int pattern_seed,
+ char *rand_addr)
{
char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src";
void *src = NULL, *dest = NULL;
@@ -520,10 +589,7 @@ static void mremap_move_1mb_from_start(char pattern_seed)
}
/* Set byte pattern for source block. */
- srand(pattern_seed);
- for (i = 0; i < SIZE_MB(2); i++) {
- ((char *)src)[i] = (char) rand();
- }
+ memcpy(src, rand_addr, SIZE_MB(2));
/*
* Unmap the beginning of dest so that the aligned address
@@ -568,10 +634,10 @@ out:
static void run_mremap_test_case(struct test test_case, int *failures,
unsigned int threshold_mb,
- unsigned int pattern_seed)
+ unsigned int pattern_seed, char *rand_addr)
{
long long remap_time = remap_region(test_case.config, threshold_mb,
- pattern_seed);
+ rand_addr);
if (remap_time < 0) {
if (test_case.expect_failure)
@@ -642,7 +708,15 @@ int main(int argc, char **argv)
int failures = 0;
int i, run_perf_tests;
unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
+
+ /* hard-coded test configs */
+ size_t max_test_variable_region_size = _2GB;
+ size_t max_test_constant_region_size = _2MB;
+ size_t dest_preamble_size = 10 * _4MB;
+
unsigned int pattern_seed;
+ char *rand_addr;
+ size_t rand_size;
int num_expand_tests = 2;
int num_misc_tests = 2;
struct test test_cases[MAX_TEST] = {};
@@ -659,6 +733,31 @@ int main(int argc, char **argv)
ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
threshold_mb, pattern_seed);
+ /*
+ * set preallocated random array according to test configs; see the
+ * functions for the logic of setting the size
+ */
+ if (!threshold_mb)
+ rand_size = MAX(max_test_variable_region_size,
+ max_test_constant_region_size);
+ else
+ rand_size = MAX(MIN(threshold_mb * _1MB,
+ max_test_variable_region_size),
+ max_test_constant_region_size);
+ rand_size = MAX(dest_preamble_size, rand_size);
+
+ rand_addr = (char *)mmap(NULL, rand_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (rand_addr == MAP_FAILED) {
+ perror("mmap");
+ ksft_exit_fail_msg("cannot mmap rand_addr\n");
+ }
+
+ /* fill stream of random bytes */
+ srand(pattern_seed);
+ for (unsigned long i = 0; i < rand_size; ++i)
+ rand_addr[i] = (char) rand();
+
page_size = sysconf(_SC_PAGESIZE);
/* Expected mremap failures */
@@ -730,13 +829,13 @@ int main(int argc, char **argv)
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
run_mremap_test_case(test_cases[i], &failures, threshold_mb,
- pattern_seed);
+ pattern_seed, rand_addr);
maps_fp = fopen("/proc/self/maps", "r");
if (maps_fp == NULL) {
- ksft_print_msg("Failed to read /proc/self/maps: %s\n", strerror(errno));
- exit(KSFT_FAIL);
+ munmap(rand_addr, rand_size);
+ ksft_exit_fail_msg("Failed to read /proc/self/maps: %s\n", strerror(errno));
}
mremap_expand_merge(maps_fp, page_size);
@@ -744,17 +843,20 @@ int main(int argc, char **argv)
fclose(maps_fp);
- mremap_move_within_range(pattern_seed);
- mremap_move_1mb_from_start(pattern_seed);
+ mremap_move_within_range(pattern_seed, rand_addr);
+ mremap_move_1mb_from_start(pattern_seed, rand_addr);
if (run_perf_tests) {
ksft_print_msg("\n%s\n",
"mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++)
run_mremap_test_case(perf_test_cases[i], &failures,
- threshold_mb, pattern_seed);
+ threshold_mb, pattern_seed,
+ rand_addr);
}
+ munmap(rand_addr, rand_size);
+
if (failures > 0)
ksft_exit_fail();
else
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
new file mode 100644
index 00000000000000..ca8dbee0c612e7
--- /dev/null
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -0,0 +1,1893 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <stdbool.h>
+#include "../kselftest.h"
+#include <syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <sys/stat.h>
+
+/*
+ * need those definition for manually build using gcc.
+ * gcc -I ../../../../usr/include -DDEBUG -O3 -DDEBUG -O3 mseal_test.c -o mseal_test
+ */
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
+
+#ifndef PKEY_BITS_PER_KEY
+#define PKEY_BITS_PER_PKEY 2
+#endif
+
+#ifndef PKEY_MASK
+#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
+#endif
+
+#define FAIL_TEST_IF_FALSE(c) do {\
+ if (!(c)) {\
+ ksft_test_result_fail("%s, line:%d\n", __func__, __LINE__);\
+ goto test_end;\
+ } \
+ } \
+ while (0)
+
+#define SKIP_TEST_IF_FALSE(c) do {\
+ if (!(c)) {\
+ ksft_test_result_skip("%s, line:%d\n", __func__, __LINE__);\
+ goto test_end;\
+ } \
+ } \
+ while (0)
+
+
+#define TEST_END_CHECK() {\
+ ksft_test_result_pass("%s\n", __func__);\
+ return;\
+test_end:\
+ return;\
+}
+
+#ifndef u64
+#define u64 unsigned long long
+#endif
+
+static unsigned long get_vma_size(void *addr, int *prot)
+{
+ FILE *maps;
+ char line[256];
+ int size = 0;
+ uintptr_t addr_start, addr_end;
+ char protstr[5];
+ *prot = 0;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps)
+ return 0;
+
+ while (fgets(line, sizeof(line), maps)) {
+ if (sscanf(line, "%lx-%lx %4s", &addr_start, &addr_end, protstr) == 3) {
+ if (addr_start == (uintptr_t) addr) {
+ size = addr_end - addr_start;
+ if (protstr[0] == 'r')
+ *prot |= 0x4;
+ if (protstr[1] == 'w')
+ *prot |= 0x2;
+ if (protstr[2] == 'x')
+ *prot |= 0x1;
+ break;
+ }
+ }
+ }
+ fclose(maps);
+ return size;
+}
+
+/*
+ * define sys_xyx to call syscall directly.
+ */
+static int sys_mseal(void *start, size_t len)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mseal, start, len, 0);
+ return sret;
+}
+
+static int sys_mprotect(void *ptr, size_t size, unsigned long prot)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mprotect, ptr, size, prot);
+ return sret;
+}
+
+static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey);
+ return sret;
+}
+
+static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long fd, unsigned long offset)
+{
+ void *sret;
+
+ errno = 0;
+ sret = (void *) syscall(__NR_mmap, addr, len, prot,
+ flags, fd, offset);
+ return sret;
+}
+
+static int sys_munmap(void *ptr, size_t size)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_munmap, ptr, size);
+ return sret;
+}
+
+static int sys_madvise(void *start, size_t len, int types)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_madvise, start, len, types);
+ return sret;
+}
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+ int ret = syscall(__NR_pkey_alloc, flags, init_val);
+
+ return ret;
+}
+
+static unsigned int __read_pkey_reg(void)
+{
+ unsigned int pkey_reg = 0;
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkey_reg = eax;
+#endif
+ return pkey_reg;
+}
+
+static void __write_pkey_reg(u64 pkey_reg)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ unsigned int eax = pkey_reg;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+#endif
+}
+
+static unsigned long pkey_bit_position(int pkey)
+{
+ return pkey * PKEY_BITS_PER_PKEY;
+}
+
+static u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
+{
+ unsigned long shift = pkey_bit_position(pkey);
+
+ /* mask out bits from pkey in old value */
+ reg &= ~((u64)PKEY_MASK << shift);
+ /* OR in new bits for pkey */
+ reg |= (flags & PKEY_MASK) << shift;
+ return reg;
+}
+
+static void set_pkey(int pkey, unsigned long pkey_value)
+{
+ u64 new_pkey_reg;
+
+ new_pkey_reg = set_pkey_bits(__read_pkey_reg(), pkey, pkey_value);
+ __write_pkey_reg(new_pkey_reg);
+}
+
+static void setup_single_address(int size, void **ptrOut)
+{
+ void *ptr;
+
+ ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ *ptrOut = ptr;
+}
+
+static void setup_single_address_rw(int size, void **ptrOut)
+{
+ void *ptr;
+ unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE;
+
+ ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
+ *ptrOut = ptr;
+}
+
+static int clean_single_address(void *ptr, int size)
+{
+ int ret;
+ ret = munmap(ptr, size);
+ return ret;
+}
+
+static int seal_single_address(void *ptr, int size)
+{
+ int ret;
+ ret = sys_mseal(ptr, size);
+ return ret;
+}
+
+bool seal_support(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+
+ ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (ptr == (void *) -1)
+ return false;
+
+ ret = sys_mseal(ptr, page_size);
+ if (ret < 0)
+ return false;
+
+ return true;
+}
+
+bool pkey_supported(void)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ int pkey = sys_pkey_alloc(0, 0);
+
+ if (pkey > 0)
+ return true;
+#endif
+ return false;
+}
+
+static void test_seal_addseal(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_unmapped_start(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* munmap 2 pages from ptr. */
+ ret = sys_munmap(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail because 2 pages from ptr are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail because 2 pages from ptr are unmapped. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ ret = sys_mseal(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_unmapped_middle(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* munmap 2 pages from ptr + page. */
+ ret = sys_munmap(ptr + page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail, since middle 2 pages are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* we still can add seal to the first page and last page*/
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mseal(ptr + 3 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_unmapped_end(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap last 2 pages. */
+ ret = sys_munmap(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail since last 2 pages are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* The first 2 pages is not sealed, and can add seals */
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_multiple_vmas(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split the vma into 3. */
+ ret = sys_mprotect(ptr + page_size, 2 * page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will get applied to all 4 pages - 3 VMAs. */
+ ret = sys_mprotect(ptr, size, PROT_READ);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use mprotect to split the vma into 3. */
+ ret = sys_mprotect(ptr + page_size, 2 * page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mseal get applied to all 4 pages - 3 VMAs. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_split_start(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split at middle */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first page, this will split the VMA */
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* add seal to the remain 3 pages */
+ ret = sys_mseal(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_split_end(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split at middle */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the last page */
+ ret = sys_mseal(ptr + 3 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* Adding seals to the first 3 pages */
+ ret = sys_mseal(ptr, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_invalid_input(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(8 * page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ ret = clean_single_address(ptr + 4 * page_size, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* invalid flag */
+ ret = syscall(__NR_mseal, ptr, size, 0x20);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* unaligned address */
+ ret = sys_mseal(ptr + 1, 2 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* length too big */
+ ret = sys_mseal(ptr, 5 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* length overflow */
+ ret = sys_mseal(ptr, UINT64_MAX/page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* start is not in a valid VMA */
+ ret = sys_mseal(ptr - page_size, 5 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_zero_length(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mprotect(ptr, 0, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal 0 length will be OK, same as mprotect */
+ ret = sys_mseal(ptr, 0);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* verify the 4 pages are not sealed by previous call. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_zero_address(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ /* use mmap to change protection. */
+ ptr = sys_mmap(0, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ FAIL_TEST_IF_FALSE(ptr == 0);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == 4 * page_size);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* verify the 4 pages are sealed by previous call. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_twice(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* apply the same seal will be OK. idempotent. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_start_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* the first page is sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* pages after the first page is not sealed. */
+ ret = sys_mprotect(ptr + page_size, page_size * 3,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_end_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* first page is not sealed */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* last 3 page are sealed */
+ ret = sys_mprotect(ptr + page_size, page_size * 3,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_unalign_len(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 2 - 1);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 2 pages are sealed. */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 2, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_unalign_len_variant_2(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 2 + 1);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 3 pages are sealed. */
+ ret = sys_mprotect(ptr, page_size * 3, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 3, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_two_vma(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 4);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 2, page_size * 2,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_two_vma_with_split(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split as two vma. */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mseal can apply across 2 vma, also split them. */
+ if (seal) {
+ ret = seal_single_address(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* the first page is not sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the second page is sealed. */
+ ret = sys_mprotect(ptr + page_size, page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the third page is sealed. */
+ ret = sys_mprotect(ptr + 2 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the fouth page is not sealed. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_partial_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* seal one page. */
+ if (seal) {
+ ret = seal_single_address(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mprotect first 2 page will fail, since the first page are sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_two_vma_with_gap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use munmap to free two pages in the middle */
+ ret = sys_munmap(ptr + page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail, because there is a gap in the address. */
+ /* notes, internally mprotect still updated the first page. */
+ ret = sys_mprotect(ptr, 4 * page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, 4 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* the first page is not sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ /* the last page is not sealed. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_split(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal all 4 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mprotect is sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+
+ ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_merge(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split one page. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal first two pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 2 pages are sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* last 2 pages are not sealed. */
+ ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_munmap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 4 pages are sealed. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+/*
+ * allocate 4 pages,
+ * use mprotect to split it as two VMAs
+ * seal the whole range
+ * munmap will fail on both
+ */
+static void test_seal_munmap_two_vma(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_munmap(ptr, page_size * 2);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+/*
+ * allocate a VMA with 4 pages.
+ * munmap the middle 2 pages.
+ * seal the whole 4 pages, will fail.
+ * munmap the first page will be OK.
+ * munmap the last page will be OK.
+ */
+static void test_seal_munmap_vma_with_gap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ /* can't have gap in the middle. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+ }
+
+ ret = sys_munmap(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr + page_size * 2, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_munmap_start_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap the first page. */
+ ret = sys_munmap(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the last 3 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* unmap from the first page. */
+ ret = sys_munmap(ptr, size);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size * 3);
+ } else {
+ /* note: this will be OK, even the first page is */
+ /* already unmapped. */
+ FAIL_TEST_IF_FALSE(!ret);
+
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_munmap_end_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap last page. */
+ ret = sys_munmap(ptr + page_size * 3, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first 3 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* unmap all pages. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_munmap_middle_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap 2 pages in the middle. */
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first page. */
+ if (seal) {
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* munmap all 4 pages. */
+ ret = sys_munmap(ptr, size);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+
+ size = get_vma_size(ptr + page_size * 3, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ } else {
+ FAIL_TEST_IF_FALSE(!ret);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+
+ size = get_vma_size(ptr + page_size * 3, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_shrink(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* shrink from 4 pages to 2 pages. */
+ ret2 = mremap(ptr, size, 2 * page_size, 0, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_expand(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ /* ummap last 2 pages. */
+ ret = sys_munmap(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* expand from 2 page to 4 pages. */
+ ret2 = mremap(ptr, 2 * page_size, 4 * page_size, 0, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_move(bool seal)
+{
+ void *ptr, *newPtr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newPtr);
+ FAIL_TEST_IF_FALSE(newPtr != (void *)-1);
+ ret = clean_single_address(newPtr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* move from ptr to fixed address. */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newPtr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mmap_overwrite_prot(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to change protection. */
+ ret2 = sys_mmap(ptr, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mmap_expand(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 12 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ /* ummap last 4 pages. */
+ ret = sys_munmap(ptr + 8 * page_size, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, 8 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to expand. */
+ ret2 = sys_mmap(ptr, size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mmap_shrink(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 12 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to shrink. */
+ ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_shrink_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move and shrink to fixed address */
+ ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_expand_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(newAddr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move and expand to fixed address */
+ ret2 = mremap(ptr, page_size, size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_move_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(newAddr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move to fixed address */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_move_fixed_zero(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /*
+ * MREMAP_FIXED can move the mapping to zero address
+ */
+ ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 == 0);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_move_dontunmap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move, and don't unmap src addr. */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_move_dontunmap_anyaddr(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /*
+ * The 0xdeaddead should not have effect on dest addr
+ * when MREMAP_DONTUNMAP is set.
+ */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
+ 0xdeaddead);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+ FAIL_TEST_IF_FALSE((long)ret2 != 0xdeaddead);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+
+static void test_seal_merge_and_split(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size;
+ int ret;
+ int prot;
+
+ /* (24 RO) */
+ setup_single_address(24 * page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect(NONE) to set out boundary */
+ /* (1 NONE) (22 RO) (1 NONE) */
+ ret = sys_mprotect(ptr, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(!ret);
+ ret = sys_mprotect(ptr + 23 * page_size, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 22 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 4);
+
+ /* use mseal to split from beginning */
+ /* (1 NONE) (1 RO_SEAL) (21 RO) (1 NONE) */
+ ret = sys_mseal(ptr + page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 2 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 21 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* use mseal to split from the end. */
+ /* (1 NONE) (1 RO_SEAL) (20 RO) (1 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 22 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 22 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 2 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 20 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge with prev. */
+ /* (1 NONE) (2 RO_SEAL) (19 RO) (1 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 2 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge with after. */
+ /* (1 NONE) (2 RO_SEAL) (18 RO) (2 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 21 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 21 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 2 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* split and merge from prev */
+ /* (1 NONE) (3 RO_SEAL) (17 RO) (2 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 1 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 3 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ ret = sys_munmap(ptr + page_size, page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+ ret = sys_mprotect(ptr + 2 * page_size, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* split and merge from next */
+ /* (1 NONE) (3 RO_SEAL) (16 RO) (3 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 20 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 20 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 3 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge from middle of prev and middle of next. */
+ /* (1 NONE) (22 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, 20 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 22 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_discard_ro_anon_on_rw(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address_rw(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't take effect on RW memory. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* base seal still apply. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_discard_ro_anon_on_pkey(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int pkey;
+
+ SKIP_TEST_IF_FALSE(pkey_supported());
+
+ setup_single_address_rw(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ pkey = sys_pkey_alloc(0, 0);
+ FAIL_TEST_IF_FALSE(pkey > 0);
+
+ ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't take effect if PKRU allow write. */
+ set_pkey(pkey, 0);
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* sealing will take effect if PKRU deny write. */
+ set_pkey(pkey, PKEY_DISABLE_WRITE);
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* base seal still apply. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_discard_ro_anon_on_filebacked(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int fd;
+ unsigned long mapflags = MAP_PRIVATE;
+
+ fd = memfd_create("test", 0);
+ FAIL_TEST_IF_FALSE(fd > 0);
+
+ ret = fallocate(fd, 0, 0, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0);
+ FAIL_TEST_IF_FALSE(ptr != MAP_FAILED);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't apply for file backed mapping. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+ close(fd);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_discard_ro_anon_on_shared(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED;
+
+ ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't apply for shared mapping. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_discard_ro_anon(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+int main(int argc, char **argv)
+{
+ bool test_seal = seal_support();
+
+ ksft_print_header();
+
+ if (!test_seal)
+ ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n");
+
+ if (!pkey_supported())
+ ksft_print_msg("PKEY not supported\n");
+
+ ksft_set_plan(80);
+
+ test_seal_addseal();
+ test_seal_unmapped_start();
+ test_seal_unmapped_middle();
+ test_seal_unmapped_end();
+ test_seal_multiple_vmas();
+ test_seal_split_start();
+ test_seal_split_end();
+ test_seal_invalid_input();
+ test_seal_zero_length();
+ test_seal_twice();
+
+ test_seal_mprotect(false);
+ test_seal_mprotect(true);
+
+ test_seal_start_mprotect(false);
+ test_seal_start_mprotect(true);
+
+ test_seal_end_mprotect(false);
+ test_seal_end_mprotect(true);
+
+ test_seal_mprotect_unalign_len(false);
+ test_seal_mprotect_unalign_len(true);
+
+ test_seal_mprotect_unalign_len_variant_2(false);
+ test_seal_mprotect_unalign_len_variant_2(true);
+
+ test_seal_mprotect_two_vma(false);
+ test_seal_mprotect_two_vma(true);
+
+ test_seal_mprotect_two_vma_with_split(false);
+ test_seal_mprotect_two_vma_with_split(true);
+
+ test_seal_mprotect_partial_mprotect(false);
+ test_seal_mprotect_partial_mprotect(true);
+
+ test_seal_mprotect_two_vma_with_gap(false);
+ test_seal_mprotect_two_vma_with_gap(true);
+
+ test_seal_mprotect_merge(false);
+ test_seal_mprotect_merge(true);
+
+ test_seal_mprotect_split(false);
+ test_seal_mprotect_split(true);
+
+ test_seal_munmap(false);
+ test_seal_munmap(true);
+ test_seal_munmap_two_vma(false);
+ test_seal_munmap_two_vma(true);
+ test_seal_munmap_vma_with_gap(false);
+ test_seal_munmap_vma_with_gap(true);
+
+ test_munmap_start_freed(false);
+ test_munmap_start_freed(true);
+ test_munmap_middle_freed(false);
+ test_munmap_middle_freed(true);
+ test_munmap_end_freed(false);
+ test_munmap_end_freed(true);
+
+ test_seal_mremap_shrink(false);
+ test_seal_mremap_shrink(true);
+ test_seal_mremap_expand(false);
+ test_seal_mremap_expand(true);
+ test_seal_mremap_move(false);
+ test_seal_mremap_move(true);
+
+ test_seal_mremap_shrink_fixed(false);
+ test_seal_mremap_shrink_fixed(true);
+ test_seal_mremap_expand_fixed(false);
+ test_seal_mremap_expand_fixed(true);
+ test_seal_mremap_move_fixed(false);
+ test_seal_mremap_move_fixed(true);
+ test_seal_mremap_move_dontunmap(false);
+ test_seal_mremap_move_dontunmap(true);
+ test_seal_mremap_move_fixed_zero(false);
+ test_seal_mremap_move_fixed_zero(true);
+ test_seal_mremap_move_dontunmap_anyaddr(false);
+ test_seal_mremap_move_dontunmap_anyaddr(true);
+ test_seal_discard_ro_anon(false);
+ test_seal_discard_ro_anon(true);
+ test_seal_discard_ro_anon_on_rw(false);
+ test_seal_discard_ro_anon_on_rw(true);
+ test_seal_discard_ro_anon_on_shared(false);
+ test_seal_discard_ro_anon_on_shared(true);
+ test_seal_discard_ro_anon_on_filebacked(false);
+ test_seal_discard_ro_anon_on_filebacked(true);
+ test_seal_mmap_overwrite_prot(false);
+ test_seal_mmap_overwrite_prot(true);
+ test_seal_mmap_expand(false);
+ test_seal_mmap_expand(true);
+ test_seal_mmap_shrink(false);
+ test_seal_mmap_shrink(true);
+
+ test_seal_merge_and_split();
+ test_seal_zero_address();
+
+ test_seal_discard_ro_anon_on_pkey(false);
+ test_seal_discard_ro_anon_on_pkey(true);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index f822ae31af22e2..48dc151f8fca8a 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -54,7 +54,6 @@ int test_nr;
u64 shadow_pkey_reg;
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-char buf[256];
void cat_into_file(char *str, char *file)
{
@@ -1745,38 +1744,6 @@ void pkey_setup_shadow(void)
shadow_pkey_reg = __read_pkey_reg();
}
-void restore_settings_atexit(void)
-{
- cat_into_file(buf, "/proc/sys/vm/nr_hugepages");
-}
-
-void save_settings(void)
-{
- int fd;
- int err;
-
- if (geteuid())
- return;
-
- fd = open("/proc/sys/vm/nr_hugepages", O_RDONLY);
- if (fd < 0) {
- fprintf(stderr, "error opening\n");
- perror("error: ");
- exit(__LINE__);
- }
-
- /* -1 to guarantee leaving the trailing \0 */
- err = read(fd, buf, sizeof(buf)-1);
- if (err < 0) {
- fprintf(stderr, "error reading\n");
- perror("error: ");
- exit(__LINE__);
- }
-
- atexit(restore_settings_atexit);
- close(fd);
-}
-
int main(void)
{
int nr_iterations = 22;
@@ -1784,7 +1751,6 @@ int main(void)
srand((unsigned int)time(NULL));
- save_settings();
setup_handlers();
printf("has pkeys: %d\n", pkeys_supported);
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index c2c542fe7b17bb..3157204b90476b 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -152,9 +152,13 @@ done < /proc/meminfo
# both of these requirements into account and attempt to increase
# number of huge pages available.
nr_cpus=$(nproc)
-hpgsize_MB=$((hpgsize_KB / 1024))
-half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
-needmem_KB=$((half_ufd_size_MB * 2 * 1024))
+uffd_min_KB=$((hpgsize_KB * nr_cpus * 2))
+hugetlb_min_KB=$((256 * 1024))
+if [[ $uffd_min_KB -gt $hugetlb_min_KB ]]; then
+ needmem_KB=$uffd_min_KB
+else
+ needmem_KB=$hugetlb_min_KB
+fi
# set proper nr_hugepages
if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
@@ -294,7 +298,8 @@ CATEGORY="userfaultfd" run_test ./uffd-unit-tests
uffd_stress_bin=./uffd-stress
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
# Hugetlb tests require source and destination huge pages. Pass in half
-# the size ($half_ufd_size_MB), which is used for *each*.
+# the size of the free pages we have, which is used for *each*.
+half_ufd_size_MB=$((freepgs / 2))
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
@@ -385,6 +390,7 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0
CATEGORY="ksm" run_test ./ksm_functional_tests
# protection_keys tests
+nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
if [ -x ./protection_keys_32 ]
then
CATEGORY="pkey" run_test ./protection_keys_32
@@ -394,6 +400,7 @@ if [ -x ./protection_keys_64 ]
then
CATEGORY="pkey" run_test ./protection_keys_64
fi
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
if [ -x ./soft-dirty ]
then
diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c
new file mode 100644
index 00000000000000..f2babec79bb63d
--- /dev/null
+++ b/tools/testing/selftests/mm/seal_elf.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <stdbool.h>
+#include "../kselftest.h"
+#include <syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <sys/stat.h>
+
+/*
+ * need those definition for manually build using gcc.
+ * gcc -I ../../../../usr/include -DDEBUG -O3 -DDEBUG -O3 seal_elf.c -o seal_elf
+ */
+#define FAIL_TEST_IF_FALSE(c) do {\
+ if (!(c)) {\
+ ksft_test_result_fail("%s, line:%d\n", __func__, __LINE__);\
+ goto test_end;\
+ } \
+ } \
+ while (0)
+
+#define SKIP_TEST_IF_FALSE(c) do {\
+ if (!(c)) {\
+ ksft_test_result_skip("%s, line:%d\n", __func__, __LINE__);\
+ goto test_end;\
+ } \
+ } \
+ while (0)
+
+
+#define TEST_END_CHECK() {\
+ ksft_test_result_pass("%s\n", __func__);\
+ return;\
+test_end:\
+ return;\
+}
+
+#ifndef u64
+#define u64 unsigned long long
+#endif
+
+/*
+ * define sys_xyx to call syscall directly.
+ */
+static int sys_mseal(void *start, size_t len)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mseal, start, len, 0);
+ return sret;
+}
+
+static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long fd, unsigned long offset)
+{
+ void *sret;
+
+ errno = 0;
+ sret = (void *) syscall(__NR_mmap, addr, len, prot,
+ flags, fd, offset);
+ return sret;
+}
+
+static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mprotect, ptr, size, prot);
+ return sret;
+}
+
+static bool seal_support(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+
+ ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (ptr == (void *) -1)
+ return false;
+
+ ret = sys_mseal(ptr, page_size);
+ if (ret < 0)
+ return false;
+
+ return true;
+}
+
+const char somestr[4096] = {"READONLY"};
+
+static void test_seal_elf(void)
+{
+ int ret;
+ FILE *maps;
+ char line[512];
+ uintptr_t addr_start, addr_end;
+ char prot[5];
+ char filename[256];
+ unsigned long page_size = getpagesize();
+ unsigned long long ptr = (unsigned long long) somestr;
+ char *somestr2 = (char *)somestr;
+
+ /*
+ * Modify the protection of readonly somestr
+ */
+ if (((unsigned long long)ptr % page_size) != 0)
+ ptr = (unsigned long long)ptr & ~(page_size - 1);
+
+ ksft_print_msg("somestr = %s\n", somestr);
+ ksft_print_msg("change protection to rw\n");
+ ret = sys_mprotect((void *)ptr, page_size, PROT_READ|PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+ *somestr2 = 'A';
+ ksft_print_msg("somestr is modified to: %s\n", somestr);
+ ret = sys_mprotect((void *)ptr, page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ maps = fopen("/proc/self/maps", "r");
+ FAIL_TEST_IF_FALSE(maps);
+
+ /*
+ * apply sealing to elf binary
+ */
+ while (fgets(line, sizeof(line), maps)) {
+ if (sscanf(line, "%lx-%lx %4s %*x %*x:%*x %*u %255[^\n]",
+ &addr_start, &addr_end, prot, filename) == 4) {
+ if (strlen(filename)) {
+ /*
+ * seal the mapping if read only.
+ */
+ if (strstr(prot, "r-")) {
+ ret = sys_mseal((void *)addr_start, addr_end - addr_start);
+ FAIL_TEST_IF_FALSE(!ret);
+ ksft_print_msg("sealed: %lx-%lx %s %s\n",
+ addr_start, addr_end, prot, filename);
+ if ((uintptr_t) somestr >= addr_start &&
+ (uintptr_t) somestr <= addr_end)
+ ksft_print_msg("mapping for somestr found\n");
+ }
+ }
+ }
+ }
+ fclose(maps);
+
+ ret = sys_mprotect((void *)ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+ ksft_print_msg("somestr is sealed, mprotect is rejected\n");
+
+ TEST_END_CHECK();
+}
+
+int main(int argc, char **argv)
+{
+ bool test_seal = seal_support();
+
+ ksft_print_header();
+ ksft_print_msg("pid=%d\n", getpid());
+
+ if (!test_seal)
+ ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n");
+
+ ksft_set_plan(1);
+
+ test_seal_elf();
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
index cc5f144430d4d2..bdfa5d085f0099 100644
--- a/tools/testing/selftests/mm/soft-dirty.c
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -137,7 +137,7 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
if (!map)
ksft_exit_fail_msg("anon mmap failed\n");
} else {
- test_fd = open(fname, O_RDWR | O_CREAT);
+ test_fd = open(fname, O_RDWR | O_CREAT, 0664);
if (test_fd < 0) {
ksft_test_result_skip("Test %s open() file failed\n", __func__);
return;
@@ -209,5 +209,5 @@ int main(int argc, char **argv)
close(pagemap_fd);
- return ksft_exit_pass();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index 856662d2f87a1b..d3c7f5fb3e7b77 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -223,7 +223,7 @@ void split_file_backed_thp(void)
ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n");
}
- fd = open(testfile, O_CREAT|O_WRONLY);
+ fd = open(testfile, O_CREAT|O_WRONLY, 0664);
if (fd == -1) {
ksft_perror("Cannot open testing file");
goto cleanup;
@@ -300,7 +300,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
char **addr)
{
size_t i;
- int dummy;
+ int __attribute__((unused)) dummy = 0;
srand(time(NULL));
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index b0ac0ec2356d65..7ad6ba660c7d6f 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -18,6 +18,7 @@ bool test_uffdio_wp = true;
unsigned long long *count_verify;
uffd_test_ops_t *uffd_test_ops;
uffd_test_case_ops_t *uffd_test_case_ops;
+atomic_bool ready_for_fork;
static int uffd_mem_fd_create(off_t mem_size, bool hugetlb)
{
@@ -518,6 +519,8 @@ void *uffd_poll_thread(void *arg)
pollfd[1].fd = pipefd[cpu*2];
pollfd[1].events = POLLIN;
+ ready_for_fork = true;
+
for (;;) {
ret = poll(pollfd, 2, -1);
if (ret <= 0) {
diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h
index cb055282c89c96..cc5629c3d2aa10 100644
--- a/tools/testing/selftests/mm/uffd-common.h
+++ b/tools/testing/selftests/mm/uffd-common.h
@@ -32,6 +32,7 @@
#include <inttypes.h>
#include <stdint.h>
#include <sys/random.h>
+#include <stdatomic.h>
#include "../kselftest.h"
#include "vm_util.h"
@@ -103,6 +104,7 @@ extern bool map_shared;
extern bool test_uffdio_wp;
extern unsigned long long *count_verify;
extern volatile bool test_uffdio_copy_eexist;
+extern atomic_bool ready_for_fork;
extern uffd_test_ops_t anon_uffd_test_ops;
extern uffd_test_ops_t shmem_uffd_test_ops;
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 2b9f8cc52639d1..21ec23206ab44a 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -775,6 +775,8 @@ static void uffd_sigbus_test_common(bool wp)
char c;
struct uffd_args args = { 0 };
+ ready_for_fork = false;
+
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
if (uffd_register(uffd, area_dst, nr_pages * page_size,
@@ -790,6 +792,9 @@ static void uffd_sigbus_test_common(bool wp)
if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
err("uffd_poll_thread create");
+ while (!ready_for_fork)
+ ; /* Wait for the poll_thread to start executing before forking */
+
pid = fork();
if (pid < 0)
err("fork");
@@ -829,6 +834,8 @@ static void uffd_events_test_common(bool wp)
char c;
struct uffd_args args = { 0 };
+ ready_for_fork = false;
+
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
if (uffd_register(uffd, area_dst, nr_pages * page_size,
true, wp, false))
@@ -838,6 +845,9 @@ static void uffd_events_test_common(bool wp)
if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
err("uffd_poll_thread create");
+ while (!ready_for_fork)
+ ; /* Wait for the poll_thread to start executing before forking */
+
pid = fork();
if (pid < 0)
err("fork");
@@ -1427,7 +1437,8 @@ uffd_test_case_t uffd_tests[] = {
.uffd_fn = uffd_sigbus_wp_test,
.mem_targets = MEM_ALL,
.uffd_feature_required = UFFD_FEATURE_SIGBUS |
- UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP,
+ UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
},
{
.name = "events",
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
index 7bcf8d48256a66..4e4c1e311247ff 100644
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -12,6 +12,8 @@
#include <errno.h>
#include <sys/mman.h>
#include <sys/time.h>
+#include <fcntl.h>
+
#include "../kselftest.h"
/*
@@ -85,7 +87,7 @@ static int validate_lower_address_hint(void)
char *ptr;
ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ |
- PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED)
return 0;
@@ -93,6 +95,66 @@ static int validate_lower_address_hint(void)
return 1;
}
+static int validate_complete_va_space(void)
+{
+ unsigned long start_addr, end_addr, prev_end_addr;
+ char line[400];
+ char prot[6];
+ FILE *file;
+ int fd;
+
+ fd = open("va_dump", O_CREAT | O_WRONLY, 0600);
+ unlink("va_dump");
+ if (fd < 0) {
+ ksft_test_result_skip("cannot create or open dump file\n");
+ ksft_finished();
+ }
+
+ file = fopen("/proc/self/maps", "r");
+ if (file == NULL)
+ ksft_exit_fail_msg("cannot open /proc/self/maps\n");
+
+ prev_end_addr = 0;
+ while (fgets(line, sizeof(line), file)) {
+ unsigned long hop;
+
+ if (sscanf(line, "%lx-%lx %s[rwxp-]",
+ &start_addr, &end_addr, prot) != 3)
+ ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
+
+ /* end of userspace mappings; ignore vsyscall mapping */
+ if (start_addr & (1UL << 63))
+ return 0;
+
+ /* /proc/self/maps must have gaps less than MAP_CHUNK_SIZE */
+ if (start_addr - prev_end_addr >= MAP_CHUNK_SIZE)
+ return 1;
+
+ prev_end_addr = end_addr;
+
+ if (prot[0] != 'r')
+ continue;
+
+ /*
+ * Confirm whether MAP_CHUNK_SIZE chunk can be found or not.
+ * If write succeeds, no need to check MAP_CHUNK_SIZE - 1
+ * addresses after that. If the address was not held by this
+ * process, write would fail with errno set to EFAULT.
+ * Anyways, if write returns anything apart from 1, exit the
+ * program since that would mean a bug in /proc/self/maps.
+ */
+ hop = 0;
+ while (start_addr + hop < end_addr) {
+ if (write(fd, (void *)(start_addr + hop), 1) != 1)
+ return 1;
+ lseek(fd, 0, SEEK_SET);
+
+ hop += MAP_CHUNK_SIZE;
+ }
+ }
+ return 0;
+}
+
int main(int argc, char *argv[])
{
char *ptr[NR_CHUNKS_LOW];
@@ -105,13 +167,11 @@ int main(int argc, char *argv[])
for (i = 0; i < NR_CHUNKS_LOW; i++) {
ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr[i] == MAP_FAILED) {
- if (validate_lower_address_hint()) {
- ksft_test_result_skip("Memory constraint not fulfilled\n");
- ksft_finished();
- }
+ if (validate_lower_address_hint())
+ ksft_exit_fail_msg("mmap unexpectedly succeeded with hint\n");
break;
}
@@ -127,7 +187,7 @@ int main(int argc, char *argv[])
for (i = 0; i < NR_CHUNKS_HIGH; i++) {
hint = hind_addr();
hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (hptr[i] == MAP_FAILED)
break;
@@ -135,6 +195,10 @@ int main(int argc, char *argv[])
validate_addr(hptr[i], 1);
}
hchunks = i;
+ if (validate_complete_va_space()) {
+ ksft_test_result_fail("BUG in mmap() or /proc/self/maps\n");
+ ksft_finished();
+ }
for (i = 0; i < lchunks; i++)
munmap(ptr[i], MAP_CHUNK_SIZE);
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index c02990bbd56f4c..9007c420d52c52 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -3,7 +3,7 @@
#include <stdbool.h>
#include <sys/mman.h>
#include <err.h>
-#include <string.h> /* ffsl() */
+#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */
#define BIT_ULL(nr) (1ULL << (nr))
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index a2662348cdb1a2..b7b54d646b937c 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -6,7 +6,9 @@
#include "../kselftest_harness.h"
-struct in6_addr in6addr_v4mapped_any = {
+static const __u32 in4addr_any = INADDR_ANY;
+static const __u32 in4addr_loopback = INADDR_LOOPBACK;
+static const struct in6_addr in6addr_v4mapped_any = {
.s6_addr = {
0, 0, 0, 0,
0, 0, 0, 0,
@@ -14,8 +16,7 @@ struct in6_addr in6addr_v4mapped_any = {
0, 0, 0, 0
}
};
-
-struct in6_addr in6addr_v4mapped_loopback = {
+static const struct in6_addr in6addr_v4mapped_loopback = {
.s6_addr = {
0, 0, 0, 0,
0, 0, 0, 0,
@@ -24,137 +25,785 @@ struct in6_addr in6addr_v4mapped_loopback = {
}
};
+#define NR_SOCKETS 8
+
FIXTURE(bind_wildcard)
{
- struct sockaddr_in addr4;
- struct sockaddr_in6 addr6;
+ int fd[NR_SOCKETS];
+ socklen_t addrlen[NR_SOCKETS];
+ union {
+ struct sockaddr addr;
+ struct sockaddr_in addr4;
+ struct sockaddr_in6 addr6;
+ } addr[NR_SOCKETS];
};
FIXTURE_VARIANT(bind_wildcard)
{
- const __u32 addr4_const;
- const struct in6_addr *addr6_const;
- int expected_errno;
+ sa_family_t family[2];
+ const void *addr[2];
+ bool ipv6_only[2];
+
+ /* 6 bind() calls below follow two bind() for the defined 2 addresses:
+ *
+ * 0.0.0.0
+ * 127.0.0.1
+ * ::
+ * ::1
+ * ::ffff:0.0.0.0
+ * ::ffff:127.0.0.1
+ */
+ int expected_errno[NR_SOCKETS];
+ int expected_reuse_errno[NR_SOCKETS];
+};
+
+/* (IPv4, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v4_local)
+{
+ .family = {AF_INET, AF_INET},
+ .addr = {&in4addr_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v4_any)
+{
+ .family = {AF_INET, AF_INET},
+ .addr = {&in4addr_loopback, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
+/* (IPv4, IPv6) */
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any_only)
+{
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_loopback,
- .expected_errno = 0,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_v4mapped_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_v4mapped_loopback,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any_only)
+{
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_loopback,
- .expected_errno = 0,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_v4mapped_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_v4mapped_loopback,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_loopback, &in4addr_any},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_loopback, &in4addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_any, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_loopback, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_loopback, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv6) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {true, true},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+static void setup_addr(FIXTURE_DATA(bind_wildcard) *self, int i,
+ int family, const void *addr_const)
+{
+ if (family == AF_INET) {
+ struct sockaddr_in *addr4 = &self->addr[i].addr4;
+ const __u32 *addr4_const = addr_const;
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(0);
+ addr4->sin_addr.s_addr = htonl(*addr4_const);
+
+ self->addrlen[i] = sizeof(struct sockaddr_in);
+ } else {
+ struct sockaddr_in6 *addr6 = &self->addr[i].addr6;
+ const struct in6_addr *addr6_const = addr_const;
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(0);
+ addr6->sin6_addr = *addr6_const;
+
+ self->addrlen[i] = sizeof(struct sockaddr_in6);
+ }
+}
+
FIXTURE_SETUP(bind_wildcard)
{
- self->addr4.sin_family = AF_INET;
- self->addr4.sin_port = htons(0);
- self->addr4.sin_addr.s_addr = htonl(variant->addr4_const);
+ setup_addr(self, 0, variant->family[0], variant->addr[0]);
+ setup_addr(self, 1, variant->family[1], variant->addr[1]);
+
+ setup_addr(self, 2, AF_INET, &in4addr_any);
+ setup_addr(self, 3, AF_INET, &in4addr_loopback);
- self->addr6.sin6_family = AF_INET6;
- self->addr6.sin6_port = htons(0);
- self->addr6.sin6_addr = *variant->addr6_const;
+ setup_addr(self, 4, AF_INET6, &in6addr_any);
+ setup_addr(self, 5, AF_INET6, &in6addr_loopback);
+ setup_addr(self, 6, AF_INET6, &in6addr_v4mapped_any);
+ setup_addr(self, 7, AF_INET6, &in6addr_v4mapped_loopback);
}
FIXTURE_TEARDOWN(bind_wildcard)
{
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ close(self->fd[i]);
}
-void bind_sockets(struct __test_metadata *_metadata,
- FIXTURE_DATA(bind_wildcard) *self,
- int expected_errno,
- struct sockaddr *addr1, socklen_t addrlen1,
- struct sockaddr *addr2, socklen_t addrlen2)
+void bind_socket(struct __test_metadata *_metadata,
+ FIXTURE_DATA(bind_wildcard) *self,
+ const FIXTURE_VARIANT(bind_wildcard) *variant,
+ int i, int reuse)
{
- int fd[2];
int ret;
- fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0);
- ASSERT_GT(fd[0], 0);
+ self->fd[i] = socket(self->addr[i].addr.sa_family, SOCK_STREAM, 0);
+ ASSERT_GT(self->fd[i], 0);
- ret = bind(fd[0], addr1, addrlen1);
- ASSERT_EQ(ret, 0);
+ if (i < 2 && variant->ipv6_only[i]) {
+ ret = setsockopt(self->fd[i], SOL_IPV6, IPV6_V6ONLY, &(int){1}, sizeof(int));
+ ASSERT_EQ(ret, 0);
+ }
- ret = getsockname(fd[0], addr1, &addrlen1);
- ASSERT_EQ(ret, 0);
+ if (i < 2 && reuse) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, reuse, &(int){1}, sizeof(int));
+ ASSERT_EQ(ret, 0);
+ }
- ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port;
+ self->addr[i].addr4.sin_port = self->addr[0].addr4.sin_port;
- fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0);
- ASSERT_GT(fd[1], 0);
+ ret = bind(self->fd[i], &self->addr[i].addr, self->addrlen[i]);
- ret = bind(fd[1], addr2, addrlen2);
- if (expected_errno) {
- ASSERT_EQ(ret, -1);
- ASSERT_EQ(errno, expected_errno);
+ if (reuse) {
+ if (variant->expected_reuse_errno[i]) {
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, variant->expected_reuse_errno[i]);
+ } else {
+ ASSERT_EQ(ret, 0);
+ }
} else {
+ if (variant->expected_errno[i]) {
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, variant->expected_errno[i]);
+ } else {
+ ASSERT_EQ(ret, 0);
+ }
+ }
+
+ if (i == 0) {
+ ret = getsockname(self->fd[0], &self->addr[0].addr, &self->addrlen[0]);
ASSERT_EQ(ret, 0);
}
+}
- close(fd[1]);
- close(fd[0]);
+TEST_F(bind_wildcard, plain)
+{
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, 0);
}
-TEST_F(bind_wildcard, v4_v6)
+TEST_F(bind_wildcard, reuseaddr)
{
- bind_sockets(_metadata, self, variant->expected_errno,
- (struct sockaddr *)&self->addr4, sizeof(self->addr4),
- (struct sockaddr *)&self->addr6, sizeof(self->addr6));
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, SO_REUSEADDR);
}
-TEST_F(bind_wildcard, v6_v4)
+TEST_F(bind_wildcard, reuseport)
{
- bind_sockets(_metadata, self, variant->expected_errno,
- (struct sockaddr *)&self->addr6, sizeof(self->addr6),
- (struct sockaddr *)&self->addr4, sizeof(self->addr4));
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, SO_REUSEPORT);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 4c424855482629..4131f3263a4826 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -383,12 +383,14 @@ do_transfer()
local stat_cookierx_last
local stat_csum_err_s
local stat_csum_err_c
+ local stat_tcpfb_last_l
stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
@@ -457,11 +459,13 @@ do_transfer()
local stat_cookietx_now
local stat_cookierx_now
local stat_ooo_now
+ local stat_tcpfb_now_l
stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
@@ -508,6 +512,11 @@ do_transfer()
fi
fi
+ if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+ mptcp_lib_pr_fail "unexpected fallback to TCP"
+ rets=1
+ fi
+
if [ $cookies -eq 2 ];then
if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
extra+=" WARN: CookieSent: did not advance"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 5e9211e8982568..e4403236f65548 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -729,7 +729,7 @@ pm_nl_check_endpoint()
[ -n "$_flags" ]; flags="flags $_flags"
shift
elif [ $1 = "dev" ]; then
- [ -n "$2" ]; dev="dev $1"
+ [ -n "$2" ]; dev="dev $2"
shift
elif [ $1 = "id" ]; then
_id=$2
@@ -3610,6 +3610,8 @@ endpoint_tests()
local tests_pid=$!
wait_mpj $ns2
+ pm_nl_check_endpoint "creation" \
+ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
chk_subflow_nr "before delete" 2
chk_mptcp_info subflows 1 subflows 1
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
index 7c5b12664b03b0..bfb07dc495186d 100644
--- a/tools/testing/selftests/net/reuseaddr_conflict.c
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -109,6 +109,6 @@ int main(void)
fd1 = open_port(0, 1);
if (fd1 >= 0)
error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
- fprintf(stderr, "Success");
+ fprintf(stderr, "Success\n");
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c
index 2fb6dd8adba694..8b984fa042869e 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/proc.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c
@@ -86,7 +86,7 @@ static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line)
pos = strchr(line, ' ') + 1;
- if (fscanf(fnetstat, type->header_name) == EOF)
+ if (fscanf(fnetstat, "%[^ :]", type->header_name) == EOF)
test_error("fscanf(%s)", type->header_name);
if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':')
test_error("Unexpected netstat format (%c)", tmp);
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
index 92276f916f2f30..e408b9243b2c5a 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/setup.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -17,37 +17,37 @@ static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER;
void __test_msg(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_print_msg(buf);
+ ksft_print_msg("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_ok(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_pass(buf);
+ ksft_test_result_pass("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_fail(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_fail(buf);
+ ksft_test_result_fail("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_xfail(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_xfail(buf);
+ ksft_test_result_xfail("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_error(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_error(buf);
+ ksft_test_result_error("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_skip(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_skip(buf);
+ ksft_test_result_skip("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
index 7df8b8700e39e9..a2fe88d35ac06e 100644
--- a/tools/testing/selftests/net/tcp_ao/rst.c
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -256,8 +256,6 @@ static int test_wait_fds(int sk[], size_t nr, bool is_writable[],
static void test_client_active_rst(unsigned int port)
{
- /* one in queue, another accept()ed */
- unsigned int wait_for = backlog + 2;
int i, sk[3], err;
bool is_writable[ARRAY_SIZE(sk)] = {false};
unsigned int last = ARRAY_SIZE(sk) - 1;
@@ -275,16 +273,20 @@ static void test_client_active_rst(unsigned int port)
for (i = 0; i < last; i++) {
err = _test_connect_socket(sk[i], this_ip_dest, port,
(i == 0) ? TEST_TIMEOUT_SEC : -1);
-
if (err < 0)
test_error("failed to connect()");
}
- synchronize_threads(); /* 2: connection accept()ed, another queued */
- err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC);
+ synchronize_threads(); /* 2: two connections: one accept()ed, another queued */
+ err = test_wait_fds(sk, last, is_writable, last, TEST_TIMEOUT_SEC);
if (err < 0)
test_error("test_wait_fds(): %d", err);
+ /* async connect() with third sk to get into request_sock_queue */
+ err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+ if (err < 0)
+ test_error("failed to connect()");
+
synchronize_threads(); /* 3: close listen socket */
if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
test_fail("Failed to send data on connected socket");
@@ -292,13 +294,14 @@ static void test_client_active_rst(unsigned int port)
test_ok("Verified established tcp connection");
synchronize_threads(); /* 4: finishing up */
- err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
- if (err < 0)
- test_error("failed to connect()");
synchronize_threads(); /* 5: closed active sk */
- err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL,
- wait_for, TEST_TIMEOUT_SEC);
+ /*
+ * Wait for 2 connections: one accepted, another in the accept queue,
+ * the one in request_sock_queue won't get fully established, so
+ * doesn't receive an active RST, see inet_csk_listen_stop().
+ */
+ err = test_wait_fds(sk, last, NULL, last, TEST_TIMEOUT_SEC);
if (err < 0)
test_error("select(): %d", err);
diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
index 452de131fa3a9c..517930f9721bd9 100644
--- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
+++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
@@ -21,7 +21,7 @@ static void make_listen(int sk)
static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
const char *tst)
{
- struct tcp_ao_info_opt tmp;
+ struct tcp_ao_info_opt tmp = {};
socklen_t len = sizeof(tmp);
if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len))
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
index 74ff9fb2a6f0e1..58da5de99ac451 100755
--- a/tools/testing/selftests/net/test_vxlan_mdb.sh
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -1177,6 +1177,7 @@ encap_params_common()
local plen=$1; shift
local enc_ethtype=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
@@ -1195,11 +1196,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - no match"
@@ -1212,20 +1213,20 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - no match"
run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - match"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - no match"
@@ -1238,11 +1239,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - no match"
@@ -1250,11 +1251,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - no match"
@@ -1272,6 +1273,7 @@ encap_params_ipv4_ipv4()
local plen=32
local enc_ethtype="ip"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1279,7 +1281,7 @@ encap_params_ipv4_ipv4()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}
encap_params_ipv6_ipv4()
@@ -1291,6 +1293,7 @@ encap_params_ipv6_ipv4()
local plen=32
local enc_ethtype="ip"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1298,7 +1301,7 @@ encap_params_ipv6_ipv4()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}
encap_params_ipv4_ipv6()
@@ -1310,6 +1313,7 @@ encap_params_ipv4_ipv6()
local plen=128
local enc_ethtype="ipv6"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1317,7 +1321,7 @@ encap_params_ipv4_ipv6()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}
encap_params_ipv6_ipv6()
@@ -1329,6 +1333,7 @@ encap_params_ipv6_ipv6()
local plen=128
local enc_ethtype="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1336,7 +1341,7 @@ encap_params_ipv6_ipv6()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}
starg_exclude_ir_common()
@@ -1347,6 +1352,7 @@ starg_exclude_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1368,14 +1374,14 @@ starg_exclude_ir_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"
# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1385,14 +1391,14 @@ starg_exclude_ir_common()
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"
# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1407,6 +1413,7 @@ starg_exclude_ir_ipv4_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1415,7 +1422,7 @@ starg_exclude_ir_ipv4_ipv4()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_exclude_ir_ipv6_ipv4()
@@ -1426,6 +1433,7 @@ starg_exclude_ir_ipv6_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1434,7 +1442,7 @@ starg_exclude_ir_ipv6_ipv4()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_exclude_ir_ipv4_ipv6()
@@ -1445,6 +1453,7 @@ starg_exclude_ir_ipv4_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1453,7 +1462,7 @@ starg_exclude_ir_ipv4_ipv6()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_exclude_ir_ipv6_ipv6()
@@ -1464,6 +1473,7 @@ starg_exclude_ir_ipv6_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1472,7 +1482,7 @@ starg_exclude_ir_ipv6_ipv6()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_include_ir_common()
@@ -1483,6 +1493,7 @@ starg_include_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1504,14 +1515,14 @@ starg_include_ir_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"
# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1521,14 +1532,14 @@ starg_include_ir_common()
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"
# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1543,6 +1554,7 @@ starg_include_ir_ipv4_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1551,7 +1563,7 @@ starg_include_ir_ipv4_ipv4()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_include_ir_ipv6_ipv4()
@@ -1562,6 +1574,7 @@ starg_include_ir_ipv6_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1570,7 +1583,7 @@ starg_include_ir_ipv6_ipv4()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_include_ir_ipv4_ipv6()
@@ -1581,6 +1594,7 @@ starg_include_ir_ipv4_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1589,7 +1603,7 @@ starg_include_ir_ipv4_ipv6()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_include_ir_ipv6_ipv6()
@@ -1600,6 +1614,7 @@ starg_include_ir_ipv6_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1608,7 +1623,7 @@ starg_include_ir_ipv6_ipv6()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_exclude_p2mp_common()
@@ -1618,6 +1633,7 @@ starg_exclude_p2mp_common()
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1635,12 +1651,12 @@ starg_exclude_p2mp_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0"
# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"
# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"
@@ -1648,7 +1664,7 @@ starg_exclude_p2mp_common()
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
@@ -1660,6 +1676,7 @@ starg_exclude_p2mp_ipv4_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1667,7 +1684,7 @@ starg_exclude_p2mp_ipv4_ipv4()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1678,6 +1695,7 @@ starg_exclude_p2mp_ipv6_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1685,7 +1703,7 @@ starg_exclude_p2mp_ipv6_ipv4()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1696,6 +1714,7 @@ starg_exclude_p2mp_ipv4_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1703,7 +1722,7 @@ starg_exclude_p2mp_ipv4_ipv6()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1714,6 +1733,7 @@ starg_exclude_p2mp_ipv6_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1721,7 +1741,7 @@ starg_exclude_p2mp_ipv6_ipv6()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1732,6 +1752,7 @@ starg_include_p2mp_common()
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1749,12 +1770,12 @@ starg_include_p2mp_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0"
# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"
# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"
@@ -1762,7 +1783,7 @@ starg_include_p2mp_common()
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
@@ -1774,6 +1795,7 @@ starg_include_p2mp_ipv4_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1781,7 +1803,7 @@ starg_include_p2mp_ipv4_ipv4()
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1792,6 +1814,7 @@ starg_include_p2mp_ipv6_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1799,7 +1822,7 @@ starg_include_p2mp_ipv6_ipv4()
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1810,6 +1833,7 @@ starg_include_p2mp_ipv4_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1817,7 +1841,7 @@ starg_include_p2mp_ipv4_ipv6()
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1828,6 +1852,7 @@ starg_include_p2mp_ipv6_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1835,7 +1860,7 @@ starg_include_p2mp_ipv6_ipv6()
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1847,6 +1872,7 @@ egress_vni_translation_common()
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
@@ -1882,20 +1908,20 @@ egress_vni_translation_common()
# Make sure that packets sent from the first VTEP over VLAN 10 are
# received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on
# the second VTEP, since it is configured as PVID.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - PVID configured"
# Remove PVID flag from VLAN 4000 on the second VTEP and make sure
# packets are no longer received by the SVI interface.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - no PVID configured"
# Reconfigure the PVID and make sure packets are received again.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2
log_test $? 0 "Egress VNI translation - PVID reconfigured"
}
@@ -1908,6 +1934,7 @@ egress_vni_translation_ipv4_ipv4()
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1915,7 +1942,7 @@ egress_vni_translation_ipv4_ipv4()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}
egress_vni_translation_ipv6_ipv4()
@@ -1926,6 +1953,7 @@ egress_vni_translation_ipv6_ipv4()
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1933,7 +1961,7 @@ egress_vni_translation_ipv6_ipv4()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}
egress_vni_translation_ipv4_ipv6()
@@ -1944,6 +1972,7 @@ egress_vni_translation_ipv4_ipv6()
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1951,7 +1980,7 @@ egress_vni_translation_ipv4_ipv6()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}
egress_vni_translation_ipv6_ipv6()
@@ -1962,6 +1991,7 @@ egress_vni_translation_ipv6_ipv6()
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1969,7 +1999,7 @@ egress_vni_translation_ipv6_ipv6()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}
all_zeros_mdb_common()
@@ -1982,12 +2012,18 @@ all_zeros_mdb_common()
local vtep4_ip=$1; shift
local plen=$1; shift
local ipv4_grp=239.1.1.1
+ local ipv4_grp_dmac=01:00:5e:01:01:01
local ipv4_unreg_grp=239.2.2.2
+ local ipv4_unreg_grp_dmac=01:00:5e:02:02:02
local ipv4_ll_grp=224.0.0.100
+ local ipv4_ll_grp_dmac=01:00:5e:00:00:64
local ipv4_src=192.0.2.129
local ipv6_grp=ff0e::1
+ local ipv6_grp_dmac=33:33:00:00:00:01
local ipv6_unreg_grp=ff0e::2
+ local ipv6_unreg_grp_dmac=33:33:00:00:00:02
local ipv6_ll_grp=ff02::1
+ local ipv6_ll_grp_dmac=33:33:00:00:00:01
local ipv6_src=2001:db8:100::1
# Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic
@@ -2023,7 +2059,7 @@ all_zeros_mdb_common()
# Send registered IPv4 multicast and make sure it only arrives to the
# first VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Registered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
@@ -2031,7 +2067,7 @@ all_zeros_mdb_common()
# Send unregistered IPv4 multicast that is not link-local and make sure
# it arrives to the first and second VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Unregistered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -2039,7 +2075,7 @@ all_zeros_mdb_common()
# Send IPv4 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Link-local IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -2074,7 +2110,7 @@ all_zeros_mdb_common()
# Send registered IPv6 multicast and make sure it only arrives to the
# third VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 1
log_test $? 0 "Registered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 0
@@ -2082,7 +2118,7 @@ all_zeros_mdb_common()
# Send unregistered IPv6 multicast that is not link-local and make sure
# it arrives to the third and fourth VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Unregistered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
@@ -2090,7 +2126,7 @@ all_zeros_mdb_common()
# Send IPv6 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Link-local IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
@@ -2165,6 +2201,7 @@ mdb_fdb_common()
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
@@ -2188,7 +2225,7 @@ mdb_fdb_common()
# Send IP multicast traffic and make sure it is forwarded by the MDB
# and only arrives to the first VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
@@ -2205,7 +2242,7 @@ mdb_fdb_common()
# Remove the MDB entry and make sure that IP multicast is now forwarded
# by the FDB to the second VTEP.
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 2
@@ -2221,14 +2258,15 @@ mdb_fdb_ipv4_ipv4()
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}
mdb_fdb_ipv6_ipv4()
@@ -2240,14 +2278,15 @@ mdb_fdb_ipv6_ipv4()
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}
mdb_fdb_ipv4_ipv6()
@@ -2259,14 +2298,15 @@ mdb_fdb_ipv4_ipv6()
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}
mdb_fdb_ipv6_ipv6()
@@ -2278,14 +2318,15 @@ mdb_fdb_ipv6_ipv6()
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}
mdb_grp1_loop()
@@ -2320,7 +2361,9 @@ mdb_torture_common()
local vtep1_ip=$1; shift
local vtep2_ip=$1; shift
local grp1=$1; shift
+ local grp1_dmac=$1; shift
local grp2=$1; shift
+ local grp2_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
local pid1
@@ -2345,9 +2388,9 @@ mdb_torture_common()
pid1=$!
mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 &
pid2=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid3=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid4=$!
sleep 30
@@ -2363,15 +2406,17 @@ mdb_torture_ipv4_ipv4()
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129
echo
echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}
mdb_torture_ipv6_ipv4()
@@ -2380,15 +2425,17 @@ mdb_torture_ipv6_ipv4()
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1
echo
echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}
mdb_torture_ipv4_ipv6()
@@ -2397,15 +2444,17 @@ mdb_torture_ipv4_ipv6()
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129
echo
echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}
mdb_torture_ipv6_ipv6()
@@ -2414,15 +2463,17 @@ mdb_torture_ipv6_ipv6()
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1
echo
echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}
################################################################################
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index c6eda21cefb6b8..f27a12d2a2c997 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -1615,6 +1615,40 @@ TEST_F(tls, getsockopt)
EXPECT_EQ(errno, EINVAL);
}
+TEST_F(tls, recv_efault)
+{
+ char *rec1 = "1111111111";
+ char *rec2 = "2222222222";
+ struct msghdr hdr = {};
+ struct iovec iov[2];
+ char recv_mem[12];
+ int ret;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ EXPECT_EQ(send(self->fd, rec1, 10, 0), 10);
+ EXPECT_EQ(send(self->fd, rec2, 10, 0), 10);
+
+ iov[0].iov_base = recv_mem;
+ iov[0].iov_len = sizeof(recv_mem);
+ iov[1].iov_base = NULL; /* broken iov to make process_rx_list fail */
+ iov[1].iov_len = 1;
+
+ hdr.msg_iovlen = 2;
+ hdr.msg_iov = iov;
+
+ EXPECT_EQ(recv(self->cfd, recv_mem, 1, 0), 1);
+ EXPECT_EQ(recv_mem[0], rec1[0]);
+
+ ret = recvmsg(self->cfd, &hdr, 0);
+ EXPECT_LE(ret, sizeof(recv_mem));
+ EXPECT_GE(ret, 9);
+ EXPECT_EQ(memcmp(rec1, recv_mem, 9), 0);
+ if (ret > 9)
+ EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0);
+}
+
FIXTURE(tls_err)
{
int fd, cfd;
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 380cb15e942e42..83ed987cff340e 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -244,7 +244,7 @@ for family in 4 6; do
create_vxlan_pair
ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
- run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10
cleanup
# use NAT to circumvent GRO FWD check
@@ -258,13 +258,7 @@ for family in 4 6; do
# load arp cache before running the test to reduce the amount of
# stray traffic on top of the UDP tunnel
ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
- run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
- cleanup
-
- create_vxlan_pair
- run_bench "UDP tunnel fwd perf" $OL_NET$DST
- ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
- run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST
cleanup
done
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 1d975bf52af339..85b3baa3f7f341 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -34,7 +34,7 @@
#endif
#ifndef UDP_MAX_SEGMENTS
-#define UDP_MAX_SEGMENTS (1 << 6UL)
+#define UDP_MAX_SEGMENTS (1 << 7UL)
#endif
#define CONST_MTU_TEST 1500
diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
index 505294da1b9fb5..d6f99eb9be659d 100644
--- a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -154,7 +154,7 @@ static int dev_papr_vpd_null_handle(void)
static int papr_vpd_close_handle_without_reading(void)
{
const int devfd = open(DEVPATH, O_RDONLY);
- struct papr_location_code lc;
+ struct papr_location_code lc = { .str = "", };
int fd;
SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
index c537d52fafc586..a40541bb7c7dee 100644
--- a/tools/testing/selftests/riscv/hwprobe/cbo.c
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -19,7 +19,7 @@
#include "hwprobe.h"
#include "../../kselftest.h"
-#define MK_CBO(fn) cpu_to_le32((fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15)
+#define MK_CBO(fn) le32_bswap((uint32_t)(fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15)
static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 };
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.h b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
index e3fccb390c4dc9..f3de970c32227b 100644
--- a/tools/testing/selftests/riscv/hwprobe/hwprobe.h
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
@@ -4,6 +4,16 @@
#include <stddef.h>
#include <asm/hwprobe.h>
+#if __BYTE_ORDER == __BIG_ENDIAN
+# define le32_bswap(_x) \
+ ((((_x) & 0x000000ffU) << 24) | \
+ (((_x) & 0x0000ff00U) << 8) | \
+ (((_x) & 0x00ff0000U) >> 8) | \
+ (((_x) & 0xff000000U) >> 24))
+#else
+# define le32_bswap(_x) (_x)
+#endif
+
/*
* Rather than relying on having a new enough libc to define this, just do it
* ourselves. This way we don't need to be coupled to a new-enough libc to
diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings
index 6091b45d226baf..a953c96aa16e1e 100644
--- a/tools/testing/selftests/seccomp/settings
+++ b/tools/testing/selftests/seccomp/settings
@@ -1 +1 @@
-timeout=120
+timeout=180
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
index b5d592d4099e85..d975a67673299f 100644
--- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
@@ -158,6 +158,20 @@ static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
/* In preparation for sigreturn. */
SYSCALL_DISPATCH_OFF(glob_sel);
+
+ /*
+ * The tests for argument handling assume that `syscall(x) == x`. This
+ * is a NOP on x86 because the syscall number is passed in %rax, which
+ * happens to also be the function ABI return register. Other
+ * architectures may need to swizzle the arguments around.
+ */
+#if defined(__riscv)
+/* REG_A7 is not defined in libc headers */
+# define REG_A7 (REG_A0 + 7)
+
+ ((ucontext_t *)ucontext)->uc_mcontext.__gregs[REG_A0] =
+ ((ucontext_t *)ucontext)->uc_mcontext.__gregs[REG_A7];
+#endif
}
TEST(dispatch_and_return)
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index d49dd3ffd0d96a..c001dd79179d5d 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end)
diff = end.tv_usec - start.tv_usec;
diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;
- if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+ if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
printf("Diff too high: %lld..", diff);
return -1;
}
@@ -184,80 +184,71 @@ static int check_timer_create(int which)
return 0;
}
-int remain;
-__thread int got_signal;
+static pthread_t ctd_thread;
+static volatile int ctd_count, ctd_failed;
-static void *distribution_thread(void *arg)
+static void ctd_sighandler(int sig)
{
- while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
- return NULL;
+ if (pthread_self() != ctd_thread)
+ ctd_failed = 1;
+ ctd_count--;
}
-static void distribution_handler(int nr)
+static void *ctd_thread_func(void *arg)
{
- if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED))
- __atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED);
-}
-
-/*
- * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
- * timer signals. This primarily tests that the kernel does not favour any one.
- */
-static int check_timer_distribution(void)
-{
- int err, i;
- timer_t id;
- const int nthreads = 10;
- pthread_t threads[nthreads];
struct itimerspec val = {
.it_value.tv_sec = 0,
.it_value.tv_nsec = 1000 * 1000,
.it_interval.tv_sec = 0,
.it_interval.tv_nsec = 1000 * 1000,
};
+ timer_t id;
- remain = nthreads + 1; /* worker threads + this thread */
- signal(SIGALRM, distribution_handler);
- err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
- if (err < 0) {
- ksft_perror("Can't create timer");
- return -1;
- }
- err = timer_settime(id, 0, &val, NULL);
- if (err < 0) {
- ksft_perror("Can't set timer");
- return -1;
- }
+ /* 1/10 seconds to ensure the leader sleeps */
+ usleep(10000);
- for (i = 0; i < nthreads; i++) {
- err = pthread_create(&threads[i], NULL, distribution_thread,
- NULL);
- if (err) {
- ksft_print_msg("Can't create thread: %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
- }
+ ctd_count = 100;
+ if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
+ return "Can't create timer\n";
+ if (timer_settime(id, 0, &val, NULL))
+ return "Can't set timer\n";
- /* Wait for all threads to receive the signal. */
- while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
+ while (ctd_count > 0 && !ctd_failed)
+ ;
- for (i = 0; i < nthreads; i++) {
- err = pthread_join(threads[i], NULL);
- if (err) {
- ksft_print_msg("Can't join thread: %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
- }
+ if (timer_delete(id))
+ return "Can't delete timer\n";
- if (timer_delete(id)) {
- ksft_perror("Can't delete timer");
- return -1;
- }
+ return NULL;
+}
+
+/*
+ * Test that only the running thread receives the timer signal.
+ */
+static int check_timer_distribution(void)
+{
+ const char *errmsg;
- ksft_test_result_pass("check_timer_distribution\n");
+ signal(SIGALRM, ctd_sighandler);
+
+ errmsg = "Can't create thread\n";
+ if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
+ goto err;
+
+ errmsg = "Can't join thread\n";
+ if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
+ goto err;
+
+ if (!ctd_failed)
+ ksft_test_result_pass("check signal distribution\n");
+ else if (ksft_min_kernel_version(6, 3))
+ ksft_test_result_fail("check signal distribution\n");
+ else
+ ksft_test_result_skip("check signal distribution (old kernel)\n");
return 0;
+err:
+ ksft_print_msg("%s", errmsg);
+ return -1;
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index 48b9a803235a80..d13ebde203221a 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -21,9 +21,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
-
-
-
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
@@ -62,45 +59,47 @@ int clear_time_state(void)
#define NUM_FREQ_OUTOFRANGE 4
#define NUM_FREQ_INVALID 2
+#define SHIFTED_PPM (1 << 16)
+
long valid_freq[NUM_FREQ_VALID] = {
- -499<<16,
- -450<<16,
- -400<<16,
- -350<<16,
- -300<<16,
- -250<<16,
- -200<<16,
- -150<<16,
- -100<<16,
- -75<<16,
- -50<<16,
- -25<<16,
- -10<<16,
- -5<<16,
- -1<<16,
+ -499 * SHIFTED_PPM,
+ -450 * SHIFTED_PPM,
+ -400 * SHIFTED_PPM,
+ -350 * SHIFTED_PPM,
+ -300 * SHIFTED_PPM,
+ -250 * SHIFTED_PPM,
+ -200 * SHIFTED_PPM,
+ -150 * SHIFTED_PPM,
+ -100 * SHIFTED_PPM,
+ -75 * SHIFTED_PPM,
+ -50 * SHIFTED_PPM,
+ -25 * SHIFTED_PPM,
+ -10 * SHIFTED_PPM,
+ -5 * SHIFTED_PPM,
+ -1 * SHIFTED_PPM,
-1000,
- 1<<16,
- 5<<16,
- 10<<16,
- 25<<16,
- 50<<16,
- 75<<16,
- 100<<16,
- 150<<16,
- 200<<16,
- 250<<16,
- 300<<16,
- 350<<16,
- 400<<16,
- 450<<16,
- 499<<16,
+ 1 * SHIFTED_PPM,
+ 5 * SHIFTED_PPM,
+ 10 * SHIFTED_PPM,
+ 25 * SHIFTED_PPM,
+ 50 * SHIFTED_PPM,
+ 75 * SHIFTED_PPM,
+ 100 * SHIFTED_PPM,
+ 150 * SHIFTED_PPM,
+ 200 * SHIFTED_PPM,
+ 250 * SHIFTED_PPM,
+ 300 * SHIFTED_PPM,
+ 350 * SHIFTED_PPM,
+ 400 * SHIFTED_PPM,
+ 450 * SHIFTED_PPM,
+ 499 * SHIFTED_PPM,
};
long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
- -1000<<16,
- -550<<16,
- 550<<16,
- 1000<<16,
+ -1000 * SHIFTED_PPM,
+ -550 * SHIFTED_PPM,
+ 550 * SHIFTED_PPM,
+ 1000 * SHIFTED_PPM,
};
#define LONG_MAX (~0UL>>1)
diff --git a/tools/testing/selftests/turbostat/defcolumns.py b/tools/testing/selftests/turbostat/defcolumns.py
new file mode 100755
index 00000000000000..d9b042097da7ad
--- /dev/null
+++ b/tools/testing/selftests/turbostat/defcolumns.py
@@ -0,0 +1,60 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+from shutil import which
+
+turbostat = which('turbostat')
+if turbostat is None:
+ print('Could not find turbostat binary')
+ exit(1)
+
+timeout = which('timeout')
+if timeout is None:
+ print('Could not find timeout binary')
+ exit(1)
+
+proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+
+#
+# By default --list reports also "usec" and "Time_Of_Day_Seconds" columns
+# which are only visible when running with --debug.
+#
+expected_columns_debug = proc_turbostat.stdout.replace(b',', b'\t').strip()
+expected_columns = expected_columns_debug.replace(b'usec\t', b'').replace(b'Time_Of_Day_Seconds\t', b'').replace(b'X2APIC\t', b'').replace(b'APIC\t', b'')
+
+#
+# Run turbostat with no options for 10 seconds and send SIGINT
+#
+timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '1s']
+turbostat_argv = [turbostat, '-i', '0.250']
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}')
+ exit(1)
+print('OK')
+
+#
+# Same, but with --debug
+#
+turbostat_argv.append('--debug')
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns_debug != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}')
+ exit(1)
+print('OK')
diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c
index 757e6527f67e2a..ee909a7927f9d9 100644
--- a/tools/testing/selftests/x86/test_shadow_stack.c
+++ b/tools/testing/selftests/x86/test_shadow_stack.c
@@ -556,7 +556,7 @@ struct node {
* looked at the shadow stack gaps.
* 5. See if it landed in the gap.
*/
-int test_guard_gap(void)
+int test_guard_gap_other_gaps(void)
{
void *free_area, *shstk, *test_map = (void *)0xFFFFFFFFFFFFFFFF;
struct node *head = NULL, *cur;
@@ -593,11 +593,64 @@ int test_guard_gap(void)
if (shstk - test_map - PAGE_SIZE != PAGE_SIZE)
return 1;
- printf("[OK]\tGuard gap test\n");
+ printf("[OK]\tGuard gap test, other mapping's gaps\n");
return 0;
}
+/* Tests respecting the guard gap of the mapping getting placed */
+int test_guard_gap_new_mappings_gaps(void)
+{
+ void *free_area, *shstk_start, *test_map = (void *)0xFFFFFFFFFFFFFFFF;
+ struct node *head = NULL, *cur;
+ int ret = 0;
+
+ free_area = mmap(0, PAGE_SIZE * 4, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ munmap(free_area, PAGE_SIZE * 4);
+
+ /* Test letting map_shadow_stack find a free space */
+ shstk_start = mmap(free_area, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (shstk_start == MAP_FAILED || shstk_start != free_area)
+ return 1;
+
+ while (test_map > shstk_start) {
+ test_map = (void *)syscall(__NR_map_shadow_stack, 0, PAGE_SIZE, 0);
+ if (test_map == MAP_FAILED) {
+ printf("[INFO]\tmap_shadow_stack MAP_FAILED\n");
+ ret = 1;
+ break;
+ }
+
+ cur = malloc(sizeof(*cur));
+ cur->mapping = test_map;
+
+ cur->next = head;
+ head = cur;
+
+ if (test_map == free_area + PAGE_SIZE) {
+ printf("[INFO]\tNew mapping has other mapping in guard gap!\n");
+ ret = 1;
+ break;
+ }
+ }
+
+ while (head) {
+ cur = head;
+ head = cur->next;
+ munmap(cur->mapping, PAGE_SIZE);
+ free(cur);
+ }
+
+ munmap(shstk_start, PAGE_SIZE);
+
+ if (!ret)
+ printf("[OK]\tGuard gap test, placement mapping's gaps\n");
+
+ return ret;
+}
+
/*
* Too complicated to pull it out of the 32 bit header, but also get the
* 64 bit one needed above. Just define a copy here.
@@ -850,9 +903,15 @@ int main(int argc, char *argv[])
goto out;
}
- if (test_guard_gap()) {
+ if (test_guard_gap_other_gaps()) {
+ ret = 1;
+ printf("[FAIL]\tGuard gap test, other mappings' gaps\n");
+ goto out;
+ }
+
+ if (test_guard_gap_new_mappings_gaps()) {
ret = 1;
- printf("[FAIL]\tGuard gap test\n");
+ printf("[FAIL]\tGuard gap test, placement mapping's gaps\n");
goto out;
}
diff --git a/tools/writeback/wb_monitor.py b/tools/writeback/wb_monitor.py
new file mode 100644
index 00000000000000..5e3591f1f9a9df
--- /dev/null
+++ b/tools/writeback/wb_monitor.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env drgn
+#
+# Copyright (C) 2024 Kemeng Shi <shikemeng@huaweicloud.com>
+# Copyright (C) 2024 Huawei Inc
+
+desc = """
+This is a drgn script based on wq_monitor.py to monitor writeback info on
+backing dev. For more info on drgn, visit https://github.com/osandov/drgn.
+
+ writeback(kB) Amount of dirty pages are currently being written back to
+ disk.
+
+ reclaimable(kB) Amount of pages are currently reclaimable.
+
+ dirtied(kB) Amount of pages have been dirtied.
+
+ wrttien(kB) Amount of dirty pages have been written back to disk.
+
+ avg_wb(kBps) Smoothly estimated write bandwidth of writing dirty pages
+ back to disk.
+"""
+
+import signal
+import re
+import time
+import json
+
+import drgn
+from drgn.helpers.linux.list import list_for_each_entry
+
+import argparse
+parser = argparse.ArgumentParser(description=desc,
+ formatter_class=argparse.RawTextHelpFormatter)
+parser.add_argument('bdi', metavar='REGEX', nargs='*',
+ help='Target backing device name patterns (all if empty)')
+parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
+ help='Monitoring interval (0 to print once and exit)')
+parser.add_argument('-j', '--json', action='store_true',
+ help='Output in json')
+parser.add_argument('-c', '--cgroup', action='store_true',
+ help='show writeback of bdi in cgroup')
+args = parser.parse_args()
+
+bdi_list = prog['bdi_list']
+
+WB_RECLAIMABLE = prog['WB_RECLAIMABLE']
+WB_WRITEBACK = prog['WB_WRITEBACK']
+WB_DIRTIED = prog['WB_DIRTIED']
+WB_WRITTEN = prog['WB_WRITTEN']
+NR_WB_STAT_ITEMS = prog['NR_WB_STAT_ITEMS']
+
+PAGE_SHIFT = prog['PAGE_SHIFT']
+
+def K(x):
+ return x << (PAGE_SHIFT - 10)
+
+class Stats:
+ def dict(self, now):
+ return { 'timestamp' : now,
+ 'name' : self.name,
+ 'writeback' : self.stats[WB_WRITEBACK],
+ 'reclaimable' : self.stats[WB_RECLAIMABLE],
+ 'dirtied' : self.stats[WB_DIRTIED],
+ 'written' : self.stats[WB_WRITTEN],
+ 'avg_wb' : self.avg_bw, }
+
+ def table_header_str():
+ return f'{"":>16} {"writeback":>10} {"reclaimable":>12} ' \
+ f'{"dirtied":>9} {"written":>9} {"avg_bw":>9}'
+
+ def table_row_str(self):
+ out = f'{self.name[-16:]:16} ' \
+ f'{self.stats[WB_WRITEBACK]:10} ' \
+ f'{self.stats[WB_RECLAIMABLE]:12} ' \
+ f'{self.stats[WB_DIRTIED]:9} ' \
+ f'{self.stats[WB_WRITTEN]:9} ' \
+ f'{self.avg_bw:9} '
+ return out
+
+ def show_header():
+ if Stats.table_fmt:
+ print()
+ print(Stats.table_header_str())
+
+ def show_stats(self):
+ if Stats.table_fmt:
+ print(self.table_row_str())
+ else:
+ print(self.dict(Stats.now))
+
+class WbStats(Stats):
+ def __init__(self, wb):
+ bdi_name = wb.bdi.dev_name.string_().decode()
+ # avoid to use bdi.wb.memcg_css which is only defined when
+ # CONFIG_CGROUP_WRITEBACK is enabled
+ if wb == wb.bdi.wb.address_of_():
+ ino = "1"
+ else:
+ ino = str(wb.memcg_css.cgroup.kn.id.value_())
+ self.name = bdi_name + '_' + ino
+
+ self.stats = [0] * NR_WB_STAT_ITEMS
+ for i in range(NR_WB_STAT_ITEMS):
+ if wb.stat[i].count >= 0:
+ self.stats[i] = int(K(wb.stat[i].count))
+ else:
+ self.stats[i] = 0
+
+ self.avg_bw = int(K(wb.avg_write_bandwidth))
+
+class BdiStats(Stats):
+ def __init__(self, bdi):
+ self.name = bdi.dev_name.string_().decode()
+ self.stats = [0] * NR_WB_STAT_ITEMS
+ self.avg_bw = 0
+
+ def collectStats(self, wb_stats):
+ for i in range(NR_WB_STAT_ITEMS):
+ self.stats[i] += wb_stats.stats[i]
+
+ self.avg_bw += wb_stats.avg_bw
+
+exit_req = False
+
+def sigint_handler(signr, frame):
+ global exit_req
+ exit_req = True
+
+def main():
+ # handle args
+ Stats.table_fmt = not args.json
+ interval = args.interval
+ cgroup = args.cgroup
+
+ re_str = None
+ if args.bdi:
+ for r in args.bdi:
+ if re_str is None:
+ re_str = r
+ else:
+ re_str += '|' + r
+
+ filter_re = re.compile(re_str) if re_str else None
+
+ # monitoring loop
+ signal.signal(signal.SIGINT, sigint_handler)
+
+ while not exit_req:
+ Stats.now = time.time()
+
+ Stats.show_header()
+ for bdi in list_for_each_entry('struct backing_dev_info', bdi_list.address_of_(), 'bdi_list'):
+ bdi_stats = BdiStats(bdi)
+ if filter_re and not filter_re.search(bdi_stats.name):
+ continue
+
+ for wb in list_for_each_entry('struct bdi_writeback', bdi.wb_list.address_of_(), 'bdi_node'):
+ wb_stats = WbStats(wb)
+ bdi_stats.collectStats(wb_stats)
+ if cgroup:
+ wb_stats.show_stats()
+
+ bdi_stats.show_stats()
+ if cgroup and Stats.table_fmt:
+ print()
+
+ if interval == 0:
+ break
+ time.sleep(interval)
+
+if __name__ == "__main__":
+ main()
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fb49c2a602002e..92c70c99420af1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -832,8 +832,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* mn_active_invalidate_count (see above) instead of
* mmu_invalidate_in_progress.
*/
- gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
- hva_range.may_block);
+ gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end);
/*
* If one or more memslots were found and thus zapped, notify arch code
@@ -2902,7 +2901,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
spinlock_t *ptl;
int r;
- r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
+ r = follow_pte(vma, addr, &ptep, &ptl);
if (r) {
/*
* get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
@@ -2917,7 +2916,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
if (r)
return r;
- r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
+ r = follow_pte(vma, addr, &ptep, &ptl);
if (r)
return r;
}
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index ecefc7ec51af85..715f19669d01f7 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -26,13 +26,11 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
#ifdef CONFIG_HAVE_KVM_PFNCACHE
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
unsigned long start,
- unsigned long end,
- bool may_block);
+ unsigned long end);
#else
static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
unsigned long start,
- unsigned long end,
- bool may_block)
+ unsigned long end)
{
}
#endif /* HAVE_KVM_PFNCACHE */
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 4e07112a24c2f6..e3453e869e92c8 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -23,7 +23,7 @@
* MMU notifier 'invalidate_range_start' hook.
*/
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
- unsigned long end, bool may_block)
+ unsigned long end)
{
struct gfn_to_pfn_cache *gpc;
@@ -57,6 +57,19 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
spin_unlock(&kvm->gpc_lock);
}
+static bool kvm_gpc_is_valid_len(gpa_t gpa, unsigned long uhva,
+ unsigned long len)
+{
+ unsigned long offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) :
+ offset_in_page(gpa);
+
+ /*
+ * The cached access must fit within a single page. The 'len' argument
+ * to activate() and refresh() exists only to enforce that.
+ */
+ return offset + len <= PAGE_SIZE;
+}
+
bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
{
struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
@@ -74,7 +87,7 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
if (kvm_is_error_hva(gpc->uhva))
return false;
- if (offset_in_page(gpc->uhva) + len > PAGE_SIZE)
+ if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len))
return false;
if (!gpc->valid)
@@ -232,8 +245,7 @@ out_error:
return -EFAULT;
}
-static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
- unsigned long len)
+static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva)
{
unsigned long page_offset;
bool unmap_old = false;
@@ -247,15 +259,6 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l
if (WARN_ON_ONCE(kvm_is_error_gpa(gpa) == kvm_is_error_hva(uhva)))
return -EINVAL;
- /*
- * The cached acces must fit within a single page. The 'len' argument
- * exists only to enforce that.
- */
- page_offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) :
- offset_in_page(gpa);
- if (page_offset + len > PAGE_SIZE)
- return -EINVAL;
-
lockdep_assert_held(&gpc->refresh_lock);
write_lock_irq(&gpc->lock);
@@ -270,6 +273,8 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l
old_uhva = PAGE_ALIGN_DOWN(gpc->uhva);
if (kvm_is_error_gpa(gpa)) {
+ page_offset = offset_in_page(uhva);
+
gpc->gpa = INVALID_GPA;
gpc->memslot = NULL;
gpc->uhva = PAGE_ALIGN_DOWN(uhva);
@@ -279,6 +284,8 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned l
} else {
struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
+ page_offset = offset_in_page(gpa);
+
if (gpc->gpa != gpa || gpc->generation != slots->generation ||
kvm_is_error_hva(gpc->uhva)) {
gfn_t gfn = gpa_to_gfn(gpa);
@@ -354,6 +361,9 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len)
guard(mutex)(&gpc->refresh_lock);
+ if (!kvm_gpc_is_valid_len(gpc->gpa, gpc->uhva, len))
+ return -EINVAL;
+
/*
* If the GPA is valid then ignore the HVA, as a cache can be GPA-based
* or HVA-based, not both. For GPA-based caches, the HVA will be
@@ -361,7 +371,7 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len)
*/
uhva = kvm_is_error_gpa(gpc->gpa) ? gpc->uhva : KVM_HVA_ERR_BAD;
- return __kvm_gpc_refresh(gpc, gpc->gpa, uhva, len);
+ return __kvm_gpc_refresh(gpc, gpc->gpa, uhva);
}
void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm)
@@ -381,6 +391,9 @@ static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned
{
struct kvm *kvm = gpc->kvm;
+ if (!kvm_gpc_is_valid_len(gpa, uhva, len))
+ return -EINVAL;
+
guard(mutex)(&gpc->refresh_lock);
if (!gpc->active) {
@@ -400,11 +413,18 @@ static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned
gpc->active = true;
write_unlock_irq(&gpc->lock);
}
- return __kvm_gpc_refresh(gpc, gpa, uhva, len);
+ return __kvm_gpc_refresh(gpc, gpa, uhva);
}
int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
{
+ /*
+ * Explicitly disallow INVALID_GPA so that the magic value can be used
+ * by KVM to differentiate between GPA-based and HVA-based caches.
+ */
+ if (WARN_ON_ONCE(kvm_is_error_gpa(gpa)))
+ return -EINVAL;
+
return __kvm_gpc_activate(gpc, gpa, KVM_HVA_ERR_BAD, len);
}