aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.mailmap11
-rw-r--r--CREDITS4
-rw-r--r--Documentation/admin-guide/hw-vuln/spectre.rst44
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt12
-rw-r--r--Documentation/admin-guide/mm/zswap.rst4
-rw-r--r--Documentation/arch/x86/resctrl.rst2
-rw-r--r--Documentation/dev-tools/testing-overview.rst2
-rw-r--r--Documentation/devicetree/bindings/clock/keystone-gate.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/keystone-pll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/adpll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/apll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/autoidle.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/clockdomain.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/composite.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/divider.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/dpll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/fapll.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/gate.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/interface.txt2
-rw-r--r--Documentation/devicetree/bindings/clock/ti/mux.txt2
-rw-r--r--Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml9
-rw-r--r--Documentation/devicetree/bindings/dts-coding-style.rst2
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml4
-rw-r--r--Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml3
-rw-r--r--Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt3
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml2
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml2
-rw-r--r--Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml2
-rw-r--r--Documentation/devicetree/bindings/ufs/qcom,ufs.yaml38
-rw-r--r--Documentation/driver-api/virtio/writing_virtio_drivers.rst1
-rw-r--r--Documentation/filesystems/bcachefs/index.rst11
-rw-r--r--Documentation/filesystems/index.rst1
-rw-r--r--Documentation/kbuild/llvm.rst2
-rw-r--r--Documentation/mm/page_owner.rst73
-rw-r--r--Documentation/networking/devlink/devlink-eswitch-attr.rst76
-rw-r--r--Documentation/networking/devlink/index.rst1
-rw-r--r--Documentation/networking/representors.rst1
-rw-r--r--Documentation/virt/kvm/x86/amd-memory-encryption.rst42
-rw-r--r--Documentation/virt/kvm/x86/msr.rst19
-rw-r--r--MAINTAINERS181
-rw-r--r--Makefile2
-rw-r--r--arch/Kconfig12
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7s-warp.dts1
-rw-r--r--arch/arm/mach-omap2/board-n8x0.c23
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi16
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi40
-rw-r--r--arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi16
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi8
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi2
-rw-r--r--arch/arm64/include/asm/tlbflush.h20
-rw-r--r--arch/arm64/kernel/head.S36
-rw-r--r--arch/arm64/kernel/ptrace.c5
-rw-r--r--arch/arm64/kvm/arm.c13
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c3
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c23
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c3
-rw-r--r--arch/arm64/kvm/mmu.c2
-rw-r--r--arch/arm64/mm/hugetlbpage.c5
-rw-r--r--arch/arm64/mm/pageattr.c3
-rw-r--r--arch/arm64/net/bpf_jit_comp.c4
-rw-r--r--arch/loongarch/boot/dts/loongson-2k1000.dtsi7
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000-ref.dts33
-rw-r--r--arch/loongarch/boot/dts/loongson-2k2000.dtsi24
-rw-r--r--arch/loongarch/include/asm/addrspace.h1
-rw-r--r--arch/loongarch/include/asm/io.h20
-rw-r--r--arch/loongarch/include/asm/kfence.h9
-rw-r--r--arch/loongarch/include/asm/page.h26
-rw-r--r--arch/loongarch/mm/mmap.c4
-rw-r--r--arch/loongarch/mm/pgtable.c4
-rw-r--r--arch/mips/Kconfig18
-rw-r--r--arch/mips/include/asm/ptrace.h2
-rw-r--r--arch/mips/kernel/asm-offsets.c1
-rw-r--r--arch/mips/kernel/ptrace.c15
-rw-r--r--arch/mips/kernel/scall32-o32.S23
-rw-r--r--arch/mips/kernel/scall64-n32.S3
-rw-r--r--arch/mips/kernel/scall64-n64.S3
-rw-r--r--arch/mips/kernel/scall64-o32.S33
-rw-r--r--arch/nios2/kernel/prom.c6
-rw-r--r--arch/powerpc/include/asm/vdso/gettimeofday.h3
-rw-r--r--arch/riscv/Makefile2
-rw-r--r--arch/riscv/include/asm/pgtable.h6
-rw-r--r--arch/riscv/include/asm/syscall_wrapper.h3
-rw-r--r--arch/riscv/include/asm/uaccess.h4
-rw-r--r--arch/riscv/include/uapi/asm/auxvec.h2
-rw-r--r--arch/riscv/kernel/compat_vdso/Makefile2
-rw-r--r--arch/riscv/kernel/patch.c8
-rw-r--r--arch/riscv/kernel/process.c5
-rw-r--r--arch/riscv/kernel/signal.c15
-rw-r--r--arch/riscv/kernel/traps.c2
-rw-r--r--arch/riscv/kernel/vdso/Makefile1
-rw-r--r--arch/riscv/kvm/aia_aplic.c37
-rw-r--r--arch/riscv/kvm/vcpu_onereg.c2
-rw-r--r--arch/riscv/mm/tlbflush.c4
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c16
-rw-r--r--arch/s390/include/asm/atomic.h44
-rw-r--r--arch/s390/include/asm/atomic_ops.h22
-rw-r--r--arch/s390/include/asm/preempt.h36
-rw-r--r--arch/s390/kernel/entry.S4
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c10
-rw-r--r--arch/s390/kernel/perf_pai_ext.c10
-rw-r--r--arch/s390/mm/fault.c2
-rw-r--r--arch/s390/net/bpf_jit_comp.c46
-rw-r--r--arch/x86/Kbuild2
-rw-r--r--arch/x86/Kconfig12
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/compressed/efi_mixed.S20
-rw-r--r--arch/x86/coco/core.c93
-rw-r--r--arch/x86/entry/common.c10
-rw-r--r--arch/x86/entry/entry_64.S61
-rw-r--r--arch/x86/entry/entry_64_compat.S16
-rw-r--r--arch/x86/entry/syscall_32.c21
-rw-r--r--arch/x86/entry/syscall_64.c19
-rw-r--r--arch/x86/entry/syscall_x32.c10
-rw-r--r--arch/x86/entry/vdso/Makefile1
-rw-r--r--arch/x86/events/amd/core.c39
-rw-r--r--arch/x86/events/amd/lbr.c16
-rw-r--r--arch/x86/events/core.c1
-rw-r--r--arch/x86/events/intel/ds.c8
-rw-r--r--arch/x86/hyperv/hv_apic.c16
-rw-r--r--arch/x86/hyperv/hv_proc.c22
-rw-r--r--arch/x86/include/asm/alternative.h4
-rw-r--r--arch/x86/include/asm/apic.h3
-rw-r--r--arch/x86/include/asm/asm-prototypes.h1
-rw-r--r--arch/x86/include/asm/coco.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h8
-rw-r--r--arch/x86/include/asm/cpufeatures.h15
-rw-r--r--arch/x86/include/asm/disabled-features.h3
-rw-r--r--arch/x86/include/asm/msr-index.h9
-rw-r--r--arch/x86/include/asm/nospec-branch.h38
-rw-r--r--arch/x86/include/asm/required-features.h3
-rw-r--r--arch/x86/include/asm/sev.h8
-rw-r--r--arch/x86/include/asm/syscall.h11
-rw-r--r--arch/x86/include/asm/x86_init.h3
-rw-r--r--arch/x86/include/uapi/asm/kvm.h23
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h1
-rw-r--r--arch/x86/kernel/apic/apic.c6
-rw-r--r--arch/x86/kernel/callthunks.c4
-rw-r--r--arch/x86/kernel/cpu/amd.c53
-rw-r--r--arch/x86/kernel/cpu/bugs.c167
-rw-r--r--arch/x86/kernel/cpu/common.c70
-rw-r--r--arch/x86/kernel/cpu/mce/core.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h3
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/cpu/topology.c7
-rw-r--r--arch/x86/kernel/cpu/topology_amd.c51
-rw-r--r--arch/x86/kernel/eisa.c3
-rw-r--r--arch/x86/kernel/kvm.c11
-rw-r--r--arch/x86/kernel/nmi.c24
-rw-r--r--arch/x86/kernel/probe_roms.c10
-rw-r--r--arch/x86/kernel/setup.c5
-rw-r--r--arch/x86/kernel/sev.c37
-rw-r--r--arch/x86/kernel/x86_init.c2
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/cpuid.c42
-rw-r--r--arch/x86/kvm/reverse_cpuid.h5
-rw-r--r--arch/x86/kvm/svm/sev.c60
-rw-r--r--arch/x86/kvm/trace.h10
-rw-r--r--arch/x86/kvm/vmx/vmenter.S2
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/lib/retpoline.S15
-rw-r--r--arch/x86/mm/ident_map.c23
-rw-r--r--arch/x86/mm/mem_encrypt_amd.c18
-rw-r--r--arch/x86/mm/numa_32.c1
-rw-r--r--arch/x86/mm/pat/memtype.c49
-rw-r--r--arch/x86/net/bpf_jit_comp.c19
-rw-r--r--arch/x86/virt/Makefile2
-rw-r--r--arch/x86/virt/svm/sev.c26
-rw-r--r--block/bdev.c84
-rw-r--r--block/blk-cgroup.c9
-rw-r--r--block/blk-cgroup.h2
-rw-r--r--block/blk-core.c3
-rw-r--r--block/blk-iocost.c7
-rw-r--r--block/blk-merge.c2
-rw-r--r--block/blk-mq.c9
-rw-r--r--block/blk-settings.c19
-rw-r--r--block/blk.h1
-rw-r--r--block/ioctl.c5
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c40
-rw-r--r--drivers/accel/ivpu/ivpu_drv.h3
-rw-r--r--drivers/accel/ivpu/ivpu_hw.h6
-rw-r--r--drivers/accel/ivpu/ivpu_hw_37xx.c11
-rw-r--r--drivers/accel/ivpu/ivpu_hw_40xx.c6
-rw-r--r--drivers/accel/ivpu/ivpu_ipc.c8
-rw-r--r--drivers/accel/ivpu/ivpu_mmu.c8
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c14
-rw-r--r--drivers/acpi/acpica/dbnames.c8
-rw-r--r--drivers/acpi/apei/einj-core.c2
-rw-r--r--drivers/acpi/scan.c3
-rw-r--r--drivers/acpi/thermal.c22
-rw-r--r--drivers/ata/ahci.c85
-rw-r--r--drivers/ata/ahci_st.c1
-rw-r--r--drivers/ata/libata-core.c2
-rw-r--r--drivers/ata/libata-eh.c5
-rw-r--r--drivers/ata/libata-scsi.c16
-rw-r--r--drivers/ata/pata_macio.c3
-rw-r--r--drivers/ata/sata_gemini.c5
-rw-r--r--drivers/ata/sata_mv.c63
-rw-r--r--drivers/ata/sata_sx4.c6
-rw-r--r--drivers/base/core.c26
-rw-r--r--drivers/base/regmap/regcache-maple.c6
-rw-r--r--drivers/block/null_blk/main.c4
-rw-r--r--drivers/bluetooth/btqca.c8
-rw-r--r--drivers/bluetooth/hci_qca.c19
-rw-r--r--drivers/cache/sifive_ccache.c72
-rw-r--r--drivers/char/random.c10
-rw-r--r--drivers/crypto/ccp/sev-dev.c2
-rw-r--r--drivers/cxl/Kconfig13
-rw-r--r--drivers/cxl/acpi.c13
-rw-r--r--drivers/cxl/core/cdat.c152
-rw-r--r--drivers/cxl/core/mbox.c5
-rw-r--r--drivers/cxl/core/port.c114
-rw-r--r--drivers/cxl/core/regs.c5
-rw-r--r--drivers/cxl/cxl.h8
-rw-r--r--drivers/cxl/cxlmem.h2
-rw-r--r--drivers/dma-buf/st-dma-fence-chain.c6
-rw-r--r--drivers/dpll/Kconfig2
-rw-r--r--drivers/firewire/ohci.c6
-rw-r--r--drivers/firmware/arm_ffa/driver.c2
-rw-r--r--drivers/firmware/arm_scmi/powercap.c2
-rw-r--r--drivers/firmware/arm_scmi/raw_mode.c7
-rw-r--r--drivers/firmware/efi/libstub/randomalloc.c2
-rw-r--r--drivers/firmware/efi/libstub/x86-stub.c1
-rw-r--r--drivers/gpio/gpio-crystalcove.c2
-rw-r--r--drivers/gpio/gpio-lpc32xx.c1
-rw-r--r--drivers/gpio/gpio-wcove.c2
-rw-r--r--drivers/gpio/gpiolib-cdev.c58
-rw-r--r--drivers/gpio/gpiolib.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.h22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c65
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c72
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c17
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c4
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c37
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c8
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h2
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c19
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c57
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_state.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce112/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce120/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce60/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce80/Makefile2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c54
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h14
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c8
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c103
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c41
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c11
-rw-r--r--drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c13
-rw-r--r--drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h13
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c27
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h33
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h55
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h46
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h10
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h1
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c8
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c12
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c52
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c368
-rw-r--r--drivers/gpu/drm/ast/ast_dp.c3
-rw-r--r--drivers/gpu/drm/display/drm_dp_dual_mode_helper.c4
-rw-r--r--drivers/gpu/drm/display/drm_dp_helper.c7
-rw-r--r--drivers/gpu/drm/drm_client_modeset.c3
-rw-r--r--drivers/gpu/drm/drm_prime.c7
-rw-r--r--drivers/gpu/drm/i915/Makefile7
-rw-r--r--drivers/gpu/drm/i915/display/g4x_dp.c2
-rw-r--r--drivers/gpu/drm/i915/display/icl_dsi.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.c46
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.c42
-rw-r--r--drivers/gpu/drm/i915/display/intel_cdclk.h3
-rw-r--r--drivers/gpu/drm/i915/display/intel_cursor.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_device.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_types.h3
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c29
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_hdcp.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp_mst.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_drrs.c14
-rw-r--r--drivers/gpu/drm/i915/display/intel_drrs.h3
-rw-r--r--drivers/gpu/drm/i915/display/intel_dsb.c14
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_pin.c10
-rw-r--r--drivers/gpu/drm/i915/display/intel_psr.c89
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_vrr.c14
-rw-r--r--drivers/gpu/drm/i915/display/skl_universal_plane.c3
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c39
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c31
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c23
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c4
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c2
-rw-r--r--drivers/gpu/drm/i915/i915_hwmon.c37
-rw-r--r--drivers/gpu/drm/i915/i915_memcpy.c2
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h2
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c50
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu.c4
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c2
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h34
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c10
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c8
-rw-r--r--drivers/gpu/drm/msm/dp/dp_display.c6
-rw-r--r--drivers/gpu/drm/msm/msm_fb.c6
-rw-r--r--drivers/gpu/drm/msm/msm_kms.c4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bios.c13
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dmem.c12
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dp.c23
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c7
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c7
-rw-r--r--drivers/gpu/drm/panel/panel-novatek-nt36672e.c2
-rw-r--r--drivers/gpu/drm/panel/panel-visionox-rm69299.c2
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gpu.c6
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c13
-rw-r--r--drivers/gpu/drm/qxl/qxl_cmd.c2
-rw-r--r--drivers/gpu/drm/qxl/qxl_ioctl.c4
-rw-r--r--drivers/gpu/drm/qxl/qxl_release.c50
-rw-r--r--drivers/gpu/drm/radeon/pptable.h10
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c8
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_vop2_reg.c2
-rw-r--r--drivers/gpu/drm/scheduler/sched_entity.c12
-rw-r--r--drivers/gpu/drm/ttm/ttm_pool.c38
-rw-r--r--drivers/gpu/drm/v3d/v3d_irq.c4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_blit.c35
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c7
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.h2
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c27
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h3
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_gem.c32
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c11
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.h4
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_prime.c15
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c44
-rw-r--r--drivers/gpu/drm/xe/Makefile4
-rw-r--r--drivers/gpu/drm/xe/display/intel_fb_bo.c8
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c5
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h2
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c59
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h19
-rw-r--r--drivers/gpu/drm/xe/xe_device.c11
-rw-r--r--drivers/gpu/drm/xe/xe_device.h4
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c79
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c2
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h5
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c3
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h7
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c2
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c4
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c25
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c8
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence.c2
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c25
-rw-r--r--drivers/gpu/drm/xe/xe_query.c2
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.c11
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c10
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c131
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h8
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h8
-rw-r--r--drivers/gpu/host1x/bus.c8
-rw-r--r--drivers/hv/channel.c29
-rw-r--r--drivers/hv/connection.c29
-rw-r--r--drivers/hv/vmbus_drv.c94
-rw-r--r--drivers/i2c/busses/i2c-i801.c7
-rw-r--r--drivers/i2c/busses/i2c-pxa.c2
-rw-r--r--drivers/iommu/amd/init.c25
-rw-r--r--drivers/iommu/amd/iommu.c11
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c38
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h2
-rw-r--r--drivers/iommu/intel/iommu.c11
-rw-r--r--drivers/iommu/intel/perfmon.c2
-rw-r--r--drivers/iommu/intel/svm.c2
-rw-r--r--drivers/iommu/iommu.c11
-rw-r--r--drivers/iommu/mtk_iommu.c1
-rw-r--r--drivers/iommu/mtk_iommu_v1.c1
-rw-r--r--drivers/irqchip/irq-armada-370-xp.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c8
-rw-r--r--drivers/isdn/mISDN/socket.c10
-rw-r--r--drivers/md/dm-integrity.c2
-rw-r--r--drivers/md/dm-vdo/murmurhash3.c33
-rw-r--r--drivers/md/raid1.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c8
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c5
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h2
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c11
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c4
-rw-r--r--drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c5
-rw-r--r--drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h2
-rw-r--r--drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c2
-rw-r--r--drivers/mmc/core/block.c4
-rw-r--r--drivers/mmc/host/omap.c48
-rw-r--r--drivers/mmc/host/sdhci-of-dwcmshc.c28
-rw-r--r--drivers/mmc/host/sdhci-omap.c3
-rw-r--r--drivers/mtd/devices/block2mtd.c2
-rw-r--r--drivers/net/dsa/mt7530.c267
-rw-r--r--drivers/net/dsa/mt7530.h10
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c6
-rw-r--r--drivers/net/dsa/sja1105/sja1105_mdio.c2
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.c2
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c35
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_xdp.c4
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.c13
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.h2
-rw-r--r--drivers/net/ethernet/amd/pds_core/dev.c3
-rw-r--r--drivers/net/ethernet/amd/pds_core/main.c1
-rw-r--r--drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c43
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c6
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c16
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c11
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c19
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c4
-rw-r--r--drivers/net/ethernet/intel/e1000e/hw.h2
-rw-r--r--drivers/net/ethernet/intel/e1000e/ich8lan.c38
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c18
-rw-r--r--drivers/net/ethernet/intel/e1000e/phy.c182
-rw-r--r--drivers/net/ethernet/intel/e1000e/phy.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c13
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_register.h3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c82
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c45
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h3
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lag.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c18
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c24
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_tc_lib.c15
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c18
-rw-r--r--drivers/net/ethernet/intel/idpf/idpf_txrx.c4
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c16
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.c5
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c20
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c2
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/qos.c1
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c82
-rw-r--r--drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c31
-rw-r--r--drivers/net/ethernet/micrel/ks8851.h3
-rw-r--r--drivers/net/ethernet/micrel/ks8851_common.c16
-rw-r--r--drivers/net/ethernet/micrel/ks8851_par.c11
-rw-r--r--drivers/net/ethernet/micrel/ks8851_spi.c11
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.c18
-rw-r--r--drivers/net/ethernet/microchip/lan743x_main.h4
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_port.c4
-rw-r--r--drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c61
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.h6
-rw-r--r--drivers/net/ethernet/realtek/r8169_leds.c35
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c49
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c92
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c47
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c56
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc_core.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c29
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c18
-rw-r--r--drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c8
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c2
-rw-r--r--drivers/net/geneve.c4
-rw-r--r--drivers/net/hyperv/netvsc.c7
-rw-r--r--drivers/net/phy/micrel.c31
-rw-r--r--drivers/net/phy/qcom/at803x.c4
-rw-r--r--drivers/net/tun.c18
-rw-r--r--drivers/net/usb/ax88179_178a.c6
-rw-r--r--drivers/net/usb/qmi_wwan.c1
-rw-r--r--drivers/net/virtio_net.c26
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.c15
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/d3.c16
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c11
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/link.c59
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rfi.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c20
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/time-event.c5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/queue/tx.c2
-rw-r--r--drivers/net/wireless/realtek/rtw89/rtw8922a.c2
-rw-r--r--drivers/net/wwan/t7xx/t7xx_cldma.c4
-rw-r--r--drivers/net/wwan/t7xx/t7xx_hif_cldma.c9
-rw-r--r--drivers/net/wwan/t7xx/t7xx_pcie_mac.c8
-rw-r--r--drivers/net/xen-netfront.c1
-rw-r--r--drivers/nvme/host/core.c41
-rw-r--r--drivers/nvme/host/fc.c4
-rw-r--r--drivers/nvme/host/nvme.h12
-rw-r--r--drivers/nvme/host/zns.c33
-rw-r--r--drivers/nvme/target/configfs.c47
-rw-r--r--drivers/nvme/target/core.c7
-rw-r--r--drivers/nvme/target/fc.c17
-rw-r--r--drivers/of/dynamic.c12
-rw-r--r--drivers/of/module.c8
-rw-r--r--drivers/pci/quirks.c8
-rw-r--r--drivers/perf/riscv_pmu.c4
-rw-r--r--drivers/pinctrl/aspeed/Makefile2
-rw-r--r--drivers/pinctrl/pinctrl-amd.c2
-rw-r--r--drivers/platform/chrome/cros_ec_uart.c28
-rw-r--r--drivers/platform/x86/acer-wmi.c9
-rw-r--r--drivers/platform/x86/amd/pmc/pmc-quirks.c9
-rw-r--r--drivers/platform/x86/amd/pmf/Makefile2
-rw-r--r--drivers/platform/x86/amd/pmf/acpi.c7
-rw-r--r--drivers/platform/x86/amd/pmf/core.c1
-rw-r--r--drivers/platform/x86/amd/pmf/pmf-quirks.c51
-rw-r--r--drivers/platform/x86/amd/pmf/pmf.h3
-rw-r--r--drivers/platform/x86/intel/hid.c9
-rw-r--r--drivers/platform/x86/intel/speed_select_if/isst_if_common.c1
-rw-r--r--drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c4
-rw-r--r--drivers/platform/x86/intel/vbtn.c11
-rw-r--r--drivers/platform/x86/lg-laptop.c2
-rw-r--r--drivers/platform/x86/toshiba_acpi.c4
-rw-r--r--drivers/pwm/core.c2
-rw-r--r--drivers/pwm/pwm-dwc-core.c1
-rw-r--r--drivers/pwm/pwm-dwc.c88
-rw-r--r--drivers/pwm/pwm-dwc.h6
-rw-r--r--drivers/ras/amd/fmpm.c57
-rw-r--r--drivers/ras/debugfs.h4
-rw-r--r--drivers/regulator/tps65132-regulator.c7
-rw-r--r--drivers/s390/cio/device.c13
-rw-r--r--drivers/s390/cio/device_fsm.c5
-rw-r--r--drivers/s390/cio/device_ops.c8
-rw-r--r--drivers/s390/cio/qdio_main.c28
-rw-r--r--drivers/s390/net/ism_drv.c37
-rw-r--r--drivers/s390/net/qeth_core_main.c38
-rw-r--r--drivers/scsi/bnx2fc/bnx2fc_tgt.c2
-rw-r--r--drivers/scsi/ch.c20
-rw-r--r--drivers/scsi/cxlflash/main.c17
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_main.c2
-rw-r--r--drivers/scsi/hisi_sas/hisi_sas_v3_hw.c10
-rw-r--r--drivers/scsi/hosts.c7
-rw-r--r--drivers/scsi/libsas/sas_expander.c53
-rw-r--r--drivers/scsi/lpfc/lpfc.h2
-rw-r--r--drivers/scsi/lpfc/lpfc_attr.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_bsg.c40
-rw-r--r--drivers/scsi/lpfc/lpfc_debugfs.c12
-rw-r--r--drivers/scsi/lpfc/lpfc_els.c45
-rw-r--r--drivers/scsi/lpfc/lpfc_hbadisc.c33
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c13
-rw-r--r--drivers/scsi/lpfc/lpfc_mbox.c30
-rw-r--r--drivers/scsi/lpfc/lpfc_nportdisc.c12
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_nvmet.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c23
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c99
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.h30
-rw-r--r--drivers/scsi/lpfc/lpfc_sli4.h7
-rw-r--r--drivers/scsi/lpfc/lpfc_version.h2
-rw-r--r--drivers/scsi/lpfc/lpfc_vport.c10
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_app.c2
-rw-r--r--drivers/scsi/myrb.c20
-rw-r--r--drivers/scsi/myrs.c24
-rw-r--r--drivers/scsi/pmcraid.c20
-rw-r--r--drivers/scsi/qla2xxx/qla_attr.c14
-rw-r--r--drivers/scsi/qla2xxx/qla_def.h2
-rw-r--r--drivers/scsi/qla2xxx/qla_edif.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_gbl.h2
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c128
-rw-r--r--drivers/scsi/qla2xxx/qla_iocb.c68
-rw-r--r--drivers/scsi/qla2xxx/qla_mbx.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c3
-rw-r--r--drivers/scsi/qla2xxx/qla_target.c10
-rw-r--r--drivers/scsi/qla2xxx/qla_version.h4
-rw-r--r--drivers/scsi/scsi_lib.c7
-rw-r--r--drivers/scsi/scsi_scan.c34
-rw-r--r--drivers/scsi/sd.c25
-rw-r--r--drivers/scsi/sg.c38
-rw-r--r--drivers/scsi/st.c4
-rw-r--r--drivers/spi/spi-fsl-lpspi.c14
-rw-r--r--drivers/spi/spi-pci1xxxx.c2
-rw-r--r--drivers/spi/spi-s3c64xx.c5
-rw-r--r--drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c5
-rw-r--r--drivers/target/iscsi/iscsi_target_erl1.c3
-rw-r--r--drivers/target/target_core_configfs.c12
-rw-r--r--drivers/thermal/devfreq_cooling.c2
-rw-r--r--drivers/thermal/gov_power_allocator.c14
-rw-r--r--drivers/thermal/thermal_debugfs.c1
-rw-r--r--drivers/thermal/thermal_trip.c19
-rw-r--r--drivers/ufs/core/ufs-mcq.c2
-rw-r--r--drivers/ufs/core/ufshcd.c9
-rw-r--r--drivers/ufs/host/ufs-qcom.c14
-rw-r--r--drivers/uio/uio_hv_generic.c12
-rw-r--r--drivers/usb/class/cdc-wdm.c6
-rw-r--r--drivers/usb/core/hub.c23
-rw-r--r--drivers/usb/core/hub.h2
-rw-r--r--drivers/usb/core/port.c38
-rw-r--r--drivers/usb/core/sysfs.c16
-rw-r--r--drivers/usb/dwc2/core.h14
-rw-r--r--drivers/usb/dwc2/core_intr.c72
-rw-r--r--drivers/usb/dwc2/gadget.c10
-rw-r--r--drivers/usb/dwc2/hcd.c49
-rw-r--r--drivers/usb/dwc2/hcd_ddma.c17
-rw-r--r--drivers/usb/dwc2/hw.h2
-rw-r--r--drivers/usb/dwc2/platform.c2
-rw-r--r--drivers/usb/dwc3/core.c2
-rw-r--r--drivers/usb/dwc3/core.h2
-rw-r--r--drivers/usb/dwc3/dwc3-pci.c2
-rw-r--r--drivers/usb/dwc3/gadget.c10
-rw-r--r--drivers/usb/dwc3/host.c11
-rw-r--r--drivers/usb/gadget/udc/core.c4
-rw-r--r--drivers/usb/misc/usb-ljca.c22
-rw-r--r--drivers/usb/phy/phy-generic.c7
-rw-r--r--drivers/usb/storage/uas.c28
-rw-r--r--drivers/usb/typec/class.c7
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c6
-rw-r--r--drivers/usb/typec/ucsi/ucsi.c90
-rw-r--r--drivers/usb/typec/ucsi/ucsi.h5
-rw-r--r--drivers/usb/typec/ucsi/ucsi_acpi.c75
-rw-r--r--drivers/usb/typec/ucsi/ucsi_glink.c14
-rw-r--r--drivers/vhost/vhost.c26
-rw-r--r--drivers/video/fbdev/Kconfig3
-rw-r--r--drivers/virt/vmgenid.c2
-rw-r--r--drivers/virtio/virtio.c6
-rw-r--r--fs/aio.c2
-rw-r--r--fs/bcachefs/Makefile3
-rw-r--r--fs/bcachefs/acl.c30
-rw-r--r--fs/bcachefs/alloc_background.c47
-rw-r--r--fs/bcachefs/alloc_foreground.c4
-rw-r--r--fs/bcachefs/alloc_types.h3
-rw-r--r--fs/bcachefs/backpointers.c194
-rw-r--r--fs/bcachefs/backpointers.h41
-rw-r--r--fs/bcachefs/bcachefs.h10
-rw-r--r--fs/bcachefs/bcachefs_format.h26
-rw-r--r--fs/bcachefs/bkey.h6
-rw-r--r--fs/bcachefs/bkey_methods.c8
-rw-r--r--fs/bcachefs/bset.c14
-rw-r--r--fs/bcachefs/bset.h2
-rw-r--r--fs/bcachefs/btree_cache.c78
-rw-r--r--fs/bcachefs/btree_gc.c528
-rw-r--r--fs/bcachefs/btree_io.c37
-rw-r--r--fs/bcachefs/btree_iter.c52
-rw-r--r--fs/bcachefs/btree_iter.h11
-rw-r--r--fs/bcachefs/btree_journal_iter.c109
-rw-r--r--fs/bcachefs/btree_journal_iter.h8
-rw-r--r--fs/bcachefs/btree_key_cache.c4
-rw-r--r--fs/bcachefs/btree_locking.c28
-rw-r--r--fs/bcachefs/btree_node_scan.c519
-rw-r--r--fs/bcachefs/btree_node_scan.h11
-rw-r--r--fs/bcachefs/btree_node_scan_types.h30
-rw-r--r--fs/bcachefs/btree_trans_commit.c39
-rw-r--r--fs/bcachefs/btree_types.h14
-rw-r--r--fs/bcachefs/btree_update.c6
-rw-r--r--fs/bcachefs/btree_update_interior.c405
-rw-r--r--fs/bcachefs/btree_update_interior.h31
-rw-r--r--fs/bcachefs/btree_write_buffer.c28
-rw-r--r--fs/bcachefs/buckets.c12
-rw-r--r--fs/bcachefs/buckets.h9
-rw-r--r--fs/bcachefs/chardev.c100
-rw-r--r--fs/bcachefs/checksum.c23
-rw-r--r--fs/bcachefs/checksum.h5
-rw-r--r--fs/bcachefs/compress.h8
-rw-r--r--fs/bcachefs/data_update.c29
-rw-r--r--fs/bcachefs/debug.c75
-rw-r--r--fs/bcachefs/ec.c54
-rw-r--r--fs/bcachefs/ec.h2
-rw-r--r--fs/bcachefs/errcode.h3
-rw-r--r--fs/bcachefs/error.c6
-rw-r--r--fs/bcachefs/error.h6
-rw-r--r--fs/bcachefs/extents.c70
-rw-r--r--fs/bcachefs/extents.h25
-rw-r--r--fs/bcachefs/eytzinger.c234
-rw-r--r--fs/bcachefs/eytzinger.h81
-rw-r--r--fs/bcachefs/fs-io-direct.c23
-rw-r--r--fs/bcachefs/fs-io.c16
-rw-r--r--fs/bcachefs/fs.c1
-rw-r--r--fs/bcachefs/fsck.c264
-rw-r--r--fs/bcachefs/inode.c2
-rw-r--r--fs/bcachefs/io_misc.c2
-rw-r--r--fs/bcachefs/journal_io.c17
-rw-r--r--fs/bcachefs/journal_reclaim.c2
-rw-r--r--fs/bcachefs/journal_seq_blacklist.c3
-rw-r--r--fs/bcachefs/journal_types.h1
-rw-r--r--fs/bcachefs/logged_ops.c7
-rw-r--r--fs/bcachefs/mean_and_variance_test.c28
-rw-r--r--fs/bcachefs/opts.c33
-rw-r--r--fs/bcachefs/opts.h21
-rw-r--r--fs/bcachefs/recovery.c410
-rw-r--r--fs/bcachefs/recovery.h32
-rw-r--r--fs/bcachefs/recovery_passes.c249
-rw-r--r--fs/bcachefs/recovery_passes.h17
-rw-r--r--fs/bcachefs/recovery_passes_types.h (renamed from fs/bcachefs/recovery_types.h)11
-rw-r--r--fs/bcachefs/reflink.c3
-rw-r--r--fs/bcachefs/replicas.c19
-rw-r--r--fs/bcachefs/sb-downgrade.c7
-rw-r--r--fs/bcachefs/sb-errors_types.h10
-rw-r--r--fs/bcachefs/sb-members.c53
-rw-r--r--fs/bcachefs/sb-members.h21
-rw-r--r--fs/bcachefs/snapshot.c227
-rw-r--r--fs/bcachefs/snapshot.h89
-rw-r--r--fs/bcachefs/subvolume.c72
-rw-r--r--fs/bcachefs/subvolume.h3
-rw-r--r--fs/bcachefs/subvolume_types.h2
-rw-r--r--fs/bcachefs/super-io.c20
-rw-r--r--fs/bcachefs/super.c21
-rw-r--r--fs/bcachefs/super_types.h2
-rw-r--r--fs/bcachefs/sysfs.c17
-rw-r--r--fs/bcachefs/tests.c2
-rw-r--r--fs/bcachefs/util.c143
-rw-r--r--fs/bcachefs/util.h22
-rw-r--r--fs/btrfs/delayed-inode.c3
-rw-r--r--fs/btrfs/extent-tree.c8
-rw-r--r--fs/btrfs/extent_io.c21
-rw-r--r--fs/btrfs/inode.c15
-rw-r--r--fs/btrfs/ioctl.c37
-rw-r--r--fs/btrfs/qgroup.c2
-rw-r--r--fs/btrfs/root-tree.c10
-rw-r--r--fs/btrfs/root-tree.h2
-rw-r--r--fs/btrfs/transaction.c19
-rw-r--r--fs/ceph/addr.c4
-rw-r--r--fs/ceph/caps.c4
-rw-r--r--fs/ceph/mds_client.c9
-rw-r--r--fs/ceph/mds_client.h3
-rw-r--r--fs/cramfs/inode.c2
-rw-r--r--fs/ext4/super.c8
-rw-r--r--fs/f2fs/super.c2
-rw-r--r--fs/fuse/cuse.c4
-rw-r--r--fs/fuse/dir.c1
-rw-r--r--fs/fuse/file.c12
-rw-r--r--fs/fuse/fuse_i.h7
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/fuse/iomode.c60
-rw-r--r--fs/jfs/jfs_logmgr.c4
-rw-r--r--fs/kernfs/file.c9
-rw-r--r--fs/namei.c7
-rw-r--r--fs/nfsd/nfs4state.c43
-rw-r--r--fs/nfsd/nfs4xdr.c47
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/dir.c2
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/bootconfig.c12
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/romfs/super.c2
-rw-r--r--fs/smb/client/cached_dir.c6
-rw-r--r--fs/smb/client/cifs_debug.c6
-rw-r--r--fs/smb/client/cifsfs.c11
-rw-r--r--fs/smb/client/cifsglob.h20
-rw-r--r--fs/smb/client/cifsproto.h20
-rw-r--r--fs/smb/client/cifssmb.c6
-rw-r--r--fs/smb/client/connect.c161
-rw-r--r--fs/smb/client/dfs.c51
-rw-r--r--fs/smb/client/dfs.h33
-rw-r--r--fs/smb/client/dfs_cache.c53
-rw-r--r--fs/smb/client/dir.c22
-rw-r--r--fs/smb/client/file.c111
-rw-r--r--fs/smb/client/fs_context.c27
-rw-r--r--fs/smb/client/fs_context.h14
-rw-r--r--fs/smb/client/fscache.c16
-rw-r--r--fs/smb/client/fscache.h6
-rw-r--r--fs/smb/client/inode.c5
-rw-r--r--fs/smb/client/ioctl.c6
-rw-r--r--fs/smb/client/misc.c9
-rw-r--r--fs/smb/client/smb1ops.c4
-rw-r--r--fs/smb/client/smb2misc.c4
-rw-r--r--fs/smb/client/smb2ops.c107
-rw-r--r--fs/smb/client/smb2pdu.c13
-rw-r--r--fs/smb/client/smb2transport.c2
-rw-r--r--fs/smb/client/trace.h4
-rw-r--r--fs/smb/server/ksmbd_netlink.h3
-rw-r--r--fs/smb/server/mgmt/share_config.c7
-rw-r--r--fs/smb/server/smb2ops.c10
-rw-r--r--fs/smb/server/smb2pdu.c3
-rw-r--r--fs/smb/server/transport_ipc.c37
-rw-r--r--fs/squashfs/inode.c5
-rw-r--r--fs/super.c24
-rw-r--r--fs/tracefs/event_inode.c14
-rw-r--r--fs/vboxsf/file.c1
-rw-r--r--fs/vboxsf/super.c9
-rw-r--r--fs/vboxsf/utils.c3
-rw-r--r--fs/xfs/libxfs/xfs_sb.c40
-rw-r--r--fs/xfs/libxfs/xfs_sb.h5
-rw-r--r--fs/xfs/scrub/common.c4
-rw-r--r--fs/xfs/xfs_aops.c7
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_icache.c8
-rw-r--r--fs/xfs/xfs_inode.c15
-rw-r--r--fs/xfs/xfs_super.c6
-rw-r--r--fs/xfs/xfs_trans.h9
-rw-r--r--fs/zonefs/super.c2
-rw-r--r--include/acpi/acpi_bus.h8
-rw-r--r--include/asm-generic/bug.h5
-rw-r--r--include/asm-generic/export.h11
-rw-r--r--include/asm-generic/hyperv-tlfs.h19
-rw-r--r--include/asm-generic/mshyperv.h14
-rw-r--r--include/kvm/arm_pmu.h2
-rw-r--r--include/linux/blkdev.h11
-rw-r--r--include/linux/bootconfig.h8
-rw-r--r--include/linux/bpf.h16
-rw-r--r--include/linux/cc_platform.h12
-rw-r--r--include/linux/compiler.h2
-rw-r--r--include/linux/device.h1
-rw-r--r--include/linux/dma-fence.h7
-rw-r--r--include/linux/energy_model.h1
-rw-r--r--include/linux/framer/framer.h4
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/gfp_types.h2
-rw-r--r--include/linux/gpio/driver.h17
-rw-r--r--include/linux/gpio/property.h1
-rw-r--r--include/linux/hyperv.h1
-rw-r--r--include/linux/interrupt.h3
-rw-r--r--include/linux/io_uring_types.h3
-rw-r--r--include/linux/irqflags.h2
-rw-r--r--include/linux/libata.h1
-rw-r--r--include/linux/mm.h10
-rw-r--r--include/linux/randomize_kstack.h2
-rw-r--r--include/linux/rwbase_rt.h4
-rw-r--r--include/linux/rwsem.h6
-rw-r--r--include/linux/secretmem.h4
-rw-r--r--include/linux/shmem_fs.h9
-rw-r--r--include/linux/skbuff.h7
-rw-r--r--include/linux/sockptr.h25
-rw-r--r--include/linux/stackdepot.h7
-rw-r--r--include/linux/swapops.h65
-rw-r--r--include/linux/timecounter.h11
-rw-r--r--include/linux/timekeeping.h49
-rw-r--r--include/linux/timer.h12
-rw-r--r--include/linux/u64_stats_sync.h9
-rw-r--r--include/linux/udp.h30
-rw-r--r--include/linux/virtio.h7
-rw-r--r--include/net/addrconf.h4
-rw-r--r--include/net/bluetooth/bluetooth.h9
-rw-r--r--include/net/bluetooth/hci.h9
-rw-r--r--include/net/cfg80211.h2
-rw-r--r--include/net/inet_connection_sock.h1
-rw-r--r--include/net/ip_tunnels.h33
-rw-r--r--include/net/mana/mana.h1
-rw-r--r--include/net/netfilter/nf_flow_table.h12
-rw-r--r--include/net/netfilter/nf_tables.h14
-rw-r--r--include/net/sch_generic.h1
-rw-r--r--include/net/sock.h7
-rw-r--r--include/net/xdp_sock.h2
-rw-r--r--include/scsi/scsi_driver.h1
-rw-r--r--include/scsi/scsi_host.h1
-rw-r--r--include/sound/hdaudio_ext.h3
-rw-r--r--include/sound/intel-nhlt.h10
-rw-r--r--include/sound/tas2781-tlv.h2
-rw-r--r--include/trace/events/rpcgss.h4
-rw-r--r--include/uapi/linux/kfd_ioctl.h17
-rw-r--r--include/uapi/linux/vhost.h15
-rw-r--r--include/uapi/scsi/scsi_bsg_mpi3mr.h2
-rw-r--r--include/ufs/ufshcd.h1
-rw-r--r--include/vdso/datapage.h8
-rw-r--r--init/initramfs.c2
-rw-r--r--init/main.c7
-rw-r--r--io_uring/io_uring.c57
-rw-r--r--io_uring/kbuf.c118
-rw-r--r--io_uring/kbuf.h8
-rw-r--r--io_uring/net.c1
-rw-r--r--io_uring/rw.c9
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/arena.c25
-rw-r--r--kernel/bpf/bloom_filter.c13
-rw-r--r--kernel/bpf/helpers.c2
-rw-r--r--kernel/bpf/syscall.c35
-rw-r--r--kernel/bpf/verifier.c30
-rw-r--r--kernel/cpu.c3
-rw-r--r--kernel/dma/swiotlb.c91
-rw-r--r--kernel/fork.c33
-rw-r--r--kernel/irq/manage.c9
-rw-r--r--kernel/kprobes.c18
-rw-r--r--kernel/power/suspend.c6
-rw-r--r--kernel/time/tick-common.c17
-rw-r--r--kernel/time/tick-sched.c54
-rw-r--r--kernel/time/tick-sched.h2
-rw-r--r--kernel/time/timer.c22
-rw-r--r--kernel/time/timer_migration.c32
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/bpf_trace.c10
-rw-r--r--kernel/trace/ring_buffer.c6
-rw-r--r--kernel/trace/trace_events.c4
-rw-r--r--lib/bootconfig.c22
-rw-r--r--lib/checksum_kunit.c5
-rw-r--r--lib/stackdepot.c4
-rw-r--r--lib/test_ubsan.c2
-rw-r--r--mm/Makefile3
-rw-r--r--mm/gup.c68
-rw-r--r--mm/huge_memory.c6
-rw-r--r--mm/hugetlb.c10
-rw-r--r--mm/internal.h10
-rw-r--r--mm/madvise.c17
-rw-r--r--mm/memory-failure.c18
-rw-r--r--mm/memory.c4
-rw-r--r--mm/page_owner.c190
-rw-r--r--mm/shmem.c6
-rw-r--r--mm/vmalloc.c76
-rw-r--r--net/9p/client.c10
-rw-r--r--net/9p/trans_fd.c1
-rw-r--r--net/ax25/ax25_dev.c2
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/bluetooth/hci_core.c6
-rw-r--r--net/bluetooth/hci_debugfs.c48
-rw-r--r--net/bluetooth/hci_event.c25
-rw-r--r--net/bluetooth/hci_request.c4
-rw-r--r--net/bluetooth/hci_sock.c21
-rw-r--r--net/bluetooth/hci_sync.c16
-rw-r--r--net/bluetooth/iso.c46
-rw-r--r--net/bluetooth/l2cap_core.c3
-rw-r--r--net/bluetooth/l2cap_sock.c52
-rw-r--r--net/bluetooth/rfcomm/sock.c14
-rw-r--r--net/bluetooth/sco.c23
-rw-r--r--net/bridge/br_input.c15
-rw-r--r--net/bridge/br_netfilter_hooks.c6
-rw-r--r--net/bridge/br_private.h1
-rw-r--r--net/bridge/netfilter/ebtables.c6
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c14
-rw-r--r--net/core/dev.c8
-rw-r--r--net/core/gro.c3
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/sock_map.c6
-rw-r--r--net/hsr/hsr_device.c13
-rw-r--r--net/hsr/hsr_slave.c3
-rw-r--r--net/ipv4/fib_frontend.c5
-rw-r--r--net/ipv4/inet_connection_sock.c44
-rw-r--r--net/ipv4/inet_fragment.c70
-rw-r--r--net/ipv4/ip_fragment.c2
-rw-r--r--net/ipv4/ip_gre.c5
-rw-r--r--net/ipv4/netfilter/Kconfig1
-rw-r--r--net/ipv4/netfilter/arp_tables.c8
-rw-r--r--net/ipv4/netfilter/ip_tables.c8
-rw-r--r--net/ipv4/nexthop.c4
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/udp.c7
-rw-r--r--net/ipv4/udp_offload.c23
-rw-r--r--net/ipv6/addrconf.c12
-rw-r--r--net/ipv6/ip6_fib.c21
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c8
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/ipv6/udp_offload.c8
-rw-r--r--net/mac80211/cfg.c5
-rw-r--r--net/mac80211/debug.h2
-rw-r--r--net/mac80211/ieee80211_i.h4
-rw-r--r--net/mac80211/mlme.c15
-rw-r--r--net/mptcp/protocol.c2
-rw-r--r--net/mptcp/sockopt.c4
-rw-r--r--net/mptcp/subflow.c2
-rw-r--r--net/netfilter/nf_flow_table_inet.c3
-rw-r--r--net/netfilter/nf_flow_table_ip.c10
-rw-r--r--net/netfilter/nf_tables_api.c174
-rw-r--r--net/netfilter/nft_lookup.c1
-rw-r--r--net/netfilter/nft_set_bitmap.c4
-rw-r--r--net/netfilter/nft_set_hash.c8
-rw-r--r--net/netfilter/nft_set_pipapo.c25
-rw-r--r--net/netfilter/nft_set_rbtree.c4
-rw-r--r--net/nfc/llcp_sock.c12
-rw-r--r--net/nfc/nci/core.c5
-rw-r--r--net/openvswitch/conntrack.c5
-rw-r--r--net/rds/rdma.c2
-rw-r--r--net/sched/act_skbmod.c10
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c14
-rw-r--r--net/sunrpc/svcsock.c10
-rw-r--r--net/tls/tls_sw.c7
-rw-r--r--net/unix/af_unix.c16
-rw-r--r--net/unix/garbage.c18
-rw-r--r--net/vmw_vsock/virtio_transport.c3
-rw-r--r--net/wireless/trace.h2
-rw-r--r--net/wireless/wext-core.c7
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--scripts/Makefile.extrawarn10
-rw-r--r--scripts/Makefile.modfinal2
-rwxr-xr-xscripts/bpf_doc.py4
-rw-r--r--scripts/gcc-plugins/stackleak_plugin.c2
-rw-r--r--scripts/kconfig/conf.c5
-rw-r--r--scripts/kconfig/lkc.h2
-rw-r--r--scripts/kconfig/lxdialog/checklist.c2
-rw-r--r--scripts/kconfig/lxdialog/dialog.h12
-rw-r--r--scripts/kconfig/lxdialog/inputbox.c2
-rw-r--r--scripts/kconfig/lxdialog/menubox.c2
-rw-r--r--scripts/kconfig/lxdialog/textbox.c2
-rw-r--r--scripts/kconfig/lxdialog/util.c2
-rw-r--r--scripts/kconfig/lxdialog/yesno.c2
-rw-r--r--scripts/kconfig/mconf.c4
-rw-r--r--scripts/kconfig/menu.c22
-rw-r--r--scripts/kconfig/parser.y2
-rwxr-xr-xscripts/kernel-doc2
-rw-r--r--scripts/mod/modpost.c7
-rw-r--r--security/security.c4
-rw-r--r--security/selinux/selinuxfs.c12
-rw-r--r--sound/aoa/soundbus/i2sbus/core.c2
-rw-r--r--sound/core/seq/seq_ump_convert.c2
-rw-r--r--sound/hda/intel-nhlt.c26
-rw-r--r--sound/oss/dmasound/dmasound_paula.c8
-rw-r--r--sound/pci/emu10k1/emu10k1_callback.c7
-rw-r--r--sound/pci/hda/cs35l41_hda_property.c6
-rw-r--r--sound/pci/hda/cs35l56_hda.c8
-rw-r--r--sound/pci/hda/cs35l56_hda_i2c.c13
-rw-r--r--sound/pci/hda/cs35l56_hda_spi.c13
-rw-r--r--sound/pci/hda/patch_realtek.c105
-rw-r--r--sound/pci/hda/tas2781_hda_i2c.c122
-rw-r--r--sound/sh/aica.c17
-rw-r--r--sound/soc/amd/acp/acp-pci.c13
-rw-r--r--sound/soc/codecs/cs-amp-lib.c5
-rw-r--r--sound/soc/codecs/cs42l43.c12
-rw-r--r--sound/soc/codecs/es8326.c37
-rw-r--r--sound/soc/codecs/es8326.h2
-rw-r--r--sound/soc/codecs/rt1316-sdw.c8
-rw-r--r--sound/soc/codecs/rt1318-sdw.c8
-rw-r--r--sound/soc/codecs/rt5682-sdw.c16
-rw-r--r--sound/soc/codecs/rt700.c16
-rw-r--r--sound/soc/codecs/rt711-sdca-sdw.c6
-rw-r--r--sound/soc/codecs/rt711-sdca.c18
-rw-r--r--sound/soc/codecs/rt711-sdw.c8
-rw-r--r--sound/soc/codecs/rt711.c16
-rw-r--r--sound/soc/codecs/rt712-sdca-dmic.c24
-rw-r--r--sound/soc/codecs/rt712-sdca-sdw.c7
-rw-r--r--sound/soc/codecs/rt712-sdca.c20
-rw-r--r--sound/soc/codecs/rt715-sdca-sdw.c2
-rw-r--r--sound/soc/codecs/rt715-sdca.c46
-rw-r--r--sound/soc/codecs/rt715-sdw.c4
-rw-r--r--sound/soc/codecs/rt715.c24
-rw-r--r--sound/soc/codecs/rt722-sdca-sdw.c4
-rw-r--r--sound/soc/codecs/rt722-sdca.c21
-rw-r--r--sound/soc/codecs/wm_adsp.c3
-rw-r--r--sound/soc/intel/avs/boards/da7219.c1
-rw-r--r--sound/soc/intel/avs/boards/dmic.c1
-rw-r--r--sound/soc/intel/avs/boards/es8336.c1
-rw-r--r--sound/soc/intel/avs/boards/i2s_test.c1
-rw-r--r--sound/soc/intel/avs/boards/max98357a.c1
-rw-r--r--sound/soc/intel/avs/boards/max98373.c1
-rw-r--r--sound/soc/intel/avs/boards/max98927.c1
-rw-r--r--sound/soc/intel/avs/boards/nau8825.c1
-rw-r--r--sound/soc/intel/avs/boards/probe.c1
-rw-r--r--sound/soc/intel/avs/boards/rt274.c1
-rw-r--r--sound/soc/intel/avs/boards/rt286.c1
-rw-r--r--sound/soc/intel/avs/boards/rt298.c1
-rw-r--r--sound/soc/intel/avs/boards/rt5514.c1
-rw-r--r--sound/soc/intel/avs/boards/rt5663.c1
-rw-r--r--sound/soc/intel/avs/boards/rt5682.c1
-rw-r--r--sound/soc/intel/avs/boards/ssm4567.c1
-rw-r--r--sound/soc/soc-ops.c2
-rw-r--r--sound/soc/sof/amd/acp.c8
-rw-r--r--sound/soc/sof/core.c14
-rw-r--r--sound/soc/sof/intel/hda-common-ops.c3
-rw-r--r--sound/soc/sof/intel/hda-dai-ops.c11
-rw-r--r--sound/soc/sof/intel/hda-dsp.c20
-rw-r--r--sound/soc/sof/intel/hda-pcm.c29
-rw-r--r--sound/soc/sof/intel/hda-stream.c70
-rw-r--r--sound/soc/sof/intel/hda.h6
-rw-r--r--sound/soc/sof/intel/lnl.c34
-rw-r--r--sound/soc/sof/intel/mtl.c14
-rw-r--r--sound/soc/sof/intel/mtl.h10
-rw-r--r--sound/soc/sof/ipc4-mtrace.c11
-rw-r--r--sound/soc/sof/ipc4-pcm.c191
-rw-r--r--sound/soc/sof/ipc4-priv.h14
-rw-r--r--sound/soc/sof/ipc4-topology.c41
-rw-r--r--sound/soc/sof/ops.h24
-rw-r--r--sound/soc/sof/pcm.c8
-rw-r--r--sound/soc/sof/sof-audio.h9
-rw-r--r--sound/soc/sof/sof-priv.h24
-rw-r--r--sound/usb/line6/driver.c6
-rw-r--r--tools/bpf/bpftool/gen.c2
-rw-r--r--tools/hv/hv_kvp_daemon.c213
-rw-r--r--tools/include/linux/btf_ids.h2
-rw-r--r--tools/include/linux/kernel.h1
-rw-r--r--tools/include/linux/mm.h5
-rw-r--r--tools/include/linux/panic.h19
-rw-r--r--tools/lib/bpf/libbpf.c10
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py7
-rw-r--r--tools/objtool/check.c2
-rw-r--r--tools/power/x86/turbostat/turbostat.818
-rw-r--r--tools/power/x86/turbostat/turbostat.c2017
-rw-r--r--tools/testing/cxl/test/cxl.c10
-rw-r--r--tools/testing/kunit/configs/all_tests.config3
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_htab.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_list.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c2
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab.c2
-rw-r--r--tools/testing/selftests/bpf/progs/arena_list.c10
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c10
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c68
-rw-r--r--tools/testing/selftests/dmabuf-heaps/config3
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/settings1
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc2
-rw-r--r--tools/testing/selftests/kselftest.h43
-rw-r--r--tools/testing/selftests/kselftest_harness.h7
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c4
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h11
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c39
-rw-r--r--tools/testing/selftests/mm/vm_util.h2
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c783
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh9
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh4
-rw-r--r--tools/testing/selftests/net/reuseaddr_conflict.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/proc.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c12
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c23
-rw-r--r--tools/testing/selftests/net/tcp_ao/setsockopt-closed.c2
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_mdb.sh205
-rw-r--r--tools/testing/selftests/net/tls.c34
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh10
-rw-r--r--tools/testing/selftests/net/udpgso.c2
-rw-r--r--tools/testing/selftests/seccomp/settings2
-rw-r--r--tools/testing/selftests/timers/posix_timers.c105
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c73
-rwxr-xr-xtools/testing/selftests/turbostat/defcolumns.py60
1196 files changed, 17720 insertions, 7855 deletions
diff --git a/.mailmap b/.mailmap
index dcc7af7ad4c73e..625b496bf5f451 100644
--- a/.mailmap
+++ b/.mailmap
@@ -20,6 +20,7 @@ Adam Oldham <oldhamca@gmail.com>
Adam Radford <aradford@gmail.com>
Adriana Reus <adi.reus@gmail.com> <adriana.reus@intel.com>
Adrian Bunk <bunk@stusta.de>
+Ajay Kaher <ajay.kaher@broadcom.com> <akaher@vmware.com>
Akhil P Oommen <quic_akhilpo@quicinc.com> <akhilpo@codeaurora.org>
Alan Cox <alan@lxorguk.ukuu.org.uk>
Alan Cox <root@hraefn.swansea.linux.org.uk>
@@ -36,6 +37,7 @@ Alexei Avshalom Lazar <quic_ailizaro@quicinc.com> <ailizaro@codeaurora.org>
Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
+Alexey Makhalov <alexey.amakhalov@broadcom.com> <amakhalov@vmware.com>
Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com>
Alex Shi <alexs@kernel.org> <alex.shi@intel.com>
Alex Shi <alexs@kernel.org> <alex.shi@linaro.org>
@@ -110,6 +112,7 @@ Brendan Higgins <brendan.higgins@linux.dev> <brendanhiggins@google.com>
Brian Avery <b.avery@hp.com>
Brian King <brking@us.ibm.com>
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
+Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com>
Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org>
Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org>
@@ -443,7 +446,8 @@ Mythri P K <mythripk@ti.com>
Nadav Amit <nadav.amit@gmail.com> <namit@vmware.com>
Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il>
Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
-Naoya Horiguchi <naoya.horiguchi@nec.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <naoya.horiguchi@nec.com>
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
@@ -498,7 +502,8 @@ Prasad Sodagudi <quic_psodagud@quicinc.com> <psodagud@codeaurora.org>
Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
Qais Yousef <qyousef@layalina.io> <qais.yousef@imgtec.com>
Qais Yousef <qyousef@layalina.io> <qais.yousef@arm.com>
-Quentin Monnet <quentin@isovalent.com> <quentin.monnet@netronome.com>
+Quentin Monnet <qmo@kernel.org> <quentin.monnet@netronome.com>
+Quentin Monnet <qmo@kernel.org> <quentin@isovalent.com>
Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com>
Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl>
Rajeev Nandan <quic_rajeevny@quicinc.com> <rajeevny@codeaurora.org>
@@ -528,6 +533,7 @@ Rocky Liao <quic_rjliao@quicinc.com> <rjliao@codeaurora.org>
Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru>
+Ronak Doshi <ronak.doshi@broadcom.com> <doshir@vmware.com>
Muchun Song <muchun.song@linux.dev> <songmuchun@bytedance.com>
Muchun Song <muchun.song@linux.dev> <smuchun@gmail.com>
Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com>
@@ -650,6 +656,7 @@ Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com>
Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.org>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.com>
+Vishnu Dasa <vishnu.dasa@broadcom.com> <vdasa@vmware.com>
Vivek Aknurwar <quic_viveka@quicinc.com> <viveka@codeaurora.org>
Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
diff --git a/CREDITS b/CREDITS
index c55c5a0ee4ff65..0107047f807bfc 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3146,6 +3146,10 @@ S: Triftstra=DFe 55
S: 13353 Berlin
S: Germany
+N: Gustavo Pimental
+E: gustavo.pimentel@synopsys.com
+D: PCI driver for Synopsys DesignWare
+
N: Emanuel Pirker
E: epirker@edu.uni-klu.ac.at
D: AIC5800 IEEE 1394, RAW I/O on 1394
diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index cce768afec6bed..25a04cda4c2c05 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
the BHB might be shared across privilege levels even in the presence of
Enhanced IBRS.
-Currently the only known real-world BHB attack vector is via
-unprivileged eBPF. Therefore, it's highly recommended to not enable
-unprivileged eBPF, especially when eIBRS is used (without retpolines).
-For a full mitigation against BHB attacks, it's recommended to use
-retpolines (or eIBRS combined with retpolines).
+Previously the only known real-world BHB attack vector was via unprivileged
+eBPF. Further research has found attacks that don't require unprivileged eBPF.
+For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
+use the BHB clearing sequence.
Attack scenarios
----------------
@@ -430,6 +429,23 @@ The possible values in this file are:
'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
=========================== =======================================================
+ - Branch History Injection (BHI) protection status:
+
+.. list-table::
+
+ * - BHI: Not affected
+ - System is not affected
+ * - BHI: Retpoline
+ - System is protected by retpoline
+ * - BHI: BHI_DIS_S
+ - System is protected by BHI_DIS_S
+ * - BHI: SW loop, KVM SW loop
+ - System is protected by software clearing sequence
+ * - BHI: Vulnerable
+ - System is vulnerable to BHI
+ * - BHI: Vulnerable, KVM: SW loop
+ - System is vulnerable; KVM is protected by software clearing sequence
+
Full mitigation might require a microcode update from the CPU
vendor. When the necessary microcode is not available, the kernel will
report vulnerability.
@@ -484,7 +500,11 @@ Spectre variant 2
Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
boot, by setting the IBRS bit, and they're automatically protected against
- Spectre v2 variant attacks.
+ some Spectre v2 variant attacks. The BHB can still influence the choice of
+ indirect branch predictor entry, and although branch predictor entries are
+ isolated between modes when eIBRS is enabled, the BHB itself is not isolated
+ between modes. Systems which support BHI_DIS_S will set it to protect against
+ BHI attacks.
On Intel's enhanced IBRS systems, this includes cross-thread branch target
injections on SMT systems (STIBP). In other words, Intel eIBRS enables
@@ -638,6 +658,18 @@ kernel command line.
spectre_v2=off. Spectre variant 1 mitigations
cannot be disabled.
+ spectre_bhi=
+
+ [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. This setting affects the deployment
+ of the HW BHI control and the SW BHB clearing sequence.
+
+ on
+ (default) Enable the HW or SW mitigation as
+ needed.
+ off
+ Disable the mitigation.
+
For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
Mitigation selection guide
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bb884c14b2f679..902ecd92a29fbe 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3444,6 +3444,7 @@
retbleed=off [X86]
spec_rstack_overflow=off [X86]
spec_store_bypass_disable=off [X86,PPC]
+ spectre_bhi=off [X86]
spectre_v2_user=off [X86]
srbds=off [X86,INTEL]
ssbd=force-off [ARM64]
@@ -6063,6 +6064,15 @@
sonypi.*= [HW] Sony Programmable I/O Control Device driver
See Documentation/admin-guide/laptops/sonypi.rst
+ spectre_bhi= [X86] Control mitigation of Branch History Injection
+ (BHI) vulnerability. This setting affects the
+ deployment of the HW BHI control and the SW BHB
+ clearing sequence.
+
+ on - (default) Enable the HW or SW mitigation
+ as needed.
+ off - Disable the mitigation.
+
spectre_v2= [X86,EARLY] Control mitigation of Spectre variant 2
(indirect branch speculation) vulnerability.
The default operation protects the kernel from
@@ -6599,7 +6609,7 @@
To turn off having tracepoints sent to printk,
echo 0 > /proc/sys/kernel/tracepoint_printk
Note, echoing 1 into this file without the
- tracepoint_printk kernel cmdline option has no effect.
+ tp_printk kernel cmdline option has no effect.
The tp_printk_stop_on_boot (see below) can also be used
to stop the printing of events to console at
diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst
index b42132969e3157..13632671adaeaa 100644
--- a/Documentation/admin-guide/mm/zswap.rst
+++ b/Documentation/admin-guide/mm/zswap.rst
@@ -155,7 +155,7 @@ Setting this parameter to 100 will disable the hysteresis.
Some users cannot tolerate the swapping that comes with zswap store failures
and zswap writebacks. Swapping can be disabled entirely (without disabling
-zswap itself) on a cgroup-basis as follows:
+zswap itself) on a cgroup-basis as follows::
echo 0 > /sys/fs/cgroup/<cgroup-name>/memory.zswap.writeback
@@ -166,7 +166,7 @@ writeback (because the same pages might be rejected again and again).
When there is a sizable amount of cold memory residing in the zswap pool, it
can be advantageous to proactively write these cold pages to swap and reclaim
the memory for other use cases. By default, the zswap shrinker is disabled.
-User can enable it as follows:
+User can enable it as follows::
echo Y > /sys/module/zswap/parameters/shrinker_enabled
diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst
index 3712d81cb50c67..6c245582d8fb16 100644
--- a/Documentation/arch/x86/resctrl.rst
+++ b/Documentation/arch/x86/resctrl.rst
@@ -574,7 +574,7 @@ Memory b/w domain is L3 cache.
MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
Memory bandwidth Allocation specified in MiBps
----------------------------------------------
+----------------------------------------------
Memory bandwidth domain is L3 cache.
::
diff --git a/Documentation/dev-tools/testing-overview.rst b/Documentation/dev-tools/testing-overview.rst
index 0aaf6ea53608fc..1619e5e5cc9c4b 100644
--- a/Documentation/dev-tools/testing-overview.rst
+++ b/Documentation/dev-tools/testing-overview.rst
@@ -104,6 +104,8 @@ Some of these tools are listed below:
KASAN and can be used in production. See Documentation/dev-tools/kfence.rst
* lockdep is a locking correctness validator. See
Documentation/locking/lockdep-design.rst
+* Runtime Verification (RV) supports checking specific behaviours for a given
+ subsystem. See Documentation/trace/rv/runtime-verification.rst
* There are several other pieces of debug instrumentation in the kernel, many
of which can be found in lib/Kconfig.debug
diff --git a/Documentation/devicetree/bindings/clock/keystone-gate.txt b/Documentation/devicetree/bindings/clock/keystone-gate.txt
index c5aa187026e3a5..43f6fb6c939276 100644
--- a/Documentation/devicetree/bindings/clock/keystone-gate.txt
+++ b/Documentation/devicetree/bindings/clock/keystone-gate.txt
@@ -1,5 +1,3 @@
-Status: Unstable - ABI compatibility may be broken in the future
-
Binding for Keystone gate control driver which uses PSC controller IP.
This binding uses the common clock binding[1].
diff --git a/Documentation/devicetree/bindings/clock/keystone-pll.txt b/Documentation/devicetree/bindings/clock/keystone-pll.txt
index 9a3fbc66560652..69b0eb7c03c9e6 100644
--- a/Documentation/devicetree/bindings/clock/keystone-pll.txt
+++ b/Documentation/devicetree/bindings/clock/keystone-pll.txt
@@ -1,5 +1,3 @@
-Status: Unstable - ABI compatibility may be broken in the future
-
Binding for keystone PLLs. The main PLL IP typically has a multiplier,
a divider and a post divider. The additional PLL IPs like ARMPLL, DDRPLL
and PAPLL are controlled by the memory mapped register where as the Main
diff --git a/Documentation/devicetree/bindings/clock/ti/adpll.txt b/Documentation/devicetree/bindings/clock/ti/adpll.txt
index 4c8a2ce2cd7018..3122360adcf3c0 100644
--- a/Documentation/devicetree/bindings/clock/ti/adpll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/adpll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments ADPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped ADPLL with two to three selectable input clocks
and three to four children.
diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt
index ade4dd4c30f0e1..bbd505c1199df5 100644
--- a/Documentation/devicetree/bindings/clock/ti/apll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/apll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments APLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped APLL with usually two selectable input clocks
(reference clock and bypass clock), with analog phase locked
diff --git a/Documentation/devicetree/bindings/clock/ti/autoidle.txt b/Documentation/devicetree/bindings/clock/ti/autoidle.txt
index 7c735dde9fe971..05645a10a9e33c 100644
--- a/Documentation/devicetree/bindings/clock/ti/autoidle.txt
+++ b/Documentation/devicetree/bindings/clock/ti/autoidle.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments autoidle clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a register mapped
clock which can be put to idle automatically by hardware based on the usage
and a configuration bit setting. Autoidle clock is never an individual
diff --git a/Documentation/devicetree/bindings/clock/ti/clockdomain.txt b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt
index 9c6199249ce596..edf0b5d4276824 100644
--- a/Documentation/devicetree/bindings/clock/ti/clockdomain.txt
+++ b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments clockdomain.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1] in consumer role.
Every clock on TI SoC belongs to one clockdomain, but software
only needs this information for specific clocks which require
diff --git a/Documentation/devicetree/bindings/clock/ti/composite.txt b/Documentation/devicetree/bindings/clock/ti/composite.txt
index 33ac7c9ad053c7..6f7e1331b5466c 100644
--- a/Documentation/devicetree/bindings/clock/ti/composite.txt
+++ b/Documentation/devicetree/bindings/clock/ti/composite.txt
@@ -1,7 +1,5 @@
Binding for TI composite clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped composite clock with multiple different sub-types;
diff --git a/Documentation/devicetree/bindings/clock/ti/divider.txt b/Documentation/devicetree/bindings/clock/ti/divider.txt
index 9b13b32974f992..4d7c76f0b35695 100644
--- a/Documentation/devicetree/bindings/clock/ti/divider.txt
+++ b/Documentation/devicetree/bindings/clock/ti/divider.txt
@@ -1,7 +1,5 @@
Binding for TI divider clock
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped adjustable clock rate divider that does not gate and has
only one input clock or parent. By default the value programmed into
diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt
index 37a7cb6ad07d87..14a1b72c2e7120 100644
--- a/Documentation/devicetree/bindings/clock/ti/dpll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments DPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped DPLL with usually two selectable input clocks
(reference clock and bypass clock), with digital phase locked
diff --git a/Documentation/devicetree/bindings/clock/ti/fapll.txt b/Documentation/devicetree/bindings/clock/ti/fapll.txt
index c19b3f253b8cf7..88986ef39ddd24 100644
--- a/Documentation/devicetree/bindings/clock/ti/fapll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/fapll.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments FAPLL clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped FAPLL with usually two selectable input clocks
(reference clock and bypass clock), and one or more child
diff --git a/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt b/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt
index 518e3c1422762c..dc69477b6e98eb 100644
--- a/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt
+++ b/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt
@@ -1,7 +1,5 @@
Binding for TI fixed factor rate clock sources.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1], and also uses the autoidle
support from TI autoidle clock [2].
diff --git a/Documentation/devicetree/bindings/clock/ti/gate.txt b/Documentation/devicetree/bindings/clock/ti/gate.txt
index 4982615c01b9cb..a8e0335b006a07 100644
--- a/Documentation/devicetree/bindings/clock/ti/gate.txt
+++ b/Documentation/devicetree/bindings/clock/ti/gate.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments gate clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. This clock is
quite much similar to the basic gate-clock [2], however,
it supports a number of additional features. If no register
diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt
index d3eb5ca92a7fe6..85fb1f2d2d286b 100644
--- a/Documentation/devicetree/bindings/clock/ti/interface.txt
+++ b/Documentation/devicetree/bindings/clock/ti/interface.txt
@@ -1,7 +1,5 @@
Binding for Texas Instruments interface clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. This clock is
quite much similar to the basic gate-clock [2], however,
it supports a number of additional features, including
diff --git a/Documentation/devicetree/bindings/clock/ti/mux.txt b/Documentation/devicetree/bindings/clock/ti/mux.txt
index b33f641f104321..cd56d3c1c09f3b 100644
--- a/Documentation/devicetree/bindings/clock/ti/mux.txt
+++ b/Documentation/devicetree/bindings/clock/ti/mux.txt
@@ -1,7 +1,5 @@
Binding for TI mux clock.
-Binding status: Unstable - ABI compatibility may be broken in the future
-
This binding uses the common clock binding[1]. It assumes a
register-mapped multiplexer with multiple input clock signals or
parents, one of which can be selected as output. This clock does not
diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
index c0d6a4fdff97e3..e6dc5494baee29 100644
--- a/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
+++ b/Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
@@ -53,6 +53,15 @@ patternProperties:
compatible:
const: qcom,sm8150-dpu
+ "^displayport-controller@[0-9a-f]+$":
+ type: object
+ additionalProperties: true
+
+ properties:
+ compatible:
+ contains:
+ const: qcom,sm8150-dp
+
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
diff --git a/Documentation/devicetree/bindings/dts-coding-style.rst b/Documentation/devicetree/bindings/dts-coding-style.rst
index a9bdd2b59dcab6..8a68331075a098 100644
--- a/Documentation/devicetree/bindings/dts-coding-style.rst
+++ b/Documentation/devicetree/bindings/dts-coding-style.rst
@@ -144,6 +144,8 @@ Example::
#dma-cells = <1>;
clocks = <&clock_controller 0>, <&clock_controller 1>;
clock-names = "bus", "host";
+ #address-cells = <1>;
+ #size-cells = <1>;
vendor,custom-property = <2>;
status = "disabled";
diff --git a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
index 528ef3572b621e..055a3351880bc1 100644
--- a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
@@ -94,6 +94,10 @@ properties:
local-bd-address: true
+ qcom,local-bd-address-broken:
+ type: boolean
+ description:
+ boot firmware is incorrectly passing the address in big-endian order
required:
- compatible
diff --git a/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml b/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml
index afcdeed4e88af6..bc813fe74faba5 100644
--- a/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml
+++ b/Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml
@@ -52,6 +52,9 @@ properties:
- const: main
- const: mm
+ power-domains:
+ maxItems: 1
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt b/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt
index 25f8658e216ff0..48a49c516b62cb 100644
--- a/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt
+++ b/Documentation/devicetree/bindings/remoteproc/ti,davinci-rproc.txt
@@ -1,9 +1,6 @@
TI Davinci DSP devices
=======================
-Binding status: Unstable - Subject to changes for DT representation of clocks
- and resets
-
The TI Davinci family of SoCs usually contains a TI DSP Core sub-system that
is used to offload some of the processor-intensive tasks or algorithms, for
achieving various system level goals.
diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml
index 397f75909b2058..ce1a6505eb5149 100644
--- a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml
+++ b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-dcfg.yaml
@@ -51,7 +51,7 @@ properties:
ranges: true
patternProperties:
- "^clock-controller@[0-9a-z]+$":
+ "^clock-controller@[0-9a-f]+$":
$ref: /schemas/clock/fsl,flexspi-clock.yaml#
required:
diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml
index 8d088b5fe8236b..a6a511b00a1281 100644
--- a/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml
+++ b/Documentation/devicetree/bindings/soc/fsl/fsl,layerscape-scfg.yaml
@@ -41,7 +41,7 @@ properties:
ranges: true
patternProperties:
- "^interrupt-controller@[a-z0-9]+$":
+ "^interrupt-controller@[a-f0-9]+$":
$ref: /schemas/interrupt-controller/fsl,ls-extirq.yaml#
required:
diff --git a/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml b/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml
index 7a4a6ab85970d6..ab8f28993139e5 100644
--- a/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml
+++ b/Documentation/devicetree/bindings/timer/arm,arch_timer_mmio.yaml
@@ -60,7 +60,7 @@ properties:
be implemented in an always-on power domain."
patternProperties:
- '^frame@[0-9a-z]*$':
+ '^frame@[0-9a-f]+$':
type: object
additionalProperties: false
description: A timer node has up to 8 frame sub-nodes, each with the following properties.
diff --git a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
index 10c146424baa1e..cd3680dc002f96 100644
--- a/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
+++ b/Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
@@ -27,10 +27,13 @@ properties:
- qcom,msm8996-ufshc
- qcom,msm8998-ufshc
- qcom,sa8775p-ufshc
+ - qcom,sc7180-ufshc
- qcom,sc7280-ufshc
+ - qcom,sc8180x-ufshc
- qcom,sc8280xp-ufshc
- qcom,sdm845-ufshc
- qcom,sm6115-ufshc
+ - qcom,sm6125-ufshc
- qcom,sm6350-ufshc
- qcom,sm8150-ufshc
- qcom,sm8250-ufshc
@@ -42,11 +45,11 @@ properties:
- const: jedec,ufs-2.0
clocks:
- minItems: 8
+ minItems: 7
maxItems: 11
clock-names:
- minItems: 8
+ minItems: 7
maxItems: 11
dma-coherent: true
@@ -117,9 +120,35 @@ allOf:
compatible:
contains:
enum:
+ - qcom,sc7180-ufshc
+ then:
+ properties:
+ clocks:
+ minItems: 7
+ maxItems: 7
+ clock-names:
+ items:
+ - const: core_clk
+ - const: bus_aggr_clk
+ - const: iface_clk
+ - const: core_clk_unipro
+ - const: ref_clk
+ - const: tx_lane0_sync_clk
+ - const: rx_lane0_sync_clk
+ reg:
+ maxItems: 1
+ reg-names:
+ maxItems: 1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
- qcom,msm8998-ufshc
- qcom,sa8775p-ufshc
- qcom,sc7280-ufshc
+ - qcom,sc8180x-ufshc
- qcom,sc8280xp-ufshc
- qcom,sm8250-ufshc
- qcom,sm8350-ufshc
@@ -215,6 +244,7 @@ allOf:
contains:
enum:
- qcom,sm6115-ufshc
+ - qcom,sm6125-ufshc
then:
properties:
clocks:
@@ -248,7 +278,7 @@ allOf:
reg:
maxItems: 1
clocks:
- minItems: 8
+ minItems: 7
maxItems: 8
else:
properties:
@@ -256,7 +286,7 @@ allOf:
minItems: 1
maxItems: 2
clocks:
- minItems: 8
+ minItems: 7
maxItems: 11
unevaluatedProperties: false
diff --git a/Documentation/driver-api/virtio/writing_virtio_drivers.rst b/Documentation/driver-api/virtio/writing_virtio_drivers.rst
index e14c58796d2501..e5de6f5d061a7c 100644
--- a/Documentation/driver-api/virtio/writing_virtio_drivers.rst
+++ b/Documentation/driver-api/virtio/writing_virtio_drivers.rst
@@ -97,7 +97,6 @@ like this::
static struct virtio_driver virtio_dummy_driver = {
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtio_dummy_probe,
.remove = virtio_dummy_remove,
diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst
new file mode 100644
index 00000000000000..e2bd61ccd96fff
--- /dev/null
+++ b/Documentation/filesystems/bcachefs/index.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+bcachefs Documentation
+======================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ errorcodes
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 0ea1e44fa02823..1f9b4c905a6a7c 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -69,6 +69,7 @@ Documentation for filesystem implementations.
afs
autofs
autofs-mount-control
+ bcachefs/index
befs
bfs
btrfs
diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst
index b1d97fafddcfc9..bb5c44f8bd1c49 100644
--- a/Documentation/kbuild/llvm.rst
+++ b/Documentation/kbuild/llvm.rst
@@ -178,7 +178,7 @@ yet. Bug reports are always welcome at the issue tracker below!
- ``LLVM=1``
* - s390
- Maintained
- - ``CC=clang``
+ - ``LLVM=1`` (LLVM >= 18.1.0), ``CC=clang`` (LLVM < 18.1.0)
* - um (User Mode)
- Maintained
- ``LLVM=1``
diff --git a/Documentation/mm/page_owner.rst b/Documentation/mm/page_owner.rst
index 0d0334cd51798b..3a45a20fc05a1f 100644
--- a/Documentation/mm/page_owner.rst
+++ b/Documentation/mm/page_owner.rst
@@ -24,10 +24,10 @@ fragmentation statistics can be obtained through gfp flag information of
each page. It is already implemented and activated if page owner is
enabled. Other usages are more than welcome.
-It can also be used to show all the stacks and their outstanding
-allocations, which gives us a quick overview of where the memory is going
-without the need to screen through all the pages and match the allocation
-and free operation.
+It can also be used to show all the stacks and their current number of
+allocated base pages, which gives us a quick overview of where the memory
+is going without the need to screen through all the pages and match the
+allocation and free operation.
page owner is disabled by default. So, if you'd like to use it, you need
to add "page_owner=on" to your boot cmdline. If the kernel is built
@@ -75,42 +75,45 @@ Usage
cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt
cat stacks.txt
- prep_new_page+0xa9/0x120
- get_page_from_freelist+0x7e6/0x2140
- __alloc_pages+0x18a/0x370
- new_slab+0xc8/0x580
- ___slab_alloc+0x1f2/0xaf0
- __slab_alloc.isra.86+0x22/0x40
- kmem_cache_alloc+0x31b/0x350
- __khugepaged_enter+0x39/0x100
- dup_mmap+0x1c7/0x5ce
- copy_process+0x1afe/0x1c90
- kernel_clone+0x9a/0x3c0
- __do_sys_clone+0x66/0x90
- do_syscall_64+0x7f/0x160
- entry_SYSCALL_64_after_hwframe+0x6c/0x74
- stack_count: 234
+ post_alloc_hook+0x177/0x1a0
+ get_page_from_freelist+0xd01/0xd80
+ __alloc_pages+0x39e/0x7e0
+ allocate_slab+0xbc/0x3f0
+ ___slab_alloc+0x528/0x8a0
+ kmem_cache_alloc+0x224/0x3b0
+ sk_prot_alloc+0x58/0x1a0
+ sk_alloc+0x32/0x4f0
+ inet_create+0x427/0xb50
+ __sock_create+0x2e4/0x650
+ inet_ctl_sock_create+0x30/0x180
+ igmp_net_init+0xc1/0x130
+ ops_init+0x167/0x410
+ setup_net+0x304/0xa60
+ copy_net_ns+0x29b/0x4a0
+ create_new_namespaces+0x4a1/0x820
+ nr_base_pages: 16
...
...
echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold
cat /sys/kernel/debug/page_owner_stacks/show_stacks> stacks_7000.txt
cat stacks_7000.txt
- prep_new_page+0xa9/0x120
- get_page_from_freelist+0x7e6/0x2140
- __alloc_pages+0x18a/0x370
- alloc_pages_mpol+0xdf/0x1e0
- folio_alloc+0x14/0x50
- filemap_alloc_folio+0xb0/0x100
- page_cache_ra_unbounded+0x97/0x180
- filemap_fault+0x4b4/0x1200
- __do_fault+0x2d/0x110
- do_pte_missing+0x4b0/0xa30
- __handle_mm_fault+0x7fa/0xb70
- handle_mm_fault+0x125/0x300
- do_user_addr_fault+0x3c9/0x840
- exc_page_fault+0x68/0x150
- asm_exc_page_fault+0x22/0x30
- stack_count: 8248
+ post_alloc_hook+0x177/0x1a0
+ get_page_from_freelist+0xd01/0xd80
+ __alloc_pages+0x39e/0x7e0
+ alloc_pages_mpol+0x22e/0x490
+ folio_alloc+0xd5/0x110
+ filemap_alloc_folio+0x78/0x230
+ page_cache_ra_order+0x287/0x6f0
+ filemap_get_pages+0x517/0x1160
+ filemap_read+0x304/0x9f0
+ xfs_file_buffered_read+0xe6/0x1d0 [xfs]
+ xfs_file_read_iter+0x1f0/0x380 [xfs]
+ __kernel_read+0x3b9/0x730
+ kernel_read_file+0x309/0x4d0
+ __do_sys_finit_module+0x381/0x730
+ do_syscall_64+0x8d/0x150
+ entry_SYSCALL_64_after_hwframe+0x62/0x6a
+ nr_base_pages: 20824
...
cat /sys/kernel/debug/page_owner > page_owner_full.txt
diff --git a/Documentation/networking/devlink/devlink-eswitch-attr.rst b/Documentation/networking/devlink/devlink-eswitch-attr.rst
new file mode 100644
index 00000000000000..08bb39ab152864
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-eswitch-attr.rst
@@ -0,0 +1,76 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Devlink E-Switch Attribute
+==========================
+
+Devlink E-Switch supports two modes of operation: legacy and switchdev.
+Legacy mode operates based on traditional MAC/VLAN steering rules. Switching
+decisions are made based on MAC addresses, VLANs, etc. There is limited ability
+to offload switching rules to hardware.
+
+On the other hand, switchdev mode allows for more advanced offloading
+capabilities of the E-Switch to hardware. In switchdev mode, more switching
+rules and logic can be offloaded to the hardware switch ASIC. It enables
+representor netdevices that represent the slow path of virtual functions (VFs)
+or scalable-functions (SFs) of the device. See more information about
+:ref:`Documentation/networking/switchdev.rst <switchdev>` and
+:ref:`Documentation/networking/representors.rst <representors>`.
+
+In addition, the devlink E-Switch also comes with other attributes listed
+in the following section.
+
+Attributes Description
+======================
+
+The following is a list of E-Switch attributes.
+
+.. list-table:: E-Switch attributes
+ :widths: 8 5 45
+
+ * - Name
+ - Type
+ - Description
+ * - ``mode``
+ - enum
+ - The mode of the device. The mode can be one of the following:
+
+ * ``legacy`` operates based on traditional MAC/VLAN steering
+ rules.
+ * ``switchdev`` allows for more advanced offloading capabilities of
+ the E-Switch to hardware.
+ * - ``inline-mode``
+ - enum
+ - Some HWs need the VF driver to put part of the packet
+ headers on the TX descriptor so the e-switch can do proper
+ matching and steering. Support for both switchdev mode and legacy mode.
+
+ * ``none`` none.
+ * ``link`` L2 mode.
+ * ``network`` L3 mode.
+ * ``transport`` L4 mode.
+ * - ``encap-mode``
+ - enum
+ - The encapsulation mode of the device. Support for both switchdev mode
+ and legacy mode. The mode can be one of the following:
+
+ * ``none`` Disable encapsulation support.
+ * ``basic`` Enable encapsulation support.
+
+Example Usage
+=============
+
+.. code:: shell
+
+ # enable switchdev mode
+ $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
+
+ # set inline-mode and encap-mode
+ $ devlink dev eswitch set pci/0000:08:00.0 inline-mode none encap-mode basic
+
+ # display devlink device eswitch attributes
+ $ devlink dev eswitch show pci/0000:08:00.0
+ pci/0000:08:00.0: mode switchdev inline-mode none encap-mode basic
+
+ # enable encap-mode with legacy mode
+ $ devlink dev eswitch set pci/0000:08:00.0 mode legacy inline-mode none encap-mode basic
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
index e14d7a701b72bc..948c8c44e233f6 100644
--- a/Documentation/networking/devlink/index.rst
+++ b/Documentation/networking/devlink/index.rst
@@ -67,6 +67,7 @@ general.
devlink-selftests
devlink-trap
devlink-linecard
+ devlink-eswitch-attr
Driver-specific documentation
-----------------------------
diff --git a/Documentation/networking/representors.rst b/Documentation/networking/representors.rst
index decb39c19b9ed2..5e23386f69687f 100644
--- a/Documentation/networking/representors.rst
+++ b/Documentation/networking/representors.rst
@@ -1,4 +1,5 @@
.. SPDX-License-Identifier: GPL-2.0
+.. _representors:
=============================
Network Function Representors
diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
index 995780088eb231..84335d119ff136 100644
--- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst
@@ -46,21 +46,16 @@ SEV hardware uses ASIDs to associate a memory encryption key with a VM.
Hence, the ASID for the SEV-enabled guests must be from 1 to a maximum value
defined in the CPUID 0x8000001f[ecx] field.
-SEV Key Management
-==================
+The KVM_MEMORY_ENCRYPT_OP ioctl
+===============================
-The SEV guest key management is handled by a separate processor called the AMD
-Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
-key management interface to perform common hypervisor activities such as
-encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
-information, see the SEV Key Management spec [api-spec]_
-
-The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP. If the argument
-to KVM_MEMORY_ENCRYPT_OP is NULL, the ioctl returns 0 if SEV is enabled
-and ``ENOTTY`` if it is disabled (on some older versions of Linux,
-the ioctl runs normally even with a NULL argument, and therefore will
-likely return ``EFAULT``). If non-NULL, the argument to KVM_MEMORY_ENCRYPT_OP
-must be a struct kvm_sev_cmd::
+The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP, which operates on
+the VM file descriptor. If the argument to KVM_MEMORY_ENCRYPT_OP is NULL,
+the ioctl returns 0 if SEV is enabled and ``ENOTTY`` if it is disabled
+(on some older versions of Linux, the ioctl tries to run normally even
+with a NULL argument, and therefore will likely return ``EFAULT`` instead
+of zero if SEV is enabled). If non-NULL, the argument to
+KVM_MEMORY_ENCRYPT_OP must be a struct kvm_sev_cmd::
struct kvm_sev_cmd {
__u32 id;
@@ -87,10 +82,6 @@ guests, such as launching, running, snapshotting, migrating and decommissioning.
The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform
context. In a typical workflow, this command should be the first command issued.
-The firmware can be initialized either by using its own non-volatile storage or
-the OS can manage the NV storage for the firmware using the module parameter
-``init_ex_path``. If the file specified by ``init_ex_path`` does not exist or
-is invalid, the OS will create or override the file with output from PSP.
Returns: 0 on success, -negative on error
@@ -434,6 +425,21 @@ issued by the hypervisor to make the guest ready for execution.
Returns: 0 on success, -negative on error
+Firmware Management
+===================
+
+The SEV guest key management is handled by a separate processor called the AMD
+Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
+key management interface to perform common hypervisor activities such as
+encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
+information, see the SEV Key Management spec [api-spec]_
+
+The AMD-SP firmware can be initialized either by using its own non-volatile
+storage or the OS can manage the NV storage for the firmware using
+parameter ``init_ex_path`` of the ``ccp`` module. If the file specified
+by ``init_ex_path`` does not exist or is invalid, the OS will create or
+override the file with PSP non-volatile storage.
+
References
==========
diff --git a/Documentation/virt/kvm/x86/msr.rst b/Documentation/virt/kvm/x86/msr.rst
index 9315fc385fb0be..3aecf2a70e7b43 100644
--- a/Documentation/virt/kvm/x86/msr.rst
+++ b/Documentation/virt/kvm/x86/msr.rst
@@ -193,8 +193,8 @@ data:
Asynchronous page fault (APF) control MSR.
Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area
- which must be in guest RAM and must be zeroed. This memory is expected
- to hold a copy of the following structure::
+ which must be in guest RAM. This memory is expected to hold the
+ following structure::
struct kvm_vcpu_pv_apf_data {
/* Used for 'page not present' events delivered via #PF */
@@ -204,7 +204,6 @@ data:
__u32 token;
__u8 pad[56];
- __u32 enabled;
};
Bits 5-4 of the MSR are reserved and should be zero. Bit 0 is set to 1
@@ -232,14 +231,14 @@ data:
as regular page fault, guest must reset 'flags' to '0' before it does
something that can generate normal page fault.
- Bytes 5-7 of 64 byte memory location ('token') will be written to by the
+ Bytes 4-7 of 64 byte memory location ('token') will be written to by the
hypervisor at the time of APF 'page ready' event injection. The content
- of these bytes is a token which was previously delivered as 'page not
- present' event. The event indicates the page in now available. Guest is
- supposed to write '0' to 'token' when it is done handling 'page ready'
- event and to write 1' to MSR_KVM_ASYNC_PF_ACK after clearing the location;
- writing to the MSR forces KVM to re-scan its queue and deliver the next
- pending notification.
+ of these bytes is a token which was previously delivered in CR2 as
+ 'page not present' event. The event indicates the page is now available.
+ Guest is supposed to write '0' to 'token' when it is done handling
+ 'page ready' event and to write '1' to MSR_KVM_ASYNC_PF_ACK after
+ clearing the location; writing to the MSR forces KVM to re-scan its
+ queue and deliver the next pending notification.
Note, MSR_KVM_ASYNC_PF_INT MSR specifying the interrupt vector for 'page
ready' APF delivery needs to be written to before enabling APF mechanism
diff --git a/MAINTAINERS b/MAINTAINERS
index 96496dbcae6d84..ee9cc2b40409ea 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2191,7 +2191,6 @@ N: mxs
ARM/FREESCALE LAYERSCAPE ARM ARCHITECTURE
M: Shawn Guo <shawnguo@kernel.org>
-M: Li Yang <leoyang.li@nxp.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
@@ -2708,7 +2707,7 @@ F: sound/soc/rockchip/
N: rockchip
ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-samsung-soc@vger.kernel.org
@@ -3573,6 +3572,7 @@ S: Supported
C: irc://irc.oftc.net/bcache
T: git https://evilpiepirate.org/git/bcachefs.git
F: fs/bcachefs/
+F: Documentation/filesystems/bcachefs/
BDISP ST MEDIA DRIVER
M: Fabien Dessenne <fabien.dessenne@foss.st.com>
@@ -3942,8 +3942,7 @@ F: kernel/bpf/ringbuf.c
BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF)
M: KP Singh <kpsingh@kernel.org>
-R: Florent Revest <revest@chromium.org>
-R: Brendan Jackman <jackmanb@chromium.org>
+R: Matt Bobrowski <mattbobrowski@google.com>
L: bpf@vger.kernel.org
S: Maintained
F: Documentation/bpf/prog_lsm.rst
@@ -3968,7 +3967,7 @@ F: kernel/bpf/bpf_lru*
F: kernel/bpf/cgroup.c
BPF [TOOLING] (bpftool)
-M: Quentin Monnet <quentin@isovalent.com>
+M: Quentin Monnet <qmo@kernel.org>
L: bpf@vger.kernel.org
S: Maintained
F: kernel/bpf/disasm.*
@@ -4870,7 +4869,6 @@ F: drivers/power/supply/cw2015_battery.c
CEPH COMMON CODE (LIBCEPH)
M: Ilya Dryomov <idryomov@gmail.com>
M: Xiubo Li <xiubli@redhat.com>
-R: Jeff Layton <jlayton@kernel.org>
L: ceph-devel@vger.kernel.org
S: Supported
W: http://ceph.com/
@@ -4882,7 +4880,6 @@ F: net/ceph/
CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
M: Xiubo Li <xiubli@redhat.com>
M: Ilya Dryomov <idryomov@gmail.com>
-R: Jeff Layton <jlayton@kernel.org>
L: ceph-devel@vger.kernel.org
S: Supported
W: http://ceph.com/
@@ -5558,7 +5555,7 @@ F: drivers/cpuidle/cpuidle-big_little.c
CPUIDLE DRIVER - ARM EXYNOS
M: Daniel Lezcano <daniel.lezcano@linaro.org>
M: Kukjin Kim <kgene@kernel.org>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -6157,7 +6154,6 @@ DEVICE-MAPPER (LVM)
M: Alasdair Kergon <agk@redhat.com>
M: Mike Snitzer <snitzer@kernel.org>
M: Mikulas Patocka <mpatocka@redhat.com>
-M: dm-devel@lists.linux.dev
L: dm-devel@lists.linux.dev
S: Maintained
Q: http://patchwork.kernel.org/project/dm-devel/list/
@@ -8525,7 +8521,6 @@ S: Maintained
F: drivers/video/fbdev/fsl-diu-fb.*
FREESCALE DMA DRIVER
-M: Li Yang <leoyang.li@nxp.com>
M: Zhang Wei <zw@zh-kernel.org>
L: linuxppc-dev@lists.ozlabs.org
S: Maintained
@@ -8690,10 +8685,9 @@ F: drivers/soc/fsl/qe/tsa.h
F: include/dt-bindings/soc/cpm1-fsl,tsa.h
FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
-M: Li Yang <leoyang.li@nxp.com>
L: netdev@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
-S: Maintained
+S: Orphan
F: drivers/net/ethernet/freescale/ucc_geth*
FREESCALE QUICC ENGINE UCC HDLC DRIVER
@@ -8710,10 +8704,9 @@ S: Maintained
F: drivers/tty/serial/ucc_uart.c
FREESCALE SOC DRIVERS
-M: Li Yang <leoyang.li@nxp.com>
L: linuxppc-dev@lists.ozlabs.org
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S: Maintained
+S: Orphan
F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
F: Documentation/devicetree/bindings/soc/fsl/
F: drivers/soc/fsl/
@@ -8747,10 +8740,9 @@ F: Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml
F: sound/soc/fsl/fsl_qmc_audio.c
FREESCALE USB PERIPHERAL DRIVERS
-M: Li Yang <leoyang.li@nxp.com>
L: linux-usb@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
-S: Maintained
+S: Orphan
F: drivers/usb/gadget/udc/fsl*
FREESCALE USB PHY DRIVER
@@ -9002,7 +8994,7 @@ F: drivers/i2c/muxes/i2c-mux-gpio.c
F: include/linux/platform_data/i2c-mux-gpio.h
GENERIC GPIO RESET DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: drivers/reset/reset-gpio.c
@@ -9653,7 +9645,9 @@ L: linux-input@vger.kernel.org
S: Maintained
F: drivers/hid/hid-logitech-hidpp.c
-HIGH-RESOLUTION TIMERS, CLOCKEVENTS
+HIGH-RESOLUTION TIMERS, TIMER WHEEL, CLOCKEVENTS
+M: Anna-Maria Behnsen <anna-maria@linutronix.de>
+M: Frederic Weisbecker <frederic@kernel.org>
M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
S: Maintained
@@ -9661,9 +9655,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
F: Documentation/timers/
F: include/linux/clockchips.h
F: include/linux/hrtimer.h
+F: include/linux/timer.h
F: kernel/time/clockevents.c
F: kernel/time/hrtimer.c
-F: kernel/time/timer_*.c
+F: kernel/time/timer.c
+F: kernel/time/timer_list.c
+F: kernel/time/timer_migration.*
+F: tools/testing/selftests/timers/
HIGH-SPEED SCC DRIVER FOR AX.25
L: linux-hams@vger.kernel.org
@@ -10026,7 +10024,7 @@ F: drivers/media/platform/st/sti/hva
HWPOISON MEMORY FAILURE HANDLING
M: Miaohe Lin <linmiaohe@huawei.com>
-R: Naoya Horiguchi <naoya.horiguchi@nec.com>
+R: Naoya Horiguchi <nao.horiguchi@gmail.com>
L: linux-mm@kvack.org
S: Maintained
F: mm/hwpoison-inject.c
@@ -13134,6 +13132,7 @@ F: drivers/net/ethernet/marvell/mvpp2/
MARVELL MWIFIEX WIRELESS DRIVER
M: Brian Norris <briannorris@chromium.org>
+R: Francesco Dolcini <francesco@dolcini.it>
L: linux-wireless@vger.kernel.org
S: Odd Fixes
F: drivers/net/wireless/marvell/mwifiex/
@@ -13290,7 +13289,7 @@ F: drivers/iio/adc/max11205.c
MAXIM MAX17040 FAMILY FUEL GAUGE DRIVERS
R: Iskren Chernev <iskren.chernev@gmail.com>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
R: Marek Szyprowski <m.szyprowski@samsung.com>
R: Matheus Castello <matheus@castello.eng.br>
L: linux-pm@vger.kernel.org
@@ -13300,7 +13299,7 @@ F: drivers/power/supply/max17040_battery.c
MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS
R: Hans de Goede <hdegoede@redhat.com>
-R: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R: Krzysztof Kozlowski <krzk@kernel.org>
R: Marek Szyprowski <m.szyprowski@samsung.com>
R: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
R: Purism Kernel Team <kernel@puri.sm>
@@ -13358,7 +13357,7 @@ F: Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml
F: drivers/power/supply/max77976_charger.c
MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
S: Maintained
B: mailto:linux-samsung-soc@vger.kernel.org
@@ -13369,7 +13368,7 @@ F: drivers/power/supply/max77693_charger.c
MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
M: Chanwoo Choi <cw00.choi@samsung.com>
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
B: mailto:linux-samsung-soc@vger.kernel.org
@@ -14014,6 +14013,7 @@ F: drivers/net/ethernet/mellanox/mlx4/en_*
MELLANOX ETHERNET DRIVER (mlx5e)
M: Saeed Mahameed <saeedm@nvidia.com>
+M: Tariq Toukan <tariqt@nvidia.com>
L: netdev@vger.kernel.org
S: Supported
W: http://www.mellanox.com
@@ -14081,6 +14081,7 @@ F: include/uapi/rdma/mlx4-abi.h
MELLANOX MLX5 core VPI driver
M: Saeed Mahameed <saeedm@nvidia.com>
M: Leon Romanovsky <leonro@nvidia.com>
+M: Tariq Toukan <tariqt@nvidia.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
S: Supported
@@ -14151,7 +14152,7 @@ F: mm/mm_init.c
F: tools/testing/memblock/
MEMORY CONTROLLER DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
B: mailto:krzysztof.kozlowski@linaro.org
@@ -15532,7 +15533,7 @@ F: include/uapi/linux/nexthop.h
F: net/ipv4/nexthop.c
NFC SUBSYSTEM
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/nfc/
@@ -15627,9 +15628,10 @@ F: drivers/misc/nsm.c
F: include/uapi/linux/nsm.h
NOHZ, DYNTICKS SUPPORT
+M: Anna-Maria Behnsen <anna-maria@linutronix.de>
M: Frederic Weisbecker <frederic@kernel.org>
-M: Thomas Gleixner <tglx@linutronix.de>
M: Ingo Molnar <mingo@kernel.org>
+M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/nohz
@@ -15908,7 +15910,7 @@ F: Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml
F: drivers/regulator/pf8x00-regulator.c
NXP PTN5150A CC LOGIC AND EXTCON DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml
@@ -16519,7 +16521,7 @@ K: of_overlay_remove
OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
M: Rob Herring <robh@kernel.org>
-M: Krzysztof Kozlowski <krzysztof.kozlowski+dt@linaro.org>
+M: Krzysztof Kozlowski <krzk+dt@kernel.org>
M: Conor Dooley <conor+dt@kernel.org>
L: devicetree@vger.kernel.org
S: Maintained
@@ -16725,9 +16727,9 @@ F: include/uapi/linux/ppdev.h
PARAVIRT_OPS INTERFACE
M: Juergen Gross <jgross@suse.com>
-R: Ajay Kaher <akaher@vmware.com>
-R: Alexey Makhalov <amakhalov@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+R: Ajay Kaher <ajay.kaher@broadcom.com>
+R: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: virtualization@lists.linux.dev
L: x86@kernel.org
S: Supported
@@ -16966,7 +16968,6 @@ F: drivers/pci/controller/dwc/pci-exynos.c
PCI DRIVER FOR SYNOPSYS DESIGNWARE
M: Jingoo Han <jingoohan1@gmail.com>
-M: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
L: linux-pci@vger.kernel.org
S: Maintained
@@ -17477,7 +17478,7 @@ F: Documentation/devicetree/bindings/pinctrl/renesas,*
F: drivers/pinctrl/renesas/
PIN CONTROLLER - SAMSUNG
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
R: Alim Akhtar <alim.akhtar@samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -17590,15 +17591,20 @@ F: drivers/pnp/
F: include/linux/pnp.h
POSIX CLOCKS and TIMERS
+M: Anna-Maria Behnsen <anna-maria@linutronix.de>
+M: Frederic Weisbecker <frederic@kernel.org>
M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
F: fs/timerfd.c
F: include/linux/time_namespace.h
-F: include/linux/timer*
+F: include/linux/timerfd.h
+F: include/uapi/linux/time.h
+F: include/uapi/linux/timerfd.h
F: include/trace/events/timer*
-F: kernel/time/*timer*
+F: kernel/time/itimer.c
+F: kernel/time/posix-*
F: kernel/time/namespace.c
POWER MANAGEMENT CORE
@@ -18645,18 +18651,21 @@ REALTEK WIRELESS DRIVER (rtlwifi family)
M: Ping-Ke Shih <pkshih@realtek.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtlwifi/
REALTEK WIRELESS DRIVER (rtw88)
M: Ping-Ke Shih <pkshih@realtek.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtw88/
REALTEK WIRELESS DRIVER (rtw89)
M: Ping-Ke Shih <pkshih@realtek.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtw89/
REDPINE WIRELESS DRIVER
@@ -18727,13 +18736,24 @@ S: Supported
F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml
F: drivers/i2c/busses/i2c-emev2.c
-RENESAS ETHERNET DRIVERS
+RENESAS ETHERNET AVB DRIVER
R: Sergey Shtylyov <s.shtylyov@omp.ru>
L: netdev@vger.kernel.org
L: linux-renesas-soc@vger.kernel.org
-F: Documentation/devicetree/bindings/net/renesas,*.yaml
-F: drivers/net/ethernet/renesas/
-F: include/linux/sh_eth.h
+F: Documentation/devicetree/bindings/net/renesas,etheravb.yaml
+F: drivers/net/ethernet/renesas/Kconfig
+F: drivers/net/ethernet/renesas/Makefile
+F: drivers/net/ethernet/renesas/ravb*
+
+RENESAS ETHERNET SWITCH DRIVER
+R: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+L: netdev@vger.kernel.org
+L: linux-renesas-soc@vger.kernel.org
+F: Documentation/devicetree/bindings/net/renesas,*ether-switch.yaml
+F: drivers/net/ethernet/renesas/Kconfig
+F: drivers/net/ethernet/renesas/Makefile
+F: drivers/net/ethernet/renesas/rcar_gen4*
+F: drivers/net/ethernet/renesas/rswitch*
RENESAS IDT821034 ASoC CODEC
M: Herve Codina <herve.codina@bootlin.com>
@@ -18843,6 +18863,16 @@ S: Supported
F: Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml
F: drivers/i2c/busses/i2c-rzv2m.c
+RENESAS SUPERH ETHERNET DRIVER
+R: Sergey Shtylyov <s.shtylyov@omp.ru>
+L: netdev@vger.kernel.org
+L: linux-renesas-soc@vger.kernel.org
+F: Documentation/devicetree/bindings/net/renesas,ether.yaml
+F: drivers/net/ethernet/renesas/Kconfig
+F: drivers/net/ethernet/renesas/Makefile
+F: drivers/net/ethernet/renesas/sh_eth*
+F: include/linux/sh_eth.h
+
RENESAS USB PHY DRIVER
M: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
L: linux-renesas-soc@vger.kernel.org
@@ -19179,12 +19209,14 @@ M: Hin-Tak Leung <hintak.leung@gmail.com>
M: Larry Finger <Larry.Finger@lwfinger.net>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtl818x/rtl8187/
RTL8XXXU WIRELESS DRIVER (rtl8xxxu)
M: Jes Sorensen <Jes.Sorensen@gmail.com>
L: linux-wireless@vger.kernel.org
S: Maintained
+T: git https://github.com/pkshih/rtw.git
F: drivers/net/wireless/realtek/rtl8xxxu/
RTRS TRANSPORT DRIVERS
@@ -19414,7 +19446,7 @@ F: Documentation/devicetree/bindings/sound/samsung*
F: sound/soc/samsung/
SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-crypto@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -19449,7 +19481,7 @@ S: Maintained
F: drivers/platform/x86/samsung-laptop.c
SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -19475,7 +19507,7 @@ F: drivers/media/platform/samsung/s3c-camif/
F: include/media/drv-intf/s3c_camif.h
SAMSUNG S3FWRN5 NFC DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
F: drivers/nfc/s3fwrn5
@@ -19496,7 +19528,7 @@ S: Supported
F: drivers/media/i2c/s5k5baf.c
SAMSUNG S5P Security SubSystem (SSS) DRIVER
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Vladimir Zapolskiy <vz@mleia.com>
L: linux-crypto@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
@@ -19518,7 +19550,7 @@ F: Documentation/devicetree/bindings/media/samsung,fimc.yaml
F: drivers/media/platform/samsung/exynos4-is/
SAMSUNG SOC CLOCK DRIVERS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
M: Chanwoo Choi <cw00.choi@samsung.com>
R: Alim Akhtar <alim.akhtar@samsung.com>
@@ -19550,7 +19582,7 @@ F: drivers/net/ethernet/samsung/sxgbe/
SAMSUNG THERMAL DRIVER
M: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
S: Maintained
@@ -22254,13 +22286,20 @@ S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core
F: include/linux/clocksource.h
F: include/linux/time.h
+F: include/linux/timekeeper_internal.h
+F: include/linux/timekeeping.h
F: include/linux/timex.h
F: include/uapi/linux/time.h
F: include/uapi/linux/timex.h
F: kernel/time/alarmtimer.c
-F: kernel/time/clocksource.c
-F: kernel/time/ntp.c
-F: kernel/time/time*.c
+F: kernel/time/clocksource*
+F: kernel/time/ntp*
+F: kernel/time/time.c
+F: kernel/time/timeconst.bc
+F: kernel/time/timeconv.c
+F: kernel/time/timecounter.c
+F: kernel/time/timekeeping*
+F: kernel/time/time_test.c
F: tools/testing/selftests/timers/
TIPC NETWORK LAYER
@@ -22384,6 +22423,7 @@ S: Maintained
W: https://kernsec.org/wiki/index.php/Linux_Kernel_Integrity
Q: https://patchwork.kernel.org/project/linux-integrity/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git
+F: Documentation/devicetree/bindings/tpm/
F: drivers/char/tpm/
TPS546D24 DRIVER
@@ -22530,6 +22570,7 @@ Q: https://patchwork.kernel.org/project/linux-pm/list/
B: https://bugzilla.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat
F: tools/power/x86/turbostat/
+F: tools/testing/selftests/turbostat/
TW5864 VIDEO4LINUX DRIVER
M: Bluecherry Maintainers <maintainers@bluecherrydvr.com>
@@ -23608,9 +23649,9 @@ S: Supported
F: drivers/misc/vmw_balloon.c
VMWARE HYPERVISOR INTERFACE
-M: Ajay Kaher <akaher@vmware.com>
-M: Alexey Makhalov <amakhalov@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Ajay Kaher <ajay.kaher@broadcom.com>
+M: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: virtualization@lists.linux.dev
L: x86@kernel.org
S: Supported
@@ -23619,34 +23660,34 @@ F: arch/x86/include/asm/vmware.h
F: arch/x86/kernel/cpu/vmware.c
VMWARE PVRDMA DRIVER
-M: Bryan Tan <bryantan@vmware.com>
-M: Vishnu Dasa <vdasa@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Bryan Tan <bryan-bt.tan@broadcom.com>
+M: Vishnu Dasa <vishnu.dasa@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/vmw_pvrdma/
VMWARE PVSCSI DRIVER
-M: Vishal Bhakta <vbhakta@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Vishal Bhakta <vishal.bhakta@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-scsi@vger.kernel.org
S: Supported
F: drivers/scsi/vmw_pvscsi.c
F: drivers/scsi/vmw_pvscsi.h
VMWARE VIRTUAL PTP CLOCK DRIVER
-M: Jeff Sipek <jsipek@vmware.com>
-R: Ajay Kaher <akaher@vmware.com>
-R: Alexey Makhalov <amakhalov@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Nick Shi <nick.shi@broadcom.com>
+R: Ajay Kaher <ajay.kaher@broadcom.com>
+R: Alexey Makhalov <alexey.amakhalov@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/ptp/ptp_vmw.c
VMWARE VMCI DRIVER
-M: Bryan Tan <bryantan@vmware.com>
-M: Vishnu Dasa <vdasa@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Bryan Tan <bryan-bt.tan@broadcom.com>
+M: Vishnu Dasa <vishnu.dasa@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-kernel@vger.kernel.org
S: Supported
F: drivers/misc/vmw_vmci/
@@ -23661,16 +23702,16 @@ F: drivers/input/mouse/vmmouse.c
F: drivers/input/mouse/vmmouse.h
VMWARE VMXNET3 ETHERNET DRIVER
-M: Ronak Doshi <doshir@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Ronak Doshi <ronak.doshi@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: netdev@vger.kernel.org
S: Supported
F: drivers/net/vmxnet3/
VMWARE VSOCK VMCI TRANSPORT DRIVER
-M: Bryan Tan <bryantan@vmware.com>
-M: Vishnu Dasa <vdasa@vmware.com>
-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
+M: Bryan Tan <bryan-bt.tan@broadcom.com>
+M: Vishnu Dasa <vishnu.dasa@broadcom.com>
+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-kernel@vger.kernel.org
S: Supported
F: net/vmw_vsock/vmci_transport*
@@ -23738,7 +23779,7 @@ S: Orphan
F: drivers/mmc/host/vub300.c
W1 DALLAS'S 1-WIRE BUS
-M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M: Krzysztof Kozlowski <krzk@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/w1/
F: Documentation/w1/
diff --git a/Makefile b/Makefile
index 763b6792d3d513..59d8a7f95d0a86 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 9
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc4
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index 9f066785bb71d9..65afb1de48b36e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1172,12 +1172,12 @@ config PAGE_SIZE_LESS_THAN_256KB
config PAGE_SHIFT
int
- default 12 if PAGE_SIZE_4KB
- default 13 if PAGE_SIZE_8KB
- default 14 if PAGE_SIZE_16KB
- default 15 if PAGE_SIZE_32KB
- default 16 if PAGE_SIZE_64KB
- default 18 if PAGE_SIZE_256KB
+ default 12 if PAGE_SIZE_4KB
+ default 13 if PAGE_SIZE_8KB
+ default 14 if PAGE_SIZE_16KB
+ default 15 if PAGE_SIZE_32KB
+ default 16 if PAGE_SIZE_64KB
+ default 18 if PAGE_SIZE_256KB
# This allows to use a set of generic functions to determine mmap base
# address by giving priority to top-down scheme only if the process
diff --git a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
index 1235a71c6abe96..52869e68f833c4 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
@@ -666,7 +666,7 @@
bus-width = <4>;
no-1-8-v;
no-sdio;
- no-emmc;
+ no-mmc;
status = "okay";
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
index ba7231b364bb8c..7bab113ca6da79 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
@@ -210,6 +210,7 @@
remote-endpoint = <&mipi_from_sensor>;
clock-lanes = <0>;
data-lanes = <1>;
+ link-frequencies = /bits/ 64 <330000000>;
};
};
};
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index 31755a378c7364..ff2a4a4d822047 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -79,10 +79,8 @@ static struct musb_hdrc_platform_data tusb_data = {
static struct gpiod_lookup_table tusb_gpio_table = {
.dev_id = "musb-tusb",
.table = {
- GPIO_LOOKUP("gpio-0-15", 0, "enable",
- GPIO_ACTIVE_HIGH),
- GPIO_LOOKUP("gpio-48-63", 10, "int",
- GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -140,12 +138,11 @@ static int slot1_cover_open;
static int slot2_cover_open;
static struct device *mmc_device;
-static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
+static struct gpiod_lookup_table nokia800_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
/* Slot switch, GPIO 96 */
- GPIO_LOOKUP("gpio-80-111", 16,
- "switch", GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -153,12 +150,12 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
static struct gpiod_lookup_table nokia810_mmc_gpio_table = {
.dev_id = "mmci-omap.0",
.table = {
+ /* Slot switch, GPIO 96 */
+ GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
/* Slot index 1, VSD power, GPIO 23 */
- GPIO_LOOKUP_IDX("gpio-16-31", 7,
- "vsd", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH),
/* Slot index 1, VIO power, GPIO 9 */
- GPIO_LOOKUP_IDX("gpio-0-15", 9,
- "vio", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH),
{ }
},
};
@@ -415,8 +412,6 @@ static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC];
static void __init n8x0_mmc_init(void)
{
- gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table);
-
if (board_is_n810()) {
mmc1_data.slots[0].name = "external";
@@ -429,6 +424,8 @@ static void __init n8x0_mmc_init(void)
mmc1_data.slots[1].name = "internal";
mmc1_data.slots[1].ban_openended = 1;
gpiod_add_lookup_table(&nokia810_mmc_gpio_table);
+ } else {
+ gpiod_add_lookup_table(&nokia800_mmc_gpio_table);
}
mmc1_data.nr_slots = 2;
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
index 3c42240e78e245..4aaf5a0c1ed8af 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
@@ -41,7 +41,7 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 267 IRQ_TYPE_LEVEL_HIGH>;
fsl,usbphy = <&usbphy1>;
fsl,usbmisc = <&usbmisc1 0>;
- clocks = <&usb2_lpcg 0>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_6>;
ahb-burst-config = <0x0>;
tx-burst-size-dword = <0x10>;
rx-burst-size-dword = <0x10>;
@@ -58,7 +58,7 @@ conn_subsys: bus@5b000000 {
usbphy1: usbphy@5b100000 {
compatible = "fsl,imx7ulp-usbphy";
reg = <0x5b100000 0x1000>;
- clocks = <&usb2_lpcg 1>;
+ clocks = <&usb2_lpcg IMX_LPCG_CLK_7>;
power-domains = <&pd IMX_SC_R_USB_0_PHY>;
status = "disabled";
};
@@ -67,8 +67,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b010000 0x10000>;
clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>,
- <&sdhc0_lpcg IMX_LPCG_CLK_0>,
- <&sdhc0_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc0_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_0>;
status = "disabled";
@@ -78,8 +78,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b020000 0x10000>;
clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>,
- <&sdhc1_lpcg IMX_LPCG_CLK_0>,
- <&sdhc1_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc1_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc1_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_1>;
fsl,tuning-start-tap = <20>;
@@ -91,8 +91,8 @@ conn_subsys: bus@5b000000 {
interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>;
reg = <0x5b030000 0x10000>;
clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>,
- <&sdhc2_lpcg IMX_LPCG_CLK_0>,
- <&sdhc2_lpcg IMX_LPCG_CLK_5>;
+ <&sdhc2_lpcg IMX_LPCG_CLK_5>,
+ <&sdhc2_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "ahb", "per";
power-domains = <&pd IMX_SC_R_SDHC_2>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
index cab3468b1875ee..f7a91d43a0ffe1 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
@@ -28,8 +28,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi0_lpcg 0>,
- <&spi0_lpcg 1>;
+ clocks = <&spi0_lpcg IMX_LPCG_CLK_0>,
+ <&spi0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -44,8 +44,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi1_lpcg 0>,
- <&spi1_lpcg 1>;
+ clocks = <&spi1_lpcg IMX_LPCG_CLK_0>,
+ <&spi1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -60,8 +60,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi2_lpcg 0>,
- <&spi2_lpcg 1>;
+ clocks = <&spi2_lpcg IMX_LPCG_CLK_0>,
+ <&spi2_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -76,8 +76,8 @@ dma_subsys: bus@5a000000 {
#size-cells = <0>;
interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&spi3_lpcg 0>,
- <&spi3_lpcg 1>;
+ clocks = <&spi3_lpcg IMX_LPCG_CLK_0>,
+ <&spi3_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_SPI_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <60000000>;
@@ -145,8 +145,8 @@ dma_subsys: bus@5a000000 {
compatible = "fsl,imx8qxp-pwm", "fsl,imx27-pwm";
reg = <0x5a190000 0x1000>;
interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&adma_pwm_lpcg 1>,
- <&adma_pwm_lpcg 0>;
+ clocks = <&adma_pwm_lpcg IMX_LPCG_CLK_4>,
+ <&adma_pwm_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -355,8 +355,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a880000 0x10000>;
interrupts = <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc0_lpcg 0>,
- <&adc0_lpcg 1>;
+ clocks = <&adc0_lpcg IMX_LPCG_CLK_0>,
+ <&adc0_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -370,8 +370,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a890000 0x10000>;
interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&adc1_lpcg 0>,
- <&adc1_lpcg 1>;
+ clocks = <&adc1_lpcg IMX_LPCG_CLK_0>,
+ <&adc1_lpcg IMX_LPCG_CLK_4>;
clock-names = "per", "ipg";
assigned-clocks = <&clk IMX_SC_R_ADC_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
@@ -384,8 +384,8 @@ dma_subsys: bus@5a000000 {
reg = <0x5a8d0000 0x10000>;
interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_HIGH>;
interrupt-parent = <&gic>;
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
@@ -405,8 +405,8 @@ dma_subsys: bus@5a000000 {
* CAN1 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
@@ -426,8 +426,8 @@ dma_subsys: bus@5a000000 {
* CAN2 shares CAN0's clock and to enable CAN0's clock it
* has to be powered on.
*/
- clocks = <&can0_lpcg 1>,
- <&can0_lpcg 0>;
+ clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+ <&can0_lpcg IMX_LPCG_CLK_0>;
clock-names = "ipg", "per";
assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <40000000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
index 7e510b21bbac55..764c1a08e3b118 100644
--- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
@@ -25,8 +25,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d000000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm0_lpcg 4>,
- <&pwm0_lpcg 1>;
+ clocks = <&pwm0_lpcg IMX_LPCG_CLK_6>,
+ <&pwm0_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -38,8 +38,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d010000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm1_lpcg 4>,
- <&pwm1_lpcg 1>;
+ clocks = <&pwm1_lpcg IMX_LPCG_CLK_6>,
+ <&pwm1_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -51,8 +51,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d020000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm2_lpcg 4>,
- <&pwm2_lpcg 1>;
+ clocks = <&pwm2_lpcg IMX_LPCG_CLK_6>,
+ <&pwm2_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
@@ -64,8 +64,8 @@ lsio_subsys: bus@5d000000 {
compatible = "fsl,imx27-pwm";
reg = <0x5d030000 0x10000>;
clock-names = "ipg", "per";
- clocks = <&pwm3_lpcg 4>,
- <&pwm3_lpcg 1>;
+ clocks = <&pwm3_lpcg IMX_LPCG_CLK_6>,
+ <&pwm3_lpcg IMX_LPCG_CLK_1>;
assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>;
assigned-clock-rates = <24000000>;
#pwm-cells = <3>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
index 41c79d2ebdd620..f24b14744799e1 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
@@ -14,6 +14,7 @@
pinctrl-0 = <&pinctrl_usbcon1>;
type = "micro";
label = "otg";
+ vbus-supply = <&reg_usb1_vbus>;
id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
port {
@@ -183,7 +184,6 @@
};
&usb3_phy0 {
- vbus-supply = <&reg_usb1_vbus>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
index d5c400b355af56..f5491a608b2f37 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
@@ -14,6 +14,7 @@
pinctrl-0 = <&pinctrl_usbcon1>;
type = "micro";
label = "otg";
+ vbus-supply = <&reg_usb1_vbus>;
id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
port {
@@ -202,7 +203,6 @@
};
&usb3_phy0 {
- vbus-supply = <&reg_usb1_vbus>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
index 11626fae5f97f3..aa9f28c4431d02 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
@@ -153,15 +153,15 @@
};
&flexcan2 {
- clocks = <&can1_lpcg 1>,
- <&can1_lpcg 0>;
+ clocks = <&can1_lpcg IMX_LPCG_CLK_4>,
+ <&can1_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_1 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};
&flexcan3 {
- clocks = <&can2_lpcg 1>,
- <&can2_lpcg 0>;
+ clocks = <&can2_lpcg IMX_LPCG_CLK_4>,
+ <&can2_lpcg IMX_LPCG_CLK_0>;
assigned-clocks = <&clk IMX_SC_R_CAN_2 IMX_SC_PM_CLK_PER>;
fsl,clk-source = /bits/ 8 <1>;
};
diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
index f3a6da8b289019..5260c63db0078b 100644
--- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
@@ -944,6 +944,8 @@ ap_spi_fp: &spi10 {
vddrf-supply = <&pp1300_l2c>;
vddch0-supply = <&pp3300_l10c>;
max-speed = <3200000>;
+
+ qcom,local-bd-address-broken;
};
};
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 3b0e8248e1a41a..a75de2665d8445 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -161,12 +161,18 @@ static inline unsigned long get_trans_granule(void)
#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3)
/*
- * Generate 'num' values from -1 to 30 with -1 rejected by the
- * __flush_tlb_range() loop below.
+ * Generate 'num' values from -1 to 31 with -1 rejected by the
+ * __flush_tlb_range() loop below. Its return value is only
+ * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If
+ * 'pages' is more than that, you must iterate over the overall
+ * range.
*/
-#define TLBI_RANGE_MASK GENMASK_ULL(4, 0)
-#define __TLBI_RANGE_NUM(pages, scale) \
- ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+#define __TLBI_RANGE_NUM(pages, scale) \
+ ({ \
+ int __pages = min((pages), \
+ __TLBI_RANGE_PAGES(31, (scale))); \
+ (__pages >> (5 * (scale) + 1)) - 1; \
+ })
/*
* TLB Invalidation
@@ -379,10 +385,6 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
* 3. If there is 1 page remaining, flush it through non-range operations. Range
* operations can only span an even number of pages. We save this for last to
* ensure 64KB start alignment is maintained for the LPA2 case.
- *
- * Note that certain ranges can be represented by either num = 31 and
- * scale or num = 0 and scale + 1. The loop below favours the latter
- * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
#define __flush_tlb_range_op(op, start, pages, stride, \
asid, tlb_level, tlbi_user, lpa2) \
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index ce08b744aaab22..cb68adcabe0789 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -289,8 +289,28 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
adr_l x1, __hyp_text_end
adr_l x2, dcache_clean_poc
blr x2
+
+ mov_q x0, INIT_SCTLR_EL2_MMU_OFF
+ pre_disable_mmu_workaround
+ msr sctlr_el2, x0
+ isb
0:
mov_q x0, HCR_HOST_NVHE_FLAGS
+
+ /*
+ * Compliant CPUs advertise their VHE-onlyness with
+ * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
+ * RES1 in that case. Publish the E2H bit early so that
+ * it can be picked up by the init_el2_state macro.
+ *
+ * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
+ * don't advertise it (they predate this relaxation).
+ */
+ mrs_s x1, SYS_ID_AA64MMFR4_EL1
+ tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
+
+ orr x0, x0, #HCR_E2H
+1:
msr hcr_el2, x0
isb
@@ -303,30 +323,16 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
mov_q x1, INIT_SCTLR_EL1_MMU_OFF
- /*
- * Compliant CPUs advertise their VHE-onlyness with
- * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
- * RES1 in that case.
- *
- * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but
- * don't advertise it (they predate this relaxation).
- */
- mrs_s x0, SYS_ID_AA64MMFR4_EL1
- ubfx x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
- tbnz x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
-
mrs x0, hcr_el2
and x0, x0, #HCR_E2H
cbz x0, 2f
-1:
+
/* Set a sane SCTLR_EL1, the VHE way */
- pre_disable_mmu_workaround
msr_s SYS_SCTLR_EL12, x1
mov x2, #BOOT_CPU_FLAG_E2H
b 3f
2:
- pre_disable_mmu_workaround
msr sctlr_el1, x1
mov x2, xzr
3:
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 162b030ab9da33..0d022599eb61b3 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -761,7 +761,6 @@ static void sve_init_header_from_task(struct user_sve_header *header,
{
unsigned int vq;
bool active;
- bool fpsimd_only;
enum vec_type task_type;
memset(header, 0, sizeof(*header));
@@ -777,12 +776,10 @@ static void sve_init_header_from_task(struct user_sve_header *header,
case ARM64_VEC_SVE:
if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE);
break;
case ARM64_VEC_SME:
if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
header->flags |= SVE_PT_VL_INHERIT;
- fpsimd_only = false;
break;
default:
WARN_ON_ONCE(1);
@@ -790,7 +787,7 @@ static void sve_init_header_from_task(struct user_sve_header *header,
}
if (active) {
- if (fpsimd_only) {
+ if (target->thread.fp_type == FP_STATE_FPSIMD) {
header->flags |= SVE_PT_REGS_FPSIMD;
} else {
header->flags |= SVE_PT_REGS_SVE;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 3dee5490eea94d..c4a0a35e02c728 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2597,14 +2597,11 @@ static __init int kvm_arm_init(void)
if (err)
goto out_hyp;
- if (is_protected_kvm_enabled()) {
- kvm_info("Protected nVHE mode initialized successfully\n");
- } else if (in_hyp_mode) {
- kvm_info("VHE mode initialized successfully\n");
- } else {
- char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n';
- kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode);
- }
+ kvm_info("%s%sVHE mode initialized successfully\n",
+ in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
+ "Protected " : "Hyp "),
+ in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
+ "h" : "n"));
/*
* FIXME: Do something reasonable if kvm_init() fails after pKVM
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index a60fb13e21924f..2fc68da4036d90 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -154,7 +154,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt, false);
- __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+ TLBI_TTL_UNKNOWN);
dsb(ish);
__tlbi(vmalle1is);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 3fae5830f8d2c7..5a59ef88b646f0 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -528,7 +528,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
kvm_clear_pte(ctx->ptep);
dsb(ishst);
- __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
+ __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN);
} else {
if (ctx->end - ctx->addr < granule)
return -EINVAL;
@@ -843,12 +843,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
* Perform the appropriate TLB invalidation based on the
* evicted pte value (if any).
*/
- if (kvm_pte_table(ctx->old, ctx->level))
- kvm_tlb_flush_vmid_range(mmu, ctx->addr,
- kvm_granule_size(ctx->level));
- else if (kvm_pte_valid(ctx->old))
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ u64 size = kvm_granule_size(ctx->level);
+ u64 addr = ALIGN_DOWN(ctx->addr, size);
+
+ kvm_tlb_flush_vmid_range(mmu, addr, size);
+ } else if (kvm_pte_valid(ctx->old)) {
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
+ }
}
if (stage2_pte_is_counted(ctx->old))
@@ -896,9 +899,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
if (kvm_pte_valid(ctx->old)) {
kvm_clear_pte(ctx->ptep);
- if (!stage2_unmap_defer_tlb_flush(pgt))
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
- ctx->addr, ctx->level);
+ if (kvm_pte_table(ctx->old, ctx->level)) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+ TLBI_TTL_UNKNOWN);
+ } else if (!stage2_unmap_defer_tlb_flush(pgt)) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+ ctx->level);
+ }
}
mm_ops->put_page(ctx->ptep);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index b32e2940df7dc8..1a60b95381e8e9 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -171,7 +171,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
/* Switch to requested VMID */
__tlb_switch_to_guest(mmu, &cxt);
- __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+ TLBI_TTL_UNKNOWN);
dsb(ish);
__tlbi(vmalle1is);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 18680771cdb0ea..dc04bc7678659a 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1637,7 +1637,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
- if (esr_fsc_is_permission_fault(esr)) {
+ if (esr_fsc_is_translation_fault(esr)) {
/* Beyond sanitised PARange (which is the IPA limit) */
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
kvm_inject_size_fault(vcpu);
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 0f0e10bb0a9540..b872b003a55f3d 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -276,7 +276,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t *ptep = NULL;
pgdp = pgd_offset(mm, addr);
- p4dp = p4d_offset(pgdp, addr);
+ p4dp = p4d_alloc(mm, pgdp, addr);
+ if (!p4dp)
+ return NULL;
+
pudp = pud_alloc(mm, p4dp, addr);
if (!pudp)
return NULL;
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 0c4e3ecf989d43..0e270a1c51e645 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -219,9 +219,6 @@ bool kernel_page_present(struct page *page)
pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
- if (!can_set_direct_map())
- return true;
-
pgdp = pgd_offset_k(addr);
if (pgd_none(READ_ONCE(*pgdp)))
return false;
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c5b461dda43859..122021f9bdfc87 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -943,7 +943,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit(A64_UXTH(is64, dst, dst), ctx);
break;
case 32:
- emit(A64_REV32(is64, dst, dst), ctx);
+ emit(A64_REV32(0, dst, dst), ctx);
/* upper 32 bits already cleared */
break;
case 64:
@@ -1256,7 +1256,7 @@ emit_cond_jmp:
} else {
emit_a64_mov_i(1, tmp, off, ctx);
if (sign_extend)
- emit(A64_LDRSW(dst, src_adj, off_adj), ctx);
+ emit(A64_LDRSW(dst, src, tmp), ctx);
else
emit(A64_LDR32(dst, src, tmp), ctx);
}
diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
index 49a70f8c3cab22..b6aeb1f70e2a03 100644
--- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi
+++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi
@@ -100,6 +100,13 @@
#size-cells = <2>;
dma-coherent;
+ isa@18000000 {
+ compatible = "isa";
+ #size-cells = <1>;
+ #address-cells = <2>;
+ ranges = <1 0x0 0x0 0x18000000 0x4000>;
+ };
+
liointc0: interrupt-controller@1fe01400 {
compatible = "loongson,liointc-2.0";
reg = <0x0 0x1fe01400 0x0 0x40>,
diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
index dca91caf895e3c..74b99bd234cc38 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
@@ -61,12 +61,45 @@
&gmac0 {
status = "okay";
+
+ phy-mode = "gmii";
+ phy-handle = <&phy0>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <2>;
+ };
+ };
};
&gmac1 {
status = "okay";
+
+ phy-mode = "gmii";
+ phy-handle = <&phy1>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy1: ethernet-phy@1 {
+ reg = <2>;
+ };
+ };
};
&gmac2 {
status = "okay";
+
+ phy-mode = "rgmii";
+ phy-handle = <&phy2>;
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy2: ethernet-phy@2 {
+ reg = <0>;
+ };
+ };
};
diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
index a231949b5f553a..9eab2d02cbe8bf 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi
+++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi
@@ -51,6 +51,13 @@
#address-cells = <2>;
#size-cells = <2>;
+ isa@18400000 {
+ compatible = "isa";
+ #size-cells = <1>;
+ #address-cells = <2>;
+ ranges = <1 0x0 0x0 0x18400000 0x4000>;
+ };
+
pmc: power-management@100d0000 {
compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon";
reg = <0x0 0x100d0000 0x0 0x58>;
@@ -109,6 +116,8 @@
msi: msi-controller@1fe01140 {
compatible = "loongson,pch-msi-1.0";
reg = <0x0 0x1fe01140 0x0 0x8>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
msi-controller;
loongson,msi-base-vec = <64>;
loongson,msi-num-vecs = <192>;
@@ -140,27 +149,34 @@
#address-cells = <3>;
#size-cells = <2>;
device_type = "pci";
+ msi-parent = <&msi>;
bus-range = <0x0 0xff>;
- ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>,
+ ranges = <0x01000000 0x0 0x00008000 0x0 0x18408000 0x0 0x00008000>,
<0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>;
gmac0: ethernet@3,0 {
reg = <0x1800 0x0 0x0 0x0 0x0>;
- interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
+ <13 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
gmac1: ethernet@3,1 {
reg = <0x1900 0x0 0x0 0x0 0x0>;
- interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
+ <15 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
gmac2: ethernet@3,2 {
reg = <0x1a00 0x0 0x0 0x0 0x0>;
- interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <17 IRQ_TYPE_LEVEL_HIGH>,
+ <18 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq", "eth_lpi";
interrupt-parent = <&pic>;
status = "disabled";
};
diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h
index b24437e28c6eda..7bd47d65bf7a04 100644
--- a/arch/loongarch/include/asm/addrspace.h
+++ b/arch/loongarch/include/asm/addrspace.h
@@ -11,6 +11,7 @@
#define _ASM_ADDRSPACE_H
#include <linux/const.h>
+#include <linux/sizes.h>
#include <asm/loongarch.h>
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 4a8adcca329b81..c2f9979b2979e5 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -14,11 +14,6 @@
#include <asm/pgtable-bits.h>
#include <asm/string.h>
-/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size);
extern void __init early_iounmap(void __iomem *addr, unsigned long size);
@@ -73,6 +68,21 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
#define __io_aw() mmiowb()
+#ifdef CONFIG_KFENCE
+#define virt_to_phys(kaddr) \
+({ \
+ (likely((unsigned long)kaddr < vm_map_base)) ? __pa((unsigned long)kaddr) : \
+ page_to_phys(tlb_virt_to_page((unsigned long)kaddr)) + offset_in_page((unsigned long)kaddr);\
+})
+
+#define phys_to_virt(paddr) \
+({ \
+ extern char *__kfence_pool; \
+ (unlikely(__kfence_pool == NULL)) ? __va((unsigned long)paddr) : \
+ page_address(phys_to_page((unsigned long)paddr)) + offset_in_page((unsigned long)paddr);\
+})
+#endif
+
#include <asm-generic/io.h>
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
index 6c82aea1c99398..a6a5760da3a332 100644
--- a/arch/loongarch/include/asm/kfence.h
+++ b/arch/loongarch/include/asm/kfence.h
@@ -16,6 +16,7 @@
static inline bool arch_kfence_init_pool(void)
{
int err;
+ char *kaddr, *vaddr;
char *kfence_pool = __kfence_pool;
struct vm_struct *area;
@@ -35,6 +36,14 @@ static inline bool arch_kfence_init_pool(void)
return false;
}
+ kaddr = kfence_pool;
+ vaddr = __kfence_pool;
+ while (kaddr < kfence_pool + KFENCE_POOL_SIZE) {
+ set_page_address(virt_to_page(kaddr), vaddr);
+ kaddr += PAGE_SIZE;
+ vaddr += PAGE_SIZE;
+ }
+
return true;
}
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index 44027060c54a28..e85df33f11c772 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -78,7 +78,26 @@ typedef struct { unsigned long pgprot; } pgprot_t;
struct page *dmw_virt_to_page(unsigned long kaddr);
struct page *tlb_virt_to_page(unsigned long kaddr);
-#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr))
+#define pfn_to_phys(pfn) __pfn_to_phys(pfn)
+#define phys_to_pfn(paddr) __phys_to_pfn(paddr)
+
+#define page_to_phys(page) pfn_to_phys(page_to_pfn(page))
+#define phys_to_page(paddr) pfn_to_page(phys_to_pfn(paddr))
+
+#ifndef CONFIG_KFENCE
+
+#define page_to_virt(page) __va(page_to_phys(page))
+#define virt_to_page(kaddr) phys_to_page(__pa(kaddr))
+
+#else
+
+#define WANT_PAGE_VIRTUAL
+
+#define page_to_virt(page) \
+({ \
+ extern char *__kfence_pool; \
+ (__kfence_pool == NULL) ? __va(page_to_phys(page)) : page_address(page); \
+})
#define virt_to_page(kaddr) \
({ \
@@ -86,6 +105,11 @@ struct page *tlb_virt_to_page(unsigned long kaddr);
dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
})
+#endif
+
+#define pfn_to_virt(pfn) page_to_virt(pfn_to_page(pfn))
+#define virt_to_pfn(kaddr) page_to_pfn(virt_to_page(kaddr))
+
extern int __virt_addr_valid(volatile void *kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c
index a9630a81b38abb..89af7c12e8c08d 100644
--- a/arch/loongarch/mm/mmap.c
+++ b/arch/loongarch/mm/mmap.c
@@ -4,6 +4,7 @@
*/
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/kfence.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mman.h>
@@ -111,6 +112,9 @@ int __virt_addr_valid(volatile void *kaddr)
{
unsigned long vaddr = (unsigned long)kaddr;
+ if (is_kfence_address((void *)kaddr))
+ return 1;
+
if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base))
return 0;
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index 2aae72e638713a..bda018150000e6 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -11,13 +11,13 @@
struct page *dmw_virt_to_page(unsigned long kaddr)
{
- return pfn_to_page(virt_to_pfn(kaddr));
+ return phys_to_page(__pa(kaddr));
}
EXPORT_SYMBOL(dmw_virt_to_page);
struct page *tlb_virt_to_page(unsigned long kaddr)
{
- return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+ return phys_to_page(pfn_to_phys(pte_pfn(*virt_to_kpte(kaddr))));
}
EXPORT_SYMBOL(tlb_virt_to_page);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 06ef440d16ce71..516dc7022bd74a 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -619,15 +619,6 @@ config MACH_EYEQ5
bool
-config FIT_IMAGE_FDT_EPM5
- bool "Include FDT for Mobileye EyeQ5 development platforms"
- depends on MACH_EYEQ5
- default n
- help
- Enable this to include the FDT for the EyeQ5 development platforms
- from Mobileye in the FIT kernel image.
- This requires u-boot on the platform.
-
config MACH_NINTENDO64
bool "Nintendo 64 console"
select CEVT_R4K
@@ -1011,6 +1002,15 @@ config CAVIUM_OCTEON_SOC
endchoice
+config FIT_IMAGE_FDT_EPM5
+ bool "Include FDT for Mobileye EyeQ5 development platforms"
+ depends on MACH_EYEQ5
+ default n
+ help
+ Enable this to include the FDT for the EyeQ5 development platforms
+ from Mobileye in the FIT kernel image.
+ This requires u-boot on the platform.
+
source "arch/mips/alchemy/Kconfig"
source "arch/mips/ath25/Kconfig"
source "arch/mips/ath79/Kconfig"
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index d14d0e37ad02dd..4a2b40ce39e091 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -159,7 +159,7 @@ extern unsigned long exception_ip(struct pt_regs *regs);
#define exception_ip(regs) exception_ip(regs)
#define profile_pc(regs) instruction_pointer(regs)
-extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
+extern asmlinkage long syscall_trace_enter(struct pt_regs *regs);
extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
extern void die(const char *, struct pt_regs *) __noreturn;
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index d1b11f66f748f0..cb1045ebab0621 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -101,6 +101,7 @@ void output_thread_info_defines(void)
OFFSET(TI_CPU, thread_info, cpu);
OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
OFFSET(TI_REGS, thread_info, regs);
+ OFFSET(TI_SYSCALL, thread_info, syscall);
DEFINE(_THREAD_SIZE, THREAD_SIZE);
DEFINE(_THREAD_MASK, THREAD_MASK);
DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 59288c13b581b8..61503a36067e9e 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -1317,16 +1317,13 @@ long arch_ptrace(struct task_struct *child, long request,
* Notification of system call entry/exit
* - triggered by current->work.syscall_trace
*/
-asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
+asmlinkage long syscall_trace_enter(struct pt_regs *regs)
{
user_exit();
- current_thread_info()->syscall = syscall;
-
if (test_thread_flag(TIF_SYSCALL_TRACE)) {
if (ptrace_report_syscall_entry(regs))
return -1;
- syscall = current_thread_info()->syscall;
}
#ifdef CONFIG_SECCOMP
@@ -1335,7 +1332,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
struct seccomp_data sd;
unsigned long args[6];
- sd.nr = syscall;
+ sd.nr = current_thread_info()->syscall;
sd.arch = syscall_get_arch(current);
syscall_get_arguments(current, regs, args);
for (i = 0; i < 6; i++)
@@ -1345,23 +1342,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
ret = __secure_computing(&sd);
if (ret == -1)
return ret;
- syscall = current_thread_info()->syscall;
}
#endif
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->regs[2]);
- audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
+ audit_syscall_entry(current_thread_info()->syscall,
+ regs->regs[4], regs->regs[5],
regs->regs[6], regs->regs[7]);
/*
* Negative syscall numbers are mistaken for rejected syscalls, but
* won't have had the return value set appropriately, so we do so now.
*/
- if (syscall < 0)
+ if (current_thread_info()->syscall < 0)
syscall_set_return_value(current, regs, -ENOSYS, 0);
- return syscall;
+ return current_thread_info()->syscall;
}
/*
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 18dc9b34505614..2c604717e63080 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -77,6 +77,18 @@ loads_done:
PTR_WD load_a7, bad_stack_a7
.previous
+ /*
+ * syscall number is in v0 unless we called syscall(__NR_###)
+ * where the real syscall number is in a0
+ */
+ subu t2, v0, __NR_O32_Linux
+ bnez t2, 1f /* __NR_syscall at offset 0 */
+ LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
+ b 2f
+1:
+ LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
+2:
+
lw t0, TI_FLAGS($28) # syscall tracing enabled?
li t1, _TIF_WORK_SYSCALL_ENTRY
and t0, t1
@@ -114,16 +126,7 @@ syscall_trace_entry:
SAVE_STATIC
move a0, sp
- /*
- * syscall number is in v0 unless we called syscall(__NR_###)
- * where the real syscall number is in a0
- */
- move a1, v0
- subu t2, v0, __NR_O32_Linux
- bnez t2, 1f /* __NR_syscall at offset 0 */
- lw a1, PT_R4(sp)
-
-1: jal syscall_trace_enter
+ jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 97456b2ca7dc32..97788859238c34 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
+ LONG_S v0, TI_SYSCALL($28) # Store syscall number
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -72,7 +74,6 @@ syscall_common:
n32_syscall_trace_entry:
SAVE_STATIC
move a0, sp
- move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S
index e6264aa62e457f..be11ea5cc67e04 100644
--- a/arch/mips/kernel/scall64-n64.S
+++ b/arch/mips/kernel/scall64-n64.S
@@ -46,6 +46,8 @@ NESTED(handle_sys64, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
+ LONG_S v0, TI_SYSCALL($28) # Store syscall number
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -82,7 +84,6 @@ n64_syscall_exit:
syscall_trace_entry:
SAVE_STATIC
move a0, sp
- move a1, v0
jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index d3c2616cba2269..7a5abb73e53127 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -79,6 +79,22 @@ loads_done:
PTR_WD load_a7, bad_stack_a7
.previous
+ /*
+ * absolute syscall number is in v0 unless we called syscall(__NR_###)
+ * where the real syscall number is in a0
+ * note: NR_syscall is the first O32 syscall but the macro is
+ * only defined when compiling with -mabi=32 (CONFIG_32BIT)
+ * therefore __NR_O32_Linux is used (4000)
+ */
+
+ subu t2, v0, __NR_O32_Linux
+ bnez t2, 1f /* __NR_syscall at offset 0 */
+ LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number
+ b 2f
+1:
+ LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number
+2:
+
li t1, _TIF_WORK_SYSCALL_ENTRY
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
@@ -113,22 +129,7 @@ trace_a_syscall:
sd a7, PT_R11(sp) # For indirect syscalls
move a0, sp
- /*
- * absolute syscall number is in v0 unless we called syscall(__NR_###)
- * where the real syscall number is in a0
- * note: NR_syscall is the first O32 syscall but the macro is
- * only defined when compiling with -mabi=32 (CONFIG_32BIT)
- * therefore __NR_O32_Linux is used (4000)
- */
- .set push
- .set reorder
- subu t1, v0, __NR_O32_Linux
- move a1, v0
- bnez t1, 1f /* __NR_syscall at offset 0 */
- ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
- .set pop
-
-1: jal syscall_trace_enter
+ jal syscall_trace_enter
bltz v0, 1f # seccomp failed? Skip syscall
diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c
index 8d98af5c7201bb..9a8393e6b4a85e 100644
--- a/arch/nios2/kernel/prom.c
+++ b/arch/nios2/kernel/prom.c
@@ -21,7 +21,8 @@
void __init early_init_devtree(void *params)
{
- __be32 *dtb = (u32 *)__dtb_start;
+ __be32 __maybe_unused *dtb = (u32 *)__dtb_start;
+
#if defined(CONFIG_NIOS2_DTB_AT_PHYS_ADDR)
if (be32_to_cpup((__be32 *)CONFIG_NIOS2_DTB_PHYS_ADDR) ==
OF_DT_HEADER) {
@@ -30,8 +31,11 @@ void __init early_init_devtree(void *params)
return;
}
#endif
+
+#ifdef CONFIG_NIOS2_DTB_SOURCE_BOOL
if (be32_to_cpu((__be32) *dtb) == OF_DT_HEADER)
params = (void *)__dtb_start;
+#endif
early_init_dt_scan(params);
}
diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
index f0a4cf01e85c03..78302f6c258006 100644
--- a/arch/powerpc/include/asm/vdso/gettimeofday.h
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -4,7 +4,6 @@
#ifndef __ASSEMBLY__
-#include <asm/page.h>
#include <asm/vdso/timebase.h>
#include <asm/barrier.h>
#include <asm/unistd.h>
@@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void);
static __always_inline
const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
- return (void *)vd + PAGE_SIZE;
+ return (void *)vd + (1U << CONFIG_PAGE_SHIFT);
}
#endif
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 252d63942f34eb..5b3115a1985226 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -151,7 +151,7 @@ endif
endif
vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg
-vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg:../compat_vdso/compat_vdso.so
+vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg
ifneq ($(CONFIG_XIP_KERNEL),y)
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy)
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 97fcde30e2477d..9f8ea0e33eb104 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -593,6 +593,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
return ptep_test_and_clear_young(vma, address, ptep);
}
+#define pgprot_nx pgprot_nx
+static inline pgprot_t pgprot_nx(pgprot_t _prot)
+{
+ return __pgprot(pgprot_val(_prot) & ~_PAGE_EXEC);
+}
+
#define pgprot_noncached pgprot_noncached
static inline pgprot_t pgprot_noncached(pgprot_t _prot)
{
diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h
index 980094c2e9761d..ac80216549ffa6 100644
--- a/arch/riscv/include/asm/syscall_wrapper.h
+++ b/arch/riscv/include/asm/syscall_wrapper.h
@@ -36,7 +36,8 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *);
ulong) \
__attribute__((alias(__stringify(___se_##prefix##name)))); \
__diag_pop(); \
- static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
+ __used; \
static long ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__))
#define SC_RISCV_REGS_TO_ARGS(x, ...) \
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index ec0cab9fbddd0d..72ec1d9bd3f312 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
- long __kr_err; \
+ long __kr_err = 0; \
\
__get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
if (unlikely(__kr_err)) \
@@ -328,7 +328,7 @@ do { \
#define __put_kernel_nofault(dst, src, type, err_label) \
do { \
- long __kr_err; \
+ long __kr_err = 0; \
\
__put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
if (unlikely(__kr_err)) \
diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h
index 10aaa83db89ef7..95050ebe9ad00b 100644
--- a/arch/riscv/include/uapi/asm/auxvec.h
+++ b/arch/riscv/include/uapi/asm/auxvec.h
@@ -34,7 +34,7 @@
#define AT_L3_CACHEGEOMETRY 47
/* entries in ARCH_DLINFO */
-#define AT_VECTOR_SIZE_ARCH 9
+#define AT_VECTOR_SIZE_ARCH 10
#define AT_MINSIGSTKSZ 51
#endif /* _UAPI_ASM_RISCV_AUXVEC_H */
diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile
index 62fa393b2eb2ea..3df4cb788c1fa4 100644
--- a/arch/riscv/kernel/compat_vdso/Makefile
+++ b/arch/riscv/kernel/compat_vdso/Makefile
@@ -74,5 +74,5 @@ quiet_cmd_compat_vdsold = VDSOLD $@
rm $@.tmp
# actual build commands
-quiet_cmd_compat_vdsoas = VDSOAS $@
+quiet_cmd_compat_vdsoas = VDSOAS $@
cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $<
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 37e87fdcf6a000..30e12b310cab73 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
*/
lockdep_assert_held(&text_mutex);
+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
@@ -92,6 +94,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);
+ preempt_enable();
+
return 0;
}
NOKPROBE_SYMBOL(__patch_insn_set);
@@ -122,6 +126,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
if (!riscv_patch_in_stop_machine)
lockdep_assert_held(&text_mutex);
+ preempt_disable();
+
if (across_pages)
patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
@@ -134,6 +140,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
if (across_pages)
patch_unmap(FIX_TEXT_POKE1);
+ preempt_enable();
+
return ret;
}
NOKPROBE_SYMBOL(__patch_insn_write);
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 92922dbd5b5c1f..e4bc61c4e58af9 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -27,8 +27,6 @@
#include <asm/vector.h>
#include <asm/cpufeature.h>
-register unsigned long gp_in_global __asm__("gp");
-
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
@@ -37,7 +35,7 @@ EXPORT_SYMBOL(__stack_chk_guard);
extern asmlinkage void ret_from_fork(void);
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
{
cpu_do_idle();
}
@@ -207,7 +205,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (unlikely(args->fn)) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->gp = gp_in_global;
/* Supervisor/Machine, irqs on: */
childregs->status = SR_PP | SR_PIE;
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 501e66debf6972..5a2edd7f027e5d 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -119,6 +119,13 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
struct __sc_riscv_v_state __user *state = sc_vec;
void __user *datap;
+ /*
+ * Mark the vstate as clean prior performing the actual copy,
+ * to avoid getting the vstate incorrectly clobbered by the
+ * discarded vector state.
+ */
+ riscv_v_vstate_set_restore(current, regs);
+
/* Copy everything of __sc_riscv_v_state except datap. */
err = __copy_from_user(&current->thread.vstate, &state->v_state,
offsetof(struct __riscv_v_ext_state, datap));
@@ -133,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec)
* Copy the whole vector content from user space datap. Use
* copy_from_user to prevent information leak.
*/
- err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
- if (unlikely(err))
- return err;
-
- riscv_v_vstate_set_restore(current, regs);
-
- return err;
+ return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize);
}
#else
#define save_v_state(task, regs) (0)
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 868d6280cf667e..05a16b1f0aee85 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -122,7 +122,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
pr_cont("\n");
__show_regs(regs);
- dump_instr(KERN_EMERG, regs);
+ dump_instr(KERN_INFO, regs);
}
force_sig_fault(signo, code, (void __user *)addr);
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 9b517fe1b8a8ec..272c431ac5b9f8 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -37,6 +37,7 @@ endif
# Disable -pg to prevent insert call site
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
+CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
# Disable profiling and instrumentation for VDSO code
GCOV_PROFILE := n
diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
index 39e72aa016a4cc..b467ba5ed91000 100644
--- a/arch/riscv/kvm/aia_aplic.c
+++ b/arch/riscv/kvm/aia_aplic.c
@@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
raw_spin_lock_irqsave(&irqd->lock, flags);
sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
- if (!pending &&
- ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
- (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
goto skip_write_pending;
+ if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH ||
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW) {
+ if (!pending)
+ goto skip_write_pending;
+ if ((irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW)
+ goto skip_write_pending;
+ if (!(irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_HIGH)
+ goto skip_write_pending;
+ }
+
if (pending)
irqd->state |= APLIC_IRQ_STATE_PENDING;
else
@@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
static bool aplic_read_input(struct aplic *aplic, u32 irq)
{
- bool ret;
- unsigned long flags;
+ u32 sourcecfg, sm, raw_input, irq_inverted;
struct aplic_irq *irqd;
+ unsigned long flags;
+ bool ret = false;
if (!irq || aplic->nr_irqs <= irq)
return false;
irqd = &aplic->irqs[irq];
raw_spin_lock_irqsave(&irqd->lock, flags);
- ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
+
+ sourcecfg = irqd->sourcecfg;
+ if (sourcecfg & APLIC_SOURCECFG_D)
+ goto skip;
+
+ sm = sourcecfg & APLIC_SOURCECFG_SM_MASK;
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
+ goto skip;
+
+ raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0;
+ irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW ||
+ sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0;
+ ret = !!(raw_input ^ irq_inverted);
+
+skip:
raw_spin_unlock_irqrestore(&irqd->lock, flags);
return ret;
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index f4a6124d25c939..994adc26db4b10 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -986,7 +986,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
{
- return copy_isa_ext_reg_indices(vcpu, NULL);;
+ return copy_isa_ext_reg_indices(vcpu, NULL);
}
static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 893566e004b73f..07d743f87b3f69 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -99,7 +99,7 @@ static void __ipi_flush_tlb_range_asid(void *info)
local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
}
-static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid,
+static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid,
unsigned long start, unsigned long size,
unsigned long stride)
{
@@ -200,7 +200,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID,
+ __flush_tlb_range(cpu_online_mask, FLUSH_TLB_NO_ASID,
start, end - start, PAGE_SIZE);
}
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index aac19008547241..1adf2f39ce59cb 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1463,6 +1463,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
if (ret < 0)
return ret;
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ const struct btf_func_model *fm;
+ int idx;
+
+ fm = bpf_jit_find_kfunc_model(ctx->prog, insn);
+ if (!fm)
+ return -EINVAL;
+
+ for (idx = 0; idx < fm->nr_args; idx++) {
+ u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx);
+
+ if (fm->arg_size[idx] == sizeof(int))
+ emit_sextw(reg, reg, ctx);
+ }
+ }
+
ret = emit_call(addr, fixed_addr, ctx);
if (ret)
return ret;
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 7138d189cc420a..0c4cad7d5a5b11 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -15,31 +15,31 @@
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
-static inline int arch_atomic_read(const atomic_t *v)
+static __always_inline int arch_atomic_read(const atomic_t *v)
{
return __atomic_read(v);
}
#define arch_atomic_read arch_atomic_read
-static inline void arch_atomic_set(atomic_t *v, int i)
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
__atomic_set(v, i);
}
#define arch_atomic_set arch_atomic_set
-static inline int arch_atomic_add_return(int i, atomic_t *v)
+static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
{
return __atomic_add_barrier(i, &v->counter) + i;
}
#define arch_atomic_add_return arch_atomic_add_return
-static inline int arch_atomic_fetch_add(int i, atomic_t *v)
+static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
return __atomic_add_barrier(i, &v->counter);
}
#define arch_atomic_fetch_add arch_atomic_fetch_add
-static inline void arch_atomic_add(int i, atomic_t *v)
+static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
__atomic_add(i, &v->counter);
}
@@ -50,11 +50,11 @@ static inline void arch_atomic_add(int i, atomic_t *v)
#define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v)
#define ATOMIC_OPS(op) \
-static inline void arch_atomic_##op(int i, atomic_t *v) \
+static __always_inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
__atomic_##op(i, &v->counter); \
} \
-static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
+static __always_inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
{ \
return __atomic_##op##_barrier(i, &v->counter); \
}
@@ -74,7 +74,7 @@ ATOMIC_OPS(xor)
#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
+static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
return __atomic_cmpxchg(&v->counter, old, new);
}
@@ -82,31 +82,31 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
#define ATOMIC64_INIT(i) { (i) }
-static inline s64 arch_atomic64_read(const atomic64_t *v)
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
return __atomic64_read(v);
}
#define arch_atomic64_read arch_atomic64_read
-static inline void arch_atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
__atomic64_set(v, i);
}
#define arch_atomic64_set arch_atomic64_set
-static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
return __atomic64_add_barrier(i, (long *)&v->counter) + i;
}
#define arch_atomic64_add_return arch_atomic64_add_return
-static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
+static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
return __atomic64_add_barrier(i, (long *)&v->counter);
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-static inline void arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
__atomic64_add(i, (long *)&v->counter);
}
@@ -114,20 +114,20 @@ static inline void arch_atomic64_add(s64 i, atomic64_t *v)
#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
-static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
return __atomic64_cmpxchg((long *)&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
-#define ATOMIC64_OPS(op) \
-static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
-{ \
- __atomic64_##op(i, (long *)&v->counter); \
-} \
-static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \
-{ \
- return __atomic64_##op##_barrier(i, (long *)&v->counter); \
+#define ATOMIC64_OPS(op) \
+static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v) \
+{ \
+ __atomic64_##op(i, (long *)&v->counter); \
+} \
+static __always_inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \
+{ \
+ return __atomic64_##op##_barrier(i, (long *)&v->counter); \
}
ATOMIC64_OPS(and)
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index 50510e08b893b5..7fa5f96a553a47 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -8,7 +8,7 @@
#ifndef __ARCH_S390_ATOMIC_OPS__
#define __ARCH_S390_ATOMIC_OPS__
-static inline int __atomic_read(const atomic_t *v)
+static __always_inline int __atomic_read(const atomic_t *v)
{
int c;
@@ -18,14 +18,14 @@ static inline int __atomic_read(const atomic_t *v)
return c;
}
-static inline void __atomic_set(atomic_t *v, int i)
+static __always_inline void __atomic_set(atomic_t *v, int i)
{
asm volatile(
" st %1,%0\n"
: "=R" (v->counter) : "d" (i));
}
-static inline s64 __atomic64_read(const atomic64_t *v)
+static __always_inline s64 __atomic64_read(const atomic64_t *v)
{
s64 c;
@@ -35,7 +35,7 @@ static inline s64 __atomic64_read(const atomic64_t *v)
return c;
}
-static inline void __atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void __atomic64_set(atomic64_t *v, s64 i)
{
asm volatile(
" stg %1,%0\n"
@@ -45,7 +45,7 @@ static inline void __atomic64_set(atomic64_t *v, s64 i)
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
#define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \
-static inline op_type op_name(op_type val, op_type *ptr) \
+static __always_inline op_type op_name(op_type val, op_type *ptr) \
{ \
op_type old; \
\
@@ -96,7 +96,7 @@ __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi")
#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
#define __ATOMIC_OP(op_name, op_string) \
-static inline int op_name(int val, int *ptr) \
+static __always_inline int op_name(int val, int *ptr) \
{ \
int old, new; \
\
@@ -122,7 +122,7 @@ __ATOMIC_OPS(__atomic_xor, "xr")
#undef __ATOMIC_OPS
#define __ATOMIC64_OP(op_name, op_string) \
-static inline long op_name(long val, long *ptr) \
+static __always_inline long op_name(long val, long *ptr) \
{ \
long old, new; \
\
@@ -154,7 +154,7 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr")
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-static inline int __atomic_cmpxchg(int *ptr, int old, int new)
+static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new)
{
asm volatile(
" cs %[old],%[new],%[ptr]"
@@ -164,7 +164,7 @@ static inline int __atomic_cmpxchg(int *ptr, int old, int new)
return old;
}
-static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
+static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
{
int old_expected = old;
@@ -176,7 +176,7 @@ static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
return old == old_expected;
}
-static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
+static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new)
{
asm volatile(
" csg %[old],%[new],%[ptr]"
@@ -186,7 +186,7 @@ static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
return old;
}
-static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
+static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
{
long old_expected = old;
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index bf15da0fedbca5..0e3da500e98c19 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -12,12 +12,12 @@
#define PREEMPT_NEED_RESCHED 0x80000000
#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
-static inline int preempt_count(void)
+static __always_inline int preempt_count(void)
{
return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
}
-static inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(int pc)
{
int old, new;
@@ -29,22 +29,22 @@ static inline void preempt_count_set(int pc)
old, new) != old);
}
-static inline void set_preempt_need_resched(void)
+static __always_inline void set_preempt_need_resched(void)
{
__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
}
-static inline void clear_preempt_need_resched(void)
+static __always_inline void clear_preempt_need_resched(void)
{
__atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
}
-static inline bool test_preempt_need_resched(void)
+static __always_inline bool test_preempt_need_resched(void)
{
return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED);
}
-static inline void __preempt_count_add(int val)
+static __always_inline void __preempt_count_add(int val)
{
/*
* With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES
@@ -59,17 +59,17 @@ static inline void __preempt_count_add(int val)
__atomic_add(val, &S390_lowcore.preempt_count);
}
-static inline void __preempt_count_sub(int val)
+static __always_inline void __preempt_count_sub(int val)
{
__preempt_count_add(-val);
}
-static inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool __preempt_count_dec_and_test(void)
{
return __atomic_add(-1, &S390_lowcore.preempt_count) == 1;
}
-static inline bool should_resched(int preempt_offset)
+static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(READ_ONCE(S390_lowcore.preempt_count) ==
preempt_offset);
@@ -79,45 +79,45 @@ static inline bool should_resched(int preempt_offset)
#define PREEMPT_ENABLED (0)
-static inline int preempt_count(void)
+static __always_inline int preempt_count(void)
{
return READ_ONCE(S390_lowcore.preempt_count);
}
-static inline void preempt_count_set(int pc)
+static __always_inline void preempt_count_set(int pc)
{
S390_lowcore.preempt_count = pc;
}
-static inline void set_preempt_need_resched(void)
+static __always_inline void set_preempt_need_resched(void)
{
}
-static inline void clear_preempt_need_resched(void)
+static __always_inline void clear_preempt_need_resched(void)
{
}
-static inline bool test_preempt_need_resched(void)
+static __always_inline bool test_preempt_need_resched(void)
{
return false;
}
-static inline void __preempt_count_add(int val)
+static __always_inline void __preempt_count_add(int val)
{
S390_lowcore.preempt_count += val;
}
-static inline void __preempt_count_sub(int val)
+static __always_inline void __preempt_count_sub(int val)
{
S390_lowcore.preempt_count -= val;
}
-static inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool __preempt_count_dec_and_test(void)
{
return !--S390_lowcore.preempt_count && tif_need_resched();
}
-static inline bool should_resched(int preempt_offset)
+static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(preempt_count() == preempt_offset &&
tif_need_resched());
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 787394978bc0f8..6a1e0fbbaa15b3 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -340,7 +340,8 @@ SYM_CODE_START(pgm_check_handler)
mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
stctg %c1,%c1,__PT_CR1(%r11)
#if IS_ENABLED(CONFIG_KVM)
- lg %r12,__LC_GMAP
+ ltg %r12,__LC_GMAP
+ jz 5f
clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11)
jne 5f
BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
@@ -635,6 +636,7 @@ SYM_DATA_START_LOCAL(daton_psw)
SYM_DATA_END(daton_psw)
.section .rodata, "a"
+ .balign 8
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
SYM_DATA_START(sys_call_table)
#include "asm/syscall_table.h"
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 823d652e3917f8..4ad472d130a3c0 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -90,7 +90,6 @@ static void paicrypt_event_destroy(struct perf_event *event)
event->cpu);
struct paicrypt_map *cpump = mp->mapptr;
- cpump->event = NULL;
static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
@@ -356,10 +355,15 @@ static int paicrypt_add(struct perf_event *event, int flags)
static void paicrypt_stop(struct perf_event *event, int flags)
{
- if (!event->attr.sample_period) /* Counting */
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
paicrypt_read(event);
- else /* Sampling */
+ } else { /* Sampling */
perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index 616a25606cd63d..a6da7e0cc7a66d 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -122,7 +122,6 @@ static void paiext_event_destroy(struct perf_event *event)
free_page(PAI_SAVE_AREA(event));
mutex_lock(&paiext_reserve_mutex);
- cpump->event = NULL;
if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
paiext_free(mp);
paiext_root_free();
@@ -362,10 +361,15 @@ static int paiext_add(struct perf_event *event, int flags)
static void paiext_stop(struct perf_event *event, int flags)
{
- if (!event->attr.sample_period) /* Counting */
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+
+ if (!event->attr.sample_period) { /* Counting */
paiext_read(event);
- else /* Sampling */
+ } else { /* Sampling */
perf_sched_cb_dec(event->pmu);
+ cpump->event = NULL;
+ }
event->hw.state = PERF_HES_STOPPED;
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index c421dd44ffbe03..0c66b32e0f9f1b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -75,7 +75,7 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
if (!IS_ENABLED(CONFIG_PGSTE))
return KERNEL_FAULT;
gmap = (struct gmap *)S390_lowcore.gmap;
- if (regs->cr1 == gmap->asce)
+ if (gmap && gmap->asce == regs->cr1)
return GMAP_FAULT;
return KERNEL_FAULT;
}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index b418333bb08635..5af0402e94b88c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size)
* PLT for hotpatchable calls. The calling convention is the same as for the
* ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
*/
-extern const char bpf_plt[];
-extern const char bpf_plt_ret[];
-extern const char bpf_plt_target[];
-extern const char bpf_plt_end[];
-#define BPF_PLT_SIZE 32
+struct bpf_plt {
+ char code[16];
+ void *ret;
+ void *target;
+} __packed;
+extern const struct bpf_plt bpf_plt;
asm(
".pushsection .rodata\n"
" .balign 8\n"
@@ -531,15 +532,14 @@ asm(
" .balign 8\n"
"bpf_plt_ret: .quad 0\n"
"bpf_plt_target: .quad 0\n"
- "bpf_plt_end:\n"
" .popsection\n"
);
-static void bpf_jit_plt(void *plt, void *ret, void *target)
+static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
- memcpy(plt, bpf_plt, BPF_PLT_SIZE);
- *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
- *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
+ memcpy(plt, &bpf_plt, sizeof(*plt));
+ plt->ret = ret;
+ plt->target = target;
}
/*
@@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
jit->prg = ALIGN(jit->prg, 8);
jit->prologue_plt = jit->prg;
if (jit->prg_buf)
- bpf_jit_plt(jit->prg_buf + jit->prg,
+ bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg),
jit->prg_buf + jit->prologue_plt_ret, NULL);
- jit->prg += BPF_PLT_SIZE;
+ jit->prg += sizeof(struct bpf_plt);
}
static int get_probe_mem_regno(const u8 *insn)
@@ -2040,9 +2040,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_jit jit;
int pass;
- if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
- return orig_fp;
-
if (!fp->jit_requested)
return orig_fp;
@@ -2148,14 +2145,11 @@ bool bpf_jit_supports_far_kfunc_call(void)
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *old_addr, void *new_addr)
{
+ struct bpf_plt expected_plt, current_plt, new_plt, *plt;
struct {
u16 opc;
s32 disp;
} __packed insn;
- char expected_plt[BPF_PLT_SIZE];
- char current_plt[BPF_PLT_SIZE];
- char new_plt[BPF_PLT_SIZE];
- char *plt;
char *ret;
int err;
@@ -2174,18 +2168,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
*/
} else {
/* Verify the PLT. */
- plt = (char *)ip + (insn.disp << 1);
- err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
+ plt = ip + (insn.disp << 1);
+ err = copy_from_kernel_nofault(&current_plt, plt,
+ sizeof(current_plt));
if (err < 0)
return err;
ret = (char *)ip + 6;
- bpf_jit_plt(expected_plt, ret, old_addr);
- if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
+ bpf_jit_plt(&expected_plt, ret, old_addr);
+ if (memcmp(&current_plt, &expected_plt, sizeof(current_plt)))
return -EINVAL;
/* Adjust the call address. */
- bpf_jit_plt(new_plt, ret, new_addr);
- s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
- new_plt + (bpf_plt_target - bpf_plt),
+ bpf_jit_plt(&new_plt, ret, new_addr);
+ s390_kernel_write(&plt->target, &new_plt.target,
sizeof(void *));
}
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index 6a1f36df6a1817..cf0ad89f5639da 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -28,7 +28,7 @@ obj-y += net/
obj-$(CONFIG_KEXEC_FILE) += purgatory/
-obj-y += virt/svm/
+obj-y += virt/
# for cleaning
subdir- += boot tools
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 39886bab943a88..4474bf32d0a497 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2439,6 +2439,8 @@ config USE_X86_SEG_SUPPORT
# with named address spaces - see GCC PR sanitizer/111736.
#
depends on !KASAN
+ # -fsanitize=thread (KCSAN) is also incompatible.
+ depends on !KCSAN
config CC_HAS_SLS
def_bool $(cc-option,-mharden-sls=all)
@@ -2631,6 +2633,16 @@ config MITIGATION_RFDS
stored in floating point, vector and integer registers.
See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
+config MITIGATION_SPECTRE_BHI
+ bool "Mitigate Spectre-BHB (Branch History Injection)"
+ depends on CPU_SUP_INTEL
+ default y
+ help
+ Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
+ where the branch history buffer is poisoned to speculatively steer
+ indirect branches.
+ See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 662d9d4033e6b8..5ab93fcdd691dc 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -251,8 +251,6 @@ archheaders:
libs-y += arch/x86/lib/
-core-y += arch/x86/virt/
-
# drivers-y are linked after core-y
drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
drivers-$(CONFIG_PCI) += arch/x86/pci/
diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
index 719e939050cbfa..876fc6d46a1318 100644
--- a/arch/x86/boot/compressed/efi_mixed.S
+++ b/arch/x86/boot/compressed/efi_mixed.S
@@ -15,10 +15,12 @@
*/
#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/page_types.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
+#include <asm/setup.h>
.code64
.text
@@ -149,6 +151,7 @@ SYM_FUNC_END(__efi64_thunk)
SYM_FUNC_START(efi32_stub_entry)
call 1f
1: popl %ecx
+ leal (efi32_boot_args - 1b)(%ecx), %ebx
/* Clear BSS */
xorl %eax, %eax
@@ -163,6 +166,7 @@ SYM_FUNC_START(efi32_stub_entry)
popl %ecx
popl %edx
popl %esi
+ movl %esi, 8(%ebx)
jmp efi32_entry
SYM_FUNC_END(efi32_stub_entry)
#endif
@@ -239,8 +243,6 @@ SYM_FUNC_END(efi_enter32)
*
* Arguments: %ecx image handle
* %edx EFI system table pointer
- * %esi struct bootparams pointer (or NULL when not using
- * the EFI handover protocol)
*
* Since this is the point of no return for ordinary execution, no registers
* are considered live except for the function parameters. [Note that the EFI
@@ -266,9 +268,18 @@ SYM_FUNC_START_LOCAL(efi32_entry)
leal (efi32_boot_args - 1b)(%ebx), %ebx
movl %ecx, 0(%ebx)
movl %edx, 4(%ebx)
- movl %esi, 8(%ebx)
movb $0x0, 12(%ebx) // efi_is64
+ /*
+ * Allocate some memory for a temporary struct boot_params, which only
+ * needs the minimal pieces that startup_32() relies on.
+ */
+ subl $PARAM_SIZE, %esp
+ movl %esp, %esi
+ movl $PAGE_SIZE, BP_kernel_alignment(%esi)
+ movl $_end - 1b, BP_init_size(%esi)
+ subl $startup_32 - 1b, BP_init_size(%esi)
+
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
@@ -294,8 +305,7 @@ SYM_FUNC_START(efi32_pe_entry)
movl 8(%ebp), %ecx // image_handle
movl 12(%ebp), %edx // sys_table
- xorl %esi, %esi
- jmp efi32_entry // pass %ecx, %edx, %esi
+ jmp efi32_entry // pass %ecx, %edx
// no other registers remain live
2: popl %edi // restore callee-save registers
diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c
index d07be9d05cd037..b31ef2424d194b 100644
--- a/arch/x86/coco/core.c
+++ b/arch/x86/coco/core.c
@@ -3,19 +3,28 @@
* Confidential Computing Platform Capability checks
*
* Copyright (C) 2021 Advanced Micro Devices, Inc.
+ * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
*/
#include <linux/export.h>
#include <linux/cc_platform.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <asm/archrandom.h>
#include <asm/coco.h>
#include <asm/processor.h>
enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE;
u64 cc_mask __ro_after_init;
+static struct cc_attr_flags {
+ __u64 host_sev_snp : 1,
+ __resv : 63;
+} cc_flags;
+
static bool noinstr intel_cc_platform_has(enum cc_attr attr)
{
switch (attr) {
@@ -89,6 +98,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr)
case CC_ATTR_GUEST_SEV_SNP:
return sev_status & MSR_AMD64_SEV_SNP_ENABLED;
+ case CC_ATTR_HOST_SEV_SNP:
+ return cc_flags.host_sev_snp;
+
default:
return false;
}
@@ -148,3 +160,84 @@ u64 cc_mkdec(u64 val)
}
}
EXPORT_SYMBOL_GPL(cc_mkdec);
+
+static void amd_cc_platform_clear(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_HOST_SEV_SNP:
+ cc_flags.host_sev_snp = 0;
+ break;
+ default:
+ break;
+ }
+}
+
+void cc_platform_clear(enum cc_attr attr)
+{
+ switch (cc_vendor) {
+ case CC_VENDOR_AMD:
+ amd_cc_platform_clear(attr);
+ break;
+ default:
+ break;
+ }
+}
+
+static void amd_cc_platform_set(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_HOST_SEV_SNP:
+ cc_flags.host_sev_snp = 1;
+ break;
+ default:
+ break;
+ }
+}
+
+void cc_platform_set(enum cc_attr attr)
+{
+ switch (cc_vendor) {
+ case CC_VENDOR_AMD:
+ amd_cc_platform_set(attr);
+ break;
+ default:
+ break;
+ }
+}
+
+__init void cc_random_init(void)
+{
+ /*
+ * The seed is 32 bytes (in units of longs), which is 256 bits, which
+ * is the security level that the RNG is targeting.
+ */
+ unsigned long rng_seed[32 / sizeof(long)];
+ size_t i, longs;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ return;
+
+ /*
+ * Since the CoCo threat model includes the host, the only reliable
+ * source of entropy that can be neither observed nor manipulated is
+ * RDRAND. Usually, RDRAND failure is considered tolerable, but since
+ * CoCo guests have no other unobservable source of entropy, it's
+ * important to at least ensure the RNG gets some initial random seeds.
+ */
+ for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) {
+ longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i);
+
+ /*
+ * A zero return value means that the guest doesn't have RDRAND
+ * or the CPU is physically broken, and in both cases that
+ * means most crypto inside of the CoCo instance will be
+ * broken, defeating the purpose of CoCo in the first place. So
+ * just panic here because it's absolutely unsafe to continue
+ * executing.
+ */
+ if (longs == 0)
+ panic("RDRAND is defective.");
+ }
+ add_device_randomness(rng_seed, sizeof(rng_seed));
+ memzero_explicit(rng_seed, sizeof(rng_seed));
+}
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6356060caaf311..6de50b80702e61 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
if (likely(unr < NR_syscalls)) {
unr = array_index_nospec(unr, NR_syscalls);
- regs->ax = sys_call_table[unr](regs);
+ regs->ax = x64_sys_call(regs, unr);
return true;
}
return false;
@@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
xnr = array_index_nospec(xnr, X32_NR_syscalls);
- regs->ax = x32_sys_call_table[xnr](regs);
+ regs->ax = x32_sys_call(regs, xnr);
return true;
}
return false;
@@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
if (likely(unr < IA32_NR_syscalls)) {
unr = array_index_nospec(unr, IA32_NR_syscalls);
- regs->ax = ia32_sys_call_table[unr](regs);
+ regs->ax = ia32_sys_call(regs, unr);
} else if (nr != -1) {
regs->ax = __ia32_sys_ni_syscall(regs);
}
@@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
}
/**
- * int80_emulation - 32-bit legacy syscall entry
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
*
* This entry point can be used by 32-bit and 64-bit programs to perform
* 32-bit system calls. Instances of INT $0x80 can be found inline in
@@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
* eax: system call number
* ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6
*/
-DEFINE_IDTENTRY_RAW(int80_emulation)
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
{
int nr;
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8af2a26b24f6a9..1b5be07f86698a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
/* clobbers %rax, make sure it is after saving the syscall nr */
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
call do_syscall_64 /* returns with IRQs disabled */
@@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
call make_task_dead
SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * call 2
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone. Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+SYM_FUNC_START(clear_bhb_loop)
+ push %rbp
+ mov %rsp, %rbp
+ movl $5, %ecx
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 1f
+ jmp 5f
+ .align 64, 0xcc
+ ANNOTATE_INTRA_FUNCTION_CALL
+1: call 2f
+ RET
+ .align 64, 0xcc
+2: movl $5, %eax
+3: jmp 4f
+ nop
+4: sub $1, %eax
+ jnz 3b
+ sub $1, %ecx
+ jnz 1b
+ RET
+5: lfence
+ pop %rbp
+ RET
+SYM_FUNC_END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(clear_bhb_loop)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index eabf48c4d4b4c3..c779046cc3fe79 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
IBRS_ENTER
UNTRAIN_RET
+ CLEAR_BRANCH_HISTORY
movq %rsp, %rdi
call do_fast_syscall_32
@@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
int3
SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
+ * point to C routines, however since this is a system call interface the branch
+ * history needs to be scrubbed to protect against BHI attacks, and that
+ * scrubbing needs to take place in assembly code prior to entering any C
+ * routines.
+ */
+SYM_CODE_START(int80_emulation)
+ ANNOTATE_NOENDBR
+ UNWIND_HINT_FUNC
+ CLEAR_BRANCH_HISTORY
+ jmp do_int80_emulation
+SYM_CODE_END(int80_emulation)
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 8cfc9bc73e7f8b..c2235bae17ef66 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -18,8 +18,25 @@
#include <asm/syscalls_32.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
+#ifdef CONFIG_X86_32
#define __SYSCALL(nr, sym) __ia32_##sym,
-
-__visible const sys_call_ptr_t ia32_sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_32.h>
};
+#undef __SYSCALL
+#endif
+
+#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
+
+long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_32.h>
+ default: return __ia32_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index be120eec1fc9f9..33b3f09e6f151e 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -11,8 +11,23 @@
#include <asm/syscalls_64.h>
#undef __SYSCALL
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
#define __SYSCALL(nr, sym) __x64_##sym,
-
-asmlinkage const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_64.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
+};
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index bdd0e03a1265d2..03de4a93213182 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -11,8 +11,12 @@
#include <asm/syscalls_x32.h>
#undef __SYSCALL
-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
-#include <asm/syscalls_x32.h>
+long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ switch (nr) {
+ #include <asm/syscalls_x32.h>
+ default: return __x64_sys_ni_syscall(regs);
+ }
};
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index fd63051bbbbb82..3d64bcc403cfbe 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o
obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o
OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n
+OBJECT_FILES_NON_STANDARD_vdso-image-x32.o := n
OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n
OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index aec16e581f5b2a..985ef3b479191f 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
/*
* AMD Performance Monitor Family 17h and later:
*/
-static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
@@ -262,10 +262,39 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
+static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+};
+
+static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x100000120,
+};
+
static u64 amd_pmu_event_map(int hw_event)
{
- if (boot_cpu_data.x86 >= 0x17)
- return amd_f17h_perfmon_event_map[hw_event];
+ if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a)
+ return amd_zen4_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19)
+ return amd_zen2_perfmon_event_map[hw_event];
+
+ if (cpu_feature_enabled(X86_FEATURE_ZEN1))
+ return amd_zen1_perfmon_event_map[hw_event];
return amd_perfmon_event_map[hw_event];
}
@@ -904,8 +933,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 4a1e600314d5df..5149830c7c4fa6 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
@@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void)
return;
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
}
__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 09050641ce5d3c..5b0dd07b1ef19e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ cpuc->assign[i-1] = cpuc->assign[i];
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 2641ba620f12a5..e010bfed841705 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct pmu *pmu = event->pmu;
/*
- * Make sure we get updated with the first PEBS
- * event. It will trigger also during removal, but
- * that does not hurt:
+ * Make sure we get updated with the first PEBS event.
+ * During removal, ->pebs_data_cfg is still valid for
+ * the last PEBS event. Don't clear it.
*/
- if (cpuc->n_pebs == 1)
+ if ((cpuc->n_pebs == 1) && add)
cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 5fc45543e95502..0569f579338b51 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -105,7 +105,7 @@ static bool cpu_is_self(int cpu)
* IPI implementation on Hyper-V.
*/
static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
- bool exclude_self)
+ bool exclude_self)
{
struct hv_send_ipi_ex *ipi_arg;
unsigned long flags;
@@ -132,8 +132,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) {
ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask,
- exclude_self ? cpu_is_self : NULL);
+ nr_bank = cpumask_to_vpset_skip(&ipi_arg->vp_set, mask,
+ exclude_self ? cpu_is_self : NULL);
/*
* 'nr_bank <= 0' means some CPUs in cpumask can't be
@@ -147,7 +147,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
}
status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
- ipi_arg, NULL);
+ ipi_arg, NULL);
ipi_mask_ex_done:
local_irq_restore(flags);
@@ -155,7 +155,7 @@ ipi_mask_ex_done:
}
static bool __send_ipi_mask(const struct cpumask *mask, int vector,
- bool exclude_self)
+ bool exclude_self)
{
int cur_cpu, vcpu, this_cpu = smp_processor_id();
struct hv_send_ipi ipi_arg;
@@ -181,7 +181,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
return false;
}
- if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
return false;
/*
@@ -218,7 +218,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
}
status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
- ipi_arg.cpu_mask);
+ ipi_arg.cpu_mask);
return hv_result_success(status);
do_ex_hypercall:
@@ -241,7 +241,7 @@ static bool __send_ipi_one(int cpu, int vector)
return false;
}
- if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
return false;
if (vp >= 64)
diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c
index 68a0843d4750f7..3fa1f2ee7b0d06 100644
--- a/arch/x86/hyperv/hv_proc.c
+++ b/arch/x86/hyperv/hv_proc.c
@@ -3,7 +3,6 @@
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/clockchips.h>
-#include <linux/acpi.h>
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
@@ -116,12 +115,11 @@ free_buf:
int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
{
- struct hv_add_logical_processor_in *input;
- struct hv_add_logical_processor_out *output;
+ struct hv_input_add_logical_processor *input;
+ struct hv_output_add_logical_processor *output;
u64 status;
unsigned long flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);
/*
* When adding a logical processor, the hypervisor may return
@@ -137,11 +135,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
input->lp_index = lp_index;
input->apic_id = apic_id;
- input->flags = 0;
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
+ input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR,
input, output);
local_irq_restore(flags);
@@ -166,7 +160,6 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
u64 status;
unsigned long irq_flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);
/* Root VPs don't seem to need pages deposited */
if (partition_id != hv_current_partition_id) {
@@ -185,14 +178,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
input->vp_index = vp_index;
input->flags = flags;
input->subnode_type = HvSubnodeAny;
- if (node != NUMA_NO_NODE) {
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
- } else {
- input->proximity_domain_info.as_uint64 = 0;
- }
+ input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
local_irq_restore(irq_flags);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index fcd20c6dc7f90c..67b68d0d17d1ec 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void);
extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
struct module *mod);
extern void *callthunks_translate_call_dest(void *dest);
-extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
#else
static __always_inline void callthunks_patch_builtin_calls(void) {}
static __always_inline void
@@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
return dest;
}
static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
- void *func)
+ void *func, void *ip)
{
return 0;
}
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 94ce0f7c9d3a26..e6ab0cf15ed573 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -13,6 +13,7 @@
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
+#include <asm/io.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -98,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
static inline u32 native_apic_mem_read(u32 reg)
{
- return *((volatile u32 *)(APIC_BASE + reg));
+ return readl((void __iomem *)(APIC_BASE + reg));
}
static inline void native_apic_mem_eoi(void)
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 076bf8dee70264..25466c4d213481 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -14,6 +14,7 @@
#include <asm/asm.h>
#include <asm/fred.h>
#include <asm/gsseg.h>
+#include <asm/nospec-branch.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index fb7388bbc212f9..c086699b0d0c59 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask)
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
+void cc_random_init(void);
#else
#define cc_vendor (CC_VENDOR_NONE)
@@ -34,6 +35,7 @@ static inline u64 cc_mkdec(u64 val)
{
return val;
}
+static inline void cc_random_init(void) { }
#endif
#endif /* _ASM_X86_COCO_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a1273698fc430b..686e92d2663eee 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -33,6 +33,8 @@ enum cpuid_leafs
CPUID_7_EDX,
CPUID_8000_001F_EAX,
CPUID_8000_0021_EAX,
+ CPUID_LNX_5,
+ NR_CPUID_WORDS,
};
#define X86_CAP_FMT_NUM "%d:%d"
@@ -91,8 +93,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -116,8 +119,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f0337f7bcf1625..3c7434329661c6 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 21 /* N 32-bit words worth of info */
+#define NCAPINTS 22 /* N 32-bit words worth of info */
#define NBUGINTS 2 /* N 32-bit bug flags */
/*
@@ -460,6 +460,18 @@
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc and Linux defined features.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
+
+/*
* BUG word(s)
*/
#define X86_BUG(x) (NCAPINTS*32 + (x))
@@ -507,4 +519,5 @@
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index da4054fbf533e9..c492bdc97b0595 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -155,6 +155,7 @@
#define DISABLED_MASK18 (DISABLE_IBT)
#define DISABLED_MASK19 (DISABLE_SEV_SNP)
#define DISABLED_MASK20 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define DISABLED_MASK21 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 05956bd8bacf50..e72c2b87295799 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -61,10 +61,13 @@
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
/* A mask for bits which the kernel toggles when controlling mitigations */
#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
- | SPEC_CTRL_RRSBA_DIS_S)
+ | SPEC_CTRL_RRSBA_DIS_S \
+ | SPEC_CTRL_BHI_DIS_S)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
@@ -163,6 +166,10 @@
* are restricted to targets in
* kernel.
*/
+#define ARCH_CAP_BHI_NO BIT(20) /*
+ * CPU is not affected by Branch
+ * History Injection.
+ */
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index fc3a8a3c7ffeec..ff5f1ecc7d1e65 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -262,11 +262,20 @@
.Lskip_rsb_\@:
.endm
+/*
+ * The CALL to srso_alias_untrain_ret() must be patched in directly at
+ * the spot where untraining must be done, ie., srso_alias_untrain_ret()
+ * must be the target of a CALL instruction instead of indirectly
+ * jumping to a wrapper which then calls it. Therefore, this macro is
+ * called outside of __UNTRAIN_RET below, for the time being, before the
+ * kernel can support nested alternatives with arbitrary nesting.
+ */
+.macro CALL_UNTRAIN_RET
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
-#define CALL_UNTRAIN_RET "call entry_untrain_ret"
-#else
-#define CALL_UNTRAIN_RET ""
+ ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
+ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
#endif
+.endm
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
@@ -282,8 +291,8 @@
.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY)
VALIDATE_UNRET_END
- ALTERNATIVE_3 "", \
- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ CALL_UNTRAIN_RET
+ ALTERNATIVE_2 "", \
"call entry_ibpb", \ibpb_feature, \
__stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
#endif
@@ -317,6 +326,19 @@
ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
.endm
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+ ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@@ -342,6 +364,8 @@ extern void retbleed_return_thunk(void);
static inline void retbleed_return_thunk(void) {}
#endif
+extern void srso_alias_untrain_ret(void);
+
#ifdef CONFIG_MITIGATION_SRSO
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
@@ -357,6 +381,10 @@ extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void);
extern void entry_ibpb(void);
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
extern void (*x86_return_thunk)(void);
extern void __warn_thunk(void);
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 7ba1726b71c7b8..e9187ddd3d1fdc 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -99,6 +99,7 @@
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
#define REQUIRED_MASK20 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define REQUIRED_MASK21 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 9477b4053bce2c..7f57382afee417 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -218,17 +218,16 @@ void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __noreturn snp_abort(void);
+void snp_dmi_setup(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
-void kdump_sev_callback(void);
void sev_show_status(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
@@ -244,12 +243,12 @@ static inline void __init
early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
static inline void __init
early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { }
-static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { }
static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { }
static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
static inline void snp_abort(void) { }
+static inline void snp_dmi_setup(void) { }
static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
{
return -ENOTTY;
@@ -258,7 +257,6 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
-static inline void kdump_sev_callback(void) { }
static inline void sev_show_status(void) { }
#endif
@@ -270,6 +268,7 @@ int psmash(u64 pfn);
int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable);
int rmp_make_shared(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
+void kdump_sev_callback(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
@@ -282,6 +281,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
}
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
+static inline void kdump_sev_callback(void) { }
#endif
#endif
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index f44e2f9ab65d77..2fc7bc3863ff6f 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,19 +16,17 @@
#include <asm/thread_info.h> /* for TS_COMPAT */
#include <asm/unistd.h>
+/* This is used purely for kernel/trace/trace_syscalls.c */
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
extern const sys_call_ptr_t sys_call_table[];
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#else
/*
* These may not exist, but still put the prototypes in so we
* can use IS_ENABLED().
*/
-extern const sys_call_ptr_t ia32_sys_call_table[];
-extern const sys_call_ptr_t x32_sys_call_table[];
-#endif
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
}
bool do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index b89b40f250e6f5..6149eabe200f5b 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -30,12 +30,13 @@ struct x86_init_mpparse {
* @reserve_resources: reserve the standard resources for the
* platform
* @memory_setup: platform specific memory setup
- *
+ * @dmi_setup: platform specific DMI setup
*/
struct x86_init_resources {
void (*probe_roms)(void);
void (*reserve_resources)(void);
char *(*memory_setup)(void);
+ void (*dmi_setup)(void);
};
/**
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index ad29984d5e398d..ef11aa4cab4253 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -694,6 +694,7 @@ enum sev_cmd_id {
struct kvm_sev_cmd {
__u32 id;
+ __u32 pad0;
__u64 data;
__u32 error;
__u32 sev_fd;
@@ -704,28 +705,35 @@ struct kvm_sev_launch_start {
__u32 policy;
__u64 dh_uaddr;
__u32 dh_len;
+ __u32 pad0;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad1;
};
struct kvm_sev_launch_update_data {
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_launch_secret {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
struct kvm_sev_launch_measure {
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_guest_status {
@@ -738,33 +746,43 @@ struct kvm_sev_dbg {
__u64 src_uaddr;
__u64 dst_uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_attestation_report {
__u8 mnonce[16];
__u64 uaddr;
__u32 len;
+ __u32 pad0;
};
struct kvm_sev_send_start {
__u32 policy;
+ __u32 pad0;
__u64 pdh_cert_uaddr;
__u32 pdh_cert_len;
+ __u32 pad1;
__u64 plat_certs_uaddr;
__u32 plat_certs_len;
+ __u32 pad2;
__u64 amd_certs_uaddr;
__u32 amd_certs_len;
+ __u32 pad3;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad4;
};
struct kvm_sev_send_update_data {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
struct kvm_sev_receive_start {
@@ -772,17 +790,22 @@ struct kvm_sev_receive_start {
__u32 policy;
__u64 pdh_uaddr;
__u32 pdh_len;
+ __u32 pad0;
__u64 session_uaddr;
__u32 session_len;
+ __u32 pad1;
};
struct kvm_sev_receive_update_data {
__u64 hdr_uaddr;
__u32 hdr_len;
+ __u32 pad0;
__u64 guest_uaddr;
__u32 guest_len;
+ __u32 pad1;
__u64 trans_uaddr;
__u32 trans_len;
+ __u32 pad2;
};
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 6bc3456a8ebf1d..a1efa7907a0b10 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data {
__u32 token;
__u8 pad[56];
- __u32 enabled;
};
#define KVM_PV_EOI_BIT 0
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a42d8a6f714958..c342c4aa9c6848 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1687,11 +1687,11 @@ static int x2apic_state;
static bool x2apic_hw_locked(void)
{
- u64 ia32_cap;
+ u64 x86_arch_cap_msr;
u64 msr;
- ia32_cap = x86_read_arch_cap_msr();
- if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+ if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
return (msr & LEGACY_XAPIC_DISABLED);
}
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 30335182b6b0ae..e92ff0c11db814 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -314,7 +314,7 @@ static bool is_callthunk(void *addr)
return !bcmp(pad, insn_buff, tmpl_size);
}
-int x86_call_depth_emit_accounting(u8 **pprog, void *func)
+int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
u8 insn_buff[MAX_PATCH_LEN];
@@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
return 0;
memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
- apply_relocation(insn_buff, tmpl_size, *pprog,
+ apply_relocation(insn_buff, tmpl_size, ip,
skl_call_thunk_template, tmpl_size);
memcpy(*pprog, insn_buff, tmpl_size);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 6d8677e80ddbb1..cb9eece55904d0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -345,6 +345,28 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
+static void bsp_determine_snp(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+ cc_vendor = CC_VENDOR_AMD;
+
+ if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
+ /*
+ * RMP table entry format is not architectural and is defined by the
+ * per-processor PPR. Restrict SNP support on the known CPU models
+ * for which the RMP table entry format is currently defined for.
+ */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
+ c->x86 >= 0x19 && snp_probe_rmptable_info()) {
+ cc_platform_set(CC_ATTR_HOST_SEV_SNP);
+ } else {
+ setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
+ }
+ }
+#endif
+}
+
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
@@ -452,21 +474,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
break;
}
- if (cpu_has(c, X86_FEATURE_SEV_SNP)) {
- /*
- * RMP table entry format is not architectural and it can vary by processor
- * and is defined by the per-processor PPR. Restrict SNP support on the
- * known CPU model and family for which the RMP table entry format is
- * currently defined for.
- */
- if (!boot_cpu_has(X86_FEATURE_ZEN3) &&
- !boot_cpu_has(X86_FEATURE_ZEN4) &&
- !boot_cpu_has(X86_FEATURE_ZEN5))
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
- else if (!snp_probe_rmptable_info())
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
- }
-
+ bsp_determine_snp(c);
return;
warn:
@@ -527,7 +535,6 @@ clear_sev:
static void early_init_amd(struct cpuinfo_x86 *c)
{
- u64 value;
u32 dummy;
if (c->x86 >= 0xf)
@@ -595,20 +602,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
early_detect_mem_encrypt(c);
- /* Re-enable TopologyExtensions if switched off by BIOS */
- if (c->x86 == 0x15 &&
- (c->x86_model >= 0x10 && c->x86_model <= 0x6f) &&
- !cpu_has(c, X86_FEATURE_TOPOEXT)) {
-
- if (msr_set_bit(0xc0011005, 54) > 0) {
- rdmsrl(0xc0011005, value);
- if (value & BIT_64(54)) {
- set_cpu_cap(c, X86_FEATURE_TOPOEXT);
- pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
- }
- }
- }
-
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) {
if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB))
setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e7ba936d798b81..ca295b0c1eeee0 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -61,6 +61,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
EXPORT_SYMBOL_GPL(x86_pred_cmd);
+static u64 __ro_after_init x86_arch_cap_msr;
+
static DEFINE_MUTEX(spec_ctrl_mutex);
void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
@@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void)
x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
}
+ x86_arch_cap_msr = x86_read_arch_cap_msr();
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -301,8 +305,6 @@ static const char * const taa_strings[] = {
static void __init taa_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_TAA)) {
taa_mitigation = TAA_MITIGATION_OFF;
return;
@@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void)
* On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
* update is required.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
- !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+ if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))
taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
/*
@@ -401,8 +402,6 @@ static const char * const mmio_strings[] = {
static void __init mmio_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
cpu_mitigations_off()) {
@@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void)
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return;
- ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable CPU buffer clear mitigation for host and VMM, if also affected
* by MDS or TAA. Otherwise, enable mitigation for VMM only.
@@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void)
* be propagated to uncore buffers, clearing the Fill buffers on idle
* is required irrespective of SMT state.
*/
- if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+ if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
static_branch_enable(&mds_idle_clear);
/*
@@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void)
* FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
* affected systems.
*/
- if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+ if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
(boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)))
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)))
mmio_mitigation = MMIO_MITIGATION_VERW;
else
mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
@@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void)
if (rfds_mitigation == RFDS_MITIGATION_OFF)
return;
- if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
@@ -659,8 +656,6 @@ void update_srbds_msr(void)
static void __init srbds_select_mitigation(void)
{
- u64 ia32_cap;
-
if (!boot_cpu_has_bug(X86_BUG_SRBDS))
return;
@@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void)
* are only exposed to SRBDS when TSX is enabled or when CPU is affected
* by Processor MMIO Stale Data vulnerability.
*/
- ia32_cap = x86_read_arch_cap_msr();
- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+ if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
@@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void)
/* Will verify below that mitigation _can_ be disabled */
/* No microcode */
- if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+ if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) {
if (gds_mitigation == GDS_MITIGATION_FORCE) {
/*
* This only needs to be done on the boot CPU so do it
@@ -1544,20 +1538,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
return SPECTRE_V2_RETPOLINE;
}
+static bool __ro_after_init rrsba_disabled;
+
/* Disable in-kernel use of non-RSB RET predictors */
static void __init spec_ctrl_disable_kernel_rrsba(void)
{
- u64 ia32_cap;
+ if (rrsba_disabled)
+ return;
- if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) {
+ rrsba_disabled = true;
return;
+ }
- ia32_cap = x86_read_arch_cap_msr();
+ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ return;
- if (ia32_cap & ARCH_CAP_RRSBA) {
- x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
- update_spec_ctrl(x86_spec_ctrl_base);
- }
+ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ rrsba_disabled = true;
}
static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
@@ -1607,6 +1606,73 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
dump_stack();
}
+/*
+ * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by
+ * branch history in userspace. Not needed if BHI_NO is set.
+ */
+static bool __init spec_ctrl_bhi_dis(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
+ return false;
+
+ x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
+ update_spec_ctrl(x86_spec_ctrl_base);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
+
+ return true;
+}
+
+enum bhi_mitigations {
+ BHI_MITIGATION_OFF,
+ BHI_MITIGATION_ON,
+};
+
+static enum bhi_mitigations bhi_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF;
+
+static int __init spectre_bhi_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ bhi_mitigation = BHI_MITIGATION_OFF;
+ else if (!strcmp(str, "on"))
+ bhi_mitigation = BHI_MITIGATION_ON;
+ else
+ pr_err("Ignoring unknown spectre_bhi option (%s)", str);
+
+ return 0;
+}
+early_param("spectre_bhi", spectre_bhi_parse_cmdline);
+
+static void __init bhi_select_mitigation(void)
+{
+ if (bhi_mitigation == BHI_MITIGATION_OFF)
+ return;
+
+ /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+ spec_ctrl_disable_kernel_rrsba();
+ if (rrsba_disabled)
+ return;
+ }
+
+ if (spec_ctrl_bhi_dis())
+ return;
+
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ /* Mitigate KVM by default */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+
+ /* Mitigate syscalls when the mitigation is forced =on */
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
+ pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1718,6 +1784,9 @@ static void __init spectre_v2_select_mitigation(void)
mode == SPECTRE_V2_RETPOLINE)
spec_ctrl_disable_kernel_rrsba();
+ if (boot_cpu_has(X86_BUG_BHI))
+ bhi_select_mitigation();
+
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@@ -1832,8 +1901,6 @@ static void update_indir_branch_cond(void)
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
-
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
@@ -1848,7 +1915,7 @@ static void update_mds_branch_idle(void)
if (sched_smt_active()) {
static_branch_enable(&mds_idle_clear);
} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
- (ia32_cap & ARCH_CAP_FBSDP_NO)) {
+ (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
static_branch_disable(&mds_idle_clear);
}
}
@@ -2695,15 +2762,15 @@ static char *stibp_state(void)
switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
- return ", STIBP: disabled";
+ return "; STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
- return ", STIBP: forced";
+ return "; STIBP: forced";
case SPECTRE_V2_USER_STRICT_PREFERRED:
- return ", STIBP: always-on";
+ return "; STIBP: always-on";
case SPECTRE_V2_USER_PRCTL:
case SPECTRE_V2_USER_SECCOMP:
if (static_key_enabled(&switch_to_cond_stibp))
- return ", STIBP: conditional";
+ return "; STIBP: conditional";
}
return "";
}
@@ -2712,10 +2779,10 @@ static char *ibpb_state(void)
{
if (boot_cpu_has(X86_FEATURE_IBPB)) {
if (static_key_enabled(&switch_mm_always_ibpb))
- return ", IBPB: always-on";
+ return "; IBPB: always-on";
if (static_key_enabled(&switch_mm_cond_ibpb))
- return ", IBPB: conditional";
- return ", IBPB: disabled";
+ return "; IBPB: conditional";
+ return "; IBPB: disabled";
}
return "";
}
@@ -2725,14 +2792,30 @@ static char *pbrsb_eibrs_state(void)
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
- return ", PBRSB-eIBRS: SW sequence";
+ return "; PBRSB-eIBRS: SW sequence";
else
- return ", PBRSB-eIBRS: Vulnerable";
+ return "; PBRSB-eIBRS: Vulnerable";
} else {
- return ", PBRSB-eIBRS: Not affected";
+ return "; PBRSB-eIBRS: Not affected";
}
}
+static const char *spectre_bhi_state(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_BHI))
+ return "; BHI: Not affected";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
+ return "; BHI: BHI_DIS_S";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
+ return "; BHI: SW loop, KVM: SW loop";
+ else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && rrsba_disabled)
+ return "; BHI: Retpoline";
+ else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+ return "; BHI: Vulnerable, KVM: SW loop";
+
+ return "; BHI: Vulnerable";
+}
+
static ssize_t spectre_v2_show_state(char *buf)
{
if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@@ -2745,13 +2828,15 @@ static ssize_t spectre_v2_show_state(char *buf)
spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
- return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
+ return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
spectre_v2_strings[spectre_v2_enabled],
ibpb_state(),
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
stibp_state(),
- boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+ boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
pbrsb_eibrs_state(),
+ spectre_bhi_state(),
+ /* this should always be at the end */
spectre_v2_module_string());
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5c1e6d6be267af..605c26c009c8ac 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_SPECTRE_V2 BIT(8)
#define NO_MMIO BIT(9)
#define NO_EIBRS_PBRSB BIT(10)
+#define NO_BHI BIT(11)
#define VULNWL(vendor, family, model, whitelist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
/* Zhaoxin Family 7 */
- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
+ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
{}
};
@@ -1283,25 +1284,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi
u64 x86_read_arch_cap_msr(void)
{
- u64 ia32_cap = 0;
+ u64 x86_arch_cap_msr = 0;
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
- return ia32_cap;
+ return x86_arch_cap_msr;
}
-static bool arch_cap_mmio_immune(u64 ia32_cap)
+static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr)
{
- return (ia32_cap & ARCH_CAP_FBSDP_NO &&
- ia32_cap & ARCH_CAP_PSDP_NO &&
- ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+ return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_PSDP_NO &&
+ x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO);
}
-static bool __init vulnerable_to_rfds(u64 ia32_cap)
+static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
{
/* The "immunity" bit trumps everything else: */
- if (ia32_cap & ARCH_CAP_RFDS_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO)
return false;
/*
@@ -1309,7 +1310,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
* indicate that mitigation is needed because guest is running on a
* vulnerable hardware or may migrate to such hardware:
*/
- if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
+ if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
return true;
/* Only consult the blacklist when there is no enumeration: */
@@ -1318,11 +1319,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
- u64 ia32_cap = x86_read_arch_cap_msr();
+ u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
- !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
@@ -1334,7 +1335,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
- !(ia32_cap & ARCH_CAP_SSB_NO) &&
+ !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
@@ -1345,17 +1346,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Don't use AutoIBRS when SNP is enabled because it degrades host
* userspace indirect branch performance.
*/
- if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
+ if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) ||
(cpu_has(c, X86_FEATURE_AUTOIBRS) &&
!cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
- !(ia32_cap & ARCH_CAP_PBRSB_NO))
+ !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO))
setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
}
if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
- !(ia32_cap & ARCH_CAP_MDS_NO)) {
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) {
setup_force_cpu_bug(X86_BUG_MDS);
if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
@@ -1374,9 +1375,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* TSX_CTRL check alone is not sufficient for cases when the microcode
* update is not present or running as guest that don't get TSX_CTRL.
*/
- if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+ if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) &&
(cpu_has(c, X86_FEATURE_RTM) ||
- (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+ (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)))
setup_force_cpu_bug(X86_BUG_TAA);
/*
@@ -1402,7 +1403,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
* nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
- if (!arch_cap_mmio_immune(ia32_cap)) {
+ if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
if (cpu_matches(cpu_vuln_blacklist, MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
@@ -1410,7 +1411,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
}
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
- if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA))
setup_force_cpu_bug(X86_BUG_RETBLEED);
}
@@ -1428,18 +1429,25 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* disabling AVX2. The only way to do this in HW is to clear XCR0[2],
* which means that AVX will be disabled.
*/
- if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+ if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) &&
boot_cpu_has(X86_FEATURE_AVX))
setup_force_cpu_bug(X86_BUG_GDS);
- if (vulnerable_to_rfds(ia32_cap))
+ if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);
+ /* When virtualized, eIBRS could be hidden, assume vulnerable */
+ if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
+ !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
+ boot_cpu_has(X86_FEATURE_HYPERVISOR)))
+ setup_force_cpu_bug(X86_BUG_BHI);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
/* Rogue Data Cache Load? No! */
- if (ia32_cap & ARCH_CAP_RDCL_NO)
+ if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO)
return;
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b5cc557cfc3736..84d41be6d06ba4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -2500,12 +2500,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
return -EINVAL;
b = &per_cpu(mce_banks_array, s->id)[bank];
-
if (!b->init)
return -ENODEV;
b->ctl = new;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
return size;
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 422a4ddc2ab7c9..7b29ebda024f4e 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -108,7 +108,7 @@ static inline void k8_check_syscfg_dram_mod_en(void)
(boot_cpu_data.x86 >= 0x0f)))
return;
- if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return;
rdmsr(MSR_AMD64_SYSCFG, lo, hi);
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index c99f26ebe7a653..1a8687f8073a89 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -78,7 +78,8 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu)
else
cpu = cpumask_any_but(mask, exclude_cpu);
- if (!IS_ENABLED(CONFIG_NO_HZ_FULL))
+ /* Only continue if tick_nohz_full_mask has been initialized. */
+ if (!tick_nohz_full_enabled())
return cpu;
/* If the CPU picked isn't marked nohz_full nothing more needs doing. */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0dad49a09b7a9e..af5aa2c754c222 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
+ { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
@@ -49,6 +50,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index aaca8d235dc2bb..d17c9b71eb4a25 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
#endif
- set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
@@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
topo_info.nr_disabled_cpus++;
}
- /* Register present and possible CPUs in the domain maps */
+ /*
+ * Register present and possible CPUs in the domain
+ * maps. cpu_possible_map will be updated in
+ * topology_init_possible_cpus() after enumeration is done.
+ */
for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
}
diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
index 1a8b3ad493afef..a7aa6eff4ae5ba 100644
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan)
if (!sft)
sft = get_count_order(ecx.cpu_nthreads + 1);
- topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1);
+ /*
+ * cpu_nthreads describes the number of threads in the package
+ * sft is the number of APIC ID bits per package
+ *
+ * As the number of actual threads per core is not described in
+ * this leaf, just set the CORE domain shift and let the later
+ * parsers set SMT shift. Assume one thread per core by default
+ * which is correct if there are no other CPUID leafs to parse.
+ */
+ topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1);
+ topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1);
return true;
}
-static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id)
+static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id)
{
/*
* Starting with Fam 17h the DIE domain could probably be used to
@@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
tscan->c->topo.initial_apicid = leaf.ext_apic_id;
/*
- * If leaf 0xb is available, then SMT shift is set already. If not
- * take it from ecx.threads_per_core and use topo_update_dom() -
- * topology_set_dom() would propagate and overwrite the already
- * propagated CORE level.
+ * If leaf 0xb is available, then the domain shifts are set
+ * already and nothing to do here.
*/
if (!has_0xb) {
+ /*
+ * Leaf 0x80000008 set the CORE domain shift already.
+ * Update the SMT domain, but do not propagate it.
+ */
unsigned int nthreads = leaf.core_nthreads + 1;
topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads);
@@ -109,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
static bool parse_fam10h_node_id(struct topo_scan *tscan)
{
- struct {
- union {
+ union {
+ struct {
u64 node_id : 3,
nodes_per_pkg : 3,
unused : 58;
- u64 msr;
};
+ u64 msr;
} nid;
if (!boot_cpu_has(X86_FEATURE_NODEID_MSR))
@@ -135,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan)
tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN];
}
+static void topoext_fixup(struct topo_scan *tscan)
+{
+ struct cpuinfo_x86 *c = tscan->c;
+ u64 msrval;
+
+ /* Try to re-enable TopologyExtensions if switched off by BIOS */
+ if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD ||
+ c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f)
+ return;
+
+ if (msr_set_bit(0xc0011005, 54) <= 0)
+ return;
+
+ rdmsrl(0xc0011005, msrval);
+ if (msrval & BIT_64(54)) {
+ set_cpu_cap(c, X86_FEATURE_TOPOEXT);
+ pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
+ }
+}
+
static void parse_topology_amd(struct topo_scan *tscan)
{
bool has_0xb = false;
@@ -164,6 +196,7 @@ static void parse_topology_amd(struct topo_scan *tscan)
void cpu_parse_topology_amd(struct topo_scan *tscan)
{
tscan->amd_nodes_per_pkg = 1;
+ topoext_fixup(tscan);
parse_topology_amd(tscan);
if (tscan->amd_nodes_per_pkg > 1)
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index e963344b044902..53935b4d62e305 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -2,6 +2,7 @@
/*
* EISA specific code
*/
+#include <linux/cc_platform.h>
#include <linux/ioport.h>
#include <linux/eisa.h>
#include <linux/io.h>
@@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void)
{
void __iomem *p;
- if (xen_pv_domain() && !xen_initial_domain())
+ if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
return 0;
p = ioremap(0x0FFFD9, 4);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4cadfd606e8e6a..7f0732bc0ccd23 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -65,6 +65,7 @@ static int __init parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
+static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled);
static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;
@@ -244,7 +245,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void)
{
u32 flags = 0;
- if (__this_cpu_read(apf_reason.enabled)) {
+ if (__this_cpu_read(async_pf_enabled)) {
flags = __this_cpu_read(apf_reason.flags);
__this_cpu_write(apf_reason.flags, 0);
}
@@ -295,7 +296,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
inc_irq_stat(irq_hv_callback_count);
- if (__this_cpu_read(apf_reason.enabled)) {
+ if (__this_cpu_read(async_pf_enabled)) {
token = __this_cpu_read(apf_reason.token);
kvm_async_pf_task_wake(token);
__this_cpu_write(apf_reason.token, 0);
@@ -362,7 +363,7 @@ static void kvm_guest_cpu_init(void)
wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
- __this_cpu_write(apf_reason.enabled, 1);
+ __this_cpu_write(async_pf_enabled, true);
pr_debug("setup async PF for cpu %d\n", smp_processor_id());
}
@@ -383,11 +384,11 @@ static void kvm_guest_cpu_init(void)
static void kvm_pv_disable_apf(void)
{
- if (!__this_cpu_read(apf_reason.enabled))
+ if (!__this_cpu_read(async_pf_enabled))
return;
wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
- __this_cpu_write(apf_reason.enabled, 0);
+ __this_cpu_write(async_pf_enabled, false);
pr_debug("disable async PF for cpu %d\n", smp_processor_id());
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 9a5b372c706fcc..ed163c8c8604e3 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -580,7 +580,7 @@ EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx);
static char *nmi_check_stall_msg[] = {
/* */
-/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */
+/* +--------- nmi_seq & 0x1: CPU is currently in NMI handler. */
/* | +------ cpu_is_offline(cpu) */
/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */
/* | | | NMI handler has been invoked. */
@@ -628,22 +628,26 @@ void nmi_backtrace_stall_check(const struct cpumask *btp)
nmi_seq = READ_ONCE(nsp->idt_nmi_seq);
if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
msgp = "CPU entered NMI handler function, but has not exited";
- } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) {
- msgp = "CPU is handling NMIs";
- } else {
- idx = ((nsp->idt_seq_snap & 0x1) << 2) |
+ } else if (nsp->idt_nmi_seq_snap == nmi_seq ||
+ nsp->idt_nmi_seq_snap + 1 == nmi_seq) {
+ idx = ((nmi_seq & 0x1) << 2) |
(cpu_is_offline(cpu) << 1) |
(nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
msgp = nmi_check_stall_msg[idx];
if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
modp = ", but OK because ignore_nmis was set";
- if (nmi_seq & 0x1)
- msghp = " (CPU currently in NMI handler function)";
- else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
+ if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
msghp = " (CPU exited one NMI handler function)";
+ else if (nmi_seq & 0x1)
+ msghp = " (CPU currently in NMI handler function)";
+ else
+ msghp = " (CPU was never in an NMI handler function)";
+ } else {
+ msgp = "CPU is handling NMIs";
}
- pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n",
- __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies));
+ pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp);
+ pr_alert("%s: last activity: %lu jiffies ago.\n",
+ __func__, j - READ_ONCE(nsp->recv_jiffies));
}
}
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 319fef37d9dce4..cc2c34ba7228ac 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -203,16 +203,6 @@ void __init probe_roms(void)
unsigned char c;
int i;
- /*
- * The ROM memory range is not part of the e820 table and is therefore not
- * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted
- * memory, and SNP requires encrypted memory to be validated before access.
- * Do that here.
- */
- snp_prep_memory(video_rom_resource.start,
- ((system_rom_resource.end + 1) - video_rom_resource.start),
- SNP_PAGE_STATE_PRIVATE);
-
/* video rom */
upper = adapter_rom_resources[0].start;
for (start = video_rom_resource.start; start < upper; start += 2048) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ef206500ed6f22..e125e059e2c45d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -9,7 +9,6 @@
#include <linux/console.h>
#include <linux/crash_dump.h>
#include <linux/dma-map-ops.h>
-#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/ima.h>
#include <linux/init_ohci1394_dma.h>
@@ -36,6 +35,7 @@
#include <asm/bios_ebda.h>
#include <asm/bugs.h>
#include <asm/cacheinfo.h>
+#include <asm/coco.h>
#include <asm/cpu.h>
#include <asm/efi.h>
#include <asm/gart.h>
@@ -902,7 +902,7 @@ void __init setup_arch(char **cmdline_p)
efi_init();
reserve_ibft_region();
- dmi_setup();
+ x86_init.resources.dmi_setup();
/*
* VMware detection requires dmi to be available, so this
@@ -992,6 +992,7 @@ void __init setup_arch(char **cmdline_p)
* memory size.
*/
mem_encrypt_setup_arch();
+ cc_random_init();
efi_fake_memmap();
efi_find_mirror();
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index b59b09c2f28406..38ad066179d81f 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -23,6 +23,7 @@
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
+#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>
#include <asm/init.h>
@@ -795,21 +796,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
}
-void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op)
-{
- unsigned long vaddr, npages;
-
- vaddr = (unsigned long)__va(paddr);
- npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
-
- if (op == SNP_PAGE_STATE_PRIVATE)
- early_snp_set_memory_private(vaddr, paddr, npages);
- else if (op == SNP_PAGE_STATE_SHARED)
- early_snp_set_memory_shared(vaddr, paddr, npages);
- else
- WARN(1, "invalid memory op %d\n", op);
-}
-
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
unsigned long vaddr_end, int op)
{
@@ -2136,6 +2122,17 @@ void __head __noreturn snp_abort(void)
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
+/*
+ * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
+ * enabled, as the alternative (fallback) logic for DMI probing in the legacy
+ * ROM region can cause a crash since this region is not pre-validated.
+ */
+void __init snp_dmi_setup(void)
+{
+ if (efi_enabled(EFI_CONFIG_TABLES))
+ dmi_setup();
+}
+
static void dump_cpuid_table(void)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -2287,16 +2284,6 @@ static int __init snp_init_platform_device(void)
}
device_initcall(snp_init_platform_device);
-void kdump_sev_callback(void)
-{
- /*
- * Do wbinvd() on remote CPUs when SNP is enabled in order to
- * safely do SNP_SHUTDOWN on the local CPU.
- */
- if (cpu_feature_enabled(X86_FEATURE_SEV_SNP))
- wbinvd();
-}
-
void sev_show_status(void)
{
int i;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a42830dc151bc4..d5dc5a92635a82 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -3,6 +3,7 @@
*
* For licencing details see kernel-base/COPYING
*/
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/export.h>
@@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = {
.probe_roms = probe_roms,
.reserve_resources = reserve_standard_io_resources,
.memory_setup = e820__memory_setup_default,
+ .dmi_setup = dmi_setup,
},
.mpparse = {
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3aaf7e86a859a2..0ebdd088f28b85 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -122,6 +122,7 @@ config KVM_AMD_SEV
default y
depends on KVM_AMD && X86_64
depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
+ select ARCH_HAS_CC_PLATFORM
help
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
with Encrypted State (SEV-ES) on AMD processors.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index adba49afb5fe63..bfc0bfcb2bc60d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -189,15 +189,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
return 0;
}
-static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
- const char *sig)
+static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries,
+ int nent, const char *sig)
{
struct kvm_hypervisor_cpuid cpuid = {};
struct kvm_cpuid_entry2 *entry;
u32 base;
for_each_possible_hypervisor_cpuid_base(base) {
- entry = kvm_find_cpuid_entry(vcpu, base);
+ entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (entry) {
u32 signature[3];
@@ -217,22 +217,29 @@ static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcp
return cpuid;
}
-static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu,
- struct kvm_cpuid_entry2 *entries, int nent)
+static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
+ const char *sig)
{
- u32 base = vcpu->arch.kvm_cpuid.base;
-
- if (!base)
- return NULL;
+ return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent, sig);
+}
- return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES,
+static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries,
+ int nent, u32 kvm_cpuid_base)
+{
+ return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES,
KVM_CPUID_INDEX_NOT_SIGNIFICANT);
}
static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
{
- return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent);
+ u32 base = vcpu->arch.kvm_cpuid.base;
+
+ if (!base)
+ return NULL;
+
+ return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries,
+ vcpu->arch.cpuid_nent, base);
}
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
@@ -266,6 +273,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
int nent)
{
struct kvm_cpuid_entry2 *best;
+ struct kvm_hypervisor_cpuid kvm_cpuid;
best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
if (best) {
@@ -292,10 +300,12 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
- if (kvm_hlt_in_guest(vcpu->kvm) && best &&
- (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
- best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE);
+ if (kvm_cpuid.base) {
+ best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base);
+ if (kvm_hlt_in_guest(vcpu->kvm) && best)
+ best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ }
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index aadefcaa9561d0..2f4e155080badc 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
#define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
#define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
-#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
/* CPUID level 0x80000007 (EDX). */
@@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = {
*/
static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
{
+ BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_3);
BUILD_BUG_ON(x86_leaf == CPUID_LNX_4);
+ BUILD_BUG_ON(x86_leaf == CPUID_LNX_5);
BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
}
@@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index ae0ac12382b927..61a7531d41b019 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -84,9 +84,10 @@ struct enc_region {
};
/* Called with the sev_bitmap_lock held, or on shutdown */
-static int sev_flush_asids(int min_asid, int max_asid)
+static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
{
- int ret, asid, error = 0;
+ int ret, error = 0;
+ unsigned int asid;
/* Check if there are any ASIDs to reclaim before performing a flush */
asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
@@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm)
}
/* Must be called with the sev_bitmap_lock held */
-static bool __sev_recycle_asids(int min_asid, int max_asid)
+static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid)
{
if (sev_flush_asids(min_asid, max_asid))
return false;
@@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
static int sev_asid_new(struct kvm_sev_info *sev)
{
- int asid, min_asid, max_asid, ret;
+ /*
+ * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
+ * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
+ * Note: min ASID can end up larger than the max if basic SEV support is
+ * effectively disabled by disallowing use of ASIDs for SEV guests.
+ */
+ unsigned int min_asid = sev->es_active ? 1 : min_sev_asid;
+ unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
+ unsigned int asid;
bool retry = true;
+ int ret;
+
+ if (min_asid > max_asid)
+ return -ENOTTY;
WARN_ON(sev->misc_cg);
sev->misc_cg = get_current_misc_cg();
@@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev)
mutex_lock(&sev_bitmap_lock);
- /*
- * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
- * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
- */
- min_asid = sev->es_active ? 1 : min_sev_asid;
- max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
if (asid > max_asid) {
@@ -179,7 +186,8 @@ again:
mutex_unlock(&sev_bitmap_lock);
- return asid;
+ sev->asid = asid;
+ return 0;
e_uncharge:
sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
@@ -187,7 +195,7 @@ e_uncharge:
return ret;
}
-static int sev_get_asid(struct kvm *kvm)
+static unsigned int sev_get_asid(struct kvm *kvm)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -247,21 +255,19 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_platform_init_args init_args = {0};
- int asid, ret;
+ int ret;
if (kvm->created_vcpus)
return -EINVAL;
- ret = -EBUSY;
if (unlikely(sev->active))
- return ret;
+ return -EINVAL;
sev->active = true;
sev->es_active = argp->id == KVM_SEV_ES_INIT;
- asid = sev_asid_new(sev);
- if (asid < 0)
+ ret = sev_asid_new(sev);
+ if (ret)
goto e_no_asid;
- sev->asid = asid;
init_args.probe = false;
ret = sev_platform_init(&init_args);
@@ -287,8 +293,8 @@ e_no_asid:
static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
+ unsigned int asid = sev_get_asid(kvm);
struct sev_data_activate activate;
- int asid = sev_get_asid(kvm);
int ret;
/* activate ASID on the given handle */
@@ -2240,8 +2246,10 @@ void __init sev_hardware_setup(void)
goto out;
}
- sev_asid_count = max_sev_asid - min_sev_asid + 1;
- WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+ if (min_sev_asid <= max_sev_asid) {
+ sev_asid_count = max_sev_asid - min_sev_asid + 1;
+ WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+ }
sev_supported = true;
/* SEV-ES support requested? */
@@ -2272,7 +2280,9 @@ void __init sev_hardware_setup(void)
out:
if (boot_cpu_has(X86_FEATURE_SEV))
pr_info("SEV %s (ASIDs %u - %u)\n",
- sev_supported ? "enabled" : "disabled",
+ sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
+ "unusable" :
+ "disabled",
min_sev_asid, max_sev_asid);
if (boot_cpu_has(X86_FEATURE_SEV_ES))
pr_info("SEV-ES %s (ASIDs %u - %u)\n",
@@ -2320,7 +2330,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
*/
static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
{
- int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
+ unsigned int asid = sev_get_asid(vcpu->kvm);
/*
* Note! The address must be a kernel address, as regular page walk
@@ -2638,7 +2648,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
- int asid = sev_get_asid(svm->vcpu.kvm);
+ unsigned int asid = sev_get_asid(svm->vcpu.kvm);
/* Assign the asid allocated with this SEV guest */
svm->asid = asid;
@@ -3174,7 +3184,7 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
unsigned long pfn;
struct page *p;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
/*
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 88659de4d2a714..c6b4b1728006d5 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -735,13 +735,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
* Tracepoint for nested #vmexit because of interrupt pending
*/
TRACE_EVENT(kvm_invlpga,
- TP_PROTO(__u64 rip, int asid, u64 address),
+ TP_PROTO(__u64 rip, unsigned int asid, u64 address),
TP_ARGS(rip, asid, address),
TP_STRUCT__entry(
- __field( __u64, rip )
- __field( int, asid )
- __field( __u64, address )
+ __field( __u64, rip )
+ __field( unsigned int, asid )
+ __field( __u64, address )
),
TP_fast_assign(
@@ -750,7 +750,7 @@ TRACE_EVENT(kvm_invlpga,
__entry->address = address;
),
- TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx",
+ TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx",
__entry->rip, __entry->asid, __entry->address)
);
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 2bfbf758d06110..f6986dee6f8c7c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
call vmx_spec_ctrl_restore_host
+ CLEAR_BRANCH_HISTORY_VMEXIT
+
/* Put return value in AX */
mov %_ASM_BX, %_ASM_AX
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 47d9f03b777837..984ea2089efc31 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
static u64 kvm_get_arch_capabilities(void)
{
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 721b528da9acee..e674ccf720b9f6 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -163,6 +163,7 @@ SYM_CODE_START_NOALIGN(srso_alias_untrain_ret)
lfence
jmp srso_alias_return_thunk
SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
.popsection
.pushsection .text..__x86.rethunk_safe
@@ -224,10 +225,16 @@ SYM_CODE_START(srso_return_thunk)
SYM_CODE_END(srso_return_thunk)
#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret"
-#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret"
#else /* !CONFIG_MITIGATION_SRSO */
+/* Dummy for the alternative in CALL_UNTRAIN_RET. */
+SYM_CODE_START(srso_alias_untrain_ret)
+ ANNOTATE_UNRET_SAFE
+ ANNOTATE_NOENDBR
+ ret
+ int3
+SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
#define JMP_SRSO_UNTRAIN_RET "ud2"
-#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2"
#endif /* CONFIG_MITIGATION_SRSO */
#ifdef CONFIG_MITIGATION_UNRET_ENTRY
@@ -319,9 +326,7 @@ SYM_FUNC_END(retbleed_untrain_ret)
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
SYM_FUNC_START(entry_untrain_ret)
- ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \
- JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO, \
- JMP_SRSO_ALIAS_UNTRAIN_RET, X86_FEATURE_SRSO_ALIAS
+ ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index a204a332c71fc5..968d7005f4a724 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
- bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- /* if this is already a gbpage, this portion is already mapped */
- if (pud_leaf(*pud))
- continue;
-
- /* Is using a gbpage allowed? */
- use_gbpage = info->direct_gbpages;
-
- /* Don't use gbpage if it maps more than the requested region. */
- /* at the begining: */
- use_gbpage &= ((addr & ~PUD_MASK) == 0);
- /* ... or at the end: */
- use_gbpage &= ((next & ~PUD_MASK) == 0);
-
- /* Never overwrite existing mappings */
- use_gbpage &= !pud_present(*pud);
-
- if (use_gbpage) {
+ if (info->direct_gbpages) {
pud_t pudval;
+ if (pud_present(*pud))
+ continue;
+
+ addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index 70b91de2e053ab..422602f6039b82 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -492,6 +492,24 @@ void __init sme_early_init(void)
*/
if (sev_status & MSR_AMD64_SEV_ENABLED)
ia32_disable();
+
+ /*
+ * Override init functions that scan the ROM region in SEV-SNP guests,
+ * as this memory is not pre-validated and would thus cause a crash.
+ */
+ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
+ x86_init.mpparse.find_mptable = x86_init_noop;
+ x86_init.pci.init_irq = x86_init_noop;
+ x86_init.resources.probe_roms = x86_init_noop;
+
+ /*
+ * DMI setup behavior for SEV-SNP guests depends on
+ * efi_enabled(EFI_CONFIG_TABLES), which hasn't been
+ * parsed yet. snp_dmi_setup() will run after that
+ * parsing has happened.
+ */
+ x86_init.resources.dmi_setup = snp_dmi_setup;
+ }
}
void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 104544359d69cd..025fd7ea5d69f5 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -24,6 +24,7 @@
#include <linux/memblock.h>
#include <linux/init.h>
+#include <asm/pgtable_areas.h>
#include "numa_internal.h"
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 0d72183b5dd028..36b603d0cddefc 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -947,6 +947,38 @@ static void free_pfn_range(u64 paddr, unsigned long size)
memtype_free(paddr, paddr + size);
}
+static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
+ pgprot_t *pgprot)
+{
+ unsigned long prot;
+
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
+
+ /*
+ * We need the starting PFN and cachemode used for track_pfn_remap()
+ * that covered the whole VMA. For most mappings, we can obtain that
+ * information from the page tables. For COW mappings, we might now
+ * suddenly have anon folios mapped and follow_phys() will fail.
+ *
+ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
+ * detect the PFN. If we need the cachemode as well, we're out of luck
+ * for now and have to fail fork().
+ */
+ if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) {
+ if (pgprot)
+ *pgprot = __pgprot(prot);
+ return 0;
+ }
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (pgprot)
+ return -EINVAL;
+ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+ return 0;
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
/*
* track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
@@ -957,20 +989,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
- unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
if (vma->vm_flags & VM_PAT) {
- /*
- * reserve the whole chunk covered by vma. We need the
- * starting address and protection from pte.
- */
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, &pgprot))
return -EINVAL;
- }
- pgprot = __pgprot(prot);
+ /* reserve the whole chunk covered by vma. */
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}
@@ -1045,7 +1070,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size, bool mm_wr_locked)
{
resource_size_t paddr;
- unsigned long prot;
if (vma && !(vma->vm_flags & VM_PAT))
return;
@@ -1053,11 +1077,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
/* free the chunk starting from pfn or the whole chunk */
paddr = (resource_size_t)pfn << PAGE_SHIFT;
if (!paddr && !size) {
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, NULL))
return;
- }
-
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a7ba8e1786452d..df5fac428408fe 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -480,7 +480,7 @@ static int emit_call(u8 **pprog, void *func, void *ip)
static int emit_rsb_call(u8 **pprog, void *func, void *ip)
{
OPTIMIZER_HIDE_VAR(func);
- x86_call_depth_emit_accounting(pprog, func);
+ ip += x86_call_depth_emit_accounting(pprog, func, ip);
return emit_patch(pprog, func, ip, 0xE8);
}
@@ -1972,20 +1972,17 @@ populate_extable:
/* call */
case BPF_JMP | BPF_CALL: {
- int offs;
+ u8 *ip = image + addrs[i - 1];
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
- if (!imm32)
- return -EINVAL;
- offs = 7 + x86_call_depth_emit_accounting(&prog, func);
- } else {
- if (!imm32)
- return -EINVAL;
- offs = x86_call_depth_emit_accounting(&prog, func);
+ ip += 7;
}
- if (emit_call(&prog, func, image + addrs[i - 1] + offs))
+ if (!imm32)
+ return -EINVAL;
+ ip += x86_call_depth_emit_accounting(&prog, func, ip);
+ if (emit_call(&prog, func, ip))
return -EINVAL;
break;
}
@@ -2835,7 +2832,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* Direct-call fentry stub, as such it needs accounting for the
* __fentry__ call.
*/
- x86_call_depth_emit_accounting(&prog, NULL);
+ x86_call_depth_emit_accounting(&prog, NULL, image);
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
diff --git a/arch/x86/virt/Makefile b/arch/x86/virt/Makefile
index 1e36502cd7383a..ea343fc392dcc8 100644
--- a/arch/x86/virt/Makefile
+++ b/arch/x86/virt/Makefile
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y += vmx/
+obj-y += svm/ vmx/
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index cffe1157a90acf..ab0e8448bb6eb2 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -77,7 +77,7 @@ static int __mfd_enable(unsigned int cpu)
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
rdmsrl(MSR_AMD64_SYSCFG, val);
@@ -98,7 +98,7 @@ static int __snp_enable(unsigned int cpu)
{
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
rdmsrl(MSR_AMD64_SYSCFG, val);
@@ -174,11 +174,11 @@ static int __init snp_rmptable_init(void)
u64 rmptable_size;
u64 val;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;
if (!amd_iommu_snp_en)
- return 0;
+ goto nosnp;
if (!probed_rmp_size)
goto nosnp;
@@ -225,7 +225,7 @@ skip_enable:
return 0;
nosnp:
- setup_clear_cpu_cap(X86_FEATURE_SEV_SNP);
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
return -ENOSYS;
}
@@ -246,7 +246,7 @@ static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level)
{
struct rmpentry *large_entry, *entry;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return ERR_PTR(-ENODEV);
entry = get_rmpentry(pfn);
@@ -363,7 +363,7 @@ int psmash(u64 pfn)
unsigned long paddr = pfn << PAGE_SHIFT;
int ret;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
if (!pfn_valid(pfn))
@@ -472,7 +472,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state)
unsigned long paddr = pfn << PAGE_SHIFT;
int ret, level;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
level = RMP_TO_PG_LEVEL(state->pagesize);
@@ -558,3 +558,13 @@ void snp_leak_pages(u64 pfn, unsigned int npages)
spin_unlock(&snp_leaked_pages_list_lock);
}
EXPORT_SYMBOL_GPL(snp_leak_pages);
+
+void kdump_sev_callback(void)
+{
+ /*
+ * Do wbinvd() on remote CPUs when SNP is enabled in order to
+ * safely do SNP_SHUTDOWN on the local CPU.
+ */
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ wbinvd();
+}
diff --git a/block/bdev.c b/block/bdev.c
index 7a5f611c3d2e3e..b8e32d933a6369 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -583,9 +583,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder,
mutex_unlock(&bdev->bd_holder_lock);
bd_clear_claiming(whole, holder);
mutex_unlock(&bdev_lock);
-
- if (hops && hops->get_holder)
- hops->get_holder(holder);
}
/**
@@ -608,7 +605,6 @@ EXPORT_SYMBOL(bd_abort_claiming);
static void bd_end_claim(struct block_device *bdev, void *holder)
{
struct block_device *whole = bdev_whole(bdev);
- const struct blk_holder_ops *hops = bdev->bd_holder_ops;
bool unblock = false;
/*
@@ -631,9 +627,6 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
whole->bd_holder = NULL;
mutex_unlock(&bdev_lock);
- if (hops && hops->put_holder)
- hops->put_holder(holder);
-
/*
* If this was the last claim, remove holder link and unblock evpoll if
* it was a write holder.
@@ -776,17 +769,17 @@ void blkdev_put_no_open(struct block_device *bdev)
static bool bdev_writes_blocked(struct block_device *bdev)
{
- return bdev->bd_writers == -1;
+ return bdev->bd_writers < 0;
}
static void bdev_block_writes(struct block_device *bdev)
{
- bdev->bd_writers = -1;
+ bdev->bd_writers--;
}
static void bdev_unblock_writes(struct block_device *bdev)
{
- bdev->bd_writers = 0;
+ bdev->bd_writers++;
}
static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode)
@@ -813,6 +806,11 @@ static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode)
bdev->bd_writers++;
}
+static inline bool bdev_unclaimed(const struct file *bdev_file)
+{
+ return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host);
+}
+
static void bdev_yield_write_access(struct file *bdev_file)
{
struct block_device *bdev;
@@ -820,14 +818,15 @@ static void bdev_yield_write_access(struct file *bdev_file)
if (bdev_allow_write_mounted)
return;
+ if (bdev_unclaimed(bdev_file))
+ return;
+
bdev = file_bdev(bdev_file);
- /* Yield exclusive or shared write access. */
- if (bdev_file->f_mode & FMODE_WRITE) {
- if (bdev_writes_blocked(bdev))
- bdev_unblock_writes(bdev);
- else
- bdev->bd_writers--;
- }
+
+ if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED)
+ bdev_unblock_writes(bdev);
+ else if (bdev_file->f_mode & FMODE_WRITE)
+ bdev->bd_writers--;
}
/**
@@ -907,6 +906,8 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
if (bdev_nowait(bdev))
bdev_file->f_mode |= FMODE_NOWAIT;
+ if (mode & BLK_OPEN_RESTRICT_WRITES)
+ bdev_file->f_mode |= FMODE_WRITE_RESTRICTED;
bdev_file->f_mapping = bdev->bd_inode->i_mapping;
bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
bdev_file->private_data = holder;
@@ -1012,6 +1013,20 @@ struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
}
EXPORT_SYMBOL(bdev_file_open_by_path);
+static inline void bd_yield_claim(struct file *bdev_file)
+{
+ struct block_device *bdev = file_bdev(bdev_file);
+ void *holder = bdev_file->private_data;
+
+ lockdep_assert_held(&bdev->bd_disk->open_mutex);
+
+ if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder)))
+ return;
+
+ if (!bdev_unclaimed(bdev_file))
+ bd_end_claim(bdev, holder);
+}
+
void bdev_release(struct file *bdev_file)
{
struct block_device *bdev = file_bdev(bdev_file);
@@ -1036,7 +1051,7 @@ void bdev_release(struct file *bdev_file)
bdev_yield_write_access(bdev_file);
if (holder)
- bd_end_claim(bdev, holder);
+ bd_yield_claim(bdev_file);
/*
* Trigger event checking and tell drivers to flush MEDIA_CHANGE
@@ -1057,6 +1072,39 @@ put_no_open:
}
/**
+ * bdev_fput - yield claim to the block device and put the file
+ * @bdev_file: open block device
+ *
+ * Yield claim on the block device and put the file. Ensure that the
+ * block device can be reclaimed before the file is closed which is a
+ * deferred operation.
+ */
+void bdev_fput(struct file *bdev_file)
+{
+ if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops))
+ return;
+
+ if (bdev_file->private_data) {
+ struct block_device *bdev = file_bdev(bdev_file);
+ struct gendisk *disk = bdev->bd_disk;
+
+ mutex_lock(&disk->open_mutex);
+ bdev_yield_write_access(bdev_file);
+ bd_yield_claim(bdev_file);
+ /*
+ * Tell release we already gave up our hold on the
+ * device and if write restrictions are available that
+ * we already gave up write access to the device.
+ */
+ bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host);
+ mutex_unlock(&disk->open_mutex);
+ }
+
+ fput(bdev_file);
+}
+EXPORT_SYMBOL(bdev_fput);
+
+/**
* lookup_bdev() - Look up a struct block_device by name.
* @pathname: Name of the block device in the filesystem.
* @dev: Pointer to the block device's dev_t, if found.
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bdbb557feb5a0e..059467086b1312 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1409,6 +1409,12 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
return 0;
}
+void blkg_init_queue(struct request_queue *q)
+{
+ INIT_LIST_HEAD(&q->blkg_list);
+ mutex_init(&q->blkcg_mutex);
+}
+
int blkcg_init_disk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
@@ -1416,9 +1422,6 @@ int blkcg_init_disk(struct gendisk *disk)
bool preloaded;
int ret;
- INIT_LIST_HEAD(&q->blkg_list);
- mutex_init(&q->blkcg_mutex);
-
new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 78b74106bf10c5..90b3959d88cfa4 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -189,6 +189,7 @@ struct blkcg_policy {
extern struct blkcg blkcg_root;
extern bool blkcg_debug_stats;
+void blkg_init_queue(struct request_queue *q);
int blkcg_init_disk(struct gendisk *disk);
void blkcg_exit_disk(struct gendisk *disk);
@@ -482,6 +483,7 @@ struct blkcg {
};
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline void blkg_init_queue(struct request_queue *q) { }
static inline int blkcg_init_disk(struct gendisk *disk) { return 0; }
static inline void blkcg_exit_disk(struct gendisk *disk) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
diff --git a/block/blk-core.c b/block/blk-core.c
index a16b5abdbbf56f..b795ac177281ad 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -442,6 +442,8 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
init_waitqueue_head(&q->mq_freeze_wq);
mutex_init(&q->mq_freeze_lock);
+ blkg_init_queue(q);
+
/*
* Init percpu_ref in atomic mode so that it's faster to shutdown.
* See blk_register_queue() for details.
@@ -1195,6 +1197,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
if (unlikely(!rq_list_empty(plug->cached_rq)))
blk_mq_free_plug_rqs(plug);
+ plug->cur_ktime = 0;
current->flags &= ~PF_BLOCK_TS;
}
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 9a85bfbbc45a01..baa20c85799d54 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1347,7 +1347,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
{
struct ioc *ioc = iocg->ioc;
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
- u64 tdelta, delay, new_delay;
+ u64 tdelta, delay, new_delay, shift;
s64 vover, vover_pct;
u32 hwa;
@@ -1362,8 +1362,9 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
/* calculate the current delay in effect - 1/2 every second */
tdelta = now->now - iocg->delay_at;
- if (iocg->delay)
- delay = iocg->delay >> div64_u64(tdelta, USEC_PER_SEC);
+ shift = div64_u64(tdelta, USEC_PER_SEC);
+ if (iocg->delay && shift < BITS_PER_LONG)
+ delay = iocg->delay >> shift;
else
delay = 0;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 2a06fd33039da6..4e3483a16b7575 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -726,7 +726,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
* which can be mixed are set in each bio and mark @rq as mixed
* merged.
*/
-void blk_rq_set_mixed_merge(struct request *rq)
+static void blk_rq_set_mixed_merge(struct request *rq)
{
blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
struct bio *bio;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 555ada922cf060..32afb87efbd0ef 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -770,16 +770,11 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
/*
* Partial zone append completions cannot be supported as the
* BIO fragments may end up not being written sequentially.
- * For such case, force the completed nbytes to be equal to
- * the BIO size so that bio_advance() sets the BIO remaining
- * size to 0 and we end up calling bio_endio() before returning.
*/
- if (bio->bi_iter.bi_size != nbytes) {
+ if (bio->bi_iter.bi_size != nbytes)
bio->bi_status = BLK_STS_IOERR;
- nbytes = bio->bi_iter.bi_size;
- } else {
+ else
bio->bi_iter.bi_sector = rq->__sector;
- }
}
bio_advance(bio, nbytes);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 3c7d8d638ab59d..d2731843f2fccb 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -146,8 +146,7 @@ static int blk_validate_limits(struct queue_limits *lim)
max_hw_sectors = min_not_zero(lim->max_hw_sectors,
lim->max_dev_sectors);
if (lim->max_user_sectors) {
- if (lim->max_user_sectors > max_hw_sectors ||
- lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
+ if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
return -EINVAL;
lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
} else {
@@ -183,17 +182,13 @@ static int blk_validate_limits(struct queue_limits *lim)
return -EINVAL;
/*
- * Devices that require a virtual boundary do not support scatter/gather
- * I/O natively, but instead require a descriptor list entry for each
- * page (which might not be identical to the Linux PAGE_SIZE). Because
- * of that they are not limited by our notion of "segment size".
+ * Stacking device may have both virtual boundary and max segment
+ * size limit, so allow this setting now, and long-term the two
+ * might need to move out of stacking limits since we have immutable
+ * bvec and lower layer bio splitting is supposed to handle the two
+ * correctly.
*/
- if (lim->virt_boundary_mask) {
- if (WARN_ON_ONCE(lim->max_segment_size &&
- lim->max_segment_size != UINT_MAX))
- return -EINVAL;
- lim->max_segment_size = UINT_MAX;
- } else {
+ if (!lim->virt_boundary_mask) {
/*
* The maximum segment size has an odd historic 64k default that
* drivers probably should override. Just like the I/O size we
diff --git a/block/blk.h b/block/blk.h
index 5cac4e29ae1744..d9f584984bc44b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -339,7 +339,6 @@ int ll_back_merge_fn(struct request *req, struct bio *bio,
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next);
unsigned int blk_recalc_rq_segments(struct request *rq);
-void blk_rq_set_mixed_merge(struct request *rq);
bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
diff --git a/block/ioctl.c b/block/ioctl.c
index 0c76137adcaaa5..a9028a2c2db57b 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -96,7 +96,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
unsigned long arg)
{
uint64_t range[2];
- uint64_t start, len;
+ uint64_t start, len, end;
struct inode *inode = bdev->bd_inode;
int err;
@@ -117,7 +117,8 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
if (len & 511)
return -EINVAL;
- if (start + len > bdev_nr_bytes(bdev))
+ if (check_add_overflow(start, len, &end) ||
+ end > bdev_nr_bytes(bdev))
return -EINVAL;
filemap_invalidate_lock(inode->i_mapping);
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 39f6d1b98fd6a5..51d3f1a55d024c 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/firmware.h>
@@ -131,22 +131,6 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
return 0;
}
-static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
-{
- int ret;
-
- ret = ivpu_rpm_get_if_active(vdev);
- if (ret < 0)
- return ret;
-
- *clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0;
-
- if (ret)
- ivpu_rpm_put(vdev);
-
- return 0;
-}
-
static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
@@ -170,7 +154,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
- ret = ivpu_get_core_clock_rate(vdev, &args->value);
+ args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
@@ -387,12 +371,15 @@ int ivpu_shutdown(struct ivpu_device *vdev)
{
int ret;
- ivpu_prepare_for_reset(vdev);
+ /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
+ pci_save_state(to_pci_dev(vdev->drm.dev));
ret = ivpu_hw_power_down(vdev);
if (ret)
ivpu_warn(vdev, "Failed to power down HW: %d\n", ret);
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+
return ret;
}
@@ -530,7 +517,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
atomic64_set(&vdev->unique_id_counter, 0);
- xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
+ xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
@@ -560,11 +547,11 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
/* Power up early so the rest of init code can access VPU registers */
ret = ivpu_hw_power_up(vdev);
if (ret)
- goto err_power_down;
+ goto err_shutdown;
ret = ivpu_mmu_global_context_init(vdev);
if (ret)
- goto err_power_down;
+ goto err_shutdown;
ret = ivpu_mmu_init(vdev);
if (ret)
@@ -601,10 +588,8 @@ err_mmu_rctx_fini:
ivpu_mmu_reserved_context_fini(vdev);
err_mmu_gctx_fini:
ivpu_mmu_global_context_fini(vdev);
-err_power_down:
- ivpu_hw_power_down(vdev);
- if (IVPU_WA(d3hot_after_power_off))
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+err_shutdown:
+ ivpu_shutdown(vdev);
err_xa_destroy:
xa_destroy(&vdev->db_xa);
xa_destroy(&vdev->submitted_jobs_xa);
@@ -628,9 +613,8 @@ static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev)
static void ivpu_dev_fini(struct ivpu_device *vdev)
{
ivpu_pm_disable(vdev);
+ ivpu_prepare_for_reset(vdev);
ivpu_shutdown(vdev);
- if (IVPU_WA(d3hot_after_power_off))
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
ivpu_jobs_abort_all(vdev);
ivpu_job_done_consumer_fini(vdev);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 7be0500d9bb891..bb4374d0eaecc9 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#ifndef __IVPU_DRV_H__
@@ -90,7 +90,6 @@
struct ivpu_wa_table {
bool punit_disabled;
bool clear_runtime_mem;
- bool d3hot_after_power_off;
bool interrupt_clear_with_0;
bool disable_clock_relinquish;
bool disable_d0i3_msg;
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index b2909168a0a690..094c659d2800b1 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -21,6 +21,7 @@ struct ivpu_hw_ops {
u32 (*profiling_freq_get)(struct ivpu_device *vdev);
void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
+ u32 (*ratio_to_freq)(struct ivpu_device *vdev, u32 ratio);
u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev);
@@ -130,6 +131,11 @@ static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
return vdev->hw->ops->reg_pll_freq_get(vdev);
};
+static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ return vdev->hw->ops->ratio_to_freq(vdev, ratio);
+}
+
static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev)
{
return vdev->hw->ops->reg_telemetry_offset_get(vdev);
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index 9a0c9498baba29..bd25e2d9fb0f45 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include "ivpu_drv.h"
@@ -75,7 +75,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
{
vdev->wa.punit_disabled = false;
vdev->wa.clear_runtime_mem = false;
- vdev->wa.d3hot_after_power_off = true;
REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK);
if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) {
@@ -86,7 +85,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
IVPU_PRINT_WA(punit_disabled);
IVPU_PRINT_WA(clear_runtime_mem);
- IVPU_PRINT_WA(d3hot_after_power_off);
IVPU_PRINT_WA(interrupt_clear_with_0);
}
@@ -805,12 +803,12 @@ static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool ena
/* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */
}
-static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
+static u32 ivpu_hw_37xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
{
u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
u32 cpu_clock;
- if ((config & 0xff) == PLL_RATIO_4_3)
+ if ((vdev->hw->config & 0xff) == PLL_RATIO_4_3)
cpu_clock = pll_clock * 2 / 4;
else
cpu_clock = pll_clock * 2 / 5;
@@ -829,7 +827,7 @@ static u32 ivpu_hw_37xx_reg_pll_freq_get(struct ivpu_device *vdev)
if (!ivpu_is_silicon(vdev))
return PLL_SIMULATION_FREQ;
- return ivpu_hw_37xx_pll_to_freq(pll_curr_ratio, vdev->hw->config);
+ return ivpu_hw_37xx_ratio_to_freq(vdev, pll_curr_ratio);
}
static u32 ivpu_hw_37xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
@@ -1052,6 +1050,7 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
.profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
.profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
+ .ratio_to_freq = ivpu_hw_37xx_ratio_to_freq,
.reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,
.reg_telemetry_enable_get = ivpu_hw_37xx_reg_telemetry_enable_get,
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index e4eddbf5d11c25..b0b88d4c89264a 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -980,6 +980,11 @@ static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
return PLL_RATIO_TO_FREQ(pll_curr_ratio);
}
+static u32 ivpu_hw_40xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+ return PLL_RATIO_TO_FREQ(ratio);
+}
+
static u32 ivpu_hw_40xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
{
return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET);
@@ -1230,6 +1235,7 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
.profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
.profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
+ .ratio_to_freq = ivpu_hw_40xx_ratio_to_freq,
.reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,
.reg_telemetry_enable_get = ivpu_hw_40xx_reg_telemetry_enable_get,
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index 04ac4b9840fbe5..56ff067f63e295 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/genalloc.h>
@@ -501,7 +501,11 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
spin_lock_init(&ipc->cons_lock);
INIT_LIST_HEAD(&ipc->cons_list);
INIT_LIST_HEAD(&ipc->cb_msg_list);
- drmm_mutex_init(&vdev->drm, &ipc->lock);
+ ret = drmm_mutex_init(&vdev->drm, &ipc->lock);
+ if (ret) {
+ ivpu_err(vdev, "Failed to initialize ipc->lock, ret %d\n", ret);
+ goto err_free_rx;
+ }
ivpu_ipc_reset(vdev);
return 0;
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 91bd640655ab36..2e46b322c4505e 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -278,7 +278,7 @@ static const char *ivpu_mmu_event_to_str(u32 cmd)
case IVPU_MMU_EVT_F_VMS_FETCH:
return "Fetch of VMS caused external abort";
default:
- return "Unknown CMDQ command";
+ return "Unknown event";
}
}
@@ -286,15 +286,15 @@ static const char *ivpu_mmu_cmdq_err_to_str(u32 err)
{
switch (err) {
case IVPU_MMU_CERROR_NONE:
- return "No CMDQ Error";
+ return "No error";
case IVPU_MMU_CERROR_ILL:
return "Illegal command";
case IVPU_MMU_CERROR_ABT:
- return "External abort on CMDQ read";
+ return "External abort on command queue read";
case IVPU_MMU_CERROR_ATC_INV_SYNC:
return "Sync failed to complete ATS invalidation";
default:
- return "Unknown CMDQ Error";
+ return "Unknown error";
}
}
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 7cce1c928a7f4e..4f5ea466731ffe 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
*/
#include <linux/highmem.h>
@@ -58,14 +58,11 @@ static int ivpu_suspend(struct ivpu_device *vdev)
{
int ret;
- /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
- pci_save_state(to_pci_dev(vdev->drm.dev));
+ ivpu_prepare_for_reset(vdev);
ret = ivpu_shutdown(vdev);
if (ret)
- ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
-
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+ ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret);
return ret;
}
@@ -74,10 +71,10 @@ static int ivpu_resume(struct ivpu_device *vdev)
{
int ret;
- pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
+retry:
pci_restore_state(to_pci_dev(vdev->drm.dev));
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
-retry:
ret = ivpu_hw_power_up(vdev);
if (ret) {
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
@@ -100,6 +97,7 @@ err_mmu_disable:
ivpu_mmu_disable(vdev);
err_power_down:
ivpu_hw_power_down(vdev);
+ pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
if (!ivpu_fw_is_cold_boot(vdev)) {
ivpu_pm_prepare_cold_boot(vdev);
diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c
index b91155ea9c343c..c9131259f717b0 100644
--- a/drivers/acpi/acpica/dbnames.c
+++ b/drivers/acpi/acpica/dbnames.c
@@ -550,8 +550,12 @@ acpi_db_walk_for_fields(acpi_handle obj_handle,
ACPI_FREE(buffer.pointer);
buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
- acpi_evaluate_object(obj_handle, NULL, NULL, &buffer);
-
+ status = acpi_evaluate_object(obj_handle, NULL, NULL, &buffer);
+ if (ACPI_FAILURE(status)) {
+ acpi_os_printf("Could Not evaluate object %p\n",
+ obj_handle);
+ return (AE_OK);
+ }
/*
* Since this is a field unit, surround the output in braces
*/
diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
index 66e7f529e92fc2..01faca3a238a3a 100644
--- a/drivers/acpi/apei/einj-core.c
+++ b/drivers/acpi/apei/einj-core.c
@@ -851,7 +851,7 @@ err_put_table:
return rc;
}
-static void __exit einj_remove(struct platform_device *pdev)
+static void einj_remove(struct platform_device *pdev)
{
struct apei_exec_context ctx;
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 7c157bf926956b..d1464324de9519 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1843,7 +1843,8 @@ static void acpi_scan_dep_init(struct acpi_device *adev)
if (dep->honor_dep)
adev->flags.honor_deps = 1;
- adev->dep_unmet++;
+ if (!dep->met)
+ adev->dep_unmet++;
}
}
}
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 302dce0b2b5044..d67881b50bca28 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -662,14 +662,15 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz,
{
int result;
- tz->thermal_zone = thermal_zone_device_register_with_trips("acpitz",
- trip_table,
- trip_count,
- tz,
- &acpi_thermal_zone_ops,
- NULL,
- passive_delay,
- tz->polling_frequency * 100);
+ if (trip_count)
+ tz->thermal_zone = thermal_zone_device_register_with_trips(
+ "acpitz", trip_table, trip_count, tz,
+ &acpi_thermal_zone_ops, NULL, passive_delay,
+ tz->polling_frequency * 100);
+ else
+ tz->thermal_zone = thermal_tripless_zone_device_register(
+ "acpitz", tz, &acpi_thermal_zone_ops, NULL);
+
if (IS_ERR(tz->thermal_zone))
return PTR_ERR(tz->thermal_zone);
@@ -901,11 +902,8 @@ static int acpi_thermal_add(struct acpi_device *device)
trip++;
}
- if (trip == trip_table) {
+ if (trip == trip_table)
pr_warn(FW_BUG "No valid trip points!\n");
- result = -ENODEV;
- goto free_memory;
- }
result = acpi_thermal_register_thermal_zone(tz, trip_table,
trip - trip_table,
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 562302e2e57ce5..6548f10e61d9c7 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -666,6 +666,87 @@ static int mobile_lpm_policy = -1;
module_param(mobile_lpm_policy, int, 0644);
MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets");
+static char *ahci_mask_port_map;
+module_param_named(mask_port_map, ahci_mask_port_map, charp, 0444);
+MODULE_PARM_DESC(mask_port_map,
+ "32-bits port map masks to ignore controllers ports. "
+ "Valid values are: "
+ "\"<mask>\" to apply the same mask to all AHCI controller "
+ "devices, and \"<pci_dev>=<mask>,<pci_dev>=<mask>,...\" to "
+ "specify different masks for the controllers specified, "
+ "where <pci_dev> is the PCI ID of an AHCI controller in the "
+ "form \"domain:bus:dev.func\"");
+
+static void ahci_apply_port_map_mask(struct device *dev,
+ struct ahci_host_priv *hpriv, char *mask_s)
+{
+ unsigned int mask;
+
+ if (kstrtouint(mask_s, 0, &mask)) {
+ dev_err(dev, "Invalid port map mask\n");
+ return;
+ }
+
+ hpriv->mask_port_map = mask;
+}
+
+static void ahci_get_port_map_mask(struct device *dev,
+ struct ahci_host_priv *hpriv)
+{
+ char *param, *end, *str, *mask_s;
+ char *name;
+
+ if (!strlen(ahci_mask_port_map))
+ return;
+
+ str = kstrdup(ahci_mask_port_map, GFP_KERNEL);
+ if (!str)
+ return;
+
+ /* Handle single mask case */
+ if (!strchr(str, '=')) {
+ ahci_apply_port_map_mask(dev, hpriv, str);
+ goto free;
+ }
+
+ /*
+ * Mask list case: parse the parameter to apply the mask only if
+ * the device name matches.
+ */
+ param = str;
+ end = param + strlen(param);
+ while (param && param < end && *param) {
+ name = param;
+ param = strchr(name, '=');
+ if (!param)
+ break;
+
+ *param = '\0';
+ param++;
+ if (param >= end)
+ break;
+
+ if (strcmp(dev_name(dev), name) != 0) {
+ param = strchr(param, ',');
+ if (param)
+ param++;
+ continue;
+ }
+
+ mask_s = param;
+ param = strchr(mask_s, ',');
+ if (param) {
+ *param = '\0';
+ param++;
+ }
+
+ ahci_apply_port_map_mask(dev, hpriv, mask_s);
+ }
+
+free:
+ kfree(str);
+}
+
static void ahci_pci_save_initial_config(struct pci_dev *pdev,
struct ahci_host_priv *hpriv)
{
@@ -688,6 +769,10 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev,
"Disabling your PATA port. Use the boot option 'ahci.marvell_enable=0' to avoid this.\n");
}
+ /* Handle port map masks passed as module parameter. */
+ if (ahci_mask_port_map)
+ ahci_get_port_map_mask(&pdev->dev, hpriv);
+
ahci_save_initial_config(&pdev->dev, hpriv);
}
diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c
index d4a626f87963ba..79a8b0aa37bf37 100644
--- a/drivers/ata/ahci_st.c
+++ b/drivers/ata/ahci_st.c
@@ -30,7 +30,6 @@
#define ST_AHCI_OOBR_CIMAX_SHIFT 0
struct st_ahci_drv_data {
- struct platform_device *ahci;
struct reset_control *pwr;
struct reset_control *sw_rst;
struct reset_control *pwr_rst;
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index be3412cdb22e78..c449d60d9bb962 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2539,7 +2539,7 @@ static void ata_dev_config_cdl(struct ata_device *dev)
bool cdl_enabled;
u64 val;
- if (ata_id_major_version(dev->id) < 12)
+ if (ata_id_major_version(dev->id) < 11)
goto not_supported;
if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE) ||
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index b0d6e69c4a5b2e..214b935c2ced79 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -712,8 +712,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
ehc->saved_ncq_enabled |= 1 << devno;
/* If we are resuming, wake up the device */
- if (ap->pflags & ATA_PFLAG_RESUMING)
+ if (ap->pflags & ATA_PFLAG_RESUMING) {
+ dev->flags |= ATA_DFLAG_RESUMING;
ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
+ }
}
}
@@ -3169,6 +3171,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
return 0;
err:
+ dev->flags &= ~ATA_DFLAG_RESUMING;
*r_failed_dev = dev;
return rc;
}
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 0a0f483124c3a5..e954976891a9f5 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4730,6 +4730,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
struct ata_link *link;
struct ata_device *dev;
unsigned long flags;
+ bool do_resume;
int ret = 0;
mutex_lock(&ap->scsi_scan_mutex);
@@ -4744,25 +4745,34 @@ void ata_scsi_dev_rescan(struct work_struct *work)
* bail out.
*/
if (ap->pflags & ATA_PFLAG_SUSPENDED)
- goto unlock;
+ goto unlock_ap;
if (!sdev)
continue;
if (scsi_device_get(sdev))
continue;
+ do_resume = dev->flags & ATA_DFLAG_RESUMING;
+
spin_unlock_irqrestore(ap->lock, flags);
+ if (do_resume) {
+ ret = scsi_resume_device(sdev);
+ if (ret == -EWOULDBLOCK)
+ goto unlock_scan;
+ dev->flags &= ~ATA_DFLAG_RESUMING;
+ }
ret = scsi_rescan_device(sdev);
scsi_device_put(sdev);
spin_lock_irqsave(ap->lock, flags);
if (ret)
- goto unlock;
+ goto unlock_ap;
}
}
-unlock:
+unlock_ap:
spin_unlock_irqrestore(ap->lock, flags);
+unlock_scan:
mutex_unlock(&ap->scsi_scan_mutex);
/* Reschedule with a delay if scsi_rescan_device() returned an error */
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index 4ac854f6b05777..88b2e9817f49df 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -1371,9 +1371,6 @@ static struct pci_driver pata_macio_pci_driver = {
.suspend = pata_macio_pci_suspend,
.resume = pata_macio_pci_resume,
#endif
- .driver = {
- .owner = THIS_MODULE,
- },
};
MODULE_DEVICE_TABLE(pci, pata_macio_pci_match);
diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c
index 400b22ee99c33a..4c270999ba3ccd 100644
--- a/drivers/ata/sata_gemini.c
+++ b/drivers/ata/sata_gemini.c
@@ -200,7 +200,10 @@ int gemini_sata_start_bridge(struct sata_gemini *sg, unsigned int bridge)
pclk = sg->sata0_pclk;
else
pclk = sg->sata1_pclk;
- clk_enable(pclk);
+ ret = clk_enable(pclk);
+ if (ret)
+ return ret;
+
msleep(10);
/* Do not keep clocking a bridge that is not online */
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index e82786c63fbd73..9bec0aee92e04c 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = {
},
};
-static const struct pci_device_id mv_pci_tbl[] = {
- { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
- { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
- { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
- { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
- /* RocketRAID 1720/174x have different identifiers */
- { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
- { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
- { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
-
- { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
- { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
- { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
- { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
- { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
-
- { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
-
- /* Adaptec 1430SA */
- { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
-
- /* Marvell 7042 support */
- { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
-
- /* Highpoint RocketRAID PCIe series */
- { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
- { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
-
- { } /* terminate list */
-};
-
static const struct mv_hw_ops mv5xxx_ops = {
.phy_errata = mv5_phy_errata,
.enable_leds = mv5_enable_leds,
@@ -4303,6 +4272,36 @@ static int mv_pci_init_one(struct pci_dev *pdev,
static int mv_pci_device_resume(struct pci_dev *pdev);
#endif
+static const struct pci_device_id mv_pci_tbl[] = {
+ { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
+ { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
+ { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
+ { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
+ /* RocketRAID 1720/174x have different identifiers */
+ { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
+ { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
+ { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
+
+ { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
+ { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
+ { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
+ { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
+ { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
+
+ { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
+
+ /* Adaptec 1430SA */
+ { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
+
+ /* Marvell 7042 support */
+ { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
+
+ /* Highpoint RocketRAID PCIe series */
+ { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
+ { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
+
+ { } /* terminate list */
+};
static struct pci_driver mv_pci_driver = {
.name = DRV_NAME,
@@ -4315,6 +4314,7 @@ static struct pci_driver mv_pci_driver = {
#endif
};
+MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
/**
* mv_print_info - Dump key info to kernel log for perusal.
@@ -4487,7 +4487,6 @@ static void __exit mv_exit(void)
MODULE_AUTHOR("Brett Russ");
MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers");
MODULE_LICENSE("GPL v2");
-MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
MODULE_VERSION(DRV_VERSION);
MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index b51d7a9d0d90ce..a482741eb181ff 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -957,8 +957,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource,
offset -= (idx * window_size);
idx++;
- dist = ((long) (window_size - (offset + size))) >= 0 ? size :
- (long) (window_size - offset);
+ dist = min(size, window_size - offset);
memcpy_fromio(psource, dimm_mmio + offset / 4, dist);
psource += dist;
@@ -1005,8 +1004,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource,
readl(mmio + PDC_DIMM_WINDOW_CTLR);
offset -= (idx * window_size);
idx++;
- dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size :
- (long) (window_size - offset);
+ dist = min(size, window_size - offset);
memcpy_toio(dimm_mmio + offset / 4, psource, dist);
writel(0x01, mmio + PDC_GENERAL_CTLR);
readl(mmio + PDC_GENERAL_CTLR);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index b93f3c5716aeea..5f4e03336e68ef 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void);
static void __fw_devlink_link_to_consumers(struct device *dev);
static bool fw_devlink_drv_reg_done;
static bool fw_devlink_best_effort;
+static struct workqueue_struct *device_link_wq;
/**
* __fwnode_link_add - Create a link between two fwnode_handles.
@@ -533,12 +534,26 @@ static void devlink_dev_release(struct device *dev)
/*
* It may take a while to complete this work because of the SRCU
* synchronization in device_link_release_fn() and if the consumer or
- * supplier devices get deleted when it runs, so put it into the "long"
- * workqueue.
+ * supplier devices get deleted when it runs, so put it into the
+ * dedicated workqueue.
*/
- queue_work(system_long_wq, &link->rm_work);
+ queue_work(device_link_wq, &link->rm_work);
}
+/**
+ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate
+ */
+void device_link_wait_removal(void)
+{
+ /*
+ * devlink removal jobs are queued in the dedicated work queue.
+ * To be sure that all removal jobs are terminated, ensure that any
+ * scheduled work has run to completion.
+ */
+ flush_workqueue(device_link_wq);
+}
+EXPORT_SYMBOL_GPL(device_link_wait_removal);
+
static struct class devlink_class = {
.name = "devlink",
.dev_groups = devlink_groups,
@@ -4164,9 +4179,14 @@ int __init devices_init(void)
sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
if (!sysfs_dev_char_kobj)
goto char_kobj_err;
+ device_link_wq = alloc_workqueue("device_link_wq", 0, 0);
+ if (!device_link_wq)
+ goto wq_err;
return 0;
+ wq_err:
+ kobject_put(sysfs_dev_char_kobj);
char_kobj_err:
kobject_put(sysfs_dev_block_kobj);
block_kobj_err:
diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c
index 41edd6a430eb45..55999a50ccc0b8 100644
--- a/drivers/base/regmap/regcache-maple.c
+++ b/drivers/base/regmap/regcache-maple.c
@@ -112,7 +112,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
unsigned long *entry, *lower, *upper;
unsigned long lower_index, lower_last;
unsigned long upper_index, upper_last;
- int ret;
+ int ret = 0;
lower = NULL;
upper = NULL;
@@ -145,7 +145,7 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min,
upper_index = max + 1;
upper_last = mas.last;
- upper = kmemdup(&entry[max + 1],
+ upper = kmemdup(&entry[max - mas.index + 1],
((mas.last - max) *
sizeof(unsigned long)),
map->alloc_flags);
@@ -244,7 +244,7 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min,
unsigned long lmin = min;
unsigned long lmax = max;
unsigned int r, v, sync_start;
- int ret;
+ int ret = 0;
bool sync_needed = false;
map->cache_bypass = true;
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 71c39bcd872c7e..ed33cf7192d216 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1965,10 +1965,10 @@ static int null_add_dev(struct nullb_device *dev)
out_ida_free:
ida_free(&nullb_indexes, nullb->index);
-out_cleanup_zone:
- null_free_zoned_dev(dev);
out_cleanup_disk:
put_disk(nullb->disk);
+out_cleanup_zone:
+ null_free_zoned_dev(dev);
out_cleanup_tags:
if (nullb->tag_set == &nullb->__tag_set)
blk_mq_free_tag_set(nullb->tag_set);
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index b40b32fa7f1c38..19cfc342fc7bbb 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -826,11 +826,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup);
int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
{
+ bdaddr_t bdaddr_swapped;
struct sk_buff *skb;
int err;
- skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr,
- HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
+ baswap(&bdaddr_swapped, bdaddr);
+
+ skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6,
+ &bdaddr_swapped, HCI_EV_VENDOR,
+ HCI_INIT_TIMEOUT);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err);
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 8a60ad7acd7052..ecbc52eaf10109 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -7,7 +7,6 @@
*
* Copyright (C) 2007 Texas Instruments, Inc.
* Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved.
- * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
*
* Acknowledgements:
* This file is based on hci_ll.c, which was...
@@ -226,6 +225,7 @@ struct qca_serdev {
struct qca_power *bt_power;
u32 init_speed;
u32 oper_speed;
+ bool bdaddr_property_broken;
const char *firmware_name;
};
@@ -1843,6 +1843,7 @@ static int qca_setup(struct hci_uart *hu)
const char *firmware_name = qca_get_firmware_name(hu);
int ret;
struct qca_btsoc_version ver;
+ struct qca_serdev *qcadev;
const char *soc_name;
ret = qca_check_speeds(hu);
@@ -1904,16 +1905,11 @@ retry:
case QCA_WCN6750:
case QCA_WCN6855:
case QCA_WCN7850:
+ set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
- /* Set BDA quirk bit for reading BDA value from fwnode property
- * only if that property exist in DT.
- */
- if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) {
- set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
- bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later");
- } else {
- bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA");
- }
+ qcadev = serdev_device_get_drvdata(hu->serdev);
+ if (qcadev->bdaddr_property_broken)
+ set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks);
hci_set_aosp_capable(hdev);
@@ -2295,6 +2291,9 @@ static int qca_serdev_probe(struct serdev_device *serdev)
if (!qcadev->oper_speed)
BT_DBG("UART will pick default operating speed");
+ qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev,
+ "qcom,local-bd-address-broken");
+
if (data)
qcadev->btsoc_type = data->soc_type;
else
diff --git a/drivers/cache/sifive_ccache.c b/drivers/cache/sifive_ccache.c
index 89ed6cd6b059eb..e9cc8b4786fbfb 100644
--- a/drivers/cache/sifive_ccache.c
+++ b/drivers/cache/sifive_ccache.c
@@ -15,6 +15,8 @@
#include <linux/of_address.h>
#include <linux/device.h>
#include <linux/bitfield.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
#include <asm/cacheflush.h>
#include <asm/cacheinfo.h>
#include <asm/dma-noncoherent.h>
@@ -247,13 +249,49 @@ static irqreturn_t ccache_int_handler(int irq, void *device)
return IRQ_HANDLED;
}
+static int sifive_ccache_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ unsigned long quirks;
+ int intr_num, rc;
+
+ quirks = (unsigned long)device_get_match_data(dev);
+
+ intr_num = platform_irq_count(pdev);
+ if (!intr_num)
+ return dev_err_probe(dev, -ENODEV, "No interrupts property\n");
+
+ for (int i = 0; i < intr_num; i++) {
+ if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
+ continue;
+
+ g_irq[i] = platform_get_irq(pdev, i);
+ if (g_irq[i] < 0)
+ return g_irq[i];
+
+ rc = devm_request_irq(dev, g_irq[i], ccache_int_handler, 0, "ccache_ecc", NULL);
+ if (rc)
+ return dev_err_probe(dev, rc, "Could not request IRQ %d\n", g_irq[i]);
+ }
+
+ return 0;
+}
+
+static struct platform_driver sifive_ccache_driver = {
+ .probe = sifive_ccache_probe,
+ .driver = {
+ .name = "sifive_ccache",
+ .of_match_table = sifive_ccache_ids,
+ },
+};
+
static int __init sifive_ccache_init(void)
{
struct device_node *np;
struct resource res;
- int i, rc, intr_num;
const struct of_device_id *match;
unsigned long quirks;
+ int rc;
np = of_find_matching_node_and_match(NULL, sifive_ccache_ids, &match);
if (!np)
@@ -277,28 +315,6 @@ static int __init sifive_ccache_init(void)
goto err_unmap;
}
- intr_num = of_property_count_u32_elems(np, "interrupts");
- if (!intr_num) {
- pr_err("No interrupts property\n");
- rc = -ENODEV;
- goto err_unmap;
- }
-
- for (i = 0; i < intr_num; i++) {
- g_irq[i] = irq_of_parse_and_map(np, i);
-
- if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
- continue;
-
- rc = request_irq(g_irq[i], ccache_int_handler, 0, "ccache_ecc",
- NULL);
- if (rc) {
- pr_err("Could not request IRQ %d\n", g_irq[i]);
- goto err_free_irq;
- }
- }
- of_node_put(np);
-
#ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
if (quirks & QUIRK_NONSTANDARD_CACHE_OPS) {
riscv_cbom_block_size = SIFIVE_CCACHE_LINE_SIZE;
@@ -315,11 +331,15 @@ static int __init sifive_ccache_init(void)
#ifdef CONFIG_DEBUG_FS
setup_sifive_debug();
#endif
+
+ rc = platform_driver_register(&sifive_ccache_driver);
+ if (rc)
+ goto err_unmap;
+
+ of_node_put(np);
+
return 0;
-err_free_irq:
- while (--i >= 0)
- free_irq(g_irq[i], NULL);
err_unmap:
iounmap(ccache_base);
err_node_put:
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 456be28ba67cb4..2597cb43f43871 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -702,7 +702,7 @@ static void extract_entropy(void *buf, size_t len)
static void __cold _credit_init_bits(size_t bits)
{
- static struct execute_work set_ready;
+ static DECLARE_WORK(set_ready, crng_set_ready);
unsigned int new, orig, add;
unsigned long flags;
@@ -718,8 +718,8 @@ static void __cold _credit_init_bits(size_t bits)
if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {
crng_reseed(NULL); /* Sets crng_init to CRNG_READY under base_crng.lock. */
- if (static_key_initialized)
- execute_in_process_context(crng_set_ready, &set_ready);
+ if (static_key_initialized && system_unbound_wq)
+ queue_work(system_unbound_wq, &set_ready);
atomic_notifier_call_chain(&random_ready_notifier, 0, NULL);
wake_up_interruptible(&crng_init_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
@@ -890,8 +890,8 @@ void __init random_init(void)
/*
* If we were initialized by the cpu or bootloader before jump labels
- * are initialized, then we should enable the static branch here, where
- * it's guaranteed that jump labels have been initialized.
+ * or workqueues are initialized, then we should enable the static
+ * branch here, where it's guaranteed that these have been initialized.
*/
if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY)
crng_set_ready(NULL);
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index f44efbb89c346a..2102377f727b1e 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -1090,7 +1090,7 @@ static int __sev_snp_init_locked(int *error)
void *arg = &data;
int cmd, rc = 0;
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
sev = psp->sev_data;
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 67998dbd1d46b4..5f3c9c5529b960 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -144,17 +144,4 @@ config CXL_REGION_INVALIDATION_TEST
If unsure, or if this kernel is meant for production environments,
say N.
-config CXL_PMU
- tristate "CXL Performance Monitoring Unit"
- default CXL_BUS
- depends on PERF_EVENTS
- help
- Support performance monitoring as defined in CXL rev 3.0
- section 13.2: Performance Monitoring. CXL components may have
- one or more CXL Performance Monitoring Units (CPMUs).
-
- Say 'y/m' to enable a driver that will attach to performance
- monitoring units and provide standard perf based interfaces.
-
- If unsure say 'm'.
endif
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index af5cb818f84d6b..cb8c155a2c9b3d 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -525,22 +525,11 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
{
struct acpi_device *hb = to_cxl_host_bridge(NULL, dev);
u32 uid;
- int rc;
if (kstrtou32(acpi_device_uid(hb), 0, &uid))
return -EINVAL;
- rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
- if (rc < 0)
- return rc;
-
- /* Adjust back to picoseconds from nanoseconds */
- for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
- dport->hb_coord[i].read_latency *= 1000;
- dport->hb_coord[i].write_latency *= 1000;
- }
-
- return 0;
+ return acpi_get_genport_coordinates(uid, dport->coord);
}
static int add_host_bridge_dport(struct device *match, void *arg)
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index eddbbe21450ca9..bb83867d9fec98 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -14,12 +14,42 @@
struct dsmas_entry {
struct range dpa_range;
u8 handle;
- struct access_coordinate coord;
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
int entries;
int qos_class;
};
+static u32 cdat_normalize(u16 entry, u64 base, u8 type)
+{
+ u32 value;
+
+ /*
+ * Check for invalid and overflow values
+ */
+ if (entry == 0xffff || !entry)
+ return 0;
+ else if (base > (UINT_MAX / (entry)))
+ return 0;
+
+ /*
+ * CDAT fields follow the format of HMAT fields. See table 5 Device
+ * Scoped Latency and Bandwidth Information Structure in Coherent Device
+ * Attribute Table (CDAT) Specification v1.01.
+ */
+ value = entry * base;
+ switch (type) {
+ case ACPI_HMAT_ACCESS_LATENCY:
+ case ACPI_HMAT_READ_LATENCY:
+ case ACPI_HMAT_WRITE_LATENCY:
+ value = DIV_ROUND_UP(value, 1000);
+ break;
+ default:
+ break;
+ }
+ return value;
+}
+
static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
@@ -58,8 +88,8 @@ static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
return 0;
}
-static void cxl_access_coordinate_set(struct access_coordinate *coord,
- int access, unsigned int val)
+static void __cxl_access_coordinate_set(struct access_coordinate *coord,
+ int access, unsigned int val)
{
switch (access) {
case ACPI_HMAT_ACCESS_LATENCY:
@@ -85,6 +115,13 @@ static void cxl_access_coordinate_set(struct access_coordinate *coord,
}
}
+static void cxl_access_coordinate_set(struct access_coordinate *coord,
+ int access, unsigned int val)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ __cxl_access_coordinate_set(&coord[i], access, val);
+}
+
static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
@@ -97,7 +134,6 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
__le16 le_val;
u64 val;
u16 len;
- int rc;
len = le16_to_cpu((__force __le16)hdr->length);
if (len != size || (unsigned long)hdr + len > end) {
@@ -124,12 +160,10 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
le_base = (__force __le64)dslbis->entry_base_unit;
le_val = (__force __le16)dslbis->entry[0];
- rc = check_mul_overflow(le64_to_cpu(le_base),
- le16_to_cpu(le_val), &val);
- if (rc)
- pr_warn("DSLBIS value overflowed.\n");
+ val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+ dslbis->data_type);
- cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val);
+ cxl_access_coordinate_set(dent->coord, dslbis->data_type, val);
return 0;
}
@@ -163,25 +197,18 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port,
static int cxl_port_perf_data_calculate(struct cxl_port *port,
struct xarray *dsmas_xa)
{
- struct access_coordinate ep_c;
- struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+ struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
struct dsmas_entry *dent;
int valid_entries = 0;
unsigned long index;
int rc;
- rc = cxl_endpoint_get_perf_coordinates(port, &ep_c);
+ rc = cxl_endpoint_get_perf_coordinates(port, ep_c);
if (rc) {
dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n");
return rc;
}
- rc = cxl_hb_get_perf_coordinates(port, coord);
- if (rc) {
- dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
- return rc;
- }
-
struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
if (!cxl_root)
@@ -193,18 +220,10 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
xa_for_each(dsmas_xa, index, dent) {
int qos_class;
- cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c);
- /*
- * Keeping the host bridge coordinates separate from the dsmas
- * coordinates in order to allow calculation of access class
- * 0 and 1 for region later.
- */
- cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU],
- &coord[ACCESS_COORDINATE_CPU],
- &dent->coord);
+ cxl_coordinates_combine(dent->coord, dent->coord, ep_c);
dent->entries = 1;
rc = cxl_root->ops->qos_class(cxl_root,
- &coord[ACCESS_COORDINATE_CPU],
+ &dent->coord[ACCESS_COORDINATE_CPU],
1, &qos_class);
if (rc != 1)
continue;
@@ -222,14 +241,17 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
struct cxl_dpa_perf *dpa_perf)
{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ dpa_perf->coord[i] = dent->coord[i];
dpa_perf->dpa_range = dent->dpa_range;
- dpa_perf->coord = dent->coord;
dpa_perf->qos_class = dent->qos_class;
dev_dbg(dev,
"DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
dent->dpa_range.start, dpa_perf->qos_class,
- dent->coord.read_bandwidth, dent->coord.write_bandwidth,
- dent->coord.read_latency, dent->coord.write_latency);
+ dent->coord[ACCESS_COORDINATE_CPU].read_bandwidth,
+ dent->coord[ACCESS_COORDINATE_CPU].write_bandwidth,
+ dent->coord[ACCESS_COORDINATE_CPU].read_latency,
+ dent->coord[ACCESS_COORDINATE_CPU].write_latency);
}
static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
@@ -461,17 +483,16 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;
-
- if (check_mul_overflow(le64_to_cpu(le_base),
- le16_to_cpu(le_val), &val))
- dev_warn(dev, "SSLBIS value overflowed!\n");
+ val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+ sslbis->data_type);
xa_for_each(&port->dports, index, dport) {
if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
- dsp_id == dport->port_id)
- cxl_access_coordinate_set(&dport->sw_coord,
+ dsp_id == dport->port_id) {
+ cxl_access_coordinate_set(dport->coord,
sslbis->data_type,
val);
+ }
}
}
@@ -493,6 +514,21 @@ void cxl_switch_parse_cdat(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL);
+static void __cxl_coordinates_combine(struct access_coordinate *out,
+ struct access_coordinate *c1,
+ struct access_coordinate *c2)
+{
+ if (c1->write_bandwidth && c2->write_bandwidth)
+ out->write_bandwidth = min(c1->write_bandwidth,
+ c2->write_bandwidth);
+ out->write_latency = c1->write_latency + c2->write_latency;
+
+ if (c1->read_bandwidth && c2->read_bandwidth)
+ out->read_bandwidth = min(c1->read_bandwidth,
+ c2->read_bandwidth);
+ out->read_latency = c1->read_latency + c2->read_latency;
+}
+
/**
* cxl_coordinates_combine - Combine the two input coordinates
*
@@ -504,15 +540,8 @@ void cxl_coordinates_combine(struct access_coordinate *out,
struct access_coordinate *c1,
struct access_coordinate *c2)
{
- if (c1->write_bandwidth && c2->write_bandwidth)
- out->write_bandwidth = min(c1->write_bandwidth,
- c2->write_bandwidth);
- out->write_latency = c1->write_latency + c2->write_latency;
-
- if (c1->read_bandwidth && c2->read_bandwidth)
- out->read_bandwidth = min(c1->read_bandwidth,
- c2->read_bandwidth);
- out->read_latency = c1->read_latency + c2->read_latency;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ __cxl_coordinates_combine(&out[i], &c1[i], &c2[i]);
}
MODULE_IMPORT_NS(CXL);
@@ -521,17 +550,13 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled)
{
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_port *port = cxlmd->endpoint;
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
- struct access_coordinate coord;
struct range dpa = {
.start = cxled->dpa_res->start,
.end = cxled->dpa_res->end,
};
struct cxl_dpa_perf *perf;
- int rc;
switch (cxlr->mode) {
case CXL_DECODER_RAM:
@@ -549,35 +574,16 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
if (!range_contains(&perf->dpa_range, &dpa))
return;
- rc = cxl_hb_get_perf_coordinates(port, hb_coord);
- if (rc) {
- dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
- return;
- }
-
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
- /* Pickup the host bridge coords */
- cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord);
-
/* Get total bandwidth and the worst latency for the cxl region */
cxlr->coord[i].read_latency = max_t(unsigned int,
cxlr->coord[i].read_latency,
- coord.read_latency);
+ perf->coord[i].read_latency);
cxlr->coord[i].write_latency = max_t(unsigned int,
cxlr->coord[i].write_latency,
- coord.write_latency);
- cxlr->coord[i].read_bandwidth += coord.read_bandwidth;
- cxlr->coord[i].write_bandwidth += coord.write_bandwidth;
-
- /*
- * Convert latency to nanosec from picosec to be consistent
- * with the resulting latency coordinates computed by the
- * HMAT_REPORTING code.
- */
- cxlr->coord[i].read_latency =
- DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000);
- cxlr->coord[i].write_latency =
- DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000);
+ perf->coord[i].write_latency);
+ cxlr->coord[i].read_bandwidth += perf->coord[i].read_bandwidth;
+ cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth;
}
}
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 9adda4795eb786..f0f54aeccc872b 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -915,7 +915,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
payload->handles[i++] = gen->hdr.handle;
dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
- le16_to_cpu(payload->handles[i]));
+ le16_to_cpu(payload->handles[i - 1]));
if (i == max_handles) {
payload->nr_recs = i;
@@ -958,13 +958,14 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
.payload_in = &log_type,
.size_in = sizeof(log_type),
.payload_out = payload,
- .size_out = mds->payload_size,
.min_out = struct_size(payload, records, 0),
};
do {
int rc, i;
+ mbox_cmd.size_out = mds->payload_size;
+
rc = cxl_internal_send_cmd(mds, &mbox_cmd);
if (rc) {
dev_err_ratelimited(dev,
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 2b0cab556072f5..762783bb091afc 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -2133,36 +2133,44 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
}
EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
-/**
- * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator
- * and host bridge
- *
- * @port: endpoint cxl_port
- * @coord: output access coordinates
- *
- * Return: errno on failure, 0 on success.
- */
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
- struct access_coordinate *coord)
+static void add_latency(struct access_coordinate *c, long latency)
{
- struct cxl_port *iter = port;
- struct cxl_dport *dport;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ c[i].write_latency += latency;
+ c[i].read_latency += latency;
+ }
+}
- if (!is_cxl_endpoint(port))
- return -EINVAL;
+static bool coordinates_valid(struct access_coordinate *c)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ if (c[i].read_bandwidth && c[i].write_bandwidth &&
+ c[i].read_latency && c[i].write_latency)
+ continue;
+ return false;
+ }
- dport = iter->parent_dport;
- while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
- iter = to_cxl_port(iter->dev.parent);
- dport = iter->parent_dport;
+ return true;
+}
+
+static void set_min_bandwidth(struct access_coordinate *c, unsigned int bw)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ c[i].write_bandwidth = min(c[i].write_bandwidth, bw);
+ c[i].read_bandwidth = min(c[i].read_bandwidth, bw);
}
+}
- coord[ACCESS_COORDINATE_LOCAL] =
- dport->hb_coord[ACCESS_COORDINATE_LOCAL];
- coord[ACCESS_COORDINATE_CPU] =
- dport->hb_coord[ACCESS_COORDINATE_CPU];
+static void set_access_coordinates(struct access_coordinate *out,
+ struct access_coordinate *in)
+{
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+ out[i] = in[i];
+}
- return 0;
+static bool parent_port_is_cxl_root(struct cxl_port *port)
+{
+ return is_cxl_root(to_cxl_port(port->dev.parent));
}
/**
@@ -2176,35 +2184,53 @@ int cxl_hb_get_perf_coordinates(struct cxl_port *port,
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
struct access_coordinate *coord)
{
- struct access_coordinate c = {
- .read_bandwidth = UINT_MAX,
- .write_bandwidth = UINT_MAX,
+ struct access_coordinate c[] = {
+ {
+ .read_bandwidth = UINT_MAX,
+ .write_bandwidth = UINT_MAX,
+ },
+ {
+ .read_bandwidth = UINT_MAX,
+ .write_bandwidth = UINT_MAX,
+ },
};
struct cxl_port *iter = port;
struct cxl_dport *dport;
struct pci_dev *pdev;
unsigned int bw;
+ bool is_cxl_root;
if (!is_cxl_endpoint(port))
return -EINVAL;
- dport = iter->parent_dport;
-
/*
- * Exit the loop when the parent port of the current port is cxl root.
- * The iterative loop starts at the endpoint and gathers the
- * latency of the CXL link from the current iter to the next downstream
- * port each iteration. If the parent is cxl root then there is
- * nothing to gather.
+ * Exit the loop when the parent port of the current iter port is cxl
+ * root. The iterative loop starts at the endpoint and gathers the
+ * latency of the CXL link from the current device/port to the connected
+ * downstream port each iteration.
*/
- while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
- cxl_coordinates_combine(&c, &c, &dport->sw_coord);
- c.write_latency += dport->link_latency;
- c.read_latency += dport->link_latency;
-
- iter = to_cxl_port(iter->dev.parent);
+ do {
dport = iter->parent_dport;
- }
+ iter = to_cxl_port(iter->dev.parent);
+ is_cxl_root = parent_port_is_cxl_root(iter);
+
+ /*
+ * There's no valid access_coordinate for a root port since RPs do not
+ * have CDAT and therefore needs to be skipped.
+ */
+ if (!is_cxl_root) {
+ if (!coordinates_valid(dport->coord))
+ return -EINVAL;
+ cxl_coordinates_combine(c, c, dport->coord);
+ }
+ add_latency(c, dport->link_latency);
+ } while (!is_cxl_root);
+
+ dport = iter->parent_dport;
+ /* Retrieve HB coords */
+ if (!coordinates_valid(dport->coord))
+ return -EINVAL;
+ cxl_coordinates_combine(c, c, dport->coord);
/* Get the calculated PCI paths bandwidth */
pdev = to_pci_dev(port->uport_dev->parent);
@@ -2213,10 +2239,8 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
return -ENXIO;
bw /= BITS_PER_BYTE;
- c.write_bandwidth = min(c.write_bandwidth, bw);
- c.read_bandwidth = min(c.read_bandwidth, bw);
-
- *coord = c;
+ set_min_bandwidth(c, bw);
+ set_access_coordinates(coord, c);
return 0;
}
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 372786f809555f..3c42f984eeafaa 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -271,6 +271,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
struct cxl_register_map *map)
{
+ u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
u64 offset = ((u64)reg_hi << 32) |
(reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
@@ -278,11 +279,11 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
if (offset > pci_resource_len(pdev, bar)) {
dev_warn(&pdev->dev,
"BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar,
- &pdev->resource[bar], &offset, map->reg_type);
+ &pdev->resource[bar], &offset, reg_type);
return false;
}
- map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
+ map->reg_type = reg_type;
map->resource = pci_resource_start(pdev, bar) + offset;
map->max_size = pci_resource_len(pdev, bar) - offset;
return true;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 534e25e2f0a481..036d17db68e006 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -663,8 +663,7 @@ struct cxl_rcrb_info {
* @rch: Indicate whether this dport was enumerated in RCH or VH mode
* @port: reference to cxl_port that contains this downstream port
* @regs: Dport parsed register blocks
- * @sw_coord: access coordinates (performance) for switch from CDAT
- * @hb_coord: access coordinates (performance) from ACPI generic port (host bridge)
+ * @coord: access coordinates (bandwidth and latency performance attributes)
* @link_latency: calculated PCIe downstream latency
*/
struct cxl_dport {
@@ -675,8 +674,7 @@ struct cxl_dport {
bool rch;
struct cxl_port *port;
struct cxl_regs regs;
- struct access_coordinate sw_coord;
- struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
long link_latency;
};
@@ -884,8 +882,6 @@ void cxl_switch_parse_cdat(struct cxl_port *port);
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
struct access_coordinate *coord);
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
- struct access_coordinate *coord);
void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
struct cxl_endpoint_decoder *cxled);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 20fb3b35e89e04..36cee9c30cebd2 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -401,7 +401,7 @@ enum cxl_devtype {
*/
struct cxl_dpa_perf {
struct range dpa_range;
- struct access_coordinate coord;
+ struct access_coordinate coord[ACCESS_COORDINATE_MAX];
int qos_class;
};
diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
index 9c2a0c082a768f..ed4b323886e430 100644
--- a/drivers/dma-buf/st-dma-fence-chain.c
+++ b/drivers/dma-buf/st-dma-fence-chain.c
@@ -84,11 +84,11 @@ static int sanitycheck(void *arg)
return -ENOMEM;
chain = mock_chain(NULL, f, 1);
- if (!chain)
+ if (chain)
+ dma_fence_enable_sw_signaling(chain);
+ else
err = -ENOMEM;
- dma_fence_enable_sw_signaling(chain);
-
dma_fence_signal(f);
dma_fence_put(f);
diff --git a/drivers/dpll/Kconfig b/drivers/dpll/Kconfig
index a4cae73f20d3d0..20607ed5424358 100644
--- a/drivers/dpll/Kconfig
+++ b/drivers/dpll/Kconfig
@@ -4,4 +4,4 @@
#
config DPLL
- bool
+ bool
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 7bc71f4be64a07..38d19410a2be68 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -2060,6 +2060,8 @@ static void bus_reset_work(struct work_struct *work)
ohci->generation = generation;
reg_write(ohci, OHCI1394_IntEventClear, OHCI1394_busReset);
+ if (param_debug & OHCI_PARAM_DEBUG_BUSRESETS)
+ reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset);
if (ohci->quirks & QUIRK_RESET_PACKET)
ohci->request_generation = generation;
@@ -2125,12 +2127,14 @@ static irqreturn_t irq_handler(int irq, void *data)
return IRQ_NONE;
/*
- * busReset and postedWriteErr must not be cleared yet
+ * busReset and postedWriteErr events must not be cleared yet
* (OHCI 1.1 clauses 7.2.3.2 and 13.2.8.1)
*/
reg_write(ohci, OHCI1394_IntEventClear,
event & ~(OHCI1394_busReset | OHCI1394_postedWriteErr));
log_irqs(ohci, event);
+ if (event & OHCI1394_busReset)
+ reg_write(ohci, OHCI1394_IntMaskClear, OHCI1394_busReset);
if (event & OHCI1394_selfIDComplete)
queue_work(selfid_workqueue, &ohci->bus_reset_work);
diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index f2556a8e940156..9bc2e10381afd9 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -790,7 +790,7 @@ static void ffa_notification_info_get(void)
part_id = packed_id_list[ids_processed++];
- if (!ids_count[list]) { /* Global Notification */
+ if (ids_count[list] == 1) { /* Global Notification */
__do_sched_recv_cb(part_id, 0, false);
continue;
}
diff --git a/drivers/firmware/arm_scmi/powercap.c b/drivers/firmware/arm_scmi/powercap.c
index ea9201e7044cbd..1fa79bba492e88 100644
--- a/drivers/firmware/arm_scmi/powercap.c
+++ b/drivers/firmware/arm_scmi/powercap.c
@@ -736,7 +736,7 @@ static void scmi_powercap_domain_init_fc(const struct scmi_protocol_handle *ph,
ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL,
POWERCAP_PAI_GET, 4, domain,
&fc[POWERCAP_FC_PAI].get_addr, NULL,
- &fc[POWERCAP_PAI_GET].rate_limit);
+ &fc[POWERCAP_FC_PAI].rate_limit);
*p_fc = fc;
}
diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c
index 35057351850335..130d13e9cd6beb 100644
--- a/drivers/firmware/arm_scmi/raw_mode.c
+++ b/drivers/firmware/arm_scmi/raw_mode.c
@@ -921,7 +921,7 @@ static int scmi_dbg_raw_mode_open(struct inode *inode, struct file *filp)
rd->raw = raw;
filp->private_data = rd;
- return 0;
+ return nonseekable_open(inode, filp);
}
static int scmi_dbg_raw_mode_release(struct inode *inode, struct file *filp)
@@ -950,6 +950,7 @@ static const struct file_operations scmi_dbg_raw_mode_reset_fops = {
.open = scmi_dbg_raw_mode_open,
.release = scmi_dbg_raw_mode_release,
.write = scmi_dbg_raw_mode_reset_write,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -959,6 +960,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_fops = {
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_write,
.poll = scmi_dbg_raw_mode_message_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -975,6 +977,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_async_fops = {
.read = scmi_dbg_raw_mode_message_read,
.write = scmi_dbg_raw_mode_message_async_write,
.poll = scmi_dbg_raw_mode_message_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -998,6 +1001,7 @@ static const struct file_operations scmi_dbg_raw_mode_notification_fops = {
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_notif_read,
.poll = scmi_test_dbg_raw_mode_notif_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
@@ -1021,6 +1025,7 @@ static const struct file_operations scmi_dbg_raw_mode_errors_fops = {
.release = scmi_dbg_raw_mode_release,
.read = scmi_test_dbg_raw_mode_errors_read,
.poll = scmi_test_dbg_raw_mode_errors_poll,
+ .llseek = no_llseek,
.owner = THIS_MODULE,
};
diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c
index 7e185285955021..c41e7b2091cdd1 100644
--- a/drivers/firmware/efi/libstub/randomalloc.c
+++ b/drivers/firmware/efi/libstub/randomalloc.c
@@ -120,7 +120,7 @@ efi_status_t efi_random_alloc(unsigned long size,
continue;
}
- target = round_up(max(md->phys_addr, alloc_min), align) + target_slot * align;
+ target = round_up(max_t(u64, md->phys_addr, alloc_min), align) + target_slot * align;
pages = size / EFI_PAGE_SIZE;
status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index 6a6ffc6707bd0e..d5a8182cf2e1cc 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -496,6 +496,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
hdr->vid_mode = 0xffff;
hdr->type_of_loader = 0x21;
+ hdr->initrd_addr_max = INT_MAX;
/* Convert unicode cmdline to ascii */
cmdline_ptr = efi_convert_cmdline(image, &options_size);
diff --git a/drivers/gpio/gpio-crystalcove.c b/drivers/gpio/gpio-crystalcove.c
index 1ee62cd58582b6..25db014494a4de 100644
--- a/drivers/gpio/gpio-crystalcove.c
+++ b/drivers/gpio/gpio-crystalcove.c
@@ -92,7 +92,7 @@ static inline int to_reg(int gpio, enum ctrl_register reg_type)
case 0x5e:
return GPIOPANELCTL;
default:
- return -EOPNOTSUPP;
+ return -ENOTSUPP;
}
}
diff --git a/drivers/gpio/gpio-lpc32xx.c b/drivers/gpio/gpio-lpc32xx.c
index 5ef8af8249806a..c097e310c9e841 100644
--- a/drivers/gpio/gpio-lpc32xx.c
+++ b/drivers/gpio/gpio-lpc32xx.c
@@ -529,6 +529,7 @@ static const struct of_device_id lpc32xx_gpio_of_match[] = {
{ .compatible = "nxp,lpc3220-gpio", },
{ },
};
+MODULE_DEVICE_TABLE(of, lpc32xx_gpio_of_match);
static struct platform_driver lpc32xx_gpio_driver = {
.driver = {
diff --git a/drivers/gpio/gpio-wcove.c b/drivers/gpio/gpio-wcove.c
index c18b6b47384f1b..94ca9d03c09494 100644
--- a/drivers/gpio/gpio-wcove.c
+++ b/drivers/gpio/gpio-wcove.c
@@ -104,7 +104,7 @@ static inline int to_reg(int gpio, enum ctrl_register type)
unsigned int reg = type == CTRL_IN ? GPIO_IN_CTRL_BASE : GPIO_OUT_CTRL_BASE;
if (gpio >= WCOVE_GPIO_NUM)
- return -EOPNOTSUPP;
+ return -ENOTSUPP;
return reg + gpio;
}
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index f384fa2787648e..d09c7d72836551 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -728,6 +728,25 @@ static u32 line_event_id(int level)
GPIO_V2_LINE_EVENT_FALLING_EDGE;
}
+static inline char *make_irq_label(const char *orig)
+{
+ char *new;
+
+ if (!orig)
+ return NULL;
+
+ new = kstrdup_and_replace(orig, '/', ':', GFP_KERNEL);
+ if (!new)
+ return ERR_PTR(-ENOMEM);
+
+ return new;
+}
+
+static inline void free_irq_label(const char *label)
+{
+ kfree(label);
+}
+
#ifdef CONFIG_HTE
static enum hte_return process_hw_ts_thread(void *p)
@@ -1015,6 +1034,7 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us)
{
unsigned long irqflags;
int ret, level, irq;
+ char *label;
/* try hardware */
ret = gpiod_set_debounce(line->desc, debounce_period_us);
@@ -1037,11 +1057,17 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us)
if (irq < 0)
return -ENXIO;
+ label = make_irq_label(line->req->label);
+ if (IS_ERR(label))
+ return -ENOMEM;
+
irqflags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING;
ret = request_irq(irq, debounce_irq_handler, irqflags,
- line->req->label, line);
- if (ret)
+ label, line);
+ if (ret) {
+ free_irq_label(label);
return ret;
+ }
line->irq = irq;
} else {
ret = hte_edge_setup(line, GPIO_V2_LINE_FLAG_EDGE_BOTH);
@@ -1086,7 +1112,7 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc,
static void edge_detector_stop(struct line *line)
{
if (line->irq) {
- free_irq(line->irq, line);
+ free_irq_label(free_irq(line->irq, line));
line->irq = 0;
}
@@ -1110,6 +1136,7 @@ static int edge_detector_setup(struct line *line,
unsigned long irqflags = 0;
u64 eflags;
int irq, ret;
+ char *label;
eflags = edflags & GPIO_V2_LINE_EDGE_FLAGS;
if (eflags && !kfifo_initialized(&line->req->events)) {
@@ -1146,11 +1173,17 @@ static int edge_detector_setup(struct line *line,
IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING;
irqflags |= IRQF_ONESHOT;
+ label = make_irq_label(line->req->label);
+ if (IS_ERR(label))
+ return PTR_ERR(label);
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread,
- irqflags, line->req->label, line);
- if (ret)
+ irqflags, label, line);
+ if (ret) {
+ free_irq_label(label);
return ret;
+ }
line->irq = irq;
return 0;
@@ -1973,7 +2006,7 @@ static void lineevent_free(struct lineevent_state *le)
blocking_notifier_chain_unregister(&le->gdev->device_notifier,
&le->device_unregistered_nb);
if (le->irq)
- free_irq(le->irq, le);
+ free_irq_label(free_irq(le->irq, le));
if (le->desc)
gpiod_free(le->desc);
kfree(le->label);
@@ -2114,6 +2147,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
int fd;
int ret;
int irq, irqflags = 0;
+ char *label;
if (copy_from_user(&eventreq, ip, sizeof(eventreq)))
return -EFAULT;
@@ -2198,15 +2232,23 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
if (ret)
goto out_free_le;
+ label = make_irq_label(le->label);
+ if (IS_ERR(label)) {
+ ret = PTR_ERR(label);
+ goto out_free_le;
+ }
+
/* Request a thread to read the events */
ret = request_threaded_irq(irq,
lineevent_irq_handler,
lineevent_irq_thread,
irqflags,
- le->label,
+ label,
le);
- if (ret)
+ if (ret) {
+ free_irq_label(label);
goto out_free_le;
+ }
le->irq = irq;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index ce94e37bcbee79..94903fc1c1459f 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1175,6 +1175,9 @@ struct gpio_device *gpio_device_find(const void *data,
list_for_each_entry_srcu(gdev, &gpio_devices, list,
srcu_read_lock_held(&gpio_devices_srcu)) {
+ if (!device_is_registered(&gdev->dev))
+ continue;
+
guard(srcu)(&gdev->srcu);
gc = srcu_dereference(gdev->chip, &gdev->srcu);
@@ -2397,6 +2400,11 @@ char *gpiochip_dup_line_label(struct gpio_chip *gc, unsigned int offset)
}
EXPORT_SYMBOL_GPL(gpiochip_dup_line_label);
+static inline const char *function_name_or_default(const char *con_id)
+{
+ return con_id ?: "(default)";
+}
+
/**
* gpiochip_request_own_desc - Allow GPIO chip to request its own descriptor
* @gc: GPIO chip
@@ -2425,10 +2433,11 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc,
enum gpiod_flags dflags)
{
struct gpio_desc *desc = gpiochip_get_desc(gc, hwnum);
+ const char *name = function_name_or_default(label);
int ret;
if (IS_ERR(desc)) {
- chip_err(gc, "failed to get GPIO descriptor\n");
+ chip_err(gc, "failed to get GPIO %s descriptor\n", name);
return desc;
}
@@ -2438,8 +2447,8 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc,
ret = gpiod_configure_flags(desc, label, lflags, dflags);
if (ret) {
- chip_err(gc, "setup of own GPIO %s failed\n", label);
gpiod_free_commit(desc);
+ chip_err(gc, "setup of own GPIO %s failed\n", name);
return ERR_PTR(ret);
}
@@ -4153,19 +4162,17 @@ static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode,
enum gpiod_flags *flags,
unsigned long *lookupflags)
{
+ const char *name = function_name_or_default(con_id);
struct gpio_desc *desc = ERR_PTR(-ENOENT);
if (is_of_node(fwnode)) {
- dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n",
- fwnode, con_id);
+ dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n", fwnode, name);
desc = of_find_gpio(to_of_node(fwnode), con_id, idx, lookupflags);
} else if (is_acpi_node(fwnode)) {
- dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n",
- fwnode, con_id);
+ dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", fwnode, name);
desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags);
} else if (is_software_node(fwnode)) {
- dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n",
- fwnode, con_id);
+ dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n", fwnode, name);
desc = swnode_find_gpio(fwnode, con_id, idx, lookupflags);
}
@@ -4181,6 +4188,7 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer,
bool platform_lookup_allowed)
{
unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT;
+ const char *name = function_name_or_default(con_id);
/*
* scoped_guard() is implemented as a for loop, meaning static
* analyzers will complain about these two not being initialized.
@@ -4203,8 +4211,7 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer,
}
if (IS_ERR(desc)) {
- dev_dbg(consumer, "No GPIO consumer %s found\n",
- con_id);
+ dev_dbg(consumer, "No GPIO consumer %s found\n", name);
return desc;
}
@@ -4226,15 +4233,14 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer,
*
* FIXME: Make this more sane and safe.
*/
- dev_info(consumer,
- "nonexclusive access to GPIO for %s\n", con_id);
+ dev_info(consumer, "nonexclusive access to GPIO for %s\n", name);
return desc;
}
ret = gpiod_configure_flags(desc, con_id, lookupflags, flags);
if (ret < 0) {
- dev_dbg(consumer, "setup of GPIO %s failed\n", con_id);
gpiod_put(desc);
+ dev_dbg(consumer, "setup of GPIO %s failed\n", name);
return ERR_PTR(ret);
}
@@ -4350,6 +4356,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_optional);
int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
unsigned long lflags, enum gpiod_flags dflags)
{
+ const char *name = function_name_or_default(con_id);
int ret;
if (lflags & GPIO_ACTIVE_LOW)
@@ -4393,7 +4400,7 @@ int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
/* No particular flag request, return here... */
if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) {
- gpiod_dbg(desc, "no flags found for %s\n", con_id);
+ gpiod_dbg(desc, "no flags found for GPIO %s\n", name);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec344e..b3b84647207ed4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -210,6 +210,7 @@ extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp;
extern int amdgpu_discovery;
extern int amdgpu_mes;
+extern int amdgpu_mes_log_enable;
extern int amdgpu_mes_kiq;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 0a4b09709cfb14..ec888fc6ead8df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -819,7 +819,7 @@ retry:
p->bytes_moved += ctx.bytes_moved;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- amdgpu_bo_in_cpu_visible_vram(bo))
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5dc24c971b41f0..7753a2e64d4114 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4135,18 +4135,22 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->ip_blocks[i].status.hw = true;
}
}
+ } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev)) {
+ r = psp_gpu_reset(adev);
} else {
- tmp = amdgpu_reset_method;
- /* It should do a default reset when loading or reloading the driver,
- * regardless of the module parameter reset_method.
- */
- amdgpu_reset_method = AMD_RESET_METHOD_NONE;
- r = amdgpu_asic_reset(adev);
- amdgpu_reset_method = tmp;
- if (r) {
- dev_err(adev->dev, "asic reset on init failed\n");
- goto failed;
- }
+ tmp = amdgpu_reset_method;
+ /* It should do a default reset when loading or reloading the driver,
+ * regardless of the module parameter reset_method.
+ */
+ amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+ r = amdgpu_asic_reset(adev);
+ amdgpu_reset_method = tmp;
+ }
+
+ if (r) {
+ dev_err(adev->dev, "asic reset on init failed\n");
+ goto failed;
}
}
@@ -4539,6 +4543,8 @@ int amdgpu_device_prepare(struct drm_device *dev)
if (r)
goto unprepare;
+ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index a07e4b87d4cae0..ac5bf01fe8d2a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -1896,6 +1896,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
break;
case IP_VERSION(14, 0, 0):
+ case IP_VERSION(14, 0, 1):
amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
break;
default:
@@ -2237,6 +2238,7 @@ static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
if (amdgpu_umsch_mm & 0x1) {
amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block);
adev->enable_umsch_mm = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 80b9642f2bc4f2..e4277298cf1aad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -195,6 +195,7 @@ int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp = -1;
int amdgpu_discovery = -1;
int amdgpu_mes;
+int amdgpu_mes_log_enable = 0;
int amdgpu_mes_kiq;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
@@ -668,6 +669,15 @@ MODULE_PARM_DESC(mes,
module_param_named(mes, amdgpu_mes, int, 0444);
/**
+ * DOC: mes_log_enable (int)
+ * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_log_enable,
+ "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444);
+
+/**
* DOC: mes_kiq (int)
* Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
* (0 = disabled (default), 1 = enabled)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4b3000c21ef2c5..e4742b65032d1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -304,12 +304,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
dma_fence_set_error(finished, -ECANCELED);
if (finished->error < 0) {
- DRM_INFO("Skip scheduling IBs!\n");
+ dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
+ ring->name);
} else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
&fence);
if (r)
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ dev_err(adev->dev,
+ "Error scheduling IBs (%d) in ring(%s)", r,
+ ring->name);
}
job->job_run_counter++;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index a98e03e0a51f1f..a00cf4756ad0e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -102,7 +102,10 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
{
int r;
- r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+ if (!amdgpu_mes_log_enable)
+ return 0;
+
+ r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&adev->mes.event_log_gpu_obj,
&adev->mes.event_log_gpu_addr,
@@ -1549,12 +1552,11 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
- mem, PAGE_SIZE, false);
+ mem, AMDGPU_MES_LOG_BUFFER_SIZE, false);
return 0;
}
-
DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
#endif
@@ -1565,7 +1567,7 @@ void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
#if defined(CONFIG_DEBUG_FS)
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *root = minor->debugfs_root;
- if (adev->enable_mes)
+ if (adev->enable_mes && amdgpu_mes_log_enable)
debugfs_create_file("amdgpu_mes_event_log", 0444, root,
adev, &amdgpu_debugfs_mes_event_log_fops);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 7d4f93fea937ae..4c8fc3117ef894 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -52,6 +52,7 @@ enum amdgpu_mes_priority_level {
#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
+#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */
struct amdgpu_mes_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 010b0cb7693c9c..2099159a693fa0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -617,8 +617,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
return r;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- bo->tbo.resource->mem_type == TTM_PL_VRAM &&
- amdgpu_bo_in_cpu_visible_vram(bo))
+ amdgpu_res_cpu_visible(adev, bo->tbo.resource))
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
ctx.bytes_moved);
else
@@ -1272,23 +1271,25 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
struct amdgpu_mem_stats *stats)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_resource *res = bo->tbo.resource;
uint64_t size = amdgpu_bo_size(bo);
struct drm_gem_object *obj;
unsigned int domain;
bool shared;
/* Abort if the BO doesn't currently have a backing store */
- if (!bo->tbo.resource)
+ if (!res)
return;
obj = &bo->tbo.base;
shared = drm_gem_object_is_shared_for_memory_stats(obj);
- domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
+ domain = amdgpu_mem_type_to_domain(res->mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
stats->vram += size;
- if (amdgpu_bo_in_cpu_visible_vram(bo))
+ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
stats->visible_vram += size;
if (shared)
stats->vram_shared += size;
@@ -1389,10 +1390,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* Remember that this BO was accessed by the CPU */
abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- if (bo->resource->mem_type != TTM_PL_VRAM)
- return 0;
-
- if (amdgpu_bo_in_cpu_visible_vram(abo))
+ if (amdgpu_res_cpu_visible(adev, bo->resource))
return 0;
/* Can't move a pinned BO to visible VRAM */
@@ -1415,7 +1413,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* this should never happen */
if (bo->resource->mem_type == TTM_PL_VRAM &&
- !amdgpu_bo_in_cpu_visible_vram(abo))
+ !amdgpu_res_cpu_visible(adev, bo->resource))
return VM_FAULT_SIGBUS;
ttm_bo_move_to_lru_tail_unlocked(bo);
@@ -1579,6 +1577,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
*/
u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct dma_buf_attachment *attachment;
struct dma_buf *dma_buf;
const char *placement;
@@ -1587,10 +1586,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
if (dma_resv_trylock(bo->tbo.base.resv)) {
unsigned int domain;
+
domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
- if (amdgpu_bo_in_cpu_visible_vram(bo))
+ if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
placement = "VRAM VISIBLE";
else
placement = "VRAM";
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index be679c42b0b8cb..fa03d9e4874cc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -251,28 +251,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
}
/**
- * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
- */
-static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
-{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_res_cursor cursor;
-
- if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM)
- return false;
-
- amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
- while (cursor.remaining) {
- if (cursor.start < adev->gmc.visible_vram_size)
- return true;
-
- amdgpu_res_next(&cursor, cursor.size);
- }
-
- return false;
-}
-
-/**
* amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
*/
static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 5505d646f43aa8..06f0a6534a94f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -524,46 +524,58 @@ static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
volatile u32 *mqd;
- int r;
+ u32 *kbuf;
+ int r, i;
uint32_t value, result;
if (*pos & 3 || size & 3)
return -EINVAL;
- result = 0;
+ kbuf = kmalloc(ring->mqd_size, GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
- return r;
+ goto err_free;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
- if (r) {
- amdgpu_bo_unreserve(ring->mqd_obj);
- return r;
- }
+ if (r)
+ goto err_unreserve;
+ /*
+ * Copy to local buffer to avoid put_user(), which might fault
+ * and acquire mmap_sem, under reservation_ww_class_mutex.
+ */
+ for (i = 0; i < ring->mqd_size/sizeof(u32); i++)
+ kbuf[i] = mqd[i];
+
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ amdgpu_bo_unreserve(ring->mqd_obj);
+
+ result = 0;
while (size) {
if (*pos >= ring->mqd_size)
- goto done;
+ break;
- value = mqd[*pos/4];
+ value = kbuf[*pos/4];
r = put_user(value, (uint32_t *)buf);
if (r)
- goto done;
+ goto err_free;
buf += 4;
result += 4;
size -= 4;
*pos += 4;
}
-done:
- amdgpu_bo_kunmap(ring->mqd_obj);
- mqd = NULL;
- amdgpu_bo_unreserve(ring->mqd_obj);
- if (r)
- return r;
-
+ kfree(kbuf);
return result;
+
+err_unreserve:
+ amdgpu_bo_unreserve(ring->mqd_obj);
+err_free:
+ kfree(kbuf);
+ return r;
}
static const struct file_operations amdgpu_debugfs_mqd_fops = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index fc418e670fdae2..1d71729e3f6bce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -133,7 +133,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
- amdgpu_bo_in_cpu_visible_vram(abo)) {
+ amdgpu_res_cpu_visible(adev, bo->resource)) {
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
@@ -403,40 +403,55 @@ error:
return r;
}
-/*
- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+/**
+ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
*
- * Called by amdgpu_bo_move()
+ * Returns: true if the full resource is CPU visible, false otherwise.
*/
-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
- struct ttm_resource *mem)
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res)
{
- u64 mem_size = (u64)mem->size;
struct amdgpu_res_cursor cursor;
- u64 end;
- if (mem->mem_type == TTM_PL_SYSTEM ||
- mem->mem_type == TTM_PL_TT)
+ if (!res)
+ return false;
+
+ if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+ res->mem_type == AMDGPU_PL_PREEMPT)
return true;
- if (mem->mem_type != TTM_PL_VRAM)
+
+ if (res->mem_type != TTM_PL_VRAM)
return false;
- amdgpu_res_first(mem, 0, mem_size, &cursor);
- end = cursor.start + cursor.size;
+ amdgpu_res_first(res, 0, res->size, &cursor);
while (cursor.remaining) {
+ if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size)
+ return false;
amdgpu_res_next(&cursor, cursor.size);
+ }
- if (!cursor.remaining)
- break;
+ return true;
+}
- /* ttm_resource_ioremap only supports contiguous memory */
- if (end != cursor.start)
- return false;
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+ struct ttm_resource *mem)
+{
+ if (!amdgpu_res_cpu_visible(adev, mem))
+ return false;
- end = cursor.start + cursor.size;
- }
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (mem->mem_type == TTM_PL_VRAM &&
+ !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
+ return false;
- return end <= adev->gmc.visible_vram_size;
+ return true;
}
/*
@@ -529,8 +544,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
if (r) {
/* Check that all memory is CPU accessible */
- if (!amdgpu_mem_visible(adev, old_mem) ||
- !amdgpu_mem_visible(adev, new_mem)) {
+ if (!amdgpu_res_copyable(adev, old_mem) ||
+ !amdgpu_res_copyable(adev, new_mem)) {
pr_err("Move buffer fallback to memcpy unavailable\n");
return r;
}
@@ -557,7 +572,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
struct ttm_resource *mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- size_t bus_size = (size_t)mem->size;
switch (mem->mem_type) {
case TTM_PL_SYSTEM:
@@ -568,9 +582,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
break;
case TTM_PL_VRAM:
mem->bus.offset = mem->start << PAGE_SHIFT;
- /* check if it's visible */
- if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
- return -EINVAL;
if (adev->mman.aper_base_kaddr &&
mem->placement & TTM_PL_FLAG_CONTIGUOUS)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 65ec82141a8e01..32cf6b6f6efd96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -139,6 +139,9 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start);
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res);
+
int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
index ab820cf526683b..0df97c3e3a700d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -189,10 +189,13 @@ static void setup_vpe_queue(struct amdgpu_device *adev,
mqd->rptr_val = 0;
mqd->unmapped = 1;
+ if (adev->vpe.collaborate_mode)
+ memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO));
+
qinfo->mqd_addr = test->mqd_data_gpu_addr;
qinfo->csa_addr = test->ctx_data_gpu_addr +
offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa);
- qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1;
+ qinfo->doorbell_offset_0 = 0;
qinfo->doorbell_offset_1 = 0;
}
@@ -287,7 +290,10 @@ static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *te
ring[5] = 0;
mqd->wptr_val = (6 << 2);
- // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val);
+ if (adev->vpe.collaborate_mode)
+ (++mqd)->wptr_val = (6 << 2);
+
+ WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val);
for (i = 0; i < adev->usec_timeout; i++) {
if (*fence == test_pattern)
@@ -571,6 +577,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch)
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
fw_name = "amdgpu/umsch_mm_4_0_0.bin";
break;
default:
@@ -750,6 +757,7 @@ static int umsch_mm_early_init(void *handle)
switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
case IP_VERSION(4, 0, 5):
+ case IP_VERSION(4, 0, 6):
umsch_mm_v4_0_set_funcs(&adev->umsch_mm);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
index 8258a43a6236c0..5014b5af95fd97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h
@@ -33,13 +33,6 @@ enum UMSCH_SWIP_ENGINE_TYPE {
UMSCH_SWIP_ENGINE_TYPE_MAX
};
-enum UMSCH_SWIP_AFFINITY_TYPE {
- UMSCH_SWIP_AFFINITY_TYPE_ANY = 0,
- UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1,
- UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2,
- UMSCH_SWIP_AFFINITY_TYPE_MAX
-};
-
enum UMSCH_CONTEXT_PRIORITY_LEVEL {
CONTEXT_PRIORITY_LEVEL_IDLE = 0,
CONTEXT_PRIORITY_LEVEL_NORMAL = 1,
@@ -51,13 +44,15 @@ enum UMSCH_CONTEXT_PRIORITY_LEVEL {
struct umsch_mm_set_resource_input {
uint32_t vmid_mask_mm_vcn;
uint32_t vmid_mask_mm_vpe;
+ uint32_t collaboration_mask_vpe;
uint32_t logging_vmid;
uint32_t engine_mask;
union {
struct {
uint32_t disable_reset : 1;
uint32_t disable_umsch_mm_log : 1;
- uint32_t reserved : 30;
+ uint32_t use_rs64mem_for_proc_ctx_csa : 1;
+ uint32_t reserved : 29;
};
uint32_t uint32_all;
};
@@ -78,15 +73,18 @@ struct umsch_mm_add_queue_input {
uint32_t doorbell_offset_1;
enum UMSCH_SWIP_ENGINE_TYPE engine_type;
uint32_t affinity;
- enum UMSCH_SWIP_AFFINITY_TYPE affinity_type;
uint64_t mqd_addr;
uint64_t h_context;
uint64_t h_queue;
uint32_t vm_context_cntl;
+ uint32_t process_csa_array_index;
+ uint32_t context_csa_array_index;
+
struct {
uint32_t is_context_suspended : 1;
- uint32_t reserved : 31;
+ uint32_t collaboration_mode : 1;
+ uint32_t reserved : 30;
};
};
@@ -94,6 +92,7 @@ struct umsch_mm_remove_queue_input {
uint32_t doorbell_offset_0;
uint32_t doorbell_offset_1;
uint64_t context_csa_addr;
+ uint32_t context_csa_array_index;
};
struct MQD_INFO {
@@ -103,6 +102,7 @@ struct MQD_INFO {
uint32_t wptr_val;
uint32_t rptr_val;
uint32_t unmapped;
+ uint32_t vmid;
};
struct amdgpu_umsch_mm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4299ce386322e7..94089069c9ada6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1613,6 +1613,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
+/* Validate operation parameters to prevent potential abuse */
+static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
+ struct amdgpu_bo *bo,
+ uint64_t saddr,
+ uint64_t offset,
+ uint64_t size)
+{
+ uint64_t tmp, lpfn;
+
+ if (saddr & AMDGPU_GPU_PAGE_MASK
+ || offset & AMDGPU_GPU_PAGE_MASK
+ || size & AMDGPU_GPU_PAGE_MASK)
+ return -EINVAL;
+
+ if (check_add_overflow(saddr, size, &tmp)
+ || check_add_overflow(offset, size, &tmp)
+ || size == 0 /* which also leads to end < begin */)
+ return -EINVAL;
+
+ /* make sure object fit at this offset */
+ if (bo && offset + size > amdgpu_bo_size(bo))
+ return -EINVAL;
+
+ /* Ensure last pfn not exceed max_pfn */
+ lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
+ if (lpfn >= adev->vm_manager.max_pfn)
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* amdgpu_vm_bo_map - map bo inside a vm
*
@@ -1639,21 +1670,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
uint64_t eaddr;
+ int r;
- /* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
- return -EINVAL;
- if (saddr + size <= saddr || offset + size <= offset)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if ((bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
if (tmp) {
@@ -1706,17 +1730,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
uint64_t eaddr;
int r;
- /* validate the parameters */
- if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
- return -EINVAL;
- if (saddr + size <= saddr || offset + size <= offset)
- return -EINVAL;
-
- /* make sure object fit at this offset */
- eaddr = saddr + size - 1;
- if ((bo && offset + size > amdgpu_bo_size(bo)) ||
- (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
- return -EINVAL;
+ r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+ if (r)
+ return r;
/* Allocate all the needed memory */
mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
@@ -1730,7 +1746,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
}
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
mapping->start = saddr;
mapping->last = eaddr;
@@ -1817,10 +1833,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
LIST_HEAD(removed);
uint64_t eaddr;
+ int r;
+
+ r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
+ if (r)
+ return r;
- eaddr = saddr + size - 1;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
/* Allocate all the needed memory */
before = kzalloc(sizeof(*before), GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
index 7a65a2b128ec43..6695481f870f8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
@@ -396,6 +396,12 @@ static int vpe_hw_init(void *handle)
struct amdgpu_vpe *vpe = &adev->vpe;
int ret;
+ /* Power on VPE */
+ ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE,
+ AMD_PG_STATE_UNGATE);
+ if (ret)
+ return ret;
+
ret = vpe_load_microcode(vpe);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index d6f808acfb17b7..fbb43ae7624f44 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -62,6 +62,11 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
}
+static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev)
+{
+ return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst);
+}
+
static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
uint32_t inst_idx, struct amdgpu_ring *ring)
{
@@ -87,7 +92,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
case AMDGPU_RING_TYPE_VCN_ENC:
case AMDGPU_RING_TYPE_VCN_JPEG:
ip_blk = AMDGPU_XCP_VCN;
- if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ if (aqua_vanjaram_xcp_vcn_shared(adev))
inst_mask = 1 << (inst_idx * 2);
break;
default:
@@ -140,10 +145,12 @@ static int aqua_vanjaram_xcp_sched_list_update(
aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
- /* VCN is shared by two partitions under CPX MODE */
+ /* VCN may be shared by two partitions under CPX MODE in certain
+ * configs.
+ */
if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
- ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
- adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
+ aqua_vanjaram_xcp_vcn_shared(adev))
aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 1770e496c1b7ce..f7325b02a191f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1635,7 +1635,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
}
- active_rb_bitmap |= global_active_rb_bitmap;
+ active_rb_bitmap &= global_active_rb_bitmap;
adev->gfx.config.backend_enable_mask = active_rb_bitmap;
adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
}
@@ -5465,6 +5465,7 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
/* Make sure that we can't skip the SET_Q_MODE packets when the VM
* changed in any way.
*/
+ ring->set_q_mode_offs = 0;
ring->set_q_mode_ptr = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 072c478665ade1..63f281a9984d98 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -411,8 +411,11 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.enable_reg_active_poll = 1;
mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;
- mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
- mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
+ if (amdgpu_mes_log_enable) {
+ mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+ mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
+ mes->event_log_gpu_addr;
+ }
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 34237a1b1f2e45..82eab49be82bb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1602,19 +1602,9 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
u32 sdma_cntl;
sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
- switch (state) {
- case AMDGPU_IRQ_STATE_DISABLE:
- sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL,
- DRAM_ECC_INT_ENABLE, 0);
- WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
- break;
- /* sdma ecc interrupt is enabled by default
- * driver doesn't need to do anything to
- * enable the interrupt */
- case AMDGPU_IRQ_STATE_ENABLE:
- default:
- break;
- }
+ sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE,
+ state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+ WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 581a3bd11481cc..43ca63fe85ac3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -457,10 +457,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
- return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
- return false;
default:
return true;
}
@@ -722,7 +720,10 @@ static int soc21_common_early_init(void *handle)
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_JPEG |
AMD_PG_SUPPORT_GFX_PG;
- adev->external_rev_id = adev->rev_id + 0x1;
+ if (adev->rev_id == 0)
+ adev->external_rev_id = 0x1;
+ else
+ adev->external_rev_id = adev->rev_id + 0x10;
break;
case IP_VERSION(11, 5, 1):
adev->cg_flags =
@@ -869,10 +870,35 @@ static int soc21_common_suspend(void *handle)
return soc21_common_hw_fini(adev);
}
+static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
+{
+ u32 sol_reg1, sol_reg2;
+
+ /* Will reset for the following suspend abort cases.
+ * 1) Only reset dGPU side.
+ * 2) S3 suspend got aborted and TOS is active.
+ */
+ if (!(adev->flags & AMD_IS_APU) && adev->in_s3 &&
+ !adev->suspend_complete) {
+ sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+ msleep(100);
+ sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return (sol_reg1 != sol_reg2);
+ }
+
+ return false;
+}
+
static int soc21_common_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (soc21_need_reset_on_resume(adev)) {
+ dev_info(adev->dev, "S3 suspend aborted, resetting...");
+ soc21_asic_reset(adev);
+ }
+
return soc21_common_hw_init(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
index 8e7b763cfdb7ef..bd57896ab85d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
@@ -60,7 +60,7 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch)
umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr;
- if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) {
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) {
WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT);
SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
@@ -225,6 +225,8 @@ static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch)
WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size);
+ ring->wptr = 0;
+
data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK);
WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
@@ -248,7 +250,7 @@ static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch)
data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0);
WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data);
- if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) {
+ if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) {
WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG,
2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT);
SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS,
@@ -271,6 +273,8 @@ static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch)
set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn;
set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe;
+ set_hw_resources.collaboration_mask_vpe =
+ adev->vpe.collaborate_mode ? 0x3 : 0x0;
set_hw_resources.engine_mask = umsch->engine_mask;
set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask;
@@ -346,6 +350,7 @@ static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch,
add_queue.h_queue = input_ptr->h_queue;
add_queue.vm_context_cntl = input_ptr->vm_context_cntl;
add_queue.is_context_suspended = input_ptr->is_context_suspended;
+ add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0;
add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr;
add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index dfa8c69532d470..55aa74cbc5325e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -779,8 +779,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
* nodes, but not more than args->num_of_nodes as that is
* the amount of memory allocated by user
*/
- pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
- args->num_of_nodes), GFP_KERNEL);
+ pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
+ GFP_KERNEL);
if (!pa)
return -ENOMEM;
@@ -1523,7 +1523,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
/* Find a KFD GPU device that supports the get_dmabuf_info query */
for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
- if (dev)
+ if (dev && !kfd_devcgroup_check_permission(dev))
break;
if (!dev)
return -EINVAL;
@@ -1545,7 +1545,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
if (xcp_id >= 0)
args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;
else
- args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id;
+ args->gpu_id = dev->id;
args->flags = flags;
/* Copy metadata buffer to user mode */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 041ec3de55e72f..719d6d365e1501 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -960,7 +960,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
struct kfd_node *node;
int i;
- int count;
if (!kfd->init_complete)
return;
@@ -968,12 +967,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
/* for runtime suspend, skip locking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = ++kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
/* For first KFD device suspend all the KFD processes */
- if (count == 1)
+ if (++kfd_locked == 1)
kfd_suspend_all_processes();
+ mutex_unlock(&kfd_processes_mutex);
}
for (i = 0; i < kfd->num_nodes; i++) {
@@ -984,7 +981,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
- int ret, count, i;
+ int ret, i;
if (!kfd->init_complete)
return 0;
@@ -998,12 +995,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
/* for runtime resume, skip unlocking kfd */
if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
- count = --kfd_locked;
- mutex_unlock(&kfd_processes_mutex);
-
- WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
- if (count == 0)
+ if (--kfd_locked == 0)
ret = kfd_resume_all_processes();
+ WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+ mutex_unlock(&kfd_processes_mutex);
}
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f4d395e38683db..0b655555e16786 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2001,6 +2001,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
while (halt_if_hws_hang)
schedule();
+ kfd_hws_hang(dqm);
return -ETIME;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
index 9a06c6fb660585..40a21be6c07c9b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -339,7 +339,8 @@ static void event_interrupt_wq_v10(struct kfd_node *dev,
break;
}
kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23);
- } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+ } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
index 7e2859736a558f..fe2ad0c0de9543 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
@@ -328,7 +328,8 @@ static void event_interrupt_wq_v11(struct kfd_node *dev,
/* CP */
if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
kfd_signal_event_interrupt(pasid, context_id0, 32);
- else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
+ else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)))
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_CTXID0_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 91dd5e045b511d..c4c6a29052ac8f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -388,7 +388,8 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
break;
}
kfd_signal_event_interrupt(pasid, sq_int_data, 24);
- } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+ } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE &&
+ KFD_DBG_EC_TYPE_IS_PACKET(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0))) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 42d40560cd30d7..a81ef232fdef96 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1473,7 +1473,7 @@ static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
{
- return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) ||
+ return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 2) ||
(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 717a60d7a4ea95..b79986412cd839 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -819,9 +819,9 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
mutex_lock(&kfd_processes_mutex);
if (kfd_is_locked()) {
- mutex_unlock(&kfd_processes_mutex);
pr_debug("KFD is locked! Cannot create process");
- return ERR_PTR(-EINVAL);
+ process = ERR_PTR(-EINVAL);
+ goto out;
}
/* A prior open of /dev/kfd could have already created the process. */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 2851719d712161..6d2f60c61decc3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -148,6 +148,9 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
#define FIRMWARE_DCN_35_DMUB "amdgpu/dcn_3_5_dmcub.bin"
MODULE_FIRMWARE(FIRMWARE_DCN_35_DMUB);
+#define FIRMWARE_DCN_351_DMUB "amdgpu/dcn_3_5_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_351_DMUB);
+
/* Number of bytes in PSP header for firmware. */
#define PSP_HEADER_BYTES 0x100
@@ -3044,6 +3047,10 @@ static int dm_resume(void *handle)
/* Do mst topology probing after resuming cached state*/
drm_connector_list_iter_begin(ddev, &iter);
drm_for_each_connector_iter(connector, &iter) {
+
+ if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ continue;
+
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type != dc_connection_mst_branch ||
aconnector->mst_root)
@@ -4820,9 +4827,11 @@ static int dm_init_microcode(struct amdgpu_device *adev)
fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
break;
case IP_VERSION(3, 5, 0):
- case IP_VERSION(3, 5, 1):
fw_name_dmub = FIRMWARE_DCN_35_DMUB;
break;
+ case IP_VERSION(3, 5, 1):
+ fw_name_dmub = FIRMWARE_DCN_351_DMUB;
+ break;
default:
/* ASIC doesn't support DMUB. */
return 0;
@@ -5921,6 +5930,9 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
&aconnector->base.probed_modes :
&aconnector->base.modes;
+ if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+ return NULL;
+
if (aconnector->freesync_vid_base.clock != 0)
return &aconnector->freesync_vid_base;
@@ -6305,27 +6317,22 @@ create_stream_for_sink(struct drm_connector *connector,
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);
- else if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
- stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
- stream->signal == SIGNAL_TYPE_EDP) {
+
+ if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+ stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+ stream->signal == SIGNAL_TYPE_EDP) {
//
// should decide stream support vsc sdp colorimetry capability
// before building vsc info packet
//
- stream->use_vsc_sdp_for_colorimetry = false;
- if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
- stream->use_vsc_sdp_for_colorimetry =
- aconnector->dc_sink->is_vsc_sdp_colorimetry_supported;
- } else {
- if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
- stream->use_vsc_sdp_for_colorimetry = true;
- }
+ stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
+ stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED;
+
if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
tf = TRANSFER_FUNC_GAMMA_22;
mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
+ aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
- if (stream->link->psr_settings.psr_feature_enabled)
- aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
}
finish:
dc_sink_release(sink);
@@ -8764,10 +8771,10 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
continue;
+notify:
if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
continue;
-notify:
aconnector = to_amdgpu_dm_connector(connector);
mutex_lock(&adev->dm.audio_lock);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
index 1f08c6564c3bfe..286ecd28cc6e66 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
@@ -141,9 +141,8 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream)
* amdgpu_dm_psr_enable() - enable psr f/w
* @stream: stream state
*
- * Return: true if success
*/
-bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
+void amdgpu_dm_psr_enable(struct dc_stream_state *stream)
{
struct dc_link *link = stream->link;
unsigned int vsync_rate_hz = 0;
@@ -190,7 +189,10 @@ bool amdgpu_dm_psr_enable(struct dc_stream_state *stream)
if (link->psr_settings.psr_version < DC_PSR_VERSION_SU_1)
power_opt |= psr_power_opt_z10_static_screen;
- return dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt);
+ dc_link_set_psr_allow_active(link, &psr_enable, false, false, &power_opt);
+
+ if (link->ctx->dc->caps.ips_support)
+ dc_allow_idle_optimizations(link->ctx->dc, true);
}
/*
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
index 6806b3c9c84ba0..1fdfd183c0d91a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.h
@@ -32,7 +32,7 @@
#define AMDGPU_DM_PSR_ENTRY_DELAY 5
void amdgpu_dm_set_psr_caps(struct dc_link *link);
-bool amdgpu_dm_psr_enable(struct dc_stream_state *stream);
+void amdgpu_dm_psr_enable(struct dc_stream_state *stream);
bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream);
bool amdgpu_dm_psr_disable(struct dc_stream_state *stream);
bool amdgpu_dm_psr_disable_all(struct amdgpu_display_manager *dm);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
index 16e72d623630ca..08c494a7a21bad 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
@@ -76,10 +76,8 @@ static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder,
static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector)
{
- struct drm_device *dev = connector->dev;
-
- return drm_add_modes_noedid(connector, dev->mode_config.max_width,
- dev->mode_config.max_height);
+ /* Maximum resolution supported by DWB */
+ return drm_add_modes_noedid(connector, 3840, 2160);
}
static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector,
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index 12f3e8aa46d8df..6ad4f4efec5dd3 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -99,20 +99,25 @@ static int dcn316_get_active_display_cnt_wa(
return display_count;
}
-static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+ bool safe_to_lower, bool disable)
{
struct dc *dc = clk_mgr_base->ctx->dc;
int i;
for (i = 0; i < dc->res_pool->pipe_count; ++i) {
- struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+ struct pipe_ctx *pipe = safe_to_lower
+ ? &context->res_ctx.pipe_ctx[i]
+ : &dc->current_state->res_ctx.pipe_ctx[i];
if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
- if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
- dc_is_virtual_signal(pipe->stream->signal))) {
+ if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
+ !pipe->stream->link_enc)) {
if (disable) {
- pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+ if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+ pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
reset_sync_context_for_pipe(dc, context, i);
} else
pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
@@ -207,11 +212,11 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
}
if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
- dcn316_disable_otg_wa(clk_mgr_base, context, true);
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
- dcn316_disable_otg_wa(clk_mgr_base, context, false);
+ dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
update_dispclk = true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index c378b879c76d8c..d9c5692c86c21a 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -73,6 +73,14 @@
#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
+#define regCLK5_0_CLK5_spll_field_8 0x464b
+#define regCLK5_0_CLK5_spll_field_8_BASE_IDX 0
+
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT 0xd
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
+
+#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
+
#define REG(reg_name) \
(ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
@@ -409,11 +417,25 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs
{
}
+static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+ struct dc_context *ctx = clk_mgr->base.ctx;
+ uint32_t ssc_enable;
+
+ REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable);
+
+ return ssc_enable == 1;
+}
+
static void init_clk_states(struct clk_mgr *clk_mgr)
{
+ struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+ if (clk_mgr_int->smu_ver >= SMU_VER_THRESHOLD)
+ clk_mgr->clks.dtbclk_en = true; // request DTBCLK disable on first commit
clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk
clk_mgr->clks.p_state_change_support = true;
clk_mgr->clks.prev_p_state_change_support = true;
@@ -423,7 +445,16 @@ static void init_clk_states(struct clk_mgr *clk_mgr)
void dcn35_init_clocks(struct clk_mgr *clk_mgr)
{
+ struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
init_clk_states(clk_mgr);
+
+ // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk
+ if (dcn35_is_spll_ssc_enabled(clk_mgr))
+ clk_mgr->dp_dto_source_clock_in_khz =
+ dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
+ else
+ clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+
}
static struct clk_bw_params dcn35_bw_params = {
.vram_type = Ddr4MemType,
@@ -512,6 +543,28 @@ static DpmClocks_t_dcn35 dummy_clocks;
static struct dcn35_watermarks dummy_wms = { 0 };
+static struct dcn35_ss_info_table ss_info_table = {
+ .ss_divider = 1000,
+ .ss_percentage = {0, 0, 375, 375, 375}
+};
+
+static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+ struct dc_context *ctx = clk_mgr->base.ctx;
+ uint32_t clock_source;
+
+ REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
+ // If it's DFS mode, clock_source is 0.
+ if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) {
+ clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
+
+ if (clk_mgr->dprefclk_ss_percentage != 0) {
+ clk_mgr->ss_on_dprefclk = true;
+ clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+ }
+ }
+}
+
static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table)
{
int i, num_valid_sets;
@@ -709,7 +762,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
clock_table->NumFclkLevelsEnabled;
max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk);
- num_dcfclk = (clock_table->NumFclkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS :
+ num_dcfclk = (clock_table->NumDcfClkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS :
clock_table->NumDcfClkLevelsEnabled;
for (i = 0; i < num_dcfclk; i++) {
int j;
@@ -1056,6 +1109,8 @@ void dcn35_clk_mgr_construct(
dce_clock_read_ss_info(&clk_mgr->base);
/*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
+ dcn35_read_ss_info_from_lut(&clk_mgr->base);
+
clk_mgr->base.base.bw_params = &dcn35_bw_params;
if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index e7dc128f6284b4..03b554e912a20d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3024,7 +3024,8 @@ static void backup_planes_and_stream_state(
scratch->blend_tf[i] = *status->plane_states[i]->blend_tf;
}
scratch->stream_state = *stream;
- scratch->out_transfer_func = *stream->out_transfer_func;
+ if (stream->out_transfer_func)
+ scratch->out_transfer_func = *stream->out_transfer_func;
}
static void restore_planes_and_stream_state(
@@ -3046,7 +3047,8 @@ static void restore_planes_and_stream_state(
*status->plane_states[i]->blend_tf = scratch->blend_tf[i];
}
*stream = scratch->stream_state;
- *stream->out_transfer_func = scratch->out_transfer_func;
+ if (stream->out_transfer_func)
+ *stream->out_transfer_func = scratch->out_transfer_func;
}
static bool update_planes_and_stream_state(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
index 5cc7f8da209c59..61986e5cb49196 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c
@@ -436,6 +436,15 @@ bool dc_state_add_plane(
goto out;
}
+ if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
+ /* ODM combine could prevent us from supporting more planes
+ * we will reset ODM slice count back to 1 when all planes have
+ * been removed to maximize the amount of planes supported when
+ * new planes are added.
+ */
+ resource_update_pipes_for_stream_with_slice_count(
+ state, dc->current_state, dc->res_pool, stream, 1);
+
otg_master_pipe = resource_get_otg_master_for_stream(
&state->res_ctx, stream);
if (otg_master_pipe)
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 970644b695cd4f..b5e0289d2fe82a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -976,7 +976,10 @@ static bool dcn31_program_pix_clk(
struct bp_pixel_clock_parameters bp_pc_params = {0};
enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
- if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+ // Apply ssed(spread spectrum) dpref clock for edp only.
+ if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0
+ && pix_clk_params->signal_type == SIGNAL_TYPE_EDP
+ && encoding == DP_8b_10b_ENCODING)
dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
// For these signal types Driver to program DP_DTO without calling VBIOS Command table
if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) {
@@ -1093,9 +1096,6 @@ static bool get_pixel_clk_frequency_100hz(
unsigned int modulo_hz = 0;
unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
- if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
- dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
-
if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) {
clock_hz = REG_READ(PHASE[inst]);
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
index f0777d61c2cbb6..c307f040e48fc6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile
@@ -23,7 +23,7 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce110/dce110_resource.o = -Wno-override-init
DCE110 = dce110_timing_generator.o \
dce110_compressor.o dce110_opp_regamma_v.o \
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
index 7e92effec89447..683866797709ba 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile
@@ -23,7 +23,7 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce112/dce112_resource.o = -Wno-override-init
DCE112 = dce112_compressor.o
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
index 1e3ef68a452a56..8f508e66274805 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile
@@ -24,7 +24,7 @@
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce120/dce120_resource.o = -Wno-override-init
DCE120 = dce120_timing_generator.o
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/Makefile b/drivers/gpu/drm/amd/display/dc/dce60/Makefile
index fee331accc0e7c..eede83ad91fa0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce60/Makefile
@@ -23,7 +23,7 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce60/dce60_resource.o = -Wno-override-init
DCE60 = dce60_timing_generator.o dce60_hw_sequencer.o \
dce60_resource.o
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
index 7eefffbdc9253f..fba189d26652d6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile
@@ -23,7 +23,7 @@
# Makefile for the 'controller' sub-component of DAL.
# It provides the control and status of HW CRTC block.
-CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = $(call cc-disable-warning, override-init)
+CFLAGS_$(AMDDALPATH)/dc/dce80/dce80_resource.o = -Wno-override-init
DCE80 = dce80_timing_generator.o
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
index bf3386cd444d62..5ebb5730313048 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
@@ -44,6 +44,36 @@
#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))
+void mpc3_mpc_init(struct mpc *mpc)
+{
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+ int opp_id;
+
+ mpc1_mpc_init(mpc);
+
+ for (opp_id = 0; opp_id < MAX_OPP; opp_id++) {
+ if (REG(MUX[opp_id]))
+ /* disable mpc out rate and flow control */
+ REG_UPDATE_2(MUX[opp_id], MPC_OUT_RATE_CONTROL_DISABLE,
+ 1, MPC_OUT_FLOW_CONTROL_COUNT, 0);
+ }
+}
+
+void mpc3_mpc_init_single_inst(struct mpc *mpc, unsigned int mpcc_id)
+{
+ struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
+
+ mpc1_mpc_init_single_inst(mpc, mpcc_id);
+
+ /* assuming mpc out mux is connected to opp with the same index at this
+ * point in time (e.g. transitioning from vbios to driver)
+ */
+ if (mpcc_id < MAX_OPP && REG(MUX[mpcc_id]))
+ /* disable mpc out rate and flow control */
+ REG_UPDATE_2(MUX[mpcc_id], MPC_OUT_RATE_CONTROL_DISABLE,
+ 1, MPC_OUT_FLOW_CONTROL_COUNT, 0);
+}
+
bool mpc3_is_dwb_idle(
struct mpc *mpc,
int dwb_id)
@@ -80,25 +110,6 @@ void mpc3_disable_dwb_mux(
MPC_DWB0_MUX, 0xf);
}
-void mpc3_set_out_rate_control(
- struct mpc *mpc,
- int opp_id,
- bool enable,
- bool rate_2x_mode,
- struct mpc_dwb_flow_control *flow_control)
-{
- struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
-
- REG_UPDATE_2(MUX[opp_id],
- MPC_OUT_RATE_CONTROL_DISABLE, !enable,
- MPC_OUT_RATE_CONTROL, rate_2x_mode);
-
- if (flow_control)
- REG_UPDATE_2(MUX[opp_id],
- MPC_OUT_FLOW_CONTROL_MODE, flow_control->flow_ctrl_mode,
- MPC_OUT_FLOW_CONTROL_COUNT, flow_control->flow_ctrl_cnt1);
-}
-
enum dc_lut_mode mpc3_get_ogam_current(struct mpc *mpc, int mpcc_id)
{
/*Contrary to DCN2 and DCN1 wherein a single status register field holds this info;
@@ -1490,8 +1501,8 @@ static const struct mpc_funcs dcn30_mpc_funcs = {
.read_mpcc_state = mpc3_read_mpcc_state,
.insert_plane = mpc1_insert_plane,
.remove_mpcc = mpc1_remove_mpcc,
- .mpc_init = mpc1_mpc_init,
- .mpc_init_single_inst = mpc1_mpc_init_single_inst,
+ .mpc_init = mpc3_mpc_init,
+ .mpc_init_single_inst = mpc3_mpc_init_single_inst,
.update_blending = mpc2_update_blending,
.cursor_lock = mpc1_cursor_lock,
.get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp,
@@ -1508,7 +1519,6 @@ static const struct mpc_funcs dcn30_mpc_funcs = {
.set_dwb_mux = mpc3_set_dwb_mux,
.disable_dwb_mux = mpc3_disable_dwb_mux,
.is_dwb_idle = mpc3_is_dwb_idle,
- .set_out_rate_control = mpc3_set_out_rate_control,
.set_gamut_remap = mpc3_set_gamut_remap,
.program_shaper = mpc3_program_shaper,
.acquire_rmu = mpcc3_acquire_rmu,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h
index 9cb96ae95a2f75..ce93003dae0113 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.h
@@ -1007,6 +1007,13 @@ void dcn30_mpc_construct(struct dcn30_mpc *mpc30,
int num_mpcc,
int num_rmu);
+void mpc3_mpc_init(
+ struct mpc *mpc);
+
+void mpc3_mpc_init_single_inst(
+ struct mpc *mpc,
+ unsigned int mpcc_id);
+
bool mpc3_program_shaper(
struct mpc *mpc,
const struct pwl_params *params,
@@ -1078,13 +1085,6 @@ bool mpc3_is_dwb_idle(
struct mpc *mpc,
int dwb_id);
-void mpc3_set_out_rate_control(
- struct mpc *mpc,
- int opp_id,
- bool enable,
- bool rate_2x_mode,
- struct mpc_dwb_flow_control *flow_control);
-
void mpc3_power_on_ogam_lut(
struct mpc *mpc, int mpcc_id,
bool power_on);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
index e224a028d68acc..8a0460e8630977 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
@@ -248,14 +248,12 @@ void dcn32_link_encoder_construct(
enc10->base.hpd_source = init_data->hpd_source;
enc10->base.connector = init_data->connector;
- enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
-
- enc10->base.features = *enc_features;
if (enc10->base.connector.id == CONNECTOR_ID_USBC)
enc10->base.features.flags.bits.DP_IS_USB_C = 1;
- if (enc10->base.connector.id == CONNECTOR_ID_USBC)
- enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+ enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+
+ enc10->base.features = *enc_features;
enc10->base.transmitter = init_data->transmitter;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
index e789e654c38705..e408e859b35561 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
@@ -47,7 +47,7 @@ void mpc32_mpc_init(struct mpc *mpc)
struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc);
int mpcc_id;
- mpc1_mpc_init(mpc);
+ mpc3_mpc_init(mpc);
if (mpc->ctx->dc->debug.enable_mem_low_power.bits.mpc) {
if (mpc30->mpc_mask->MPCC_MCM_SHAPER_MEM_LOW_PWR_MODE && mpc30->mpc_mask->MPCC_MCM_3DLUT_MEM_LOW_PWR_MODE) {
@@ -991,7 +991,7 @@ static const struct mpc_funcs dcn32_mpc_funcs = {
.insert_plane = mpc1_insert_plane,
.remove_mpcc = mpc1_remove_mpcc,
.mpc_init = mpc32_mpc_init,
- .mpc_init_single_inst = mpc1_mpc_init_single_inst,
+ .mpc_init_single_inst = mpc3_mpc_init_single_inst,
.update_blending = mpc2_update_blending,
.cursor_lock = mpc1_cursor_lock,
.get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp,
@@ -1008,7 +1008,6 @@ static const struct mpc_funcs dcn32_mpc_funcs = {
.set_dwb_mux = mpc3_set_dwb_mux,
.disable_dwb_mux = mpc3_disable_dwb_mux,
.is_dwb_idle = mpc3_is_dwb_idle,
- .set_out_rate_control = mpc3_set_out_rate_control,
.set_gamut_remap = mpc3_set_gamut_remap,
.program_shaper = mpc32_program_shaper,
.program_3dlut = mpc32_program_3dlut,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
index 81e349d5835bbe..da94e5309fbaf0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
@@ -184,6 +184,8 @@ void dcn35_link_encoder_construct(
enc10->base.hpd_source = init_data->hpd_source;
enc10->base.connector = init_data->connector;
+ if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+ enc10->base.features.flags.bits.DP_IS_USB_C = 1;
enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
@@ -238,8 +240,6 @@ void dcn35_link_encoder_construct(
}
enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
- if (enc10->base.connector.id == CONNECTOR_ID_USBC)
- enc10->base.features.flags.bits.DP_IS_USB_C = 1;
if (bp_funcs->get_connector_speed_cap_info)
result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index 80bebfc268db0f..21e0eef3269b10 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -166,8 +166,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
.num_states = 5,
.sr_exit_time_us = 28.0,
.sr_enter_plus_exit_time_us = 30.0,
- .sr_exit_z8_time_us = 210.0,
- .sr_enter_plus_exit_z8_time_us = 320.0,
+ .sr_exit_z8_time_us = 250.0,
+ .sr_enter_plus_exit_z8_time_us = 350.0,
.fclk_change_latency_us = 24.0,
.usr_retraining_latency_us = 2,
.writeback_latency_us = 12.0,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
index dc9e1b758ed6a1..b3ffab77cf8894 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn351/dcn351_fpu.c
@@ -98,55 +98,114 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = {
.clock_limits = {
{
.state = 0,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 400.0,
+ .fabricclk_mhz = 400.0,
+ .socclk_mhz = 600.0,
+ .dram_speed_mts = 3200.0,
+ .dispclk_mhz = 600.0,
+ .dppclk_mhz = 600.0,
.phyclk_mhz = 600.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 186.0,
+ .dscclk_mhz = 200.0,
.dtbclk_mhz = 600.0,
},
{
.state = 1,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 600.0,
+ .fabricclk_mhz = 1000.0,
+ .socclk_mhz = 733.0,
+ .dram_speed_mts = 6400.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
+ .dscclk_mhz = 266.7,
.dtbclk_mhz = 600.0,
},
{
.state = 2,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 738.0,
+ .fabricclk_mhz = 1200.0,
+ .socclk_mhz = 880.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 800.0,
+ .dppclk_mhz = 800.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
+ .dscclk_mhz = 266.7,
.dtbclk_mhz = 600.0,
},
{
.state = 3,
- .dispclk_mhz = 1200.0,
- .dppclk_mhz = 1200.0,
+ .dcfclk_mhz = 800.0,
+ .fabricclk_mhz = 1400.0,
+ .socclk_mhz = 978.0,
+ .dram_speed_mts = 7500.0,
+ .dispclk_mhz = 960.0,
+ .dppclk_mhz = 960.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 371.0,
+ .dscclk_mhz = 320.0,
.dtbclk_mhz = 600.0,
},
{
.state = 4,
+ .dcfclk_mhz = 873.0,
+ .fabricclk_mhz = 1600.0,
+ .socclk_mhz = 1100.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1066.7,
+ .dppclk_mhz = 1066.7,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 355.6,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 5,
+ .dcfclk_mhz = 960.0,
+ .fabricclk_mhz = 1700.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
.dispclk_mhz = 1200.0,
.dppclk_mhz = 1200.0,
.phyclk_mhz = 810.0,
.phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
+ .dscclk_mhz = 400.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 6,
+ .dcfclk_mhz = 1067.0,
+ .fabricclk_mhz = 1850.0,
+ .socclk_mhz = 1257.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1371.4,
+ .dppclk_mhz = 1371.4,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 457.1,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 7,
+ .dcfclk_mhz = 1200.0,
+ .fabricclk_mhz = 2000.0,
+ .socclk_mhz = 1467.0,
+ .dram_speed_mts = 8533.0,
+ .dispclk_mhz = 1600.0,
+ .dppclk_mhz = 1600.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 533.3,
.dtbclk_mhz = 600.0,
},
},
- .num_states = 5,
+ .num_states = 8,
.sr_exit_time_us = 28.0,
.sr_enter_plus_exit_time_us = 30.0,
- .sr_exit_z8_time_us = 210.0,
- .sr_enter_plus_exit_z8_time_us = 320.0,
+ .sr_exit_z8_time_us = 250.0,
+ .sr_enter_plus_exit_z8_time_us = 350.0,
.fclk_change_latency_us = 24.0,
.usr_retraining_latency_us = 2,
.writeback_latency_us = 12.0,
@@ -177,6 +236,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_51_soc = {
.do_urgent_latency_adjustment = 0,
.urgent_latency_adjustment_fabric_clock_component_us = 0,
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+ .num_chans = 4,
+ .dram_clock_change_latency_us = 11.72,
+ .dispclk_dppclk_vco_speed_mhz = 2400.0,
};
/*
@@ -340,6 +402,8 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc,
clock_limits[i].socclk_mhz;
dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
clk_table->entries[i].memclk_mhz * clk_table->entries[i].wck_ratio;
+ dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
+ clock_limits[i].dtbclk_mhz;
dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dcfclk_levels =
clk_table->num_entries;
dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_fclk_levels =
@@ -352,6 +416,8 @@ void dcn351_update_bw_bounding_box_fpu(struct dc *dc,
clk_table->num_entries;
dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_memclk_levels =
clk_table->num_entries;
+ dc->dml2_options.bbox_overrides.clks_table.num_entries_per_clk.num_dtbclk_levels =
+ clk_table->num_entries;
}
}
@@ -551,6 +617,7 @@ void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context)
if (context->res_ctx.pipe_ctx[i].plane_state)
plane_count++;
}
+
/*dcn351 does not support z9/z10*/
if (context->stream_count == 0 || plane_count == 0) {
support = DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
@@ -564,11 +631,9 @@ void dcn351_decide_zstate_support(struct dc *dc, struct dc_state *context)
dc->debug.minimum_z8_residency_time > 0 ? dc->debug.minimum_z8_residency_time : 1000;
bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > (double)minmum_z8_residency;
-
/*for psr1/psr-su, we allow z8 and z10 based on latency, for replay with IPS enabled, it will enter ips2*/
- if (is_pwrseq0 && (is_psr || is_replay))
+ if (is_pwrseq0 && (is_psr || is_replay))
support = allow_z8 ? allow_z8 : DCN_ZSTATE_SUPPORT_DISALLOW;
-
}
context->bw_ctx.bw.dcn.clk.zstate_support = support;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 17a58f41fc6a85..a20f28a5d2e7b0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -228,17 +228,13 @@ void dml2_init_socbb_params(struct dml2_context *dml2, const struct dc *in_dc, s
break;
case dml_project_dcn35:
+ case dml_project_dcn351:
out->num_chans = 4;
out->round_trip_ping_latency_dcfclk_cycles = 106;
out->smn_latency_us = 2;
out->dispclk_dppclk_vco_speed_mhz = 3600;
break;
- case dml_project_dcn351:
- out->num_chans = 16;
- out->round_trip_ping_latency_dcfclk_cycles = 1100;
- out->smn_latency_us = 2;
- break;
}
/* ---Overrides if available--- */
if (dml2->config.bbox_overrides.dram_num_chan)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 9d5df4c0da5979..0ba1feaf96c0d7 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -1185,7 +1185,8 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
if (dccg) {
dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst);
dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst);
- dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
+ if (dccg && dccg->funcs->set_dtbclk_dto)
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
}
} else if (dccg && dccg->funcs->disable_symclk_se) {
dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
index 3a9cc8ac0c0793..093f4387553ce3 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c
@@ -69,29 +69,6 @@
#define FN(reg_name, field_name) \
hws->shifts->field_name, hws->masks->field_name
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
@@ -183,10 +160,6 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx
struct pipe_ctx *odm_pipe;
int opp_cnt = 0;
int opp_inst[MAX_PIPES] = {0};
- bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing));
- struct mpc_dwb_flow_control flow_control;
- struct mpc *mpc = dc->res_pool->mpc;
- int i;
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
@@ -199,20 +172,6 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt);
- if (mpc->funcs->set_out_rate_control) {
- for (i = 0; i < opp_cnt; ++i) {
- mpc->funcs->set_out_rate_control(
- mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
- }
- }
-
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
index c0b526cf178654..7668229438da22 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c
@@ -966,29 +966,6 @@ void dcn32_init_hw(struct dc *dc)
}
}
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
@@ -1103,10 +1080,6 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
struct pipe_ctx *odm_pipe;
int opp_cnt = 0;
int opp_inst[MAX_PIPES] = {0};
- bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing));
- struct mpc_dwb_flow_control flow_control;
- struct mpc *mpc = dc->res_pool->mpc;
- int i;
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
@@ -1119,20 +1092,6 @@ void dcn32_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt);
- if (mpc->funcs->set_out_rate_control) {
- for (i = 0; i < opp_cnt; ++i) {
- mpc->funcs->set_out_rate_control(
- mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
- }
- }
-
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index 4b92df23ff0db9..a5560b3fc39ba9 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -358,29 +358,6 @@ void dcn35_init_hw(struct dc *dc)
}
}
-static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
- int opp_cnt)
-{
- bool hblank_halved = optc2_is_two_pixels_per_containter(&stream->timing);
- int flow_ctrl_cnt;
-
- if (opp_cnt >= 2)
- hblank_halved = true;
-
- flow_ctrl_cnt = stream->timing.h_total - stream->timing.h_addressable -
- stream->timing.h_border_left -
- stream->timing.h_border_right;
-
- if (hblank_halved)
- flow_ctrl_cnt /= 2;
-
- /* ODM combine 4:1 case */
- if (opp_cnt == 4)
- flow_ctrl_cnt /= 2;
-
- return flow_ctrl_cnt;
-}
-
static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable)
{
struct display_stream_compressor *dsc = pipe_ctx->stream_res.dsc;
@@ -474,10 +451,6 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
struct pipe_ctx *odm_pipe;
int opp_cnt = 0;
int opp_inst[MAX_PIPES] = {0};
- bool rate_control_2x_pclk = (pipe_ctx->stream->timing.flags.INTERLACE || optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing));
- struct mpc_dwb_flow_control flow_control;
- struct mpc *mpc = dc->res_pool->mpc;
- int i;
opp_cnt = get_odm_config(pipe_ctx, opp_inst);
@@ -490,20 +463,6 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx *
pipe_ctx->stream_res.tg->funcs->set_odm_bypass(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
- rate_control_2x_pclk = rate_control_2x_pclk || opp_cnt > 1;
- flow_control.flow_ctrl_mode = 0;
- flow_control.flow_ctrl_cnt0 = 0x80;
- flow_control.flow_ctrl_cnt1 = calc_mpc_flow_ctrl_cnt(pipe_ctx->stream, opp_cnt);
- if (mpc->funcs->set_out_rate_control) {
- for (i = 0; i < opp_cnt; ++i) {
- mpc->funcs->set_out_rate_control(
- mpc, opp_inst[i],
- true,
- rate_control_2x_pclk,
- &flow_control);
- }
- }
-
for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) {
odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control(
odm_pipe->stream_res.opp,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
index ab17fa1c64e8c5..670255c9bc8228 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c
@@ -67,7 +67,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = {
.prepare_bandwidth = dcn35_prepare_bandwidth,
.optimize_bandwidth = dcn35_optimize_bandwidth,
.update_bandwidth = dcn20_update_bandwidth,
- .set_drr = dcn10_set_drr,
+ .set_drr = dcn35_set_drr,
.get_position = dcn10_get_position,
.set_static_screen_control = dcn35_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
index f07a4c7e48bc23..52eab8fccb7f16 100644
--- a/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
@@ -267,9 +267,6 @@ static void optc32_setup_manual_trigger(struct timing_generator *optc)
OTG_V_TOTAL_MAX_SEL, 1,
OTG_FORCE_LOCK_ON_EVENT, 0,
OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */
-
- // Setup manual flow control for EOF via TRIG_A
- optc->funcs->setup_manual_trigger(optc);
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
index 5b486400dfdb5b..909e14261f9b49 100644
--- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
@@ -700,6 +700,8 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_dcc = DCC_ENABLE,
.disable_dpp_power_gate = true,
.disable_hubp_power_gate = true,
+ .disable_optc_power_gate = true, /*should the same as above two*/
+ .disable_hpo_power_gate = true, /*dmubfw force domain25 on*/
.disable_clock_gate = false,
.disable_dsc_power_gate = true,
.vsr_support = true,
@@ -742,12 +744,13 @@ static const struct dc_debug_options debug_defaults_drv = {
},
.seamless_boot_odm_combine = DML_FAIL_SOURCE_PIXEL_FORMAT,
.enable_z9_disable_interface = true, /* Allow support for the PMFW interface for disable Z9*/
+ .minimum_z8_residency_time = 2100,
.using_dml2 = true,
.support_eDP1_5 = true,
.enable_hpo_pg_support = false,
.enable_legacy_fast_update = true,
.enable_single_display_2to1_odm_policy = true,
- .disable_idle_power_optimizations = true,
+ .disable_idle_power_optimizations = false,
.dmcub_emulation = false,
.disable_boot_optimizations = false,
.disable_unbounded_requesting = false,
@@ -758,8 +761,10 @@ static const struct dc_debug_options debug_defaults_drv = {
.disable_z10 = true,
.ignore_pg = true,
.psp_disabled_wa = true,
- .ips2_eval_delay_us = 200,
- .ips2_entry_delay_us = 400
+ .ips2_eval_delay_us = 2000,
+ .ips2_entry_delay_us = 800,
+ .disable_dmub_reallow_idle = true,
+ .static_screen_wait_frames = 2,
};
static const struct dc_panel_config panel_config_defaults = {
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index 738ee763f24a51..84f9b412a4f117 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -147,15 +147,12 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
}
/* VSC packet set to 4 for PSR-SU, or 2 for PSR1 */
- if (stream->link->psr_settings.psr_feature_enabled) {
- if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
- vsc_packet_revision = vsc_packet_rev4;
- else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
- vsc_packet_revision = vsc_packet_rev2;
- }
-
- if (stream->link->replay_settings.config.replay_supported)
+ if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1)
+ vsc_packet_revision = vsc_packet_rev4;
+ else if (stream->link->replay_settings.config.replay_supported)
vsc_packet_revision = vsc_packet_rev4;
+ else if (stream->link->psr_settings.psr_version == DC_PSR_VERSION_1)
+ vsc_packet_revision = vsc_packet_rev2;
/* Update to revision 5 for extended colorimetry support */
if (stream->use_vsc_sdp_for_colorimetry)
diff --git a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h
index beadb9e42850c7..ca83e9e5c3ffb8 100644
--- a/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h
+++ b/drivers/gpu/drm/amd/include/umsch_mm_4_0_api_def.h
@@ -234,7 +234,8 @@ union UMSCHAPI__SET_HW_RESOURCES {
uint32_t enable_level_process_quantum_check : 1;
uint32_t is_vcn0_enabled : 1;
uint32_t is_vcn1_enabled : 1;
- uint32_t reserved : 27;
+ uint32_t use_rs64mem_for_proc_ctx_csa : 1;
+ uint32_t reserved : 26;
};
uint32_t uint32_all;
};
@@ -297,9 +298,12 @@ union UMSCHAPI__ADD_QUEUE {
struct {
uint32_t is_context_suspended : 1;
- uint32_t reserved : 31;
+ uint32_t collaboration_mode : 1;
+ uint32_t reserved : 30;
};
struct UMSCH_API_STATUS api_status;
+ uint32_t process_csa_array_index;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -314,6 +318,7 @@ union UMSCHAPI__REMOVE_QUEUE {
uint64_t context_csa_addr;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -337,6 +342,7 @@ union UMSCHAPI__SUSPEND {
uint32_t suspend_fence_value;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -356,6 +362,7 @@ union UMSCHAPI__RESUME {
enum UMSCH_ENGINE_TYPE engine_type;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -404,6 +411,7 @@ union UMSCHAPI__UPDATE_AFFINITY {
union UMSCH_AFFINITY affinity;
uint64_t context_csa_addr;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
@@ -417,6 +425,7 @@ union UMSCHAPI__CHANGE_CONTEXT_PRIORITY_LEVEL {
uint64_t context_quantum;
uint64_t context_csa_addr;
struct UMSCH_API_STATUS api_status;
+ uint32_t context_csa_array_index;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 246b211b1e85f7..65333141b1c1b0 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -735,7 +735,7 @@ static int smu_early_init(void *handle)
smu->adev = adev;
smu->pm_enabled = !!amdgpu_dpm;
smu->is_apu = false;
- smu->smu_baco.state = SMU_BACO_STATE_EXIT;
+ smu->smu_baco.state = SMU_BACO_STATE_NONE;
smu->smu_baco.platform_support = false;
smu->user_dpm_profile.fan_mode = -1;
@@ -1966,10 +1966,25 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
return 0;
}
+static int smu_reset_mp1_state(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+ int ret = 0;
+
+ if ((!adev->in_runpm) && (!adev->in_suspend) &&
+ (!amdgpu_in_reset(adev)) && amdgpu_ip_version(adev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 10) &&
+ !amdgpu_device_has_display_hardware(adev))
+ ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
+
+ return ret;
+}
+
static int smu_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct smu_context *smu = adev->powerplay.pp_handle;
+ int ret;
if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
return 0;
@@ -1987,7 +2002,15 @@ static int smu_hw_fini(void *handle)
adev->pm.dpm_enabled = false;
- return smu_smc_hw_cleanup(smu);
+ ret = smu_smc_hw_cleanup(smu);
+ if (ret)
+ return ret;
+
+ ret = smu_reset_mp1_state(smu);
+ if (ret)
+ return ret;
+
+ return 0;
}
static void smu_late_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index a870bdd49a4e3c..1fa81575788c54 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -424,6 +424,7 @@ enum smu_reset_mode {
enum smu_baco_state {
SMU_BACO_STATE_ENTER = 0,
SMU_BACO_STATE_EXIT,
+ SMU_BACO_STATE_NONE,
};
struct smu_baco_context {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
index 5bb7a63c0602b7..97522c0852589d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
@@ -144,6 +144,37 @@ typedef struct {
uint32_t MaxGfxClk;
} DpmClocks_t;
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct {
+ uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+ uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+ uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+ uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+ uint32_t VClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t VClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks0[NUM_VCN_DPM_LEVELS];
+ uint32_t DClocks1[NUM_VCN_DPM_LEVELS];
+ uint32_t VPEClocks[NUM_VPE_DPM_LEVELS];
+ uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS];
+ uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS];
+ uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+ MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS];
+
+ uint8_t NumDcfClkLevelsEnabled;
+ uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+ uint8_t NumSocClkLevelsEnabled;
+ uint8_t Vcn0ClkLevelsEnabled; //Applies to both Vclk0 and Dclk0
+ uint8_t Vcn1ClkLevelsEnabled; //Applies to both Vclk1 and Dclk1
+ uint8_t VpeClkLevelsEnabled;
+ uint8_t NumMemPstatesEnabled;
+ uint8_t NumFclkLevelsEnabled;
+ uint8_t spare;
+
+ uint32_t MinGfxClk;
+ uint32_t MaxGfxClk;
+} DpmClocks_t_v14_0_1;
+
typedef struct {
uint16_t CoreFrequency[16]; //Target core frequency [MHz]
uint16_t CorePower[16]; //CAC calculated core power [mW]
@@ -224,7 +255,7 @@ typedef enum {
#define TABLE_CUSTOM_DPM 2 // Called by Driver
#define TABLE_BIOS_GPIO_CONFIG 3 // Called by BIOS
#define TABLE_DPMCLOCKS 4 // Called by Driver and VBIOS
-#define TABLE_SPARE0 5 // Unused
+#define TABLE_MOMENTARY_PM 5 // Called by Tools
#define TABLE_MODERN_STDBY 6 // Called by Tools for Modern Standby Log
#define TABLE_SMU_METRICS 7 // Called by Driver and SMF/PMF
#define TABLE_COUNT 8
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h
index 356e0f57a426ff..ddb62586008319 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h
@@ -42,7 +42,7 @@
#define FEATURE_EDC_BIT 7
#define FEATURE_PLL_POWER_DOWN_BIT 8
#define FEATURE_VDDOFF_BIT 9
-#define FEATURE_VCN_DPM_BIT 10
+#define FEATURE_VCN_DPM_BIT 10 /* this is for both VCN0 and VCN1 */
#define FEATURE_DS_MPM_BIT 11
#define FEATURE_FCLK_DPM_BIT 12
#define FEATURE_SOCCLK_DPM_BIT 13
@@ -56,9 +56,9 @@
#define FEATURE_DS_GFXCLK_BIT 21
#define FEATURE_DS_SOCCLK_BIT 22
#define FEATURE_DS_LCLK_BIT 23
-#define FEATURE_LOW_POWER_DCNCLKS_BIT 24 // for all DISP clks
+#define FEATURE_LOW_POWER_DCNCLKS_BIT 24
#define FEATURE_DS_SHUBCLK_BIT 25
-#define FEATURE_SPARE0_BIT 26 //SPARE
+#define FEATURE_RESERVED0_BIT 26
#define FEATURE_ZSTATES_BIT 27
#define FEATURE_IOMMUL2_PG_BIT 28
#define FEATURE_DS_FCLK_BIT 29
@@ -66,8 +66,8 @@
#define FEATURE_DS_MP1CLK_BIT 31
#define FEATURE_WHISPER_MODE_BIT 32
#define FEATURE_SMU_LOW_POWER_BIT 33
-#define FEATURE_SMART_L3_RINSER_BIT 34
-#define FEATURE_SPARE1_BIT 35 //SPARE
+#define FEATURE_RESERVED1_BIT 34 /* v14_0_0 SMART_L3_RINSER; v14_0_1 RESERVED1 */
+#define FEATURE_GFX_DEM_BIT 35 /* v14_0_0 SPARE; v14_0_1 GFX_DEM */
#define FEATURE_PSI_BIT 36
#define FEATURE_PROCHOT_BIT 37
#define FEATURE_CPUOFF_BIT 38
@@ -77,11 +77,11 @@
#define FEATURE_PERF_LIMIT_BIT 42
#define FEATURE_CORE_DLDO_BIT 43
#define FEATURE_DVO_BIT 44
-#define FEATURE_DS_VCN_BIT 45
+#define FEATURE_DS_VCN_BIT 45 /* v14_0_1 this is for both VCN0 and VCN1 */
#define FEATURE_CPPC_BIT 46
#define FEATURE_CPPC_PREFERRED_CORES 47
#define FEATURE_DF_CSTATES_BIT 48
-#define FEATURE_SPARE2_BIT 49 //SPARE
+#define FEATURE_FAST_PSTATE_CLDO_BIT 49 /* v14_0_0 SPARE */
#define FEATURE_ATHUB_PG_BIT 50
#define FEATURE_VDDOFF_ECO_BIT 51
#define FEATURE_ZSTATES_ECO_BIT 52
@@ -93,8 +93,8 @@
#define FEATURE_DS_IPUCLK_BIT 58
#define FEATURE_DS_VPECLK_BIT 59
#define FEATURE_VPE_DPM_BIT 60
-#define FEATURE_SPARE_61 61
-#define FEATURE_FP_DIDT 62
+#define FEATURE_SMART_L3_RINSER_BIT 61 /* v14_0_0 SPARE*/
+#define FEATURE_PCC_BIT 62 /* v14_0_0 FP_DIDT v14_0_1 PCC_BIT */
#define NUM_FEATURES 63
// Firmware Header/Footer
@@ -151,6 +151,43 @@ typedef struct {
// MP1_EXT_SCRATCH7 = RTOS Current Job
} FwStatus_t;
+typedef struct {
+ // MP1_EXT_SCRATCH0
+ uint32_t DpmHandlerID : 8;
+ uint32_t ActivityMonitorID : 8;
+ uint32_t DpmTimerID : 8;
+ uint32_t DpmHubID : 4;
+ uint32_t DpmHubTask : 4;
+ // MP1_EXT_SCRATCH1
+ uint32_t CclkSyncStatus : 8;
+ uint32_t ZstateStatus : 4;
+ uint32_t Cpu1VddOff : 4;
+ uint32_t DstateFun : 4;
+ uint32_t DstateDev : 4;
+ uint32_t GfxOffStatus : 2;
+ uint32_t Cpu0Off : 2;
+ uint32_t Cpu1Off : 2;
+ uint32_t Cpu0VddOff : 2;
+ // MP1_EXT_SCRATCH2
+ uint32_t P2JobHandler :32;
+ // MP1_EXT_SCRATCH3
+ uint32_t PostCode :32;
+ // MP1_EXT_SCRATCH4
+ uint32_t MsgPortBusy :15;
+ uint32_t RsmuPmiP1Pending : 1;
+ uint32_t RsmuPmiP2PendingCnt : 8;
+ uint32_t DfCstateExitPending : 1;
+ uint32_t Pc6EntryPending : 1;
+ uint32_t Pc6ExitPending : 1;
+ uint32_t WarmResetPending : 1;
+ uint32_t Mp0ClkPending : 1;
+ uint32_t InWhisperMode : 1;
+ uint32_t spare2 : 2;
+ // MP1_EXT_SCRATCH5
+ uint32_t IdleMask :32;
+ // MP1_EXT_SCRATCH6 = RTOS threads' status
+ // MP1_EXT_SCRATCH7 = RTOS Current Job
+} FwStatus_t_v14_0_1;
#pragma pack(pop)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
index 8a8a57c56bc0c4..c4dc5881d8df09 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
@@ -54,14 +54,14 @@
#define PPSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team
#define PPSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version
#define PPSMC_MSG_GetDriverIfVersion 0x03 ///< Get PMFW_DRIVER_IF version
-#define PPSMC_MSG_SPARE0 0x04 ///< SPARE
-#define PPSMC_MSG_SPARE1 0x05 ///< SPARE
-#define PPSMC_MSG_PowerDownVcn 0x06 ///< Power down VCN
-#define PPSMC_MSG_PowerUpVcn 0x07 ///< Power up VCN; VCN is power gated by default
-#define PPSMC_MSG_SetHardMinVcn 0x08 ///< For wireless display
+#define PPSMC_MSG_PowerDownVcn1 0x04 ///< Power down VCN1
+#define PPSMC_MSG_PowerUpVcn1 0x05 ///< Power up VCN1; VCN1 is power gated by default
+#define PPSMC_MSG_PowerDownVcn0 0x06 ///< Power down VCN0
+#define PPSMC_MSG_PowerUpVcn0 0x07 ///< Power up VCN0; VCN0 is power gated by default
+#define PPSMC_MSG_SetHardMinVcn0 0x08 ///< For wireless display
#define PPSMC_MSG_SetSoftMinGfxclk 0x09 ///< Set SoftMin for GFXCLK, argument is frequency in MHz
-#define PPSMC_MSG_SPARE2 0x0A ///< SPARE
-#define PPSMC_MSG_SPARE3 0x0B ///< SPARE
+#define PPSMC_MSG_SetHardMinVcn1 0x0A ///< For wireless display
+#define PPSMC_MSG_SetSoftMinVcn1 0x0B ///< Set soft min for VCN1 clocks (VCLK1 and DCLK1)
#define PPSMC_MSG_PrepareMp1ForUnload 0x0C ///< Prepare PMFW for GFX driver unload
#define PPSMC_MSG_SetDriverDramAddrHigh 0x0D ///< Set high 32 bits of DRAM address for Driver table transfer
#define PPSMC_MSG_SetDriverDramAddrLow 0x0E ///< Set low 32 bits of DRAM address for Driver table transfer
@@ -71,36 +71,32 @@
#define PPSMC_MSG_GetEnabledSmuFeatures 0x12 ///< Get enabled features in PMFW
#define PPSMC_MSG_SetHardMinSocclkByFreq 0x13 ///< Set hard min for SOC CLK
#define PPSMC_MSG_SetSoftMinFclk 0x14 ///< Set hard min for FCLK
-#define PPSMC_MSG_SetSoftMinVcn 0x15 ///< Set soft min for VCN clocks (VCLK and DCLK)
-
+#define PPSMC_MSG_SetSoftMinVcn0 0x15 ///< Set soft min for VCN0 clocks (VCLK0 and DCLK0)
#define PPSMC_MSG_EnableGfxImu 0x16 ///< Enable GFX IMU
-
-#define PPSMC_MSG_spare_0x17 0x17
-#define PPSMC_MSG_spare_0x18 0x18
+#define PPSMC_MSG_spare_0x17 0x17 ///< Get GFX clock frequency
+#define PPSMC_MSG_spare_0x18 0x18 ///< Get FCLK frequency
#define PPSMC_MSG_AllowGfxOff 0x19 ///< Inform PMFW of allowing GFXOFF entry
#define PPSMC_MSG_DisallowGfxOff 0x1A ///< Inform PMFW of disallowing GFXOFF entry
#define PPSMC_MSG_SetSoftMaxGfxClk 0x1B ///< Set soft max for GFX CLK
#define PPSMC_MSG_SetHardMinGfxClk 0x1C ///< Set hard min for GFX CLK
-
#define PPSMC_MSG_SetSoftMaxSocclkByFreq 0x1D ///< Set soft max for SOC CLK
#define PPSMC_MSG_SetSoftMaxFclkByFreq 0x1E ///< Set soft max for FCLK
-#define PPSMC_MSG_SetSoftMaxVcn 0x1F ///< Set soft max for VCN clocks (VCLK and DCLK)
-#define PPSMC_MSG_spare_0x20 0x20
-#define PPSMC_MSG_PowerDownJpeg 0x21 ///< Power down Jpeg
-#define PPSMC_MSG_PowerUpJpeg 0x22 ///< Power up Jpeg; VCN is power gated by default
-
+#define PPSMC_MSG_SetSoftMaxVcn0 0x1F ///< Set soft max for VCN0 clocks (VCLK0 and DCLK0)
+#define PPSMC_MSG_spare_0x20 0x20 ///< Set power limit percentage
+#define PPSMC_MSG_PowerDownJpeg0 0x21 ///< Power down Jpeg of VCN0
+#define PPSMC_MSG_PowerUpJpeg0 0x22 ///< Power up Jpeg of VCN0; VCN0 is power gated by default
#define PPSMC_MSG_SetHardMinFclkByFreq 0x23 ///< Set hard min for FCLK
#define PPSMC_MSG_SetSoftMinSocclkByFreq 0x24 ///< Set soft min for SOC CLK
#define PPSMC_MSG_AllowZstates 0x25 ///< Inform PMFM of allowing Zstate entry, i.e. no Miracast activity
-#define PPSMC_MSG_Reserved 0x26 ///< Not used
-#define PPSMC_MSG_Reserved1 0x27 ///< Not used, previously PPSMC_MSG_RequestActiveWgp
-#define PPSMC_MSG_Reserved2 0x28 ///< Not used, previously PPSMC_MSG_QueryActiveWgp
+#define PPSMC_MSG_PowerDownJpeg1 0x26 ///< Power down Jpeg of VCN1
+#define PPSMC_MSG_PowerUpJpeg1 0x27 ///< Power up Jpeg of VCN1; VCN1 is power gated by default
+#define PPSMC_MSG_SetSoftMaxVcn1 0x28 ///< Set soft max for VCN1 clocks (VCLK1 and DCLK1)
#define PPSMC_MSG_PowerDownIspByTile 0x29 ///< ISP is power gated by default
#define PPSMC_MSG_PowerUpIspByTile 0x2A ///< This message is used to power up ISP tiles and enable the ISP DPM
#define PPSMC_MSG_SetHardMinIspiclkByFreq 0x2B ///< Set HardMin by frequency for ISPICLK
#define PPSMC_MSG_SetHardMinIspxclkByFreq 0x2C ///< Set HardMin by frequency for ISPXCLK
-#define PPSMC_MSG_PowerDownUmsch 0x2D ///< Power down VCN.UMSCH (aka VSCH) scheduler
-#define PPSMC_MSG_PowerUpUmsch 0x2E ///< Power up VCN.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerDownUmsch 0x2D ///< Power down VCN0.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerUpUmsch 0x2E ///< Power up VCN0.UMSCH (aka VSCH) scheduler
#define PPSMC_Message_IspStutterOn_MmhubPgDis 0x2F ///< ISP StutterOn mmHub PgDis
#define PPSMC_Message_IspStutterOff_MmhubPgEn 0x30 ///< ISP StufferOff mmHub PgEn
#define PPSMC_MSG_PowerUpVpe 0x31 ///< Power up VPE
@@ -110,7 +106,9 @@
#define PPSMC_MSG_DisableLSdma 0x35 ///< Disable LSDMA
#define PPSMC_MSG_SetSoftMaxVpe 0x36 ///<
#define PPSMC_MSG_SetSoftMinVpe 0x37 ///<
-#define PPSMC_Message_Count 0x38 ///< Total number of PPSMC messages
+#define PPSMC_MSG_AllocMALLCache 0x38 ///< Allocating MALL Cache
+#define PPSMC_MSG_ReleaseMALLCache 0x39 ///< Releasing MALL Cache
+#define PPSMC_Message_Count 0x3A ///< Total number of PPSMC messages
/** @}*/
/**
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index a941fdbf78b6b3..af427cc7dbb845 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -115,6 +115,10 @@
__SMU_DUMMY_MAP(PowerDownVcn), \
__SMU_DUMMY_MAP(PowerUpJpeg), \
__SMU_DUMMY_MAP(PowerDownJpeg), \
+ __SMU_DUMMY_MAP(PowerUpJpeg0), \
+ __SMU_DUMMY_MAP(PowerDownJpeg0), \
+ __SMU_DUMMY_MAP(PowerUpJpeg1), \
+ __SMU_DUMMY_MAP(PowerDownJpeg1), \
__SMU_DUMMY_MAP(BacoAudioD3PME), \
__SMU_DUMMY_MAP(ArmD3), \
__SMU_DUMMY_MAP(RunDcBtc), \
@@ -135,6 +139,8 @@
__SMU_DUMMY_MAP(PowerUpSdma), \
__SMU_DUMMY_MAP(SetHardMinIspclkByFreq), \
__SMU_DUMMY_MAP(SetHardMinVcn), \
+ __SMU_DUMMY_MAP(SetHardMinVcn0), \
+ __SMU_DUMMY_MAP(SetHardMinVcn1), \
__SMU_DUMMY_MAP(SetAllowFclkSwitch), \
__SMU_DUMMY_MAP(SetMinVideoGfxclkFreq), \
__SMU_DUMMY_MAP(ActiveProcessNotify), \
@@ -150,6 +156,8 @@
__SMU_DUMMY_MAP(SetPhyclkVoltageByFreq), \
__SMU_DUMMY_MAP(SetDppclkVoltageByFreq), \
__SMU_DUMMY_MAP(SetSoftMinVcn), \
+ __SMU_DUMMY_MAP(SetSoftMinVcn0), \
+ __SMU_DUMMY_MAP(SetSoftMinVcn1), \
__SMU_DUMMY_MAP(EnablePostCode), \
__SMU_DUMMY_MAP(GetGfxclkFrequency), \
__SMU_DUMMY_MAP(GetFclkFrequency), \
@@ -161,6 +169,8 @@
__SMU_DUMMY_MAP(SetSoftMaxSocclkByFreq), \
__SMU_DUMMY_MAP(SetSoftMaxFclkByFreq), \
__SMU_DUMMY_MAP(SetSoftMaxVcn), \
+ __SMU_DUMMY_MAP(SetSoftMaxVcn0), \
+ __SMU_DUMMY_MAP(SetSoftMaxVcn1), \
__SMU_DUMMY_MAP(PowerGateMmHub), \
__SMU_DUMMY_MAP(UpdatePmeRestore), \
__SMU_DUMMY_MAP(GpuChangeState), \
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
index 3f7463c1c1a919..4af1985ae44668 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
@@ -27,6 +27,7 @@
#define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF
#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7
+#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_1 0x6
#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1
#define FEATURE_MASK(feature) (1ULL << feature)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 9c03296f92cdd4..67117ced7c6ae6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -2751,7 +2751,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
switch (mp1_state) {
case PP_MP1_STATE_UNLOAD:
- ret = smu_cmn_set_mp1_state(smu, mp1_state);
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_PrepareMp1ForUnload,
+ 0x55, NULL);
+
+ if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+ ret = smu_v13_0_disable_pmfw_state(smu);
+
break;
default:
/* Ignore others */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index bb98156b2fa1d5..949131bd1ecb21 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -226,8 +226,18 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
struct amdgpu_device *adev = smu->adev;
int ret = 0;
- if (!en && !adev->in_s0ix)
+ if (!en && !adev->in_s0ix) {
+ /* Adds a GFX reset as workaround just before sending the
+ * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
+ * an invalid state.
+ */
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
+ SMU_RESET_MODE_2, NULL);
+ if (ret)
+ return ret;
+
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
+ }
return ret;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
index b06a3cc4330542..07a65e005785d6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
@@ -234,7 +234,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
break;
case IP_VERSION(14, 0, 1):
- smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
+ smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_1;
break;
default:
@@ -1402,9 +1402,22 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu,
if (adev->vcn.harvest_config & (1 << i))
continue;
- ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
- SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
- i << 16U, NULL);
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) {
+ if (i == 0)
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0,
+ i << 16U, NULL);
+ else if (i == 1)
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1,
+ i << 16U, NULL);
+ } else {
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn,
+ i << 16U, NULL);
+ }
+
if (ret)
return ret;
}
@@ -1415,9 +1428,34 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu,
int smu_v14_0_set_jpeg_enable(struct smu_context *smu,
bool enable)
{
- return smu_cmn_send_smc_msg_with_param(smu, enable ?
- SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg,
- 0, NULL);
+ struct amdgpu_device *adev = smu->adev;
+ int i, ret = 0;
+
+ for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
+ if (adev->jpeg.harvest_config & (1 << i))
+ continue;
+
+ if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0) ||
+ amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1)) {
+ if (i == 0)
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpJpeg0 : SMU_MSG_PowerDownJpeg0,
+ i << 16U, NULL);
+ else if (i == 1 && amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpJpeg1 : SMU_MSG_PowerDownJpeg1,
+ i << 16U, NULL);
+ } else {
+ ret = smu_cmn_send_smc_msg_with_param(smu, enable ?
+ SMU_MSG_PowerUpJpeg : SMU_MSG_PowerDownJpeg,
+ i << 16U, NULL);
+ }
+
+ if (ret)
+ return ret;
+ }
+
+ return ret;
}
int smu_v14_0_run_btc(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
index 9310c4758e38ce..63399c00cc28ff 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
@@ -70,9 +70,12 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1),
MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1),
MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1),
- MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1),
- MSG_MAP(PowerUpVcn, PPSMC_MSG_PowerUpVcn, 1),
- MSG_MAP(SetHardMinVcn, PPSMC_MSG_SetHardMinVcn, 1),
+ MSG_MAP(PowerDownVcn0, PPSMC_MSG_PowerDownVcn0, 1),
+ MSG_MAP(PowerUpVcn0, PPSMC_MSG_PowerUpVcn0, 1),
+ MSG_MAP(SetHardMinVcn0, PPSMC_MSG_SetHardMinVcn0, 1),
+ MSG_MAP(PowerDownVcn1, PPSMC_MSG_PowerDownVcn1, 1),
+ MSG_MAP(PowerUpVcn1, PPSMC_MSG_PowerUpVcn1, 1),
+ MSG_MAP(SetHardMinVcn1, PPSMC_MSG_SetHardMinVcn1, 1),
MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxclk, 1),
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 1),
MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1),
@@ -83,7 +86,8 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(GetEnabledSmuFeatures, PPSMC_MSG_GetEnabledSmuFeatures, 1),
MSG_MAP(SetHardMinSocclkByFreq, PPSMC_MSG_SetHardMinSocclkByFreq, 1),
MSG_MAP(SetSoftMinFclk, PPSMC_MSG_SetSoftMinFclk, 1),
- MSG_MAP(SetSoftMinVcn, PPSMC_MSG_SetSoftMinVcn, 1),
+ MSG_MAP(SetSoftMinVcn0, PPSMC_MSG_SetSoftMinVcn0, 1),
+ MSG_MAP(SetSoftMinVcn1, PPSMC_MSG_SetSoftMinVcn1, 1),
MSG_MAP(EnableGfxImu, PPSMC_MSG_EnableGfxImu, 1),
MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1),
MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1),
@@ -91,9 +95,12 @@ static struct cmn2asic_msg_mapping smu_v14_0_0_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(SetHardMinGfxClk, PPSMC_MSG_SetHardMinGfxClk, 1),
MSG_MAP(SetSoftMaxSocclkByFreq, PPSMC_MSG_SetSoftMaxSocclkByFreq, 1),
MSG_MAP(SetSoftMaxFclkByFreq, PPSMC_MSG_SetSoftMaxFclkByFreq, 1),
- MSG_MAP(SetSoftMaxVcn, PPSMC_MSG_SetSoftMaxVcn, 1),
- MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 1),
- MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg, 1),
+ MSG_MAP(SetSoftMaxVcn0, PPSMC_MSG_SetSoftMaxVcn0, 1),
+ MSG_MAP(SetSoftMaxVcn1, PPSMC_MSG_SetSoftMaxVcn1, 1),
+ MSG_MAP(PowerDownJpeg0, PPSMC_MSG_PowerDownJpeg0, 1),
+ MSG_MAP(PowerUpJpeg0, PPSMC_MSG_PowerUpJpeg0, 1),
+ MSG_MAP(PowerDownJpeg1, PPSMC_MSG_PowerDownJpeg1, 1),
+ MSG_MAP(PowerUpJpeg1, PPSMC_MSG_PowerUpJpeg1, 1),
MSG_MAP(SetHardMinFclkByFreq, PPSMC_MSG_SetHardMinFclkByFreq, 1),
MSG_MAP(SetSoftMinSocclkByFreq, PPSMC_MSG_SetSoftMinSocclkByFreq, 1),
MSG_MAP(PowerDownIspByTile, PPSMC_MSG_PowerDownIspByTile, 1),
@@ -154,7 +161,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu)
SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
- SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, sizeof(DpmClocks_t),
+ SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
@@ -164,7 +171,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu)
goto err0_out;
smu_table->metrics_time = 0;
- smu_table->clocks_table = kzalloc(sizeof(DpmClocks_t), GFP_KERNEL);
+ smu_table->clocks_table = kzalloc(max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)), GFP_KERNEL);
if (!smu_table->clocks_table)
goto err1_out;
@@ -586,6 +593,60 @@ static int smu_v14_0_0_mode2_reset(struct smu_context *smu)
return ret;
}
+static int smu_v14_0_1_get_dpm_freq_by_index(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t dpm_level,
+ uint32_t *freq)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ if (!clk_table || clk_type >= SMU_CLK_COUNT)
+ return -EINVAL;
+
+ switch (clk_type) {
+ case SMU_SOCCLK:
+ if (dpm_level >= clk_table->NumSocClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->SocClocks[dpm_level];
+ break;
+ case SMU_VCLK:
+ if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->VClocks0[dpm_level];
+ break;
+ case SMU_DCLK:
+ if (dpm_level >= clk_table->Vcn0ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->DClocks0[dpm_level];
+ break;
+ case SMU_VCLK1:
+ if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->VClocks1[dpm_level];
+ break;
+ case SMU_DCLK1:
+ if (dpm_level >= clk_table->Vcn1ClkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->DClocks1[dpm_level];
+ break;
+ case SMU_UCLK:
+ case SMU_MCLK:
+ if (dpm_level >= clk_table->NumMemPstatesEnabled)
+ return -EINVAL;
+ *freq = clk_table->MemPstateTable[dpm_level].MemClk;
+ break;
+ case SMU_FCLK:
+ if (dpm_level >= clk_table->NumFclkLevelsEnabled)
+ return -EINVAL;
+ *freq = clk_table->FclkClocks_Freq[dpm_level];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t dpm_level,
@@ -630,6 +691,19 @@ static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu,
return 0;
}
+static int smu_v14_0_common_get_dpm_freq_by_index(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t dpm_level,
+ uint32_t *freq)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+
+ return 0;
+}
+
static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
enum smu_clk_type clk_type)
{
@@ -650,6 +724,8 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
break;
case SMU_VCLK:
case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
feature_id = SMU_FEATURE_VCN_DPM_BIT;
break;
default:
@@ -659,6 +735,126 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
return smu_cmn_feature_is_enabled(smu, feature_id);
}
+static int smu_v14_0_1_get_dpm_ultimate_freq(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *min,
+ uint32_t *max)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+ uint32_t clock_limit;
+ uint32_t max_dpm_level, min_dpm_level;
+ int ret = 0;
+
+ if (!smu_v14_0_0_clk_dpm_is_enabled(smu, clk_type)) {
+ switch (clk_type) {
+ case SMU_MCLK:
+ case SMU_UCLK:
+ clock_limit = smu->smu_table.boot_values.uclk;
+ break;
+ case SMU_FCLK:
+ clock_limit = smu->smu_table.boot_values.fclk;
+ break;
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ clock_limit = smu->smu_table.boot_values.gfxclk;
+ break;
+ case SMU_SOCCLK:
+ clock_limit = smu->smu_table.boot_values.socclk;
+ break;
+ case SMU_VCLK:
+ case SMU_VCLK1:
+ clock_limit = smu->smu_table.boot_values.vclk;
+ break;
+ case SMU_DCLK:
+ case SMU_DCLK1:
+ clock_limit = smu->smu_table.boot_values.dclk;
+ break;
+ default:
+ clock_limit = 0;
+ break;
+ }
+
+ /* clock in Mhz unit */
+ if (min)
+ *min = clock_limit / 100;
+ if (max)
+ *max = clock_limit / 100;
+
+ return 0;
+ }
+
+ if (max) {
+ switch (clk_type) {
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ *max = clk_table->MaxGfxClk;
+ break;
+ case SMU_MCLK:
+ case SMU_UCLK:
+ case SMU_FCLK:
+ max_dpm_level = 0;
+ break;
+ case SMU_SOCCLK:
+ max_dpm_level = clk_table->NumSocClkLevelsEnabled - 1;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ max_dpm_level = clk_table->Vcn0ClkLevelsEnabled - 1;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ max_dpm_level = clk_table->Vcn1ClkLevelsEnabled - 1;
+ break;
+ default:
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
+ if (ret)
+ goto failed;
+ }
+ }
+
+ if (min) {
+ switch (clk_type) {
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ *min = clk_table->MinGfxClk;
+ break;
+ case SMU_MCLK:
+ case SMU_UCLK:
+ min_dpm_level = clk_table->NumMemPstatesEnabled - 1;
+ break;
+ case SMU_FCLK:
+ min_dpm_level = clk_table->NumFclkLevelsEnabled - 1;
+ break;
+ case SMU_SOCCLK:
+ min_dpm_level = 0;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ min_dpm_level = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
+ if (ret)
+ goto failed;
+ }
+ }
+
+failed:
+ return ret;
+}
+
static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *min,
@@ -729,7 +925,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
}
if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
if (ret)
goto failed;
}
@@ -761,7 +957,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
}
if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
if (ret)
goto failed;
}
@@ -771,6 +967,19 @@ failed:
return ret;
}
+static int smu_v14_0_common_get_dpm_ultimate_freq(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *min,
+ uint32_t *max)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_ultimate_freq(smu, clk_type, min, max);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_ultimate_freq(smu, clk_type, min, max);
+
+ return 0;
+}
+
static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *value)
@@ -804,6 +1013,37 @@ static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu,
return smu_v14_0_0_get_smu_metrics_data(smu, member_type, value);
}
+static int smu_v14_0_1_get_dpm_level_count(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *count)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ switch (clk_type) {
+ case SMU_SOCCLK:
+ *count = clk_table->NumSocClkLevelsEnabled;
+ break;
+ case SMU_VCLK:
+ case SMU_DCLK:
+ *count = clk_table->Vcn0ClkLevelsEnabled;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ *count = clk_table->Vcn1ClkLevelsEnabled;
+ break;
+ case SMU_MCLK:
+ *count = clk_table->NumMemPstatesEnabled;
+ break;
+ case SMU_FCLK:
+ *count = clk_table->NumFclkLevelsEnabled;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu,
enum smu_clk_type clk_type,
uint32_t *count)
@@ -833,6 +1073,18 @@ static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu,
return 0;
}
+static int smu_v14_0_common_get_dpm_level_count(struct smu_context *smu,
+ enum smu_clk_type clk_type,
+ uint32_t *count)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_get_dpm_level_count(smu, clk_type, count);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_get_dpm_level_count(smu, clk_type, count);
+
+ return 0;
+}
+
static int smu_v14_0_0_print_clk_levels(struct smu_context *smu,
enum smu_clk_type clk_type, char *buf)
{
@@ -859,18 +1111,20 @@ static int smu_v14_0_0_print_clk_levels(struct smu_context *smu,
case SMU_SOCCLK:
case SMU_VCLK:
case SMU_DCLK:
+ case SMU_VCLK1:
+ case SMU_DCLK1:
case SMU_MCLK:
case SMU_FCLK:
ret = smu_v14_0_0_get_current_clk_freq(smu, clk_type, &cur_value);
if (ret)
break;
- ret = smu_v14_0_0_get_dpm_level_count(smu, clk_type, &count);
+ ret = smu_v14_0_common_get_dpm_level_count(smu, clk_type, &count);
if (ret)
break;
for (i = 0; i < count; i++) {
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, i, &value);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, i, &value);
if (ret)
break;
@@ -933,8 +1187,13 @@ static int smu_v14_0_0_set_soft_freq_limited_range(struct smu_context *smu,
break;
case SMU_VCLK:
case SMU_DCLK:
- msg_set_min = SMU_MSG_SetHardMinVcn;
- msg_set_max = SMU_MSG_SetSoftMaxVcn;
+ msg_set_min = SMU_MSG_SetHardMinVcn0;
+ msg_set_max = SMU_MSG_SetSoftMaxVcn0;
+ break;
+ case SMU_VCLK1:
+ case SMU_DCLK1:
+ msg_set_min = SMU_MSG_SetHardMinVcn1;
+ msg_set_max = SMU_MSG_SetSoftMaxVcn1;
break;
default:
return -EINVAL;
@@ -964,11 +1223,11 @@ static int smu_v14_0_0_force_clk_levels(struct smu_context *smu,
case SMU_FCLK:
case SMU_VCLK:
case SMU_DCLK:
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
if (ret)
break;
- ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
+ ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
if (ret)
break;
@@ -993,25 +1252,25 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu,
switch (level) {
case AMD_DPM_FORCED_LEVEL_HIGH:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
sclk_min = sclk_max;
fclk_min = fclk_max;
socclk_min = socclk_max;
break;
case AMD_DPM_FORCED_LEVEL_LOW:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
sclk_max = sclk_min;
fclk_max = fclk_min;
socclk_max = socclk_min;
break;
case AMD_DPM_FORCED_LEVEL_AUTO:
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
- smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
+ smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
break;
case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
@@ -1060,6 +1319,18 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu,
return ret;
}
+static int smu_v14_0_1_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+
+ smu->gfx_default_hard_min_freq = clk_table->MinGfxClk;
+ smu->gfx_default_soft_max_freq = clk_table->MaxGfxClk;
+ smu->gfx_actual_hard_min_freq = 0;
+ smu->gfx_actual_soft_max_freq = 0;
+
+ return 0;
+}
+
static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
{
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
@@ -1072,6 +1343,16 @@ static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *sm
return 0;
}
+static int smu_v14_0_common_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_v14_0_0_set_fine_grain_gfx_freq_parameters(smu);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_v14_0_1_set_fine_grain_gfx_freq_parameters(smu);
+
+ return 0;
+}
+
static int smu_v14_0_0_set_vpe_enable(struct smu_context *smu,
bool enable)
{
@@ -1088,6 +1369,25 @@ static int smu_v14_0_0_set_umsch_mm_enable(struct smu_context *smu,
0, NULL);
}
+static int smu_14_0_1_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+ DpmClocks_t_v14_0_1 *clk_table = smu->smu_table.clocks_table;
+ uint8_t idx;
+
+ /* Only the Clock information of SOC and VPE is copied to provide VPE DPM settings for use. */
+ for (idx = 0; idx < NUM_SOCCLK_DPM_LEVELS; idx++) {
+ clock_table->SocClocks[idx].Freq = (idx < clk_table->NumSocClkLevelsEnabled) ? clk_table->SocClocks[idx]:0;
+ clock_table->SocClocks[idx].Vol = 0;
+ }
+
+ for (idx = 0; idx < NUM_VPE_DPM_LEVELS; idx++) {
+ clock_table->VPEClocks[idx].Freq = (idx < clk_table->VpeClkLevelsEnabled) ? clk_table->VPEClocks[idx]:0;
+ clock_table->VPEClocks[idx].Vol = 0;
+ }
+
+ return 0;
+}
+
static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
{
DpmClocks_t *clk_table = smu->smu_table.clocks_table;
@@ -1107,6 +1407,16 @@ static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *
return 0;
}
+static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 0))
+ smu_14_0_0_get_dpm_table(smu, clock_table);
+ else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 1))
+ smu_14_0_1_get_dpm_table(smu, clock_table);
+
+ return 0;
+}
+
static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
.check_fw_status = smu_v14_0_check_fw_status,
.check_fw_version = smu_v14_0_check_fw_version,
@@ -1128,16 +1438,16 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
.set_driver_table_location = smu_v14_0_set_driver_table_location,
.gfx_off_control = smu_v14_0_gfx_off_control,
.mode2_reset = smu_v14_0_0_mode2_reset,
- .get_dpm_ultimate_freq = smu_v14_0_0_get_dpm_ultimate_freq,
+ .get_dpm_ultimate_freq = smu_v14_0_common_get_dpm_ultimate_freq,
.od_edit_dpm_table = smu_v14_0_od_edit_dpm_table,
.print_clk_levels = smu_v14_0_0_print_clk_levels,
.force_clk_levels = smu_v14_0_0_force_clk_levels,
.set_performance_level = smu_v14_0_0_set_performance_level,
- .set_fine_grain_gfx_freq_parameters = smu_v14_0_0_set_fine_grain_gfx_freq_parameters,
+ .set_fine_grain_gfx_freq_parameters = smu_v14_0_common_set_fine_grain_gfx_freq_parameters,
.set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu,
.dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable,
.dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable,
- .get_dpm_clock_table = smu_14_0_0_get_dpm_table,
+ .get_dpm_clock_table = smu_v14_0_common_get_dpm_table,
};
static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu)
diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c
index ebb6d8ebd44eb6..1e9259416980ec 100644
--- a/drivers/gpu/drm/ast/ast_dp.c
+++ b/drivers/gpu/drm/ast/ast_dp.c
@@ -180,6 +180,7 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
{
struct ast_device *ast = to_ast_device(dev);
u8 video_on_off = on;
+ u32 i = 0;
// Video On/Off
ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xE3, (u8) ~AST_DP_VIDEO_ENABLE, on);
@@ -192,6 +193,8 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
ASTDP_MIRROR_VIDEO_ENABLE) != video_on_off) {
// wait 1 ms
mdelay(1);
+ if (++i > 200)
+ break;
}
}
}
diff --git a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c
index bd61e20770a5be..14a2a8473682b0 100644
--- a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c
@@ -52,7 +52,7 @@
* @adapter: I2C adapter for the DDC bus
* @offset: register offset
* @buffer: buffer for return data
- * @size: sizo of the buffer
+ * @size: size of the buffer
*
* Reads @size bytes from the DP dual mode adaptor registers
* starting at @offset.
@@ -116,7 +116,7 @@ EXPORT_SYMBOL(drm_dp_dual_mode_read);
* @adapter: I2C adapter for the DDC bus
* @offset: register offset
* @buffer: buffer for write data
- * @size: sizo of the buffer
+ * @size: size of the buffer
*
* Writes @size bytes to the DP dual mode adaptor registers
* starting at @offset.
diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c
index 266826eac4a75b..f5d4be89786609 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -4111,6 +4111,13 @@ int drm_dp_bw_overhead(int lane_count, int hactive,
u32 overhead = 1000000;
int symbol_cycles;
+ if (lane_count == 0 || hactive == 0 || bpp_x16 == 0) {
+ DRM_DEBUG_KMS("Invalid BW overhead params: lane_count %d, hactive %d, bpp_x16 %d.%04d\n",
+ lane_count, hactive,
+ bpp_x16 >> 4, (bpp_x16 & 0xf) * 625);
+ return 0;
+ }
+
/*
* DP Standard v2.1 2.6.4.1
* SSC downspread and ref clock variation margin:
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index 871e4e2129d6da..0683a129b36285 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -777,6 +777,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
unsigned int total_modes_count = 0;
struct drm_client_offset *offsets;
unsigned int connector_count = 0;
+ /* points to modes protected by mode_config.mutex */
struct drm_display_mode **modes;
struct drm_crtc **crtcs;
int i, ret = 0;
@@ -845,7 +846,6 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
drm_client_pick_crtcs(client, connectors, connector_count,
crtcs, modes, 0, width, height);
}
- mutex_unlock(&dev->mode_config.mutex);
drm_client_modeset_release(client);
@@ -875,6 +875,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
modeset->y = offset->y;
}
}
+ mutex_unlock(&dev->mode_config.mutex);
mutex_unlock(&client->modeset_mutex);
out:
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 7352bde299d547..03bd3c7bd0dc2c 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -582,7 +582,12 @@ int drm_gem_map_attach(struct dma_buf *dma_buf,
{
struct drm_gem_object *obj = dma_buf->priv;
- if (!obj->funcs->get_sg_table)
+ /*
+ * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers
+ * that implement their own ->map_dma_buf() do not.
+ */
+ if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf &&
+ !obj->funcs->get_sg_table)
return -ENOSYS;
return drm_gem_pin(obj);
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3ef6ed41e62b4a..fba73c38e23569 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -33,9 +33,9 @@ endif
subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror
# Fine grained warnings disable
-CFLAGS_i915_pci.o = $(call cc-disable-warning, override-init)
-CFLAGS_display/intel_display_device.o = $(call cc-disable-warning, override-init)
-CFLAGS_display/intel_fbdev.o = $(call cc-disable-warning, override-init)
+CFLAGS_i915_pci.o = -Wno-override-init
+CFLAGS_display/intel_display_device.o = -Wno-override-init
+CFLAGS_display/intel_fbdev.o = -Wno-override-init
# Support compiling the display code separately for both i915 and xe
# drivers. Define I915 when building i915.
@@ -118,6 +118,7 @@ gt-y += \
gt/intel_ggtt_fencing.o \
gt/intel_gt.o \
gt/intel_gt_buffer_pool.o \
+ gt/intel_gt_ccs_mode.o \
gt/intel_gt_clock_utils.o \
gt/intel_gt_debugfs.o \
gt/intel_gt_engines_debugfs.o \
diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c
index dfe0b07a122d15..06ec04e667e32f 100644
--- a/drivers/gpu/drm/i915/display/g4x_dp.c
+++ b/drivers/gpu/drm/i915/display/g4x_dp.c
@@ -717,7 +717,6 @@ static void g4x_enable_dp(struct intel_atomic_state *state,
{
intel_enable_dp(state, encoder, pipe_config, conn_state);
intel_edp_backlight_on(pipe_config, conn_state);
- encoder->audio_enable(encoder, pipe_config, conn_state);
}
static void vlv_enable_dp(struct intel_atomic_state *state,
@@ -726,7 +725,6 @@ static void vlv_enable_dp(struct intel_atomic_state *state,
const struct drm_connector_state *conn_state)
{
intel_edp_backlight_on(pipe_config, conn_state);
- encoder->audio_enable(encoder, pipe_config, conn_state);
}
static void g4x_pre_enable_dp(struct intel_atomic_state *state,
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index eda4a8b885904d..ac456a2275dbad 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1155,7 +1155,6 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
}
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP);
- intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
/* ensure all panel commands dispatched before enabling transcoder */
wait_for_cmds_dispatched_to_panel(encoder);
@@ -1256,6 +1255,8 @@ static void gen11_dsi_enable(struct intel_atomic_state *state,
/* step6d: enable dsi transcoder */
gen11_dsi_enable_transcoder(encoder);
+ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
+
/* step7: enable backlight */
intel_backlight_enable(crtc_state, conn_state);
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON);
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index fe52c06271ef05..52bd3576835b6b 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1955,16 +1955,12 @@ static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915,
* these devices we split the init OTP sequence into a deassert sequence and
* the actual init OTP part.
*/
-static void fixup_mipi_sequences(struct drm_i915_private *i915,
- struct intel_panel *panel)
+static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
{
u8 *init_otp;
int len;
- /* Limit this to VLV for now. */
- if (!IS_VALLEYVIEW(i915))
- return;
-
/* Limit this to v1 vid-mode sequences */
if (panel->vbt.dsi.config->is_cmd_mode ||
panel->vbt.dsi.seq_version != 1)
@@ -2000,6 +1996,41 @@ static void fixup_mipi_sequences(struct drm_i915_private *i915,
panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1;
}
+/*
+ * Some machines (eg. Lenovo 82TQ) appear to have broken
+ * VBT sequences:
+ * - INIT_OTP is not present at all
+ * - what should be in INIT_OTP is in DISPLAY_ON
+ * - what should be in DISPLAY_ON is in BACKLIGHT_ON
+ * (along with the actual backlight stuff)
+ *
+ * To make those work we simply swap DISPLAY_ON and INIT_OTP.
+ *
+ * TODO: Do we need to limit this to specific machines,
+ * or examine the contents of the sequences to
+ * avoid false positives?
+ */
+static void icl_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] &&
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]) {
+ drm_dbg_kms(&i915->drm, "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n");
+
+ swap(panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP],
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]);
+ }
+}
+
+static void fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (DISPLAY_VER(i915) >= 11)
+ icl_fixup_mipi_sequences(i915, panel);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_fixup_mipi_sequences(i915, panel);
+}
+
static void
parse_mipi_sequence(struct drm_i915_private *i915,
struct intel_panel *panel)
@@ -3351,6 +3382,9 @@ bool intel_bios_encoder_supports_dp_dual_mode(const struct intel_bios_encoder_da
{
const struct child_device_config *child = &devdata->child;
+ if (!devdata)
+ return false;
+
if (!intel_bios_encoder_supports_dp(devdata) ||
!intel_bios_encoder_supports_hdmi(devdata))
return false;
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index ed89b86ea625aa..f672bfd70d4551 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -2534,7 +2534,8 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state)
intel_atomic_get_old_cdclk_state(state);
const struct intel_cdclk_state *new_cdclk_state =
intel_atomic_get_new_cdclk_state(state);
- enum pipe pipe = new_cdclk_state->pipe;
+ struct intel_cdclk_config cdclk_config;
+ enum pipe pipe;
if (!intel_cdclk_changed(&old_cdclk_state->actual,
&new_cdclk_state->actual))
@@ -2543,12 +2544,25 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state)
if (IS_DG2(i915))
intel_cdclk_pcode_pre_notify(state);
- if (pipe == INVALID_PIPE ||
- old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) {
- drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+ if (new_cdclk_state->disable_pipes) {
+ cdclk_config = new_cdclk_state->actual;
+ pipe = INVALID_PIPE;
+ } else {
+ if (new_cdclk_state->actual.cdclk >= old_cdclk_state->actual.cdclk) {
+ cdclk_config = new_cdclk_state->actual;
+ pipe = new_cdclk_state->pipe;
+ } else {
+ cdclk_config = old_cdclk_state->actual;
+ pipe = INVALID_PIPE;
+ }
- intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
+ cdclk_config.voltage_level = max(new_cdclk_state->actual.voltage_level,
+ old_cdclk_state->actual.voltage_level);
}
+
+ drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+
+ intel_set_cdclk(i915, &cdclk_config, pipe);
}
/**
@@ -2566,7 +2580,7 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state)
intel_atomic_get_old_cdclk_state(state);
const struct intel_cdclk_state *new_cdclk_state =
intel_atomic_get_new_cdclk_state(state);
- enum pipe pipe = new_cdclk_state->pipe;
+ enum pipe pipe;
if (!intel_cdclk_changed(&old_cdclk_state->actual,
&new_cdclk_state->actual))
@@ -2575,12 +2589,15 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state)
if (IS_DG2(i915))
intel_cdclk_pcode_post_notify(state);
- if (pipe != INVALID_PIPE &&
- old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) {
- drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+ if (!new_cdclk_state->disable_pipes &&
+ new_cdclk_state->actual.cdclk < old_cdclk_state->actual.cdclk)
+ pipe = new_cdclk_state->pipe;
+ else
+ pipe = INVALID_PIPE;
+
+ drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
- intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
- }
+ intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
}
static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state)
@@ -3058,6 +3075,7 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa
return NULL;
cdclk_state->pipe = INVALID_PIPE;
+ cdclk_state->disable_pipes = false;
return &cdclk_state->base;
}
@@ -3236,6 +3254,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state)
if (ret)
return ret;
+ new_cdclk_state->disable_pipes = true;
+
drm_dbg_kms(&dev_priv->drm,
"Modeset required for cdclk change\n");
}
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h
index 48fd7d39e0cd9c..71bc032bfef16e 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.h
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.h
@@ -51,6 +51,9 @@ struct intel_cdclk_state {
/* bitmask of active pipes */
u8 active_pipes;
+
+ /* update cdclk with pipes disabled */
+ bool disable_pipes;
};
int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index f8b33999d43fcc..0d3da55e1c24d5 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -36,12 +36,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *dev_priv =
to_i915(plane_state->uapi.plane->dev);
- const struct drm_framebuffer *fb = plane_state->hw.fb;
- struct drm_i915_gem_object *obj = intel_fb_obj(fb);
u32 base;
if (DISPLAY_INFO(dev_priv)->cursor_needs_physical)
- base = i915_gem_object_get_dma_address(obj, 0);
+ base = plane_state->phys_dma_addr;
else
base = intel_plane_ggtt_offset(plane_state);
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index c587a8efeafcf5..c17462b4c2ac19 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -4256,7 +4256,12 @@ static bool m_n_equal(const struct intel_link_m_n *m_n_1,
static bool crtcs_port_sync_compatible(const struct intel_crtc_state *crtc_state1,
const struct intel_crtc_state *crtc_state2)
{
+ /*
+ * FIXME the modeset sequence is currently wrong and
+ * can't deal with bigjoiner + port sync at the same time.
+ */
return crtc_state1->hw.active && crtc_state2->hw.active &&
+ !crtc_state1->bigjoiner_pipes && !crtc_state2->bigjoiner_pipes &&
crtc_state1->output_types == crtc_state2->output_types &&
crtc_state1->output_format == crtc_state2->output_format &&
crtc_state1->lane_count == crtc_state2->lane_count &&
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index ab2f52d21bad8b..8af9e6128277af 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2709,15 +2709,6 @@ static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state)
*/
intel_de_write(dev_priv, PIPESRC(pipe),
PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1));
-
- if (!crtc_state->enable_psr2_su_region_et)
- return;
-
- width = drm_rect_width(&crtc_state->psr2_su_area);
- height = drm_rect_height(&crtc_state->psr2_su_area);
-
- intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(pipe),
- PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1));
}
static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state)
diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h b/drivers/gpu/drm/i915/display/intel_display_device.h
index fe42688137863c..9b1bce2624b9ea 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.h
+++ b/drivers/gpu/drm/i915/display/intel_display_device.h
@@ -47,6 +47,7 @@ struct drm_printer;
#define HAS_DPT(i915) (DISPLAY_VER(i915) >= 13)
#define HAS_DSB(i915) (DISPLAY_INFO(i915)->has_dsb)
#define HAS_DSC(__i915) (DISPLAY_RUNTIME_INFO(__i915)->has_dsc)
+#define HAS_DSC_MST(__i915) (DISPLAY_VER(__i915) >= 12 && HAS_DSC(__i915))
#define HAS_FBC(i915) (DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0)
#define HAS_FPGA_DBG_UNCLAIMED(i915) (DISPLAY_INFO(i915)->has_fpga_dbg)
#define HAS_FW_BLC(i915) (DISPLAY_VER(i915) >= 3)
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index e67cd5b02e84ff..bf3f942e19c3d3 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -727,6 +727,7 @@ struct intel_plane_state {
#define PLANE_HAS_FENCE BIT(0)
struct intel_fb_view view;
+ u32 phys_dma_addr; /* for cursor_needs_physical */
/* Plane pxp decryption state */
bool decrypt;
@@ -1422,6 +1423,8 @@ struct intel_crtc_state {
u32 psr2_man_track_ctl;
+ u32 pipe_srcsz_early_tpt;
+
struct drm_rect psr2_su_area;
/* Variable Refresh Rate state */
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index f0c3ed37b350b9..e583515f9b25a3 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -67,6 +67,7 @@
#include "intel_dp_tunnel.h"
#include "intel_dpio_phy.h"
#include "intel_dpll.h"
+#include "intel_drrs.h"
#include "intel_fifo_underrun.h"
#include "intel_hdcp.h"
#include "intel_hdmi.h"
@@ -498,7 +499,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp)
/* The values must be in increasing order */
static const int mtl_rates[] = {
162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000,
- 810000, 1000000, 1350000, 2000000,
+ 810000, 1000000, 2000000,
};
static const int icl_rates[] = {
162000, 216000, 270000, 324000, 432000, 540000, 648000, 810000,
@@ -1421,7 +1422,8 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp,
if (DISPLAY_VER(dev_priv) >= 12)
return true;
- if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A)
+ if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A &&
+ !intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST))
return true;
return false;
@@ -1916,8 +1918,9 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp,
dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1);
for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) {
- if (valid_dsc_bpp[i] < dsc_min_bpp ||
- valid_dsc_bpp[i] > dsc_max_bpp)
+ if (valid_dsc_bpp[i] < dsc_min_bpp)
+ continue;
+ if (valid_dsc_bpp[i] > dsc_max_bpp)
break;
ret = dsc_compute_link_config(intel_dp,
@@ -2683,15 +2686,6 @@ intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp,
intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA);
}
-static bool cpu_transcoder_has_drrs(struct drm_i915_private *i915,
- enum transcoder cpu_transcoder)
-{
- if (HAS_DOUBLE_BUFFERED_M_N(i915))
- return true;
-
- return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder);
-}
-
static bool can_enable_drrs(struct intel_connector *connector,
const struct intel_crtc_state *pipe_config,
const struct drm_display_mode *downclock_mode)
@@ -2714,7 +2708,7 @@ static bool can_enable_drrs(struct intel_connector *connector,
if (pipe_config->has_pch_encoder)
return false;
- if (!cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder))
+ if (!intel_cpu_transcoder_has_drrs(i915, pipe_config->cpu_transcoder))
return false;
return downclock_mode &&
@@ -2731,7 +2725,11 @@ intel_dp_drrs_compute_config(struct intel_connector *connector,
intel_panel_downclock_mode(connector, &pipe_config->hw.adjusted_mode);
int pixel_clock;
- if (has_seamless_m_n(connector))
+ /*
+ * FIXME all joined pipes share the same transcoder.
+ * Need to account for that when updating M/N live.
+ */
+ if (has_seamless_m_n(connector) && !pipe_config->bigjoiner_pipes)
pipe_config->update_m_n = true;
if (!can_enable_drrs(connector, pipe_config, downclock_mode)) {
@@ -6565,6 +6563,7 @@ intel_dp_init_connector(struct intel_digital_port *dig_port,
intel_connector->get_hw_state = intel_ddi_connector_get_hw_state;
else
intel_connector->get_hw_state = intel_connector_get_hw_state;
+ intel_connector->sync_state = intel_dp_connector_sync_state;
if (!intel_edp_init_connector(intel_dp, intel_connector)) {
intel_dp_aux_fini(intel_dp);
diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
index b98a87883fefb0..9db43bd81ce2fa 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
@@ -691,12 +691,15 @@ int intel_dp_hdcp_get_remote_capability(struct intel_connector *connector,
u8 bcaps;
int ret;
+ *hdcp_capable = false;
+ *hdcp2_capable = false;
if (!intel_encoder_is_mst(connector->encoder))
return -EINVAL;
ret = _intel_dp_hdcp2_get_capability(aux, hdcp2_capable);
if (ret)
- return ret;
+ drm_dbg_kms(&i915->drm,
+ "HDCP2 DPCD capability read failed err: %d\n", ret);
ret = intel_dp_hdcp_read_bcaps(aux, i915, &bcaps);
if (ret)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 53aec023ce92fa..b651c990af85f7 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -1355,7 +1355,7 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
return 0;
}
- if (DISPLAY_VER(dev_priv) >= 10 &&
+ if (HAS_DSC_MST(dev_priv) &&
drm_dp_sink_supports_dsc(intel_connector->dp.dsc_dpcd)) {
/*
* TBD pass the connector BPC,
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index ff480f171f75a2..b6d24410740f85 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2554,7 +2554,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params,
static bool
ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915)
{
- return (((IS_ELKHARTLAKE(i915) || IS_JASPERLAKE(i915)) &&
+ return ((IS_ELKHARTLAKE(i915) &&
IS_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) ||
IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) &&
i915->display.dpll.ref_clks.nssc == 38400;
diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c
index 169ef38ff18833..597f8bd6aa1a0e 100644
--- a/drivers/gpu/drm/i915/display/intel_drrs.c
+++ b/drivers/gpu/drm/i915/display/intel_drrs.c
@@ -63,6 +63,15 @@ const char *intel_drrs_type_str(enum drrs_type drrs_type)
return str[drrs_type];
}
+bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915,
+ enum transcoder cpu_transcoder)
+{
+ if (HAS_DOUBLE_BUFFERED_M_N(i915))
+ return true;
+
+ return intel_cpu_transcoder_has_m2_n2(i915, cpu_transcoder);
+}
+
static void
intel_drrs_set_refresh_rate_pipeconf(struct intel_crtc *crtc,
enum drrs_refresh_rate refresh_rate)
@@ -312,9 +321,8 @@ static int intel_drrs_debugfs_status_show(struct seq_file *m, void *unused)
mutex_lock(&crtc->drrs.mutex);
seq_printf(m, "DRRS capable: %s\n",
- str_yes_no(crtc_state->has_drrs ||
- HAS_DOUBLE_BUFFERED_M_N(i915) ||
- intel_cpu_transcoder_has_m2_n2(i915, crtc_state->cpu_transcoder)));
+ str_yes_no(intel_cpu_transcoder_has_drrs(i915,
+ crtc_state->cpu_transcoder)));
seq_printf(m, "DRRS enabled: %s\n",
str_yes_no(crtc_state->has_drrs));
diff --git a/drivers/gpu/drm/i915/display/intel_drrs.h b/drivers/gpu/drm/i915/display/intel_drrs.h
index 8ef5f93a80ffd5..0982f95eab727a 100644
--- a/drivers/gpu/drm/i915/display/intel_drrs.h
+++ b/drivers/gpu/drm/i915/display/intel_drrs.h
@@ -9,12 +9,15 @@
#include <linux/types.h>
enum drrs_type;
+enum transcoder;
struct drm_i915_private;
struct intel_atomic_state;
struct intel_crtc;
struct intel_crtc_state;
struct intel_connector;
+bool intel_cpu_transcoder_has_drrs(struct drm_i915_private *i915,
+ enum transcoder cpu_transcoder);
const char *intel_drrs_type_str(enum drrs_type drrs_type);
bool intel_drrs_is_active(struct intel_crtc *crtc);
void intel_drrs_activate(const struct intel_crtc_state *crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index d62e050185e7c3..e4515bf920388e 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -340,6 +340,17 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state)
return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency));
}
+static u32 dsb_chicken(struct intel_crtc *crtc)
+{
+ if (crtc->mode_flags & I915_MODE_FLAG_VRR)
+ return DSB_CTRL_WAIT_SAFE_WINDOW |
+ DSB_CTRL_NO_WAIT_VBLANK |
+ DSB_INST_WAIT_SAFE_WINDOW |
+ DSB_INST_NO_WAIT_VBLANK;
+ else
+ return 0;
+}
+
static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
int dewake_scanline)
{
@@ -361,6 +372,9 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
intel_de_write_fw(dev_priv, DSB_CTRL(pipe, dsb->id),
ctrl | DSB_ENABLE);
+ intel_de_write_fw(dev_priv, DSB_CHICKEN(pipe, dsb->id),
+ dsb_chicken(crtc));
+
intel_de_write_fw(dev_priv, DSB_HEAD(pipe, dsb->id),
intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf));
diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index 7b42aef37d2f72..b6df9baf481b69 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -255,6 +255,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state)
return PTR_ERR(vma);
plane_state->ggtt_vma = vma;
+
+ /*
+ * Pre-populate the dma address before we enter the vblank
+ * evade critical section as i915_gem_object_get_dma_address()
+ * will trigger might_sleep() even if it won't actually sleep,
+ * which is the case when the fb has already been pinned.
+ */
+ if (phys_cursor)
+ plane_state->phys_dma_addr =
+ i915_gem_object_get_dma_address(intel_fb_obj(fb), 0);
} else {
struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 6927785fd6ff2f..aabd018bd73743 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1422,6 +1422,17 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
return;
}
+ /*
+ * FIXME figure out what is wrong with PSR+bigjoiner and
+ * fix it. Presumably something related to the fact that
+ * PSR is a transcoder level feature.
+ */
+ if (crtc_state->bigjoiner_pipes) {
+ drm_dbg_kms(&dev_priv->drm,
+ "PSR disabled due to bigjoiner\n");
+ return;
+ }
+
if (CAN_PANEL_REPLAY(intel_dp))
crtc_state->has_panel_replay = true;
else
@@ -1994,6 +2005,7 @@ static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp)
void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state)
{
+ struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
struct intel_encoder *encoder;
@@ -2013,6 +2025,12 @@ void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_st
intel_de_write(dev_priv, PSR2_MAN_TRK_CTL(cpu_transcoder),
crtc_state->psr2_man_track_ctl);
+
+ if (!crtc_state->enable_psr2_su_region_et)
+ return;
+
+ intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(crtc->pipe),
+ crtc_state->pipe_srcsz_early_tpt);
}
static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state,
@@ -2051,6 +2069,20 @@ exit:
crtc_state->psr2_man_track_ctl = val;
}
+static u32 psr2_pipe_srcsz_early_tpt_calc(struct intel_crtc_state *crtc_state,
+ bool full_update)
+{
+ int width, height;
+
+ if (!crtc_state->enable_psr2_su_region_et || full_update)
+ return 0;
+
+ width = drm_rect_width(&crtc_state->psr2_su_area);
+ height = drm_rect_height(&crtc_state->psr2_su_area);
+
+ return PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1);
+}
+
static void clip_area_update(struct drm_rect *overlap_damage_area,
struct drm_rect *damage_area,
struct drm_rect *pipe_src)
@@ -2095,21 +2127,36 @@ static void intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_st
* cursor fully when cursor is in SU area.
*/
static void
-intel_psr2_sel_fetch_et_alignment(struct intel_crtc_state *crtc_state,
- struct intel_plane_state *cursor_state)
+intel_psr2_sel_fetch_et_alignment(struct intel_atomic_state *state,
+ struct intel_crtc *crtc)
{
- struct drm_rect inter;
+ struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
+ struct intel_plane_state *new_plane_state;
+ struct intel_plane *plane;
+ int i;
- if (!crtc_state->enable_psr2_su_region_et ||
- !cursor_state->uapi.visible)
+ if (!crtc_state->enable_psr2_su_region_et)
return;
- inter = crtc_state->psr2_su_area;
- if (!drm_rect_intersect(&inter, &cursor_state->uapi.dst))
- return;
+ for_each_new_intel_plane_in_state(state, plane, new_plane_state, i) {
+ struct drm_rect inter;
+
+ if (new_plane_state->uapi.crtc != crtc_state->uapi.crtc)
+ continue;
+
+ if (plane->id != PLANE_CURSOR)
+ continue;
+
+ if (!new_plane_state->uapi.visible)
+ continue;
- clip_area_update(&crtc_state->psr2_su_area, &cursor_state->uapi.dst,
- &crtc_state->pipe_src);
+ inter = crtc_state->psr2_su_area;
+ if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst))
+ continue;
+
+ clip_area_update(&crtc_state->psr2_su_area, &new_plane_state->uapi.dst,
+ &crtc_state->pipe_src);
+ }
}
/*
@@ -2152,8 +2199,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
{
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
- struct intel_plane_state *new_plane_state, *old_plane_state,
- *cursor_plane_state = NULL;
+ struct intel_plane_state *new_plane_state, *old_plane_state;
struct intel_plane *plane;
bool full_update = false;
int i, ret;
@@ -2238,13 +2284,6 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
damaged_area.x2 += new_plane_state->uapi.dst.x1 - src.x1;
clip_area_update(&crtc_state->psr2_su_area, &damaged_area, &crtc_state->pipe_src);
-
- /*
- * Cursor plane new state is stored to adjust su area to cover
- * cursor are fully.
- */
- if (plane->id == PLANE_CURSOR)
- cursor_plane_state = new_plane_state;
}
/*
@@ -2273,9 +2312,13 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
if (ret)
return ret;
- /* Adjust su area to cover cursor fully as necessary */
- if (cursor_plane_state)
- intel_psr2_sel_fetch_et_alignment(crtc_state, cursor_plane_state);
+ /*
+ * Adjust su area to cover cursor fully as necessary (early
+ * transport). This needs to be done after
+ * drm_atomic_add_affected_planes to ensure visible cursor is added into
+ * affected planes even when cursor is not updated by itself.
+ */
+ intel_psr2_sel_fetch_et_alignment(state, crtc);
intel_psr2_sel_fetch_pipe_alignment(crtc_state);
@@ -2338,6 +2381,8 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
skip_sel_fetch_set_loop:
psr2_man_trk_ctl_calc(crtc_state, full_update);
+ crtc_state->pipe_srcsz_early_tpt =
+ psr2_pipe_srcsz_early_tpt_calc(crtc_state, full_update);
return 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index 5f9e748adc89ee..0cd9c183f6212f 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -1842,8 +1842,6 @@ static void intel_disable_sdvo(struct intel_atomic_state *state,
struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->uapi.crtc);
u32 temp;
- encoder->audio_disable(encoder, old_crtc_state, conn_state);
-
intel_sdvo_set_active_outputs(intel_sdvo, 0);
if (0)
intel_sdvo_set_encoder_power_state(intel_sdvo,
@@ -1935,8 +1933,6 @@ static void intel_enable_sdvo(struct intel_atomic_state *state,
intel_sdvo_set_encoder_power_state(intel_sdvo,
DRM_MODE_DPMS_ON);
intel_sdvo_set_active_outputs(intel_sdvo, intel_sdvo_connector->output_flag);
-
- encoder->audio_enable(encoder, pipe_config, conn_state);
}
static enum drm_mode_status
diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c
index 5d905f932cb4b3..f542ee1db1d970 100644
--- a/drivers/gpu/drm/i915/display/intel_vrr.c
+++ b/drivers/gpu/drm/i915/display/intel_vrr.c
@@ -117,6 +117,13 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state,
const struct drm_display_info *info = &connector->base.display_info;
int vmin, vmax;
+ /*
+ * FIXME all joined pipes share the same transcoder.
+ * Need to account for that during VRR toggle/push/etc.
+ */
+ if (crtc_state->bigjoiner_pipes)
+ return;
+
if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
return;
@@ -187,10 +194,11 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state)
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
/*
- * TRANS_SET_CONTEXT_LATENCY with VRR enabled
- * requires this chicken bit on ADL/DG2.
+ * This bit seems to have two meanings depending on the platform:
+ * TGL: generate VRR "safe window" for DSB vblank waits
+ * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR
*/
- if (DISPLAY_VER(dev_priv) == 13)
+ if (IS_DISPLAY_VER(dev_priv, 12, 13))
intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder),
0, PIPE_VBLANK_WITH_DELAY);
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index e941e2e4fd14c2..860574d04f881a 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -2295,6 +2295,9 @@ static u8 skl_get_plane_caps(struct drm_i915_private *i915,
if (HAS_4TILE(i915))
caps |= INTEL_PLANE_CAP_TILING_4;
+ if (!IS_ENABLED(I915) && !HAS_FLAT_CCS(i915))
+ return caps;
+
if (skl_plane_has_rc_ccs(i915, pipe, plane_id)) {
caps |= INTEL_PLANE_CAP_CCS_RC;
if (DISPLAY_VER(i915) >= 12)
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index fa46d2308b0ed3..81bf2216371be6 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -961,6 +961,9 @@ static int gen8_init_rsvd(struct i915_address_space *vm)
struct i915_vma *vma;
int ret;
+ if (!intel_gt_needs_wa_16018031267(vm->gt))
+ return 0;
+
/* The memory will be used only by GPU. */
obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
I915_BO_ALLOC_VOLATILE |
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 1ade568ffbfa43..7a6dc371c384eb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -908,6 +908,23 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
info->engine_mask &= ~BIT(GSC0);
}
+ /*
+ * Do not create the command streamer for CCS slices beyond the first.
+ * All the workload submitted to the first engine will be shared among
+ * all the slices.
+ *
+ * Once the user will be allowed to customize the CCS mode, then this
+ * check needs to be removed.
+ */
+ if (IS_DG2(gt->i915)) {
+ u8 first_ccs = __ffs(CCS_MASK(gt));
+
+ /* Mask off all the CCS engine */
+ info->engine_mask &= ~GENMASK(CCS3, CCS0);
+ /* Put back in the first CCS engine */
+ info->engine_mask |= BIT(_CCS(first_ccs));
+ }
+
return info->engine_mask;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 96bdb93a948d1b..fb7bff27b45a34 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -279,9 +279,6 @@ static int __engine_park(struct intel_wakeref *wf)
intel_engine_park_heartbeat(engine);
intel_breadcrumbs_park(engine->breadcrumbs);
- /* Must be reset upon idling, or we may miss the busy wakeup. */
- GEM_BUG_ON(engine->sched_engine->queue_priority_hint != INT_MIN);
-
if (engine->park)
engine->park(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 42aade0faf2d14..b061a0a0d6b082 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3272,6 +3272,9 @@ static void execlists_park(struct intel_engine_cs *engine)
{
cancel_timer(&engine->execlists.timer);
cancel_timer(&engine->execlists.preempt);
+
+ /* Reset upon idling, or we may delay the busy wakeup. */
+ WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN);
}
static void add_to_engine(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index a425db5ed3a22c..6a2c2718bcc38e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -1024,6 +1024,12 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
return I915_MAP_WC;
}
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt)
+{
+ /* Wa_16018031267, Wa_16018063123 */
+ return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71));
+}
+
bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
{
return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 608f5c87292857..003eb93b826fd0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -82,17 +82,18 @@ struct drm_printer;
##__VA_ARGS__); \
} while (0)
-#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
- IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \
- engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
-
static inline bool gt_is_root(struct intel_gt *gt)
{
return !gt->info.id;
}
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt);
bool intel_gt_needs_wa_22016122933(struct intel_gt *gt);
+#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
+ intel_gt_needs_wa_16018031267(engine->gt) && \
+ engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
+
static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
{
return container_of(uc, struct intel_gt, uc);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644
index 00000000000000..044219c5960a53
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+ int cslice;
+ u32 mode = 0;
+ int first_ccs = __ffs(CCS_MASK(gt));
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /* Build the value for the fixed CCS load balancing */
+ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+ if (CCS_MASK(gt) & BIT(cslice))
+ /*
+ * If available, assign the cslice
+ * to the first available engine...
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+ else
+ /*
+ * ... otherwise, mark the cslice as
+ * unavailable if no CCS dispatches here
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice,
+ XEHP_CCS_MODE_CSLICE_MASK);
+ }
+
+ intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
new file mode 100644
index 00000000000000..9e5549caeb2693
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 50962cfd1353ae..743fe356672274 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1477,8 +1477,14 @@
#define ECOBITS_PPGTT_CACHE4B (0 << 8)
#define GEN12_RCU_MODE _MMIO(0x14800)
+#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0)
+#define XEHP_CCS_MODE _MMIO(0x14804)
+#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */
+#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1)
+#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH))
+
#define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168)
#define CHV_FGT_DISABLE_SS0 (1 << 10)
#define CHV_FGT_DISABLE_SS1 (1 << 11)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index d67d44611c2834..6ec3582c973577 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -10,6 +10,7 @@
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
@@ -51,7 +52,8 @@
* registers belonging to BCS, VCS or VECS should be implemented in
* xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
* engine's MMIO range but that are part of of the common RCS/CCS reset domain
- * should be implemented in general_render_compute_wa_init().
+ * should be implemented in general_render_compute_wa_init(). The settings
+ * about the CCS load balancing should be added in ccs_engine_wa_mode().
*
* - GT workarounds: the list of these WAs is applied whenever these registers
* revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -1653,6 +1655,7 @@ static void
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
/* Wa_14018575942 / Wa_18018781329 */
+ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
/* Wa_22016670082 */
@@ -2853,6 +2856,28 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
}
+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+ struct intel_gt *gt = engine->gt;
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /*
+ * Wa_14019159160: This workaround, along with others, leads to
+ * significant challenges in utilizing load balancing among the
+ * CCS slices. Consequently, an architectural decision has been
+ * made to completely disable automatic CCS load balancing.
+ */
+ wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+
+ /*
+ * After having disabled automatic load balancing we need to
+ * assign all slices to a single CCS. We will call it CCS mode 1
+ */
+ intel_gt_apply_ccs_mode(gt);
+}
+
/*
* The workarounds in this function apply to shared registers in
* the general render reset domain that aren't tied to a
@@ -3003,8 +3028,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
* to a single RCS/CCS engine's workaround list since
* they're reset as part of the general render domain reset.
*/
- if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
general_render_compute_wa_init(engine, wal);
+ ccs_engine_wa_mode(engine, wal);
+ }
if (engine->class == COMPUTE_CLASS)
ccs_engine_wa_init(engine, wal);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f3dcae4b9d455e..0f83c6d4376ffb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1403,14 +1403,17 @@ static void guc_cancel_busyness_worker(struct intel_guc *guc)
* Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
* every possible call stack is unfeasible. It would be too intrusive to many
* areas that really don't care about the GuC backend. However, there is the
- * 'reset_in_progress' flag available, so just use that.
+ * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked.
+ * So just use those. Note that testing both is required due to the hideously
+ * complex nature of the i915 driver's reset code paths.
*
* And note that in the case of a reset occurring during driver unload
- * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set
- * is fine because there is another cancel in _finish (when the reset flag is
- * not).
+ * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the
+ * reset flag/mutex are set) is fine because there is another explicit cancel in
+ * intel_guc_submission_fini (when the reset flag/mutex are not).
*/
- if (guc_to_gt(guc)->uc.reset_in_progress)
+ if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
+ test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
cancel_delayed_work(&guc->timestamp.work);
else
cancel_delayed_work_sync(&guc->timestamp.work);
@@ -1424,8 +1427,6 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
unsigned long flags;
ktime_t unused;
- guc_cancel_busyness_worker(guc);
-
spin_lock_irqsave(&guc->timestamp.lock, flags);
guc_update_pm_timestamp(guc, &unused);
@@ -2004,13 +2005,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
- /*
- * Ensure the busyness worker gets cancelled even on a fatal wedge.
- * Note that reset_prepare is not allowed to because it confuses lockdep.
- */
- if (guc_submission_initialized(guc))
- guc_cancel_busyness_worker(guc);
-
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
!intel_guc_is_fw_running(guc) ||
@@ -2136,6 +2130,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
if (!guc->submission_initialized)
return;
+ guc_fini_engine_stats(guc);
guc_flush_destroyed_contexts(guc);
guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 6dfe5d9456c69e..399bc319180b04 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -637,6 +637,10 @@ void intel_uc_reset_finish(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
+ /*
+ * NB: The wedge code path results in prepare -> prepare -> finish -> finish.
+ * So this function is sometimes called with the in-progress flag not set.
+ */
uc->reset_in_progress = false;
/* Firmware expected to be running when this function is called */
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 9ee902d5b72c49..4b9233c07a22c6 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -800,7 +800,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto out_cleanup_modeset2;
ret = intel_pxp_init(i915);
- if (ret != -ENODEV)
+ if (ret && ret != -ENODEV)
drm_dbg(&i915->drm, "pxp init failed with %d\n", ret);
ret = intel_display_driver_probe(i915);
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
index 8c3f443c8347e0..b758fd110c2045 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -72,12 +72,13 @@ hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat,
struct intel_uncore *uncore = ddat->uncore;
intel_wakeref_t wakeref;
- mutex_lock(&hwmon->hwmon_lock);
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
+ mutex_lock(&hwmon->hwmon_lock);
- with_intel_runtime_pm(uncore->rpm, wakeref)
intel_uncore_rmw(uncore, reg, clear, set);
- mutex_unlock(&hwmon->hwmon_lock);
+ mutex_unlock(&hwmon->hwmon_lock);
+ }
}
/*
@@ -136,20 +137,21 @@ hwm_energy(struct hwm_drvdata *ddat, long *energy)
else
rgaddr = hwmon->rg.energy_status_all;
- mutex_lock(&hwmon->hwmon_lock);
+ with_intel_runtime_pm(uncore->rpm, wakeref) {
+ mutex_lock(&hwmon->hwmon_lock);
- with_intel_runtime_pm(uncore->rpm, wakeref)
reg_val = intel_uncore_read(uncore, rgaddr);
- if (reg_val >= ei->reg_val_prev)
- ei->accum_energy += reg_val - ei->reg_val_prev;
- else
- ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
- ei->reg_val_prev = reg_val;
+ if (reg_val >= ei->reg_val_prev)
+ ei->accum_energy += reg_val - ei->reg_val_prev;
+ else
+ ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
+ ei->reg_val_prev = reg_val;
- *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
- hwmon->scl_shift_energy);
- mutex_unlock(&hwmon->hwmon_lock);
+ *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
+ hwmon->scl_shift_energy);
+ mutex_unlock(&hwmon->hwmon_lock);
+ }
}
static ssize_t
@@ -404,6 +406,7 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
/* Block waiting for GuC reset to complete when needed */
for (;;) {
+ wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
mutex_lock(&hwmon->hwmon_lock);
prepare_to_wait(&ddat->waitq, &wait, TASK_INTERRUPTIBLE);
@@ -417,14 +420,13 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
}
mutex_unlock(&hwmon->hwmon_lock);
+ intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
schedule();
}
finish_wait(&ddat->waitq, &wait);
if (ret)
- goto unlock;
-
- wakeref = intel_runtime_pm_get(ddat->uncore->rpm);
+ goto exit;
/* Disable PL1 limit and verify, because the limit cannot be disabled on all platforms */
if (val == PL1_DISABLE) {
@@ -444,9 +446,8 @@ hwm_power_max_write(struct hwm_drvdata *ddat, long val)
intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
exit:
- intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
-unlock:
mutex_unlock(&hwmon->hwmon_lock);
+ intel_runtime_pm_put(ddat->uncore->rpm, wakeref);
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c
index ba82277254b762..cc41974cee7462 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -25,6 +25,8 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/cpufeature.h>
+#include <linux/bug.h>
+#include <linux/build_bug.h>
#include <asm/fpu/api.h>
#include "i915_memcpy.h"
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e00557e1a57f0c..3b2e49ce29ba03 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4599,7 +4599,7 @@
#define MTL_CHICKEN_TRANS(trans) _MMIO_TRANS((trans), \
_MTL_CHICKEN_TRANS_A, \
_MTL_CHICKEN_TRANS_B)
-#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* ADL/DG2 */
+#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* tgl+ */
#define SKL_UNMASK_VBL_TO_PIPE_IN_SRD REG_BIT(30) /* skl+ */
#define HSW_FRAME_START_DELAY_MASK REG_GENMASK(28, 27)
#define HSW_FRAME_START_DELAY(x) REG_FIELD_PREP(HSW_FRAME_START_DELAY_MASK, x)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d09aad34ba37fa..b70715b1411d67 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -34,6 +34,7 @@
#include "gt/intel_engine.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_tlb.h"
@@ -103,12 +104,42 @@ static inline struct i915_vma *active_to_vma(struct i915_active *ref)
static int __i915_vma_active(struct i915_active *ref)
{
- return i915_vma_tryget(active_to_vma(ref)) ? 0 : -ENOENT;
+ struct i915_vma *vma = active_to_vma(ref);
+
+ if (!i915_vma_tryget(vma))
+ return -ENOENT;
+
+ /*
+ * Exclude global GTT VMA from holding a GT wakeref
+ * while active, otherwise GPU never goes idle.
+ */
+ if (!i915_vma_is_ggtt(vma)) {
+ /*
+ * Since we and our _retire() counterpart can be
+ * called asynchronously, storing a wakeref tracking
+ * handle inside struct i915_vma is not safe, and
+ * there is no other good place for that. Hence,
+ * use untracked variants of intel_gt_pm_get/put().
+ */
+ intel_gt_pm_get_untracked(vma->vm->gt);
+ }
+
+ return 0;
}
static void __i915_vma_retire(struct i915_active *ref)
{
- i915_vma_put(active_to_vma(ref));
+ struct i915_vma *vma = active_to_vma(ref);
+
+ if (!i915_vma_is_ggtt(vma)) {
+ /*
+ * Since we can be called from atomic contexts,
+ * use an async variant of intel_gt_pm_put().
+ */
+ intel_gt_pm_put_async_untracked(vma->vm->gt);
+ }
+
+ i915_vma_put(vma);
}
static struct i915_vma *
@@ -1404,7 +1435,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
struct i915_vma_work *work = NULL;
struct dma_fence *moving = NULL;
struct i915_vma_resource *vma_res = NULL;
- intel_wakeref_t wakeref = 0;
+ intel_wakeref_t wakeref;
unsigned int bound;
int err;
@@ -1424,8 +1455,14 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
if (err)
return err;
- if (flags & PIN_GLOBAL)
- wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
+ /*
+ * In case of a global GTT, we must hold a runtime-pm wakeref
+ * while global PTEs are updated. In other cases, we hold
+ * the rpm reference while the VMA is active. Since runtime
+ * resume may require allocations, which are forbidden inside
+ * vm->mutex, get the first rpm wakeref outside of the mutex.
+ */
+ wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
if (flags & vma->vm->bind_async_flags) {
/* lock VM */
@@ -1561,8 +1598,7 @@ err_fence:
if (work)
dma_fence_work_commit_imm(&work->base);
err_rpm:
- if (wakeref)
- intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
if (moving)
dma_fence_put(moving);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 0674aca0f8a3f5..cf0b1de1c07124 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1377,6 +1377,10 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
if (adreno_is_a618(gpu))
gpu->ubwc_config.highest_bank_bit = 14;
+ if (adreno_is_a619(gpu))
+ /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
+ gpu->ubwc_config.highest_bank_bit = 13;
+
if (adreno_is_a619_holi(gpu))
gpu->ubwc_config.highest_bank_bit = 13;
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 1f5245fc2cdc6c..a847a0f7a73c9f 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -852,7 +852,7 @@ static void a6xx_get_shader_block(struct msm_gpu *gpu,
(block->type << 8) | i);
in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
- block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
+ block->size, out);
out += block->size * sizeof(u32);
}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
index 9a9f7092c526a6..a3e60ac70689e7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
@@ -324,6 +324,7 @@ static const struct dpu_wb_cfg x1e80100_wb[] = {
},
};
+/* TODO: INTF 3, 8 and 7 are used for MST, marked as INTF_NONE for now */
static const struct dpu_intf_cfg x1e80100_intf[] = {
{
.name = "intf_0", .id = INTF_0,
@@ -358,8 +359,8 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
.name = "intf_3", .id = INTF_3,
.base = 0x37000, .len = 0x280,
.features = INTF_SC7280_MASK,
- .type = INTF_DP,
- .controller_id = MSM_DP_CONTROLLER_1,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_0, /* pair with intf_0 for DP MST */
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31),
@@ -368,7 +369,7 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
.base = 0x38000, .len = 0x280,
.features = INTF_SC7280_MASK,
.type = INTF_DP,
- .controller_id = MSM_DP_CONTROLLER_2,
+ .controller_id = MSM_DP_CONTROLLER_1,
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 20),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 21),
@@ -381,6 +382,33 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
.prog_fetch_lines_worst_case = 24,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 22),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 23),
+ }, {
+ .name = "intf_6", .id = INTF_6,
+ .base = 0x3A000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_DP,
+ .controller_id = MSM_DP_CONTROLLER_2,
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16),
+ }, {
+ .name = "intf_7", .id = INTF_7,
+ .base = 0x3b000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_2, /* pair with intf_6 for DP MST */
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 18),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 19),
+ }, {
+ .name = "intf_8", .id = INTF_8,
+ .base = 0x3c000, .len = 0x280,
+ .features = INTF_SC7280_MASK,
+ .type = INTF_NONE,
+ .controller_id = MSM_DP_CONTROLLER_1, /* pair with intf_4 for DP MST */
+ .prog_fetch_lines_worst_case = 24,
+ .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12),
+ .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13),
},
};
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
index ef871239adb2a3..68fae048a9a837 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
@@ -459,15 +459,15 @@ int dpu_core_perf_debugfs_init(struct dpu_kms *dpu_kms, struct dentry *parent)
&perf->core_clk_rate);
debugfs_create_u32("enable_bw_release", 0600, entry,
(u32 *)&perf->enable_bw_release);
- debugfs_create_u32("threshold_low", 0600, entry,
+ debugfs_create_u32("threshold_low", 0400, entry,
(u32 *)&perf->perf_cfg->max_bw_low);
- debugfs_create_u32("threshold_high", 0600, entry,
+ debugfs_create_u32("threshold_high", 0400, entry,
(u32 *)&perf->perf_cfg->max_bw_high);
- debugfs_create_u32("min_core_ib", 0600, entry,
+ debugfs_create_u32("min_core_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_core_ib);
- debugfs_create_u32("min_llcc_ib", 0600, entry,
+ debugfs_create_u32("min_llcc_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_llcc_ib);
- debugfs_create_u32("min_dram_ib", 0600, entry,
+ debugfs_create_u32("min_dram_ib", 0400, entry,
(u32 *)&perf->perf_cfg->min_dram_ib);
debugfs_create_file("perf_mode", 0600, entry,
(u32 *)perf, &dpu_core_perf_mode_fops);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
index 946dd0135dffcf..6a0a74832fb64d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
@@ -525,14 +525,14 @@ int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms,
int ret;
if (!irq_cb) {
- DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
- DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
+ DPU_ERROR("IRQ=[%d, %d] NULL callback\n",
+ DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
return -EINVAL;
}
if (!dpu_core_irq_is_valid(irq_idx)) {
- DPU_ERROR("invalid IRQ=[%d, %d]\n",
- DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
+ DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
+ DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index c4cb82af5c2f2f..ffbfde92258986 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -484,7 +484,7 @@ static void dp_display_handle_video_request(struct dp_display_private *dp)
}
}
-static int dp_display_handle_port_ststus_changed(struct dp_display_private *dp)
+static int dp_display_handle_port_status_changed(struct dp_display_private *dp)
{
int rc = 0;
@@ -541,7 +541,7 @@ static int dp_display_usbpd_attention_cb(struct device *dev)
drm_dbg_dp(dp->drm_dev, "hpd_state=%d sink_request=%d\n",
dp->hpd_state, sink_request);
if (sink_request & DS_PORT_STATUS_CHANGED)
- rc = dp_display_handle_port_ststus_changed(dp);
+ rc = dp_display_handle_port_status_changed(dp);
else
rc = dp_display_handle_irq_hpd(dp);
}
@@ -588,6 +588,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
ret = dp_display_usbpd_configure_cb(&pdev->dev);
if (ret) { /* link train failed */
dp->hpd_state = ST_DISCONNECTED;
+ pm_runtime_put_sync(&pdev->dev);
} else {
dp->hpd_state = ST_MAINLINK_READY;
}
@@ -645,6 +646,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
dp_display_host_phy_exit(dp);
dp->hpd_state = ST_DISCONNECTED;
dp_display_notify_disconnect(&dp->dp_display.pdev->dev);
+ pm_runtime_put_sync(&pdev->dev);
mutex_unlock(&dp->event_mutex);
return 0;
}
diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
index e3f61c39df69b4..80166f702a0dba 100644
--- a/drivers/gpu/drm/msm/msm_fb.c
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -89,7 +89,7 @@ int msm_framebuffer_prepare(struct drm_framebuffer *fb,
for (i = 0; i < n; i++) {
ret = msm_gem_get_and_pin_iova(fb->obj[i], aspace, &msm_fb->iova[i]);
- drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)",
+ drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)\n",
fb->base.id, i, msm_fb->iova[i], ret);
if (ret)
return ret;
@@ -176,7 +176,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
const struct msm_format *format;
int ret, i, n;
- drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)",
+ drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)\n",
mode_cmd, mode_cmd->width, mode_cmd->height,
(char *)&mode_cmd->pixel_format);
@@ -232,7 +232,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
refcount_set(&msm_fb->dirtyfb, 1);
- drm_dbg_state(dev, "create: FB ID: %d (%p)", fb->base.id, fb);
+ drm_dbg_state(dev, "create: FB ID: %d (%p)\n", fb->base.id, fb);
return fb;
diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c
index 84c21ec2ceeae0..af6a6fcb11736f 100644
--- a/drivers/gpu/drm/msm/msm_kms.c
+++ b/drivers/gpu/drm/msm/msm_kms.c
@@ -149,7 +149,7 @@ int msm_crtc_enable_vblank(struct drm_crtc *crtc)
struct msm_kms *kms = priv->kms;
if (!kms)
return -ENXIO;
- drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+ drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
return vblank_ctrl_queue_work(priv, crtc, true);
}
@@ -160,7 +160,7 @@ void msm_crtc_disable_vblank(struct drm_crtc *crtc)
struct msm_kms *kms = priv->kms;
if (!kms)
return;
- drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+ drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
vblank_ctrl_queue_work(priv, crtc, false);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 479effcf607e26..79cfab53f80e25 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -23,6 +23,7 @@
*/
#include "nouveau_drv.h"
+#include "nouveau_bios.h"
#include "nouveau_reg.h"
#include "dispnv04/hw.h"
#include "nouveau_encoder.h"
@@ -1677,7 +1678,7 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf)
*/
if (nv_match_device(dev, 0x0201, 0x1462, 0x8851)) {
if (*conn == 0xf2005014 && *conf == 0xffffffff) {
- fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 1);
+ fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, DCB_OUTPUT_B);
return false;
}
}
@@ -1763,26 +1764,26 @@ fabricate_dcb_encoder_table(struct drm_device *dev, struct nvbios *bios)
#ifdef __powerpc__
/* Apple iMac G4 NV17 */
if (of_machine_is_compatible("PowerMac4,5")) {
- fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, 1);
- fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, 2);
+ fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, DCB_OUTPUT_B);
+ fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, DCB_OUTPUT_C);
return;
}
#endif
/* Make up some sane defaults */
fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG,
- bios->legacy.i2c_indices.crt, 1, 1);
+ bios->legacy.i2c_indices.crt, 1, DCB_OUTPUT_B);
if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0)
fabricate_dcb_output(dcb, DCB_OUTPUT_TV,
bios->legacy.i2c_indices.tv,
- all_heads, 0);
+ all_heads, DCB_OUTPUT_A);
else if (bios->tmds.output0_script_ptr ||
bios->tmds.output1_script_ptr)
fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS,
bios->legacy.i2c_indices.panel,
- all_heads, 1);
+ all_heads, DCB_OUTPUT_B);
}
static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 12feecf71e752d..6fb65b01d77804 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -378,9 +378,9 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
dma_addr_t *dma_addrs;
struct nouveau_fence *fence;
- src_pfns = kcalloc(npages, sizeof(*src_pfns), GFP_KERNEL);
- dst_pfns = kcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL);
- dma_addrs = kcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL);
+ src_pfns = kvcalloc(npages, sizeof(*src_pfns), GFP_KERNEL | __GFP_NOFAIL);
+ dst_pfns = kvcalloc(npages, sizeof(*dst_pfns), GFP_KERNEL | __GFP_NOFAIL);
+ dma_addrs = kvcalloc(npages, sizeof(*dma_addrs), GFP_KERNEL | __GFP_NOFAIL);
migrate_device_range(src_pfns, chunk->pagemap.range.start >> PAGE_SHIFT,
npages);
@@ -406,11 +406,11 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
migrate_device_pages(src_pfns, dst_pfns, npages);
nouveau_dmem_fence_done(&fence);
migrate_device_finalize(src_pfns, dst_pfns, npages);
- kfree(src_pfns);
- kfree(dst_pfns);
+ kvfree(src_pfns);
+ kvfree(dst_pfns);
for (i = 0; i < npages; i++)
dma_unmap_page(chunk->drm->dev->dev, dma_addrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
- kfree(dma_addrs);
+ kvfree(dma_addrs);
}
void
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 7de7707ec6a895..a72c45809484ab 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -225,12 +225,18 @@ nouveau_dp_detect(struct nouveau_connector *nv_connector,
u8 *dpcd = nv_encoder->dp.dpcd;
int ret = NOUVEAU_DP_NONE, hpd;
- /* If we've already read the DPCD on an eDP device, we don't need to
- * reread it as it won't change
+ /* eDP ports don't support hotplugging - so there's no point in probing eDP ports unless we
+ * haven't probed them once before.
*/
- if (connector->connector_type == DRM_MODE_CONNECTOR_eDP &&
- dpcd[DP_DPCD_REV] != 0)
- return NOUVEAU_DP_SST;
+ if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+ if (connector->status == connector_status_connected)
+ return NOUVEAU_DP_SST;
+ else if (connector->status == connector_status_disconnected)
+ return NOUVEAU_DP_NONE;
+ }
+
+ // Ensure that the aux bus is enabled for probing
+ drm_dp_dpcd_set_powered(&nv_connector->aux, true);
mutex_lock(&nv_encoder->dp.hpd_irq_lock);
if (mstm) {
@@ -293,6 +299,13 @@ out:
if (mstm && !mstm->suspended && ret != NOUVEAU_DP_MST)
nv50_mstm_remove(mstm);
+ /* GSP doesn't like when we try to do aux transactions on a port it considers disconnected,
+ * and since we don't really have a usecase for that anyway - just disable the aux bus here
+ * if we've decided the connector is disconnected
+ */
+ if (ret == NOUVEAU_DP_NONE)
+ drm_dp_dpcd_set_powered(&nv_connector->aux, false);
+
mutex_unlock(&nv_encoder->dp.hpd_irq_lock);
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 0a0a11dc9ec03e..ee02cd833c5e43 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -812,15 +812,15 @@ op_remap(struct drm_gpuva_op_remap *r,
struct drm_gpuva_op_unmap *u = r->unmap;
struct nouveau_uvma *uvma = uvma_from_va(u->va);
u64 addr = uvma->va.va.addr;
- u64 range = uvma->va.va.range;
+ u64 end = uvma->va.va.addr + uvma->va.va.range;
if (r->prev)
addr = r->prev->va.addr + r->prev->va.range;
if (r->next)
- range = r->next->va.addr - addr;
+ end = r->next->va.addr;
- op_unmap_range(u, addr, range);
+ op_unmap_range(u, addr, end - addr);
}
static int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 986e8d547c9424..060c74a80eb14b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -420,7 +420,7 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
return ret;
} else {
ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb,
- &args, sizeof(args));;
+ &args, sizeof(args));
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
index 4bf486b5710136..cb05f7f48a98bb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
@@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name)
return ERR_PTR(-EINVAL);
}
+static void of_fini(void *p)
+{
+ kfree(p);
+}
+
const struct nvbios_source
nvbios_of = {
.name = "OpenFirmware",
.init = of_init,
- .fini = (void(*)(void *))kfree,
+ .fini = of_fini,
.read = of_read,
.size = of_size,
.rw = false,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
index 7bcbc4895ec221..271bfa038f5bc9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
@@ -25,6 +25,7 @@
#include <subdev/bios.h>
#include <subdev/bios/init.h>
+#include <subdev/gsp.h>
void
gm107_devinit_disable(struct nvkm_devinit *init)
@@ -33,10 +34,13 @@ gm107_devinit_disable(struct nvkm_devinit *init)
u32 r021c00 = nvkm_rd32(device, 0x021c00);
u32 r021c04 = nvkm_rd32(device, 0x021c04);
- if (r021c00 & 0x00000001)
- nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
- if (r021c00 & 0x00000004)
- nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+ /* gsp only wants to enable/disable display */
+ if (!nvkm_gsp_rm(device->gsp)) {
+ if (r021c00 & 0x00000001)
+ nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
+ if (r021c00 & 0x00000004)
+ nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+ }
if (r021c04 & 0x00000001)
nvkm_subdev_disable(device, NVKM_ENGINE_DISP, 0);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c
index 11b4c9c274a1a5..666eb93b1742ca 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c
@@ -41,6 +41,7 @@ r535_devinit_new(const struct nvkm_devinit_func *hw,
rm->dtor = r535_devinit_dtor;
rm->post = hw->post;
+ rm->disable = hw->disable;
ret = nv50_devinit_new_(rm, device, type, inst, pdevinit);
if (ret)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 9994cbd6f1c40c..9858c1438aa7fe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -1112,7 +1112,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
rpc->numEntries = NV_GSP_REG_NUM_ENTRIES;
str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]);
- strings = (char *)&rpc->entries[NV_GSP_REG_NUM_ENTRIES];
+ strings = (char *)rpc + str_offset;
for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++) {
int name_len = strlen(r535_registry_entries[i].name) + 1;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
index a7f3fc342d87e0..dd5b5a17ece0be 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
@@ -222,8 +222,11 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
void __iomem *map = NULL;
/* Already mapped? */
- if (refcount_inc_not_zero(&iobj->maps))
+ if (refcount_inc_not_zero(&iobj->maps)) {
+ /* read barrier match the wmb on refcount set */
+ smp_rmb();
return iobj->map;
+ }
/* Take the lock, and re-check that another thread hasn't
* already mapped the object in the meantime.
@@ -250,6 +253,8 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
iobj->base.memory.ptrs = &nv50_instobj_fast;
else
iobj->base.memory.ptrs = &nv50_instobj_slow;
+ /* barrier to ensure the ptrs are written before refcount is set */
+ smp_wmb();
refcount_set(&iobj->maps, 1);
}
diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36672e.c b/drivers/gpu/drm/panel/panel-novatek-nt36672e.c
index cb7406d7446695..c39fe0fc5d69c6 100644
--- a/drivers/gpu/drm/panel/panel-novatek-nt36672e.c
+++ b/drivers/gpu/drm/panel/panel-novatek-nt36672e.c
@@ -614,8 +614,6 @@ static void nt36672e_panel_remove(struct mipi_dsi_device *dsi)
struct nt36672e_panel *ctx = mipi_dsi_get_drvdata(dsi);
mipi_dsi_detach(ctx->dsi);
- mipi_dsi_device_unregister(ctx->dsi);
-
drm_panel_remove(&ctx->panel);
}
diff --git a/drivers/gpu/drm/panel/panel-visionox-rm69299.c b/drivers/gpu/drm/panel/panel-visionox-rm69299.c
index 775144695283f5..b15ca56a09a74a 100644
--- a/drivers/gpu/drm/panel/panel-visionox-rm69299.c
+++ b/drivers/gpu/drm/panel/panel-visionox-rm69299.c
@@ -253,8 +253,6 @@ static void visionox_rm69299_remove(struct mipi_dsi_device *dsi)
struct visionox_rm69299 *ctx = mipi_dsi_get_drvdata(dsi);
mipi_dsi_detach(ctx->dsi);
- mipi_dsi_device_unregister(ctx->dsi);
-
drm_panel_remove(&ctx->panel);
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index 9063ce2546422f..fd8e44992184fa 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -441,19 +441,19 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev)
gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present);
ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO,
- val, !val, 1, 1000);
+ val, !val, 1, 2000);
if (ret)
dev_err(pfdev->dev, "shader power transition timeout");
gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO,
- val, !val, 1, 1000);
+ val, !val, 1, 2000);
if (ret)
dev_err(pfdev->dev, "tiler power transition timeout");
gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present);
ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO,
- val, !val, 0, 1000);
+ val, !val, 0, 2000);
if (ret)
dev_err(pfdev->dev, "l2 power transition timeout");
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index f38385fe76bbb4..b91019cd5acb19 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -502,11 +502,18 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
mapping_set_unevictable(mapping);
for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
+ /* Can happen if the last fault only partially filled this
+ * section of the pages array before failing. In that case
+ * we skip already filled pages.
+ */
+ if (pages[i])
+ continue;
+
pages[i] = shmem_read_mapping_page(mapping, i);
if (IS_ERR(pages[i])) {
ret = PTR_ERR(pages[i]);
pages[i] = NULL;
- goto err_pages;
+ goto err_unlock;
}
}
@@ -514,7 +521,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL);
if (ret)
- goto err_pages;
+ goto err_unlock;
ret = dma_map_sgtable(pfdev->dev, sgt, DMA_BIDIRECTIONAL, 0);
if (ret)
@@ -537,8 +544,6 @@ out:
err_map:
sg_free_table(sgt);
-err_pages:
- drm_gem_shmem_put_pages(&bo->base);
err_unlock:
dma_resv_unlock(obj->resv);
err_bo:
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index 281edab518cdd3..d6ea01f3797be7 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -421,7 +421,6 @@ int qxl_surface_id_alloc(struct qxl_device *qdev,
{
uint32_t handle;
int idr_ret;
- int count = 0;
again:
idr_preload(GFP_ATOMIC);
spin_lock(&qdev->surf_id_idr_lock);
@@ -433,7 +432,6 @@ again:
handle = idr_ret;
if (handle >= qdev->rom->n_surfaces) {
- count++;
spin_lock(&qdev->surf_id_idr_lock);
idr_remove(&qdev->surf_id_idr, handle);
spin_unlock(&qdev->surf_id_idr_lock);
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index dd0f834d881ce1..506ae1f5e099ff 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -145,7 +145,7 @@ static int qxl_process_single_command(struct qxl_device *qdev,
struct qxl_release *release;
struct qxl_bo *cmd_bo;
void *fb_cmd;
- int i, ret, num_relocs;
+ int i, ret;
int unwritten;
switch (cmd->type) {
@@ -200,7 +200,6 @@ static int qxl_process_single_command(struct qxl_device *qdev,
}
/* fill out reloc info structs */
- num_relocs = 0;
for (i = 0; i < cmd->relocs_num; ++i) {
struct drm_qxl_reloc reloc;
struct drm_qxl_reloc __user *u = u64_to_user_ptr(cmd->relocs);
@@ -230,7 +229,6 @@ static int qxl_process_single_command(struct qxl_device *qdev,
reloc_info[i].dst_bo = cmd_bo;
reloc_info[i].dst_offset = reloc.dst_offset + release->release_offset;
}
- num_relocs++;
/* reserve and validate the reloc dst bo */
if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle) {
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 368d26da0d6a23..9febc8b73f09ef 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -58,16 +58,56 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr,
signed long timeout)
{
struct qxl_device *qdev;
+ struct qxl_release *release;
+ int count = 0, sc = 0;
+ bool have_drawable_releases;
unsigned long cur, end = jiffies + timeout;
qdev = container_of(fence->lock, struct qxl_device, release_lock);
+ release = container_of(fence, struct qxl_release, base);
+ have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;
- if (!wait_event_timeout(qdev->release_event,
- (dma_fence_is_signaled(fence) ||
- (qxl_io_notify_oom(qdev), 0)),
- timeout))
- return 0;
+retry:
+ sc++;
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+
+ qxl_io_notify_oom(qdev);
+
+ for (count = 0; count < 11; count++) {
+ if (!qxl_queue_garbage_collect(qdev, true))
+ break;
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+ }
+
+ if (dma_fence_is_signaled(fence))
+ goto signaled;
+
+ if (have_drawable_releases || sc < 4) {
+ if (sc > 2)
+ /* back off */
+ usleep_range(500, 1000);
+
+ if (time_after(jiffies, end))
+ return 0;
+
+ if (have_drawable_releases && sc > 300) {
+ DMA_FENCE_WARN(fence,
+ "failed to wait on release %llu after spincount %d\n",
+ fence->context & ~0xf0000000, sc);
+ goto signaled;
+ }
+ goto retry;
+ }
+ /*
+ * yeah, original sync_obj_wait gave up after 3 spins when
+ * have_drawable_releases is not set.
+ */
+signaled:
cur = jiffies;
if (time_after(cur, end))
return 0;
diff --git a/drivers/gpu/drm/radeon/pptable.h b/drivers/gpu/drm/radeon/pptable.h
index 94947229888ba7..b7f22597ee95e7 100644
--- a/drivers/gpu/drm/radeon/pptable.h
+++ b/drivers/gpu/drm/radeon/pptable.h
@@ -424,7 +424,7 @@ typedef struct _ATOM_PPLIB_SUMO_CLOCK_INFO{
typedef struct _ATOM_PPLIB_STATE_V2
{
//number of valid dpm levels in this state; Driver uses it to calculate the whole
- //size of the state: sizeof(ATOM_PPLIB_STATE_V2) + (ucNumDPMLevels - 1) * sizeof(UCHAR)
+ //size of the state: struct_size(ATOM_PPLIB_STATE_V2, clockInfoIndex, ucNumDPMLevels)
UCHAR ucNumDPMLevels;
//a index to the array of nonClockInfos
@@ -432,14 +432,14 @@ typedef struct _ATOM_PPLIB_STATE_V2
/**
* Driver will read the first ucNumDPMLevels in this array
*/
- UCHAR clockInfoIndex[1];
+ UCHAR clockInfoIndex[] __counted_by(ucNumDPMLevels);
} ATOM_PPLIB_STATE_V2;
typedef struct _StateArray{
//how many states we have
UCHAR ucNumEntries;
- ATOM_PPLIB_STATE_V2 states[1];
+ ATOM_PPLIB_STATE_V2 states[] __counted_by(ucNumEntries);
}StateArray;
@@ -450,7 +450,7 @@ typedef struct _ClockInfoArray{
//sizeof(ATOM_PPLIB_CLOCK_INFO)
UCHAR ucEntrySize;
- UCHAR clockInfo[1];
+ UCHAR clockInfo[] __counted_by(ucNumEntries);
}ClockInfoArray;
typedef struct _NonClockInfoArray{
@@ -460,7 +460,7 @@ typedef struct _NonClockInfoArray{
//sizeof(ATOM_PPLIB_NONCLOCK_INFO)
UCHAR ucEntrySize;
- ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[1];
+ ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[] __counted_by(ucNumEntries);
}NonClockInfoArray;
typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index bb1f0a3371ab5d..10793a433bf586 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -923,8 +923,12 @@ bool radeon_get_atom_connector_info_from_supported_devices_table(struct
max_device = ATOM_MAX_SUPPORTED_DEVICE_INFO;
for (i = 0; i < max_device; i++) {
- ATOM_CONNECTOR_INFO_I2C ci =
- supported_devices->info.asConnInfo[i];
+ ATOM_CONNECTOR_INFO_I2C ci;
+
+ if (frev > 1)
+ ci = supported_devices->info_2d1.asConnInfo[i];
+ else
+ ci = supported_devices->info.asConnInfo[i];
bios_connectors[i].valid = false;
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
index 48170694ac6b89..18efb3fe1c000f 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c
@@ -17,9 +17,7 @@
static const uint32_t formats_cluster[] = {
DRM_FORMAT_XRGB2101010,
- DRM_FORMAT_ARGB2101010,
DRM_FORMAT_XBGR2101010,
- DRM_FORMAT_ABGR2101010,
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
DRM_FORMAT_XBGR8888,
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 3c4f5a392b0646..58c8161289fea9 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -71,13 +71,19 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
entity->guilty = guilty;
entity->num_sched_list = num_sched_list;
entity->priority = priority;
+ /*
+ * It's perfectly valid to initialize an entity without having a valid
+ * scheduler attached. It's just not valid to use the scheduler before it
+ * is initialized itself.
+ */
entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
RCU_INIT_POINTER(entity->last_scheduled, NULL);
RB_CLEAR_NODE(&entity->rb_tree_node);
- if (!sched_list[0]->sched_rq) {
- /* Warn drivers not to do this and to fix their DRM
- * calling order.
+ if (num_sched_list && !sched_list[0]->sched_rq) {
+ /* Since every entry covered by num_sched_list
+ * should be non-NULL and therefore we warn drivers
+ * not to do this and to fix their DRM calling order.
*/
pr_warn("%s: called with uninitialized scheduler\n", __func__);
} else if (num_sched_list) {
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 112438d965ffbe..6e1fd6985ffcb7 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -288,17 +288,23 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
enum ttm_caching caching,
unsigned int order)
{
- if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
+ if (pool->use_dma_alloc)
return &pool->caching[caching].orders[order];
#ifdef CONFIG_X86
switch (caching) {
case ttm_write_combined:
+ if (pool->nid != NUMA_NO_NODE)
+ return &pool->caching[caching].orders[order];
+
if (pool->use_dma32)
return &global_dma32_write_combined[order];
return &global_write_combined[order];
case ttm_uncached:
+ if (pool->nid != NUMA_NO_NODE)
+ return &pool->caching[caching].orders[order];
+
if (pool->use_dma32)
return &global_dma32_uncached[order];
@@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
pool->use_dma_alloc = use_dma_alloc;
pool->use_dma32 = use_dma32;
- if (use_dma_alloc || nid != NUMA_NO_NODE) {
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
- for (j = 0; j < NR_PAGE_ORDERS; ++j)
- ttm_pool_type_init(&pool->caching[i].orders[j],
- pool, i, j);
+ for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+ for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+ struct ttm_pool_type *pt;
+
+ /* Initialize only pool types which are actually used */
+ pt = ttm_pool_select_type(pool, i, j);
+ if (pt != &pool->caching[i].orders[j])
+ continue;
+
+ ttm_pool_type_init(pt, pool, i, j);
+ }
}
}
EXPORT_SYMBOL(ttm_pool_init);
@@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
{
unsigned int i, j;
- if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
- for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
- for (j = 0; j < NR_PAGE_ORDERS; ++j)
- ttm_pool_type_fini(&pool->caching[i].orders[j]);
+ for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+ for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+ struct ttm_pool_type *pt;
+
+ pt = ttm_pool_select_type(pool, i, j);
+ if (pt != &pool->caching[i].orders[j])
+ continue;
+
+ ttm_pool_type_fini(pt);
+ }
}
/* We removed the pool types from the LRU, but we need to also make sure
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index 2e04f6cb661e4f..ce6b2fb341d1f8 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -105,7 +105,6 @@ v3d_irq(int irq, void *arg)
struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_BIN];
- file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN];
file->jobs_sent[V3D_BIN]++;
v3d->queue[V3D_BIN].jobs_sent++;
@@ -126,7 +125,6 @@ v3d_irq(int irq, void *arg)
struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_RENDER];
- file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER];
file->jobs_sent[V3D_RENDER]++;
v3d->queue[V3D_RENDER].jobs_sent++;
@@ -147,7 +145,6 @@ v3d_irq(int irq, void *arg)
struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_CSD];
- file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD];
file->jobs_sent[V3D_CSD]++;
v3d->queue[V3D_CSD].jobs_sent++;
@@ -195,7 +192,6 @@ v3d_hub_irq(int irq, void *arg)
struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv;
u64 runtime = local_clock() - file->start_ns[V3D_TFU];
- file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU];
file->jobs_sent[V3D_TFU]++;
v3d->queue[V3D_TFU].jobs_sent++;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
index c52c7bf1485b1f..717d624e9a0522 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
@@ -456,8 +456,10 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
.no_wait_gpu = false
};
u32 j, initial_line = dst_offset / dst_stride;
- struct vmw_bo_blit_line_data d;
+ struct vmw_bo_blit_line_data d = {0};
int ret = 0;
+ struct page **dst_pages = NULL;
+ struct page **src_pages = NULL;
/* Buffer objects need to be either pinned or reserved: */
if (!(dst->pin_count))
@@ -477,12 +479,35 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
return ret;
}
+ if (!src->ttm->pages && src->ttm->sg) {
+ src_pages = kvmalloc_array(src->ttm->num_pages,
+ sizeof(struct page *), GFP_KERNEL);
+ if (!src_pages)
+ return -ENOMEM;
+ ret = drm_prime_sg_to_page_array(src->ttm->sg, src_pages,
+ src->ttm->num_pages);
+ if (ret)
+ goto out;
+ }
+ if (!dst->ttm->pages && dst->ttm->sg) {
+ dst_pages = kvmalloc_array(dst->ttm->num_pages,
+ sizeof(struct page *), GFP_KERNEL);
+ if (!dst_pages) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = drm_prime_sg_to_page_array(dst->ttm->sg, dst_pages,
+ dst->ttm->num_pages);
+ if (ret)
+ goto out;
+ }
+
d.mapped_dst = 0;
d.mapped_src = 0;
d.dst_addr = NULL;
d.src_addr = NULL;
- d.dst_pages = dst->ttm->pages;
- d.src_pages = src->ttm->pages;
+ d.dst_pages = dst->ttm->pages ? dst->ttm->pages : dst_pages;
+ d.src_pages = src->ttm->pages ? src->ttm->pages : src_pages;
d.dst_num_pages = PFN_UP(dst->resource->size);
d.src_num_pages = PFN_UP(src->resource->size);
d.dst_prot = ttm_io_prot(dst, dst->resource, PAGE_KERNEL);
@@ -504,6 +529,10 @@ out:
kunmap_atomic(d.src_addr);
if (d.dst_addr)
kunmap_atomic(d.dst_addr);
+ if (src_pages)
+ kvfree(src_pages);
+ if (dst_pages)
+ kvfree(dst_pages);
return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index bfd41ce3c8f4fc..e5eb21a471a601 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -377,7 +377,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv,
{
struct ttm_operation_ctx ctx = {
.interruptible = params->bo_type != ttm_bo_type_kernel,
- .no_wait_gpu = false
+ .no_wait_gpu = false,
+ .resv = params->resv,
};
struct ttm_device *bdev = &dev_priv->bdev;
struct drm_device *vdev = &dev_priv->drm;
@@ -394,8 +395,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv,
vmw_bo_placement_set(vmw_bo, params->domain, params->busy_domain);
ret = ttm_bo_init_reserved(bdev, &vmw_bo->tbo, params->bo_type,
- &vmw_bo->placement, 0, &ctx, NULL,
- NULL, destroy);
+ &vmw_bo->placement, 0, &ctx,
+ params->sg, params->resv, destroy);
if (unlikely(ret))
return ret;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
index 0d496dc9c6af7a..f349642e6190d6 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
@@ -55,6 +55,8 @@ struct vmw_bo_params {
enum ttm_bo_type bo_type;
size_t size;
bool pin;
+ struct dma_resv *resv;
+ struct sg_table *sg;
};
/**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index d3e308fdfd5be8..58fb40c93100a8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -666,11 +666,12 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
[vmw_dma_map_populate] = "Caching DMA mappings.",
[vmw_dma_map_bind] = "Giving up DMA mappings early."};
- /* TTM currently doesn't fully support SEV encryption. */
- if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
- return -EINVAL;
-
- if (vmw_force_coherent)
+ /*
+ * When running with SEV we always want dma mappings, because
+ * otherwise ttm tt pool pages will bounce through swiotlb running
+ * out of available space.
+ */
+ if (vmw_force_coherent || cc_platform_has(CC_ATTR_MEM_ENCRYPT))
dev_priv->map_mode = vmw_dma_alloc_coherent;
else if (vmw_restrict_iommu)
dev_priv->map_mode = vmw_dma_map_bind;
@@ -1444,12 +1445,15 @@ static void vmw_debugfs_resource_managers_init(struct vmw_private *vmw)
root, "system_ttm");
ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, TTM_PL_VRAM),
root, "vram_ttm");
- ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR),
- root, "gmr_ttm");
- ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB),
- root, "mob_ttm");
- ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM),
- root, "system_mob_ttm");
+ if (vmw->has_gmr)
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_GMR),
+ root, "gmr_ttm");
+ if (vmw->has_mob) {
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_MOB),
+ root, "mob_ttm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&vmw->bdev, VMW_PL_SYSTEM),
+ root, "system_mob_ttm");
+ }
}
static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
@@ -1624,6 +1628,7 @@ static const struct drm_driver driver = {
.prime_fd_to_handle = vmw_prime_fd_to_handle,
.prime_handle_to_fd = vmw_prime_handle_to_fd,
+ .gem_prime_import_sg_table = vmw_prime_import_sg_table,
.fops = &vmwgfx_driver_fops,
.name = VMWGFX_DRIVER_NAME,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 12efecc17df664..b019a1a1787af5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1130,6 +1130,9 @@ extern int vmw_prime_handle_to_fd(struct drm_device *dev,
struct drm_file *file_priv,
uint32_t handle, uint32_t flags,
int *prime_fd);
+struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *table);
/*
* MemoryOBject management - vmwgfx_mob.c
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
index 12787bb9c111d1..d6bcaf078b1f40 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
@@ -149,6 +149,38 @@ out_no_bo:
return ret;
}
+struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *table)
+{
+ int ret;
+ struct vmw_private *dev_priv = vmw_priv(dev);
+ struct drm_gem_object *gem = NULL;
+ struct vmw_bo *vbo;
+ struct vmw_bo_params params = {
+ .domain = (dev_priv->has_mob) ? VMW_BO_DOMAIN_SYS : VMW_BO_DOMAIN_VRAM,
+ .busy_domain = VMW_BO_DOMAIN_SYS,
+ .bo_type = ttm_bo_type_sg,
+ .size = attach->dmabuf->size,
+ .pin = false,
+ .resv = attach->dmabuf->resv,
+ .sg = table,
+
+ };
+
+ dma_resv_lock(params.resv, NULL);
+
+ ret = vmw_bo_create(dev_priv, &params, &vbo);
+ if (ret != 0)
+ goto out_no_bo;
+
+ vbo->tbo.base.funcs = &vmw_gem_object_funcs;
+
+ gem = &vbo->tbo.base;
+out_no_bo:
+ dma_resv_unlock(params.resv);
+ return gem;
+}
int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index cd4925346ed45a..84ae4e10a2ebec 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -933,6 +933,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane,
int vmw_du_crtc_atomic_check(struct drm_crtc *crtc,
struct drm_atomic_state *state)
{
+ struct vmw_private *vmw = vmw_priv(crtc->dev);
struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state,
crtc);
struct vmw_display_unit *du = vmw_crtc_to_du(new_state->crtc);
@@ -940,9 +941,13 @@ int vmw_du_crtc_atomic_check(struct drm_crtc *crtc,
bool has_primary = new_state->plane_mask &
drm_plane_mask(crtc->primary);
- /* We always want to have an active plane with an active CRTC */
- if (has_primary != new_state->enable)
- return -EINVAL;
+ /*
+ * This is fine in general, but broken userspace might expect
+ * some actual rendering so give a clue as why it's blank.
+ */
+ if (new_state->enable && !has_primary)
+ drm_dbg_driver(&vmw->drm,
+ "CRTC without a primary plane will be blank.\n");
if (new_state->connector_mask != connector_mask &&
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index a94947b588e85f..19a843da87b789 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -243,10 +243,10 @@ struct vmw_framebuffer_bo {
static const uint32_t __maybe_unused vmw_primary_plane_formats[] = {
- DRM_FORMAT_XRGB1555,
- DRM_FORMAT_RGB565,
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_RGB565,
+ DRM_FORMAT_XRGB1555,
};
static const uint32_t __maybe_unused vmw_cursor_plane_formats[] = {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
index 2d72a5ee7c0c71..c99cad44499157 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
@@ -75,8 +75,12 @@ int vmw_prime_fd_to_handle(struct drm_device *dev,
int fd, u32 *handle)
{
struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+ int ret = ttm_prime_fd_to_handle(tfile, fd, handle);
- return ttm_prime_fd_to_handle(tfile, fd, handle);
+ if (ret)
+ ret = drm_gem_prime_fd_to_handle(dev, file_priv, fd, handle);
+
+ return ret;
}
int vmw_prime_handle_to_fd(struct drm_device *dev,
@@ -85,5 +89,12 @@ int vmw_prime_handle_to_fd(struct drm_device *dev,
int *prime_fd)
{
struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
- return ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd);
+ int ret;
+
+ if (handle > VMWGFX_NUM_MOB)
+ ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd);
+ else
+ ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd);
+
+ return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index 4d23d0a70bcb7e..621d98b376bbbc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -188,13 +188,18 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
switch (dev_priv->map_mode) {
case vmw_dma_map_bind:
case vmw_dma_map_populate:
- vsgt->sgt = &vmw_tt->sgt;
- ret = sg_alloc_table_from_pages_segment(
- &vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
- (unsigned long)vsgt->num_pages << PAGE_SHIFT,
- dma_get_max_seg_size(dev_priv->drm.dev), GFP_KERNEL);
- if (ret)
- goto out_sg_alloc_fail;
+ if (vmw_tt->dma_ttm.page_flags & TTM_TT_FLAG_EXTERNAL) {
+ vsgt->sgt = vmw_tt->dma_ttm.sg;
+ } else {
+ vsgt->sgt = &vmw_tt->sgt;
+ ret = sg_alloc_table_from_pages_segment(&vmw_tt->sgt,
+ vsgt->pages, vsgt->num_pages, 0,
+ (unsigned long)vsgt->num_pages << PAGE_SHIFT,
+ dma_get_max_seg_size(dev_priv->drm.dev),
+ GFP_KERNEL);
+ if (ret)
+ goto out_sg_alloc_fail;
+ }
ret = vmw_ttm_map_for_dma(vmw_tt);
if (unlikely(ret != 0))
@@ -209,8 +214,9 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
return 0;
out_map_fail:
- sg_free_table(vmw_tt->vsgt.sgt);
- vmw_tt->vsgt.sgt = NULL;
+ drm_warn(&dev_priv->drm, "VSG table map failed!");
+ sg_free_table(vsgt->sgt);
+ vsgt->sgt = NULL;
out_sg_alloc_fail:
return ret;
}
@@ -356,15 +362,17 @@ static void vmw_ttm_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
static int vmw_ttm_populate(struct ttm_device *bdev,
struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
{
- int ret;
+ bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
- /* TODO: maybe completely drop this ? */
if (ttm_tt_is_populated(ttm))
return 0;
- ret = ttm_pool_alloc(&bdev->pool, ttm, ctx);
+ if (external && ttm->sg)
+ return drm_prime_sg_to_dma_addr_array(ttm->sg,
+ ttm->dma_address,
+ ttm->num_pages);
- return ret;
+ return ttm_pool_alloc(&bdev->pool, ttm, ctx);
}
static void vmw_ttm_unpopulate(struct ttm_device *bdev,
@@ -372,6 +380,10 @@ static void vmw_ttm_unpopulate(struct ttm_device *bdev,
{
struct vmw_ttm_tt *vmw_tt = container_of(ttm, struct vmw_ttm_tt,
dma_ttm);
+ bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
+
+ if (external)
+ return;
vmw_ttm_unbind(bdev, ttm);
@@ -390,6 +402,7 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
{
struct vmw_ttm_tt *vmw_be;
int ret;
+ bool external = bo->type == ttm_bo_type_sg;
vmw_be = kzalloc(sizeof(*vmw_be), GFP_KERNEL);
if (!vmw_be)
@@ -398,7 +411,10 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
vmw_be->dev_priv = vmw_priv_from_ttm(bo->bdev);
vmw_be->mob = NULL;
- if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
+ if (external)
+ page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+
+ if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent || external)
ret = ttm_sg_tt_init(&vmw_be->dma_ttm, bo, page_flags,
ttm_cached);
else
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 5a428ca00f10f2..c29a850859ad5a 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -172,8 +172,8 @@ subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
-Ddrm_i915_gem_object=xe_bo \
-Ddrm_i915_private=xe_device
-CFLAGS_i915-display/intel_fbdev.o = $(call cc-disable-warning, override-init)
-CFLAGS_i915-display/intel_display_device.o = $(call cc-disable-warning, override-init)
+CFLAGS_i915-display/intel_fbdev.o = -Wno-override-init
+CFLAGS_i915-display/intel_display_device.o = -Wno-override-init
# Rule to build SOC code shared with i915
$(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c
index b21da7b745a5e7..a9c1f9885c6bb4 100644
--- a/drivers/gpu/drm/xe/display/intel_fb_bo.c
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c
@@ -31,7 +31,7 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
ret = ttm_bo_reserve(&bo->ttm, true, false, NULL);
if (ret)
- return ret;
+ goto err;
if (!(bo->flags & XE_BO_SCANOUT_BIT)) {
/*
@@ -42,12 +42,16 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
*/
if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) {
ttm_bo_unreserve(&bo->ttm);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err;
}
bo->flags |= XE_BO_SCANOUT_BIT;
}
ttm_bo_unreserve(&bo->ttm);
+ return 0;
+err:
+ xe_bo_put(bo);
return ret;
}
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index e4db069f0db3f1..6ec375c1c4b6c0 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -108,11 +108,6 @@ int xe_display_create(struct xe_device *xe)
xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
drmm_mutex_init(&xe->drm, &xe->sb_lock);
- drmm_mutex_init(&xe->drm, &xe->display.backlight.lock);
- drmm_mutex_init(&xe->drm, &xe->display.audio.mutex);
- drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex);
- drmm_mutex_init(&xe->drm, &xe->display.pps.mutex);
- drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex);
xe->enabled_irq_mask = ~0;
err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 0b1266c88a6af3..deddc8be48c0af 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -125,7 +125,7 @@
#define RING_EXECLIST_STATUS_LO(base) XE_REG((base) + 0x234)
#define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4)
-#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244)
+#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 6603a0ea79c5af..9c0837b6fdfc8d 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -144,9 +144,6 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
.mem_type = XE_PL_TT,
};
*c += 1;
-
- if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
- bo->props.preferred_mem_type = XE_PL_TT;
}
}
@@ -181,25 +178,15 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
}
places[*c] = place;
*c += 1;
-
- if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
- bo->props.preferred_mem_type = mem_type;
}
static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
u32 bo_flags, u32 *c)
{
- if (bo->props.preferred_gt == XE_GT1) {
- if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
- if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
- } else {
- if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
- if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
- }
+ if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
+ add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
+ if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
+ add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
}
static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
@@ -223,17 +210,8 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
{
u32 c = 0;
- bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
-
- /* The order of placements should indicate preferred location */
-
- if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
- try_add_system(xe, bo, bo_flags, &c);
- try_add_vram(xe, bo, bo_flags, &c);
- } else {
- try_add_vram(xe, bo, bo_flags, &c);
- try_add_system(xe, bo, bo_flags, &c);
- }
+ try_add_vram(xe, bo, bo_flags, &c);
+ try_add_system(xe, bo, bo_flags, &c);
try_add_stolen(xe, bo, bo_flags, &c);
if (!c)
@@ -1126,13 +1104,6 @@ static void xe_gem_object_close(struct drm_gem_object *obj,
}
}
-static bool should_migrate_to_system(struct xe_bo *bo)
-{
- struct xe_device *xe = xe_bo_device(bo);
-
- return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
-}
-
static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
@@ -1141,7 +1112,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
struct xe_bo *bo = ttm_to_xe_bo(tbo);
bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK;
vm_fault_t ret;
- int idx, r = 0;
+ int idx;
if (needs_rpm)
xe_device_mem_access_get(xe);
@@ -1153,17 +1124,8 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
if (drm_dev_enter(ddev, &idx)) {
trace_xe_bo_cpu_fault(bo);
- if (should_migrate_to_system(bo)) {
- r = xe_bo_migrate(bo, XE_PL_TT);
- if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
- ret = VM_FAULT_NOPAGE;
- else if (r)
- ret = VM_FAULT_SIGBUS;
- }
- if (!ret)
- ret = ttm_bo_vm_fault_reserved(vmf,
- vmf->vma->vm_page_prot,
- TTM_BO_VM_NUM_PREFAULT);
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
drm_dev_exit(idx);
} else {
ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
@@ -1291,9 +1253,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
bo->flags = flags;
bo->cpu_caching = cpu_caching;
bo->ttm.base.funcs = &xe_gem_object_funcs;
- bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
- bo->props.preferred_gt = XE_BO_PROPS_INVALID;
- bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
INIT_LIST_HEAD(&bo->pinned_link);
#ifdef CONFIG_PROC_FS
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 14ef13b7b421f3..86422e113d3962 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -56,25 +56,6 @@ struct xe_bo {
*/
struct list_head client_link;
#endif
- /** @props: BO user controlled properties */
- struct {
- /** @preferred_mem: preferred memory class for this BO */
- s16 preferred_mem_class;
- /** @prefered_gt: preferred GT for this BO */
- s16 preferred_gt;
- /** @preferred_mem_type: preferred memory type */
- s32 preferred_mem_type;
- /**
- * @cpu_atomic: the CPU expects to do atomics operations to
- * this BO
- */
- bool cpu_atomic;
- /**
- * @device_atomic: the device expects to do atomics operations
- * to this BO
- */
- bool device_atomic;
- } props;
/** @freed: List node for delayed put. */
struct llist_node freed;
/** @created: Whether the bo has passed initial creation */
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index ca85e81fdb4438..d32ff3857e6583 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -193,6 +193,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
struct xe_device *xe = to_xe_device(dev);
+ if (xe->preempt_fence_wq)
+ destroy_workqueue(xe->preempt_fence_wq);
+
if (xe->ordered_wq)
destroy_workqueue(xe->ordered_wq);
@@ -258,9 +261,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
INIT_LIST_HEAD(&xe->pinned.external_vram);
INIT_LIST_HEAD(&xe->pinned.evicted);
+ xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
- if (!xe->ordered_wq || !xe->unordered_wq) {
+ if (!xe->ordered_wq || !xe->unordered_wq ||
+ !xe->preempt_fence_wq) {
+ /*
+ * Cleanup done in xe_device_destroy via
+ * drmm_add_action_or_reset register above
+ */
drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
err = -ENOMEM;
goto err;
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 14be34d9f5434b..d413bc2c6be5a0 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -58,7 +58,7 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe)
static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id)
{
- if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id > XE_MAX_GT_PER_TILE))
+ if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE))
gt_id = 0;
return gt_id ? tile->media_gt : tile->primary_gt;
@@ -79,7 +79,7 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
if (MEDIA_VER(xe) >= 13) {
gt = xe_tile_get_gt(root_tile, gt_id);
} else {
- if (drm_WARN_ON(&xe->drm, gt_id > XE_MAX_TILES_PER_DEVICE))
+ if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE))
gt_id = 0;
gt = xe->tiles[gt_id].primary_gt;
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 9785eef2e5a4e6..8e3a222b41cf0a 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -363,6 +363,9 @@ struct xe_device {
/** @ufence_wq: user fence wait queue */
wait_queue_head_t ufence_wq;
+ /** @preempt_fence_wq: used to serialize preempt fences */
+ struct workqueue_struct *preempt_fence_wq;
+
/** @ordered_wq: used to serialize compute mode resume */
struct workqueue_struct *ordered_wq;
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 826c8b38967250..cc5e0f75de3c73 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -94,48 +94,16 @@
* Unlock all
*/
+/*
+ * Add validation and rebinding to the drm_exec locking loop, since both can
+ * trigger eviction which may require sleeping dma_resv locks.
+ */
static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
{
struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
- struct drm_gem_object *obj;
- unsigned long index;
- int num_fences;
- int ret;
-
- ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
- if (ret)
- return ret;
-
- /*
- * 1 fence slot for the final submit, and 1 more for every per-tile for
- * GPU bind and 1 extra for CPU bind. Note that there are potentially
- * many vma per object/dma-resv, however the fence slot will just be
- * re-used, since they are largely the same timeline and the seqno
- * should be in order. In the case of CPU bind there is dummy fence used
- * for all CPU binds, so no need to have a per-tile slot for that.
- */
- num_fences = 1 + 1 + vm->xe->info.tile_count;
- /*
- * We don't know upfront exactly how many fence slots we will need at
- * the start of the exec, since the TTM bo_validate above can consume
- * numerous fence slots. Also due to how the dma_resv_reserve_fences()
- * works it only ensures that at least that many fence slots are
- * available i.e if there are already 10 slots available and we reserve
- * two more, it can just noop without reserving anything. With this it
- * is quite possible that TTM steals some of the fence slots and then
- * when it comes time to do the vma binding and final exec stage we are
- * lacking enough fence slots, leading to some nasty BUG_ON() when
- * adding the fences. Hence just add our own fences here, after the
- * validate stage.
- */
- drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) {
- ret = dma_resv_reserve_fences(obj->resv, num_fences);
- if (ret)
- return ret;
- }
-
- return 0;
+ /* The fence slot added here is intended for the exec sched job. */
+ return xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
}
int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -152,7 +120,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct drm_exec *exec = &vm_exec.exec;
u32 i, num_syncs = 0, num_ufence = 0;
struct xe_sched_job *job;
- struct dma_fence *rebind_fence;
struct xe_vm *vm;
bool write_locked, skip_retry = false;
ktime_t end = 0;
@@ -290,39 +257,7 @@ retry:
goto err_exec;
}
- /*
- * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
- * VM mode only.
- */
- rebind_fence = xe_vm_rebind(vm, false);
- if (IS_ERR(rebind_fence)) {
- err = PTR_ERR(rebind_fence);
- goto err_put_job;
- }
-
- /*
- * We store the rebind_fence in the VM so subsequent execs don't get
- * scheduled before the rebinds of userptrs / evicted BOs is complete.
- */
- if (rebind_fence) {
- dma_fence_put(vm->rebind_fence);
- vm->rebind_fence = rebind_fence;
- }
- if (vm->rebind_fence) {
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
- &vm->rebind_fence->flags)) {
- dma_fence_put(vm->rebind_fence);
- vm->rebind_fence = NULL;
- } else {
- dma_fence_get(vm->rebind_fence);
- err = drm_sched_job_add_dependency(&job->drm,
- vm->rebind_fence);
- if (err)
- goto err_put_job;
- }
- }
-
- /* Wait behind munmap style rebinds */
+ /* Wait behind rebinds */
if (!xe_vm_in_lr_mode(vm)) {
err = drm_sched_job_add_resv_dependencies(&job->drm,
xe_vm_resv(vm),
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 11e150f4c0c1f7..ead25d5e723ea5 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -448,7 +448,7 @@ find_hw_engine(struct xe_device *xe,
{
u32 idx;
- if (eci.engine_class > ARRAY_SIZE(user_to_xe_engine_class))
+ if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
return NULL;
if (eci.gt_id >= xe->info.gt_count)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 62b3d9d1d7cdd4..462b331950320c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -148,6 +148,11 @@ struct xe_exec_queue {
const struct xe_ring_ops *ring_ops;
/** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
struct drm_sched_entity *entity;
+ /**
+ * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed
+ * Protected by @vm's resv. Unused if @vm == NULL.
+ */
+ u64 tlb_flush_seqno;
/** @lrc: logical ring context for this exec queue */
struct xe_lrc lrc[];
};
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index 241c294270d916..fa9e9853c53ba6 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
{
struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma);
- unsigned int num_shared = 2; /* slots for bind + move */
int err;
- err = xe_vm_prepare_vma(exec, vma, num_shared);
+ err = xe_vm_lock_vma(exec, vma);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index f03e077f81a04f..e598a4363d0190 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -61,7 +61,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
spin_lock_init(&gt->tlb_invalidation.pending_lock);
spin_lock_init(&gt->tlb_invalidation.lock);
- gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
xe_gt_tlb_fence_timeout);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 70c615dd149865..07b2f724ec4568 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -177,13 +177,6 @@ struct xe_gt {
* xe_gt_tlb_fence_timeout after the timeut interval is over.
*/
struct delayed_work fence_tdr;
- /** @tlb_invalidation.fence_context: context for TLB invalidation fences */
- u64 fence_context;
- /**
- * @tlb_invalidation.fence_seqno: seqno to TLB invalidation fences, protected by
- * tlb_invalidation.lock
- */
- u32 fence_seqno;
/** @tlb_invalidation.lock: protects TLB invalidation fences */
spinlock_t lock;
} tlb_invalidation;
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index ff77bc8da1b270..e2a4c3b5e9ff84 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1220,7 +1220,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
init_waitqueue_head(&ge->suspend_wait);
timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
- q->sched_props.job_timeout_ms;
+ msecs_to_jiffies(q->sched_props.job_timeout_ms);
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
get_submit_wq(guc),
q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64,
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index b82233a4160624..9ac7fbe201b3c2 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -290,7 +290,7 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a
* As y can be < 2, we compute tau4 = (4 | x) << y
* and then add 2 when doing the final right shift to account for units
*/
- tau4 = ((1 << x_w) | x) << y;
+ tau4 = (u64)((1 << x_w) | x) << y;
/* val in hwmon interface units (millisec) */
out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
@@ -330,7 +330,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *
r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
- tau4 = ((1 << x_w) | x) << y;
+ tau4 = (u64)((1 << x_w) | x) << y;
max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
if (val > max_win)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 7ad853b0788af4..57066faf575eec 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -97,7 +97,6 @@ static void set_offsets(u32 *regs,
#define REG16(x) \
(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
(((x) >> 2) & 0x7f)
-#define END 0
{
const u32 base = hwe->mmio_base;
@@ -168,7 +167,7 @@ static const u8 gen12_xcs_offsets[] = {
REG16(0x274),
REG16(0x270),
- END
+ 0
};
static const u8 dg2_xcs_offsets[] = {
@@ -202,7 +201,7 @@ static const u8 dg2_xcs_offsets[] = {
REG16(0x274),
REG16(0x270),
- END
+ 0
};
static const u8 gen12_rcs_offsets[] = {
@@ -298,7 +297,7 @@ static const u8 gen12_rcs_offsets[] = {
REG(0x084),
NOP(1),
- END
+ 0
};
static const u8 xehp_rcs_offsets[] = {
@@ -339,7 +338,7 @@ static const u8 xehp_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END
+ 0
};
static const u8 dg2_rcs_offsets[] = {
@@ -382,7 +381,7 @@ static const u8 dg2_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END
+ 0
};
static const u8 mtl_rcs_offsets[] = {
@@ -425,7 +424,7 @@ static const u8 mtl_rcs_offsets[] = {
LRI(1, 0),
REG(0x0c8),
- END
+ 0
};
#define XE2_CTX_COMMON \
@@ -471,7 +470,7 @@ static const u8 xe2_rcs_offsets[] = {
LRI(1, 0), /* [0x47] */
REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */
- END
+ 0
};
static const u8 xe2_bcs_offsets[] = {
@@ -482,16 +481,15 @@ static const u8 xe2_bcs_offsets[] = {
REG16(0x200), /* [0x42] BCS_SWCTRL */
REG16(0x204), /* [0x44] BLIT_CCTL */
- END
+ 0
};
static const u8 xe2_xcs_offsets[] = {
XE2_CTX_COMMON,
- END
+ 0
};
-#undef END
#undef REG16
#undef REG
#undef LRI
@@ -527,9 +525,8 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
- regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
- CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+ regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
+ CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
/* TODO: Timestamp */
}
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index ee1bb938c49348..2ba4fb9511f63f 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -227,7 +227,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
if (vm->flags & XE_VM_FLAG_64K && level == 1)
flags = XE_PDE_64K;
- entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
+ entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) *
XE_PAGE_SIZE, pat_index);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
entry | flags);
@@ -235,7 +235,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
/* Write PDE's that point to our BO. */
for (i = 0; i < num_entries - num_level; i++) {
- entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
+ entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE,
pat_index);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
@@ -291,7 +291,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
#define VM_SA_UPDATE_UNIT_SIZE (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
#define NUM_VMUSA_WRITES_PER_UNIT (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
drm_suballoc_manager_init(&m->vm_update_sa,
- (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
+ (size_t)(map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
NUM_VMUSA_UNIT_PER_PAGE, 0);
m->pt_bo = bo;
@@ -490,7 +490,7 @@ static void emit_pte(struct xe_migrate *m,
struct xe_vm *vm = m->q->vm;
u16 pat_index;
u32 ptes;
- u64 ofs = at_pt * XE_PAGE_SIZE;
+ u64 ofs = (u64)at_pt * XE_PAGE_SIZE;
u64 cur_ofs;
/* Indirect access needs compression enabled uncached PAT index */
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index 7bce2a332603c0..7d50c6e89d8e7d 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -49,7 +49,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence)
struct xe_exec_queue *q = pfence->q;
pfence->error = q->ops->suspend(q);
- queue_work(system_unbound_wq, &pfence->preempt_work);
+ queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work);
return true;
}
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 7f54bc3e389d58..4efc8c1a3d7a99 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -1135,8 +1135,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
spin_lock_irq(&gt->tlb_invalidation.lock);
dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
&gt->tlb_invalidation.lock,
- gt->tlb_invalidation.fence_context,
- ++gt->tlb_invalidation.fence_seqno);
+ dma_fence_context_alloc(1), 1);
spin_unlock_irq(&gt->tlb_invalidation.lock);
INIT_LIST_HEAD(&ifence->base.link);
@@ -1236,6 +1235,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
err = xe_pt_prepare_bind(tile, vma, entries, &num_entries);
if (err)
goto err;
+
+ err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+ if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+ if (err)
+ goto err;
+
xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
@@ -1254,11 +1260,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
* non-faulting LR, in particular on user-space batch buffer chaining,
* it needs to be done here.
*/
- if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) ||
- (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
+ if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
if (!ifence)
return ERR_PTR(-ENOMEM);
+ } else if (rebind && !xe_vm_in_lr_mode(vm)) {
+ /* We bump also if batch_invalidate_tlb is true */
+ vm->tlb_flush_seqno++;
}
rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
@@ -1297,7 +1305,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
}
/* add shared fence now for pagetable delayed destroy */
- dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind &&
+ dma_resv_add_fence(xe_vm_resv(vm), fence, rebind ||
last_munmap_rebind ?
DMA_RESV_USAGE_KERNEL :
DMA_RESV_USAGE_BOOKKEEP);
@@ -1576,6 +1584,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
struct dma_fence *fence = NULL;
struct invalidation_fence *ifence;
struct xe_range_fence *rfence;
+ int err;
LLIST_HEAD(deferred);
@@ -1593,6 +1602,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
num_entries);
+ err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+ if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+ if (err)
+ return ERR_PTR(err);
+
ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
if (!ifence)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 92bb06c0586eb4..075f9eaef03122 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -132,7 +132,7 @@ query_engine_cycles(struct xe_device *xe,
return -EINVAL;
eci = &resp.eci;
- if (eci->gt_id > XE_MAX_GT_PER_TILE)
+ if (eci->gt_id >= XE_MAX_GT_PER_TILE)
return -EINVAL;
gt = xe_device_get_gt(xe, eci->gt_id);
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index c4edffcd4a3206..5b2b37b598130a 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -219,10 +219,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
{
u32 dw[MAX_JOB_SIZE_DW], i = 0;
u32 ppgtt_flag = get_ppgtt_flag(job);
- struct xe_vm *vm = job->q->vm;
struct xe_gt *gt = job->q->gt;
- if (vm && vm->batch_invalidate_tlb) {
+ if (job->ring_ops_flush_tlb) {
dw[i++] = preparser_disable(true);
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, true, dw, i);
@@ -270,7 +269,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
struct xe_gt *gt = job->q->gt;
struct xe_device *xe = gt_to_xe(gt);
bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
- struct xe_vm *vm = job->q->vm;
dw[i++] = preparser_disable(true);
@@ -282,13 +280,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
}
- if (vm && vm->batch_invalidate_tlb)
+ if (job->ring_ops_flush_tlb)
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, true, dw, i);
dw[i++] = preparser_disable(false);
- if (!vm || !vm->batch_invalidate_tlb)
+ if (!job->ring_ops_flush_tlb)
i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
seqno, dw, i);
@@ -317,7 +315,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
struct xe_gt *gt = job->q->gt;
struct xe_device *xe = gt_to_xe(gt);
bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
- struct xe_vm *vm = job->q->vm;
u32 mask_flags = 0;
dw[i++] = preparser_disable(true);
@@ -327,7 +324,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
- i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i);
+ i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i);
/* hsdes: 1809175790 */
if (has_aux_ccs(xe))
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 8151ddafb94075..b0c7fa4693cfe4 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -250,6 +250,16 @@ bool xe_sched_job_completed(struct xe_sched_job *job)
void xe_sched_job_arm(struct xe_sched_job *job)
{
+ struct xe_exec_queue *q = job->q;
+ struct xe_vm *vm = q->vm;
+
+ if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
+ (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
+ xe_vm_assert_held(vm);
+ q->tlb_flush_seqno = vm->tlb_flush_seqno;
+ job->ring_ops_flush_tlb = true;
+ }
+
drm_sched_job_arm(&job->drm);
}
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index b1d83da50a53da..5e12724219fdd4 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -39,6 +39,8 @@ struct xe_sched_job {
} user_fence;
/** @migrate_flush_flags: Additional flush flags for migration jobs */
u32 migrate_flush_flags;
+ /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
+ bool ring_ops_flush_tlb;
/** @batch_addr: batch buffer address of job */
u64 batch_addr[];
};
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index f88faef4142bde..3d4c8f342e215e 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -482,17 +482,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
return 0;
}
+/**
+ * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
+ * @vm: The vm for which we are rebinding.
+ * @exec: The struct drm_exec with the locked GEM objects.
+ * @num_fences: The number of fences to reserve for the operation, not
+ * including rebinds and validations.
+ *
+ * Validates all evicted gem objects and rebinds their vmas. Note that
+ * rebindings may cause evictions and hence the validation-rebind
+ * sequence is rerun until there are no more objects to validate.
+ *
+ * Return: 0 on success, negative error code on error. In particular,
+ * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
+ * the drm_exec transaction needs to be restarted.
+ */
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ struct drm_gem_object *obj;
+ unsigned long index;
+ int ret;
+
+ do {
+ ret = drm_gpuvm_validate(&vm->gpuvm, exec);
+ if (ret)
+ return ret;
+
+ ret = xe_vm_rebind(vm, false);
+ if (ret)
+ return ret;
+ } while (!list_empty(&vm->gpuvm.evict.list));
+
+ drm_exec_for_each_locked_object(exec, index, obj) {
+ ret = dma_resv_reserve_fences(obj->resv, num_fences);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
bool *done)
{
int err;
- /*
- * 1 fence for each preempt fence plus a fence for each tile from a
- * possible rebind
- */
- err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
- vm->xe->info.tile_count);
+ err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
if (err)
return err;
@@ -507,7 +543,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
return 0;
}
- err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
+ err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
if (err)
return err;
@@ -515,14 +551,19 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
if (err)
return err;
- return drm_gpuvm_validate(&vm->gpuvm, exec);
+ /*
+ * Add validation and rebinding to the locking loop since both can
+ * cause evictions which may require blocing dma_resv locks.
+ * The fence reservation here is intended for the new preempt fences
+ * we attach at the end of the rebind work.
+ */
+ return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}
static void preempt_rebind_work_func(struct work_struct *w)
{
struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
struct drm_exec exec;
- struct dma_fence *rebind_fence;
unsigned int fence_count = 0;
LIST_HEAD(preempt_fences);
ktime_t end = 0;
@@ -568,18 +609,11 @@ retry:
if (err)
goto out_unlock;
- rebind_fence = xe_vm_rebind(vm, true);
- if (IS_ERR(rebind_fence)) {
- err = PTR_ERR(rebind_fence);
+ err = xe_vm_rebind(vm, true);
+ if (err)
goto out_unlock;
- }
-
- if (rebind_fence) {
- dma_fence_wait(rebind_fence, false);
- dma_fence_put(rebind_fence);
- }
- /* Wait on munmap style VM unbinds */
+ /* Wait on rebinds and munmap style VM unbinds */
wait = dma_resv_wait_timeout(xe_vm_resv(vm),
DMA_RESV_USAGE_KERNEL,
false, MAX_SCHEDULE_TIMEOUT);
@@ -773,14 +807,14 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
struct xe_sync_entry *syncs, u32 num_syncs,
bool first_op, bool last_op);
-struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
+int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
{
- struct dma_fence *fence = NULL;
+ struct dma_fence *fence;
struct xe_vma *vma, *next;
lockdep_assert_held(&vm->lock);
if (xe_vm_in_lr_mode(vm) && !rebind_worker)
- return NULL;
+ return 0;
xe_vm_assert_held(vm);
list_for_each_entry_safe(vma, next, &vm->rebind_list,
@@ -788,17 +822,17 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
xe_assert(vm->xe, vma->tile_present);
list_del_init(&vma->combined_links.rebind);
- dma_fence_put(fence);
if (rebind_worker)
trace_xe_vma_rebind_worker(vma);
else
trace_xe_vma_rebind_exec(vma);
fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
if (IS_ERR(fence))
- return fence;
+ return PTR_ERR(fence);
+ dma_fence_put(fence);
}
- return fence;
+ return 0;
}
static void xe_vma_free(struct xe_vma *vma)
@@ -1004,35 +1038,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
}
/**
- * xe_vm_prepare_vma() - drm_exec utility to lock a vma
+ * xe_vm_lock_vma() - drm_exec utility to lock a vma
* @exec: The drm_exec object we're currently locking for.
* @vma: The vma for witch we want to lock the vm resv and any attached
* object's resv.
- * @num_shared: The number of dma-fence slots to pre-allocate in the
- * objects' reservation objects.
*
* Return: 0 on success, negative error code on error. In particular
* may return -EDEADLK on WW transaction contention and -EINTR if
* an interruptible wait is terminated by a signal.
*/
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
- unsigned int num_shared)
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
struct xe_vm *vm = xe_vma_vm(vma);
struct xe_bo *bo = xe_vma_bo(vma);
int err;
XE_WARN_ON(!vm);
- if (num_shared)
- err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
- else
- err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
- if (!err && bo && !bo->vm) {
- if (num_shared)
- err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
- else
- err = drm_exec_lock_obj(exec, &bo->ttm.base);
- }
+
+ err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+ if (!err && bo && !bo->vm)
+ err = drm_exec_lock_obj(exec, &bo->ttm.base);
return err;
}
@@ -1044,7 +1069,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma)
drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
- err = xe_vm_prepare_vma(&exec, vma, 0);
+ err = xe_vm_lock_vma(&exec, vma);
drm_exec_retry_on_contention(&exec);
if (XE_WARN_ON(err))
break;
@@ -1552,6 +1577,16 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe->usm.num_vm_in_fault_mode--;
else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
xe->usm.num_vm_in_non_fault_mode--;
+
+ if (vm->usm.asid) {
+ void *lookup;
+
+ xe_assert(xe, xe->info.has_asid);
+ xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
+
+ lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+ xe_assert(xe, lookup == vm);
+ }
mutex_unlock(&xe->usm.lock);
for_each_tile(tile, xe, id)
@@ -1567,29 +1602,19 @@ static void vm_destroy_work_func(struct work_struct *w)
struct xe_device *xe = vm->xe;
struct xe_tile *tile;
u8 id;
- void *lookup;
/* xe_vm_close_and_put was not called? */
xe_assert(xe, !vm->size);
mutex_destroy(&vm->snap_mutex);
- if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
+ if (!(vm->flags & XE_VM_FLAG_MIGRATION))
xe_device_mem_access_put(xe);
- if (xe->info.has_asid && vm->usm.asid) {
- mutex_lock(&xe->usm.lock);
- lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
- xe_assert(xe, lookup == vm);
- mutex_unlock(&xe->usm.lock);
- }
- }
-
for_each_tile(tile, xe, id)
XE_WARN_ON(vm->pt_root[id]);
trace_xe_vm_free(vm);
- dma_fence_put(vm->rebind_fence);
kfree(vm);
}
@@ -2512,7 +2537,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
lockdep_assert_held_write(&vm->lock);
- err = xe_vm_prepare_vma(exec, vma, 1);
+ err = xe_vm_lock_vma(exec, vma);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 6df1f1c7f85d98..306cd0934a190b 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -207,7 +207,7 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
int xe_vm_userptr_check_repin(struct xe_vm *vm);
-struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
+int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
int xe_vm_invalidate_vma(struct xe_vma *vma);
@@ -242,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
- unsigned int num_shared);
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
+
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
/**
* xe_vm_resv() - Return's the vm's reservation object
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index ae5fb565f6bf48..badf3945083d56 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -177,9 +177,6 @@ struct xe_vm {
*/
struct list_head rebind_list;
- /** @rebind_fence: rebind fence from execbuf */
- struct dma_fence *rebind_fence;
-
/**
* @destroy_work: worker to destroy VM, needed as a dma_fence signaling
* from an irq context can be last put and the destroy needs to be able
@@ -264,6 +261,11 @@ struct xe_vm {
bool capture_once;
} error_capture;
+ /**
+ * @tlb_flush_seqno: Required TLB flush seqno for the next exec.
+ * protected by the vm resv.
+ */
+ u64 tlb_flush_seqno;
/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
bool batch_invalidate_tlb;
/** @xef: XE file handle for tracking this VM's drm client */
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 783975d1384fc4..7c52757a89db9a 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -351,11 +351,6 @@ static int host1x_device_uevent(const struct device *dev,
return 0;
}
-static int host1x_dma_configure(struct device *dev)
-{
- return of_dma_configure(dev, dev->of_node, true);
-}
-
static const struct dev_pm_ops host1x_device_pm_ops = {
.suspend = pm_generic_suspend,
.resume = pm_generic_resume,
@@ -369,7 +364,6 @@ const struct bus_type host1x_bus_type = {
.name = "host1x",
.match = host1x_device_match,
.uevent = host1x_device_uevent,
- .dma_configure = host1x_dma_configure,
.pm = &host1x_device_pm_ops,
};
@@ -458,8 +452,6 @@ static int host1x_device_add(struct host1x *host1x,
device->dev.bus = &host1x_bus_type;
device->dev.parent = host1x->dev;
- of_dma_configure(&device->dev, host1x->dev->of_node, true);
-
device->dev.dma_parms = &device->dma_parms;
dma_set_max_seg_size(&device->dev, UINT_MAX);
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index adbf674355b2b8..fb8cd8469328ee 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -153,7 +153,9 @@ void vmbus_free_ring(struct vmbus_channel *channel)
hv_ringbuffer_cleanup(&channel->inbound);
if (channel->ringbuffer_page) {
- __free_pages(channel->ringbuffer_page,
+ /* In a CoCo VM leak the memory if it didn't get re-encrypted */
+ if (!channel->ringbuffer_gpadlhandle.decrypted)
+ __free_pages(channel->ringbuffer_page,
get_order(channel->ringbuffer_pagecount
<< PAGE_SHIFT));
channel->ringbuffer_page = NULL;
@@ -436,9 +438,18 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
ret = create_gpadl_header(type, kbuffer, size, send_offset, &msginfo);
- if (ret)
+ if (ret) {
+ gpadl->decrypted = false;
return ret;
+ }
+ /*
+ * Set the "decrypted" flag to true for the set_memory_decrypted()
+ * success case. In the failure case, the encryption state of the
+ * memory is unknown. Leave "decrypted" as true to ensure the
+ * memory will be leaked instead of going back on the free list.
+ */
+ gpadl->decrypted = true;
ret = set_memory_decrypted((unsigned long)kbuffer,
PFN_UP(size));
if (ret) {
@@ -527,9 +538,15 @@ cleanup:
kfree(msginfo);
- if (ret)
- set_memory_encrypted((unsigned long)kbuffer,
- PFN_UP(size));
+ if (ret) {
+ /*
+ * If set_memory_encrypted() fails, the decrypted flag is
+ * left as true so the memory is leaked instead of being
+ * put back on the free list.
+ */
+ if (!set_memory_encrypted((unsigned long)kbuffer, PFN_UP(size)))
+ gpadl->decrypted = false;
+ }
return ret;
}
@@ -850,6 +867,8 @@ post_msg_err:
if (ret)
pr_warn("Fail to set mem host visibility in GPADL teardown %d.\n", ret);
+ gpadl->decrypted = ret;
+
return ret;
}
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 3cabeeabb1cacf..f001ae880e1dbe 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -237,8 +237,17 @@ int vmbus_connect(void)
vmbus_connection.monitor_pages[0], 1);
ret |= set_memory_decrypted((unsigned long)
vmbus_connection.monitor_pages[1], 1);
- if (ret)
+ if (ret) {
+ /*
+ * If set_memory_decrypted() fails, the encryption state
+ * of the memory is unknown. So leak the memory instead
+ * of risking returning decrypted memory to the free list.
+ * For simplicity, always handle both pages the same.
+ */
+ vmbus_connection.monitor_pages[0] = NULL;
+ vmbus_connection.monitor_pages[1] = NULL;
goto cleanup;
+ }
/*
* Set_memory_decrypted() will change the memory contents if
@@ -337,13 +346,19 @@ void vmbus_disconnect(void)
vmbus_connection.int_page = NULL;
}
- set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1);
- set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1);
+ if (vmbus_connection.monitor_pages[0]) {
+ if (!set_memory_encrypted(
+ (unsigned long)vmbus_connection.monitor_pages[0], 1))
+ hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
+ vmbus_connection.monitor_pages[0] = NULL;
+ }
- hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
- hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
- vmbus_connection.monitor_pages[0] = NULL;
- vmbus_connection.monitor_pages[1] = NULL;
+ if (vmbus_connection.monitor_pages[1]) {
+ if (!set_memory_encrypted(
+ (unsigned long)vmbus_connection.monitor_pages[1], 1))
+ hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
+ vmbus_connection.monitor_pages[1] = NULL;
+ }
}
/*
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 4cb17603a8289b..12a707ab73f85c 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -131,7 +131,7 @@ static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);
@@ -142,7 +142,7 @@ static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->state);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);
@@ -153,7 +153,7 @@ static ssize_t monitor_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
+ return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);
@@ -164,8 +164,8 @@ static ssize_t class_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "{%pUl}\n",
- &hv_dev->channel->offermsg.offer.if_type);
+ return sysfs_emit(buf, "{%pUl}\n",
+ &hv_dev->channel->offermsg.offer.if_type);
}
static DEVICE_ATTR_RO(class_id);
@@ -176,8 +176,8 @@ static ssize_t device_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "{%pUl}\n",
- &hv_dev->channel->offermsg.offer.if_instance);
+ return sysfs_emit(buf, "{%pUl}\n",
+ &hv_dev->channel->offermsg.offer.if_instance);
}
static DEVICE_ATTR_RO(device_id);
@@ -186,7 +186,7 @@ static ssize_t modalias_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
+ return sysfs_emit(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
}
static DEVICE_ATTR_RO(modalias);
@@ -199,7 +199,7 @@ static ssize_t numa_node_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
+ return sysfs_emit(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
}
static DEVICE_ATTR_RO(numa_node);
#endif
@@ -212,9 +212,8 @@ static ssize_t server_monitor_pending_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);
@@ -226,9 +225,8 @@ static ssize_t client_monitor_pending_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_pending(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);
@@ -240,9 +238,8 @@ static ssize_t server_monitor_latency_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_latency(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);
@@ -254,9 +251,8 @@ static ssize_t client_monitor_latency_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_latency(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);
@@ -268,9 +264,8 @@ static ssize_t server_monitor_conn_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_conn_id(hv_dev->channel,
- vmbus_connection.monitor_pages[0]));
+ return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+ vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);
@@ -282,9 +277,8 @@ static ssize_t client_monitor_conn_id_show(struct device *dev,
if (!hv_dev->channel)
return -ENODEV;
- return sprintf(buf, "%d\n",
- channel_conn_id(hv_dev->channel,
- vmbus_connection.monitor_pages[1]));
+ return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+ vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);
@@ -303,7 +297,7 @@ static ssize_t out_intr_mask_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
+ return sysfs_emit(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);
@@ -321,7 +315,7 @@ static ssize_t out_read_index_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_read_index);
+ return sysfs_emit(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);
@@ -340,7 +334,7 @@ static ssize_t out_write_index_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.current_write_index);
+ return sysfs_emit(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);
@@ -359,7 +353,7 @@ static ssize_t out_read_bytes_avail_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
+ return sysfs_emit(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);
@@ -378,7 +372,7 @@ static ssize_t out_write_bytes_avail_show(struct device *dev,
&outbound);
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
+ return sysfs_emit(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);
@@ -396,7 +390,7 @@ static ssize_t in_intr_mask_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
+ return sysfs_emit(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);
@@ -414,7 +408,7 @@ static ssize_t in_read_index_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_read_index);
+ return sysfs_emit(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);
@@ -432,7 +426,7 @@ static ssize_t in_write_index_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.current_write_index);
+ return sysfs_emit(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);
@@ -451,7 +445,7 @@ static ssize_t in_read_bytes_avail_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
+ return sysfs_emit(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);
@@ -470,7 +464,7 @@ static ssize_t in_write_bytes_avail_show(struct device *dev,
if (ret < 0)
return ret;
- return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
+ return sysfs_emit(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);
@@ -480,7 +474,7 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
- int buf_size = PAGE_SIZE, n_written, tot_written;
+ int n_written;
struct list_head *cur;
if (!channel)
@@ -488,25 +482,21 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
mutex_lock(&vmbus_connection.channel_mutex);
- tot_written = snprintf(buf, buf_size, "%u:%u\n",
- channel->offermsg.child_relid, channel->target_cpu);
+ n_written = sysfs_emit(buf, "%u:%u\n",
+ channel->offermsg.child_relid,
+ channel->target_cpu);
list_for_each(cur, &channel->sc_list) {
- if (tot_written >= buf_size - 1)
- break;
cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
- n_written = scnprintf(buf + tot_written,
- buf_size - tot_written,
- "%u:%u\n",
- cur_sc->offermsg.child_relid,
- cur_sc->target_cpu);
- tot_written += n_written;
+ n_written += sysfs_emit_at(buf, n_written, "%u:%u\n",
+ cur_sc->offermsg.child_relid,
+ cur_sc->target_cpu);
}
mutex_unlock(&vmbus_connection.channel_mutex);
- return tot_written;
+ return n_written;
}
static DEVICE_ATTR_RO(channel_vp_mapping);
@@ -516,7 +506,7 @@ static ssize_t vendor_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
+ return sysfs_emit(buf, "0x%x\n", hv_dev->vendor_id);
}
static DEVICE_ATTR_RO(vendor);
@@ -526,7 +516,7 @@ static ssize_t device_show(struct device *dev,
{
struct hv_device *hv_dev = device_to_hv_device(dev);
- return sprintf(buf, "0x%x\n", hv_dev->device_id);
+ return sysfs_emit(buf, "0x%x\n", hv_dev->device_id);
}
static DEVICE_ATTR_RO(device);
@@ -551,7 +541,7 @@ static ssize_t driver_override_show(struct device *dev,
ssize_t len;
device_lock(dev);
- len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+ len = sysfs_emit(buf, "%s\n", hv_dev->driver_override);
device_unlock(dev);
return len;
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index a6861660cb8ca7..79870dd7a0146e 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -536,11 +536,12 @@ static int i801_block_transaction_by_block(struct i801_priv *priv,
if (read_write == I2C_SMBUS_READ ||
command == I2C_SMBUS_BLOCK_PROC_CALL) {
- status = i801_get_block_len(priv);
- if (status < 0)
+ len = i801_get_block_len(priv);
+ if (len < 0) {
+ status = len;
goto out;
+ }
- len = status;
data->block[0] = len;
inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
for (i = 0; i < len; i++)
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 76f79b68cef845..888ca636f3f3b0 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -324,6 +324,7 @@ static void decode_ISR(unsigned int val)
decode_bits(KERN_DEBUG "ISR", isr_bits, ARRAY_SIZE(isr_bits), val);
}
+#ifdef CONFIG_I2C_PXA_SLAVE
static const struct bits icr_bits[] = {
PXA_BIT(ICR_START, "START", NULL),
PXA_BIT(ICR_STOP, "STOP", NULL),
@@ -342,7 +343,6 @@ static const struct bits icr_bits[] = {
PXA_BIT(ICR_UR, "UR", "ur"),
};
-#ifdef CONFIG_I2C_PXA_SLAVE
static void decode_ICR(unsigned int val)
{
decode_bits(KERN_DEBUG "ICR", icr_bits, ARRAY_SIZE(icr_bits), val);
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index e7a44929f0daf7..ac6754a85f3507 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3228,30 +3228,33 @@ out:
static void iommu_snp_enable(void)
{
#ifdef CONFIG_KVM_AMD_SEV
- if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return;
/*
* The SNP support requires that IOMMU must be enabled, and is
- * not configured in the passthrough mode.
+ * configured with V1 page table (DTE[Mode] = 0 is not supported).
*/
if (no_iommu || iommu_default_passthrough()) {
- pr_err("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
- return;
+ pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
+ goto disable_snp;
+ }
+
+ if (amd_iommu_pgtable != AMD_IOMMU_V1) {
+ pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
+ goto disable_snp;
}
amd_iommu_snp_en = check_feature(FEATURE_SNP);
if (!amd_iommu_snp_en) {
- pr_err("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
- return;
+ pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
+ goto disable_snp;
}
pr_info("IOMMU SNP support enabled.\n");
+ return;
- /* Enforce IOMMU v1 pagetable when SNP is enabled. */
- if (amd_iommu_pgtable != AMD_IOMMU_V1) {
- pr_warn("Forcing use of AMD IOMMU v1 page table due to SNP.\n");
- amd_iommu_pgtable = AMD_IOMMU_V1;
- }
+disable_snp:
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
#endif
}
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index d35c1b8c8e65ce..e692217fcb2801 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1692,26 +1692,29 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
static u16 domain_id_alloc(void)
{
+ unsigned long flags;
int id;
- spin_lock(&pd_bitmap_lock);
+ spin_lock_irqsave(&pd_bitmap_lock, flags);
id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
BUG_ON(id == 0);
if (id > 0 && id < MAX_DOMAIN_ID)
__set_bit(id, amd_iommu_pd_alloc_bitmap);
else
id = 0;
- spin_unlock(&pd_bitmap_lock);
+ spin_unlock_irqrestore(&pd_bitmap_lock, flags);
return id;
}
static void domain_id_free(int id)
{
- spin_lock(&pd_bitmap_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&pd_bitmap_lock, flags);
if (id > 0 && id < MAX_DOMAIN_ID)
__clear_bit(id, amd_iommu_pd_alloc_bitmap);
- spin_unlock(&pd_bitmap_lock);
+ spin_unlock_irqrestore(&pd_bitmap_lock, flags);
}
static void free_gcr3_tbl_level1(u64 *tbl)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 5ed036225e69bb..41f93c3ab160d0 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1139,7 +1139,8 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
* requires a breaking update, zero the V bit, write all qwords
* but 0, then set qword 0
*/
- unused_update.data[0] = entry->data[0] & (~STRTAB_STE_0_V);
+ unused_update.data[0] = entry->data[0] &
+ cpu_to_le64(~STRTAB_STE_0_V);
entry_set(smmu, sid, entry, &unused_update, 0, 1);
entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1);
entry_set(smmu, sid, entry, target, 0, 1);
@@ -1453,14 +1454,17 @@ static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
}
-static void arm_smmu_make_bypass_ste(struct arm_smmu_ste *target)
+static void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
+ struct arm_smmu_ste *target)
{
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
STRTAB_STE_0_V |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
- target->data[1] = cpu_to_le64(
- FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
+
+ if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
+ target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+ STRTAB_STE_1_SHCFG_INCOMING));
}
static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
@@ -1523,6 +1527,7 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
u64 vtcr_val;
+ struct arm_smmu_device *smmu = master->smmu;
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
@@ -1531,9 +1536,11 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
target->data[1] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_EATS,
- master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0) |
- FIELD_PREP(STRTAB_STE_1_SHCFG,
- STRTAB_STE_1_SHCFG_INCOMING));
+ master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
+
+ if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
+ target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+ STRTAB_STE_1_SHCFG_INCOMING));
vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
@@ -1560,7 +1567,8 @@ static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
* This can safely directly manipulate the STE memory without a sync sequence
* because the STE table has not been installed in the SMMU yet.
*/
-static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
+static void arm_smmu_init_initial_stes(struct arm_smmu_device *smmu,
+ struct arm_smmu_ste *strtab,
unsigned int nent)
{
unsigned int i;
@@ -1569,7 +1577,7 @@ static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
if (disable_bypass)
arm_smmu_make_abort_ste(strtab);
else
- arm_smmu_make_bypass_ste(strtab);
+ arm_smmu_make_bypass_ste(smmu, strtab);
strtab++;
}
}
@@ -1597,7 +1605,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
return -ENOMEM;
}
- arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
+ arm_smmu_init_initial_stes(smmu, desc->l2ptr, 1 << STRTAB_SPLIT);
arm_smmu_write_strtab_l1_desc(strtab, desc);
return 0;
}
@@ -2637,8 +2645,9 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
struct device *dev)
{
struct arm_smmu_ste ste;
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- arm_smmu_make_bypass_ste(&ste);
+ arm_smmu_make_bypass_ste(master->smmu, &ste);
return arm_smmu_attach_dev_ste(dev, &ste);
}
@@ -3264,7 +3273,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
cfg->strtab_base_cfg = reg;
- arm_smmu_init_initial_stes(strtab, cfg->num_l1_ents);
+ arm_smmu_init_initial_stes(smmu, strtab, cfg->num_l1_ents);
return 0;
}
@@ -3777,6 +3786,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
return -ENXIO;
}
+ if (reg & IDR1_ATTR_TYPES_OVR)
+ smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;
+
/* Queue sizes, capped to ensure natural alignment */
smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_CMDQS, reg));
@@ -3992,7 +4004,7 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
* STE table is not programmed to HW, see
* arm_smmu_initial_bypass_stes()
*/
- arm_smmu_make_bypass_ste(
+ arm_smmu_make_bypass_ste(smmu,
arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
}
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 23baf117e7e4b5..2a19bb63e5c6d2 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -44,6 +44,7 @@
#define IDR1_TABLES_PRESET (1 << 30)
#define IDR1_QUEUES_PRESET (1 << 29)
#define IDR1_REL (1 << 28)
+#define IDR1_ATTR_TYPES_OVR (1 << 27)
#define IDR1_CMDQS GENMASK(25, 21)
#define IDR1_EVTQS GENMASK(20, 16)
#define IDR1_PRIQS GENMASK(15, 11)
@@ -647,6 +648,7 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_SVA (1 << 17)
#define ARM_SMMU_FEAT_E2H (1 << 18)
#define ARM_SMMU_FEAT_NESTING (1 << 19)
+#define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20)
u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 50eb9aed47cc58..a7ecd90303dc42 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4299,9 +4299,11 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
dev_iommu_priv_set(dev, info);
- ret = device_rbtree_insert(iommu, info);
- if (ret)
- goto free;
+ if (pdev && pci_ats_supported(pdev)) {
+ ret = device_rbtree_insert(iommu, info);
+ if (ret)
+ goto free;
+ }
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
ret = intel_pasid_alloc_table(dev);
@@ -4336,7 +4338,8 @@ static void intel_iommu_release_device(struct device *dev)
struct intel_iommu *iommu = info->iommu;
mutex_lock(&iommu->iopf_lock);
- device_rbtree_remove(info);
+ if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
+ device_rbtree_remove(info);
mutex_unlock(&iommu->iopf_lock);
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c
index cf43e798eca499..44083d01852dbf 100644
--- a/drivers/iommu/intel/perfmon.c
+++ b/drivers/iommu/intel/perfmon.c
@@ -438,7 +438,7 @@ static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
iommu_pmu_set_filter(domain, event->attr.config1,
IOMMU_PMU_FILTER_DOMAIN, idx,
event->attr.config1);
- iommu_pmu_set_filter(pasid, event->attr.config1,
+ iommu_pmu_set_filter(pasid, event->attr.config2,
IOMMU_PMU_FILTER_PASID, idx,
event->attr.config1);
iommu_pmu_set_filter(ats, event->attr.config2,
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index c1bed89b102614..ee3b469e2da155 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -66,7 +66,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
struct page *pages;
int irq, ret;
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
+ pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
if (!pages) {
pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
iommu->name);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 098869007c69e5..a95a483def2d2a 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3354,6 +3354,7 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
+ struct group_device *device;
void *curr;
int ret;
@@ -3363,10 +3364,18 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
if (!group)
return -ENODEV;
- if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner)
+ if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner ||
+ pasid == IOMMU_NO_PASID)
return -EINVAL;
mutex_lock(&group->mutex);
+ for_each_group_device(group, device) {
+ if (pasid >= device->dev->iommu->max_pasids) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ }
+
curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
if (curr) {
ret = xa_err(curr) ? : -EBUSY;
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index b8c47f18bc2612..6a2707fe7a78c0 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1790,6 +1790,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = {
{ .compatible = "mediatek,mt8365-m4u", .data = &mt8365_data},
{}
};
+MODULE_DEVICE_TABLE(of, mtk_iommu_of_ids);
static struct platform_driver mtk_iommu_driver = {
.probe = mtk_iommu_probe,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index a9fa2a54dc9b39..d6e4002200bd33 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -600,6 +600,7 @@ static const struct of_device_id mtk_iommu_v1_of_ids[] = {
{ .compatible = "mediatek,mt2701-m4u", },
{}
};
+MODULE_DEVICE_TABLE(of, mtk_iommu_v1_of_ids);
static const struct component_master_ops mtk_iommu_v1_com_ops = {
.bind = mtk_iommu_v1_bind,
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index a55528469278c7..4b021a67bdfe48 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -316,7 +316,7 @@ static int armada_370_xp_msi_init(struct device_node *node,
return 0;
}
#else
-static void armada_370_xp_msi_reenable_percpu(void) {}
+static __maybe_unused void armada_370_xp_msi_reenable_percpu(void) {}
static inline int armada_370_xp_msi_init(struct device_node *node,
phys_addr_t main_int_phys_base)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index fca888b36680df..2a537cbfcb0772 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -786,6 +786,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
struct its_cmd_block *cmd,
struct its_cmd_desc *desc)
{
+ struct its_vpe *vpe = valid_vpe(its, desc->its_vmapp_cmd.vpe);
unsigned long vpt_addr, vconf_addr;
u64 target;
bool alloc;
@@ -798,6 +799,11 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
if (is_v4_1(its)) {
alloc = !atomic_dec_return(&desc->its_vmapp_cmd.vpe->vmapp_count);
its_encode_alloc(cmd, alloc);
+ /*
+ * Unmapping a VPE is self-synchronizing on GICv4.1,
+ * no need to issue a VSYNC.
+ */
+ vpe = NULL;
}
goto out;
@@ -832,7 +838,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
out:
its_fixup_cmd(cmd);
- return valid_vpe(its, desc->its_vmapp_cmd.vpe);
+ return vpe;
}
static struct its_vpe *its_build_vmapti_cmd(struct its_node *its,
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 2776ca5fc33f39..b215b28cad7b76 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -401,23 +401,23 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
}
static int data_sock_setsockopt(struct socket *sock, int level, int optname,
- sockptr_t optval, unsigned int len)
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
int err = 0, opt = 0;
if (*debug & DEBUG_SOCKET)
printk(KERN_DEBUG "%s(%p, %d, %x, optval, %d)\n", __func__, sock,
- level, optname, len);
+ level, optname, optlen);
lock_sock(sk);
switch (optname) {
case MISDN_TIME_STAMP:
- if (copy_from_sockptr(&opt, optval, sizeof(int))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt)
_pms(sk)->cmask |= MISDN_TIME_STAMP;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 37b9f8f1ae1a27..7f3dc8ee6ab8dd 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -4221,7 +4221,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
} else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
} else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
- if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
+ if ((uint64_t)val >= (uint64_t)UINT_MAX * 1000 / HZ) {
r = -EINVAL;
ti->error = "Invalid bitmap_flush_interval argument";
goto bad;
diff --git a/drivers/md/dm-vdo/murmurhash3.c b/drivers/md/dm-vdo/murmurhash3.c
index 00c9b9c050011c..01d2743444ec6c 100644
--- a/drivers/md/dm-vdo/murmurhash3.c
+++ b/drivers/md/dm-vdo/murmurhash3.c
@@ -8,33 +8,14 @@
#include "murmurhash3.h"
+#include <asm/unaligned.h>
+
static inline u64 rotl64(u64 x, s8 r)
{
return (x << r) | (x >> (64 - r));
}
#define ROTL64(x, y) rotl64(x, y)
-static __always_inline u64 getblock64(const u64 *p, int i)
-{
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- return p[i];
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return __builtin_bswap64(p[i]);
-#else
-#error "can't figure out byte order"
-#endif
-}
-
-static __always_inline void putblock64(u64 *p, int i, u64 value)
-{
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- p[i] = value;
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- p[i] = __builtin_bswap64(value);
-#else
-#error "can't figure out byte order"
-#endif
-}
/* Finalization mix - force all bits of a hash block to avalanche */
@@ -60,6 +41,8 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out)
const u64 c1 = 0x87c37b91114253d5LLU;
const u64 c2 = 0x4cf5ad432745937fLLU;
+ u64 *hash_out = out;
+
/* body */
const u64 *blocks = (const u64 *)(data);
@@ -67,8 +50,8 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out)
int i;
for (i = 0; i < nblocks; i++) {
- u64 k1 = getblock64(blocks, i * 2 + 0);
- u64 k2 = getblock64(blocks, i * 2 + 1);
+ u64 k1 = get_unaligned_le64(&blocks[i * 2]);
+ u64 k2 = get_unaligned_le64(&blocks[i * 2 + 1]);
k1 *= c1;
k1 = ROTL64(k1, 31);
@@ -170,6 +153,6 @@ void murmurhash3_128(const void *key, const int len, const u32 seed, void *out)
h1 += h2;
h2 += h1;
- putblock64((u64 *)out, 0, h1);
- putblock64((u64 *)out, 1, h2);
+ put_unaligned_le64(h1, &hash_out[0]);
+ put_unaligned_le64(h2, &hash_out[1]);
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index be8ac24f50b6ad..7b8a71ca66dde0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1558,7 +1558,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
for (j = 0; j < i; j++)
if (r1_bio->bios[j])
rdev_dec_pending(conf->mirrors[j].rdev, mddev);
- free_r1bio(r1_bio);
+ mempool_free(r1_bio, &conf->r1bio_pool);
allow_barrier(conf, bio->bi_iter.bi_sector);
if (bio->bi_opf & REQ_NOWAIT) {
diff --git a/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c b/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
index 4c34344dc7dcb8..d7027d600208fc 100644
--- a/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
+++ b/drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
@@ -50,12 +50,12 @@ static void mtk_vcodec_vpu_reset_dec_handler(void *priv)
dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
- mutex_lock(&dev->dev_mutex);
+ mutex_lock(&dev->dev_ctx_lock);
list_for_each_entry(ctx, &dev->ctx_list, list) {
ctx->state = MTK_STATE_ABORT;
mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
}
- mutex_unlock(&dev->dev_mutex);
+ mutex_unlock(&dev->dev_ctx_lock);
}
static void mtk_vcodec_vpu_reset_enc_handler(void *priv)
@@ -65,12 +65,12 @@ static void mtk_vcodec_vpu_reset_enc_handler(void *priv)
dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
- mutex_lock(&dev->dev_mutex);
+ mutex_lock(&dev->dev_ctx_lock);
list_for_each_entry(ctx, &dev->ctx_list, list) {
ctx->state = MTK_STATE_ABORT;
mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
}
- mutex_unlock(&dev->dev_mutex);
+ mutex_unlock(&dev->dev_ctx_lock);
}
static const struct mtk_vcodec_fw_ops mtk_vcodec_vpu_msg = {
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
index f47c98faf068b6..2073781ccadb15 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
@@ -268,7 +268,9 @@ static int fops_vcodec_open(struct file *file)
ctx->dev->vdec_pdata->init_vdec_params(ctx);
+ mutex_lock(&dev->dev_ctx_lock);
list_add(&ctx->list, &dev->ctx_list);
+ mutex_unlock(&dev->dev_ctx_lock);
mtk_vcodec_dbgfs_create(ctx);
mutex_unlock(&dev->dev_mutex);
@@ -311,7 +313,9 @@ static int fops_vcodec_release(struct file *file)
v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
mtk_vcodec_dbgfs_remove(dev, ctx->id);
+ mutex_lock(&dev->dev_ctx_lock);
list_del_init(&ctx->list);
+ mutex_unlock(&dev->dev_ctx_lock);
kfree(ctx);
mutex_unlock(&dev->dev_mutex);
return 0;
@@ -404,6 +408,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
for (i = 0; i < MTK_VDEC_HW_MAX; i++)
mutex_init(&dev->dec_mutex[i]);
mutex_init(&dev->dev_mutex);
+ mutex_init(&dev->dev_ctx_lock);
spin_lock_init(&dev->irqlock);
snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h
index 849b89dd205c21..85b2c0d3d8bcdd 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h
+++ b/drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h
@@ -241,6 +241,7 @@ struct mtk_vcodec_dec_ctx {
*
* @dec_mutex: decoder hardware lock
* @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
* @decode_workqueue: decode work queue
*
* @irqlock: protect data access by irq handler and work thread
@@ -282,6 +283,7 @@ struct mtk_vcodec_dec_dev {
/* decoder hardware mutex lock */
struct mutex dec_mutex[MTK_VDEC_HW_MAX];
struct mutex dev_mutex;
+ struct mutex dev_ctx_lock;
struct workqueue_struct *decode_workqueue;
spinlock_t irqlock;
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
index 06ed47df693bfd..21836dd6ef85a3 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
@@ -869,7 +869,6 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx)
inst->vpu.codec_type = ctx->current_codec;
inst->vpu.capture_type = ctx->capture_fourcc;
- ctx->drv_handle = inst;
err = vpu_dec_init(&inst->vpu);
if (err) {
mtk_vdec_err(ctx, "vdec_hevc init err=%d", err);
@@ -898,6 +897,7 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx)
mtk_vdec_debug(ctx, "lat hevc instance >> %p, codec_type = 0x%x",
inst, inst->vpu.codec_type);
+ ctx->drv_handle = inst;
return 0;
error_free_inst:
kfree(inst);
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
index 19407f9bc773c3..987b3d71b662ac 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
@@ -449,7 +449,7 @@ static int vdec_vp8_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
inst->frm_cnt, y_fb_dma, c_fb_dma, fb);
inst->cur_fb = fb;
- dec->bs_dma = (unsigned long)bs->dma_addr;
+ dec->bs_dma = (uint64_t)bs->dma_addr;
dec->bs_sz = bs->size;
dec->cur_y_fb_dma = y_fb_dma;
dec->cur_c_fb_dma = c_fb_dma;
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
index 55355fa7009083..039082f600c813 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
@@ -16,6 +16,7 @@
#include "../vdec_drv_base.h"
#include "../vdec_vpu_if.h"
+#define VP9_MAX_SUPER_FRAMES_NUM 8
#define VP9_SUPER_FRAME_BS_SZ 64
#define MAX_VP9_DPB_SIZE 9
@@ -133,11 +134,11 @@ struct vp9_sf_ref_fb {
*/
struct vdec_vp9_vsi {
unsigned char sf_bs_buf[VP9_SUPER_FRAME_BS_SZ];
- struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_FRM_BUF_NUM-1];
+ struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_SUPER_FRAMES_NUM];
int sf_next_ref_fb_idx;
unsigned int sf_frm_cnt;
- unsigned int sf_frm_offset[VP9_MAX_FRM_BUF_NUM-1];
- unsigned int sf_frm_sz[VP9_MAX_FRM_BUF_NUM-1];
+ unsigned int sf_frm_offset[VP9_MAX_SUPER_FRAMES_NUM];
+ unsigned int sf_frm_sz[VP9_MAX_SUPER_FRAMES_NUM];
unsigned int sf_frm_idx;
unsigned int sf_init;
struct vdec_fb fb;
@@ -526,7 +527,7 @@ static void vp9_swap_frm_bufs(struct vdec_vp9_inst *inst)
/* if this super frame and it is not last sub-frame, get next fb for
* sub-frame decode
*/
- if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt - 1)
+ if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt)
vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst);
}
@@ -735,7 +736,7 @@ static void get_free_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb)
static int validate_vsi_array_indexes(struct vdec_vp9_inst *inst,
struct vdec_vp9_vsi *vsi) {
- if (vsi->sf_frm_idx >= VP9_MAX_FRM_BUF_NUM - 1) {
+ if (vsi->sf_frm_idx > VP9_MAX_SUPER_FRAMES_NUM) {
mtk_vdec_err(inst->ctx, "Invalid vsi->sf_frm_idx=%u.", vsi->sf_frm_idx);
return -EIO;
}
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
index cf48d09b78d7a1..eea709d9382091 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
@@ -1074,7 +1074,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst
unsigned int mi_row;
unsigned int mi_col;
unsigned int offset;
- unsigned int pa;
+ dma_addr_t pa;
unsigned int size;
struct vdec_vp9_slice_tiles *tiles;
unsigned char *pos;
@@ -1109,7 +1109,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst
pos = va + offset;
end = va + bs->size;
/* truncated */
- pa = (unsigned int)bs->dma_addr + offset;
+ pa = bs->dma_addr + offset;
tb = instance->tile.va;
for (i = 0; i < rows; i++) {
for (j = 0; j < cols; j++) {
diff --git a/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c
index 82e57ae983d557..da6be556727bb1 100644
--- a/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c
+++ b/drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c
@@ -77,12 +77,14 @@ static bool vpu_dec_check_ap_inst(struct mtk_vcodec_dec_dev *dec_dev, struct vde
struct mtk_vcodec_dec_ctx *ctx;
int ret = false;
+ mutex_lock(&dec_dev->dev_ctx_lock);
list_for_each_entry(ctx, &dec_dev->ctx_list, list) {
if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
ret = true;
break;
}
}
+ mutex_unlock(&dec_dev->dev_ctx_lock);
return ret;
}
diff --git a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c
index 6319f24bc714b5..3cb8a16222220e 100644
--- a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c
+++ b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c
@@ -177,7 +177,9 @@ static int fops_vcodec_open(struct file *file)
mtk_v4l2_venc_dbg(2, ctx, "Create instance [%d]@%p m2m_ctx=%p ",
ctx->id, ctx, ctx->m2m_ctx);
+ mutex_lock(&dev->dev_ctx_lock);
list_add(&ctx->list, &dev->ctx_list);
+ mutex_unlock(&dev->dev_ctx_lock);
mutex_unlock(&dev->dev_mutex);
mtk_v4l2_venc_dbg(0, ctx, "%s encoder [%d]", dev_name(&dev->plat_dev->dev),
@@ -212,7 +214,9 @@ static int fops_vcodec_release(struct file *file)
v4l2_fh_exit(&ctx->fh);
v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
+ mutex_lock(&dev->dev_ctx_lock);
list_del_init(&ctx->list);
+ mutex_unlock(&dev->dev_ctx_lock);
kfree(ctx);
mutex_unlock(&dev->dev_mutex);
return 0;
@@ -294,6 +298,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
mutex_init(&dev->enc_mutex);
mutex_init(&dev->dev_mutex);
+ mutex_init(&dev->dev_ctx_lock);
spin_lock_init(&dev->irqlock);
snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
diff --git a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h
index a042f607ed8d16..0bd85d0fb379ac 100644
--- a/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h
+++ b/drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h
@@ -178,6 +178,7 @@ struct mtk_vcodec_enc_ctx {
*
* @enc_mutex: encoder hardware lock.
* @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
* @encode_workqueue: encode work queue
*
* @enc_irq: h264 encoder irq resource
@@ -205,6 +206,7 @@ struct mtk_vcodec_enc_dev {
/* encoder hardware mutex lock */
struct mutex enc_mutex;
struct mutex dev_mutex;
+ struct mutex dev_ctx_lock;
struct workqueue_struct *encode_workqueue;
int enc_irq;
diff --git a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c
index 84ad1cc6ad171e..51bb7ee141b9e5 100644
--- a/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c
+++ b/drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c
@@ -47,12 +47,14 @@ static bool vpu_enc_check_ap_inst(struct mtk_vcodec_enc_dev *enc_dev, struct ven
struct mtk_vcodec_enc_ctx *ctx;
int ret = false;
+ mutex_lock(&enc_dev->dev_ctx_lock);
list_for_each_entry(ctx, &enc_dev->ctx_list, list) {
if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
ret = true;
break;
}
}
+ mutex_unlock(&enc_dev->dev_ctx_lock);
return ret;
}
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 64a3492e8002fb..90c51b12148e8d 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -413,7 +413,7 @@ static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user(
struct mmc_blk_ioc_data *idata;
int err;
- idata = kmalloc(sizeof(*idata), GFP_KERNEL);
+ idata = kzalloc(sizeof(*idata), GFP_KERNEL);
if (!idata) {
err = -ENOMEM;
goto out;
@@ -488,7 +488,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md,
if (idata->flags & MMC_BLK_IOC_DROP)
return 0;
- if (idata->flags & MMC_BLK_IOC_SBC)
+ if (idata->flags & MMC_BLK_IOC_SBC && i > 0)
prev_idata = idatas[i - 1];
/*
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 088f8ed4fdc464..a8ee0df4714823 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -1114,10 +1114,25 @@ static void mmc_omap_set_power(struct mmc_omap_slot *slot, int power_on,
host = slot->host;
- if (slot->vsd)
- gpiod_set_value(slot->vsd, power_on);
- if (slot->vio)
- gpiod_set_value(slot->vio, power_on);
+ if (power_on) {
+ if (slot->vsd) {
+ gpiod_set_value(slot->vsd, power_on);
+ msleep(1);
+ }
+ if (slot->vio) {
+ gpiod_set_value(slot->vio, power_on);
+ msleep(1);
+ }
+ } else {
+ if (slot->vio) {
+ gpiod_set_value(slot->vio, power_on);
+ msleep(50);
+ }
+ if (slot->vsd) {
+ gpiod_set_value(slot->vsd, power_on);
+ msleep(50);
+ }
+ }
if (slot->pdata->set_power != NULL)
slot->pdata->set_power(mmc_dev(slot->mmc), slot->id, power_on,
@@ -1254,18 +1269,18 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id)
slot->pdata = &host->pdata->slots[id];
/* Check for some optional GPIO controls */
- slot->vsd = gpiod_get_index_optional(host->dev, "vsd",
- id, GPIOD_OUT_LOW);
+ slot->vsd = devm_gpiod_get_index_optional(host->dev, "vsd",
+ id, GPIOD_OUT_LOW);
if (IS_ERR(slot->vsd))
return dev_err_probe(host->dev, PTR_ERR(slot->vsd),
"error looking up VSD GPIO\n");
- slot->vio = gpiod_get_index_optional(host->dev, "vio",
- id, GPIOD_OUT_LOW);
+ slot->vio = devm_gpiod_get_index_optional(host->dev, "vio",
+ id, GPIOD_OUT_LOW);
if (IS_ERR(slot->vio))
return dev_err_probe(host->dev, PTR_ERR(slot->vio),
"error looking up VIO GPIO\n");
- slot->cover = gpiod_get_index_optional(host->dev, "cover",
- id, GPIOD_IN);
+ slot->cover = devm_gpiod_get_index_optional(host->dev, "cover",
+ id, GPIOD_IN);
if (IS_ERR(slot->cover))
return dev_err_probe(host->dev, PTR_ERR(slot->cover),
"error looking up cover switch GPIO\n");
@@ -1379,13 +1394,6 @@ static int mmc_omap_probe(struct platform_device *pdev)
if (IS_ERR(host->virt_base))
return PTR_ERR(host->virt_base);
- host->slot_switch = gpiod_get_optional(host->dev, "switch",
- GPIOD_OUT_LOW);
- if (IS_ERR(host->slot_switch))
- return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
- "error looking up slot switch GPIO\n");
-
-
INIT_WORK(&host->slot_release_work, mmc_omap_slot_release_work);
INIT_WORK(&host->send_stop_work, mmc_omap_send_stop_work);
@@ -1404,6 +1412,12 @@ static int mmc_omap_probe(struct platform_device *pdev)
host->dev = &pdev->dev;
platform_set_drvdata(pdev, host);
+ host->slot_switch = devm_gpiod_get_optional(host->dev, "switch",
+ GPIOD_OUT_LOW);
+ if (IS_ERR(host->slot_switch))
+ return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
+ "error looking up slot switch GPIO\n");
+
host->id = pdev->id;
host->irq = irq;
host->phys_base = res->start;
diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c
index ab4b964d405844..1d8f5a76096aeb 100644
--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
+++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
@@ -999,6 +999,17 @@ free_pltfm:
return err;
}
+static void dwcmshc_disable_card_clk(struct sdhci_host *host)
+{
+ u16 ctrl;
+
+ ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+ if (ctrl & SDHCI_CLOCK_CARD_EN) {
+ ctrl &= ~SDHCI_CLOCK_CARD_EN;
+ sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL);
+ }
+}
+
static void dwcmshc_remove(struct platform_device *pdev)
{
struct sdhci_host *host = platform_get_drvdata(pdev);
@@ -1006,8 +1017,14 @@ static void dwcmshc_remove(struct platform_device *pdev)
struct dwcmshc_priv *priv = sdhci_pltfm_priv(pltfm_host);
struct rk35xx_priv *rk_priv = priv->priv;
+ pm_runtime_get_sync(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_put_noidle(&pdev->dev);
+
sdhci_remove_host(host, 0);
+ dwcmshc_disable_card_clk(host);
+
clk_disable_unprepare(pltfm_host->clk);
clk_disable_unprepare(priv->bus_clk);
if (rk_priv)
@@ -1099,17 +1116,6 @@ static void dwcmshc_enable_card_clk(struct sdhci_host *host)
}
}
-static void dwcmshc_disable_card_clk(struct sdhci_host *host)
-{
- u16 ctrl;
-
- ctrl = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
- if (ctrl & SDHCI_CLOCK_CARD_EN) {
- ctrl &= ~SDHCI_CLOCK_CARD_EN;
- sdhci_writew(host, ctrl, SDHCI_CLOCK_CONTROL);
- }
-}
-
static int dwcmshc_runtime_suspend(struct device *dev)
{
struct sdhci_host *host = dev_get_drvdata(dev);
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index e78faef67d7ab5..94076b09557198 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -1439,6 +1439,9 @@ static int __maybe_unused sdhci_omap_runtime_suspend(struct device *dev)
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host);
+ if (host->tuning_mode != SDHCI_TUNING_MODE_3)
+ mmc_retune_needed(host->mmc);
+
if (omap_host->con != -EINVAL)
sdhci_runtime_suspend_host(host);
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index 97a00ec9a4d489..caacdc0a381945 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -209,7 +209,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
if (dev->bdev_file) {
invalidate_mapping_pages(dev->bdev_file->f_mapping, 0, -1);
- fput(dev->bdev_file);
+ bdev_fput(dev->bdev_file);
}
kfree(dev);
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 767f66c37f6b5c..8090390edaf9db 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -950,20 +950,173 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
mutex_unlock(&priv->reg_mutex);
}
-/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std
- * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA
- * must only be propagated to C-VLAN and MAC Bridge components. That means
- * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports,
- * these frames are supposed to be processed by the CPU (software). So we make
- * the switch only forward them to the CPU port. And if received from a CPU
- * port, forward to a single port. The software is responsible of making the
- * switch conform to the latter by setting a single port as destination port on
- * the special tag.
+/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL)
+ * of the Open Systems Interconnection basic reference model (OSI/RM) are
+ * described; the medium access control (MAC) and logical link control (LLC)
+ * sublayers. The MAC sublayer is the one facing the physical layer.
*
- * This switch intellectual property cannot conform to this part of the standard
- * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC
- * DAs, it also includes :22-FF which the scope of propagation is not supposed
- * to be restricted for these MAC DAs.
+ * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A
+ * Bridge component comprises a MAC Relay Entity for interconnecting the Ports
+ * of the Bridge, at least two Ports, and higher layer entities with at least a
+ * Spanning Tree Protocol Entity included.
+ *
+ * Each Bridge Port also functions as an end station and shall provide the MAC
+ * Service to an LLC Entity. Each instance of the MAC Service is provided to a
+ * distinct LLC Entity that supports protocol identification, multiplexing, and
+ * demultiplexing, for protocol data unit (PDU) transmission and reception by
+ * one or more higher layer entities.
+ *
+ * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC
+ * Entity associated with each Bridge Port is modeled as being directly
+ * connected to the attached Local Area Network (LAN).
+ *
+ * On the switch with CPU port architecture, CPU port functions as Management
+ * Port, and the Management Port functionality is provided by software which
+ * functions as an end station. Software is connected to an IEEE 802 LAN that is
+ * wholly contained within the system that incorporates the Bridge. Software
+ * provides access to the LLC Entity associated with each Bridge Port by the
+ * value of the source port field on the special tag on the frame received by
+ * software.
+ *
+ * We call frames that carry control information to determine the active
+ * topology and current extent of each Virtual Local Area Network (VLAN), i.e.,
+ * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration
+ * Protocol Data Units (MVRPDUs), and frames from other link constrained
+ * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and
+ * Link Layer Discovery Protocol (LLDP), link-local frames. They are not
+ * forwarded by a Bridge. Permanently configured entries in the filtering
+ * database (FDB) ensure that such frames are discarded by the Forwarding
+ * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail:
+ *
+ * Each of the reserved MAC addresses specified in Table 8-1
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be
+ * permanently configured in the FDB in C-VLAN components and ERs.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-2
+ * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently
+ * configured in the FDB in S-VLAN components.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-3
+ * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in
+ * TPMR components.
+ *
+ * The FDB entries for reserved MAC addresses shall specify filtering for all
+ * Bridge Ports and all VIDs. Management shall not provide the capability to
+ * modify or remove entries for reserved MAC addresses.
+ *
+ * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of
+ * propagation of PDUs within a Bridged Network, as follows:
+ *
+ * The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no
+ * conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN)
+ * component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward.
+ * PDUs transmitted using this destination address, or any other addresses
+ * that appear in Table 8-1, Table 8-2, and Table 8-3
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can
+ * therefore travel no further than those stations that can be reached via a
+ * single individual LAN from the originating station.
+ *
+ * The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an
+ * address that no conformant S-VLAN component, C-VLAN component, or MAC
+ * Bridge can forward; however, this address is relayed by a TPMR component.
+ * PDUs using this destination address, or any of the other addresses that
+ * appear in both Table 8-1 and Table 8-2 but not in Table 8-3
+ * (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by
+ * any TPMRs but will propagate no further than the nearest S-VLAN component,
+ * C-VLAN component, or MAC Bridge.
+ *
+ * The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address
+ * that no conformant C-VLAN component, MAC Bridge can forward; however, it is
+ * relayed by TPMR components and S-VLAN components. PDUs using this
+ * destination address, or any of the other addresses that appear in Table 8-1
+ * but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]),
+ * will be relayed by TPMR components and S-VLAN components but will propagate
+ * no further than the nearest C-VLAN component or MAC Bridge.
+ *
+ * Because the LLC Entity associated with each Bridge Port is provided via CPU
+ * port, we must not filter these frames but forward them to CPU port.
+ *
+ * In a Bridge, the transmission Port is majorly decided by ingress and egress
+ * rules, FDB, and spanning tree Port State functions of the Forwarding Process.
+ * For link-local frames, only CPU port should be designated as destination port
+ * in the FDB, and the other functions of the Forwarding Process must not
+ * interfere with the decision of the transmission Port. We call this process
+ * trapping frames to CPU port.
+ *
+ * Therefore, on the switch with CPU port architecture, link-local frames must
+ * be trapped to CPU port, and certain link-local frames received by a Port of a
+ * Bridge comprising a TPMR component or an S-VLAN component must be excluded
+ * from it.
+ *
+ * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port
+ * MAC Relay (TPMR) component as a TPMR component supports only a subset of the
+ * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port
+ * doesn't count) of this architecture will either function as a standard MAC
+ * Bridge or a standard VLAN Bridge.
+ *
+ * Therefore, a Bridge of this architecture can only comprise S-VLAN components,
+ * C-VLAN components, or MAC Bridge components. Since there's no TPMR component,
+ * we don't need to relay PDUs using the destination addresses specified on the
+ * Nearest non-TPMR section, and the proportion of the Nearest Customer Bridge
+ * section where they must be relayed by TPMR components.
+ *
+ * One option to trap link-local frames to CPU port is to add static FDB entries
+ * with CPU port designated as destination port. However, because that
+ * Independent VLAN Learning (IVL) is being used on every VID, each entry only
+ * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC
+ * Bridge component or a C-VLAN component, there would have to be 16 times 4096
+ * entries. This switch intellectual property can only hold a maximum of 2048
+ * entries. Using this option, there also isn't a mechanism to prevent
+ * link-local frames from being discarded when the spanning tree Port State of
+ * the reception Port is discarding.
+ *
+ * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4
+ * registers. Whilst this applies to every VID, it doesn't contain all of the
+ * reserved MAC addresses without affecting the remaining Standard Group MAC
+ * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the
+ * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination
+ * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF
+ * destination addresses which may be relayed by MAC Bridges or VLAN Bridges.
+ * The latter option provides better but not complete conformance.
+ *
+ * This switch intellectual property also does not provide a mechanism to trap
+ * link-local frames with specific destination addresses to CPU port by Bridge,
+ * to conform to the filtering rules for the distinct Bridge components.
+ *
+ * Therefore, regardless of the type of the Bridge component, link-local frames
+ * with these destination addresses will be trapped to CPU port:
+ *
+ * 01-80-C2-00-00-[00,01,02,03,0E]
+ *
+ * In a Bridge comprising a MAC Bridge component or a C-VLAN component:
+ *
+ * Link-local frames with these destination addresses won't be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F]
+ *
+ * In a Bridge comprising an S-VLAN component:
+ *
+ * Link-local frames with these destination addresses will be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-00
+ *
+ * Link-local frames with these destination addresses won't be trapped to CPU
+ * port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ * 01-80-C2-00-00-[04,05,06,07,08,09,0A]
+ *
+ * To trap link-local frames to CPU port as conformant as this switch
+ * intellectual property can allow, link-local frames are made to be regarded as
+ * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual
+ * property only lets the frames regarded as BPDUs bypass the spanning tree Port
+ * State function of the Forwarding Process.
+ *
+ * The only remaining interference is the ingress rules. When the reception Port
+ * has no PVID assigned on software, VLAN-untagged frames won't be allowed in.
+ * There doesn't seem to be a mechanism on the switch intellectual property to
+ * have link-local frames bypass this function of the Forwarding Process.
*/
static void
mt753x_trap_frames(struct mt7530_priv *priv)
@@ -971,35 +1124,43 @@ mt753x_trap_frames(struct mt7530_priv *priv)
/* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them
* VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK |
- MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
- MT753X_BPDU_PORT_FW_MASK,
- MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_BPC,
+ MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK |
+ MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
+ MT753X_BPDU_PORT_FW_MASK,
+ MT753X_PAE_BPDU_FR |
+ MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
/* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress
* them VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK |
- MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK |
- MT753X_R01_PORT_FW_MASK,
- MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_RGAC1,
+ MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK |
+ MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR |
+ MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK,
+ MT753X_R02_BPDU_FR |
+ MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_R01_BPDU_FR |
+ MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
/* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress
* them VLAN-untagged.
*/
- mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK |
- MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK |
- MT753X_R03_PORT_FW_MASK,
- MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
- MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
- MT753X_BPDU_CPU_ONLY);
+ mt7530_rmw(priv, MT753X_RGAC2,
+ MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK |
+ MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR |
+ MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK,
+ MT753X_R0E_BPDU_FR |
+ MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+ MT753X_R03_BPDU_FR |
+ MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+ MT753X_BPDU_CPU_ONLY);
}
static void
@@ -1722,14 +1883,16 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
static int mt753x_mirror_port_get(unsigned int id, u32 val)
{
- return (id == ID_MT7531) ? MT7531_MIRROR_PORT_GET(val) :
- MIRROR_PORT(val);
+ return (id == ID_MT7531 || id == ID_MT7988) ?
+ MT7531_MIRROR_PORT_GET(val) :
+ MIRROR_PORT(val);
}
static int mt753x_mirror_port_set(unsigned int id, u32 val)
{
- return (id == ID_MT7531) ? MT7531_MIRROR_PORT_SET(val) :
- MIRROR_PORT(val);
+ return (id == ID_MT7531 || id == ID_MT7988) ?
+ MT7531_MIRROR_PORT_SET(val) :
+ MIRROR_PORT(val);
}
static int mt753x_port_mirror_add(struct dsa_switch *ds, int port,
@@ -2268,8 +2431,6 @@ mt7530_setup(struct dsa_switch *ds)
SYS_CTRL_PHY_RST | SYS_CTRL_SW_RST |
SYS_CTRL_REG_RST);
- mt7530_pll_setup(priv);
-
/* Lower Tx driving for TRGMII path */
for (i = 0; i < NUM_TRGMII_CTRL; i++)
mt7530_write(priv, MT7530_TRGMII_TD_ODT(i),
@@ -2285,6 +2446,9 @@ mt7530_setup(struct dsa_switch *ds)
val |= MHWTRAP_MANUAL;
mt7530_write(priv, MT7530_MHWTRAP, val);
+ if ((val & HWTRAP_XTAL_MASK) == HWTRAP_XTAL_40MHZ)
+ mt7530_pll_setup(priv);
+
mt753x_trap_frames(priv);
/* Enable and reset MIB counters */
@@ -2318,6 +2482,9 @@ mt7530_setup(struct dsa_switch *ds)
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
+ /* Allow mirroring frames received on the local port (monitor port). */
+ mt7530_set(priv, MT753X_AGC, LOCAL_EN);
+
/* Setup VLAN ID 0 for VLAN-unaware bridges */
ret = mt7530_setup_vlan0(priv);
if (ret)
@@ -2429,6 +2596,9 @@ mt7531_setup_common(struct dsa_switch *ds)
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
+ /* Allow mirroring frames received on the local port (monitor port). */
+ mt7530_set(priv, MT753X_AGC, LOCAL_EN);
+
/* Flush the FDB table */
ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL);
if (ret < 0)
@@ -2504,18 +2674,25 @@ mt7531_setup(struct dsa_switch *ds)
mt7530_rmw(priv, MT7531_GPIO_MODE0, MT7531_GPIO0_MASK,
MT7531_GPIO0_INTERRUPT);
- /* Enable PHY core PLL, since phy_device has not yet been created
- * provided for phy_[read,write]_mmd_indirect is called, we provide
- * our own mt7531_ind_mmd_phy_[read,write] to complete this
- * function.
+ /* Enable Energy-Efficient Ethernet (EEE) and PHY core PLL, since
+ * phy_device has not yet been created provided for
+ * phy_[read,write]_mmd_indirect is called, we provide our own
+ * mt7531_ind_mmd_phy_[read,write] to complete this function.
*/
val = mt7531_ind_c45_phy_read(priv, MT753X_CTRL_PHY_ADDR,
MDIO_MMD_VEND2, CORE_PLL_GROUP4);
- val |= MT7531_PHY_PLL_BYPASS_MODE;
+ val |= MT7531_RG_SYSPLL_DMY2 | MT7531_PHY_PLL_BYPASS_MODE;
val &= ~MT7531_PHY_PLL_OFF;
mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2,
CORE_PLL_GROUP4, val);
+ /* Disable EEE advertisement on the switch PHYs. */
+ for (i = MT753X_CTRL_PHY_ADDR;
+ i < MT753X_CTRL_PHY_ADDR + MT7530_NUM_PHYS; i++) {
+ mt7531_ind_c45_phy_write(priv, i, MDIO_MMD_AN, MDIO_AN_EEE_ADV,
+ 0);
+ }
+
mt7531_setup_common(ds);
/* Setup VLAN ID 0 for VLAN-unaware bridges */
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index d17b318e6ee488..a08053390b285e 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -32,6 +32,10 @@ enum mt753x_id {
#define SYSC_REG_RSTCTRL 0x34
#define RESET_MCM BIT(2)
+/* Register for ARL global control */
+#define MT753X_AGC 0xc
+#define LOCAL_EN BIT(7)
+
/* Registers to mac forward control for unknown frames */
#define MT7530_MFC 0x10
#define BC_FFP(x) (((x) & 0xff) << 24)
@@ -65,6 +69,7 @@ enum mt753x_id {
/* Registers for BPDU and PAE frame control*/
#define MT753X_BPC 0x24
+#define MT753X_PAE_BPDU_FR BIT(25)
#define MT753X_PAE_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_PAE_EG_TAG(x) FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x)
#define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16)
@@ -75,20 +80,24 @@ enum mt753x_id {
/* Register for :01 and :02 MAC DA frame control */
#define MT753X_RGAC1 0x28
+#define MT753X_R02_BPDU_FR BIT(25)
#define MT753X_R02_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_R02_EG_TAG(x) FIELD_PREP(MT753X_R02_EG_TAG_MASK, x)
#define MT753X_R02_PORT_FW_MASK GENMASK(18, 16)
#define MT753X_R02_PORT_FW(x) FIELD_PREP(MT753X_R02_PORT_FW_MASK, x)
+#define MT753X_R01_BPDU_FR BIT(9)
#define MT753X_R01_EG_TAG_MASK GENMASK(8, 6)
#define MT753X_R01_EG_TAG(x) FIELD_PREP(MT753X_R01_EG_TAG_MASK, x)
#define MT753X_R01_PORT_FW_MASK GENMASK(2, 0)
/* Register for :03 and :0E MAC DA frame control */
#define MT753X_RGAC2 0x2c
+#define MT753X_R0E_BPDU_FR BIT(25)
#define MT753X_R0E_EG_TAG_MASK GENMASK(24, 22)
#define MT753X_R0E_EG_TAG(x) FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x)
#define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16)
#define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x)
+#define MT753X_R03_BPDU_FR BIT(9)
#define MT753X_R03_EG_TAG_MASK GENMASK(8, 6)
#define MT753X_R03_EG_TAG(x) FIELD_PREP(MT753X_R03_EG_TAG_MASK, x)
#define MT753X_R03_PORT_FW_MASK GENMASK(2, 0)
@@ -616,6 +625,7 @@ enum mt7531_clk_skew {
#define RG_SYSPLL_DDSFBK_EN BIT(12)
#define RG_SYSPLL_BIAS_EN BIT(11)
#define RG_SYSPLL_BIAS_LPF_EN BIT(10)
+#define MT7531_RG_SYSPLL_DMY2 BIT(6)
#define MT7531_PHY_PLL_OFF BIT(5)
#define MT7531_PHY_PLL_BYPASS_MODE BIT(4)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 9ed1821184ece5..c95787cb908673 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -5503,8 +5503,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.family = MV88E6XXX_FAMILY_6250,
.name = "Marvell 88E6020",
.num_databases = 64,
- .num_ports = 4,
+ /* Ports 2-4 are not routed to pins
+ * => usable ports 0, 1, 5, 6
+ */
+ .num_ports = 7,
.num_internal_phys = 2,
+ .invalid_port_mask = BIT(2) | BIT(3) | BIT(4),
.max_vid = 4095,
.port_base_addr = 0x8,
.phy_base_addr = 0x0,
diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
index 833e55e4b96129..52ddb4ef259e93 100644
--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -94,7 +94,7 @@ int sja1110_pcs_mdio_read_c45(struct mii_bus *bus, int phy, int mmd, int reg)
return tmp & 0xffff;
}
-int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int reg, int mmd,
+int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int mmd, int reg,
u16 val)
{
struct sja1105_mdio_private *mdio_priv = bus->priv;
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 9e9e4a03f1a8c9..2d8a66ea82fab7 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -351,7 +351,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
io_sq->bounce_buf_ctrl.next_to_use = 0;
- size = io_sq->bounce_buf_ctrl.buffer_size *
+ size = (size_t)io_sq->bounce_buf_ctrl.buffer_size *
io_sq->bounce_buf_ctrl.buffers_num;
dev_node = dev_to_node(ena_dev->dmadev);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 09e7da1a69c9f0..be5acfa41ee0ce 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -718,8 +718,11 @@ void ena_unmap_tx_buff(struct ena_ring *tx_ring,
static void ena_free_tx_bufs(struct ena_ring *tx_ring)
{
bool print_once = true;
+ bool is_xdp_ring;
u32 i;
+ is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid);
+
for (i = 0; i < tx_ring->ring_size; i++) {
struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
@@ -739,10 +742,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring)
ena_unmap_tx_buff(tx_ring, tx_info);
- dev_kfree_skb_any(tx_info->skb);
+ if (is_xdp_ring)
+ xdp_return_frame(tx_info->xdpf);
+ else
+ dev_kfree_skb_any(tx_info->skb);
}
- netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
- tx_ring->qid));
+
+ if (!is_xdp_ring)
+ netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+ tx_ring->qid));
}
static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
@@ -3481,10 +3489,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
{
struct ena_ring *tx_ring;
struct ena_ring *rx_ring;
- int i, budget, rc;
+ int qid, budget, rc;
int io_queue_count;
io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
+
/* Make sure the driver doesn't turn the device in other process */
smp_rmb();
@@ -3497,27 +3506,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
return;
- budget = ENA_MONITORED_TX_QUEUES;
+ budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES);
- for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
- tx_ring = &adapter->tx_ring[i];
- rx_ring = &adapter->rx_ring[i];
+ qid = adapter->last_monitored_tx_qid;
+
+ while (budget) {
+ qid = (qid + 1) % io_queue_count;
+
+ tx_ring = &adapter->tx_ring[qid];
+ rx_ring = &adapter->rx_ring[qid];
rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
if (unlikely(rc))
return;
- rc = !ENA_IS_XDP_INDEX(adapter, i) ?
+ rc = !ENA_IS_XDP_INDEX(adapter, qid) ?
check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
if (unlikely(rc))
return;
budget--;
- if (!budget)
- break;
}
- adapter->last_monitored_tx_qid = i % io_queue_count;
+ adapter->last_monitored_tx_qid = qid;
}
/* trigger napi schedule after 2 consecutive detections */
diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c
index 337c435d3ce998..5b175e7e92a10b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_xdp.c
+++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c
@@ -89,7 +89,7 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
rc = ena_xdp_tx_map_frame(tx_ring, tx_info, xdpf, &ena_tx_ctx);
if (unlikely(rc))
- return rc;
+ goto err;
ena_tx_ctx.req_id = req_id;
@@ -112,7 +112,9 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
error_unmap_dma:
ena_unmap_tx_buff(tx_ring, tx_info);
+err:
tx_info->xdpf = NULL;
+
return rc;
}
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 9662ee72814c0c..536635e5772799 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -593,6 +593,16 @@ err_out:
pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
}
+void pdsc_pci_reset_thread(struct work_struct *work)
+{
+ struct pdsc *pdsc = container_of(work, struct pdsc, pci_reset_work);
+ struct pci_dev *pdev = pdsc->pdev;
+
+ pci_dev_get(pdev);
+ pci_reset_function(pdev);
+ pci_dev_put(pdev);
+}
+
static void pdsc_check_pci_health(struct pdsc *pdsc)
{
u8 fw_status;
@@ -607,7 +617,8 @@ static void pdsc_check_pci_health(struct pdsc *pdsc)
if (fw_status != PDS_RC_BAD_PCI)
return;
- pci_reset_function(pdsc->pdev);
+ /* prevent deadlock between pdsc_reset_prepare and pdsc_health_thread */
+ queue_work(pdsc->wq, &pdsc->pci_reset_work);
}
void pdsc_health_thread(struct work_struct *work)
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index 92d7657dd6147e..a3e17a0c187a6a 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -197,6 +197,7 @@ struct pdsc {
struct pdsc_qcq notifyqcq;
u64 last_eid;
struct pdsc_viftype *viftype_status;
+ struct work_struct pci_reset_work;
};
/** enum pds_core_dbell_bits - bitwise composition of dbell values.
@@ -313,5 +314,6 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
void pdsc_fw_down(struct pdsc *pdsc);
void pdsc_fw_up(struct pdsc *pdsc);
+void pdsc_pci_reset_thread(struct work_struct *work);
#endif /* _PDSC_H_ */
diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c
index e494e1298dc9a3..495ef4ef8c103d 100644
--- a/drivers/net/ethernet/amd/pds_core/dev.c
+++ b/drivers/net/ethernet/amd/pds_core/dev.c
@@ -229,6 +229,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc)
.reset.opcode = PDS_CORE_CMD_RESET,
};
+ if (!pdsc_is_fw_running(pdsc))
+ return 0;
+
return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
}
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index ab6133e7db422d..660268ff95623f 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -239,6 +239,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid);
pdsc->wq = create_singlethread_workqueue(wq_name);
INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+ INIT_WORK(&pdsc->pci_reset_work, pdsc_pci_reset_thread);
timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
index dd06b68b33ed61..72ea97c5d5d424 100644
--- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
+++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c
@@ -392,7 +392,9 @@ static void umac_reset(struct bcmasp_intf *intf)
umac_wl(intf, 0x0, UMC_CMD);
umac_wl(intf, UMC_CMD_SW_RESET, UMC_CMD);
usleep_range(10, 100);
- umac_wl(intf, 0x0, UMC_CMD);
+ /* We hold the umac in reset and bring it out of
+ * reset when phy link is up.
+ */
}
static void umac_set_hw_addr(struct bcmasp_intf *intf,
@@ -412,6 +414,8 @@ static void umac_enable_set(struct bcmasp_intf *intf, u32 mask,
u32 reg;
reg = umac_rl(intf, UMC_CMD);
+ if (reg & UMC_CMD_SW_RESET)
+ return;
if (enable)
reg |= mask;
else
@@ -430,7 +434,6 @@ static void umac_init(struct bcmasp_intf *intf)
umac_wl(intf, 0x800, UMC_FRM_LEN);
umac_wl(intf, 0xffff, UMC_PAUSE_CNTRL);
umac_wl(intf, 0x800, UMC_RX_MAX_PKT_SZ);
- umac_enable_set(intf, UMC_CMD_PROMISC, 1);
}
static int bcmasp_tx_poll(struct napi_struct *napi, int budget)
@@ -658,6 +661,12 @@ static void bcmasp_adj_link(struct net_device *dev)
UMC_CMD_HD_EN | UMC_CMD_RX_PAUSE_IGNORE |
UMC_CMD_TX_PAUSE_IGNORE);
reg |= cmd_bits;
+ if (reg & UMC_CMD_SW_RESET) {
+ reg &= ~UMC_CMD_SW_RESET;
+ umac_wl(intf, reg, UMC_CMD);
+ udelay(2);
+ reg |= UMC_CMD_TX_EN | UMC_CMD_RX_EN | UMC_CMD_PROMISC;
+ }
umac_wl(intf, reg, UMC_CMD);
active = phy_init_eee(phydev, 0) >= 0;
@@ -1035,19 +1044,12 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
/* Indicate that the MAC is responsible for PHY PM */
phydev->mac_managed_pm = true;
- } else if (!intf->wolopts) {
- ret = phy_resume(dev->phydev);
- if (ret)
- goto err_phy_disable;
}
umac_reset(intf);
umac_init(intf);
- /* Disable the UniMAC RX/TX */
- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 0);
-
umac_set_hw_addr(intf, dev->dev_addr);
intf->old_duplex = -1;
@@ -1062,9 +1064,6 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect)
netif_napi_add(intf->ndev, &intf->rx_napi, bcmasp_rx_poll);
bcmasp_enable_rx(intf, 1);
- /* Turn on UniMAC TX/RX */
- umac_enable_set(intf, (UMC_CMD_RX_EN | UMC_CMD_TX_EN), 1);
-
intf->crc_fwd = !!(umac_rl(intf, UMC_CMD) & UMC_CMD_CRC_FWD);
bcmasp_netif_start(dev);
@@ -1306,7 +1305,14 @@ static void bcmasp_suspend_to_wol(struct bcmasp_intf *intf)
if (intf->wolopts & WAKE_FILTER)
bcmasp_netfilt_suspend(intf);
- /* UniMAC receive needs to be turned on */
+ /* Bring UniMAC out of reset if needed and enable RX */
+ reg = umac_rl(intf, UMC_CMD);
+ if (reg & UMC_CMD_SW_RESET)
+ reg &= ~UMC_CMD_SW_RESET;
+
+ reg |= UMC_CMD_RX_EN | UMC_CMD_PROMISC;
+ umac_wl(intf, reg, UMC_CMD);
+
umac_enable_set(intf, UMC_CMD_RX_EN, 1);
if (intf->parent->wol_irq > 0) {
@@ -1324,7 +1330,6 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf)
{
struct device *kdev = &intf->parent->pdev->dev;
struct net_device *dev = intf->ndev;
- int ret = 0;
if (!netif_running(dev))
return 0;
@@ -1334,10 +1339,6 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf)
bcmasp_netif_deinit(dev);
if (!intf->wolopts) {
- ret = phy_suspend(dev->phydev);
- if (ret)
- goto out;
-
if (intf->internal_phy)
bcmasp_ephy_enable_set(intf, false);
else
@@ -1354,11 +1355,7 @@ int bcmasp_interface_suspend(struct bcmasp_intf *intf)
clk_disable_unprepare(intf->parent->clk);
- return ret;
-
-out:
- bcmasp_netif_init(dev, false);
- return ret;
+ return 0;
}
static void bcmasp_resume_from_wol(struct bcmasp_intf *intf)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 493b724848c8f4..57e61f9631678e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11758,6 +11758,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
/* VF-reps may need to be re-opened after the PF is re-opened */
if (BNXT_PF(bp))
bnxt_vf_reps_open(bp);
+ if (bp->ptp_cfg)
+ atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS);
bnxt_ptp_init_rtc(bp, true);
bnxt_ptp_cfg_tstamp_filters(bp);
bnxt_cfg_usr_fltrs(bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 93f9bd55020f27..195c02dc068305 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -210,6 +210,9 @@ void bnxt_ulp_start(struct bnxt *bp, int err)
if (err)
return;
+ if (edev->ulp_tbl->msix_requested)
+ bnxt_fill_msix_vecs(bp, edev->msix_entries);
+
if (aux_priv) {
struct auxiliary_device *adev;
@@ -392,12 +395,13 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp)
if (!edev)
goto aux_dev_uninit;
+ aux_priv->edev = edev;
+
ulp = kzalloc(sizeof(*ulp), GFP_KERNEL);
if (!ulp)
goto aux_dev_uninit;
edev->ulp_tbl = ulp;
- aux_priv->edev = edev;
bp->edev = edev;
bnxt_set_edev_info(edev, bp);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 7396e2823e3285..b1f84b37032a78 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3280,7 +3280,7 @@ static void bcmgenet_get_hw_addr(struct bcmgenet_priv *priv,
}
/* Returns a reusable dma control register value */
-static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv)
+static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv, bool flush_rx)
{
unsigned int i;
u32 reg;
@@ -3305,6 +3305,14 @@ static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv)
udelay(10);
bcmgenet_umac_writel(priv, 0, UMAC_TX_FLUSH);
+ if (flush_rx) {
+ reg = bcmgenet_rbuf_ctrl_get(priv);
+ bcmgenet_rbuf_ctrl_set(priv, reg | BIT(0));
+ udelay(10);
+ bcmgenet_rbuf_ctrl_set(priv, reg);
+ udelay(10);
+ }
+
return dma_ctrl;
}
@@ -3368,8 +3376,8 @@ static int bcmgenet_open(struct net_device *dev)
bcmgenet_set_hw_addr(priv, dev->dev_addr);
- /* Disable RX/TX DMA and flush TX queues */
- dma_ctrl = bcmgenet_dma_disable(priv);
+ /* Disable RX/TX DMA and flush TX and RX queues */
+ dma_ctrl = bcmgenet_dma_disable(priv, true);
/* Reinitialize TDMA and RDMA and SW housekeeping */
ret = bcmgenet_init_dma(priv);
@@ -4235,7 +4243,7 @@ static int bcmgenet_resume(struct device *d)
bcmgenet_hfb_create_rxnfc_filter(priv, rule);
/* Disable RX/TX DMA and flush TX queues */
- dma_ctrl = bcmgenet_dma_disable(priv);
+ dma_ctrl = bcmgenet_dma_disable(priv, false);
/* Reinitialize TDMA and RDMA and SW housekeeping */
ret = bcmgenet_init_dma(priv);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index d7693fdf640d53..8bd213da8fb6f5 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2454,8 +2454,6 @@ static int fec_enet_mii_probe(struct net_device *ndev)
fep->link = 0;
fep->full_duplex = 0;
- phy_dev->mac_managed_pm = true;
-
phy_attached_info(phy_dev);
return 0;
@@ -2467,10 +2465,12 @@ static int fec_enet_mii_init(struct platform_device *pdev)
struct net_device *ndev = platform_get_drvdata(pdev);
struct fec_enet_private *fep = netdev_priv(ndev);
bool suppress_preamble = false;
+ struct phy_device *phydev;
struct device_node *node;
int err = -ENXIO;
u32 mii_speed, holdtime;
u32 bus_freq;
+ int addr;
/*
* The i.MX28 dual fec interfaces are not equal.
@@ -2584,6 +2584,13 @@ static int fec_enet_mii_init(struct platform_device *pdev)
goto err_out_free_mdiobus;
of_node_put(node);
+ /* find all the PHY devices on the bus and set mac_managed_pm to true */
+ for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+ phydev = mdiobus_get_phy(fep->mii_bus, addr);
+ if (phydev)
+ phydev->mac_managed_pm = true;
+ }
+
mii_cnt++;
/* save fec0 mii_bus */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
index f3c9395d8351cb..618f66d9586b39 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_tqp_stats.c
@@ -85,7 +85,7 @@ int hclge_comm_tqps_update_stats(struct hnae3_handle *handle,
hclge_comm_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_TX_STATS,
true);
- desc.data[0] = cpu_to_le32(tqp->index & 0x1ff);
+ desc.data[0] = cpu_to_le32(tqp->index);
ret = hclge_comm_cmd_send(hw, &desc, 1);
if (ret) {
dev_err(&hw->cmq.csq.pdev->dev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 999a0ee162a644..941cb529d671fb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -78,6 +78,9 @@ static const struct hns3_stats hns3_rxq_stats[] = {
#define HNS3_NIC_LB_TEST_NO_MEM_ERR 1
#define HNS3_NIC_LB_TEST_TX_CNT_ERR 2
#define HNS3_NIC_LB_TEST_RX_CNT_ERR 3
+#define HNS3_NIC_LB_TEST_UNEXECUTED 4
+
+static int hns3_get_sset_count(struct net_device *netdev, int stringset);
static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
{
@@ -418,18 +421,26 @@ static void hns3_do_external_lb(struct net_device *ndev,
static void hns3_self_test(struct net_device *ndev,
struct ethtool_test *eth_test, u64 *data)
{
+ int cnt = hns3_get_sset_count(ndev, ETH_SS_TEST);
struct hns3_nic_priv *priv = netdev_priv(ndev);
struct hnae3_handle *h = priv->ae_handle;
int st_param[HNAE3_LOOP_NONE][2];
bool if_running = netif_running(ndev);
+ int i;
+
+ /* initialize the loopback test result, avoid marking an unexcuted
+ * loopback test as PASS.
+ */
+ for (i = 0; i < cnt; i++)
+ data[i] = HNS3_NIC_LB_TEST_UNEXECUTED;
if (hns3_nic_resetting(ndev)) {
netdev_err(ndev, "dev resetting!");
- return;
+ goto failure;
}
if (!(eth_test->flags & ETH_TEST_FL_OFFLINE))
- return;
+ goto failure;
if (netif_msg_ifdown(h))
netdev_info(ndev, "self test start\n");
@@ -451,6 +462,10 @@ static void hns3_self_test(struct net_device *ndev,
if (netif_msg_ifdown(h))
netdev_info(ndev, "self test end\n");
+ return;
+
+failure:
+ eth_test->flags |= ETH_TEST_FL_FAILED;
}
static void hns3_update_limit_promisc_mode(struct net_device *netdev,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index b4afb66efe5c5c..ff6a2ed23ddb6b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11626,6 +11626,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
if (ret)
goto err_pci_uninit;
+ devl_lock(hdev->devlink);
+
/* Firmware command queue initialize */
ret = hclge_comm_cmd_queue_init(hdev->pdev, &hdev->hw.hw);
if (ret)
@@ -11805,6 +11807,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
hclge_task_schedule(hdev, round_jiffies_relative(HZ));
+ devl_unlock(hdev->devlink);
return 0;
err_mdiobus_unreg:
@@ -11817,6 +11820,7 @@ err_msi_uninit:
err_cmd_uninit:
hclge_comm_cmd_uninit(hdev->ae_dev, &hdev->hw.hw);
err_devlink_uninit:
+ devl_unlock(hdev->devlink);
hclge_devlink_uninit(hdev);
err_pci_uninit:
pcim_iounmap(pdev, hdev->hw.hw.io_base);
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
index 1fef6bb5a5fbc8..4b6e7536170abc 100644
--- a/drivers/net/ethernet/intel/e1000e/hw.h
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -628,6 +628,7 @@ struct e1000_phy_info {
u32 id;
u32 reset_delay_us; /* in usec */
u32 revision;
+ u32 retry_count;
enum e1000_media_type media_type;
@@ -644,6 +645,7 @@ struct e1000_phy_info {
bool polarity_correction;
bool speed_downgraded;
bool autoneg_wait_to_complete;
+ bool retry_enabled;
};
struct e1000_nvm_info {
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 19e450a5bd314f..f9e94be36e97f2 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -222,11 +222,18 @@ out:
if (hw->mac.type >= e1000_pch_lpt) {
/* Only unforce SMBus if ME is not active */
if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* Unforce SMBus mode in PHY */
e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
+ e1000e_enable_phy_retry(hw);
+
/* Unforce SMBus mode in MAC */
mac_reg = er32(CTRL_EXT);
mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
@@ -310,6 +317,11 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
goto out;
}
+ /* There is no guarantee that the PHY is accessible at this time
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* The MAC-PHY interconnect may be in SMBus mode. If the PHY is
* inaccessible and resetting the PHY is not blocked, toggle the
* LANPHYPC Value bit to force the interconnect to PCIe mode.
@@ -380,6 +392,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
break;
}
+ e1000e_enable_phy_retry(hw);
+
hw->phy.ops.release(hw);
if (!ret_val) {
@@ -449,6 +463,11 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
phy->id = e1000_phy_unknown;
+ if (hw->mac.type == e1000_pch_mtp) {
+ phy->retry_count = 2;
+ e1000e_enable_phy_retry(hw);
+ }
+
ret_val = e1000_init_phy_workarounds_pchlan(hw);
if (ret_val)
return ret_val;
@@ -1146,18 +1165,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
if (ret_val)
goto out;
- /* Force SMBus mode in PHY */
- ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
- if (ret_val)
- goto release;
- phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
- e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
-
- /* Force SMBus mode in MAC */
- mac_reg = er32(CTRL_EXT);
- mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
- ew32(CTRL_EXT, mac_reg);
-
/* Si workaround for ULP entry flow on i127/rev6 h/w. Enable
* LPLU and disable Gig speed when entering ULP
*/
@@ -1313,6 +1320,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
/* Toggle LANPHYPC Value bit */
e1000_toggle_lanphypc_pch_lpt(hw);
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
/* Unforce SMBus mode in PHY */
ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
if (ret_val) {
@@ -1333,6 +1345,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+ e1000e_enable_phy_retry(hw);
+
/* Unforce SMBus mode in MAC */
mac_reg = er32(CTRL_EXT);
mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index cc8c531ec3dff3..3692fce201959f 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6623,6 +6623,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
struct e1000_hw *hw = &adapter->hw;
u32 ctrl, ctrl_ext, rctl, status, wufc;
int retval = 0;
+ u16 smb_ctrl;
/* Runtime suspend should only enable wakeup for link changes */
if (runtime)
@@ -6696,6 +6697,23 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
if (retval)
return retval;
}
+
+ /* Force SMBUS to allow WOL */
+ /* Switching PHY interface always returns MDI error
+ * so disable retry mechanism to avoid wasting time
+ */
+ e1000e_disable_phy_retry(hw);
+
+ e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl);
+ smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
+ e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl);
+
+ e1000e_enable_phy_retry(hw);
+
+ /* Force SMBus mode in MAC */
+ ctrl_ext = er32(CTRL_EXT);
+ ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
+ ew32(CTRL_EXT, ctrl_ext);
}
/* Ensure that the appropriate bits are set in LPI_CTRL
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index 5e329156d1bae0..93544f1cc2a51b 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -107,6 +107,16 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw)
return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0);
}
+void e1000e_disable_phy_retry(struct e1000_hw *hw)
+{
+ hw->phy.retry_enabled = false;
+}
+
+void e1000e_enable_phy_retry(struct e1000_hw *hw)
+{
+ hw->phy.retry_enabled = true;
+}
+
/**
* e1000e_read_phy_reg_mdic - Read MDI control register
* @hw: pointer to the HW structure
@@ -118,55 +128,73 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw)
**/
s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
{
+ u32 i, mdic = 0, retry_counter, retry_max;
struct e1000_phy_info *phy = &hw->phy;
- u32 i, mdic = 0;
+ bool success;
if (offset > MAX_PHY_REG_ADDRESS) {
e_dbg("PHY Address %d is out of range\n", offset);
return -E1000_ERR_PARAM;
}
+ retry_max = phy->retry_enabled ? phy->retry_count : 0;
+
/* Set up Op-code, Phy Address, and register offset in the MDI
* Control register. The MAC will take care of interfacing with the
* PHY to retrieve the desired data.
*/
- mdic = ((offset << E1000_MDIC_REG_SHIFT) |
- (phy->addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_READ));
+ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
+ success = true;
- ew32(MDIC, mdic);
+ mdic = ((offset << E1000_MDIC_REG_SHIFT) |
+ (phy->addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_READ));
- /* Poll the ready bit to see if the MDI read completed
- * Increasing the time out as testing showed failures with
- * the lower time out
- */
- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
- udelay(50);
- mdic = er32(MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset);
- return -E1000_ERR_PHY;
- }
- if (mdic & E1000_MDIC_ERROR) {
- e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
- return -E1000_ERR_PHY;
- }
- if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
- e_dbg("MDI Read offset error - requested %d, returned %d\n",
- offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
- return -E1000_ERR_PHY;
+ ew32(MDIC, mdic);
+
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+ usleep_range(50, 60);
+ mdic = er32(MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ e_dbg("MDI Read PHY Reg Address %d did not complete\n",
+ offset);
+ success = false;
+ }
+ if (mdic & E1000_MDIC_ERROR) {
+ e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
+ success = false;
+ }
+ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
+ e_dbg("MDI Read offset error - requested %d, returned %d\n",
+ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
+ success = false;
+ }
+
+ /* Allow some time after each MDIC transaction to avoid
+ * reading duplicate data in the next MDIC transaction.
+ */
+ if (hw->mac.type == e1000_pch2lan)
+ usleep_range(100, 150);
+
+ if (success) {
+ *data = (u16)mdic;
+ return 0;
+ }
+
+ if (retry_counter != retry_max) {
+ e_dbg("Perform retry on PHY transaction...\n");
+ mdelay(10);
+ }
}
- *data = (u16)mdic;
- /* Allow some time after each MDIC transaction to avoid
- * reading duplicate data in the next MDIC transaction.
- */
- if (hw->mac.type == e1000_pch2lan)
- udelay(100);
- return 0;
+ return -E1000_ERR_PHY;
}
/**
@@ -179,56 +207,72 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
**/
s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
{
+ u32 i, mdic = 0, retry_counter, retry_max;
struct e1000_phy_info *phy = &hw->phy;
- u32 i, mdic = 0;
+ bool success;
if (offset > MAX_PHY_REG_ADDRESS) {
e_dbg("PHY Address %d is out of range\n", offset);
return -E1000_ERR_PARAM;
}
+ retry_max = phy->retry_enabled ? phy->retry_count : 0;
+
/* Set up Op-code, Phy Address, and register offset in the MDI
* Control register. The MAC will take care of interfacing with the
* PHY to retrieve the desired data.
*/
- mdic = (((u32)data) |
- (offset << E1000_MDIC_REG_SHIFT) |
- (phy->addr << E1000_MDIC_PHY_SHIFT) |
- (E1000_MDIC_OP_WRITE));
+ for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
+ success = true;
- ew32(MDIC, mdic);
+ mdic = (((u32)data) |
+ (offset << E1000_MDIC_REG_SHIFT) |
+ (phy->addr << E1000_MDIC_PHY_SHIFT) |
+ (E1000_MDIC_OP_WRITE));
- /* Poll the ready bit to see if the MDI read completed
- * Increasing the time out as testing showed failures with
- * the lower time out
- */
- for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
- udelay(50);
- mdic = er32(MDIC);
- if (mdic & E1000_MDIC_READY)
- break;
- }
- if (!(mdic & E1000_MDIC_READY)) {
- e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset);
- return -E1000_ERR_PHY;
- }
- if (mdic & E1000_MDIC_ERROR) {
- e_dbg("MDI Write PHY Red Address %d Error\n", offset);
- return -E1000_ERR_PHY;
- }
- if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
- e_dbg("MDI Write offset error - requested %d, returned %d\n",
- offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
- return -E1000_ERR_PHY;
- }
+ ew32(MDIC, mdic);
- /* Allow some time after each MDIC transaction to avoid
- * reading duplicate data in the next MDIC transaction.
- */
- if (hw->mac.type == e1000_pch2lan)
- udelay(100);
+ /* Poll the ready bit to see if the MDI read completed
+ * Increasing the time out as testing showed failures with
+ * the lower time out
+ */
+ for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+ usleep_range(50, 60);
+ mdic = er32(MDIC);
+ if (mdic & E1000_MDIC_READY)
+ break;
+ }
+ if (!(mdic & E1000_MDIC_READY)) {
+ e_dbg("MDI Write PHY Reg Address %d did not complete\n",
+ offset);
+ success = false;
+ }
+ if (mdic & E1000_MDIC_ERROR) {
+ e_dbg("MDI Write PHY Reg Address %d Error\n", offset);
+ success = false;
+ }
+ if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
+ e_dbg("MDI Write offset error - requested %d, returned %d\n",
+ offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
+ success = false;
+ }
- return 0;
+ /* Allow some time after each MDIC transaction to avoid
+ * reading duplicate data in the next MDIC transaction.
+ */
+ if (hw->mac.type == e1000_pch2lan)
+ usleep_range(100, 150);
+
+ if (success)
+ return 0;
+
+ if (retry_counter != retry_max) {
+ e_dbg("Perform retry on PHY transaction...\n");
+ mdelay(10);
+ }
+ }
+
+ return -E1000_ERR_PHY;
}
/**
diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h
index c48777d0952352..049bb325b4b14f 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.h
+++ b/drivers/net/ethernet/intel/e1000e/phy.h
@@ -51,6 +51,8 @@ s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data);
s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data);
void e1000_power_up_phy_copper(struct e1000_hw *hw);
void e1000_power_down_phy_copper(struct e1000_hw *hw);
+void e1000e_disable_phy_retry(struct e1000_hw *hw);
+void e1000e_enable_phy_retry(struct e1000_hw *hw);
s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data);
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index ba24f3fa92c371..2fbabcdb5bb5f3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -955,6 +955,7 @@ struct i40e_q_vector {
struct rcu_head rcu; /* to avoid race with update stats on free */
char name[I40E_INT_NAME_STR_LEN];
bool arm_wb_state;
+ bool in_busy_poll;
int irq_num; /* IRQ assigned to this q_vector */
} ____cacheline_internodealigned_in_smp;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f86578857e8aee..48b9ddb2b1b38b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1253,8 +1253,11 @@ int i40e_count_filters(struct i40e_vsi *vsi)
int bkt;
int cnt = 0;
- hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
- ++cnt;
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+ if (f->state == I40E_FILTER_NEW ||
+ f->state == I40E_FILTER_ACTIVE)
+ ++cnt;
+ }
return cnt;
}
@@ -3911,6 +3914,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
q_vector->tx.target_itr >> 1);
q_vector->tx.current_itr = q_vector->tx.target_itr;
+ /* Set ITR for software interrupts triggered after exiting
+ * busy-loop polling.
+ */
+ wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1),
+ I40E_ITR_20K);
+
wr32(hw, I40E_PFINT_RATEN(vector - 1),
i40e_intrl_usec_to_reg(vsi->int_rate_limit));
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 14ab642cafdb26..432afbb6420137 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -333,8 +333,11 @@
#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
#define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5
+#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT)
#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT)
#define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */
#define I40E_PFINT_ICR0_INTEVENT_SHIFT 0
#define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 0d7177083708f2..1a12b732818eef 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2630,7 +2630,22 @@ process_next:
return failure ? budget : (int)total_rx_packets;
}
-static inline u32 i40e_buildreg_itr(const int type, u16 itr)
+/**
+ * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register
+ * @itr_idx: interrupt throttling index
+ * @interval: interrupt throttling interval value in usecs
+ * @force_swint: force software interrupt
+ *
+ * The function builds a value for I40E_PFINT_DYN_CTLN register that
+ * is used to update interrupt throttling interval for specified ITR index
+ * and optionally enforces a software interrupt. If the @itr_idx is equal
+ * to I40E_ITR_NONE then no interval change is applied and only @force_swint
+ * parameter is taken into account. If the interval change and enforced
+ * software interrupt are not requested then the built value just enables
+ * appropriate vector interrupt.
+ **/
+static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval,
+ bool force_swint)
{
u32 val;
@@ -2644,23 +2659,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
* an event in the PBA anyway so we need to rely on the automask
* to hold pending events for us until the interrupt is re-enabled
*
- * The itr value is reported in microseconds, and the register
- * value is recorded in 2 microsecond units. For this reason we
- * only need to shift by the interval shift - 1 instead of the
- * full value.
+ * We have to shift the given value as it is reported in microseconds
+ * and the register value is recorded in 2 microsecond units.
*/
- itr &= I40E_ITR_MASK;
+ interval >>= 1;
+ /* 1. Enable vector interrupt
+ * 2. Update the interval for the specified ITR index
+ * (I40E_ITR_NONE in the register is used to indicate that
+ * no interval update is requested)
+ */
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
- (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval);
+
+ /* 3. Enforce software interrupt trigger if requested
+ * (These software interrupts rate is limited by ITR2 that is
+ * set to 20K interrupts per second)
+ */
+ if (force_swint)
+ val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+ I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK,
+ I40E_SW_ITR);
return val;
}
-/* a small macro to shorten up some long lines */
-#define INTREG I40E_PFINT_DYN_CTLN
-
/* The act of updating the ITR will cause it to immediately trigger. In order
* to prevent this from throwing off adaptive update statistics we defer the
* update so that it can only happen so often. So after either Tx or Rx are
@@ -2679,8 +2704,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_q_vector *q_vector)
{
+ enum i40e_dyn_idx itr_idx = I40E_ITR_NONE;
struct i40e_hw *hw = &vsi->back->hw;
- u32 intval;
+ u16 interval = 0;
+ u32 itr_val;
/* If we don't have MSIX, then we only need to re-enable icr0 */
if (!test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) {
@@ -2702,8 +2729,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
*/
if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
/* Rx ITR needs to be reduced, this is highest priority */
- intval = i40e_buildreg_itr(I40E_RX_ITR,
- q_vector->rx.target_itr);
+ itr_idx = I40E_RX_ITR;
+ interval = q_vector->rx.target_itr;
q_vector->rx.current_itr = q_vector->rx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
@@ -2712,25 +2739,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
/* Tx ITR needs to be reduced, this is second priority
* Tx ITR needs to be increased more than Rx, fourth priority
*/
- intval = i40e_buildreg_itr(I40E_TX_ITR,
- q_vector->tx.target_itr);
+ itr_idx = I40E_TX_ITR;
+ interval = q_vector->tx.target_itr;
q_vector->tx.current_itr = q_vector->tx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
/* Rx ITR needs to be increased, third priority */
- intval = i40e_buildreg_itr(I40E_RX_ITR,
- q_vector->rx.target_itr);
+ itr_idx = I40E_RX_ITR;
+ interval = q_vector->rx.target_itr;
q_vector->rx.current_itr = q_vector->rx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else {
/* No ITR update, lowest priority */
- intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
if (q_vector->itr_countdown)
q_vector->itr_countdown--;
}
- if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(q_vector->reg_idx), intval);
+ /* Do not update interrupt control register if VSI is down */
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ return;
+
+ /* Update ITR interval if necessary and enforce software interrupt
+ * if we are exiting busy poll.
+ */
+ if (q_vector->in_busy_poll) {
+ itr_val = i40e_buildreg_itr(itr_idx, interval, true);
+ q_vector->in_busy_poll = false;
+ } else {
+ itr_val = i40e_buildreg_itr(itr_idx, interval, false);
+ }
+ wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val);
}
/**
@@ -2845,6 +2883,8 @@ tx_only:
*/
if (likely(napi_complete_done(napi, work_done)))
i40e_update_enable_itr(vsi, q_vector);
+ else
+ q_vector->in_busy_poll = true;
return min(work_done, budget - 1);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index abf15067eb5de4..2cdc7de6301c13 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -68,6 +68,7 @@ enum i40e_dyn_idx {
/* these are indexes into ITRN registers */
#define I40E_RX_ITR I40E_IDX_ITR0
#define I40E_TX_ITR I40E_IDX_ITR1
+#define I40E_SW_ITR I40E_IDX_ITR2
/* Supported RSS offloads */
#define I40E_DEFAULT_RSS_HENA ( \
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 83a34e98bdc79d..232b65b9c8eacd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1624,8 +1624,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
{
struct i40e_hw *hw = &pf->hw;
struct i40e_vf *vf;
- int i, v;
u32 reg;
+ int i;
/* If we don't have any VFs, then there is nothing to reset */
if (!pf->num_alloc_vfs)
@@ -1636,11 +1636,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
return false;
/* Begin reset on all VFs at once */
- for (v = 0; v < pf->num_alloc_vfs; v++) {
- vf = &pf->vf[v];
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* If VF is being reset no need to trigger reset again */
if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
- i40e_trigger_vf_reset(&pf->vf[v], flr);
+ i40e_trigger_vf_reset(vf, flr);
}
/* HW requires some time to make sure it can flush the FIFO for a VF
@@ -1649,14 +1648,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
* the VFs using a simple iterator that increments once that VF has
* finished resetting.
*/
- for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
+ for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) {
usleep_range(10000, 20000);
/* Check each VF in sequence, beginning with the VF to fail
* the previous check.
*/
- while (v < pf->num_alloc_vfs) {
- vf = &pf->vf[v];
+ while (vf < &pf->vf[pf->num_alloc_vfs]) {
if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) {
reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK))
@@ -1666,7 +1664,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
/* If the current VF has finished resetting, move on
* to the next VF in sequence.
*/
- v++;
+ ++vf;
}
}
@@ -1676,39 +1674,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
/* Display a warning if at least one VF didn't manage to reset in
* time, but continue on with the operation.
*/
- if (v < pf->num_alloc_vfs)
+ if (vf < &pf->vf[pf->num_alloc_vfs])
dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
- pf->vf[v].vf_id);
+ vf->vf_id);
usleep_range(10000, 20000);
/* Begin disabling all the rings associated with VFs, but do not wait
* between each VF.
*/
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* On initial reset, we don't have any queues to disable */
- if (pf->vf[v].lan_vsi_idx == 0)
+ if (vf->lan_vsi_idx == 0)
continue;
/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;
- i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]);
+ i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]);
}
/* Now that we've notified HW to disable all of the VF rings, wait
* until they finish.
*/
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* On initial reset, we don't have any queues to disable */
- if (pf->vf[v].lan_vsi_idx == 0)
+ if (vf->lan_vsi_idx == 0)
continue;
/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;
- i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]);
+ i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]);
}
/* Hw may need up to 50ms to finish disabling the RX queues. We
@@ -1717,12 +1715,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
mdelay(50);
/* Finish the reset on each VF */
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
/* If VF is reset in another thread just continue */
if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
continue;
- i40e_cleanup_reset_vf(&pf->vf[v]);
+ i40e_cleanup_reset_vf(vf);
}
i40e_flush(hw);
@@ -3139,11 +3137,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
/* Allow to delete VF primary MAC only if it was not set
* administratively by PF or if VF is trusted.
*/
- if (ether_addr_equal(addr, vf->default_lan_addr.addr) &&
- i40e_can_vf_change_mac(vf))
- was_unimac_deleted = true;
- else
- continue;
+ if (ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+ if (i40e_can_vf_change_mac(vf))
+ was_unimac_deleted = true;
+ else
+ continue;
+ }
if (i40e_del_mac_filter(vsi, al->list[i].addr)) {
ret = -EINVAL;
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 8040317c95617e..1f3e7a6903e56e 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -593,8 +593,9 @@ struct ice_aqc_recipe_data_elem {
struct ice_aqc_recipe_to_profile {
__le16 profile_id;
u8 rsvd[6];
- DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES);
+ __le64 recipe_assoc;
};
+static_assert(sizeof(struct ice_aqc_recipe_to_profile) == 16);
/* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3)
*/
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index db4b2844e1f718..d9f6cc71d900aa 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1002,8 +1002,8 @@ static void ice_get_itr_intrl_gran(struct ice_hw *hw)
*/
int ice_init_hw(struct ice_hw *hw)
{
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
- void *mac_buf __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
+ void *mac_buf __free(kfree) = NULL;
u16 mac_buf_len;
int status;
@@ -3272,7 +3272,7 @@ int ice_update_link_info(struct ice_port_info *pi)
return status;
if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) {
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
if (!pcaps)
@@ -3420,7 +3420,7 @@ ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
int
ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
{
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
struct ice_aqc_set_phy_cfg_data cfg = { 0 };
struct ice_hw *hw;
int status;
@@ -3561,7 +3561,7 @@ int
ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
enum ice_fec_mode fec)
{
- struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+ struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
struct ice_hw *hw;
int status;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 255a9c8151b451..78b833b3e1d7ef 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -941,11 +941,11 @@ static u64 ice_loopback_test(struct net_device *netdev)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
struct ice_pf *pf = orig_vsi->back;
+ u8 *tx_frame __free(kfree) = NULL;
u8 broadcast[ETH_ALEN], ret = 0;
int num_frames, valid_frames;
struct ice_tx_ring *tx_ring;
struct ice_rx_ring *rx_ring;
- u8 *tx_frame __free(kfree);
int i;
netdev_info(netdev, "loopback test\n");
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
index f97128b69f87ec..f0e76f0a6d6031 100644
--- a/drivers/net/ethernet/intel/ice/ice_lag.c
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -2041,7 +2041,7 @@ int ice_init_lag(struct ice_pf *pf)
/* associate recipes to profiles */
for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) {
err = ice_aq_get_recipe_to_profile(&pf->hw, n,
- (u8 *)&recipe_bits, NULL);
+ &recipe_bits, NULL);
if (err)
continue;
@@ -2049,7 +2049,7 @@ int ice_init_lag(struct ice_pf *pf)
recipe_bits |= BIT(lag->pf_recipe) |
BIT(lag->lport_recipe);
ice_aq_map_recipe_to_profile(&pf->hw, n,
- (u8 *)&recipe_bits, NULL);
+ recipe_bits, NULL);
}
}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index ee3f0d3e3f6dbd..558422120312ba 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3091,7 +3091,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
{
struct ice_vsi_cfg_params params = {};
struct ice_coalesce_stored *coalesce;
- int prev_num_q_vectors = 0;
+ int prev_num_q_vectors;
struct ice_pf *pf;
int ret;
@@ -3105,13 +3105,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
return -EINVAL;
- coalesce = kcalloc(vsi->num_q_vectors,
- sizeof(struct ice_coalesce_stored), GFP_KERNEL);
- if (!coalesce)
- return -ENOMEM;
-
- prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
-
ret = ice_vsi_realloc_stat_arrays(vsi);
if (ret)
goto err_vsi_cfg;
@@ -3121,6 +3114,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
if (ret)
goto err_vsi_cfg;
+ coalesce = kcalloc(vsi->num_q_vectors,
+ sizeof(struct ice_coalesce_stored), GFP_KERNEL);
+ if (!coalesce)
+ return -ENOMEM;
+
+ prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
+
ret = ice_vsi_cfg_tc_lan(pf, vsi);
if (ret) {
if (vsi_flags & ICE_VSI_FLAG_INIT) {
@@ -3139,8 +3139,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
err_vsi_cfg_tc_lan:
ice_vsi_decfg(vsi);
-err_vsi_cfg:
kfree(coalesce);
+err_vsi_cfg:
return ret;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index d4baae8c3b720a..b4ea935e830054 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -2025,12 +2025,12 @@ error_out:
* ice_aq_map_recipe_to_profile - Map recipe to packet profile
* @hw: pointer to the HW struct
* @profile_id: package profile ID to associate the recipe with
- * @r_bitmap: Recipe bitmap filled in and need to be returned as response
+ * @r_assoc: Recipe bitmap filled in and need to be returned as response
* @cd: pointer to command details structure or NULL
* Recipe to profile association (0x0291)
*/
int
-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc,
struct ice_sq_cd *cd)
{
struct ice_aqc_recipe_to_profile *cmd;
@@ -2042,7 +2042,7 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
/* Set the recipe ID bit in the bitmask to let the device know which
* profile we are associating the recipe to
*/
- memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc));
+ cmd->recipe_assoc = cpu_to_le64(r_assoc);
return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
}
@@ -2051,12 +2051,12 @@ ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
* ice_aq_get_recipe_to_profile - Map recipe to packet profile
* @hw: pointer to the HW struct
* @profile_id: package profile ID to associate the recipe with
- * @r_bitmap: Recipe bitmap filled in and need to be returned as response
+ * @r_assoc: Recipe bitmap filled in and need to be returned as response
* @cd: pointer to command details structure or NULL
* Associate profile ID with given recipe (0x0293)
*/
int
-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc,
struct ice_sq_cd *cd)
{
struct ice_aqc_recipe_to_profile *cmd;
@@ -2069,7 +2069,7 @@ ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
if (!status)
- memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc));
+ *r_assoc = le64_to_cpu(cmd->recipe_assoc);
return status;
}
@@ -2108,6 +2108,7 @@ int ice_alloc_recipe(struct ice_hw *hw, u16 *rid)
static void ice_get_recp_to_prof_map(struct ice_hw *hw)
{
DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+ u64 recp_assoc;
u16 i;
for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) {
@@ -2115,8 +2116,9 @@ static void ice_get_recp_to_prof_map(struct ice_hw *hw)
bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES);
bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES);
- if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL))
+ if (ice_aq_get_recipe_to_profile(hw, i, &recp_assoc, NULL))
continue;
+ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES);
bitmap_copy(profile_to_recipe[i], r_bitmap,
ICE_MAX_NUM_RECIPES);
for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES)
@@ -5390,22 +5392,24 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
*/
list_for_each_entry(fvit, &rm->fv_list, list_entry) {
DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+ u64 recp_assoc;
u16 j;
status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id,
- (u8 *)r_bitmap, NULL);
+ &recp_assoc, NULL);
if (status)
goto err_unroll;
+ bitmap_from_arr64(r_bitmap, &recp_assoc, ICE_MAX_NUM_RECIPES);
bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap,
ICE_MAX_NUM_RECIPES);
status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
if (status)
goto err_unroll;
+ bitmap_to_arr64(&recp_assoc, r_bitmap, ICE_MAX_NUM_RECIPES);
status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id,
- (u8 *)r_bitmap,
- NULL);
+ recp_assoc, NULL);
ice_release_change_lock(hw);
if (status)
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index db7e501b7e0a48..89ffa1b51b5ad1 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -424,10 +424,10 @@ int ice_aq_add_recipe(struct ice_hw *hw,
struct ice_aqc_recipe_data_elem *s_recipe_list,
u16 num_recipes, struct ice_sq_cd *cd);
int
-ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 *r_assoc,
struct ice_sq_cd *cd);
int
-ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u64 r_assoc,
struct ice_sq_cd *cd);
#endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
index b890410a2bc0ba..688ccb0615ab9f 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -28,6 +28,8 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
* - ICE_TC_FLWR_FIELD_VLAN_TPID (present if specified)
* - Tunnel flag (present if tunnel)
*/
+ if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS)
+ lkups_cnt++;
if (flags & ICE_TC_FLWR_FIELD_TENANT_ID)
lkups_cnt++;
@@ -363,6 +365,11 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
/* Always add direction metadata */
ice_rule_add_direction_metadata(&list[ICE_TC_METADATA_LKUP_IDX]);
+ if (tc_fltr->direction == ICE_ESWITCH_FLTR_EGRESS) {
+ ice_rule_add_src_vsi_metadata(&list[i]);
+ i++;
+ }
+
rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type);
if (tc_fltr->tunnel_type != TNL_LAST) {
i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list, i);
@@ -772,7 +779,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
int ret;
int i;
- if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) {
+ if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT) {
NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)");
return -EOPNOTSUPP;
}
@@ -820,6 +827,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
/* specify the cookie as filter_rule_id */
rule_info.fltr_rule_id = fltr->cookie;
+ rule_info.src_vsi = vsi->idx;
ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added);
if (ret == -EEXIST) {
@@ -1481,7 +1489,10 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
(BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
- BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+ BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel");
return -EOPNOTSUPP;
} else {
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
index 80dc4bcdd3a41c..b3e1bdcb80f84d 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
@@ -26,24 +26,22 @@ static void ice_port_vlan_on(struct ice_vsi *vsi)
struct ice_vsi_vlan_ops *vlan_ops;
struct ice_pf *pf = vsi->back;
- if (ice_is_dvm_ena(&pf->hw)) {
- vlan_ops = &vsi->outer_vlan_ops;
-
- /* setup outer VLAN ops */
- vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
- vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+ /* setup inner VLAN ops */
+ vlan_ops = &vsi->inner_vlan_ops;
- /* setup inner VLAN ops */
- vlan_ops = &vsi->inner_vlan_ops;
+ if (ice_is_dvm_ena(&pf->hw)) {
vlan_ops->add_vlan = noop_vlan_arg;
vlan_ops->del_vlan = noop_vlan_arg;
vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
- } else {
- vlan_ops = &vsi->inner_vlan_ops;
+ /* setup outer VLAN ops */
+ vlan_ops = &vsi->outer_vlan_ops;
+ vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
+ vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+ } else {
vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan;
}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 6dd7a66bb8979a..f5bc4a2780745e 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -2941,6 +2941,8 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq,
rx_ptype = le16_get_bits(rx_desc->ptype_err_fflags0,
VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M);
+ skb->protocol = eth_type_trans(skb, rxq->vport->netdev);
+
decoded = rxq->vport->rx_ptype_lkup[rx_ptype];
/* If we don't know the ptype we can't do anything else with it. Just
* pass it up the stack as-is.
@@ -2951,8 +2953,6 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq,
/* process RSS/hash */
idpf_rx_hash(rxq, skb, rx_desc, &decoded);
- skb->protocol = eth_type_trans(skb, rxq->vport->netdev);
-
if (le16_get_bits(rx_desc->hdrlen_flags,
VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M))
return idpf_rx_rsc(rxq, skb, rx_desc, &decoded);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 2e1cfbd82f4fd5..35ad40a803cb64 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1642,10 +1642,6 @@ done:
if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) &&
skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
- /* FIXME: add support for retrieving timestamps from
- * the other timer registers before skipping the
- * timestamping request.
- */
unsigned long flags;
u32 tstamp_flags;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 13a6fca31004a8..866024f2b9eeb3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -914,7 +914,13 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
goto err_out;
}
- xs = kzalloc(sizeof(*xs), GFP_KERNEL);
+ algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
+ if (unlikely(!algo)) {
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ xs = kzalloc(sizeof(*xs), GFP_ATOMIC);
if (unlikely(!xs)) {
err = -ENOMEM;
goto err_out;
@@ -930,14 +936,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4));
xs->xso.dev = adapter->netdev;
- algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
- if (unlikely(!algo)) {
- err = -ENOENT;
- goto err_xs;
- }
-
aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8;
- xs->aead = kzalloc(aead_len, GFP_KERNEL);
+ xs->aead = kzalloc(aead_len, GFP_ATOMIC);
if (unlikely(!xs->aead)) {
err = -ENOMEM;
goto err_xs;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 3c0f55b3e48ea4..b86f3224f0b783 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -808,6 +808,11 @@ static int cgx_lmac_enadis_pause_frm(void *cgxd, int lmac_id,
if (!is_lmac_valid(cgx, lmac_id))
return -ENODEV;
+ cfg = cgx_read(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL);
+ cfg &= ~CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK;
+ cfg |= rx_pause ? CGX_GMP_GMI_RXX_FRM_CTL_CTL_BCK : 0x0;
+ cgx_write(cgx, lmac_id, CGXX_GMP_GMI_RXX_FRM_CTL, cfg);
+
cfg = cgx_read(cgx, lmac_id, CGXX_SMUX_RX_FRM_CTL);
cfg &= ~CGX_SMUX_RX_FRM_CTL_CTL_BCK;
cfg |= rx_pause ? CGX_SMUX_RX_FRM_CTL_CTL_BCK : 0x0;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
index 72e060cf6b6181..e9bf9231b0185d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
@@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
continue;
lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu));
for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) {
+ if (iter >= MAX_LMAC_COUNT)
+ continue;
lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu),
iter);
rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index d39001cdc707ee..00af8888e3291a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -4819,18 +4819,18 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
*/
rvu_write64(rvu, blkaddr, NIX_AF_CFG,
rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL);
+ }
- /* Set chan/link to backpressure TL3 instead of TL2 */
- rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
+ /* Set chan/link to backpressure TL3 instead of TL2 */
+ rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
- /* Disable SQ manager's sticky mode operation (set TM6 = 0)
- * This sticky mode is known to cause SQ stalls when multiple
- * SQs are mapped to same SMQ and transmitting pkts at a time.
- */
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
- cfg &= ~BIT_ULL(15);
- rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
- }
+ /* Disable SQ manager's sticky mode operation (set TM6 = 0)
+ * This sticky mode is known to cause SQ stalls when multiple
+ * SQs are mapped to same SMQ and transmitting pkts at a time.
+ */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
+ cfg &= ~BIT_ULL(15);
+ rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
ltdefs = rvu->kpu.lt_def;
/* Calibrate X2P bus to check if CGX/LBK links are fine */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index e350242bbafbad..be709f83f3318c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -1657,7 +1657,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz,
struct npc_coalesced_kpu_prfl *img_data = NULL;
int i = 0, rc = -EINVAL;
void __iomem *kpu_prfl_addr;
- u16 offset;
+ u32 offset;
img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr;
if (le64_to_cpu(img_data->signature) == KPU_SIGN &&
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index b40bd0e4675148..3f46d5e0fb2ecb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1933,7 +1933,7 @@ int otx2_open(struct net_device *netdev)
* mcam entries are enabled to receive the packets. Hence disable the
* packet I/O.
*/
- if (err == EIO)
+ if (err == -EIO)
goto err_disable_rxtx;
else if (err)
goto err_tx_stop_queues;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 87bdb93cb066e9..f4655a8c0705d7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -689,6 +689,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
struct flow_match_control match;
+ u32 val;
flow_rule_match_control(rule, &match);
if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
@@ -697,12 +698,14 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
}
if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
+ val = match.key->flags & FLOW_DIS_IS_FRAGMENT;
if (ntohs(flow_spec->etype) == ETH_P_IP) {
- flow_spec->ip_flag = IPV4_FLAG_MORE;
+ flow_spec->ip_flag = val ? IPV4_FLAG_MORE : 0;
flow_mask->ip_flag = IPV4_FLAG_MORE;
req->features |= BIT_ULL(NPC_IPFRAG_IPV4);
} else if (ntohs(flow_spec->etype) == ETH_P_IPV6) {
- flow_spec->next_header = IPPROTO_FRAGMENT;
+ flow_spec->next_header = val ?
+ IPPROTO_FRAGMENT : 0;
flow_mask->next_header = 0xff;
req->features |= BIT_ULL(NPC_IPFRAG_IPV6);
} else {
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
index 1e77bbf5d22a1a..1723e9912ae07c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
@@ -382,6 +382,7 @@ static void otx2_qos_read_txschq_cfg_tl(struct otx2_qos_node *parent,
otx2_qos_read_txschq_cfg_tl(node, cfg);
cnt = cfg->static_node_pos[node->level];
cfg->schq_contig_list[node->level][cnt] = node->schq;
+ cfg->schq_index_used[node->level][cnt] = true;
cfg->schq_contig[node->level]++;
cfg->static_node_pos[node->level]++;
otx2_qos_read_txschq_cfg_schq(node, cfg);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index c895e265ae0ebc..61334a71058c75 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -1074,13 +1074,13 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev)
static void
mtk_wed_stop(struct mtk_wed_device *dev)
{
+ mtk_wed_dma_disable(dev);
mtk_wed_set_ext_int(dev, false);
wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0);
wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0);
wdma_w32(dev, MTK_WDMA_INT_MASK, 0);
wdma_w32(dev, MTK_WDMA_INT_GRP2, 0);
- wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0);
if (!mtk_wed_get_rx_capa(dev))
return;
@@ -1093,7 +1093,6 @@ static void
mtk_wed_deinit(struct mtk_wed_device *dev)
{
mtk_wed_stop(dev);
- mtk_wed_dma_disable(dev);
wed_clr(dev, MTK_WED_CTRL,
MTK_WED_CTRL_WDMA_INT_AGENT_EN |
@@ -2605,9 +2604,6 @@ mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask)
static void
mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask)
{
- if (!dev->running)
- return;
-
mtk_wed_set_ext_int(dev, !!mask);
wed_w32(dev, MTK_WED_INT_MASK, mask);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 86f1854698b4e8..883c044852f1df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -95,9 +95,15 @@ static inline void mlx5e_ptp_metadata_fifo_push(struct mlx5e_ptp_metadata_fifo *
}
static inline u8
+mlx5e_ptp_metadata_fifo_peek(struct mlx5e_ptp_metadata_fifo *fifo)
+{
+ return fifo->data[fifo->mask & fifo->cc];
+}
+
+static inline void
mlx5e_ptp_metadata_fifo_pop(struct mlx5e_ptp_metadata_fifo *fifo)
{
- return fifo->data[fifo->mask & fifo->cc++];
+ fifo->cc++;
}
static inline void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index e87e26f2c669c2..6743806b848060 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -83,24 +83,25 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
txq_ix = mlx5e_qid_from_qos(chs, node_qid);
- WARN_ON(node_qid > priv->htb_max_qos_sqs);
- if (node_qid == priv->htb_max_qos_sqs) {
- struct mlx5e_sq_stats *stats, **stats_list = NULL;
-
- if (priv->htb_max_qos_sqs == 0) {
- stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
- sizeof(*stats_list),
- GFP_KERNEL);
- if (!stats_list)
- return -ENOMEM;
- }
+ WARN_ON(node_qid >= mlx5e_htb_cur_leaf_nodes(priv->htb));
+ if (!priv->htb_qos_sq_stats) {
+ struct mlx5e_sq_stats **stats_list;
+
+ stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+ sizeof(*stats_list), GFP_KERNEL);
+ if (!stats_list)
+ return -ENOMEM;
+
+ WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+ }
+
+ if (!priv->htb_qos_sq_stats[node_qid]) {
+ struct mlx5e_sq_stats *stats;
+
stats = kzalloc(sizeof(*stats), GFP_KERNEL);
- if (!stats) {
- kvfree(stats_list);
+ if (!stats)
return -ENOMEM;
- }
- if (stats_list)
- WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+
WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
/* Order htb_max_qos_sqs increment after writing the array pointer.
* Pairs with smp_load_acquire in en_stats.c.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 0ab9db31953025..22918b2ef7f128 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -108,7 +108,10 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
mlx5e_reset_txqsq_cc_pc(sq);
sq->stats->recover++;
clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+ rtnl_lock();
mlx5e_activate_txqsq(sq);
+ rtnl_unlock();
+
if (sq->channel)
mlx5e_trigger_napi_icosq(sq->channel);
else
@@ -179,12 +182,16 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx)
carrier_ok = netif_carrier_ok(netdev);
netif_carrier_off(netdev);
+ rtnl_lock();
mlx5e_deactivate_priv_channels(priv);
+ rtnl_unlock();
mlx5e_ptp_close(chs->ptp);
err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
+ rtnl_lock();
mlx5e_activate_priv_channels(priv);
+ rtnl_unlock();
/* return carrier back if needed */
if (carrier_ok)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
index bcafb4bf94154f..8d9a3b5ec973b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
@@ -179,6 +179,13 @@ u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels)
return min_t(u32, rqt_size, max_cap_rqt_size);
}
+#define MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH 256
+
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void)
+{
+ return MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH / MLX5E_UNIFORM_SPREAD_RQT_FACTOR;
+}
+
void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
{
mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
index e0bc30308c7700..2f9e04a8418f14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
@@ -38,6 +38,7 @@ static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt)
}
u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels);
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void);
int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn, u32 *vhca_id);
int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids,
unsigned int num_rqns,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
index f675b1926340f9..f66bbc8464645e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
@@ -57,6 +57,7 @@ int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
{
+ mutex_lock(selq->state_lock);
WARN_ON_ONCE(selq->is_prepared);
kvfree(selq->standby);
@@ -67,6 +68,7 @@ void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
kvfree(selq->standby);
selq->standby = NULL;
+ mutex_unlock(selq->state_lock);
}
void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index c7f542d0b8f08c..93cf23278d93c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -46,6 +46,10 @@ struct arfs_table {
struct hlist_head rules_hash[ARFS_HASH_SIZE];
};
+enum {
+ MLX5E_ARFS_STATE_ENABLED,
+};
+
enum arfs_type {
ARFS_IPV4_TCP,
ARFS_IPV6_TCP,
@@ -60,6 +64,7 @@ struct mlx5e_arfs_tables {
spinlock_t arfs_lock;
int last_filter_id;
struct workqueue_struct *wq;
+ unsigned long state;
};
struct arfs_tuple {
@@ -170,6 +175,8 @@ int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
return err;
}
}
+ set_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state);
+
return 0;
}
@@ -455,6 +462,8 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs)
int i;
int j;
+ clear_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state);
+
spin_lock_bh(&arfs->arfs_lock);
mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) {
hlist_del_init(&rule->hlist);
@@ -627,17 +636,8 @@ static void arfs_handle_work(struct work_struct *work)
struct mlx5_flow_handle *rule;
arfs = mlx5e_fs_get_arfs(priv->fs);
- mutex_lock(&priv->state_lock);
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- spin_lock_bh(&arfs->arfs_lock);
- hlist_del(&arfs_rule->hlist);
- spin_unlock_bh(&arfs->arfs_lock);
-
- mutex_unlock(&priv->state_lock);
- kfree(arfs_rule);
- goto out;
- }
- mutex_unlock(&priv->state_lock);
+ if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state))
+ return;
if (!arfs_rule->rule) {
rule = arfs_add_rule(priv, arfs_rule);
@@ -753,6 +753,11 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
return -EPROTONOSUPPORT;
spin_lock_bh(&arfs->arfs_lock);
+ if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) {
+ spin_unlock_bh(&arfs->arfs_lock);
+ return -EPERM;
+ }
+
arfs_rule = arfs_find_rule(arfs_t, &fk);
if (arfs_rule) {
if (arfs_rule->rxq == rxq_index || work_busy(&arfs_rule->arfs_work)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index cc51ce16df14ab..67a29826bb5702 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -451,6 +451,34 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
mutex_lock(&priv->state_lock);
+ if (mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc == ETH_RSS_HASH_XOR) {
+ unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+ if (count > xor8_max_channels) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: Requested number of channels (%d) exceeds the maximum allowed by the XOR8 RSS hfunc (%d)\n",
+ __func__, count, xor8_max_channels);
+ goto out;
+ }
+ }
+
+ /* If RXFH is configured, changing the channels number is allowed only if
+ * it does not require resizing the RSS table. This is because the previous
+ * configuration may no longer be compatible with the new RSS table.
+ */
+ if (netif_is_rxfh_configured(priv->netdev)) {
+ int cur_rqt_size = mlx5e_rqt_size(priv->mdev, cur_params->num_channels);
+ int new_rqt_size = mlx5e_rqt_size(priv->mdev, count);
+
+ if (new_rqt_size != cur_rqt_size) {
+ err = -EINVAL;
+ netdev_err(priv->netdev,
+ "%s: RXFH is configured, block changing channels number that affects RSS table size (new: %d, current: %d)\n",
+ __func__, new_rqt_size, cur_rqt_size);
+ goto out;
+ }
+ }
+
/* Don't allow changing the number of channels if HTB offload is active,
* because the numeration of the QoS SQs will change, while per-queue
* qdiscs are attached.
@@ -561,12 +589,12 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
static void
mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
- struct mlx5_core_dev *mdev = priv->mdev;
int tc;
int i;
for (i = 0; i < priv->channels.num; ++i) {
struct mlx5e_channel *c = priv->channels.c[i];
+ struct mlx5_core_dev *mdev = c->mdev;
for (tc = 0; tc < c->num_tc; tc++) {
mlx5_core_modify_cq_moderation(mdev,
@@ -580,11 +608,11 @@ mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coal
static void
mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
- struct mlx5_core_dev *mdev = priv->mdev;
int i;
for (i = 0; i < priv->channels.num; ++i) {
struct mlx5e_channel *c = priv->channels.c[i];
+ struct mlx5_core_dev *mdev = c->mdev;
mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
coal->rx_coalesce_usecs,
@@ -1281,17 +1309,30 @@ int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh,
struct mlx5e_priv *priv = netdev_priv(dev);
u32 *rss_context = &rxfh->rss_context;
u8 hfunc = rxfh->hfunc;
+ unsigned int count;
int err;
mutex_lock(&priv->state_lock);
+
+ count = priv->channels.params.num_channels;
+
+ if (hfunc == ETH_RSS_HASH_XOR) {
+ unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+ if (count > xor8_max_channels) {
+ err = -EINVAL;
+ netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n",
+ __func__, count, xor8_max_channels);
+ goto unlock;
+ }
+ }
+
if (*rss_context && rxfh->rss_delete) {
err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context);
goto unlock;
}
if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
- unsigned int count = priv->channels.params.num_channels;
-
err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count);
if (err)
goto unlock;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 91848eae45655f..319930c04093ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -209,8 +209,8 @@ static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data)
*data,
mlx5e_devcom_event_mpv,
priv);
- if (IS_ERR_OR_NULL(priv->devcom))
- return -EOPNOTSUPP;
+ if (IS_ERR(priv->devcom))
+ return PTR_ERR(priv->devcom);
if (mlx5_core_is_mp_master(priv->mdev)) {
mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_UP,
@@ -5726,9 +5726,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
kfree(priv->tx_rates);
kfree(priv->txq2sq);
destroy_workqueue(priv->wq);
- mutex_lock(&priv->state_lock);
mlx5e_selq_cleanup(&priv->selq);
- mutex_unlock(&priv->state_lock);
free_cpumask_var(priv->scratchpad.cpumask);
for (i = 0; i < priv->htb_max_qos_sqs; i++)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 2fa076b23fbead..e21a3b4128ce88 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -398,6 +398,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) {
u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
+ mlx5e_ptp_metadata_fifo_pop(&sq->ptpsq->metadata_freelist);
+
mlx5e_skb_cb_hwtstamp_init(skb);
mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
metadata_index);
@@ -496,9 +498,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
err_drop:
stats->dropped++;
- if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
- mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
- be32_to_cpu(eseg->flow_table_metadata));
dev_kfree_skb_any(skb);
mlx5e_tx_flush(sq);
}
@@ -657,7 +656,7 @@ static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb,
{
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
eseg->flow_table_metadata =
- cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist));
+ cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist));
}
static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 3047d7015c5256..1789800faaeb62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1868,6 +1868,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
if (err)
goto abort;
+ dev->priv.eswitch = esw;
err = esw_offloads_init(esw);
if (err)
goto reps_err;
@@ -1892,11 +1893,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
else
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
- if (MLX5_ESWITCH_MANAGER(dev) &&
- mlx5_esw_vport_match_metadata_supported(esw))
- esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
-
- dev->priv.eswitch = esw;
BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
esw_info(dev,
@@ -1908,6 +1904,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
reps_err:
mlx5_esw_vports_cleanup(esw);
+ dev->priv.eswitch = NULL;
abort:
if (esw->work_queue)
destroy_workqueue(esw->work_queue);
@@ -1926,7 +1923,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw_info(esw->dev, "cleanup\n");
- esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
WARN_ON(refcount_read(&esw->qos.refcnt));
mutex_destroy(&esw->state_lock);
@@ -1937,6 +1933,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
mutex_destroy(&esw->offloads.encap_tbl_lock);
mutex_destroy(&esw->offloads.decap_tbl_lock);
esw_offloads_cleanup(esw);
+ esw->dev->priv.eswitch = NULL;
mlx5_esw_vports_cleanup(esw);
debugfs_remove_recursive(esw->debugfs_root);
devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index baaae628b0a0f6..844d3e3a65ddf0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -43,6 +43,7 @@
#include "rdma.h"
#include "en.h"
#include "fs_core.h"
+#include "lib/mlx5.h"
#include "lib/devcom.h"
#include "lib/eq.h"
#include "lib/fs_chains.h"
@@ -2476,6 +2477,10 @@ int esw_offloads_init(struct mlx5_eswitch *esw)
if (err)
return err;
+ if (MLX5_ESWITCH_MANAGER(esw->dev) &&
+ mlx5_esw_vport_match_metadata_supported(esw))
+ esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
err = devl_params_register(priv_to_devlink(esw->dev),
esw_devlink_params,
ARRAY_SIZE(esw_devlink_params));
@@ -3055,7 +3060,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key)
key,
mlx5_esw_offloads_devcom_event,
esw);
- if (IS_ERR_OR_NULL(esw->devcom))
+ if (IS_ERR(esw->devcom))
return;
mlx5_devcom_send_event(esw->devcom,
@@ -3707,6 +3712,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && mlx5_get_sd(esw->dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't change E-Switch mode to switchdev when multi-PF netdev (Socket Direct) is configured.");
+ return -EPERM;
+ }
+
mlx5_lag_disable_change(esw->dev);
err = mlx5_esw_try_lock(esw);
if (err < 0) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index e6bfa7e4f146ca..cf085a478e3e4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1664,6 +1664,16 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
return err;
}
+static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1,
+ struct mlx5_pkt_reformat *p2)
+{
+ return p1->owner == p2->owner &&
+ (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ?
+ p1->id == p2->id :
+ mlx5_fs_dr_action_get_pkt_reformat_id(p1) ==
+ mlx5_fs_dr_action_get_pkt_reformat_id(p2));
+}
+
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
struct mlx5_flow_destination *d2)
{
@@ -1675,8 +1685,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
(d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
- (d1->vport.pkt_reformat->id ==
- d2->vport.pkt_reformat->id) : true)) ||
+ mlx5_pkt_reformat_cmp(d1->vport.pkt_reformat,
+ d2->vport.pkt_reformat) : true)) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
d1->ft == d2->ft) ||
(d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
@@ -1808,8 +1818,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
}
trace_mlx5_fs_set_fte(fte, false);
+ /* Link newly added rules into the tree. */
for (i = 0; i < handle->num_rules; i++) {
- if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
+ if (!handle->rule[i]->node.parent) {
tree_add_node(&handle->rule[i]->node, &fte->node);
trace_mlx5_fs_add_rule(handle->rule[i]);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index d14459e5c04fc5..69d482f7c5a299 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -703,8 +703,10 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev)
return err;
}
- if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+ if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
mlx5_lag_port_sel_destroy(ldev);
+ ldev->buckets = 1;
+ }
if (mlx5_lag_has_drop_rule(ldev))
mlx5_lag_drop_rule_cleanup(ldev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
index e7d59cfa8708e1..7b0766c89f4cf0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
@@ -220,7 +220,7 @@ mlx5_devcom_register_component(struct mlx5_devcom_dev *devc,
struct mlx5_devcom_comp *comp;
if (IS_ERR_OR_NULL(devc))
- return NULL;
+ return ERR_PTR(-EINVAL);
mutex_lock(&comp_list_lock);
comp = devcom_component_get(devc, id, key, handler);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index 5b28084e8a03c7..dd5d186dc6148f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -213,8 +213,8 @@ static int sd_register(struct mlx5_core_dev *dev)
sd = mlx5_get_sd(dev);
devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP,
sd->group_id, NULL, dev);
- if (!devcom)
- return -ENOMEM;
+ if (IS_ERR(devcom))
+ return PTR_ERR(devcom);
sd->devcom = devcom;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c2593625c09ad6..331ce47f51a17a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -956,7 +956,7 @@ static void mlx5_register_hca_devcom_comp(struct mlx5_core_dev *dev)
mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS,
mlx5_query_nic_system_image_guid(dev),
NULL, dev);
- if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp))
+ if (IS_ERR(dev->priv.hca_devcom_comp))
mlx5_core_err(dev, "Failed to register devcom HCA component\n");
}
@@ -1480,6 +1480,14 @@ int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev)
if (err)
goto err_register;
+ err = mlx5_crdump_enable(dev);
+ if (err)
+ mlx5_core_err(dev, "mlx5_crdump_enable failed with error code %d\n", err);
+
+ err = mlx5_hwmon_dev_register(dev);
+ if (err)
+ mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
+
mutex_unlock(&dev->intf_state_mutex);
return 0;
@@ -1505,7 +1513,10 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
int err;
devl_lock(devlink);
+ devl_register(devlink);
err = mlx5_init_one_devl_locked(dev);
+ if (err)
+ devl_unregister(devlink);
devl_unlock(devlink);
return err;
}
@@ -1517,6 +1528,8 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
devl_lock(devlink);
mutex_lock(&dev->intf_state_mutex);
+ mlx5_hwmon_dev_unregister(dev);
+ mlx5_crdump_disable(dev);
mlx5_unregister_device(dev);
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
@@ -1534,6 +1547,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
mlx5_function_teardown(dev, true);
out:
mutex_unlock(&dev->intf_state_mutex);
+ devl_unregister(devlink);
devl_unlock(devlink);
}
@@ -1680,16 +1694,23 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev)
}
devl_lock(devlink);
+ devl_register(devlink);
+
err = mlx5_devlink_params_register(priv_to_devlink(dev));
- devl_unlock(devlink);
if (err) {
mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
- goto query_hca_caps_err;
+ goto params_reg_err;
}
+ devl_unlock(devlink);
return 0;
+params_reg_err:
+ devl_unregister(devlink);
+ devl_unlock(devlink);
query_hca_caps_err:
+ devl_unregister(devlink);
+ devl_unlock(devlink);
mlx5_function_disable(dev, true);
out:
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
@@ -1702,6 +1723,7 @@ void mlx5_uninit_one_light(struct mlx5_core_dev *dev)
devl_lock(devlink);
mlx5_devlink_params_unregister(priv_to_devlink(dev));
+ devl_unregister(devlink);
devl_unlock(devlink);
if (dev->state != MLX5_DEVICE_STATE_UP)
return;
@@ -1943,16 +1965,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_init_one;
}
- err = mlx5_crdump_enable(dev);
- if (err)
- dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
-
- err = mlx5_hwmon_dev_register(dev);
- if (err)
- mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
-
pci_save_state(pdev);
- devlink_register(devlink);
return 0;
err_init_one:
@@ -1973,16 +1986,9 @@ static void remove_one(struct pci_dev *pdev)
struct devlink *devlink = priv_to_devlink(dev);
set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
- /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using
- * devlink notify APIs.
- * Hence, we must drain them before unregistering the devlink.
- */
mlx5_drain_fw_reset(dev);
mlx5_drain_health_wq(dev);
- devlink_unregister(devlink);
mlx5_sriov_disable(pdev, false);
- mlx5_hwmon_dev_unregister(dev);
- mlx5_crdump_disable(dev);
mlx5_uninit_one(dev);
mlx5_pci_close(dev);
mlx5_mdev_uninit(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 4dcf995cb1a204..6bac8ad70ba60b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -19,6 +19,7 @@
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
+#define MLX5_IRQ_VEC_COMP_BASE 1
#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
@@ -246,6 +247,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
return;
}
+ vecidx -= MLX5_IRQ_VEC_COMP_BASE;
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}
@@ -585,7 +587,7 @@ struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
struct mlx5_irq_pool *pool = table->pcif_pool;
struct irq_affinity_desc af_desc;
- int offset = 1;
+ int offset = MLX5_IRQ_VEC_COMP_BASE;
if (!pool->xa_num_irqs.max)
offset = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index bc863e1f062e6b..7ebe712808275a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -75,7 +75,6 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
goto peer_devlink_set_err;
}
- devlink_register(devlink);
return 0;
peer_devlink_set_err:
@@ -101,7 +100,6 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
devlink = priv_to_devlink(mdev);
set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
mlx5_drain_health_wq(mdev);
- devlink_unregister(devlink);
if (mlx5_dev_is_lightweight(mdev))
mlx5_uninit_one_light(mdev);
else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
index 64f4cc284aea41..030a5776c93740 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
@@ -205,12 +205,11 @@ dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size)
}
static int
-dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
+dr_dump_rule_action_mem(struct seq_file *file, char *buff, const u64 rule_id,
struct mlx5dr_rule_action_member *action_mem)
{
struct mlx5dr_action *action = action_mem->action;
const u64 action_id = DR_DBG_PTR_TO_ID(action);
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
u64 hit_tbl_ptr, miss_tbl_ptr;
u32 hit_tbl_id, miss_tbl_id;
int ret;
@@ -488,10 +487,9 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
}
static int
-dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
+dr_dump_rule_mem(struct seq_file *file, char *buff, struct mlx5dr_ste *ste,
bool is_rx, const u64 rule_id, u8 format_ver)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
char hw_ste_dump[DR_HEX_SIZE];
u32 mem_rec_type;
int ret;
@@ -522,7 +520,8 @@ dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
}
static int
-dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
+dr_dump_rule_rx_tx(struct seq_file *file, char *buff,
+ struct mlx5dr_rule_rx_tx *rule_rx_tx,
bool is_rx, const u64 rule_id, u8 format_ver)
{
struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
@@ -533,7 +532,7 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
return 0;
while (i--) {
- ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id,
+ ret = dr_dump_rule_mem(file, buff, ste_arr[i], is_rx, rule_id,
format_ver);
if (ret < 0)
return ret;
@@ -542,7 +541,8 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
return 0;
}
-static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
+static noinline_for_stack int
+dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
{
struct mlx5dr_rule_action_member *action_mem;
const u64 rule_id = DR_DBG_PTR_TO_ID(rule);
@@ -565,19 +565,19 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
return ret;
if (rx->nic_matcher) {
- ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver);
+ ret = dr_dump_rule_rx_tx(file, buff, rx, true, rule_id, format_ver);
if (ret < 0)
return ret;
}
if (tx->nic_matcher) {
- ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver);
+ ret = dr_dump_rule_rx_tx(file, buff, tx, false, rule_id, format_ver);
if (ret < 0)
return ret;
}
list_for_each_entry(action_mem, &rule->rule_actions_list, list) {
- ret = dr_dump_rule_action_mem(file, rule_id, action_mem);
+ ret = dr_dump_rule_action_mem(file, buff, rule_id, action_mem);
if (ret < 0)
return ret;
}
@@ -586,10 +586,10 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
}
static int
-dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
+dr_dump_matcher_mask(struct seq_file *file, char *buff,
+ struct mlx5dr_match_param *mask,
u8 criteria, const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
char dump[DR_HEX_SIZE];
int ret;
@@ -681,10 +681,10 @@ dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
}
static int
-dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
+dr_dump_matcher_builder(struct seq_file *file, char *buff,
+ struct mlx5dr_ste_build *builder,
u32 index, bool is_rx, const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -702,11 +702,10 @@ dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
}
static int
-dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_matcher_rx_tx(struct seq_file *file, char *buff, bool is_rx,
struct mlx5dr_matcher_rx_tx *matcher_rx_tx,
const u64 matcher_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
enum dr_dump_rec_type rec_type;
u64 s_icm_addr, e_icm_addr;
int i, ret;
@@ -731,7 +730,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
return ret;
for (i = 0; i < matcher_rx_tx->num_of_builders; i++) {
- ret = dr_dump_matcher_builder(file,
+ ret = dr_dump_matcher_builder(file, buff,
&matcher_rx_tx->ste_builder[i],
i, is_rx, matcher_id);
if (ret < 0)
@@ -741,7 +740,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
return 0;
}
-static int
+static noinline_for_stack int
dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
{
struct mlx5dr_matcher_rx_tx *rx = &matcher->rx;
@@ -763,19 +762,19 @@ dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
if (ret)
return ret;
- ret = dr_dump_matcher_mask(file, &matcher->mask,
+ ret = dr_dump_matcher_mask(file, buff, &matcher->mask,
matcher->match_criteria, matcher_id);
if (ret < 0)
return ret;
if (rx->nic_tbl) {
- ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id);
+ ret = dr_dump_matcher_rx_tx(file, buff, true, rx, matcher_id);
if (ret < 0)
return ret;
}
if (tx->nic_tbl) {
- ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id);
+ ret = dr_dump_matcher_rx_tx(file, buff, false, tx, matcher_id);
if (ret < 0)
return ret;
}
@@ -803,11 +802,10 @@ dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher)
}
static int
-dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_table_rx_tx(struct seq_file *file, char *buff, bool is_rx,
struct mlx5dr_table_rx_tx *table_rx_tx,
const u64 table_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
enum dr_dump_rec_type rec_type;
u64 s_icm_addr;
int ret;
@@ -829,7 +827,8 @@ dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
return 0;
}
-static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
+static noinline_for_stack int
+dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
{
struct mlx5dr_table_rx_tx *rx = &table->rx;
struct mlx5dr_table_rx_tx *tx = &table->tx;
@@ -848,14 +847,14 @@ static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
return ret;
if (rx->nic_dmn) {
- ret = dr_dump_table_rx_tx(file, true, rx,
+ ret = dr_dump_table_rx_tx(file, buff, true, rx,
DR_DBG_PTR_TO_ID(table));
if (ret < 0)
return ret;
}
if (tx->nic_dmn) {
- ret = dr_dump_table_rx_tx(file, false, tx,
+ ret = dr_dump_table_rx_tx(file, buff, false, tx,
DR_DBG_PTR_TO_ID(table));
if (ret < 0)
return ret;
@@ -881,10 +880,10 @@ static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl)
}
static int
-dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
+dr_dump_send_ring(struct seq_file *file, char *buff,
+ struct mlx5dr_send_ring *ring,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -902,13 +901,13 @@ dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
return 0;
}
-static noinline_for_stack int
+static int
dr_dump_domain_info_flex_parser(struct seq_file *file,
+ char *buff,
const char *flex_parser_name,
const u8 flex_parser_value,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
int ret;
ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -925,11 +924,11 @@ dr_dump_domain_info_flex_parser(struct seq_file *file,
return 0;
}
-static noinline_for_stack int
-dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
+static int
+dr_dump_domain_info_caps(struct seq_file *file, char *buff,
+ struct mlx5dr_cmd_caps *caps,
const u64 domain_id)
{
- char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
struct mlx5dr_cmd_vport_cap *vport_caps;
unsigned long i, vports_num;
int ret;
@@ -969,34 +968,35 @@ dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
}
static int
-dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info,
+dr_dump_domain_info(struct seq_file *file, char *buff,
+ struct mlx5dr_domain_info *info,
const u64 domain_id)
{
int ret;
- ret = dr_dump_domain_info_caps(file, &info->caps, domain_id);
+ ret = dr_dump_domain_info_caps(file, buff, &info->caps, domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw0",
info->caps.flex_parser_id_icmp_dw0,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw1",
info->caps.flex_parser_id_icmp_dw1,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw0",
info->caps.flex_parser_id_icmpv6_dw0,
domain_id);
if (ret < 0)
return ret;
- ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1",
+ ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw1",
info->caps.flex_parser_id_icmpv6_dw1,
domain_id);
if (ret < 0)
@@ -1032,12 +1032,12 @@ dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn)
if (ret)
return ret;
- ret = dr_dump_domain_info(file, &dmn->info, domain_id);
+ ret = dr_dump_domain_info(file, buff, &dmn->info, domain_id);
if (ret < 0)
return ret;
if (dmn->info.supp_sw_steering) {
- ret = dr_dump_send_ring(file, dmn->send_ring, domain_id);
+ ret = dr_dump_send_ring(file, buff, dmn->send_ring, domain_id);
if (ret < 0)
return ret;
}
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index 3d09fa54598f1a..ba303868686a77 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/phy.h>
#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include "mlxbf_gige.h"
@@ -139,13 +140,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
control |= MLXBF_GIGE_CONTROL_PORT_EN;
writeq(control, priv->base + MLXBF_GIGE_CONTROL);
- err = mlxbf_gige_request_irqs(priv);
- if (err)
- return err;
mlxbf_gige_cache_stats(priv);
err = mlxbf_gige_clean_port(priv);
if (err)
- goto free_irqs;
+ return err;
/* Clear driver's valid_polarity to match hardware,
* since the above call to clean_port() resets the
@@ -157,7 +155,7 @@ static int mlxbf_gige_open(struct net_device *netdev)
err = mlxbf_gige_tx_init(priv);
if (err)
- goto free_irqs;
+ goto phy_deinit;
err = mlxbf_gige_rx_init(priv);
if (err)
goto tx_deinit;
@@ -166,6 +164,10 @@ static int mlxbf_gige_open(struct net_device *netdev)
napi_enable(&priv->napi);
netif_start_queue(netdev);
+ err = mlxbf_gige_request_irqs(priv);
+ if (err)
+ goto napi_deinit;
+
/* Set bits in INT_EN that we care about */
int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR |
MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS |
@@ -182,11 +184,17 @@ static int mlxbf_gige_open(struct net_device *netdev)
return 0;
+napi_deinit:
+ netif_stop_queue(netdev);
+ napi_disable(&priv->napi);
+ netif_napi_del(&priv->napi);
+ mlxbf_gige_rx_deinit(priv);
+
tx_deinit:
mlxbf_gige_tx_deinit(priv);
-free_irqs:
- mlxbf_gige_free_irqs(priv);
+phy_deinit:
+ phy_stop(phydev);
return err;
}
@@ -485,8 +493,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev)
{
struct mlxbf_gige *priv = platform_get_drvdata(pdev);
- writeq(0, priv->base + MLXBF_GIGE_INT_EN);
- mlxbf_gige_clean_port(priv);
+ rtnl_lock();
+ netif_device_detach(priv->netdev);
+
+ if (netif_running(priv->netdev))
+ dev_close(priv->netdev);
+
+ rtnl_unlock();
}
static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = {
diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h
index e5ec0a363aff84..31f75b4a67fd79 100644
--- a/drivers/net/ethernet/micrel/ks8851.h
+++ b/drivers/net/ethernet/micrel/ks8851.h
@@ -368,7 +368,6 @@ union ks8851_tx_hdr {
* @rdfifo: FIFO read callback
* @wrfifo: FIFO write callback
* @start_xmit: start_xmit() implementation callback
- * @rx_skb: rx_skb() implementation callback
* @flush_tx_work: flush_tx_work() implementation callback
*
* The @statelock is used to protect information in the structure which may
@@ -423,8 +422,6 @@ struct ks8851_net {
struct sk_buff *txp, bool irq);
netdev_tx_t (*start_xmit)(struct sk_buff *skb,
struct net_device *dev);
- void (*rx_skb)(struct ks8851_net *ks,
- struct sk_buff *skb);
void (*flush_tx_work)(struct ks8851_net *ks);
};
diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
index 0bf13b38b8f5b9..d4cdf3d4f55257 100644
--- a/drivers/net/ethernet/micrel/ks8851_common.c
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -232,16 +232,6 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt)
}
/**
- * ks8851_rx_skb - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb)
-{
- ks->rx_skb(ks, skb);
-}
-
-/**
* ks8851_rx_pkts - receive packets from the host
* @ks: The device information.
*
@@ -309,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
ks8851_dbg_dumpkkt(ks, rxpkt);
skb->protocol = eth_type_trans(skb, ks->netdev);
- ks8851_rx_skb(ks, skb);
+ __netif_rx(skb);
ks->netdev->stats.rx_packets++;
ks->netdev->stats.rx_bytes += rxlen;
@@ -340,6 +330,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
unsigned long flags;
unsigned int status;
+ local_bh_disable();
+
ks8851_lock(ks, &flags);
status = ks8851_rdreg16(ks, KS_ISR);
@@ -416,6 +408,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
if (status & IRQ_LCI)
mii_check_link(&ks->mii);
+ local_bh_enable();
+
return IRQ_HANDLED;
}
diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c
index 2a7f2985426703..381b9cd285ebd0 100644
--- a/drivers/net/ethernet/micrel/ks8851_par.c
+++ b/drivers/net/ethernet/micrel/ks8851_par.c
@@ -210,16 +210,6 @@ static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp,
iowrite16_rep(ksp->hw_addr, txp->data, len / 2);
}
-/**
- * ks8851_rx_skb_par - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb)
-{
- netif_rx(skb);
-}
-
static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks)
{
return ks8851_rdreg16_par(ks, KS_TXQCR);
@@ -298,7 +288,6 @@ static int ks8851_probe_par(struct platform_device *pdev)
ks->rdfifo = ks8851_rdfifo_par;
ks->wrfifo = ks8851_wrfifo_par;
ks->start_xmit = ks8851_start_xmit_par;
- ks->rx_skb = ks8851_rx_skb_par;
#define STD_IRQ (IRQ_LCI | /* Link Change */ \
IRQ_RXI | /* RX done */ \
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
index 2f803377c9f9dd..670c1de966db88 100644
--- a/drivers/net/ethernet/micrel/ks8851_spi.c
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -299,16 +299,6 @@ static unsigned int calc_txlen(unsigned int len)
}
/**
- * ks8851_rx_skb_spi - receive skbuff
- * @ks: The device state
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
-{
- netif_rx(skb);
-}
-
-/**
* ks8851_tx_work - process tx packet(s)
* @work: The work strucutre what was scheduled.
*
@@ -435,7 +425,6 @@ static int ks8851_probe_spi(struct spi_device *spi)
ks->rdfifo = ks8851_rdfifo_spi;
ks->wrfifo = ks8851_wrfifo_spi;
ks->start_xmit = ks8851_start_xmit_spi;
- ks->rx_skb = ks8851_rx_skb_spi;
ks->flush_tx_work = ks8851_flush_tx_work_spi;
#define STD_IRQ (IRQ_LCI | /* Link Change */ \
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index bd8aa83b47e5ee..75a988c0bd794a 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -25,6 +25,8 @@
#define PCS_POWER_STATE_DOWN 0x6
#define PCS_POWER_STATE_UP 0x4
+#define RFE_RD_FIFO_TH_3_DWORDS 0x3
+
static void pci11x1x_strap_get_status(struct lan743x_adapter *adapter)
{
u32 chip_rev;
@@ -3272,6 +3274,21 @@ static void lan743x_full_cleanup(struct lan743x_adapter *adapter)
lan743x_pci_cleanup(adapter);
}
+static void pci11x1x_set_rfe_rd_fifo_threshold(struct lan743x_adapter *adapter)
+{
+ u16 rev = adapter->csr.id_rev & ID_REV_CHIP_REV_MASK_;
+
+ if (rev == ID_REV_CHIP_REV_PCI11X1X_B0_) {
+ u32 misc_ctl;
+
+ misc_ctl = lan743x_csr_read(adapter, MISC_CTL_0);
+ misc_ctl &= ~MISC_CTL_0_RFE_READ_FIFO_MASK_;
+ misc_ctl |= FIELD_PREP(MISC_CTL_0_RFE_READ_FIFO_MASK_,
+ RFE_RD_FIFO_TH_3_DWORDS);
+ lan743x_csr_write(adapter, MISC_CTL_0, misc_ctl);
+ }
+}
+
static int lan743x_hardware_init(struct lan743x_adapter *adapter,
struct pci_dev *pdev)
{
@@ -3287,6 +3304,7 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter,
pci11x1x_strap_get_status(adapter);
spin_lock_init(&adapter->eth_syslock_spinlock);
mutex_init(&adapter->sgmii_rw_lock);
+ pci11x1x_set_rfe_rd_fifo_threshold(adapter);
} else {
adapter->max_tx_channels = LAN743X_MAX_TX_CHANNELS;
adapter->used_tx_channels = LAN743X_USED_TX_CHANNELS;
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index be79cb0ae5af33..645bc048e52ef5 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -26,6 +26,7 @@
#define ID_REV_CHIP_REV_MASK_ (0x0000FFFF)
#define ID_REV_CHIP_REV_A0_ (0x00000000)
#define ID_REV_CHIP_REV_B0_ (0x00000010)
+#define ID_REV_CHIP_REV_PCI11X1X_B0_ (0x000000B0)
#define FPGA_REV (0x04)
#define FPGA_REV_GET_MINOR_(fpga_rev) (((fpga_rev) >> 8) & 0x000000FF)
@@ -311,6 +312,9 @@
#define SGMII_CTL_LINK_STATUS_SOURCE_ BIT(8)
#define SGMII_CTL_SGMII_POWER_DN_ BIT(1)
+#define MISC_CTL_0 (0x920)
+#define MISC_CTL_0_RFE_READ_FIFO_MASK_ GENMASK(6, 4)
+
/* Vendor Specific SGMII MMD details */
#define SR_VSMMD_PCS_ID1 0x0004
#define SR_VSMMD_PCS_ID2 0x0005
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
index 3a1b1a1f5a1951..60dd2fd603a855 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c
@@ -731,7 +731,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5,
bool sgmii = false, inband_aneg = false;
int err;
- if (port->conf.inband) {
+ if (conf->inband) {
if (conf->portmode == PHY_INTERFACE_MODE_SGMII ||
conf->portmode == PHY_INTERFACE_MODE_QSGMII)
inband_aneg = true; /* Cisco-SGMII in-band-aneg */
@@ -948,7 +948,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5,
if (err)
return -EINVAL;
- if (port->conf.inband) {
+ if (conf->inband) {
/* Enable/disable 1G counters in ASM */
spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev),
ASM_PORT_CFG_CSC_STAT_DIS,
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
index 523e0c470894f7..55f255a3c9db69 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
@@ -36,6 +36,27 @@ struct sparx5_tc_flower_template {
u16 l3_proto; /* protocol specified in the template */
};
+/* SparX-5 VCAP fragment types:
+ * 0 = no fragment, 1 = initial fragment,
+ * 2 = suspicious fragment, 3 = valid follow-up fragment
+ */
+enum { /* key / mask */
+ FRAG_NOT = 0x03, /* 0 / 3 */
+ FRAG_SOME = 0x11, /* 1 / 1 */
+ FRAG_FIRST = 0x13, /* 1 / 3 */
+ FRAG_LATER = 0x33, /* 3 / 3 */
+ FRAG_INVAL = 0xff, /* invalid */
+};
+
+/* Flower fragment flag to VCAP fragment type mapping */
+static const u8 sparx5_vcap_frag_map[4][4] = { /* is_frag */
+ { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_FIRST }, /* 0/0 */
+ { FRAG_NOT, FRAG_NOT, FRAG_INVAL, FRAG_INVAL }, /* 0/1 */
+ { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_INVAL }, /* 1/0 */
+ { FRAG_SOME, FRAG_LATER, FRAG_INVAL, FRAG_FIRST } /* 1/1 */
+ /* 0/0 0/1 1/0 1/1 <-- first_frag */
+};
+
static int
sparx5_tc_flower_es0_tpid(struct vcap_tc_flower_parse_usage *st)
{
@@ -145,29 +166,27 @@ sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st)
flow_rule_match_control(st->frule, &mt);
if (mt.mask->flags) {
- if (mt.mask->flags & FLOW_DIS_FIRST_FRAG) {
- if (mt.key->flags & FLOW_DIS_FIRST_FRAG) {
- value = 1; /* initial fragment */
- mask = 0x3;
- } else {
- if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) {
- value = 3; /* follow up fragment */
- mask = 0x3;
- } else {
- value = 0; /* no fragment */
- mask = 0x3;
- }
- }
- } else {
- if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) {
- value = 3; /* follow up fragment */
- mask = 0x3;
- } else {
- value = 0; /* no fragment */
- mask = 0x3;
- }
+ u8 is_frag_key = !!(mt.key->flags & FLOW_DIS_IS_FRAGMENT);
+ u8 is_frag_mask = !!(mt.mask->flags & FLOW_DIS_IS_FRAGMENT);
+ u8 is_frag_idx = (is_frag_key << 1) | is_frag_mask;
+
+ u8 first_frag_key = !!(mt.key->flags & FLOW_DIS_FIRST_FRAG);
+ u8 first_frag_mask = !!(mt.mask->flags & FLOW_DIS_FIRST_FRAG);
+ u8 first_frag_idx = (first_frag_key << 1) | first_frag_mask;
+
+ /* Lookup verdict based on the 2 + 2 input bits */
+ u8 vdt = sparx5_vcap_frag_map[is_frag_idx][first_frag_idx];
+
+ if (vdt == FRAG_INVAL) {
+ NL_SET_ERR_MSG_MOD(st->fco->common.extack,
+ "Match on invalid fragment flag combination");
+ return -EINVAL;
}
+ /* Extract VCAP fragment key and mask from verdict */
+ value = (vdt >> 4) & 0x3;
+ mask = vdt & 0x3;
+
err = vcap_rule_add_key_u32(st->vrule,
VCAP_KF_L3_FRAGMENT_TYPE,
value, mask);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 59287c6e6cee6f..d8af5e7e15b4d8 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -601,7 +601,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
*alloc_size = mtu + MANA_RXBUF_PAD + *headroom;
- *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN);
+ *datasize = mtu + ETH_HLEN;
}
static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
index 4c043052198d47..00882ffc7a029e 100644
--- a/drivers/net/ethernet/realtek/r8169.h
+++ b/drivers/net/ethernet/realtek/r8169.h
@@ -73,6 +73,7 @@ enum mac_version {
};
struct rtl8169_private;
+struct r8169_led_classdev;
void r8169_apply_firmware(struct rtl8169_private *tp);
u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp);
@@ -84,7 +85,8 @@ void r8169_get_led_name(struct rtl8169_private *tp, int idx,
char *buf, int buf_len);
int rtl8168_get_led_mode(struct rtl8169_private *tp);
int rtl8168_led_mod_ctrl(struct rtl8169_private *tp, u16 mask, u16 val);
-void rtl8168_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev);
int rtl8125_get_led_mode(struct rtl8169_private *tp, int index);
int rtl8125_set_led_mode(struct rtl8169_private *tp, int index, u16 mode);
-void rtl8125_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev);
+void r8169_remove_leds(struct r8169_led_classdev *leds);
diff --git a/drivers/net/ethernet/realtek/r8169_leds.c b/drivers/net/ethernet/realtek/r8169_leds.c
index 7c5dc9d0df855e..e10bee706bc691 100644
--- a/drivers/net/ethernet/realtek/r8169_leds.c
+++ b/drivers/net/ethernet/realtek/r8169_leds.c
@@ -146,22 +146,22 @@ static void rtl8168_setup_ldev(struct r8169_led_classdev *ldev,
led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
/* ignore errors */
- devm_led_classdev_register(&ndev->dev, led_cdev);
+ led_classdev_register(&ndev->dev, led_cdev);
}
-void rtl8168_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev)
{
- /* bind resource mgmt to netdev */
- struct device *dev = &ndev->dev;
struct r8169_led_classdev *leds;
int i;
- leds = devm_kcalloc(dev, RTL8168_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ leds = kcalloc(RTL8168_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
if (!leds)
- return;
+ return NULL;
for (i = 0; i < RTL8168_NUM_LEDS; i++)
rtl8168_setup_ldev(leds + i, ndev, i);
+
+ return leds;
}
static int rtl8125_led_hw_control_is_supported(struct led_classdev *led_cdev,
@@ -245,20 +245,31 @@ static void rtl8125_setup_led_ldev(struct r8169_led_classdev *ldev,
led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
/* ignore errors */
- devm_led_classdev_register(&ndev->dev, led_cdev);
+ led_classdev_register(&ndev->dev, led_cdev);
}
-void rtl8125_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev)
{
- /* bind resource mgmt to netdev */
- struct device *dev = &ndev->dev;
struct r8169_led_classdev *leds;
int i;
- leds = devm_kcalloc(dev, RTL8125_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+ leds = kcalloc(RTL8125_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
if (!leds)
- return;
+ return NULL;
for (i = 0; i < RTL8125_NUM_LEDS; i++)
rtl8125_setup_led_ldev(leds + i, ndev, i);
+
+ return leds;
+}
+
+void r8169_remove_leds(struct r8169_led_classdev *leds)
+{
+ if (!leds)
+ return;
+
+ for (struct r8169_led_classdev *l = leds; l->ndev; l++)
+ led_classdev_unregister(&l->led);
+
+ kfree(leds);
}
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 5c879a5c86d70b..0fc5fe564ae50b 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -647,6 +647,8 @@ struct rtl8169_private {
const char *fw_name;
struct rtl_fw *rtl_fw;
+ struct r8169_led_classdev *leds;
+
u32 ocp_base;
};
@@ -1314,17 +1316,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
}
+static void rtl_dash_loop_wait(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long usecs, int n, bool high)
+{
+ if (!tp->dash_enabled)
+ return;
+ rtl_loop_wait(tp, c, usecs, n, high);
+}
+
+static void rtl_dash_loop_wait_high(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long d, int n)
+{
+ rtl_dash_loop_wait(tp, c, d, n, true);
+}
+
+static void rtl_dash_loop_wait_low(struct rtl8169_private *tp,
+ const struct rtl_cond *c,
+ unsigned long d, int n)
+{
+ rtl_dash_loop_wait(tp, c, d, n, false);
+}
+
static void rtl8168dp_driver_start(struct rtl8169_private *tp)
{
r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
- rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
}
static void rtl8168ep_driver_start(struct rtl8169_private *tp)
{
r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START);
r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
- rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
+ rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
}
static void rtl8168_driver_start(struct rtl8169_private *tp)
@@ -1338,7 +1363,7 @@ static void rtl8168_driver_start(struct rtl8169_private *tp)
static void rtl8168dp_driver_stop(struct rtl8169_private *tp)
{
r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP);
- rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
}
static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
@@ -1346,7 +1371,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
rtl8168ep_stop_cmac(tp);
r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP);
r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
- rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
+ rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
}
static void rtl8168_driver_stop(struct rtl8169_private *tp)
@@ -5021,6 +5046,9 @@ static void rtl_remove_one(struct pci_dev *pdev)
cancel_work_sync(&tp->wk.work);
+ if (IS_ENABLED(CONFIG_R8169_LEDS))
+ r8169_remove_leds(tp->leds);
+
unregister_netdev(tp->dev);
if (tp->dash_type != RTL_DASH_NONE)
@@ -5141,6 +5169,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
struct mii_bus *new_bus;
int ret;
+ /* On some boards with this chip version the BIOS is buggy and misses
+ * to reset the PHY page selector. This results in the PHY ID read
+ * accessing registers on a different page, returning a more or
+ * less random value. Fix this by resetting the page selector first.
+ */
+ if (tp->mac_version == RTL_GIGA_MAC_VER_25 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_26)
+ r8169_mdio_write(tp, 0x1f, 0);
+
new_bus = devm_mdiobus_alloc(&pdev->dev);
if (!new_bus)
return -ENOMEM;
@@ -5469,9 +5506,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (IS_ENABLED(CONFIG_R8169_LEDS)) {
if (rtl_is_8125(tp))
- rtl8125_init_leds(dev);
+ tp->leds = rtl8125_init_leds(dev);
else if (tp->mac_version > RTL_GIGA_MAC_VER_06)
- rtl8168_init_leds(dev);
+ tp->leds = rtl8168_init_leds(dev);
}
netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n",
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index d1be030c88483a..fcb756d77681cb 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -769,25 +769,28 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
dma_addr_t dma_addr;
int rx_packets = 0;
u8 desc_status;
- u16 pkt_len;
+ u16 desc_len;
u8 die_dt;
int entry;
int limit;
int i;
- entry = priv->cur_rx[q] % priv->num_rx_ring[q];
limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
stats = &priv->stats[q];
- desc = &priv->rx_ring[q].desc[entry];
- for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) {
+ for (i = 0; i < limit; i++, priv->cur_rx[q]++) {
+ entry = priv->cur_rx[q] % priv->num_rx_ring[q];
+ desc = &priv->rx_ring[q].desc[entry];
+ if (rx_packets == *quota || desc->die_dt == DT_FEMPTY)
+ break;
+
/* Descriptor type must be checked before all other reads */
dma_rmb();
desc_status = desc->msc;
- pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
+ desc_len = le16_to_cpu(desc->ds_cc) & RX_DS;
/* We use 0-byte descriptors to mark the DMA mapping errors */
- if (!pkt_len)
+ if (!desc_len)
continue;
if (desc_status & MSC_MC)
@@ -808,25 +811,25 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
switch (die_dt) {
case DT_FSINGLE:
skb = ravb_get_skb_gbeth(ndev, entry, desc);
- skb_put(skb, pkt_len);
+ skb_put(skb, desc_len);
skb->protocol = eth_type_trans(skb, ndev);
if (ndev->features & NETIF_F_RXCSUM)
ravb_rx_csum_gbeth(skb);
napi_gro_receive(&priv->napi[q], skb);
rx_packets++;
- stats->rx_bytes += pkt_len;
+ stats->rx_bytes += desc_len;
break;
case DT_FSTART:
priv->rx_1st_skb = ravb_get_skb_gbeth(ndev, entry, desc);
- skb_put(priv->rx_1st_skb, pkt_len);
+ skb_put(priv->rx_1st_skb, desc_len);
break;
case DT_FMID:
skb = ravb_get_skb_gbeth(ndev, entry, desc);
skb_copy_to_linear_data_offset(priv->rx_1st_skb,
priv->rx_1st_skb->len,
skb->data,
- pkt_len);
- skb_put(priv->rx_1st_skb, pkt_len);
+ desc_len);
+ skb_put(priv->rx_1st_skb, desc_len);
dev_kfree_skb(skb);
break;
case DT_FEND:
@@ -834,23 +837,20 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
skb_copy_to_linear_data_offset(priv->rx_1st_skb,
priv->rx_1st_skb->len,
skb->data,
- pkt_len);
- skb_put(priv->rx_1st_skb, pkt_len);
+ desc_len);
+ skb_put(priv->rx_1st_skb, desc_len);
dev_kfree_skb(skb);
priv->rx_1st_skb->protocol =
eth_type_trans(priv->rx_1st_skb, ndev);
if (ndev->features & NETIF_F_RXCSUM)
- ravb_rx_csum_gbeth(skb);
+ ravb_rx_csum_gbeth(priv->rx_1st_skb);
+ stats->rx_bytes += priv->rx_1st_skb->len;
napi_gro_receive(&priv->napi[q],
priv->rx_1st_skb);
rx_packets++;
- stats->rx_bytes += pkt_len;
break;
}
}
-
- entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
- desc = &priv->rx_ring[q].desc[entry];
}
/* Refill the RX ring buffers. */
@@ -891,30 +891,29 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
{
struct ravb_private *priv = netdev_priv(ndev);
const struct ravb_hw_info *info = priv->info;
- int entry = priv->cur_rx[q] % priv->num_rx_ring[q];
- int boguscnt = (priv->dirty_rx[q] + priv->num_rx_ring[q]) -
- priv->cur_rx[q];
struct net_device_stats *stats = &priv->stats[q];
struct ravb_ex_rx_desc *desc;
+ unsigned int limit, i;
struct sk_buff *skb;
dma_addr_t dma_addr;
struct timespec64 ts;
+ int rx_packets = 0;
u8 desc_status;
u16 pkt_len;
- int limit;
+ int entry;
+
+ limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
+ for (i = 0; i < limit; i++, priv->cur_rx[q]++) {
+ entry = priv->cur_rx[q] % priv->num_rx_ring[q];
+ desc = &priv->rx_ring[q].ex_desc[entry];
+ if (rx_packets == *quota || desc->die_dt == DT_FEMPTY)
+ break;
- boguscnt = min(boguscnt, *quota);
- limit = boguscnt;
- desc = &priv->rx_ring[q].ex_desc[entry];
- while (desc->die_dt != DT_FEMPTY) {
/* Descriptor type must be checked before all other reads */
dma_rmb();
desc_status = desc->msc;
pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
- if (--boguscnt < 0)
- break;
-
/* We use 0-byte descriptors to mark the DMA mapping errors */
if (!pkt_len)
continue;
@@ -960,12 +959,9 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
if (ndev->features & NETIF_F_RXCSUM)
ravb_rx_csum(skb);
napi_gro_receive(&priv->napi[q], skb);
- stats->rx_packets++;
+ rx_packets++;
stats->rx_bytes += pkt_len;
}
-
- entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
- desc = &priv->rx_ring[q].ex_desc[entry];
}
/* Refill the RX ring buffers. */
@@ -995,9 +991,9 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
desc->die_dt = DT_FEMPTY;
}
- *quota -= limit - (++boguscnt);
-
- return boguscnt <= 0;
+ stats->rx_packets += rx_packets;
+ *quota -= rx_packets;
+ return *quota == 0;
}
/* Packet receive function for Ethernet AVB */
@@ -1324,12 +1320,12 @@ static int ravb_poll(struct napi_struct *napi, int budget)
int q = napi - priv->napi;
int mask = BIT(q);
int quota = budget;
+ bool unmask;
/* Processing RX Descriptor Ring */
/* Clear RX interrupt */
ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
- if (ravb_rx(ndev, &quota, q))
- goto out;
+ unmask = !ravb_rx(ndev, &quota, q);
/* Processing TX Descriptor Ring */
spin_lock_irqsave(&priv->lock, flags);
@@ -1339,6 +1335,18 @@ static int ravb_poll(struct napi_struct *napi, int budget)
netif_wake_subqueue(ndev, q);
spin_unlock_irqrestore(&priv->lock, flags);
+ /* Receive error message handling */
+ priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
+ if (info->nc_queues)
+ priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
+ if (priv->rx_over_errors != ndev->stats.rx_over_errors)
+ ndev->stats.rx_over_errors = priv->rx_over_errors;
+ if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
+ ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
+
+ if (!unmask)
+ goto out;
+
napi_complete(napi);
/* Re-enable RX/TX interrupts */
@@ -1352,14 +1360,6 @@ static int ravb_poll(struct napi_struct *napi, int budget)
}
spin_unlock_irqrestore(&priv->lock, flags);
- /* Receive error message handling */
- priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
- if (info->nc_queues)
- priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
- if (priv->rx_over_errors != ndev->stats.rx_over_errors)
- ndev->stats.rx_over_errors = priv->rx_over_errors;
- if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
- ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
out:
return budget - quota;
}
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 475e1e8c1d35f3..0786eb0da39143 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -50,7 +50,7 @@
* the macros available to do this only define GCC 8.
*/
__diag_push();
-__diag_ignore(GCC, 8, "-Woverride-init",
+__diag_ignore_all("-Woverride-init",
"logic to initialize all and then override some is OK");
static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
SH_ETH_OFFSET_DEFAULTS,
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index a6fefe675ef152..3b7d4ac1e7be07 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -553,6 +553,7 @@ extern const struct stmmac_hwtimestamp stmmac_ptp;
extern const struct stmmac_mode_ops dwmac4_ring_mode_ops;
struct mac_link {
+ u32 caps;
u32 speed_mask;
u32 speed10;
u32 speed100;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index b21d99faa2d04c..e1b761dcfa1dd5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1096,6 +1096,8 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv)
priv->dev->priv_flags |= IFF_UNICAST_FLT;
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000;
/* The loopback bit seems to be re-set when link change
* Simply mask it each time
* Speed 10/100/1000 are set in BIT(2)/BIT(3)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 3927609abc4411..8555299443f4ed 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -539,6 +539,8 @@ int dwmac1000_setup(struct stmmac_priv *priv)
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000;
mac->link.duplex = GMAC_CONTROL_DM;
mac->link.speed10 = GMAC_CONTROL_PS;
mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index a6e8d7bd95886f..7667d103cd0ebd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -175,6 +175,8 @@ int dwmac100_setup(struct stmmac_priv *priv)
dev_info(priv->device, "\tDWMAC100\n");
mac->pcsr = priv->ioaddr;
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100;
mac->link.duplex = MAC_CONTROL_F;
mac->link.speed10 = 0;
mac->link.speed100 = 0;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 6b6d0de0961975..a38226d7cc6a99 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -70,7 +70,10 @@ static void dwmac4_core_init(struct mac_device_info *hw,
static void dwmac4_phylink_get_caps(struct stmmac_priv *priv)
{
- priv->phylink_config.mac_capabilities |= MAC_2500FD;
+ if (priv->plat->tx_queues_to_use > 1)
+ priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD | MAC_1000HD);
+ else
+ priv->hw->link.caps |= (MAC_10HD | MAC_100HD | MAC_1000HD);
}
static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
@@ -92,19 +95,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw,
u32 prio, u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
- u32 base_register;
- u32 value;
+ u32 clear_mask = 0;
+ u32 ctrl2, ctrl3;
+ int i;
- base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3;
- if (queue >= 4)
- queue -= 4;
+ ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2);
+ ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3);
- value = readl(ioaddr + base_register);
+ /* The software must ensure that the same priority
+ * is not mapped to multiple Rx queues
+ */
+ for (i = 0; i < 4; i++)
+ clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) &
+ GMAC_RXQCTRL_PSRQX_MASK(i));
- value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue);
- value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+ ctrl2 &= ~clear_mask;
+ ctrl3 &= ~clear_mask;
+
+ /* First assign new priorities to a queue, then
+ * clear them from others queues
+ */
+ if (queue < 4) {
+ ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
GMAC_RXQCTRL_PSRQX_MASK(queue);
- writel(value, ioaddr + base_register);
+
+ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
+ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
+ } else {
+ queue -= 4;
+
+ ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+ GMAC_RXQCTRL_PSRQX_MASK(queue);
+
+ writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
+ writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
+ }
}
static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
@@ -1356,6 +1381,8 @@ int dwmac4_setup(struct stmmac_priv *priv)
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_10 | MAC_100 | MAC_1000 | MAC_2500FD;
mac->link.duplex = GMAC_CONFIG_DM;
mac->link.speed10 = GMAC_CONFIG_PS;
mac->link.speed100 = GMAC_CONFIG_FES | GMAC_CONFIG_PS;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 1af2f89a0504ab..f8e7775bb63364 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -47,14 +47,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw,
writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN);
}
-static void xgmac_phylink_get_caps(struct stmmac_priv *priv)
-{
- priv->phylink_config.mac_capabilities |= MAC_2500FD | MAC_5000FD |
- MAC_10000FD | MAC_25000FD |
- MAC_40000FD | MAC_50000FD |
- MAC_100000FD;
-}
-
static void dwxgmac2_set_mac(void __iomem *ioaddr, bool enable)
{
u32 tx = readl(ioaddr + XGMAC_TX_CONFIG);
@@ -105,17 +97,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio,
u32 queue)
{
void __iomem *ioaddr = hw->pcsr;
- u32 value, reg;
+ u32 clear_mask = 0;
+ u32 ctrl2, ctrl3;
+ int i;
- reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3;
- if (queue >= 4)
+ ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2);
+ ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3);
+
+ /* The software must ensure that the same priority
+ * is not mapped to multiple Rx queues
+ */
+ for (i = 0; i < 4; i++)
+ clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) &
+ XGMAC_PSRQ(i));
+
+ ctrl2 &= ~clear_mask;
+ ctrl3 &= ~clear_mask;
+
+ /* First assign new priorities to a queue, then
+ * clear them from others queues
+ */
+ if (queue < 4) {
+ ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
+ XGMAC_PSRQ(queue);
+
+ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
+ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
+ } else {
queue -= 4;
- value = readl(ioaddr + reg);
- value &= ~XGMAC_PSRQ(queue);
- value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue);
+ ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
+ XGMAC_PSRQ(queue);
- writel(value, ioaddr + reg);
+ writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
+ writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
+ }
}
static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio,
@@ -1516,7 +1532,6 @@ static void dwxgmac3_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *
const struct stmmac_ops dwxgmac210_ops = {
.core_init = dwxgmac2_core_init,
- .phylink_get_caps = xgmac_phylink_get_caps,
.set_mac = dwxgmac2_set_mac,
.rx_ipc = dwxgmac2_rx_ipc,
.rx_queue_enable = dwxgmac2_rx_queue_enable,
@@ -1577,7 +1592,6 @@ static void dwxlgmac2_rx_queue_enable(struct mac_device_info *hw, u8 mode,
const struct stmmac_ops dwxlgmac2_ops = {
.core_init = dwxgmac2_core_init,
- .phylink_get_caps = xgmac_phylink_get_caps,
.set_mac = dwxgmac2_set_mac,
.rx_ipc = dwxgmac2_rx_ipc,
.rx_queue_enable = dwxlgmac2_rx_queue_enable,
@@ -1637,6 +1651,9 @@ int dwxgmac2_setup(struct stmmac_priv *priv)
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_1000FD | MAC_2500FD | MAC_5000FD |
+ MAC_10000FD;
mac->link.duplex = 0;
mac->link.speed10 = XGMAC_CONFIG_SS_10_MII;
mac->link.speed100 = XGMAC_CONFIG_SS_100_MII;
@@ -1674,6 +1691,11 @@ int dwxlgmac2_setup(struct stmmac_priv *priv)
if (mac->multicast_filter_bins)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
+ mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+ MAC_1000FD | MAC_2500FD | MAC_5000FD |
+ MAC_10000FD | MAC_25000FD |
+ MAC_40000FD | MAC_50000FD |
+ MAC_100000FD;
mac->link.duplex = 0;
mac->link.speed1000 = XLGMAC_CONFIG_SS_1000;
mac->link.speed2500 = XLGMAC_CONFIG_SS_2500;
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h
index dff02d75d51971..5d1ea3e07459a3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc.h
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h
@@ -52,6 +52,7 @@ struct stmmac_counters {
unsigned int mmc_tx_excessdef;
unsigned int mmc_tx_pause_frame;
unsigned int mmc_tx_vlan_frame_g;
+ unsigned int mmc_tx_oversize_g;
unsigned int mmc_tx_lpi_usec;
unsigned int mmc_tx_lpi_tran;
@@ -80,6 +81,7 @@ struct stmmac_counters {
unsigned int mmc_rx_fifo_overflow;
unsigned int mmc_rx_vlan_frames_gb;
unsigned int mmc_rx_watchdog_error;
+ unsigned int mmc_rx_error;
unsigned int mmc_rx_lpi_usec;
unsigned int mmc_rx_lpi_tran;
unsigned int mmc_rx_discard_frames_gb;
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index 7eb477faa75a38..0fab842902a850 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -53,6 +53,7 @@
#define MMC_TX_EXCESSDEF 0x6c
#define MMC_TX_PAUSE_FRAME 0x70
#define MMC_TX_VLAN_FRAME_G 0x74
+#define MMC_TX_OVERSIZE_G 0x78
/* MMC RX counter registers */
#define MMC_RX_FRAMECOUNT_GB 0x80
@@ -79,6 +80,13 @@
#define MMC_RX_FIFO_OVERFLOW 0xd4
#define MMC_RX_VLAN_FRAMES_GB 0xd8
#define MMC_RX_WATCHDOG_ERROR 0xdc
+#define MMC_RX_ERROR 0xe0
+
+#define MMC_TX_LPI_USEC 0xec
+#define MMC_TX_LPI_TRAN 0xf0
+#define MMC_RX_LPI_USEC 0xf4
+#define MMC_RX_LPI_TRAN 0xf8
+
/* IPC*/
#define MMC_RX_IPC_INTR_MASK 0x100
#define MMC_RX_IPC_INTR 0x108
@@ -283,6 +291,9 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
mmc->mmc_tx_excessdef += readl(mmcaddr + MMC_TX_EXCESSDEF);
mmc->mmc_tx_pause_frame += readl(mmcaddr + MMC_TX_PAUSE_FRAME);
mmc->mmc_tx_vlan_frame_g += readl(mmcaddr + MMC_TX_VLAN_FRAME_G);
+ mmc->mmc_tx_oversize_g += readl(mmcaddr + MMC_TX_OVERSIZE_G);
+ mmc->mmc_tx_lpi_usec += readl(mmcaddr + MMC_TX_LPI_USEC);
+ mmc->mmc_tx_lpi_tran += readl(mmcaddr + MMC_TX_LPI_TRAN);
/* MMC RX counter registers */
mmc->mmc_rx_framecount_gb += readl(mmcaddr + MMC_RX_FRAMECOUNT_GB);
@@ -316,6 +327,10 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
mmc->mmc_rx_fifo_overflow += readl(mmcaddr + MMC_RX_FIFO_OVERFLOW);
mmc->mmc_rx_vlan_frames_gb += readl(mmcaddr + MMC_RX_VLAN_FRAMES_GB);
mmc->mmc_rx_watchdog_error += readl(mmcaddr + MMC_RX_WATCHDOG_ERROR);
+ mmc->mmc_rx_error += readl(mmcaddr + MMC_RX_ERROR);
+ mmc->mmc_rx_lpi_usec += readl(mmcaddr + MMC_RX_LPI_USEC);
+ mmc->mmc_rx_lpi_tran += readl(mmcaddr + MMC_RX_LPI_TRAN);
+
/* IPv4 */
mmc->mmc_rx_ipv4_gd += readl(mmcaddr + MMC_RX_IPV4_GD);
mmc->mmc_rx_ipv4_hderr += readl(mmcaddr + MMC_RX_IPV4_HDERR);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index e1537a57815f38..542e2633a6f522 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -212,6 +212,7 @@ static const struct stmmac_stats stmmac_mmc[] = {
STMMAC_MMC_STAT(mmc_tx_excessdef),
STMMAC_MMC_STAT(mmc_tx_pause_frame),
STMMAC_MMC_STAT(mmc_tx_vlan_frame_g),
+ STMMAC_MMC_STAT(mmc_tx_oversize_g),
STMMAC_MMC_STAT(mmc_tx_lpi_usec),
STMMAC_MMC_STAT(mmc_tx_lpi_tran),
STMMAC_MMC_STAT(mmc_rx_framecount_gb),
@@ -238,6 +239,7 @@ static const struct stmmac_stats stmmac_mmc[] = {
STMMAC_MMC_STAT(mmc_rx_fifo_overflow),
STMMAC_MMC_STAT(mmc_rx_vlan_frames_gb),
STMMAC_MMC_STAT(mmc_rx_watchdog_error),
+ STMMAC_MMC_STAT(mmc_rx_error),
STMMAC_MMC_STAT(mmc_rx_lpi_usec),
STMMAC_MMC_STAT(mmc_rx_lpi_tran),
STMMAC_MMC_STAT(mmc_rx_discard_frames_gb),
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 24cd80490d19cf..7c6fb14b555508 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1198,17 +1198,6 @@ static int stmmac_init_phy(struct net_device *dev)
return ret;
}
-static void stmmac_set_half_duplex(struct stmmac_priv *priv)
-{
- /* Half-Duplex can only work with single tx queue */
- if (priv->plat->tx_queues_to_use > 1)
- priv->phylink_config.mac_capabilities &=
- ~(MAC_10HD | MAC_100HD | MAC_1000HD);
- else
- priv->phylink_config.mac_capabilities |=
- (MAC_10HD | MAC_100HD | MAC_1000HD);
-}
-
static int stmmac_phy_setup(struct stmmac_priv *priv)
{
struct stmmac_mdio_bus_data *mdio_bus_data;
@@ -1236,15 +1225,11 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
xpcs_get_interfaces(priv->hw->xpcs,
priv->phylink_config.supported_interfaces);
- priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
- MAC_10FD | MAC_100FD |
- MAC_1000FD;
-
- stmmac_set_half_duplex(priv);
-
/* Get the MAC specific capabilities */
stmmac_mac_phylink_get_caps(priv);
+ priv->phylink_config.mac_capabilities = priv->hw->link.caps;
+
max_speed = priv->plat->max_speed;
if (max_speed)
phylink_limit_mac_speed(&priv->phylink_config, max_speed);
@@ -7342,6 +7327,7 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
{
struct stmmac_priv *priv = netdev_priv(dev);
int ret = 0, i;
+ int max_speed;
if (netif_running(dev))
stmmac_release(dev);
@@ -7355,7 +7341,14 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
priv->rss.table[i] = ethtool_rxfh_indir_default(i,
rx_cnt);
- stmmac_set_half_duplex(priv);
+ stmmac_mac_phylink_get_caps(priv);
+
+ priv->phylink_config.mac_capabilities = priv->hw->link.caps;
+
+ max_speed = priv->plat->max_speed;
+ if (max_speed)
+ phylink_limit_mac_speed(&priv->phylink_config, max_speed);
+
stmmac_napi_add(dev);
if (netif_running(dev))
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 2939a21ca74f3c..1d00e21808c1c3 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2793,6 +2793,8 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common)
static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
{
+ struct am65_cpsw_rx_chn *rx_chan = &common->rx_chns;
+ struct am65_cpsw_tx_chn *tx_chan = common->tx_chns;
struct device *dev = common->dev;
struct am65_cpsw_port *port;
int ret = 0, i;
@@ -2805,6 +2807,22 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
if (ret)
return ret;
+ /* The DMA Channels are not guaranteed to be in a clean state.
+ * Reset and disable them to ensure that they are back to the
+ * clean state and ready to be used.
+ */
+ for (i = 0; i < common->tx_ch_num; i++) {
+ k3_udma_glue_reset_tx_chn(tx_chan[i].tx_chn, &tx_chan[i],
+ am65_cpsw_nuss_tx_cleanup);
+ k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn);
+ }
+
+ for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++)
+ k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan,
+ am65_cpsw_nuss_rx_cleanup, !!i);
+
+ k3_udma_glue_disable_rx_chn(rx_chan->rx_chn);
+
ret = am65_cpsw_nuss_register_devlink(common);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
index 5b5d5e4310d127..2fa511227eac84 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
@@ -20,6 +20,8 @@
#include "txgbe_phy.h"
#include "txgbe_hw.h"
+#define TXGBE_I2C_CLK_DEV_NAME "i2c_dw"
+
static int txgbe_swnodes_register(struct txgbe *txgbe)
{
struct txgbe_nodes *nodes = &txgbe->nodes;
@@ -571,8 +573,8 @@ static int txgbe_clock_register(struct txgbe *txgbe)
char clk_name[32];
struct clk *clk;
- snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d",
- pci_dev_id(pdev));
+ snprintf(clk_name, sizeof(clk_name), "%s.%d",
+ TXGBE_I2C_CLK_DEV_NAME, pci_dev_id(pdev));
clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000);
if (IS_ERR(clk))
@@ -634,7 +636,7 @@ static int txgbe_i2c_register(struct txgbe *txgbe)
info.parent = &pdev->dev;
info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]);
- info.name = "i2c_designware";
+ info.name = TXGBE_I2C_CLK_DEV_NAME;
info.id = pci_dev_id(pdev);
info.res = &DEFINE_RES_IRQ(pdev->irq);
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 9df39cf8b09750..1072e2210aed32 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1443,7 +1443,7 @@ static int temac_probe(struct platform_device *pdev)
}
/* map device registers */
- lp->regs = devm_platform_ioremap_resource_byname(pdev, 0);
+ lp->regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(lp->regs)) {
dev_err(&pdev->dev, "could not map TEMAC registers\n");
return -ENOMEM;
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 2f6739fe78af2e..6c2835086b57ea 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -822,7 +822,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;
- if (!pskb_inet_may_pull(skb))
+ if (!skb_vlan_inet_prepare(skb))
return -EINVAL;
if (!gs4)
@@ -929,7 +929,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
__be16 sport;
int err;
- if (!pskb_inet_may_pull(skb))
+ if (!skb_vlan_inet_prepare(skb))
return -EINVAL;
if (!gs6)
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index a6fcbda64ecc60..2b6ec979a62f21 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -154,8 +154,11 @@ static void free_netvsc_device(struct rcu_head *head)
int i;
kfree(nvdev->extension);
- vfree(nvdev->recv_buf);
- vfree(nvdev->send_buf);
+
+ if (!nvdev->recv_buf_gpadl_handle.decrypted)
+ vfree(nvdev->recv_buf);
+ if (!nvdev->send_buf_gpadl_handle.decrypted)
+ vfree(nvdev->send_buf);
bitmap_free(nvdev->send_section_map);
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 8b8634600c5190..ddb50a0e2bc822 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -2431,6 +2431,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts,
struct lan8814_ptp_rx_ts *rx_ts, *tmp;
int txcfg = 0, rxcfg = 0;
int pkt_ts_enable;
+ int tx_mod;
ptp_priv->hwts_tx_type = config->tx_type;
ptp_priv->rx_filter = config->rx_filter;
@@ -2477,9 +2478,14 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts,
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable);
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable);
- if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC)
+ tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD);
+ if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) {
lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
- PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) {
+ lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
+ tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+ }
if (config->rx_filter != HWTSTAMP_FILTER_NONE)
lan8814_config_ts_intr(ptp_priv->phydev, true);
@@ -2537,7 +2543,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts,
}
}
-static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
+static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
{
struct ptp_header *ptp_header;
u32 type;
@@ -2547,7 +2553,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
ptp_header = ptp_parse_header(skb, type);
skb_pull_inline(skb, ETH_HLEN);
+ if (!ptp_header)
+ return false;
+
*sig = (__force u16)(ntohs(ptp_header->sequence_id));
+ return true;
}
static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv,
@@ -2559,7 +2569,8 @@ static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv,
bool ret = false;
u16 skb_sig;
- lan8814_get_sig_rx(skb, &skb_sig);
+ if (!lan8814_get_sig_rx(skb, &skb_sig))
+ return ret;
/* Iterate over all RX timestamps and match it with the received skbs */
spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags);
@@ -2834,7 +2845,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm)
return 0;
}
-static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
+static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
{
struct ptp_header *ptp_header;
u32 type;
@@ -2842,7 +2853,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
type = ptp_classify_raw(skb);
ptp_header = ptp_parse_header(skb, type);
+ if (!ptp_header)
+ return false;
+
*sig = (__force u16)(ntohs(ptp_header->sequence_id));
+ return true;
}
static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv,
@@ -2856,7 +2871,8 @@ static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv,
spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags);
skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) {
- lan8814_get_sig_tx(skb, &skb_sig);
+ if (!lan8814_get_sig_tx(skb, &skb_sig))
+ continue;
if (memcmp(&skb_sig, &seq_id, sizeof(seq_id)))
continue;
@@ -2910,7 +2926,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv,
spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags);
skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) {
- lan8814_get_sig_rx(skb, &skb_sig);
+ if (!lan8814_get_sig_rx(skb, &skb_sig))
+ continue;
if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id)))
continue;
diff --git a/drivers/net/phy/qcom/at803x.c b/drivers/net/phy/qcom/at803x.c
index 4717c59d51d042..e79657f76bea23 100644
--- a/drivers/net/phy/qcom/at803x.c
+++ b/drivers/net/phy/qcom/at803x.c
@@ -797,7 +797,7 @@ static int at8031_parse_dt(struct phy_device *phydev)
static int at8031_probe(struct phy_device *phydev)
{
- struct at803x_priv *priv = phydev->priv;
+ struct at803x_priv *priv;
int mode_cfg;
int ccr;
int ret;
@@ -806,6 +806,8 @@ static int at8031_probe(struct phy_device *phydev)
if (ret)
return ret;
+ priv = phydev->priv;
+
/* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping
* options.
*/
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 0b3f21cba552f2..92da8c03d960c9 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2125,14 +2125,16 @@ static ssize_t tun_put_user(struct tun_struct *tun,
tun_is_little_endian(tun), true,
vlan_hlen)) {
struct skb_shared_info *sinfo = skb_shinfo(skb);
- pr_err("unexpected GSO type: "
- "0x%x, gso_size %d, hdr_len %d\n",
- sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
- tun16_to_cpu(tun, gso.hdr_len));
- print_hex_dump(KERN_ERR, "tun: ",
- DUMP_PREFIX_NONE,
- 16, 1, skb->head,
- min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+
+ if (net_ratelimit()) {
+ netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
+ sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
+ tun16_to_cpu(tun, gso.hdr_len));
+ print_hex_dump(KERN_ERR, "tun: ",
+ DUMP_PREFIX_NONE,
+ 16, 1, skb->head,
+ min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+ }
WARN_ON_ONCE(1);
return -EINVAL;
}
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 88e084534853dd..752f821a19901f 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1273,6 +1273,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev)
if (is_valid_ether_addr(mac)) {
eth_hw_addr_set(dev->net, mac);
+ if (!is_local_ether_addr(mac))
+ dev->net->addr_assign_type = NET_ADDR_PERM;
} else {
netdev_info(dev->net, "invalid MAC address, using random\n");
eth_hw_addr_random(dev->net);
@@ -1315,6 +1317,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
netif_set_tso_max_size(dev->net, 16384);
+ ax88179_reset(dev);
+
return 0;
}
@@ -1693,7 +1697,6 @@ static const struct driver_info ax88179_info = {
.unbind = ax88179_unbind,
.status = ax88179_status,
.link_reset = ax88179_link_reset,
- .reset = ax88179_reset,
.stop = ax88179_stop,
.flags = FLAG_ETHER | FLAG_FRAMING_AX,
.rx_fixup = ax88179_rx_fixup,
@@ -1706,7 +1709,6 @@ static const struct driver_info ax88178a_info = {
.unbind = ax88179_unbind,
.status = ax88179_status,
.link_reset = ax88179_link_reset,
- .reset = ax88179_reset,
.stop = ax88179_stop,
.flags = FLAG_ETHER | FLAG_FRAMING_AX,
.rx_fixup = ax88179_rx_fixup,
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index e2e181378f4124..edc34402e787f9 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1431,6 +1431,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */
{QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */
{QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */
+ {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */
/* 4. Gobi 1000 devices */
{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c22d1118a13333..115c3c5414f2a7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3807,6 +3807,7 @@ static int virtnet_set_rxfh(struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct virtnet_info *vi = netdev_priv(dev);
+ bool update = false;
int i;
if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
@@ -3814,13 +3815,28 @@ static int virtnet_set_rxfh(struct net_device *dev,
return -EOPNOTSUPP;
if (rxfh->indir) {
+ if (!vi->has_rss)
+ return -EOPNOTSUPP;
+
for (i = 0; i < vi->rss_indir_table_size; ++i)
vi->ctrl->rss.indirection_table[i] = rxfh->indir[i];
+ update = true;
}
- if (rxfh->key)
+
+ if (rxfh->key) {
+ /* If either _F_HASH_REPORT or _F_RSS are negotiated, the
+ * device provides hash calculation capabilities, that is,
+ * hash_key is configured.
+ */
+ if (!vi->has_rss && !vi->has_rss_hash_report)
+ return -EOPNOTSUPP;
+
memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size);
+ update = true;
+ }
- virtnet_commit_rss_command(vi);
+ if (update)
+ virtnet_commit_rss_command(vi);
return 0;
}
@@ -4729,13 +4745,15 @@ static int virtnet_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
vi->has_rss_hash_report = true;
- if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
vi->has_rss = true;
- if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_indir_table_size =
virtio_cread16(vdev, offsetof(struct virtio_net_config,
rss_max_indirection_table_length));
+ }
+
+ if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_key_size =
virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index db6d7013df6654..c3bdf433d8f7b3 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -3081,8 +3081,6 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx)
struct iwl_fw_dbg_params params = {0};
struct iwl_fwrt_dump_data *dump_data =
&fwrt->dump.wks[wk_idx].dump_data;
- u32 policy;
- u32 time_point;
if (!test_bit(wk_idx, &fwrt->dump.active_wks))
return;
@@ -3113,13 +3111,16 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx)
iwl_fw_dbg_stop_restart_recording(fwrt, &params, false);
- policy = le32_to_cpu(dump_data->trig->apply_policy);
- time_point = le32_to_cpu(dump_data->trig->time_point);
+ if (iwl_trans_dbg_ini_valid(fwrt->trans)) {
+ u32 policy = le32_to_cpu(dump_data->trig->apply_policy);
+ u32 time_point = le32_to_cpu(dump_data->trig->time_point);
- if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) {
- IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n");
- iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0);
+ if (policy & IWL_FW_INI_APPLY_POLICY_DUMP_COMPLETE_CMD) {
+ IWL_DEBUG_FW_INFO(fwrt, "WRT: sending dump complete\n");
+ iwl_send_dbg_dump_complete_cmd(fwrt, time_point, 0);
+ }
}
+
if (fwrt->trans->dbg.last_tp_resetfw == IWL_FW_INI_RESET_FW_MODE_STOP_FW_ONLY)
iwl_force_nmi(fwrt->trans);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 553c6fffc7c66d..52518a47554e70 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1260,15 +1260,15 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
if (IS_ERR_OR_NULL(vif))
return 1;
- if (ieee80211_vif_is_mld(vif) && vif->cfg.assoc) {
+ if (hweight16(vif->active_links) > 1) {
/*
- * Select the 'best' link. May need to revisit, it seems
- * better to not optimize for throughput but rather range,
- * reliability and power here - and select 2.4 GHz ...
+ * Select the 'best' link.
+ * May need to revisit, it seems better to not optimize
+ * for throughput but rather range, reliability and
+ * power here - and select 2.4 GHz ...
*/
- primary_link =
- iwl_mvm_mld_get_primary_link(mvm, vif,
- vif->active_links);
+ primary_link = iwl_mvm_mld_get_primary_link(mvm, vif,
+ vif->active_links);
if (WARN_ONCE(primary_link < 0, "no primary link in 0x%x\n",
vif->active_links))
@@ -1277,6 +1277,8 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
ret = ieee80211_set_active_links(vif, BIT(primary_link));
if (ret)
return ret;
+ } else if (vif->active_links) {
+ primary_link = __ffs(vif->active_links);
} else {
primary_link = 0;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index 51b01f7528beec..7fe57ecd0682b8 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@ -748,7 +748,9 @@ void iwl_mvm_vif_dbgfs_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
{
struct dentry *dbgfs_dir = vif->debugfs_dir;
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- char buf[100];
+ char buf[3 * 3 + 11 + (NL80211_WIPHY_NAME_MAXLEN + 1) +
+ (7 + IFNAMSIZ + 1) + 6 + 1];
+ char name[7 + IFNAMSIZ + 1];
/* this will happen in monitor mode */
if (!dbgfs_dir)
@@ -761,10 +763,11 @@ void iwl_mvm_vif_dbgfs_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
* find
* netdev:wlan0 -> ../../../ieee80211/phy0/netdev:wlan0/iwlmvm/
*/
- snprintf(buf, 100, "../../../%pd3/iwlmvm", dbgfs_dir);
+ snprintf(name, sizeof(name), "%pd", dbgfs_dir);
+ snprintf(buf, sizeof(buf), "../../../%pd3/iwlmvm", dbgfs_dir);
- mvmvif->dbgfs_slink = debugfs_create_symlink(dbgfs_dir->d_name.name,
- mvm->debugfs_dir, buf);
+ mvmvif->dbgfs_slink =
+ debugfs_create_symlink(name, mvm->debugfs_dir, buf);
}
void iwl_mvm_vif_dbgfs_rm_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/link.c b/drivers/net/wireless/intel/iwlwifi/mvm/link.c
index f13f13e6b71af1..9f69e04594e49c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/link.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/link.c
@@ -46,6 +46,27 @@ static int iwl_mvm_link_cmd_send(struct iwl_mvm *mvm,
return ret;
}
+int iwl_mvm_set_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_mvm_vif_link_info *link_info =
+ mvmvif->link[link_conf->link_id];
+
+ if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) {
+ link_info->fw_link_id = iwl_mvm_get_free_fw_link_id(mvm,
+ mvmvif);
+ if (link_info->fw_link_id >=
+ ARRAY_SIZE(mvm->link_id_to_link_conf))
+ return -EINVAL;
+
+ rcu_assign_pointer(mvm->link_id_to_link_conf[link_info->fw_link_id],
+ link_conf);
+ }
+
+ return 0;
+}
+
int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf)
{
@@ -55,19 +76,14 @@ int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct iwl_link_config_cmd cmd = {};
unsigned int cmd_id = WIDE_ID(MAC_CONF_GROUP, LINK_CONFIG_CMD);
u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id, 1);
+ int ret;
if (WARN_ON_ONCE(!link_info))
return -EINVAL;
- if (link_info->fw_link_id == IWL_MVM_FW_LINK_ID_INVALID) {
- link_info->fw_link_id = iwl_mvm_get_free_fw_link_id(mvm,
- mvmvif);
- if (link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf))
- return -EINVAL;
-
- rcu_assign_pointer(mvm->link_id_to_link_conf[link_info->fw_link_id],
- link_conf);
- }
+ ret = iwl_mvm_set_link_mapping(mvm, vif, link_conf);
+ if (ret)
+ return ret;
/* Update SF - Disable if needed. if this fails, SF might still be on
* while many macs are bound, which is forbidden - so fail the binding.
@@ -248,6 +264,24 @@ send_cmd:
return ret;
}
+int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf)
+{
+ struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+ struct iwl_mvm_vif_link_info *link_info =
+ mvmvif->link[link_conf->link_id];
+
+ /* mac80211 thought we have the link, but it was never configured */
+ if (WARN_ON(!link_info ||
+ link_info->fw_link_id >=
+ ARRAY_SIZE(mvm->link_id_to_link_conf)))
+ return -EINVAL;
+
+ RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id],
+ NULL);
+ return 0;
+}
+
int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf)
{
@@ -257,13 +291,10 @@ int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct iwl_link_config_cmd cmd = {};
int ret;
- /* mac80211 thought we have the link, but it was never configured */
- if (WARN_ON(!link_info ||
- link_info->fw_link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf)))
+ ret = iwl_mvm_unset_link_mapping(mvm, vif, link_conf);
+ if (ret)
return 0;
- RCU_INIT_POINTER(mvm->link_id_to_link_conf[link_info->fw_link_id],
- NULL);
cmd.link_id = cpu_to_le32(link_info->fw_link_id);
iwl_mvm_release_fw_link_id(mvm, link_info->fw_link_id);
link_info->fw_link_id = IWL_MVM_FW_LINK_ID_INVALID;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 1935630d3def00..8f4b063d6243ed 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -360,7 +360,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
if (mvm->mld_api_is_used && mvm->nvm_data->sku_cap_11be_enable &&
!iwlwifi_mod_params.disable_11ax &&
!iwlwifi_mod_params.disable_11be)
- hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_MLO;
+ hw->wiphy->flags |= WIPHY_FLAG_DISABLE_WEXT;
/* With MLD FW API, it tracks timing by itself,
* no need for any timing from the host
@@ -1577,8 +1577,14 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw,
mvmvif->mvm = mvm;
/* the first link always points to the default one */
+ mvmvif->deflink.fw_link_id = IWL_MVM_FW_LINK_ID_INVALID;
+ mvmvif->deflink.active = 0;
mvmvif->link[0] = &mvmvif->deflink;
+ ret = iwl_mvm_set_link_mapping(mvm, vif, &vif->bss_conf);
+ if (ret)
+ goto out;
+
/*
* Not much to do here. The stack will not allow interface
* types or combinations that we didn't advertise, so we
@@ -1783,6 +1789,7 @@ static void iwl_mvm_mac_remove_interface(struct ieee80211_hw *hw,
mvm->p2p_device_vif = NULL;
}
+ iwl_mvm_unset_link_mapping(mvm, vif, &vif->bss_conf);
iwl_mvm_mac_ctxt_remove(mvm, vif);
RCU_INIT_POINTER(mvm->vif_id_to_mac[mvmvif->id], NULL);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
index 1628bf55458fcb..23e64a757cfe86 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
@@ -855,10 +855,15 @@ int iwl_mvm_mld_rm_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
int iwl_mvm_mld_rm_sta_id(struct iwl_mvm *mvm, u8 sta_id)
{
- int ret = iwl_mvm_mld_rm_sta_from_fw(mvm, sta_id);
+ int ret;
lockdep_assert_held(&mvm->mutex);
+ if (WARN_ON(sta_id == IWL_MVM_INVALID_STA))
+ return 0;
+
+ ret = iwl_mvm_mld_rm_sta_from_fw(mvm, sta_id);
+
RCU_INIT_POINTER(mvm->fw_id_to_mac_id[sta_id], NULL);
RCU_INIT_POINTER(mvm->fw_id_to_link_sta[sta_id], NULL);
return ret;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 44571114fb154b..f0b24f00938bd5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1916,11 +1916,15 @@ int iwl_mvm_binding_remove_vif(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
u32 iwl_mvm_get_lmac_id(struct iwl_mvm *mvm, enum nl80211_band band);
/* Links */
+int iwl_mvm_set_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
int iwl_mvm_add_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf);
int iwl_mvm_link_changed(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf,
u32 changes, bool active);
+int iwl_mvm_unset_link_mapping(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
int iwl_mvm_remove_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf);
int iwl_mvm_disable_link(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
index 2ecd32bed752ff..045c862a8fc4fc 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
@@ -132,14 +132,18 @@ struct iwl_rfi_freq_table_resp_cmd *iwl_rfi_get_freq_table(struct iwl_mvm *mvm)
if (ret)
return ERR_PTR(ret);
- if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) != resp_size))
+ if (WARN_ON_ONCE(iwl_rx_packet_payload_len(cmd.resp_pkt) !=
+ resp_size)) {
+ iwl_free_resp(&cmd);
return ERR_PTR(-EIO);
+ }
resp = kmemdup(cmd.resp_pkt->data, resp_size, GFP_KERNEL);
+ iwl_free_resp(&cmd);
+
if (!resp)
return ERR_PTR(-ENOMEM);
- iwl_free_resp(&cmd);
return resp;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 1484eaedf45292..ce8d83c771a70d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -236,21 +236,13 @@ static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm,
static void iwl_mvm_pass_packet_to_mac80211(struct iwl_mvm *mvm,
struct napi_struct *napi,
struct sk_buff *skb, int queue,
- struct ieee80211_sta *sta,
- struct ieee80211_link_sta *link_sta)
+ struct ieee80211_sta *sta)
{
if (unlikely(iwl_mvm_check_pn(mvm, skb, queue, sta))) {
kfree_skb(skb);
return;
}
- if (sta && sta->valid_links && link_sta) {
- struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
-
- rx_status->link_valid = 1;
- rx_status->link_id = link_sta->link_id;
- }
-
ieee80211_rx_napi(mvm->hw, sta, skb, napi);
}
@@ -588,7 +580,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm,
while ((skb = __skb_dequeue(skb_list))) {
iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb,
reorder_buf->queue,
- sta, NULL /* FIXME */);
+ sta);
reorder_buf->num_stored--;
}
}
@@ -2213,6 +2205,11 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
if (IS_ERR(sta))
sta = NULL;
link_sta = rcu_dereference(mvm->fw_id_to_link_sta[id]);
+
+ if (sta && sta->valid_links && link_sta) {
+ rx_status->link_valid = 1;
+ rx_status->link_id = link_sta->link_id;
+ }
}
} else if (!is_multicast_ether_addr(hdr->addr2)) {
/*
@@ -2356,8 +2353,7 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi,
!(desc->amsdu_info & IWL_RX_MPDU_AMSDU_LAST_SUBFRAME))
rx_status->flag |= RX_FLAG_AMSDU_MORE;
- iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta,
- link_sta);
+ iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta);
}
out:
rcu_read_unlock();
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index a59d264a11c52f..ad960faceb0d8f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -879,9 +879,8 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
struct iwl_rx_packet *pkt = rxb_addr(rxb);
struct iwl_mvm_session_prot_notif *notif = (void *)pkt->data;
unsigned int ver =
- iwl_fw_lookup_cmd_ver(mvm->fw,
- WIDE_ID(MAC_CONF_GROUP,
- SESSION_PROTECTION_CMD), 2);
+ iwl_fw_lookup_notif_ver(mvm->fw, MAC_CONF_GROUP,
+ SESSION_PROTECTION_NOTIF, 2);
int id = le32_to_cpu(notif->mac_link_id);
struct ieee80211_vif *vif;
struct iwl_mvm_vif *mvmvif;
diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
index 33973a60d0bf41..6229c785c84576 100644
--- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
@@ -1589,9 +1589,9 @@ void iwl_txq_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
return;
tfd_num = iwl_txq_get_cmd_index(txq, ssn);
- read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr);
spin_lock_bh(&txq->lock);
+ read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr);
if (!test_bit(txq_id, trans->txqs.queue_used)) {
IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n",
diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c
index 367459bd134574..708132d5be2a6a 100644
--- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c
+++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c
@@ -2233,7 +2233,7 @@ static void rtw8922a_btc_init_cfg(struct rtw89_dev *rtwdev)
* Shared-Ant && BTG-path:WL mask(0x55f), others:WL THRU(0x5ff)
*/
if (btc->ant_type == BTC_ANT_SHARED && btc->btg_pos == path)
- rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff);
+ rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x55f);
else
rtw8922a_set_trx_mask(rtwdev, path, BTC_BT_TX_GROUP, 0x5ff);
diff --git a/drivers/net/wwan/t7xx/t7xx_cldma.c b/drivers/net/wwan/t7xx/t7xx_cldma.c
index 9f43f256db1d06..f0a4783baf1f32 100644
--- a/drivers/net/wwan/t7xx/t7xx_cldma.c
+++ b/drivers/net/wwan/t7xx/t7xx_cldma.c
@@ -106,7 +106,7 @@ bool t7xx_cldma_tx_addr_is_set(struct t7xx_cldma_hw *hw_info, unsigned int qno)
{
u32 offset = REG_CLDMA_UL_START_ADDRL_0 + qno * ADDR_SIZE;
- return ioread64(hw_info->ap_pdn_base + offset);
+ return ioread64_lo_hi(hw_info->ap_pdn_base + offset);
}
void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qno, u64 address,
@@ -117,7 +117,7 @@ void t7xx_cldma_hw_set_start_addr(struct t7xx_cldma_hw *hw_info, unsigned int qn
reg = tx_rx == MTK_RX ? hw_info->ap_ao_base + REG_CLDMA_DL_START_ADDRL_0 :
hw_info->ap_pdn_base + REG_CLDMA_UL_START_ADDRL_0;
- iowrite64(address, reg + offset);
+ iowrite64_lo_hi(address, reg + offset);
}
void t7xx_cldma_hw_resume_queue(struct t7xx_cldma_hw *hw_info, unsigned int qno,
diff --git a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
index abc41a7089fa4f..97163e1e5783ed 100644
--- a/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
+++ b/drivers/net/wwan/t7xx/t7xx_hif_cldma.c
@@ -137,8 +137,9 @@ static int t7xx_cldma_gpd_rx_from_q(struct cldma_queue *queue, int budget, bool
return -ENODEV;
}
- gpd_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_DL_CURRENT_ADDRL_0 +
- queue->index * sizeof(u64));
+ gpd_addr = ioread64_lo_hi(hw_info->ap_pdn_base +
+ REG_CLDMA_DL_CURRENT_ADDRL_0 +
+ queue->index * sizeof(u64));
if (req->gpd_addr == gpd_addr || hwo_polling_count++ >= 100)
return 0;
@@ -316,8 +317,8 @@ static void t7xx_cldma_txq_empty_hndl(struct cldma_queue *queue)
struct t7xx_cldma_hw *hw_info = &md_ctrl->hw_info;
/* Check current processing TGPD, 64-bit address is in a table by Q index */
- ul_curr_addr = ioread64(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 +
- queue->index * sizeof(u64));
+ ul_curr_addr = ioread64_lo_hi(hw_info->ap_pdn_base + REG_CLDMA_UL_CURRENT_ADDRL_0 +
+ queue->index * sizeof(u64));
if (req->gpd_addr != ul_curr_addr) {
spin_unlock_irqrestore(&md_ctrl->cldma_lock, flags);
dev_err(md_ctrl->dev, "CLDMA%d queue %d is not empty\n",
diff --git a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
index 76da4c15e3de17..f071ec7ff23d50 100644
--- a/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
+++ b/drivers/net/wwan/t7xx/t7xx_pcie_mac.c
@@ -75,7 +75,7 @@ static void t7xx_pcie_mac_atr_tables_dis(void __iomem *pbase, enum t7xx_atr_src_
for (i = 0; i < ATR_TABLE_NUM_PER_ATR; i++) {
offset = ATR_PORT_OFFSET * port + ATR_TABLE_OFFSET * i;
reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset;
- iowrite64(0, reg);
+ iowrite64_lo_hi(0, reg);
}
}
@@ -112,17 +112,17 @@ static int t7xx_pcie_mac_atr_cfg(struct t7xx_pci_dev *t7xx_dev, struct t7xx_atr_
reg = pbase + ATR_PCIE_WIN0_T0_TRSL_ADDR + offset;
value = cfg->trsl_addr & ATR_PCIE_WIN0_ADDR_ALGMT;
- iowrite64(value, reg);
+ iowrite64_lo_hi(value, reg);
reg = pbase + ATR_PCIE_WIN0_T0_TRSL_PARAM + offset;
iowrite32(cfg->trsl_id, reg);
reg = pbase + ATR_PCIE_WIN0_T0_ATR_PARAM_SRC_ADDR + offset;
value = (cfg->src_addr & ATR_PCIE_WIN0_ADDR_ALGMT) | (atr_size << 1) | BIT(0);
- iowrite64(value, reg);
+ iowrite64_lo_hi(value, reg);
/* Ensure ATR is set */
- ioread64(reg);
+ ioread64_lo_hi(reg);
return 0;
}
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index ad29f370034e4f..8d2aee88526c69 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
return NULL;
}
skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
+ skb_mark_for_recycle(skb);
/* Align ip header to a 16 bytes boundary */
skb_reserve(skb, NET_IP_ALIGN);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 943d72bdd794ca..27281a9a8951db 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2076,6 +2076,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT;
struct queue_limits lim;
struct nvme_id_ns_nvm *nvm = NULL;
+ struct nvme_zone_info zi = {};
struct nvme_id_ns *id;
sector_t capacity;
unsigned lbaf;
@@ -2088,9 +2089,10 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (id->ncap == 0) {
/* namespace not allocated or attached */
info->is_removed = true;
- ret = -ENODEV;
+ ret = -ENXIO;
goto out;
}
+ lbaf = nvme_lbaf_index(id->flbas);
if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) {
ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm);
@@ -2098,8 +2100,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
goto out;
}
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ ns->head->ids.csi == NVME_CSI_ZNS) {
+ ret = nvme_query_zone_info(ns, lbaf, &zi);
+ if (ret < 0)
+ goto out;
+ }
+
blk_mq_freeze_queue(ns->disk->queue);
- lbaf = nvme_lbaf_index(id->flbas);
ns->head->lba_shift = id->lbaf[lbaf].ds;
ns->head->nuse = le64_to_cpu(id->nuse);
capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
@@ -2112,13 +2120,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
capacity = 0;
nvme_config_discard(ns, &lim);
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
- ns->head->ids.csi == NVME_CSI_ZNS) {
- ret = nvme_update_zone_info(ns, lbaf, &lim);
- if (ret) {
- blk_mq_unfreeze_queue(ns->disk->queue);
- goto out;
- }
- }
+ ns->head->ids.csi == NVME_CSI_ZNS)
+ nvme_update_zone_info(ns, &lim, &zi);
ret = queue_limits_commit_update(ns->disk->queue, &lim);
if (ret) {
blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2201,6 +2204,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
}
if (!ret && nvme_ns_head_multipath(ns->head)) {
+ struct queue_limits *ns_lim = &ns->disk->queue->limits;
struct queue_limits lim;
blk_mq_freeze_queue(ns->head->disk->queue);
@@ -2212,7 +2216,26 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
nvme_mpath_revalidate_paths(ns);
+ /*
+ * queue_limits mixes values that are the hardware limitations
+ * for bio splitting with what is the device configuration.
+ *
+ * For NVMe the device configuration can change after e.g. a
+ * Format command, and we really want to pick up the new format
+ * value here. But we must still stack the queue limits to the
+ * least common denominator for multipathing to split the bios
+ * properly.
+ *
+ * To work around this, we explicitly set the device
+ * configuration to those that we just queried, but only stack
+ * the splitting limits in to make sure we still obey possibly
+ * lower limitations of other controllers.
+ */
lim = queue_limits_start_update(ns->head->disk->queue);
+ lim.logical_block_size = ns_lim->logical_block_size;
+ lim.physical_block_size = ns_lim->physical_block_size;
+ lim.io_min = ns_lim->io_min;
+ lim.io_opt = ns_lim->io_opt;
queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
ns->head->disk->disk_name);
ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 68a5d971657bb5..a5b29e9ad342df 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2428,7 +2428,7 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl)
* controller. Called after last nvme_put_ctrl() call
*/
static void
-nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
+nvme_fc_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
@@ -3384,7 +3384,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .free_ctrl = nvme_fc_nvme_ctrl_freed,
+ .free_ctrl = nvme_fc_free_ctrl,
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_delete_ctrl,
.get_address = nvmf_get_address,
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 24193fcb8bd584..d0ed64dc7380e5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -1036,10 +1036,18 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
}
#endif /* CONFIG_NVME_MULTIPATH */
+struct nvme_zone_info {
+ u64 zone_size;
+ unsigned int max_open_zones;
+ unsigned int max_active_zones;
+};
+
int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
-int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
- struct queue_limits *lim);
+int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
+ struct nvme_zone_info *zi);
+void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
+ struct nvme_zone_info *zi);
#ifdef CONFIG_BLK_DEV_ZONED
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd,
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 722384bcc765cd..77aa0f440a6d2a 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -35,8 +35,8 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
return 0;
}
-int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
- struct queue_limits *lim)
+int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
+ struct nvme_zone_info *zi)
{
struct nvme_effects_log *log = ns->head->effects;
struct nvme_command c = { };
@@ -89,27 +89,34 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
goto free_data;
}
- ns->head->zsze =
- nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze));
- if (!is_power_of_2(ns->head->zsze)) {
+ zi->zone_size = le64_to_cpu(id->lbafe[lbaf].zsze);
+ if (!is_power_of_2(zi->zone_size)) {
dev_warn(ns->ctrl->device,
- "invalid zone size:%llu for namespace:%u\n",
- ns->head->zsze, ns->head->ns_id);
+ "invalid zone size: %llu for namespace: %u\n",
+ zi->zone_size, ns->head->ns_id);
status = -ENODEV;
goto free_data;
}
+ zi->max_open_zones = le32_to_cpu(id->mor) + 1;
+ zi->max_active_zones = le32_to_cpu(id->mar) + 1;
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
- lim->zoned = 1;
- lim->max_open_zones = le32_to_cpu(id->mor) + 1;
- lim->max_active_zones = le32_to_cpu(id->mar) + 1;
- lim->chunk_sectors = ns->head->zsze;
- lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
free_data:
kfree(id);
return status;
}
+void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
+ struct nvme_zone_info *zi)
+{
+ lim->zoned = 1;
+ lim->max_open_zones = zi->max_open_zones;
+ lim->max_active_zones = zi->max_active_zones;
+ lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
+ lim->chunk_sectors = ns->head->zsze =
+ nvme_lba_to_sect(ns->head, zi->zone_size);
+ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
+}
+
static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
unsigned int nr_zones, size_t *buflen)
{
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 77a6e817b31596..a2325330bf2214 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1613,6 +1613,11 @@ static struct config_group *nvmet_subsys_make(struct config_group *group,
return ERR_PTR(-EINVAL);
}
+ if (sysfs_streq(name, nvmet_disc_subsys->subsysnqn)) {
+ pr_err("can't create subsystem using unique discovery NQN\n");
+ return ERR_PTR(-EINVAL);
+ }
+
subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME);
if (IS_ERR(subsys))
return ERR_CAST(subsys);
@@ -2159,7 +2164,49 @@ static const struct config_item_type nvmet_hosts_type = {
static struct config_group nvmet_hosts_group;
+static ssize_t nvmet_root_discovery_nqn_show(struct config_item *item,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%s\n", nvmet_disc_subsys->subsysnqn);
+}
+
+static ssize_t nvmet_root_discovery_nqn_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct list_head *entry;
+ size_t len;
+
+ len = strcspn(page, "\n");
+ if (!len || len > NVMF_NQN_FIELD_LEN - 1)
+ return -EINVAL;
+
+ down_write(&nvmet_config_sem);
+ list_for_each(entry, &nvmet_subsystems_group.cg_children) {
+ struct config_item *item =
+ container_of(entry, struct config_item, ci_entry);
+
+ if (!strncmp(config_item_name(item), page, len)) {
+ pr_err("duplicate NQN %s\n", config_item_name(item));
+ up_write(&nvmet_config_sem);
+ return -EINVAL;
+ }
+ }
+ memset(nvmet_disc_subsys->subsysnqn, 0, NVMF_NQN_FIELD_LEN);
+ memcpy(nvmet_disc_subsys->subsysnqn, page, len);
+ up_write(&nvmet_config_sem);
+
+ return len;
+}
+
+CONFIGFS_ATTR(nvmet_root_, discovery_nqn);
+
+static struct configfs_attribute *nvmet_root_attrs[] = {
+ &nvmet_root_attr_discovery_nqn,
+ NULL,
+};
+
static const struct config_item_type nvmet_root_type = {
+ .ct_attrs = nvmet_root_attrs,
.ct_owner = THIS_MODULE,
};
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 6bbe4df0166ca5..8860a3eb71ec89 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1541,6 +1541,13 @@ static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
}
down_read(&nvmet_config_sem);
+ if (!strncmp(nvmet_disc_subsys->subsysnqn, subsysnqn,
+ NVMF_NQN_SIZE)) {
+ if (kref_get_unless_zero(&nvmet_disc_subsys->ref)) {
+ up_read(&nvmet_config_sem);
+ return nvmet_disc_subsys;
+ }
+ }
list_for_each_entry(p, &port->subsystems, entry) {
if (!strncmp(p->subsys->subsysnqn, subsysnqn,
NVMF_NQN_SIZE)) {
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index fd229f310c931f..337ee1cb09ae64 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1115,16 +1115,21 @@ nvmet_fc_schedule_delete_assoc(struct nvmet_fc_tgt_assoc *assoc)
}
static bool
-nvmet_fc_assoc_exits(struct nvmet_fc_tgtport *tgtport, u64 association_id)
+nvmet_fc_assoc_exists(struct nvmet_fc_tgtport *tgtport, u64 association_id)
{
struct nvmet_fc_tgt_assoc *a;
+ bool found = false;
+ rcu_read_lock();
list_for_each_entry_rcu(a, &tgtport->assoc_list, a_list) {
- if (association_id == a->association_id)
- return true;
+ if (association_id == a->association_id) {
+ found = true;
+ break;
+ }
}
+ rcu_read_unlock();
- return false;
+ return found;
}
static struct nvmet_fc_tgt_assoc *
@@ -1164,13 +1169,11 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
ran = ran << BYTES_FOR_QID_SHIFT;
spin_lock_irqsave(&tgtport->lock, flags);
- rcu_read_lock();
- if (!nvmet_fc_assoc_exits(tgtport, ran)) {
+ if (!nvmet_fc_assoc_exists(tgtport, ran)) {
assoc->association_id = ran;
list_add_tail_rcu(&assoc->a_list, &tgtport->assoc_list);
done = true;
}
- rcu_read_unlock();
spin_unlock_irqrestore(&tgtport->lock, flags);
} while (!done);
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 3bf27052832f30..4d57a4e3410546 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -9,6 +9,7 @@
#define pr_fmt(fmt) "OF: " fmt
+#include <linux/device.h>
#include <linux/of.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -667,6 +668,17 @@ void of_changeset_destroy(struct of_changeset *ocs)
{
struct of_changeset_entry *ce, *cen;
+ /*
+ * When a device is deleted, the device links to/from it are also queued
+ * for deletion. Until these device links are freed, the devices
+ * themselves aren't freed. If the device being deleted is due to an
+ * overlay change, this device might be holding a reference to a device
+ * node that will be freed. So, wait until all already pending device
+ * links are deleted before freeing a device node. This ensures we don't
+ * free any device node that has a non-zero reference count.
+ */
+ device_link_wait_removal();
+
list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
__of_changeset_entry_destroy(ce);
}
diff --git a/drivers/of/module.c b/drivers/of/module.c
index 0e8aa974f0f2bb..f58e624953a20f 100644
--- a/drivers/of/module.c
+++ b/drivers/of/module.c
@@ -16,6 +16,14 @@ ssize_t of_modalias(const struct device_node *np, char *str, ssize_t len)
ssize_t csize;
ssize_t tsize;
+ /*
+ * Prevent a kernel oops in vsnprintf() -- it only allows passing a
+ * NULL ptr when the length is also 0. Also filter out the negative
+ * lengths...
+ */
+ if ((len > 0 && !str) || len < 0)
+ return -EINVAL;
+
/* Name & Type */
/* %p eats all alphanum characters, so %c must be used here */
csize = snprintf(str, len, "of:N%pOFn%c%s", np, 'T',
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index bf4833221816d4..eff7f5df08e27f 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3766,14 +3766,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003e, quirk_no_bus_reset);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset);
/*
- * Apparently the LSI / Agere FW643 can't recover after a Secondary Bus
- * Reset and requires a power-off or suspend/resume and rescan. Prevent
- * use of that reset.
- */
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5900, quirk_no_bus_reset);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5901, quirk_no_bus_reset);
-
-/*
* Some TI KeyStone C667X devices do not support bus/hot reset. The PCIESS
* automatically disables LTSSM when Secondary Bus Reset is received and
* the device stops working. Prevent bus reset for these devices. With
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index c78a6fd6c57f61..b4efdddb2ad91f 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -313,6 +313,10 @@ static int riscv_pmu_event_init(struct perf_event *event)
u64 event_config = 0;
uint64_t cmask;
+ /* driver does not support branch stack sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
hwc->flags = 0;
mapped_event = rvpmu->event_map(event, &event_config);
if (mapped_event < 0) {
diff --git a/drivers/pinctrl/aspeed/Makefile b/drivers/pinctrl/aspeed/Makefile
index 489ea1778353f7..db2a7600ae2bde 100644
--- a/drivers/pinctrl/aspeed/Makefile
+++ b/drivers/pinctrl/aspeed/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
# Aspeed pinctrl support
-ccflags-y += $(call cc-option,-Woverride-init)
+ccflags-y += -Woverride-init
obj-$(CONFIG_PINCTRL_ASPEED) += pinctrl-aspeed.o pinmux-aspeed.o
obj-$(CONFIG_PINCTRL_ASPEED_G4) += pinctrl-aspeed-g4.o
obj-$(CONFIG_PINCTRL_ASPEED_G5) += pinctrl-aspeed-g5.o
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 49f89b70dcecb4..7f66ec73199a9c 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -1159,7 +1159,7 @@ static int amd_gpio_probe(struct platform_device *pdev)
}
ret = devm_request_irq(&pdev->dev, gpio_dev->irq, amd_gpio_irq_handler,
- IRQF_SHARED | IRQF_ONESHOT, KBUILD_MODNAME, gpio_dev);
+ IRQF_SHARED | IRQF_COND_ONESHOT, KBUILD_MODNAME, gpio_dev);
if (ret)
goto out2;
diff --git a/drivers/platform/chrome/cros_ec_uart.c b/drivers/platform/chrome/cros_ec_uart.c
index 8ea867c2a01a37..62bc24f6dcc7a8 100644
--- a/drivers/platform/chrome/cros_ec_uart.c
+++ b/drivers/platform/chrome/cros_ec_uart.c
@@ -263,12 +263,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
if (!ec_dev)
return -ENOMEM;
- ret = devm_serdev_device_open(dev, serdev);
- if (ret) {
- dev_err(dev, "Unable to open UART device");
- return ret;
- }
-
serdev_device_set_drvdata(serdev, ec_dev);
init_waitqueue_head(&ec_uart->response.wait_queue);
@@ -280,14 +274,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
return ret;
}
- ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
- if (ret < 0) {
- dev_err(dev, "Failed to set up host baud rate (%d)", ret);
- return ret;
- }
-
- serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
-
/* Initialize ec_dev for cros_ec */
ec_dev->phys_name = dev_name(dev);
ec_dev->dev = dev;
@@ -301,6 +287,20 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops);
+ ret = devm_serdev_device_open(dev, serdev);
+ if (ret) {
+ dev_err(dev, "Unable to open UART device");
+ return ret;
+ }
+
+ ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
+ if (ret < 0) {
+ dev_err(dev, "Failed to set up host baud rate (%d)", ret);
+ return ret;
+ }
+
+ serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
+
return cros_ec_register(ec_dev);
}
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index ee2e164f86b9c2..38c932df6446ac 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -598,6 +598,15 @@ static const struct dmi_system_id acer_quirks[] __initconst = {
.driver_data = &quirk_acer_predator_v4,
},
{
+ .callback = dmi_matched,
+ .ident = "Acer Predator PH18-71",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Predator PH18-71"),
+ },
+ .driver_data = &quirk_acer_predator_v4,
+ },
+ {
.callback = set_force_caps,
.ident = "Acer Aspire Switch 10E SW3-016",
.matches = {
diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c
index b456370166b6bb..b4f49720c87f62 100644
--- a/drivers/platform/x86/amd/pmc/pmc-quirks.c
+++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c
@@ -208,6 +208,15 @@ static const struct dmi_system_id fwbug_list[] = {
DMI_MATCH(DMI_BIOS_VERSION, "03.03"),
}
},
+ {
+ .ident = "Framework Laptop 13 (Phoenix)",
+ .driver_data = &quirk_spurious_8042,
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Laptop 13 (AMD Ryzen 7040Series)"),
+ DMI_MATCH(DMI_BIOS_VERSION, "03.05"),
+ }
+ },
{}
};
diff --git a/drivers/platform/x86/amd/pmf/Makefile b/drivers/platform/x86/amd/pmf/Makefile
index 6b26e48ce8ad2a..7d6079b02589cb 100644
--- a/drivers/platform/x86/amd/pmf/Makefile
+++ b/drivers/platform/x86/amd/pmf/Makefile
@@ -7,4 +7,4 @@
obj-$(CONFIG_AMD_PMF) += amd-pmf.o
amd-pmf-objs := core.o acpi.o sps.o \
auto-mode.o cnqf.o \
- tee-if.o spc.o
+ tee-if.o spc.o pmf-quirks.o
diff --git a/drivers/platform/x86/amd/pmf/acpi.c b/drivers/platform/x86/amd/pmf/acpi.c
index d0cf46e2fc8e8a..1157ec148880b5 100644
--- a/drivers/platform/x86/amd/pmf/acpi.c
+++ b/drivers/platform/x86/amd/pmf/acpi.c
@@ -343,7 +343,10 @@ static int apmf_if_verify_interface(struct amd_pmf_dev *pdev)
if (err)
return err;
- pdev->supported_func = output.supported_functions;
+ /* only set if not already set by a quirk */
+ if (!pdev->supported_func)
+ pdev->supported_func = output.supported_functions;
+
dev_dbg(pdev->dev, "supported functions:0x%x notifications:0x%x version:%u\n",
output.supported_functions, output.notification_mask, output.version);
@@ -437,7 +440,7 @@ int apmf_check_smart_pc(struct amd_pmf_dev *pmf_dev)
status = acpi_walk_resources(ahandle, METHOD_NAME__CRS, apmf_walk_resources, pmf_dev);
if (ACPI_FAILURE(status)) {
- dev_err(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status);
+ dev_dbg(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status);
return -EINVAL;
}
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index 5d4f80698a8b88..64e6e34a2a9acd 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -445,6 +445,7 @@ static int amd_pmf_probe(struct platform_device *pdev)
mutex_init(&dev->lock);
mutex_init(&dev->update_mutex);
+ amd_pmf_quirks_init(dev);
apmf_acpi_init(dev);
platform_set_drvdata(pdev, dev);
amd_pmf_dbgfs_register(dev);
diff --git a/drivers/platform/x86/amd/pmf/pmf-quirks.c b/drivers/platform/x86/amd/pmf/pmf-quirks.c
new file mode 100644
index 00000000000000..0b2eb0ae85febd
--- /dev/null
+++ b/drivers/platform/x86/amd/pmf/pmf-quirks.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Platform Management Framework Driver Quirks
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#include <linux/dmi.h>
+
+#include "pmf.h"
+
+struct quirk_entry {
+ u32 supported_func;
+};
+
+static struct quirk_entry quirk_no_sps_bug = {
+ .supported_func = 0x4003,
+};
+
+static const struct dmi_system_id fwbug_list[] = {
+ {
+ .ident = "ROG Zephyrus G14",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "GA403UV"),
+ },
+ .driver_data = &quirk_no_sps_bug,
+ },
+ {}
+};
+
+void amd_pmf_quirks_init(struct amd_pmf_dev *dev)
+{
+ const struct dmi_system_id *dmi_id;
+ struct quirk_entry *quirks;
+
+ dmi_id = dmi_first_match(fwbug_list);
+ if (!dmi_id)
+ return;
+
+ quirks = dmi_id->driver_data;
+ if (quirks->supported_func) {
+ dev->supported_func = quirks->supported_func;
+ pr_info("Using supported funcs quirk to avoid %s platform firmware bug\n",
+ dmi_id->ident);
+ }
+}
+
diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h
index 8c4df5753f40d4..eeedd0c0395a89 100644
--- a/drivers/platform/x86/amd/pmf/pmf.h
+++ b/drivers/platform/x86/amd/pmf/pmf.h
@@ -720,4 +720,7 @@ int apmf_check_smart_pc(struct amd_pmf_dev *pmf_dev);
void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in);
void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in);
+/* Quirk infrastructure */
+void amd_pmf_quirks_init(struct amd_pmf_dev *dev);
+
#endif /* PMF_H */
diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c
index 7457ca2b27a60b..c7a8276458640a 100644
--- a/drivers/platform/x86/intel/hid.c
+++ b/drivers/platform/x86/intel/hid.c
@@ -49,6 +49,8 @@ static const struct acpi_device_id intel_hid_ids[] = {
{"INTC1076", 0},
{"INTC1077", 0},
{"INTC1078", 0},
+ {"INTC107B", 0},
+ {"INTC10CB", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, intel_hid_ids);
@@ -504,6 +506,7 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
struct platform_device *device = context;
struct intel_hid_priv *priv = dev_get_drvdata(&device->dev);
unsigned long long ev_index;
+ struct key_entry *ke;
int err;
/*
@@ -545,11 +548,15 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
if (event == 0xc0 || !priv->array)
return;
- if (!sparse_keymap_entry_from_scancode(priv->array, event)) {
+ ke = sparse_keymap_entry_from_scancode(priv->array, event);
+ if (!ke) {
dev_info(&device->dev, "unknown event 0x%x\n", event);
return;
}
+ if (ke->type == KE_IGNORE)
+ return;
+
wakeup:
pm_wakeup_hard_event(&device->dev);
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index 08df9494603c5e..30951f7131cd98 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -719,6 +719,7 @@ static struct miscdevice isst_if_char_driver = {
};
static const struct x86_cpu_id hpm_cpu_ids[] = {
+ X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, NULL),
X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, NULL),
{}
diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
index bd75d61ff8a661..ef730200a04bd9 100644
--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
+++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
@@ -29,7 +29,7 @@
#include "uncore-frequency-common.h"
#define UNCORE_MAJOR_VERSION 0
-#define UNCORE_MINOR_VERSION 1
+#define UNCORE_MINOR_VERSION 2
#define UNCORE_HEADER_INDEX 0
#define UNCORE_FABRIC_CLUSTER_OFFSET 8
@@ -329,7 +329,7 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_
goto remove_clusters;
}
- if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) != UNCORE_MINOR_VERSION)
+ if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) > UNCORE_MINOR_VERSION)
dev_info(&auxdev->dev, "Uncore: Ignore: Unsupported minor version:%lx\n",
TPMI_MINOR_VERSION(pd_info->ufs_header_ver));
diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c
index 084c355c86f5fa..79bb2c801daa97 100644
--- a/drivers/platform/x86/intel/vbtn.c
+++ b/drivers/platform/x86/intel/vbtn.c
@@ -136,8 +136,6 @@ static int intel_vbtn_input_setup(struct platform_device *device)
priv->switches_dev->id.bustype = BUS_HOST;
if (priv->has_switches) {
- detect_tablet_mode(&device->dev);
-
ret = input_register_device(priv->switches_dev);
if (ret)
return ret;
@@ -258,9 +256,6 @@ static const struct dmi_system_id dmi_switches_allow_list[] = {
static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
{
- unsigned long long vgbs;
- acpi_status status;
-
/* See dual_accel_detect.h for more info */
if (dual_accel)
return false;
@@ -268,8 +263,7 @@ static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
if (!dmi_check_system(dmi_switches_allow_list))
return false;
- status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs);
- return ACPI_SUCCESS(status);
+ return acpi_has_method(handle, "VGBS");
}
static int intel_vbtn_probe(struct platform_device *device)
@@ -316,6 +310,9 @@ static int intel_vbtn_probe(struct platform_device *device)
if (ACPI_FAILURE(status))
dev_err(&device->dev, "Error VBDL failed with ACPI status %d\n", status);
}
+ // Check switches after buttons since VBDL may have side effects.
+ if (has_switches)
+ detect_tablet_mode(&device->dev);
device_init_wakeup(&device->dev, true);
/*
diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c
index ad3c39e9e9f586..e714ee6298dda8 100644
--- a/drivers/platform/x86/lg-laptop.c
+++ b/drivers/platform/x86/lg-laptop.c
@@ -736,7 +736,7 @@ static int acpi_add(struct acpi_device *device)
default:
year = 2019;
}
- pr_info("product: %s year: %d\n", product, year);
+ pr_info("product: %s year: %d\n", product ?: "unknown", year);
if (year >= 2019)
battery_limit_use_wmbb = 1;
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 291f14ef67024a..77244c9aa60d23 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -264,6 +264,7 @@ static const struct key_entry toshiba_acpi_keymap[] = {
{ KE_KEY, 0xb32, { KEY_NEXTSONG } },
{ KE_KEY, 0xb33, { KEY_PLAYPAUSE } },
{ KE_KEY, 0xb5a, { KEY_MEDIA } },
+ { KE_IGNORE, 0x0e00, { KEY_RESERVED } }, /* Wake from sleep */
{ KE_IGNORE, 0x1430, { KEY_RESERVED } }, /* Wake from sleep */
{ KE_IGNORE, 0x1501, { KEY_RESERVED } }, /* Output changed */
{ KE_IGNORE, 0x1502, { KEY_RESERVED } }, /* HDMI plugged/unplugged */
@@ -3523,9 +3524,10 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event)
(dev->kbd_mode == SCI_KBD_MODE_ON) ?
LED_FULL : LED_OFF);
break;
+ case 0x8e: /* Power button pressed */
+ break;
case 0x85: /* Unknown */
case 0x8d: /* Unknown */
- case 0x8e: /* Unknown */
case 0x94: /* Unknown */
case 0x95: /* Unknown */
default:
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index d70f793ce4b38d..403525cc17833c 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -443,7 +443,7 @@ of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
if (IS_ERR(pwm))
return pwm;
- if (args->args_count > 1)
+ if (args->args_count > 0)
pwm->args.period = args->args[0];
pwm->args.polarity = PWM_POLARITY_NORMAL;
diff --git a/drivers/pwm/pwm-dwc-core.c b/drivers/pwm/pwm-dwc-core.c
index 043736972cb921..c8425493b95d85 100644
--- a/drivers/pwm/pwm-dwc-core.c
+++ b/drivers/pwm/pwm-dwc-core.c
@@ -172,7 +172,6 @@ struct pwm_chip *dwc_pwm_alloc(struct device *dev)
dwc->clk_ns = 10;
chip->ops = &dwc_pwm_ops;
- dev_set_drvdata(dev, chip);
return chip;
}
EXPORT_SYMBOL_GPL(dwc_pwm_alloc);
diff --git a/drivers/pwm/pwm-dwc.c b/drivers/pwm/pwm-dwc.c
index 676eaf8d7a53f7..fb3eadf6fbc464 100644
--- a/drivers/pwm/pwm-dwc.c
+++ b/drivers/pwm/pwm-dwc.c
@@ -31,26 +31,34 @@ static const struct dwc_pwm_info ehl_pwm_info = {
.size = 0x1000,
};
-static int dwc_pwm_init_one(struct device *dev, void __iomem *base, unsigned int offset)
+static int dwc_pwm_init_one(struct device *dev, struct dwc_pwm_drvdata *ddata, unsigned int idx)
{
struct pwm_chip *chip;
struct dwc_pwm *dwc;
+ int ret;
chip = dwc_pwm_alloc(dev);
if (IS_ERR(chip))
return PTR_ERR(chip);
dwc = to_dwc_pwm(chip);
- dwc->base = base + offset;
+ dwc->base = ddata->io_base + (ddata->info->size * idx);
- return devm_pwmchip_add(dev, chip);
+ ret = devm_pwmchip_add(dev, chip);
+ if (ret)
+ return ret;
+
+ ddata->chips[idx] = chip;
+ return 0;
}
static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
{
const struct dwc_pwm_info *info;
struct device *dev = &pci->dev;
- int i, ret;
+ struct dwc_pwm_drvdata *ddata;
+ unsigned int idx;
+ int ret;
ret = pcim_enable_device(pci);
if (ret)
@@ -63,17 +71,25 @@ static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
return dev_err_probe(dev, ret, "Failed to iomap PCI BAR\n");
info = (const struct dwc_pwm_info *)id->driver_data;
-
- for (i = 0; i < info->nr; i++) {
- /*
- * No need to check for pcim_iomap_table() failure,
- * pcim_iomap_regions() already does it for us.
- */
- ret = dwc_pwm_init_one(dev, pcim_iomap_table(pci)[0], i * info->size);
+ ddata = devm_kzalloc(dev, struct_size(ddata, chips, info->nr), GFP_KERNEL);
+ if (!ddata)
+ return -ENOMEM;
+
+ /*
+ * No need to check for pcim_iomap_table() failure,
+ * pcim_iomap_regions() already does it for us.
+ */
+ ddata->io_base = pcim_iomap_table(pci)[0];
+ ddata->info = info;
+
+ for (idx = 0; idx < ddata->info->nr; idx++) {
+ ret = dwc_pwm_init_one(dev, ddata, idx);
if (ret)
return ret;
}
+ dev_set_drvdata(dev, ddata);
+
pm_runtime_put(dev);
pm_runtime_allow(dev);
@@ -88,19 +104,24 @@ static void dwc_pwm_remove(struct pci_dev *pci)
static int dwc_pwm_suspend(struct device *dev)
{
- struct pwm_chip *chip = dev_get_drvdata(dev);
- struct dwc_pwm *dwc = to_dwc_pwm(chip);
- int i;
-
- for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
- if (chip->pwms[i].state.enabled) {
- dev_err(dev, "PWM %u in use by consumer (%s)\n",
- i, chip->pwms[i].label);
- return -EBUSY;
+ struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev);
+ unsigned int idx;
+
+ for (idx = 0; idx < ddata->info->nr; idx++) {
+ struct pwm_chip *chip = ddata->chips[idx];
+ struct dwc_pwm *dwc = to_dwc_pwm(chip);
+ unsigned int i;
+
+ for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+ if (chip->pwms[i].state.enabled) {
+ dev_err(dev, "PWM %u in use by consumer (%s)\n",
+ i, chip->pwms[i].label);
+ return -EBUSY;
+ }
+ dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i));
+ dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i));
+ dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i));
}
- dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i));
- dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i));
- dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i));
}
return 0;
@@ -108,14 +129,19 @@ static int dwc_pwm_suspend(struct device *dev)
static int dwc_pwm_resume(struct device *dev)
{
- struct pwm_chip *chip = dev_get_drvdata(dev);
- struct dwc_pwm *dwc = to_dwc_pwm(chip);
- int i;
-
- for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
- dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i));
- dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i));
- dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i));
+ struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev);
+ unsigned int idx;
+
+ for (idx = 0; idx < ddata->info->nr; idx++) {
+ struct pwm_chip *chip = ddata->chips[idx];
+ struct dwc_pwm *dwc = to_dwc_pwm(chip);
+ unsigned int i;
+
+ for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+ dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i));
+ dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i));
+ dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i));
+ }
}
return 0;
diff --git a/drivers/pwm/pwm-dwc.h b/drivers/pwm/pwm-dwc.h
index a8b074841ae805..c6e2df5a612271 100644
--- a/drivers/pwm/pwm-dwc.h
+++ b/drivers/pwm/pwm-dwc.h
@@ -38,6 +38,12 @@ struct dwc_pwm_info {
unsigned int size;
};
+struct dwc_pwm_drvdata {
+ const struct dwc_pwm_info *info;
+ void __iomem *io_base;
+ struct pwm_chip *chips[];
+};
+
struct dwc_pwm_ctx {
u32 cnt;
u32 cnt2;
diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c
index 2f4ac9591c8f5a..271dfad05d6835 100644
--- a/drivers/ras/amd/fmpm.c
+++ b/drivers/ras/amd/fmpm.c
@@ -150,6 +150,8 @@ static unsigned int max_nr_fru;
/* Total length of record including headers and list of descriptor entries. */
static size_t max_rec_len;
+#define FMPM_MAX_REC_LEN (sizeof(struct fru_rec) + (sizeof(struct cper_fru_poison_desc) * 255))
+
/* Total number of SPA entries across all FRUs. */
static unsigned int spa_nr_entries;
@@ -475,6 +477,16 @@ static void set_rec_fields(struct fru_rec *rec)
struct cper_section_descriptor *sec_desc = &rec->sec_desc;
struct cper_record_header *hdr = &rec->hdr;
+ /*
+ * This is a saved record created with fewer max_nr_entries.
+ * Update the record lengths and keep everything else as-is.
+ */
+ if (hdr->record_length && hdr->record_length < max_rec_len) {
+ pr_debug("Growing record 0x%016llx from %u to %zu bytes\n",
+ hdr->record_id, hdr->record_length, max_rec_len);
+ goto update_lengths;
+ }
+
memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
hdr->revision = CPER_RECORD_REV;
hdr->signature_end = CPER_SIG_END;
@@ -489,19 +501,21 @@ static void set_rec_fields(struct fru_rec *rec)
hdr->error_severity = CPER_SEV_RECOVERABLE;
hdr->validation_bits = 0;
- hdr->record_length = max_rec_len;
hdr->creator_id = CPER_CREATOR_FMP;
hdr->notification_type = CPER_NOTIFY_MCE;
hdr->record_id = cper_next_record_id();
hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR;
sec_desc->section_offset = sizeof(struct cper_record_header);
- sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header);
sec_desc->revision = CPER_SEC_REV;
sec_desc->validation_bits = 0;
sec_desc->flags = CPER_SEC_PRIMARY;
sec_desc->section_type = CPER_SECTION_TYPE_FMP;
sec_desc->section_severity = CPER_SEV_RECOVERABLE;
+
+update_lengths:
+ hdr->record_length = max_rec_len;
+ sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header);
}
static int save_new_records(void)
@@ -512,16 +526,18 @@ static int save_new_records(void)
int ret = 0;
for_each_fru(i, rec) {
- if (rec->hdr.record_length)
+ /* No need to update saved records that match the current record size. */
+ if (rec->hdr.record_length == max_rec_len)
continue;
+ if (!rec->hdr.record_length)
+ set_bit(i, new_records);
+
set_rec_fields(rec);
ret = update_record_on_storage(rec);
if (ret)
goto out_clear;
-
- set_bit(i, new_records);
}
return ret;
@@ -641,12 +657,7 @@ static int get_saved_records(void)
int ret, pos;
ssize_t len;
- /*
- * Assume saved records match current max size.
- *
- * However, this may not be true depending on module parameters.
- */
- old = kmalloc(max_rec_len, GFP_KERNEL);
+ old = kmalloc(FMPM_MAX_REC_LEN, GFP_KERNEL);
if (!old) {
ret = -ENOMEM;
goto out;
@@ -663,21 +674,31 @@ static int get_saved_records(void)
* Make sure to clear temporary buffer between reads to avoid
* leftover data from records of various sizes.
*/
- memset(old, 0, max_rec_len);
+ memset(old, 0, FMPM_MAX_REC_LEN);
- len = erst_read_record(record_id, &old->hdr, max_rec_len,
+ len = erst_read_record(record_id, &old->hdr, FMPM_MAX_REC_LEN,
sizeof(struct fru_rec), &CPER_CREATOR_FMP);
if (len < 0)
continue;
- if (len > max_rec_len) {
- pr_debug("Found record larger than max_rec_len\n");
+ new = get_valid_record(old);
+ if (!new) {
+ erst_clear(record_id);
continue;
}
- new = get_valid_record(old);
- if (!new)
- erst_clear(record_id);
+ if (len > max_rec_len) {
+ unsigned int saved_nr_entries;
+
+ saved_nr_entries = len - sizeof(struct fru_rec);
+ saved_nr_entries /= sizeof(struct cper_fru_poison_desc);
+
+ pr_warn("Saved record found with %u entries.\n", saved_nr_entries);
+ pr_warn("Please increase max_nr_entries to %u.\n", saved_nr_entries);
+
+ ret = -EINVAL;
+ goto out_end;
+ }
/* Restore the record */
memcpy(new, old, len);
diff --git a/drivers/ras/debugfs.h b/drivers/ras/debugfs.h
index 4749ccdeeba122..5a2f48439258cd 100644
--- a/drivers/ras/debugfs.h
+++ b/drivers/ras/debugfs.h
@@ -4,6 +4,10 @@
#include <linux/debugfs.h>
+#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *ras_get_debugfs_root(void);
+#else
+static inline struct dentry *ras_get_debugfs_root(void) { return NULL; }
+#endif /* DEBUG_FS */
#endif /* __RAS_DEBUGFS_H__ */
diff --git a/drivers/regulator/tps65132-regulator.c b/drivers/regulator/tps65132-regulator.c
index a06f5f2d79329d..9c2f0dd42613d4 100644
--- a/drivers/regulator/tps65132-regulator.c
+++ b/drivers/regulator/tps65132-regulator.c
@@ -267,10 +267,17 @@ static const struct i2c_device_id tps65132_id[] = {
};
MODULE_DEVICE_TABLE(i2c, tps65132_id);
+static const struct of_device_id __maybe_unused tps65132_of_match[] = {
+ { .compatible = "ti,tps65132" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, tps65132_of_match);
+
static struct i2c_driver tps65132_i2c_driver = {
.driver = {
.name = "tps65132",
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
+ .of_match_table = of_match_ptr(tps65132_of_match),
},
.probe = tps65132_probe,
.id_table = tps65132_id,
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index f95d12345d98a6..920f550bc313bf 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -363,10 +363,8 @@ int ccw_device_set_online(struct ccw_device *cdev)
spin_lock_irq(cdev->ccwlock);
ret = ccw_device_online(cdev);
- spin_unlock_irq(cdev->ccwlock);
- if (ret == 0)
- wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
- else {
+ if (ret) {
+ spin_unlock_irq(cdev->ccwlock);
CIO_MSG_EVENT(0, "ccw_device_online returned %d, "
"device 0.%x.%04x\n",
ret, cdev->private->dev_id.ssid,
@@ -375,7 +373,12 @@ int ccw_device_set_online(struct ccw_device *cdev)
put_device(&cdev->dev);
return ret;
}
- spin_lock_irq(cdev->ccwlock);
+ /* Wait until a final state is reached */
+ while (!dev_fsm_final_state(cdev)) {
+ spin_unlock_irq(cdev->ccwlock);
+ wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
+ spin_lock_irq(cdev->ccwlock);
+ }
/* Check if online processing was successful */
if ((cdev->private->state != DEV_STATE_ONLINE) &&
(cdev->private->state != DEV_STATE_W4SENSE)) {
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 65d8b2cfd6262d..42791fa0b80e26 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -504,6 +504,11 @@ callback:
ccw_device_done(cdev, DEV_STATE_ONLINE);
/* Deliver fake irb to device driver, if needed. */
if (cdev->private->flags.fake_irb) {
+ CIO_MSG_EVENT(2, "fakeirb: deliver device 0.%x.%04x intparm %lx type=%d\n",
+ cdev->private->dev_id.ssid,
+ cdev->private->dev_id.devno,
+ cdev->private->intparm,
+ cdev->private->flags.fake_irb);
create_fake_irb(&cdev->private->dma_area->irb,
cdev->private->flags.fake_irb);
cdev->private->flags.fake_irb = 0;
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 40c97f8730751f..acd6790dba4dd1 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -208,6 +208,10 @@ int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa,
if (!cdev->private->flags.fake_irb) {
cdev->private->flags.fake_irb = FAKE_CMD_IRB;
cdev->private->intparm = intparm;
+ CIO_MSG_EVENT(2, "fakeirb: queue device 0.%x.%04x intparm %lx type=%d\n",
+ cdev->private->dev_id.ssid,
+ cdev->private->dev_id.devno, intparm,
+ cdev->private->flags.fake_irb);
return 0;
} else
/* There's already a fake I/O around. */
@@ -551,6 +555,10 @@ int ccw_device_tm_start_timeout_key(struct ccw_device *cdev, struct tcw *tcw,
if (!cdev->private->flags.fake_irb) {
cdev->private->flags.fake_irb = FAKE_TM_IRB;
cdev->private->intparm = intparm;
+ CIO_MSG_EVENT(2, "fakeirb: queue device 0.%x.%04x intparm %lx type=%d\n",
+ cdev->private->dev_id.ssid,
+ cdev->private->dev_id.devno, intparm,
+ cdev->private->flags.fake_irb);
return 0;
} else
/* There's already a fake I/O around. */
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 3d9f0834c78bf1..a1cb39f4b7a279 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -722,8 +722,8 @@ static void qdio_handle_activate_check(struct qdio_irq *irq_ptr,
lgr_info_log();
}
-static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat,
- int dstat)
+static int qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat,
+ int dstat, int dcc)
{
DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qest irq");
@@ -731,15 +731,18 @@ static void qdio_establish_handle_irq(struct qdio_irq *irq_ptr, int cstat,
goto error;
if (dstat & ~(DEV_STAT_DEV_END | DEV_STAT_CHN_END))
goto error;
+ if (dcc == 1)
+ return -EAGAIN;
if (!(dstat & DEV_STAT_DEV_END))
goto error;
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ESTABLISHED);
- return;
+ return 0;
error:
DBF_ERROR("%4x EQ:error", irq_ptr->schid.sch_no);
DBF_ERROR("ds: %2x cs:%2x", dstat, cstat);
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
+ return -EIO;
}
/* qdio interrupt handler */
@@ -748,7 +751,7 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
{
struct qdio_irq *irq_ptr = cdev->private->qdio_data;
struct subchannel_id schid;
- int cstat, dstat;
+ int cstat, dstat, rc, dcc;
if (!intparm || !irq_ptr) {
ccw_device_get_schid(cdev, &schid);
@@ -768,10 +771,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
qdio_irq_check_sense(irq_ptr, irb);
cstat = irb->scsw.cmd.cstat;
dstat = irb->scsw.cmd.dstat;
+ dcc = scsw_cmd_is_valid_cc(&irb->scsw) ? irb->scsw.cmd.cc : 0;
+ rc = 0;
switch (irq_ptr->state) {
case QDIO_IRQ_STATE_INACTIVE:
- qdio_establish_handle_irq(irq_ptr, cstat, dstat);
+ rc = qdio_establish_handle_irq(irq_ptr, cstat, dstat, dcc);
break;
case QDIO_IRQ_STATE_CLEANUP:
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
@@ -785,12 +790,25 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
if (cstat || dstat)
qdio_handle_activate_check(irq_ptr, intparm, cstat,
dstat);
+ else if (dcc == 1)
+ rc = -EAGAIN;
break;
case QDIO_IRQ_STATE_STOPPED:
break;
default:
WARN_ON_ONCE(1);
}
+
+ if (rc == -EAGAIN) {
+ DBF_DEV_EVENT(DBF_INFO, irq_ptr, "qint retry");
+ rc = ccw_device_start(cdev, irq_ptr->ccw, intparm, 0, 0);
+ if (!rc)
+ return;
+ DBF_ERROR("%4x RETRY ERR", irq_ptr->schid.sch_no);
+ DBF_ERROR("rc:%4x", rc);
+ qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
+ }
+
wake_up(&cdev->private->wait_q);
}
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index 2c8e964425dc38..43778b088ffac5 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -292,13 +292,16 @@ out:
static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
{
clear_bit(dmb->sba_idx, ism->sba_bitmap);
- dma_free_coherent(&ism->pdev->dev, dmb->dmb_len,
- dmb->cpu_addr, dmb->dma_addr);
+ dma_unmap_page(&ism->pdev->dev, dmb->dma_addr, dmb->dmb_len,
+ DMA_FROM_DEVICE);
+ folio_put(virt_to_folio(dmb->cpu_addr));
}
static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
{
+ struct folio *folio;
unsigned long bit;
+ int rc;
if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev))
return -EINVAL;
@@ -315,14 +318,30 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
test_and_set_bit(dmb->sba_idx, ism->sba_bitmap))
return -EINVAL;
- dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len,
- &dmb->dma_addr,
- GFP_KERNEL | __GFP_NOWARN |
- __GFP_NOMEMALLOC | __GFP_NORETRY);
- if (!dmb->cpu_addr)
- clear_bit(dmb->sba_idx, ism->sba_bitmap);
+ folio = folio_alloc(GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC |
+ __GFP_NORETRY, get_order(dmb->dmb_len));
- return dmb->cpu_addr ? 0 : -ENOMEM;
+ if (!folio) {
+ rc = -ENOMEM;
+ goto out_bit;
+ }
+
+ dmb->cpu_addr = folio_address(folio);
+ dmb->dma_addr = dma_map_page(&ism->pdev->dev,
+ virt_to_page(dmb->cpu_addr), 0,
+ dmb->dmb_len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(&ism->pdev->dev, dmb->dma_addr)) {
+ rc = -ENOMEM;
+ goto out_free;
+ }
+
+ return 0;
+
+out_free:
+ kfree(dmb->cpu_addr);
+out_bit:
+ clear_bit(dmb->sba_idx, ism->sba_bitmap);
+ return rc;
}
int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index a0cce6872075d4..f0b8b709649f29 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1179,6 +1179,20 @@ static int qeth_check_irb_error(struct qeth_card *card, struct ccw_device *cdev,
}
}
+/**
+ * qeth_irq() - qeth interrupt handler
+ * @cdev: ccw device
+ * @intparm: expect pointer to iob
+ * @irb: Interruption Response Block
+ *
+ * In the good path:
+ * corresponding qeth channel is locked with last used iob as active_cmd.
+ * But this function is also called for error interrupts.
+ *
+ * Caller ensures that:
+ * Interrupts are disabled; ccw device lock is held;
+ *
+ */
static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
struct irb *irb)
{
@@ -1220,11 +1234,10 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
iob = (struct qeth_cmd_buffer *) (addr_t)intparm;
}
- qeth_unlock_channel(card, channel);
-
rc = qeth_check_irb_error(card, cdev, irb);
if (rc) {
/* IO was terminated, free its resources. */
+ qeth_unlock_channel(card, channel);
if (iob)
qeth_cancel_cmd(iob, rc);
return;
@@ -1268,6 +1281,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
rc = qeth_get_problem(card, cdev, irb);
if (rc) {
card->read_or_write_problem = 1;
+ qeth_unlock_channel(card, channel);
if (iob)
qeth_cancel_cmd(iob, rc);
qeth_clear_ipacmd_list(card);
@@ -1276,6 +1290,26 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
}
}
+ if (scsw_cmd_is_valid_cc(&irb->scsw) && irb->scsw.cmd.cc == 1 && iob) {
+ /* channel command hasn't started: retry.
+ * active_cmd is still set to last iob
+ */
+ QETH_CARD_TEXT(card, 2, "irqcc1");
+ rc = ccw_device_start_timeout(cdev, __ccw_from_cmd(iob),
+ (addr_t)iob, 0, 0, iob->timeout);
+ if (rc) {
+ QETH_DBF_MESSAGE(2,
+ "ccw retry on %x failed, rc = %i\n",
+ CARD_DEVID(card), rc);
+ QETH_CARD_TEXT_(card, 2, " err%d", rc);
+ qeth_unlock_channel(card, channel);
+ qeth_cancel_cmd(iob, rc);
+ }
+ return;
+ }
+
+ qeth_unlock_channel(card, channel);
+
if (iob) {
/* sanity check: */
if (irb->scsw.cmd.count > iob->length) {
diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c
index 2c246e80c1c4d6..d91659811eb3c5 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c
@@ -833,7 +833,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba,
BNX2FC_TGT_DBG(tgt, "Freeing up session resources\n");
- spin_lock_bh(&tgt->cq_lock);
ctx_base_ptr = tgt->ctx_base;
tgt->ctx_base = NULL;
@@ -889,7 +888,6 @@ static void bnx2fc_free_session_resc(struct bnx2fc_hba *hba,
tgt->sq, tgt->sq_dma);
tgt->sq = NULL;
}
- spin_unlock_bh(&tgt->cq_lock);
if (ctx_base_ptr)
iounmap(ctx_base_ptr);
diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c
index 1befcd5b2a0f93..fa07a6f54003ee 100644
--- a/drivers/scsi/ch.c
+++ b/drivers/scsi/ch.c
@@ -102,7 +102,9 @@ do { \
#define MAX_RETRIES 1
-static struct class * ch_sysfs_class;
+static const struct class ch_sysfs_class = {
+ .name = "scsi_changer",
+};
typedef struct {
struct kref ref;
@@ -930,7 +932,7 @@ static int ch_probe(struct device *dev)
mutex_init(&ch->lock);
kref_init(&ch->ref);
ch->device = sd;
- class_dev = device_create(ch_sysfs_class, dev,
+ class_dev = device_create(&ch_sysfs_class, dev,
MKDEV(SCSI_CHANGER_MAJOR, ch->minor), ch,
"s%s", ch->name);
if (IS_ERR(class_dev)) {
@@ -955,7 +957,7 @@ static int ch_probe(struct device *dev)
return 0;
destroy_dev:
- device_destroy(ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor));
+ device_destroy(&ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor));
put_device:
scsi_device_put(sd);
remove_idr:
@@ -974,7 +976,7 @@ static int ch_remove(struct device *dev)
dev_set_drvdata(dev, NULL);
spin_unlock(&ch_index_lock);
- device_destroy(ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR,ch->minor));
+ device_destroy(&ch_sysfs_class, MKDEV(SCSI_CHANGER_MAJOR, ch->minor));
scsi_device_put(ch->device);
kref_put(&ch->ref, ch_destroy);
return 0;
@@ -1003,11 +1005,9 @@ static int __init init_ch_module(void)
int rc;
printk(KERN_INFO "SCSI Media Changer driver v" VERSION " \n");
- ch_sysfs_class = class_create("scsi_changer");
- if (IS_ERR(ch_sysfs_class)) {
- rc = PTR_ERR(ch_sysfs_class);
+ rc = class_register(&ch_sysfs_class);
+ if (rc)
return rc;
- }
rc = register_chrdev(SCSI_CHANGER_MAJOR,"ch",&changer_fops);
if (rc < 0) {
printk("Unable to get major %d for SCSI-Changer\n",
@@ -1022,7 +1022,7 @@ static int __init init_ch_module(void)
fail2:
unregister_chrdev(SCSI_CHANGER_MAJOR, "ch");
fail1:
- class_destroy(ch_sysfs_class);
+ class_unregister(&ch_sysfs_class);
return rc;
}
@@ -1030,7 +1030,7 @@ static void __exit exit_ch_module(void)
{
scsi_unregister_driver(&ch_template.gendrv);
unregister_chrdev(SCSI_CHANGER_MAJOR, "ch");
- class_destroy(ch_sysfs_class);
+ class_unregister(&ch_sysfs_class);
idr_destroy(&ch_index_idr);
}
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index debd3697411974..e8382cc5cf23c0 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -28,7 +28,12 @@ MODULE_AUTHOR("Manoj N. Kumar <manoj@linux.vnet.ibm.com>");
MODULE_AUTHOR("Matthew R. Ochs <mrochs@linux.vnet.ibm.com>");
MODULE_LICENSE("GPL");
-static struct class *cxlflash_class;
+static char *cxlflash_devnode(const struct device *dev, umode_t *mode);
+static const struct class cxlflash_class = {
+ .name = "cxlflash",
+ .devnode = cxlflash_devnode,
+};
+
static u32 cxlflash_major;
static DECLARE_BITMAP(cxlflash_minor, CXLFLASH_MAX_ADAPTERS);
@@ -3602,7 +3607,7 @@ static int init_chrdev(struct cxlflash_cfg *cfg)
goto err1;
}
- char_dev = device_create(cxlflash_class, NULL, devno,
+ char_dev = device_create(&cxlflash_class, NULL, devno,
NULL, "cxlflash%d", minor);
if (IS_ERR(char_dev)) {
rc = PTR_ERR(char_dev);
@@ -3880,14 +3885,12 @@ static int cxlflash_class_init(void)
cxlflash_major = MAJOR(devno);
- cxlflash_class = class_create("cxlflash");
- if (IS_ERR(cxlflash_class)) {
- rc = PTR_ERR(cxlflash_class);
+ rc = class_register(&cxlflash_class);
+ if (rc) {
pr_err("%s: class_create failed rc=%d\n", __func__, rc);
goto err;
}
- cxlflash_class->devnode = cxlflash_devnode;
out:
pr_debug("%s: returning rc=%d\n", __func__, rc);
return rc;
@@ -3903,7 +3906,7 @@ static void cxlflash_class_exit(void)
{
dev_t devno = MKDEV(cxlflash_major, 0);
- class_destroy(cxlflash_class);
+ class_unregister(&cxlflash_class);
unregister_chrdev_region(devno, CXLFLASH_MAX_ADAPTERS);
}
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 097dfe4b620dce..35f8e00850d6cb 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1797,7 +1797,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
if (dev_is_sata(device)) {
struct ata_link *link = &device->sata_dev.ap->link;
- rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT,
+ rc = ata_wait_after_reset(link, jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT,
smp_ata_check_ready_type);
} else {
msleep(2000);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 7d2a33514538c2..34f96cc35342bc 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -2244,7 +2244,15 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) &&
(sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) {
- ts->stat = SAS_PROTO_RESPONSE;
+ if (task->ata_task.use_ncq) {
+ struct domain_device *device = task->dev;
+ struct hisi_sas_device *sas_dev = device->lldd_dev;
+
+ sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR;
+ slot->abort = 1;
+ } else {
+ ts->stat = SAS_PROTO_RESPONSE;
+ }
} else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
ts->residual = trans_tx_fail_type;
ts->stat = SAS_DATA_UNDERRUN;
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 4f495a41ec4aae..2d92549e524319 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -353,12 +353,13 @@ static void scsi_host_dev_release(struct device *dev)
if (shost->shost_state == SHOST_CREATED) {
/*
- * Free the shost_dev device name here if scsi_host_alloc()
- * and scsi_host_put() have been called but neither
+ * Free the shost_dev device name and remove the proc host dir
+ * here if scsi_host_{alloc,put}() have been called but neither
* scsi_host_add() nor scsi_remove_host() has been called.
* This avoids that the memory allocated for the shost_dev
- * name is leaked.
+ * name as well as the proc dir structure are leaked.
*/
+ scsi_proc_hostdir_rm(shost->hostt);
kfree(dev_name(&shost->shost_dev));
}
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index a2204674b6808f..f6e6db8b8aba91 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -135,7 +135,7 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
static inline void *alloc_smp_req(int size)
{
- u8 *p = kzalloc(size, GFP_KERNEL);
+ u8 *p = kzalloc(ALIGN(size, ARCH_DMA_MINALIGN), GFP_KERNEL);
if (p)
p[0] = SMP_REQUEST;
return p;
@@ -1621,6 +1621,16 @@ out_err:
/* ---------- Domain revalidation ---------- */
+static void sas_get_sas_addr_and_dev_type(struct smp_disc_resp *disc_resp,
+ u8 *sas_addr,
+ enum sas_device_type *type)
+{
+ memcpy(sas_addr, disc_resp->disc.attached_sas_addr, SAS_ADDR_SIZE);
+ *type = to_dev_type(&disc_resp->disc);
+ if (*type == SAS_PHY_UNUSED)
+ memset(sas_addr, 0, SAS_ADDR_SIZE);
+}
+
static int sas_get_phy_discover(struct domain_device *dev,
int phy_id, struct smp_disc_resp *disc_resp)
{
@@ -1674,13 +1684,8 @@ int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id,
return -ENOMEM;
res = sas_get_phy_discover(dev, phy_id, disc_resp);
- if (res == 0) {
- memcpy(sas_addr, disc_resp->disc.attached_sas_addr,
- SAS_ADDR_SIZE);
- *type = to_dev_type(&disc_resp->disc);
- if (*type == 0)
- memset(sas_addr, 0, SAS_ADDR_SIZE);
- }
+ if (res == 0)
+ sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, type);
kfree(disc_resp);
return res;
}
@@ -1940,6 +1945,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
struct expander_device *ex = &dev->ex_dev;
struct ex_phy *phy = &ex->ex_phy[phy_id];
enum sas_device_type type = SAS_PHY_UNUSED;
+ struct smp_disc_resp *disc_resp;
u8 sas_addr[SAS_ADDR_SIZE];
char msg[80] = "";
int res;
@@ -1951,33 +1957,41 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
SAS_ADDR(dev->sas_addr), phy_id, msg);
memset(sas_addr, 0, SAS_ADDR_SIZE);
- res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type);
+ disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+ if (!disc_resp)
+ return -ENOMEM;
+
+ res = sas_get_phy_discover(dev, phy_id, disc_resp);
switch (res) {
case SMP_RESP_NO_PHY:
phy->phy_state = PHY_NOT_PRESENT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out_free_resp;
case SMP_RESP_PHY_VACANT:
phy->phy_state = PHY_VACANT;
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return res;
+ goto out_free_resp;
case SMP_RESP_FUNC_ACC:
break;
case -ECOMM:
break;
default:
- return res;
+ goto out_free_resp;
}
+ if (res == 0)
+ sas_get_sas_addr_and_dev_type(disc_resp, sas_addr, &type);
+
if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) {
phy->phy_state = PHY_EMPTY;
sas_unregister_devs_sas_addr(dev, phy_id, last);
/*
- * Even though the PHY is empty, for convenience we discover
- * the PHY to update the PHY info, like negotiated linkrate.
+ * Even though the PHY is empty, for convenience we update
+ * the PHY info, like negotiated linkrate.
*/
- sas_ex_phy_discover(dev, phy_id);
- return res;
+ if (res == 0)
+ sas_set_ex_phy(dev, phy_id, disc_resp);
+ goto out_free_resp;
} else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) &&
dev_type_flutter(type, phy->attached_dev_type)) {
struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id);
@@ -1989,7 +2003,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
action = ", needs recovery";
pr_debug("ex %016llx phy%02d broadcast flutter%s\n",
SAS_ADDR(dev->sas_addr), phy_id, action);
- return res;
+ goto out_free_resp;
}
/* we always have to delete the old device when we went here */
@@ -1998,7 +2012,10 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
SAS_ADDR(phy->attached_sas_addr));
sas_unregister_devs_sas_addr(dev, phy_id, last);
- return sas_discover_new(dev, phy_id);
+ res = sas_discover_new(dev, phy_id);
+out_free_resp:
+ kfree(disc_resp);
+ return res;
}
/**
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 30d20d37554f6d..98ca7df003efb3 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -1333,7 +1333,6 @@ struct lpfc_hba {
struct timer_list fabric_block_timer;
unsigned long bit_flags;
atomic_t num_rsrc_err;
- atomic_t num_cmd_success;
unsigned long last_rsrc_error_time;
unsigned long last_ramp_down_time;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -1438,6 +1437,7 @@ struct lpfc_hba {
struct timer_list inactive_vmid_poll;
/* RAS Support */
+ spinlock_t ras_fwlog_lock; /* do not take while holding another lock */
struct lpfc_ras_fwlog ras_fwlog;
uint32_t iocb_cnt;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 365c7e96070bb7..3c534b3cfe9186 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -5865,9 +5865,9 @@ lpfc_ras_fwlog_buffsize_set(struct lpfc_hba *phba, uint val)
if (phba->cfg_ras_fwlog_func != PCI_FUNC(phba->pcidev->devfn))
return -EINVAL;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
state = phba->ras_fwlog.state;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
if (state == REG_INPROGRESS) {
lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "6147 RAS Logging "
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index d80e6e81053b0a..529df1768fa89f 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -2513,7 +2513,7 @@ static int lpfcdiag_loop_self_reg(struct lpfc_hba *phba, uint16_t *rpi)
return -ENOMEM;
}
- dmabuff = (struct lpfc_dmabuf *)mbox->ctx_buf;
+ dmabuff = mbox->ctx_buf;
mbox->ctx_buf = NULL;
mbox->ctx_ndlp = NULL;
status = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_TMO);
@@ -3169,10 +3169,10 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job)
}
cmdwqe = &cmdiocbq->wqe;
- memset(cmdwqe, 0, sizeof(union lpfc_wqe));
+ memset(cmdwqe, 0, sizeof(*cmdwqe));
if (phba->sli_rev < LPFC_SLI_REV4) {
rspwqe = &rspiocbq->wqe;
- memset(rspwqe, 0, sizeof(union lpfc_wqe));
+ memset(rspwqe, 0, sizeof(*rspwqe));
}
INIT_LIST_HEAD(&head);
@@ -3376,7 +3376,7 @@ lpfc_bsg_issue_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
unsigned long flags;
uint8_t *pmb, *pmb_buf;
- dd_data = pmboxq->ctx_ndlp;
+ dd_data = pmboxq->ctx_u.dd_data;
/*
* The outgoing buffer is readily referred from the dma buffer,
@@ -3553,7 +3553,7 @@ lpfc_bsg_issue_mbox_ext_handle_job(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
struct lpfc_sli_config_mbox *sli_cfg_mbx;
uint8_t *pmbx;
- dd_data = pmboxq->ctx_buf;
+ dd_data = pmboxq->ctx_u.dd_data;
/* Determine if job has been aborted */
spin_lock_irqsave(&phba->ct_ev_lock, flags);
@@ -3940,7 +3940,7 @@ lpfc_bsg_sli_cfg_read_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_read_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->ctx_buf = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -4112,7 +4112,7 @@ lpfc_bsg_sli_cfg_write_cmd_ext(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->ctx_buf = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -4460,7 +4460,7 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_write_mbox_ext_cmpl;
/* context fields to callback function */
- pmboxq->ctx_buf = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -4747,7 +4747,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job,
if (mbox_req->inExtWLen || mbox_req->outExtWLen) {
from = pmbx;
ext = from + sizeof(MAILBOX_t);
- pmboxq->ctx_buf = ext;
+ pmboxq->ext_buf = ext;
pmboxq->in_ext_byte_len =
mbox_req->inExtWLen * sizeof(uint32_t);
pmboxq->out_ext_byte_len =
@@ -4875,7 +4875,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job,
pmboxq->mbox_cmpl = lpfc_bsg_issue_mbox_cmpl;
/* setup context field to pass wait_queue pointer to wake function */
- pmboxq->ctx_ndlp = dd_data;
+ pmboxq->ctx_u.dd_data = dd_data;
dd_data->type = TYPE_MBOX;
dd_data->set_job = job;
dd_data->context_un.mbox.pmboxq = pmboxq;
@@ -5070,12 +5070,12 @@ lpfc_bsg_get_ras_config(struct bsg_job *job)
bsg_reply->reply_data.vendor_reply.vendor_rsp;
/* Current logging state */
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state == ACTIVE)
ras_reply->state = LPFC_RASLOG_STATE_RUNNING;
else
ras_reply->state = LPFC_RASLOG_STATE_STOPPED;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
ras_reply->log_level = phba->ras_fwlog.fw_loglevel;
ras_reply->log_buff_sz = phba->cfg_ras_fwlog_buffsize;
@@ -5132,13 +5132,13 @@ lpfc_bsg_set_ras_config(struct bsg_job *job)
if (action == LPFC_RASACTION_STOP_LOGGING) {
/* Check if already disabled */
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state != ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
rc = -ESRCH;
goto ras_job_error;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
/* Disable logging */
lpfc_ras_stop_fwlog(phba);
@@ -5149,10 +5149,10 @@ lpfc_bsg_set_ras_config(struct bsg_job *job)
* FW-logging with new log-level. Return status
* "Logging already Running" to caller.
**/
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state != INACTIVE)
action_status = -EINPROGRESS;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
/* Enable logging */
rc = lpfc_sli4_ras_fwlog_init(phba, log_level,
@@ -5268,13 +5268,13 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job)
goto ras_job_error;
/* Logging to be stopped before reading */
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (ras_fwlog->state == ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
rc = -EINPROGRESS;
goto ras_job_error;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
if (job->request_len <
sizeof(struct fc_bsg_request) +
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index ab5af10c8a16ca..a2d2b02b34187f 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -2194,12 +2194,12 @@ static int lpfc_debugfs_ras_log_data(struct lpfc_hba *phba,
memset(buffer, 0, size);
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (phba->ras_fwlog.state != ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
return -EINVAL;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
list_for_each_entry_safe(dmabuf, next,
&phba->ras_fwlog.fwlog_buff_list, list) {
@@ -2250,13 +2250,13 @@ lpfc_debugfs_ras_log_open(struct inode *inode, struct file *file)
int size;
int rc = -ENOMEM;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
if (phba->ras_fwlog.state != ACTIVE) {
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
rc = -EINVAL;
goto out;
}
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
if (check_mul_overflow(LPFC_RAS_MIN_BUFF_POST_SIZE,
phba->cfg_ras_fwlog_buffsize, &size))
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 28e56542e0720e..f7c28dc73bf67d 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -4437,23 +4437,23 @@ lpfc_els_retry_delay(struct timer_list *t)
unsigned long flags;
struct lpfc_work_evt *evtp = &ndlp->els_retry_evt;
+ /* Hold a node reference for outstanding queued work */
+ if (!lpfc_nlp_get(ndlp))
+ return;
+
spin_lock_irqsave(&phba->hbalock, flags);
if (!list_empty(&evtp->evt_listp)) {
spin_unlock_irqrestore(&phba->hbalock, flags);
+ lpfc_nlp_put(ndlp);
return;
}
- /* We need to hold the node by incrementing the reference
- * count until the queued work is done
- */
- evtp->evt_arg1 = lpfc_nlp_get(ndlp);
- if (evtp->evt_arg1) {
- evtp->evt = LPFC_EVT_ELS_RETRY;
- list_add_tail(&evtp->evt_listp, &phba->work_list);
- lpfc_worker_wake_up(phba);
- }
+ evtp->evt_arg1 = ndlp;
+ evtp->evt = LPFC_EVT_ELS_RETRY;
+ list_add_tail(&evtp->evt_listp, &phba->work_list);
spin_unlock_irqrestore(&phba->hbalock, flags);
- return;
+
+ lpfc_worker_wake_up(phba);
}
/**
@@ -7238,7 +7238,7 @@ lpfc_get_rdp_info(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context)
goto rdp_fail;
mbox->vport = rdp_context->ndlp->vport;
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a0;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+ mbox->ctx_u.rdp = rdp_context;
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED) {
lpfc_mbox_rsrc_cleanup(phba, mbox, MBOX_THD_UNLOCKED);
@@ -7290,7 +7290,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
mbox->in_ext_byte_len = DMP_SFF_PAGE_A0_SIZE;
mbox->out_ext_byte_len = DMP_SFF_PAGE_A0_SIZE;
mbox->mbox_offset_word = 5;
- mbox->ctx_buf = virt;
+ mbox->ext_buf = virt;
} else {
bf_set(lpfc_mbx_memory_dump_type3_length,
&mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A0_SIZE);
@@ -7298,7 +7298,6 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
}
mbox->vport = phba->pport;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30);
if (rc == MBX_NOT_FINISHED) {
@@ -7307,7 +7306,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
}
if (phba->sli_rev == LPFC_SLI_REV4)
- mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
+ mp = mbox->ctx_buf;
else
mp = mpsave;
@@ -7350,7 +7349,7 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
mbox->in_ext_byte_len = DMP_SFF_PAGE_A2_SIZE;
mbox->out_ext_byte_len = DMP_SFF_PAGE_A2_SIZE;
mbox->mbox_offset_word = 5;
- mbox->ctx_buf = virt;
+ mbox->ext_buf = virt;
} else {
bf_set(lpfc_mbx_memory_dump_type3_length,
&mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A2_SIZE);
@@ -7358,7 +7357,6 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
}
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30);
if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) {
rc = 1;
@@ -7500,9 +7498,9 @@ lpfc_els_lcb_rsp(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
int rc;
mb = &pmb->u.mb;
- lcb_context = (struct lpfc_lcb_context *)pmb->ctx_ndlp;
+ lcb_context = pmb->ctx_u.lcb;
ndlp = lcb_context->ndlp;
- pmb->ctx_ndlp = NULL;
+ memset(&pmb->ctx_u, 0, sizeof(pmb->ctx_u));
pmb->ctx_buf = NULL;
shdr = (union lpfc_sli4_cfg_shdr *)
@@ -7642,7 +7640,7 @@ lpfc_sli4_set_beacon(struct lpfc_vport *vport,
lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
LPFC_MBOX_OPCODE_SET_BEACON_CONFIG, len,
LPFC_SLI4_MBX_EMBED);
- mbox->ctx_ndlp = (void *)lcb_context;
+ mbox->ctx_u.lcb = lcb_context;
mbox->vport = phba->pport;
mbox->mbox_cmpl = lpfc_els_lcb_rsp;
bf_set(lpfc_mbx_set_beacon_port_num, &mbox->u.mqe.un.beacon_config,
@@ -8639,9 +8637,9 @@ lpfc_els_rsp_rls_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
mb = &pmb->u.mb;
ndlp = pmb->ctx_ndlp;
- rxid = (uint16_t)((unsigned long)(pmb->ctx_buf) & 0xffff);
- oxid = (uint16_t)(((unsigned long)(pmb->ctx_buf) >> 16) & 0xffff);
- pmb->ctx_buf = NULL;
+ rxid = (uint16_t)(pmb->ctx_u.ox_rx_id & 0xffff);
+ oxid = (uint16_t)((pmb->ctx_u.ox_rx_id >> 16) & 0xffff);
+ memset(&pmb->ctx_u, 0, sizeof(pmb->ctx_u));
pmb->ctx_ndlp = NULL;
if (mb->mbxStatus) {
@@ -8745,8 +8743,7 @@ lpfc_els_rcv_rls(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC);
if (mbox) {
lpfc_read_lnk_stat(phba, mbox);
- mbox->ctx_buf = (void *)((unsigned long)
- (ox_id << 16 | ctx));
+ mbox->ctx_u.ox_rx_id = ox_id << 16 | ctx;
mbox->ctx_ndlp = lpfc_nlp_get(ndlp);
if (!mbox->ctx_ndlp)
goto node_err;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index a7a2309a629faf..e42fa9c822b502 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -257,7 +257,9 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
if (evtp->evt_arg1) {
evtp->evt = LPFC_EVT_DEV_LOSS;
list_add_tail(&evtp->evt_listp, &phba->work_list);
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
lpfc_worker_wake_up(phba);
+ return;
}
spin_unlock_irqrestore(&phba->hbalock, iflags);
} else {
@@ -275,10 +277,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
lpfc_disc_state_machine(vport, ndlp, NULL,
NLP_EVT_DEVICE_RM);
}
-
}
-
- return;
}
/**
@@ -3429,7 +3428,7 @@ static void
lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ struct lpfc_dmabuf *mp = pmb->ctx_buf;
struct lpfc_vport *vport = pmb->vport;
struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
struct serv_parm *sp = &vport->fc_sparam;
@@ -3737,7 +3736,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
struct lpfc_mbx_read_top *la;
struct lpfc_sli_ring *pring;
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
+ struct lpfc_dmabuf *mp = pmb->ctx_buf;
uint8_t attn_type;
/* Unblock ELS traffic */
@@ -3851,8 +3850,8 @@ void
lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
struct lpfc_vport *vport = pmb->vport;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_dmabuf *mp = pmb->ctx_buf;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
/* The driver calls the state machine with the pmb pointer
* but wants to make sure a stale ctx_buf isn't acted on.
@@ -4066,7 +4065,7 @@ lpfc_create_static_vport(struct lpfc_hba *phba)
* the dump routine is a single-use construct.
*/
if (pmb->ctx_buf) {
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
kfree(mp);
pmb->ctx_buf = NULL;
@@ -4089,7 +4088,7 @@ lpfc_create_static_vport(struct lpfc_hba *phba)
if (phba->sli_rev == LPFC_SLI_REV4) {
byte_count = pmb->u.mqe.un.mb_words[5];
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
if (byte_count > sizeof(struct static_vport_info) -
offset)
byte_count = sizeof(struct static_vport_info)
@@ -4169,7 +4168,7 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
struct lpfc_vport *vport = pmb->vport;
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
pmb->ctx_ndlp = NULL;
@@ -4307,7 +4306,7 @@ void
lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
struct lpfc_vport *vport = pmb->vport;
int rc;
@@ -4431,7 +4430,7 @@ lpfc_mbx_cmpl_fc_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
struct lpfc_vport *vport = pmb->vport;
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
pmb->ctx_ndlp = NULL;
if (mb->mbxStatus) {
@@ -5174,7 +5173,7 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
struct lpfc_vport *vport = pmb->vport;
struct lpfc_nodelist *ndlp;
- ndlp = (struct lpfc_nodelist *)(pmb->ctx_ndlp);
+ ndlp = pmb->ctx_ndlp;
if (!ndlp)
return;
lpfc_issue_els_logo(vport, ndlp, 0);
@@ -5496,7 +5495,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
if ((mb = phba->sli.mbox_active)) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
!(mb->mbox_flag & LPFC_MBX_IMED_UNREG) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
mb->ctx_ndlp = NULL;
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
}
@@ -5507,7 +5506,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
list_for_each_entry(mb, &phba->sli.mboxq_cmpl, list) {
if ((mb->u.mb.mbxCommand != MBX_REG_LOGIN64) ||
(mb->mbox_flag & LPFC_MBX_IMED_UNREG) ||
- (ndlp != (struct lpfc_nodelist *)mb->ctx_ndlp))
+ (ndlp != mb->ctx_ndlp))
continue;
mb->ctx_ndlp = NULL;
@@ -5517,7 +5516,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
!(mb->mbox_flag & LPFC_MBX_IMED_UNREG) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
list_del(&mb->list);
lpfc_mbox_rsrc_cleanup(phba, mb, MBOX_THD_LOCKED);
@@ -6357,7 +6356,7 @@ void
lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb = &pmb->u.mb;
- struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
struct lpfc_vport *vport = pmb->vport;
pmb->ctx_ndlp = NULL;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 88b2e57d90c2e3..f7a0aa3625f4e1 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -460,7 +460,7 @@ lpfc_config_port_post(struct lpfc_hba *phba)
return -EIO;
}
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
/* This dmabuf was allocated by lpfc_read_sparam. The dmabuf is no
* longer needed. Prevent unintended ctx_buf access as the mbox is
@@ -2217,7 +2217,7 @@ lpfc_handle_latt(struct lpfc_hba *phba)
/* Cleanup any outstanding ELS commands */
lpfc_els_flush_all_cmd(phba);
psli->slistat.link_event++;
- lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf);
+ lpfc_read_topology(phba, pmb, pmb->ctx_buf);
pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology;
pmb->vport = vport;
/* Block ELS IOCBs until we have processed this mbox command */
@@ -5454,7 +5454,7 @@ lpfc_sli4_async_link_evt(struct lpfc_hba *phba,
phba->sli.slistat.link_event++;
/* Create lpfc_handle_latt mailbox command from link ACQE */
- lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf);
+ lpfc_read_topology(phba, pmb, pmb->ctx_buf);
pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology;
pmb->vport = phba->pport;
@@ -6347,7 +6347,7 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc)
phba->sli.slistat.link_event++;
/* Create lpfc_handle_latt mailbox command from link ACQE */
- lpfc_read_topology(phba, pmb, (struct lpfc_dmabuf *)pmb->ctx_buf);
+ lpfc_read_topology(phba, pmb, pmb->ctx_buf);
pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology;
pmb->vport = phba->pport;
@@ -7705,6 +7705,9 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
"NVME" : " "),
(phba->nvmet_support ? "NVMET" : " "));
+ /* ras_fwlog state */
+ spin_lock_init(&phba->ras_fwlog_lock);
+
/* Initialize the IO buffer list used by driver for SLI3 SCSI */
spin_lock_init(&phba->scsi_buf_list_get_lock);
INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get);
@@ -13055,7 +13058,7 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
rc = request_threaded_irq(eqhdl->irq,
&lpfc_sli4_hba_intr_handler,
&lpfc_sli4_hba_intr_handler_th,
- IRQF_ONESHOT, name, eqhdl);
+ 0, name, eqhdl);
if (rc) {
lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
"0486 MSI-X fast-path (%d) "
diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c
index f7c41958036bb7..e98f1c2b22202e 100644
--- a/drivers/scsi/lpfc/lpfc_mbox.c
+++ b/drivers/scsi/lpfc/lpfc_mbox.c
@@ -102,7 +102,7 @@ lpfc_mbox_rsrc_cleanup(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox,
{
struct lpfc_dmabuf *mp;
- mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
+ mp = mbox->ctx_buf;
mbox->ctx_buf = NULL;
/* Release the generic BPL buffer memory. */
@@ -204,10 +204,8 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset,
uint16_t region_id)
{
MAILBOX_t *mb;
- void *ctx;
mb = &pmb->u.mb;
- ctx = pmb->ctx_buf;
/* Setup to dump VPD region */
memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
@@ -219,7 +217,6 @@ lpfc_dump_mem(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, uint16_t offset,
mb->un.varDmp.word_cnt = (DMP_RSP_SIZE / sizeof (uint32_t));
mb->un.varDmp.co = 0;
mb->un.varDmp.resp_offset = 0;
- pmb->ctx_buf = ctx;
mb->mbxOwner = OWN_HOST;
return;
}
@@ -236,11 +233,8 @@ void
lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
{
MAILBOX_t *mb;
- void *ctx;
mb = &pmb->u.mb;
- /* Save context so that we can restore after memset */
- ctx = pmb->ctx_buf;
/* Setup to dump VPD region */
memset(pmb, 0, sizeof(LPFC_MBOXQ_t));
@@ -254,7 +248,6 @@ lpfc_dump_wakeup_param(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
mb->un.varDmp.word_cnt = WAKE_UP_PARMS_WORD_SIZE;
mb->un.varDmp.co = 0;
mb->un.varDmp.resp_offset = 0;
- pmb->ctx_buf = ctx;
return;
}
@@ -372,7 +365,7 @@ lpfc_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb,
/* Save address for later completion and set the owner to host so that
* the FW knows this mailbox is available for processing.
*/
- pmb->ctx_buf = (uint8_t *)mp;
+ pmb->ctx_buf = mp;
mb->mbxOwner = OWN_HOST;
return (0);
}
@@ -1816,7 +1809,7 @@ lpfc_sli4_mbox_cmd_free(struct lpfc_hba *phba, struct lpfcMboxq *mbox)
}
/* Reinitialize the context pointers to avoid stale usage. */
mbox->ctx_buf = NULL;
- mbox->context3 = NULL;
+ memset(&mbox->ctx_u, 0, sizeof(mbox->ctx_u));
kfree(mbox->sge_array);
/* Finally, free the mailbox command itself */
mempool_free(mbox, phba->mbox_mem_pool);
@@ -2366,8 +2359,7 @@ lpfc_mbx_cmpl_rdp_link_stat(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
{
MAILBOX_t *mb;
int rc = FAILURE;
- struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mboxq->ctx_ndlp);
+ struct lpfc_rdp_context *rdp_context = mboxq->ctx_u.rdp;
mb = &mboxq->u.mb;
if (mb->mbxStatus)
@@ -2385,9 +2377,8 @@ mbx_failed:
static void
lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
{
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)mbox->ctx_buf;
- struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mbox->ctx_ndlp);
+ struct lpfc_dmabuf *mp = mbox->ctx_buf;
+ struct lpfc_rdp_context *rdp_context = mbox->ctx_u.rdp;
if (bf_get(lpfc_mqe_status, &mbox->u.mqe))
goto error_mbox_free;
@@ -2401,7 +2392,7 @@ lpfc_mbx_cmpl_rdp_page_a2(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
/* Save the dma buffer for cleanup in the final completion. */
mbox->ctx_buf = mp;
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_link_stat;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+ mbox->ctx_u.rdp = rdp_context;
if (lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT) == MBX_NOT_FINISHED)
goto error_mbox_free;
@@ -2416,9 +2407,8 @@ void
lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
{
int rc;
- struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
- struct lpfc_rdp_context *rdp_context =
- (struct lpfc_rdp_context *)(mbox->ctx_ndlp);
+ struct lpfc_dmabuf *mp = mbox->ctx_buf;
+ struct lpfc_rdp_context *rdp_context = mbox->ctx_u.rdp;
if (bf_get(lpfc_mqe_status, &mbox->u.mqe))
goto error;
@@ -2448,7 +2438,7 @@ lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
mbox->mbox_cmpl = lpfc_mbx_cmpl_rdp_page_a2;
- mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+ mbox->ctx_u.rdp = rdp_context;
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED)
goto error;
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 8e425be7c7c99c..c4172791c26751 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -300,7 +300,7 @@ lpfc_defer_plogi_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *login_mbox)
int rc;
ndlp = login_mbox->ctx_ndlp;
- save_iocb = login_mbox->context3;
+ save_iocb = login_mbox->ctx_u.save_iocb;
if (mb->mbxStatus == MBX_SUCCESS) {
/* Now that REG_RPI completed successfully,
@@ -640,7 +640,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
if (!login_mbox->ctx_ndlp)
goto out;
- login_mbox->context3 = save_iocb; /* For PLOGI ACC */
+ login_mbox->ctx_u.save_iocb = save_iocb; /* For PLOGI ACC */
spin_lock_irq(&ndlp->lock);
ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI);
@@ -682,8 +682,8 @@ lpfc_mbx_cmpl_resume_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
struct lpfc_nodelist *ndlp;
uint32_t cmd;
- elsiocb = (struct lpfc_iocbq *)mboxq->ctx_buf;
- ndlp = (struct lpfc_nodelist *)mboxq->ctx_ndlp;
+ elsiocb = mboxq->ctx_u.save_iocb;
+ ndlp = mboxq->ctx_ndlp;
vport = mboxq->vport;
cmd = elsiocb->drvrTimeout;
@@ -1875,7 +1875,7 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
/* cleanup any ndlp on mbox q waiting for reglogin cmpl */
if ((mb = phba->sli.mbox_active)) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND;
lpfc_nlp_put(ndlp);
mb->ctx_ndlp = NULL;
@@ -1886,7 +1886,7 @@ lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
spin_lock_irq(&phba->hbalock);
list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
if ((mb->u.mb.mbxCommand == MBX_REG_LOGIN64) &&
- (ndlp == (struct lpfc_nodelist *)mb->ctx_ndlp)) {
+ (ndlp == mb->ctx_ndlp)) {
ndlp->nlp_flag &= ~NLP_REG_LOGIN_SEND;
lpfc_nlp_put(ndlp);
list_del(&mb->list);
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 09c53b85bcb8d6..c5792eaf3f64cb 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2616,9 +2616,9 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
/* No concern about the role change on the nvme remoteport.
* The transport will update it.
*/
- spin_lock_irq(&vport->phba->hbalock);
+ spin_lock_irq(&ndlp->lock);
ndlp->fc4_xpt_flags |= NVME_XPT_UNREG_WAIT;
- spin_unlock_irq(&vport->phba->hbalock);
+ spin_unlock_irq(&ndlp->lock);
/* Don't let the host nvme transport keep sending keep-alives
* on this remoteport. Vport is unloading, no recovery. The
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 8258b771bd009e..561ced5503c634 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -1586,7 +1586,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
wqe = &nvmewqe->wqe;
/* Initialize WQE */
- memset(wqe, 0, sizeof(union lpfc_wqe));
+ memset(wqe, 0, sizeof(*wqe));
ctx_buf->iocbq->cmd_dmabuf = NULL;
spin_lock(&phba->sli4_hba.sgl_list_lock);
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index c0038eaae7b0ae..4a6e5223a22418 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -167,11 +167,10 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba)
struct Scsi_Host *shost;
struct scsi_device *sdev;
unsigned long new_queue_depth;
- unsigned long num_rsrc_err, num_cmd_success;
+ unsigned long num_rsrc_err;
int i;
num_rsrc_err = atomic_read(&phba->num_rsrc_err);
- num_cmd_success = atomic_read(&phba->num_cmd_success);
/*
* The error and success command counters are global per
@@ -186,20 +185,16 @@ lpfc_ramp_down_queue_handler(struct lpfc_hba *phba)
for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
shost = lpfc_shost_from_vport(vports[i]);
shost_for_each_device(sdev, shost) {
- new_queue_depth =
- sdev->queue_depth * num_rsrc_err /
- (num_rsrc_err + num_cmd_success);
- if (!new_queue_depth)
- new_queue_depth = sdev->queue_depth - 1;
+ if (num_rsrc_err >= sdev->queue_depth)
+ new_queue_depth = 1;
else
new_queue_depth = sdev->queue_depth -
- new_queue_depth;
+ num_rsrc_err;
scsi_change_queue_depth(sdev, new_queue_depth);
}
}
lpfc_destroy_vport_work_array(phba, vports);
atomic_set(&phba->num_rsrc_err, 0);
- atomic_set(&phba->num_cmd_success, 0);
}
/**
@@ -5336,16 +5331,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
}
err = lpfc_bg_scsi_prep_dma_buf(phba, lpfc_cmd);
} else {
- if (vport->phba->cfg_enable_bg) {
- lpfc_printf_vlog(vport,
- KERN_INFO, LOG_SCSI_CMD,
- "9038 BLKGRD: rcvd PROT_NORMAL cmd: "
- "x%x reftag x%x cnt %u pt %x\n",
- cmnd->cmnd[0],
- scsi_prot_ref_tag(cmnd),
- scsi_logical_block_count(cmnd),
- (cmnd->cmnd[1]>>5));
- }
err = lpfc_scsi_prep_dma_buf(phba, lpfc_cmd);
}
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 1f8a9b5945cbae..a028e008dd1ee8 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1217,9 +1217,9 @@ lpfc_set_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
empty = list_empty(&phba->active_rrq_list);
list_add_tail(&rrq->list, &phba->active_rrq_list);
phba->hba_flag |= HBA_RRQ_ACTIVE;
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
if (empty)
lpfc_worker_wake_up(phba);
- spin_unlock_irqrestore(&phba->hbalock, iflags);
return 0;
out:
spin_unlock_irqrestore(&phba->hbalock, iflags);
@@ -2830,7 +2830,7 @@ lpfc_sli_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
*/
pmboxq->mbox_flag |= LPFC_MBX_WAKE;
spin_lock_irqsave(&phba->hbalock, drvr_flag);
- pmbox_done = (struct completion *)pmboxq->context3;
+ pmbox_done = pmboxq->ctx_u.mbox_wait;
if (pmbox_done)
complete(pmbox_done);
spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
@@ -2885,7 +2885,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
if (!test_bit(FC_UNLOADING, &phba->pport->load_flag) &&
pmb->u.mb.mbxCommand == MBX_REG_LOGIN64 &&
!pmb->u.mb.mbxStatus) {
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
if (mp) {
pmb->ctx_buf = NULL;
lpfc_mbuf_free(phba, mp->virt, mp->phys);
@@ -2914,12 +2914,12 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
}
if (pmb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ ndlp = pmb->ctx_ndlp;
lpfc_nlp_put(ndlp);
}
if (pmb->u.mb.mbxCommand == MBX_UNREG_LOGIN) {
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ ndlp = pmb->ctx_ndlp;
/* Check to see if there are any deferred events to process */
if (ndlp) {
@@ -2952,7 +2952,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
/* This nlp_put pairs with lpfc_sli4_resume_rpi */
if (pmb->u.mb.mbxCommand == MBX_RESUME_RPI) {
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ ndlp = pmb->ctx_ndlp;
lpfc_nlp_put(ndlp);
}
@@ -5819,7 +5819,7 @@ lpfc_sli4_read_fcoe_params(struct lpfc_hba *phba)
goto out_free_mboxq;
}
- mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
+ mp = mboxq->ctx_buf;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
@@ -6849,9 +6849,9 @@ lpfc_ras_stop_fwlog(struct lpfc_hba *phba)
{
struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = INACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
/* Disable FW logging to host memory */
writel(LPFC_CTL_PDEV_CTL_DDL_RAS,
@@ -6894,9 +6894,9 @@ lpfc_sli4_ras_dma_free(struct lpfc_hba *phba)
ras_fwlog->lwpd.virt = NULL;
}
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = INACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
}
/**
@@ -6998,9 +6998,9 @@ lpfc_sli4_ras_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
goto disable_ras;
}
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = ACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
mempool_free(pmb, phba->mbox_mem_pool);
return;
@@ -7032,9 +7032,9 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba,
uint32_t len = 0, fwlog_buffsize, fwlog_entry_count;
int rc = 0;
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = INACTIVE;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
fwlog_buffsize = (LPFC_RAS_MIN_BUFF_POST_SIZE *
phba->cfg_ras_fwlog_buffsize);
@@ -7095,9 +7095,9 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba,
mbx_fwlog->u.request.lwpd.addr_lo = putPaddrLow(ras_fwlog->lwpd.phys);
mbx_fwlog->u.request.lwpd.addr_hi = putPaddrHigh(ras_fwlog->lwpd.phys);
- spin_lock_irq(&phba->hbalock);
+ spin_lock_irq(&phba->ras_fwlog_lock);
ras_fwlog->state = REG_INPROGRESS;
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->ras_fwlog_lock);
mbox->vport = phba->pport;
mbox->mbox_cmpl = lpfc_sli4_ras_mbox_cmpl;
@@ -8766,7 +8766,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
mboxq->vport = vport;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
- mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
+ mp = mboxq->ctx_buf;
if (rc == MBX_SUCCESS) {
memcpy(&vport->fc_sparam, mp->virt, sizeof(struct serv_parm));
rc = 0;
@@ -9548,8 +9548,8 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
}
/* Copy the mailbox extension data */
- if (pmbox->in_ext_byte_len && pmbox->ctx_buf) {
- lpfc_sli_pcimem_bcopy(pmbox->ctx_buf,
+ if (pmbox->in_ext_byte_len && pmbox->ext_buf) {
+ lpfc_sli_pcimem_bcopy(pmbox->ext_buf,
(uint8_t *)phba->mbox_ext,
pmbox->in_ext_byte_len);
}
@@ -9562,10 +9562,10 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
= MAILBOX_HBA_EXT_OFFSET;
/* Copy the mailbox extension data */
- if (pmbox->in_ext_byte_len && pmbox->ctx_buf)
+ if (pmbox->in_ext_byte_len && pmbox->ext_buf)
lpfc_memcpy_to_slim(phba->MBslimaddr +
MAILBOX_HBA_EXT_OFFSET,
- pmbox->ctx_buf, pmbox->in_ext_byte_len);
+ pmbox->ext_buf, pmbox->in_ext_byte_len);
if (mbx->mbxCommand == MBX_CONFIG_PORT)
/* copy command data into host mbox for cmpl */
@@ -9688,9 +9688,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
lpfc_sli_pcimem_bcopy(phba->mbox, mbx,
MAILBOX_CMD_SIZE);
/* Copy the mailbox extension data */
- if (pmbox->out_ext_byte_len && pmbox->ctx_buf) {
+ if (pmbox->out_ext_byte_len && pmbox->ext_buf) {
lpfc_sli_pcimem_bcopy(phba->mbox_ext,
- pmbox->ctx_buf,
+ pmbox->ext_buf,
pmbox->out_ext_byte_len);
}
} else {
@@ -9698,9 +9698,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox,
lpfc_memcpy_from_slim(mbx, phba->MBslimaddr,
MAILBOX_CMD_SIZE);
/* Copy the mailbox extension data */
- if (pmbox->out_ext_byte_len && pmbox->ctx_buf) {
+ if (pmbox->out_ext_byte_len && pmbox->ext_buf) {
lpfc_memcpy_from_slim(
- pmbox->ctx_buf,
+ pmbox->ext_buf,
phba->MBslimaddr +
MAILBOX_HBA_EXT_OFFSET,
pmbox->out_ext_byte_len);
@@ -11373,18 +11373,18 @@ lpfc_sli_post_recovery_event(struct lpfc_hba *phba,
unsigned long iflags;
struct lpfc_work_evt *evtp = &ndlp->recovery_evt;
+ /* Hold a node reference for outstanding queued work */
+ if (!lpfc_nlp_get(ndlp))
+ return;
+
spin_lock_irqsave(&phba->hbalock, iflags);
if (!list_empty(&evtp->evt_listp)) {
spin_unlock_irqrestore(&phba->hbalock, iflags);
+ lpfc_nlp_put(ndlp);
return;
}
- /* Incrementing the reference count until the queued work is done. */
- evtp->evt_arg1 = lpfc_nlp_get(ndlp);
- if (!evtp->evt_arg1) {
- spin_unlock_irqrestore(&phba->hbalock, iflags);
- return;
- }
+ evtp->evt_arg1 = ndlp;
evtp->evt = LPFC_EVT_RECOVER_PORT;
list_add_tail(&evtp->evt_listp, &phba->work_list);
spin_unlock_irqrestore(&phba->hbalock, iflags);
@@ -13262,9 +13262,9 @@ lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq,
/* setup wake call as IOCB callback */
pmboxq->mbox_cmpl = lpfc_sli_wake_mbox_wait;
- /* setup context3 field to pass wait_queue pointer to wake function */
+ /* setup ctx_u field to pass wait_queue pointer to wake function */
init_completion(&mbox_done);
- pmboxq->context3 = &mbox_done;
+ pmboxq->ctx_u.mbox_wait = &mbox_done;
/* now issue the command */
retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT);
if (retval == MBX_BUSY || retval == MBX_SUCCESS) {
@@ -13272,7 +13272,7 @@ lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq,
msecs_to_jiffies(timeout * 1000));
spin_lock_irqsave(&phba->hbalock, flag);
- pmboxq->context3 = NULL;
+ pmboxq->ctx_u.mbox_wait = NULL;
/*
* if LPFC_MBX_WAKE flag is set the mailbox is completed
* else do not free the resources.
@@ -13813,10 +13813,10 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id)
lpfc_sli_pcimem_bcopy(mbox, pmbox,
MAILBOX_CMD_SIZE);
if (pmb->out_ext_byte_len &&
- pmb->ctx_buf)
+ pmb->ext_buf)
lpfc_sli_pcimem_bcopy(
phba->mbox_ext,
- pmb->ctx_buf,
+ pmb->ext_buf,
pmb->out_ext_byte_len);
}
if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) {
@@ -13830,10 +13830,8 @@ lpfc_sli_sp_intr_handler(int irq, void *dev_id)
pmbox->un.varWords[0], 0);
if (!pmbox->mbxStatus) {
- mp = (struct lpfc_dmabuf *)
- (pmb->ctx_buf);
- ndlp = (struct lpfc_nodelist *)
- pmb->ctx_ndlp;
+ mp = pmb->ctx_buf;
+ ndlp = pmb->ctx_ndlp;
/* Reg_LOGIN of dflt RPI was
* successful. new lets get
@@ -14340,8 +14338,8 @@ lpfc_sli4_sp_handle_mbox_event(struct lpfc_hba *phba, struct lpfc_mcqe *mcqe)
mcqe_status,
pmbox->un.varWords[0], 0);
if (mcqe_status == MB_CQE_STATUS_SUCCESS) {
- mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
- ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
+ mp = pmb->ctx_buf;
+ ndlp = pmb->ctx_ndlp;
/* Reg_LOGIN of dflt RPI was successful. Mark the
* node as having an UNREG_LOGIN in progress to stop
@@ -19823,14 +19821,15 @@ lpfc_sli4_remove_rpis(struct lpfc_hba *phba)
* lpfc_sli4_resume_rpi - Remove the rpi bitmask region
* @ndlp: pointer to lpfc nodelist data structure.
* @cmpl: completion call-back.
- * @arg: data to load as MBox 'caller buffer information'
+ * @iocbq: data to load as mbox ctx_u information
*
* This routine is invoked to remove the memory region that
* provided rpi via a bitmask.
**/
int
lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp,
- void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *arg)
+ void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *),
+ struct lpfc_iocbq *iocbq)
{
LPFC_MBOXQ_t *mboxq;
struct lpfc_hba *phba = ndlp->phba;
@@ -19859,7 +19858,7 @@ lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp,
lpfc_resume_rpi(mboxq, ndlp);
if (cmpl) {
mboxq->mbox_cmpl = cmpl;
- mboxq->ctx_buf = arg;
+ mboxq->ctx_u.save_iocb = iocbq;
} else
mboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
mboxq->ctx_ndlp = ndlp;
@@ -20676,7 +20675,7 @@ lpfc_sli4_get_config_region23(struct lpfc_hba *phba, char *rgn23_data)
if (lpfc_sli4_dump_cfg_rg23(phba, mboxq))
goto out;
mqe = &mboxq->u.mqe;
- mp = (struct lpfc_dmabuf *)mboxq->ctx_buf;
+ mp = mboxq->ctx_buf;
rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
if (rc)
goto out;
@@ -21035,7 +21034,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport)
(mb->u.mb.mbxCommand == MBX_REG_VPI))
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- act_mbx_ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
+ act_mbx_ndlp = mb->ctx_ndlp;
/* This reference is local to this routine. The
* reference is removed at routine exit.
@@ -21064,7 +21063,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport)
mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
+ ndlp = mb->ctx_ndlp;
/* Unregister the RPI when mailbox complete */
mb->mbox_flag |= LPFC_MBX_IMED_UNREG;
restart_loop = 1;
@@ -21084,7 +21083,7 @@ lpfc_cleanup_pending_mbox(struct lpfc_vport *vport)
while (!list_empty(&mbox_cmd_list)) {
list_remove_head(&mbox_cmd_list, mb, LPFC_MBOXQ_t, list);
if (mb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
- ndlp = (struct lpfc_nodelist *)mb->ctx_ndlp;
+ ndlp = mb->ctx_ndlp;
mb->ctx_ndlp = NULL;
if (ndlp) {
spin_lock(&ndlp->lock);
diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h
index c911a39cb46b8c..cf7c42ec030679 100644
--- a/drivers/scsi/lpfc/lpfc_sli.h
+++ b/drivers/scsi/lpfc/lpfc_sli.h
@@ -1,7 +1,7 @@
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2004-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
@@ -182,11 +182,29 @@ typedef struct lpfcMboxq {
struct lpfc_mqe mqe;
} u;
struct lpfc_vport *vport; /* virtual port pointer */
- void *ctx_ndlp; /* an lpfc_nodelist pointer */
- void *ctx_buf; /* an lpfc_dmabuf pointer */
- void *context3; /* a generic pointer. Code must
- * accommodate the actual datatype.
- */
+ struct lpfc_nodelist *ctx_ndlp; /* caller ndlp pointer */
+ struct lpfc_dmabuf *ctx_buf; /* caller buffer information */
+ void *ext_buf; /* extended buffer for extended mbox
+ * cmds. Not a generic pointer.
+ * Use for storing virtual address.
+ */
+
+ /* Pointers that are seldom used during mbox execution, but require
+ * a saved context.
+ */
+ union {
+ unsigned long ox_rx_id; /* Used in els_rsp_rls_acc */
+ struct lpfc_rdp_context *rdp; /* Used in get_rdp_info */
+ struct lpfc_lcb_context *lcb; /* Used in set_beacon */
+ struct completion *mbox_wait; /* Used in issue_mbox_wait */
+ struct bsg_job_data *dd_data; /* Used in bsg_issue_mbox_cmpl
+ * and
+ * bsg_issue_mbox_ext_handle_job
+ */
+ struct lpfc_iocbq *save_iocb; /* Used in defer_plogi_acc and
+ * lpfc_mbx_cmpl_resume_rpi
+ */
+ } ctx_u;
void (*mbox_cmpl) (struct lpfc_hba *, struct lpfcMboxq *);
uint8_t mbox_flag;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 2541a8fba093fa..c1e9ec0243bacb 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -1,7 +1,7 @@
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2009-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
@@ -1118,8 +1118,9 @@ void lpfc_sli4_free_rpi(struct lpfc_hba *, int);
void lpfc_sli4_remove_rpis(struct lpfc_hba *);
void lpfc_sli4_async_event_proc(struct lpfc_hba *);
void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *);
-int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
- void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *);
+int lpfc_sli4_resume_rpi(struct lpfc_nodelist *ndlp,
+ void (*cmpl)(struct lpfc_hba *, LPFC_MBOXQ_t *),
+ struct lpfc_iocbq *iocbq);
void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba);
void lpfc_sli4_nvme_pci_offline_aborted(struct lpfc_hba *phba,
struct lpfc_io_buf *lpfc_ncmd);
diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 56f5889dbaf934..915f2f11fb5585 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
* included with this package. *
*******************************************************************/
-#define LPFC_DRIVER_VERSION "14.4.0.0"
+#define LPFC_DRIVER_VERSION "14.4.0.1"
#define LPFC_DRIVER_NAME "lpfc"
/* Used for SLI 2/3 */
diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c
index 0f79840b949861..4439167a51882d 100644
--- a/drivers/scsi/lpfc/lpfc_vport.c
+++ b/drivers/scsi/lpfc/lpfc_vport.c
@@ -166,7 +166,7 @@ lpfc_vport_sparm(struct lpfc_hba *phba, struct lpfc_vport *vport)
}
}
- mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
+ mp = pmb->ctx_buf;
memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm));
memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName,
sizeof (struct lpfc_name));
@@ -674,10 +674,6 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
lpfc_free_sysfs_attr(vport);
lpfc_debugfs_terminate(vport);
- /* Remove FC host to break driver binding. */
- fc_remove_host(shost);
- scsi_remove_host(shost);
-
/* Send the DA_ID and Fabric LOGO to cleanup Nameserver entries. */
ndlp = lpfc_findnode_did(vport, Fabric_DID);
if (!ndlp)
@@ -721,6 +717,10 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
skip_logo:
+ /* Remove FC host to break driver binding. */
+ fc_remove_host(shost);
+ scsi_remove_host(shost);
+
lpfc_cleanup(vport);
/* Remove scsi host now. The nodes are cleaned up. */
diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c
index 0380996b5ad27a..55d590b919476e 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_app.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_app.c
@@ -1644,7 +1644,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job)
if ((mpirep_offset != 0xFF) &&
drv_bufs[mpirep_offset].bsg_buf_len) {
drv_buf_iter = &drv_bufs[mpirep_offset];
- drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) - 1 +
+ drv_buf_iter->kern_buf_len = (sizeof(*bsg_reply_buf) +
mrioc->reply_sz);
bsg_reply_buf = kzalloc(drv_buf_iter->kern_buf_len, GFP_KERNEL);
diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c
index ca2e932dd9b701..f684eb5e04898a 100644
--- a/drivers/scsi/myrb.c
+++ b/drivers/scsi/myrb.c
@@ -1775,9 +1775,9 @@ static ssize_t raid_state_show(struct device *dev,
name = myrb_devstate_name(ldev_info->state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->state);
} else {
struct myrb_pdev_state *pdev_info = sdev->hostdata;
@@ -1796,9 +1796,9 @@ static ssize_t raid_state_show(struct device *dev,
else
name = myrb_devstate_name(pdev_info->state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
pdev_info->state);
}
return ret;
@@ -1886,11 +1886,11 @@ static ssize_t raid_level_show(struct device *dev,
name = myrb_raidlevel_name(ldev_info->raid_level);
if (!name)
- return snprintf(buf, 32, "Invalid (%02X)\n",
+ return snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->state);
- return snprintf(buf, 32, "%s\n", name);
+ return snprintf(buf, 64, "%s\n", name);
}
- return snprintf(buf, 32, "Physical Drive\n");
+ return snprintf(buf, 64, "Physical Drive\n");
}
static DEVICE_ATTR_RO(raid_level);
@@ -1903,15 +1903,15 @@ static ssize_t rebuild_show(struct device *dev,
unsigned char status;
if (sdev->channel < myrb_logical_channel(sdev->host))
- return snprintf(buf, 32, "physical device - not rebuilding\n");
+ return snprintf(buf, 64, "physical device - not rebuilding\n");
status = myrb_get_rbld_progress(cb, &rbld_buf);
if (rbld_buf.ldev_num != sdev->id ||
status != MYRB_STATUS_SUCCESS)
- return snprintf(buf, 32, "not rebuilding\n");
+ return snprintf(buf, 64, "not rebuilding\n");
- return snprintf(buf, 32, "rebuilding block %u of %u\n",
+ return snprintf(buf, 64, "rebuilding block %u of %u\n",
rbld_buf.ldev_size - rbld_buf.blocks_left,
rbld_buf.ldev_size);
}
diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c
index a1eec65a9713f5..e824be9d9bbb94 100644
--- a/drivers/scsi/myrs.c
+++ b/drivers/scsi/myrs.c
@@ -947,9 +947,9 @@ static ssize_t raid_state_show(struct device *dev,
name = myrs_devstate_name(ldev_info->dev_state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->dev_state);
} else {
struct myrs_pdev_info *pdev_info;
@@ -958,9 +958,9 @@ static ssize_t raid_state_show(struct device *dev,
pdev_info = sdev->hostdata;
name = myrs_devstate_name(pdev_info->dev_state);
if (name)
- ret = snprintf(buf, 32, "%s\n", name);
+ ret = snprintf(buf, 64, "%s\n", name);
else
- ret = snprintf(buf, 32, "Invalid (%02X)\n",
+ ret = snprintf(buf, 64, "Invalid (%02X)\n",
pdev_info->dev_state);
}
return ret;
@@ -1066,13 +1066,13 @@ static ssize_t raid_level_show(struct device *dev,
ldev_info = sdev->hostdata;
name = myrs_raid_level_name(ldev_info->raid_level);
if (!name)
- return snprintf(buf, 32, "Invalid (%02X)\n",
+ return snprintf(buf, 64, "Invalid (%02X)\n",
ldev_info->dev_state);
} else
name = myrs_raid_level_name(MYRS_RAID_PHYSICAL);
- return snprintf(buf, 32, "%s\n", name);
+ return snprintf(buf, 64, "%s\n", name);
}
static DEVICE_ATTR_RO(raid_level);
@@ -1086,7 +1086,7 @@ static ssize_t rebuild_show(struct device *dev,
unsigned char status;
if (sdev->channel < cs->ctlr_info->physchan_present)
- return snprintf(buf, 32, "physical device - not rebuilding\n");
+ return snprintf(buf, 64, "physical device - not rebuilding\n");
ldev_info = sdev->hostdata;
ldev_num = ldev_info->ldev_num;
@@ -1098,11 +1098,11 @@ static ssize_t rebuild_show(struct device *dev,
return -EIO;
}
if (ldev_info->rbld_active) {
- return snprintf(buf, 32, "rebuilding block %zu of %zu\n",
+ return snprintf(buf, 64, "rebuilding block %zu of %zu\n",
(size_t)ldev_info->rbld_lba,
(size_t)ldev_info->cfg_devsize);
} else
- return snprintf(buf, 32, "not rebuilding\n");
+ return snprintf(buf, 64, "not rebuilding\n");
}
static ssize_t rebuild_store(struct device *dev,
@@ -1190,7 +1190,7 @@ static ssize_t consistency_check_show(struct device *dev,
unsigned short ldev_num;
if (sdev->channel < cs->ctlr_info->physchan_present)
- return snprintf(buf, 32, "physical device - not checking\n");
+ return snprintf(buf, 64, "physical device - not checking\n");
ldev_info = sdev->hostdata;
if (!ldev_info)
@@ -1198,11 +1198,11 @@ static ssize_t consistency_check_show(struct device *dev,
ldev_num = ldev_info->ldev_num;
myrs_get_ldev_info(cs, ldev_num, ldev_info);
if (ldev_info->cc_active)
- return snprintf(buf, 32, "checking block %zu of %zu\n",
+ return snprintf(buf, 64, "checking block %zu of %zu\n",
(size_t)ldev_info->cc_lba,
(size_t)ldev_info->cfg_devsize);
else
- return snprintf(buf, 32, "not checking\n");
+ return snprintf(buf, 64, "not checking\n");
}
static ssize_t consistency_check_store(struct device *dev,
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index e8bcc3a88732a1..0614b7e366b776 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -61,7 +61,9 @@ static atomic_t pmcraid_adapter_count = ATOMIC_INIT(0);
* pmcraid_minor - minor number(s) to use
*/
static unsigned int pmcraid_major;
-static struct class *pmcraid_class;
+static const struct class pmcraid_class = {
+ .name = PMCRAID_DEVFILE,
+};
static DECLARE_BITMAP(pmcraid_minor, PMCRAID_MAX_ADAPTERS);
/*
@@ -4723,7 +4725,7 @@ static int pmcraid_setup_chrdev(struct pmcraid_instance *pinstance)
if (error)
pmcraid_release_minor(minor);
else
- device_create(pmcraid_class, NULL, MKDEV(pmcraid_major, minor),
+ device_create(&pmcraid_class, NULL, MKDEV(pmcraid_major, minor),
NULL, "%s%u", PMCRAID_DEVFILE, minor);
return error;
}
@@ -4739,7 +4741,7 @@ static int pmcraid_setup_chrdev(struct pmcraid_instance *pinstance)
static void pmcraid_release_chrdev(struct pmcraid_instance *pinstance)
{
pmcraid_release_minor(MINOR(pinstance->cdev.dev));
- device_destroy(pmcraid_class,
+ device_destroy(&pmcraid_class,
MKDEV(pmcraid_major, MINOR(pinstance->cdev.dev)));
cdev_del(&pinstance->cdev);
}
@@ -5390,10 +5392,10 @@ static int __init pmcraid_init(void)
}
pmcraid_major = MAJOR(dev);
- pmcraid_class = class_create(PMCRAID_DEVFILE);
- if (IS_ERR(pmcraid_class)) {
- error = PTR_ERR(pmcraid_class);
+ error = class_register(&pmcraid_class);
+
+ if (error) {
pmcraid_err("failed to register with sysfs, error = %x\n",
error);
goto out_unreg_chrdev;
@@ -5402,7 +5404,7 @@ static int __init pmcraid_init(void)
error = pmcraid_netlink_init();
if (error) {
- class_destroy(pmcraid_class);
+ class_unregister(&pmcraid_class);
goto out_unreg_chrdev;
}
@@ -5413,7 +5415,7 @@ static int __init pmcraid_init(void)
pmcraid_err("failed to register pmcraid driver, error = %x\n",
error);
- class_destroy(pmcraid_class);
+ class_unregister(&pmcraid_class);
pmcraid_netlink_release();
out_unreg_chrdev:
@@ -5432,7 +5434,7 @@ static void __exit pmcraid_exit(void)
unregister_chrdev_region(MKDEV(pmcraid_major, 0),
PMCRAID_MAX_ADAPTERS);
pci_unregister_driver(&pmcraid_driver);
- class_destroy(pmcraid_class);
+ class_unregister(&pmcraid_class);
}
module_init(pmcraid_init);
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 44449c70a375f3..76eeba435fd046 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -2741,7 +2741,13 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
return;
if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
- qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
+ /* Will wait for wind down of adapter */
+ ql_dbg(ql_dbg_aer, fcport->vha, 0x900c,
+ "%s pci offline detected (id %06x)\n", __func__,
+ fcport->d_id.b24);
+ qla_pci_set_eeh_busy(fcport->vha);
+ qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24,
+ 0, WAIT_TARGET);
return;
}
}
@@ -2763,7 +2769,11 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
vha = fcport->vha;
if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) {
- qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16);
+ /* Will wait for wind down of adapter */
+ ql_dbg(ql_dbg_aer, fcport->vha, 0x900b,
+ "%s pci offline detected (id %06x)\n", __func__,
+ fcport->d_id.b24);
+ qla_pci_set_eeh_busy(vha);
qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24,
0, WAIT_TARGET);
return;
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index deb642607deb6f..2f49baf131e26f 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -82,7 +82,7 @@ typedef union {
#include "qla_nvme.h"
#define QLA2XXX_DRIVER_NAME "qla2xxx"
#define QLA2XXX_APIDEV "ql2xapidev"
-#define QLA2XXX_MANUFACTURER "Marvell Semiconductor, Inc."
+#define QLA2XXX_MANUFACTURER "Marvell"
/*
* We have MAILBOX_REGISTER_COUNT sized arrays in a few places,
diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c
index 26e6b3e3af4317..dcde55c8ee5dea 100644
--- a/drivers/scsi/qla2xxx/qla_edif.c
+++ b/drivers/scsi/qla2xxx/qla_edif.c
@@ -1100,7 +1100,7 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
if (fcport->edif.enable) {
- if (pcnt > app_req.num_ports)
+ if (pcnt >= app_req.num_ports)
break;
app_reply->elem[pcnt].rekey_count =
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 09cb9413670a5e..7309310d2ab943 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -44,7 +44,7 @@ extern int qla2x00_fabric_login(scsi_qla_host_t *, fc_port_t *, uint16_t *);
extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *);
extern int qla24xx_els_dcmd_iocb(scsi_qla_host_t *, int, port_id_t);
-extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *, bool);
+extern int qla24xx_els_dcmd2_iocb(scsi_qla_host_t *, int, fc_port_t *);
extern void qla2x00_els_dcmd2_free(scsi_qla_host_t *vha,
struct els_plogi *els_plogi);
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index a314cfc5b263f2..8377624d76c98e 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1193,8 +1193,12 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport)
return rval;
done_free_sp:
- /* ref: INIT */
- kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ /*
+ * use qla24xx_async_gnl_sp_done to purge all pending gnl request.
+ * kref_put is call behind the scene.
+ */
+ sp->u.iocb_cmd.u.mbx.in_mb[0] = MBS_COMMAND_ERROR;
+ qla24xx_async_gnl_sp_done(sp, QLA_COMMAND_ERROR);
fcport->flags &= ~(FCF_ASYNC_SENT);
done:
fcport->flags &= ~(FCF_ASYNC_ACTIVE);
@@ -2665,6 +2669,40 @@ exit:
return rval;
}
+static void qla_enable_fce_trace(scsi_qla_host_t *vha)
+{
+ int rval;
+ struct qla_hw_data *ha = vha->hw;
+
+ if (ha->fce) {
+ ha->flags.fce_enabled = 1;
+ memset(ha->fce, 0, fce_calc_size(ha->fce_bufs));
+ rval = qla2x00_enable_fce_trace(vha,
+ ha->fce_dma, ha->fce_bufs, ha->fce_mb, &ha->fce_bufs);
+
+ if (rval) {
+ ql_log(ql_log_warn, vha, 0x8033,
+ "Unable to reinitialize FCE (%d).\n", rval);
+ ha->flags.fce_enabled = 0;
+ }
+ }
+}
+
+static void qla_enable_eft_trace(scsi_qla_host_t *vha)
+{
+ int rval;
+ struct qla_hw_data *ha = vha->hw;
+
+ if (ha->eft) {
+ memset(ha->eft, 0, EFT_SIZE);
+ rval = qla2x00_enable_eft_trace(vha, ha->eft_dma, EFT_NUM_BUFFERS);
+
+ if (rval) {
+ ql_log(ql_log_warn, vha, 0x8034,
+ "Unable to reinitialize EFT (%d).\n", rval);
+ }
+ }
+}
/*
* qla2x00_initialize_adapter
* Initialize board.
@@ -3668,9 +3706,8 @@ qla24xx_chip_diag(scsi_qla_host_t *vha)
}
static void
-qla2x00_init_fce_trace(scsi_qla_host_t *vha)
+qla2x00_alloc_fce_trace(scsi_qla_host_t *vha)
{
- int rval;
dma_addr_t tc_dma;
void *tc;
struct qla_hw_data *ha = vha->hw;
@@ -3699,27 +3736,17 @@ qla2x00_init_fce_trace(scsi_qla_host_t *vha)
return;
}
- rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS,
- ha->fce_mb, &ha->fce_bufs);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x00bf,
- "Unable to initialize FCE (%d).\n", rval);
- dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc, tc_dma);
- return;
- }
-
ql_dbg(ql_dbg_init, vha, 0x00c0,
"Allocated (%d KB) for FCE...\n", FCE_SIZE / 1024);
- ha->flags.fce_enabled = 1;
ha->fce_dma = tc_dma;
ha->fce = tc;
+ ha->fce_bufs = FCE_NUM_BUFFERS;
}
static void
-qla2x00_init_eft_trace(scsi_qla_host_t *vha)
+qla2x00_alloc_eft_trace(scsi_qla_host_t *vha)
{
- int rval;
dma_addr_t tc_dma;
void *tc;
struct qla_hw_data *ha = vha->hw;
@@ -3744,14 +3771,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha)
return;
}
- rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x00c2,
- "Unable to initialize EFT (%d).\n", rval);
- dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc, tc_dma);
- return;
- }
-
ql_dbg(ql_dbg_init, vha, 0x00c3,
"Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024);
@@ -3759,13 +3778,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t *vha)
ha->eft = tc;
}
-static void
-qla2x00_alloc_offload_mem(scsi_qla_host_t *vha)
-{
- qla2x00_init_fce_trace(vha);
- qla2x00_init_eft_trace(vha);
-}
-
void
qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
{
@@ -3820,10 +3832,10 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *vha)
if (ha->tgt.atio_ring)
mq_size += ha->tgt.atio_q_length * sizeof(request_t);
- qla2x00_init_fce_trace(vha);
+ qla2x00_alloc_fce_trace(vha);
if (ha->fce)
fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE;
- qla2x00_init_eft_trace(vha);
+ qla2x00_alloc_eft_trace(vha);
if (ha->eft)
eft_size = EFT_SIZE;
}
@@ -4253,7 +4265,6 @@ qla2x00_setup_chip(scsi_qla_host_t *vha)
struct qla_hw_data *ha = vha->hw;
struct device_reg_2xxx __iomem *reg = &ha->iobase->isp;
unsigned long flags;
- uint16_t fw_major_version;
int done_once = 0;
if (IS_P3P_TYPE(ha)) {
@@ -4320,7 +4331,6 @@ execute_fw_with_lr:
goto failed;
enable_82xx_npiv:
- fw_major_version = ha->fw_major_version;
if (IS_P3P_TYPE(ha))
qla82xx_check_md_needed(vha);
else
@@ -4349,12 +4359,11 @@ enable_82xx_npiv:
if (rval != QLA_SUCCESS)
goto failed;
- if (!fw_major_version && !(IS_P3P_TYPE(ha)))
- qla2x00_alloc_offload_mem(vha);
-
if (ql2xallocfwdump && !(IS_P3P_TYPE(ha)))
qla2x00_alloc_fw_dump(vha);
+ qla_enable_fce_trace(vha);
+ qla_enable_eft_trace(vha);
} else {
goto failed;
}
@@ -7487,12 +7496,12 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
int
qla2x00_abort_isp(scsi_qla_host_t *vha)
{
- int rval;
uint8_t status = 0;
struct qla_hw_data *ha = vha->hw;
struct scsi_qla_host *vp, *tvp;
struct req_que *req = ha->req_q_map[0];
unsigned long flags;
+ fc_port_t *fcport;
if (vha->flags.online) {
qla2x00_abort_isp_cleanup(vha);
@@ -7561,6 +7570,15 @@ qla2x00_abort_isp(scsi_qla_host_t *vha)
"ISP Abort - ISP reg disconnect post nvmram config, exiting.\n");
return status;
}
+
+ /* User may have updated [fcp|nvme] prefer in flash */
+ list_for_each_entry(fcport, &vha->vp_fcports, list) {
+ if (NVME_PRIORITY(ha, fcport))
+ fcport->do_prli_nvme = 1;
+ else
+ fcport->do_prli_nvme = 0;
+ }
+
if (!qla2x00_restart_isp(vha)) {
clear_bit(RESET_MARKER_NEEDED, &vha->dpc_flags);
@@ -7581,31 +7599,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha)
if (IS_QLA81XX(ha) || IS_QLA8031(ha))
qla2x00_get_fw_version(vha);
- if (ha->fce) {
- ha->flags.fce_enabled = 1;
- memset(ha->fce, 0,
- fce_calc_size(ha->fce_bufs));
- rval = qla2x00_enable_fce_trace(vha,
- ha->fce_dma, ha->fce_bufs, ha->fce_mb,
- &ha->fce_bufs);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x8033,
- "Unable to reinitialize FCE "
- "(%d).\n", rval);
- ha->flags.fce_enabled = 0;
- }
- }
- if (ha->eft) {
- memset(ha->eft, 0, EFT_SIZE);
- rval = qla2x00_enable_eft_trace(vha,
- ha->eft_dma, EFT_NUM_BUFFERS);
- if (rval) {
- ql_log(ql_log_warn, vha, 0x8034,
- "Unable to reinitialize EFT "
- "(%d).\n", rval);
- }
- }
} else { /* failed the ISP abort */
vha->flags.online = 1;
if (test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) {
@@ -7655,6 +7649,14 @@ qla2x00_abort_isp(scsi_qla_host_t *vha)
atomic_inc(&vp->vref_count);
spin_unlock_irqrestore(&ha->vport_slock, flags);
+ /* User may have updated [fcp|nvme] prefer in flash */
+ list_for_each_entry(fcport, &vp->vp_fcports, list) {
+ if (NVME_PRIORITY(ha, fcport))
+ fcport->do_prli_nvme = 1;
+ else
+ fcport->do_prli_nvme = 0;
+ }
+
qla2x00_vp_abort_isp(vp);
spin_lock_irqsave(&ha->vport_slock, flags);
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index df90169f82440a..0b41e8a0660262 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -2587,6 +2587,33 @@ void
qla2x00_sp_release(struct kref *kref)
{
struct srb *sp = container_of(kref, struct srb, cmd_kref);
+ struct scsi_qla_host *vha = sp->vha;
+
+ switch (sp->type) {
+ case SRB_CT_PTHRU_CMD:
+ /* GPSC & GFPNID use fcport->ct_desc.ct_sns for both req & rsp */
+ if (sp->u.iocb_cmd.u.ctarg.req &&
+ (!sp->fcport ||
+ sp->u.iocb_cmd.u.ctarg.req != sp->fcport->ct_desc.ct_sns)) {
+ dma_free_coherent(&vha->hw->pdev->dev,
+ sp->u.iocb_cmd.u.ctarg.req_allocated_size,
+ sp->u.iocb_cmd.u.ctarg.req,
+ sp->u.iocb_cmd.u.ctarg.req_dma);
+ sp->u.iocb_cmd.u.ctarg.req = NULL;
+ }
+ if (sp->u.iocb_cmd.u.ctarg.rsp &&
+ (!sp->fcport ||
+ sp->u.iocb_cmd.u.ctarg.rsp != sp->fcport->ct_desc.ct_sns)) {
+ dma_free_coherent(&vha->hw->pdev->dev,
+ sp->u.iocb_cmd.u.ctarg.rsp_allocated_size,
+ sp->u.iocb_cmd.u.ctarg.rsp,
+ sp->u.iocb_cmd.u.ctarg.rsp_dma);
+ sp->u.iocb_cmd.u.ctarg.rsp = NULL;
+ }
+ break;
+ default:
+ break;
+ }
sp->free(sp);
}
@@ -2610,7 +2637,8 @@ static void qla2x00_els_dcmd_sp_free(srb_t *sp)
{
struct srb_iocb *elsio = &sp->u.iocb_cmd;
- kfree(sp->fcport);
+ if (sp->fcport)
+ qla2x00_free_fcport(sp->fcport);
if (elsio->u.els_logo.els_logo_pyld)
dma_free_coherent(&sp->vha->hw->pdev->dev, DMA_POOL_SIZE,
@@ -2692,7 +2720,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode,
*/
sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
if (!sp) {
- kfree(fcport);
+ qla2x00_free_fcport(fcport);
ql_log(ql_log_info, vha, 0x70e6,
"SRB allocation failed\n");
return -ENOMEM;
@@ -2723,6 +2751,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode,
if (!elsio->u.els_logo.els_logo_pyld) {
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ qla2x00_free_fcport(fcport);
return QLA_FUNCTION_FAILED;
}
@@ -2747,6 +2776,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode,
if (rval != QLA_SUCCESS) {
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
+ qla2x00_free_fcport(fcport);
return QLA_FUNCTION_FAILED;
}
@@ -3012,7 +3042,7 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res)
int
qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
- fc_port_t *fcport, bool wait)
+ fc_port_t *fcport)
{
srb_t *sp;
struct srb_iocb *elsio = NULL;
@@ -3027,8 +3057,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
if (!sp) {
ql_log(ql_log_info, vha, 0x70e6,
"SRB allocation failed\n");
- fcport->flags &= ~FCF_ASYNC_ACTIVE;
- return -ENOMEM;
+ goto done;
}
fcport->flags |= FCF_ASYNC_SENT;
@@ -3037,9 +3066,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
ql_dbg(ql_dbg_io, vha, 0x3073,
"%s Enter: PLOGI portid=%06x\n", __func__, fcport->d_id.b24);
- if (wait)
- sp->flags = SRB_WAKEUP_ON_COMP;
-
sp->type = SRB_ELS_DCMD;
sp->name = "ELS_DCMD";
sp->fcport = fcport;
@@ -3055,7 +3081,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
if (!elsio->u.els_plogi.els_plogi_pyld) {
rval = QLA_FUNCTION_FAILED;
- goto out;
+ goto done_free_sp;
}
resp_ptr = elsio->u.els_plogi.els_resp_pyld =
@@ -3064,7 +3090,7 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
if (!elsio->u.els_plogi.els_resp_pyld) {
rval = QLA_FUNCTION_FAILED;
- goto out;
+ goto done_free_sp;
}
ql_dbg(ql_dbg_io, vha, 0x3073, "PLOGI %p %p\n", ptr, resp_ptr);
@@ -3080,7 +3106,6 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
if (els_opcode == ELS_DCMD_PLOGI && DBELL_ACTIVE(vha)) {
struct fc_els_flogi *p = ptr;
-
p->fl_csp.sp_features |= cpu_to_be16(FC_SP_FT_SEC);
}
@@ -3089,10 +3114,11 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
(uint8_t *)elsio->u.els_plogi.els_plogi_pyld,
sizeof(*elsio->u.els_plogi.els_plogi_pyld));
- init_completion(&elsio->u.els_plogi.comp);
rval = qla2x00_start_sp(sp);
if (rval != QLA_SUCCESS) {
- rval = QLA_FUNCTION_FAILED;
+ fcport->flags |= FCF_LOGIN_NEEDED;
+ set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
+ goto done_free_sp;
} else {
ql_dbg(ql_dbg_disc, vha, 0x3074,
"%s PLOGI sent, hdl=%x, loopid=%x, to port_id %06x from port_id %06x\n",
@@ -3100,21 +3126,15 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
fcport->d_id.b24, vha->d_id.b24);
}
- if (wait) {
- wait_for_completion(&elsio->u.els_plogi.comp);
-
- if (elsio->u.els_plogi.comp_status != CS_COMPLETE)
- rval = QLA_FUNCTION_FAILED;
- } else {
- goto done;
- }
+ return rval;
-out:
- fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+done_free_sp:
qla2x00_els_dcmd2_free(vha, &elsio->u.els_plogi);
/* ref: INIT */
kref_put(&sp->cmd_kref, qla2x00_sp_release);
done:
+ fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE);
+ qla2x00_set_fcport_disc_state(fcport, DSC_DELETED);
return rval;
}
@@ -3918,7 +3938,7 @@ qla2x00_start_sp(srb_t *sp)
return -EAGAIN;
}
- pkt = __qla2x00_alloc_iocbs(sp->qpair, sp);
+ pkt = qla2x00_alloc_iocbs_ready(sp->qpair, sp);
if (!pkt) {
rval = -EAGAIN;
ql_log(ql_log_warn, vha, 0x700c,
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 21ec32b4fb2809..0cd6f3e1488249 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -194,7 +194,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
if (ha->flags.purge_mbox || chip_reset != ha->chip_reset ||
ha->flags.eeh_busy) {
ql_log(ql_log_warn, vha, 0xd035,
- "Error detected: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n",
+ "Purge mbox: purge[%d] eeh[%d] cmd=0x%x, Exiting.\n",
ha->flags.purge_mbox, ha->flags.eeh_busy, mcp->mb[0]);
rval = QLA_ABORTED;
goto premature_exit;
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index dd674378f2f392..1e2f52210f6050 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -4602,6 +4602,7 @@ fail_free_init_cb:
ha->init_cb_dma = 0;
fail_free_vp_map:
kfree(ha->vp_map);
+ ha->vp_map = NULL;
fail:
ql_log(ql_log_fatal, NULL, 0x0030,
"Memory allocation failure.\n");
@@ -5583,7 +5584,7 @@ qla2x00_do_work(struct scsi_qla_host *vha)
break;
case QLA_EVT_ELS_PLOGI:
qla24xx_els_dcmd2_iocb(vha, ELS_DCMD_PLOGI,
- e->u.fcport.fcport, false);
+ e->u.fcport.fcport);
break;
case QLA_EVT_SA_REPLACE:
rc = qla24xx_issue_sa_replace_iocb(vha, e);
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 2ef2dbac0db273..d7551b1443e4a7 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -1062,6 +1062,16 @@ void qlt_free_session_done(struct work_struct *work)
"%s: sess %p logout completed\n", __func__, sess);
}
+ /* check for any straggling io left behind */
+ if (!(sess->flags & FCF_FCP2_DEVICE) &&
+ qla2x00_eh_wait_for_pending_commands(sess->vha, sess->d_id.b24, 0, WAIT_TARGET)) {
+ ql_log(ql_log_warn, vha, 0x3027,
+ "IO not return. Resetting.\n");
+ set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
+ qla2xxx_wake_dpc(vha);
+ qla2x00_wait_for_chip_reset(vha);
+ }
+
if (sess->logo_ack_needed) {
sess->logo_ack_needed = 0;
qla24xx_async_notify_ack(vha, sess,
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index d903563e969eb3..7627fd807bc3ed 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -6,9 +6,9 @@
/*
* Driver version
*/
-#define QLA2XXX_VERSION "10.02.09.100-k"
+#define QLA2XXX_VERSION "10.02.09.200-k"
#define QLA_DRIVER_MAJOR_VER 10
#define QLA_DRIVER_MINOR_VER 2
#define QLA_DRIVER_PATCH_VER 9
-#define QLA_DRIVER_BETA_VER 100
+#define QLA_DRIVER_BETA_VER 200
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 2e28e2360c8574..5b3230ef51fe61 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -635,10 +635,9 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
if (blk_queue_add_random(q))
add_disk_randomness(req->q->disk);
- if (!blk_rq_is_passthrough(req)) {
- WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
- cmd->flags &= ~SCMD_INITIALIZED;
- }
+ WARN_ON_ONCE(!blk_rq_is_passthrough(req) &&
+ !(cmd->flags & SCMD_INITIALIZED));
+ cmd->flags = 0;
/*
* Calling rcu_barrier() is not necessary here because the
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 8d06475de17a33..ffd7e7e72933c5 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1642,6 +1642,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel,
}
EXPORT_SYMBOL(scsi_add_device);
+int scsi_resume_device(struct scsi_device *sdev)
+{
+ struct device *dev = &sdev->sdev_gendev;
+ int ret = 0;
+
+ device_lock(dev);
+
+ /*
+ * Bail out if the device or its queue are not running. Otherwise,
+ * the rescan may block waiting for commands to be executed, with us
+ * holding the device lock. This can result in a potential deadlock
+ * in the power management core code when system resume is on-going.
+ */
+ if (sdev->sdev_state != SDEV_RUNNING ||
+ blk_queue_pm_only(sdev->request_queue)) {
+ ret = -EWOULDBLOCK;
+ goto unlock;
+ }
+
+ if (dev->driver && try_module_get(dev->driver->owner)) {
+ struct scsi_driver *drv = to_scsi_driver(dev->driver);
+
+ if (drv->resume)
+ ret = drv->resume(dev);
+ module_put(dev->driver->owner);
+ }
+
+unlock:
+ device_unlock(dev);
+
+ return ret;
+}
+EXPORT_SYMBOL(scsi_resume_device);
+
int scsi_rescan_device(struct scsi_device *sdev)
{
struct device *dev = &sdev->sdev_gendev;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index ccff8f2e2e75bd..58fdf679341dc6 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3920,7 +3920,7 @@ static int sd_probe(struct device *dev)
error = device_add_disk(dev, gd, NULL);
if (error) {
- put_device(&sdkp->disk_dev);
+ device_unregister(&sdkp->disk_dev);
put_disk(gd);
goto out;
}
@@ -4108,7 +4108,21 @@ static int sd_suspend_runtime(struct device *dev)
return sd_suspend_common(dev, true);
}
-static int sd_resume(struct device *dev, bool runtime)
+static int sd_resume(struct device *dev)
+{
+ struct scsi_disk *sdkp = dev_get_drvdata(dev);
+
+ sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+
+ if (opal_unlock_from_suspend(sdkp->opal_dev)) {
+ sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int sd_resume_common(struct device *dev, bool runtime)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
int ret;
@@ -4124,7 +4138,7 @@ static int sd_resume(struct device *dev, bool runtime)
sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
ret = sd_start_stop_device(sdkp, 1);
if (!ret) {
- opal_unlock_from_suspend(sdkp->opal_dev);
+ sd_resume(dev);
sdkp->suspended = false;
}
@@ -4143,7 +4157,7 @@ static int sd_resume_system(struct device *dev)
return 0;
}
- return sd_resume(dev, false);
+ return sd_resume_common(dev, false);
}
static int sd_resume_runtime(struct device *dev)
@@ -4170,7 +4184,7 @@ static int sd_resume_runtime(struct device *dev)
"Failed to clear sense data\n");
}
- return sd_resume(dev, true);
+ return sd_resume_common(dev, true);
}
static const struct dev_pm_ops sd_pm_ops = {
@@ -4193,6 +4207,7 @@ static struct scsi_driver sd_template = {
.pm = &sd_pm_ops,
},
.rescan = sd_rescan,
+ .resume = sd_resume,
.init_command = sd_init_command,
.uninit_command = sd_uninit_command,
.done = sd_done,
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 86210e4dd0d353..baf870a03ecf6c 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -285,6 +285,7 @@ sg_open(struct inode *inode, struct file *filp)
int dev = iminor(inode);
int flags = filp->f_flags;
struct request_queue *q;
+ struct scsi_device *device;
Sg_device *sdp;
Sg_fd *sfp;
int retval;
@@ -301,11 +302,12 @@ sg_open(struct inode *inode, struct file *filp)
/* This driver's module count bumped by fops_get in <linux/fs.h> */
/* Prevent the device driver from vanishing while we sleep */
- retval = scsi_device_get(sdp->device);
+ device = sdp->device;
+ retval = scsi_device_get(device);
if (retval)
goto sg_put;
- retval = scsi_autopm_get_device(sdp->device);
+ retval = scsi_autopm_get_device(device);
if (retval)
goto sdp_put;
@@ -313,7 +315,7 @@ sg_open(struct inode *inode, struct file *filp)
* check if O_NONBLOCK. Permits SCSI commands to be issued
* during error recovery. Tread carefully. */
if (!((flags & O_NONBLOCK) ||
- scsi_block_when_processing_errors(sdp->device))) {
+ scsi_block_when_processing_errors(device))) {
retval = -ENXIO;
/* we are in error recovery for this device */
goto error_out;
@@ -344,7 +346,7 @@ sg_open(struct inode *inode, struct file *filp)
if (sdp->open_cnt < 1) { /* no existing opens */
sdp->sgdebug = 0;
- q = sdp->device->request_queue;
+ q = device->request_queue;
sdp->sg_tablesize = queue_max_segments(q);
}
sfp = sg_add_sfp(sdp);
@@ -370,10 +372,11 @@ out_undo:
error_mutex_locked:
mutex_unlock(&sdp->open_rel_lock);
error_out:
- scsi_autopm_put_device(sdp->device);
+ scsi_autopm_put_device(device);
sdp_put:
- scsi_device_put(sdp->device);
- goto sg_put;
+ kref_put(&sdp->d_ref, sg_device_destroy);
+ scsi_device_put(device);
+ return retval;
}
/* Release resources associated with a successful sg_open()
@@ -1424,7 +1427,9 @@ static const struct file_operations sg_fops = {
.llseek = no_llseek,
};
-static struct class *sg_sysfs_class;
+static const struct class sg_sysfs_class = {
+ .name = "scsi_generic"
+};
static int sg_sysfs_valid = 0;
@@ -1526,7 +1531,7 @@ sg_add_device(struct device *cl_dev)
if (sg_sysfs_valid) {
struct device *sg_class_member;
- sg_class_member = device_create(sg_sysfs_class, cl_dev->parent,
+ sg_class_member = device_create(&sg_sysfs_class, cl_dev->parent,
MKDEV(SCSI_GENERIC_MAJOR,
sdp->index),
sdp, "%s", sdp->name);
@@ -1616,7 +1621,7 @@ sg_remove_device(struct device *cl_dev)
read_unlock_irqrestore(&sdp->sfd_lock, iflags);
sysfs_remove_link(&scsidp->sdev_gendev.kobj, "generic");
- device_destroy(sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index));
+ device_destroy(&sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index));
cdev_del(sdp->cdev);
sdp->cdev = NULL;
@@ -1687,11 +1692,9 @@ init_sg(void)
SG_MAX_DEVS, "sg");
if (rc)
return rc;
- sg_sysfs_class = class_create("scsi_generic");
- if ( IS_ERR(sg_sysfs_class) ) {
- rc = PTR_ERR(sg_sysfs_class);
+ rc = class_register(&sg_sysfs_class);
+ if (rc)
goto err_out;
- }
sg_sysfs_valid = 1;
rc = scsi_register_interface(&sg_interface);
if (0 == rc) {
@@ -1700,7 +1703,7 @@ init_sg(void)
#endif /* CONFIG_SCSI_PROC_FS */
return 0;
}
- class_destroy(sg_sysfs_class);
+ class_unregister(&sg_sysfs_class);
register_sg_sysctls();
err_out:
unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS);
@@ -1715,7 +1718,7 @@ exit_sg(void)
remove_proc_subtree("scsi/sg", NULL);
#endif /* CONFIG_SCSI_PROC_FS */
scsi_unregister_interface(&sg_interface);
- class_destroy(sg_sysfs_class);
+ class_unregister(&sg_sysfs_class);
sg_sysfs_valid = 0;
unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0),
SG_MAX_DEVS);
@@ -2207,6 +2210,7 @@ sg_remove_sfp_usercontext(struct work_struct *work)
{
struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work);
struct sg_device *sdp = sfp->parentdp;
+ struct scsi_device *device = sdp->device;
Sg_request *srp;
unsigned long iflags;
@@ -2232,8 +2236,8 @@ sg_remove_sfp_usercontext(struct work_struct *work)
"sg_remove_sfp: sfp=0x%p\n", sfp));
kfree(sfp);
- scsi_device_put(sdp->device);
kref_put(&sdp->d_ref, sg_device_destroy);
+ scsi_device_put(device);
module_put(THIS_MODULE);
}
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 338aa8c429682c..5a9bcf8e0792e5 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -87,7 +87,7 @@ static int try_rdio = 1;
static int try_wdio = 1;
static int debug_flag;
-static struct class st_sysfs_class;
+static const struct class st_sysfs_class;
static const struct attribute_group *st_dev_groups[];
static const struct attribute_group *st_drv_groups[];
@@ -4438,7 +4438,7 @@ static void scsi_tape_release(struct kref *kref)
return;
}
-static struct class st_sysfs_class = {
+static const struct class st_sysfs_class = {
.name = "scsi_tape",
.dev_groups = st_dev_groups,
};
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index 079035db7dd859..92a662d1b55cf2 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -852,39 +852,39 @@ static int fsl_lpspi_probe(struct platform_device *pdev)
fsl_lpspi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(fsl_lpspi->base)) {
ret = PTR_ERR(fsl_lpspi->base);
- goto out_controller_put;
+ return ret;
}
fsl_lpspi->base_phys = res->start;
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
ret = irq;
- goto out_controller_put;
+ return ret;
}
ret = devm_request_irq(&pdev->dev, irq, fsl_lpspi_isr, 0,
dev_name(&pdev->dev), fsl_lpspi);
if (ret) {
dev_err(&pdev->dev, "can't get irq%d: %d\n", irq, ret);
- goto out_controller_put;
+ return ret;
}
fsl_lpspi->clk_per = devm_clk_get(&pdev->dev, "per");
if (IS_ERR(fsl_lpspi->clk_per)) {
ret = PTR_ERR(fsl_lpspi->clk_per);
- goto out_controller_put;
+ return ret;
}
fsl_lpspi->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
if (IS_ERR(fsl_lpspi->clk_ipg)) {
ret = PTR_ERR(fsl_lpspi->clk_ipg);
- goto out_controller_put;
+ return ret;
}
/* enable the clock */
ret = fsl_lpspi_init_rpm(fsl_lpspi);
if (ret)
- goto out_controller_put;
+ return ret;
ret = pm_runtime_get_sync(fsl_lpspi->dev);
if (ret < 0) {
@@ -945,8 +945,6 @@ out_pm_get:
pm_runtime_dont_use_autosuspend(fsl_lpspi->dev);
pm_runtime_put_sync(fsl_lpspi->dev);
pm_runtime_disable(fsl_lpspi->dev);
-out_controller_put:
- spi_controller_put(controller);
return ret;
}
diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c
index 969965d7bc98b5..cc18d320370f97 100644
--- a/drivers/spi/spi-pci1xxxx.c
+++ b/drivers/spi/spi-pci1xxxx.c
@@ -725,6 +725,8 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id *
spi_bus->spi_int[iter] = devm_kzalloc(&pdev->dev,
sizeof(struct pci1xxxx_spi_internal),
GFP_KERNEL);
+ if (!spi_bus->spi_int[iter])
+ return -ENOMEM;
spi_sub_ptr = spi_bus->spi_int[iter];
spi_sub_ptr->spi_host = devm_spi_alloc_host(dev, sizeof(struct spi_controller));
if (!spi_sub_ptr->spi_host)
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 9fcbe040cb2f2e..f726d86704287e 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -430,7 +430,7 @@ static bool s3c64xx_spi_can_dma(struct spi_controller *host,
struct s3c64xx_spi_driver_data *sdd = spi_controller_get_devdata(host);
if (sdd->rx_dma.ch && sdd->tx_dma.ch)
- return xfer->len > sdd->fifo_depth;
+ return xfer->len >= sdd->fifo_depth;
return false;
}
@@ -826,10 +826,9 @@ static int s3c64xx_spi_transfer_one(struct spi_controller *host,
return status;
}
- if (!is_polling(sdd) && (xfer->len > fifo_len) &&
+ if (!is_polling(sdd) && xfer->len >= fifo_len &&
sdd->rx_dma.ch && sdd->tx_dma.ch) {
use_dma = 1;
-
} else if (xfer->len >= fifo_len) {
tx_buf = xfer->tx_buf;
rx_buf = xfer->rx_buf;
diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
index 258aa0e37f5544..4c3684dd902ed4 100644
--- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
+++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
@@ -937,8 +937,9 @@ static int create_component(struct vchiq_mmal_instance *instance,
/* build component create message */
m.h.type = MMAL_MSG_TYPE_COMPONENT_CREATE;
m.u.component_create.client_component = component->client_component;
- strncpy(m.u.component_create.name, name,
- sizeof(m.u.component_create.name));
+ strscpy_pad(m.u.component_create.name, name,
+ sizeof(m.u.component_create.name));
+ m.u.component_create.pid = 0;
ret = send_synchronous_mmal_msg(instance, &m,
sizeof(m.u.component_create),
diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c
index 6797200211836d..d9a6242264b787 100644
--- a/drivers/target/iscsi/iscsi_target_erl1.c
+++ b/drivers/target/iscsi/iscsi_target_erl1.c
@@ -583,7 +583,7 @@ int iscsit_dataout_datapduinorder_no_fbit(
struct iscsi_pdu *pdu)
{
int i, send_recovery_r2t = 0, recovery = 0;
- u32 length = 0, offset = 0, pdu_count = 0, xfer_len = 0;
+ u32 length = 0, offset = 0, pdu_count = 0;
struct iscsit_conn *conn = cmd->conn;
struct iscsi_pdu *first_pdu = NULL;
@@ -596,7 +596,6 @@ int iscsit_dataout_datapduinorder_no_fbit(
if (cmd->pdu_list[i].seq_no == pdu->seq_no) {
if (!first_pdu)
first_pdu = &cmd->pdu_list[i];
- xfer_len += cmd->pdu_list[i].length;
pdu_count++;
} else if (pdu_count)
break;
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index c1fbcdd1618264..c40217f44b1bc5 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -3672,6 +3672,8 @@ static int __init target_core_init_configfs(void)
{
struct configfs_subsystem *subsys = &target_core_fabrics;
struct t10_alua_lu_gp *lu_gp;
+ struct cred *kern_cred;
+ const struct cred *old_cred;
int ret;
pr_debug("TARGET_CORE[0]: Loading Generic Kernel Storage"
@@ -3748,11 +3750,21 @@ static int __init target_core_init_configfs(void)
if (ret < 0)
goto out;
+ /* We use the kernel credentials to access the target directory */
+ kern_cred = prepare_kernel_cred(&init_task);
+ if (!kern_cred) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ old_cred = override_creds(kern_cred);
target_init_dbroot();
+ revert_creds(old_cred);
+ put_cred(kern_cred);
return 0;
out:
+ target_xcopy_release_pt();
configfs_unregister_subsystem(subsys);
core_dev_release_virtual_lun0();
rd_module_exit();
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 50dec24e967a00..8fd7cf1932cd44 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -214,7 +214,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd
res = dfc->power_ops->get_real_power(df, power, freq, voltage);
if (!res) {
- state = dfc->capped_state;
+ state = dfc->max_state - dfc->capped_state;
/* Convert EM power into milli-Watts first */
rcu_read_lock();
diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 1b17dc4c219cc9..e25e48d76aa79c 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -606,7 +606,7 @@ static int allocate_actors_buffer(struct power_allocator_params *params,
/* There might be no cooling devices yet. */
if (!num_actors) {
- ret = -EINVAL;
+ ret = 0;
goto clean_state;
}
@@ -679,11 +679,6 @@ static int power_allocator_bind(struct thermal_zone_device *tz)
return -ENOMEM;
get_governor_trips(tz, params);
- if (!params->trip_max) {
- dev_warn(&tz->device, "power_allocator: missing trip_max\n");
- kfree(params);
- return -EINVAL;
- }
ret = check_power_actors(tz, params);
if (ret < 0) {
@@ -714,9 +709,10 @@ static int power_allocator_bind(struct thermal_zone_device *tz)
else
params->sustainable_power = tz->tzp->sustainable_power;
- estimate_pid_constants(tz, tz->tzp->sustainable_power,
- params->trip_switch_on,
- params->trip_max->temperature);
+ if (params->trip_max)
+ estimate_pid_constants(tz, tz->tzp->sustainable_power,
+ params->trip_switch_on,
+ params->trip_max->temperature);
reset_pid_controller(params);
diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c
index c617e8b9f0ddfe..d78d54ae2605e8 100644
--- a/drivers/thermal/thermal_debugfs.c
+++ b/drivers/thermal/thermal_debugfs.c
@@ -616,6 +616,7 @@ void thermal_debug_tz_trip_up(struct thermal_zone_device *tz,
tze->trip_stats[trip_id].timestamp = now;
tze->trip_stats[trip_id].max = max(tze->trip_stats[trip_id].max, temperature);
tze->trip_stats[trip_id].min = min(tze->trip_stats[trip_id].min, temperature);
+ tze->trip_stats[trip_id].count++;
tze->trip_stats[trip_id].avg = tze->trip_stats[trip_id].avg +
(temperature - tze->trip_stats[trip_id].avg) /
tze->trip_stats[trip_id].count;
diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c
index 09f6050dd04161..497abf0d47cac5 100644
--- a/drivers/thermal/thermal_trip.c
+++ b/drivers/thermal/thermal_trip.c
@@ -65,7 +65,6 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz)
{
const struct thermal_trip *trip;
int low = -INT_MAX, high = INT_MAX;
- bool same_trip = false;
int ret;
lockdep_assert_held(&tz->lock);
@@ -74,36 +73,22 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz)
return;
for_each_trip(tz, trip) {
- bool low_set = false;
int trip_low;
trip_low = trip->temperature - trip->hysteresis;
- if (trip_low < tz->temperature && trip_low > low) {
+ if (trip_low < tz->temperature && trip_low > low)
low = trip_low;
- low_set = true;
- same_trip = false;
- }
if (trip->temperature > tz->temperature &&
- trip->temperature < high) {
+ trip->temperature < high)
high = trip->temperature;
- same_trip = low_set;
- }
}
/* No need to change trip points */
if (tz->prev_low_trip == low && tz->prev_high_trip == high)
return;
- /*
- * If "high" and "low" are the same, skip the change unless this is the
- * first time.
- */
- if (same_trip && (tz->prev_low_trip != -INT_MAX ||
- tz->prev_high_trip != INT_MAX))
- return;
-
tz->prev_low_trip = low;
tz->prev_high_trip = high;
diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 8db81f1a12d5fc..768bf87cd80d3f 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -94,7 +94,7 @@ void ufshcd_mcq_config_mac(struct ufs_hba *hba, u32 max_active_cmds)
val = ufshcd_readl(hba, REG_UFS_MCQ_CFG);
val &= ~MCQ_CFG_MAC_MASK;
- val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds);
+ val |= FIELD_PREP(MCQ_CFG_MAC_MASK, max_active_cmds - 1);
ufshcd_writel(hba, val, REG_UFS_MCQ_CFG);
}
EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac);
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index e30fd125988d7a..a0f8e930167d70 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3217,7 +3217,9 @@ retry:
/* MCQ mode */
if (is_mcq_enabled(hba)) {
- err = ufshcd_clear_cmd(hba, lrbp->task_tag);
+ /* successfully cleared the command, retry if needed */
+ if (ufshcd_clear_cmd(hba, lrbp->task_tag) == 0)
+ err = -EAGAIN;
hba->dev_cmd.complete = NULL;
return err;
}
@@ -9791,7 +9793,10 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
/* UFS device & link must be active before we enter in this function */
if (!ufshcd_is_ufs_dev_active(hba) || !ufshcd_is_link_active(hba)) {
- ret = -EINVAL;
+ /* Wait err handler finish or trigger err recovery */
+ if (!ufshcd_eh_in_progress(hba))
+ ufshcd_force_error_recovery(hba);
+ ret = -EBUSY;
goto enable_scaling;
}
diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index 8d68bd21ae7332..7a00004bfd0361 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -47,7 +47,7 @@ enum {
TSTBUS_MAX,
};
-#define QCOM_UFS_MAX_GEAR 4
+#define QCOM_UFS_MAX_GEAR 5
#define QCOM_UFS_MAX_LANE 2
enum {
@@ -67,26 +67,32 @@ static const struct __ufs_qcom_bw_table {
[MODE_PWM][UFS_PWM_G2][UFS_LANE_1] = { 1844, 1000 },
[MODE_PWM][UFS_PWM_G3][UFS_LANE_1] = { 3688, 1000 },
[MODE_PWM][UFS_PWM_G4][UFS_LANE_1] = { 7376, 1000 },
+ [MODE_PWM][UFS_PWM_G5][UFS_LANE_1] = { 14752, 1000 },
[MODE_PWM][UFS_PWM_G1][UFS_LANE_2] = { 1844, 1000 },
[MODE_PWM][UFS_PWM_G2][UFS_LANE_2] = { 3688, 1000 },
[MODE_PWM][UFS_PWM_G3][UFS_LANE_2] = { 7376, 1000 },
[MODE_PWM][UFS_PWM_G4][UFS_LANE_2] = { 14752, 1000 },
+ [MODE_PWM][UFS_PWM_G5][UFS_LANE_2] = { 29504, 1000 },
[MODE_HS_RA][UFS_HS_G1][UFS_LANE_1] = { 127796, 1000 },
[MODE_HS_RA][UFS_HS_G2][UFS_LANE_1] = { 255591, 1000 },
[MODE_HS_RA][UFS_HS_G3][UFS_LANE_1] = { 1492582, 102400 },
[MODE_HS_RA][UFS_HS_G4][UFS_LANE_1] = { 2915200, 204800 },
+ [MODE_HS_RA][UFS_HS_G5][UFS_LANE_1] = { 5836800, 409600 },
[MODE_HS_RA][UFS_HS_G1][UFS_LANE_2] = { 255591, 1000 },
[MODE_HS_RA][UFS_HS_G2][UFS_LANE_2] = { 511181, 1000 },
[MODE_HS_RA][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 },
[MODE_HS_RA][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 },
+ [MODE_HS_RA][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 },
[MODE_HS_RB][UFS_HS_G1][UFS_LANE_1] = { 149422, 1000 },
[MODE_HS_RB][UFS_HS_G2][UFS_LANE_1] = { 298189, 1000 },
[MODE_HS_RB][UFS_HS_G3][UFS_LANE_1] = { 1492582, 102400 },
[MODE_HS_RB][UFS_HS_G4][UFS_LANE_1] = { 2915200, 204800 },
+ [MODE_HS_RB][UFS_HS_G5][UFS_LANE_1] = { 5836800, 409600 },
[MODE_HS_RB][UFS_HS_G1][UFS_LANE_2] = { 298189, 1000 },
[MODE_HS_RB][UFS_HS_G2][UFS_LANE_2] = { 596378, 1000 },
[MODE_HS_RB][UFS_HS_G3][UFS_LANE_2] = { 1492582, 204800 },
[MODE_HS_RB][UFS_HS_G4][UFS_LANE_2] = { 2915200, 409600 },
+ [MODE_HS_RB][UFS_HS_G5][UFS_LANE_2] = { 5836800, 819200 },
[MODE_MAX][0][0] = { 7643136, 307200 },
};
@@ -1210,8 +1216,10 @@ static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up)
list_for_each_entry(clki, head, list) {
if (!IS_ERR_OR_NULL(clki->clk) &&
- !strcmp(clki->name, "core_clk_unipro")) {
- if (is_scale_up)
+ !strcmp(clki->name, "core_clk_unipro")) {
+ if (!clki->max_freq)
+ cycles_in_1us = 150; /* default for backwards compatibility */
+ else if (is_scale_up)
cycles_in_1us = ceil(clki->max_freq, (1000 * 1000));
else
cycles_in_1us = ceil(clk_get_rate(clki->clk), (1000 * 1000));
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 20d9762331bd76..6be3462b109ff2 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -181,12 +181,14 @@ hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
{
if (pdata->send_gpadl.gpadl_handle) {
vmbus_teardown_gpadl(dev->channel, &pdata->send_gpadl);
- vfree(pdata->send_buf);
+ if (!pdata->send_gpadl.decrypted)
+ vfree(pdata->send_buf);
}
if (pdata->recv_gpadl.gpadl_handle) {
vmbus_teardown_gpadl(dev->channel, &pdata->recv_gpadl);
- vfree(pdata->recv_buf);
+ if (!pdata->recv_gpadl.decrypted)
+ vfree(pdata->recv_buf);
}
}
@@ -295,7 +297,8 @@ hv_uio_probe(struct hv_device *dev,
ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
RECV_BUFFER_SIZE, &pdata->recv_gpadl);
if (ret) {
- vfree(pdata->recv_buf);
+ if (!pdata->recv_gpadl.decrypted)
+ vfree(pdata->recv_buf);
goto fail_close;
}
@@ -317,7 +320,8 @@ hv_uio_probe(struct hv_device *dev,
ret = vmbus_establish_gpadl(channel, pdata->send_buf,
SEND_BUFFER_SIZE, &pdata->send_gpadl);
if (ret) {
- vfree(pdata->send_buf);
+ if (!pdata->send_gpadl.decrypted)
+ vfree(pdata->send_buf);
goto fail_close;
}
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index c553decb546107..c8262e2f291778 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -485,6 +485,7 @@ out_free_mem:
static int service_outstanding_interrupt(struct wdm_device *desc)
{
int rv = 0;
+ int used;
/* submit read urb only if the device is waiting for it */
if (!desc->resp_count || !--desc->resp_count)
@@ -499,7 +500,10 @@ static int service_outstanding_interrupt(struct wdm_device *desc)
goto out;
}
- set_bit(WDM_RESPONDING, &desc->flags);
+ used = test_and_set_bit(WDM_RESPONDING, &desc->flags);
+ if (used)
+ goto out;
+
spin_unlock_irq(&desc->iuspin);
rv = usb_submit_urb(desc->response, GFP_KERNEL);
spin_lock_irq(&desc->iuspin);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 3ee8455585b6be..9446660e231bb3 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -130,7 +130,6 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem);
#define HUB_DEBOUNCE_STEP 25
#define HUB_DEBOUNCE_STABLE 100
-static void hub_release(struct kref *kref);
static int usb_reset_and_verify_device(struct usb_device *udev);
static int hub_port_disable(struct usb_hub *hub, int port1, int set_state);
static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1,
@@ -720,14 +719,14 @@ static void kick_hub_wq(struct usb_hub *hub)
*/
intf = to_usb_interface(hub->intfdev);
usb_autopm_get_interface_no_resume(intf);
- kref_get(&hub->kref);
+ hub_get(hub);
if (queue_work(hub_wq, &hub->events))
return;
/* the work has already been scheduled */
usb_autopm_put_interface_async(intf);
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
}
void usb_kick_hub_wq(struct usb_device *hdev)
@@ -1095,7 +1094,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
goto init2;
goto init3;
}
- kref_get(&hub->kref);
+ hub_get(hub);
/* The superspeed hub except for root hub has to use Hub Depth
* value as an offset into the route string to locate the bits
@@ -1343,7 +1342,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
device_unlock(&hdev->dev);
}
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
}
/* Implement the continuations for the delays above */
@@ -1759,6 +1758,16 @@ static void hub_release(struct kref *kref)
kfree(hub);
}
+void hub_get(struct usb_hub *hub)
+{
+ kref_get(&hub->kref);
+}
+
+void hub_put(struct usb_hub *hub)
+{
+ kref_put(&hub->kref, hub_release);
+}
+
static unsigned highspeed_hubs;
static void hub_disconnect(struct usb_interface *intf)
@@ -1807,7 +1816,7 @@ static void hub_disconnect(struct usb_interface *intf)
onboard_hub_destroy_pdevs(&hub->onboard_hub_devs);
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
}
static bool hub_descriptor_is_sane(struct usb_host_interface *desc)
@@ -5934,7 +5943,7 @@ out_hdev_lock:
/* Balance the stuff in kick_hub_wq() and allow autosuspend */
usb_autopm_put_interface(intf);
- kref_put(&hub->kref, hub_release);
+ hub_put(hub);
kcov_remote_stop();
}
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index 43ce21c96a5114..183b69dc295547 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -129,6 +129,8 @@ extern void usb_hub_remove_port_device(struct usb_hub *hub,
extern int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub,
int port1, bool set);
extern struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev);
+extern void hub_get(struct usb_hub *hub);
+extern void hub_put(struct usb_hub *hub);
extern int hub_port_debounce(struct usb_hub *hub, int port1,
bool must_be_connected);
extern int usb_clear_port_feature(struct usb_device *hdev,
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 5b5e613a11e599..686c01af03e63a 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -56,11 +56,22 @@ static ssize_t disable_show(struct device *dev,
u16 portstatus, unused;
bool disabled;
int rc;
+ struct kernfs_node *kn;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -70,9 +81,13 @@ static ssize_t disable_show(struct device *dev,
usb_hub_port_status(hub, port1, &portstatus, &unused);
disabled = !usb_port_is_power_on(hub, portstatus);
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
if (rc)
return rc;
@@ -90,15 +105,26 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
int port1 = port_dev->portnum;
bool disabled;
int rc;
+ struct kernfs_node *kn;
rc = kstrtobool(buf, &disabled);
if (rc)
return rc;
+ hub_get(hub);
rc = usb_autopm_get_interface(intf);
if (rc < 0)
- return rc;
+ goto out_hub_get;
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister hdev.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (!kn) {
+ rc = -ENODEV;
+ goto out_autopm;
+ }
usb_lock_device(hdev);
if (hub->disconnected) {
rc = -ENODEV;
@@ -119,9 +145,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
if (!rc)
rc = count;
-out_hdev_lock:
+ out_hdev_lock:
usb_unlock_device(hdev);
+ sysfs_unbreak_active_protection(kn);
+ out_autopm:
usb_autopm_put_interface(intf);
+ out_hub_get:
+ hub_put(hub);
return rc;
}
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index f98263e21c2a71..d83231d6736ac6 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -1217,14 +1217,24 @@ static ssize_t interface_authorized_store(struct device *dev,
{
struct usb_interface *intf = to_usb_interface(dev);
bool val;
+ struct kernfs_node *kn;
if (kstrtobool(buf, &val) != 0)
return -EINVAL;
- if (val)
+ if (val) {
usb_authorize_interface(intf);
- else
- usb_deauthorize_interface(intf);
+ } else {
+ /*
+ * Prevent deadlock if another process is concurrently
+ * trying to unregister intf.
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ if (kn) {
+ usb_deauthorize_interface(intf);
+ sysfs_unbreak_active_protection(kn);
+ }
+ }
return count;
}
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index c92a1da46a0147..a141f83aba0cce 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -729,8 +729,14 @@ struct dwc2_dregs_backup {
* struct dwc2_hregs_backup - Holds host registers state before
* entering partial power down
* @hcfg: Backup of HCFG register
+ * @hflbaddr: Backup of HFLBADDR register
* @haintmsk: Backup of HAINTMSK register
+ * @hcchar: Backup of HCCHAR register
+ * @hcsplt: Backup of HCSPLT register
* @hcintmsk: Backup of HCINTMSK register
+ * @hctsiz: Backup of HCTSIZ register
+ * @hdma: Backup of HCDMA register
+ * @hcdmab: Backup of HCDMAB register
* @hprt0: Backup of HPTR0 register
* @hfir: Backup of HFIR register
* @hptxfsiz: Backup of HPTXFSIZ register
@@ -738,8 +744,14 @@ struct dwc2_dregs_backup {
*/
struct dwc2_hregs_backup {
u32 hcfg;
+ u32 hflbaddr;
u32 haintmsk;
+ u32 hcchar[MAX_EPS_CHANNELS];
+ u32 hcsplt[MAX_EPS_CHANNELS];
u32 hcintmsk[MAX_EPS_CHANNELS];
+ u32 hctsiz[MAX_EPS_CHANNELS];
+ u32 hcidma[MAX_EPS_CHANNELS];
+ u32 hcidmab[MAX_EPS_CHANNELS];
u32 hprt0;
u32 hfir;
u32 hptxfsiz;
@@ -1086,6 +1098,7 @@ struct dwc2_hsotg {
bool needs_byte_swap;
/* DWC OTG HW Release versions */
+#define DWC2_CORE_REV_4_30a 0x4f54430a
#define DWC2_CORE_REV_2_71a 0x4f54271a
#define DWC2_CORE_REV_2_72a 0x4f54272a
#define DWC2_CORE_REV_2_80a 0x4f54280a
@@ -1323,6 +1336,7 @@ int dwc2_backup_global_registers(struct dwc2_hsotg *hsotg);
int dwc2_restore_global_registers(struct dwc2_hsotg *hsotg);
void dwc2_enable_acg(struct dwc2_hsotg *hsotg);
+void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup);
/* This function should be called on every hardware interrupt. */
irqreturn_t dwc2_handle_common_intr(int irq, void *dev);
diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c
index 158ede7538548e..26d752a4c3ca95 100644
--- a/drivers/usb/dwc2/core_intr.c
+++ b/drivers/usb/dwc2/core_intr.c
@@ -297,7 +297,8 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg)
/* Exit gadget mode clock gating. */
if (hsotg->params.power_down ==
- DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended)
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
dwc2_gadget_exit_clock_gating(hsotg, 0);
}
@@ -322,10 +323,11 @@ static void dwc2_handle_session_req_intr(struct dwc2_hsotg *hsotg)
* @hsotg: Programming view of DWC_otg controller
*
*/
-static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg)
+void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg, bool remotewakeup)
{
u32 glpmcfg;
- u32 i = 0;
+ u32 pcgctl;
+ u32 dctl;
if (hsotg->lx_state != DWC2_L1) {
dev_err(hsotg->dev, "Core isn't in DWC2_L1 state\n");
@@ -334,37 +336,57 @@ static void dwc2_wakeup_from_lpm_l1(struct dwc2_hsotg *hsotg)
glpmcfg = dwc2_readl(hsotg, GLPMCFG);
if (dwc2_is_device_mode(hsotg)) {
- dev_dbg(hsotg->dev, "Exit from L1 state\n");
+ dev_dbg(hsotg->dev, "Exit from L1 state, remotewakeup=%d\n", remotewakeup);
glpmcfg &= ~GLPMCFG_ENBLSLPM;
- glpmcfg &= ~GLPMCFG_HIRD_THRES_EN;
+ glpmcfg &= ~GLPMCFG_HIRD_THRES_MASK;
dwc2_writel(hsotg, glpmcfg, GLPMCFG);
- do {
- glpmcfg = dwc2_readl(hsotg, GLPMCFG);
+ pcgctl = dwc2_readl(hsotg, PCGCTL);
+ pcgctl &= ~PCGCTL_ENBL_SLEEP_GATING;
+ dwc2_writel(hsotg, pcgctl, PCGCTL);
- if (!(glpmcfg & (GLPMCFG_COREL1RES_MASK |
- GLPMCFG_L1RESUMEOK | GLPMCFG_SLPSTS)))
- break;
+ glpmcfg = dwc2_readl(hsotg, GLPMCFG);
+ if (glpmcfg & GLPMCFG_ENBESL) {
+ glpmcfg |= GLPMCFG_RSTRSLPSTS;
+ dwc2_writel(hsotg, glpmcfg, GLPMCFG);
+ }
+
+ if (remotewakeup) {
+ if (dwc2_hsotg_wait_bit_set(hsotg, GLPMCFG, GLPMCFG_L1RESUMEOK, 1000)) {
+ dev_warn(hsotg->dev, "%s: timeout GLPMCFG_L1RESUMEOK\n", __func__);
+ goto fail;
+ return;
+ }
+
+ dctl = dwc2_readl(hsotg, DCTL);
+ dctl |= DCTL_RMTWKUPSIG;
+ dwc2_writel(hsotg, dctl, DCTL);
- udelay(1);
- } while (++i < 200);
+ if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_WKUPINT, 1000)) {
+ dev_warn(hsotg->dev, "%s: timeout GINTSTS_WKUPINT\n", __func__);
+ goto fail;
+ return;
+ }
+ }
- if (i == 200) {
- dev_err(hsotg->dev, "Failed to exit L1 sleep state in 200us.\n");
+ glpmcfg = dwc2_readl(hsotg, GLPMCFG);
+ if (glpmcfg & GLPMCFG_COREL1RES_MASK || glpmcfg & GLPMCFG_SLPSTS ||
+ glpmcfg & GLPMCFG_L1RESUMEOK) {
+ goto fail;
return;
}
- dwc2_gadget_init_lpm(hsotg);
+
+ /* Inform gadget to exit from L1 */
+ call_gadget(hsotg, resume);
+ /* Change to L0 state */
+ hsotg->lx_state = DWC2_L0;
+ hsotg->bus_suspended = false;
+fail: dwc2_gadget_init_lpm(hsotg);
} else {
/* TODO */
dev_err(hsotg->dev, "Host side LPM is not supported.\n");
return;
}
-
- /* Change to L0 state */
- hsotg->lx_state = DWC2_L0;
-
- /* Inform gadget to exit from L1 */
- call_gadget(hsotg, resume);
}
/*
@@ -385,7 +407,7 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg)
dev_dbg(hsotg->dev, "%s lxstate = %d\n", __func__, hsotg->lx_state);
if (hsotg->lx_state == DWC2_L1) {
- dwc2_wakeup_from_lpm_l1(hsotg);
+ dwc2_wakeup_from_lpm_l1(hsotg, false);
return;
}
@@ -408,7 +430,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg)
/* Exit gadget mode clock gating. */
if (hsotg->params.power_down ==
- DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended)
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
dwc2_gadget_exit_clock_gating(hsotg, 0);
} else {
/* Change to L0 state */
@@ -425,7 +448,8 @@ static void dwc2_handle_wakeup_detected_intr(struct dwc2_hsotg *hsotg)
}
if (hsotg->params.power_down ==
- DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended)
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
dwc2_host_exit_clock_gating(hsotg, 1);
/*
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index b517a7216de22a..b2f6da5b65ccd0 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -1415,6 +1415,10 @@ static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req,
ep->name, req, req->length, req->buf, req->no_interrupt,
req->zero, req->short_not_ok);
+ if (hs->lx_state == DWC2_L1) {
+ dwc2_wakeup_from_lpm_l1(hs, true);
+ }
+
/* Prevent new request submission when controller is suspended */
if (hs->lx_state != DWC2_L0) {
dev_dbg(hs->dev, "%s: submit request only in active state\n",
@@ -3727,6 +3731,12 @@ irq_retry:
if (hsotg->in_ppd && hsotg->lx_state == DWC2_L2)
dwc2_exit_partial_power_down(hsotg, 0, true);
+ /* Exit gadget mode clock gating. */
+ if (hsotg->params.power_down ==
+ DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended &&
+ !hsotg->params.no_clock_gating)
+ dwc2_gadget_exit_clock_gating(hsotg, 0);
+
hsotg->lx_state = DWC2_L0;
}
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 35c7a4df8e7175..dd5b1c5691e11e 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -2701,8 +2701,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions(
hsotg->available_host_channels--;
}
qh = list_entry(qh_ptr, struct dwc2_qh, qh_list_entry);
- if (dwc2_assign_and_init_hc(hsotg, qh))
+ if (dwc2_assign_and_init_hc(hsotg, qh)) {
+ if (hsotg->params.uframe_sched)
+ hsotg->available_host_channels++;
break;
+ }
/*
* Move the QH from the periodic ready schedule to the
@@ -2735,8 +2738,11 @@ enum dwc2_transaction_type dwc2_hcd_select_transactions(
hsotg->available_host_channels--;
}
- if (dwc2_assign_and_init_hc(hsotg, qh))
+ if (dwc2_assign_and_init_hc(hsotg, qh)) {
+ if (hsotg->params.uframe_sched)
+ hsotg->available_host_channels++;
break;
+ }
/*
* Move the QH from the non-periodic inactive schedule to the
@@ -4143,6 +4149,8 @@ void dwc2_host_complete(struct dwc2_hsotg *hsotg, struct dwc2_qtd *qtd,
urb->actual_length);
if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) {
+ if (!hsotg->params.dma_desc_enable)
+ urb->start_frame = qtd->qh->start_active_frame;
urb->error_count = dwc2_hcd_urb_get_error_count(qtd->urb);
for (i = 0; i < urb->number_of_packets; ++i) {
urb->iso_frame_desc[i].actual_length =
@@ -4649,7 +4657,7 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
}
if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE &&
- hsotg->bus_suspended) {
+ hsotg->bus_suspended && !hsotg->params.no_clock_gating) {
if (dwc2_is_device_mode(hsotg))
dwc2_gadget_exit_clock_gating(hsotg, 0);
else
@@ -5406,9 +5414,16 @@ int dwc2_backup_host_registers(struct dwc2_hsotg *hsotg)
/* Backup Host regs */
hr = &hsotg->hr_backup;
hr->hcfg = dwc2_readl(hsotg, HCFG);
+ hr->hflbaddr = dwc2_readl(hsotg, HFLBADDR);
hr->haintmsk = dwc2_readl(hsotg, HAINTMSK);
- for (i = 0; i < hsotg->params.host_channels; ++i)
+ for (i = 0; i < hsotg->params.host_channels; ++i) {
+ hr->hcchar[i] = dwc2_readl(hsotg, HCCHAR(i));
+ hr->hcsplt[i] = dwc2_readl(hsotg, HCSPLT(i));
hr->hcintmsk[i] = dwc2_readl(hsotg, HCINTMSK(i));
+ hr->hctsiz[i] = dwc2_readl(hsotg, HCTSIZ(i));
+ hr->hcidma[i] = dwc2_readl(hsotg, HCDMA(i));
+ hr->hcidmab[i] = dwc2_readl(hsotg, HCDMAB(i));
+ }
hr->hprt0 = dwc2_read_hprt0(hsotg);
hr->hfir = dwc2_readl(hsotg, HFIR);
@@ -5442,10 +5457,17 @@ int dwc2_restore_host_registers(struct dwc2_hsotg *hsotg)
hr->valid = false;
dwc2_writel(hsotg, hr->hcfg, HCFG);
+ dwc2_writel(hsotg, hr->hflbaddr, HFLBADDR);
dwc2_writel(hsotg, hr->haintmsk, HAINTMSK);
- for (i = 0; i < hsotg->params.host_channels; ++i)
+ for (i = 0; i < hsotg->params.host_channels; ++i) {
+ dwc2_writel(hsotg, hr->hcchar[i], HCCHAR(i));
+ dwc2_writel(hsotg, hr->hcsplt[i], HCSPLT(i));
dwc2_writel(hsotg, hr->hcintmsk[i], HCINTMSK(i));
+ dwc2_writel(hsotg, hr->hctsiz[i], HCTSIZ(i));
+ dwc2_writel(hsotg, hr->hcidma[i], HCDMA(i));
+ dwc2_writel(hsotg, hr->hcidmab[i], HCDMAB(i));
+ }
dwc2_writel(hsotg, hr->hprt0, HPRT0);
dwc2_writel(hsotg, hr->hfir, HFIR);
@@ -5610,10 +5632,12 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup,
dwc2_writel(hsotg, hr->hcfg, HCFG);
/* De-assert Wakeup Logic */
- gpwrdn = dwc2_readl(hsotg, GPWRDN);
- gpwrdn &= ~GPWRDN_PMUACTV;
- dwc2_writel(hsotg, gpwrdn, GPWRDN);
- udelay(10);
+ if (!(rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) {
+ gpwrdn = dwc2_readl(hsotg, GPWRDN);
+ gpwrdn &= ~GPWRDN_PMUACTV;
+ dwc2_writel(hsotg, gpwrdn, GPWRDN);
+ udelay(10);
+ }
hprt0 = hr->hprt0;
hprt0 |= HPRT0_PWR;
@@ -5638,6 +5662,13 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup,
hprt0 |= HPRT0_RES;
dwc2_writel(hsotg, hprt0, HPRT0);
+ /* De-assert Wakeup Logic */
+ if ((rem_wakeup && hsotg->hw_params.snpsid >= DWC2_CORE_REV_4_30a)) {
+ gpwrdn = dwc2_readl(hsotg, GPWRDN);
+ gpwrdn &= ~GPWRDN_PMUACTV;
+ dwc2_writel(hsotg, gpwrdn, GPWRDN);
+ udelay(10);
+ }
/* Wait for Resume time and then program HPRT again */
mdelay(100);
hprt0 &= ~HPRT0_RES;
diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c
index 6b4d825e97a2d9..79582b102c7eda 100644
--- a/drivers/usb/dwc2/hcd_ddma.c
+++ b/drivers/usb/dwc2/hcd_ddma.c
@@ -559,7 +559,7 @@ static void dwc2_init_isoc_dma_desc(struct dwc2_hsotg *hsotg,
idx = qh->td_last;
inc = qh->host_interval;
hsotg->frame_number = dwc2_hcd_get_frame_number(hsotg);
- cur_idx = dwc2_frame_list_idx(hsotg->frame_number);
+ cur_idx = idx;
next_idx = dwc2_desclist_idx_inc(qh->td_last, inc, qh->dev_speed);
/*
@@ -866,6 +866,8 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg,
{
struct dwc2_dma_desc *dma_desc;
struct dwc2_hcd_iso_packet_desc *frame_desc;
+ u16 frame_desc_idx;
+ struct urb *usb_urb = qtd->urb->priv;
u16 remain = 0;
int rc = 0;
@@ -878,8 +880,11 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg,
DMA_FROM_DEVICE);
dma_desc = &qh->desc_list[idx];
+ frame_desc_idx = (idx - qtd->isoc_td_first) & (usb_urb->number_of_packets - 1);
- frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index_last];
+ frame_desc = &qtd->urb->iso_descs[frame_desc_idx];
+ if (idx == qtd->isoc_td_first)
+ usb_urb->start_frame = dwc2_hcd_get_frame_number(hsotg);
dma_desc->buf = (u32)(qtd->urb->dma + frame_desc->offset);
if (chan->ep_is_in)
remain = (dma_desc->status & HOST_DMA_ISOC_NBYTES_MASK) >>
@@ -900,7 +905,7 @@ static int dwc2_cmpl_host_isoc_dma_desc(struct dwc2_hsotg *hsotg,
frame_desc->status = 0;
}
- if (++qtd->isoc_frame_index == qtd->urb->packet_count) {
+ if (++qtd->isoc_frame_index == usb_urb->number_of_packets) {
/*
* urb->status is not used for isoc transfers here. The
* individual frame_desc status are used instead.
@@ -1005,11 +1010,11 @@ static void dwc2_complete_isoc_xfer_ddma(struct dwc2_hsotg *hsotg,
return;
idx = dwc2_desclist_idx_inc(idx, qh->host_interval,
chan->speed);
- if (!rc)
+ if (rc == 0)
continue;
- if (rc == DWC2_CMPL_DONE)
- break;
+ if (rc == DWC2_CMPL_DONE || rc == DWC2_CMPL_STOP)
+ goto stop_scan;
/* rc == DWC2_CMPL_STOP */
diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h
index 13abdd5f675299..12f8c7f86dc980 100644
--- a/drivers/usb/dwc2/hw.h
+++ b/drivers/usb/dwc2/hw.h
@@ -698,7 +698,7 @@
#define TXSTS_QTOP_TOKEN_MASK (0x3 << 25)
#define TXSTS_QTOP_TOKEN_SHIFT 25
#define TXSTS_QTOP_TERMINATE BIT(24)
-#define TXSTS_QSPCAVAIL_MASK (0xff << 16)
+#define TXSTS_QSPCAVAIL_MASK (0x7f << 16)
#define TXSTS_QSPCAVAIL_SHIFT 16
#define TXSTS_FSPCAVAIL_MASK (0xffff << 0)
#define TXSTS_FSPCAVAIL_SHIFT 0
diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index b1d48019e944f3..7b84416dfc2b11 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -331,7 +331,7 @@ static void dwc2_driver_remove(struct platform_device *dev)
/* Exit clock gating when driver is removed. */
if (hsotg->params.power_down == DWC2_POWER_DOWN_PARAM_NONE &&
- hsotg->bus_suspended) {
+ hsotg->bus_suspended && !hsotg->params.no_clock_gating) {
if (dwc2_is_device_mode(hsotg))
dwc2_gadget_exit_clock_gating(hsotg, 0);
else
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 3e55838c000144..31684cdaaae305 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1519,6 +1519,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
else
dwc->sysdev = dwc->dev;
+ dwc->sys_wakeup = device_may_wakeup(dwc->sysdev);
+
ret = device_property_read_string(dev, "usb-psy-name", &usb_psy_name);
if (ret >= 0) {
dwc->usb_psy = power_supply_get_by_name(usb_psy_name);
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index c07edfc954f72e..7e80dd3d466b88 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1133,6 +1133,7 @@ struct dwc3_scratchpad_array {
* 3 - Reserved
* @dis_metastability_quirk: set to disable metastability quirk.
* @dis_split_quirk: set to disable split boundary.
+ * @sys_wakeup: set if the device may do system wakeup.
* @wakeup_configured: set if the device is configured for remote wakeup.
* @suspended: set to track suspend event due to U3/L2.
* @imod_interval: set the interrupt moderation interval in 250ns
@@ -1357,6 +1358,7 @@ struct dwc3 {
unsigned dis_split_quirk:1;
unsigned async_callbacks:1;
+ unsigned sys_wakeup:1;
unsigned wakeup_configured:1;
unsigned suspended:1;
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 39564e17f3b07a..497deed38c0c1e 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -51,7 +51,6 @@
#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1
#define PCI_DEVICE_ID_INTEL_MTLS 0x7f6f
#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e
-#define PCI_DEVICE_ID_INTEL_ARLH 0x7ec1
#define PCI_DEVICE_ID_INTEL_ARLH_PCH 0x777e
#define PCI_DEVICE_ID_INTEL_TGL 0x9a15
#define PCI_DEVICE_ID_AMD_MR 0x163a
@@ -423,7 +422,6 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
{ PCI_DEVICE_DATA(INTEL, MTLP, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, MTL, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, MTLS, &dwc3_pci_intel_swnode) },
- { PCI_DEVICE_DATA(INTEL, ARLH, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, ARLH_PCH, &dwc3_pci_intel_swnode) },
{ PCI_DEVICE_DATA(INTEL, TGL, &dwc3_pci_intel_swnode) },
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 40c52dbc28d3b4..4df2661f66751b 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2955,6 +2955,9 @@ static int dwc3_gadget_start(struct usb_gadget *g,
dwc->gadget_driver = driver;
spin_unlock_irqrestore(&dwc->lock, flags);
+ if (dwc->sys_wakeup)
+ device_wakeup_enable(dwc->sysdev);
+
return 0;
}
@@ -2970,6 +2973,9 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
struct dwc3 *dwc = gadget_to_dwc(g);
unsigned long flags;
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
spin_lock_irqsave(&dwc->lock, flags);
dwc->gadget_driver = NULL;
dwc->max_cfg_eps = 0;
@@ -4651,6 +4657,10 @@ int dwc3_gadget_init(struct dwc3 *dwc)
else
dwc3_gadget_set_speed(dwc->gadget, dwc->maximum_speed);
+ /* No system wakeup if no gadget driver bound */
+ if (dwc->sys_wakeup)
+ device_wakeup_disable(dwc->sysdev);
+
return 0;
err5:
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index 5a5cb6ce9946d3..0204787df81d50 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -173,6 +173,14 @@ int dwc3_host_init(struct dwc3 *dwc)
goto err;
}
+ if (dwc->sys_wakeup) {
+ /* Restore wakeup setting if switched from device */
+ device_wakeup_enable(dwc->sysdev);
+
+ /* Pass on wakeup setting to the new xhci platform device */
+ device_init_wakeup(&xhci->dev, true);
+ }
+
return 0;
err:
platform_device_put(xhci);
@@ -181,6 +189,9 @@ err:
void dwc3_host_exit(struct dwc3 *dwc)
{
+ if (dwc->sys_wakeup)
+ device_init_wakeup(&dwc->xhci->dev, false);
+
platform_device_unregister(dwc->xhci);
dwc->xhci = NULL;
}
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 9d4150124fdb82..b3a9d18a8dcd19 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -292,7 +292,9 @@ int usb_ep_queue(struct usb_ep *ep,
{
int ret = 0;
- if (WARN_ON_ONCE(!ep->enabled && ep->address)) {
+ if (!ep->enabled && ep->address) {
+ pr_debug("USB gadget: queue request to disabled ep 0x%x (%s)\n",
+ ep->address, ep->name);
ret = -ESHUTDOWN;
goto out;
}
diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c
index 35770e608c6497..2d30fc1be30669 100644
--- a/drivers/usb/misc/usb-ljca.c
+++ b/drivers/usb/misc/usb-ljca.c
@@ -518,8 +518,10 @@ static int ljca_new_client_device(struct ljca_adapter *adap, u8 type, u8 id,
int ret;
client = kzalloc(sizeof *client, GFP_KERNEL);
- if (!client)
+ if (!client) {
+ kfree(data);
return -ENOMEM;
+ }
client->type = type;
client->id = id;
@@ -535,8 +537,10 @@ static int ljca_new_client_device(struct ljca_adapter *adap, u8 type, u8 id,
auxdev->dev.release = ljca_auxdev_release;
ret = auxiliary_device_init(auxdev);
- if (ret)
+ if (ret) {
+ kfree(data);
goto err_free;
+ }
ljca_auxdev_acpi_bind(adap, auxdev, adr, id);
@@ -590,12 +594,8 @@ static int ljca_enumerate_gpio(struct ljca_adapter *adap)
valid_pin[i] = get_unaligned_le32(&desc->bank_desc[i].valid_pins);
bitmap_from_arr32(gpio_info->valid_pin_map, valid_pin, gpio_num);
- ret = ljca_new_client_device(adap, LJCA_CLIENT_GPIO, 0, "ljca-gpio",
+ return ljca_new_client_device(adap, LJCA_CLIENT_GPIO, 0, "ljca-gpio",
gpio_info, LJCA_GPIO_ACPI_ADR);
- if (ret)
- kfree(gpio_info);
-
- return ret;
}
static int ljca_enumerate_i2c(struct ljca_adapter *adap)
@@ -629,10 +629,8 @@ static int ljca_enumerate_i2c(struct ljca_adapter *adap)
ret = ljca_new_client_device(adap, LJCA_CLIENT_I2C, i,
"ljca-i2c", i2c_info,
LJCA_I2C1_ACPI_ADR + i);
- if (ret) {
- kfree(i2c_info);
+ if (ret)
return ret;
- }
}
return 0;
@@ -669,10 +667,8 @@ static int ljca_enumerate_spi(struct ljca_adapter *adap)
ret = ljca_new_client_device(adap, LJCA_CLIENT_SPI, i,
"ljca-spi", spi_info,
LJCA_SPI1_ACPI_ADR + i);
- if (ret) {
- kfree(spi_info);
+ if (ret)
return ret;
- }
}
return 0;
diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c
index 8f735a86cd1972..fdcffebf415cda 100644
--- a/drivers/usb/phy/phy-generic.c
+++ b/drivers/usb/phy/phy-generic.c
@@ -262,13 +262,6 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop)
return dev_err_probe(dev, PTR_ERR(nop->vbus_draw),
"could not get vbus regulator\n");
- nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus");
- if (PTR_ERR(nop->vbus_draw) == -ENODEV)
- nop->vbus_draw = NULL;
- if (IS_ERR(nop->vbus_draw))
- return dev_err_probe(dev, PTR_ERR(nop->vbus_draw),
- "could not get vbus regulator\n");
-
nop->dev = dev;
nop->phy.dev = nop->dev;
nop->phy.label = "nop-xceiv";
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 71ace274761f18..08953f0d4532aa 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
@@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
struct uas_dev_info *devinfo)
{
struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd);
- struct urb *urb;
int err;
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return err;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
@@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_FROM_DEVICE);
if (!cmdinfo->data_in_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_IN_URB;
}
@@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
@@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_TO_DEVICE);
if (!cmdinfo->data_out_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_OUT_URB;
}
@@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
@@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (cmdinfo->state & ALLOC_CMD_URB) {
cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd);
if (!cmdinfo->cmd_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_CMD_URB;
}
@@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
@@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd)
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 389c7f0b8d9358..9610e647a8d480 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -1310,6 +1310,7 @@ static ssize_t select_usb_power_delivery_store(struct device *dev,
{
struct typec_port *port = to_typec_port(dev);
struct usb_power_delivery *pd;
+ int ret;
if (!port->ops || !port->ops->pd_set)
return -EOPNOTSUPP;
@@ -1318,7 +1319,11 @@ static ssize_t select_usb_power_delivery_store(struct device *dev,
if (!pd)
return -EINVAL;
- return port->ops->pd_set(port, pd);
+ ret = port->ops->pd_set(port, pd);
+ if (ret)
+ return ret;
+
+ return size;
}
static ssize_t select_usb_power_delivery_show(struct device *dev,
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index ae2b6c94482d5f..c26fb70c3ec6c0 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -6861,7 +6861,7 @@ static int tcpm_pd_set(struct typec_port *p, struct usb_power_delivery *pd)
if (data->source_desc.pdo[0]) {
for (i = 0; i < PDO_MAX_OBJECTS && data->source_desc.pdo[i]; i++)
- port->snk_pdo[i] = data->source_desc.pdo[i];
+ port->src_pdo[i] = data->source_desc.pdo[i];
port->nr_src_pdo = i + 1;
}
@@ -6910,7 +6910,9 @@ static int tcpm_pd_set(struct typec_port *p, struct usb_power_delivery *pd)
port->port_source_caps = data->source_cap;
port->port_sink_caps = data->sink_cap;
+ typec_port_set_usb_power_delivery(p, NULL);
port->selected_pd = pd;
+ typec_port_set_usb_power_delivery(p, port->selected_pd);
unlock:
mutex_unlock(&port->lock);
return ret;
@@ -6943,9 +6945,7 @@ static void tcpm_port_unregister_pd(struct tcpm_port *port)
port->port_source_caps = NULL;
for (i = 0; i < port->pd_count; i++) {
usb_power_delivery_unregister_capabilities(port->pd_list[i]->sink_cap);
- kfree(port->pd_list[i]->sink_cap);
usb_power_delivery_unregister_capabilities(port->pd_list[i]->source_cap);
- kfree(port->pd_list[i]->source_cap);
devm_kfree(port->dev, port->pd_list[i]);
port->pd_list[i] = NULL;
usb_power_delivery_unregister(port->pds[i]);
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index cf52cb34d28592..31d8a46ae5e7cb 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -151,8 +151,12 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd)
if (!(cci & UCSI_CCI_COMMAND_COMPLETE))
return -EIO;
- if (cci & UCSI_CCI_NOT_SUPPORTED)
+ if (cci & UCSI_CCI_NOT_SUPPORTED) {
+ if (ucsi_acknowledge_command(ucsi) < 0)
+ dev_err(ucsi->dev,
+ "ACK of unsupported command failed\n");
return -EOPNOTSUPP;
+ }
if (cci & UCSI_CCI_ERROR) {
if (cmd == UCSI_GET_ERROR_STATUS)
@@ -1133,17 +1137,21 @@ static int ucsi_check_cable(struct ucsi_connector *con)
if (ret < 0)
return ret;
- ret = ucsi_get_cable_identity(con);
- if (ret < 0)
- return ret;
+ if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE) {
+ ret = ucsi_get_cable_identity(con);
+ if (ret < 0)
+ return ret;
+ }
- ret = ucsi_register_plug(con);
- if (ret < 0)
- return ret;
+ if (con->ucsi->cap.features & UCSI_CAP_ALT_MODE_DETAILS) {
+ ret = ucsi_register_plug(con);
+ if (ret < 0)
+ return ret;
- ret = ucsi_register_altmodes(con, UCSI_RECIPIENT_SOP_P);
- if (ret < 0)
- return ret;
+ ret = ucsi_register_altmodes(con, UCSI_RECIPIENT_SOP_P);
+ if (ret < 0)
+ return ret;
+ }
return 0;
}
@@ -1189,8 +1197,10 @@ static void ucsi_handle_connector_change(struct work_struct *work)
ucsi_register_partner(con);
ucsi_partner_task(con, ucsi_check_connection, 1, HZ);
ucsi_partner_task(con, ucsi_check_connector_capability, 1, HZ);
- ucsi_partner_task(con, ucsi_get_partner_identity, 1, HZ);
- ucsi_partner_task(con, ucsi_check_cable, 1, HZ);
+ if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE)
+ ucsi_partner_task(con, ucsi_get_partner_identity, 1, HZ);
+ if (con->ucsi->cap.features & UCSI_CAP_CABLE_DETAILS)
+ ucsi_partner_task(con, ucsi_check_cable, 1, HZ);
if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) ==
UCSI_CONSTAT_PWR_OPMODE_PD)
@@ -1215,11 +1225,11 @@ static void ucsi_handle_connector_change(struct work_struct *work)
if (con->status.change & UCSI_CONSTAT_CAM_CHANGE)
ucsi_partner_task(con, ucsi_check_altmodes, 1, 0);
- clear_bit(EVENT_PENDING, &con->ucsi->flags);
-
mutex_lock(&ucsi->ppm_lock);
+ clear_bit(EVENT_PENDING, &con->ucsi->flags);
ret = ucsi_acknowledge_connector_change(ucsi);
mutex_unlock(&ucsi->ppm_lock);
+
if (ret)
dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret);
@@ -1237,7 +1247,7 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num)
struct ucsi_connector *con = &ucsi->connector[num - 1];
if (!(ucsi->ntfy & UCSI_ENABLE_NTFY_CONNECTOR_CHANGE)) {
- dev_dbg(ucsi->dev, "Bogus connector change event\n");
+ dev_dbg(ucsi->dev, "Early connector change event\n");
return;
}
@@ -1260,13 +1270,47 @@ static int ucsi_reset_connector(struct ucsi_connector *con, bool hard)
static int ucsi_reset_ppm(struct ucsi *ucsi)
{
- u64 command = UCSI_PPM_RESET;
+ u64 command;
unsigned long tmo;
u32 cci;
int ret;
mutex_lock(&ucsi->ppm_lock);
+ ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci));
+ if (ret < 0)
+ goto out;
+
+ /*
+ * If UCSI_CCI_RESET_COMPLETE is already set we must clear
+ * the flag before we start another reset. Send a
+ * UCSI_SET_NOTIFICATION_ENABLE command to achieve this.
+ * Ignore a timeout and try the reset anyway if this fails.
+ */
+ if (cci & UCSI_CCI_RESET_COMPLETE) {
+ command = UCSI_SET_NOTIFICATION_ENABLE;
+ ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command,
+ sizeof(command));
+ if (ret < 0)
+ goto out;
+
+ tmo = jiffies + msecs_to_jiffies(UCSI_TIMEOUT_MS);
+ do {
+ ret = ucsi->ops->read(ucsi, UCSI_CCI,
+ &cci, sizeof(cci));
+ if (ret < 0)
+ goto out;
+ if (cci & UCSI_CCI_COMMAND_COMPLETE)
+ break;
+ if (time_is_before_jiffies(tmo))
+ break;
+ msleep(20);
+ } while (1);
+
+ WARN_ON(cci & UCSI_CCI_RESET_COMPLETE);
+ }
+
+ command = UCSI_PPM_RESET;
ret = ucsi->ops->async_write(ucsi, UCSI_CONTROL, &command,
sizeof(command));
if (ret < 0)
@@ -1589,8 +1633,10 @@ static int ucsi_register_port(struct ucsi *ucsi, struct ucsi_connector *con)
ucsi_register_partner(con);
ucsi_pwr_opmode_change(con);
ucsi_port_psy_changed(con);
- ucsi_get_partner_identity(con);
- ucsi_check_cable(con);
+ if (con->ucsi->cap.features & UCSI_CAP_GET_PD_MESSAGE)
+ ucsi_get_partner_identity(con);
+ if (con->ucsi->cap.features & UCSI_CAP_CABLE_DETAILS)
+ ucsi_check_cable(con);
}
/* Only notify USB controller if partner supports USB data */
@@ -1636,6 +1682,7 @@ static int ucsi_init(struct ucsi *ucsi)
{
struct ucsi_connector *con, *connector;
u64 command, ntfy;
+ u32 cci;
int ret;
int i;
@@ -1688,6 +1735,13 @@ static int ucsi_init(struct ucsi *ucsi)
ucsi->connector = connector;
ucsi->ntfy = ntfy;
+
+ ret = ucsi->ops->read(ucsi, UCSI_CCI, &cci, sizeof(cci));
+ if (ret)
+ return ret;
+ if (UCSI_CCI_CONNECTOR(READ_ONCE(cci)))
+ ucsi_connector_change(ucsi, cci);
+
return 0;
err_unregister:
diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h
index 32daf5f5865053..0e7c92eb1b2278 100644
--- a/drivers/usb/typec/ucsi/ucsi.h
+++ b/drivers/usb/typec/ucsi/ucsi.h
@@ -206,7 +206,7 @@ struct ucsi_capability {
#define UCSI_CAP_ATTR_POWER_OTHER BIT(10)
#define UCSI_CAP_ATTR_POWER_VBUS BIT(14)
u8 num_connectors;
- u8 features;
+ u16 features;
#define UCSI_CAP_SET_UOM BIT(0)
#define UCSI_CAP_SET_PDM BIT(1)
#define UCSI_CAP_ALT_MODE_DETAILS BIT(2)
@@ -215,7 +215,8 @@ struct ucsi_capability {
#define UCSI_CAP_CABLE_DETAILS BIT(5)
#define UCSI_CAP_EXT_SUPPLY_NOTIFICATIONS BIT(6)
#define UCSI_CAP_PD_RESET BIT(7)
- u16 reserved_1;
+#define UCSI_CAP_GET_PD_MESSAGE BIT(8)
+ u8 reserved_1;
u8 num_alt_modes;
u8 reserved_2;
u16 bc_version;
diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index 928eacbeb21ac4..7b3ac133ef8618 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -23,10 +23,11 @@ struct ucsi_acpi {
void *base;
struct completion complete;
unsigned long flags;
+#define UCSI_ACPI_SUPPRESS_EVENT 0
+#define UCSI_ACPI_COMMAND_PENDING 1
+#define UCSI_ACPI_ACK_PENDING 2
guid_t guid;
u64 cmd;
- bool dell_quirk_probed;
- bool dell_quirk_active;
};
static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func)
@@ -79,9 +80,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
int ret;
if (ack)
- set_bit(ACK_PENDING, &ua->flags);
+ set_bit(UCSI_ACPI_ACK_PENDING, &ua->flags);
else
- set_bit(COMMAND_PENDING, &ua->flags);
+ set_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags);
ret = ucsi_acpi_async_write(ucsi, offset, val, val_len);
if (ret)
@@ -92,9 +93,9 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
out_clear_bit:
if (ack)
- clear_bit(ACK_PENDING, &ua->flags);
+ clear_bit(UCSI_ACPI_ACK_PENDING, &ua->flags);
else
- clear_bit(COMMAND_PENDING, &ua->flags);
+ clear_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags);
return ret;
}
@@ -129,51 +130,40 @@ static const struct ucsi_operations ucsi_zenbook_ops = {
};
/*
- * Some Dell laptops expect that an ACK command with the
- * UCSI_ACK_CONNECTOR_CHANGE bit set is followed by a (separate)
- * ACK command that only has the UCSI_ACK_COMMAND_COMPLETE bit set.
- * If this is not done events are not delivered to OSPM and
- * subsequent commands will timeout.
+ * Some Dell laptops don't like ACK commands with the
+ * UCSI_ACK_CONNECTOR_CHANGE but not the UCSI_ACK_COMMAND_COMPLETE
+ * bit set. To work around this send a dummy command and bundle the
+ * UCSI_ACK_CONNECTOR_CHANGE with the UCSI_ACK_COMMAND_COMPLETE
+ * for the dummy command.
*/
static int
ucsi_dell_sync_write(struct ucsi *ucsi, unsigned int offset,
const void *val, size_t val_len)
{
struct ucsi_acpi *ua = ucsi_get_drvdata(ucsi);
- u64 cmd = *(u64 *)val, ack = 0;
+ u64 cmd = *(u64 *)val;
+ u64 dummycmd = UCSI_GET_CAPABILITY;
int ret;
- if (UCSI_COMMAND(cmd) == UCSI_ACK_CC_CI &&
- cmd & UCSI_ACK_CONNECTOR_CHANGE)
- ack = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE;
-
- ret = ucsi_acpi_sync_write(ucsi, offset, val, val_len);
- if (ret != 0)
- return ret;
- if (ack == 0)
- return ret;
-
- if (!ua->dell_quirk_probed) {
- ua->dell_quirk_probed = true;
-
- cmd = UCSI_GET_CAPABILITY;
- ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd,
- sizeof(cmd));
- if (ret == 0)
- return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL,
- &ack, sizeof(ack));
- if (ret != -ETIMEDOUT)
+ if (cmd == (UCSI_ACK_CC_CI | UCSI_ACK_CONNECTOR_CHANGE)) {
+ cmd |= UCSI_ACK_COMMAND_COMPLETE;
+
+ /*
+ * The UCSI core thinks it is sending a connector change ack
+ * and will accept new connector change events. We don't want
+ * this to happen for the dummy command as its response will
+ * still report the very event that the core is trying to clear.
+ */
+ set_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags);
+ ret = ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &dummycmd,
+ sizeof(dummycmd));
+ clear_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags);
+
+ if (ret < 0)
return ret;
-
- ua->dell_quirk_active = true;
- dev_err(ua->dev, "Firmware bug: Additional ACK required after ACKing a connector change.\n");
- dev_err(ua->dev, "Firmware bug: Enabling workaround\n");
}
- if (!ua->dell_quirk_active)
- return ret;
-
- return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &ack, sizeof(ack));
+ return ucsi_acpi_sync_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd));
}
static const struct ucsi_operations ucsi_dell_ops = {
@@ -209,13 +199,14 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data)
if (ret)
return;
- if (UCSI_CCI_CONNECTOR(cci))
+ if (UCSI_CCI_CONNECTOR(cci) &&
+ !test_bit(UCSI_ACPI_SUPPRESS_EVENT, &ua->flags))
ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci));
if (cci & UCSI_CCI_ACK_COMPLETE && test_bit(ACK_PENDING, &ua->flags))
complete(&ua->complete);
if (cci & UCSI_CCI_COMMAND_COMPLETE &&
- test_bit(COMMAND_PENDING, &ua->flags))
+ test_bit(UCSI_ACPI_COMMAND_PENDING, &ua->flags))
complete(&ua->complete);
}
diff --git a/drivers/usb/typec/ucsi/ucsi_glink.c b/drivers/usb/typec/ucsi/ucsi_glink.c
index 932e7bf6944735..ce08eb33e5beca 100644
--- a/drivers/usb/typec/ucsi/ucsi_glink.c
+++ b/drivers/usb/typec/ucsi/ucsi_glink.c
@@ -255,6 +255,20 @@ static void pmic_glink_ucsi_notify(struct work_struct *work)
static void pmic_glink_ucsi_register(struct work_struct *work)
{
struct pmic_glink_ucsi *ucsi = container_of(work, struct pmic_glink_ucsi, register_work);
+ int orientation;
+ int i;
+
+ for (i = 0; i < PMIC_GLINK_MAX_PORTS; i++) {
+ if (!ucsi->port_orientation[i])
+ continue;
+ orientation = gpiod_get_value(ucsi->port_orientation[i]);
+
+ if (orientation >= 0) {
+ typec_switch_set(ucsi->port_switch[i],
+ orientation ? TYPEC_ORIENTATION_REVERSE
+ : TYPEC_ORIENTATION_NORMAL);
+ }
+ }
ucsi_register(ucsi->ucsi);
}
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 045f666b4f12a2..8995730ce0bfc8 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -2515,7 +2515,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
- vq_err(vq, "Guest moved used index from %u to %u",
+ vq_err(vq, "Guest moved avail index from %u to %u",
last_avail_idx, vq->avail_idx);
return -EFAULT;
}
@@ -2799,9 +2799,19 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
r = vhost_get_avail_idx(vq, &avail_idx);
if (unlikely(r))
return false;
+
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ if (vq->avail_idx != vq->last_avail_idx) {
+ /* Since we have updated avail_idx, the following
+ * call to vhost_get_vq_desc() will read available
+ * ring entries. Make sure that read happens after
+ * the avail_idx read.
+ */
+ smp_rmb();
+ return false;
+ }
- return vq->avail_idx == vq->last_avail_idx;
+ return true;
}
EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
@@ -2838,9 +2848,19 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
&vq->avail->idx, r);
return false;
}
+
vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+ if (vq->avail_idx != vq->last_avail_idx) {
+ /* Since we have updated avail_idx, the following
+ * call to vhost_get_vq_desc() will read available
+ * ring entries. Make sure that read happens after
+ * the avail_idx read.
+ */
+ smp_rmb();
+ return true;
+ }
- return vq->avail_idx != vq->last_avail_idx;
+ return false;
}
EXPORT_SYMBOL_GPL(vhost_enable_notify);
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index e3179e987cdb32..197b6d5268e941 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -494,6 +494,7 @@ config FB_SBUS_HELPERS
select FB_CFB_COPYAREA
select FB_CFB_FILLRECT
select FB_CFB_IMAGEBLIT
+ select FB_IOMEM_FOPS
config FB_BW2
bool "BWtwo support"
@@ -514,6 +515,7 @@ config FB_CG6
depends on (FB = y) && (SPARC && FB_SBUS)
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
+ select FB_IOMEM_FOPS
help
This is the frame buffer device driver for the CGsix (GX, TurboGX)
frame buffer.
@@ -523,6 +525,7 @@ config FB_FFB
depends on FB_SBUS && SPARC64
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
+ select FB_IOMEM_FOPS
help
This is the frame buffer device driver for the Creator, Creator3D,
and Elite3D graphics boards.
diff --git a/drivers/virt/vmgenid.c b/drivers/virt/vmgenid.c
index b67a28da47026d..a1c467a0e9f719 100644
--- a/drivers/virt/vmgenid.c
+++ b/drivers/virt/vmgenid.c
@@ -68,7 +68,6 @@ out:
static void vmgenid_notify(struct acpi_device *device, u32 event)
{
struct vmgenid_state *state = acpi_driver_data(device);
- char *envp[] = { "NEW_VMGENID=1", NULL };
u8 old_id[VMGENID_SIZE];
memcpy(old_id, state->this_id, sizeof(old_id));
@@ -76,7 +75,6 @@ static void vmgenid_notify(struct acpi_device *device, u32 event)
if (!memcmp(old_id, state->this_id, sizeof(old_id)))
return;
add_vmfork_randomness(state->this_id, sizeof(state->this_id));
- kobject_uevent_env(&device->dev.kobj, KOBJ_CHANGE, envp);
}
static const struct acpi_device_id vmgenid_ids[] = {
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index f173587893cb34..9510c551dce864 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -362,14 +362,16 @@ static const struct bus_type virtio_bus = {
.remove = virtio_dev_remove,
};
-int register_virtio_driver(struct virtio_driver *driver)
+int __register_virtio_driver(struct virtio_driver *driver, struct module *owner)
{
/* Catch this early. */
BUG_ON(driver->feature_table_size && !driver->feature_table);
driver->driver.bus = &virtio_bus;
+ driver->driver.owner = owner;
+
return driver_register(&driver->driver);
}
-EXPORT_SYMBOL_GPL(register_virtio_driver);
+EXPORT_SYMBOL_GPL(__register_virtio_driver);
void unregister_virtio_driver(struct virtio_driver *driver)
{
diff --git a/fs/aio.c b/fs/aio.c
index 9cdaa2faa53633..0f4f531c97800c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1202,8 +1202,8 @@ static void aio_complete(struct aio_kiocb *iocb)
spin_lock_irqsave(&ctx->wait.lock, flags);
list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry)
if (avail >= curr->min_nr) {
- list_del_init_careful(&curr->w.entry);
wake_up_process(curr->w.private);
+ list_del_init_careful(&curr->w.entry);
}
spin_unlock_irqrestore(&ctx->wait.lock, flags);
}
diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index b02796c8a59533..66ca0bbee63949 100644
--- a/fs/bcachefs/Makefile
+++ b/fs/bcachefs/Makefile
@@ -17,6 +17,7 @@ bcachefs-y := \
btree_journal_iter.o \
btree_key_cache.o \
btree_locking.o \
+ btree_node_scan.o \
btree_trans_commit.o \
btree_update.o \
btree_update_interior.o \
@@ -37,6 +38,7 @@ bcachefs-y := \
error.o \
extents.o \
extent_update.o \
+ eytzinger.o \
fs.o \
fs-common.o \
fs-ioctl.o \
@@ -67,6 +69,7 @@ bcachefs-y := \
quota.o \
rebalance.o \
recovery.o \
+ recovery_passes.o \
reflink.o \
replicas.o \
sb-clean.o \
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c
index 3640f417cce118..5c180fdc3efbdf 100644
--- a/fs/bcachefs/acl.c
+++ b/fs/bcachefs/acl.c
@@ -281,7 +281,6 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
struct btree_trans *trans = bch2_trans_get(c);
struct btree_iter iter = { NULL };
- struct bkey_s_c_xattr xattr;
struct posix_acl *acl = NULL;
struct bkey_s_c k;
int ret;
@@ -290,28 +289,27 @@ retry:
ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
&hash, inode_inum(inode), &search, 0);
- if (ret) {
- if (!bch2_err_matches(ret, ENOENT))
- acl = ERR_PTR(ret);
- goto out;
- }
+ if (ret)
+ goto err;
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
- if (ret) {
- acl = ERR_PTR(ret);
- goto out;
- }
+ if (ret)
+ goto err;
- xattr = bkey_s_c_to_xattr(k);
+ struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
- le16_to_cpu(xattr.v->x_val_len));
+ le16_to_cpu(xattr.v->x_val_len));
+ ret = PTR_ERR_OR_ZERO(acl);
+err:
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto retry;
- if (!IS_ERR(acl))
+ if (ret)
+ acl = !bch2_err_matches(ret, ENOENT) ? ERR_PTR(ret) : NULL;
+
+ if (!IS_ERR_OR_NULL(acl))
set_cached_acl(&inode->v, type, acl);
-out:
- if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart))
- goto retry;
bch2_trans_iter_exit(trans, &iter);
bch2_trans_put(trans);
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 893e38f9db807f..4ff56fa4d53920 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1713,34 +1713,37 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
if (ret)
goto out;
- if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
- a->v.gen++;
- SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
- goto write;
- }
-
- if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
- bch2_trans_inconsistent(trans,
- "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
- "%s",
- a->v.journal_seq,
- c->journal.flushed_seq_ondisk,
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ if (a->v.dirty_sectors) {
+ if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+ trans, "attempting to discard bucket with dirty data\n%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = -EIO;
- }
goto out;
}
if (a->v.data_type != BCH_DATA_need_discard) {
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
- bch2_trans_inconsistent(trans,
- "bucket incorrectly set in need_discard btree\n"
- "%s",
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
- ret = -EIO;
+ if (data_type_is_empty(a->v.data_type) &&
+ BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
+ a->v.gen++;
+ SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
+ goto write;
}
+ if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+ trans, "bucket incorrectly set in need_discard btree\n"
+ "%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ ret = -EIO;
+ goto out;
+ }
+
+ if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
+ if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+ trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s",
+ a->v.journal_seq,
+ c->journal.flushed_seq_ondisk,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ ret = -EIO;
goto out;
}
@@ -1835,6 +1838,7 @@ static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpo
if (ret)
goto err;
+ BUG_ON(a->v.dirty_sectors);
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
a->v.data_type = alloc_data_type(a->v, a->v.data_type);
@@ -1942,6 +1946,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
goto out;
BUG_ON(a->v.data_type != BCH_DATA_cached);
+ BUG_ON(a->v.dirty_sectors);
if (!a->v.cached_sectors)
bch_err(c, "invalidating empty bucket, confused");
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 214b15c84d1f32..a1fc30adf9129d 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -188,8 +188,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
{
switch (watermark) {
- case BCH_WATERMARK_reclaim:
+ case BCH_WATERMARK_interior_updates:
return 0;
+ case BCH_WATERMARK_reclaim:
+ return OPEN_BUCKETS_COUNT / 6;
case BCH_WATERMARK_btree:
case BCH_WATERMARK_btree_copygc:
return OPEN_BUCKETS_COUNT / 4;
diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h
index b91b7a46105608..c2226e947c41fb 100644
--- a/fs/bcachefs/alloc_types.h
+++ b/fs/bcachefs/alloc_types.h
@@ -22,7 +22,8 @@ struct bucket_alloc_state {
x(copygc) \
x(btree) \
x(btree_copygc) \
- x(reclaim)
+ x(reclaim) \
+ x(interior_updates)
enum bch_watermark {
#define x(name) BCH_WATERMARK_##name,
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 8cb35ea572cb95..fadb1078903d29 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -8,6 +8,7 @@
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
+#include "checksum.h"
#include "error.h"
#include <linux/mm.h>
@@ -29,8 +30,7 @@ static bool extent_matches_bp(struct bch_fs *c,
if (p.ptr.cached)
continue;
- bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
- &bucket2, &bp2);
+ bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2);
if (bpos_eq(bucket, bucket2) &&
!memcmp(&bp, &bp2, sizeof(bp)))
return true;
@@ -44,13 +44,20 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
struct printbuf *err)
{
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
+
+ /* these will be caught by fsck */
+ if (!bch2_dev_exists2(c, bp.k->p.inode))
+ return 0;
+
+ struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode);
struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
int ret = 0;
- bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
+ bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
+ !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
c, err,
- backpointer_pos_wrong,
- "backpointer at wrong pos");
+ backpointer_bucket_offset_wrong,
+ "backpointer bucket_offset wrong");
fsck_err:
return ret;
}
@@ -378,7 +385,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_
backpointer_to_missing_alloc,
"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
alloc_iter.pos.inode, alloc_iter.pos.offset,
- (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = bch2_btree_delete_at(trans, bp_iter, 0);
goto out;
}
@@ -414,6 +421,84 @@ struct extents_to_bp_state {
struct bkey_buf last_flushed;
};
+static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
+ struct bkey_s_c extent, unsigned dev)
+{
+ struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
+ int ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
+
+ bch2_bkey_drop_device(bkey_i_to_s(n), dev);
+ return bch2_btree_insert_trans(trans, btree, n, 0);
+}
+
+static int check_extent_checksum(struct btree_trans *trans,
+ enum btree_id btree, struct bkey_s_c extent,
+ enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct printbuf buf = PRINTBUF;
+ void *data_buf = NULL;
+ struct bio *bio = NULL;
+ size_t bytes;
+ int ret = 0;
+
+ if (bkey_is_btree_ptr(extent.k))
+ return false;
+
+ bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
+ if (p.ptr.dev == dev)
+ goto found;
+ BUG();
+found:
+ if (!p.crc.csum_type)
+ return false;
+
+ bytes = p.crc.compressed_size << 9;
+
+ struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
+ if (!bch2_dev_get_ioref(ca, READ))
+ return false;
+
+ data_buf = kvmalloc(bytes, GFP_KERNEL);
+ if (!data_buf) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL);
+ bio->bi_iter.bi_sector = p.ptr.offset;
+ bch2_bio_map(bio, data_buf, bytes);
+ ret = submit_bio_wait(bio);
+ if (ret)
+ goto err;
+
+ prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
+ prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree));
+ bch2_bkey_val_to_text(&buf, c, extent);
+ prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree));
+ bch2_bkey_val_to_text(&buf, c, extent2);
+
+ struct nonce nonce = extent_nonce(extent.k->version, p.crc);
+ struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
+ if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
+ c, dup_backpointer_to_bad_csum_extent,
+ "%s", buf.buf))
+ ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
+fsck_err:
+err:
+ if (bio)
+ bio_put(bio);
+ kvfree(data_buf);
+ percpu_ref_put(&ca->io_ref);
+ printbuf_exit(&buf);
+ return ret;
+}
+
static int check_bp_exists(struct btree_trans *trans,
struct extents_to_bp_state *s,
struct bpos bucket,
@@ -421,7 +506,8 @@ static int check_bp_exists(struct btree_trans *trans,
struct bkey_s_c orig_k)
{
struct bch_fs *c = trans->c;
- struct btree_iter bp_iter = { NULL };
+ struct btree_iter bp_iter = {};
+ struct btree_iter other_extent_iter = {};
struct printbuf buf = PRINTBUF;
struct bkey_s_c bp_k;
struct bkey_buf tmp;
@@ -429,13 +515,19 @@ static int check_bp_exists(struct btree_trans *trans,
bch2_bkey_buf_init(&tmp);
+ if (!bch2_dev_bucket_exists(c, bucket)) {
+ prt_str(&buf, "extent for nonexistent device:bucket ");
+ bch2_bpos_to_text(&buf, bucket);
+ prt_str(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+ bch_err(c, "%s", buf.buf);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+
if (bpos_lt(bucket, s->bucket_start) ||
bpos_gt(bucket, s->bucket_end))
return 0;
- if (!bch2_dev_bucket_exists(c, bucket))
- goto missing;
-
bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
bucket_pos_to_bp(c, bucket, bp.bucket_offset),
0);
@@ -461,21 +553,94 @@ static int check_bp_exists(struct btree_trans *trans,
ret = -BCH_ERR_transaction_restart_write_buffer_flush;
goto out;
}
- goto missing;
+
+ goto check_existing_bp;
}
out:
err:
fsck_err:
+ bch2_trans_iter_exit(trans, &other_extent_iter);
bch2_trans_iter_exit(trans, &bp_iter);
bch2_bkey_buf_exit(&tmp, c);
printbuf_exit(&buf);
return ret;
+check_existing_bp:
+ /* Do we have a backpointer for a different extent? */
+ if (bp_k.k->type != KEY_TYPE_backpointer)
+ goto missing;
+
+ struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
+
+ struct bkey_s_c other_extent =
+ bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
+ ret = bkey_err(other_extent);
+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+ ret = 0;
+ if (ret)
+ goto err;
+
+ if (!other_extent.k)
+ goto missing;
+
+ if (bch2_extents_match(orig_k, other_extent)) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+ prt_str(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, other_extent);
+ bch_err(c, "%s", buf.buf);
+
+ if (other_extent.k->size <= orig_k.k->size) {
+ ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
+ if (ret)
+ goto err;
+ goto out;
+ } else {
+ ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
+ if (ret)
+ goto err;
+ goto missing;
+ }
+ }
+
+ ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
+ if (ret < 0)
+ goto err;
+ if (ret) {
+ ret = 0;
+ goto missing;
+ }
+
+ ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
+ if (ret < 0)
+ goto err;
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ printbuf_reset(&buf);
+ prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode);
+ bch2_bkey_val_to_text(&buf, c, orig_k);
+ prt_str(&buf, "\n ");
+ bch2_bkey_val_to_text(&buf, c, other_extent);
+ bch_err(c, "%s", buf.buf);
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ goto err;
missing:
+ printbuf_reset(&buf);
prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
bch2_btree_id_str(bp.btree_id), bp.level);
bch2_bkey_val_to_text(&buf, c, orig_k);
- prt_printf(&buf, "\nbp pos ");
- bch2_bpos_to_text(&buf, bp_iter.pos);
+ prt_printf(&buf, "\n got: ");
+ bch2_bkey_val_to_text(&buf, c, bp_k);
+
+ struct bkey_i_backpointer n_bp_k;
+ bkey_backpointer_init(&n_bp_k.k_i);
+ n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
+ n_bp_k.v = bp;
+ prt_printf(&buf, "\n want: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
@@ -502,8 +667,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
if (p.ptr.cached)
continue;
- bch2_extent_ptr_to_bp(c, btree, level,
- k, p, &bucket_pos, &bp);
+ bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp);
ret = check_bp_exists(trans, s, bucket_pos, bp, k);
if (ret)
diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h
index 327365a9feac4e..85949b9fd880ce 100644
--- a/fs/bcachefs/backpointers.h
+++ b/fs/bcachefs/backpointers.h
@@ -53,14 +53,11 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
u64 bucket_offset)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
- struct bpos ret;
-
- ret = POS(bucket.inode,
- (bucket_to_sector(ca, bucket.offset) <<
- MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+ struct bpos ret = POS(bucket.inode,
+ (bucket_to_sector(ca, bucket.offset) <<
+ MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
-
return ret;
}
@@ -90,20 +87,40 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i);
}
-static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
- struct bkey_s_c k, struct extent_ptr_decoded p)
+static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
+ struct extent_ptr_decoded p,
+ const union bch_extent_entry *entry)
{
- return level ? BCH_DATA_btree :
- p.has_ec ? BCH_DATA_stripe :
- BCH_DATA_user;
+ switch (k.k->type) {
+ case KEY_TYPE_btree_ptr:
+ case KEY_TYPE_btree_ptr_v2:
+ return BCH_DATA_btree;
+ case KEY_TYPE_extent:
+ case KEY_TYPE_reflink_v:
+ return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user;
+ case KEY_TYPE_stripe: {
+ const struct bch_extent_ptr *ptr = &entry->ptr;
+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+
+ BUG_ON(ptr < s.v->ptrs ||
+ ptr >= s.v->ptrs + s.v->nr_blocks);
+
+ return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
+ ? BCH_DATA_parity
+ : BCH_DATA_user;
+ }
+ default:
+ BUG();
+ }
}
static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k, struct extent_ptr_decoded p,
+ const union bch_extent_entry *entry,
struct bpos *bucket_pos, struct bch_backpointer *bp)
{
- enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);
s64 sectors = level ? btree_sectors(c) : k.k->size;
u32 bucket_offset;
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 799aa32b6b4d99..91c3c1fef233d1 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -209,7 +209,7 @@
#include "fifo.h"
#include "nocow_locking_types.h"
#include "opts.h"
-#include "recovery_types.h"
+#include "recovery_passes_types.h"
#include "sb-errors_types.h"
#include "seqmutex.h"
#include "time_stats.h"
@@ -456,6 +456,7 @@ enum bch_time_stats {
#include "alloc_types.h"
#include "btree_types.h"
+#include "btree_node_scan_types.h"
#include "btree_write_buffer_types.h"
#include "buckets_types.h"
#include "buckets_waiting_for_journal_types.h"
@@ -614,6 +615,7 @@ struct bch_dev {
*/
#define BCH_FS_FLAGS() \
+ x(new_fs) \
x(started) \
x(may_go_rw) \
x(rw) \
@@ -707,6 +709,8 @@ struct btree_trans_buf {
x(stripe_delete) \
x(reflink) \
x(fallocate) \
+ x(fsync) \
+ x(dio_write) \
x(discard) \
x(discard_fast) \
x(invalidate) \
@@ -796,6 +800,7 @@ struct bch_fs {
u64 features;
u64 compat;
unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
+ u64 btrees_lost_data;
} sb;
@@ -810,7 +815,6 @@ struct bch_fs {
/* snapshot.c: */
struct snapshot_table __rcu *snapshots;
- size_t snapshot_table_size;
struct mutex snapshot_table_lock;
struct rw_semaphore snapshot_create_lock;
@@ -1104,6 +1108,8 @@ struct bch_fs {
struct journal_keys journal_keys;
struct list_head journal_iters;
+ struct find_btree_nodes found_btree_nodes;
+
u64 last_bucket_seq_cleanup;
u64 counters_on_mount[BCH_COUNTER_NR];
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index bff8750ac0d743..085987435a5ea3 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -578,7 +578,8 @@ struct bch_member {
__le64 nbuckets; /* device size */
__le16 first_bucket; /* index of first bucket used */
__le16 bucket_size; /* sectors */
- __le32 pad;
+ __u8 btree_bitmap_shift;
+ __u8 pad[3];
__le64 last_mount; /* time_t */
__le64 flags;
@@ -587,6 +588,7 @@ struct bch_member {
__le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
__le64 errors_reset_time;
__le64 seq;
+ __le64 btree_allocated_bitmap;
};
#define BCH_MEMBER_V1_BYTES 56
@@ -818,6 +820,7 @@ struct bch_sb_field_ext {
struct bch_sb_field field;
__le64 recovery_passes_required[2];
__le64 errors_silent[8];
+ __le64 btrees_lost_data;
};
struct bch_sb_field_downgrade_entry {
@@ -875,7 +878,8 @@ struct bch_sb_field_downgrade {
x(rebalance_work, BCH_VERSION(1, 3)) \
x(member_seq, BCH_VERSION(1, 4)) \
x(subvolume_fs_parent, BCH_VERSION(1, 5)) \
- x(btree_subvolume_children, BCH_VERSION(1, 6))
+ x(btree_subvolume_children, BCH_VERSION(1, 6)) \
+ x(mi_btree_bitmap, BCH_VERSION(1, 7))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -1313,7 +1317,7 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
x(write_buffer_keys, 11) \
x(datetime, 12)
-enum {
+enum bch_jset_entry_type {
#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
BCH_JSET_ENTRY_TYPES()
#undef x
@@ -1359,7 +1363,7 @@ struct jset_entry_blacklist_v2 {
x(inodes, 1) \
x(key_version, 2)
-enum {
+enum bch_fs_usage_type {
#define x(f, nr) BCH_FS_USAGE_##f = nr,
BCH_FS_USAGE_TYPES()
#undef x
@@ -1534,6 +1538,20 @@ enum btree_id {
BTREE_ID_NR
};
+static inline bool btree_id_is_alloc(enum btree_id id)
+{
+ switch (id) {
+ case BTREE_ID_alloc:
+ case BTREE_ID_backpointers:
+ case BTREE_ID_need_discard:
+ case BTREE_ID_freespace:
+ case BTREE_ID_bucket_gens:
+ return true;
+ default:
+ return false;
+ }
+}
+
#define BTREE_MAX_DEPTH 4U
/* Btree nodes */
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index cf23ff47bed8be..3a45d128f608db 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -314,6 +314,12 @@ static inline unsigned bkeyp_key_u64s(const struct bkey_format *format,
return bkey_packed(k) ? format->key_u64s : BKEY_U64s;
}
+static inline bool bkeyp_u64s_valid(const struct bkey_format *f,
+ const struct bkey_packed *k)
+{
+ return ((unsigned) k->u64s - bkeyp_key_u64s(f, k) <= U8_MAX - BKEY_U64s);
+}
+
static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
const struct bkey_packed *k)
{
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 5e52684764eb14..db336a43fc083a 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -171,11 +171,15 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
if (type >= BKEY_TYPE_NR)
return 0;
- bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) &&
+ bkey_fsck_err_on((type == BKEY_TYPE_btree ||
+ (flags & BKEY_INVALID_COMMIT)) &&
!(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
bkey_invalid_type_for_btree,
"invalid key type for btree %s (%s)",
- bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]);
+ bch2_btree_node_type_str(type),
+ k.k->type < KEY_TYPE_MAX
+ ? bch2_bkey_types[k.k->type]
+ : "(unknown)");
if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
bkey_fsck_err_on(k.k->size == 0, c, err,
diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
index 3fd1085b6c61ee..3bb477840eab6b 100644
--- a/fs/bcachefs/bset.c
+++ b/fs/bcachefs/bset.c
@@ -134,18 +134,24 @@ void bch2_dump_btree_node_iter(struct btree *b,
printbuf_exit(&buf);
}
-#ifdef CONFIG_BCACHEFS_DEBUG
-
-void __bch2_verify_btree_nr_keys(struct btree *b)
+struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b)
{
struct bset_tree *t;
struct bkey_packed *k;
- struct btree_nr_keys nr = { 0 };
+ struct btree_nr_keys nr = {};
for_each_bset(b, t)
bset_tree_for_each_key(b, t, k)
if (!bkey_deleted(k))
btree_keys_account_key_add(&nr, t - b->set, k);
+ return nr;
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+void __bch2_verify_btree_nr_keys(struct btree *b)
+{
+ struct btree_nr_keys nr = bch2_btree_node_count_keys(b);
BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
}
diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h
index 79c77baaa38386..120a79fd456bd5 100644
--- a/fs/bcachefs/bset.h
+++ b/fs/bcachefs/bset.h
@@ -458,6 +458,8 @@ struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
/* Accounting: */
+struct btree_nr_keys bch2_btree_node_count_keys(struct btree *);
+
static inline void btree_keys_account_key(struct btree_nr_keys *n,
unsigned bset,
struct bkey_packed *k,
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 562561a9a510e8..02c70e813face0 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -709,9 +709,31 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
- u32 seq;
- BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
+ if (unlikely(level >= BTREE_MAX_DEPTH)) {
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u",
+ level, BTREE_MAX_DEPTH);
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(!bkey_is_btree_ptr(&k->k))) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
+ printbuf_exit(&buf);
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
+ printbuf_exit(&buf);
+ return ERR_PTR(ret);
+ }
+
/*
* Parent node must be locked, else we could read in a btree node that's
* been freed:
@@ -752,34 +774,26 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
}
set_btree_node_read_in_flight(b);
-
six_unlock_write(&b->c.lock);
- seq = six_lock_seq(&b->c.lock);
- six_unlock_intent(&b->c.lock);
- /* Unlock before doing IO: */
- if (path && sync)
- bch2_trans_unlock_noassert(trans);
-
- bch2_btree_node_read(trans, b, sync);
+ if (path) {
+ u32 seq = six_lock_seq(&b->c.lock);
- if (!sync)
- return NULL;
+ /* Unlock before doing IO: */
+ six_unlock_intent(&b->c.lock);
+ bch2_trans_unlock_noassert(trans);
- if (path) {
- int ret = bch2_trans_relock(trans) ?:
- bch2_btree_path_relock_intent(trans, path);
- if (ret) {
- BUG_ON(!trans->restarted);
- return ERR_PTR(ret);
- }
- }
+ bch2_btree_node_read(trans, b, sync);
- if (!six_relock_type(&b->c.lock, lock_type, seq)) {
- BUG_ON(!path);
+ if (!sync)
+ return NULL;
- trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
- return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
+ if (!six_relock_type(&b->c.lock, lock_type, seq))
+ b = NULL;
+ } else {
+ bch2_btree_node_read(trans, b, sync);
+ if (lock_type == SIX_LOCK_read)
+ six_lock_downgrade(&b->c.lock);
}
return b;
@@ -808,7 +822,8 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
prt_printf(&buf, "\nmax ");
bch2_bpos_to_text(&buf, b->data->max_key);
- bch2_fs_inconsistent(c, "%s", buf.buf);
+ bch2_fs_topology_error(c, "%s", buf.buf);
+
printbuf_exit(&buf);
}
@@ -1111,18 +1126,19 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
- struct btree *b;
BUG_ON(path && !btree_node_locked(path, level + 1));
BUG_ON(level >= BTREE_MAX_DEPTH);
- b = btree_cache_find(bc, k);
+ struct btree *b = btree_cache_find(bc, k);
if (b)
return 0;
b = bch2_btree_node_fill(trans, path, k, btree_id,
level, SIX_LOCK_read, false);
- return PTR_ERR_OR_ZERO(b);
+ if (!IS_ERR_OR_NULL(b))
+ six_unlock_read(&b->c.lock);
+ return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
}
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
@@ -1134,6 +1150,8 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
b = btree_cache_find(bc, k);
if (!b)
return;
+
+ BUG_ON(b == btree_node_root(trans->c, b));
wait_on_io:
/* not allowed to wait on io with btree locks held: */
@@ -1145,6 +1163,8 @@ wait_on_io:
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
+ if (unlikely(b->hash_val != btree_ptr_hash_val(k)))
+ goto out;
if (btree_node_dirty(b)) {
__bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
@@ -1159,7 +1179,7 @@ wait_on_io:
btree_node_data_free(c, b);
bch2_btree_node_hash_remove(bc, b);
mutex_unlock(&bc->lock);
-
+out:
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
}
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index bdaed29f084a4d..ecbd9598f69fd0 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -7,11 +7,13 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
+#include "backpointers.h"
#include "bkey_methods.h"
#include "bkey_buf.h"
#include "btree_journal_iter.h"
#include "btree_key_cache.h"
#include "btree_locking.h"
+#include "btree_node_scan.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_gc.h"
@@ -24,7 +26,7 @@
#include "journal.h"
#include "keylist.h"
#include "move.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "reflink.h"
#include "replicas.h"
#include "super-io.h"
@@ -40,6 +42,7 @@
#define DROP_THIS_NODE 10
#define DROP_PREV_NODE 11
+#define DID_FILL_FROM_SCAN 12
static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
{
@@ -70,90 +73,6 @@ static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
__gc_pos_set(c, new_pos);
}
-/*
- * Missing: if an interior btree node is empty, we need to do something -
- * perhaps just kill it
- */
-static int bch2_gc_check_topology(struct bch_fs *c,
- struct btree *b,
- struct bkey_buf *prev,
- struct bkey_buf cur,
- bool is_last)
-{
- struct bpos node_start = b->data->min_key;
- struct bpos node_end = b->data->max_key;
- struct bpos expected_start = bkey_deleted(&prev->k->k)
- ? node_start
- : bpos_successor(prev->k->k.p);
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
- int ret = 0;
-
- if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
- struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);
-
- if (!bpos_eq(expected_start, bp->v.min_key)) {
- bch2_topology_error(c);
-
- if (bkey_deleted(&prev->k->k)) {
- prt_printf(&buf1, "start of node: ");
- bch2_bpos_to_text(&buf1, node_start);
- } else {
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k));
- }
- bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k));
-
- if (__fsck_err(c,
- FSCK_CAN_FIX|
- FSCK_CAN_IGNORE|
- FSCK_NO_RATELIMIT,
- btree_node_topology_bad_min_key,
- "btree node with incorrect min_key at btree %s level %u:\n"
- " prev %s\n"
- " cur %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) {
- bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
- goto err;
- } else {
- set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
- }
- }
- }
-
- if (is_last && !bpos_eq(cur.k->k.p, node_end)) {
- bch2_topology_error(c);
-
- printbuf_reset(&buf1);
- printbuf_reset(&buf2);
-
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k));
- bch2_bpos_to_text(&buf2, node_end);
-
- if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT,
- btree_node_topology_bad_max_key,
- "btree node with incorrect max_key at btree %s level %u:\n"
- " %s\n"
- " expected %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf) &&
- should_restart_for_topology_repair(c)) {
- bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
- goto err;
- } else {
- set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
- }
- }
-
- bch2_bkey_buf_copy(prev, c, cur.k);
-err:
-fsck_err:
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
- return ret;
-}
-
static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
{
switch (b->key.k.type) {
@@ -212,6 +131,17 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
struct bkey_i_btree_ptr_v2 *new;
int ret;
+ if (c->opts.verbose) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, " -> ");
+ bch2_bpos_to_text(&buf, new_min);
+
+ bch_info(c, "%s(): %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
+
new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
if (!new)
return -BCH_ERR_ENOMEM_gc_repair_key;
@@ -237,6 +167,17 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
struct bkey_i_btree_ptr_v2 *new;
int ret;
+ if (c->opts.verbose) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, " -> ");
+ bch2_bpos_to_text(&buf, new_max);
+
+ bch_info(c, "%s(): %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
+
ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
if (ret)
return ret;
@@ -268,127 +209,138 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
return 0;
}
-static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
- struct btree *prev, struct btree *cur)
+static int btree_check_node_boundaries(struct bch_fs *c, struct btree *b,
+ struct btree *prev, struct btree *cur,
+ struct bpos *pulled_from_scan)
{
struct bpos expected_start = !prev
? b->data->min_key
: bpos_successor(prev->key.k.p);
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
int ret = 0;
- if (!prev) {
- prt_printf(&buf1, "start of node: ");
- bch2_bpos_to_text(&buf1, b->data->min_key);
- } else {
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key));
+ BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
+ b->data->min_key));
+
+ if (bpos_eq(expected_start, cur->data->min_key))
+ return 0;
+
+ prt_printf(&buf, " at btree %s level %u:\n parent: ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+ if (prev) {
+ prt_printf(&buf, "\n prev: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key));
}
- bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key));
-
- if (prev &&
- bpos_gt(expected_start, cur->data->min_key) &&
- BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {
- /* cur overwrites prev: */
-
- if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key,
- cur->data->min_key), c,
- btree_node_topology_overwritten_by_next_node,
- "btree node overwritten by next node at btree %s level %u:\n"
- " node %s\n"
- " next %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf)) {
- ret = DROP_PREV_NODE;
- goto out;
- }
+ prt_str(&buf, "\n next: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key));
- if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p,
- bpos_predecessor(cur->data->min_key)), c,
- btree_node_topology_bad_max_key,
- "btree node with incorrect max_key at btree %s level %u:\n"
- " node %s\n"
- " next %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf))
- ret = set_node_max(c, prev,
- bpos_predecessor(cur->data->min_key));
- } else {
- /* prev overwrites cur: */
-
- if (mustfix_fsck_err_on(bpos_ge(expected_start,
- cur->data->max_key), c,
- btree_node_topology_overwritten_by_prev_node,
- "btree node overwritten by prev node at btree %s level %u:\n"
- " prev %s\n"
- " node %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf)) {
- ret = DROP_THIS_NODE;
- goto out;
- }
+ if (bpos_lt(expected_start, cur->data->min_key)) { /* gap */
+ if (b->c.level == 1 &&
+ bpos_lt(*pulled_from_scan, cur->data->min_key)) {
+ ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
+ expected_start,
+ bpos_predecessor(cur->data->min_key));
+ if (ret)
+ goto err;
- if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c,
- btree_node_topology_bad_min_key,
- "btree node with incorrect min_key at btree %s level %u:\n"
- " prev %s\n"
- " node %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf))
- ret = set_node_min(c, cur, expected_start);
+ *pulled_from_scan = cur->data->min_key;
+ ret = DID_FILL_FROM_SCAN;
+ } else {
+ if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
+ "btree node with incorrect min_key%s", buf.buf))
+ ret = set_node_min(c, cur, expected_start);
+ }
+ } else { /* overlap */
+ if (prev && BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) { /* cur overwrites prev */
+ if (bpos_ge(prev->data->min_key, cur->data->min_key)) { /* fully? */
+ if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_next_node,
+ "btree node overwritten by next node%s", buf.buf))
+ ret = DROP_PREV_NODE;
+ } else {
+ if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
+ "btree node with incorrect max_key%s", buf.buf))
+ ret = set_node_max(c, prev,
+ bpos_predecessor(cur->data->min_key));
+ }
+ } else {
+ if (bpos_ge(expected_start, cur->data->max_key)) { /* fully? */
+ if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_prev_node,
+ "btree node overwritten by prev node%s", buf.buf))
+ ret = DROP_THIS_NODE;
+ } else {
+ if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
+ "btree node with incorrect min_key%s", buf.buf))
+ ret = set_node_min(c, cur, expected_start);
+ }
+ }
}
-out:
+err:
fsck_err:
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
+ printbuf_exit(&buf);
return ret;
}
static int btree_repair_node_end(struct bch_fs *c, struct btree *b,
- struct btree *child)
+ struct btree *child, struct bpos *pulled_from_scan)
{
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
int ret = 0;
- bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key));
- bch2_bpos_to_text(&buf2, b->key.k.p);
+ if (bpos_eq(child->key.k.p, b->key.k.p))
+ return 0;
- if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c,
- btree_node_topology_bad_max_key,
- "btree node with incorrect max_key at btree %s level %u:\n"
- " %s\n"
- " expected %s",
- bch2_btree_id_str(b->c.btree_id), b->c.level,
- buf1.buf, buf2.buf)) {
- ret = set_node_max(c, child, b->key.k.p);
- if (ret)
- goto err;
+ prt_printf(&buf, "at btree %s level %u:\n parent: ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+ prt_str(&buf, "\n child: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key));
+
+ if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
+ "btree node with incorrect max_key%s", buf.buf)) {
+ if (b->c.level == 1 &&
+ bpos_lt(*pulled_from_scan, b->key.k.p)) {
+ ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
+ bpos_successor(child->key.k.p), b->key.k.p);
+ if (ret)
+ goto err;
+
+ *pulled_from_scan = b->key.k.p;
+ ret = DID_FILL_FROM_SCAN;
+ } else {
+ ret = set_node_max(c, child, b->key.k.p);
+ }
}
err:
fsck_err:
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
+ printbuf_exit(&buf);
return ret;
}
-static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
+static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b,
+ struct bpos *pulled_from_scan)
{
struct bch_fs *c = trans->c;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bkey_buf prev_k, cur_k;
struct btree *prev = NULL, *cur = NULL;
- bool have_child, dropped_children = false;
+ bool have_child, new_pass = false;
struct printbuf buf = PRINTBUF;
int ret = 0;
if (!b->c.level)
return 0;
-again:
- prev = NULL;
- have_child = dropped_children = false;
+
bch2_bkey_buf_init(&prev_k);
bch2_bkey_buf_init(&cur_k);
+again:
+ cur = prev = NULL;
+ have_child = new_pass = false;
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
iter.prefetch = true;
@@ -415,11 +367,17 @@ again:
b->c.level - 1,
buf.buf)) {
bch2_btree_node_evict(trans, cur_k.k);
+ cur = NULL;
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
- cur = NULL;
if (ret)
break;
+
+ if (!btree_id_is_alloc(b->c.btree_id)) {
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ break;
+ }
continue;
}
@@ -427,7 +385,23 @@ again:
if (ret)
break;
- ret = btree_repair_node_boundaries(c, b, prev, cur);
+ if (bch2_btree_node_is_stale(c, cur)) {
+ bch_info(c, "btree node %s older than nodes found by scanning", buf.buf);
+ six_unlock_read(&cur->c.lock);
+ bch2_btree_node_evict(trans, cur_k.k);
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, cur_k.k->k.p);
+ cur = NULL;
+ if (ret)
+ break;
+ continue;
+ }
+
+ ret = btree_check_node_boundaries(c, b, prev, cur, pulled_from_scan);
+ if (ret == DID_FILL_FROM_SCAN) {
+ new_pass = true;
+ ret = 0;
+ }
if (ret == DROP_THIS_NODE) {
six_unlock_read(&cur->c.lock);
@@ -445,6 +419,7 @@ again:
prev = NULL;
if (ret == DROP_PREV_NODE) {
+ bch_info(c, "dropped prev node");
bch2_btree_node_evict(trans, prev_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, prev_k.k->k.p);
@@ -452,8 +427,6 @@ again:
break;
bch2_btree_and_journal_iter_exit(&iter);
- bch2_bkey_buf_exit(&prev_k, c);
- bch2_bkey_buf_exit(&cur_k, c);
goto again;
} else if (ret)
break;
@@ -465,7 +438,11 @@ again:
if (!ret && !IS_ERR_OR_NULL(prev)) {
BUG_ON(cur);
- ret = btree_repair_node_end(c, b, prev);
+ ret = btree_repair_node_end(c, b, prev, pulled_from_scan);
+ if (ret == DID_FILL_FROM_SCAN) {
+ new_pass = true;
+ ret = 0;
+ }
}
if (!IS_ERR_OR_NULL(prev))
@@ -479,6 +456,10 @@ again:
goto err;
bch2_btree_and_journal_iter_exit(&iter);
+
+ if (new_pass)
+ goto again;
+
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
iter.prefetch = true;
@@ -495,7 +476,7 @@ again:
if (ret)
goto err;
- ret = bch2_btree_repair_topology_recurse(trans, cur);
+ ret = bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan);
six_unlock_read(&cur->c.lock);
cur = NULL;
@@ -503,7 +484,7 @@ again:
bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
- dropped_children = true;
+ new_pass = true;
}
if (ret)
@@ -530,12 +511,14 @@ fsck_err:
six_unlock_read(&cur->c.lock);
bch2_btree_and_journal_iter_exit(&iter);
- bch2_bkey_buf_exit(&prev_k, c);
- bch2_bkey_buf_exit(&cur_k, c);
- if (!ret && dropped_children)
+ if (!ret && new_pass)
goto again;
+ BUG_ON(!ret && bch2_btree_node_check_topology(trans, b));
+
+ bch2_bkey_buf_exit(&prev_k, c);
+ bch2_bkey_buf_exit(&cur_k, c);
printbuf_exit(&buf);
return ret;
}
@@ -543,32 +526,63 @@ fsck_err:
int bch2_check_topology(struct bch_fs *c)
{
struct btree_trans *trans = bch2_trans_get(c);
- struct btree *b;
- unsigned i;
+ struct bpos pulled_from_scan = POS_MIN;
int ret = 0;
- for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
+ for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
+ bool reconstructed_root = false;
- if (!r->alive)
- continue;
+ if (r->error) {
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ break;
+reconstruct_root:
+ bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i));
- b = r->b;
- if (btree_node_fake(b))
- continue;
+ r->alive = false;
+ r->error = 0;
+
+ if (!bch2_btree_has_scanned_nodes(c, i)) {
+ mustfix_fsck_err(c, btree_root_unreadable_and_scan_found_nothing,
+ "no nodes found for btree %s, continue?", bch2_btree_id_str(i));
+ bch2_btree_root_alloc_fake(c, i, 0);
+ } else {
+ bch2_btree_root_alloc_fake(c, i, 1);
+ bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
+ if (ret)
+ break;
+ }
+
+ reconstructed_root = true;
+ }
+
+ struct btree *b = r->b;
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
- ret = bch2_btree_repair_topology_recurse(trans, b);
+ ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan);
six_unlock_read(&b->c.lock);
if (ret == DROP_THIS_NODE) {
- bch_err(c, "empty btree root - repair unimplemented");
- ret = -BCH_ERR_fsck_repair_unimplemented;
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+ mutex_lock(&c->btree_cache.lock);
+ list_move(&b->list, &c->btree_cache.freeable);
+ mutex_unlock(&c->btree_cache.lock);
+
+ r->b = NULL;
+
+ if (!reconstructed_root)
+ goto reconstruct_root;
+
+ bch_err(c, "empty btree root %s", bch2_btree_id_str(i));
+ bch2_btree_root_alloc_fake(c, i, 0);
+ r->alive = false;
+ ret = 0;
}
}
-
+fsck_err:
bch2_trans_put(trans);
-
return ret;
}
@@ -591,7 +605,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) {
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
- enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, p, entry_c);
if (fsck_err_on(!g->gen_valid,
c, ptr_to_missing_alloc_key,
@@ -657,7 +671,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
continue;
if (fsck_err_on(bucket_data_type(g->data_type) &&
- bucket_data_type(g->data_type) != data_type, c,
+ bucket_data_type(g->data_type) !=
+ bucket_data_type(data_type), c,
ptr_bucket_data_type_mismatch,
"bucket %u:%zu different types of data in same bucket: %s, %s\n"
"while marking %s",
@@ -698,18 +713,13 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
}
if (do_update) {
- struct bkey_ptrs ptrs;
- union bch_extent_entry *entry;
- struct bch_extent_ptr *ptr;
- struct bkey_i *new;
-
if (is_root) {
bch_err(c, "cannot update btree roots yet");
ret = -EINVAL;
goto err;
}
- new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
+ struct bkey_i *new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
if (!new) {
ret = -BCH_ERR_ENOMEM_gc_repair_key;
bch_err_msg(c, ret, "allocating new key");
@@ -724,7 +734,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
* btree node isn't there anymore, the read path will
* sort it out:
*/
- ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
bkey_for_each_ptr(ptrs, ptr) {
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
struct bucket *g = PTR_GC_BUCKET(ca, ptr);
@@ -732,19 +742,26 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
ptr->gen = g->gen;
}
} else {
- bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- struct bucket *g = PTR_GC_BUCKET(ca, ptr);
- enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);
-
- (ptr->cached &&
- (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
- (!ptr->cached &&
- gen_cmp(ptr->gen, g->gen) < 0) ||
- gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
- (g->data_type &&
- g->data_type != data_type);
- }));
+ struct bkey_ptrs ptrs;
+ union bch_extent_entry *entry;
+restart_drop_ptrs:
+ ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+ bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+ struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry);
+
+ if ((p.ptr.cached &&
+ (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) ||
+ (!p.ptr.cached &&
+ gen_cmp(p.ptr.gen, g->gen) < 0) ||
+ gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX ||
+ (g->data_type &&
+ g->data_type != data_type)) {
+ bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr);
+ goto restart_drop_ptrs;
+ }
+ }
again:
ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
bkey_extent_entry_for_each(ptrs, entry) {
@@ -774,12 +791,6 @@ found:
}
}
- ret = bch2_journal_key_insert_take(c, btree_id, level, new);
- if (ret) {
- kfree(new);
- goto err;
- }
-
if (level)
bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
@@ -793,6 +804,12 @@ found:
bch_info(c, "new key %s", buf.buf);
}
+ ret = bch2_journal_key_insert_take(c, btree_id, level, new);
+ if (ret) {
+ kfree(new);
+ goto err;
+ }
+
*k = bkey_i_to_s_c(new);
}
err:
@@ -811,6 +828,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
struct bch_fs *c = trans->c;
struct bkey deleted = KEY(0, 0, 0);
struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
+ struct printbuf buf = PRINTBUF;
int ret = 0;
deleted.p = k->k->p;
@@ -819,10 +837,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
BUG_ON(bch2_journal_seq_verify &&
k->k->version.lo > atomic64_read(&c->journal.seq));
- ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
- if (ret)
- goto err;
-
if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c,
bkey_version_in_future,
"key version number higher than recorded: %llu > %llu",
@@ -831,52 +845,57 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
atomic64_set(&c->key_version, k->k->version.lo);
}
+ ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
+ if (ret)
+ goto err;
+
+ if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, *k),
+ c, btree_bitmap_not_marked,
+ "btree ptr not marked in member info btree allocated bitmap\n %s",
+ (bch2_bkey_val_to_text(&buf, c, *k),
+ buf.buf))) {
+ mutex_lock(&c->sb_lock);
+ bch2_dev_btree_bitmap_mark(c, *k);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ }
+
ret = commit_do(trans, NULL, NULL, 0,
- bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
+ bch2_key_trigger(trans, btree_id, level, old,
+ unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
fsck_err:
err:
+ printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
}
static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial)
{
- struct bch_fs *c = trans->c;
struct btree_node_iter iter;
struct bkey unpacked;
struct bkey_s_c k;
- struct bkey_buf prev, cur;
int ret = 0;
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret)
+ return ret;
+
if (!btree_node_type_needs_gc(btree_node_type(b)))
return 0;
bch2_btree_node_iter_init_from_start(&iter, b);
- bch2_bkey_buf_init(&prev);
- bch2_bkey_buf_init(&cur);
- bkey_init(&prev.k->k);
while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
&k, initial);
if (ret)
- break;
+ return ret;
bch2_btree_node_iter_advance(&iter, b);
-
- if (b->c.level) {
- bch2_bkey_buf_reassemble(&cur, c, k);
-
- ret = bch2_gc_check_topology(c, b, &prev, cur,
- bch2_btree_node_iter_end(&iter));
- if (ret)
- break;
- }
}
- bch2_bkey_buf_exit(&cur, c);
- bch2_bkey_buf_exit(&prev, c);
- return ret;
+ return 0;
}
static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id,
@@ -925,14 +944,16 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
struct bch_fs *c = trans->c;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
- struct bkey_buf cur, prev;
+ struct bkey_buf cur;
struct printbuf buf = PRINTBUF;
int ret = 0;
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret)
+ return ret;
+
bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
- bch2_bkey_buf_init(&prev);
bch2_bkey_buf_init(&cur);
- bkey_init(&prev.k->k);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
BUG_ON(bpos_lt(k.k->p, b->data->min_key));
@@ -943,20 +964,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
if (ret)
goto fsck_err;
- if (b->c.level) {
- bch2_bkey_buf_reassemble(&cur, c, k);
- k = bkey_i_to_s_c(cur.k);
-
- bch2_btree_and_journal_iter_advance(&iter);
-
- ret = bch2_gc_check_topology(c, b,
- &prev, cur,
- !bch2_btree_and_journal_iter_peek(&iter).k);
- if (ret)
- goto fsck_err;
- } else {
- bch2_btree_and_journal_iter_advance(&iter);
- }
+ bch2_btree_and_journal_iter_advance(&iter);
}
if (b->c.level > target_depth) {
@@ -1015,7 +1023,6 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
}
fsck_err:
bch2_bkey_buf_exit(&cur, c);
- bch2_bkey_buf_exit(&prev, c);
bch2_btree_and_journal_iter_exit(&iter);
printbuf_exit(&buf);
return ret;
@@ -1033,9 +1040,6 @@ static int bch2_gc_btree_init(struct btree_trans *trans,
b = bch2_btree_id_root(c, btree_id)->b;
- if (btree_node_fake(b))
- return 0;
-
six_lock_read(&b->c.lock, NULL, NULL);
printbuf_reset(&buf);
bch2_bpos_to_text(&buf, b->data->min_key);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 34df8ccc5fecc2..9678b2375bedde 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -654,6 +654,7 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
*/
bch2_bset_set_no_aux_tree(b, b->set);
bch2_btree_build_aux_trees(b);
+ b->nr = bch2_btree_node_count_keys(b);
struct bkey_s_c k;
struct bkey unpacked;
@@ -830,7 +831,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b,
(rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0);
}
-static bool __bkey_valid(struct bch_fs *c, struct btree *b,
+static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
struct bset *i, struct bkey_packed *k)
{
if (bkey_p_next(k) > vstruct_last(i))
@@ -839,7 +840,7 @@ static bool __bkey_valid(struct bch_fs *c, struct btree *b,
if (k->format > KEY_FORMAT_CURRENT)
return false;
- if (k->u64s < bkeyp_key_u64s(&b->format, k))
+ if (!bkeyp_u64s_valid(&b->format, k))
return false;
struct printbuf buf = PRINTBUF;
@@ -883,11 +884,13 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
"invalid bkey format %u", k->format))
goto drop_this_key;
- if (btree_err_on(k->u64s < bkeyp_key_u64s(&b->format, k),
+ if (btree_err_on(!bkeyp_u64s_valid(&b->format, k),
-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i,
btree_node_bkey_bad_u64s,
- "k->u64s too small (%u < %u)", k->u64s, bkeyp_key_u64s(&b->format, k)))
+ "bad k->u64s %u (min %u max %lu)", k->u64s,
+ bkeyp_key_u64s(&b->format, k),
+ U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k)))
goto drop_this_key;
if (!write)
@@ -946,13 +949,12 @@ drop_this_key:
* do
*/
- if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
+ if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
for (next_good_key = 1;
next_good_key < (u64 *) vstruct_last(i) - (u64 *) k;
next_good_key++)
- if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
+ if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
goto got_good_key;
-
}
/*
@@ -1263,10 +1265,12 @@ out:
return retry_read;
fsck_err:
if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
- ret == -BCH_ERR_btree_node_read_err_must_retry)
+ ret == -BCH_ERR_btree_node_read_err_must_retry) {
retry_read = 1;
- else
+ } else {
set_btree_node_read_error(b);
+ bch2_btree_lost_data(c, b->c.btree_id);
+ }
goto out;
}
@@ -1327,6 +1331,7 @@ start:
if (!can_retry) {
set_btree_node_read_error(b);
+ bch2_btree_lost_data(c, b->c.btree_id);
break;
}
}
@@ -1335,7 +1340,9 @@ start:
rb->start_time);
bio_put(&rb->bio);
- if (saw_error && !btree_node_read_error(b)) {
+ if (saw_error &&
+ !btree_node_read_error(b) &&
+ c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
printbuf_reset(&buf);
bch2_bpos_to_text(&buf, b->key.k.p);
bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
@@ -1526,9 +1533,10 @@ fsck_err:
ret = -1;
}
- if (ret)
+ if (ret) {
set_btree_node_read_error(b);
- else if (*saw_error)
+ bch2_btree_lost_data(c, b->c.btree_id);
+ } else if (*saw_error)
bch2_btree_node_rewrite_async(c, b);
for (i = 0; i < ra->nr; i++) {
@@ -1657,13 +1665,14 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
prt_str(&buf, "btree node read error: no device to read from\n at ");
bch2_btree_pos_to_text(&buf, c, b);
- bch_err(c, "%s", buf.buf);
+ bch_err_ratelimited(c, "%s", buf.buf);
if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
bch2_fatal_error(c);
set_btree_node_read_error(b);
+ bch2_btree_lost_data(c, b->c.btree_id);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
printbuf_exit(&buf);
@@ -1860,7 +1869,7 @@ static void btree_node_write_work(struct work_struct *work)
} else {
ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
- BCH_WATERMARK_reclaim|
+ BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw,
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 51bcdc6c6d1cda..2a211a4bebd153 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -927,8 +927,22 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
if (ret)
goto err;
} else {
- bch2_bkey_buf_unpack(&tmp, c, l->b,
- bch2_btree_node_iter_peek(&l->iter, l->b));
+ struct bkey_packed *k = bch2_btree_node_iter_peek(&l->iter, l->b);
+ if (!k) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "node not found at pos ");
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_str(&buf, " within parent node ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&l->b->key));
+
+ bch2_fs_fatal_error(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ ret = -BCH_ERR_btree_need_topology_repair;
+ goto err;
+ }
+
+ bch2_bkey_buf_unpack(&tmp, c, l->b, k);
if ((flags & BTREE_ITER_PREFETCH) &&
c->opts.btree_node_prefetch) {
@@ -962,7 +976,6 @@ err:
return ret;
}
-
static int bch2_btree_path_traverse_all(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
@@ -2790,6 +2803,31 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
struct btree_transaction_stats *s = btree_trans_stats(trans);
s->max_mem = max(s->max_mem, new_bytes);
+ if (trans->used_mempool) {
+ if (trans->mem_bytes >= new_bytes)
+ goto out_change_top;
+
+ /* No more space from mempool item, need malloc new one */
+ new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN);
+ if (unlikely(!new_mem)) {
+ bch2_trans_unlock(trans);
+
+ new_mem = kmalloc(new_bytes, GFP_KERNEL);
+ if (!new_mem)
+ return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+
+ ret = bch2_trans_relock(trans);
+ if (ret) {
+ kfree(new_mem);
+ return ERR_PTR(ret);
+ }
+ }
+ memcpy(new_mem, trans->mem, trans->mem_top);
+ trans->used_mempool = false;
+ mempool_free(trans->mem, &c->btree_trans_mem_pool);
+ goto out_new_mem;
+ }
+
new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
if (unlikely(!new_mem)) {
bch2_trans_unlock(trans);
@@ -2798,6 +2836,8 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
new_bytes = BTREE_TRANS_MEM_MAX;
+ memcpy(new_mem, trans->mem, trans->mem_top);
+ trans->used_mempool = true;
kfree(trans->mem);
}
@@ -2811,7 +2851,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
if (ret)
return ERR_PTR(ret);
}
-
+out_new_mem:
trans->mem = new_mem;
trans->mem_bytes = new_bytes;
@@ -2819,7 +2859,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
}
-
+out_change_top:
p = trans->mem + trans->mem_top;
trans->mem_top += size;
memset(p, 0, size);
@@ -3093,7 +3133,7 @@ void bch2_trans_put(struct btree_trans *trans)
if (paths_allocated != trans->_paths_allocated)
kvfree_rcu_mightsleep(paths_allocated);
- if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
+ if (trans->used_mempool)
mempool_free(trans->mem, &c->btree_trans_mem_pool);
else
kfree(trans->mem);
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 24772538e4cc74..1c70836dd7cce4 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -498,8 +498,13 @@ static inline void set_btree_iter_dontneed(struct btree_iter *iter)
{
struct btree_trans *trans = iter->trans;
- if (!trans->restarted)
- btree_iter_path(trans, iter)->preserve = false;
+ if (!iter->path || trans->restarted)
+ return;
+
+ struct btree_path *path = btree_iter_path(trans, iter);
+ path->preserve = false;
+ if (path->ref == 1)
+ path->should_be_locked = false;
}
void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
@@ -642,7 +647,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *);
static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{
- if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8)
+ if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_NORMAL_LIMIT - 8)
return __bch2_btree_trans_too_many_iters(trans);
return 0;
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index 50e04356d72c8e..1e8cf49a693531 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -130,12 +130,30 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree
return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx);
}
+static void journal_iter_verify(struct journal_iter *iter)
+{
+ struct journal_keys *keys = iter->keys;
+ size_t gap_size = keys->size - keys->nr;
+
+ BUG_ON(iter->idx >= keys->gap &&
+ iter->idx < keys->gap + gap_size);
+
+ if (iter->idx < keys->size) {
+ struct journal_key *k = keys->data + iter->idx;
+
+ int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
+ cmp_int(k->level, iter->level);
+ BUG_ON(cmp < 0);
+ }
+}
+
static void journal_iters_fix(struct bch_fs *c)
{
struct journal_keys *keys = &c->journal_keys;
/* The key we just inserted is immediately before the gap: */
size_t gap_end = keys->gap + (keys->size - keys->nr);
- struct btree_and_journal_iter *iter;
+ struct journal_key *new_key = &keys->data[keys->gap - 1];
+ struct journal_iter *iter;
/*
* If an iterator points one after the key we just inserted, decrement
@@ -143,9 +161,14 @@ static void journal_iters_fix(struct bch_fs *c)
* decrement was unnecessary, bch2_btree_and_journal_iter_peek() will
* handle that:
*/
- list_for_each_entry(iter, &c->journal_iters, journal.list)
- if (iter->journal.idx == gap_end)
- iter->journal.idx = keys->gap - 1;
+ list_for_each_entry(iter, &c->journal_iters, list) {
+ journal_iter_verify(iter);
+ if (iter->idx == gap_end &&
+ new_key->btree_id == iter->btree_id &&
+ new_key->level == iter->level)
+ iter->idx = keys->gap - 1;
+ journal_iter_verify(iter);
+ }
}
static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap)
@@ -192,7 +215,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
if (idx > keys->gap)
idx -= keys->size - keys->nr;
+ size_t old_gap = keys->gap;
+
if (keys->nr == keys->size) {
+ journal_iters_move_gap(c, old_gap, keys->size);
+ old_gap = keys->size;
+
struct journal_keys new_keys = {
.nr = keys->nr,
.size = max_t(size_t, keys->size, 8) * 2,
@@ -216,7 +244,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
keys->gap = keys->nr;
}
- journal_iters_move_gap(c, keys->gap, idx);
+ journal_iters_move_gap(c, old_gap, idx);
move_gap(keys, idx);
@@ -261,6 +289,22 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
return bch2_journal_key_insert(c, id, level, &whiteout);
}
+bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree,
+ unsigned level, struct bpos pos)
+{
+ struct journal_keys *keys = &trans->c->journal_keys;
+ size_t idx = bch2_journal_key_search(keys, btree, level, pos);
+
+ if (!trans->journal_replay_not_finished)
+ return false;
+
+ return (idx < keys->size &&
+ keys->data[idx].btree_id == btree &&
+ keys->data[idx].level == level &&
+ bpos_eq(keys->data[idx].k->k.p, pos) &&
+ bkey_deleted(&keys->data[idx].k->k));
+}
+
void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
unsigned level, struct bpos pos)
{
@@ -285,16 +329,21 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)
static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
{
- struct journal_key *k = iter->keys->data + iter->idx;
+ journal_iter_verify(iter);
+
+ while (iter->idx < iter->keys->size) {
+ struct journal_key *k = iter->keys->data + iter->idx;
+
+ int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
+ cmp_int(k->level, iter->level);
+ if (cmp > 0)
+ break;
+ BUG_ON(cmp);
- while (k < iter->keys->data + iter->keys->size &&
- k->btree_id == iter->btree_id &&
- k->level == iter->level) {
if (!k->overwritten)
return bkey_i_to_s_c(k->k);
bch2_journal_iter_advance(iter);
- k = iter->keys->data + iter->idx;
}
return bkey_s_c_null;
@@ -314,6 +363,8 @@ static void bch2_journal_iter_init(struct bch_fs *c,
iter->level = level;
iter->keys = &c->journal_keys;
iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);
+
+ journal_iter_verify(iter);
}
static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
@@ -363,7 +414,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
{
- struct bkey_s_c btree_k, journal_k, ret;
+ struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret;
if (iter->prefetch && iter->journal.level)
btree_and_journal_iter_prefetch(iter);
@@ -375,9 +426,10 @@ again:
bpos_lt(btree_k.k->p, iter->pos))
bch2_journal_iter_advance_btree(iter);
- while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
- bpos_lt(journal_k.k->p, iter->pos))
- bch2_journal_iter_advance(&iter->journal);
+ if (iter->trans->journal_replay_not_finished)
+ while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
+ bpos_lt(journal_k.k->p, iter->pos))
+ bch2_journal_iter_advance(&iter->journal);
ret = journal_k.k &&
(!btree_k.k || bpos_le(journal_k.k->p, btree_k.k->p))
@@ -417,10 +469,15 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
iter->trans = trans;
iter->b = b;
iter->node_iter = node_iter;
- bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
- INIT_LIST_HEAD(&iter->journal.list);
iter->pos = b->data->min_key;
iter->at_end = false;
+ INIT_LIST_HEAD(&iter->journal.list);
+
+ if (trans->journal_replay_not_finished) {
+ bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
+ if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags))
+ list_add(&iter->journal.list, &trans->c->journal_iters);
+ }
}
/*
@@ -435,7 +492,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
bch2_btree_node_iter_init_from_start(&node_iter, b);
__bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
- list_add(&iter->journal.list, &trans->c->journal_iters);
}
/* sort and dedup all keys in the journal: */
@@ -548,3 +604,22 @@ int bch2_journal_keys_sort(struct bch_fs *c)
bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_read, keys->nr);
return 0;
}
+
+void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree,
+ unsigned level_min, unsigned level_max,
+ struct bpos start, struct bpos end)
+{
+ struct journal_keys *keys = &c->journal_keys;
+ size_t dst = 0;
+
+ move_gap(keys, keys->nr);
+
+ darray_for_each(*keys, i)
+ if (!(i->btree_id == btree &&
+ i->level >= level_min &&
+ i->level <= level_max &&
+ bpos_ge(i->k->k.p, start) &&
+ bpos_le(i->k->k.p, end)))
+ keys->data[dst++] = *i;
+ keys->nr = keys->gap = dst;
+}
diff --git a/fs/bcachefs/btree_journal_iter.h b/fs/bcachefs/btree_journal_iter.h
index c9d19da3ea0480..af25046ebcaa76 100644
--- a/fs/bcachefs/btree_journal_iter.h
+++ b/fs/bcachefs/btree_journal_iter.h
@@ -40,8 +40,8 @@ int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
unsigned, struct bkey_i *);
int bch2_journal_key_delete(struct bch_fs *, enum btree_id,
unsigned, struct bpos);
-void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id,
- unsigned, struct bpos);
+bool bch2_key_deleted_in_journal(struct btree_trans *, enum btree_id, unsigned, struct bpos);
+void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, unsigned, struct bpos);
void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
@@ -66,4 +66,8 @@ void bch2_journal_entries_free(struct bch_fs *);
int bch2_journal_keys_sort(struct bch_fs *);
+void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id,
+ unsigned, unsigned,
+ struct bpos, struct bpos);
+
#endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 581edcb0911bfa..88a3582a32757e 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -169,6 +169,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
} else {
mutex_lock(&bc->lock);
list_move_tail(&ck->list, &bc->freed_pcpu);
+ bc->nr_freed_pcpu++;
mutex_unlock(&bc->lock);
}
}
@@ -245,6 +246,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
if (!list_empty(&bc->freed_pcpu)) {
ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list);
list_del_init(&ck->list);
+ bc->nr_freed_pcpu--;
}
mutex_unlock(&bc->lock);
}
@@ -659,7 +661,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
commit_flags |= BCH_WATERMARK_reclaim;
if (ck->journal.seq != journal_last_seq(j) ||
- j->watermark == BCH_WATERMARK_stripe)
+ !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags))
commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
ret = bch2_btree_iter_traverse(&b_iter) ?:
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index b9b151e693ed60..f2caf491957efc 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -440,33 +440,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b)
{
- struct btree_path *linked;
- unsigned i, iter;
- int ret;
-
- /*
- * XXX BIG FAT NOTICE
- *
- * Drop all read locks before taking a write lock:
- *
- * This is a hack, because bch2_btree_node_lock_write_nofail() is a
- * hack - but by dropping read locks first, this should never fail, and
- * we only use this in code paths where whatever read locks we've
- * already taken are no longer needed:
- */
-
- trans_for_each_path(trans, linked, iter) {
- if (!linked->nodes_locked)
- continue;
-
- for (i = 0; i < BTREE_MAX_DEPTH; i++)
- if (btree_node_read_locked(linked, i)) {
- btree_node_unlock(trans, linked, i);
- btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
- }
- }
-
- ret = __btree_node_lock_write(trans, path, b, true);
+ int ret = __btree_node_lock_write(trans, path, b, true);
BUG_ON(ret);
}
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
new file mode 100644
index 00000000000000..866bd278439f8b
--- /dev/null
+++ b/fs/bcachefs/btree_node_scan.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_journal_iter.h"
+#include "btree_node_scan.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "error.h"
+#include "journal_io.h"
+#include "recovery_passes.h"
+
+#include <linux/kthread.h>
+#include <linux/sort.h>
+
+struct find_btree_nodes_worker {
+ struct closure *cl;
+ struct find_btree_nodes *f;
+ struct bch_dev *ca;
+};
+
+static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n)
+{
+ prt_printf(out, "%s l=%u seq=%u cookie=%llx ", bch2_btree_id_str(n->btree_id), n->level, n->seq, n->cookie);
+ bch2_bpos_to_text(out, n->min_key);
+ prt_str(out, "-");
+ bch2_bpos_to_text(out, n->max_key);
+
+ if (n->range_updated)
+ prt_str(out, " range updated");
+ if (n->overwritten)
+ prt_str(out, " overwritten");
+
+ for (unsigned i = 0; i < n->nr_ptrs; i++) {
+ prt_char(out, ' ');
+ bch2_extent_ptr_to_text(out, c, n->ptrs + i);
+ }
+}
+
+static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes)
+{
+ printbuf_indent_add(out, 2);
+ darray_for_each(nodes, i) {
+ found_btree_node_to_text(out, c, i);
+ prt_newline(out);
+ }
+ printbuf_indent_sub(out, 2);
+}
+
+static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f)
+{
+ struct bkey_i_btree_ptr_v2 *bp = bkey_btree_ptr_v2_init(k);
+
+ set_bkey_val_u64s(&bp->k, sizeof(struct bch_btree_ptr_v2) / sizeof(u64) + f->nr_ptrs);
+ bp->k.p = f->max_key;
+ bp->v.seq = cpu_to_le64(f->cookie);
+ bp->v.sectors_written = 0;
+ bp->v.flags = 0;
+ bp->v.min_key = f->min_key;
+ SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated);
+ memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs);
+}
+
+static bool found_btree_node_is_readable(struct btree_trans *trans,
+ const struct found_btree_node *f)
+{
+ struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k;
+
+ found_btree_node_to_key(&k.k, f);
+
+ struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false);
+ bool ret = !IS_ERR_OR_NULL(b);
+ if (ret)
+ six_unlock_read(&b->c.lock);
+
+ /*
+ * We might update this node's range; if that happens, we need the node
+ * to be re-read so the read path can trim keys that are no longer in
+ * this node
+ */
+ if (b != btree_node_root(trans->c, b))
+ bch2_btree_node_evict(trans, &k.k);
+ return ret;
+}
+
+static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
+{
+ const struct found_btree_node *l = _l;
+ const struct found_btree_node *r = _r;
+
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ cmp_int(l->level, r->level) ?:
+ cmp_int(l->cookie, r->cookie);
+}
+
+/*
+ * Given two found btree nodes, if their sequence numbers are equal, take the
+ * one that's readable:
+ */
+static int found_btree_node_cmp_time(const struct found_btree_node *l,
+ const struct found_btree_node *r)
+{
+ return cmp_int(l->seq, r->seq);
+}
+
+static int found_btree_node_cmp_pos(const void *_l, const void *_r)
+{
+ const struct found_btree_node *l = _l;
+ const struct found_btree_node *r = _r;
+
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ -cmp_int(l->level, r->level) ?:
+ bpos_cmp(l->min_key, r->min_key) ?:
+ -found_btree_node_cmp_time(l, r);
+}
+
+static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
+ struct bio *bio, struct btree_node *bn, u64 offset)
+{
+ struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
+ bio->bi_iter.bi_sector = offset;
+ bch2_bio_map(bio, bn, PAGE_SIZE);
+
+ submit_bio_wait(bio);
+ if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
+ "IO error in try_read_btree_node() at %llu: %s",
+ offset, bch2_blk_status_to_str(bio->bi_status)))
+ return;
+
+ if (le64_to_cpu(bn->magic) != bset_magic(c))
+ return;
+
+ if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
+ struct nonce nonce = btree_nonce(&bn->keys, 0);
+ unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
+
+ bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes);
+ }
+
+ if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
+ return;
+
+ if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH)
+ return;
+
+ rcu_read_lock();
+ struct found_btree_node n = {
+ .btree_id = BTREE_NODE_ID(bn),
+ .level = BTREE_NODE_LEVEL(bn),
+ .seq = BTREE_NODE_SEQ(bn),
+ .cookie = le64_to_cpu(bn->keys.seq),
+ .min_key = bn->min_key,
+ .max_key = bn->max_key,
+ .nr_ptrs = 1,
+ .ptrs[0].type = 1 << BCH_EXTENT_ENTRY_ptr,
+ .ptrs[0].offset = offset,
+ .ptrs[0].dev = ca->dev_idx,
+ .ptrs[0].gen = *bucket_gen(ca, sector_to_bucket(ca, offset)),
+ };
+ rcu_read_unlock();
+
+ if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
+ mutex_lock(&f->lock);
+ if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
+ bch_err(c, "try_read_btree_node() can't handle endian conversion");
+ f->ret = -EINVAL;
+ goto unlock;
+ }
+
+ if (darray_push(&f->nodes, n))
+ f->ret = -ENOMEM;
+unlock:
+ mutex_unlock(&f->lock);
+ }
+}
+
+static int read_btree_nodes_worker(void *p)
+{
+ struct find_btree_nodes_worker *w = p;
+ struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
+ struct bch_dev *ca = w->ca;
+ void *buf = (void *) __get_free_page(GFP_KERNEL);
+ struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
+ unsigned long last_print = jiffies;
+
+ if (!buf || !bio) {
+ bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
+ w->f->ret = -ENOMEM;
+ goto err;
+ }
+
+ for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++)
+ for (unsigned bucket_offset = 0;
+ bucket_offset + btree_sectors(c) <= ca->mi.bucket_size;
+ bucket_offset += btree_sectors(c)) {
+ if (time_after(jiffies, last_print + HZ * 30)) {
+ u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset;
+ u64 end_sector = ca->mi.nbuckets * ca->mi.bucket_size;
+
+ bch_info(ca, "%s: %2u%% done", __func__,
+ (unsigned) div64_u64(cur_sector * 100, end_sector));
+ last_print = jiffies;
+ }
+
+ u64 sector = bucket * ca->mi.bucket_size + bucket_offset;
+
+ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
+ !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
+ continue;
+
+ try_read_btree_node(w->f, ca, bio, buf, sector);
+ }
+err:
+ bio_put(bio);
+ free_page((unsigned long) buf);
+ percpu_ref_get(&ca->io_ref);
+ closure_put(w->cl);
+ kfree(w);
+ return 0;
+}
+
+static int read_btree_nodes(struct find_btree_nodes *f)
+{
+ struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+ struct closure cl;
+ int ret = 0;
+
+ closure_init_stack(&cl);
+
+ for_each_online_member(c, ca) {
+ if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
+ continue;
+
+ struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
+ struct task_struct *t;
+
+ if (!w) {
+ percpu_ref_put(&ca->io_ref);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ percpu_ref_get(&ca->io_ref);
+ closure_get(&cl);
+ w->cl = &cl;
+ w->f = f;
+ w->ca = ca;
+
+ t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
+ ret = IS_ERR_OR_NULL(t);
+ if (ret) {
+ percpu_ref_put(&ca->io_ref);
+ closure_put(&cl);
+ f->ret = ret;
+ bch_err(c, "error starting kthread: %i", ret);
+ break;
+ }
+ }
+err:
+ closure_sync(&cl);
+ return f->ret ?: ret;
+}
+
+static void bubble_up(struct found_btree_node *n, struct found_btree_node *end)
+{
+ while (n + 1 < end &&
+ found_btree_node_cmp_pos(n, n + 1) > 0) {
+ swap(n[0], n[1]);
+ n++;
+ }
+}
+
+static int handle_overwrites(struct bch_fs *c,
+ struct found_btree_node *start,
+ struct found_btree_node *end)
+{
+ struct found_btree_node *n;
+again:
+ for (n = start + 1;
+ n < end &&
+ n->btree_id == start->btree_id &&
+ n->level == start->level &&
+ bpos_lt(n->min_key, start->max_key);
+ n++) {
+ int cmp = found_btree_node_cmp_time(start, n);
+
+ if (cmp > 0) {
+ if (bpos_cmp(start->max_key, n->max_key) >= 0)
+ n->overwritten = true;
+ else {
+ n->range_updated = true;
+ n->min_key = bpos_successor(start->max_key);
+ n->range_updated = true;
+ bubble_up(n, end);
+ goto again;
+ }
+ } else if (cmp < 0) {
+ BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0);
+
+ start->max_key = bpos_predecessor(n->min_key);
+ start->range_updated = true;
+ } else {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "overlapping btree nodes with same seq! halting\n ");
+ found_btree_node_to_text(&buf, c, start);
+ prt_str(&buf, "\n ");
+ found_btree_node_to_text(&buf, c, n);
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+ }
+
+ return 0;
+}
+
+int bch2_scan_for_btree_nodes(struct bch_fs *c)
+{
+ struct find_btree_nodes *f = &c->found_btree_nodes;
+ struct printbuf buf = PRINTBUF;
+ size_t dst;
+ int ret = 0;
+
+ if (f->nodes.nr)
+ return 0;
+
+ mutex_init(&f->lock);
+
+ ret = read_btree_nodes(f);
+ if (ret)
+ return ret;
+
+ if (!f->nodes.nr) {
+ bch_err(c, "%s: no btree nodes found", __func__);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ if (0 && c->opts.verbose) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "%s: nodes found:\n", __func__);
+ found_btree_nodes_to_text(&buf, c, f->nodes);
+ bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ }
+
+ sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);
+
+ dst = 0;
+ darray_for_each(f->nodes, i) {
+ struct found_btree_node *prev = dst ? f->nodes.data + dst - 1 : NULL;
+
+ if (prev &&
+ prev->cookie == i->cookie) {
+ if (prev->nr_ptrs == ARRAY_SIZE(prev->ptrs)) {
+ bch_err(c, "%s: found too many replicas for btree node", __func__);
+ ret = -EINVAL;
+ goto err;
+ }
+ prev->ptrs[prev->nr_ptrs++] = i->ptrs[0];
+ } else {
+ f->nodes.data[dst++] = *i;
+ }
+ }
+ f->nodes.nr = dst;
+
+ sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
+
+ if (0 && c->opts.verbose) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__);
+ found_btree_nodes_to_text(&buf, c, f->nodes);
+ bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ }
+
+ dst = 0;
+ darray_for_each(f->nodes, i) {
+ if (i->overwritten)
+ continue;
+
+ ret = handle_overwrites(c, i, &darray_top(f->nodes));
+ if (ret)
+ goto err;
+
+ BUG_ON(i->overwritten);
+ f->nodes.data[dst++] = *i;
+ }
+ f->nodes.nr = dst;
+
+ if (c->opts.verbose) {
+ printbuf_reset(&buf);
+ prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
+ found_btree_nodes_to_text(&buf, c, f->nodes);
+ bch2_print_string_as_lines(KERN_INFO, buf.buf);
+ }
+
+ eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
+err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+static int found_btree_node_range_start_cmp(const void *_l, const void *_r)
+{
+ const struct found_btree_node *l = _l;
+ const struct found_btree_node *r = _r;
+
+ return cmp_int(l->btree_id, r->btree_id) ?:
+ -cmp_int(l->level, r->level) ?:
+ bpos_cmp(l->max_key, r->min_key);
+}
+
+#define for_each_found_btree_node_in_range(_f, _search, _idx) \
+ for (size_t _idx = eytzinger0_find_gt((_f)->nodes.data, (_f)->nodes.nr, \
+ sizeof((_f)->nodes.data[0]), \
+ found_btree_node_range_start_cmp, &search); \
+ _idx < (_f)->nodes.nr && \
+ (_f)->nodes.data[_idx].btree_id == _search.btree_id && \
+ (_f)->nodes.data[_idx].level == _search.level && \
+ bpos_lt((_f)->nodes.data[_idx].min_key, _search.max_key); \
+ _idx = eytzinger0_next(_idx, (_f)->nodes.nr))
+
+bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b)
+{
+ struct find_btree_nodes *f = &c->found_btree_nodes;
+
+ struct found_btree_node search = {
+ .btree_id = b->c.btree_id,
+ .level = b->c.level,
+ .min_key = b->data->min_key,
+ .max_key = b->key.k.p,
+ };
+
+ for_each_found_btree_node_in_range(f, search, idx)
+ if (f->nodes.data[idx].seq > BTREE_NODE_SEQ(b->data))
+ return true;
+ return false;
+}
+
+bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
+{
+ struct found_btree_node search = {
+ .btree_id = btree,
+ .level = 0,
+ .min_key = POS_MIN,
+ .max_key = SPOS_MAX,
+ };
+
+ for_each_found_btree_node_in_range(&c->found_btree_nodes, search, idx)
+ return true;
+ return false;
+}
+
+int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
+ unsigned level, struct bpos node_min, struct bpos node_max)
+{
+ if (btree_id_is_alloc(btree))
+ return 0;
+
+ struct find_btree_nodes *f = &c->found_btree_nodes;
+
+ int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ if (ret)
+ return ret;
+
+ if (c->opts.verbose) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level);
+ bch2_bpos_to_text(&buf, node_min);
+ prt_str(&buf, " - ");
+ bch2_bpos_to_text(&buf, node_max);
+
+ bch_info(c, "%s(): %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ struct found_btree_node search = {
+ .btree_id = btree,
+ .level = level,
+ .min_key = node_min,
+ .max_key = node_max,
+ };
+
+ for_each_found_btree_node_in_range(f, search, idx) {
+ struct found_btree_node n = f->nodes.data[idx];
+
+ n.range_updated |= bpos_lt(n.min_key, node_min);
+ n.min_key = bpos_max(n.min_key, node_min);
+
+ n.range_updated |= bpos_gt(n.max_key, node_max);
+ n.max_key = bpos_min(n.max_key, node_max);
+
+ struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
+
+ found_btree_node_to_key(&tmp.k, &n);
+
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
+ bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+
+ BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL));
+
+ ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void bch2_find_btree_nodes_exit(struct find_btree_nodes *f)
+{
+ darray_exit(&f->nodes);
+}
diff --git a/fs/bcachefs/btree_node_scan.h b/fs/bcachefs/btree_node_scan.h
new file mode 100644
index 00000000000000..08687b209787be
--- /dev/null
+++ b/fs/bcachefs/btree_node_scan.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_NODE_SCAN_H
+#define _BCACHEFS_BTREE_NODE_SCAN_H
+
+int bch2_scan_for_btree_nodes(struct bch_fs *);
+bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *);
+bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id);
+int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos);
+void bch2_find_btree_nodes_exit(struct find_btree_nodes *);
+
+#endif /* _BCACHEFS_BTREE_NODE_SCAN_H */
diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h
new file mode 100644
index 00000000000000..abb7b27d556a9f
--- /dev/null
+++ b/fs/bcachefs/btree_node_scan_types.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_NODE_SCAN_TYPES_H
+#define _BCACHEFS_BTREE_NODE_SCAN_TYPES_H
+
+#include "darray.h"
+
+struct found_btree_node {
+ bool range_updated:1;
+ bool overwritten:1;
+ u8 btree_id;
+ u8 level;
+ u32 seq;
+ u64 cookie;
+
+ struct bpos min_key;
+ struct bpos max_key;
+
+ unsigned nr_ptrs;
+ struct bch_extent_ptr ptrs[BCH_REPLICAS_MAX];
+};
+
+typedef DARRAY(struct found_btree_node) found_btree_nodes;
+
+struct find_btree_nodes {
+ int ret;
+ struct mutex lock;
+ found_btree_nodes nodes;
+};
+
+#endif /* _BCACHEFS_BTREE_NODE_SCAN_TYPES_H */
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 30d69a6d133eec..bbec91e8e6506f 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -318,7 +318,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
!(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
i->k->k.p.snapshot &&
- bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
+ bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot) > 0);
}
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
@@ -397,12 +397,13 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
struct bkey_cached *ck = (void *) path->l[0].b;
unsigned new_u64s;
struct bkey_i *new_k;
+ unsigned watermark = flags & BCH_WATERMARK_MASK;
EBUG_ON(path->level);
- if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
- bch2_btree_key_cache_must_wait(c) &&
- !(flags & BCH_TRANS_COMMIT_journal_reclaim))
+ if (watermark < BCH_WATERMARK_reclaim &&
+ !test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
+ bch2_btree_key_cache_must_wait(c))
return -BCH_ERR_btree_insert_need_journal_reclaim;
/*
@@ -499,9 +500,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
}
static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
- struct btree_insert_entry *btree_id_start)
+ unsigned btree_id_start)
{
- struct btree_insert_entry *i;
bool trans_trigger_run;
int ret, overwrite;
@@ -514,13 +514,13 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
do {
trans_trigger_run = false;
- for (i = btree_id_start;
- i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
+ for (unsigned i = btree_id_start;
+ i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
i++) {
- if (i->btree_id != btree_id)
+ if (trans->updates[i].btree_id != btree_id)
continue;
- ret = run_one_trans_trigger(trans, i, overwrite);
+ ret = run_one_trans_trigger(trans, trans->updates + i, overwrite);
if (ret < 0)
return ret;
if (ret)
@@ -534,8 +534,7 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
{
- struct btree_insert_entry *btree_id_start = trans->updates;
- unsigned btree_id = 0;
+ unsigned btree_id = 0, btree_id_start = 0;
int ret = 0;
/*
@@ -549,8 +548,8 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
if (btree_id == BTREE_ID_alloc)
continue;
- while (btree_id_start < trans->updates + trans->nr_updates &&
- btree_id_start->btree_id < btree_id)
+ while (btree_id_start < trans->nr_updates &&
+ trans->updates[btree_id_start].btree_id < btree_id)
btree_id_start++;
ret = run_btree_triggers(trans, btree_id, btree_id_start);
@@ -558,11 +557,13 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
return ret;
}
- trans_for_each_update(trans, i) {
+ for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+ struct btree_insert_entry *i = trans->updates + idx;
+
if (i->btree_id > BTREE_ID_alloc)
break;
if (i->btree_id == BTREE_ID_alloc) {
- ret = run_btree_triggers(trans, BTREE_ID_alloc, i);
+ ret = run_btree_triggers(trans, BTREE_ID_alloc, idx);
if (ret)
return ret;
break;
@@ -826,7 +827,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
struct bch_fs *c = trans->c;
int ret = 0, u64s_delta = 0;
- trans_for_each_update(trans, i) {
+ for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+ struct btree_insert_entry *i = trans->updates + idx;
if (i->cached)
continue;
@@ -887,6 +889,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
int ret, unsigned long trace_ip)
{
struct bch_fs *c = trans->c;
+ enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
switch (ret) {
case -BCH_ERR_btree_insert_btree_node_full:
@@ -905,7 +908,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
* flag
*/
if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
- (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) {
+ watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
break;
}
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 9404d96c38f3b3..e0c982a4195c76 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -364,7 +364,21 @@ struct btree_insert_entry {
unsigned long ip_allocated;
};
+/* Number of btree paths we preallocate, usually enough */
#define BTREE_ITER_INITIAL 64
+/*
+ * Lmiit for btree_trans_too_many_iters(); this is enough that almost all code
+ * paths should run inside this limit, and if they don't it usually indicates a
+ * bug (leaking/duplicated btree paths).
+ *
+ * exception: some fsck paths
+ *
+ * bugs with excessive path usage seem to have possibly been eliminated now, so
+ * we might consider eliminating this (and btree_trans_too_many_iter()) at some
+ * point.
+ */
+#define BTREE_ITER_NORMAL_LIMIT 256
+/* never exceed limit */
#define BTREE_ITER_MAX (1U << 10)
struct btree_trans_commit_hook;
diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c
index a4b40c1656a54b..8e47e260eba59b 100644
--- a/fs/bcachefs/btree_update.c
+++ b/fs/bcachefs/btree_update.c
@@ -38,6 +38,9 @@ static noinline int extent_front_merge(struct btree_trans *trans,
struct bkey_i *update;
int ret;
+ if (unlikely(trans->journal_replay_not_finished))
+ return 0;
+
update = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(update);
if (ret)
@@ -69,6 +72,9 @@ static noinline int extent_back_merge(struct btree_trans *trans,
struct bch_fs *c = trans->c;
int ret;
+ if (unlikely(trans->journal_replay_not_finished))
+ return 0;
+
ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
if (ret < 0)
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index b2f5f2e50f7e19..6030c396754f6f 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
+#include "bkey_buf.h"
#include "bkey_methods.h"
#include "btree_cache.h"
#include "btree_gc.h"
@@ -18,12 +19,21 @@
#include "journal.h"
#include "journal_reclaim.h"
#include "keylist.h"
+#include "recovery_passes.h"
#include "replicas.h"
+#include "sb-members.h"
#include "super-io.h"
#include "trace.h"
#include <linux/random.h>
+static const char * const bch2_btree_update_modes[] = {
+#define x(t) #t,
+ BTREE_UPDATE_MODES()
+#undef x
+ NULL
+};
+
static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
btree_path_idx_t, struct btree *, struct keylist *);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
@@ -44,56 +54,103 @@ static btree_path_idx_t get_unlocked_mut_path(struct btree_trans *trans,
return path_idx;
}
-/* Debug code: */
-
/*
* Verify that child nodes correctly span parent node's range:
*/
-static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
+int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
{
-#ifdef CONFIG_BCACHEFS_DEBUG
- struct bpos next_node = b->data->min_key;
- struct btree_node_iter iter;
+ struct bch_fs *c = trans->c;
+ struct bpos node_min = b->key.k.type == KEY_TYPE_btree_ptr_v2
+ ? bkey_i_to_btree_ptr_v2(&b->key)->v.min_key
+ : b->data->min_key;
+ struct btree_and_journal_iter iter;
struct bkey_s_c k;
- struct bkey_s_c_btree_ptr_v2 bp;
- struct bkey unpacked;
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+ struct printbuf buf = PRINTBUF;
+ struct bkey_buf prev;
+ int ret = 0;
- BUG_ON(!b->c.level);
+ BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
+ b->data->min_key));
- if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
- return;
+ if (!b->c.level)
+ return 0;
- bch2_btree_node_iter_init_from_start(&iter, b);
+ bch2_bkey_buf_init(&prev);
+ bkey_init(&prev.k->k);
+ bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
- while (1) {
- k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked);
+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
if (k.k->type != KEY_TYPE_btree_ptr_v2)
- break;
- bp = bkey_s_c_to_btree_ptr_v2(k);
+ goto out;
- if (!bpos_eq(next_node, bp.v->min_key)) {
- bch2_dump_btree_node(c, b);
- bch2_bpos_to_text(&buf1, next_node);
- bch2_bpos_to_text(&buf2, bp.v->min_key);
- panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf);
- }
+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
- bch2_btree_node_iter_advance(&iter, b);
+ struct bpos expected_min = bkey_deleted(&prev.k->k)
+ ? node_min
+ : bpos_successor(prev.k->k.p);
- if (bch2_btree_node_iter_end(&iter)) {
- if (!bpos_eq(k.k->p, b->key.k.p)) {
- bch2_dump_btree_node(c, b);
- bch2_bpos_to_text(&buf1, b->key.k.p);
- bch2_bpos_to_text(&buf2, k.k->p);
- panic("expected end %s got %s\n", buf1.buf, buf2.buf);
- }
- break;
+ if (!bpos_eq(expected_min, bp.v->min_key)) {
+ bch2_topology_error(c);
+
+ printbuf_reset(&buf);
+ prt_str(&buf, "end of prev node doesn't match start of next node\n"),
+ prt_printf(&buf, " in btree %s level %u node ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, "\n prev ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
+ prt_str(&buf, "\n next ");
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ need_fsck_err(c, btree_node_topology_bad_min_key, "%s", buf.buf);
+ goto topology_repair;
}
- next_node = bpos_successor(k.k->p);
+ bch2_bkey_buf_reassemble(&prev, c, k);
+ bch2_btree_and_journal_iter_advance(&iter);
+ }
+
+ if (bkey_deleted(&prev.k->k)) {
+ bch2_topology_error(c);
+
+ printbuf_reset(&buf);
+ prt_str(&buf, "empty interior node\n");
+ prt_printf(&buf, " in btree %s level %u node ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+ need_fsck_err(c, btree_node_topology_empty_interior_node, "%s", buf.buf);
+ goto topology_repair;
+ } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
+ bch2_topology_error(c);
+
+ printbuf_reset(&buf);
+ prt_str(&buf, "last child node doesn't end at end of parent node\n");
+ prt_printf(&buf, " in btree %s level %u node ",
+ bch2_btree_id_str(b->c.btree_id), b->c.level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+ prt_str(&buf, "\n last key ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
+
+ need_fsck_err(c, btree_node_topology_bad_max_key, "%s", buf.buf);
+ goto topology_repair;
}
-#endif
+out:
+fsck_err:
+ bch2_btree_and_journal_iter_exit(&iter);
+ bch2_bkey_buf_exit(&prev, c);
+ printbuf_exit(&buf);
+ return ret;
+topology_repair:
+ if ((c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) &&
+ c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) {
+ bch2_inconsistent_error(c);
+ ret = -BCH_ERR_btree_need_topology_repair;
+ } else {
+ ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
+ }
+ goto out;
}
/* Calculate ideal packed bkey format for new btree nodes: */
@@ -254,7 +311,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct open_buckets obs = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
- unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim
+ unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim
? BTREE_NODE_RESERVE
: 0;
int ret;
@@ -549,6 +606,26 @@ static void btree_update_add_key(struct btree_update *as,
bch2_keylist_push(keys);
}
+static bool btree_update_new_nodes_marked_sb(struct btree_update *as)
+{
+ for_each_keylist_key(&as->new_keys, k)
+ if (!bch2_dev_btree_bitmap_marked(as->c, bkey_i_to_s_c(k)))
+ return false;
+ return true;
+}
+
+static void btree_update_new_nodes_mark_sb(struct btree_update *as)
+{
+ struct bch_fs *c = as->c;
+
+ mutex_lock(&c->sb_lock);
+ for_each_keylist_key(&as->new_keys, k)
+ bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k));
+
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
+
/*
* The transactional part of an interior btree node update, where we journal the
* update we did to the interior node and update alloc info:
@@ -606,6 +683,9 @@ static void btree_update_nodes_written(struct btree_update *as)
if (ret)
goto err;
+ if (!btree_update_new_nodes_marked_sb(as))
+ btree_update_new_nodes_mark_sb(as);
+
/*
* Wait for any in flight writes to finish before we free the old nodes
* on disk:
@@ -638,7 +718,7 @@ static void btree_update_nodes_written(struct btree_update *as)
* which may require allocations as well.
*/
ret = commit_do(trans, &as->disk_res, &journal_seq,
- BCH_WATERMARK_reclaim|
+ BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_no_enospc|
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_journal_reclaim,
@@ -648,9 +728,13 @@ static void btree_update_nodes_written(struct btree_update *as)
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
"%s", bch2_err_str(ret));
err:
- if (as->b) {
-
- b = as->b;
+ /*
+ * We have to be careful because another thread might be getting ready
+ * to free as->b and calling btree_update_reparent() on us - we'll
+ * recheck under btree_update_lock below:
+ */
+ b = READ_ONCE(as->b);
+ if (b) {
btree_path_idx_t path_idx = get_unlocked_mut_path(trans,
as->btree_id, b->c.level, b->key.k.p);
struct btree_path *path = trans->paths + path_idx;
@@ -794,15 +878,17 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
{
struct bch_fs *c = as->c;
- mutex_lock(&c->btree_interior_update_lock);
- list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
-
- BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+ BUG_ON(as->mode != BTREE_UPDATE_none);
+ BUG_ON(as->update_level_end < b->c.level);
BUG_ON(!btree_node_dirty(b));
BUG_ON(!b->c.level);
- as->mode = BTREE_INTERIOR_UPDATING_NODE;
+ mutex_lock(&c->btree_interior_update_lock);
+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+
+ as->mode = BTREE_UPDATE_node;
as->b = b;
+ as->update_level_end = b->c.level;
set_btree_node_write_blocked(b);
list_add(&as->write_blocked_list, &b->write_blocked);
@@ -824,7 +910,7 @@ static void btree_update_reparent(struct btree_update *as,
lockdep_assert_held(&c->btree_interior_update_lock);
child->b = NULL;
- child->mode = BTREE_INTERIOR_UPDATING_AS;
+ child->mode = BTREE_UPDATE_update;
bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal,
bch2_update_reparent_journal_pin_flush);
@@ -835,7 +921,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
struct bkey_i *insert = &b->key;
struct bch_fs *c = as->c;
- BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+ BUG_ON(as->mode != BTREE_UPDATE_none);
BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
ARRAY_SIZE(as->journal_entries));
@@ -849,7 +935,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
mutex_lock(&c->btree_interior_update_lock);
list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
- as->mode = BTREE_INTERIOR_UPDATING_ROOT;
+ as->mode = BTREE_UPDATE_root;
mutex_unlock(&c->btree_interior_update_lock);
}
@@ -1027,7 +1113,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
struct bch_fs *c = as->c;
u64 start_time = as->start_time;
- BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE);
+ BUG_ON(as->mode == BTREE_UPDATE_none);
if (as->took_gc_lock)
up_read(&as->c->gc_lock);
@@ -1044,7 +1130,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
static struct btree_update *
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
- unsigned level, bool split, unsigned flags)
+ unsigned level_start, bool split, unsigned flags)
{
struct bch_fs *c = trans->c;
struct btree_update *as;
@@ -1052,7 +1138,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc)
? BCH_DISK_RESERVATION_NOFAIL : 0;
unsigned nr_nodes[2] = { 0, 0 };
- unsigned update_level = level;
+ unsigned level_end = level_start;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
int ret = 0;
u32 restart_count = trans->restart_count;
@@ -1067,34 +1153,30 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
flags &= ~BCH_WATERMARK_MASK;
flags |= watermark;
- if (watermark < c->journal.watermark) {
- struct journal_res res = { 0 };
- unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
-
- if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
- watermark != BCH_WATERMARK_reclaim)
- journal_flags |= JOURNAL_RES_GET_NONBLOCK;
+ if (watermark < BCH_WATERMARK_reclaim &&
+ test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) {
+ if (flags & BCH_TRANS_COMMIT_journal_reclaim)
+ return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock);
- ret = drop_locks_do(trans,
- bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
- if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
- ret = -BCH_ERR_journal_reclaim_would_deadlock;
+ bch2_trans_unlock(trans);
+ wait_event(c->journal.wait, !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags));
+ ret = bch2_trans_relock(trans);
if (ret)
return ERR_PTR(ret);
}
while (1) {
- nr_nodes[!!update_level] += 1 + split;
- update_level++;
+ nr_nodes[!!level_end] += 1 + split;
+ level_end++;
- ret = bch2_btree_path_upgrade(trans, path, update_level + 1);
+ ret = bch2_btree_path_upgrade(trans, path, level_end + 1);
if (ret)
return ERR_PTR(ret);
- if (!btree_path_node(path, update_level)) {
+ if (!btree_path_node(path, level_end)) {
/* Allocating new root? */
nr_nodes[1] += split;
- update_level = BTREE_MAX_DEPTH;
+ level_end = BTREE_MAX_DEPTH;
break;
}
@@ -1102,11 +1184,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
* Always check for space for two keys, even if we won't have to
* split at prior level - it might have been a merge instead:
*/
- if (bch2_btree_node_insert_fits(path->l[update_level].b,
+ if (bch2_btree_node_insert_fits(path->l[level_end].b,
BKEY_BTREE_PTR_U64s_MAX * 2))
break;
- split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
+ split = path->l[level_end].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
}
if (!down_read_trylock(&c->gc_lock)) {
@@ -1120,13 +1202,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS);
memset(as, 0, sizeof(*as));
closure_init(&as->cl, NULL);
- as->c = c;
- as->start_time = start_time;
- as->ip_started = _RET_IP_;
- as->mode = BTREE_INTERIOR_NO_UPDATE;
- as->took_gc_lock = true;
- as->btree_id = path->btree_id;
- as->update_level = update_level;
+ as->c = c;
+ as->start_time = start_time;
+ as->ip_started = _RET_IP_;
+ as->mode = BTREE_UPDATE_none;
+ as->watermark = watermark;
+ as->took_gc_lock = true;
+ as->btree_id = path->btree_id;
+ as->update_level_start = level_start;
+ as->update_level_end = level_end;
INIT_LIST_HEAD(&as->list);
INIT_LIST_HEAD(&as->unwritten_list);
INIT_LIST_HEAD(&as->write_blocked_list);
@@ -1168,7 +1252,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
*/
if (bch2_err_matches(ret, ENOSPC) &&
(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
- watermark != BCH_WATERMARK_reclaim) {
+ watermark < BCH_WATERMARK_reclaim) {
ret = -BCH_ERR_journal_reclaim_would_deadlock;
goto err;
}
@@ -1220,23 +1304,29 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
bch2_recalc_btree_reserve(c);
}
-static void bch2_btree_set_root(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b)
+static int bch2_btree_set_root(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ bool nofail)
{
struct bch_fs *c = as->c;
- struct btree *old;
trace_and_count(c, btree_node_set_root, trans, b);
- old = btree_node_root(c, b);
+ struct btree *old = btree_node_root(c, b);
/*
* Ensure no one is using the old root while we switch to the
* new root:
*/
- bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+ if (nofail) {
+ bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+ } else {
+ int ret = bch2_btree_node_lock_write(trans, path, &old->c);
+ if (ret)
+ return ret;
+ }
bch2_btree_set_root_inmem(c, b);
@@ -1250,6 +1340,7 @@ static void bch2_btree_set_root(struct btree_update *as,
* depend on the new root would have to update the new root.
*/
bch2_btree_node_unlock_write(trans, path, old);
+ return 0;
}
/* Interior node updates: */
@@ -1316,12 +1407,12 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
}
static void
-__bch2_btree_insert_keys_interior(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b,
- struct btree_node_iter node_iter,
- struct keylist *keys)
+bch2_btree_insert_keys_interior(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ struct btree_node_iter node_iter,
+ struct keylist *keys)
{
struct bkey_i *insert = bch2_keylist_front(keys);
struct bkey_packed *k;
@@ -1380,9 +1471,16 @@ static void __btree_split_node(struct btree_update *as,
if (bkey_deleted(k))
continue;
+ uk = bkey_unpack_key(b, k);
+
+ if (b->c.level &&
+ u64s < n1_u64s &&
+ u64s + k->u64s >= n1_u64s &&
+ bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p))
+ n1_u64s += k->u64s;
+
i = u64s >= n1_u64s;
u64s += k->u64s;
- uk = bkey_unpack_key(b, k);
if (!i)
n1_pos = uk.p;
bch2_bkey_format_add_key(&format[i], &uk);
@@ -1441,8 +1539,7 @@ static void __btree_split_node(struct btree_update *as,
bch2_verify_btree_nr_keys(n[i]);
- if (b->c.level)
- btree_node_interior_verify(as->c, n[i]);
+ BUG_ON(bch2_btree_node_check_topology(trans, n[i]));
}
}
@@ -1471,9 +1568,9 @@ static void btree_split_insert_keys(struct btree_update *as,
bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p);
- __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
+ bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
- btree_node_interior_verify(as->c, b);
+ BUG_ON(bch2_btree_node_check_topology(trans, b));
}
}
@@ -1488,9 +1585,14 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
u64 start_time = local_clock();
int ret = 0;
+ bch2_verify_btree_nr_keys(b);
BUG_ON(!parent && (b != btree_node_root(c, b)));
BUG_ON(parent && !btree_node_intent_locked(trans->paths + path, b->c.level + 1));
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret)
+ return ret;
+
bch2_btree_interior_update_will_free_node(as, b);
if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
@@ -1581,15 +1683,16 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
if (parent) {
/* Split a non root node */
ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys);
- if (ret)
- goto err;
} else if (n3) {
- bch2_btree_set_root(as, trans, trans->paths + path, n3);
+ ret = bch2_btree_set_root(as, trans, trans->paths + path, n3, false);
} else {
/* Root filled up but didn't need to be split */
- bch2_btree_set_root(as, trans, trans->paths + path, n1);
+ ret = bch2_btree_set_root(as, trans, trans->paths + path, n1, false);
}
+ if (ret)
+ goto err;
+
if (n3) {
bch2_btree_update_get_open_buckets(as, n3);
bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
@@ -1646,27 +1749,6 @@ err:
goto out;
}
-static void
-bch2_btree_insert_keys_interior(struct btree_update *as,
- struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b,
- struct keylist *keys)
-{
- struct btree_path *linked;
- unsigned i;
-
- __bch2_btree_insert_keys_interior(as, trans, path, b,
- path->l[b->c.level].iter, keys);
-
- btree_update_updated_node(as, b);
-
- trans_for_each_path_with_node(trans, b, linked, i)
- bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
-
- bch2_trans_verify_paths(trans);
-}
-
/**
* bch2_btree_insert_node - insert bkeys into a given btree node
*
@@ -1687,7 +1769,8 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
struct keylist *keys)
{
struct bch_fs *c = as->c;
- struct btree_path *path = trans->paths + path_idx;
+ struct btree_path *path = trans->paths + path_idx, *linked;
+ unsigned i;
int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
@@ -1710,9 +1793,19 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
goto split;
}
- btree_node_interior_verify(c, b);
+ ret = bch2_btree_node_check_topology(trans, b);
+ if (ret) {
+ bch2_btree_node_unlock_write(trans, path, b);
+ return ret;
+ }
+
+ bch2_btree_insert_keys_interior(as, trans, path, b,
+ path->l[b->c.level].iter, keys);
+
+ trans_for_each_path_with_node(trans, b, linked, i)
+ bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
- bch2_btree_insert_keys_interior(as, trans, path, b, keys);
+ bch2_trans_verify_paths(trans);
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
@@ -1726,16 +1819,17 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
bch2_maybe_compact_whiteouts(c, b))
bch2_trans_node_reinit_iter(trans, b);
+ btree_update_updated_node(as, b);
bch2_btree_node_unlock_write(trans, path, b);
- btree_node_interior_verify(c, b);
+ BUG_ON(bch2_btree_node_check_topology(trans, b));
return 0;
split:
/*
* We could attempt to avoid the transaction restart, by calling
* bch2_btree_path_upgrade() and allocating more nodes:
*/
- if (b->c.level >= as->update_level) {
+ if (b->c.level >= as->update_level_end) {
trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
}
@@ -1801,7 +1895,9 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans *
bch2_keylist_add(&as->parent_keys, &b->key);
btree_split_insert_keys(as, trans, path_idx, n, &as->parent_keys);
- bch2_btree_set_root(as, trans, path, n);
+ int ret = bch2_btree_set_root(as, trans, path, n, true);
+ BUG_ON(ret);
+
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bch2_trans_node_add(trans, path, n);
@@ -1818,9 +1914,12 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path,
{
struct bch_fs *c = trans->c;
struct btree *b = bch2_btree_id_root(c, trans->paths[path].btree_id)->b;
+
+ if (btree_node_fake(b))
+ return bch2_btree_split_leaf(trans, path, flags);
+
struct btree_update *as =
- bch2_btree_update_start(trans, trans->paths + path,
- b->c.level, true, flags);
+ bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags);
if (IS_ERR(as))
return PTR_ERR(as);
@@ -1851,6 +1950,18 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
BUG_ON(!trans->paths[path].should_be_locked);
BUG_ON(!btree_node_locked(&trans->paths[path], level));
+ /*
+ * Work around a deadlock caused by the btree write buffer not doing
+ * merges and leaving tons of merges for us to do - we really don't need
+ * to be doing merges at all from the interior update path, and if the
+ * interior update path is generating too many new interior updates we
+ * deadlock:
+ */
+ if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates)
+ return 0;
+
+ flags &= ~BCH_WATERMARK_MASK;
+
b = trans->paths[path].l[level].b;
if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) ||
@@ -1996,6 +2107,10 @@ err:
bch2_path_put(trans, new_path, true);
bch2_path_put(trans, sib_path, true);
bch2_trans_verify_locks(trans);
+ if (ret == -BCH_ERR_journal_reclaim_would_deadlock)
+ ret = 0;
+ if (!ret)
+ ret = bch2_trans_relock(trans);
return ret;
err_free_update:
bch2_btree_node_free_never_used(as, trans, n);
@@ -2041,12 +2156,13 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
if (parent) {
bch2_keylist_add(&as->parent_keys, &n->key);
ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys);
- if (ret)
- goto err;
} else {
- bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n);
+ ret = bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n, false);
}
+ if (ret)
+ goto err;
+
bch2_btree_update_get_open_buckets(as, n);
bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
@@ -2391,7 +2507,7 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
bch2_btree_set_root_inmem(c, b);
}
-static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
+static int __bch2_btree_root_alloc_fake(struct btree_trans *trans, enum btree_id id, unsigned level)
{
struct bch_fs *c = trans->c;
struct closure cl;
@@ -2410,7 +2526,7 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
set_btree_node_fake(b);
set_btree_node_need_rewrite(b);
- b->c.level = 0;
+ b->c.level = level;
b->c.btree_id = id;
bkey_btree_ptr_init(&b->key);
@@ -2437,9 +2553,23 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
return 0;
}
-void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
+void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level)
+{
+ bch2_trans_run(c, __bch2_btree_root_alloc_fake(trans, id, level));
+}
+
+static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
{
- bch2_trans_run(c, __bch2_btree_root_alloc(trans, id));
+ prt_printf(out, "%ps: btree=%s l=%u-%u watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
+ (void *) as->ip_started,
+ bch2_btree_id_str(as->btree_id),
+ as->update_level_start,
+ as->update_level_end,
+ bch2_watermarks[as->watermark],
+ bch2_btree_update_modes[as->mode],
+ as->nodes_written,
+ closure_nr_remaining(&as->cl),
+ as->journal.seq);
}
void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
@@ -2448,12 +2578,7 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
mutex_lock(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list)
- prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
- (void *) as->ip_started,
- as->mode,
- as->nodes_written,
- closure_nr_remaining(&as->cl),
- as->journal.seq);
+ bch2_btree_update_to_text(out, as);
mutex_unlock(&c->btree_interior_update_lock);
}
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index f651dd48aaa049..c1a479ebaad121 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -10,6 +10,20 @@
#define BTREE_UPDATE_JOURNAL_RES (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
+int bch2_btree_node_check_topology(struct btree_trans *, struct btree *);
+
+#define BTREE_UPDATE_MODES() \
+ x(none) \
+ x(node) \
+ x(root) \
+ x(update)
+
+enum btree_update_mode {
+#define x(n) BTREE_UPDATE_##n,
+ BTREE_UPDATE_MODES()
+#undef x
+};
+
/*
* Tracks an in progress split/rewrite of a btree node and the update to the
* parent node:
@@ -37,24 +51,19 @@ struct btree_update {
struct list_head list;
struct list_head unwritten_list;
- /* What kind of update are we doing? */
- enum {
- BTREE_INTERIOR_NO_UPDATE,
- BTREE_INTERIOR_UPDATING_NODE,
- BTREE_INTERIOR_UPDATING_ROOT,
- BTREE_INTERIOR_UPDATING_AS,
- } mode;
-
+ enum btree_update_mode mode;
+ enum bch_watermark watermark;
unsigned nodes_written:1;
unsigned took_gc_lock:1;
enum btree_id btree_id;
- unsigned update_level;
+ unsigned update_level_start;
+ unsigned update_level_end;
struct disk_reservation disk_res;
/*
- * BTREE_INTERIOR_UPDATING_NODE:
+ * BTREE_UPDATE_node:
* The update that made the new nodes visible was a regular update to an
* existing interior node - @b. We can't write out the update to @b
* until the new nodes we created are finished writing, so we block @b
@@ -163,7 +172,7 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *,
struct bkey_i *, unsigned, bool);
void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
-void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
+void bch2_btree_root_alloc_fake(struct bch_fs *, enum btree_id, unsigned);
static inline unsigned btree_update_reserve_required(struct bch_fs *c,
struct btree *b)
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 5cbad8445782c4..36a6f42aba5e6f 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -11,6 +11,7 @@
#include "journal_reclaim.h"
#include <linux/prefetch.h>
+#include <linux/sort.h>
static int bch2_btree_write_buffer_journal_flush(struct journal *,
struct journal_entry_pin *, u64);
@@ -46,6 +47,14 @@ static inline bool wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_ke
#endif
}
+static int wb_key_seq_cmp(const void *_l, const void *_r)
+{
+ const struct btree_write_buffered_key *l = _l;
+ const struct btree_write_buffered_key *r = _r;
+
+ return cmp_int(l->journal_seq, r->journal_seq);
+}
+
/* Compare excluding idx, the low 24 bits: */
static inline bool wb_key_eq(const void *_l, const void *_r)
{
@@ -307,6 +316,16 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) {
bch2_btree_node_unlock_write(trans, path, path->l[0].b);
write_locked = false;
+
+ ret = lockrestart_do(trans,
+ bch2_btree_iter_traverse(&iter) ?:
+ bch2_foreground_maybe_merge(trans, iter.path, 0,
+ BCH_WATERMARK_reclaim|
+ BCH_TRANS_COMMIT_journal_reclaim|
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc));
+ if (ret)
+ goto err;
}
}
@@ -357,6 +376,11 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
*/
trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr);
+ sort(wb->flushing.keys.data,
+ wb->flushing.keys.nr,
+ sizeof(wb->flushing.keys.data[0]),
+ wb_key_seq_cmp, NULL);
+
darray_for_each(wb->flushing.keys, i) {
if (!i->journal_seq)
continue;
@@ -368,10 +392,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
ret = commit_do(trans, NULL, NULL,
BCH_WATERMARK_reclaim|
+ BCH_TRANS_COMMIT_journal_reclaim|
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc|
- BCH_TRANS_COMMIT_no_journal_res|
- BCH_TRANS_COMMIT_journal_reclaim,
+ BCH_TRANS_COMMIT_no_journal_res ,
btree_write_buffered_insert(trans, i));
if (ret)
goto err;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 96edf2c34d433d..941401a210f569 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -525,6 +525,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
"different types of data in same bucket: %s, %s",
bch2_data_type_str(g->data_type),
bch2_data_type_str(data_type))) {
+ BUG();
ret = -EIO;
goto err;
}
@@ -628,6 +629,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
bch2_data_type_str(ptr_data_type),
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ BUG();
ret = -EIO;
goto err;
}
@@ -815,14 +817,14 @@ static int __mark_pointer(struct btree_trans *trans,
static int bch2_trigger_pointer(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k, struct extent_ptr_decoded p,
- s64 *sectors,
- unsigned flags)
+ const union bch_extent_entry *entry,
+ s64 *sectors, unsigned flags)
{
bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
struct bpos bucket;
struct bch_backpointer bp;
- bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp);
+ bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, entry, &bucket, &bp);
*sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);
if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
@@ -851,7 +853,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
if (flags & BTREE_TRIGGER_GC) {
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
- enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+ enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);
percpu_down_read(&c->mark_lock);
struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
@@ -979,7 +981,7 @@ static int __trigger_extent(struct btree_trans *trans,
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
s64 disk_sectors;
- ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags);
+ ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags);
if (ret < 0)
return ret;
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 6387e039f78975..f9af5adabe8363 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -226,6 +226,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma
fallthrough;
case BCH_WATERMARK_btree_copygc:
case BCH_WATERMARK_reclaim:
+ case BCH_WATERMARK_interior_updates:
break;
}
@@ -394,14 +395,6 @@ static inline const char *bch2_data_type_str(enum bch_data_type type)
: "(invalid data type)";
}
-static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type)
-{
- if (type < BCH_DATA_NR)
- prt_str(out, __bch2_data_types[type]);
- else
- prt_printf(out, "(invalid data type %u)", type);
-}
-
/* disk reservations: */
static inline void bch2_disk_reservation_put(struct bch_fs *c,
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 38defa19d52d70..72781aad6ba70c 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -7,7 +7,7 @@
#include "chardev.h"
#include "journal.h"
#include "move.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "super.h"
#include "super-io.h"
@@ -134,42 +134,38 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
struct fsck_thread {
struct thread_with_stdio thr;
struct bch_fs *c;
- char **devs;
- size_t nr_devs;
struct bch_opts opts;
};
static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
{
struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
- if (thr->devs)
- for (size_t i = 0; i < thr->nr_devs; i++)
- kfree(thr->devs[i]);
- kfree(thr->devs);
kfree(thr);
}
static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
{
struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
- struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
-
- if (IS_ERR(c))
- return PTR_ERR(c);
+ struct bch_fs *c = thr->c;
- int ret = 0;
- if (test_bit(BCH_FS_errors_fixed, &c->flags))
- ret |= 1;
- if (test_bit(BCH_FS_error, &c->flags))
- ret |= 4;
+ int ret = PTR_ERR_OR_ZERO(c);
+ if (ret)
+ return ret;
- bch2_fs_stop(c);
+ ret = bch2_fs_start(thr->c);
+ if (ret)
+ goto err;
- if (ret & 1)
+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name);
- if (ret & 4)
+ ret |= 1;
+ }
+ if (test_bit(BCH_FS_error, &c->flags)) {
bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name);
-
+ ret |= 4;
+ }
+err:
+ bch2_fs_stop(c);
return ret;
}
@@ -182,7 +178,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
{
struct bch_ioctl_fsck_offline arg;
struct fsck_thread *thr = NULL;
- u64 *devs = NULL;
+ darray_str(devs) = {};
long ret = 0;
if (copy_from_user(&arg, user_arg, sizeof(arg)))
@@ -194,29 +190,32 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
- !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
- !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
- ret = -ENOMEM;
- goto err;
- }
+ for (size_t i = 0; i < arg.nr_devs; i++) {
+ u64 dev_u64;
+ ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64));
+ if (ret)
+ goto err;
- thr->opts = bch2_opts_empty();
- thr->nr_devs = arg.nr_devs;
+ char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX);
+ ret = PTR_ERR_OR_ZERO(dev_str);
+ if (ret)
+ goto err;
- if (copy_from_user(devs, &user_arg->devs[0],
- array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) {
- ret = -EINVAL;
- goto err;
+ ret = darray_push(&devs, dev_str);
+ if (ret) {
+ kfree(dev_str);
+ goto err;
+ }
}
- for (size_t i = 0; i < arg.nr_devs; i++) {
- thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
- ret = PTR_ERR_OR_ZERO(thr->devs[i]);
- if (ret)
- goto err;
+ thr = kzalloc(sizeof(*thr), GFP_KERNEL);
+ if (!thr) {
+ ret = -ENOMEM;
+ goto err;
}
+ thr->opts = bch2_opts_empty();
+
if (arg.opts) {
char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
@@ -230,15 +229,26 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
+ /* We need request_key() to be called before we punt to kthread: */
+ opt_set(thr->opts, nostart, true);
+
+ thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
+
+ if (!IS_ERR(thr->c) &&
+ thr->c->opts.errors == BCH_ON_ERROR_panic)
+ thr->c->opts.errors = BCH_ON_ERROR_ro;
+
ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops);
-err:
- if (ret < 0) {
- if (thr)
- bch2_fsck_thread_exit(&thr->thr);
- pr_err("ret %s", bch2_err_str(ret));
- }
- kfree(devs);
+out:
+ darray_for_each(devs, i)
+ kfree(*i);
+ darray_exit(&devs);
return ret;
+err:
+ if (thr)
+ bch2_fsck_thread_exit(&thr->thr);
+ pr_err("ret %s", bch2_err_str(ret));
+ goto out;
}
static long bch2_global_ioctl(unsigned cmd, void __user *arg)
diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
index 4701457f6381ca..7ed779b411f61e 100644
--- a/fs/bcachefs/checksum.c
+++ b/fs/bcachefs/checksum.c
@@ -429,15 +429,20 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
extent_nonce(version, crc_old), bio);
if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) {
- bch_err(c, "checksum error in %s() (memory corruption or bug?)\n"
- "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)",
- __func__,
- crc_old.csum.hi,
- crc_old.csum.lo,
- merged.hi,
- merged.lo,
- bch2_csum_types[crc_old.csum_type],
- bch2_csum_types[new_csum_type]);
+ struct printbuf buf = PRINTBUF;
+ prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n"
+ "expected %0llx:%0llx got %0llx:%0llx (old type ",
+ __func__,
+ crc_old.csum.hi,
+ crc_old.csum.lo,
+ merged.hi,
+ merged.lo);
+ bch2_prt_csum_type(&buf, crc_old.csum_type);
+ prt_str(&buf, " new type ");
+ bch2_prt_csum_type(&buf, new_csum_type);
+ prt_str(&buf, ")");
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
return -EIO;
}
diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h
index 1b8c2c1016dc63..e40499fde9a401 100644
--- a/fs/bcachefs/checksum.h
+++ b/fs/bcachefs/checksum.h
@@ -61,11 +61,12 @@ static inline void bch2_csum_err_msg(struct printbuf *out,
struct bch_csum expected,
struct bch_csum got)
{
- prt_printf(out, "checksum error: got ");
+ prt_str(out, "checksum error, type ");
+ bch2_prt_csum_type(out, type);
+ prt_str(out, ": got ");
bch2_csum_to_text(out, type, got);
prt_str(out, " should be ");
bch2_csum_to_text(out, type, expected);
- prt_printf(out, " type %s", bch2_csum_types[type]);
}
int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h
index 58c2eb45570ff0..607fd5e232c902 100644
--- a/fs/bcachefs/compress.h
+++ b/fs/bcachefs/compress.h
@@ -47,14 +47,6 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
}
-static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type)
-{
- if (type < BCH_COMPRESSION_TYPE_NR)
- prt_str(out, __bch2_compression_types[type]);
- else
- prt_printf(out, "(invalid compression type %u)", type);
-}
-
int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
struct bch_extent_crc_unpacked *);
int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 4150feca42a2e6..0022b51ce3c09c 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -14,6 +14,7 @@
#include "move.h"
#include "nocow_locking.h"
#include "rebalance.h"
+#include "snapshot.h"
#include "subvolume.h"
#include "trace.h"
@@ -509,6 +510,14 @@ int bch2_data_update_init(struct btree_trans *trans,
unsigned ptrs_locked = 0;
int ret = 0;
+ /*
+ * fs is corrupt we have a key for a snapshot node that doesn't exist,
+ * and we have to check for this because we go rw before repairing the
+ * snapshots table - just skip it, we can move it later.
+ */
+ if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot)))
+ return -BCH_ERR_data_update_done;
+
bch2_bkey_buf_init(&m->k);
bch2_bkey_buf_reassemble(&m->k, c, k);
m->btree_id = btree_id;
@@ -571,8 +580,7 @@ int bch2_data_update_init(struct btree_trans *trans,
move_ctxt_wait_event(ctxt,
(locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
PTR_BUCKET_POS(c, &p.ptr), 0)) ||
- (!atomic_read(&ctxt->read_sectors) &&
- !atomic_read(&ctxt->write_sectors)));
+ list_empty(&ctxt->ios));
if (!locked)
bch2_bucket_nocow_lock(&c->nocow_locks,
@@ -590,6 +598,8 @@ int bch2_data_update_init(struct btree_trans *trans,
i++;
}
+ unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have));
+
/*
* If current extent durability is less than io_opts.data_replicas,
* we're not trying to rereplicate the extent up to data_replicas here -
@@ -599,7 +609,7 @@ int bch2_data_update_init(struct btree_trans *trans,
* rereplicate, currently, so that users don't get an unexpected -ENOSPC
*/
if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) &&
- durability_have >= io_opts.data_replicas) {
+ !durability_required) {
m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
m->data_opts.rewrite_ptrs = 0;
/* if iter == NULL, it's just a promote */
@@ -608,11 +618,18 @@ int bch2_data_update_init(struct btree_trans *trans,
goto done;
}
- m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+ m->op.nr_replicas = min(durability_removing, durability_required) +
m->data_opts.extra_replicas;
- m->op.nr_replicas_required = m->op.nr_replicas;
- BUG_ON(!m->op.nr_replicas);
+ /*
+ * If device(s) were set to durability=0 after data was written to them
+ * we can end up with a duribilty=0 extent, and the normal algorithm
+ * that tries not to increase durability doesn't work:
+ */
+ if (!(durability_have + durability_removing))
+ m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+
+ m->op.nr_replicas_required = m->op.nr_replicas;
if (reserve_sectors) {
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 208ce6f0fc4317..cd99b739941447 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -13,6 +13,7 @@
#include "btree_iter.h"
#include "btree_locking.h"
#include "btree_update.h"
+#include "btree_update_interior.h"
#include "buckets.h"
#include "debug.h"
#include "error.h"
@@ -668,7 +669,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
- do {
+ while (1) {
err = flush_buf(i);
if (err)
return err;
@@ -676,9 +677,12 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
if (!i->size)
break;
+ if (done)
+ break;
+
done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
i->iter++;
- } while (!done);
+ }
if (i->buf.allocation_failure)
return -ENOMEM;
@@ -693,13 +697,45 @@ static const struct file_operations journal_pins_ops = {
.read = bch2_journal_pins_read,
};
+static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct bch_fs *c = i->c;
+ int err;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ if (!i->iter) {
+ bch2_btree_updates_to_text(&i->buf, c);
+ i->iter++;
+ }
+
+ err = flush_buf(i);
+ if (err)
+ return err;
+
+ if (i->buf.allocation_failure)
+ return -ENOMEM;
+
+ return i->ret;
+}
+
+static const struct file_operations btree_updates_ops = {
+ .owner = THIS_MODULE,
+ .open = bch2_dump_open,
+ .release = bch2_dump_release,
+ .read = bch2_btree_updates_read,
+};
+
static int btree_transaction_stats_open(struct inode *inode, struct file *file)
{
struct bch_fs *c = inode->i_private;
struct dump_iter *i;
i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
-
if (!i)
return -ENOMEM;
@@ -866,6 +902,20 @@ void bch2_fs_debug_exit(struct bch_fs *c)
debugfs_remove_recursive(c->fs_debug_dir);
}
+static void bch2_fs_debug_btree_init(struct bch_fs *c, struct btree_debug *bd)
+{
+ struct dentry *d;
+
+ d = debugfs_create_dir(bch2_btree_id_str(bd->id), c->btree_debug_dir);
+
+ debugfs_create_file("keys", 0400, d, bd, &btree_debug_ops);
+
+ debugfs_create_file("formats", 0400, d, bd, &btree_format_debug_ops);
+
+ debugfs_create_file("bfloat-failed", 0400, d, bd,
+ &bfloat_failed_debug_ops);
+}
+
void bch2_fs_debug_init(struct bch_fs *c)
{
struct btree_debug *bd;
@@ -888,6 +938,9 @@ void bch2_fs_debug_init(struct bch_fs *c)
debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
c->btree_debug, &journal_pins_ops);
+ debugfs_create_file("btree_updates", 0400, c->fs_debug_dir,
+ c->btree_debug, &btree_updates_ops);
+
debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
c, &btree_transaction_stats_op);
@@ -902,21 +955,7 @@ void bch2_fs_debug_init(struct bch_fs *c)
bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
bd++) {
bd->id = bd - c->btree_debug;
- debugfs_create_file(bch2_btree_id_str(bd->id),
- 0400, c->btree_debug_dir, bd,
- &btree_debug_ops);
-
- snprintf(name, sizeof(name), "%s-formats",
- bch2_btree_id_str(bd->id));
-
- debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
- &btree_format_debug_ops);
-
- snprintf(name, sizeof(name), "%s-bfloat-failed",
- bch2_btree_id_str(bd->id));
-
- debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
- &bfloat_failed_debug_ops);
+ bch2_fs_debug_btree_init(c, bd);
}
}
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 082075244e16ae..556a217108d32e 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -131,29 +131,33 @@ fsck_err:
void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
- const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
- unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
+ const struct bch_stripe *sp = bkey_s_c_to_stripe(k).v;
+ struct bch_stripe s = {};
+
+ memcpy(&s, sp, min(sizeof(s), bkey_val_bytes(k.k)));
+
+ unsigned nr_data = s.nr_blocks - s.nr_redundant;
+
+ prt_printf(out, "algo %u sectors %u blocks %u:%u csum ",
+ s.algorithm,
+ le16_to_cpu(s.sectors),
+ nr_data,
+ s.nr_redundant);
+ bch2_prt_csum_type(out, s.csum_type);
+ prt_printf(out, " gran %u", 1U << s.csum_granularity_bits);
+
+ for (unsigned i = 0; i < s.nr_blocks; i++) {
+ const struct bch_extent_ptr *ptr = sp->ptrs + i;
+
+ if ((void *) ptr >= bkey_val_end(k))
+ break;
+
+ bch2_extent_ptr_to_text(out, c, ptr);
- prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
- s->algorithm,
- le16_to_cpu(s->sectors),
- nr_data,
- s->nr_redundant,
- s->csum_type,
- 1U << s->csum_granularity_bits);
-
- for (i = 0; i < s->nr_blocks; i++) {
- const struct bch_extent_ptr *ptr = s->ptrs + i;
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- u32 offset;
- u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-
- prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
- if (i < nr_data)
- prt_printf(out, "#%u", stripe_blockcount_get(s, i));
- prt_printf(out, " gen %u", ptr->gen);
- if (ptr_stale(ca, ptr))
- prt_printf(out, " stale");
+ if (s.csum_type < BCH_CSUM_NR &&
+ i < nr_data &&
+ stripe_blockcount_offset(&s, i) < bkey_val_bytes(k.k))
+ prt_printf(out, "#%u", stripe_blockcount_get(sp, i));
}
}
@@ -607,10 +611,8 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
struct printbuf err = PRINTBUF;
struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev);
- prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n",
- want.hi, want.lo,
- got.hi, got.lo,
- bch2_csum_types[v->csum_type]);
+ prt_str(&err, "stripe ");
+ bch2_csum_err_msg(&err, v->csum_type, want, got);
prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i);
bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
bch_err_ratelimited(ca, "%s", err.buf);
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index f4369b02e805f0..f042616888b0a1 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -32,6 +32,8 @@ static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
static inline unsigned stripe_csum_offset(const struct bch_stripe *s,
unsigned dev, unsigned csum_idx)
{
+ EBUG_ON(s->csum_type >= BCH_CSUM_NR);
+
unsigned csum_bytes = bch_crc_bytes[s->csum_type];
return sizeof(struct bch_stripe) +
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index af25d8ec60f221..01a79fa3eacb21 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -252,7 +252,8 @@
x(BCH_ERR_nopromote, nopromote_in_flight) \
x(BCH_ERR_nopromote, nopromote_no_writes) \
x(BCH_ERR_nopromote, nopromote_enomem) \
- x(0, need_inode_lock)
+ x(0, need_inode_lock) \
+ x(0, invalid_snapshot_node)
enum bch_errcode {
BCH_ERR_START = 2048,
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 043431206799d8..82a6656c941c5f 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "error.h"
-#include "recovery.h"
+#include "journal.h"
+#include "recovery_passes.h"
#include "super.h"
#include "thread_with_file.h"
@@ -16,7 +17,8 @@ bool bch2_inconsistent_error(struct bch_fs *c)
return false;
case BCH_ON_ERROR_ro:
if (bch2_fs_emergency_read_only(c))
- bch_err(c, "inconsistency detected - emergency read only");
+ bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
+ journal_cur_seq(&c->journal));
return true;
case BCH_ON_ERROR_panic:
panic(bch2_fmt(c, "panic after error"));
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index ae1d6674c512d4..36caedf72d89ab 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -32,6 +32,12 @@ bool bch2_inconsistent_error(struct bch_fs *);
int bch2_topology_error(struct bch_fs *);
+#define bch2_fs_topology_error(c, ...) \
+({ \
+ bch_err(c, "btree topology error: " __VA_ARGS__); \
+ bch2_topology_error(c); \
+})
+
#define bch2_fs_inconsistent(c, ...) \
({ \
bch_err(c, __VA_ARGS__); \
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 61395b113df9bd..1a331e53920485 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -189,13 +189,18 @@ int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
enum bkey_invalid_flags flags,
struct printbuf *err)
{
+ struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
int ret = 0;
- bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err,
- btree_ptr_v2_val_too_big,
+ bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX,
+ c, err, btree_ptr_v2_val_too_big,
"value too big (%zu > %zu)",
bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
+ bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p),
+ c, err, btree_ptr_v2_min_key_bad,
+ "min_key > key");
+
ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
fsck_err:
return ret;
@@ -973,6 +978,33 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
return bkey_deleted(k.k);
}
+void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
+{
+ struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
+ ? bch_dev_bkey_exists(c, ptr->dev)
+ : NULL;
+
+ if (!ca) {
+ prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
+ (u64) ptr->offset, ptr->gen,
+ ptr->cached ? " cached" : "");
+ } else {
+ u32 offset;
+ u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
+
+ prt_printf(out, "ptr: %u:%llu:%u gen %u",
+ ptr->dev, b, offset, ptr->gen);
+ if (ptr->cached)
+ prt_str(out, " cached");
+ if (ptr->unwritten)
+ prt_str(out, " unwritten");
+ if (b >= ca->mi.first_bucket &&
+ b < ca->mi.nbuckets &&
+ ptr_stale(ca, ptr))
+ prt_printf(out, " stale");
+ }
+}
+
void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
@@ -988,42 +1020,22 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, " ");
switch (__extent_entry_type(entry)) {
- case BCH_EXTENT_ENTRY_ptr: {
- const struct bch_extent_ptr *ptr = entry_to_ptr(entry);
- struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
- ? bch_dev_bkey_exists(c, ptr->dev)
- : NULL;
-
- if (!ca) {
- prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
- (u64) ptr->offset, ptr->gen,
- ptr->cached ? " cached" : "");
- } else {
- u32 offset;
- u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-
- prt_printf(out, "ptr: %u:%llu:%u gen %u",
- ptr->dev, b, offset, ptr->gen);
- if (ptr->cached)
- prt_str(out, " cached");
- if (ptr->unwritten)
- prt_str(out, " unwritten");
- if (ca && ptr_stale(ca, ptr))
- prt_printf(out, " stale");
- }
+ case BCH_EXTENT_ENTRY_ptr:
+ bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry));
break;
- }
+
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128: {
struct bch_extent_crc_unpacked crc =
bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
- prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ",
+ prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
crc.compressed_size,
crc.uncompressed_size,
- crc.offset, crc.nonce,
- bch2_csum_types[crc.csum_type]);
+ crc.offset, crc.nonce);
+ bch2_prt_csum_type(out, crc.csum_type);
+ prt_str(out, " compress ");
bch2_prt_compression_type(out, crc.compression_type);
break;
}
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index fd2669cdd76f3b..528e817eacbdad 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -596,30 +596,6 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
return ret;
}
-static inline unsigned bch2_bkey_ptr_data_type(struct bkey_s_c k, const struct bch_extent_ptr *ptr)
-{
- switch (k.k->type) {
- case KEY_TYPE_btree_ptr:
- case KEY_TYPE_btree_ptr_v2:
- return BCH_DATA_btree;
- case KEY_TYPE_extent:
- case KEY_TYPE_reflink_v:
- return BCH_DATA_user;
- case KEY_TYPE_stripe: {
- struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-
- BUG_ON(ptr < s.v->ptrs ||
- ptr >= s.v->ptrs + s.v->nr_blocks);
-
- return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
- ? BCH_DATA_parity
- : BCH_DATA_user;
- }
- default:
- BUG();
- }
-}
-
unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
@@ -700,6 +676,7 @@ bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
+void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c,
diff --git a/fs/bcachefs/eytzinger.c b/fs/bcachefs/eytzinger.c
new file mode 100644
index 00000000000000..0f955c3c76a7bc
--- /dev/null
+++ b/fs/bcachefs/eytzinger.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "eytzinger.h"
+
+/**
+ * is_aligned - is this pointer & size okay for word-wide copying?
+ * @base: pointer to data
+ * @size: size of each element
+ * @align: required alignment (typically 4 or 8)
+ *
+ * Returns true if elements can be copied using word loads and stores.
+ * The size must be a multiple of the alignment, and the base address must
+ * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+ *
+ * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
+ * to "if ((a | b) & mask)", so we do that by hand.
+ */
+__attribute_const__ __always_inline
+static bool is_aligned(const void *base, size_t size, unsigned char align)
+{
+ unsigned char lsbits = (unsigned char)size;
+
+ (void)base;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ lsbits |= (unsigned char)(uintptr_t)base;
+#endif
+ return (lsbits & (align - 1)) == 0;
+}
+
+/**
+ * swap_words_32 - swap two elements in 32-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 4)
+ *
+ * Exchange the two objects in memory. This exploits base+index addressing,
+ * which basically all CPUs have, to minimize loop overhead computations.
+ *
+ * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
+ * bottom of the loop, even though the zero flag is still valid from the
+ * subtract (since the intervening mov instructions don't alter the flags).
+ * Gcc 8.1.0 doesn't have that problem.
+ */
+static void swap_words_32(void *a, void *b, size_t n)
+{
+ do {
+ u32 t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+ } while (n);
+}
+
+/**
+ * swap_words_64 - swap two elements in 64-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 8)
+ *
+ * Exchange the two objects in memory. This exploits base+index
+ * addressing, which basically all CPUs have, to minimize loop overhead
+ * computations.
+ *
+ * We'd like to use 64-bit loads if possible. If they're not, emulating
+ * one requires base+index+4 addressing which x86 has but most other
+ * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads,
+ * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
+ * x32 ABI). Are there any cases the kernel needs to worry about?
+ */
+static void swap_words_64(void *a, void *b, size_t n)
+{
+ do {
+#ifdef CONFIG_64BIT
+ u64 t = *(u64 *)(a + (n -= 8));
+ *(u64 *)(a + n) = *(u64 *)(b + n);
+ *(u64 *)(b + n) = t;
+#else
+ /* Use two 32-bit transfers to avoid base+index+4 addressing */
+ u32 t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+
+ t = *(u32 *)(a + (n -= 4));
+ *(u32 *)(a + n) = *(u32 *)(b + n);
+ *(u32 *)(b + n) = t;
+#endif
+ } while (n);
+}
+
+/**
+ * swap_bytes - swap two elements a byte at a time
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size
+ *
+ * This is the fallback if alignment doesn't allow using larger chunks.
+ */
+static void swap_bytes(void *a, void *b, size_t n)
+{
+ do {
+ char t = ((char *)a)[--n];
+ ((char *)a)[n] = ((char *)b)[n];
+ ((char *)b)[n] = t;
+ } while (n);
+}
+
+/*
+ * The values are arbitrary as long as they can't be confused with
+ * a pointer, but small integers make for the smallest compare
+ * instructions.
+ */
+#define SWAP_WORDS_64 (swap_r_func_t)0
+#define SWAP_WORDS_32 (swap_r_func_t)1
+#define SWAP_BYTES (swap_r_func_t)2
+#define SWAP_WRAPPER (swap_r_func_t)3
+
+struct wrapper {
+ cmp_func_t cmp;
+ swap_func_t swap_func;
+};
+
+/*
+ * The function pointer is last to make tail calls most efficient if the
+ * compiler decides not to inline this function.
+ */
+static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
+{
+ if (swap_func == SWAP_WRAPPER) {
+ ((const struct wrapper *)priv)->swap_func(a, b, (int)size);
+ return;
+ }
+
+ if (swap_func == SWAP_WORDS_64)
+ swap_words_64(a, b, size);
+ else if (swap_func == SWAP_WORDS_32)
+ swap_words_32(a, b, size);
+ else if (swap_func == SWAP_BYTES)
+ swap_bytes(a, b, size);
+ else
+ swap_func(a, b, (int)size, priv);
+}
+
+#define _CMP_WRAPPER ((cmp_r_func_t)0L)
+
+static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
+{
+ if (cmp == _CMP_WRAPPER)
+ return ((const struct wrapper *)priv)->cmp(a, b);
+ return cmp(a, b, priv);
+}
+
+static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
+ cmp_r_func_t cmp_func, const void *priv,
+ size_t l, size_t r)
+{
+ return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ cmp_func, priv);
+}
+
+static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
+ swap_r_func_t swap_func, const void *priv,
+ size_t l, size_t r)
+{
+ do_swap(base + inorder_to_eytzinger0(l, n) * size,
+ base + inorder_to_eytzinger0(r, n) * size,
+ size, swap_func, priv);
+}
+
+void eytzinger0_sort_r(void *base, size_t n, size_t size,
+ cmp_r_func_t cmp_func,
+ swap_r_func_t swap_func,
+ const void *priv)
+{
+ int i, c, r;
+
+ /* called from 'sort' without swap function, let's pick the default */
+ if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
+ swap_func = NULL;
+
+ if (!swap_func) {
+ if (is_aligned(base, size, 8))
+ swap_func = SWAP_WORDS_64;
+ else if (is_aligned(base, size, 4))
+ swap_func = SWAP_WORDS_32;
+ else
+ swap_func = SWAP_BYTES;
+ }
+
+ /* heapify */
+ for (i = n / 2 - 1; i >= 0; --i) {
+ for (r = i; r * 2 + 1 < n; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < n &&
+ eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+ c++;
+
+ if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+ break;
+
+ eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ }
+ }
+
+ /* sort */
+ for (i = n - 1; i > 0; --i) {
+ eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
+
+ for (r = 0; r * 2 + 1 < i; r = c) {
+ c = r * 2 + 1;
+
+ if (c + 1 < i &&
+ eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+ c++;
+
+ if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+ break;
+
+ eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+ }
+ }
+}
+
+void eytzinger0_sort(void *base, size_t n, size_t size,
+ cmp_func_t cmp_func,
+ swap_func_t swap_func)
+{
+ struct wrapper w = {
+ .cmp = cmp_func,
+ .swap_func = swap_func,
+ };
+
+ return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
+}
diff --git a/fs/bcachefs/eytzinger.h b/fs/bcachefs/eytzinger.h
index b04750dbf870bc..24840aee335c0f 100644
--- a/fs/bcachefs/eytzinger.h
+++ b/fs/bcachefs/eytzinger.h
@@ -5,23 +5,33 @@
#include <linux/bitops.h>
#include <linux/log2.h>
-#include "util.h"
+#ifdef EYTZINGER_DEBUG
+#define EYTZINGER_BUG_ON(cond) BUG_ON(cond)
+#else
+#define EYTZINGER_BUG_ON(cond)
+#endif
/*
* Traversal for trees in eytzinger layout - a full binary tree layed out in an
- * array
- */
-
-/*
- * One based indexing version:
+ * array.
+ *
+ * Consider using an eytzinger tree any time you would otherwise be doing binary
+ * search over an array. Binary search is a worst case scenario for branch
+ * prediction and prefetching, but in an eytzinger tree every node's children
+ * are adjacent in memory, thus we can prefetch children before knowing the
+ * result of the comparison, assuming multiple nodes fit on a cacheline.
*
- * With one based indexing each level of the tree starts at a power of two -
- * good for cacheline alignment:
+ * Two variants are provided, for one based indexing and zero based indexing.
+ *
+ * Zero based indexing is more convenient, but one based indexing has better
+ * alignment and thus better performance because each new level of the tree
+ * starts at a power of two, and thus if element 0 was cacheline aligned, each
+ * new level will be as well.
*/
static inline unsigned eytzinger1_child(unsigned i, unsigned child)
{
- EBUG_ON(child > 1);
+ EYTZINGER_BUG_ON(child > 1);
return (i << 1) + child;
}
@@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size)
static inline unsigned eytzinger1_next(unsigned i, unsigned size)
{
- EBUG_ON(i > size);
+ EYTZINGER_BUG_ON(i > size);
if (eytzinger1_right_child(i) <= size) {
i = eytzinger1_right_child(i);
@@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)
static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
{
- EBUG_ON(i > size);
+ EYTZINGER_BUG_ON(i > size);
if (eytzinger1_left_child(i) <= size) {
i = eytzinger1_left_child(i) + 1;
@@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
unsigned shift = __fls(size) - b;
int s;
- EBUG_ON(!i || i > size);
+ EYTZINGER_BUG_ON(!i || i > size);
i ^= 1U << b;
i <<= 1;
@@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
unsigned shift;
int s;
- EBUG_ON(!i || i > size);
+ EYTZINGER_BUG_ON(!i || i > size);
/*
* sign bit trick:
@@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
static inline unsigned eytzinger0_child(unsigned i, unsigned child)
{
- EBUG_ON(child > 1);
+ EYTZINGER_BUG_ON(child > 1);
return (i << 1) + 1 + child;
}
@@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
(_i) != -1; \
(_i) = eytzinger0_next((_i), (_size)))
-typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);
-
/* return greatest node <= @search, or -1 if not found */
-static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
- eytzinger_cmp_fn cmp, const void *search)
+static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
+ cmp_func_t cmp, const void *search)
{
unsigned i, n = 0;
@@ -244,21 +252,38 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
do {
i = n;
- n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
+ n = eytzinger0_child(i, cmp(base + i * size, search) <= 0);
} while (n < nr);
if (n & 1) {
- /* @i was greater than @search, return previous node: */
-
- if (i == eytzinger0_first(nr))
- return -1;
-
+ /*
+ * @i was greater than @search, return previous node:
+ *
+ * if @i was leftmost/smallest element,
+ * eytzinger0_prev(eytzinger0_first())) returns -1, as expected
+ */
return eytzinger0_prev(i, nr);
} else {
return i;
}
}
+static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
+ cmp_func_t cmp, const void *search)
+{
+ ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
+
+ /*
+ * if eytitzinger0_find_le() returned -1 - no element was <= search - we
+ * want to return the first element; next/prev identities mean this work
+ * as expected
+ *
+ * similarly if find_le() returns last element, we should return -1;
+ * identities mean this all works out:
+ */
+ return eytzinger0_next(idx, nr);
+}
+
#define eytzinger0_find(base, nr, size, _cmp, search) \
({ \
void *_base = (base); \
@@ -269,13 +294,13 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
int _res; \
\
while (_i < _nr && \
- (_res = _cmp(_search, _base + _i * _size, _size))) \
+ (_res = _cmp(_search, _base + _i * _size))) \
_i = eytzinger0_child(_i, _res > 0); \
_i; \
})
-void eytzinger0_sort(void *, size_t, size_t,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t));
+void eytzinger0_sort_r(void *, size_t, size_t,
+ cmp_r_func_t, swap_r_func_t, const void *);
+void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
#endif /* _EYTZINGER_H */
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 33cb6da3a5ad28..b889370a508811 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -387,6 +387,8 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
ret = dio->op.error ?: ((long) dio->written << 9);
bio_put(&dio->op.wbio.bio);
+ bch2_write_ref_put(dio->op.c, BCH_WRITE_REF_dio_write);
+
/* inode->i_dio_count is our ref on inode and thus bch_fs */
inode_dio_end(&inode->v);
@@ -536,7 +538,7 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
if (likely(!dio->iter.count) || dio->op.error)
break;
- bio_reset(bio, NULL, REQ_OP_WRITE);
+ bio_reset(bio, NULL, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
}
out:
return bch2_dio_write_done(dio);
@@ -590,22 +592,25 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
prefetch(&inode->ei_inode);
prefetch((void *) &inode->ei_inode + 64);
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write))
+ return -EROFS;
+
inode_lock(&inode->v);
ret = generic_write_checks(req, iter);
if (unlikely(ret <= 0))
- goto err;
+ goto err_put_write_ref;
ret = file_remove_privs(file);
if (unlikely(ret))
- goto err;
+ goto err_put_write_ref;
ret = file_update_time(file);
if (unlikely(ret))
- goto err;
+ goto err_put_write_ref;
if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
- goto err;
+ goto err_put_write_ref;
inode_dio_begin(&inode->v);
bch2_pagecache_block_get(inode);
@@ -618,7 +623,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
bio = bio_alloc_bioset(NULL,
bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
- REQ_OP_WRITE,
+ REQ_OP_WRITE | REQ_SYNC | REQ_IDLE,
GFP_KERNEL,
&c->dio_write_bioset);
dio = container_of(bio, struct dio_write, op.wbio.bio);
@@ -645,7 +650,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
}
ret = bch2_dio_write_loop(dio);
-err:
+out:
if (locked)
inode_unlock(&inode->v);
return ret;
@@ -653,7 +658,9 @@ err_put_bio:
bch2_pagecache_block_put(inode);
bio_put(bio);
inode_dio_end(&inode->v);
- goto err;
+err_put_write_ref:
+ bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
+ goto out;
}
void bch2_fs_fs_io_direct_exit(struct bch_fs *c)
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 8c70123b6a0c80..20b40477425f49 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -174,18 +174,18 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
static int bch2_flush_inode(struct bch_fs *c,
struct bch_inode_info *inode)
{
- struct bch_inode_unpacked u;
- int ret;
-
if (c->opts.journal_flush_disabled)
return 0;
- ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u);
- if (ret)
- return ret;
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync))
+ return -EROFS;
- return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
- bch2_inode_flush_nocow_writes(c, inode);
+ struct bch_inode_unpacked u;
+ int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?:
+ bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
+ bch2_inode_flush_nocow_writes(c, inode);
+ bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
+ return ret;
}
int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 0ccee05f6887b3..b5ea9fa1259d14 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1997,6 +1997,7 @@ out:
return dget(sb->s_root);
err_put_super:
+ __bch2_fs_stop(c);
deactivate_locked_super(sb);
return ERR_PTR(bch2_err_class(ret));
}
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 47d4eefaba7ba0..8e2010212cc371 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -12,7 +12,7 @@
#include "fsck.h"
#include "inode.h"
#include "keylist.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "snapshot.h"
#include "super.h"
#include "xattr.h"
@@ -63,9 +63,7 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol,
u32 *snapshot, u64 *inum)
{
struct bch_subvolume s;
- int ret;
-
- ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
+ int ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
*snapshot = le32_to_cpu(s.snapshot);
*inum = le64_to_cpu(s.inode);
@@ -158,9 +156,10 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
- ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
- &dir_hash_info, &iter,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
+ &dir_hash_info, &iter,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
bch2_trans_iter_exit(trans, &iter);
err:
bch_err_fn(c, ret);
@@ -169,7 +168,8 @@ err:
/* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
- struct bch_inode_unpacked *lostfound)
+ struct bch_inode_unpacked *lostfound,
+ u64 reattaching_inum)
{
struct bch_fs *c = trans->c;
struct qstr lostfound_str = QSTR("lost+found");
@@ -184,19 +184,36 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
return ret;
subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };
- u32 subvol_snapshot;
- ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol),
- &subvol_snapshot, &root_inum.inum);
- bch_err_msg(c, ret, "looking up root subvol");
+ struct bch_subvolume subvol;
+ ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol),
+ false, 0, &subvol);
+ bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
+ le32_to_cpu(st.master_subvol), snapshot);
if (ret)
return ret;
+ if (!subvol.inode) {
+ struct btree_iter iter;
+ struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)),
+ 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(subvol);
+ if (ret)
+ return ret;
+
+ subvol->v.inode = cpu_to_le64(reattaching_inum);
+ bch2_trans_iter_exit(trans, &iter);
+ }
+
+ root_inum.inum = le64_to_cpu(subvol.inode);
+
struct bch_inode_unpacked root_inode;
struct bch_hash_info root_hash_info;
u32 root_inode_snapshot = snapshot;
ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
- bch_err_msg(c, ret, "looking up root inode");
+ bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
+ root_inum.inum, le32_to_cpu(st.master_subvol));
if (ret)
return ret;
@@ -292,7 +309,7 @@ static int reattach_inode(struct btree_trans *trans,
snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
}
- ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
+ ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum);
if (ret)
return ret;
@@ -363,6 +380,112 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume
return ret;
}
+static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum)
+{
+ struct bch_fs *c = trans->c;
+
+ if (!bch2_snapshot_is_leaf(c, snapshotid)) {
+ bch_err(c, "need to reconstruct subvol, but have interior node snapshot");
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+
+ /*
+ * If inum isn't set, that means we're being called from check_dirents,
+ * not check_inodes - the root of this subvolume doesn't exist or we
+ * would have found it there:
+ */
+ if (!inum) {
+ struct btree_iter inode_iter = {};
+ struct bch_inode_unpacked new_inode;
+ u64 cpu = raw_smp_processor_id();
+
+ bch2_inode_init_early(c, &new_inode);
+ bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);
+
+ new_inode.bi_subvol = subvolid;
+
+ int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
+ bch2_btree_iter_traverse(&inode_iter) ?:
+ bch2_inode_write(trans, &inode_iter, &new_inode);
+ bch2_trans_iter_exit(trans, &inode_iter);
+ if (ret)
+ return ret;
+
+ inum = new_inode.bi_inum;
+ }
+
+ bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum);
+
+ struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
+ int ret = PTR_ERR_OR_ZERO(new_subvol);
+ if (ret)
+ return ret;
+
+ bkey_subvolume_init(&new_subvol->k_i);
+ new_subvol->k.p.offset = subvolid;
+ new_subvol->v.snapshot = cpu_to_le32(snapshotid);
+ new_subvol->v.inode = cpu_to_le64(inum);
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0);
+ if (ret)
+ return ret;
+
+ struct btree_iter iter;
+ struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_snapshots, POS(0, snapshotid),
+ 0, snapshot);
+ ret = PTR_ERR_OR_ZERO(s);
+ bch_err_msg(c, ret, "getting snapshot %u", snapshotid);
+ if (ret)
+ return ret;
+
+ u32 snapshot_tree = le32_to_cpu(s->v.tree);
+
+ s->v.subvol = cpu_to_le32(subvolid);
+ SET_BCH_SNAPSHOT_SUBVOL(&s->v, true);
+ bch2_trans_iter_exit(trans, &iter);
+
+ struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_snapshot_trees, POS(0, snapshot_tree),
+ 0, snapshot_tree);
+ ret = PTR_ERR_OR_ZERO(st);
+ bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree);
+ if (ret)
+ return ret;
+
+ if (!st->v.master_subvol)
+ st->v.master_subvol = cpu_to_le32(subvolid);
+
+ bch2_trans_iter_exit(trans, &iter);
+ return 0;
+}
+
+static int reconstruct_inode(struct btree_trans *trans, u32 snapshot, u64 inum, u64 size, unsigned mode)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_inode_unpacked new_inode;
+
+ bch2_inode_init_early(c, &new_inode);
+ bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, mode|0755, 0, NULL);
+ new_inode.bi_size = size;
+ new_inode.bi_inum = inum;
+
+ return __bch2_fsck_write_inode(trans, &new_inode, snapshot);
+}
+
+static int reconstruct_reg_inode(struct btree_trans *trans, u32 snapshot, u64 inum)
+{
+ struct btree_iter iter = {};
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0);
+ struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter);
+ bch2_trans_iter_exit(trans, &iter);
+ int ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ return reconstruct_inode(trans, snapshot, inum, k.k->p.offset << 9, S_IFREG);
+}
+
struct snapshots_seen_entry {
u32 id;
u32 equiv;
@@ -1064,6 +1187,11 @@ static int check_inode(struct btree_trans *trans,
if (ret && !bch2_err_matches(ret, ENOENT))
goto err;
+ if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
+ ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum);
+ goto do_update;
+ }
+
if (fsck_err_on(ret,
c, inode_bi_subvol_missing,
"inode %llu:%u bi_subvol points to missing subvolume %u",
@@ -1081,7 +1209,7 @@ static int check_inode(struct btree_trans *trans,
do_update = true;
}
}
-
+do_update:
if (do_update) {
ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
bch_err_msg(c, ret, "in fsck updating inode");
@@ -1130,8 +1258,8 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal
i->count = count2;
if (i->count != count2) {
- bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
- w->last_pos.inode, i->snapshot, i->count, count2);
+ bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
+ w->last_pos.inode, i->snapshot, i->count, count2);
return -BCH_ERR_internal_fsck_err;
}
@@ -1371,10 +1499,6 @@ static int check_overlapping_extents(struct btree_trans *trans,
goto err;
}
- ret = extent_ends_at(c, extent_ends, seen, k);
- if (ret)
- goto err;
-
extent_ends->last_pos = k.k->p;
err:
return ret;
@@ -1438,6 +1562,17 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
goto err;
if (k.k->type != KEY_TYPE_whiteout) {
+ if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
+ ret = reconstruct_reg_inode(trans, k.k->p.snapshot, k.k->p.inode) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ inode->last_pos.inode--;
+ ret = -BCH_ERR_transaction_restart_nested;
+ goto err;
+ }
+
if (fsck_err_on(!i, c, extent_in_missing_inode,
"extent in missing inode:\n %s",
(printbuf_reset(&buf),
@@ -1504,6 +1639,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
i->seen_this_pos = true;
}
+
+ if (k.k->type != KEY_TYPE_whiteout) {
+ ret = extent_ends_at(c, extent_ends, s, k);
+ if (ret)
+ goto err;
+ }
out:
err:
fsck_err:
@@ -1584,8 +1725,8 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
return count2;
if (i->count != count2) {
- bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
- i->count, count2);
+ bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
+ w->last_pos.inode, i->snapshot, i->count, count2);
i->count = count2;
if (i->inode.bi_nlink == i->count)
continue;
@@ -1782,6 +1923,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
u32 parent_snapshot;
+ u32 new_parent_subvol = 0;
u64 parent_inum;
struct printbuf buf = PRINTBUF;
int ret = 0;
@@ -1790,6 +1932,27 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
if (ret && !bch2_err_matches(ret, ENOENT))
return ret;
+ if (ret ||
+ (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) {
+ int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
+ if (ret2 && !bch2_err_matches(ret, ENOENT))
+ return ret2;
+ }
+
+ if (ret &&
+ !new_parent_subvol &&
+ (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
+ /*
+ * Couldn't find a subvol for dirent's snapshot - but we lost
+ * subvols, so we need to reconstruct:
+ */
+ ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0);
+ if (ret)
+ return ret;
+
+ parent_snapshot = d.k->p.snapshot;
+ }
+
if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
"dirent parent_subvol points to missing subvolume\n%s",
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
@@ -1798,10 +1961,10 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
"dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
parent_snapshot,
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
- u32 new_parent_subvol;
- ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
- if (ret)
- goto err;
+ if (!new_parent_subvol) {
+ bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
ret = PTR_ERR_OR_ZERO(new_dirent);
@@ -1847,9 +2010,16 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
if (ret && !bch2_err_matches(ret, ENOENT))
- return ret;
+ goto err;
+
+ if (ret) {
+ bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum);
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ ret = 0;
+ goto err;
+ }
- if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol,
+ if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol,
c, inode_bi_parent_wrong,
"subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
target_inum,
@@ -1857,13 +2027,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
subvol_root.bi_parent_subvol = parent_subvol;
ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
if (ret)
- return ret;
+ goto err;
}
ret = check_dirent_target(trans, iter, d, &subvol_root,
target_snapshot);
if (ret)
- return ret;
+ goto err;
out:
err:
fsck_err:
@@ -1880,7 +2050,6 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct snapshots_seen *s)
{
struct bch_fs *c = trans->c;
- struct bkey_s_c_dirent d;
struct inode_walker_entry *i;
struct printbuf buf = PRINTBUF;
struct bpos equiv;
@@ -1919,6 +2088,17 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
*hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
dir->first_this_inode = false;
+ if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
+ ret = reconstruct_inode(trans, k.k->p.snapshot, k.k->p.inode, 0, S_IFDIR) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ dir->last_pos.inode--;
+ ret = -BCH_ERR_transaction_restart_nested;
+ goto err;
+ }
+
if (fsck_err_on(!i, c, dirent_in_missing_dir_inode,
"dirent in nonexisting directory:\n%s",
(printbuf_reset(&buf),
@@ -1953,7 +2133,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (k.k->type != KEY_TYPE_dirent)
goto out;
- d = bkey_s_c_to_dirent(k);
+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
if (d.v->d_type == DT_SUBVOL) {
ret = check_dirent_to_subvol(trans, iter, d);
@@ -2098,17 +2278,21 @@ static int check_root_trans(struct btree_trans *trans)
if (mustfix_fsck_err_on(ret, c, root_subvol_missing,
"root subvol missing")) {
- struct bkey_i_subvolume root_subvol;
+ struct bkey_i_subvolume *root_subvol =
+ bch2_trans_kmalloc(trans, sizeof(*root_subvol));
+ ret = PTR_ERR_OR_ZERO(root_subvol);
+ if (ret)
+ goto err;
snapshot = U32_MAX;
inum = BCACHEFS_ROOT_INO;
- bkey_subvolume_init(&root_subvol.k_i);
- root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
- root_subvol.v.flags = 0;
- root_subvol.v.snapshot = cpu_to_le32(snapshot);
- root_subvol.v.inode = cpu_to_le64(inum);
- ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0);
+ bkey_subvolume_init(&root_subvol->k_i);
+ root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL;
+ root_subvol->v.flags = 0;
+ root_subvol->v.snapshot = cpu_to_le32(snapshot);
+ root_subvol->v.inode = cpu_to_le64(inum);
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0);
bch_err_msg(c, ret, "writing root subvol");
if (ret)
goto err;
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 2b5e06770ab39e..ca4a066e9a5428 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -552,8 +552,8 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
prt_printf(out, "bi_sectors=%llu", inode->bi_sectors);
prt_newline(out);
- prt_newline(out);
prt_printf(out, "bi_version=%llu", inode->bi_version);
+ prt_newline(out);
#define x(_name, _bits) \
prt_printf(out, #_name "=%llu", (u64) inode->_name); \
diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c
index 1baf78594ccaf8..82f9170dab3fdc 100644
--- a/fs/bcachefs/io_misc.c
+++ b/fs/bcachefs/io_misc.c
@@ -264,6 +264,7 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
ret = 0;
err:
bch2_logged_op_finish(trans, op_k);
+ bch_err_fn(c, ret);
return ret;
}
@@ -476,6 +477,7 @@ case LOGGED_OP_FINSERT_finish:
break;
}
err:
+ bch_err_fn(c, ret);
bch2_logged_op_finish(trans, op_k);
bch2_trans_iter_exit(trans, &iter);
return ret;
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 725fcf46f6312c..9aa28b52ab926c 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -247,7 +247,7 @@ static void journal_entry_err_msg(struct printbuf *out,
if (entry) {
prt_str(out, " type=");
- prt_str(out, bch2_jset_entry_types[entry->type]);
+ bch2_prt_jset_entry_type(out, entry->type);
}
if (!jset) {
@@ -403,7 +403,8 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs
jset_entry_for_each_key(entry, k) {
if (!first) {
prt_newline(out);
- prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+ bch2_prt_jset_entry_type(out, entry->type);
+ prt_str(out, ": ");
}
prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level);
bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k));
@@ -563,9 +564,9 @@ static void journal_entry_usage_to_text(struct printbuf *out, struct bch_fs *c,
struct jset_entry_usage *u =
container_of(entry, struct jset_entry_usage, entry);
- prt_printf(out, "type=%s v=%llu",
- bch2_fs_usage_types[u->entry.btree_id],
- le64_to_cpu(u->v));
+ prt_str(out, "type=");
+ bch2_prt_fs_usage_type(out, u->entry.btree_id);
+ prt_printf(out, " v=%llu", le64_to_cpu(u->v));
}
static int journal_entry_data_usage_validate(struct bch_fs *c,
@@ -827,11 +828,11 @@ int bch2_journal_entry_validate(struct bch_fs *c,
void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c,
struct jset_entry *entry)
{
+ bch2_prt_jset_entry_type(out, entry->type);
+
if (entry->type < BCH_JSET_ENTRY_NR) {
- prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+ prt_str(out, ": ");
bch2_jset_entry_ops[entry->type].to_text(out, c, entry);
- } else {
- prt_printf(out, "(unknown type %u)", entry->type);
}
}
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index ab811c0dad26ac..04a577848b015c 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -67,6 +67,8 @@ void bch2_journal_set_watermark(struct journal *j)
track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
trace_and_count(c, journal_full, c);
+ mod_bit(JOURNAL_SPACE_LOW, &j->flags, low_on_space || low_on_pin);
+
swap(watermark, j->watermark);
if (watermark > j->watermark)
journal_wake(j);
diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
index b5303874fc35b3..37a024e034d495 100644
--- a/fs/bcachefs/journal_seq_blacklist.c
+++ b/fs/bcachefs/journal_seq_blacklist.c
@@ -95,8 +95,7 @@ out:
return ret ?: bch2_blacklist_table_initialize(c);
}
-static int journal_seq_blacklist_table_cmp(const void *_l,
- const void *_r, size_t size)
+static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
{
const struct journal_seq_blacklist_table_entry *l = _l;
const struct journal_seq_blacklist_table_entry *r = _r;
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 8c053cb64ca5ee..b5161b5d76a008 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -134,6 +134,7 @@ enum journal_flags {
JOURNAL_STARTED,
JOURNAL_MAY_SKIP_FLUSH,
JOURNAL_NEED_FLUSH_WRITE,
+ JOURNAL_SPACE_LOW,
};
/* Reasons we may fail to get a journal reservation: */
diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c
index 9fac838d123e8e..b82f8209041ffb 100644
--- a/fs/bcachefs/logged_ops.c
+++ b/fs/bcachefs/logged_ops.c
@@ -37,7 +37,6 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
struct bkey_buf sk;
u32 restart_count = trans->restart_count;
- int ret;
if (!fn)
return 0;
@@ -45,11 +44,11 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
bch2_bkey_buf_init(&sk);
bch2_bkey_buf_reassemble(&sk, c, k);
- ret = drop_locks_do(trans, (bch2_fs_lazy_rw(c), 0)) ?:
- fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count);
+ fn->resume(trans, sk.k);
bch2_bkey_buf_exit(&sk, c);
- return ret;
+
+ return trans_was_restarted(trans, restart_count);
}
int bch2_resume_logged_ops(struct bch_fs *c)
diff --git a/fs/bcachefs/mean_and_variance_test.c b/fs/bcachefs/mean_and_variance_test.c
index db63b3f3b338ad..4c298e74723db3 100644
--- a/fs/bcachefs/mean_and_variance_test.c
+++ b/fs/bcachefs/mean_and_variance_test.c
@@ -136,20 +136,8 @@ static void mean_and_variance_test_1(struct kunit *test)
d, mean, stddev, weighted_mean, weighted_stddev);
}
-static void mean_and_variance_test_2(struct kunit *test)
-{
- s64 d[] = { 100, 10, 10, 10, 10, 10, 10 };
- s64 mean[] = { 10, 10, 10, 10, 10, 10, 10 };
- s64 stddev[] = { 9, 9, 9, 9, 9, 9, 9 };
- s64 weighted_mean[] = { 32, 27, 22, 19, 17, 15, 14 };
- s64 weighted_stddev[] = { 38, 35, 31, 27, 24, 21, 18 };
-
- do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
- d, mean, stddev, weighted_mean, weighted_stddev);
-}
-
/* Test behaviour where we switch from one steady state to another: */
-static void mean_and_variance_test_3(struct kunit *test)
+static void mean_and_variance_test_2(struct kunit *test)
{
s64 d[] = { 100, 100, 100, 100, 100 };
s64 mean[] = { 22, 32, 40, 46, 50 };
@@ -161,18 +149,6 @@ static void mean_and_variance_test_3(struct kunit *test)
d, mean, stddev, weighted_mean, weighted_stddev);
}
-static void mean_and_variance_test_4(struct kunit *test)
-{
- s64 d[] = { 100, 100, 100, 100, 100 };
- s64 mean[] = { 10, 11, 12, 13, 14 };
- s64 stddev[] = { 9, 13, 15, 17, 19 };
- s64 weighted_mean[] = { 32, 49, 61, 71, 78 };
- s64 weighted_stddev[] = { 38, 44, 44, 41, 38 };
-
- do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
- d, mean, stddev, weighted_mean, weighted_stddev);
-}
-
static void mean_and_variance_fast_divpow2(struct kunit *test)
{
s64 i;
@@ -230,8 +206,6 @@ static struct kunit_case mean_and_variance_test_cases[] = {
KUNIT_CASE(mean_and_variance_weighted_advanced_test),
KUNIT_CASE(mean_and_variance_test_1),
KUNIT_CASE(mean_and_variance_test_2),
- KUNIT_CASE(mean_and_variance_test_3),
- KUNIT_CASE(mean_and_variance_test_4),
{}
};
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index 08ea0cfc4aef08..bb068fd724656c 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -7,6 +7,7 @@
#include "disk_groups.h"
#include "error.h"
#include "opts.h"
+#include "recovery_passes.h"
#include "super-io.h"
#include "util.h"
@@ -42,7 +43,7 @@ const char * const __bch2_btree_ids[] = {
NULL
};
-const char * const bch2_csum_types[] = {
+static const char * const __bch2_csum_types[] = {
BCH_CSUM_TYPES()
NULL
};
@@ -52,7 +53,7 @@ const char * const bch2_csum_opts[] = {
NULL
};
-const char * const __bch2_compression_types[] = {
+static const char * const __bch2_compression_types[] = {
BCH_COMPRESSION_TYPES()
NULL
};
@@ -82,18 +83,39 @@ const char * const bch2_member_states[] = {
NULL
};
-const char * const bch2_jset_entry_types[] = {
+static const char * const __bch2_jset_entry_types[] = {
BCH_JSET_ENTRY_TYPES()
NULL
};
-const char * const bch2_fs_usage_types[] = {
+static const char * const __bch2_fs_usage_types[] = {
BCH_FS_USAGE_TYPES()
NULL
};
#undef x
+static void prt_str_opt_boundscheck(struct printbuf *out, const char * const opts[],
+ unsigned nr, const char *type, unsigned idx)
+{
+ if (idx < nr)
+ prt_str(out, opts[idx]);
+ else
+ prt_printf(out, "(unknown %s %u)", type, idx);
+}
+
+#define PRT_STR_OPT_BOUNDSCHECKED(name, type) \
+void bch2_prt_##name(struct printbuf *out, type t) \
+{ \
+ prt_str_opt_boundscheck(out, __bch2_##name##s, ARRAY_SIZE(__bch2_##name##s) - 1, #name, t);\
+}
+
+PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type, enum bch_jset_entry_type);
+PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type, enum bch_fs_usage_type);
+PRT_STR_OPT_BOUNDSCHECKED(data_type, enum bch_data_type);
+PRT_STR_OPT_BOUNDSCHECKED(csum_type, enum bch_csum_type);
+PRT_STR_OPT_BOUNDSCHECKED(compression_type, enum bch_compression_type);
+
static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
struct printbuf *err)
{
@@ -205,6 +227,9 @@ const struct bch_option bch2_opt_table[] = {
#define OPT_STR(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = ARRAY_SIZE(_choices), \
.choices = _choices
+#define OPT_STR_NOLIMIT(_choices) .type = BCH_OPT_STR, \
+ .min = 0, .max = U64_MAX, \
+ .choices = _choices
#define OPT_FN(_fn) .type = BCH_OPT_FN, .fn = _fn
#define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help) \
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index 136083c11f3a3a..84e452835a17d8 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -16,18 +16,20 @@ extern const char * const bch2_version_upgrade_opts[];
extern const char * const bch2_sb_features[];
extern const char * const bch2_sb_compat[];
extern const char * const __bch2_btree_ids[];
-extern const char * const bch2_csum_types[];
extern const char * const bch2_csum_opts[];
-extern const char * const __bch2_compression_types[];
extern const char * const bch2_compression_opts[];
extern const char * const bch2_str_hash_types[];
extern const char * const bch2_str_hash_opts[];
extern const char * const __bch2_data_types[];
extern const char * const bch2_member_states[];
-extern const char * const bch2_jset_entry_types[];
-extern const char * const bch2_fs_usage_types[];
extern const char * const bch2_d_types[];
+void bch2_prt_jset_entry_type(struct printbuf *, enum bch_jset_entry_type);
+void bch2_prt_fs_usage_type(struct printbuf *, enum bch_fs_usage_type);
+void bch2_prt_data_type(struct printbuf *, enum bch_data_type);
+void bch2_prt_csum_type(struct printbuf *, enum bch_csum_type);
+void bch2_prt_compression_type(struct printbuf *, enum bch_compression_type);
+
static inline const char *bch2_d_type_str(unsigned d_type)
{
return (d_type < BCH_DT_MAX ? bch2_d_types[d_type] : NULL) ?: "(bad d_type)";
@@ -362,12 +364,17 @@ enum fsck_err_opts {
OPT_FS|OPT_MOUNT, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
- NULL, "Don't replay the journal") \
- x(keep_journal, u8, \
+ NULL, "Exit recovery immediately prior to journal replay")\
+ x(recovery_pass_last, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_STR_NOLIMIT(bch2_recovery_passes), \
+ BCH2_NO_SB_OPT, 0, \
+ NULL, "Exit recovery after specified pass") \
+ x(retain_recovery_info, u8, \
0, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
- NULL, "Don't free journal entries/keys after startup")\
+ NULL, "Don't free journal entries/keys, scanned btree nodes after startup")\
x(read_entire_journal, u8, \
0, \
OPT_BOOL(), \
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 03f9d6afe46788..0f328aba9760ba 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -1,35 +1,31 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
-#include "backpointers.h"
-#include "bkey_buf.h"
#include "alloc_background.h"
-#include "btree_gc.h"
+#include "bkey_buf.h"
#include "btree_journal_iter.h"
+#include "btree_node_scan.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "buckets.h"
#include "dirent.h"
-#include "ec.h"
#include "errcode.h"
#include "error.h"
#include "fs-common.h"
-#include "fsck.h"
#include "journal_io.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
-#include "lru.h"
#include "logged_ops.h"
#include "move.h"
#include "quota.h"
#include "rebalance.h"
#include "recovery.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "sb-clean.h"
#include "sb-downgrade.h"
#include "snapshot.h"
-#include "subvolume.h"
#include "super-io.h"
#include <linux/sort.h>
@@ -37,22 +33,22 @@
#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-static bool btree_id_is_alloc(enum btree_id id)
+void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
{
- switch (id) {
- case BTREE_ID_alloc:
- case BTREE_ID_backpointers:
- case BTREE_ID_need_discard:
- case BTREE_ID_freespace:
- case BTREE_ID_bucket_gens:
- return true;
- default:
- return false;
+ u64 b = BIT_ULL(btree);
+
+ if (!(c->sb.btrees_lost_data & b)) {
+ bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree));
+
+ mutex_lock(&c->sb_lock);
+ bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
}
}
/* for -o reconstruct_alloc: */
-static void do_reconstruct_alloc(struct bch_fs *c)
+static void bch2_reconstruct_alloc(struct bch_fs *c)
{
bch2_journal_log_msg(c, "dropping alloc info");
bch_info(c, "dropping and reconstructing all alloc info");
@@ -87,15 +83,17 @@ static void do_reconstruct_alloc(struct bch_fs *c)
c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
- struct journal_keys *keys = &c->journal_keys;
- size_t src, dst;
- move_gap(keys, keys->nr);
-
- for (src = 0, dst = 0; src < keys->nr; src++)
- if (!btree_id_is_alloc(keys->data[src].btree_id))
- keys->data[dst++] = keys->data[src];
- keys->nr = keys->gap = dst;
+ bch2_shoot_down_journal_keys(c, BTREE_ID_alloc,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_freespace,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens,
+ 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
}
/*
@@ -186,7 +184,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
return cmp_int(l->journal_seq, r->journal_seq);
}
-static int bch2_journal_replay(struct bch_fs *c)
+int bch2_journal_replay(struct bch_fs *c)
{
struct journal_keys *keys = &c->journal_keys;
DARRAY(struct journal_key *) keys_sorted = { 0 };
@@ -194,6 +192,7 @@ static int bch2_journal_replay(struct bch_fs *c)
u64 start_seq = c->journal_replay_seq_start;
u64 end_seq = c->journal_replay_seq_start;
struct btree_trans *trans = bch2_trans_get(c);
+ bool immediate_flush = false;
int ret = 0;
if (keys->nr) {
@@ -215,6 +214,13 @@ static int bch2_journal_replay(struct bch_fs *c)
darray_for_each(*keys, k) {
cond_resched();
+ /*
+ * k->allocated means the key wasn't read in from the journal,
+ * rather it was from early repair code
+ */
+ if (k->allocated)
+ immediate_flush = true;
+
/* Skip fastpath if we're low on space in the journal */
ret = c->journal.watermark ? -1 :
commit_do(trans, NULL, NULL,
@@ -266,7 +272,8 @@ static int bch2_journal_replay(struct bch_fs *c)
bch2_trans_put(trans);
trans = NULL;
- if (!c->opts.keep_journal)
+ if (!c->opts.retain_recovery_info &&
+ c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
bch2_journal_keys_put_initial(c);
replay_now_at(j, j->replay_journal_seq_end);
@@ -274,6 +281,12 @@ static int bch2_journal_replay(struct bch_fs *c)
bch2_journal_set_replay_done(j);
+ /* if we did any repair, flush it immediately */
+ if (immediate_flush) {
+ bch2_journal_flush_all_pins(&c->journal);
+ ret = bch2_journal_meta(&c->journal);
+ }
+
if (keys->nr)
bch2_journal_log_msg(c, "journal replay finished");
err:
@@ -423,10 +436,9 @@ static int journal_replay_early(struct bch_fs *c,
static int read_btree_roots(struct bch_fs *c)
{
- unsigned i;
int ret = 0;
- for (i = 0; i < btree_id_nr_alive(c); i++) {
+ for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
if (!r->alive)
@@ -435,186 +447,46 @@ static int read_btree_roots(struct bch_fs *c)
if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc)
continue;
- if (r->error) {
- __fsck_err(c,
- btree_id_is_alloc(i)
- ? FSCK_CAN_IGNORE : 0,
- btree_root_bkey_invalid,
- "invalid btree root %s",
- bch2_btree_id_str(i));
- if (i == BTREE_ID_alloc)
+ if (mustfix_fsck_err_on((ret = r->error),
+ c, btree_root_bkey_invalid,
+ "invalid btree root %s",
+ bch2_btree_id_str(i)) ||
+ mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)),
+ c, btree_root_read_error,
+ "error reading btree root %s l=%u: %s",
+ bch2_btree_id_str(i), r->level, bch2_err_str(ret))) {
+ if (btree_id_is_alloc(i)) {
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs);
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
- }
+ r->error = 0;
+ } else if (!(c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) {
+ bch_info(c, "will run btree node scan");
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes);
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
+ }
- ret = bch2_btree_root_read(c, i, &r->key, r->level);
- if (ret) {
- fsck_err(c,
- btree_root_read_error,
- "error reading btree root %s",
- bch2_btree_id_str(i));
- if (btree_id_is_alloc(i))
- c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
ret = 0;
+ bch2_btree_lost_data(c, i);
}
}
- for (i = 0; i < BTREE_ID_NR; i++) {
+ for (unsigned i = 0; i < BTREE_ID_NR; i++) {
struct btree_root *r = bch2_btree_id_root(c, i);
- if (!r->b) {
+ if (!r->b && !r->error) {
r->alive = false;
r->level = 0;
- bch2_btree_root_alloc(c, i);
+ bch2_btree_root_alloc_fake(c, i, 0);
}
}
fsck_err:
return ret;
}
-static int bch2_initialize_subvolumes(struct bch_fs *c)
-{
- struct bkey_i_snapshot_tree root_tree;
- struct bkey_i_snapshot root_snapshot;
- struct bkey_i_subvolume root_volume;
- int ret;
-
- bkey_snapshot_tree_init(&root_tree.k_i);
- root_tree.k.p.offset = 1;
- root_tree.v.master_subvol = cpu_to_le32(1);
- root_tree.v.root_snapshot = cpu_to_le32(U32_MAX);
-
- bkey_snapshot_init(&root_snapshot.k_i);
- root_snapshot.k.p.offset = U32_MAX;
- root_snapshot.v.flags = 0;
- root_snapshot.v.parent = 0;
- root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
- root_snapshot.v.tree = cpu_to_le32(1);
- SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
-
- bkey_subvolume_init(&root_volume.k_i);
- root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
- root_volume.v.flags = 0;
- root_volume.v.snapshot = cpu_to_le32(U32_MAX);
- root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
-
- ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
- bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
- bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
- bch_err_fn(c, ret);
- return ret;
-}
-
-static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
-{
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bch_inode_unpacked inode;
- int ret;
-
- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
- SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
- ret = bkey_err(k);
- if (ret)
- return ret;
-
- if (!bkey_is_inode(k.k)) {
- bch_err(trans->c, "root inode not found");
- ret = -BCH_ERR_ENOENT_inode;
- goto err;
- }
-
- ret = bch2_inode_unpack(k, &inode);
- BUG_ON(ret);
-
- inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
-
- ret = bch2_inode_write(trans, &iter, &inode);
-err:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
-}
-
-/* set bi_subvol on root inode */
-noinline_for_stack
-static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
-{
- int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
- __bch2_fs_upgrade_for_subvolumes(trans));
- bch_err_fn(c, ret);
- return ret;
-}
-
-const char * const bch2_recovery_passes[] = {
-#define x(_fn, ...) #_fn,
- BCH_RECOVERY_PASSES()
-#undef x
- NULL
-};
-
-static int bch2_check_allocations(struct bch_fs *c)
-{
- return bch2_gc(c, true, c->opts.norecovery);
-}
-
-static int bch2_set_may_go_rw(struct bch_fs *c)
-{
- struct journal_keys *keys = &c->journal_keys;
-
- /*
- * After we go RW, the journal keys buffer can't be modified (except for
- * setting journal_key->overwritten: it will be accessed by multiple
- * threads
- */
- move_gap(keys, keys->nr);
-
- set_bit(BCH_FS_may_go_rw, &c->flags);
-
- if (keys->nr || c->opts.fsck || !c->sb.clean)
- return bch2_fs_read_write_early(c);
- return 0;
-}
-
-struct recovery_pass_fn {
- int (*fn)(struct bch_fs *);
- unsigned when;
-};
-
-static struct recovery_pass_fn recovery_pass_fns[] = {
-#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when },
- BCH_RECOVERY_PASSES()
-#undef x
-};
-
-u64 bch2_recovery_passes_to_stable(u64 v)
-{
- static const u8 map[] = {
-#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
- BCH_RECOVERY_PASSES()
-#undef x
- };
-
- u64 ret = 0;
- for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
- if (v & BIT_ULL(i))
- ret |= BIT_ULL(map[i]);
- return ret;
-}
-
-u64 bch2_recovery_passes_from_stable(u64 v)
-{
- static const u8 map[] = {
-#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
- BCH_RECOVERY_PASSES()
-#undef x
- };
-
- u64 ret = 0;
- for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
- if (v & BIT_ULL(i))
- ret |= BIT_ULL(map[i]);
- return ret;
-}
-
static bool check_version_upgrade(struct bch_fs *c)
{
unsigned latest_version = bcachefs_metadata_version_current;
@@ -687,96 +559,6 @@ static bool check_version_upgrade(struct bch_fs *c)
return false;
}
-u64 bch2_fsck_recovery_passes(void)
-{
- u64 ret = 0;
-
- for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
- if (recovery_pass_fns[i].when & PASS_FSCK)
- ret |= BIT_ULL(i);
- return ret;
-}
-
-static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-{
- struct recovery_pass_fn *p = recovery_pass_fns + pass;
-
- if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
- return false;
- if (c->recovery_passes_explicit & BIT_ULL(pass))
- return true;
- if ((p->when & PASS_FSCK) && c->opts.fsck)
- return true;
- if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
- return true;
- if (p->when & PASS_ALWAYS)
- return true;
- return false;
-}
-
-static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-{
- struct recovery_pass_fn *p = recovery_pass_fns + pass;
- int ret;
-
- if (!(p->when & PASS_SILENT))
- bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
- bch2_recovery_passes[pass]);
- ret = p->fn(c);
- if (ret)
- return ret;
- if (!(p->when & PASS_SILENT))
- bch2_print(c, KERN_CONT " done\n");
-
- return 0;
-}
-
-static int bch2_run_recovery_passes(struct bch_fs *c)
-{
- int ret = 0;
-
- while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
- if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
- unsigned pass = c->curr_recovery_pass;
-
- ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
- if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
- (ret && c->curr_recovery_pass < pass))
- continue;
- if (ret)
- break;
-
- c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
- }
- c->curr_recovery_pass++;
- c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
- }
-
- return ret;
-}
-
-int bch2_run_online_recovery_passes(struct bch_fs *c)
-{
- int ret = 0;
-
- for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
- struct recovery_pass_fn *p = recovery_pass_fns + i;
-
- if (!(p->when & PASS_ONLINE))
- continue;
-
- ret = bch2_run_recovery_pass(c, i);
- if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
- i = c->curr_recovery_pass;
- continue;
- }
- if (ret)
- break;
- }
-
- return ret;
-}
-
int bch2_fs_recovery(struct bch_fs *c)
{
struct bch_sb_field_clean *clean = NULL;
@@ -809,24 +591,14 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
- if (c->opts.fsck && c->opts.norecovery) {
- bch_err(c, "cannot select both norecovery and fsck");
- ret = -EINVAL;
- goto err;
- }
+ if (c->opts.norecovery)
+ c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1;
if (!c->opts.nochanges) {
mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
- struct bch_sb_field_ext *ext =
- bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
- if (!ext) {
- ret = -BCH_ERR_ENOSPC_sb;
- mutex_unlock(&c->sb_lock);
- goto err;
- }
-
if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
ext->recovery_passes_required[0] |=
cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
@@ -885,7 +657,7 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
- if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
+ if (!c->sb.clean || c->opts.fsck || c->opts.retain_recovery_info) {
struct genradix_iter iter;
struct journal_replay **i;
@@ -965,7 +737,7 @@ use_clean:
c->journal_replay_seq_end = blacklist_seq - 1;
if (c->opts.reconstruct_alloc)
- do_reconstruct_alloc(c);
+ bch2_reconstruct_alloc(c);
zero_out_btree_mem_ptr(&c->journal_keys);
@@ -1017,6 +789,12 @@ use_clean:
clear_bit(BCH_FS_fsck_running, &c->flags);
+ /* fsync if we fixed errors */
+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
+ bch2_journal_flush_all_pins(&c->journal);
+ bch2_journal_meta(&c->journal);
+ }
+
/* If we fixed errors, verify that fs is actually clean now: */
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
test_bit(BCH_FS_errors_fixed, &c->flags) &&
@@ -1051,6 +829,7 @@ use_clean:
}
mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
@@ -1064,15 +843,18 @@ use_clean:
write_sb = true;
}
- if (!test_bit(BCH_FS_error, &c->flags)) {
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
- if (ext &&
- (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) ||
- !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) {
- memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required));
- memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
- write_sb = true;
- }
+ if (!test_bit(BCH_FS_error, &c->flags) &&
+ !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent))) {
+ memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
+ write_sb = true;
+ }
+
+ if (c->opts.fsck &&
+ !test_bit(BCH_FS_error, &c->flags) &&
+ c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 &&
+ ext->btrees_lost_data) {
+ ext->btrees_lost_data = 0;
+ write_sb = true;
}
if (c->opts.fsck &&
@@ -1113,9 +895,10 @@ use_clean:
out:
bch2_flush_fsck_errs(c);
- if (!c->opts.keep_journal &&
- test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+ if (!c->opts.retain_recovery_info) {
bch2_journal_keys_put_initial(c);
+ bch2_find_btree_nodes_exit(&c->found_btree_nodes);
+ }
kfree(clean);
if (!ret &&
@@ -1141,6 +924,7 @@ int bch2_fs_initialize(struct bch_fs *c)
int ret;
bch_notice(c, "initializing new filesystem");
+ set_bit(BCH_FS_new_fs, &c->flags);
mutex_lock(&c->sb_lock);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
@@ -1155,11 +939,11 @@ int bch2_fs_initialize(struct bch_fs *c)
}
mutex_unlock(&c->sb_lock);
- c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns);
+ c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
set_bit(BCH_FS_may_go_rw, &c->flags);
for (unsigned i = 0; i < BTREE_ID_NR; i++)
- bch2_btree_root_alloc(c, i);
+ bch2_btree_root_alloc_fake(c, i, 0);
for_each_member_device(c, ca)
bch2_dev_usage_init(ca);
@@ -1230,7 +1014,7 @@ int bch2_fs_initialize(struct bch_fs *c)
if (ret)
goto err;
- c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1;
+ c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c);
diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h
index 4e9d24719b2e85..4bf818de1f2feb 100644
--- a/fs/bcachefs/recovery.h
+++ b/fs/bcachefs/recovery.h
@@ -2,37 +2,9 @@
#ifndef _BCACHEFS_RECOVERY_H
#define _BCACHEFS_RECOVERY_H
-extern const char * const bch2_recovery_passes[];
+void bch2_btree_lost_data(struct bch_fs *, enum btree_id);
-u64 bch2_recovery_passes_to_stable(u64 v);
-u64 bch2_recovery_passes_from_stable(u64 v);
-
-/*
- * For when we need to rewind recovery passes and run a pass we skipped:
- */
-static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c,
- enum bch_recovery_pass pass)
-{
- if (c->recovery_passes_explicit & BIT_ULL(pass))
- return 0;
-
- bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
- bch2_recovery_passes[pass], pass,
- bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
-
- c->recovery_passes_explicit |= BIT_ULL(pass);
-
- if (c->curr_recovery_pass >= pass) {
- c->curr_recovery_pass = pass;
- c->recovery_passes_complete &= (1ULL << pass) >> 1;
- return -BCH_ERR_restart_recovery;
- } else {
- return 0;
- }
-}
-
-int bch2_run_online_recovery_passes(struct bch_fs *);
-u64 bch2_fsck_recovery_passes(void);
+int bch2_journal_replay(struct bch_fs *);
int bch2_fs_recovery(struct bch_fs *);
int bch2_fs_initialize(struct bch_fs *);
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
new file mode 100644
index 00000000000000..0cec0f7d970352
--- /dev/null
+++ b/fs/bcachefs/recovery_passes.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "backpointers.h"
+#include "btree_gc.h"
+#include "btree_node_scan.h"
+#include "ec.h"
+#include "fsck.h"
+#include "inode.h"
+#include "journal.h"
+#include "lru.h"
+#include "logged_ops.h"
+#include "rebalance.h"
+#include "recovery.h"
+#include "recovery_passes.h"
+#include "snapshot.h"
+#include "subvolume.h"
+#include "super.h"
+#include "super-io.h"
+
+const char * const bch2_recovery_passes[] = {
+#define x(_fn, ...) #_fn,
+ BCH_RECOVERY_PASSES()
+#undef x
+ NULL
+};
+
+static int bch2_check_allocations(struct bch_fs *c)
+{
+ return bch2_gc(c, true, false);
+}
+
+static int bch2_set_may_go_rw(struct bch_fs *c)
+{
+ struct journal_keys *keys = &c->journal_keys;
+
+ /*
+ * After we go RW, the journal keys buffer can't be modified (except for
+ * setting journal_key->overwritten: it will be accessed by multiple
+ * threads
+ */
+ move_gap(keys, keys->nr);
+
+ set_bit(BCH_FS_may_go_rw, &c->flags);
+
+ if (keys->nr || c->opts.fsck || !c->sb.clean || c->recovery_passes_explicit)
+ return bch2_fs_read_write_early(c);
+ return 0;
+}
+
+struct recovery_pass_fn {
+ int (*fn)(struct bch_fs *);
+ unsigned when;
+};
+
+static struct recovery_pass_fn recovery_pass_fns[] = {
+#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when },
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+static const u8 passes_to_stable_map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+};
+
+static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
+{
+ return passes_to_stable_map[pass];
+}
+
+u64 bch2_recovery_passes_to_stable(u64 v)
+{
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(passes_to_stable_map[i]);
+ return ret;
+}
+
+u64 bch2_recovery_passes_from_stable(u64 v)
+{
+ static const u8 map[] = {
+#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
+ BCH_RECOVERY_PASSES()
+#undef x
+ };
+
+ u64 ret = 0;
+ for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
+ if (v & BIT_ULL(i))
+ ret |= BIT_ULL(map[i]);
+ return ret;
+}
+
+/*
+ * For when we need to rewind recovery passes and run a pass we skipped:
+ */
+int bch2_run_explicit_recovery_pass(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ if (c->recovery_passes_explicit & BIT_ULL(pass))
+ return 0;
+
+ bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
+ bch2_recovery_passes[pass], pass,
+ bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
+
+ c->recovery_passes_explicit |= BIT_ULL(pass);
+
+ if (c->curr_recovery_pass >= pass) {
+ c->curr_recovery_pass = pass;
+ c->recovery_passes_complete &= (1ULL << pass) >> 1;
+ return -BCH_ERR_restart_recovery;
+ } else {
+ return 0;
+ }
+}
+
+int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+
+ mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+ if (!test_bit_le64(s, ext->recovery_passes_required)) {
+ __set_bit_le64(s, ext->recovery_passes_required);
+ bch2_write_super(c);
+ }
+ mutex_unlock(&c->sb_lock);
+
+ return bch2_run_explicit_recovery_pass(c, pass);
+}
+
+static void bch2_clear_recovery_pass_required(struct bch_fs *c,
+ enum bch_recovery_pass pass)
+{
+ enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+
+ mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+ if (test_bit_le64(s, ext->recovery_passes_required)) {
+ __clear_bit_le64(s, ext->recovery_passes_required);
+ bch2_write_super(c);
+ }
+ mutex_unlock(&c->sb_lock);
+}
+
+u64 bch2_fsck_recovery_passes(void)
+{
+ u64 ret = 0;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
+ if (recovery_pass_fns[i].when & PASS_FSCK)
+ ret |= BIT_ULL(i);
+ return ret;
+}
+
+static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
+{
+ struct recovery_pass_fn *p = recovery_pass_fns + pass;
+
+ if (c->recovery_passes_explicit & BIT_ULL(pass))
+ return true;
+ if ((p->when & PASS_FSCK) && c->opts.fsck)
+ return true;
+ if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
+ return true;
+ if (p->when & PASS_ALWAYS)
+ return true;
+ return false;
+}
+
+static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
+{
+ struct recovery_pass_fn *p = recovery_pass_fns + pass;
+ int ret;
+
+ if (!(p->when & PASS_SILENT))
+ bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
+ bch2_recovery_passes[pass]);
+ ret = p->fn(c);
+ if (ret)
+ return ret;
+ if (!(p->when & PASS_SILENT))
+ bch2_print(c, KERN_CONT " done\n");
+
+ return 0;
+}
+
+int bch2_run_online_recovery_passes(struct bch_fs *c)
+{
+ int ret = 0;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
+ struct recovery_pass_fn *p = recovery_pass_fns + i;
+
+ if (!(p->when & PASS_ONLINE))
+ continue;
+
+ ret = bch2_run_recovery_pass(c, i);
+ if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
+ i = c->curr_recovery_pass;
+ continue;
+ }
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int bch2_run_recovery_passes(struct bch_fs *c)
+{
+ int ret = 0;
+
+ while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
+ if (c->opts.recovery_pass_last &&
+ c->curr_recovery_pass > c->opts.recovery_pass_last)
+ break;
+
+ if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
+ unsigned pass = c->curr_recovery_pass;
+
+ ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
+ if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
+ (ret && c->curr_recovery_pass < pass))
+ continue;
+ if (ret)
+ break;
+
+ c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
+ }
+
+ c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
+
+ if (!test_bit(BCH_FS_error, &c->flags))
+ bch2_clear_recovery_pass_required(c, c->curr_recovery_pass);
+
+ c->curr_recovery_pass++;
+ }
+
+ return ret;
+}
diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h
new file mode 100644
index 00000000000000..99b464e127b80b
--- /dev/null
+++ b/fs/bcachefs/recovery_passes.h
@@ -0,0 +1,17 @@
+#ifndef _BCACHEFS_RECOVERY_PASSES_H
+#define _BCACHEFS_RECOVERY_PASSES_H
+
+extern const char * const bch2_recovery_passes[];
+
+u64 bch2_recovery_passes_to_stable(u64 v);
+u64 bch2_recovery_passes_from_stable(u64 v);
+
+u64 bch2_fsck_recovery_passes(void);
+
+int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
+int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);
+
+int bch2_run_online_recovery_passes(struct bch_fs *);
+int bch2_run_recovery_passes(struct bch_fs *);
+
+#endif /* _BCACHEFS_RECOVERY_PASSES_H */
diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_passes_types.h
index 4959e95e7c7465..773aea9a0080fd 100644
--- a/fs/bcachefs/recovery_types.h
+++ b/fs/bcachefs/recovery_passes_types.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_RECOVERY_TYPES_H
-#define _BCACHEFS_RECOVERY_TYPES_H
+#ifndef _BCACHEFS_RECOVERY_PASSES_TYPES_H
+#define _BCACHEFS_RECOVERY_PASSES_TYPES_H
#define PASS_SILENT BIT(0)
#define PASS_FSCK BIT(1)
@@ -13,6 +13,7 @@
* must never change:
*/
#define BCH_RECOVERY_PASSES() \
+ x(scan_for_btree_nodes, 37, 0) \
x(check_topology, 4, 0) \
x(alloc_read, 0, PASS_ALWAYS) \
x(stripes_read, 1, PASS_ALWAYS) \
@@ -31,13 +32,13 @@
x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \
x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
x(bucket_gens_init, 17, 0) \
+ x(reconstruct_snapshots, 38, 0) \
x(check_snapshot_trees, 18, PASS_ONLINE|PASS_FSCK) \
x(check_snapshots, 19, PASS_ONLINE|PASS_FSCK) \
x(check_subvols, 20, PASS_ONLINE|PASS_FSCK) \
x(check_subvol_children, 35, PASS_ONLINE|PASS_FSCK) \
x(delete_dead_snapshots, 21, PASS_ONLINE|PASS_FSCK) \
x(fs_upgrade_for_subvolumes, 22, 0) \
- x(resume_logged_ops, 23, PASS_ALWAYS) \
x(check_inodes, 24, PASS_FSCK) \
x(check_extents, 25, PASS_FSCK) \
x(check_indirect_extents, 26, PASS_FSCK) \
@@ -47,6 +48,7 @@
x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \
x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \
x(check_nlinks, 31, PASS_FSCK) \
+ x(resume_logged_ops, 23, PASS_ALWAYS) \
x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \
x(fix_reflink_p, 33, 0) \
x(set_fs_needs_rebalance, 34, 0) \
@@ -56,6 +58,7 @@ enum bch_recovery_pass {
#define x(n, id, when) BCH_RECOVERY_PASS_##n,
BCH_RECOVERY_PASSES()
#undef x
+ BCH_RECOVERY_PASS_NR
};
/* But we also need stable identifiers that can be used in the superblock */
@@ -65,4 +68,4 @@ enum bch_recovery_pass_stable {
#undef x
};
-#endif /* _BCACHEFS_RECOVERY_TYPES_H */
+#endif /* _BCACHEFS_RECOVERY_PASSES_TYPES_H */
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index c47c66c2b394dc..ff7864731a073d 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -185,8 +185,7 @@ not_found:
} else {
bkey_error_init(update);
update->k.p = p.k->p;
- update->k.p.offset = next_idx;
- update->k.size = next_idx - *idx;
+ update->k.size = p.k->size;
set_bkey_val_u64s(&update->k, 0);
}
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index cc2672c120312c..678b9c20e2514b 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -6,12 +6,15 @@
#include "replicas.h"
#include "super-io.h"
+#include <linux/sort.h>
+
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
struct bch_replicas_cpu *);
/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
-static int bch2_memcmp(const void *l, const void *r, size_t size)
+static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
+ size_t size = (size_t) priv;
return memcmp(l, r, size);
}
@@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
- eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
+ eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
+ bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}
static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
@@ -228,7 +232,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
verify_replicas_entry(search);
-#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size)
+#define entry_cmp(_l, _r) memcmp(_l, _r, entry_size)
idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
entry_cmp, search);
#undef entry_cmp
@@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
{
unsigned i;
- sort_cmp_size(cpu_r->entries,
- cpu_r->nr,
- cpu_r->entry_size,
- bch2_memcmp, NULL);
+ sort_r(cpu_r->entries,
+ cpu_r->nr,
+ cpu_r->entry_size,
+ bch2_memcmp, NULL,
+ (void *)(size_t)cpu_r->entry_size);
for (i = 0; i < cpu_r->nr; i++) {
struct bch_replicas_entry_v1 *e =
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index e4396cb0bacb03..a98ef940b7a328 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -7,7 +7,7 @@
#include "bcachefs.h"
#include "darray.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "super-io.h"
@@ -51,7 +51,10 @@
BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \
x(btree_subvolume_children, \
BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \
- BCH_FSCK_ERR_subvol_children_not_set)
+ BCH_FSCK_ERR_subvol_children_not_set) \
+ x(mi_btree_bitmap, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_btree_bitmap_not_marked)
#define DOWNGRADE_TABLE()
diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h
index 5178bf579f7c53..4ca6e7b0d8aaed 100644
--- a/fs/bcachefs/sb-errors_types.h
+++ b/fs/bcachefs/sb-errors_types.h
@@ -130,7 +130,7 @@
x(bucket_gens_nonzero_for_invalid_buckets, 122) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
x(need_discard_freespace_key_bad, 124) \
- x(backpointer_pos_wrong, 125) \
+ x(backpointer_bucket_offset_wrong, 125) \
x(backpointer_to_missing_device, 126) \
x(backpointer_to_missing_alloc, 127) \
x(backpointer_to_missing_ptr, 128) \
@@ -265,7 +265,13 @@
x(subvol_children_bad, 257) \
x(subvol_loop, 258) \
x(subvol_unreachable, 259) \
- x(btree_node_bkey_bad_u64s, 260)
+ x(btree_node_bkey_bad_u64s, 260) \
+ x(btree_node_topology_empty_interior_node, 261) \
+ x(btree_ptr_v2_min_key_bad, 262) \
+ x(btree_root_unreadable_and_scan_found_nothing, 263) \
+ x(snapshot_node_missing, 264) \
+ x(dup_backpointer_to_bad_csum_extent, 265) \
+ x(btree_bitmap_not_marked, 266)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index eff5ce18c69c06..522a969345e528 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "btree_cache.h"
#include "disk_groups.h"
#include "opts.h"
#include "replicas.h"
@@ -426,3 +427,55 @@ void bch2_dev_errors_reset(struct bch_dev *ca)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
}
+
+/*
+ * Per member "range has btree nodes" bitmap:
+ *
+ * This is so that if we ever have to run the btree node scan to repair we don't
+ * have to scan full devices:
+ */
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
+{
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
+ if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev),
+ ptr->offset, btree_sectors(c)))
+ return false;
+ return true;
+}
+
+static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
+ u64 start, unsigned sectors)
+{
+ struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
+ u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);
+
+ u64 end = start + sectors;
+
+ int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
+ if (resize > 0) {
+ u64 new_bitmap = 0;
+
+ for (unsigned i = 0; i < 64; i++)
+ if (bitmap & BIT_ULL(i))
+ new_bitmap |= BIT_ULL(i >> resize);
+ bitmap = new_bitmap;
+ m->btree_bitmap_shift += resize;
+ }
+
+ for (unsigned bit = sectors >> m->btree_bitmap_shift;
+ bit << m->btree_bitmap_shift < end;
+ bit++)
+ bitmap |= BIT_ULL(bit);
+
+ m->btree_allocated_bitmap = cpu_to_le64(bitmap);
+}
+
+void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
+{
+ lockdep_assert_held(&c->sb_lock);
+
+ struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+ bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
+ __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
+}
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index be0a941832715a..b27c3e4467cf28 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -3,6 +3,7 @@
#define _BCACHEFS_SB_MEMBERS_H
#include "darray.h"
+#include "bkey_types.h"
extern char * const bch2_member_error_strs[];
@@ -220,6 +221,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
: 1,
.freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
.valid = bch2_member_exists(mi),
+ .btree_bitmap_shift = mi->btree_bitmap_shift,
+ .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap),
};
}
@@ -228,4 +231,22 @@ void bch2_sb_members_from_cpu(struct bch_fs *);
void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
void bch2_dev_errors_reset(struct bch_dev *);
+static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start, unsigned sectors)
+{
+ u64 end = start + sectors;
+
+ if (end > 64 << ca->mi.btree_bitmap_shift)
+ return false;
+
+ for (unsigned bit = sectors >> ca->mi.btree_bitmap_shift;
+ bit << ca->mi.btree_bitmap_shift < end;
+ bit++)
+ if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit)))
+ return false;
+ return true;
+}
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
+void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
+
#endif /* _BCACHEFS_SB_MEMBERS_H */
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 39debe814bf392..544322d5c25170 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -8,6 +8,7 @@
#include "errcode.h"
#include "error.h"
#include "fs.h"
+#include "recovery_passes.h"
#include "snapshot.h"
#include <linux/random.h>
@@ -93,8 +94,10 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans,
static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
{
- while (id && id < ancestor)
- id = __snapshot_t(t, id)->parent;
+ while (id && id < ancestor) {
+ const struct snapshot_t *s = __snapshot_t(t, id);
+ id = s ? s->parent : 0;
+ }
return id == ancestor;
}
@@ -110,6 +113,8 @@ static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancest
static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
{
const struct snapshot_t *s = __snapshot_t(t, id);
+ if (!s)
+ return 0;
if (s->skip[2] <= ancestor)
return s->skip[2];
@@ -120,6 +125,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances
return s->parent;
}
+static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
+{
+ const struct snapshot_t *s = __snapshot_t(t, id);
+ if (!s)
+ return false;
+
+ return test_bit(ancestor - id - 1, s->is_ancestor);
+}
+
bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
bool ret;
@@ -127,7 +141,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
rcu_read_lock();
struct snapshot_table *t = rcu_dereference(c->snapshots);
- if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
+ if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
goto out;
}
@@ -135,13 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
while (id && id < ancestor - IS_ANCESTOR_BITMAP)
id = get_ancestor_below(t, id, ancestor);
- if (id && id < ancestor) {
- ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
+ ret = id && id < ancestor
+ ? test_ancestor_bitmap(t, id, ancestor)
+ : id == ancestor;
- EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
- } else {
- ret = id == ancestor;
- }
+ EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
out:
rcu_read_unlock();
@@ -151,36 +163,39 @@ out:
static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
- size_t new_size;
struct snapshot_table *new, *old;
- new_size = max(16UL, roundup_pow_of_two(idx + 1));
+ size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
+ size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
- new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
+ new = kvzalloc(new_bytes, GFP_KERNEL);
if (!new)
return NULL;
+ new->nr = new_size;
+
old = rcu_dereference_protected(c->snapshots, true);
if (old)
- memcpy(new->s,
- rcu_dereference_protected(c->snapshots, true)->s,
- sizeof(new->s[0]) * c->snapshot_table_size);
+ memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);
rcu_assign_pointer(c->snapshots, new);
- c->snapshot_table_size = new_size;
- kvfree_rcu_mightsleep(old);
+ kvfree_rcu(old, rcu);
- return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+ return &rcu_dereference_protected(c->snapshots,
+ lockdep_is_held(&c->snapshot_table_lock))->s[idx];
}
static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
{
size_t idx = U32_MAX - id;
+ struct snapshot_table *table =
+ rcu_dereference_protected(c->snapshots,
+ lockdep_is_held(&c->snapshot_table_lock));
lockdep_assert_held(&c->snapshot_table_lock);
- if (likely(idx < c->snapshot_table_size))
- return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+ if (likely(table && idx < table->nr))
+ return &table->s[idx];
return __snapshot_t_mut(c, id);
}
@@ -567,6 +582,13 @@ static int check_snapshot_tree(struct btree_trans *trans,
u32 subvol_id;
ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
+ bch_err_fn(c, ret);
+
+ if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
+ ret = 0;
+ goto err;
+ }
+
if (ret)
goto err;
@@ -724,7 +746,6 @@ static int check_snapshot(struct btree_trans *trans,
u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
u32 real_depth;
struct printbuf buf = PRINTBUF;
- bool should_have_subvol;
u32 i, id;
int ret = 0;
@@ -770,7 +791,7 @@ static int check_snapshot(struct btree_trans *trans,
}
}
- should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
+ bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
!BCH_SNAPSHOT_DELETED(&s);
if (should_have_subvol) {
@@ -872,6 +893,154 @@ int bch2_check_snapshots(struct bch_fs *c)
return ret;
}
+static int check_snapshot_exists(struct btree_trans *trans, u32 id)
+{
+ struct bch_fs *c = trans->c;
+
+ if (bch2_snapshot_equiv(c, id))
+ return 0;
+
+ u32 tree_id;
+ int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
+ if (ret)
+ return ret;
+
+ struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot));
+ ret = PTR_ERR_OR_ZERO(snapshot);
+ if (ret)
+ return ret;
+
+ bkey_snapshot_init(&snapshot->k_i);
+ snapshot->k.p = POS(0, id);
+ snapshot->v.tree = cpu_to_le32(tree_id);
+ snapshot->v.btime.lo = cpu_to_le64(bch2_current_time(c));
+
+ return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
+ bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
+ bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?:
+ bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i));
+}
+
+/* Figure out which snapshot nodes belong in the same tree: */
+struct snapshot_tree_reconstruct {
+ enum btree_id btree;
+ struct bpos cur_pos;
+ snapshot_id_list cur_ids;
+ DARRAY(snapshot_id_list) trees;
+};
+
+static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
+{
+ darray_for_each(r->trees, i)
+ darray_exit(i);
+ darray_exit(&r->trees);
+ darray_exit(&r->cur_ids);
+}
+
+static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+ return r->btree == BTREE_ID_inodes
+ ? r->cur_pos.offset == pos.offset
+ : r->cur_pos.inode == pos.inode;
+}
+
+static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
+{
+ darray_for_each(*l, i)
+ if (snapshot_list_has_id(r, *i))
+ return true;
+ return false;
+}
+
+static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
+{
+ bool first = true;
+ darray_for_each(*s, i) {
+ if (!first)
+ prt_char(out, ' ');
+ first = false;
+ prt_printf(out, "%u", *i);
+ }
+}
+
+static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
+{
+ if (r->cur_ids.nr) {
+ darray_for_each(r->trees, i)
+ if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
+ int ret = snapshot_list_merge(c, i, &r->cur_ids);
+ if (ret)
+ return ret;
+ goto out;
+ }
+ darray_push(&r->trees, r->cur_ids);
+ darray_init(&r->cur_ids);
+ }
+out:
+ r->cur_ids.nr = 0;
+ return 0;
+}
+
+static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+ if (!same_snapshot(r, pos))
+ snapshot_tree_reconstruct_next(c, r);
+ r->cur_pos = pos;
+ return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
+}
+
+int bch2_reconstruct_snapshots(struct bch_fs *c)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+ struct printbuf buf = PRINTBUF;
+ struct snapshot_tree_reconstruct r = {};
+ int ret = 0;
+
+ for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
+ if (btree_type_has_snapshots(btree)) {
+ r.btree = btree;
+
+ ret = for_each_btree_key(trans, iter, btree, POS_MIN,
+ BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({
+ get_snapshot_trees(c, &r, k.k->p);
+ }));
+ if (ret)
+ goto err;
+
+ snapshot_tree_reconstruct_next(c, &r);
+ }
+ }
+
+ darray_for_each(r.trees, t) {
+ printbuf_reset(&buf);
+ snapshot_id_list_to_text(&buf, t);
+
+ darray_for_each(*t, id) {
+ if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
+ c, snapshot_node_missing,
+ "snapshot node %u from tree %s missing", *id, buf.buf)) {
+ if (t->nr > 1) {
+ bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
+ ret = -BCH_ERR_fsck_repair_unimplemented;
+ goto err;
+ }
+
+ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ check_snapshot_exists(trans, *id));
+ if (ret)
+ goto err;
+ }
+ }
+ }
+fsck_err:
+err:
+ bch2_trans_put(trans);
+ snapshot_tree_reconstruct_exit(&r);
+ printbuf_exit(&buf);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
/*
* Mark a snapshot as deleted, for future cleanup:
*/
@@ -1682,6 +1851,20 @@ int bch2_snapshots_read(struct bch_fs *c)
POS_MIN, 0, k,
(set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
bch_err_fn(c, ret);
+
+ /*
+ * It's important that we check if we need to reconstruct snapshots
+ * before going RW, so we mark that pass as required in the superblock -
+ * otherwise, we could end up deleting keys with missing snapshot nodes
+ * instead
+ */
+ BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
+ test_bit(BCH_FS_may_go_rw, &c->flags));
+
+ if (bch2_err_matches(ret, EIO) ||
+ (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
+ ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
+
return ret;
}
diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
index 7c66ffc06385dd..b7d2fed37c4f31 100644
--- a/fs/bcachefs/snapshot.h
+++ b/fs/bcachefs/snapshot.h
@@ -33,7 +33,11 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
{
- return &t->s[U32_MAX - id];
+ u32 idx = U32_MAX - id;
+
+ return likely(t && idx < t->nr)
+ ? &t->s[idx]
+ : NULL;
}
static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
@@ -44,7 +48,8 @@ static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
{
rcu_read_lock();
- id = snapshot_t(c, id)->tree;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ id = s ? s->tree : 0;
rcu_read_unlock();
return id;
@@ -52,7 +57,8 @@ static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{
- return snapshot_t(c, id)->parent;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ return s ? s->parent : 0;
}
static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
@@ -66,19 +72,19 @@ static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
{
-#ifdef CONFIG_BCACHEFS_DEBUG
- u32 parent = snapshot_t(c, id)->parent;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ if (!s)
+ return 0;
- if (parent &&
- snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1)
+ u32 parent = s->parent;
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBU) &&
+ parent &&
+ s->depth != snapshot_t(c, parent)->depth + 1)
panic("id %u depth=%u parent %u depth=%u\n",
id, snapshot_t(c, id)->depth,
parent, snapshot_t(c, parent)->depth);
return parent;
-#else
- return snapshot_t(c, id)->parent;
-#endif
}
static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
@@ -116,7 +122,8 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
{
- return snapshot_t(c, id)->equiv;
+ const struct snapshot_t *s = snapshot_t(c, id);
+ return s ? s->equiv : 0;
}
static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
@@ -133,38 +140,22 @@ static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
return id == bch2_snapshot_equiv(c, id);
}
-static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
+static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
{
- const struct snapshot_t *s;
- bool ret;
-
rcu_read_lock();
- s = snapshot_t(c, id);
- ret = s->children[0];
+ const struct snapshot_t *s = snapshot_t(c, id);
+ int ret = s ? s->children[0] : -BCH_ERR_invalid_snapshot_node;
rcu_read_unlock();
return ret;
}
-static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
-{
- return !bch2_snapshot_is_internal_node(c, id);
-}
-
-static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
+static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
{
- const struct snapshot_t *s;
- u32 parent = __bch2_snapshot_parent(c, id);
-
- if (!parent)
- return 0;
-
- s = snapshot_t(c, __bch2_snapshot_parent(c, id));
- if (id == s->children[0])
- return s->children[1];
- if (id == s->children[1])
- return s->children[0];
- return 0;
+ int ret = bch2_snapshot_is_internal_node(c, id);
+ if (ret < 0)
+ return ret;
+ return !ret;
}
static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
@@ -218,15 +209,34 @@ static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list
static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id)
{
- int ret;
-
BUG_ON(snapshot_list_has_id(s, id));
- ret = darray_push(s, id);
+ int ret = darray_push(s, id);
if (ret)
bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
return ret;
}
+static inline int snapshot_list_add_nodup(struct bch_fs *c, snapshot_id_list *s, u32 id)
+{
+ int ret = snapshot_list_has_id(s, id)
+ ? 0
+ : darray_push(s, id);
+ if (ret)
+ bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
+ return ret;
+}
+
+static inline int snapshot_list_merge(struct bch_fs *c, snapshot_id_list *dst, snapshot_id_list *src)
+{
+ darray_for_each(*src, i) {
+ int ret = snapshot_list_add_nodup(c, dst, *i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s);
int bch2_snapshot_get_subvol(struct btree_trans *, u32,
@@ -238,6 +248,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
int bch2_check_snapshot_trees(struct bch_fs *);
int bch2_check_snapshots(struct bch_fs *);
+int bch2_reconstruct_snapshots(struct bch_fs *);
int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
void bch2_delete_dead_snapshots_work(struct work_struct *);
@@ -249,7 +260,7 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
struct bpos pos)
{
if (!btree_type_has_snapshots(id) ||
- bch2_snapshot_is_leaf(trans->c, pos.snapshot))
+ bch2_snapshot_is_leaf(trans->c, pos.snapshot) > 0)
return 0;
return __bch2_key_has_snapshot_overwrites(trans, id, pos);
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index ce7aed12194238..88a79c82327687 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -595,6 +595,78 @@ err:
return ret;
}
+int bch2_initialize_subvolumes(struct bch_fs *c)
+{
+ struct bkey_i_snapshot_tree root_tree;
+ struct bkey_i_snapshot root_snapshot;
+ struct bkey_i_subvolume root_volume;
+ int ret;
+
+ bkey_snapshot_tree_init(&root_tree.k_i);
+ root_tree.k.p.offset = 1;
+ root_tree.v.master_subvol = cpu_to_le32(1);
+ root_tree.v.root_snapshot = cpu_to_le32(U32_MAX);
+
+ bkey_snapshot_init(&root_snapshot.k_i);
+ root_snapshot.k.p.offset = U32_MAX;
+ root_snapshot.v.flags = 0;
+ root_snapshot.v.parent = 0;
+ root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
+ root_snapshot.v.tree = cpu_to_le32(1);
+ SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
+
+ bkey_subvolume_init(&root_volume.k_i);
+ root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
+ root_volume.v.flags = 0;
+ root_volume.v.snapshot = cpu_to_le32(U32_MAX);
+ root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
+
+ ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
+ bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
+ bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bch_inode_unpacked inode;
+ int ret;
+
+ k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+ SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
+ ret = bkey_err(k);
+ if (ret)
+ return ret;
+
+ if (!bkey_is_inode(k.k)) {
+ bch_err(trans->c, "root inode not found");
+ ret = -BCH_ERR_ENOENT_inode;
+ goto err;
+ }
+
+ ret = bch2_inode_unpack(k, &inode);
+ BUG_ON(ret);
+
+ inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
+
+ ret = bch2_inode_write(trans, &iter, &inode);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+/* set bi_subvol on root inode */
+int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
+{
+ int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
+ __bch2_fs_upgrade_for_subvolumes(trans));
+ bch_err_fn(c, ret);
+ return ret;
+}
+
int bch2_fs_subvolumes_init(struct bch_fs *c)
{
INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h
index 903c05162c0688..d2015d549bd2a3 100644
--- a/fs/bcachefs/subvolume.h
+++ b/fs/bcachefs/subvolume.h
@@ -37,6 +37,9 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *);
int bch2_subvolume_unlink(struct btree_trans *, u32);
int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);
+int bch2_initialize_subvolumes(struct bch_fs *);
+int bch2_fs_upgrade_for_subvolumes(struct bch_fs *);
+
int bch2_fs_subvolumes_init(struct bch_fs *);
#endif /* _BCACHEFS_SUBVOLUME_H */
diff --git a/fs/bcachefs/subvolume_types.h b/fs/bcachefs/subvolume_types.h
index ae644adfc39168..9b10c8947828e0 100644
--- a/fs/bcachefs/subvolume_types.h
+++ b/fs/bcachefs/subvolume_types.h
@@ -20,6 +20,8 @@ struct snapshot_t {
};
struct snapshot_table {
+ struct rcu_head rcu;
+ size_t nr;
#ifndef RUST_BINDGEN
DECLARE_FLEX_ARRAY(struct snapshot_t, s);
#else
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index ad28e370b6404c..08ea3dbbbe97ce 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -8,7 +8,7 @@
#include "journal.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
-#include "recovery.h"
+#include "recovery_passes.h"
#include "replicas.h"
#include "quota.h"
#include "sb-clean.h"
@@ -143,7 +143,7 @@ void bch2_free_super(struct bch_sb_handle *sb)
{
kfree(sb->bio);
if (!IS_ERR_OR_NULL(sb->s_bdev_file))
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
kfree(sb->holder);
kfree(sb->sb_name);
@@ -527,9 +527,11 @@ static void bch2_sb_update(struct bch_fs *c)
memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
- if (ext)
+ if (ext) {
le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
sizeof(c->sb.errors_silent) * 8);
+ c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
+ }
for_each_member_device(c, ca) {
struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
@@ -698,8 +700,11 @@ retry:
return -ENOMEM;
sb->sb_name = kstrdup(path, GFP_KERNEL);
- if (!sb->sb_name)
- return -ENOMEM;
+ if (!sb->sb_name) {
+ ret = -ENOMEM;
+ prt_printf(&err, "error allocating memory for sb_name");
+ goto err;
+ }
#ifndef __KERNEL__
if (opt_get(*opts, direct_io) == false)
@@ -1162,6 +1167,11 @@ static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
kfree(errors_silent);
}
+
+ prt_printf(out, "Btrees with missing data:");
+ prt_tab(out);
+ prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data));
+ prt_newline(out);
}
static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 1ad6e5cd9476c8..8daf80a38d60c6 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -15,6 +15,7 @@
#include "btree_gc.h"
#include "btree_journal_iter.h"
#include "btree_key_cache.h"
+#include "btree_node_scan.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_write_buffer.h"
@@ -287,8 +288,13 @@ static void __bch2_fs_read_only(struct bch_fs *c)
if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
!test_bit(BCH_FS_emergency_ro, &c->flags))
set_bit(BCH_FS_clean_shutdown, &c->flags);
+
bch2_fs_journal_stop(&c->journal);
+ bch_info(c, "%sshutdown complete, journal seq %llu",
+ test_bit(BCH_FS_clean_shutdown, &c->flags) ? "" : "un",
+ c->journal.seq_ondisk);
+
/*
* After stopping journal:
*/
@@ -365,7 +371,7 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_emergency_ro, &c->flags) &&
test_bit(BCH_FS_started, &c->flags) &&
test_bit(BCH_FS_clean_shutdown, &c->flags) &&
- !c->opts.norecovery) {
+ c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) {
BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
BUG_ON(atomic_read(&c->btree_cache.dirty));
BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
@@ -510,7 +516,8 @@ err:
int bch2_fs_read_write(struct bch_fs *c)
{
- if (c->opts.norecovery)
+ if (c->opts.recovery_pass_last &&
+ c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay)
return -BCH_ERR_erofs_norecovery;
if (c->opts.nochanges)
@@ -535,6 +542,7 @@ static void __bch2_fs_free(struct bch_fs *c)
for (i = 0; i < BCH_TIME_STAT_NR; i++)
bch2_time_stats_exit(&c->times[i]);
+ bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
@@ -559,6 +567,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
bch2_journal_keys_put_initial(c);
+ bch2_find_btree_nodes_exit(&c->found_btree_nodes);
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
@@ -1015,8 +1024,16 @@ int bch2_fs_start(struct bch_fs *c)
for_each_online_member(c, ca)
bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
+ struct bch_sb_field_ext *ext =
+ bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
mutex_unlock(&c->sb_lock);
+ if (!ext) {
+ bch_err(c, "insufficient space in superblock for sb_field_ext");
+ ret = -BCH_ERR_ENOSPC_sb;
+ goto err;
+ }
+
for_each_rw_member(c, ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
diff --git a/fs/bcachefs/super_types.h b/fs/bcachefs/super_types.h
index ec784d975f6655..11bcef170c2c22 100644
--- a/fs/bcachefs/super_types.h
+++ b/fs/bcachefs/super_types.h
@@ -37,6 +37,8 @@ struct bch_member_cpu {
u8 durability;
u8 freespace_initialized;
u8 valid;
+ u8 btree_bitmap_shift;
+ u64 btree_allocated_bitmap;
};
#endif /* _BCACHEFS_SUPER_TYPES_H */
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index c86a93a8d8fc81..5be92fe3f4ea4e 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -17,7 +17,6 @@
#include "btree_iter.h"
#include "btree_key_cache.h"
#include "btree_update.h"
-#include "btree_update_interior.h"
#include "btree_gc.h"
#include "buckets.h"
#include "clock.h"
@@ -26,6 +25,7 @@
#include "ec.h"
#include "inode.h"
#include "journal.h"
+#include "journal_reclaim.h"
#include "keylist.h"
#include "move.h"
#include "movinggc.h"
@@ -139,6 +139,7 @@ do { \
write_attribute(trigger_gc);
write_attribute(trigger_discards);
write_attribute(trigger_invalidates);
+write_attribute(trigger_journal_flush);
write_attribute(prune_cache);
write_attribute(btree_wakeup);
rw_attribute(btree_gc_periodic);
@@ -166,7 +167,6 @@ read_attribute(btree_write_stats);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
read_attribute(journal_debug);
-read_attribute(btree_updates);
read_attribute(btree_cache);
read_attribute(btree_key_cache);
read_attribute(stripes_heap);
@@ -415,9 +415,6 @@ SHOW(bch2_fs)
if (attr == &sysfs_journal_debug)
bch2_journal_debug_to_text(out, &c->journal);
- if (attr == &sysfs_btree_updates)
- bch2_btree_updates_to_text(out, c);
-
if (attr == &sysfs_btree_cache)
bch2_btree_cache_to_text(out, c);
@@ -505,7 +502,7 @@ STORE(bch2_fs)
/* Debugging: */
- if (!test_bit(BCH_FS_rw, &c->flags))
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
return -EROFS;
if (attr == &sysfs_prune_cache) {
@@ -538,6 +535,11 @@ STORE(bch2_fs)
if (attr == &sysfs_trigger_invalidates)
bch2_do_invalidates(c);
+ if (attr == &sysfs_trigger_journal_flush) {
+ bch2_journal_flush_all_pins(&c->journal);
+ bch2_journal_meta(&c->journal);
+ }
+
#ifdef CONFIG_BCACHEFS_TESTS
if (attr == &sysfs_perf_test) {
char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
@@ -558,6 +560,7 @@ STORE(bch2_fs)
size = ret;
}
#endif
+ bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
return size;
}
SYSFS_OPS(bch2_fs);
@@ -639,7 +642,6 @@ SYSFS_OPS(bch2_fs_internal);
struct attribute *bch2_fs_internal_files[] = {
&sysfs_flags,
&sysfs_journal_debug,
- &sysfs_btree_updates,
&sysfs_btree_cache,
&sysfs_btree_key_cache,
&sysfs_new_stripes,
@@ -657,6 +659,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_trigger_gc,
&sysfs_trigger_discards,
&sysfs_trigger_invalidates,
+ &sysfs_trigger_journal_flush,
&sysfs_prune_cache,
&sysfs_btree_wakeup,
diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
index b3fe9fc577470f..bfec656f94c075 100644
--- a/fs/bcachefs/tests.c
+++ b/fs/bcachefs/tests.c
@@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek(&iter);
+ k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX));
ret = bkey_err(k);
if (ret)
goto err;
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 216fadf16928b9..92c6ad75e702ab 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -707,149 +707,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
}
}
-static int alignment_ok(const void *base, size_t align)
-{
- return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
- ((unsigned long)base & (align - 1)) == 0;
-}
-
-static void u32_swap(void *a, void *b, size_t size)
-{
- u32 t = *(u32 *)a;
- *(u32 *)a = *(u32 *)b;
- *(u32 *)b = t;
-}
-
-static void u64_swap(void *a, void *b, size_t size)
-{
- u64 t = *(u64 *)a;
- *(u64 *)a = *(u64 *)b;
- *(u64 *)b = t;
-}
-
-static void generic_swap(void *a, void *b, size_t size)
-{
- char t;
-
- do {
- t = *(char *)a;
- *(char *)a++ = *(char *)b;
- *(char *)b++ = t;
- } while (--size > 0);
-}
-
-static inline int do_cmp(void *base, size_t n, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- size_t l, size_t r)
-{
- return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
- base + inorder_to_eytzinger0(r, n) * size,
- size);
-}
-
-static inline void do_swap(void *base, size_t n, size_t size,
- void (*swap_func)(void *, void *, size_t),
- size_t l, size_t r)
-{
- swap_func(base + inorder_to_eytzinger0(l, n) * size,
- base + inorder_to_eytzinger0(r, n) * size,
- size);
-}
-
-void eytzinger0_sort(void *base, size_t n, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t))
-{
- int i, c, r;
-
- if (!swap_func) {
- if (size == 4 && alignment_ok(base, 4))
- swap_func = u32_swap;
- else if (size == 8 && alignment_ok(base, 8))
- swap_func = u64_swap;
- else
- swap_func = generic_swap;
- }
-
- /* heapify */
- for (i = n / 2 - 1; i >= 0; --i) {
- for (r = i; r * 2 + 1 < n; r = c) {
- c = r * 2 + 1;
-
- if (c + 1 < n &&
- do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
- c++;
-
- if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
- break;
-
- do_swap(base, n, size, swap_func, r, c);
- }
- }
-
- /* sort */
- for (i = n - 1; i > 0; --i) {
- do_swap(base, n, size, swap_func, 0, i);
-
- for (r = 0; r * 2 + 1 < i; r = c) {
- c = r * 2 + 1;
-
- if (c + 1 < i &&
- do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
- c++;
-
- if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
- break;
-
- do_swap(base, n, size, swap_func, r, c);
- }
- }
-}
-
-void sort_cmp_size(void *base, size_t num, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t size))
-{
- /* pre-scale counters for performance */
- int i = (num/2 - 1) * size, n = num * size, c, r;
-
- if (!swap_func) {
- if (size == 4 && alignment_ok(base, 4))
- swap_func = u32_swap;
- else if (size == 8 && alignment_ok(base, 8))
- swap_func = u64_swap;
- else
- swap_func = generic_swap;
- }
-
- /* heapify */
- for ( ; i >= 0; i -= size) {
- for (r = i; r * 2 + size < n; r = c) {
- c = r * 2 + size;
- if (c < n - size &&
- cmp_func(base + c, base + c + size, size) < 0)
- c += size;
- if (cmp_func(base + r, base + c, size) >= 0)
- break;
- swap_func(base + r, base + c, size);
- }
- }
-
- /* sort */
- for (i = n - size; i > 0; i -= size) {
- swap_func(base, base + i, size);
- for (r = 0; r * 2 + size < i; r = c) {
- c = r * 2 + size;
- if (c < i - size &&
- cmp_func(base + c, base + c + size, size) < 0)
- c += size;
- if (cmp_func(base + r, base + c, size) >= 0)
- break;
- swap_func(base + r, base + c, size);
- }
- }
-}
-
#if 0
void eytzinger1_test(void)
{
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 175aee3074c7d5..5cf885b09986ac 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -631,10 +631,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
memset(s + bytes, c, rem);
}
-void sort_cmp_size(void *base, size_t num, size_t size,
- int (*cmp_func)(const void *, const void *, size_t),
- void (*swap_func)(void *, void *, size_t));
-
/* just the memmove, doesn't update @_nr */
#define __array_insert_item(_array, _nr, _pos) \
memmove(&(_array)[(_pos) + 1], \
@@ -792,9 +788,27 @@ static inline int copy_from_user_errcode(void *to, const void __user *from, unsi
#endif
+static inline void mod_bit(long nr, volatile unsigned long *addr, bool v)
+{
+ if (v)
+ set_bit(nr, addr);
+ else
+ clear_bit(nr, addr);
+}
+
static inline void __set_bit_le64(size_t bit, __le64 *addr)
{
addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
}
+static inline void __clear_bit_le64(size_t bit, __le64 *addr)
+{
+ addr[bit / 64] &= ~cpu_to_le64(BIT_ULL(bit % 64));
+}
+
+static inline bool test_bit_le64(size_t bit, __le64 *addr)
+{
+ return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0;
+}
+
#endif /* _BCACHEFS_UTIL_H */
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index dd6f566a383f00..121ab890bd0557 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1133,6 +1133,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
if (ret)
return ret;
+ ret = btrfs_record_root_in_trans(trans, node->root);
+ if (ret)
+ return ret;
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
return ret;
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index beedd6ed64d39b..257d044bca9158 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3464,6 +3464,14 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (root_id != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_ref generic_ref = { 0 };
+ /*
+ * Assert that the extent buffer is not cleared due to
+ * EXTENT_BUFFER_ZONED_ZEROOUT. Please refer
+ * btrfs_clear_buffer_dirty() and btree_csum_one_bio() for
+ * detail.
+ */
+ ASSERT(btrfs_header_bytenr(buf) != 0);
+
btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
buf->start, buf->len, parent,
btrfs_header_owner(buf));
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 61594eaf1f8969..2776112dbdf8d4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -681,31 +681,21 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
gfp_t extra_gfp)
{
+ const gfp_t gfp = GFP_NOFS | extra_gfp;
unsigned int allocated;
for (allocated = 0; allocated < nr_pages;) {
unsigned int last = allocated;
- allocated = alloc_pages_bulk_array(GFP_NOFS | extra_gfp,
- nr_pages, page_array);
-
- if (allocated == nr_pages)
- return 0;
-
- /*
- * During this iteration, no page could be allocated, even
- * though alloc_pages_bulk_array() falls back to alloc_page()
- * if it could not bulk-allocate. So we must be out of memory.
- */
- if (allocated == last) {
+ allocated = alloc_pages_bulk_array(gfp, nr_pages, page_array);
+ if (unlikely(allocated == last)) {
+ /* No progress, fail and do cleanup. */
for (int i = 0; i < allocated; i++) {
__free_page(page_array[i]);
page_array[i] = NULL;
}
return -ENOMEM;
}
-
- memalloc_retry_wait(GFP_NOFS);
}
return 0;
}
@@ -4154,7 +4144,7 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
* The actual zeroout of the buffer will happen later in
* btree_csum_one_bio.
*/
- if (btrfs_is_zoned(fs_info)) {
+ if (btrfs_is_zoned(fs_info) && test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
set_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags);
return;
}
@@ -4193,6 +4183,7 @@ void set_extent_buffer_dirty(struct extent_buffer *eb)
num_folios = num_extent_folios(eb);
WARN_ON(atomic_read(&eb->refs) == 0);
WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
+ WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags));
if (!was_dirty) {
bool subpage = eb->fs_info->nodesize < PAGE_SIZE;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 37701531eeb1ba..c65fe5de40220d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2533,7 +2533,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
*/
if (bits & EXTENT_CLEAR_META_RESV &&
root != fs_info->tree_root)
- btrfs_delalloc_release_metadata(inode, len, false);
+ btrfs_delalloc_release_metadata(inode, len, true);
/* For sanity tests. */
if (btrfs_is_testing(fs_info))
@@ -4503,6 +4503,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
struct btrfs_trans_handle *trans;
struct btrfs_block_rsv block_rsv;
u64 root_flags;
+ u64 qgroup_reserved = 0;
int ret;
down_write(&fs_info->subvol_sem);
@@ -4547,12 +4548,20 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
if (ret)
goto out_undead;
+ qgroup_reserved = block_rsv.qgroup_rsv_reserved;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_release;
}
+ ret = btrfs_record_root_in_trans(trans, root);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto out_end_trans;
+ }
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
@@ -4611,7 +4620,9 @@ out_end_trans:
ret = btrfs_end_transaction(trans);
inode->i_flags |= S_DEAD;
out_release:
- btrfs_subvolume_release_metadata(root, &block_rsv);
+ btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_undead:
if (ret) {
spin_lock(&dest->root_item_lock);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 294e31edec9d3b..55f3ba6a831ca1 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -613,6 +613,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
int ret;
dev_t anon_dev;
u64 objectid;
+ u64 qgroup_reserved = 0;
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
if (!root_item)
@@ -650,13 +651,18 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
trans_num_items, false);
if (ret)
goto out_new_inode_args;
+ qgroup_reserved = block_rsv.qgroup_rsv_reserved;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_subvolume_release_metadata(root, &block_rsv);
- goto out_new_inode_args;
+ goto out_release_rsv;
}
+ ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+ if (ret)
+ goto out;
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
/* Tree log can't currently deal with an inode which is a new root. */
@@ -767,9 +773,11 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
out:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
- btrfs_subvolume_release_metadata(root, &block_rsv);
-
btrfs_end_transaction(trans);
+out_release_rsv:
+ btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
out_new_inode_args:
btrfs_new_inode_args_destroy(&new_inode_args);
out_inode:
@@ -791,6 +799,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_pending_snapshot *pending_snapshot;
unsigned int trans_num_items;
struct btrfs_trans_handle *trans;
+ struct btrfs_block_rsv *block_rsv;
+ u64 qgroup_reserved = 0;
int ret;
/* We do not support snapshotting right now. */
@@ -827,19 +837,19 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
goto free_pending;
}
- btrfs_init_block_rsv(&pending_snapshot->block_rsv,
- BTRFS_BLOCK_RSV_TEMP);
+ block_rsv = &pending_snapshot->block_rsv;
+ btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
* 1 to add dir item
* 1 to add dir index
* 1 to update parent inode item
*/
trans_num_items = create_subvol_num_items(inherit) + 3;
- ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
- &pending_snapshot->block_rsv,
+ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, block_rsv,
trans_num_items, false);
if (ret)
goto free_pending;
+ qgroup_reserved = block_rsv->qgroup_rsv_reserved;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
@@ -852,6 +862,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
ret = PTR_ERR(trans);
goto fail;
}
+ ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+ if (ret) {
+ btrfs_end_transaction(trans);
+ goto fail;
+ }
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ qgroup_reserved = 0;
trans->pending_snapshot = pending_snapshot;
@@ -881,7 +898,9 @@ fail:
if (ret && pending_snapshot->snap)
pending_snapshot->snap->anon_dev = 0;
btrfs_put_root(pending_snapshot->snap);
- btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
+ btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL);
+ if (qgroup_reserved)
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
free_pending:
if (pending_snapshot->anon_dev)
free_anon_bdev(pending_snapshot->anon_dev);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5f90f0605b12f7..cf8820ce7aa297 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -4495,6 +4495,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
BTRFS_QGROUP_RSV_META_PREALLOC);
trace_qgroup_meta_convert(root, num_bytes);
qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
+ if (!sb_rdonly(fs_info->sb))
+ add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS);
}
/*
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 4bb538a372ce56..7007f9e0c97282 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -548,13 +548,3 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
}
return ret;
}
-
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- u64 qgroup_to_release;
-
- btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release);
- btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release);
-}
diff --git a/fs/btrfs/root-tree.h b/fs/btrfs/root-tree.h
index 6f929cf3bd4967..8f5739e732b9b6 100644
--- a/fs/btrfs/root-tree.h
+++ b/fs/btrfs/root-tree.h
@@ -18,8 +18,6 @@ struct btrfs_trans_handle;
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int nitems, bool use_global_rsv);
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
- struct btrfs_block_rsv *rsv);
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
u64 ref_id, u64 dirid, u64 sequence,
const struct fscrypt_str *name);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 46e8426adf4f15..85f359e0e0a7f2 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -745,14 +745,6 @@ again:
h->reloc_reserved = reloc_reserved;
}
- /*
- * Now that we have found a transaction to be a part of, convert the
- * qgroup reservation from prealloc to pertrans. A different transaction
- * can't race in and free our pertrans out from under us.
- */
- if (qgroup_reserved)
- btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
-
got_it:
if (!current->journal_info)
current->journal_info = h;
@@ -786,8 +778,15 @@ got_it:
* not just freed.
*/
btrfs_end_transaction(h);
- return ERR_PTR(ret);
+ goto reserve_fail;
}
+ /*
+ * Now that we have found a transaction to be a part of, convert the
+ * qgroup reservation from prealloc to pertrans. A different transaction
+ * can't race in and free our pertrans out from under us.
+ */
+ if (qgroup_reserved)
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
return h;
@@ -1495,6 +1494,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
radix_tree_tag_clear(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
+ btrfs_qgroup_free_meta_all_pertrans(root);
spin_unlock(&fs_info->fs_roots_radix_lock);
btrfs_free_log(trans, root);
@@ -1519,7 +1519,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
if (ret2)
return ret2;
spin_lock(&fs_info->fs_roots_radix_lock);
- btrfs_qgroup_free_meta_all_pertrans(root);
}
}
spin_unlock(&fs_info->fs_roots_radix_lock);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 1340d77124ae4d..ee9caf7916fb95 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -795,8 +795,10 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
ihold(inode);
if (wbc->sync_mode == WB_SYNC_NONE &&
- ceph_inode_to_fs_client(inode)->write_congested)
+ ceph_inode_to_fs_client(inode)->write_congested) {
+ redirty_page_for_writepage(wbc, page);
return AOP_WRITEPAGE_ACTIVATE;
+ }
wait_on_page_fscache(page);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 55051ad09c1919..c4941ba245ac3d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4783,13 +4783,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
ceph_vinop(inode));
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
ci->i_ceph_flags |= CEPH_I_FLUSH;
if (!list_empty(&ci->i_cap_delay_list))
list_del_init(&ci->i_cap_delay_list);
list_add_tail(&ci->i_cap_delay_list,
&mdsc->cap_unlink_delay_list);
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
/*
* Fire the work immediately, because the MDS maybe
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3ab9c268a8bb39..360b686c3c67cf 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2504,7 +2504,7 @@ static void ceph_cap_unlink_work(struct work_struct *work)
struct ceph_client *cl = mdsc->fsc->client;
doutc(cl, "begin\n");
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
while (!list_empty(&mdsc->cap_unlink_delay_list)) {
struct ceph_inode_info *ci;
struct inode *inode;
@@ -2516,15 +2516,15 @@ static void ceph_cap_unlink_work(struct work_struct *work)
inode = igrab(&ci->netfs.inode);
if (inode) {
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
doutc(cl, "on %p %llx.%llx\n", inode,
ceph_vinop(inode));
ceph_check_caps(ci, CHECK_CAPS_FLUSH);
iput(inode);
- spin_lock(&mdsc->cap_unlink_delay_lock);
+ spin_lock(&mdsc->cap_delay_lock);
}
}
- spin_unlock(&mdsc->cap_unlink_delay_lock);
+ spin_unlock(&mdsc->cap_delay_lock);
doutc(cl, "done\n");
}
@@ -5404,7 +5404,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
INIT_LIST_HEAD(&mdsc->cap_wait_list);
spin_lock_init(&mdsc->cap_delay_lock);
INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
- spin_lock_init(&mdsc->cap_unlink_delay_lock);
INIT_LIST_HEAD(&mdsc->snap_flush_list);
spin_lock_init(&mdsc->snap_flush_lock);
mdsc->last_cap_flush_tid = 1;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 03f8ff00874f72..b88e8041522412 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -461,9 +461,8 @@ struct ceph_mds_client {
struct delayed_work delayed_work; /* delayed work */
unsigned long last_renew_caps; /* last time we renewed our caps */
struct list_head cap_delay_list; /* caps with delayed release */
- spinlock_t cap_delay_lock; /* protects cap_delay_list */
struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */
- spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */
+ spinlock_t cap_delay_lock; /* protects cap_delay_list and cap_unlink_delay_list */
struct list_head snap_flush_list; /* cap_snaps ready to flush */
spinlock_t snap_flush_lock;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 39e75131fd5aa0..9901057a15ba79 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb)
sb->s_mtd = NULL;
} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
sync_blockdev(sb->s_bdev);
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
}
kfree(sbi);
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cfb8449c731f9a..044135796f2b6e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5668,7 +5668,7 @@ failed_mount:
brelse(sbi->s_sbh);
if (sbi->s_journal_bdev_file) {
invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
- fput(sbi->s_journal_bdev_file);
+ bdev_fput(sbi->s_journal_bdev_file);
}
out_fail:
invalidate_bdev(sb->s_bdev);
@@ -5913,7 +5913,7 @@ static struct file *ext4_get_journal_blkdev(struct super_block *sb,
out_bh:
brelse(bh);
out_bdev:
- fput(bdev_file);
+ bdev_fput(bdev_file);
return ERR_PTR(errno);
}
@@ -5952,7 +5952,7 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb,
out_journal:
jbd2_journal_destroy(journal);
out_bdev:
- fput(bdev_file);
+ bdev_fput(bdev_file);
return ERR_PTR(errno);
}
@@ -7327,7 +7327,7 @@ static void ext4_kill_sb(struct super_block *sb)
kill_block_super(sb);
if (bdev_file)
- fput(bdev_file);
+ bdev_fput(bdev_file);
}
static struct file_system_type ext4_fs_type = {
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a6867f26f14183..a4bc26dfdb1af5 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1558,7 +1558,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
for (i = 0; i < sbi->s_ndevs; i++) {
if (i > 0)
- fput(FDEV(i).bdev_file);
+ bdev_fput(FDEV(i).bdev_file);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
#endif
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index b6cad106c37e44..0b2da7b7e2ad01 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -310,6 +310,10 @@ struct cuse_init_args {
/**
* cuse_process_init_reply - finish initializing CUSE channel
*
+ * @fm: The fuse mount information containing the CUSE connection.
+ * @args: The arguments passed to the init reply.
+ * @error: The error code signifying if any error occurred during the process.
+ *
* This function creates the character device and sets up all the
* required data structures for it. Please read the comment at the
* top of this file for high level overview.
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4a6df591add61c..2b0d4781f39484 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1321,6 +1321,7 @@ retry:
err = fuse_do_statx(inode, file, stat);
if (err == -ENOSYS) {
fc->no_statx = 1;
+ err = 0;
goto retry;
}
} else {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a56e7bffd0004e..b57ce41576407b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1362,7 +1362,7 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
bool *exclusive)
{
struct inode *inode = file_inode(iocb->ki_filp);
- struct fuse_file *ff = iocb->ki_filp->private_data;
+ struct fuse_inode *fi = get_fuse_inode(inode);
*exclusive = fuse_dio_wr_exclusive_lock(iocb, from);
if (*exclusive) {
@@ -1377,7 +1377,7 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
* have raced, so check it again.
*/
if (fuse_io_past_eof(iocb, from) ||
- fuse_file_uncached_io_start(inode, ff, NULL) != 0) {
+ fuse_inode_uncached_io_start(fi, NULL) != 0) {
inode_unlock_shared(inode);
inode_lock(inode);
*exclusive = true;
@@ -1388,13 +1388,13 @@ static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
{
struct inode *inode = file_inode(iocb->ki_filp);
- struct fuse_file *ff = iocb->ki_filp->private_data;
+ struct fuse_inode *fi = get_fuse_inode(inode);
if (exclusive) {
inode_unlock(inode);
} else {
/* Allow opens in caching mode after last parallel dio end */
- fuse_file_uncached_io_end(inode, ff);
+ fuse_inode_uncached_io_end(fi);
inode_unlock_shared(inode);
}
}
@@ -2574,8 +2574,10 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
* First mmap of direct_io file enters caching inode io mode.
* Also waits for parallel dio writers to go into serial mode
* (exclusive instead of shared lock).
+ * After first mmap, the inode stays in caching io mode until
+ * the direct_io file release.
*/
- rc = fuse_file_cached_io_start(inode, ff);
+ rc = fuse_file_cached_io_open(inode, ff);
if (rc)
return rc;
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index b24084b60864ee..f2391961031374 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1394,9 +1394,10 @@ int fuse_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
/* iomode.c */
-int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff);
-int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb);
-void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff);
+int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff);
+int fuse_inode_uncached_io_start(struct fuse_inode *fi,
+ struct fuse_backing *fb);
+void fuse_inode_uncached_io_end(struct fuse_inode *fi);
int fuse_file_io_open(struct file *file, struct inode *inode);
void fuse_file_io_release(struct fuse_file *ff, struct inode *inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3a5d888783353c..99e44ea7d8756d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -175,6 +175,7 @@ static void fuse_evict_inode(struct inode *inode)
}
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
+ WARN_ON(fi->iocachectr != 0);
WARN_ON(!list_empty(&fi->write_files));
WARN_ON(!list_empty(&fi->queued_writes));
}
diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c
index c653ddcf057872..c99e285f3183ef 100644
--- a/fs/fuse/iomode.c
+++ b/fs/fuse/iomode.c
@@ -21,12 +21,13 @@ static inline bool fuse_is_io_cache_wait(struct fuse_inode *fi)
}
/*
- * Start cached io mode.
+ * Called on cached file open() and on first mmap() of direct_io file.
+ * Takes cached_io inode mode reference to be dropped on file release.
*
* Blocks new parallel dio writes and waits for the in-progress parallel dio
* writes to complete.
*/
-int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff)
+int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff)
{
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -67,10 +68,9 @@ int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff)
return 0;
}
-static void fuse_file_cached_io_end(struct inode *inode, struct fuse_file *ff)
+static void fuse_file_cached_io_release(struct fuse_file *ff,
+ struct fuse_inode *fi)
{
- struct fuse_inode *fi = get_fuse_inode(inode);
-
spin_lock(&fi->lock);
WARN_ON(fi->iocachectr <= 0);
WARN_ON(ff->iomode != IOM_CACHED);
@@ -82,16 +82,15 @@ static void fuse_file_cached_io_end(struct inode *inode, struct fuse_file *ff)
}
/* Start strictly uncached io mode where cache access is not allowed */
-int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb)
+int fuse_inode_uncached_io_start(struct fuse_inode *fi, struct fuse_backing *fb)
{
- struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_backing *oldfb;
int err = 0;
spin_lock(&fi->lock);
/* deny conflicting backing files on same fuse inode */
oldfb = fuse_inode_backing(fi);
- if (oldfb && oldfb != fb) {
+ if (fb && oldfb && oldfb != fb) {
err = -EBUSY;
goto unlock;
}
@@ -99,12 +98,10 @@ int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struc
err = -ETXTBSY;
goto unlock;
}
- WARN_ON(ff->iomode != IOM_NONE);
fi->iocachectr--;
- ff->iomode = IOM_UNCACHED;
/* fuse inode holds a single refcount of backing file */
- if (!oldfb) {
+ if (fb && !oldfb) {
oldfb = fuse_inode_backing_set(fi, fb);
WARN_ON_ONCE(oldfb != NULL);
} else {
@@ -115,15 +112,29 @@ unlock:
return err;
}
-void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff)
+/* Takes uncached_io inode mode reference to be dropped on file release */
+static int fuse_file_uncached_io_open(struct inode *inode,
+ struct fuse_file *ff,
+ struct fuse_backing *fb)
{
struct fuse_inode *fi = get_fuse_inode(inode);
+ int err;
+
+ err = fuse_inode_uncached_io_start(fi, fb);
+ if (err)
+ return err;
+
+ WARN_ON(ff->iomode != IOM_NONE);
+ ff->iomode = IOM_UNCACHED;
+ return 0;
+}
+
+void fuse_inode_uncached_io_end(struct fuse_inode *fi)
+{
struct fuse_backing *oldfb = NULL;
spin_lock(&fi->lock);
WARN_ON(fi->iocachectr >= 0);
- WARN_ON(ff->iomode != IOM_UNCACHED);
- ff->iomode = IOM_NONE;
fi->iocachectr++;
if (!fi->iocachectr) {
wake_up(&fi->direct_io_waitq);
@@ -134,6 +145,15 @@ void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff)
fuse_backing_put(oldfb);
}
+/* Drop uncached_io reference from passthrough open */
+static void fuse_file_uncached_io_release(struct fuse_file *ff,
+ struct fuse_inode *fi)
+{
+ WARN_ON(ff->iomode != IOM_UNCACHED);
+ ff->iomode = IOM_NONE;
+ fuse_inode_uncached_io_end(fi);
+}
+
/*
* Open flags that are allowed in combination with FOPEN_PASSTHROUGH.
* A combination of FOPEN_PASSTHROUGH and FOPEN_DIRECT_IO means that read/write
@@ -163,7 +183,7 @@ static int fuse_file_passthrough_open(struct inode *inode, struct file *file)
return PTR_ERR(fb);
/* First passthrough file open denies caching inode io mode */
- err = fuse_file_uncached_io_start(inode, ff, fb);
+ err = fuse_file_uncached_io_open(inode, ff, fb);
if (!err)
return 0;
@@ -216,7 +236,7 @@ int fuse_file_io_open(struct file *file, struct inode *inode)
if (ff->open_flags & FOPEN_PASSTHROUGH)
err = fuse_file_passthrough_open(inode, file);
else
- err = fuse_file_cached_io_start(inode, ff);
+ err = fuse_file_cached_io_open(inode, ff);
if (err)
goto fail;
@@ -236,8 +256,10 @@ fail:
/* No more pending io and no new io possible to inode via open/mmapped file */
void fuse_file_io_release(struct fuse_file *ff, struct inode *inode)
{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
/*
- * Last parallel dio close allows caching inode io mode.
+ * Last passthrough file close allows caching inode io mode.
* Last caching file close exits caching inode io mode.
*/
switch (ff->iomode) {
@@ -245,10 +267,10 @@ void fuse_file_io_release(struct fuse_file *ff, struct inode *inode)
/* Nothing to do */
break;
case IOM_UNCACHED:
- fuse_file_uncached_io_end(inode, ff);
+ fuse_file_uncached_io_release(ff, fi);
break;
case IOM_CACHED:
- fuse_file_cached_io_end(inode, ff);
+ fuse_file_cached_io_release(ff, fi);
break;
}
}
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 73389c68e25170..9609349e92e5e1 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1141,7 +1141,7 @@ journal_found:
lbmLogShutdown(log);
close: /* close external log device */
- fput(bdev_file);
+ bdev_fput(bdev_file);
free: /* free log descriptor */
mutex_unlock(&jfs_log_mutex);
@@ -1485,7 +1485,7 @@ int lmLogClose(struct super_block *sb)
bdev_file = log->bdev_file;
rc = lmLogShutdown(log);
- fput(bdev_file);
+ bdev_fput(bdev_file);
kfree(log);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e9df2f87072c68..8502ef68459b98 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -636,11 +636,18 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
* each file a separate locking class. Let's differentiate on
* whether the file has mmap or not for now.
*
- * Both paths of the branch look the same. They're supposed to
+ * For similar reasons, writable and readonly files are given different
+ * lockdep key, because the writable file /sys/power/resume may call vfs
+ * lookup helpers for arbitrary paths and readonly files can be read by
+ * overlayfs from vfs helpers when sysfs is a lower layer of overalyfs.
+ *
+ * All three cases look the same. They're supposed to
* look that way and give @of->mutex different static lockdep keys.
*/
if (has_mmap)
mutex_init(&of->mutex);
+ else if (file->f_mode & FMODE_WRITE)
+ mutex_init(&of->mutex);
else
mutex_init(&of->mutex);
diff --git a/fs/namei.c b/fs/namei.c
index ceb9ddf8dfdd4e..c5b2a25be7d048 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4050,6 +4050,8 @@ retry:
case 0: case S_IFREG:
error = vfs_create(idmap, path.dentry->d_inode,
dentry, mode, true);
+ if (!error)
+ security_path_post_mknod(idmap, dentry);
break;
case S_IFCHR: case S_IFBLK:
error = vfs_mknod(idmap, path.dentry->d_inode,
@@ -4060,11 +4062,6 @@ retry:
dentry, mode, 0);
break;
}
-
- if (error)
- goto out2;
-
- security_path_post_mknod(idmap, dentry);
out2:
done_path_create(&path, dentry);
if (retry_estale(error, lookup_flags)) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1a93c7fcf76c55..84d4093ca71317 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3042,12 +3042,9 @@ static void
nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- spin_lock(&nn->client_lock);
clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
- put_client_renew_locked(clp);
- spin_unlock(&nn->client_lock);
+ drop_client(clp);
}
static int
@@ -3831,15 +3828,20 @@ nfsd4_create_session(struct svc_rqst *rqstp,
else
cs_slot = &unconf->cl_cs_slot;
status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
- if (status) {
- if (status == nfserr_replay_cache) {
- status = nfsd4_replay_create_session(cr_ses, cs_slot);
- goto out_free_conn;
- }
+ switch (status) {
+ case nfs_ok:
+ cs_slot->sl_seqid++;
+ cr_ses->seqid = cs_slot->sl_seqid;
+ break;
+ case nfserr_replay_cache:
+ status = nfsd4_replay_create_session(cr_ses, cs_slot);
+ fallthrough;
+ case nfserr_jukebox:
+ /* The server MUST NOT cache NFS4ERR_DELAY */
+ goto out_free_conn;
+ default:
goto out_cache_error;
}
- cs_slot->sl_seqid++;
- cr_ses->seqid = cs_slot->sl_seqid;
/* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */
if (conf) {
@@ -3859,10 +3861,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
status = mark_client_expired_locked(old);
- if (status) {
- old = NULL;
- goto out_cache_error;
- }
+ if (status)
+ goto out_expired_error;
trace_nfsd_clid_replaced(&old->cl_clientid);
}
move_to_confirmed(unconf);
@@ -3894,6 +3894,17 @@ nfsd4_create_session(struct svc_rqst *rqstp,
expire_client(old);
return status;
+out_expired_error:
+ old = NULL;
+ /*
+ * Revert the slot seq_nr change so the server will process
+ * the client's resend instead of returning a cached response.
+ */
+ if (status == nfserr_jukebox) {
+ cs_slot->sl_seqid--;
+ cr_ses->seqid = cs_slot->sl_seqid;
+ goto out_free_conn;
+ }
out_cache_error:
nfsd4_cache_create_session(cr_ses, cs_slot, status);
out_free_conn:
@@ -6602,7 +6613,7 @@ deleg_reaper(struct nfsd_net *nn)
list_add(&clp->cl_ra_cblist, &cblist);
/* release in nfsd4_cb_recall_any_release */
- atomic_inc(&clp->cl_rpc_users);
+ kref_get(&clp->cl_nfsdfs.cl_ref);
set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
clp->cl_ra_time = ktime_get_boottime_seconds();
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fac938f563ad02..1955481832e037 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3490,11 +3490,13 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
struct dentry *dentry, const u32 *bmval,
int ignore_crossmnt)
{
+ DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
struct nfsd4_fattr_args args;
struct svc_fh *tempfh = NULL;
int starting_len = xdr->buf->len;
__be32 *attrlen_p, status;
int attrlen_offset;
+ u32 attrmask[3];
int err;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
u32 minorversion = resp->cstate.minorversion;
@@ -3502,10 +3504,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
.mnt = exp->ex_path.mnt,
.dentry = dentry,
};
- union {
- u32 attrmask[3];
- unsigned long mask[2];
- } u;
unsigned long bit;
bool file_modified = false;
u64 size = 0;
@@ -3521,20 +3519,19 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
/*
* Make a local copy of the attribute bitmap that can be modified.
*/
- memset(&u, 0, sizeof(u));
- u.attrmask[0] = bmval[0];
- u.attrmask[1] = bmval[1];
- u.attrmask[2] = bmval[2];
+ attrmask[0] = bmval[0];
+ attrmask[1] = bmval[1];
+ attrmask[2] = bmval[2];
args.rdattr_err = 0;
if (exp->ex_fslocs.migrated) {
- status = fattr_handle_absent_fs(&u.attrmask[0], &u.attrmask[1],
- &u.attrmask[2], &args.rdattr_err);
+ status = fattr_handle_absent_fs(&attrmask[0], &attrmask[1],
+ &attrmask[2], &args.rdattr_err);
if (status)
goto out;
}
args.size = 0;
- if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+ if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
&file_modified, &size);
if (status)
@@ -3553,16 +3550,16 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */
- u.attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
- if ((u.attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+ attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
+ if ((attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
- (u.attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
+ (attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
FATTR4_WORD1_SPACE_TOTAL))) {
err = vfs_statfs(&path, &args.statfs);
if (err)
goto out_nfserr;
}
- if ((u.attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
+ if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
!fhp) {
tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
status = nfserr_jukebox;
@@ -3577,10 +3574,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
args.fhp = fhp;
args.acl = NULL;
- if (u.attrmask[0] & FATTR4_WORD0_ACL) {
+ if (attrmask[0] & FATTR4_WORD0_ACL) {
err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl);
if (err == -EOPNOTSUPP)
- u.attrmask[0] &= ~FATTR4_WORD0_ACL;
+ attrmask[0] &= ~FATTR4_WORD0_ACL;
else if (err == -EINVAL) {
status = nfserr_attrnotsupp;
goto out;
@@ -3592,17 +3589,17 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
args.context = NULL;
- if ((u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
- u.attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
+ attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
err = security_inode_getsecctx(d_inode(dentry),
&args.context, &args.contextlen);
else
err = -EOPNOTSUPP;
args.contextsupport = (err == 0);
- if (u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
+ if (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
if (err == -EOPNOTSUPP)
- u.attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
else if (err)
goto out_nfserr;
}
@@ -3610,8 +3607,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
/* attrmask */
- status = nfsd4_encode_bitmap4(xdr, u.attrmask[0],
- u.attrmask[1], u.attrmask[2]);
+ status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1],
+ attrmask[2]);
if (status)
goto out;
@@ -3620,7 +3617,9 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
attrlen_p = xdr_reserve_space(xdr, XDR_UNIT);
if (!attrlen_p)
goto out_resource;
- for_each_set_bit(bit, (const unsigned long *)&u.mask,
+ bitmap_from_arr32(attr_bitmap, attrmask,
+ ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+ for_each_set_bit(bit, attr_bitmap,
ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) {
status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args);
if (status != nfs_ok)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6a9464262fae6b..2e41eb4c3cec76 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1852,7 +1852,7 @@ retry:
trap = lock_rename(tdentry, fdentry);
if (IS_ERR(trap)) {
err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
- goto out;
+ goto out_want_write;
}
err = fh_fill_pre_attrs(ffhp);
if (err != nfs_ok)
@@ -1922,6 +1922,7 @@ retry:
}
out_unlock:
unlock_rename(tdentry, fdentry);
+out_want_write:
fh_drop_write(ffhp);
/*
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index bc846b904b68d4..aee40db7a036fb 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -240,7 +240,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = {
#define S_SHIFT 12
static unsigned char
-nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
[S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
[S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index bd08616ed8bad7..7b4db9c56e6a77 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -5,7 +5,7 @@
obj-y += proc.o
-CFLAGS_task_mmu.o += $(call cc-option,-Wno-override-init,)
+CFLAGS_task_mmu.o += -Wno-override-init
proc-y := nommu.o task_nommu.o
proc-$(CONFIG_MMU) := task_mmu.o
diff --git a/fs/proc/bootconfig.c b/fs/proc/bootconfig.c
index 902b326e1e5607..87dcaae32ff87b 100644
--- a/fs/proc/bootconfig.c
+++ b/fs/proc/bootconfig.c
@@ -62,12 +62,12 @@ static int __init copy_xbc_key_value_list(char *dst, size_t size)
break;
dst += ret;
}
- if (ret >= 0 && boot_command_line[0]) {
- ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n",
- boot_command_line);
- if (ret > 0)
- dst += ret;
- }
+ }
+ if (cmdline_has_extra_options() && ret >= 0 && boot_command_line[0]) {
+ ret = snprintf(dst, rest(dst, end), "# Parameters from bootloader:\n# %s\n",
+ boot_command_line);
+ if (ret > 0)
+ dst += ret;
}
out:
kfree(key);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 6474529c425306..e539ccd39e1ee7 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2589,7 +2589,7 @@ static void journal_list_init(struct super_block *sb)
static void release_journal_dev(struct reiserfs_journal *journal)
{
if (journal->j_bdev_file) {
- fput(journal->j_bdev_file);
+ bdev_fput(journal->j_bdev_file);
journal->j_bdev_file = NULL;
}
}
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 2be227532f3997..2cbb924620747f 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb)
#ifdef CONFIG_ROMFS_ON_BLOCK
if (sb->s_bdev) {
sync_blockdev(sb->s_bdev);
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
}
#endif
}
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index a0017724d52393..0ff2491c311d8a 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -417,6 +417,7 @@ smb2_close_cached_fid(struct kref *ref)
{
struct cached_fid *cfid = container_of(ref, struct cached_fid,
refcount);
+ int rc;
spin_lock(&cfid->cfids->cfid_list_lock);
if (cfid->on_list) {
@@ -430,9 +431,10 @@ smb2_close_cached_fid(struct kref *ref)
cfid->dentry = NULL;
if (cfid->is_open) {
- SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
+ rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
cfid->fid.volatile_fid);
- atomic_dec(&cfid->tcon->num_remote_opens);
+ if (rc) /* should we retry on -EBUSY or -EAGAIN? */
+ cifs_dbg(VFS, "close cached dir rc %d\n", rc);
}
free_cached_dir(cfid);
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index 226d4835c92db8..c71ae5c043060e 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -250,6 +250,8 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
list_for_each_entry(cfile, &tcon->openFileList, tlist) {
@@ -676,6 +678,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
}
#endif /* CONFIG_CIFS_STATS2 */
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
atomic_set(&tcon->num_smbs_sent, 0);
spin_lock(&tcon->stat_lock);
@@ -755,6 +759,8 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
}
#endif /* STATS2 */
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
i++;
seq_printf(m, "\n%d) %s", i, tcon->tree_name);
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index aa6f1ecb7c0e8f..d41eedbff674ab 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -156,6 +156,7 @@ struct workqueue_struct *decrypt_wq;
struct workqueue_struct *fileinfo_put_wq;
struct workqueue_struct *cifsoplockd_wq;
struct workqueue_struct *deferredclose_wq;
+struct workqueue_struct *serverclose_wq;
__u32 cifs_lock_secret;
/*
@@ -1888,6 +1889,13 @@ init_cifs(void)
goto out_destroy_cifsoplockd_wq;
}
+ serverclose_wq = alloc_workqueue("serverclose",
+ WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ if (!serverclose_wq) {
+ rc = -ENOMEM;
+ goto out_destroy_serverclose_wq;
+ }
+
rc = cifs_init_inodecache();
if (rc)
goto out_destroy_deferredclose_wq;
@@ -1962,6 +1970,8 @@ out_destroy_decrypt_wq:
destroy_workqueue(decrypt_wq);
out_destroy_cifsiod_wq:
destroy_workqueue(cifsiod_wq);
+out_destroy_serverclose_wq:
+ destroy_workqueue(serverclose_wq);
out_clean_proc:
cifs_proc_clean();
return rc;
@@ -1991,6 +2001,7 @@ exit_cifs(void)
destroy_workqueue(cifsoplockd_wq);
destroy_workqueue(decrypt_wq);
destroy_workqueue(fileinfo_put_wq);
+ destroy_workqueue(serverclose_wq);
destroy_workqueue(cifsiod_wq);
cifs_proc_clean();
}
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 7ed9d05f6890b4..d6669ce4ae87f0 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -442,10 +442,10 @@ struct smb_version_operations {
/* set fid protocol-specific info */
void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32);
/* close a file */
- void (*close)(const unsigned int, struct cifs_tcon *,
+ int (*close)(const unsigned int, struct cifs_tcon *,
struct cifs_fid *);
/* close a file, returning file attributes and timestamps */
- void (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
+ int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *pfile_info);
/* send a flush request to the server */
int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *);
@@ -1077,6 +1077,7 @@ struct cifs_ses {
and after mount option parsing we fill it */
char *domainName;
char *password;
+ char *password2; /* When key rotation used, new password may be set before it expires */
char workstation_name[CIFS_MAX_WORKSTATION_LEN];
struct session_key auth_key;
struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
@@ -1281,7 +1282,6 @@ struct cifs_tcon {
struct cached_fids *cfids;
/* BB add field for back pointer to sb struct(s)? */
#ifdef CONFIG_CIFS_DFS_UPCALL
- struct list_head dfs_ses_list;
struct delayed_work dfs_cache_work;
#endif
struct delayed_work query_interfaces; /* query interfaces workqueue job */
@@ -1440,6 +1440,7 @@ struct cifsFileInfo {
bool swapfile:1;
bool oplock_break_cancelled:1;
bool status_file_deleted:1; /* file has been deleted */
+ bool offload:1; /* offload final part of _put to a wq */
unsigned int oplock_epoch; /* epoch from the lease break */
__u32 oplock_level; /* oplock/lease level from the lease break */
int count;
@@ -1448,6 +1449,7 @@ struct cifsFileInfo {
struct cifs_search_info srch_inf;
struct work_struct oplock_break; /* work for oplock breaks */
struct work_struct put; /* work for the final part of _put */
+ struct work_struct serverclose; /* work for serverclose */
struct delayed_work deferred;
bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
char *symlink_target;
@@ -1804,7 +1806,6 @@ struct cifs_mount_ctx {
struct TCP_Server_Info *server;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
- struct list_head dfs_ses_list;
};
static inline void __free_dfs_info_param(struct dfs_info3_param *param)
@@ -2105,6 +2106,7 @@ extern struct workqueue_struct *decrypt_wq;
extern struct workqueue_struct *fileinfo_put_wq;
extern struct workqueue_struct *cifsoplockd_wq;
extern struct workqueue_struct *deferredclose_wq;
+extern struct workqueue_struct *serverclose_wq;
extern __u32 cifs_lock_secret;
extern mempool_t *cifs_sm_req_poolp;
@@ -2324,4 +2326,14 @@ struct smb2_compound_vars {
struct kvec ea_iov;
};
+static inline bool cifs_ses_exiting(struct cifs_ses *ses)
+{
+ bool ret;
+
+ spin_lock(&ses->ses_lock);
+ ret = ses->ses_status == SES_EXITING;
+ spin_unlock(&ses->ses_lock);
+ return ret;
+}
+
#endif /* _CIFS_GLOB_H */
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index 0723e1b57256b8..8e0a348f1f660e 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -725,31 +725,31 @@ struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon);
void cifs_put_tcon_super(struct super_block *sb);
int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry);
-/* Put references of @ses and @ses->dfs_root_ses */
+/* Put references of @ses and its children */
static inline void cifs_put_smb_ses(struct cifs_ses *ses)
{
- struct cifs_ses *rses = ses->dfs_root_ses;
+ struct cifs_ses *next;
- __cifs_put_smb_ses(ses);
- if (rses)
- __cifs_put_smb_ses(rses);
+ do {
+ next = ses->dfs_root_ses;
+ __cifs_put_smb_ses(ses);
+ } while ((ses = next));
}
-/* Get an active reference of @ses and @ses->dfs_root_ses.
+/* Get an active reference of @ses and its children.
*
* NOTE: make sure to call this function when incrementing reference count of
* @ses to ensure that any DFS root session attached to it (@ses->dfs_root_ses)
* will also get its reference count incremented.
*
- * cifs_put_smb_ses() will put both references, so call it when you're done.
+ * cifs_put_smb_ses() will put all references, so call it when you're done.
*/
static inline void cifs_smb_ses_inc_refcount(struct cifs_ses *ses)
{
lockdep_assert_held(&cifs_tcp_ses_lock);
- ses->ses_count++;
- if (ses->dfs_root_ses)
- ses->dfs_root_ses->ses_count++;
+ for (; ses; ses = ses->dfs_root_ses)
+ ses->ses_count++;
}
static inline bool dfs_src_pathname_equal(const char *s1, const char *s2)
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 5aee555515730d..23b5709ddc311c 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -5854,10 +5854,8 @@ SetEARetry:
parm_data->list.EA_flags = 0;
/* we checked above that name len is less than 255 */
parm_data->list.name_len = (__u8)name_len;
- /* EA names are always ASCII */
- if (ea_name)
- strncpy(parm_data->list.name, ea_name, name_len);
- parm_data->list.name[name_len] = '\0';
+ /* EA names are always ASCII and NUL-terminated */
+ strscpy(parm_data->list.name, ea_name ?: "", name_len + 1);
parm_data->list.value_len = cpu_to_le16(ea_value_len);
/* caller ensures that ea_value_len is less than 64K but
we need to ensure that it fits within the smb */
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 9b85b5341822e7..4e35970681bf05 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -175,6 +175,8 @@ cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server,
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
spin_lock(&ses->chan_lock);
for (i = 0; i < ses->chan_count; i++) {
if (!ses->chans[i].server)
@@ -232,7 +234,13 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) {
- /* check if iface is still active */
+ spin_lock(&ses->ses_lock);
+ if (ses->ses_status == SES_EXITING) {
+ spin_unlock(&ses->ses_lock);
+ continue;
+ }
+ spin_unlock(&ses->ses_lock);
+
spin_lock(&ses->chan_lock);
if (cifs_ses_get_chan_index(ses, server) ==
CIFS_INVAL_CHAN_INDEX) {
@@ -1860,6 +1868,9 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx)
ctx->sectype != ses->sectype)
return 0;
+ if (ctx->dfs_root_ses != ses->dfs_root_ses)
+ return 0;
+
/*
* If an existing session is limited to less channels than
* requested, it should not be reused
@@ -1963,31 +1974,6 @@ out:
return rc;
}
-/**
- * cifs_free_ipc - helper to release the session IPC tcon
- * @ses: smb session to unmount the IPC from
- *
- * Needs to be called everytime a session is destroyed.
- *
- * On session close, the IPC is closed and the server must release all tcons of the session.
- * No need to send a tree disconnect here.
- *
- * Besides, it will make the server to not close durable and resilient files on session close, as
- * specified in MS-SMB2 3.3.5.6 Receiving an SMB2 LOGOFF Request.
- */
-static int
-cifs_free_ipc(struct cifs_ses *ses)
-{
- struct cifs_tcon *tcon = ses->tcon_ipc;
-
- if (tcon == NULL)
- return 0;
-
- tconInfoFree(tcon);
- ses->tcon_ipc = NULL;
- return 0;
-}
-
static struct cifs_ses *
cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
{
@@ -2019,48 +2005,52 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
void __cifs_put_smb_ses(struct cifs_ses *ses)
{
struct TCP_Server_Info *server = ses->server;
+ struct cifs_tcon *tcon;
unsigned int xid;
size_t i;
+ bool do_logoff;
int rc;
+ spin_lock(&cifs_tcp_ses_lock);
spin_lock(&ses->ses_lock);
- if (ses->ses_status == SES_EXITING) {
+ cifs_dbg(FYI, "%s: id=0x%llx ses_count=%d ses_status=%u ipc=%s\n",
+ __func__, ses->Suid, ses->ses_count, ses->ses_status,
+ ses->tcon_ipc ? ses->tcon_ipc->tree_name : "none");
+ if (ses->ses_status == SES_EXITING || --ses->ses_count > 0) {
spin_unlock(&ses->ses_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
return;
}
- spin_unlock(&ses->ses_lock);
+ /* ses_count can never go negative */
+ WARN_ON(ses->ses_count < 0);
- cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
- cifs_dbg(FYI,
- "%s: ses ipc: %s\n", __func__, ses->tcon_ipc ? ses->tcon_ipc->tree_name : "NONE");
+ spin_lock(&ses->chan_lock);
+ cifs_chan_clear_need_reconnect(ses, server);
+ spin_unlock(&ses->chan_lock);
- spin_lock(&cifs_tcp_ses_lock);
- if (--ses->ses_count > 0) {
- spin_unlock(&cifs_tcp_ses_lock);
- return;
- }
- spin_lock(&ses->ses_lock);
- if (ses->ses_status == SES_GOOD)
- ses->ses_status = SES_EXITING;
+ do_logoff = ses->ses_status == SES_GOOD && server->ops->logoff;
+ ses->ses_status = SES_EXITING;
+ tcon = ses->tcon_ipc;
+ ses->tcon_ipc = NULL;
spin_unlock(&ses->ses_lock);
spin_unlock(&cifs_tcp_ses_lock);
- /* ses_count can never go negative */
- WARN_ON(ses->ses_count < 0);
-
- spin_lock(&ses->ses_lock);
- if (ses->ses_status == SES_EXITING && server->ops->logoff) {
- spin_unlock(&ses->ses_lock);
- cifs_free_ipc(ses);
+ /*
+ * On session close, the IPC is closed and the server must release all
+ * tcons of the session. No need to send a tree disconnect here.
+ *
+ * Besides, it will make the server to not close durable and resilient
+ * files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an
+ * SMB2 LOGOFF Request.
+ */
+ tconInfoFree(tcon);
+ if (do_logoff) {
xid = get_xid();
rc = server->ops->logoff(xid, ses);
if (rc)
cifs_server_dbg(VFS, "%s: Session Logoff failure rc=%d\n",
__func__, rc);
_free_xid(xid);
- } else {
- spin_unlock(&ses->ses_lock);
- cifs_free_ipc(ses);
}
spin_lock(&cifs_tcp_ses_lock);
@@ -2193,6 +2183,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
}
++delim;
+ /* BB consider adding support for password2 (Key Rotation) for multiuser in future */
ctx->password = kstrndup(delim, len, GFP_KERNEL);
if (!ctx->password) {
cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n",
@@ -2216,6 +2207,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
kfree(ctx->username);
ctx->username = NULL;
kfree_sensitive(ctx->password);
+ /* no need to free ctx->password2 since not allocated in this path */
ctx->password = NULL;
goto out_key_put;
}
@@ -2327,6 +2319,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
if (!ses->password)
goto get_ses_fail;
}
+ /* ctx->password freed at unmount */
+ if (ctx->password2) {
+ ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
+ if (!ses->password2)
+ goto get_ses_fail;
+ }
if (ctx->domainname) {
ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL);
if (!ses->domainName)
@@ -2373,9 +2371,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
* need to lock before changing something in the session.
*/
spin_lock(&cifs_tcp_ses_lock);
+ if (ctx->dfs_root_ses)
+ cifs_smb_ses_inc_refcount(ctx->dfs_root_ses);
ses->dfs_root_ses = ctx->dfs_root_ses;
- if (ses->dfs_root_ses)
- ses->dfs_root_ses->ses_count++;
list_add(&ses->smb_ses_list, &server->smb_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
@@ -3326,6 +3324,9 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx)
cifs_put_smb_ses(mnt_ctx->ses);
else if (mnt_ctx->server)
cifs_put_tcp_session(mnt_ctx->server, 0);
+ mnt_ctx->ses = NULL;
+ mnt_ctx->tcon = NULL;
+ mnt_ctx->server = NULL;
mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
free_xid(mnt_ctx->xid);
}
@@ -3604,8 +3605,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
bool isdfs;
int rc;
- INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list);
-
rc = dfs_mount_share(&mnt_ctx, &isdfs);
if (rc)
goto error;
@@ -3636,7 +3635,6 @@ out:
return rc;
error:
- dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list);
cifs_mount_put_conns(&mnt_ctx);
return rc;
}
@@ -3651,6 +3649,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
goto error;
rc = cifs_mount_get_tcon(&mnt_ctx);
+ if (!rc) {
+ /*
+ * Prevent superblock from being created with any missing
+ * connections.
+ */
+ if (WARN_ON(!mnt_ctx.server))
+ rc = -EHOSTDOWN;
+ else if (WARN_ON(!mnt_ctx.ses))
+ rc = -EACCES;
+ else if (WARN_ON(!mnt_ctx.tcon))
+ rc = -ENOENT;
+ }
if (rc)
goto error;
@@ -3988,13 +3998,14 @@ cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses)
}
static struct cifs_tcon *
-cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+__cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
{
int rc;
struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
struct cifs_ses *ses;
struct cifs_tcon *tcon = NULL;
struct smb3_fs_context *ctx;
+ char *origin_fullpath = NULL;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (ctx == NULL)
@@ -4018,6 +4029,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
ctx->sign = master_tcon->ses->sign;
ctx->seal = master_tcon->seal;
ctx->witness = master_tcon->use_witness;
+ ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses;
rc = cifs_set_vol_auth(ctx, master_tcon->ses);
if (rc) {
@@ -4037,12 +4049,39 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
goto out;
}
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ spin_lock(&master_tcon->tc_lock);
+ if (master_tcon->origin_fullpath) {
+ spin_unlock(&master_tcon->tc_lock);
+ origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source);
+ if (IS_ERR(origin_fullpath)) {
+ tcon = ERR_CAST(origin_fullpath);
+ origin_fullpath = NULL;
+ cifs_put_smb_ses(ses);
+ goto out;
+ }
+ } else {
+ spin_unlock(&master_tcon->tc_lock);
+ }
+#endif
+
tcon = cifs_get_tcon(ses, ctx);
if (IS_ERR(tcon)) {
cifs_put_smb_ses(ses);
goto out;
}
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (origin_fullpath) {
+ spin_lock(&tcon->tc_lock);
+ tcon->origin_fullpath = origin_fullpath;
+ spin_unlock(&tcon->tc_lock);
+ origin_fullpath = NULL;
+ queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
+ dfs_cache_get_ttl() * HZ);
+ }
+#endif
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (cap_unix(ses))
reset_cifs_unix_caps(0, tcon, NULL, ctx);
@@ -4051,11 +4090,23 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
out:
kfree(ctx->username);
kfree_sensitive(ctx->password);
+ kfree(origin_fullpath);
kfree(ctx);
return tcon;
}
+static struct cifs_tcon *
+cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
+{
+ struct cifs_tcon *ret;
+
+ cifs_mount_lock();
+ ret = __cifs_construct_tcon(cifs_sb, fsuid);
+ cifs_mount_unlock();
+ return ret;
+}
+
struct cifs_tcon *
cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
{
diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c
index 449c59830039bc..3ec965547e3d4d 100644
--- a/fs/smb/client/dfs.c
+++ b/fs/smb/client/dfs.c
@@ -66,33 +66,20 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path)
}
/*
- * Track individual DFS referral servers used by new DFS mount.
- *
- * On success, their lifetime will be shared by final tcon (dfs_ses_list).
- * Otherwise, they will be put by dfs_put_root_smb_sessions() in cifs_mount().
+ * Get an active reference of @ses so that next call to cifs_put_tcon() won't
+ * release it as any new DFS referrals must go through its IPC tcon.
*/
-static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
+static void add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
{
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
- struct dfs_root_ses *root_ses;
struct cifs_ses *ses = mnt_ctx->ses;
if (ses) {
- root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL);
- if (!root_ses)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&root_ses->list);
-
spin_lock(&cifs_tcp_ses_lock);
cifs_smb_ses_inc_refcount(ses);
spin_unlock(&cifs_tcp_ses_lock);
- root_ses->ses = ses;
- list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list);
}
- /* Select new DFS referral server so that new referrals go through it */
ctx->dfs_root_ses = ses;
- return 0;
}
static inline int parse_dfs_target(struct smb3_fs_context *ctx,
@@ -185,11 +172,8 @@ again:
continue;
}
- if (is_refsrv) {
- rc = add_root_smb_session(mnt_ctx);
- if (rc)
- goto out;
- }
+ if (is_refsrv)
+ add_root_smb_session(mnt_ctx);
rc = ref_walk_advance(rw);
if (!rc) {
@@ -232,6 +216,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct cifs_tcon *tcon;
char *origin_fullpath;
+ bool new_tcon = true;
int rc;
origin_fullpath = dfs_get_path(cifs_sb, ctx->source);
@@ -239,6 +224,18 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
return PTR_ERR(origin_fullpath);
rc = dfs_referral_walk(mnt_ctx);
+ if (!rc) {
+ /*
+ * Prevent superblock from being created with any missing
+ * connections.
+ */
+ if (WARN_ON(!mnt_ctx->server))
+ rc = -EHOSTDOWN;
+ else if (WARN_ON(!mnt_ctx->ses))
+ rc = -EACCES;
+ else if (WARN_ON(!mnt_ctx->tcon))
+ rc = -ENOENT;
+ }
if (rc)
goto out;
@@ -247,15 +244,14 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
if (!tcon->origin_fullpath) {
tcon->origin_fullpath = origin_fullpath;
origin_fullpath = NULL;
+ } else {
+ new_tcon = false;
}
spin_unlock(&tcon->tc_lock);
- if (list_empty(&tcon->dfs_ses_list)) {
- list_replace_init(&mnt_ctx->dfs_ses_list, &tcon->dfs_ses_list);
+ if (new_tcon) {
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
dfs_cache_get_ttl() * HZ);
- } else {
- dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list);
}
out:
@@ -298,7 +294,6 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
if (rc)
return rc;
- ctx->dfs_root_ses = mnt_ctx->ses;
/*
* If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally
* try to get an DFS referral (even cached) to determine whether it is an DFS mount.
@@ -324,7 +319,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
*isdfs = true;
add_root_smb_session(mnt_ctx);
- return __dfs_mount_share(mnt_ctx);
+ rc = __dfs_mount_share(mnt_ctx);
+ dfs_put_root_smb_sessions(mnt_ctx);
+ return rc;
}
/* Update dfs referral path of superblock */
diff --git a/fs/smb/client/dfs.h b/fs/smb/client/dfs.h
index 875ab7ae57fcdf..e5c4dcf837503a 100644
--- a/fs/smb/client/dfs.h
+++ b/fs/smb/client/dfs.h
@@ -7,7 +7,9 @@
#define _CIFS_DFS_H
#include "cifsglob.h"
+#include "cifsproto.h"
#include "fs_context.h"
+#include "dfs_cache.h"
#include "cifs_unicode.h"
#include <linux/namei.h>
@@ -114,11 +116,6 @@ static inline void ref_walk_set_tgt_hint(struct dfs_ref_walk *rw)
ref_walk_tit(rw));
}
-struct dfs_root_ses {
- struct list_head list;
- struct cifs_ses *ses;
-};
-
int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref,
struct smb3_fs_context *ctx);
int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs);
@@ -133,20 +130,32 @@ static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *p
{
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+ struct cifs_ses *rses = ctx->dfs_root_ses ?: mnt_ctx->ses;
- return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls,
+ return dfs_cache_find(mnt_ctx->xid, rses, cifs_sb->local_nls,
cifs_remap(cifs_sb), path, ref, tl);
}
-static inline void dfs_put_root_smb_sessions(struct list_head *head)
+/*
+ * cifs_get_smb_ses() already guarantees an active reference of
+ * @ses->dfs_root_ses when a new session is created, so we need to put extra
+ * references of all DFS root sessions that were used across the mount process
+ * in dfs_mount_share().
+ */
+static inline void dfs_put_root_smb_sessions(struct cifs_mount_ctx *mnt_ctx)
{
- struct dfs_root_ses *root, *tmp;
+ const struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+ struct cifs_ses *ses = ctx->dfs_root_ses;
+ struct cifs_ses *cur;
+
+ if (!ses)
+ return;
- list_for_each_entry_safe(root, tmp, head, list) {
- list_del_init(&root->list);
- cifs_put_smb_ses(root->ses);
- kfree(root);
+ for (cur = ses; cur; cur = cur->dfs_root_ses) {
+ if (cur->dfs_root_ses)
+ cifs_put_smb_ses(cur->dfs_root_ses);
}
+ cifs_put_smb_ses(ses);
}
#endif /* _CIFS_DFS_H */
diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c
index 508d831fabe378..11c8efecf7aa12 100644
--- a/fs/smb/client/dfs_cache.c
+++ b/fs/smb/client/dfs_cache.c
@@ -1172,8 +1172,8 @@ static bool is_ses_good(struct cifs_ses *ses)
return ret;
}
-/* Refresh dfs referral of tcon and mark it for reconnect if needed */
-static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_refresh)
+/* Refresh dfs referral of @ses and mark it for reconnect if needed */
+static void __refresh_ses_referral(struct cifs_ses *ses, bool force_refresh)
{
struct TCP_Server_Info *server = ses->server;
DFS_CACHE_TGT_LIST(old_tl);
@@ -1181,10 +1181,21 @@ static int __refresh_tcon(const char *path, struct cifs_ses *ses, bool force_ref
bool needs_refresh = false;
struct cache_entry *ce;
unsigned int xid;
+ char *path = NULL;
int rc = 0;
xid = get_xid();
+ mutex_lock(&server->refpath_lock);
+ if (server->leaf_fullpath) {
+ path = kstrdup(server->leaf_fullpath + 1, GFP_ATOMIC);
+ if (!path)
+ rc = -ENOMEM;
+ }
+ mutex_unlock(&server->refpath_lock);
+ if (!path)
+ goto out;
+
down_read(&htable_rw_lock);
ce = lookup_cache_entry(path);
needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce);
@@ -1218,19 +1229,17 @@ out:
free_xid(xid);
dfs_cache_free_tgts(&old_tl);
dfs_cache_free_tgts(&new_tl);
- return rc;
+ kfree(path);
}
-static int refresh_tcon(struct cifs_tcon *tcon, bool force_refresh)
+static inline void refresh_ses_referral(struct cifs_ses *ses)
{
- struct TCP_Server_Info *server = tcon->ses->server;
- struct cifs_ses *ses = tcon->ses;
+ __refresh_ses_referral(ses, false);
+}
- mutex_lock(&server->refpath_lock);
- if (server->leaf_fullpath)
- __refresh_tcon(server->leaf_fullpath + 1, ses, force_refresh);
- mutex_unlock(&server->refpath_lock);
- return 0;
+static inline void force_refresh_ses_referral(struct cifs_ses *ses)
+{
+ __refresh_ses_referral(ses, true);
}
/**
@@ -1271,34 +1280,20 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
*/
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
- return refresh_tcon(tcon, true);
+ force_refresh_ses_referral(tcon->ses);
+ return 0;
}
/* Refresh all DFS referrals related to DFS tcon */
void dfs_cache_refresh(struct work_struct *work)
{
- struct TCP_Server_Info *server;
- struct dfs_root_ses *rses;
struct cifs_tcon *tcon;
struct cifs_ses *ses;
tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work);
- ses = tcon->ses;
- server = ses->server;
- mutex_lock(&server->refpath_lock);
- if (server->leaf_fullpath)
- __refresh_tcon(server->leaf_fullpath + 1, ses, false);
- mutex_unlock(&server->refpath_lock);
-
- list_for_each_entry(rses, &tcon->dfs_ses_list, list) {
- ses = rses->ses;
- server = ses->server;
- mutex_lock(&server->refpath_lock);
- if (server->leaf_fullpath)
- __refresh_tcon(server->leaf_fullpath + 1, ses, false);
- mutex_unlock(&server->refpath_lock);
- }
+ for (ses = tcon->ses; ses; ses = ses->dfs_root_ses)
+ refresh_ses_referral(ses);
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
atomic_read(&dfs_cache_ttl) * HZ);
diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c
index 89333d9bce36eb..864b194dbaa0a0 100644
--- a/fs/smb/client/dir.c
+++ b/fs/smb/client/dir.c
@@ -189,6 +189,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
int disposition;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;
*oplock = 0;
if (tcon->ses->server->oplocks)
@@ -200,6 +201,10 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
return PTR_ERR(full_path);
}
+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (oflags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open &&
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
@@ -276,6 +281,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
desired_access |= GENERIC_READ; /* is this too little? */
if (OPEN_FMODE(oflags) & FMODE_WRITE)
desired_access |= GENERIC_WRITE;
+ if (rdwr_for_fscache == 1)
+ desired_access |= GENERIC_READ;
disposition = FILE_OVERWRITE_IF;
if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
@@ -304,6 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
if (!tcon->unix_ext && (mode & S_IWUGO) == 0)
create_options |= CREATE_OPTION_READONLY;
+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@@ -317,8 +325,15 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
rc = server->ops->open(xid, &oparms, oplock, buf);
if (rc) {
cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc);
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access &= ~GENERIC_READ;
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }
goto out;
}
+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
@@ -612,11 +627,18 @@ int cifs_mknod(struct mnt_idmap *idmap, struct inode *inode,
goto mknod_out;
}
+ trace_smb3_mknod_enter(xid, tcon->ses->Suid, tcon->tid, full_path);
+
rc = tcon->ses->server->ops->make_node(xid, inode, direntry, tcon,
full_path, mode,
device_number);
mknod_out:
+ if (rc)
+ trace_smb3_mknod_err(xid, tcon->ses->Suid, tcon->tid, rc);
+ else
+ trace_smb3_mknod_done(xid, tcon->ses->Suid, tcon->tid);
+
free_dentry_path(page);
free_xid(xid);
cifs_put_tlink(tlink);
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 16aadce492b2ec..9be37d0fe724e9 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -206,12 +206,12 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
*/
}
-static inline int cifs_convert_flags(unsigned int flags)
+static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
if ((flags & O_ACCMODE) == O_RDONLY)
return GENERIC_READ;
else if ((flags & O_ACCMODE) == O_WRONLY)
- return GENERIC_WRITE;
+ return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
else if ((flags & O_ACCMODE) == O_RDWR) {
/* GENERIC_ALL is too much permission to request
can cause unnecessary access denied on create */
@@ -348,11 +348,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
int create_options = CREATE_NOT_DIR;
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;
if (!server->ops->open)
return -ENOSYS;
- desired_access = cifs_convert_flags(f_flags);
+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
+ desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
/*********************************************************************
* open flag mapping table:
@@ -389,6 +394,7 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
if (f_flags & O_DIRECT)
create_options |= CREATE_NO_BUFFER;
+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@@ -400,8 +406,16 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_
};
rc = server->ops->open(xid, &oparms, oplock, buf);
- if (rc)
+ if (rc) {
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access = cifs_convert_flags(f_flags, 0);
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }
return rc;
+ }
+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
/* TODO: Add support for calling posix query info but with passing in fid */
if (tcon->unix_ext)
@@ -445,6 +459,7 @@ cifs_down_write(struct rw_semaphore *sem)
}
static void cifsFileInfo_put_work(struct work_struct *work);
+void serverclose_work(struct work_struct *work);
struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct tcon_link *tlink, __u32 oplock,
@@ -491,6 +506,7 @@ struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
cfile->tlink = cifs_get_tlink(tlink);
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
INIT_WORK(&cfile->put, cifsFileInfo_put_work);
+ INIT_WORK(&cfile->serverclose, serverclose_work);
INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
mutex_init(&cfile->fh_mutex);
spin_lock_init(&cfile->file_info_lock);
@@ -582,6 +598,40 @@ static void cifsFileInfo_put_work(struct work_struct *work)
cifsFileInfo_put_final(cifs_file);
}
+void serverclose_work(struct work_struct *work)
+{
+ struct cifsFileInfo *cifs_file = container_of(work,
+ struct cifsFileInfo, serverclose);
+
+ struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
+
+ struct TCP_Server_Info *server = tcon->ses->server;
+ int rc = 0;
+ int retries = 0;
+ int MAX_RETRIES = 4;
+
+ do {
+ if (server->ops->close_getattr)
+ rc = server->ops->close_getattr(0, tcon, cifs_file);
+ else if (server->ops->close)
+ rc = server->ops->close(0, tcon, &cifs_file->fid);
+
+ if (rc == -EBUSY || rc == -EAGAIN) {
+ retries++;
+ msleep(250);
+ }
+ } while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
+ );
+
+ if (retries == MAX_RETRIES)
+ pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
+
+ if (cifs_file->offload)
+ queue_work(fileinfo_put_wq, &cifs_file->put);
+ else
+ cifsFileInfo_put_final(cifs_file);
+}
+
/**
* cifsFileInfo_put - release a reference of file priv data
*
@@ -622,10 +672,13 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
struct cifs_fid fid = {};
struct cifs_pending_open open;
bool oplock_break_cancelled;
+ bool serverclose_offloaded = false;
spin_lock(&tcon->open_file_lock);
spin_lock(&cifsi->open_file_lock);
spin_lock(&cifs_file->file_info_lock);
+
+ cifs_file->offload = offload;
if (--cifs_file->count > 0) {
spin_unlock(&cifs_file->file_info_lock);
spin_unlock(&cifsi->open_file_lock);
@@ -667,13 +720,20 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
struct TCP_Server_Info *server = tcon->ses->server;
unsigned int xid;
+ int rc = 0;
xid = get_xid();
if (server->ops->close_getattr)
- server->ops->close_getattr(xid, tcon, cifs_file);
+ rc = server->ops->close_getattr(xid, tcon, cifs_file);
else if (server->ops->close)
- server->ops->close(xid, tcon, &cifs_file->fid);
+ rc = server->ops->close(xid, tcon, &cifs_file->fid);
_free_xid(xid);
+
+ if (rc == -EBUSY || rc == -EAGAIN) {
+ // Server close failed, hence offloading it as an async op
+ queue_work(serverclose_wq, &cifs_file->serverclose);
+ serverclose_offloaded = true;
+ }
}
if (oplock_break_cancelled)
@@ -681,10 +741,15 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
cifs_del_pending_open(&open);
- if (offload)
- queue_work(fileinfo_put_wq, &cifs_file->put);
- else
- cifsFileInfo_put_final(cifs_file);
+ // if serverclose has been offloaded to wq (on failure), it will
+ // handle offloading put as well. If serverclose not offloaded,
+ // we need to handle offloading put here.
+ if (!serverclose_offloaded) {
+ if (offload)
+ queue_work(fileinfo_put_wq, &cifs_file->put);
+ else
+ cifsFileInfo_put_final(cifs_file);
+ }
}
int cifs_open(struct inode *inode, struct file *file)
@@ -834,11 +899,11 @@ int cifs_open(struct inode *inode, struct file *file)
use_cache:
fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
file->f_mode & FMODE_WRITE);
- if (file->f_flags & O_DIRECT &&
- (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
- file->f_flags & O_APPEND))
- cifs_invalidate_cache(file_inode(file),
- FSCACHE_INVAL_DIO_WRITE);
+ if (!(file->f_flags & O_DIRECT))
+ goto out;
+ if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
+ goto out;
+ cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
out:
free_dentry_path(page);
@@ -903,6 +968,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
int disposition = FILE_OPEN;
int create_options = CREATE_NOT_DIR;
struct cifs_open_parms oparms;
+ int rdwr_for_fscache = 0;
xid = get_xid();
mutex_lock(&cfile->fh_mutex);
@@ -966,7 +1032,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
- desired_access = cifs_convert_flags(cfile->f_flags);
+ /* If we're caching, we need to be able to fill in around partial writes. */
+ if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
+ rdwr_for_fscache = 1;
+
+ desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
/* O_SYNC also has bit for O_DSYNC so following check picks up either */
if (cfile->f_flags & O_SYNC)
@@ -978,6 +1048,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
if (server->ops->get_lease_key)
server->ops->get_lease_key(inode, &cfile->fid);
+retry_open:
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@@ -1003,6 +1074,11 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
/* indicate that we need to relock the file */
oparms.reconnect = true;
}
+ if (rc == -EACCES && rdwr_for_fscache == 1) {
+ desired_access = cifs_convert_flags(cfile->f_flags, 0);
+ rdwr_for_fscache = 2;
+ goto retry_open;
+ }
if (rc) {
mutex_unlock(&cfile->fh_mutex);
@@ -1011,6 +1087,9 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
goto reopen_error_exit;
}
+ if (rdwr_for_fscache == 2)
+ cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
+
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index bdcbe6ff2739ab..6c727d8c31e870 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -37,7 +37,7 @@
#include "rfc1002pdu.h"
#include "fs_context.h"
-static DEFINE_MUTEX(cifs_mount_mutex);
+DEFINE_MUTEX(cifs_mount_mutex);
static const match_table_t cifs_smb_version_tokens = {
{ Smb_1, SMB1_VERSION_STRING },
@@ -162,6 +162,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_string("username", Opt_user),
fsparam_string("pass", Opt_pass),
fsparam_string("password", Opt_pass),
+ fsparam_string("password2", Opt_pass2),
fsparam_string("ip", Opt_ip),
fsparam_string("addr", Opt_ip),
fsparam_string("domain", Opt_domain),
@@ -345,6 +346,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
new_ctx->nodename = NULL;
new_ctx->username = NULL;
new_ctx->password = NULL;
+ new_ctx->password2 = NULL;
new_ctx->server_hostname = NULL;
new_ctx->domainname = NULL;
new_ctx->UNC = NULL;
@@ -357,6 +359,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
DUP_CTX_STR(prepath);
DUP_CTX_STR(username);
DUP_CTX_STR(password);
+ DUP_CTX_STR(password2);
DUP_CTX_STR(server_hostname);
DUP_CTX_STR(UNC);
DUP_CTX_STR(source);
@@ -783,9 +786,9 @@ static int smb3_get_tree(struct fs_context *fc)
if (err)
return err;
- mutex_lock(&cifs_mount_mutex);
+ cifs_mount_lock();
ret = smb3_get_tree_common(fc);
- mutex_unlock(&cifs_mount_mutex);
+ cifs_mount_unlock();
return ret;
}
@@ -905,6 +908,8 @@ static int smb3_reconfigure(struct fs_context *fc)
else {
kfree_sensitive(ses->password);
ses->password = kstrdup(ctx->password, GFP_KERNEL);
+ kfree_sensitive(ses->password2);
+ ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
}
STEAL_STRING(cifs_sb, ctx, domainname);
STEAL_STRING(cifs_sb, ctx, nodename);
@@ -1305,6 +1310,18 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
goto cifs_parse_mount_err;
}
break;
+ case Opt_pass2:
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
+ if (strlen(param->string) == 0)
+ break;
+
+ ctx->password2 = kstrdup(param->string, GFP_KERNEL);
+ if (ctx->password2 == NULL) {
+ cifs_errorf(fc, "OOM when copying password2 string\n");
+ goto cifs_parse_mount_err;
+ }
+ break;
case Opt_ip:
if (strlen(param->string) == 0) {
ctx->got_ip = false;
@@ -1608,6 +1625,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
cifs_parse_mount_err:
kfree_sensitive(ctx->password);
ctx->password = NULL;
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
return -EINVAL;
}
@@ -1713,6 +1732,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx)
ctx->username = NULL;
kfree_sensitive(ctx->password);
ctx->password = NULL;
+ kfree_sensitive(ctx->password2);
+ ctx->password2 = NULL;
kfree(ctx->server_hostname);
ctx->server_hostname = NULL;
kfree(ctx->UNC);
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index 7863f2248c4df8..a947bddeba273e 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -145,6 +145,7 @@ enum cifs_param {
Opt_source,
Opt_user,
Opt_pass,
+ Opt_pass2,
Opt_ip,
Opt_domain,
Opt_srcaddr,
@@ -177,6 +178,7 @@ struct smb3_fs_context {
char *username;
char *password;
+ char *password2;
char *domainname;
char *source;
char *server_hostname;
@@ -304,4 +306,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
#define MAX_CACHED_FIDS 16
extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp);
+extern struct mutex cifs_mount_mutex;
+
+static inline void cifs_mount_lock(void)
+{
+ mutex_lock(&cifs_mount_mutex);
+}
+
+static inline void cifs_mount_unlock(void)
+{
+ mutex_unlock(&cifs_mount_mutex);
+}
+
#endif
diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c
index c4a3cb736881ae..340efce8f05295 100644
--- a/fs/smb/client/fscache.c
+++ b/fs/smb/client/fscache.c
@@ -12,6 +12,16 @@
#include "cifs_fs_sb.h"
#include "cifsproto.h"
+/*
+ * Key for fscache inode. [!] Contents must match comparisons in cifs_find_inode().
+ */
+struct cifs_fscache_inode_key {
+
+ __le64 uniqueid; /* server inode number */
+ __le64 createtime; /* creation time on server */
+ u8 type; /* S_IFMT file type */
+} __packed;
+
static void cifs_fscache_fill_volume_coherency(
struct cifs_tcon *tcon,
struct cifs_fscache_volume_coherency_data *cd)
@@ -97,15 +107,19 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
void cifs_fscache_get_inode_cookie(struct inode *inode)
{
struct cifs_fscache_inode_coherency_data cd;
+ struct cifs_fscache_inode_key key;
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ key.uniqueid = cpu_to_le64(cifsi->uniqueid);
+ key.createtime = cpu_to_le64(cifsi->createtime);
+ key.type = (inode->i_mode & S_IFMT) >> 12;
cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd);
cifsi->netfs.cache =
fscache_acquire_cookie(tcon->fscache, 0,
- &cifsi->uniqueid, sizeof(cifsi->uniqueid),
+ &key, sizeof(key),
&cd, sizeof(cd),
i_size_read(&cifsi->netfs.inode));
if (cifsi->netfs.cache)
diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h
index a3d73720914f88..1f2ea9f5cc9a8a 100644
--- a/fs/smb/client/fscache.h
+++ b/fs/smb/client/fscache.h
@@ -109,6 +109,11 @@ static inline void cifs_readahead_to_fscache(struct inode *inode,
__cifs_readahead_to_fscache(inode, pos, len);
}
+static inline bool cifs_fscache_enabled(struct inode *inode)
+{
+ return fscache_cookie_enabled(cifs_inode_cookie(inode));
+}
+
#else /* CONFIG_CIFS_FSCACHE */
static inline
void cifs_fscache_fill_coherency(struct inode *inode,
@@ -124,6 +129,7 @@ static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {}
static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {}
static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
+static inline bool cifs_fscache_enabled(struct inode *inode) { return false; }
static inline int cifs_fscache_query_occupancy(struct inode *inode,
pgoff_t first, unsigned int nr_pages,
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index d28ab0af604936..60afab5c83d410 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -1105,7 +1105,8 @@ static int cifs_get_fattr(struct cifs_open_info_data *data,
} else {
cifs_open_info_to_fattr(fattr, data, sb);
}
- if (!rc && fattr->cf_flags & CIFS_FATTR_DELETE_PENDING)
+ if (!rc && *inode &&
+ (fattr->cf_flags & CIFS_FATTR_DELETE_PENDING))
cifs_mark_open_handles_for_deleted_file(*inode, full_path);
break;
case -EREMOTE:
@@ -1351,6 +1352,8 @@ cifs_find_inode(struct inode *inode, void *opaque)
{
struct cifs_fattr *fattr = opaque;
+ /* [!] The compared values must be the same in struct cifs_fscache_inode_key. */
+
/* don't match inode with different uniqueid */
if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
return 0;
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c
index c012dfdba80d45..855ac5a62edfaa 100644
--- a/fs/smb/client/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -247,7 +247,9 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) {
list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) {
- if (ses_it->Suid == out.session_id) {
+ spin_lock(&ses_it->ses_lock);
+ if (ses_it->ses_status != SES_EXITING &&
+ ses_it->Suid == out.session_id) {
ses = ses_it;
/*
* since we are using the session outside the crit
@@ -255,9 +257,11 @@ static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug
* so increment its refcount
*/
cifs_smb_ses_inc_refcount(ses);
+ spin_unlock(&ses_it->ses_lock);
found = true;
goto search_end;
}
+ spin_unlock(&ses_it->ses_lock);
}
}
search_end:
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index c3771fc81328ff..7d15a1969b8184 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -98,6 +98,7 @@ sesInfoFree(struct cifs_ses *buf_to_free)
kfree(buf_to_free->serverDomain);
kfree(buf_to_free->serverNOS);
kfree_sensitive(buf_to_free->password);
+ kfree_sensitive(buf_to_free->password2);
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
kfree_sensitive(buf_to_free->auth_key.response);
@@ -138,9 +139,6 @@ tcon_info_alloc(bool dir_leases_enabled)
atomic_set(&ret_buf->num_local_opens, 0);
atomic_set(&ret_buf->num_remote_opens, 0);
ret_buf->stats_from_time = ktime_get_real_seconds();
-#ifdef CONFIG_CIFS_DFS_UPCALL
- INIT_LIST_HEAD(&ret_buf->dfs_ses_list);
-#endif
return ret_buf;
}
@@ -156,9 +154,6 @@ tconInfoFree(struct cifs_tcon *tcon)
atomic_dec(&tconInfoAllocCount);
kfree(tcon->nativeFileSystem);
kfree_sensitive(tcon->password);
-#ifdef CONFIG_CIFS_DFS_UPCALL
- dfs_put_root_smb_sessions(&tcon->dfs_ses_list);
-#endif
kfree(tcon->origin_fullpath);
kfree(tcon);
}
@@ -487,6 +482,8 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid != buf->Tid)
continue;
diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c
index a9eaba8083b0d6..212ec6f66ec65b 100644
--- a/fs/smb/client/smb1ops.c
+++ b/fs/smb/client/smb1ops.c
@@ -753,11 +753,11 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
}
-static void
+static int
cifs_close_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
- CIFSSMBClose(xid, tcon, fid->netfid);
+ return CIFSSMBClose(xid, tcon, fid->netfid);
}
static int
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c
index 82b84a4941dd2f..cc72be5a93a933 100644
--- a/fs/smb/client/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
@@ -622,6 +622,8 @@ smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
cifs_stats_inc(
@@ -697,6 +699,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
/* look up tcon based on tid & uid */
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
spin_lock(&tcon->open_file_lock);
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 2ed456948f34ca..78c94d0350fe99 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -1412,14 +1412,14 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
memcpy(cfile->fid.create_guid, fid->create_guid, 16);
}
-static void
+static int
smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid)
{
- SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
+ return SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
}
-static void
+static int
smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile)
{
@@ -1430,7 +1430,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
rc = __SMB2_close(xid, tcon, cfile->fid.persistent_fid,
cfile->fid.volatile_fid, &file_inf);
if (rc)
- return;
+ return rc;
inode = d_inode(cfile->dentry);
@@ -1459,6 +1459,7 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
/* End of file and Attributes should not have to be updated on close */
spin_unlock(&inode->i_lock);
+ return rc;
}
static int
@@ -2480,6 +2481,8 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ if (cifs_ses_exiting(ses))
+ continue;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) {
spin_lock(&tcon->tc_lock);
@@ -3913,7 +3916,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
strcat(message, "W");
}
if (!new_oplock)
- strncpy(message, "None", sizeof(message));
+ strscpy(message, "None");
cinode->oplock = new_oplock;
cifs_dbg(FYI, "%s Lease granted on inode %p\n", message,
@@ -4961,68 +4964,84 @@ static int smb2_next_header(struct TCP_Server_Info *server, char *buf,
return 0;
}
-int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
- struct dentry *dentry, struct cifs_tcon *tcon,
- const char *full_path, umode_t mode, dev_t dev)
+static int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+ struct dentry *dentry, struct cifs_tcon *tcon,
+ const char *full_path, umode_t mode, dev_t dev)
{
- struct cifs_open_info_data buf = {};
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
struct cifs_io_parms io_parms = {};
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_fid fid;
unsigned int bytes_written;
- struct win_dev *pdev;
+ struct win_dev pdev = {};
struct kvec iov[2];
__u32 oplock = server->oplocks ? REQ_OPLOCK : 0;
int rc;
- if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode))
+ switch (mode & S_IFMT) {
+ case S_IFCHR:
+ strscpy(pdev.type, "IntxCHR");
+ pdev.major = cpu_to_le64(MAJOR(dev));
+ pdev.minor = cpu_to_le64(MINOR(dev));
+ break;
+ case S_IFBLK:
+ strscpy(pdev.type, "IntxBLK");
+ pdev.major = cpu_to_le64(MAJOR(dev));
+ pdev.minor = cpu_to_le64(MINOR(dev));
+ break;
+ case S_IFIFO:
+ strscpy(pdev.type, "LnxFIFO");
+ break;
+ default:
return -EPERM;
+ }
- oparms = (struct cifs_open_parms) {
- .tcon = tcon,
- .cifs_sb = cifs_sb,
- .desired_access = GENERIC_WRITE,
- .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR |
- CREATE_OPTION_SPECIAL),
- .disposition = FILE_CREATE,
- .path = full_path,
- .fid = &fid,
- };
+ oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, GENERIC_WRITE,
+ FILE_CREATE, CREATE_NOT_DIR |
+ CREATE_OPTION_SPECIAL, ACL_NO_MODE);
+ oparms.fid = &fid;
- rc = server->ops->open(xid, &oparms, &oplock, &buf);
+ rc = server->ops->open(xid, &oparms, &oplock, NULL);
if (rc)
return rc;
- /*
- * BB Do not bother to decode buf since no local inode yet to put
- * timestamps in, but we can reuse it safely.
- */
- pdev = (struct win_dev *)&buf.fi;
io_parms.pid = current->tgid;
io_parms.tcon = tcon;
- io_parms.length = sizeof(*pdev);
- iov[1].iov_base = pdev;
- iov[1].iov_len = sizeof(*pdev);
- if (S_ISCHR(mode)) {
- memcpy(pdev->type, "IntxCHR", 8);
- pdev->major = cpu_to_le64(MAJOR(dev));
- pdev->minor = cpu_to_le64(MINOR(dev));
- } else if (S_ISBLK(mode)) {
- memcpy(pdev->type, "IntxBLK", 8);
- pdev->major = cpu_to_le64(MAJOR(dev));
- pdev->minor = cpu_to_le64(MINOR(dev));
- } else if (S_ISFIFO(mode)) {
- memcpy(pdev->type, "LnxFIFO", 8);
- }
+ io_parms.length = sizeof(pdev);
+ iov[1].iov_base = &pdev;
+ iov[1].iov_len = sizeof(pdev);
rc = server->ops->sync_write(xid, &fid, &io_parms,
&bytes_written, iov, 1);
server->ops->close(xid, tcon, &fid);
- d_drop(dentry);
- /* FIXME: add code here to set EAs */
- cifs_free_open_info(&buf);
+ return rc;
+}
+
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+ struct dentry *dentry, struct cifs_tcon *tcon,
+ const char *full_path, umode_t mode, dev_t dev)
+{
+ struct inode *new = NULL;
+ int rc;
+
+ rc = __cifs_sfu_make_node(xid, inode, dentry, tcon,
+ full_path, mode, dev);
+ if (rc)
+ return rc;
+
+ if (tcon->posix_extensions) {
+ rc = smb311_posix_get_inode_info(&new, full_path, NULL,
+ inode->i_sb, xid);
+ } else if (tcon->unix_ext) {
+ rc = cifs_get_inode_info_unix(&new, full_path,
+ inode->i_sb, xid);
+ } else {
+ rc = cifs_get_inode_info(&new, full_path, NULL,
+ inode->i_sb, xid, NULL);
+ }
+ if (!rc)
+ d_instantiate(dentry, new);
return rc;
}
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 3ea688558e6c9b..86c647a947ccd1 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -367,6 +367,17 @@ again:
}
rc = cifs_setup_session(0, ses, server, nls_codepage);
+ if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) {
+ /*
+ * Try alternate password for next reconnect (key rotation
+ * could be enabled on the server e.g.) if an alternate
+ * password is available and the current password is expired,
+ * but do not swap on non pwd related errors like host down
+ */
+ if (ses->password2)
+ swap(ses->password2, ses->password);
+ }
+
if ((rc == -EACCES) && !tcon->retry) {
mutex_unlock(&ses->session_mutex);
rc = -EHOSTDOWN;
@@ -3628,9 +3639,9 @@ replay_again:
memcpy(&pbuf->network_open_info,
&rsp->network_open_info,
sizeof(pbuf->network_open_info));
+ atomic_dec(&tcon->num_remote_opens);
}
- atomic_dec(&tcon->num_remote_opens);
close_exit:
SMB2_close_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c
index 5a3ca62d2f07f7..1d6e54f7879e6a 100644
--- a/fs/smb/client/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
@@ -659,7 +659,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
}
spin_unlock(&server->srv_lock);
if (!is_binding && !server->session_estab) {
- strncpy(shdr->Signature, "BSRSPYL", 8);
+ strscpy(shdr->Signature, "BSRSPYL");
return 0;
}
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h
index f9c1fd32d0b8c3..5e83cb9da9028e 100644
--- a/fs/smb/client/trace.h
+++ b/fs/smb/client/trace.h
@@ -375,6 +375,7 @@ DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(get_reparse_compound_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(delete_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mkdir_enter);
DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(tdis_enter);
+DEFINE_SMB3_INF_COMPOUND_ENTER_EVENT(mknod_enter);
DECLARE_EVENT_CLASS(smb3_inf_compound_done_class,
TP_PROTO(unsigned int xid,
@@ -415,7 +416,7 @@ DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(query_wsl_ea_compound_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(delete_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mkdir_done);
DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(tdis_done);
-
+DEFINE_SMB3_INF_COMPOUND_DONE_EVENT(mknod_done);
DECLARE_EVENT_CLASS(smb3_inf_compound_err_class,
TP_PROTO(unsigned int xid,
@@ -461,6 +462,7 @@ DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(query_wsl_ea_compound_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mkdir_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(delete_err);
DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(tdis_err);
+DEFINE_SMB3_INF_COMPOUND_ERR_EVENT(mknod_err);
/*
* For logging SMB3 Status code and Command for responses which return errors
diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index 8ca8a45c4c621c..686b321c5a8bb5 100644
--- a/fs/smb/server/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
@@ -167,7 +167,8 @@ struct ksmbd_share_config_response {
__u16 force_uid;
__u16 force_gid;
__s8 share_name[KSMBD_REQ_MAX_SHARE_NAME];
- __u32 reserved[112]; /* Reserved room */
+ __u32 reserved[111]; /* Reserved room */
+ __u32 payload_sz;
__u32 veto_list_sz;
__s8 ____payload[];
};
diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c
index 328a412259dc1b..a2f0a2edceb8ae 100644
--- a/fs/smb/server/mgmt/share_config.c
+++ b/fs/smb/server/mgmt/share_config.c
@@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um,
share->name = kstrdup(name, GFP_KERNEL);
if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
- share->path = kstrdup(ksmbd_share_config_path(resp),
+ int path_len = PATH_MAX;
+
+ if (resp->payload_sz)
+ path_len = resp->payload_sz - resp->veto_list_sz;
+
+ share->path = kstrndup(ksmbd_share_config_path(resp), path_len,
GFP_KERNEL);
if (share->path)
share->path_sz = strlen(share->path);
diff --git a/fs/smb/server/smb2ops.c b/fs/smb/server/smb2ops.c
index a45f7dca482e01..606aa3c5189a28 100644
--- a/fs/smb/server/smb2ops.c
+++ b/fs/smb/server/smb2ops.c
@@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
+ (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
+ conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
+ conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
}
@@ -278,11 +283,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
SMB2_GLOBAL_CAP_DIRECTORY_LEASING;
- if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
- (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
- conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
- conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
-
if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index d478fa0c57abdb..5723bbf372d7cc 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -5857,8 +5857,9 @@ static int smb2_rename(struct ksmbd_work *work,
if (!file_info->ReplaceIfExists)
flags = RENAME_NOREPLACE;
- smb_break_all_levII_oplock(work, fp, 0);
rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags);
+ if (!rc)
+ smb_break_all_levII_oplock(work, fp, 0);
out:
kfree(new_name);
return rc;
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index f29bb03f0dc47b..8752ac82c557bf 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -65,6 +65,7 @@ struct ipc_msg_table_entry {
struct hlist_node ipc_table_hlist;
void *response;
+ unsigned int msg_sz;
};
static struct delayed_work ipc_timer_work;
@@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz)
}
memcpy(entry->response, payload, sz);
+ entry->msg_sz = sz;
wake_up_interruptible(&entry->wait);
ret = 0;
break;
@@ -453,6 +455,34 @@ out:
return ret;
}
+static int ipc_validate_msg(struct ipc_msg_table_entry *entry)
+{
+ unsigned int msg_sz = entry->msg_sz;
+
+ if (entry->type == KSMBD_EVENT_RPC_REQUEST) {
+ struct ksmbd_rpc_command *resp = entry->response;
+
+ msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz;
+ } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) {
+ struct ksmbd_spnego_authen_response *resp = entry->response;
+
+ msg_sz = sizeof(struct ksmbd_spnego_authen_response) +
+ resp->session_key_len + resp->spnego_blob_len;
+ } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) {
+ struct ksmbd_share_config_response *resp = entry->response;
+
+ if (resp->payload_sz) {
+ if (resp->payload_sz < resp->veto_list_sz)
+ return -EINVAL;
+
+ msg_sz = sizeof(struct ksmbd_share_config_response) +
+ resp->payload_sz;
+ }
+ }
+
+ return entry->msg_sz != msg_sz ? -EINVAL : 0;
+}
+
static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle)
{
struct ipc_msg_table_entry entry;
@@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle
ret = wait_event_interruptible_timeout(entry.wait,
entry.response != NULL,
IPC_WAIT_TIMEOUT);
+ if (entry.response) {
+ ret = ipc_validate_msg(&entry);
+ if (ret) {
+ kvfree(entry.response);
+ entry.response = NULL;
+ }
+ }
out:
down_write(&ipc_msg_table_lock);
hash_del(&entry.ipc_table_hlist);
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index aa3411354e66d0..16bd693d0b3aa2 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -48,6 +48,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
gid_t i_gid;
int err;
+ inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
+ if (inode->i_ino == 0)
+ return -EINVAL;
+
err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
if (err)
return err;
@@ -58,7 +62,6 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
i_uid_write(inode, i_uid);
i_gid_write(inode, i_gid);
- inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0);
inode_set_atime(inode, inode_get_mtime_sec(inode), 0);
inode_set_ctime(inode, inode_get_mtime_sec(inode), 0);
diff --git a/fs/super.c b/fs/super.c
index 71d9779c42b10a..69ce6c60096847 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1515,29 +1515,11 @@ static int fs_bdev_thaw(struct block_device *bdev)
return error;
}
-static void fs_bdev_super_get(void *data)
-{
- struct super_block *sb = data;
-
- spin_lock(&sb_lock);
- sb->s_count++;
- spin_unlock(&sb_lock);
-}
-
-static void fs_bdev_super_put(void *data)
-{
- struct super_block *sb = data;
-
- put_super(sb);
-}
-
const struct blk_holder_ops fs_holder_ops = {
.mark_dead = fs_bdev_mark_dead,
.sync = fs_bdev_sync,
.freeze = fs_bdev_freeze,
.thaw = fs_bdev_thaw,
- .get_holder = fs_bdev_super_get,
- .put_holder = fs_bdev_super_put,
};
EXPORT_SYMBOL_GPL(fs_holder_ops);
@@ -1562,7 +1544,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
* writable from userspace even for a read-only block device.
*/
if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
- fput(bdev_file);
+ bdev_fput(bdev_file);
return -EACCES;
}
@@ -1573,7 +1555,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
if (fc)
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
- fput(bdev_file);
+ bdev_fput(bdev_file);
return -EBUSY;
}
spin_lock(&sb_lock);
@@ -1693,7 +1675,7 @@ void kill_block_super(struct super_block *sb)
generic_shutdown_super(sb);
if (bdev) {
sync_blockdev(bdev);
- fput(sb->s_bdev_file);
+ bdev_fput(sb->s_bdev_file);
}
}
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index dc067eeb638744..894c6ca1e50020 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -336,6 +336,7 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode,
/**
* lookup_file - look up a file in the tracefs filesystem
+ * @parent_ei: Pointer to the eventfs_inode that represents parent of the file
* @dentry: the dentry to look up
* @mode: the permission that the file should have.
* @attr: saved attributes changed by user
@@ -389,6 +390,7 @@ static struct dentry *lookup_file(struct eventfs_inode *parent_ei,
/**
* lookup_dir_entry - look up a dir in the tracefs filesystem
* @dentry: the directory to look up
+ * @pei: Pointer to the parent eventfs_inode if available
* @ei: the eventfs_inode that represents the directory to create
*
* This function will look up a dentry for a directory represented by
@@ -478,16 +480,20 @@ void eventfs_d_release(struct dentry *dentry)
/**
* lookup_file_dentry - create a dentry for a file of an eventfs_inode
+ * @dentry: The parent dentry under which the new file's dentry will be created
* @ei: the eventfs_inode that the file will be created under
* @idx: the index into the entry_attrs[] of the @ei
- * @parent: The parent dentry of the created file.
- * @name: The name of the file to create
* @mode: The mode of the file.
* @data: The data to use to set the inode of the file with on open()
* @fops: The fops of the file to be created.
*
- * Create a dentry for a file of an eventfs_inode @ei and place it into the
- * address located at @e_dentry.
+ * This function creates a dentry for a file associated with an
+ * eventfs_inode @ei. It uses the entry attributes specified by @idx,
+ * if available. The file will have the specified @mode and its inode will be
+ * set up with @data upon open. The file operations will be set to @fops.
+ *
+ * Return: Returns a pointer to the newly created file's dentry or an error
+ * pointer.
*/
static struct dentry *
lookup_file_dentry(struct dentry *dentry,
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
index 2307f8037efc3d..118dedef8ebe8d 100644
--- a/fs/vboxsf/file.c
+++ b/fs/vboxsf/file.c
@@ -218,6 +218,7 @@ const struct file_operations vboxsf_reg_fops = {
.release = vboxsf_file_release,
.fsync = noop_fsync,
.splice_read = filemap_splice_read,
+ .setlease = simple_nosetlease,
};
const struct inode_operations vboxsf_reg_iops = {
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index cabe8ac4fefc5d..ffb1d565da3981 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -151,11 +151,11 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
if (!sbi->nls) {
vbg_err("vboxsf: Count not load '%s' nls\n", nls_name);
err = -EINVAL;
- goto fail_free;
+ goto fail_destroy_idr;
}
}
- sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL);
+ sbi->bdi_id = ida_alloc(&vboxsf_bdi_ida, GFP_KERNEL);
if (sbi->bdi_id < 0) {
err = sbi->bdi_id;
goto fail_free;
@@ -221,9 +221,10 @@ fail_unmap:
vboxsf_unmap_folder(sbi->root);
fail_free:
if (sbi->bdi_id >= 0)
- ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+ ida_free(&vboxsf_bdi_ida, sbi->bdi_id);
if (sbi->nls)
unload_nls(sbi->nls);
+fail_destroy_idr:
idr_destroy(&sbi->ino_idr);
kfree(sbi);
return err;
@@ -268,7 +269,7 @@ static void vboxsf_put_super(struct super_block *sb)
vboxsf_unmap_folder(sbi->root);
if (sbi->bdi_id >= 0)
- ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+ ida_free(&vboxsf_bdi_ida, sbi->bdi_id);
if (sbi->nls)
unload_nls(sbi->nls);
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
index 72ac9320e6a35f..9515bbf0b54ce8 100644
--- a/fs/vboxsf/utils.c
+++ b/fs/vboxsf/utils.c
@@ -440,7 +440,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
{
const char *in;
char *out;
- size_t out_len;
size_t out_bound_len;
size_t in_bound_len;
@@ -448,7 +447,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
in_bound_len = utf8_len;
out = name;
- out_len = 0;
/* Reserve space for terminating 0 */
out_bound_len = name_bound_len - 1;
@@ -469,7 +467,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
out += nb;
out_bound_len -= nb;
- out_len += nb;
}
*out = 0;
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index d991eec0543683..73a4b895de6704 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -530,7 +530,8 @@ xfs_validate_sb_common(
}
if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit),
- XFS_FSB_TO_B(mp, sbp->sb_width), 0, false))
+ XFS_FSB_TO_B(mp, sbp->sb_width), 0,
+ xfs_buf_daddr(bp) == XFS_SB_DADDR, false))
return -EFSCORRUPTED;
/*
@@ -1323,8 +1324,10 @@ xfs_sb_get_secondary(
}
/*
- * sunit, swidth, sectorsize(optional with 0) should be all in bytes,
- * so users won't be confused by values in error messages.
+ * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users
+ * won't be confused by values in error messages. This function returns false
+ * if the stripe geometry is invalid and the caller is unable to repair the
+ * stripe configuration later in the mount process.
*/
bool
xfs_validate_stripe_geometry(
@@ -1332,20 +1335,21 @@ xfs_validate_stripe_geometry(
__s64 sunit,
__s64 swidth,
int sectorsize,
+ bool may_repair,
bool silent)
{
if (swidth > INT_MAX) {
if (!silent)
xfs_notice(mp,
"stripe width (%lld) is too large", swidth);
- return false;
+ goto check_override;
}
if (sunit > swidth) {
if (!silent)
xfs_notice(mp,
"stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth);
- return false;
+ goto check_override;
}
if (sectorsize && (int)sunit % sectorsize) {
@@ -1353,21 +1357,21 @@ xfs_validate_stripe_geometry(
xfs_notice(mp,
"stripe unit (%lld) must be a multiple of the sector size (%d)",
sunit, sectorsize);
- return false;
+ goto check_override;
}
if (sunit && !swidth) {
if (!silent)
xfs_notice(mp,
"invalid stripe unit (%lld) and stripe width of 0", sunit);
- return false;
+ goto check_override;
}
if (!sunit && swidth) {
if (!silent)
xfs_notice(mp,
"invalid stripe width (%lld) and stripe unit of 0", swidth);
- return false;
+ goto check_override;
}
if (sunit && (int)swidth % (int)sunit) {
@@ -1375,9 +1379,27 @@ xfs_validate_stripe_geometry(
xfs_notice(mp,
"stripe width (%lld) must be a multiple of the stripe unit (%lld)",
swidth, sunit);
- return false;
+ goto check_override;
}
return true;
+
+check_override:
+ if (!may_repair)
+ return false;
+ /*
+ * During mount, mp->m_dalign will not be set unless the sunit mount
+ * option was set. If it was set, ignore the bad stripe alignment values
+ * and allow the validation and overwrite later in the mount process to
+ * attempt to overwrite the bad stripe alignment values with the values
+ * supplied by mount options.
+ */
+ if (!mp->m_dalign)
+ return false;
+ if (!silent)
+ xfs_notice(mp,
+"Will try to correct with specified mount options sunit (%d) and swidth (%d)",
+ BBTOB(mp->m_dalign), BBTOB(mp->m_swidth));
+ return true;
}
/*
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 2e8e8d63d4eb22..37b1ed1bc2095e 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -35,8 +35,9 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp,
struct xfs_trans *tp, xfs_agnumber_t agno,
struct xfs_buf **bpp);
-extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
- __s64 sunit, __s64 swidth, int sectorsize, bool silent);
+bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
+ __s64 sunit, __s64 swidth, int sectorsize, bool may_repair,
+ bool silent);
uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index abff79a77c72b6..47a20cf5205f00 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -1044,9 +1044,7 @@ xchk_irele(
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
- if (current->journal_info != NULL) {
- ASSERT(current->journal_info == sc->tp);
-
+ if (sc->tp) {
/*
* If we are in a transaction, we /cannot/ drop the inode
* ourselves, because the VFS will trigger writeback, which
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1698507d1ac73a..3f428620ebf2a3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -503,13 +503,6 @@ xfs_vm_writepages(
{
struct xfs_writepage_ctx wpc = { };
- /*
- * Writing back data in a transaction context can result in recursive
- * transactions. This is bad, so issue a warning and get out of here.
- */
- if (WARN_ON_ONCE(current->journal_info))
- return 0;
-
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 1a18c381127e21..f0fa02264edaae 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2030,7 +2030,7 @@ xfs_free_buftarg(
fs_put_dax(btp->bt_daxdev, btp->bt_mount);
/* the main block device is closed by kill_block_super */
if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
- fput(btp->bt_bdev_file);
+ bdev_fput(btp->bt_bdev_file);
kfree(btp);
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index e64265bc0b3371..74f1812b03cbd2 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -2039,8 +2039,10 @@ xfs_inodegc_want_queue_work(
* - Memory shrinkers queued the inactivation worker and it hasn't finished.
* - The queue depth exceeds the maximum allowable percpu backlog.
*
- * Note: If the current thread is running a transaction, we don't ever want to
- * wait for other transactions because that could introduce a deadlock.
+ * Note: If we are in a NOFS context here (e.g. current thread is running a
+ * transaction) the we don't want to block here as inodegc progress may require
+ * filesystem resources we hold to make progress and that could result in a
+ * deadlock. Hence we skip out of here if we are in a scoped NOFS context.
*/
static inline bool
xfs_inodegc_want_flush_work(
@@ -2048,7 +2050,7 @@ xfs_inodegc_want_flush_work(
unsigned int items,
unsigned int shrinker_hits)
{
- if (current->journal_info)
+ if (current->flags & PF_MEMALLOC_NOFS)
return false;
if (shrinker_hits > 0)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ea48774f6b76d3..d55b42b2480d6c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1301,8 +1301,19 @@ xfs_link(
*/
if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
tdp->i_projid != sip->i_projid)) {
- error = -EXDEV;
- goto error_return;
+ /*
+ * Project quota setup skips special files which can
+ * leave inodes in a PROJINHERIT directory without a
+ * project ID set. We need to allow links to be made
+ * to these "project-less" inodes because userspace
+ * expects them to succeed after project ID setup,
+ * but everything else should be rejected.
+ */
+ if (!special_file(VFS_I(sip)->i_mode) ||
+ sip->i_projid != 0) {
+ error = -EXDEV;
+ goto error_return;
+ }
}
if (!resblks) {
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c21f10ab0f5dbe..bce020374c5eba 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -485,7 +485,7 @@ xfs_open_devices(
mp->m_logdev_targp = mp->m_ddev_targp;
/* Handle won't be used, drop it */
if (logdev_file)
- fput(logdev_file);
+ bdev_fput(logdev_file);
}
return 0;
@@ -497,10 +497,10 @@ xfs_open_devices(
xfs_free_buftarg(mp->m_ddev_targp);
out_close_rtdev:
if (rtdev_file)
- fput(rtdev_file);
+ bdev_fput(rtdev_file);
out_close_logdev:
if (logdev_file)
- fput(logdev_file);
+ bdev_fput(logdev_file);
return error;
}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 3f7e3a09a49ff4..1636663707dc04 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -268,19 +268,14 @@ static inline void
xfs_trans_set_context(
struct xfs_trans *tp)
{
- ASSERT(current->journal_info == NULL);
tp->t_pflags = memalloc_nofs_save();
- current->journal_info = tp;
}
static inline void
xfs_trans_clear_context(
struct xfs_trans *tp)
{
- if (current->journal_info == tp) {
- memalloc_nofs_restore(tp->t_pflags);
- current->journal_info = NULL;
- }
+ memalloc_nofs_restore(tp->t_pflags);
}
static inline void
@@ -288,10 +283,8 @@ xfs_trans_switch_context(
struct xfs_trans *old_tp,
struct xfs_trans *new_tp)
{
- ASSERT(current->journal_info == old_tp);
new_tp->t_pflags = old_tp->t_pflags;
old_tp->t_pflags = 0;
- current->journal_info = new_tp;
}
#endif /* __XFS_TRANS_H__ */
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index c6a124e8d565fe..964fa7f2400335 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1048,7 +1048,7 @@ static int zonefs_init_zgroup(struct super_block *sb,
zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
zonefs_zgroup_name(ztype),
zgroup->g_nr_zones,
- zgroup->g_nr_zones > 1 ? "s" : "");
+ str_plural(zgroup->g_nr_zones));
return 0;
}
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 5de954e2b18aaa..e7796f373d0dac 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -911,17 +911,19 @@ static inline bool acpi_int_uid_match(struct acpi_device *adev, u64 uid2)
* acpi_dev_hid_uid_match - Match device by supplied HID and UID
* @adev: ACPI device to match.
* @hid2: Hardware ID of the device.
- * @uid2: Unique ID of the device, pass 0 or NULL to not check _UID.
+ * @uid2: Unique ID of the device, pass NULL to not check _UID.
*
* Matches HID and UID in @adev with given @hid2 and @uid2. Absence of @uid2
* will be treated as a match. If user wants to validate @uid2, it should be
* done before calling this function.
*
- * Returns: %true if matches or @uid2 is 0 or NULL, %false otherwise.
+ * Returns: %true if matches or @uid2 is NULL, %false otherwise.
*/
#define acpi_dev_hid_uid_match(adev, hid2, uid2) \
(acpi_dev_hid_match(adev, hid2) && \
- (!(uid2) || acpi_dev_uid_match(adev, uid2)))
+ /* Distinguish integer 0 from NULL @uid2 */ \
+ (_Generic(uid2, ACPI_STR_TYPES(!(uid2)), default: 0) || \
+ acpi_dev_uid_match(adev, uid2)))
void acpi_dev_clear_dependencies(struct acpi_device *supplier);
bool acpi_dev_ready_for_enumeration(const struct acpi_device *device);
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 6e794420bd398c..b7de3a4eade1c2 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -156,7 +156,10 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
#else /* !CONFIG_BUG */
#ifndef HAVE_ARCH_BUG
-#define BUG() do {} while (1)
+#define BUG() do { \
+ do {} while (1); \
+ unreachable(); \
+} while (0)
#endif
#ifndef HAVE_ARCH_BUG_ON
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
deleted file mode 100644
index 570cd4da72105c..00000000000000
--- a/include/asm-generic/export.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef __ASM_GENERIC_EXPORT_H
-#define __ASM_GENERIC_EXPORT_H
-
-/*
- * <asm/export.h> and <asm-generic/export.h> are deprecated.
- * Please include <linux/export.h> directly.
- */
-#include <linux/export.h>
-
-#endif
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 87e3d49a4e29bf..814207e7c37fcf 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -512,13 +512,9 @@ struct hv_proximity_domain_flags {
u32 proximity_info_valid : 1;
} __packed;
-/* Not a union in windows but useful for zeroing */
-union hv_proximity_domain_info {
- struct {
- u32 domain_id;
- struct hv_proximity_domain_flags flags;
- };
- u64 as_uint64;
+struct hv_proximity_domain_info {
+ u32 domain_id;
+ struct hv_proximity_domain_flags flags;
} __packed;
struct hv_lp_startup_status {
@@ -532,14 +528,13 @@ struct hv_lp_startup_status {
} __packed;
/* HvAddLogicalProcessor hypercall */
-struct hv_add_logical_processor_in {
+struct hv_input_add_logical_processor {
u32 lp_index;
u32 apic_id;
- union hv_proximity_domain_info proximity_domain_info;
- u64 flags;
+ struct hv_proximity_domain_info proximity_domain_info;
} __packed;
-struct hv_add_logical_processor_out {
+struct hv_output_add_logical_processor {
struct hv_lp_startup_status startup_status;
} __packed;
@@ -560,7 +555,7 @@ struct hv_create_vp {
u8 padding[3];
u8 subnode_type;
u64 subnode_id;
- union hv_proximity_domain_info proximity_domain_info;
+ struct hv_proximity_domain_info proximity_domain_info;
u64 flags;
} __packed;
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 99935779682dc2..8fe7aaab25990a 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -21,6 +21,7 @@
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
+#include <acpi/acpi_numa.h>
#include <linux/cpumask.h>
#include <linux/nmi.h>
#include <asm/ptrace.h>
@@ -67,6 +68,19 @@ extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
bool hv_isolation_type_snp(void);
bool hv_isolation_type_tdx(void);
+static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node)
+{
+ struct hv_proximity_domain_info pxm_info = {};
+
+ if (node != NUMA_NO_NODE) {
+ pxm_info.domain_id = node_to_pxm(node);
+ pxm_info.flags.proximity_info_valid = 1;
+ pxm_info.flags.proximity_preferred = 1;
+ }
+
+ return pxm_info;
+}
+
/* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */
static inline int hv_result(u64 status)
{
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index eb4c369a79eb31..35d4ca4f6122c7 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void);
*/
#define kvm_pmu_update_vcpu_events(vcpu) \
do { \
- if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \
+ if (!has_vhe() && kvm_arm_support_pmu_v3()) \
vcpu->arch.pmu.events = *kvm_get_pmu_events(); \
} while (0)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c3e8f7cf96be9e..172c918799995f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1505,16 +1505,6 @@ struct blk_holder_ops {
* Thaw the file system mounted on the block device.
*/
int (*thaw)(struct block_device *bdev);
-
- /*
- * If needed, get a reference to the holder.
- */
- void (*get_holder)(void *holder);
-
- /*
- * Release the holder.
- */
- void (*put_holder)(void *holder);
};
/*
@@ -1585,6 +1575,7 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
int bdev_freeze(struct block_device *bdev);
int bdev_thaw(struct block_device *bdev);
+void bdev_fput(struct file *bdev_file);
struct io_comp_batch {
struct request *req_list;
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index ca73940e26df83..3f4b4ac527ca28 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -10,6 +10,7 @@
#ifdef __KERNEL__
#include <linux/kernel.h>
#include <linux/types.h>
+bool __init cmdline_has_extra_options(void);
#else /* !__KERNEL__ */
/*
* NOTE: This is only for tools/bootconfig, because tools/bootconfig will
@@ -287,7 +288,12 @@ int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos);
int __init xbc_get_info(int *node_size, size_t *data_size);
/* XBC cleanup data structures */
-void __init xbc_exit(void);
+void __init _xbc_exit(bool early);
+
+static inline void xbc_exit(void)
+{
+ _xbc_exit(false);
+}
/* XBC embedded bootconfig data in kernel */
#ifdef CONFIG_BOOT_CONFIG_EMBED
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4f20f62f9d63da..890e152d553ea3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1574,12 +1574,26 @@ struct bpf_link {
enum bpf_link_type type;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
- struct work_struct work;
+ /* rcu is used before freeing, work can be used to schedule that
+ * RCU-based freeing before that, so they never overlap
+ */
+ union {
+ struct rcu_head rcu;
+ struct work_struct work;
+ };
};
struct bpf_link_ops {
void (*release)(struct bpf_link *link);
+ /* deallocate link resources callback, called without RCU grace period
+ * waiting
+ */
void (*dealloc)(struct bpf_link *link);
+ /* deallocate link resources callback, called after RCU grace period;
+ * if underlying BPF program is sleepable we go through tasks trace
+ * RCU GP and then "classic" RCU GP
+ */
+ void (*dealloc_deferred)(struct bpf_link *link);
int (*detach)(struct bpf_link *link);
int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
struct bpf_prog *old_prog);
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index cb0d6cd1c12f24..60693a14589462 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -90,6 +90,14 @@ enum cc_attr {
* Examples include TDX Guest.
*/
CC_ATTR_HOTPLUG_DISABLED,
+
+ /**
+ * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host.
+ *
+ * The host kernel is running with the necessary features
+ * enabled to run SEV-SNP guests.
+ */
+ CC_ATTR_HOST_SEV_SNP,
};
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
@@ -107,10 +115,14 @@ enum cc_attr {
* * FALSE - Specified Confidential Computing attribute is not active
*/
bool cc_platform_has(enum cc_attr attr);
+void cc_platform_set(enum cc_attr attr);
+void cc_platform_clear(enum cc_attr attr);
#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */
static inline bool cc_platform_has(enum cc_attr attr) { return false; }
+static inline void cc_platform_set(enum cc_attr attr) { }
+static inline void cc_platform_clear(enum cc_attr attr) { }
#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c00cc6c0878a1e..8c252e073bd810 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -268,7 +268,7 @@ static inline void *offset_to_ptr(const int *off)
* - When one operand is a null pointer constant (i.e. when x is an integer
* constant expression) and the other is an object pointer (i.e. our
* third operand), the conditional operator returns the type of the
- * object pointer operand (i.e. "int *). Here, within the sizeof(), we
+ * object pointer operand (i.e. "int *"). Here, within the sizeof(), we
* would then get:
* sizeof(*((int *)(...)) == sizeof(int) == 4
* - When one operand is a void pointer (i.e. when x is not an integer
diff --git a/include/linux/device.h b/include/linux/device.h
index 97c4b046c09d94..b9f5464f44ed81 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1247,6 +1247,7 @@ void device_link_del(struct device_link *link);
void device_link_remove(void *consumer, struct device *supplier);
void device_links_supplier_sync_state_pause(void);
void device_links_supplier_sync_state_resume(void);
+void device_link_wait_removal(void);
/* Create alias, so I can be autoloaded. */
#define MODULE_ALIAS_CHARDEV(major,minor) \
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index e06bad467f55ef..c3f9bb6602ba21 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -682,4 +682,11 @@ static inline bool dma_fence_is_container(struct dma_fence *fence)
return dma_fence_is_array(fence) || dma_fence_is_chain(fence);
}
+#define DMA_FENCE_WARN(f, fmt, args...) \
+ do { \
+ struct dma_fence *__ff = (f); \
+ pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\
+ ##args); \
+ } while (0)
+
#endif /* __LINUX_DMA_FENCE_H */
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 770755df852f14..70cd7258cd29f5 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -245,7 +245,6 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
* max utilization to the allowed CPU capacity before calculating
* effective performance.
*/
- max_util = map_util_perf(max_util);
max_util = min(max_util, allowed_cpu_cap);
/*
diff --git a/include/linux/framer/framer.h b/include/linux/framer/framer.h
index 9a9b88962c296f..2b85fe9e7f9a76 100644
--- a/include/linux/framer/framer.h
+++ b/include/linux/framer/framer.h
@@ -181,12 +181,12 @@ static inline int framer_notifier_unregister(struct framer *framer,
return -ENOSYS;
}
-struct framer *framer_get(struct device *dev, const char *con_id)
+static inline struct framer *framer_get(struct device *dev, const char *con_id)
{
return ERR_PTR(-ENOSYS);
}
-void framer_put(struct device *dev, struct framer *framer)
+static inline void framer_put(struct device *dev, struct framer *framer)
{
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 00fc429b0af0fb..8dfd53b52744a4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -121,6 +121,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define FMODE_PWRITE ((__force fmode_t)0x10)
/* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC ((__force fmode_t)0x20)
+/* File writes are restricted (block device specific) */
+#define FMODE_WRITE_RESTRICTED ((__force fmode_t)0x40)
/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH ((__force fmode_t)0x200)
/* 64bit hashes as llseek() offset (for directories) */
diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h
index 868c8fb1bbc1c2..13becafe41df00 100644
--- a/include/linux/gfp_types.h
+++ b/include/linux/gfp_types.h
@@ -2,6 +2,8 @@
#ifndef __LINUX_GFP_TYPES_H
#define __LINUX_GFP_TYPES_H
+#include <linux/bits.h>
+
/* The typedef is in types.h but we want the documentation here */
#if 0
/**
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index dc75f802e28476..f8617eaf08bad7 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -646,8 +646,6 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc,
struct gpio_device *gpio_device_find(const void *data,
int (*match)(struct gpio_chip *gc,
const void *data));
-struct gpio_device *gpio_device_find_by_label(const char *label);
-struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode);
struct gpio_device *gpio_device_get(struct gpio_device *gdev);
void gpio_device_put(struct gpio_device *gdev);
@@ -814,6 +812,9 @@ struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc);
int gpio_device_get_base(struct gpio_device *gdev);
const char *gpio_device_get_label(struct gpio_device *gdev);
+struct gpio_device *gpio_device_find_by_label(const char *label);
+struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode);
+
#else /* CONFIG_GPIOLIB */
#include <asm/bug.h>
@@ -843,6 +844,18 @@ static inline const char *gpio_device_get_label(struct gpio_device *gdev)
return NULL;
}
+static inline struct gpio_device *gpio_device_find_by_label(const char *label)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
+static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
static inline int gpiochip_lock_as_irq(struct gpio_chip *gc,
unsigned int offset)
{
diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h
index 6c75c8bd44a0bb..1a14e239221f7e 100644
--- a/include/linux/gpio/property.h
+++ b/include/linux/gpio/property.h
@@ -2,7 +2,6 @@
#ifndef __LINUX_GPIO_PROPERTY_H
#define __LINUX_GPIO_PROPERTY_H
-#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */
#include <linux/property.h>
#define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6ef0557b4bff8e..96ceb4095425eb 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -832,6 +832,7 @@ struct vmbus_gpadl {
u32 gpadl_handle;
u32 size;
void *buffer;
+ bool decrypted;
};
struct vmbus_channel {
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 76121c2bb4f824..5c9bdd3ffccc89 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -67,6 +67,8 @@
* later.
* IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers,
* depends on IRQF_PERCPU.
+ * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared
+ * interrupt.
*/
#define IRQF_SHARED 0x00000080
#define IRQF_PROBE_SHARED 0x00000100
@@ -82,6 +84,7 @@
#define IRQF_COND_SUSPEND 0x00040000
#define IRQF_NO_AUTOEN 0x00080000
#define IRQF_NO_DEBUG 0x00100000
+#define IRQF_COND_ONESHOT 0x00200000
#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index e248936250852d..ac333ea81d3195 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -13,7 +13,7 @@ enum {
* A hint to not wake right away but delay until there are enough of
* tw's queued to match the number of CQEs the task is waiting for.
*
- * Must not be used wirh requests generating more than one CQE.
+ * Must not be used with requests generating more than one CQE.
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
*/
IOU_F_TWQ_LAZY_WAKE = 1,
@@ -294,7 +294,6 @@ struct io_ring_ctx {
struct io_submit_state submit_state;
- struct io_buffer_list *io_bl;
struct xarray io_bl_xa;
struct io_hash_table cancel_table_locked;
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 147feebd508cab..3f003d5fde5341 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -114,7 +114,7 @@ do { \
# define lockdep_softirq_enter() do { } while (0)
# define lockdep_softirq_exit() do { } while (0)
# define lockdep_hrtimer_enter(__hrtimer) false
-# define lockdep_hrtimer_exit(__context) do { } while (0)
+# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0)
# define lockdep_posixtimer_enter() do { } while (0)
# define lockdep_posixtimer_exit() do { } while (0)
# define lockdep_irq_work_enter(__work) do { } while (0)
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 26d68115afb826..324d792e7c7864 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -107,6 +107,7 @@ enum {
ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */
ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */
+ ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */
ATA_DFLAG_DETACH = (1 << 24),
ATA_DFLAG_DETACHED = (1 << 25),
ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0436b919f1c7fc..7b0ee64225de9c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2207,11 +2207,6 @@ static inline int arch_make_folio_accessible(struct folio *folio)
*/
#include <linux/vmstat.h>
-static __always_inline void *lowmem_page_address(const struct page *page)
-{
- return page_to_virt(page);
-}
-
#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
#define HASHED_PAGE_VIRTUAL
#endif
@@ -2234,6 +2229,11 @@ void set_page_address(struct page *page, void *virtual);
void page_address_init(void);
#endif
+static __always_inline void *lowmem_page_address(const struct page *page)
+{
+ return page_to_virt(page);
+}
+
#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
#define page_address(page) lowmem_page_address(page)
#define set_page_address(page, address) do { } while(0)
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index 5d868505a94e43..6d92b68efbf6c3 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset);
if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
&randomize_kstack_offset)) { \
u32 offset = raw_cpu_read(kstack_offset); \
- offset ^= (rand); \
+ offset = ror32(offset, 5) ^ (rand); \
raw_cpu_write(kstack_offset, offset); \
} \
} while (0)
diff --git a/include/linux/rwbase_rt.h b/include/linux/rwbase_rt.h
index 29c4e4f243e47d..f2394a409c9d5e 100644
--- a/include/linux/rwbase_rt.h
+++ b/include/linux/rwbase_rt.h
@@ -31,9 +31,9 @@ static __always_inline bool rw_base_is_locked(const struct rwbase_rt *rwb)
return atomic_read(&rwb->readers) != READER_BIAS;
}
-static inline void rw_base_assert_held_write(const struct rwbase_rt *rwb)
+static __always_inline bool rw_base_is_write_locked(const struct rwbase_rt *rwb)
{
- WARN_ON(atomic_read(&rwb->readers) != WRITER_BIAS);
+ return atomic_read(&rwb->readers) == WRITER_BIAS;
}
static __always_inline bool rw_base_is_contended(const struct rwbase_rt *rwb)
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 4f1c18992f768f..c8b543d428b0a8 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -167,14 +167,14 @@ static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem)
return rw_base_is_locked(&sem->rwbase);
}
-static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
{
WARN_ON(!rwsem_is_locked(sem));
}
-static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
{
- rw_base_assert_held_write(sem);
+ WARN_ON(!rw_base_is_write_locked(&sem->rwbase));
}
static __always_inline int rwsem_is_contended(struct rw_semaphore *sem)
diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
index 35f3a4a8ceb1e3..acf7e1a3f3def9 100644
--- a/include/linux/secretmem.h
+++ b/include/linux/secretmem.h
@@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(struct folio *folio)
/*
* Using folio_mapping() is quite slow because of the actual call
* instruction.
- * We know that secretmem pages are not compound and LRU so we can
+ * We know that secretmem pages are not compound, so we can
* save a couple of cycles here.
*/
- if (folio_test_large(folio) || !folio_test_lru(folio))
+ if (folio_test_large(folio))
return false;
mapping = (struct address_space *)
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a4c15db2f5e540..3fb18f7eb73eaf 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -110,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
int shmem_unuse(unsigned int type);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
struct mm_struct *mm, unsigned long vm_flags);
+#else
+static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
+ struct mm_struct *mm, unsigned long vm_flags)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_SHMEM
extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
#else
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0c7c67b3a87b23..9d24aec064e888 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -753,8 +753,6 @@ typedef unsigned char *sk_buff_data_t;
* @list: queue head
* @ll_node: anchor in an llist (eg socket defer_list)
* @sk: Socket we are owned by
- * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in
- * fragmentation management
* @dev: Device we arrived on/are leaving by
* @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
* @cb: Control buffer. Free for use by every layer. Put private vars here
@@ -875,10 +873,7 @@ struct sk_buff {
struct llist_node ll_node;
};
- union {
- struct sock *sk;
- int ip_defrag_offset;
- };
+ struct sock *sk;
union {
ktime_t tstamp;
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index 307961b41541a6..317200cd3a603e 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -50,11 +50,36 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src,
return 0;
}
+/* Deprecated.
+ * This is unsafe, unless caller checked user provided optlen.
+ * Prefer copy_safe_from_sockptr() instead.
+ */
static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
{
return copy_from_sockptr_offset(dst, src, 0, size);
}
+/**
+ * copy_safe_from_sockptr: copy a struct from sockptr
+ * @dst: Destination address, in kernel space. This buffer must be @ksize
+ * bytes long.
+ * @ksize: Size of @dst struct.
+ * @optval: Source address. (in user or kernel space)
+ * @optlen: Size of @optval data.
+ *
+ * Returns:
+ * * -EINVAL: @optlen < @ksize
+ * * -EFAULT: access to userspace failed.
+ * * 0 : @ksize bytes were copied
+ */
+static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
+ sockptr_t optval, unsigned int optlen)
+{
+ if (optlen < ksize)
+ return -EINVAL;
+ return copy_from_sockptr(dst, optval, ksize);
+}
+
static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
sockptr_t src, size_t usize)
{
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 3c6caa5abc7c42..e9ec32fb97d4a7 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -44,10 +44,9 @@ typedef u32 depot_stack_handle_t;
union handle_parts {
depot_stack_handle_t handle;
struct {
- /* pool_index is offset by 1 */
- u32 pool_index : DEPOT_POOL_INDEX_BITS;
- u32 offset : DEPOT_OFFSET_BITS;
- u32 extra : STACK_DEPOT_EXTRA_BITS;
+ u32 pool_index_plus_1 : DEPOT_POOL_INDEX_BITS;
+ u32 offset : DEPOT_OFFSET_BITS;
+ u32 extra : STACK_DEPOT_EXTRA_BITS;
};
};
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 48b700ba1d188a..a5c560a2f8c258 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -390,6 +390,35 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
}
#endif /* CONFIG_MIGRATION */
+#ifdef CONFIG_MEMORY_FAILURE
+
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+ return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_HWPOISON;
+}
+
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+ return 0;
+}
+#endif
+
typedef unsigned long pte_marker;
#define PTE_MARKER_UFFD_WP BIT(0)
@@ -483,8 +512,9 @@ static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry)
/*
* A pfn swap entry is a special type of swap entry that always has a pfn stored
- * in the swap offset. They are used to represent unaddressable device memory
- * and to restrict access to a page undergoing migration.
+ * in the swap offset. They can either be used to represent unaddressable device
+ * memory, to restrict access to a page undergoing migration or to represent a
+ * pfn which has been hwpoisoned and unmapped.
*/
static inline bool is_pfn_swap_entry(swp_entry_t entry)
{
@@ -492,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry)
BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
return is_migration_entry(entry) || is_device_private_entry(entry) ||
- is_device_exclusive_entry(entry);
+ is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
}
struct page_vma_mapped_walk;
@@ -561,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
}
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
-#ifdef CONFIG_MEMORY_FAILURE
-
-/*
- * Support for hardware poisoned pages
- */
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
- BUG_ON(!PageLocked(page));
- return swp_entry(SWP_HWPOISON, page_to_pfn(page));
-}
-
-static inline int is_hwpoison_entry(swp_entry_t entry)
-{
- return swp_type(entry) == SWP_HWPOISON;
-}
-
-#else
-
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
- return swp_entry(0, 0);
-}
-
-static inline int is_hwpoison_entry(swp_entry_t swp)
-{
- return 0;
-}
-#endif
-
static inline int non_swap_entry(swp_entry_t entry)
{
return swp_type(entry) >= MAX_SWAPFILES;
diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h
index c6540ceea14303..0982d1d52b24d9 100644
--- a/include/linux/timecounter.h
+++ b/include/linux/timecounter.h
@@ -22,7 +22,7 @@
*
* @read: returns the current cycle value
* @mask: bitmask for two's complement
- * subtraction of non 64 bit counters,
+ * subtraction of non-64-bit counters,
* see CYCLECOUNTER_MASK() helper macro
* @mult: cycle to nanosecond multiplier
* @shift: cycle to nanosecond divisor (power of two)
@@ -35,7 +35,7 @@ struct cyclecounter {
};
/**
- * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds
+ * struct timecounter - layer above a &struct cyclecounter which counts nanoseconds
* Contains the state needed by timecounter_read() to detect
* cycle counter wrap around. Initialize with
* timecounter_init(). Also used to convert cycle counts into the
@@ -66,6 +66,8 @@ struct timecounter {
* @cycles: Cycles
* @mask: bit mask for maintaining the 'frac' field
* @frac: pointer to storage for the fractional nanoseconds.
+ *
+ * Returns: cycle counter cycles converted to nanoseconds
*/
static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
u64 cycles, u64 mask, u64 *frac)
@@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc,
/**
* timecounter_adjtime - Shifts the time of the clock.
+ * @tc: The &struct timecounter to adjust
* @delta: Desired change in nanoseconds.
*/
static inline void timecounter_adjtime(struct timecounter *tc, s64 delta)
@@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc,
*
* In other words, keeps track of time since the same epoch as
* the function which generated the initial time stamp.
+ *
+ * Returns: nanoseconds since the initial time stamp
*/
extern u64 timecounter_read(struct timecounter *tc);
@@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc);
*
* This allows conversion of cycle counter values which were generated
* in the past.
+ *
+ * Returns: cycle counter converted to nanoseconds since the initial time stamp
*/
extern u64 timecounter_cyc2time(const struct timecounter *tc,
u64 cycle_tstamp);
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 7e50cbd97f86e3..0ea7823b7f31f6 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -22,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv,
const struct timezone *tz);
/*
- * ktime_get() family: read the current time in a multitude of ways,
+ * ktime_get() family - read the current time in a multitude of ways.
*
* The default time reference is CLOCK_MONOTONIC, starting at
* boot time but not counting the time spent in suspend.
* For other references, use the functions with "real", "clocktai",
* "boottime" and "raw" suffixes.
*
- * To get the time in a different format, use the ones wit
+ * To get the time in a different format, use the ones with
* "ns", "ts64" and "seconds" suffix.
*
* See Documentation/core-api/timekeeping.rst for more details.
@@ -74,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void);
/**
* ktime_get_real - get the real (wall-) time in ktime_t format
+ *
+ * Returns: real (wall) time in ktime_t format
*/
static inline ktime_t ktime_get_real(void)
{
@@ -86,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void)
}
/**
- * ktime_get_boottime - Returns monotonic time since boot in ktime_t format
+ * ktime_get_boottime - Get monotonic time since boot in ktime_t format
*
* This is similar to CLOCK_MONTONIC/ktime_get, but also includes the
* time spent in suspend.
+ *
+ * Returns: monotonic time since boot in ktime_t format
*/
static inline ktime_t ktime_get_boottime(void)
{
@@ -102,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void)
}
/**
- * ktime_get_clocktai - Returns the TAI time of day in ktime_t format
+ * ktime_get_clocktai - Get the TAI time of day in ktime_t format
+ *
+ * Returns: the TAI time of day in ktime_t format
*/
static inline ktime_t ktime_get_clocktai(void)
{
@@ -144,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void)
/**
* ktime_mono_to_real - Convert monotonic time to clock realtime
+ * @mono: monotonic time to convert
+ *
+ * Returns: time converted to realtime clock
*/
static inline ktime_t ktime_mono_to_real(ktime_t mono)
{
return ktime_mono_to_any(mono, TK_OFFS_REAL);
}
+/**
+ * ktime_get_ns - Get the current time in nanoseconds
+ *
+ * Returns: current time converted to nanoseconds
+ */
static inline u64 ktime_get_ns(void)
{
return ktime_to_ns(ktime_get());
}
+/**
+ * ktime_get_real_ns - Get the current real/wall time in nanoseconds
+ *
+ * Returns: current real time converted to nanoseconds
+ */
static inline u64 ktime_get_real_ns(void)
{
return ktime_to_ns(ktime_get_real());
}
+/**
+ * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds
+ *
+ * Returns: current boottime converted to nanoseconds
+ */
static inline u64 ktime_get_boottime_ns(void)
{
return ktime_to_ns(ktime_get_boottime());
}
+/**
+ * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds
+ *
+ * Returns: current TAI time converted to nanoseconds
+ */
static inline u64 ktime_get_clocktai_ns(void)
{
return ktime_to_ns(ktime_get_clocktai());
}
+/**
+ * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds
+ *
+ * Returns: current raw monotonic time converted to nanoseconds
+ */
static inline u64 ktime_get_raw_ns(void)
{
return ktime_to_ns(ktime_get_raw());
@@ -224,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void);
extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
-/*
- * struct ktime_timestanps - Simultaneous mono/boot/real timestamps
+/**
+ * struct ktime_timestamps - Simultaneous mono/boot/real timestamps
* @mono: Monotonic timestamp
* @boot: Boottime timestamp
* @real: Realtime timestamp
@@ -242,7 +276,8 @@ struct ktime_timestamps {
* @cycles: Clocksource counter value to produce the system times
* @real: Realtime system time
* @raw: Monotonic raw system time
- * @clock_was_set_seq: The sequence number of clock was set events
+ * @cs_id: Clocksource ID
+ * @clock_was_set_seq: The sequence number of clock-was-set events
* @cs_was_changed_seq: The sequence number of clocksource change events
*/
struct system_time_snapshot {
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 14a633ba61d643..e67ecd1cbc97d6 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -22,7 +22,7 @@
#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
#endif
-/**
+/*
* @TIMER_DEFERRABLE: A deferrable timer will work normally when the
* system is busy, but will not cause a CPU to come out of idle just
* to service it; instead, the timer will be serviced when the CPU
@@ -140,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { }
* or not. Callers must ensure serialization wrt. other operations done
* to this timer, eg. interrupt contexts, or other CPUs on SMP.
*
- * return value: 1 if the timer is pending, 0 if not.
+ * Returns: 1 if the timer is pending, 0 if not.
*/
static inline int timer_pending(const struct timer_list * timer)
{
@@ -175,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer);
* See timer_delete_sync() for detailed explanation.
*
* Do not use in new code. Use timer_delete_sync() instead.
+ *
+ * Returns:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
*/
static inline int del_timer_sync(struct timer_list *timer)
{
@@ -188,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer)
* See timer_delete() for detailed explanation.
*
* Do not use in new code. Use timer_delete() instead.
+ *
+ * Returns:
+ * * %0 - The timer was not pending
+ * * %1 - The timer was pending and deactivated
*/
static inline int del_timer(struct timer_list *timer)
{
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index ffe48e69b3f3ae..457879938fc198 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p)
p->v++;
}
-static inline void u64_stats_init(struct u64_stats_sync *syncp)
-{
- seqcount_init(&syncp->seq);
-}
+#define u64_stats_init(syncp) \
+ do { \
+ struct u64_stats_sync *__s = (syncp); \
+ seqcount_init(&__s->seq); \
+ } while (0)
static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
{
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 3748e82b627b70..e398e1dbd2d365 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -108,7 +108,7 @@ struct udp_sock {
#define udp_assign_bit(nr, sk, val) \
assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val)
-#define UDP_MAX_SEGMENTS (1 << 6UL)
+#define UDP_MAX_SEGMENTS (1 << 7UL)
#define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk)
@@ -150,6 +150,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
}
}
+DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
+#if IS_ENABLED(CONFIG_IPV6)
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+#endif
+
+static inline bool udp_encap_needed(void)
+{
+ if (static_branch_unlikely(&udp_encap_needed_key))
+ return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (static_branch_unlikely(&udpv6_encap_needed_key))
+ return true;
+#endif
+
+ return false;
+}
+
static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
{
if (!skb_is_gso(skb))
@@ -163,6 +181,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
!udp_test_bit(ACCEPT_FRAGLIST, sk))
return true;
+ /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still
+ * land in a tunnel as the socket check in udp_gro_receive cannot be
+ * foolproof.
+ */
+ if (udp_encap_needed() &&
+ READ_ONCE(udp_sk(sk)->encap_rcv) &&
+ !(skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)))
+ return true;
+
return false;
}
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b0201747a263a9..26c4325aa3734e 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -170,7 +170,7 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev);
/**
* struct virtio_driver - operations for a virtio I/O driver
- * @driver: underlying device driver (populate name and owner).
+ * @driver: underlying device driver (populate name).
* @id_table: the ids serviced by this driver.
* @feature_table: an array of feature numbers supported by this driver.
* @feature_table_size: number of entries in the feature table array.
@@ -208,7 +208,10 @@ static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
return container_of(drv, struct virtio_driver, driver);
}
-int register_virtio_driver(struct virtio_driver *drv);
+/* use a macro to avoid include chaining to get THIS_MODULE */
+#define register_virtio_driver(drv) \
+ __register_virtio_driver(drv, THIS_MODULE)
+int __register_virtio_driver(struct virtio_driver *drv, struct module *owner);
void unregister_virtio_driver(struct virtio_driver *drv);
/* module_virtio_driver() - Helper macro for drivers that don't do
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 9d06eb945509ec..62a407db1bf5ff 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -438,6 +438,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp)
refcount_inc(&ifp->refcnt);
}
+static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp)
+{
+ return refcount_inc_not_zero(&ifp->refcnt);
+}
/*
* compute link-local solicited-node multicast address
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 9fe95a22abeb7e..eaec5d6caa29d2 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -585,6 +585,15 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
return skb;
}
+static inline int bt_copy_from_sockptr(void *dst, size_t dst_size,
+ sockptr_t src, size_t src_size)
+{
+ if (dst_size > src_size)
+ return -EINVAL;
+
+ return copy_from_sockptr(dst, src, dst_size);
+}
+
int bt_to_errno(u16 code);
__u8 bt_status(int err);
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8701ca5f31eec3..5c12761cbc0e21 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -176,6 +176,15 @@ enum {
*/
HCI_QUIRK_USE_BDADDR_PROPERTY,
+ /* When this quirk is set, the Bluetooth Device Address provided by
+ * the 'local-bd-address' fwnode property is incorrectly specified in
+ * big-endian order.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BDADDR_PROPERTY_BROKEN,
+
/* When this quirk is set, the duplicate filtering during
* scanning is based on Bluetooth devices addresses. To allow
* RSSI based updates, restart scanning if needed.
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 2e2be4fd2bb653..1e09329acc4268 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4991,6 +4991,7 @@ struct cfg80211_ops {
* set this flag to update channels on beacon hints.
* @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link
* of an NSTR mobile AP MLD.
+ * @WIPHY_FLAG_DISABLE_WEXT: disable wireless extensions for this device
*/
enum wiphy_flags {
WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0),
@@ -5002,6 +5003,7 @@ enum wiphy_flags {
WIPHY_FLAG_4ADDR_STATION = BIT(6),
WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7),
WIPHY_FLAG_IBSS_RSN = BIT(8),
+ WIPHY_FLAG_DISABLE_WEXT = BIT(9),
WIPHY_FLAG_MESH_AUTH = BIT(10),
WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11),
WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12),
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 9ab4bf704e8643..ccf171f7eb60d4 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -175,6 +175,7 @@ void inet_csk_init_xmit_timers(struct sock *sk,
void (*delack_handler)(struct timer_list *),
void (*keepalive_handler)(struct timer_list *));
void inet_csk_clear_xmit_timers(struct sock *sk);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk);
static inline void inet_csk_schedule_ack(struct sock *sk)
{
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 5cd64bb2104df3..c286cc2e766ee0 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -361,6 +361,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb)
return pskb_network_may_pull(skb, nhlen);
}
+/* Variant of pskb_inet_may_pull().
+ */
+static inline bool skb_vlan_inet_prepare(struct sk_buff *skb)
+{
+ int nhlen = 0, maclen = ETH_HLEN;
+ __be16 type = skb->protocol;
+
+ /* Essentially this is skb_protocol(skb, true)
+ * And we get MAC len.
+ */
+ if (eth_type_vlan(type))
+ type = __vlan_get_protocol(skb, type, &maclen);
+
+ switch (type) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ nhlen = sizeof(struct ipv6hdr);
+ break;
+#endif
+ case htons(ETH_P_IP):
+ nhlen = sizeof(struct iphdr);
+ break;
+ }
+ /* For ETH_P_IPV6/ETH_P_IP we make sure to pull
+ * a base network header in skb->head.
+ */
+ if (!pskb_may_pull(skb, maclen + nhlen))
+ return false;
+
+ skb_set_network_header(skb, maclen);
+ return true;
+}
+
static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
{
const struct ip_tunnel_encap_ops *ops;
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 76147feb0d10ae..4eeedf14711b30 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -39,7 +39,6 @@ enum TRI_STATE {
#define COMP_ENTRY_SIZE 64
#define RX_BUFFERS_PER_QUEUE 512
-#define MANA_RX_DATA_ALIGN 64
#define MAX_SEND_BUFFERS_PER_QUEUE 256
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index a763dd327c6ea9..9abb7ee40d72fc 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -336,7 +336,7 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
int nf_flow_table_offload_init(void);
void nf_flow_table_offload_exit(void);
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
{
__be16 proto;
@@ -352,6 +352,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
return 0;
}
+static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
+{
+ if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+ return false;
+
+ *inner_proto = __nf_flow_pppoe_proto(skb);
+
+ return true;
+}
+
#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e27c28b612e464..3f1ed467f951f6 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -307,9 +307,23 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
return (void *)priv;
}
+
+/**
+ * enum nft_iter_type - nftables set iterator type
+ *
+ * @NFT_ITER_READ: read-only iteration over set elements
+ * @NFT_ITER_UPDATE: iteration under mutex to update set element state
+ */
+enum nft_iter_type {
+ NFT_ITER_UNSPEC,
+ NFT_ITER_READ,
+ NFT_ITER_UPDATE,
+};
+
struct nft_set;
struct nft_set_iter {
u8 genmask;
+ enum nft_iter_type type:8;
unsigned int count;
unsigned int skip;
int err;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index cefe0c4bdae34c..41ca14e81d55f9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -117,6 +117,7 @@ struct Qdisc {
struct qdisc_skb_head q;
struct gnet_stats_basic_sync bstats;
struct gnet_stats_queue qstats;
+ int owner;
unsigned long state;
unsigned long state2; /* must be written under qdisc spinlock */
struct Qdisc *next_sched;
diff --git a/include/net/sock.h b/include/net/sock.h
index b5e00702acc1f0..f57bfd8a2ad2de 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1759,6 +1759,13 @@ static inline void sock_owned_by_me(const struct sock *sk)
#endif
}
+static inline void sock_not_owned_by_me(const struct sock *sk)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks);
+#endif
+}
+
static inline bool sock_owned_by_user(const struct sock *sk)
{
sock_owned_by_me(sk);
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 3cb4dc9bd70e44..3d54de168a6d9d 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -188,6 +188,8 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
{
if (!compl)
return;
+ if (!compl->tx_timestamp)
+ return;
*compl->tx_timestamp = ops->tmo_fill_timestamp(priv);
}
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 4ce1988b2ba01c..f40915d2eceef4 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -12,6 +12,7 @@ struct request;
struct scsi_driver {
struct device_driver gendrv;
+ int (*resume)(struct device *);
void (*rescan)(struct device *);
blk_status_t (*init_command)(struct scsi_cmnd *);
void (*uninit_command)(struct scsi_cmnd *);
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index b259d42a1e1aff..129001f600fc95 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -767,6 +767,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht);
#define scsi_template_proc_dir(sht) NULL
#endif
extern void scsi_scan_host(struct Scsi_Host *);
+extern int scsi_resume_device(struct scsi_device *sdev);
extern int scsi_rescan_device(struct scsi_device *sdev);
extern void scsi_remove_host(struct Scsi_Host *);
extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index a8bebac1e4b28d..957295364a5e3c 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -56,6 +56,9 @@ struct hdac_ext_stream {
u32 pphcldpl;
u32 pphcldpu;
+ u32 pplcllpl;
+ u32 pplcllpu;
+
bool decoupled:1;
bool link_locked:1;
bool link_prepared;
diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h
index 53470d6a28d659..24dbe16684ae33 100644
--- a/include/sound/intel-nhlt.h
+++ b/include/sound/intel-nhlt.h
@@ -143,6 +143,9 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt,
u32 bus_id, u8 link_type, u8 vbps, u8 bps,
u8 num_ch, u32 rate, u8 dir, u8 dev_type);
+int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt,
+ u8 virtual_bus_id);
+
#else
static inline struct nhlt_acpi_table *intel_nhlt_init(struct device *dev)
@@ -184,6 +187,13 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt,
return NULL;
}
+static inline int intel_nhlt_ssp_device_type(struct device *dev,
+ struct nhlt_acpi_table *nhlt,
+ u8 virtual_bus_id)
+{
+ return -EINVAL;
+}
+
#endif
#endif
diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h
index 4038dd421150a3..1dc59005d241fb 100644
--- a/include/sound/tas2781-tlv.h
+++ b/include/sound/tas2781-tlv.h
@@ -15,7 +15,7 @@
#ifndef __TAS2781_TLV_H__
#define __TAS2781_TLV_H__
-static const DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0);
+static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0);
static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0);
#endif
diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
index ba2d96a1bc2f94..f50fcafc69de20 100644
--- a/include/trace/events/rpcgss.h
+++ b/include/trace/events/rpcgss.h
@@ -609,7 +609,7 @@ TRACE_EVENT(rpcgss_context,
__field(unsigned int, timeout)
__field(u32, window_size)
__field(int, len)
- __string(acceptor, data)
+ __string_len(acceptor, data, len)
),
TP_fast_assign(
@@ -618,7 +618,7 @@ TRACE_EVENT(rpcgss_context,
__entry->timeout = timeout;
__entry->window_size = window_size;
__entry->len = len;
- strncpy(__get_str(acceptor), data, len);
+ __assign_str(acceptor, data);
),
TP_printk("win_size=%u expiry=%lu now=%lu timeout=%u acceptor=%.*s",
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 9ce46edc62a5b1..2040a470ddb41b 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -913,14 +913,25 @@ enum kfd_dbg_trap_exception_code {
KFD_EC_MASK(EC_DEVICE_NEW))
#define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \
KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE))
+#define KFD_EC_MASK_PACKET (KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \
+ KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED))
/* Checks for exception code types for KFD search */
+#define KFD_DBG_EC_IS_VALID(ecode) (ecode > EC_NONE && ecode < EC_MAX)
#define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \
- (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE))
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE))
#define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \
- (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE))
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE))
#define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \
- (!!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS))
+#define KFD_DBG_EC_TYPE_IS_PACKET(ecode) \
+ (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PACKET))
/* Runtime enable states */
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index bea69739061346..b95dd84eef2db2 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -179,12 +179,6 @@
/* Get the config size */
#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
-/* Get the count of all virtqueues */
-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
-
-/* Get the number of virtqueue groups. */
-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
-
/* Get the number of address spaces. */
#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
@@ -228,10 +222,17 @@
#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \
struct vhost_vring_state)
+
+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
+
+/* Get the number of virtqueue groups. */
+#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
+
/* Get the queue size of a specific virtqueue.
* userspace set the vring index in vhost_vring_state.index
* kernel set the queue size in vhost_vring_state.num
*/
-#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \
+#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \
struct vhost_vring_state)
#endif
diff --git a/include/uapi/scsi/scsi_bsg_mpi3mr.h b/include/uapi/scsi/scsi_bsg_mpi3mr.h
index c72ce387286ad9..30a5c1a5937645 100644
--- a/include/uapi/scsi/scsi_bsg_mpi3mr.h
+++ b/include/uapi/scsi/scsi_bsg_mpi3mr.h
@@ -382,7 +382,7 @@ struct mpi3mr_bsg_in_reply_buf {
__u8 mpi_reply_type;
__u8 rsvd1;
__u16 rsvd2;
- __u8 reply_buf[1];
+ __u8 reply_buf[];
};
/**
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index cb2afcebbdf514..a35e12f8e68baa 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -328,6 +328,7 @@ struct ufs_pwr_mode_info {
* @op_runtime_config: called to config Operation and runtime regs Pointers
* @get_outstanding_cqs: called to get outstanding completion queues
* @config_esi: called to config Event Specific Interrupt
+ * @config_scsi_dev: called to configure SCSI device parameters
*/
struct ufs_hba_variant_ops {
const char *name;
diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index 5d5c0b8efff2d4..c71ddb6d46914b 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -19,12 +19,6 @@
#include <vdso/time32.h>
#include <vdso/time64.h>
-#ifdef CONFIG_ARM64
-#include <asm/page-def.h>
-#else
-#include <asm/page.h>
-#endif
-
#ifdef CONFIG_ARCH_HAS_VDSO_DATA
#include <asm/vdso/data.h>
#else
@@ -132,7 +126,7 @@ extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden
*/
union vdso_data_store {
struct vdso_data data[CS_BASES];
- u8 page[PAGE_SIZE];
+ u8 page[1U << CONFIG_PAGE_SHIFT];
};
/*
diff --git a/init/initramfs.c b/init/initramfs.c
index 3127e0bf7bbd15..a298a3854a8018 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -367,7 +367,7 @@ static int __init do_name(void)
if (S_ISREG(mode)) {
int ml = maybe_link();
if (ml >= 0) {
- int openflags = O_WRONLY|O_CREAT;
+ int openflags = O_WRONLY|O_CREAT|O_LARGEFILE;
if (ml != 1)
openflags |= O_TRUNC;
wfile = filp_open(collected, openflags, mode);
diff --git a/init/main.c b/init/main.c
index 2ca52474d0c303..5dcf5274c09c7a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -487,6 +487,11 @@ static int __init warn_bootconfig(char *str)
early_param("bootconfig", warn_bootconfig);
+bool __init cmdline_has_extra_options(void)
+{
+ return extra_command_line || extra_init_args;
+}
+
/* Change NUL term back to "=", to make "param" the whole string. */
static void __init repair_env_string(char *param, char *val)
{
@@ -631,6 +636,8 @@ static void __init setup_command_line(char *command_line)
if (!saved_command_line)
panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
+ len = xlen + strlen(command_line) + 1;
+
static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
if (!static_command_line)
panic("%s: Failed to allocate %zu bytes\n", __func__, len);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5d4b448fdc5038..c170a2b8d2cf21 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -147,6 +147,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
static void io_queue_sqe(struct io_kiocb *req);
struct kmem_cache *req_cachep;
+static struct workqueue_struct *iou_wq __ro_after_init;
static int __read_mostly sysctl_io_uring_disabled;
static int __read_mostly sysctl_io_uring_group = -1;
@@ -350,7 +351,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
err:
kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
- kfree(ctx->io_bl);
xa_destroy(&ctx->io_bl_xa);
kfree(ctx);
return NULL;
@@ -1982,10 +1982,15 @@ fail:
err = -EBADFD;
if (!io_file_can_poll(req))
goto fail;
- err = -ECANCELED;
- if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK)
- goto fail;
- return;
+ if (req->file->f_flags & O_NONBLOCK ||
+ req->file->f_mode & FMODE_NOWAIT) {
+ err = -ECANCELED;
+ if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK)
+ goto fail;
+ return;
+ } else {
+ req->flags &= ~REQ_F_APOLL_MULTISHOT;
+ }
}
if (req->flags & REQ_F_FORCE_ASYNC) {
@@ -2597,19 +2602,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
if (__io_cqring_events_user(ctx) >= min_events)
return 0;
- if (sig) {
-#ifdef CONFIG_COMPAT
- if (in_compat_syscall())
- ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
- sigsz);
- else
-#endif
- ret = set_user_sigmask(sig, sigsz);
-
- if (ret)
- return ret;
- }
-
init_waitqueue_func_entry(&iowq.wq, io_wake_function);
iowq.wq.private = current;
INIT_LIST_HEAD(&iowq.wq.entry);
@@ -2628,6 +2620,19 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
io_napi_adjust_timeout(ctx, &iowq, &ts);
}
+ if (sig) {
+#ifdef CONFIG_COMPAT
+ if (in_compat_syscall())
+ ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
+ sigsz);
+ else
+#endif
+ ret = set_user_sigmask(sig, sigsz);
+
+ if (ret)
+ return ret;
+ }
+
io_napi_busy_loop(ctx, &iowq);
trace_io_uring_cqring_wait(ctx, min_events);
@@ -2926,7 +2931,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_napi_free(ctx);
kfree(ctx->cancel_table.hbs);
kfree(ctx->cancel_table_locked.hbs);
- kfree(ctx->io_bl);
xa_destroy(&ctx->io_bl_xa);
kfree(ctx);
}
@@ -3161,7 +3165,7 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
* noise and overhead, there's no discernable change in runtime
* over using system_wq.
*/
- queue_work(system_unbound_wq, &ctx->exit_work);
+ queue_work(iou_wq, &ctx->exit_work);
}
static int io_uring_release(struct inode *inode, struct file *file)
@@ -3443,14 +3447,15 @@ static void *io_uring_validate_mmap_request(struct file *file,
ptr = ctx->sq_sqes;
break;
case IORING_OFF_PBUF_RING: {
+ struct io_buffer_list *bl;
unsigned int bgid;
bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
- rcu_read_lock();
- ptr = io_pbuf_get_address(ctx, bgid);
- rcu_read_unlock();
- if (!ptr)
- return ERR_PTR(-EINVAL);
+ bl = io_pbuf_get_bl(ctx, bgid);
+ if (IS_ERR(bl))
+ return bl;
+ ptr = bl->buf_ring;
+ io_put_bl(ctx, bl);
break;
}
default:
@@ -4185,6 +4190,8 @@ static int __init io_uring_init(void)
io_buf_cachep = KMEM_CACHE(io_buffer,
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
+ iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64);
+
#ifdef CONFIG_SYSCTL
register_sysctl_init("kernel", kernel_io_uring_disabled_table);
#endif
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 693c26da4ee1a3..3aa16e27f5099a 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -17,8 +17,6 @@
#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))
-#define BGID_ARRAY 64
-
/* BIDs are addressed by a 16-bit field in a CQE */
#define MAX_BIDS_PER_BGID (1 << 16)
@@ -40,13 +38,9 @@ struct io_buf_free {
int inuse;
};
-static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
- struct io_buffer_list *bl,
- unsigned int bgid)
+static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
+ unsigned int bgid)
{
- if (bl && bgid < BGID_ARRAY)
- return &bl[bgid];
-
return xa_load(&ctx->io_bl_xa, bgid);
}
@@ -55,7 +49,7 @@ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
{
lockdep_assert_held(&ctx->uring_lock);
- return __io_buffer_get_list(ctx, ctx->io_bl, bgid);
+ return __io_buffer_get_list(ctx, bgid);
}
static int io_buffer_add_list(struct io_ring_ctx *ctx,
@@ -67,11 +61,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx,
* always under the ->uring_lock, but the RCU lookup from mmap does.
*/
bl->bgid = bgid;
- smp_store_release(&bl->is_ready, 1);
-
- if (bgid < BGID_ARRAY)
- return 0;
-
+ atomic_set(&bl->refs, 1);
return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
}
@@ -208,24 +198,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
return ret;
}
-static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
-{
- struct io_buffer_list *bl;
- int i;
-
- bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL);
- if (!bl)
- return -ENOMEM;
-
- for (i = 0; i < BGID_ARRAY; i++) {
- INIT_LIST_HEAD(&bl[i].buf_list);
- bl[i].bgid = i;
- }
-
- smp_store_release(&ctx->io_bl, bl);
- return 0;
-}
-
/*
* Mark the given mapped range as free for reuse
*/
@@ -294,24 +266,24 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
return i;
}
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+ if (atomic_dec_and_test(&bl->refs)) {
+ __io_remove_buffers(ctx, bl, -1U);
+ kfree_rcu(bl, rcu);
+ }
+}
+
void io_destroy_buffers(struct io_ring_ctx *ctx)
{
struct io_buffer_list *bl;
struct list_head *item, *tmp;
struct io_buffer *buf;
unsigned long index;
- int i;
-
- for (i = 0; i < BGID_ARRAY; i++) {
- if (!ctx->io_bl)
- break;
- __io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
- }
xa_for_each(&ctx->io_bl_xa, index, bl) {
xa_erase(&ctx->io_bl_xa, bl->bgid);
- __io_remove_buffers(ctx, bl, -1U);
- kfree_rcu(bl, rcu);
+ io_put_bl(ctx, bl);
}
/*
@@ -489,12 +461,6 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
io_ring_submit_lock(ctx, issue_flags);
- if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
- ret = io_init_bl_list(ctx);
- if (ret)
- goto err;
- }
-
bl = io_buffer_get_list(ctx, p->bgid);
if (unlikely(!bl)) {
bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
@@ -507,14 +473,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
if (ret) {
/*
* Doesn't need rcu free as it was never visible, but
- * let's keep it consistent throughout. Also can't
- * be a lower indexed array group, as adding one
- * where lookup failed cannot happen.
+ * let's keep it consistent throughout.
*/
- if (p->bgid >= BGID_ARRAY)
- kfree_rcu(bl, rcu);
- else
- WARN_ON_ONCE(1);
+ kfree_rcu(bl, rcu);
goto err;
}
}
@@ -679,12 +640,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (reg.ring_entries >= 65536)
return -EINVAL;
- if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
- int ret = io_init_bl_list(ctx);
- if (ret)
- return ret;
- }
-
bl = io_buffer_get_list(ctx, reg.bgid);
if (bl) {
/* if mapped buffer ring OR classic exists, don't allow */
@@ -733,11 +688,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
if (!bl->is_buf_ring)
return -EINVAL;
- __io_remove_buffers(ctx, bl, -1U);
- if (bl->bgid >= BGID_ARRAY) {
- xa_erase(&ctx->io_bl_xa, bl->bgid);
- kfree_rcu(bl, rcu);
- }
+ xa_erase(&ctx->io_bl_xa, bl->bgid);
+ io_put_bl(ctx, bl);
return 0;
}
@@ -767,23 +719,35 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
return 0;
}
-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
+struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+ unsigned long bgid)
{
struct io_buffer_list *bl;
+ bool ret;
- bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid);
-
- if (!bl || !bl->is_mmap)
- return NULL;
/*
- * Ensure the list is fully setup. Only strictly needed for RCU lookup
- * via mmap, and in that case only for the array indexed groups. For
- * the xarray lookups, it's either visible and ready, or not at all.
+ * We have to be a bit careful here - we're inside mmap and cannot grab
+ * the uring_lock. This means the buffer_list could be simultaneously
+ * going away, if someone is trying to be sneaky. Look it up under rcu
+ * so we know it's not going away, and attempt to grab a reference to
+ * it. If the ref is already zero, then fail the mapping. If successful,
+ * the caller will call io_put_bl() to drop the the reference at at the
+ * end. This may then safely free the buffer_list (and drop the pages)
+ * at that point, vm_insert_pages() would've already grabbed the
+ * necessary vma references.
*/
- if (!smp_load_acquire(&bl->is_ready))
- return NULL;
-
- return bl->buf_ring;
+ rcu_read_lock();
+ bl = xa_load(&ctx->io_bl_xa, bgid);
+ /* must be a mmap'able buffer ring and have pages */
+ ret = false;
+ if (bl && bl->is_mmap)
+ ret = atomic_inc_not_zero(&bl->refs);
+ rcu_read_unlock();
+
+ if (ret)
+ return bl;
+
+ return ERR_PTR(-EINVAL);
}
/*
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 1c7b654ee7263a..df365b8860cf1e 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -25,12 +25,12 @@ struct io_buffer_list {
__u16 head;
__u16 mask;
+ atomic_t refs;
+
/* ring mapped provided buffers */
__u8 is_buf_ring;
/* ring mapped provided buffers, but mmap'ed by application */
__u8 is_mmap;
- /* bl is visible from an RCU point of view for lookup */
- __u8 is_ready;
};
struct io_buffer {
@@ -61,7 +61,9 @@ void __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid);
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+ unsigned long bgid);
static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
{
diff --git a/io_uring/net.c b/io_uring/net.c
index 1e7665ff6ef702..4afb475d41974b 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1276,6 +1276,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
if (req_has_async_data(req)) {
kmsg = req->async_data;
+ kmsg->msg.msg_control_user = sr->msg_control;
} else {
ret = io_sendmsg_copy_hdr(req, &iomsg);
if (ret)
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 0585ebcc9773d3..c8d48287439e5a 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -937,6 +937,13 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
ret = __io_read(req, issue_flags);
/*
+ * If the file doesn't support proper NOWAIT, then disable multishot
+ * and stay in single shot mode.
+ */
+ if (!io_file_supports_nowait(req))
+ req->flags &= ~REQ_F_APOLL_MULTISHOT;
+
+ /*
* If we get -EAGAIN, recycle our buffer and just let normal poll
* handling arm it.
*/
@@ -955,7 +962,7 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
/*
* Any successful return value will keep the multishot read armed.
*/
- if (ret > 0) {
+ if (ret > 0 && req->flags & REQ_F_APOLL_MULTISHOT) {
/*
* Put our buffer and post a CQE. If we fail to post a CQE, then
* jump to the termination path. This request is then done.
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 368c5d86b5b7c8..e497011261b897 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -4,7 +4,7 @@ ifneq ($(CONFIG_BPF_JIT_ALWAYS_ON),y)
# ___bpf_prog_run() needs GCSE disabled on x86; see 3193c0836f203 for details
cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
-CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
+CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 86571e760dd613..343c3456c8ddf0 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -38,7 +38,7 @@
/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */
#define GUARD_SZ (1ull << sizeof(((struct bpf_insn *)0)->off) * 8)
-#define KERN_VM_SZ ((1ull << 32) + GUARD_SZ)
+#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
struct bpf_arena {
struct bpf_map map;
@@ -110,7 +110,7 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
return ERR_PTR(-EINVAL);
vm_range = (u64)attr->max_entries * PAGE_SIZE;
- if (vm_range > (1ull << 32))
+ if (vm_range > SZ_4G)
return ERR_PTR(-E2BIG);
if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32))
@@ -301,7 +301,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad
if (pgoff)
return -EINVAL;
- if (len > (1ull << 32))
+ if (len > SZ_4G)
return -E2BIG;
/* if user_vm_start was specified at arena creation time */
@@ -322,7 +322,7 @@ static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long ad
if (WARN_ON_ONCE(arena->user_vm_start))
/* checks at map creation time should prevent this */
return -EFAULT;
- return round_up(ret, 1ull << 32);
+ return round_up(ret, SZ_4G);
}
static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
@@ -346,7 +346,7 @@ static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
return -EBUSY;
/* Earlier checks should prevent this */
- if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > (1ull << 32) || vma->vm_pgoff))
+ if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > SZ_4G || vma->vm_pgoff))
return -EFAULT;
if (remember_vma(arena, vma))
@@ -420,7 +420,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
if (uaddr & ~PAGE_MASK)
return 0;
pgoff = compute_pgoff(arena, uaddr);
- if (pgoff + page_cnt > page_cnt_max)
+ if (pgoff > page_cnt_max - page_cnt)
/* requested address will be outside of user VMA */
return 0;
}
@@ -447,7 +447,13 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
goto out;
uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
- /* Earlier checks make sure that uaddr32 + page_cnt * PAGE_SIZE will not overflow 32-bit */
+ /* Earlier checks made sure that uaddr32 + page_cnt * PAGE_SIZE - 1
+ * will not overflow 32-bit. Lower 32-bit need to represent
+ * contiguous user address range.
+ * Map these pages at kern_vm_start base.
+ * kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE - 1 can overflow
+ * lower 32-bit and it's ok.
+ */
ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32,
kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages);
if (ret) {
@@ -510,6 +516,11 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
if (!page)
continue;
if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */
+ /* Optimization for the common case of page_cnt==1:
+ * If page wasn't mapped into some user vma there
+ * is no need to call zap_pages which is slow. When
+ * page_cnt is big it's faster to do the batched zap.
+ */
zap_pages(arena, full_uaddr, 1);
vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE);
__free_page(page);
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
index addf3dd57b59b5..35e1ddca74d210 100644
--- a/kernel/bpf/bloom_filter.c
+++ b/kernel/bpf/bloom_filter.c
@@ -80,6 +80,18 @@ static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key
return -EOPNOTSUPP;
}
+/* Called from syscall */
+static int bloom_map_alloc_check(union bpf_attr *attr)
+{
+ if (attr->value_size > KMALLOC_MAX_SIZE)
+ /* if value_size is bigger, the user space won't be able to
+ * access the elements.
+ */
+ return -E2BIG;
+
+ return 0;
+}
+
static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
{
u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits;
@@ -191,6 +203,7 @@ static u64 bloom_map_mem_usage(const struct bpf_map *map)
BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter)
const struct bpf_map_ops bloom_filter_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
+ .map_alloc_check = bloom_map_alloc_check,
.map_alloc = bloom_map_alloc,
.map_free = bloom_map_free,
.map_get_next_key = bloom_map_get_next_key,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a895878595710b..449b9a5d3fe3f3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2548,7 +2548,7 @@ __bpf_kfunc void bpf_throw(u64 cookie)
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids)
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ae2ff73bde7e79..c287925471f68e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link)
atomic64_inc(&link->refcnt);
}
+static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
+{
+ struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
+
+ /* free bpf_link and its containing memory */
+ link->ops->dealloc_deferred(link);
+}
+
+static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
+{
+ if (rcu_trace_implies_rcu_gp())
+ bpf_link_defer_dealloc_rcu_gp(rcu);
+ else
+ call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
+}
+
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
+ bool sleepable = false;
+
bpf_link_free_id(link->id);
if (link->prog) {
+ sleepable = link->prog->sleepable;
/* detach BPF program, clean up used resources */
link->ops->release(link);
bpf_prog_put(link->prog);
}
- /* free bpf_link and its containing memory */
- link->ops->dealloc(link);
+ if (link->ops->dealloc_deferred) {
+ /* schedule BPF link deallocation; if underlying BPF program
+ * is sleepable, we need to first wait for RCU tasks trace
+ * sync, then go through "classic" RCU grace period
+ */
+ if (sleepable)
+ call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+ else
+ call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
+ }
+ if (link->ops->dealloc)
+ link->ops->dealloc(link);
}
static void bpf_link_put_deferred(struct work_struct *work)
@@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_raw_tp_link_lops = {
.release = bpf_raw_tp_link_release,
- .dealloc = bpf_raw_tp_link_dealloc,
+ .dealloc_deferred = bpf_raw_tp_link_dealloc,
.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
.fill_link_info = bpf_raw_tp_link_fill_link_info,
};
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 63749ad5ac6b8d..98188379d5c77d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5682,6 +5682,13 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
return reg->type == PTR_TO_FLOW_KEYS;
}
+static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
+{
+ const struct bpf_reg_state *reg = reg_state(env, regno);
+
+ return reg->type == PTR_TO_ARENA;
+}
+
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
@@ -6694,6 +6701,11 @@ static int check_stack_access_within_bounds(
err = check_stack_slot_within_bounds(env, min_off, state, type);
if (!err && max_off > 0)
err = -EINVAL; /* out of stack access into non-negative offsets */
+ if (!err && access_size < 0)
+ /* access_size should not be negative (or overflow an int); others checks
+ * along the way should have prevented such an access.
+ */
+ err = -EFAULT; /* invalid negative access size; integer overflow? */
if (err) {
if (tnum_is_const(reg->var_off)) {
@@ -7019,7 +7031,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
if (is_ctx_reg(env, insn->dst_reg) ||
is_pkt_reg(env, insn->dst_reg) ||
is_flow_key_reg(env, insn->dst_reg) ||
- is_sk_reg(env, insn->dst_reg)) {
+ is_sk_reg(env, insn->dst_reg) ||
+ is_arena_reg(env, insn->dst_reg)) {
verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
insn->dst_reg,
reg_type_str(env, reg_state(env, insn->dst_reg)->type));
@@ -14014,6 +14027,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
return -EINVAL;
}
+ if (!env->prog->aux->arena) {
+ verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n");
+ return -EINVAL;
+ }
} else {
if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
insn->off != 32) || insn->imm) {
@@ -14046,8 +14063,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (insn->imm) {
/* off == BPF_ADDR_SPACE_CAST */
mark_reg_unknown(env, regs, insn->dst_reg);
- if (insn->imm == 1) /* cast from as(1) to as(0) */
+ if (insn->imm == 1) { /* cast from as(1) to as(0) */
dst_reg->type = PTR_TO_ARENA;
+ /* PTR_TO_ARENA is 32-bit */
+ dst_reg->subreg_def = env->insn_idx + 1;
+ }
} else if (insn->off == 0) {
/* case: R1 = R2
* copy register state to dest reg
@@ -18359,15 +18379,18 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
}
if (!env->prog->jit_requested) {
verbose(env, "JIT is required to use arena\n");
+ fdput(f);
return -EOPNOTSUPP;
}
if (!bpf_jit_supports_arena()) {
verbose(env, "JIT doesn't support arena\n");
+ fdput(f);
return -EOPNOTSUPP;
}
env->prog->aux->arena = (void *)map;
if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
verbose(env, "arena's user address must be set via map_extra or mmap()\n");
+ fdput(f);
return -EINVAL;
}
}
@@ -19601,8 +19624,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
(((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
/* convert to 32-bit mov that clears upper 32-bit */
insn->code = BPF_ALU | BPF_MOV | BPF_X;
- /* clear off, so it's a normal 'wX = wY' from JIT pov */
+ /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */
insn->off = 0;
+ insn->imm = 0;
} /* cast from as(0) to as(1) should be handled by JIT */
goto next_insn;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8f6affd051f775..07ad53b7f11952 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -3207,7 +3207,8 @@ enum cpu_mitigations {
};
static enum cpu_mitigations cpu_mitigations __ro_after_init =
- CPU_MITIGATIONS_AUTO;
+ IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO :
+ CPU_MITIGATIONS_OFF;
static int __init mitigations_parse_cmdline(char *arg)
{
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 86fe172b595823..a5e0dfc44d24e2 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -69,11 +69,14 @@
* @alloc_size: Size of the allocated buffer.
* @list: The free list describing the number of free entries available
* from each index.
+ * @pad_slots: Number of preceding padding slots. Valid only in the first
+ * allocated non-padding slot.
*/
struct io_tlb_slot {
phys_addr_t orig_addr;
size_t alloc_size;
- unsigned int list;
+ unsigned short list;
+ unsigned short pad_slots;
};
static bool swiotlb_force_bounce;
@@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
mem->nslabs - i);
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}
memset(vaddr, 0, bytes);
@@ -821,12 +825,30 @@ void swiotlb_dev_init(struct device *dev)
#endif
}
-/*
- * Return the offset into a iotlb slot required to keep the device happy.
+/**
+ * swiotlb_align_offset() - Get required offset into an IO TLB allocation.
+ * @dev: Owning device.
+ * @align_mask: Allocation alignment mask.
+ * @addr: DMA address.
+ *
+ * Return the minimum offset from the start of an IO TLB allocation which is
+ * required for a given buffer address and allocation alignment to keep the
+ * device happy.
+ *
+ * First, the address bits covered by min_align_mask must be identical in the
+ * original address and the bounce buffer address. High bits are preserved by
+ * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra
+ * padding bytes before the bounce buffer.
+ *
+ * Second, @align_mask specifies which bits of the first allocated slot must
+ * be zero. This may require allocating additional padding slots, and then the
+ * offset (in bytes) from the first such padding slot is returned.
*/
-static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+static unsigned int swiotlb_align_offset(struct device *dev,
+ unsigned int align_mask, u64 addr)
{
- return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+ return addr & dma_get_min_align_mask(dev) &
+ (align_mask | (IO_TLB_SIZE - 1));
}
/*
@@ -841,27 +863,23 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
size_t alloc_size = mem->slots[index].alloc_size;
unsigned long pfn = PFN_DOWN(orig_addr);
unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
- unsigned int tlb_offset, orig_addr_offset;
+ int tlb_offset;
if (orig_addr == INVALID_PHYS_ADDR)
return;
- tlb_offset = tlb_addr & (IO_TLB_SIZE - 1);
- orig_addr_offset = swiotlb_align_offset(dev, orig_addr);
- if (tlb_offset < orig_addr_offset) {
- dev_WARN_ONCE(dev, 1,
- "Access before mapping start detected. orig offset %u, requested offset %u.\n",
- orig_addr_offset, tlb_offset);
- return;
- }
-
- tlb_offset -= orig_addr_offset;
- if (tlb_offset > alloc_size) {
- dev_WARN_ONCE(dev, 1,
- "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n",
- alloc_size, size, tlb_offset);
- return;
- }
+ /*
+ * It's valid for tlb_offset to be negative. This can happen when the
+ * "offset" returned by swiotlb_align_offset() is non-zero, and the
+ * tlb_addr is pointing within the first "offset" bytes of the second
+ * or subsequent slots of the allocated swiotlb area. While it's not
+ * valid for tlb_addr to be pointing within the first "offset" bytes
+ * of the first slot, there's no way to check for such an error since
+ * this function can't distinguish the first slot from the second and
+ * subsequent slots.
+ */
+ tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
+ swiotlb_align_offset(dev, 0, orig_addr);
orig_addr += tlb_offset;
alloc_size -= tlb_offset;
@@ -1005,7 +1023,7 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
unsigned long max_slots = get_max_slots(boundary_mask);
unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
unsigned int nslots = nr_slots(alloc_size), stride;
- unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr);
unsigned int index, slots_checked, count = 0, i;
unsigned long flags;
unsigned int slot_base;
@@ -1328,11 +1346,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
unsigned long attrs)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
- unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int offset;
struct io_tlb_pool *pool;
unsigned int i;
int index;
phys_addr_t tlb_addr;
+ unsigned short pad_slots;
if (!mem || !mem->nslabs) {
dev_warn_ratelimited(dev,
@@ -1349,6 +1368,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
return (phys_addr_t)DMA_MAPPING_ERROR;
}
+ offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr);
index = swiotlb_find_slots(dev, orig_addr,
alloc_size + offset, alloc_align_mask, &pool);
if (index == -1) {
@@ -1364,6 +1384,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
* This is needed when we sync the memory. Then we sync the buffer if
* needed.
*/
+ pad_slots = offset >> IO_TLB_SHIFT;
+ offset &= (IO_TLB_SIZE - 1);
+ index += pad_slots;
+ pool->slots[index].pad_slots = pad_slots;
for (i = 0; i < nr_slots(alloc_size + offset); i++)
pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
tlb_addr = slot_addr(pool->start, index) + offset;
@@ -1384,13 +1408,17 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
{
struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
unsigned long flags;
- unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
- int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
- int nslots = nr_slots(mem->slots[index].alloc_size + offset);
- int aindex = index / mem->area_nslabs;
- struct io_tlb_area *area = &mem->areas[aindex];
+ unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
+ int index, nslots, aindex;
+ struct io_tlb_area *area;
int count, i;
+ index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+ index -= mem->slots[index].pad_slots;
+ nslots = nr_slots(mem->slots[index].alloc_size + offset);
+ aindex = index / mem->area_nslabs;
+ area = &mem->areas[aindex];
+
/*
* Return the buffer to the free list by setting the corresponding
* entries to indicate the number of contiguous entries available.
@@ -1413,6 +1441,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
mem->slots[i].list = ++count;
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}
/*
@@ -1647,9 +1676,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
const char *dirname)
{
- atomic_long_set(&mem->total_used, 0);
- atomic_long_set(&mem->used_hiwater, 0);
-
mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
if (!mem->nslabs)
return;
@@ -1660,7 +1686,6 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
&fops_io_tlb_hiwater);
#ifdef CONFIG_SWIOTLB_DYNAMIC
- atomic_long_set(&mem->transient_nslabs, 0);
debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs,
mem, &fops_io_tlb_transient_used);
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 39a5046c2f0bf4..aebb3e6c96dc62 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -714,6 +714,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
} else if (anon_vma_fork(tmp, mpnt))
goto fail_nomem_anon_vma_fork;
vm_flags_clear(tmp, VM_LOCKED_MASK);
+ /*
+ * Copy/update hugetlb private vma information.
+ */
+ if (is_vm_hugetlb_page(tmp))
+ hugetlb_dup_vma_private(tmp);
+
+ /*
+ * Link the vma into the MT. After using __mt_dup(), memory
+ * allocation is not necessary here, so it cannot fail.
+ */
+ vma_iter_bulk_store(&vmi, tmp);
+
+ mm->map_count++;
+
+ if (tmp->vm_ops && tmp->vm_ops->open)
+ tmp->vm_ops->open(tmp);
+
file = tmp->vm_file;
if (file) {
struct address_space *mapping = file->f_mapping;
@@ -730,25 +747,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
i_mmap_unlock_write(mapping);
}
- /*
- * Copy/update hugetlb private vma information.
- */
- if (is_vm_hugetlb_page(tmp))
- hugetlb_dup_vma_private(tmp);
-
- /*
- * Link the vma into the MT. After using __mt_dup(), memory
- * allocation is not necessary here, so it cannot fail.
- */
- vma_iter_bulk_store(&vmi, tmp);
-
- mm->map_count++;
if (!(tmp->vm_flags & VM_WIPEONFORK))
retval = copy_page_range(tmp, mpnt);
- if (tmp->vm_ops && tmp->vm_ops->open)
- tmp->vm_ops->open(tmp);
-
if (retval) {
mpnt = vma_next(&vmi);
goto loop_out;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ad3eaf2ab95961..bf9ae8a8686ff6 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1643,8 +1643,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
if (!((old->flags & new->flags) & IRQF_SHARED) ||
- (oldtype != (new->flags & IRQF_TRIGGER_MASK)) ||
- ((old->flags ^ new->flags) & IRQF_ONESHOT))
+ (oldtype != (new->flags & IRQF_TRIGGER_MASK)))
+ goto mismatch;
+
+ if ((old->flags & IRQF_ONESHOT) &&
+ (new->flags & IRQF_COND_ONESHOT))
+ new->flags |= IRQF_ONESHOT;
+ else if ((old->flags ^ new->flags) & IRQF_ONESHOT)
goto mismatch;
/* All handlers must agree on per-cpuness */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9d9095e8179286..65adc815fc6e63 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1567,10 +1567,17 @@ static int check_kprobe_address_safe(struct kprobe *p,
jump_label_lock();
preempt_disable();
- /* Ensure it is not in reserved area nor out of text */
- if (!(core_kernel_text((unsigned long) p->addr) ||
- is_module_text_address((unsigned long) p->addr)) ||
- in_gate_area_no_mm((unsigned long) p->addr) ||
+ /* Ensure the address is in a text area, and find a module if exists. */
+ *probed_mod = NULL;
+ if (!core_kernel_text((unsigned long) p->addr)) {
+ *probed_mod = __module_text_address((unsigned long) p->addr);
+ if (!(*probed_mod)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ /* Ensure it is not in reserved area. */
+ if (in_gate_area_no_mm((unsigned long) p->addr) ||
within_kprobe_blacklist((unsigned long) p->addr) ||
jump_label_text_reserved(p->addr, p->addr) ||
static_call_text_reserved(p->addr, p->addr) ||
@@ -1580,8 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
goto out;
}
- /* Check if 'p' is probing a module. */
- *probed_mod = __module_text_address((unsigned long) p->addr);
+ /* Get module refcount and reject __init functions for loaded modules. */
if (*probed_mod) {
/*
* We must hold a refcount of the probed module while updating
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index e3ae93bbcb9b50..09f8397bae15fb 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -106,6 +106,12 @@ static void s2idle_enter(void)
swait_event_exclusive(s2idle_wait_head,
s2idle_state == S2IDLE_STATE_WAKE);
+ /*
+ * Kick all CPUs to ensure that they resume their timers and restore
+ * consistent system state.
+ */
+ wake_up_all_idle_cpus();
+
cpus_read_unlock();
raw_spin_lock_irq(&s2idle_lock);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index fb0fdec8719a13..d88b13076b7944 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -7,6 +7,7 @@
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -84,7 +85,7 @@ int tick_is_oneshot_available(void)
*/
static void tick_periodic(int cpu)
{
- if (tick_do_timer_cpu == cpu) {
+ if (READ_ONCE(tick_do_timer_cpu) == cpu) {
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
@@ -215,8 +216,8 @@ static void tick_setup_device(struct tick_device *td,
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
- if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
- tick_do_timer_cpu = cpu;
+ if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
tick_next_period = ktime_get();
#ifdef CONFIG_NO_HZ_FULL
/*
@@ -232,7 +233,7 @@ static void tick_setup_device(struct tick_device *td,
!tick_nohz_full_cpu(cpu)) {
tick_take_do_timer_from_boot();
tick_do_timer_boot_cpu = -1;
- WARN_ON(tick_do_timer_cpu != cpu);
+ WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
#endif
}
@@ -406,10 +407,10 @@ void tick_assert_timekeeping_handover(void)
int tick_cpu_dying(unsigned int dying_cpu)
{
/*
- * If the current CPU is the timekeeper, it's the only one that
- * can safely hand over its duty. Also all online CPUs are in
- * stop machine, guaranteed not to be idle, therefore it's safe
- * to pick any online successor.
+ * If the current CPU is the timekeeper, it's the only one that can
+ * safely hand over its duty. Also all online CPUs are in stop
+ * machine, guaranteed not to be idle, therefore there is no
+ * concurrency and it's safe to pick any online successor.
*/
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 269e21590df536..71a792cd893620 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -8,6 +8,7 @@
*
* Started by: Thomas Gleixner and Ingo Molnar
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -204,7 +205,7 @@ static inline void tick_sched_flag_clear(struct tick_sched *ts,
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
- int cpu = smp_processor_id();
+ int tick_cpu, cpu = smp_processor_id();
/*
* Check if the do_timer duty was dropped. We don't care about
@@ -216,16 +217,18 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
* If nohz_full is enabled, this should not happen because the
* 'tick_do_timer_cpu' CPU never relinquishes.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
- unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
- tick_do_timer_cpu = cpu;
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
+ tick_cpu = cpu;
}
/* Check if jiffies need an update */
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
tick_do_update_jiffies64(now);
/*
@@ -610,7 +613,7 @@ bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
* timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
- if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+ if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
return false;
return true;
}
@@ -697,6 +700,7 @@ bool tick_nohz_tick_stopped_cpu(int cpu)
/**
* tick_nohz_update_jiffies - update jiffies when idle was interrupted
+ * @now: current ktime_t
*
* Called from interrupt entry when the CPU was idle
*
@@ -794,7 +798,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
- * This function returns -1 if NOHZ is not enabled.
+ * Return: -1 if NOHZ is not enabled, else total idle time of the @cpu
*/
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
@@ -820,7 +824,7 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
*
- * This function returns -1 if NOHZ is not enabled.
+ * Return: -1 if NOHZ is not enabled, else total iowait time of @cpu
*/
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
@@ -890,6 +894,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
+ int tick_cpu;
basemono = get_jiffies_update(&basejiff);
ts->last_jiffies = basejiff;
@@ -946,9 +951,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
- if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
- !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu != cpu &&
+ (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
delta = KTIME_MAX;
/* Calculate the next expiry time */
@@ -969,6 +974,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
+ int tick_cpu;
u64 expires;
/* Make sure we won't be trying to stop it twice in a row. */
@@ -1006,10 +1012,11 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* do_timer() never gets invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
- if (cpu == tick_do_timer_cpu) {
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu == cpu) {
+ WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
- } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ } else if (tick_cpu != TICK_DO_TIMER_NONE) {
tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
}
@@ -1172,15 +1179,17 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
if (tick_nohz_full_enabled()) {
+ int tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
*/
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
return false;
/* Should not happen for nohz-full */
- if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+ if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE))
return false;
}
@@ -1287,6 +1296,8 @@ void tick_nohz_irq_exit(void)
/**
* tick_nohz_idle_got_tick - Check whether or not the tick handler has run
+ *
+ * Return: %true if the tick handler has run, otherwise %false
*/
bool tick_nohz_idle_got_tick(void)
{
@@ -1305,6 +1316,8 @@ bool tick_nohz_idle_got_tick(void)
* stopped, it returns the next hrtimer.
*
* Called from power state control code with interrupts disabled
+ *
+ * Return: the next expiration time
*/
ktime_t tick_nohz_get_next_hrtimer(void)
{
@@ -1320,6 +1333,8 @@ ktime_t tick_nohz_get_next_hrtimer(void)
* The return value of this function and/or the value returned by it through the
* @delta_next pointer can be negative which must be taken into account by its
* callers.
+ *
+ * Return: the expected length of the current sleep
*/
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
@@ -1357,8 +1372,11 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
/**
* tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
* for a particular CPU.
+ * @cpu: target CPU number
*
* Called from the schedutil frequency scaling governor in scheduler context.
+ *
+ * Return: the current idle calls counter value for @cpu
*/
unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
{
@@ -1371,6 +1389,8 @@ unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
* tick_nohz_get_idle_calls - return the current idle calls counter value
*
* Called from the schedutil frequency scaling governor in scheduler context.
+ *
+ * Return: the current idle calls counter value for the current CPU
*/
unsigned long tick_nohz_get_idle_calls(void)
{
@@ -1559,7 +1579,7 @@ early_param("skew_tick", skew_tick);
/**
* tick_setup_sched_timer - setup the tick emulation timer
- * @mode: tick_nohz_mode to setup for
+ * @hrtimer: whether to use the hrtimer or not
*/
void tick_setup_sched_timer(bool hrtimer)
{
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index e11c4dc65bcb24..b4a7822f495d34 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -46,8 +46,8 @@ struct tick_device {
* @next_tick: Next tick to be fired when in dynticks mode.
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
* @idle_waketime: Time when the idle was interrupted
+ * @idle_sleeptime_seq: sequence counter for data consistency
* @idle_entrytime: Time when the idle call was entered
- * @nohz_mode: Mode - one state of tick_nohz_mode
* @last_jiffies: Base jiffies snapshot when next event was last computed
* @timer_expires_base: Base time clock monotonic for @timer_expires
* @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index dee29f1f5b75f3..3baf2fbe6848f0 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -64,15 +64,15 @@ EXPORT_SYMBOL(jiffies_64);
/*
* The timer wheel has LVL_DEPTH array levels. Each level provides an array of
- * LVL_SIZE buckets. Each level is driven by its own clock and therefor each
+ * LVL_SIZE buckets. Each level is driven by its own clock and therefore each
* level has a different granularity.
*
- * The level granularity is: LVL_CLK_DIV ^ lvl
+ * The level granularity is: LVL_CLK_DIV ^ level
* The level clock frequency is: HZ / (LVL_CLK_DIV ^ level)
*
* The array level of a newly armed timer depends on the relative expiry
* time. The farther the expiry time is away the higher the array level and
- * therefor the granularity becomes.
+ * therefore the granularity becomes.
*
* Contrary to the original timer wheel implementation, which aims for 'exact'
* expiry of the timers, this implementation removes the need for recascading
@@ -207,7 +207,7 @@ EXPORT_SYMBOL(jiffies_64);
* struct timer_base - Per CPU timer base (number of base depends on config)
* @lock: Lock protecting the timer_base
* @running_timer: When expiring timers, the lock is dropped. To make
- * sure not to race agains deleting/modifying a
+ * sure not to race against deleting/modifying a
* currently running timer, the pointer is set to the
* timer, which expires at the moment. If no timer is
* running, the pointer is NULL.
@@ -737,7 +737,7 @@ static bool timer_is_static_object(void *addr)
}
/*
- * fixup_init is called when:
+ * timer_fixup_init is called when:
* - an active object is initialized
*/
static bool timer_fixup_init(void *addr, enum debug_obj_state state)
@@ -761,7 +761,7 @@ static void stub_timer(struct timer_list *unused)
}
/*
- * fixup_activate is called when:
+ * timer_fixup_activate is called when:
* - an active object is activated
* - an unknown non-static object is activated
*/
@@ -783,7 +783,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
}
/*
- * fixup_free is called when:
+ * timer_fixup_free is called when:
* - an active object is freed
*/
static bool timer_fixup_free(void *addr, enum debug_obj_state state)
@@ -801,7 +801,7 @@ static bool timer_fixup_free(void *addr, enum debug_obj_state state)
}
/*
- * fixup_assert_init is called when:
+ * timer_fixup_assert_init is called when:
* - an untracked/uninit-ed object is found
*/
static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
@@ -914,7 +914,7 @@ static void do_init_timer(struct timer_list *timer,
* @key: lockdep class key of the fake lock used for tracking timer
* sync lock dependencies
*
- * init_timer_key() must be done to a timer prior calling *any* of the
+ * init_timer_key() must be done to a timer prior to calling *any* of the
* other timer functions.
*/
void init_timer_key(struct timer_list *timer,
@@ -1417,7 +1417,7 @@ static int __timer_delete(struct timer_list *timer, bool shutdown)
* If @shutdown is set then the lock has to be taken whether the
* timer is pending or not to protect against a concurrent rearm
* which might hit between the lockless pending check and the lock
- * aquisition. By taking the lock it is ensured that such a newly
+ * acquisition. By taking the lock it is ensured that such a newly
* enqueued timer is dequeued and cannot end up with
* timer->function == NULL in the expiry code.
*
@@ -2306,7 +2306,7 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
/*
* When timer base is not set idle, undo the effect of
- * tmigr_cpu_deactivate() to prevent inconsitent states - active
+ * tmigr_cpu_deactivate() to prevent inconsistent states - active
* timer base but inactive timer migration hierarchy.
*
* When timer base was already marked idle, nothing will be
diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c
index c63a0afdcebed5..ccba875d2234fe 100644
--- a/kernel/time/timer_migration.c
+++ b/kernel/time/timer_migration.c
@@ -751,6 +751,33 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
first_childevt = evt = data->evt;
+ /*
+ * Walking the hierarchy is required in any case when a
+ * remote expiry was done before. This ensures to not lose
+ * already queued events in non active groups (see section
+ * "Required event and timerqueue update after a remote
+ * expiry" in the documentation at the top).
+ *
+ * The two call sites which are executed without a remote expiry
+ * before, are not prevented from propagating changes through
+ * the hierarchy by the return:
+ * - When entering this path by tmigr_new_timer(), @evt->ignore
+ * is never set.
+ * - tmigr_inactive_up() takes care of the propagation by
+ * itself and ignores the return value. But an immediate
+ * return is possible if there is a parent, sparing group
+ * locking at this level, because the upper walking call to
+ * the parent will take care about removing this event from
+ * within the group and update next_expiry accordingly.
+ *
+ * However if there is no parent, ie: the hierarchy has only a
+ * single level so @group is the top level group, make sure the
+ * first event information of the group is updated properly and
+ * also handled properly, so skip this fast return path.
+ */
+ if (evt->ignore && !remote && group->parent)
+ return true;
+
raw_spin_lock(&group->lock);
childstate.state = 0;
@@ -762,8 +789,11 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
* queue when the expiry time changed only or when it could be ignored.
*/
if (timerqueue_node_queued(&evt->nextevt)) {
- if ((evt->nextevt.expires == nextexp) && !evt->ignore)
+ if ((evt->nextevt.expires == nextexp) && !evt->ignore) {
+ /* Make sure not to miss a new CPU event with the same expiry */
+ evt->cpu = first_childevt->cpu;
goto check_toplvl;
+ }
if (!timerqueue_del(&group->events, &evt->nextevt))
WRITE_ONCE(group->next_expiry, KTIME_MAX);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61c541c36596d9..47345bf1d4a9f7 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -965,7 +965,7 @@ config FTRACE_RECORD_RECURSION
config FTRACE_RECORD_RECURSION_SIZE
int "Max number of recursed functions to record"
- default 128
+ default 128
depends on FTRACE_RECORD_RECURSION
help
This defines the limit of number of functions that can be
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0a5c4efc73c367..9dc605f08a2314 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2728,7 +2728,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
.release = bpf_kprobe_multi_link_release,
- .dealloc = bpf_kprobe_multi_link_dealloc,
+ .dealloc_deferred = bpf_kprobe_multi_link_dealloc,
.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
};
@@ -3157,6 +3157,9 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link)
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
+ if (umulti_link->task)
+ put_task_struct(umulti_link->task);
+ path_put(&umulti_link->path);
}
static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
@@ -3164,9 +3167,6 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
struct bpf_uprobe_multi_link *umulti_link;
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
- if (umulti_link->task)
- put_task_struct(umulti_link->task);
- path_put(&umulti_link->path);
kvfree(umulti_link->uprobes);
kfree(umulti_link);
}
@@ -3242,7 +3242,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
.release = bpf_uprobe_multi_link_release,
- .dealloc = bpf_uprobe_multi_link_dealloc,
+ .dealloc_deferred = bpf_uprobe_multi_link_dealloc,
.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
};
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 25476ead681b84..6511dc3a00da84 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1393,7 +1393,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
- local_inc(&cpu_buffer->pages_touched);
/*
* Just make sure we have seen our old_write and synchronize
* with any interrupts that come in.
@@ -1430,8 +1429,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
*/
local_set(&next_page->page->commit, 0);
- /* Again, either we update tail_page or an interrupt does */
- (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
+ /* Either we update tail_page or an interrupt does */
+ if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page))
+ local_inc(&cpu_buffer->pages_touched);
}
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7c364b87352eed..52f75c36bbca49 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1670,6 +1670,7 @@ static int trace_format_open(struct inode *inode, struct file *file)
return 0;
}
+#ifdef CONFIG_PERF_EVENTS
static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
@@ -1684,6 +1685,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}
+#endif
static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
@@ -2152,10 +2154,12 @@ static const struct file_operations ftrace_event_format_fops = {
.release = seq_release,
};
+#ifdef CONFIG_PERF_EVENTS
static const struct file_operations ftrace_event_id_fops = {
.read = event_id_read,
.llseek = default_llseek,
};
+#endif
static const struct file_operations ftrace_event_filter_fops = {
.open = tracing_open_file_tr,
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index c59d26068a6401..97f8911ea339e6 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -61,9 +61,12 @@ static inline void * __init xbc_alloc_mem(size_t size)
return memblock_alloc(size, SMP_CACHE_BYTES);
}
-static inline void __init xbc_free_mem(void *addr, size_t size)
+static inline void __init xbc_free_mem(void *addr, size_t size, bool early)
{
- memblock_free(addr, size);
+ if (early)
+ memblock_free(addr, size);
+ else if (addr)
+ memblock_free_late(__pa(addr), size);
}
#else /* !__KERNEL__ */
@@ -73,7 +76,7 @@ static inline void *xbc_alloc_mem(size_t size)
return malloc(size);
}
-static inline void xbc_free_mem(void *addr, size_t size)
+static inline void xbc_free_mem(void *addr, size_t size, bool early)
{
free(addr);
}
@@ -898,19 +901,20 @@ static int __init xbc_parse_tree(void)
}
/**
- * xbc_exit() - Clean up all parsed bootconfig
+ * _xbc_exit() - Clean up all parsed bootconfig
+ * @early: Set true if this is called before budy system is initialized.
*
* This clears all data structures of parsed bootconfig on memory.
* If you need to reuse xbc_init() with new boot config, you can
* use this.
*/
-void __init xbc_exit(void)
+void __init _xbc_exit(bool early)
{
- xbc_free_mem(xbc_data, xbc_data_size);
+ xbc_free_mem(xbc_data, xbc_data_size, early);
xbc_data = NULL;
xbc_data_size = 0;
xbc_node_num = 0;
- xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX);
+ xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX, early);
xbc_nodes = NULL;
brace_index = 0;
}
@@ -963,7 +967,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
if (!xbc_nodes) {
if (emsg)
*emsg = "Failed to allocate bootconfig nodes";
- xbc_exit();
+ _xbc_exit(true);
return -ENOMEM;
}
memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX);
@@ -977,7 +981,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
*epos = xbc_err_pos;
if (emsg)
*emsg = xbc_err_msg;
- xbc_exit();
+ _xbc_exit(true);
} else
ret = xbc_node_num;
diff --git a/lib/checksum_kunit.c b/lib/checksum_kunit.c
index bf70850035c76f..404dba36bae380 100644
--- a/lib/checksum_kunit.c
+++ b/lib/checksum_kunit.c
@@ -594,13 +594,15 @@ static void test_ip_fast_csum(struct kunit *test)
static void test_csum_ipv6_magic(struct kunit *test)
{
-#if defined(CONFIG_NET)
const struct in6_addr *saddr;
const struct in6_addr *daddr;
unsigned int len;
unsigned char proto;
__wsum csum;
+ if (!IS_ENABLED(CONFIG_NET))
+ return;
+
const int daddr_offset = sizeof(struct in6_addr);
const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr);
const int proto_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) +
@@ -618,7 +620,6 @@ static void test_csum_ipv6_magic(struct kunit *test)
CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]),
csum_ipv6_magic(saddr, daddr, len, proto, csum));
}
-#endif /* !CONFIG_NET */
}
static struct kunit_case __refdata checksum_test_cases[] = {
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index af6cc19a200331..68c97387aa54e2 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -330,7 +330,7 @@ static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
stack = current_pool + pool_offset;
/* Pre-initialize handle once. */
- stack->handle.pool_index = pool_index + 1;
+ stack->handle.pool_index_plus_1 = pool_index + 1;
stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
stack->handle.extra = 0;
INIT_LIST_HEAD(&stack->hash_list);
@@ -441,7 +441,7 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
const int pools_num_cached = READ_ONCE(pools_num);
union handle_parts parts = { .handle = handle };
void *pool;
- u32 pool_index = parts.pool_index - 1;
+ u32 pool_index = parts.pool_index_plus_1 - 1;
size_t offset = parts.offset << DEPOT_STACK_ALIGN;
struct stack_record *stack;
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
index 276c12140ee26d..c288df9372ede1 100644
--- a/lib/test_ubsan.c
+++ b/lib/test_ubsan.c
@@ -134,7 +134,7 @@ static const test_ubsan_fp test_ubsan_array[] = {
};
/* Excluded because they Oops the module. */
-static const test_ubsan_fp skip_ubsan_array[] = {
+static __used const test_ubsan_fp skip_ubsan_array[] = {
test_ubsan_divrem_overflow,
};
diff --git a/mm/Makefile b/mm/Makefile
index e4b5b75aaec9c1..4abb40b911ec43 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -29,8 +29,7 @@ KCOV_INSTRUMENT_mmzone.o := n
KCOV_INSTRUMENT_vmstat.o := n
KCOV_INSTRUMENT_failslab.o := n
-CFLAGS_init-mm.o += $(call cc-disable-warning, override-init)
-CFLAGS_init-mm.o += $(call cc-disable-warning, initializer-overrides)
+CFLAGS_init-mm.o += -Wno-override-init
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := highmem.o memory.o mincore.o \
diff --git a/mm/gup.c b/mm/gup.c
index df83182ec72d5d..1611e73b1121b1 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1206,6 +1206,22 @@ static long __get_user_pages(struct mm_struct *mm,
/* first iteration or cross vma bound */
if (!vma || start >= vma->vm_end) {
+ /*
+ * MADV_POPULATE_(READ|WRITE) wants to handle VMA
+ * lookups+error reporting differently.
+ */
+ if (gup_flags & FOLL_MADV_POPULATE) {
+ vma = vma_lookup(mm, start);
+ if (!vma) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (check_vma_flags(vma, gup_flags)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ goto retry;
+ }
vma = gup_vma_lookup(mm, start);
if (!vma && in_gate_area(mm, start)) {
ret = get_gate_page(mm, start & PAGE_MASK,
@@ -1653,20 +1669,22 @@ long populate_vma_page_range(struct vm_area_struct *vma,
if (vma->vm_flags & VM_LOCKONFAULT)
return nr_pages;
+ /* ... similarly, we've never faulted in PROT_NONE pages */
+ if (!vma_is_accessible(vma))
+ return -EFAULT;
+
gup_flags = FOLL_TOUCH;
/*
* We want to touch writable mappings with a write fault in order
* to break COW, except for shared mappings because these don't COW
* and we would not want to dirty them for nothing.
+ *
+ * Otherwise, do a read fault, and use FOLL_FORCE in case it's not
+ * readable (ie write-only or executable).
*/
if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
gup_flags |= FOLL_WRITE;
-
- /*
- * We want mlock to succeed for regions that have any permissions
- * other than PROT_NONE.
- */
- if (vma_is_accessible(vma))
+ else
gup_flags |= FOLL_FORCE;
if (locked)
@@ -1683,35 +1701,35 @@ long populate_vma_page_range(struct vm_area_struct *vma,
}
/*
- * faultin_vma_page_range() - populate (prefault) page tables inside the
- * given VMA range readable/writable
+ * faultin_page_range() - populate (prefault) page tables inside the
+ * given range readable/writable
*
* This takes care of mlocking the pages, too, if VM_LOCKED is set.
*
- * @vma: target vma
+ * @mm: the mm to populate page tables in
* @start: start address
* @end: end address
* @write: whether to prefault readable or writable
* @locked: whether the mmap_lock is still held
*
- * Returns either number of processed pages in the vma, or a negative error
- * code on error (see __get_user_pages()).
+ * Returns either number of processed pages in the MM, or a negative error
+ * code on error (see __get_user_pages()). Note that this function reports
+ * errors related to VMAs, such as incompatible mappings, as expected by
+ * MADV_POPULATE_(READ|WRITE).
*
- * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
- * covered by the VMA. If it's released, *@locked will be set to 0.
+ * The range must be page-aligned.
+ *
+ * mm->mmap_lock must be held. If it's released, *@locked will be set to 0.
*/
-long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, bool write, int *locked)
+long faultin_page_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool write, int *locked)
{
- struct mm_struct *mm = vma->vm_mm;
unsigned long nr_pages = (end - start) / PAGE_SIZE;
int gup_flags;
long ret;
VM_BUG_ON(!PAGE_ALIGNED(start));
VM_BUG_ON(!PAGE_ALIGNED(end));
- VM_BUG_ON_VMA(start < vma->vm_start, vma);
- VM_BUG_ON_VMA(end > vma->vm_end, vma);
mmap_assert_locked(mm);
/*
@@ -1723,19 +1741,13 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
* a poisoned page.
* !FOLL_FORCE: Require proper access permissions.
*/
- gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE;
+ gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE |
+ FOLL_MADV_POPULATE;
if (write)
gup_flags |= FOLL_WRITE;
- /*
- * We want to report -EINVAL instead of -EFAULT for any permission
- * problems or incompatible mappings.
- */
- if (check_vma_flags(vma, gup_flags))
- return -EINVAL;
-
- ret = __get_user_pages(mm, start, nr_pages, gup_flags,
- NULL, locked);
+ ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked,
+ gup_flags);
lru_add_drain();
return ret;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9859aa4f755380..89f58c7603b255 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2259,9 +2259,6 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
goto unlock_ptls;
}
- folio_move_anon_rmap(src_folio, dst_vma);
- WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
-
src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
/* Folio got pinned from under us. Put it back and fail the move. */
if (folio_maybe_dma_pinned(src_folio)) {
@@ -2270,6 +2267,9 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
goto unlock_ptls;
}
+ folio_move_anon_rmap(src_folio, dst_vma);
+ WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+
_dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
_dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 23ef240ba48a60..31d00eee028f11 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7044,9 +7044,13 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
if (!pte_same(pte, newpte))
set_huge_pte_at(mm, address, ptep, newpte, psize);
} else if (unlikely(is_pte_marker(pte))) {
- /* No other markers apply for now. */
- WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
- if (uffd_wp_resolve)
+ /*
+ * Do nothing on a poison marker; page is
+ * corrupted, permissons do not apply. Here
+ * pte_marker_uffd_wp()==true implies !poison
+ * because they're mutual exclusive.
+ */
+ if (pte_marker_uffd_wp(pte) && uffd_wp_resolve)
/* Safe to modify directly (non-present->none). */
huge_pte_clear(mm, address, ptep, psize);
} else if (!huge_pte_none(pte)) {
diff --git a/mm/internal.h b/mm/internal.h
index 7e486f2c502cee..07ad2675a88b47 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -686,9 +686,8 @@ struct anon_vma *folio_anon_vma(struct folio *folio);
void unmap_mapping_folio(struct folio *folio);
extern long populate_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *locked);
-extern long faultin_vma_page_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- bool write, int *locked);
+extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, bool write, int *locked);
extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
unsigned long bytes);
@@ -1127,10 +1126,13 @@ enum {
FOLL_FAST_ONLY = 1 << 20,
/* allow unlocking the mmap lock */
FOLL_UNLOCKABLE = 1 << 21,
+ /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
+ FOLL_MADV_POPULATE = 1 << 22,
};
#define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
- FOLL_FAST_ONLY | FOLL_UNLOCKABLE)
+ FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
+ FOLL_MADV_POPULATE)
/*
* Indicates for which pages that are write-protected in the page table,
diff --git a/mm/madvise.c b/mm/madvise.c
index 44a498c94158c8..1a073fcc4c0c02 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -908,27 +908,14 @@ static long madvise_populate(struct vm_area_struct *vma,
{
const bool write = behavior == MADV_POPULATE_WRITE;
struct mm_struct *mm = vma->vm_mm;
- unsigned long tmp_end;
int locked = 1;
long pages;
*prev = vma;
while (start < end) {
- /*
- * We might have temporarily dropped the lock. For example,
- * our VMA might have been split.
- */
- if (!vma || start >= vma->vm_end) {
- vma = vma_lookup(mm, start);
- if (!vma)
- return -ENOMEM;
- }
-
- tmp_end = min_t(unsigned long, end, vma->vm_end);
/* Populate (prefault) page tables readable/writable. */
- pages = faultin_vma_page_range(vma, start, tmp_end, write,
- &locked);
+ pages = faultin_page_range(mm, start, end, write, &locked);
if (!locked) {
mmap_read_lock(mm);
locked = 1;
@@ -949,7 +936,7 @@ static long madvise_populate(struct vm_area_struct *vma,
pr_warn_once("%s: unhandled return value: %ld\n",
__func__, pages);
fallthrough;
- case -ENOMEM:
+ case -ENOMEM: /* No VMA or out of memory. */
return -ENOMEM;
}
}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9349948f1abfd1..9e62a00b46ddee 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -154,11 +154,23 @@ static int __page_handle_poison(struct page *page)
{
int ret;
- zone_pcp_disable(page_zone(page));
+ /*
+ * zone_pcp_disable() can't be used here. It will
+ * hold pcp_batch_high_lock and dissolve_free_huge_page() might hold
+ * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap
+ * optimization is enabled. This will break current lock dependency
+ * chain and leads to deadlock.
+ * Disabling pcp before dissolving the page was a deterministic
+ * approach because we made sure that those pages cannot end up in any
+ * PCP list. Draining PCP lists expels those pages to the buddy system,
+ * but nothing guarantees that those pages do not get back to a PCP
+ * queue if we need to refill those.
+ */
ret = dissolve_free_huge_page(page);
- if (!ret)
+ if (!ret) {
+ drain_all_pages(page_zone(page));
ret = take_page_off_buddy(page);
- zone_pcp_enable(page_zone(page));
+ }
return ret;
}
diff --git a/mm/memory.c b/mm/memory.c
index 904f70b994985a..d2155ced45f8f8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5973,6 +5973,10 @@ int follow_phys(struct vm_area_struct *vma,
goto out;
pte = ptep_get(ptep);
+ /* Never return PFNs of anon folios in COW mappings. */
+ if (vm_normal_folio(vma, address, pte))
+ goto unlock;
+
if ((flags & FOLL_WRITE) && !pte_write(pte))
goto unlock;
diff --git a/mm/page_owner.c b/mm/page_owner.c
index d17d1351ec84af..742f432e5bf06f 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -118,7 +118,6 @@ static __init void init_page_owner(void)
register_dummy_stack();
register_failure_stack();
register_early_stack();
- static_branch_enable(&page_owner_inited);
init_early_allocated_pages();
/* Initialize dummy and failure stacks and link them to stack_list */
dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle);
@@ -129,6 +128,7 @@ static __init void init_page_owner(void)
refcount_set(&failure_stack.stack_record->count, 1);
dummy_stack.next = &failure_stack;
stack_list = &dummy_stack;
+ static_branch_enable(&page_owner_inited);
}
struct page_ext_operations page_owner_ops = {
@@ -196,7 +196,8 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
spin_unlock_irqrestore(&stack_list_lock, flags);
}
-static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
+static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
+ int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
@@ -217,20 +218,74 @@ static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
/* Add the new stack_record to our list */
add_stack_record_to_list(stack_record, gfp_mask);
}
- refcount_inc(&stack_record->count);
+ refcount_add(nr_base_pages, &stack_record->count);
}
-static void dec_stack_record_count(depot_stack_handle_t handle)
+static void dec_stack_record_count(depot_stack_handle_t handle,
+ int nr_base_pages)
{
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
- if (stack_record)
- refcount_dec(&stack_record->count);
+ if (!stack_record)
+ return;
+
+ if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
+ pr_warn("%s: refcount went to 0 for %u handle\n", __func__,
+ handle);
}
-void __reset_page_owner(struct page *page, unsigned short order)
+static inline void __update_page_owner_handle(struct page_ext *page_ext,
+ depot_stack_handle_t handle,
+ unsigned short order,
+ gfp_t gfp_mask,
+ short last_migrate_reason, u64 ts_nsec,
+ pid_t pid, pid_t tgid, char *comm)
{
int i;
+ struct page_owner *page_owner;
+
+ for (i = 0; i < (1 << order); i++) {
+ page_owner = get_page_owner(page_ext);
+ page_owner->handle = handle;
+ page_owner->order = order;
+ page_owner->gfp_mask = gfp_mask;
+ page_owner->last_migrate_reason = last_migrate_reason;
+ page_owner->pid = pid;
+ page_owner->tgid = tgid;
+ page_owner->ts_nsec = ts_nsec;
+ strscpy(page_owner->comm, comm,
+ sizeof(page_owner->comm));
+ __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+static inline void __update_page_owner_free_handle(struct page_ext *page_ext,
+ depot_stack_handle_t handle,
+ unsigned short order,
+ pid_t pid, pid_t tgid,
+ u64 free_ts_nsec)
+{
+ int i;
+ struct page_owner *page_owner;
+
+ for (i = 0; i < (1 << order); i++) {
+ page_owner = get_page_owner(page_ext);
+ /* Only __reset_page_owner() wants to clear the bit */
+ if (handle) {
+ __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+ page_owner->free_handle = handle;
+ }
+ page_owner->free_ts_nsec = free_ts_nsec;
+ page_owner->free_pid = current->pid;
+ page_owner->free_tgid = current->tgid;
+ page_ext = page_ext_next(page_ext);
+ }
+}
+
+void __reset_page_owner(struct page *page, unsigned short order)
+{
struct page_ext *page_ext;
depot_stack_handle_t handle;
depot_stack_handle_t alloc_handle;
@@ -245,16 +300,10 @@ void __reset_page_owner(struct page *page, unsigned short order)
alloc_handle = page_owner->handle;
handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
- for (i = 0; i < (1 << order); i++) {
- __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
- page_owner->free_handle = handle;
- page_owner->free_ts_nsec = free_ts_nsec;
- page_owner->free_pid = current->pid;
- page_owner->free_tgid = current->tgid;
- page_ext = page_ext_next(page_ext);
- page_owner = get_page_owner(page_ext);
- }
+ __update_page_owner_free_handle(page_ext, handle, order, current->pid,
+ current->tgid, free_ts_nsec);
page_ext_put(page_ext);
+
if (alloc_handle != early_handle)
/*
* early_handle is being set as a handle for all those
@@ -263,39 +312,14 @@ void __reset_page_owner(struct page *page, unsigned short order)
* the machinery is not ready yet, we cannot decrement
* their refcount either.
*/
- dec_stack_record_count(alloc_handle);
-}
-
-static inline void __set_page_owner_handle(struct page_ext *page_ext,
- depot_stack_handle_t handle,
- unsigned short order, gfp_t gfp_mask)
-{
- struct page_owner *page_owner;
- int i;
- u64 ts_nsec = local_clock();
-
- for (i = 0; i < (1 << order); i++) {
- page_owner = get_page_owner(page_ext);
- page_owner->handle = handle;
- page_owner->order = order;
- page_owner->gfp_mask = gfp_mask;
- page_owner->last_migrate_reason = -1;
- page_owner->pid = current->pid;
- page_owner->tgid = current->tgid;
- page_owner->ts_nsec = ts_nsec;
- strscpy(page_owner->comm, current->comm,
- sizeof(page_owner->comm));
- __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
-
- page_ext = page_ext_next(page_ext);
- }
+ dec_stack_record_count(alloc_handle, 1 << order);
}
noinline void __set_page_owner(struct page *page, unsigned short order,
gfp_t gfp_mask)
{
struct page_ext *page_ext;
+ u64 ts_nsec = local_clock();
depot_stack_handle_t handle;
handle = save_stack(gfp_mask);
@@ -303,9 +327,11 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
page_ext = page_ext_get(page);
if (unlikely(!page_ext))
return;
- __set_page_owner_handle(page_ext, handle, order, gfp_mask);
+ __update_page_owner_handle(page_ext, handle, order, gfp_mask, -1,
+ current->pid, current->tgid, ts_nsec,
+ current->comm);
page_ext_put(page_ext);
- inc_stack_record_count(handle, gfp_mask);
+ inc_stack_record_count(handle, gfp_mask, 1 << order);
}
void __set_page_owner_migrate_reason(struct page *page, int reason)
@@ -340,9 +366,12 @@ void __split_page_owner(struct page *page, int old_order, int new_order)
void __folio_copy_owner(struct folio *newfolio, struct folio *old)
{
+ int i;
struct page_ext *old_ext;
struct page_ext *new_ext;
- struct page_owner *old_page_owner, *new_page_owner;
+ struct page_owner *old_page_owner;
+ struct page_owner *new_page_owner;
+ depot_stack_handle_t migrate_handle;
old_ext = page_ext_get(&old->page);
if (unlikely(!old_ext))
@@ -356,30 +385,32 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
old_page_owner = get_page_owner(old_ext);
new_page_owner = get_page_owner(new_ext);
- new_page_owner->order = old_page_owner->order;
- new_page_owner->gfp_mask = old_page_owner->gfp_mask;
- new_page_owner->last_migrate_reason =
- old_page_owner->last_migrate_reason;
- new_page_owner->handle = old_page_owner->handle;
- new_page_owner->pid = old_page_owner->pid;
- new_page_owner->tgid = old_page_owner->tgid;
- new_page_owner->free_pid = old_page_owner->free_pid;
- new_page_owner->free_tgid = old_page_owner->free_tgid;
- new_page_owner->ts_nsec = old_page_owner->ts_nsec;
- new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
- strcpy(new_page_owner->comm, old_page_owner->comm);
-
+ migrate_handle = new_page_owner->handle;
+ __update_page_owner_handle(new_ext, old_page_owner->handle,
+ old_page_owner->order, old_page_owner->gfp_mask,
+ old_page_owner->last_migrate_reason,
+ old_page_owner->ts_nsec, old_page_owner->pid,
+ old_page_owner->tgid, old_page_owner->comm);
+ /*
+ * Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio
+ * will be freed after migration. Keep them until then as they may be
+ * useful.
+ */
+ __update_page_owner_free_handle(new_ext, 0, old_page_owner->order,
+ old_page_owner->free_pid,
+ old_page_owner->free_tgid,
+ old_page_owner->free_ts_nsec);
/*
- * We don't clear the bit on the old folio as it's going to be freed
- * after migration. Until then, the info can be useful in case of
- * a bug, and the overall stats will be off a bit only temporarily.
- * Also, migrate_misplaced_transhuge_page() can still fail the
- * migration and then we want the old folio to retain the info. But
- * in that case we also don't need to explicitly clear the info from
- * the new page, which will be freed.
+ * We linked the original stack to the new folio, we need to do the same
+ * for the new one and the old folio otherwise there will be an imbalance
+ * when subtracting those pages from the stack.
*/
- __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
+ for (i = 0; i < (1 << new_page_owner->order); i++) {
+ old_page_owner->handle = migrate_handle;
+ old_ext = page_ext_next(old_ext);
+ old_page_owner = get_page_owner(old_ext);
+ }
+
page_ext_put(new_ext);
page_ext_put(old_ext);
}
@@ -787,8 +818,9 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
goto ext_put_continue;
/* Found early allocated page */
- __set_page_owner_handle(page_ext, early_handle,
- 0, 0);
+ __update_page_owner_handle(page_ext, early_handle, 0, 0,
+ -1, local_clock(), current->pid,
+ current->tgid, current->comm);
count++;
ext_put_continue:
page_ext_put(page_ext);
@@ -840,13 +872,11 @@ static void *stack_start(struct seq_file *m, loff_t *ppos)
* value of stack_list.
*/
stack = smp_load_acquire(&stack_list);
+ m->private = stack;
} else {
stack = m->private;
- stack = stack->next;
}
- m->private = stack;
-
return stack;
}
@@ -861,11 +891,11 @@ static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
return stack;
}
-static unsigned long page_owner_stack_threshold;
+static unsigned long page_owner_pages_threshold;
static int stack_print(struct seq_file *m, void *v)
{
- int i, stack_count;
+ int i, nr_base_pages;
struct stack *stack = v;
unsigned long *entries;
unsigned long nr_entries;
@@ -876,14 +906,14 @@ static int stack_print(struct seq_file *m, void *v)
nr_entries = stack_record->size;
entries = stack_record->entries;
- stack_count = refcount_read(&stack_record->count) - 1;
+ nr_base_pages = refcount_read(&stack_record->count) - 1;
- if (stack_count < 1 || stack_count < page_owner_stack_threshold)
+ if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)
return 0;
for (i = 0; i < nr_entries; i++)
seq_printf(m, " %pS\n", (void *)entries[i]);
- seq_printf(m, "stack_count: %d\n\n", stack_count);
+ seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages);
return 0;
}
@@ -913,13 +943,13 @@ static const struct file_operations page_owner_stack_operations = {
static int page_owner_threshold_get(void *data, u64 *val)
{
- *val = READ_ONCE(page_owner_stack_threshold);
+ *val = READ_ONCE(page_owner_pages_threshold);
return 0;
}
static int page_owner_threshold_set(void *data, u64 val)
{
- WRITE_ONCE(page_owner_stack_threshold, val);
+ WRITE_ONCE(page_owner_pages_threshold, val);
return 0;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 0aad0d9a621b80..94ab99b6b574a4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -748,12 +748,6 @@ static long shmem_unused_huge_count(struct super_block *sb,
#define shmem_huge SHMEM_HUGE_DENY
-bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
- struct mm_struct *mm, unsigned long vm_flags)
-{
- return false;
-}
-
static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
struct shrink_control *sc, unsigned long nr_to_split)
{
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 22aa63f4ef6322..68fa001648cc1c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -989,6 +989,27 @@ unsigned long vmalloc_nr_pages(void)
return atomic_long_read(&nr_vmalloc_pages);
}
+static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
+{
+ struct rb_node *n = root->rb_node;
+
+ addr = (unsigned long)kasan_reset_tag((void *)addr);
+
+ while (n) {
+ struct vmap_area *va;
+
+ va = rb_entry(n, struct vmap_area, rb_node);
+ if (addr < va->va_start)
+ n = n->rb_left;
+ else if (addr >= va->va_end)
+ n = n->rb_right;
+ else
+ return va;
+ }
+
+ return NULL;
+}
+
/* Look up the first VA which satisfies addr < va_end, NULL if none. */
static struct vmap_area *
__find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root)
@@ -1025,47 +1046,39 @@ __find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root)
static struct vmap_node *
find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va)
{
- struct vmap_node *vn, *va_node = NULL;
- struct vmap_area *va_lowest;
+ unsigned long va_start_lowest;
+ struct vmap_node *vn;
int i;
- for (i = 0; i < nr_vmap_nodes; i++) {
+repeat:
+ for (i = 0, va_start_lowest = 0; i < nr_vmap_nodes; i++) {
vn = &vmap_nodes[i];
spin_lock(&vn->busy.lock);
- va_lowest = __find_vmap_area_exceed_addr(addr, &vn->busy.root);
- if (va_lowest) {
- if (!va_node || va_lowest->va_start < (*va)->va_start) {
- if (va_node)
- spin_unlock(&va_node->busy.lock);
-
- *va = va_lowest;
- va_node = vn;
- continue;
- }
- }
+ *va = __find_vmap_area_exceed_addr(addr, &vn->busy.root);
+
+ if (*va)
+ if (!va_start_lowest || (*va)->va_start < va_start_lowest)
+ va_start_lowest = (*va)->va_start;
spin_unlock(&vn->busy.lock);
}
- return va_node;
-}
-
-static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
-{
- struct rb_node *n = root->rb_node;
+ /*
+ * Check if found VA exists, it might have gone away. In this case we
+ * repeat the search because a VA has been removed concurrently and we
+ * need to proceed to the next one, which is a rare case.
+ */
+ if (va_start_lowest) {
+ vn = addr_to_node(va_start_lowest);
- addr = (unsigned long)kasan_reset_tag((void *)addr);
+ spin_lock(&vn->busy.lock);
+ *va = __find_vmap_area(va_start_lowest, &vn->busy.root);
- while (n) {
- struct vmap_area *va;
+ if (*va)
+ return vn;
- va = rb_entry(n, struct vmap_area, rb_node);
- if (addr < va->va_start)
- n = n->rb_left;
- else if (addr >= va->va_end)
- n = n->rb_right;
- else
- return va;
+ spin_unlock(&vn->busy.lock);
+ goto repeat;
}
return NULL;
@@ -2343,6 +2356,9 @@ struct vmap_area *find_vmap_area(unsigned long addr)
struct vmap_area *va;
int i, j;
+ if (unlikely(!vmap_initialized))
+ return NULL;
+
/*
* An addr_to_node_id(addr) converts an address to a node index
* where a VA is located. If VA spans several zones and passed
diff --git a/net/9p/client.c b/net/9p/client.c
index e265a0ca6bddd4..f7e90b4769bba9 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1583,7 +1583,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
received = rsize;
}
- p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
+ p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received);
if (non_zc) {
int n = copy_to_iter(dataptr, received, to);
@@ -1609,9 +1609,6 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
int total = 0;
*err = 0;
- p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n",
- fid->fid, offset, iov_iter_count(from));
-
while (iov_iter_count(from)) {
int count = iov_iter_count(from);
int rsize = fid->iounit;
@@ -1623,6 +1620,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
if (count < rsize)
rsize = count;
+ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n",
+ fid->fid, offset, rsize, count);
+
/* Don't bother zerocopy for small IO (< 1024) */
if (clnt->trans_mod->zc_request && rsize > 1024) {
req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0,
@@ -1650,7 +1650,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
written = rsize;
}
- p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
+ p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written);
p9_req_put(clnt, req);
iov_iter_revert(from, count - written - iov_iter_count(from));
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 1a3948b8c493ed..196060dc6138af 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -95,7 +95,6 @@ struct p9_poll_wait {
* @unsent_req_list: accounting for requests that haven't been sent
* @rreq: read request
* @wreq: write request
- * @req: current request being processed (if any)
* @tmp_buf: temporary buffer to read in header
* @rc: temporary fcall for reading current frame
* @wpos: write position for current frame
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index c5462486dbca10..282ec581c07201 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev)
spin_lock_bh(&ax25_dev_lock);
#ifdef CONFIG_AX25_DAMA_SLAVE
- ax25_ds_del_timer(ax25_dev);
+ timer_shutdown_sync(&ax25_dev->dama.slave_timer);
#endif
/*
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index b95c36765d045c..2243cec18ecc86 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -3948,7 +3948,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)
spin_lock_bh(&bat_priv->tt.commit_lock);
- while (true) {
+ while (timeout) {
table_size = batadv_tt_local_table_transmit_size(bat_priv);
if (packet_size_max >= table_size)
break;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 1690ae57a09dbb..a7028d38c1f5cc 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2874,7 +2874,7 @@ static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err)
cancel_delayed_work_sync(&hdev->ncmd_timer);
atomic_set(&hdev->cmd_cnt, 1);
- hci_cmd_sync_cancel_sync(hdev, -err);
+ hci_cmd_sync_cancel_sync(hdev, err);
}
/* Suspend HCI device */
@@ -2894,7 +2894,7 @@ int hci_suspend_dev(struct hci_dev *hdev)
return 0;
/* Cancel potentially blocking sync operation before suspend */
- hci_cancel_cmd_sync(hdev, -EHOSTDOWN);
+ hci_cancel_cmd_sync(hdev, EHOSTDOWN);
hci_req_sync_lock(hdev);
ret = hci_suspend_sync(hdev);
@@ -4210,7 +4210,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb)
err = hci_send_frame(hdev, skb);
if (err < 0) {
- hci_cmd_sync_cancel_sync(hdev, err);
+ hci_cmd_sync_cancel_sync(hdev, -err);
return;
}
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 233453807b5099..ce3ff2fa72e58a 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -218,10 +218,12 @@ static int conn_info_min_age_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val > hdev->conn_info_max_age)
+ hci_dev_lock(hdev);
+ if (val == 0 || val > hdev->conn_info_max_age) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->conn_info_min_age = val;
hci_dev_unlock(hdev);
@@ -246,10 +248,12 @@ static int conn_info_max_age_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val < hdev->conn_info_min_age)
+ hci_dev_lock(hdev);
+ if (val == 0 || val < hdev->conn_info_min_age) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->conn_info_max_age = val;
hci_dev_unlock(hdev);
@@ -567,10 +571,12 @@ static int sniff_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val % 2 || val > hdev->sniff_max_interval)
+ hci_dev_lock(hdev);
+ if (val == 0 || val % 2 || val > hdev->sniff_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->sniff_min_interval = val;
hci_dev_unlock(hdev);
@@ -595,10 +601,12 @@ static int sniff_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val % 2 || val < hdev->sniff_min_interval)
+ hci_dev_lock(hdev);
+ if (val == 0 || val % 2 || val < hdev->sniff_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->sniff_max_interval = val;
hci_dev_unlock(hdev);
@@ -850,10 +858,12 @@ static int conn_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_conn_min_interval = val;
hci_dev_unlock(hdev);
@@ -878,10 +888,12 @@ static int conn_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_conn_max_interval = val;
hci_dev_unlock(hdev);
@@ -990,10 +1002,12 @@ static int adv_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_adv_min_interval = val;
hci_dev_unlock(hdev);
@@ -1018,10 +1032,12 @@ static int adv_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_adv_max_interval = val;
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4ae2248240121c..a8b8cfebe0180c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3208,6 +3208,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
if (test_bit(HCI_ENCRYPT, &hdev->flags))
set_bit(HCI_CONN_ENCRYPT, &conn->flags);
+ /* "Link key request" completed ahead of "connect request" completes */
+ if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) &&
+ ev->link_type == ACL_LINK) {
+ struct link_key *key;
+ struct hci_cp_read_enc_key_size cp;
+
+ key = hci_find_link_key(hdev, &ev->bdaddr);
+ if (key) {
+ set_bit(HCI_CONN_ENCRYPT, &conn->flags);
+
+ if (!(hdev->commands[20] & 0x10)) {
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ } else {
+ cp.handle = cpu_to_le16(conn->handle);
+ if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE,
+ sizeof(cp), &cp)) {
+ bt_dev_err(hdev, "sending read key size failed");
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ }
+ }
+
+ hci_encrypt_cfm(conn, ev->status);
+ }
+ }
+
/* Get remote features */
if (conn->type == ACL_LINK) {
struct hci_cp_read_remote_features cp;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 00e02138003ece..efea25eb56ce03 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -105,8 +105,10 @@ void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
if (hdev->req_status == HCI_REQ_PEND) {
hdev->req_result = result;
hdev->req_status = HCI_REQ_DONE;
- if (skb)
+ if (skb) {
+ kfree_skb(hdev->req_skb);
hdev->req_skb = skb_get(skb);
+ }
wake_up_interruptible(&hdev->req_wait_q);
}
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 4ee1b976678b25..703b84bd48d5be 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1946,10 +1946,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
switch (optname) {
case HCI_DATA_DIR:
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
if (opt)
hci_pi(sk)->cmsg_mask |= HCI_CMSG_DIR;
@@ -1958,10 +1957,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
break;
case HCI_TIME_STAMP:
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
if (opt)
hci_pi(sk)->cmsg_mask |= HCI_CMSG_TSTAMP;
@@ -1979,11 +1977,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
uf.event_mask[1] = *((u32 *) f->event_mask + 1);
}
- len = min_t(unsigned int, len, sizeof(uf));
- if (copy_from_sockptr(&uf, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&uf, sizeof(uf), optval, len);
+ if (err)
break;
- }
if (!capable(CAP_NET_RAW)) {
uf.type_mask &= hci_sec_filter.type_mask;
@@ -2042,10 +2038,9 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
goto done;
}
- if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+ if (err)
break;
- }
hci_pi(sk)->mtu = opt;
break;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index f6b662369322b3..c5d8799046ccff 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -617,7 +617,10 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err)
bt_dev_dbg(hdev, "err 0x%2.2x", err);
if (hdev->req_status == HCI_REQ_PEND) {
- hdev->req_result = err;
+ /* req_result is __u32 so error must be positive to be properly
+ * propagated.
+ */
+ hdev->req_result = err < 0 ? -err : err;
hdev->req_status = HCI_REQ_CANCELED;
wake_up_interruptible(&hdev->req_wait_q);
@@ -2811,8 +2814,8 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
if (qos->bcast.in.phy & BT_ISO_PHY_CODED) {
cp->scanning_phys |= LE_SCAN_PHY_CODED;
hci_le_scan_phy_params(phy, type,
- interval,
- window);
+ interval * 3,
+ window * 3);
num_phy++;
phy++;
}
@@ -2832,7 +2835,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
if (scan_coded(hdev)) {
cp->scanning_phys |= LE_SCAN_PHY_CODED;
- hci_le_scan_phy_params(phy, type, interval, window);
+ hci_le_scan_phy_params(phy, type, interval * 3, window * 3);
num_phy++;
phy++;
}
@@ -3416,7 +3419,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev)
if (ret < 0 || !bacmp(&ba, BDADDR_ANY))
return;
- bacpy(&hdev->public_addr, &ba);
+ if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks))
+ baswap(&hdev->public_addr, &ba);
+ else
+ bacpy(&hdev->public_addr, &ba);
}
struct hci_init_stage {
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index c8793e57f4b547..ef0cc80b4c0cc1 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -1451,8 +1451,8 @@ static bool check_ucast_qos(struct bt_iso_qos *qos)
static bool check_bcast_qos(struct bt_iso_qos *qos)
{
- if (qos->bcast.sync_factor == 0x00)
- return false;
+ if (!qos->bcast.sync_factor)
+ qos->bcast.sync_factor = 0x01;
if (qos->bcast.packing > 0x01)
return false;
@@ -1475,6 +1475,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
if (qos->bcast.skip > 0x01f3)
return false;
+ if (!qos->bcast.sync_timeout)
+ qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
if (qos->bcast.sync_timeout < 0x000a || qos->bcast.sync_timeout > 0x4000)
return false;
@@ -1484,6 +1487,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
if (qos->bcast.mse > 0x1f)
return false;
+ if (!qos->bcast.timeout)
+ qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
if (qos->bcast.timeout < 0x000a || qos->bcast.timeout > 0x4000)
return false;
@@ -1494,7 +1500,7 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
- int len, err = 0;
+ int err = 0;
struct bt_iso_qos qos = default_qos;
u32 opt;
@@ -1509,10 +1515,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -1521,10 +1526,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_PKT_STATUS:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
@@ -1539,17 +1543,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- len = min_t(unsigned int, sizeof(qos), optlen);
-
- if (copy_from_sockptr(&qos, optval, len)) {
- err = -EFAULT;
- break;
- }
-
- if (len == sizeof(qos.ucast) && !check_ucast_qos(&qos)) {
- err = -EINVAL;
+ err = bt_copy_from_sockptr(&qos, sizeof(qos), optval, optlen);
+ if (err)
break;
- }
iso_pi(sk)->qos = qos;
iso_pi(sk)->qos_user_set = true;
@@ -1564,18 +1560,16 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
}
if (optlen > sizeof(iso_pi(sk)->base)) {
- err = -EOVERFLOW;
+ err = -EINVAL;
break;
}
- len = min_t(unsigned int, sizeof(iso_pi(sk)->base), optlen);
-
- if (copy_from_sockptr(iso_pi(sk)->base, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(iso_pi(sk)->base, optlen, optval,
+ optlen);
+ if (err)
break;
- }
- iso_pi(sk)->base_len = len;
+ iso_pi(sk)->base_len = optlen;
break;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 467b242d8be071..dc089740879363 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -4054,8 +4054,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
return -EPROTO;
hci_dev_lock(hdev);
- if (hci_dev_test_flag(hdev, HCI_MGMT) &&
- !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
+ if (hci_dev_test_flag(hdev, HCI_MGMT))
mgmt_device_connected(hdev, hcon, NULL, 0);
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 4287aa6cc988e3..e7d810b23082f5 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -727,7 +727,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
struct sock *sk = sock->sk;
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
struct l2cap_options opts;
- int len, err = 0;
+ int err = 0;
u32 opt;
BT_DBG("sk %p", sk);
@@ -754,11 +754,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
opts.max_tx = chan->max_tx;
opts.txwin_size = chan->tx_win;
- len = min_t(unsigned int, sizeof(opts), optlen);
- if (copy_from_sockptr(&opts, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen);
+ if (err)
break;
- }
if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) {
err = -EINVAL;
@@ -801,10 +799,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
break;
case L2CAP_LM:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt & L2CAP_LM_FIPS) {
err = -EINVAL;
@@ -885,7 +882,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
struct bt_security sec;
struct bt_power pwr;
struct l2cap_conn *conn;
- int len, err = 0;
+ int err = 0;
u32 opt;
u16 mtu;
u8 mode;
@@ -911,11 +908,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
sec.level = BT_SECURITY_LOW;
- len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_sockptr(&sec, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+ if (err)
break;
- }
if (sec.level < BT_SECURITY_LOW ||
sec.level > BT_SECURITY_FIPS) {
@@ -960,10 +955,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt) {
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -975,10 +969,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_FLUSHABLE:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt > BT_FLUSHABLE_ON) {
err = -EINVAL;
@@ -1010,11 +1003,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
pwr.force_active = BT_POWER_FORCE_ACTIVE_ON;
- len = min_t(unsigned int, sizeof(pwr), optlen);
- if (copy_from_sockptr(&pwr, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen);
+ if (err)
break;
- }
if (pwr.force_active)
set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
@@ -1023,10 +1014,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_CHANNEL_POLICY:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
err = -EOPNOTSUPP;
break;
@@ -1055,10 +1045,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&mtu, optval, sizeof(u16))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen);
+ if (err)
break;
- }
if (chan->mode == L2CAP_MODE_EXT_FLOWCTL &&
sk->sk_state == BT_CONNECTED)
@@ -1086,10 +1075,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&mode, optval, sizeof(u8))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen);
+ if (err)
break;
- }
BT_DBG("mode %u", mode);
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index b54e8a530f55a1..29aa07e9db9d71 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -629,7 +629,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname,
switch (optname) {
case RFCOMM_LM:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+ if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) {
err = -EFAULT;
break;
}
@@ -664,7 +664,6 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
struct sock *sk = sock->sk;
struct bt_security sec;
int err = 0;
- size_t len;
u32 opt;
BT_DBG("sk %p", sk);
@@ -686,11 +685,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
sec.level = BT_SECURITY_LOW;
- len = min_t(unsigned int, sizeof(sec), optlen);
- if (copy_from_sockptr(&sec, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+ if (err)
break;
- }
if (sec.level > BT_SECURITY_HIGH) {
err = -EINVAL;
@@ -706,10 +703,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 43daf965a01e4a..368e026f4d15ca 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -824,7 +824,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
- int len, err = 0;
+ int err = 0;
struct bt_voice voice;
u32 opt;
struct bt_codecs *codecs;
@@ -843,10 +843,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -863,11 +862,10 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
voice.setting = sco_pi(sk)->setting;
- len = min_t(unsigned int, sizeof(voice), optlen);
- if (copy_from_sockptr(&voice, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&voice, sizeof(voice), optval,
+ optlen);
+ if (err)
break;
- }
/* Explicitly check for these values */
if (voice.setting != BT_VOICE_TRANSPARENT &&
@@ -890,10 +888,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_PKT_STATUS:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
@@ -934,9 +931,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(buffer, optval, optlen)) {
+ err = bt_copy_from_sockptr(buffer, optlen, optval, optlen);
+ if (err) {
hci_dev_put(hdev);
- err = -EFAULT;
break;
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index f21097e7348278..ceaa5a89b947fc 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
return netif_receive_skb(skb);
}
-static int br_pass_frame_up(struct sk_buff *skb)
+static int br_pass_frame_up(struct sk_buff *skb, bool promisc)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
struct net_bridge *br = netdev_priv(brdev);
@@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb)
br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
BR_MCAST_DIR_TX);
+ BR_INPUT_SKB_CB(skb)->promisc = promisc;
+
return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
dev_net(indev), NULL, skb, indev, NULL,
br_netif_receive_skb);
@@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
struct net_bridge_mcast *brmctx;
struct net_bridge_vlan *vlan;
struct net_bridge *br;
+ bool promisc;
u16 vid = 0;
u8 state;
@@ -137,7 +140,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (p->flags & BR_LEARNING)
br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
- local_rcv = !!(br->dev->flags & IFF_PROMISC);
+ promisc = !!(br->dev->flags & IFF_PROMISC);
+ local_rcv = promisc;
+
if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
/* by definition the broadcast is also a multicast address */
if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) {
@@ -200,7 +205,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
unsigned long now = jiffies;
if (test_bit(BR_FDB_LOCAL, &dst->flags))
- return br_pass_frame_up(skb);
+ return br_pass_frame_up(skb, false);
if (now != dst->used)
dst->used = now;
@@ -213,7 +218,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
}
if (local_rcv)
- return br_pass_frame_up(skb);
+ return br_pass_frame_up(skb, promisc);
out:
return 0;
@@ -386,6 +391,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
goto forward;
}
+ BR_INPUT_SKB_CB(skb)->promisc = false;
+
/* The else clause should be hit when nf_hook():
* - returns < 0 (drop/error)
* - returns = 0 (stolen/nf_queue)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 35e10c5a766d55..22e35623c148ac 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -600,11 +600,17 @@ static unsigned int br_nf_local_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
+ bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
struct nf_conntrack *nfct = skb_nfct(skb);
const struct nf_ct_hook *ct_hook;
struct nf_conn *ct;
int ret;
+ if (promisc) {
+ nf_reset_ct(skb);
+ return NF_ACCEPT;
+ }
+
if (!nfct || skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 86ea5e6689b5ce..d4bedc87b1d8f1 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -589,6 +589,7 @@ struct br_input_skb_cb {
#endif
u8 proxyarp_replied:1;
u8 src_port_isolated:1;
+ u8 promisc:1;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
u8 vlan_filtered:1;
#endif
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 99d82676f780ac..cbd0e3586c3f61 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
struct ebt_table_info *newinfo;
struct ebt_replace tmp;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1423,6 +1425,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len)
{
struct ebt_replace hlp;
+ if (len < sizeof(hlp))
+ return -EINVAL;
if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
@@ -2352,6 +2356,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg,
{
struct compat_ebt_replace hlp;
+ if (len < sizeof(hlp))
+ return -EINVAL;
if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 6f877e31709bad..c3c51b9a68265b 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -294,18 +294,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- enum ip_conntrack_info ctinfo;
+ bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
+ struct nf_conntrack *nfct = skb_nfct(skb);
struct nf_conn *ct;
- if (skb->pkt_type == PACKET_HOST)
+ if (promisc) {
+ nf_reset_ct(skb);
+ return NF_ACCEPT;
+ }
+
+ if (!nfct || skb->pkt_type == PACKET_HOST)
return NF_ACCEPT;
/* nf_conntrack_confirm() cannot handle concurrent clones,
* this happens for broad/multicast frames with e.g. macvlan on top
* of the bridge device.
*/
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
return NF_ACCEPT;
/* let inet prerouting call conntrack again */
diff --git a/net/core/dev.c b/net/core/dev.c
index 9a67003e49db87..331848eca7d310 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -429,7 +429,7 @@ EXPORT_PER_CPU_SYMBOL(softnet_data);
* PP consumers must pay attention to run APIs in the appropriate context
* (e.g. NAPI context).
*/
-static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool);
+static DEFINE_PER_CPU(struct page_pool *, system_page_pool);
#ifdef CONFIG_LOCKDEP
/*
@@ -3775,6 +3775,10 @@ no_lock_out:
return rc;
}
+ if (unlikely(READ_ONCE(q->owner) == smp_processor_id())) {
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_RECLASSIFY_LOOP);
+ return NET_XMIT_DROP;
+ }
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
@@ -3814,7 +3818,9 @@ no_lock_out:
qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
+ WRITE_ONCE(q->owner, smp_processor_id());
rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
+ WRITE_ONCE(q->owner, -1);
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
diff --git a/net/core/gro.c b/net/core/gro.c
index ee30d4f0c03876..83f35d99a682c2 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -192,8 +192,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
}
merge:
- /* sk owenrship - if any - completely transferred to the aggregated packet */
+ /* sk ownership - if any - completely transferred to the aggregated packet */
skb->destructor = NULL;
+ skb->sk = NULL;
delta_truesize = skb->truesize;
if (offset > headlen) {
unsigned int eat = offset - headlen;
diff --git a/net/core/sock.c b/net/core/sock.c
index 43bf3818c19e82..0963689a59506a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -482,7 +482,7 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
unsigned long flags;
struct sk_buff_head *list = &sk->sk_receive_queue;
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
+ if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
atomic_inc(&sk->sk_drops);
trace_sock_rcvqueue_full(sk, skb);
return -ENOMEM;
@@ -552,7 +552,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
skb->dev = NULL;
- if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
+ if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
atomic_inc(&sk->sk_drops);
goto discard_and_relse;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 27d733c0f65e16..8598466a380578 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -411,6 +411,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock *sk;
int err = 0;
+ if (irqs_disabled())
+ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
@@ -933,6 +936,9 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
struct bpf_shtab_elem *elem;
int ret = -ENOENT;
+ if (irqs_disabled())
+ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index c98b5b71ad7c32..e9d45133d6412e 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -132,30 +132,29 @@ static int hsr_dev_open(struct net_device *dev)
{
struct hsr_priv *hsr;
struct hsr_port *port;
- char designation;
+ const char *designation = NULL;
hsr = netdev_priv(dev);
- designation = '\0';
hsr_for_each_port(hsr, port) {
if (port->type == HSR_PT_MASTER)
continue;
switch (port->type) {
case HSR_PT_SLAVE_A:
- designation = 'A';
+ designation = "Slave A";
break;
case HSR_PT_SLAVE_B:
- designation = 'B';
+ designation = "Slave B";
break;
default:
- designation = '?';
+ designation = "Unknown";
}
if (!is_slave_up(port->dev))
- netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n",
+ netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n",
designation, port->dev->name);
}
- if (designation == '\0')
+ if (!designation)
netdev_warn(dev, "No slave devices configured\n");
return 0;
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index e5742f2a2d522a..1b6457f357bdb2 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -220,7 +220,8 @@ void hsr_del_port(struct hsr_port *port)
netdev_update_features(master->dev);
dev_set_mtu(master->dev, hsr_get_max_mtu(hsr));
netdev_rx_handler_unregister(port->dev);
- dev_set_promiscuity(port->dev, -1);
+ if (!port->hsr->fwd_offloaded)
+ dev_set_promiscuity(port->dev, -1);
netdev_upper_dev_unlink(port->dev, master->dev);
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 48741352a88a72..c484b1c0fc00a7 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1050,6 +1050,11 @@ next:
e++;
}
}
+
+ /* Don't let NLM_DONE coalesce into a message, even if it could.
+ * Some user space expects NLM_DONE in a separate recv().
+ */
+ err = skb->len;
out:
cb->args[1] = e;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7d8090f109ef4e..3b38610958ee4b 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -203,8 +203,15 @@ static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
kuid_t sk_uid, bool relax,
bool reuseport_cb_ok, bool reuseport_ok)
{
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- return false;
+ if (ipv6_only_sock(sk2)) {
+ if (sk->sk_family == AF_INET)
+ return false;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ return false;
+#endif
+ }
return inet_bind_conflict(sk, sk2, sk_uid, relax,
reuseport_cb_ok, reuseport_ok);
@@ -287,6 +294,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l
struct sock_reuseport *reuseport_cb;
struct inet_bind_hashbucket *head2;
struct inet_bind2_bucket *tb2;
+ bool conflict = false;
bool reuseport_cb_ok;
rcu_read_lock();
@@ -299,18 +307,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l
spin_lock(&head2->lock);
- inet_bind_bucket_for_each(tb2, &head2->chain)
- if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
- break;
+ inet_bind_bucket_for_each(tb2, &head2->chain) {
+ if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
+ continue;
- if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok)) {
- spin_unlock(&head2->lock);
- return true;
+ if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok))
+ continue;
+
+ conflict = true;
+ break;
}
spin_unlock(&head2->lock);
- return false;
+
+ return conflict;
}
/*
@@ -771,6 +781,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ /* ongoing timer handlers need to acquire socket lock. */
+ sock_not_owned_by_me(sk);
+
+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+
+ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
+ sk_stop_timer_sync(sk, &sk->sk_timer);
+}
+
void inet_csk_delete_keepalive_timer(struct sock *sk)
{
sk_stop_timer(sk, &sk->sk_timer);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 7072fc0783ef56..c88c9034d63004 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -24,6 +24,8 @@
#include <net/ip.h>
#include <net/ipv6.h>
+#include "../core/sock_destructor.h"
+
/* Use skb->cb to track consecutive/adjacent fragments coming at
* the end of the queue. Nodes in the rb-tree queue will
* contain "runs" of one or more adjacent fragments.
@@ -39,6 +41,7 @@ struct ipfrag_skb_cb {
};
struct sk_buff *next_frag;
int frag_run_len;
+ int ip_defrag_offset;
};
#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
@@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
*/
if (!last)
fragrun_create(q, skb); /* First fragment. */
- else if (last->ip_defrag_offset + last->len < end) {
+ else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) {
/* This is the common case: skb goes to the end. */
/* Detect and discard overlaps. */
- if (offset < last->ip_defrag_offset + last->len)
+ if (offset < FRAG_CB(last)->ip_defrag_offset + last->len)
return IPFRAG_OVERLAP;
- if (offset == last->ip_defrag_offset + last->len)
+ if (offset == FRAG_CB(last)->ip_defrag_offset + last->len)
fragrun_append_to_last(q, skb);
else
fragrun_create(q, skb);
@@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
parent = *rbn;
curr = rb_to_skb(parent);
- curr_run_end = curr->ip_defrag_offset +
+ curr_run_end = FRAG_CB(curr)->ip_defrag_offset +
FRAG_CB(curr)->frag_run_len;
- if (end <= curr->ip_defrag_offset)
+ if (end <= FRAG_CB(curr)->ip_defrag_offset)
rbn = &parent->rb_left;
else if (offset >= curr_run_end)
rbn = &parent->rb_right;
- else if (offset >= curr->ip_defrag_offset &&
+ else if (offset >= FRAG_CB(curr)->ip_defrag_offset &&
end <= curr_run_end)
return IPFRAG_DUP;
else
@@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
rb_insert_color(&skb->rbnode, &q->rb_fragments);
}
- skb->ip_defrag_offset = offset;
+ FRAG_CB(skb)->ip_defrag_offset = offset;
return IPFRAG_OK;
}
@@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
struct sk_buff *parent)
{
struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
- struct sk_buff **nextp;
+ void (*destructor)(struct sk_buff *);
+ unsigned int orig_truesize = 0;
+ struct sk_buff **nextp = NULL;
+ struct sock *sk = skb->sk;
int delta;
+ if (sk && is_skb_wmem(skb)) {
+ /* TX: skb->sk might have been passed as argument to
+ * dst->output and must remain valid until tx completes.
+ *
+ * Move sk to reassembled skb and fix up wmem accounting.
+ */
+ orig_truesize = skb->truesize;
+ destructor = skb->destructor;
+ }
+
if (head != skb) {
fp = skb_clone(skb, GFP_ATOMIC);
- if (!fp)
- return NULL;
+ if (!fp) {
+ head = skb;
+ goto out_restore_sk;
+ }
FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
if (RB_EMPTY_NODE(&skb->rbnode))
FRAG_CB(parent)->next_frag = fp;
@@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
&q->rb_fragments);
if (q->fragments_tail == skb)
q->fragments_tail = fp;
+
+ if (orig_truesize) {
+ /* prevent skb_morph from releasing sk */
+ skb->sk = NULL;
+ skb->destructor = NULL;
+ }
skb_morph(skb, head);
FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
rb_replace_node(&head->rbnode, &skb->rbnode,
@@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
consume_skb(head);
head = skb;
}
- WARN_ON(head->ip_defrag_offset != 0);
+ WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0);
delta = -head->truesize;
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
- return NULL;
+ goto out_restore_sk;
delta += head->truesize;
if (delta)
@@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
clone = alloc_skb(0, GFP_ATOMIC);
if (!clone)
- return NULL;
+ goto out_restore_sk;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_frag_list_init(head);
for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
@@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
nextp = &skb_shinfo(head)->frag_list;
}
+out_restore_sk:
+ if (orig_truesize) {
+ int ts_delta = head->truesize - orig_truesize;
+
+ /* if this reassembled skb is fragmented later,
+ * fraglist skbs will get skb->sk assigned from head->sk,
+ * and each frag skb will be released via sock_wfree.
+ *
+ * Update sk_wmem_alloc.
+ */
+ head->sk = sk;
+ head->destructor = destructor;
+ refcount_add(ts_delta, &sk->sk_wmem_alloc);
+ }
+
return nextp;
}
EXPORT_SYMBOL(inet_frag_reasm_prepare);
@@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare);
void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
void *reasm_data, bool try_coalesce)
{
+ struct sock *sk = is_skb_wmem(head) ? head->sk : NULL;
+ const unsigned int head_truesize = head->truesize;
struct sk_buff **nextp = reasm_data;
struct rb_node *rbn;
struct sk_buff *fp;
@@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
head->prev = NULL;
head->tstamp = q->stamp;
head->mono_delivery_time = q->mono_delivery_time;
+
+ if (sk)
+ refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(inet_frag_reasm_finish);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a4941f53b52372..fb947d1613fe2b 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -384,6 +384,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
}
skb_dst_drop(skb);
+ skb_orphan(skb);
return -EINPROGRESS;
insert_error:
@@ -487,7 +488,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
struct ipq *qp;
__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
- skb_orphan(skb);
/* Lookup (or create) queue header */
qp = ip_find(net, ip_hdr(skb), user, vif);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7b16c211b90447..57ddcd8c62f67e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
tpi->flags | TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
} else {
+ if (unlikely(!pskb_may_pull(skb,
+ gre_hdr_len + sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
ver = ershdr->ver;
+ iph = ip_hdr(skb);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 8f6e950163a792..1b991b889506a9 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -329,6 +329,7 @@ config NFT_COMPAT_ARP
config IP_NF_ARPFILTER
tristate "arptables-legacy packet filtering support"
select IP_NF_ARPTABLES
+ select NETFILTER_FAMILY_ARP
depends on NETFILTER_XTABLES
help
ARP packet filtering defines a table `filter', which has a series of
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 2407066b0fec11..14365b20f1c5c0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -964,6 +966,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1254,6 +1258,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1262,6 +1268,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 7da1df4997d057..fe89a056eb06c4 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1108,6 +1108,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1116,6 +1118,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1492,6 +1496,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1500,6 +1506,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 74928a9d1aa48b..535856b0f0edce 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -768,8 +768,10 @@ static int nh_grp_hw_stats_update(struct nexthop *nh, bool *hw_stats_used)
struct net *net = nh->net;
int err;
- if (nexthop_notifiers_is_empty(net))
+ if (nexthop_notifiers_is_empty(net)) {
+ *hw_stats_used = false;
return 0;
+ }
err = nh_notifier_grp_hw_stats_init(&info, nh);
if (err)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c8f76f56dc1653..d36ace160d426f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -926,13 +926,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
peer->rate_last = jiffies;
++peer->n_redirects;
-#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (log_martians &&
+ if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians &&
peer->n_redirects == ip_rt_redirect_number)
net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
&ip_hdr(skb)->saddr, inet_iif(skb),
&ip_hdr(skb)->daddr, &gw);
-#endif
}
out_put_peer:
inet_putpeer(peer);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d20b62d521712a..e767721b3a588b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2931,6 +2931,8 @@ void tcp_close(struct sock *sk, long timeout)
lock_sock(sk);
__tcp_close(sk, timeout);
release_sock(sk);
+ if (!sk->sk_net_refcnt)
+ inet_csk_clear_xmit_timers_sync(sk);
sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 661d0e0d273f61..c02bf011d4a6f4 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -582,6 +582,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk,
}
DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+EXPORT_SYMBOL(udp_encap_needed_key);
+
+#if IS_ENABLED(CONFIG_IPV6)
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+EXPORT_SYMBOL(udpv6_encap_needed_key);
+#endif
+
void udp_encap_enable(void)
{
static_branch_inc(&udp_encap_needed_key);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index b9880743765c6c..3498dd1d0694dc 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -449,8 +449,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
NAPI_GRO_CB(p)->count++;
p->data_len += skb->len;
- /* sk owenrship - if any - completely transferred to the aggregated packet */
+ /* sk ownership - if any - completely transferred to the aggregated packet */
skb->destructor = NULL;
+ skb->sk = NULL;
p->truesize += skb->truesize;
p->len += skb->len;
@@ -551,11 +552,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
unsigned int off = skb_gro_offset(skb);
int flush = 1;
- /* we can do L4 aggregation only if the packet can't land in a tunnel
- * otherwise we could corrupt the inner stream
+ /* We can do L4 aggregation only if the packet can't land in a tunnel
+ * otherwise we could corrupt the inner stream. Detecting such packets
+ * cannot be foolproof and the aggregation might still happen in some
+ * cases. Such packets should be caught in udp_unexpected_gso later.
*/
NAPI_GRO_CB(skb)->is_flist = 0;
if (!sk || !udp_sk(sk)->gro_receive) {
+ /* If the packet was locally encapsulated in a UDP tunnel that
+ * wasn't detected above, do not GRO.
+ */
+ if (skb->encapsulation)
+ goto out;
+
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;
@@ -719,13 +728,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);
return 0;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 247bd4d8ee45a6..779aa6ecdd499b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2091,9 +2091,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
- result = ifp;
- in6_ifa_hold(ifp);
- break;
+ if (in6_ifa_hold_safe(ifp)) {
+ result = ifp;
+ break;
+ }
}
}
}
@@ -5416,10 +5417,11 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
err = 0;
if (fillargs.ifindex) {
- err = -ENODEV;
dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
- if (!dev)
+ if (!dev) {
+ err = -ENODEV;
goto done;
+ }
idev = __in6_dev_get(dev);
if (idev)
err = in6_dump_addrs(idev, skb, cb,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5c558dc1c68386..c1f62352a48145 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -651,19 +651,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (!w) {
/* New dump:
*
- * 1. hook callback destructor.
- */
- cb->args[3] = (long)cb->done;
- cb->done = fib6_dump_done;
-
- /*
- * 2. allocate and initialize walker.
+ * 1. allocate and initialize walker.
*/
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (!w)
return -ENOMEM;
w->func = fib6_dump_node;
cb->args[2] = (long)w;
+
+ /* 2. hook callback destructor.
+ */
+ cb->args[3] = (long)cb->done;
+ cb->done = fib6_dump_done;
+
}
arg.skb = skb;
@@ -1385,7 +1385,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
struct nl_info *info, struct netlink_ext_ack *extack)
{
struct fib6_table *table = rt->fib6_table;
- struct fib6_node *fn, *pn = NULL;
+ struct fib6_node *fn;
+#ifdef CONFIG_IPV6_SUBTREES
+ struct fib6_node *pn = NULL;
+#endif
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
@@ -1409,9 +1412,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
goto out;
}
+#ifdef CONFIG_IPV6_SUBTREES
pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
if (rt->fib6_src.plen) {
struct fib6_node *sn;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index ca7e77e842835a..c89aef524df9a2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb,
struct ip6_tnl *tunnel;
u8 ver;
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ipv6h = ipv6_hdr(skb);
ershdr = (struct erspan_base_hdr *)skb->data;
ver = ershdr->ver;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index fd9f049d6d41e7..131f7bb2110d3a 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1125,6 +1125,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1133,6 +1135,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1501,6 +1505,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1509,6 +1515,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 1a51a44571c372..d0dcbaca19943a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -294,6 +294,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
}
skb_dst_drop(skb);
+ skb_orphan(skb);
return -EINPROGRESS;
insert_error:
@@ -469,7 +470,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
- skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, hdr,
skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7c1e6469d091d2..8b1dd7f512491d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -447,7 +447,7 @@ csum_copy_err:
goto try_again;
}
-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
static_branch_inc(&udpv6_encap_needed_key);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 312bcaeea96fb7..bbd347de00b450 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -174,13 +174,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);
return 0;
}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f03452dc716d5d..f67c1d0218121d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2199,15 +2199,14 @@ static int ieee80211_change_station(struct wiphy *wiphy,
}
if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
- sta->sdata->u.vlan.sta) {
- ieee80211_clear_fast_rx(sta);
+ sta->sdata->u.vlan.sta)
RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL);
- }
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
ieee80211_vif_dec_num_mcast(sta->sdata);
sta->sdata = vlansdata;
+ ieee80211_check_fast_rx(sta);
ieee80211_check_fast_xmit(sta);
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 49da401c53408b..35a8ba25fa57fd 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -158,7 +158,7 @@ do { \
_sdata_dbg(print, sdata, "[link %d] " fmt, \
link_id, ##__VA_ARGS__); \
else \
- _sdata_dbg(1, sdata, fmt, ##__VA_ARGS__); \
+ _sdata_dbg(print, sdata, fmt, ##__VA_ARGS__); \
} while (0)
#define link_dbg(link, fmt, ...) \
_link_id_dbg(1, (link)->sdata, (link)->link_id, \
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b6fead612b66b5..bd507d6b65e3f6 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -131,7 +131,7 @@ struct ieee80211_bss {
};
/**
- * enum ieee80211_corrupt_data_flags - BSS data corruption flags
+ * enum ieee80211_bss_corrupt_data_flags - BSS data corruption flags
* @IEEE80211_BSS_CORRUPT_BEACON: last beacon frame received was corrupted
* @IEEE80211_BSS_CORRUPT_PROBE_RESP: last probe response received was corrupted
*
@@ -144,7 +144,7 @@ enum ieee80211_bss_corrupt_data_flags {
};
/**
- * enum ieee80211_valid_data_flags - BSS valid data flags
+ * enum ieee80211_bss_valid_data_flags - BSS valid data flags
* @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE
* @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE
* @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 47a2cba8313f04..96b70006b7fc0b 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -5874,6 +5874,15 @@ static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata,
}
if (sdata->vif.active_links != active_links) {
+ /* usable links are affected when active_links are changed,
+ * so notify the driver about the status change
+ */
+ changed |= BSS_CHANGED_MLD_VALID_LINKS;
+ active_links &= sdata->vif.active_links;
+ if (!active_links)
+ active_links =
+ BIT(__ffs(sdata->vif.valid_links &
+ ~dormant_links));
ret = ieee80211_set_active_links(&sdata->vif, active_links);
if (ret) {
sdata_info(sdata, "Failed to set TTLM active links\n");
@@ -5888,7 +5897,6 @@ static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata,
goto out;
}
- changed |= BSS_CHANGED_MLD_VALID_LINKS;
sdata->vif.suspended_links = suspended_links;
if (sdata->vif.suspended_links)
changed |= BSS_CHANGED_MLD_TTLM;
@@ -7652,7 +7660,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
sdata_info(sdata,
"failed to insert STA entry for the AP (error %d)\n",
err);
- goto out_err;
+ goto out_release_chan;
}
} else
WARN_ON_ONCE(!ether_addr_equal(link->u.mgd.bssid, cbss->bssid));
@@ -7663,8 +7671,9 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
return 0;
+out_release_chan:
+ ieee80211_link_release_channel(link);
out_err:
- ieee80211_link_release_channel(&sdata->deflink);
ieee80211_vif_set_links(sdata, 0, 0);
return err;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3a1967bc7bad63..7e74b812e366ae 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3937,8 +3937,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
mptcp_set_state(newsk, TCP_CLOSE);
}
} else {
- MPTCP_INC_STATS(sock_net(ssk),
- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
tcpfallback:
newsk->sk_kern_sock = kern;
lock_sock(newsk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index dcd1c76d2a3ba1..73fdf423de44ee 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1493,6 +1493,10 @@ int mptcp_set_rcvlowat(struct sock *sk, int val)
struct mptcp_subflow_context *subflow;
int space, cap;
+ /* bpf can land here with a wrong sk type */
+ if (sk->sk_protocol == IPPROTO_TCP)
+ return -EINVAL;
+
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
else
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 1626dd20c68f1f..6042a47da61be8 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -905,6 +905,8 @@ dispose_child:
return child;
fallback:
+ if (fallback)
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
mptcp_subflow_drop_ctx(child);
return child;
}
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 9505f9d188ff25..6eef15648b7b08 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
proto = veth->h_vlan_encapsulated_proto;
break;
case htons(ETH_P_PPP_SES):
- proto = nf_flow_pppoe_proto(skb);
+ if (!nf_flow_pppoe_proto(skb, &proto))
+ return NF_ACCEPT;
break;
default:
proto = skb->protocol;
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index e45fade7640961..5383bed3d3e002 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -157,7 +157,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
tuple->encap[i].proto = skb->protocol;
break;
case htons(ETH_P_PPP_SES):
- phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+ phdr = (struct pppoe_hdr *)skb_network_header(skb);
tuple->encap[i].id = ntohs(phdr->sid);
tuple->encap[i].proto = skb->protocol;
break;
@@ -273,10 +273,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
return NF_STOLEN;
}
-static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
u32 *offset)
{
struct vlan_ethhdr *veth;
+ __be16 inner_proto;
switch (skb->protocol) {
case htons(ETH_P_8021Q):
@@ -287,7 +288,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
}
break;
case htons(ETH_P_PPP_SES):
- if (nf_flow_pppoe_proto(skb) == proto) {
+ if (nf_flow_pppoe_proto(skb, &inner_proto) &&
+ inner_proto == proto) {
*offset += PPPOE_SES_HLEN;
return true;
}
@@ -316,7 +318,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
skb_reset_network_header(skb);
break;
case htons(ETH_P_PPP_SES):
- skb->protocol = nf_flow_pppoe_proto(skb);
+ skb->protocol = __nf_flow_pppoe_proto(skb);
skb_pull(skb, PPPOE_SES_HLEN);
skb_reset_network_header(skb);
break;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5fa3d3540c93c0..167074283ea91d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -594,6 +594,12 @@ static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
+ nft_set_elem_change_active(ctx->net, set, ext);
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
return 0;
@@ -617,6 +623,7 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
+ nft_set_elem_change_active(ctx->net, set, ext);
nft_setelem_data_deactivate(ctx->net, set, catchall->elem);
break;
}
@@ -626,6 +633,7 @@ static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
{
struct nft_set_iter iter = {
.genmask = nft_genmask_next(ctx->net),
+ .type = NFT_ITER_UPDATE,
.fn = nft_mapelem_deactivate,
};
@@ -1200,6 +1208,26 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
__NFT_TABLE_F_WAS_AWAKEN | \
__NFT_TABLE_F_WAS_ORPHAN)
+static bool nft_table_pending_update(const struct nft_ctx *ctx)
+{
+ struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ struct nft_trans *trans;
+
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return true;
+
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
+ if (trans->ctx.table == ctx->table &&
+ ((trans->msg_type == NFT_MSG_NEWCHAIN &&
+ nft_trans_chain_update(trans)) ||
+ (trans->msg_type == NFT_MSG_DELCHAIN &&
+ nft_is_base_chain(trans->ctx.chain))))
+ return true;
+ }
+
+ return false;
+}
+
static int nf_tables_updtable(struct nft_ctx *ctx)
{
struct nft_trans *trans;
@@ -1226,7 +1254,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
return -EOPNOTSUPP;
/* No dormant off/on/off/on games in single transaction */
- if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ if (nft_table_pending_update(ctx))
return -EINVAL;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
@@ -2430,6 +2458,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_stats __percpu *stats = NULL;
struct nft_chain_hook hook = {};
+ if (table->flags & __NFT_TABLE_F_UPDATE)
+ return -EINVAL;
+
if (flags & NFT_CHAIN_BINDING)
return -EOPNOTSUPP;
@@ -2631,6 +2662,13 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
}
}
+ if (table->flags & __NFT_TABLE_F_UPDATE &&
+ !list_empty(&hook.list)) {
+ NL_SET_BAD_ATTR(extack, attr);
+ err = -EOPNOTSUPP;
+ goto err_hooks;
+ }
+
if (!(table->flags & NFT_TABLE_F_DORMANT) &&
nft_is_base_chain(chain) &&
!list_empty(&hook.list)) {
@@ -2860,6 +2898,9 @@ static int nft_delchain_hook(struct nft_ctx *ctx,
struct nft_trans *trans;
int err;
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return -EOPNOTSUPP;
+
err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook,
ctx->family, chain->flags, extack);
if (err < 0)
@@ -2944,7 +2985,8 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (nla[NFTA_CHAIN_HOOK]) {
- if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN ||
+ chain->flags & NFT_CHAIN_HW_OFFLOAD)
return -EOPNOTSUPP;
if (nft_is_base_chain(chain)) {
@@ -3026,7 +3068,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
{
const struct nft_expr_type *type, *candidate = NULL;
- list_for_each_entry(type, &nf_tables_expressions, list) {
+ list_for_each_entry_rcu(type, &nf_tables_expressions, list) {
if (!nla_strcmp(nla, type->name)) {
if (!type->family && !candidate)
candidate = type;
@@ -3058,9 +3100,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net,
if (nla == NULL)
return ERR_PTR(-EINVAL);
+ rcu_read_lock();
type = __nft_expr_type_get(family, nla);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
@@ -3841,6 +3887,9 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_data *data;
int err;
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
@@ -3864,17 +3913,20 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
{
- u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_iter dummy_iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ };
struct nft_set_elem_catchall *catchall;
+
struct nft_set_ext *ext;
int ret = 0;
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask))
+ if (!nft_set_elem_active(ext, dummy_iter.genmask))
continue;
- ret = nft_setelem_validate(ctx, set, NULL, catchall->elem);
+ ret = nft_setelem_validate(ctx, set, &dummy_iter, catchall->elem);
if (ret < 0)
return ret;
}
@@ -5363,6 +5415,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
return nft_setelem_data_validate(ctx, set, elem_priv);
}
@@ -5407,6 +5464,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
iter.genmask = nft_genmask_next(ctx->net);
+ iter.type = NFT_ITER_UPDATE;
iter.skip = 0;
iter.count = 0;
iter.err = 0;
@@ -5454,6 +5512,13 @@ static int nft_mapelem_activate(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+ /* called from abort path, reverse check to undo changes. */
+ if (nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
+ nft_clear(ctx->net, ext);
nft_setelem_data_activate(ctx->net, set, elem_priv);
return 0;
@@ -5471,6 +5536,7 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx,
if (!nft_set_elem_active(ext, genmask))
continue;
+ nft_clear(ctx->net, ext);
nft_setelem_data_activate(ctx->net, set, catchall->elem);
break;
}
@@ -5480,6 +5546,7 @@ static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
{
struct nft_set_iter iter = {
.genmask = nft_genmask_next(ctx->net),
+ .type = NFT_ITER_UPDATE,
.fn = nft_mapelem_activate,
};
@@ -5744,6 +5811,9 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_set_dump_args *args;
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext))
return 0;
@@ -5854,6 +5924,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
args.skb = skb;
args.reset = dump_ctx->reset;
args.iter.genmask = nft_genmask_cur(net);
+ args.iter.type = NFT_ITER_READ;
args.iter.skip = cb->args[0];
args.iter.count = 0;
args.iter.err = 0;
@@ -6593,7 +6664,7 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
if (nft_setelem_is_catchall(set, elem_priv)) {
- nft_set_elem_change_active(net, set, ext);
+ nft_clear(net, ext);
} else {
set->ops->activate(net, set, elem_priv);
}
@@ -7152,6 +7223,16 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
}
}
+static int nft_setelem_active_next(const struct net *net,
+ const struct nft_set *set,
+ struct nft_elem_priv *elem_priv)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+ u8 genmask = nft_genmask_next(net);
+
+ return nft_set_elem_active(ext, genmask);
+}
+
static void nft_setelem_data_activate(const struct net *net,
const struct nft_set *set,
struct nft_elem_priv *elem_priv)
@@ -7275,8 +7356,12 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_elem_priv *elem_priv)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_trans *trans;
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
sizeof(struct nft_trans_elem), GFP_ATOMIC);
if (!trans)
@@ -7338,6 +7423,7 @@ static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
{
struct nft_set_iter iter = {
.genmask = genmask,
+ .type = NFT_ITER_UPDATE,
.fn = nft_setelem_flush,
};
@@ -7573,7 +7659,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
{
const struct nft_object_type *type;
- list_for_each_entry(type, &nf_tables_objects, list) {
+ list_for_each_entry_rcu(type, &nf_tables_objects, list) {
if (type->family != NFPROTO_UNSPEC &&
type->family != family)
continue;
@@ -7589,9 +7675,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family)
{
const struct nft_object_type *type;
+ rcu_read_lock();
type = __nft_obj_type_get(objtype, family);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
@@ -8263,11 +8353,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
return err;
}
+/* call under rcu_read_lock */
static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
{
const struct nf_flowtable_type *type;
- list_for_each_entry(type, &nf_tables_flowtables, list) {
+ list_for_each_entry_rcu(type, &nf_tables_flowtables, list) {
if (family == type->family)
return type;
}
@@ -8279,9 +8370,13 @@ nft_flowtable_type_get(struct net *net, u8 family)
{
const struct nf_flowtable_type *type;
+ rcu_read_lock();
type = __nft_flowtable_type_get(family);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
@@ -10182,9 +10277,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
if (nft_trans_chain_update(trans)) {
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
&nft_trans_chain_hooks(trans));
- nft_netdev_unregister_hooks(net,
- &nft_trans_chain_hooks(trans),
- true);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_netdev_unregister_hooks(net,
+ &nft_trans_chain_hooks(trans),
+ true);
+ }
} else {
nft_chain_del(trans->ctx.chain);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN,
@@ -10423,10 +10520,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
struct nft_trans *trans, *next;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
+ int err = 0;
if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
- return -EAGAIN;
+ err = -EAGAIN;
list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
list) {
@@ -10460,9 +10558,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
- nft_netdev_unregister_hooks(net,
- &nft_trans_chain_hooks(trans),
- true);
+ if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) {
+ nft_netdev_unregister_hooks(net,
+ &nft_trans_chain_hooks(trans),
+ true);
+ }
free_percpu(nft_trans_chain_stats(trans));
kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
@@ -10554,8 +10654,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSETELEM:
te = (struct nft_trans_elem *)trans->data;
- nft_setelem_data_activate(net, te->set, te->elem_priv);
- nft_setelem_activate(net, te->set, te->elem_priv);
+ if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
+ nft_setelem_data_activate(net, te->set, te->elem_priv);
+ nft_setelem_activate(net, te->set, te->elem_priv);
+ }
if (!nft_setelem_is_catchall(te->set, te->elem_priv))
te->set->ndeact--;
@@ -10616,12 +10718,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nf_tables_abort_release(trans);
}
- if (action == NFNL_ABORT_AUTOLOAD)
- nf_tables_module_autoload(net);
- else
- nf_tables_module_autoload_cleanup(net);
-
- return 0;
+ return err;
}
static int nf_tables_abort(struct net *net, struct sk_buff *skb,
@@ -10634,6 +10731,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
gc_seq = nft_gc_seq_begin(nft_net);
ret = __nf_tables_abort(net, action);
nft_gc_seq_end(nft_net, gc_seq);
+
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+ /* module autoload needs to happen after GC sequence update because it
+ * temporarily releases and grabs mutex again.
+ */
+ if (action == NFNL_ABORT_AUTOLOAD)
+ nf_tables_module_autoload(net);
+ else
+ nf_tables_module_autoload_cleanup(net);
+
mutex_unlock(&nft_net->commit_mutex);
return ret;
@@ -10737,6 +10845,9 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+ if (!nft_set_elem_active(ext, iter->genmask))
+ return 0;
+
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
@@ -10821,6 +10932,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
continue;
iter.genmask = nft_genmask_next(ctx->net);
+ iter.type = NFT_ITER_UPDATE;
iter.skip = 0;
iter.count = 0;
iter.err = 0;
@@ -11439,9 +11551,10 @@ static void __net_exit nf_tables_exit_net(struct net *net)
gc_seq = nft_gc_seq_begin(nft_net);
- if (!list_empty(&nft_net->commit_list) ||
- !list_empty(&nft_net->module_list))
- __nf_tables_abort(net, NFNL_ABORT_NONE);
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+ if (!list_empty(&nft_net->module_list))
+ nf_tables_module_autoload_cleanup(net);
__nft_release_tables(net);
@@ -11533,6 +11646,7 @@ static void __exit nf_tables_module_exit(void)
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
nft_chain_route_fini();
+ nf_tables_trans_destroy_flush_work();
unregister_pernet_subsys(&nf_tables_net_ops);
cancel_work_sync(&trans_gc_work);
cancel_work_sync(&trans_destroy_work);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index a0055f510e31e9..b314ca728a2912 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -216,6 +216,7 @@ static int nft_lookup_validate(const struct nft_ctx *ctx,
return 0;
iter.genmask = nft_genmask_next(ctx->net);
+ iter.type = NFT_ITER_UPDATE;
iter.skip = 0;
iter.count = 0;
iter.err = 0;
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 32df7a16835da3..1caa04619dc6da 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -172,7 +172,7 @@ static void nft_bitmap_activate(const struct net *net,
nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
/* Enter 11 state. */
priv->bitmap[idx] |= (genmask << off);
- nft_set_elem_change_active(net, set, &be->ext);
+ nft_clear(net, &be->ext);
}
static void nft_bitmap_flush(const struct net *net,
@@ -222,8 +222,6 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
list_for_each_entry_rcu(be, &priv->list, head) {
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&be->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &be->priv);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 6968a3b342367c..daa56dda737ae2 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -199,7 +199,7 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
{
struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &he->ext);
+ nft_clear(net, &he->ext);
}
static void nft_rhash_flush(const struct net *net,
@@ -286,8 +286,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&he->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
@@ -599,7 +597,7 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set,
{
struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &he->ext);
+ nft_clear(net, &he->ext);
}
static void nft_hash_flush(const struct net *net,
@@ -652,8 +650,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
hlist_for_each_entry_rcu(he, &priv->table[i], node) {
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&he->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &he->priv);
if (iter->err < 0)
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index df8de509024637..187138afac45d4 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1847,7 +1847,7 @@ static void nft_pipapo_activate(const struct net *net,
{
struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &e->ext);
+ nft_clear(net, &e->ext);
}
/**
@@ -2077,6 +2077,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
rules_fx = rules_f0;
nft_pipapo_for_each_field(f, i, m) {
+ bool last = i == m->field_count - 1;
+
if (!pipapo_match_field(f, start, rules_fx,
match_start, match_end))
break;
@@ -2089,16 +2091,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
- }
- if (i == m->field_count) {
- priv->dirty = true;
- pipapo_drop(m, rulemap);
- return;
+ if (last && f->mt[rulemap[i].to].e == e) {
+ priv->dirty = true;
+ pipapo_drop(m, rulemap);
+ return;
+ }
}
first_rule += rules_f0;
}
+
+ WARN_ON_ONCE(1); /* elem_priv not found */
}
/**
@@ -2115,13 +2119,15 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_iter *iter)
{
struct nft_pipapo *priv = nft_set_priv(set);
- struct net *net = read_pnet(&set->net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
unsigned int i, r;
+ WARN_ON_ONCE(iter->type != NFT_ITER_READ &&
+ iter->type != NFT_ITER_UPDATE);
+
rcu_read_lock();
- if (iter->genmask == nft_genmask_cur(net))
+ if (iter->type == NFT_ITER_READ)
m = rcu_dereference(priv->match);
else
m = priv->clone;
@@ -2143,9 +2149,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
e = f->mt[r].e;
- if (!nft_set_elem_active(&e->ext, iter->genmask))
- goto cont;
-
iter->err = iter->fn(ctx, set, iter, &e->priv);
if (iter->err < 0)
goto out;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 9944fe479e5361..b7ea21327549b3 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -532,7 +532,7 @@ static void nft_rbtree_activate(const struct net *net,
{
struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
- nft_set_elem_change_active(net, set, &rbe->ext);
+ nft_clear(net, &rbe->ext);
}
static void nft_rbtree_flush(const struct net *net,
@@ -600,8 +600,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (iter->count < iter->skip)
goto cont;
- if (!nft_set_elem_active(&rbe->ext, iter->genmask))
- goto cont;
iter->err = iter->fn(ctx, set, iter, &rbe->priv);
if (iter->err < 0) {
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 819157bbb5a2c6..d5344563e525c9 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -252,10 +252,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt > LLCP_MAX_RW) {
err = -EINVAL;
@@ -274,10 +274,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = copy_safe_from_sockptr(&opt, sizeof(opt),
+ optval, optlen);
+ if (err)
break;
- }
if (opt > LLCP_MAX_MIUX) {
err = -EINVAL;
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index cdad47b140fa4b..0d26c8ec9993ea 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1516,6 +1516,11 @@ static void nci_rx_work(struct work_struct *work)
nfc_send_to_raw_sock(ndev->nfc_dev, skb,
RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);
+ if (!nci_plen(skb->data)) {
+ kfree_skb(skb);
+ break;
+ }
+
/* Process frame */
switch (nci_mt(skb->data)) {
case NCI_MT_RSP_PKT:
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3019a4406ca4f7..74b63cdb59923a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1380,8 +1380,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
- pr_info_ratelimited("Failed to associated timeout "
- "policy `%s'\n", ct_info.timeout);
+ OVS_NLERR(log,
+ "Failed to associated timeout policy '%s'",
+ ct_info.timeout);
else
ct_info.nf_ct_timeout = rcu_dereference(
nf_ct_timeout_find(ct_info.ct)->timeout);
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index a4e3c5de998be4..00dbcd4d28e680 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
}
ret = PTR_ERR(trans_private);
/* Trigger connection so that its ready for the next retry */
- if (ret == -ENODEV)
+ if (ret == -ENODEV && cp)
rds_conn_connect_if_down(cp->cp_conn);
goto out;
}
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 39945b139c4817..cd0accaf844a18 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -241,13 +241,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_skbmod *d = to_skbmod(a);
unsigned char *b = skb_tail_pointer(skb);
struct tcf_skbmod_params *p;
- struct tc_skbmod opt = {
- .index = d->tcf_index,
- .refcnt = refcount_read(&d->tcf_refcnt) - ref,
- .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
- };
+ struct tc_skbmod opt;
struct tcf_t t;
+ memset(&opt, 0, sizeof(opt));
+ opt.index = d->tcf_index;
+ opt.refcnt = refcount_read(&d->tcf_refcnt) - ref,
+ opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind;
spin_lock_bh(&d->tcf_lock);
opt.action = d->tcf_action;
p = rcu_dereference_protected(d->skbmod_p,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 65e05b0c98e461..60239378d43fb7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -809,7 +809,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
!qdisc_is_offloaded);
/* TODO: perform the search on a per txq basis */
- sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
+ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
WARN_ON_ONCE(parentid != TC_H_ROOT);
break;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ff533649377750..4a2c763e2d1166 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -974,6 +974,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
+ sch->owner = -1;
netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL);
refcount_set(&sch->refcnt, 1);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index b2c1b683a88ee2..d2b02710ab0709 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -921,8 +921,6 @@ out_err:
* Caller provides the truncation length of the output token (h) in
* cksumout.len.
*
- * Note that for RPCSEC, the "initial cipher state" is always all zeroes.
- *
* Return values:
* %GSS_S_COMPLETE: Digest computed, @cksumout filled in
* %GSS_S_FAILURE: Call failed
@@ -933,19 +931,22 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
int body_offset, struct xdr_netobj *cksumout)
{
unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher);
- static const u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
struct ahash_request *req;
struct scatterlist sg[1];
+ u8 *iv, *checksumdata;
int err = -ENOMEM;
- u8 *checksumdata;
checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL);
if (!checksumdata)
return GSS_S_FAILURE;
+ /* For RPCSEC, the "initial cipher state" is always all zeroes. */
+ iv = kzalloc(ivsize, GFP_KERNEL);
+ if (!iv)
+ goto out_free_mem;
req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
- goto out_free_cksumdata;
+ goto out_free_mem;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
err = crypto_ahash_init(req);
if (err)
@@ -969,7 +970,8 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
out_free_ahash:
ahash_request_free(req);
-out_free_cksumdata:
+out_free_mem:
+ kfree(iv);
kfree_sensitive(checksumdata);
return err ? GSS_S_FAILURE : GSS_S_COMPLETE;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 545017a3daa4d6..6b3f01beb294b9 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1206,15 +1206,6 @@ err_noclose:
* MSG_SPLICE_PAGES is used exclusively to reduce the number of
* copy operations in this path. Therefore the caller must ensure
* that the pages backing @xdr are unchanging.
- *
- * Note that the send is non-blocking. The caller has incremented
- * the reference count on each page backing the RPC message, and
- * the network layer will "put" these pages when transmission is
- * complete.
- *
- * This is safe for our RPC services because the memory backing
- * the head and tail components is never kmalloc'd. These always
- * come from pages in the svc_rqst::rq_pages array.
*/
static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
rpc_fraghdr marker, unsigned int *sentp)
@@ -1244,6 +1235,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
1 + count, sizeof(marker) + rqstp->rq_res.len);
ret = sock_sendmsg(svsk->sk_sock, &msg);
+ page_frag_free(buf);
if (ret < 0)
return ret;
*sentp += ret;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 211f57164cb611..b783231668c651 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1976,10 +1976,10 @@ int tls_sw_recvmsg(struct sock *sk,
if (unlikely(flags & MSG_ERRQUEUE))
return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
- psock = sk_psock_get(sk);
err = tls_rx_reader_lock(sk, ctx, flags & MSG_DONTWAIT);
if (err < 0)
return err;
+ psock = sk_psock_get(sk);
bpf_strp_enabled = sk_psock_strp_enabled(psock);
/* If crypto failed the connection is broken */
@@ -2152,12 +2152,15 @@ recv_end:
}
/* Drain records from the rx_list & copy if required */
- if (is_peek || is_kvec)
+ if (is_peek)
err = process_rx_list(ctx, msg, &control, copied + peeked,
decrypted - peeked, is_peek, NULL);
else
err = process_rx_list(ctx, msg, &control, 0,
async_copy_bytes, is_peek, NULL);
+
+ /* we could have copied less than we wanted, and possibly nothing */
+ decrypted += max(err, 0) - async_copy_bytes;
}
copied += decrypted;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5b41e2321209ae..9a6ad5974dff5e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2663,9 +2663,13 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
WRITE_ONCE(u->oob_skb, NULL);
consume_skb(skb);
}
- } else if (!(flags & MSG_PEEK)) {
+ } else if (flags & MSG_PEEK) {
+ skb = NULL;
+ } else {
skb_unlink(skb, &sk->sk_receive_queue);
- consume_skb(skb);
+ WRITE_ONCE(u->oob_skb, NULL);
+ if (!WARN_ON_ONCE(skb_unref(skb)))
+ kfree_skb(skb);
skb = skb_peek(&sk->sk_receive_queue);
}
}
@@ -2739,18 +2743,16 @@ redo:
last = skb = skb_peek(&sk->sk_receive_queue);
last_len = last ? last->len : 0;
+again:
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
if (skb) {
skb = manage_oob(skb, sk, flags, copied);
- if (!skb) {
+ if (!skb && copied) {
unix_state_unlock(sk);
- if (copied)
- break;
- goto redo;
+ break;
}
}
#endif
-again:
if (skb == NULL) {
if (copied >= target)
goto unlock;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index fa39b626523851..6433a414acf862 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -274,11 +274,22 @@ static void __unix_gc(struct work_struct *work)
* receive queues. Other, non candidate sockets _can_ be
* added to queue, so we must make sure only to touch
* candidates.
+ *
+ * Embryos, though never candidates themselves, affect which
+ * candidates are reachable by the garbage collector. Before
+ * being added to a listener's queue, an embryo may already
+ * receive data carrying SCM_RIGHTS, potentially making the
+ * passed socket a candidate that is not yet reachable by the
+ * collector. It becomes reachable once the embryo is
+ * enqueued. Therefore, we must ensure that no SCM-laden
+ * embryo appears in a (candidate) listener's queue between
+ * consecutive scan_children() calls.
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+ struct sock *sk = &u->sk;
long total_refs;
- total_refs = file_count(u->sk.sk_socket->file);
+ total_refs = file_count(sk->sk_socket->file);
WARN_ON_ONCE(!u->inflight);
WARN_ON_ONCE(total_refs < u->inflight);
@@ -286,6 +297,11 @@ static void __unix_gc(struct work_struct *work)
list_move_tail(&u->link, &gc_candidates);
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+
+ if (sk->sk_state == TCP_LISTEN) {
+ unix_state_lock(sk);
+ unix_state_unlock(sk);
+ }
}
}
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 1748268e0694f2..ee5d306a96d0f8 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -120,7 +120,6 @@ virtio_transport_send_pkt_work(struct work_struct *work)
if (!skb)
break;
- virtio_transport_deliver_tap_pkt(skb);
reply = virtio_vsock_skb_reply(skb);
sgs = vsock->out_sgs;
sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
@@ -170,6 +169,8 @@ virtio_transport_send_pkt_work(struct work_struct *work)
break;
}
+ virtio_transport_deliver_tap_pkt(skb);
+
if (reply) {
struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
int val;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index e039e66ab37774..cbbf347c6b2e09 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1024,7 +1024,7 @@ TRACE_EVENT(rdev_get_mpp,
TRACE_EVENT(rdev_dump_mpp,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx,
u8 *dst, u8 *mpp),
- TP_ARGS(wiphy, netdev, _idx, mpp, dst),
+ TP_ARGS(wiphy, netdev, _idx, dst, mpp),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index a161c64d1765e6..838ad6541a17d8 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -4,6 +4,7 @@
* Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com>
* Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2024 Intel Corporation
*
* (As all part of the Linux kernel, this file is GPL)
*/
@@ -662,7 +663,8 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev)
dev->ieee80211_ptr->wiphy->wext &&
dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) {
wireless_warn_cfg80211_wext();
- if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)
+ if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO |
+ WIPHY_FLAG_DISABLE_WEXT))
return NULL;
return dev->ieee80211_ptr->wiphy->wext->get_wireless_stats(dev);
}
@@ -704,7 +706,8 @@ static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
#ifdef CONFIG_CFG80211_WEXT
if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy) {
wireless_warn_cfg80211_wext();
- if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)
+ if (dev->ieee80211_ptr->wiphy->flags & (WIPHY_FLAG_SUPPORTS_MLO |
+ WIPHY_FLAG_DISABLE_WEXT))
return NULL;
handlers = dev->ieee80211_ptr->wiphy->wext;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3404d076a8a3e6..727aa20be4bde8 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1417,6 +1417,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;
+ if (optlen < sizeof(entries))
+ return -EINVAL;
if (copy_from_sockptr(&entries, optval, sizeof(entries)))
return -EFAULT;
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 3ce5d503a6da98..c5af566e911ae7 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -114,6 +114,8 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
+KBUILD_CFLAGS += -Wno-override-init # alias for -Wno-initializer-overrides in clang
+
ifdef CONFIG_CC_IS_CLANG
# Clang before clang-16 would warn on default argument promotions.
ifneq ($(call clang-min-version, 160000),y)
@@ -151,10 +153,6 @@ KBUILD_CFLAGS += -Wtype-limits
KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
-ifdef CONFIG_CC_IS_CLANG
-KBUILD_CFLAGS += -Winitializer-overrides
-endif
-
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
else
@@ -164,9 +162,7 @@ KBUILD_CFLAGS += -Wno-missing-field-initializers
KBUILD_CFLAGS += -Wno-type-limits
KBUILD_CFLAGS += -Wno-shift-negative-value
-ifdef CONFIG_CC_IS_CLANG
-KBUILD_CFLAGS += -Wno-initializer-overrides
-else
+ifdef CONFIG_CC_IS_GCC
KBUILD_CFLAGS += -Wno-maybe-uninitialized
endif
diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index 8568d256d6fbff..79fcf27316864f 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -23,7 +23,7 @@ modname = $(notdir $(@:.mod.o=))
part-of-module = y
quiet_cmd_cc_o_c = CC [M] $@
- cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV), $(c_flags)) -c -o $@ $<
+ cmd_cc_o_c = $(CC) $(filter-out $(CC_FLAGS_CFI) $(CFLAGS_GCOV) $(CFLAGS_KCSAN), $(c_flags)) -c -o $@ $<
%.mod.o: %.mod.c FORCE
$(call if_changed_dep,cc_o_c)
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 4606944984ee2e..c55878bddfddc4 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -414,8 +414,8 @@ class PrinterRST(Printer):
version = version.stdout.decode().rstrip()
except:
try:
- version = subprocess.run(['make', 'kernelversion'], cwd=linuxRoot,
- capture_output=True, check=True)
+ version = subprocess.run(['make', '-s', '--no-print-directory', 'kernelversion'],
+ cwd=linuxRoot, capture_output=True, check=True)
version = version.stdout.decode().rstrip()
except:
return 'Linux'
diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
index c5c2ce113c9232..d20c47d21ad835 100644
--- a/scripts/gcc-plugins/stackleak_plugin.c
+++ b/scripts/gcc-plugins/stackleak_plugin.c
@@ -467,6 +467,8 @@ static bool stackleak_gate(void)
return false;
if (STRING_EQUAL(section, ".entry.text"))
return false;
+ if (STRING_EQUAL(section, ".head.text"))
+ return false;
}
return track_frame_size >= 0;
diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index b5730061872bae..965bb40c50e517 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -552,11 +552,6 @@ static int conf_choice(struct menu *menu)
continue;
}
sym_set_tristate_value(child->sym, yes);
- for (child = child->list; child; child = child->next) {
- indent += 2;
- conf(child);
- indent -= 2;
- }
return 1;
}
}
diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
index e69d7c59d93027..e7cc9e985c4f06 100644
--- a/scripts/kconfig/lkc.h
+++ b/scripts/kconfig/lkc.h
@@ -89,7 +89,7 @@ void menu_add_visibility(struct expr *dep);
struct property *menu_add_prompt(enum prop_type type, char *prompt, struct expr *dep);
void menu_add_expr(enum prop_type type, struct expr *expr, struct expr *dep);
void menu_add_symbol(enum prop_type type, struct symbol *sym, struct expr *dep);
-void menu_finalize(struct menu *parent);
+void menu_finalize(void);
void menu_set_type(int type);
extern struct menu rootmenu;
diff --git a/scripts/kconfig/lxdialog/checklist.c b/scripts/kconfig/lxdialog/checklist.c
index 31d0a89fbeb7ac..75493302fb8570 100644
--- a/scripts/kconfig/lxdialog/checklist.c
+++ b/scripts/kconfig/lxdialog/checklist.c
@@ -119,7 +119,7 @@ int dialog_checklist(const char *title, const char *prompt, int height,
}
do_resize:
- if (getmaxy(stdscr) < (height + CHECKLIST_HEIGTH_MIN))
+ if (getmaxy(stdscr) < (height + CHECKLIST_HEIGHT_MIN))
return -ERRDISPLAYTOOSMALL;
if (getmaxx(stdscr) < (width + CHECKLIST_WIDTH_MIN))
return -ERRDISPLAYTOOSMALL;
diff --git a/scripts/kconfig/lxdialog/dialog.h b/scripts/kconfig/lxdialog/dialog.h
index 2d15ba893fbf89..f6c2ebe6d1f91d 100644
--- a/scripts/kconfig/lxdialog/dialog.h
+++ b/scripts/kconfig/lxdialog/dialog.h
@@ -162,17 +162,17 @@ int on_key_esc(WINDOW *win);
int on_key_resize(void);
/* minimum (re)size values */
-#define CHECKLIST_HEIGTH_MIN 6 /* For dialog_checklist() */
+#define CHECKLIST_HEIGHT_MIN 6 /* For dialog_checklist() */
#define CHECKLIST_WIDTH_MIN 6
-#define INPUTBOX_HEIGTH_MIN 2 /* For dialog_inputbox() */
+#define INPUTBOX_HEIGHT_MIN 2 /* For dialog_inputbox() */
#define INPUTBOX_WIDTH_MIN 2
-#define MENUBOX_HEIGTH_MIN 15 /* For dialog_menu() */
+#define MENUBOX_HEIGHT_MIN 15 /* For dialog_menu() */
#define MENUBOX_WIDTH_MIN 65
-#define TEXTBOX_HEIGTH_MIN 8 /* For dialog_textbox() */
+#define TEXTBOX_HEIGHT_MIN 8 /* For dialog_textbox() */
#define TEXTBOX_WIDTH_MIN 8
-#define YESNO_HEIGTH_MIN 4 /* For dialog_yesno() */
+#define YESNO_HEIGHT_MIN 4 /* For dialog_yesno() */
#define YESNO_WIDTH_MIN 4
-#define WINDOW_HEIGTH_MIN 19 /* For init_dialog() */
+#define WINDOW_HEIGHT_MIN 19 /* For init_dialog() */
#define WINDOW_WIDTH_MIN 80
int init_dialog(const char *backtitle);
diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c
index 1dcfb288ee6363..3c6e24b20f5be6 100644
--- a/scripts/kconfig/lxdialog/inputbox.c
+++ b/scripts/kconfig/lxdialog/inputbox.c
@@ -43,7 +43,7 @@ int dialog_inputbox(const char *title, const char *prompt, int height, int width
strcpy(instr, init);
do_resize:
- if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGTH_MIN))
+ if (getmaxy(stdscr) <= (height - INPUTBOX_HEIGHT_MIN))
return -ERRDISPLAYTOOSMALL;
if (getmaxx(stdscr) <= (width - INPUTBOX_WIDTH_MIN))
return -ERRDISPLAYTOOSMALL;
diff --git a/scripts/kconfig/lxdialog/menubox.c b/scripts/kconfig/lxdialog/menubox.c
index 0e333284e947bc..6e6244df0c56e3 100644
--- a/scripts/kconfig/lxdialog/menubox.c
+++ b/scripts/kconfig/lxdialog/menubox.c
@@ -172,7 +172,7 @@ int dialog_menu(const char *title, const char *prompt,
do_resize:
height = getmaxy(stdscr);
width = getmaxx(stdscr);
- if (height < MENUBOX_HEIGTH_MIN || width < MENUBOX_WIDTH_MIN)
+ if (height < MENUBOX_HEIGHT_MIN || width < MENUBOX_WIDTH_MIN)
return -ERRDISPLAYTOOSMALL;
height -= 4;
diff --git a/scripts/kconfig/lxdialog/textbox.c b/scripts/kconfig/lxdialog/textbox.c
index 058ed0e5bbd545..0abaf635978f98 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -175,7 +175,7 @@ int dialog_textbox(const char *title, const char *tbuf, int initial_height,
do_resize:
getmaxyx(stdscr, height, width);
- if (height < TEXTBOX_HEIGTH_MIN || width < TEXTBOX_WIDTH_MIN)
+ if (height < TEXTBOX_HEIGHT_MIN || width < TEXTBOX_WIDTH_MIN)
return -ERRDISPLAYTOOSMALL;
if (initial_height != 0)
height = initial_height;
diff --git a/scripts/kconfig/lxdialog/util.c b/scripts/kconfig/lxdialog/util.c
index 3fb7508b68a240..f18e2a89f6135d 100644
--- a/scripts/kconfig/lxdialog/util.c
+++ b/scripts/kconfig/lxdialog/util.c
@@ -291,7 +291,7 @@ int init_dialog(const char *backtitle)
getyx(stdscr, saved_y, saved_x);
getmaxyx(stdscr, height, width);
- if (height < WINDOW_HEIGTH_MIN || width < WINDOW_WIDTH_MIN) {
+ if (height < WINDOW_HEIGHT_MIN || width < WINDOW_WIDTH_MIN) {
endwin();
return -ERRDISPLAYTOOSMALL;
}
diff --git a/scripts/kconfig/lxdialog/yesno.c b/scripts/kconfig/lxdialog/yesno.c
index bcaac9b7bab2ca..b57d25e1549fe4 100644
--- a/scripts/kconfig/lxdialog/yesno.c
+++ b/scripts/kconfig/lxdialog/yesno.c
@@ -32,7 +32,7 @@ int dialog_yesno(const char *title, const char *prompt, int height, int width)
WINDOW *dialog;
do_resize:
- if (getmaxy(stdscr) < (height + YESNO_HEIGTH_MIN))
+ if (getmaxy(stdscr) < (height + YESNO_HEIGHT_MIN))
return -ERRDISPLAYTOOSMALL;
if (getmaxx(stdscr) < (width + YESNO_WIDTH_MIN))
return -ERRDISPLAYTOOSMALL;
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index f4bb391d50cf99..c0969097447da5 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -659,9 +659,9 @@ static void conf_choice(struct menu *menu)
dialog_clear();
res = dialog_checklist(prompt ? prompt : "Main Menu",
radiolist_instructions,
- MENUBOX_HEIGTH_MIN,
+ MENUBOX_HEIGHT_MIN,
MENUBOX_WIDTH_MIN,
- CHECKLIST_HEIGTH_MIN);
+ CHECKLIST_HEIGHT_MIN);
selected = item_activate_selected();
switch (res) {
case 0:
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index 8498481e6afe36..3b822cd110f478 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -282,7 +282,7 @@ static void sym_check_prop(struct symbol *sym)
}
}
-void menu_finalize(struct menu *parent)
+static void _menu_finalize(struct menu *parent, bool inside_choice)
{
struct menu *menu, *last_menu;
struct symbol *sym;
@@ -296,7 +296,12 @@ void menu_finalize(struct menu *parent)
* and propagate parent dependencies before moving on.
*/
- if (sym && sym_is_choice(sym)) {
+ bool is_choice = false;
+
+ if (sym && sym_is_choice(sym))
+ is_choice = true;
+
+ if (is_choice) {
if (sym->type == S_UNKNOWN) {
/* find the first choice value to find out choice type */
current_entry = parent;
@@ -394,7 +399,7 @@ void menu_finalize(struct menu *parent)
}
}
- if (sym && sym_is_choice(sym))
+ if (is_choice)
expr_free(parentdep);
/*
@@ -402,8 +407,8 @@ void menu_finalize(struct menu *parent)
* moving on
*/
for (menu = parent->list; menu; menu = menu->next)
- menu_finalize(menu);
- } else if (sym) {
+ _menu_finalize(menu, is_choice);
+ } else if (!inside_choice && sym) {
/*
* Automatic submenu creation. If sym is a symbol and A, B, C,
* ... are consecutive items (symbols, menus, ifs, etc.) that
@@ -463,7 +468,7 @@ void menu_finalize(struct menu *parent)
/* Superset, put in submenu */
expr_free(dep2);
next:
- menu_finalize(menu);
+ _menu_finalize(menu, false);
menu->parent = parent;
last_menu = menu;
}
@@ -582,6 +587,11 @@ void menu_finalize(struct menu *parent)
}
}
+void menu_finalize(void)
+{
+ _menu_finalize(&rootmenu, false);
+}
+
bool menu_has_prompt(struct menu *menu)
{
if (!menu->prompt)
diff --git a/scripts/kconfig/parser.y b/scripts/kconfig/parser.y
index b45bfaf0a02b12..7fb996612c9660 100644
--- a/scripts/kconfig/parser.y
+++ b/scripts/kconfig/parser.y
@@ -515,7 +515,7 @@ void conf_parse(const char *name)
menu_add_prompt(P_MENU, "Main menu", NULL);
}
- menu_finalize(&rootmenu);
+ menu_finalize();
menu = &rootmenu;
while (menu) {
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 967f1abb0edbd8..cb1be22afc65ff 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1541,7 +1541,7 @@ sub create_parameterlist($$$$) {
save_struct_actual($2);
push_parameter($2, "$type $1", $arg, $file, $declaration_name);
- } elsif ($param =~ m/(.*?):(\d+)/) {
+ } elsif ($param =~ m/(.*?):(\w+)/) {
if ($type ne "") { # skip unnamed bit-fields
save_struct_actual($1);
push_parameter($1, "$type:$2", $arg, $file, $declaration_name)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 6b37039c9e927b..2f5b91da5afa9e 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1007,6 +1007,8 @@ static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
{
+ Elf_Sym *new_sym;
+
/* If the supplied symbol has a valid name, return it */
if (is_valid_name(elf, sym))
return sym;
@@ -1015,8 +1017,9 @@ static Elf_Sym *find_tosym(struct elf_info *elf, Elf_Addr addr, Elf_Sym *sym)
* Strive to find a better symbol name, but the resulting name may not
* match the symbol referenced in the original code.
*/
- return symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
- true, 20);
+ new_sym = symsearch_find_nearest(elf, addr, get_secindex(elf, sym),
+ true, 20);
+ return new_sym ? new_sym : sym;
}
static bool is_executable_section(struct elf_info *elf, unsigned int secndx)
diff --git a/security/security.c b/security/security.c
index 7e118858b545c1..0a9a0ac3f26624 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1793,11 +1793,11 @@ int security_path_mknod(const struct path *dir, struct dentry *dentry,
EXPORT_SYMBOL(security_path_mknod);
/**
- * security_path_post_mknod() - Update inode security field after file creation
+ * security_path_post_mknod() - Update inode security after reg file creation
* @idmap: idmap of the mount
* @dentry: new file
*
- * Update inode security field after a file has been created.
+ * Update inode security field after a regular file has been created.
*/
void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry)
{
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 0619a1cbbfbe41..074d6c2714eb55 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -2123,7 +2123,6 @@ static struct file_system_type sel_fs_type = {
.kill_sb = sel_kill_sb,
};
-static struct vfsmount *selinuxfs_mount __ro_after_init;
struct path selinux_null __ro_after_init;
static int __init init_sel_fs(void)
@@ -2145,18 +2144,21 @@ static int __init init_sel_fs(void)
return err;
}
- selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type);
- if (IS_ERR(selinuxfs_mount)) {
+ selinux_null.mnt = kern_mount(&sel_fs_type);
+ if (IS_ERR(selinux_null.mnt)) {
pr_err("selinuxfs: could not mount!\n");
- err = PTR_ERR(selinuxfs_mount);
- selinuxfs_mount = NULL;
+ err = PTR_ERR(selinux_null.mnt);
+ selinux_null.mnt = NULL;
+ return err;
}
+
selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root,
&null_name);
if (IS_ERR(selinux_null.dentry)) {
pr_err("selinuxfs: could not lookup null!\n");
err = PTR_ERR(selinux_null.dentry);
selinux_null.dentry = NULL;
+ return err;
}
return err;
diff --git a/sound/aoa/soundbus/i2sbus/core.c b/sound/aoa/soundbus/i2sbus/core.c
index b8ff5cccd0c811..5431d2c4942106 100644
--- a/sound/aoa/soundbus/i2sbus/core.c
+++ b/sound/aoa/soundbus/i2sbus/core.c
@@ -158,7 +158,7 @@ static int i2sbus_add_dev(struct macio_dev *macio,
struct device_node *child, *sound = NULL;
struct resource *r;
int i, layout = 0, rlen, ok = force;
- char node_name[6];
+ char node_name[8];
static const char *rnames[] = { "i2sbus: %pOFn (control)",
"i2sbus: %pOFn (tx)",
"i2sbus: %pOFn (rx)" };
diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
index b141024830ecc8..ee6ac649df836d 100644
--- a/sound/core/seq/seq_ump_convert.c
+++ b/sound/core/seq/seq_ump_convert.c
@@ -428,7 +428,7 @@ static int cvt_ump_midi2_to_midi1(struct snd_seq_client *dest,
midi1->note.group = midi2->note.group;
midi1->note.status = midi2->note.status;
midi1->note.channel = midi2->note.channel;
- switch (midi2->note.status << 4) {
+ switch (midi2->note.status) {
case UMP_MSG_STATUS_NOTE_ON:
case UMP_MSG_STATUS_NOTE_OFF:
midi1->note.note = midi2->note.note;
diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c
index 696a958d93e9c3..088cff799e0bee 100644
--- a/sound/hda/intel-nhlt.c
+++ b/sound/hda/intel-nhlt.c
@@ -343,3 +343,29 @@ intel_nhlt_get_endpoint_blob(struct device *dev, struct nhlt_acpi_table *nhlt,
return NULL;
}
EXPORT_SYMBOL(intel_nhlt_get_endpoint_blob);
+
+int intel_nhlt_ssp_device_type(struct device *dev, struct nhlt_acpi_table *nhlt,
+ u8 virtual_bus_id)
+{
+ struct nhlt_endpoint *epnt;
+ int i;
+
+ if (!nhlt)
+ return -EINVAL;
+
+ epnt = (struct nhlt_endpoint *)nhlt->desc;
+ for (i = 0; i < nhlt->endpoint_count; i++) {
+ /* for SSP link the virtual bus id is the SSP port number */
+ if (epnt->linktype == NHLT_LINK_SSP &&
+ epnt->virtual_bus_id == virtual_bus_id) {
+ dev_dbg(dev, "SSP%d: dev_type=%d\n", virtual_bus_id,
+ epnt->device_type);
+ return epnt->device_type;
+ }
+
+ epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length);
+ }
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL(intel_nhlt_ssp_device_type);
diff --git a/sound/oss/dmasound/dmasound_paula.c b/sound/oss/dmasound/dmasound_paula.c
index 0ba8f0c4cd99a2..3a593da09280dc 100644
--- a/sound/oss/dmasound/dmasound_paula.c
+++ b/sound/oss/dmasound/dmasound_paula.c
@@ -725,7 +725,13 @@ static void __exit amiga_audio_remove(struct platform_device *pdev)
dmasound_deinit();
}
-static struct platform_driver amiga_audio_driver = {
+/*
+ * amiga_audio_remove() lives in .exit.text. For drivers registered via
+ * module_platform_driver_probe() this is ok because they cannot get unbound at
+ * runtime. So mark the driver struct with __refdata to prevent modpost
+ * triggering a section mismatch warning.
+ */
+static struct platform_driver amiga_audio_driver __refdata = {
.remove_new = __exit_p(amiga_audio_remove),
.driver = {
.name = "amiga-audio",
diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c
index d36234b88fb421..941bfbf812ed30 100644
--- a/sound/pci/emu10k1/emu10k1_callback.c
+++ b/sound/pci/emu10k1/emu10k1_callback.c
@@ -255,7 +255,7 @@ lookup_voices(struct snd_emux *emu, struct snd_emu10k1 *hw,
/* check if sample is finished playing (non-looping only) */
if (bp != best + V_OFF && bp != best + V_FREE &&
(vp->reg.sample_mode & SNDRV_SFNT_SAMPLE_SINGLESHOT)) {
- val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch) - 64;
+ val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch);
if (val >= vp->reg.loopstart)
bp = best + V_OFF;
}
@@ -362,7 +362,7 @@ start_voice(struct snd_emux_voice *vp)
map = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0);
- addr = vp->reg.start + 64;
+ addr = vp->reg.start;
temp = vp->reg.parm.filterQ;
ccca = (temp << 28) | addr;
if (vp->apitch < 0xe400)
@@ -430,9 +430,6 @@ start_voice(struct snd_emux_voice *vp)
/* Q & current address (Q 4bit value, MSB) */
CCCA, ccca,
- /* cache */
- CCR, REG_VAL_PUT(CCR_CACHEINVALIDSIZE, 64),
-
/* reset volume */
VTFT, vtarget | vp->ftarget,
CVCF, vtarget | CVCF_CURRENTFILTER_MASK,
diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c
index 72ec872afb8d27..8fb688e4141485 100644
--- a/sound/pci/hda/cs35l41_hda_property.c
+++ b/sound/pci/hda/cs35l41_hda_property.c
@@ -108,7 +108,10 @@ static const struct cs35l41_config cs35l41_config_table[] = {
{ "10431F12", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
{ "10431F1F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 },
{ "10431F62", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
+ { "10433A60", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
{ "17AA386F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
+ { "17AA3877", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
+ { "17AA3878", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
{ "17AA38A9", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
{ "17AA38AB", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
{ "17AA38B4", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
@@ -496,7 +499,10 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = {
{ "CSC3551", "10431F12", generic_dsd_config },
{ "CSC3551", "10431F1F", generic_dsd_config },
{ "CSC3551", "10431F62", generic_dsd_config },
+ { "CSC3551", "10433A60", generic_dsd_config },
{ "CSC3551", "17AA386F", generic_dsd_config },
+ { "CSC3551", "17AA3877", generic_dsd_config },
+ { "CSC3551", "17AA3878", generic_dsd_config },
{ "CSC3551", "17AA38A9", generic_dsd_config },
{ "CSC3551", "17AA38AB", generic_dsd_config },
{ "CSC3551", "17AA38B4", generic_dsd_config },
diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c
index 41974b3897a723..1a3f84599cb584 100644
--- a/sound/pci/hda/cs35l56_hda.c
+++ b/sound/pci/hda/cs35l56_hda.c
@@ -1024,8 +1024,8 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id)
goto err;
}
- dev_dbg(cs35l56->base.dev, "DSP system name: '%s', amp name: '%s'\n",
- cs35l56->system_name, cs35l56->amp_name);
+ dev_info(cs35l56->base.dev, "DSP system name: '%s', amp name: '%s'\n",
+ cs35l56->system_name, cs35l56->amp_name);
regmap_multi_reg_write(cs35l56->base.regmap, cs35l56_hda_dai_config,
ARRAY_SIZE(cs35l56_hda_dai_config));
@@ -1045,14 +1045,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id)
pm_runtime_mark_last_busy(cs35l56->base.dev);
pm_runtime_enable(cs35l56->base.dev);
+ cs35l56->base.init_done = true;
+
ret = component_add(cs35l56->base.dev, &cs35l56_hda_comp_ops);
if (ret) {
dev_err(cs35l56->base.dev, "Register component failed: %d\n", ret);
goto pm_err;
}
- cs35l56->base.init_done = true;
-
return 0;
pm_err:
diff --git a/sound/pci/hda/cs35l56_hda_i2c.c b/sound/pci/hda/cs35l56_hda_i2c.c
index 13beee807308f1..40f2f97944d54c 100644
--- a/sound/pci/hda/cs35l56_hda_i2c.c
+++ b/sound/pci/hda/cs35l56_hda_i2c.c
@@ -56,10 +56,19 @@ static const struct i2c_device_id cs35l56_hda_i2c_id[] = {
{}
};
+static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
+ { "CSC3554", 0 },
+ { "CSC3556", 0 },
+ { "CSC3557", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
+
static struct i2c_driver cs35l56_hda_i2c_driver = {
.driver = {
- .name = "cs35l56-hda",
- .pm = &cs35l56_hda_pm_ops,
+ .name = "cs35l56-hda",
+ .acpi_match_table = cs35l56_acpi_hda_match,
+ .pm = &cs35l56_hda_pm_ops,
},
.id_table = cs35l56_hda_i2c_id,
.probe = cs35l56_hda_i2c_probe,
diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c
index a3b2fa76663d36..7f02155fe61e3c 100644
--- a/sound/pci/hda/cs35l56_hda_spi.c
+++ b/sound/pci/hda/cs35l56_hda_spi.c
@@ -56,10 +56,19 @@ static const struct spi_device_id cs35l56_hda_spi_id[] = {
{}
};
+static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
+ { "CSC3554", 0 },
+ { "CSC3556", 0 },
+ { "CSC3557", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
+
static struct spi_driver cs35l56_hda_spi_driver = {
.driver = {
- .name = "cs35l56-hda",
- .pm = &cs35l56_hda_pm_ops,
+ .name = "cs35l56-hda",
+ .acpi_match_table = cs35l56_acpi_hda_match,
+ .pm = &cs35l56_hda_pm_ops,
},
.id_table = cs35l56_hda_spi_id,
.probe = cs35l56_hda_spi_probe,
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a17c36a36aa537..70d80b6af3fe37 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6875,11 +6875,38 @@ static void alc287_fixup_legion_16ithg6_speakers(struct hda_codec *cdc, const st
comp_generic_fixup(cdc, action, "i2c", "CLSA0101", "-%s:00-cs35l41-hda.%d", 2);
}
+static void cs35l56_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ comp_generic_fixup(cdc, action, "i2c", "CSC3556", "-%s:00-cs35l56-hda.%d", 2);
+}
+
+static void cs35l56_fixup_i2c_four(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ comp_generic_fixup(cdc, action, "i2c", "CSC3556", "-%s:00-cs35l56-hda.%d", 4);
+}
+
+static void cs35l56_fixup_spi_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ comp_generic_fixup(cdc, action, "spi", "CSC3556", "-%s:00-cs35l56-hda.%d", 2);
+}
+
static void cs35l56_fixup_spi_four(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
{
comp_generic_fixup(cdc, action, "spi", "CSC3556", "-%s:00-cs35l56-hda.%d", 4);
}
+static void alc285_fixup_asus_ga403u(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+ /*
+ * The same SSID has been re-used in different hardware, they have
+ * different codecs and the newer GA403U has a ALC285.
+ */
+ if (cdc->core.vendor_id == 0x10ec0285)
+ cs35l56_fixup_i2c_two(cdc, fix, action);
+ else
+ alc_fixup_inv_dmic(cdc, fix, action);
+}
+
static void tas2781_fixup_i2c(struct hda_codec *cdc,
const struct hda_fixup *fix, int action)
{
@@ -7436,6 +7463,14 @@ enum {
ALC256_FIXUP_ACER_SFG16_MICMUTE_LED,
ALC256_FIXUP_HEADPHONE_AMP_VOL,
ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX,
+ ALC285_FIXUP_CS35L56_SPI_2,
+ ALC285_FIXUP_CS35L56_I2C_2,
+ ALC285_FIXUP_CS35L56_I2C_4,
+ ALC285_FIXUP_ASUS_GA403U,
+ ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC,
+ ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1,
+ ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC,
+ ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1
};
/* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -9643,6 +9678,54 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc245_fixup_hp_spectre_x360_eu0xxx,
},
+ [ALC285_FIXUP_CS35L56_SPI_2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l56_fixup_spi_two,
+ },
+ [ALC285_FIXUP_CS35L56_I2C_2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l56_fixup_i2c_two,
+ },
+ [ALC285_FIXUP_CS35L56_I2C_4] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l56_fixup_i2c_four,
+ },
+ [ALC285_FIXUP_ASUS_GA403U] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_asus_ga403u,
+ },
+ [ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x03a11050 },
+ { 0x1b, 0x03a11c30 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1
+ },
+ [ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_speaker2_to_dac1,
+ .chained = true,
+ .chain_id = ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC,
+ },
+ [ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x03a11050 },
+ { 0x1b, 0x03a11c30 },
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC285_FIXUP_CS35L56_SPI_2
+ },
+ [ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_speaker2_to_dac1,
+ .chained = true,
+ .chain_id = ALC285_FIXUP_ASUS_GA403U,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -10037,6 +10120,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x103c, 0x8cde, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
@@ -10096,7 +10181,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
- SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
+ SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10104,6 +10189,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
+ SND_PCI_QUIRK(0x1043, 0x1c63, "ASUS GU605M", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1),
SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
@@ -10115,11 +10201,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
SND_PCI_QUIRK(0x1043, 0x1da2, "ASUS UP6502ZA/ZD", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x1df3, "ASUS UM5606", ALC285_FIXUP_CS35L56_I2C_4),
SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS),
+ SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_CS35L56_I2C_2),
+ SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_CS35L56_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401),
@@ -10133,7 +10222,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
- SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC),
@@ -10159,7 +10248,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
SND_PCI_QUIRK(0x10ec, 0x12f6, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
- SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE),
+ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP),
@@ -10177,6 +10266,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x152d, 0x1082, "Quanta NL3", ALC269_FIXUP_LIFEBOOK),
+ SND_PCI_QUIRK(0x152d, 0x1262, "Huawei NBLB-WAX9N", ALC2XX_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1558, 0x0353, "Clevo V35[05]SN[CDE]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x1325, "Clevo N15[01][CW]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -10282,6 +10372,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460),
SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
+ SND_PCI_QUIRK(0x17aa, 0x2234, "Thinkpad ICE-1", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
@@ -10333,6 +10424,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
SND_PCI_QUIRK(0x17aa, 0x386f, "Legion 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C),
+ SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x3878, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x3881, "YB9 dual power mode2 YC", ALC287_FIXUP_TAS2781_I2C),
@@ -10341,8 +10434,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C),
SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C),
- SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
+ SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10403,6 +10496,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
+ SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c
index 4475cea8e9f703..75f7674c66ee7a 100644
--- a/sound/pci/hda/tas2781_hda_i2c.c
+++ b/sound/pci/hda/tas2781_hda_i2c.c
@@ -89,7 +89,7 @@ struct tas2781_hda {
struct snd_kcontrol *dsp_prog_ctl;
struct snd_kcontrol *dsp_conf_ctl;
struct snd_kcontrol *prof_ctl;
- struct snd_kcontrol *snd_ctls[3];
+ struct snd_kcontrol *snd_ctls[2];
};
static int tas2781_get_i2c_res(struct acpi_resource *ares, void *data)
@@ -161,8 +161,6 @@ static void tas2781_hda_playback_hook(struct device *dev, int action)
pm_runtime_put_autosuspend(dev);
break;
default:
- dev_dbg(tas_hda->dev, "Playback action not supported: %d\n",
- action);
break;
}
}
@@ -185,8 +183,15 @@ static int tasdevice_get_profile_id(struct snd_kcontrol *kcontrol,
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = tas_priv->rcabin.profile_cfg_id;
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->rcabin.profile_cfg_id);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
return 0;
}
@@ -200,11 +205,19 @@ static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
val = clamp(nr_profile, 0, max);
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name,
+ tas_priv->rcabin.profile_cfg_id, val);
+
if (tas_priv->rcabin.profile_cfg_id != val) {
tas_priv->rcabin.profile_cfg_id = val;
ret = 1;
}
+ mutex_unlock(&tas_priv->codec_lock);
+
return ret;
}
@@ -241,8 +254,15 @@ static int tasdevice_program_get(struct snd_kcontrol *kcontrol,
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = tas_priv->cur_prog;
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_prog);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
return 0;
}
@@ -257,11 +277,18 @@ static int tasdevice_program_put(struct snd_kcontrol *kcontrol,
val = clamp(nr_program, 0, max);
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_prog, val);
+
if (tas_priv->cur_prog != val) {
tas_priv->cur_prog = val;
ret = 1;
}
+ mutex_unlock(&tas_priv->codec_lock);
+
return ret;
}
@@ -270,8 +297,15 @@ static int tasdevice_config_get(struct snd_kcontrol *kcontrol,
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = tas_priv->cur_conf;
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_conf);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
return 0;
}
@@ -286,54 +320,39 @@ static int tasdevice_config_put(struct snd_kcontrol *kcontrol,
val = clamp(nr_config, 0, max);
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name, tas_priv->cur_conf, val);
+
if (tas_priv->cur_conf != val) {
tas_priv->cur_conf = val;
ret = 1;
}
+ mutex_unlock(&tas_priv->codec_lock);
+
return ret;
}
-/*
- * tas2781_digital_getvol - get the volum control
- * @kcontrol: control pointer
- * @ucontrol: User data
- * Customer Kcontrol for tas2781 is primarily for regmap booking, paging
- * depends on internal regmap mechanism.
- * tas2781 contains book and page two-level register map, especially
- * book switching will set the register BXXP00R7F, after switching to the
- * correct book, then leverage the mechanism for paging to access the
- * register.
- */
-static int tas2781_digital_getvol(struct snd_kcontrol *kcontrol,
+static int tas2781_amp_getvol(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
+ int ret;
- return tasdevice_digital_getvol(tas_priv, ucontrol, mc);
-}
+ mutex_lock(&tas_priv->codec_lock);
-static int tas2781_amp_getvol(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- struct soc_mixer_control *mc =
- (struct soc_mixer_control *)kcontrol->private_value;
+ ret = tasdevice_amp_getvol(tas_priv, ucontrol, mc);
- return tasdevice_amp_getvol(tas_priv, ucontrol, mc);
-}
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %ld\n",
+ __func__, kcontrol->id.name, ucontrol->value.integer.value[0]);
-static int tas2781_digital_putvol(struct snd_kcontrol *kcontrol,
- struct snd_ctl_elem_value *ucontrol)
-{
- struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
- struct soc_mixer_control *mc =
- (struct soc_mixer_control *)kcontrol->private_value;
+ mutex_unlock(&tas_priv->codec_lock);
- /* The check of the given value is in tasdevice_digital_putvol. */
- return tasdevice_digital_putvol(tas_priv, ucontrol, mc);
+ return ret;
}
static int tas2781_amp_putvol(struct snd_kcontrol *kcontrol,
@@ -342,9 +361,19 @@ static int tas2781_amp_putvol(struct snd_kcontrol *kcontrol,
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
+ int ret;
+
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: -> %ld\n",
+ __func__, kcontrol->id.name, ucontrol->value.integer.value[0]);
/* The check of the given value is in tasdevice_amp_putvol. */
- return tasdevice_amp_putvol(tas_priv, ucontrol, mc);
+ ret = tasdevice_amp_putvol(tas_priv, ucontrol, mc);
+
+ mutex_unlock(&tas_priv->codec_lock);
+
+ return ret;
}
static int tas2781_force_fwload_get(struct snd_kcontrol *kcontrol,
@@ -352,9 +381,13 @@ static int tas2781_force_fwload_get(struct snd_kcontrol *kcontrol,
{
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
+ mutex_lock(&tas_priv->codec_lock);
+
ucontrol->value.integer.value[0] = (int)tas_priv->force_fwload_status;
- dev_dbg(tas_priv->dev, "%s : Force FWload %s\n", __func__,
- tas_priv->force_fwload_status ? "ON" : "OFF");
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d\n",
+ __func__, kcontrol->id.name, tas_priv->force_fwload_status);
+
+ mutex_unlock(&tas_priv->codec_lock);
return 0;
}
@@ -365,14 +398,20 @@ static int tas2781_force_fwload_put(struct snd_kcontrol *kcontrol,
struct tasdevice_priv *tas_priv = snd_kcontrol_chip(kcontrol);
bool change, val = (bool)ucontrol->value.integer.value[0];
+ mutex_lock(&tas_priv->codec_lock);
+
+ dev_dbg(tas_priv->dev, "%s: kcontrol %s: %d -> %d\n",
+ __func__, kcontrol->id.name,
+ tas_priv->force_fwload_status, val);
+
if (tas_priv->force_fwload_status == val)
change = false;
else {
change = true;
tas_priv->force_fwload_status = val;
}
- dev_dbg(tas_priv->dev, "%s : Force FWload %s\n", __func__,
- tas_priv->force_fwload_status ? "ON" : "OFF");
+
+ mutex_unlock(&tas_priv->codec_lock);
return change;
}
@@ -381,9 +420,6 @@ static const struct snd_kcontrol_new tas2781_snd_controls[] = {
ACARD_SINGLE_RANGE_EXT_TLV("Speaker Analog Gain", TAS2781_AMP_LEVEL,
1, 0, 20, 0, tas2781_amp_getvol,
tas2781_amp_putvol, amp_vol_tlv),
- ACARD_SINGLE_RANGE_EXT_TLV("Speaker Digital Gain", TAS2781_DVC_LVL,
- 0, 0, 200, 1, tas2781_digital_getvol,
- tas2781_digital_putvol, dvc_tlv),
ACARD_SINGLE_BOOL_EXT("Speaker Force Firmware Load", 0,
tas2781_force_fwload_get, tas2781_force_fwload_put),
};
@@ -478,10 +514,10 @@ static int tas2563_save_calibration(struct tasdevice_priv *tas_priv)
static void tas2781_apply_calib(struct tasdevice_priv *tas_priv)
{
static const unsigned char page_array[CALIB_MAX] = {
- 0x17, 0x18, 0x18, 0x0d, 0x18
+ 0x17, 0x18, 0x18, 0x13, 0x18,
};
static const unsigned char rgno_array[CALIB_MAX] = {
- 0x74, 0x0c, 0x14, 0x3c, 0x7c
+ 0x74, 0x0c, 0x14, 0x70, 0x7c,
};
unsigned char *data;
int i, j, rc;
diff --git a/sound/sh/aica.c b/sound/sh/aica.c
index 320ac792c7fe24..3182c634464d42 100644
--- a/sound/sh/aica.c
+++ b/sound/sh/aica.c
@@ -278,7 +278,8 @@ static void run_spu_dma(struct work_struct *work)
dreamcastcard->clicks++;
if (unlikely(dreamcastcard->clicks >= AICA_PERIOD_NUMBER))
dreamcastcard->clicks %= AICA_PERIOD_NUMBER;
- mod_timer(&dreamcastcard->timer, jiffies + 1);
+ if (snd_pcm_running(dreamcastcard->substream))
+ mod_timer(&dreamcastcard->timer, jiffies + 1);
}
}
@@ -290,6 +291,8 @@ static void aica_period_elapsed(struct timer_list *t)
/*timer function - so cannot sleep */
int play_period;
struct snd_pcm_runtime *runtime;
+ if (!snd_pcm_running(substream))
+ return;
runtime = substream->runtime;
dreamcastcard = substream->pcm->private_data;
/* Have we played out an additional period? */
@@ -350,12 +353,19 @@ static int snd_aicapcm_pcm_open(struct snd_pcm_substream
return 0;
}
+static int snd_aicapcm_pcm_sync_stop(struct snd_pcm_substream *substream)
+{
+ struct snd_card_aica *dreamcastcard = substream->pcm->private_data;
+
+ del_timer_sync(&dreamcastcard->timer);
+ cancel_work_sync(&dreamcastcard->spu_dma_work);
+ return 0;
+}
+
static int snd_aicapcm_pcm_close(struct snd_pcm_substream
*substream)
{
struct snd_card_aica *dreamcastcard = substream->pcm->private_data;
- flush_work(&(dreamcastcard->spu_dma_work));
- del_timer(&dreamcastcard->timer);
dreamcastcard->substream = NULL;
kfree(dreamcastcard->channel);
spu_disable();
@@ -401,6 +411,7 @@ static const struct snd_pcm_ops snd_aicapcm_playback_ops = {
.prepare = snd_aicapcm_pcm_prepare,
.trigger = snd_aicapcm_pcm_trigger,
.pointer = snd_aicapcm_pcm_pointer,
+ .sync_stop = snd_aicapcm_pcm_sync_stop,
};
/* TO DO: set up to handle more than one pcm instance */
diff --git a/sound/soc/amd/acp/acp-pci.c b/sound/soc/amd/acp/acp-pci.c
index 8c8b1dcac6281c..5f35b90eab8d3f 100644
--- a/sound/soc/amd/acp/acp-pci.c
+++ b/sound/soc/amd/acp/acp-pci.c
@@ -115,7 +115,10 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id
goto unregister_dmic_dev;
}
- acp_init(chip);
+ ret = acp_init(chip);
+ if (ret)
+ goto unregister_dmic_dev;
+
res = devm_kcalloc(&pci->dev, num_res, sizeof(struct resource), GFP_KERNEL);
if (!res) {
ret = -ENOMEM;
@@ -133,11 +136,9 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id
}
}
- if (flag == FLAG_AMD_LEGACY_ONLY_DMIC) {
- ret = check_acp_pdm(pci, chip);
- if (ret < 0)
- goto skip_pdev_creation;
- }
+ ret = check_acp_pdm(pci, chip);
+ if (ret < 0)
+ goto skip_pdev_creation;
chip->flag = flag;
memset(&pdevinfo, 0, sizeof(pdevinfo));
diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c
index 01ef4db5407da5..287ac01a387357 100644
--- a/sound/soc/codecs/cs-amp-lib.c
+++ b/sound/soc/codecs/cs-amp-lib.c
@@ -56,6 +56,11 @@ static int _cs_amp_write_cal_coeffs(struct cs_dsp *dsp,
dev_dbg(dsp->dev, "Calibration: Ambient=%#x, Status=%#x, CalR=%d\n",
data->calAmbient, data->calStatus, data->calR);
+ if (list_empty(&dsp->ctl_list)) {
+ dev_info(dsp->dev, "Calibration disabled due to missing firmware controls\n");
+ return -ENOENT;
+ }
+
ret = cs_amp_write_cal_coeff(dsp, controls, controls->ambient, data->calAmbient);
if (ret)
return ret;
diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c
index 860d5cda67bffe..94685449f0f48c 100644
--- a/sound/soc/codecs/cs42l43.c
+++ b/sound/soc/codecs/cs42l43.c
@@ -2364,7 +2364,8 @@ static int cs42l43_codec_runtime_resume(struct device *dev)
static int cs42l43_codec_suspend(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
disable_irq(cs42l43->irq);
@@ -2373,7 +2374,8 @@ static int cs42l43_codec_suspend(struct device *dev)
static int cs42l43_codec_suspend_noirq(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
enable_irq(cs42l43->irq);
@@ -2382,7 +2384,8 @@ static int cs42l43_codec_suspend_noirq(struct device *dev)
static int cs42l43_codec_resume(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
enable_irq(cs42l43->irq);
@@ -2391,7 +2394,8 @@ static int cs42l43_codec_resume(struct device *dev)
static int cs42l43_codec_resume_noirq(struct device *dev)
{
- struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+ struct cs42l43_codec *priv = dev_get_drvdata(dev);
+ struct cs42l43 *cs42l43 = priv->core;
disable_irq(cs42l43->irq);
diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c
index 15289dadafea09..17bd6b5160772e 100644
--- a/sound/soc/codecs/es8326.c
+++ b/sound/soc/codecs/es8326.c
@@ -412,9 +412,9 @@ static const struct _coeff_div coeff_div_v3[] = {
{125, 48000, 6000000, 0x04, 0x04, 0x1F, 0x2D, 0x8A, 0x0A, 0x27, 0x27},
{128, 8000, 1024000, 0x60, 0x00, 0x05, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
- {128, 16000, 2048000, 0x20, 0x00, 0x31, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
- {128, 44100, 5644800, 0xE0, 0x00, 0x01, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
- {128, 48000, 6144000, 0xE0, 0x00, 0x01, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+ {128, 16000, 2048000, 0x20, 0x00, 0x31, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+ {128, 44100, 5644800, 0xE0, 0x00, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+ {128, 48000, 6144000, 0xE0, 0x00, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
{144, 8000, 1152000, 0x20, 0x00, 0x03, 0x35, 0x8A, 0x1B, 0x23, 0x47},
{144, 16000, 2304000, 0x20, 0x00, 0x11, 0x35, 0x8A, 0x1B, 0x23, 0x47},
{192, 8000, 1536000, 0x60, 0x02, 0x0D, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
@@ -423,10 +423,10 @@ static const struct _coeff_div coeff_div_v3[] = {
{200, 48000, 9600000, 0x04, 0x04, 0x0F, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
{250, 48000, 12000000, 0x04, 0x04, 0x0F, 0x2D, 0xCA, 0x0A, 0x27, 0x27},
- {256, 8000, 2048000, 0x60, 0x00, 0x31, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
- {256, 16000, 4096000, 0x20, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
- {256, 44100, 11289600, 0xE0, 0x00, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
- {256, 48000, 12288000, 0xE0, 0x00, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+ {256, 8000, 2048000, 0x60, 0x00, 0x31, 0x35, 0x08, 0x19, 0x1F, 0x7F},
+ {256, 16000, 4096000, 0x20, 0x00, 0x01, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+ {256, 44100, 11289600, 0xE0, 0x01, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+ {256, 48000, 12288000, 0xE0, 0x01, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
{288, 8000, 2304000, 0x20, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x23, 0x47},
{384, 8000, 3072000, 0x60, 0x02, 0x05, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
{384, 16000, 6144000, 0x20, 0x02, 0x03, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
@@ -435,10 +435,10 @@ static const struct _coeff_div coeff_div_v3[] = {
{400, 48000, 19200000, 0xE4, 0x04, 0x35, 0x6d, 0xCA, 0x0A, 0x1F, 0x1F},
{500, 48000, 24000000, 0xF8, 0x04, 0x3F, 0x6D, 0xCA, 0x0A, 0x1F, 0x1F},
- {512, 8000, 4096000, 0x60, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
- {512, 16000, 8192000, 0x20, 0x00, 0x30, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
- {512, 44100, 22579200, 0xE0, 0x00, 0x00, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
- {512, 48000, 24576000, 0xE0, 0x00, 0x00, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+ {512, 8000, 4096000, 0x60, 0x00, 0x01, 0x08, 0x19, 0x1B, 0x1F, 0x7F},
+ {512, 16000, 8192000, 0x20, 0x00, 0x30, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+ {512, 44100, 22579200, 0xE0, 0x00, 0x00, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+ {512, 48000, 24576000, 0xE0, 0x00, 0x00, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
{768, 8000, 6144000, 0x60, 0x02, 0x11, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
{768, 16000, 12288000, 0x20, 0x02, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
{768, 32000, 24576000, 0xE0, 0x02, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
@@ -835,7 +835,6 @@ static void es8326_jack_detect_handler(struct work_struct *work)
dev_dbg(comp->dev, "Report hp remove event\n");
snd_soc_jack_report(es8326->jack, 0, SND_JACK_HEADSET);
/* mute adc when mic path switch */
- regmap_write(es8326->regmap, ES8326_ADC_SCALE, 0x33);
regmap_write(es8326->regmap, ES8326_ADC1_SRC, 0x44);
regmap_write(es8326->regmap, ES8326_ADC2_SRC, 0x66);
es8326->hp = 0;
@@ -843,6 +842,7 @@ static void es8326_jack_detect_handler(struct work_struct *work)
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x0a);
regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x03);
+ regmap_write(es8326->regmap, ES8326_INT_SOURCE, ES8326_INT_SRC_PIN9);
/*
* Inverted HPJACK_POL bit to trigger one IRQ to double check HP Removal event
*/
@@ -865,6 +865,8 @@ static void es8326_jack_detect_handler(struct work_struct *work)
* set auto-check mode, then restart jack_detect_work after 400ms.
* Don't report jack status.
*/
+ regmap_write(es8326->regmap, ES8326_INT_SOURCE,
+ (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
es8326_enable_micbias(es8326->component);
usleep_range(50000, 70000);
@@ -891,7 +893,6 @@ static void es8326_jack_detect_handler(struct work_struct *work)
snd_soc_jack_report(es8326->jack,
SND_JACK_HEADSET, SND_JACK_HEADSET);
- regmap_write(es8326->regmap, ES8326_ADC_SCALE, 0x33);
regmap_update_bits(es8326->regmap, ES8326_PGA_PDN,
0x08, 0x08);
regmap_update_bits(es8326->regmap, ES8326_PGAGAIN,
@@ -987,7 +988,7 @@ static int es8326_resume(struct snd_soc_component *component)
regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E);
regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0);
usleep_range(10000, 15000);
- regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xe9);
+ regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xd9);
regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0xcb);
/* set headphone default type and detect pin */
regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x83);
@@ -1038,8 +1039,7 @@ static int es8326_resume(struct snd_soc_component *component)
es8326_enable_micbias(es8326->component);
usleep_range(50000, 70000);
regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00);
- regmap_write(es8326->regmap, ES8326_INT_SOURCE,
- (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
+ regmap_write(es8326->regmap, ES8326_INT_SOURCE, ES8326_INT_SRC_PIN9);
regmap_write(es8326->regmap, ES8326_INTOUT_IO,
es8326->interrupt_clk);
regmap_write(es8326->regmap, ES8326_SDINOUT1_IO,
@@ -1060,6 +1060,8 @@ static int es8326_resume(struct snd_soc_component *component)
es8326->hp = 0;
es8326->hpl_vol = 0x03;
es8326->hpr_vol = 0x03;
+
+ es8326_irq(es8326->irq, es8326);
return 0;
}
@@ -1070,6 +1072,9 @@ static int es8326_suspend(struct snd_soc_component *component)
cancel_delayed_work_sync(&es8326->jack_detect_work);
es8326_disable_micbias(component);
es8326->calibrated = false;
+ regmap_write(es8326->regmap, ES8326_CLK_MUX, 0x2d);
+ regmap_write(es8326->regmap, ES8326_DAC2HPMIX, 0x00);
+ regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b);
regmap_write(es8326->regmap, ES8326_CLK_CTL, ES8326_CLK_OFF);
regcache_cache_only(es8326->regmap, true);
regcache_mark_dirty(es8326->regmap);
diff --git a/sound/soc/codecs/es8326.h b/sound/soc/codecs/es8326.h
index ee12caef810532..c3e52e7bdef57d 100644
--- a/sound/soc/codecs/es8326.h
+++ b/sound/soc/codecs/es8326.h
@@ -104,7 +104,7 @@
#define ES8326_MUTE (3 << 0)
/* ES8326_CLK_CTL */
-#define ES8326_CLK_ON (0x7e << 0)
+#define ES8326_CLK_ON (0x7f << 0)
#define ES8326_CLK_OFF (0 << 0)
/* ES8326_CLK_INV */
diff --git a/sound/soc/codecs/rt1316-sdw.c b/sound/soc/codecs/rt1316-sdw.c
index 47511f70119ae3..0b3bf920bcab23 100644
--- a/sound/soc/codecs/rt1316-sdw.c
+++ b/sound/soc/codecs/rt1316-sdw.c
@@ -537,7 +537,7 @@ static int rt1316_sdw_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt1316->sdw_slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -577,12 +577,12 @@ static int rt1316_sdw_parse_dt(struct rt1316_sdw_priv *rt1316, struct device *de
if (rt1316->bq_params_cnt) {
rt1316->bq_params = devm_kzalloc(dev, rt1316->bq_params_cnt, GFP_KERNEL);
if (!rt1316->bq_params) {
- dev_err(dev, "Could not allocate bq_params memory\n");
+ dev_err(dev, "%s: Could not allocate bq_params memory\n", __func__);
ret = -ENOMEM;
} else {
ret = device_property_read_u8_array(dev, "realtek,bq-params", rt1316->bq_params, rt1316->bq_params_cnt);
if (ret < 0)
- dev_err(dev, "Could not read list of realtek,bq-params\n");
+ dev_err(dev, "%s: Could not read list of realtek,bq-params\n", __func__);
}
}
@@ -759,7 +759,7 @@ static int __maybe_unused rt1316_dev_resume(struct device *dev)
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT1316_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt1318-sdw.c b/sound/soc/codecs/rt1318-sdw.c
index ff364bde4a0849..462c9a4b1be5dd 100644
--- a/sound/soc/codecs/rt1318-sdw.c
+++ b/sound/soc/codecs/rt1318-sdw.c
@@ -606,7 +606,7 @@ static int rt1318_sdw_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt1318->sdw_slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -631,8 +631,8 @@ static int rt1318_sdw_hw_params(struct snd_pcm_substream *substream,
sampling_rate = RT1318_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
@@ -835,7 +835,7 @@ static int __maybe_unused rt1318_dev_resume(struct device *dev)
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT1318_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
return -ETIMEDOUT;
}
diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c
index e67c2e19cb1a72..f9ee42c13dbac3 100644
--- a/sound/soc/codecs/rt5682-sdw.c
+++ b/sound/soc/codecs/rt5682-sdw.c
@@ -132,7 +132,7 @@ static int rt5682_sdw_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt5682->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -315,8 +315,8 @@ static int rt5682_sdw_init(struct device *dev, struct regmap *regmap,
&rt5682_sdw_indirect_regmap);
if (IS_ERR(rt5682->regmap)) {
ret = PTR_ERR(rt5682->regmap);
- dev_err(dev, "Failed to allocate register map: %d\n",
- ret);
+ dev_err(dev, "%s: Failed to allocate register map: %d\n",
+ __func__, ret);
return ret;
}
@@ -400,7 +400,7 @@ static int rt5682_io_init(struct device *dev, struct sdw_slave *slave)
}
if (val != DEVICE_ID) {
- dev_err(dev, "Device with ID register %x is not rt5682\n", val);
+ dev_err(dev, "%s: Device with ID register %x is not rt5682\n", __func__, val);
ret = -ENODEV;
goto err_nodev;
}
@@ -648,7 +648,7 @@ static int rt5682_bus_config(struct sdw_slave *slave,
ret = rt5682_clock_config(&slave->dev);
if (ret < 0)
- dev_err(&slave->dev, "Invalid clk config");
+ dev_err(&slave->dev, "%s: Invalid clk config", __func__);
return ret;
}
@@ -763,19 +763,19 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev)
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt5682->disable_irq_lock);
if (rt5682->disable_irq == true) {
- mutex_lock(&rt5682->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
rt5682->disable_irq = false;
- mutex_unlock(&rt5682->disable_irq_lock);
}
+ mutex_unlock(&rt5682->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT5682_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt700.c b/sound/soc/codecs/rt700.c
index 0ebf344a1b6094..434b926f96c837 100644
--- a/sound/soc/codecs/rt700.c
+++ b/sound/soc/codecs/rt700.c
@@ -37,8 +37,8 @@ static int rt700_index_write(struct regmap *regmap,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -52,8 +52,8 @@ static int rt700_index_read(struct regmap *regmap,
*value = 0;
ret = regmap_read(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -930,14 +930,14 @@ static int rt700_pcm_hw_params(struct snd_pcm_substream *substream,
port_config.num += 2;
break;
default:
- dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+ dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
return -EINVAL;
}
retval = sdw_stream_add_slave(rt700->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -945,8 +945,8 @@ static int rt700_pcm_hw_params(struct snd_pcm_substream *substream,
/* bit 3:0 Number of Channel */
val |= (params_channels(params) - 1);
} else {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c
index 935e597022d324..2636c2eea4bc8b 100644
--- a/sound/soc/codecs/rt711-sdca-sdw.c
+++ b/sound/soc/codecs/rt711-sdca-sdw.c
@@ -438,20 +438,20 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev)
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt711->disable_irq_lock);
if (rt711->disable_irq == true) {
- mutex_lock(&rt711->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt711->disable_irq = false;
- mutex_unlock(&rt711->disable_irq_lock);
}
+ mutex_unlock(&rt711->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT711_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c
index 447154cb60104d..1e8dbfc3ecd969 100644
--- a/sound/soc/codecs/rt711-sdca.c
+++ b/sound/soc/codecs/rt711-sdca.c
@@ -36,8 +36,8 @@ static int rt711_sdca_index_write(struct rt711_sdca_priv *rt711,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt711->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -52,8 +52,8 @@ static int rt711_sdca_index_read(struct rt711_sdca_priv *rt711,
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt711->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -1293,13 +1293,13 @@ static int rt711_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt711->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 16) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
@@ -1318,8 +1318,8 @@ static int rt711_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
sampling_rate = RT711_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c
index 3f5773310ae8cc..0d3b43dd22e63d 100644
--- a/sound/soc/codecs/rt711-sdw.c
+++ b/sound/soc/codecs/rt711-sdw.c
@@ -408,7 +408,7 @@ static int rt711_bus_config(struct sdw_slave *slave,
ret = rt711_clock_config(&slave->dev);
if (ret < 0)
- dev_err(&slave->dev, "Invalid clk config");
+ dev_err(&slave->dev, "%s: Invalid clk config", __func__);
return ret;
}
@@ -536,19 +536,19 @@ static int __maybe_unused rt711_dev_resume(struct device *dev)
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt711->disable_irq_lock);
if (rt711->disable_irq == true) {
- mutex_lock(&rt711->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
rt711->disable_irq = false;
- mutex_unlock(&rt711->disable_irq_lock);
}
+ mutex_unlock(&rt711->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT711_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
return -ETIMEDOUT;
}
diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c
index 66eaed13b0d6a0..5446f9506a1672 100644
--- a/sound/soc/codecs/rt711.c
+++ b/sound/soc/codecs/rt711.c
@@ -37,8 +37,8 @@ static int rt711_index_write(struct regmap *regmap,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -52,8 +52,8 @@ static int rt711_index_read(struct regmap *regmap,
*value = 0;
ret = regmap_read(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -428,7 +428,7 @@ static void rt711_jack_init(struct rt711_priv *rt711)
RT711_HP_JD_FINAL_RESULT_CTL_JD12);
break;
default:
- dev_warn(rt711->component->dev, "Wrong JD source\n");
+ dev_warn(rt711->component->dev, "%s: Wrong JD source\n", __func__);
break;
}
@@ -1020,7 +1020,7 @@ static int rt711_pcm_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt711->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -1028,8 +1028,8 @@ static int rt711_pcm_hw_params(struct snd_pcm_substream *substream,
/* bit 3:0 Number of Channel */
val |= (params_channels(params) - 1);
} else {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt712-sdca-dmic.c b/sound/soc/codecs/rt712-sdca-dmic.c
index 0926b26619bd45..012b79e72cf6b6 100644
--- a/sound/soc/codecs/rt712-sdca-dmic.c
+++ b/sound/soc/codecs/rt712-sdca-dmic.c
@@ -139,8 +139,8 @@ static int rt712_sdca_dmic_index_write(struct rt712_sdca_dmic_priv *rt712,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -155,8 +155,8 @@ static int rt712_sdca_dmic_index_read(struct rt712_sdca_dmic_priv *rt712,
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -317,7 +317,8 @@ static int rt712_sdca_dmic_set_gain_put(struct snd_kcontrol *kcontrol,
for (i = 0; i < p->count; i++) {
err = regmap_write(rt712->mbq_regmap, p->reg_base + i, gain_val[i]);
if (err < 0)
- dev_err(&rt712->slave->dev, "0x%08x can't be set\n", p->reg_base + i);
+ dev_err(&rt712->slave->dev, "%s: 0x%08x can't be set\n",
+ __func__, p->reg_base + i);
}
return changed;
@@ -667,13 +668,13 @@ static int rt712_sdca_dmic_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt712->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 4) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
@@ -698,8 +699,8 @@ static int rt712_sdca_dmic_hw_params(struct snd_pcm_substream *substream,
sampling_rate = RT712_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
@@ -923,7 +924,8 @@ static int __maybe_unused rt712_sdca_dmic_dev_resume(struct device *dev)
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT712_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n",
+ __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt712-sdca-sdw.c b/sound/soc/codecs/rt712-sdca-sdw.c
index 01ac555cd79b84..4e9ab3ef135b34 100644
--- a/sound/soc/codecs/rt712-sdca-sdw.c
+++ b/sound/soc/codecs/rt712-sdca-sdw.c
@@ -438,20 +438,21 @@ static int __maybe_unused rt712_sdca_dev_resume(struct device *dev)
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt712->disable_irq_lock);
if (rt712->disable_irq == true) {
- mutex_lock(&rt712->disable_irq_lock);
+
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt712->disable_irq = false;
- mutex_unlock(&rt712->disable_irq_lock);
}
+ mutex_unlock(&rt712->disable_irq_lock);
goto regmap_sync;
}
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT712_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt712-sdca.c b/sound/soc/codecs/rt712-sdca.c
index 6954fbe7ec5f3b..b503de9fda80e7 100644
--- a/sound/soc/codecs/rt712-sdca.c
+++ b/sound/soc/codecs/rt712-sdca.c
@@ -34,8 +34,8 @@ static int rt712_sdca_index_write(struct rt712_sdca_priv *rt712,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -50,8 +50,8 @@ static int rt712_sdca_index_read(struct rt712_sdca_priv *rt712,
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt712->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -1060,13 +1060,13 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt712->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 16) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
@@ -1085,8 +1085,8 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
sampling_rate = RT712_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
@@ -1106,7 +1106,7 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
sampling_rate);
break;
default:
- dev_err(component->dev, "Wrong DAI id\n");
+ dev_err(component->dev, "%s: Wrong DAI id\n", __func__);
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt715-sdca-sdw.c b/sound/soc/codecs/rt715-sdca-sdw.c
index ab54a67a27ebbf..ee450126106f96 100644
--- a/sound/soc/codecs/rt715-sdca-sdw.c
+++ b/sound/soc/codecs/rt715-sdca-sdw.c
@@ -237,7 +237,7 @@ static int __maybe_unused rt715_dev_resume(struct device *dev)
time = wait_for_completion_timeout(&slave->enumeration_complete,
msecs_to_jiffies(RT715_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Enumeration not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Enumeration not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt715-sdca.c b/sound/soc/codecs/rt715-sdca.c
index 4533eedd7e189f..3fb7b9adb61de6 100644
--- a/sound/soc/codecs/rt715-sdca.c
+++ b/sound/soc/codecs/rt715-sdca.c
@@ -41,8 +41,8 @@ static int rt715_sdca_index_write(struct rt715_sdca_priv *rt715,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt715->slave->dev,
- "Failed to set private value: %08x <= %04x %d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %08x <= %04x %d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -59,8 +59,8 @@ static int rt715_sdca_index_read(struct rt715_sdca_priv *rt715,
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt715->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -152,8 +152,8 @@ static int rt715_sdca_set_amp_gain_put(struct snd_kcontrol *kcontrol,
mc->shift);
ret = regmap_write(rt715->mbq_regmap, mc->reg + i, gain_val);
if (ret != 0) {
- dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
- mc->reg + i, gain_val);
+ dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+ __func__, mc->reg + i, gain_val);
return ret;
}
}
@@ -188,8 +188,8 @@ static int rt715_sdca_set_amp_gain_4ch_put(struct snd_kcontrol *kcontrol,
ret = regmap_write(rt715->mbq_regmap, reg_base + i,
gain_val);
if (ret != 0) {
- dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
- reg_base + i, gain_val);
+ dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+ __func__, reg_base + i, gain_val);
return ret;
}
}
@@ -224,8 +224,8 @@ static int rt715_sdca_set_amp_gain_8ch_put(struct snd_kcontrol *kcontrol,
reg = i < 7 ? reg_base + i : (reg_base - 1) | BIT(15);
ret = regmap_write(rt715->mbq_regmap, reg, gain_val);
if (ret != 0) {
- dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
- reg, gain_val);
+ dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+ __func__, reg, gain_val);
return ret;
}
}
@@ -246,8 +246,8 @@ static int rt715_sdca_set_amp_gain_get(struct snd_kcontrol *kcontrol,
for (i = 0; i < 2; i++) {
ret = regmap_read(rt715->mbq_regmap, mc->reg + i, &val);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- mc->reg + i, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, mc->reg + i, ret);
return ret;
}
ucontrol->value.integer.value[i] = rt715_sdca_get_gain(val, mc->shift);
@@ -271,8 +271,8 @@ static int rt715_sdca_set_amp_gain_4ch_get(struct snd_kcontrol *kcontrol,
for (i = 0; i < 4; i++) {
ret = regmap_read(rt715->mbq_regmap, reg_base + i, &val);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- reg_base + i, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, reg_base + i, ret);
return ret;
}
ucontrol->value.integer.value[i] = rt715_sdca_get_gain(val, gain_sft);
@@ -297,8 +297,8 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol,
for (i = 0; i < 8; i += 2) {
ret = regmap_read(rt715->mbq_regmap, reg_base + i, &val_l);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- reg_base + i, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, reg_base + i, ret);
return ret;
}
ucontrol->value.integer.value[i] = (val_l >> gain_sft) / 10;
@@ -306,8 +306,8 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol,
reg = (i == 6) ? (reg_base - 1) | BIT(15) : reg_base + 1 + i;
ret = regmap_read(rt715->mbq_regmap, reg, &val_r);
if (ret < 0) {
- dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
- reg, ret);
+ dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+ __func__, reg, ret);
return ret;
}
ucontrol->value.integer.value[i + 1] = (val_r >> gain_sft) / 10;
@@ -834,15 +834,15 @@ static int rt715_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
0xaf00);
break;
default:
- dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+ dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
return -EINVAL;
}
retval = sdw_stream_add_slave(rt715->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(component->dev, "Unable to configure port, retval:%d\n",
- retval);
+ dev_err(component->dev, "%s: Unable to configure port, retval:%d\n",
+ __func__, retval);
return retval;
}
@@ -893,8 +893,8 @@ static int rt715_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
val = 0xf;
break;
default:
- dev_err(component->dev, "Unsupported sample rate %d\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Unsupported sample rate %d\n",
+ __func__, params_rate(params));
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt715-sdw.c b/sound/soc/codecs/rt715-sdw.c
index 21f37babd148a4..7e13868ff99f03 100644
--- a/sound/soc/codecs/rt715-sdw.c
+++ b/sound/soc/codecs/rt715-sdw.c
@@ -482,7 +482,7 @@ static int rt715_bus_config(struct sdw_slave *slave,
ret = rt715_clock_config(&slave->dev);
if (ret < 0)
- dev_err(&slave->dev, "Invalid clk config");
+ dev_err(&slave->dev, "%s: Invalid clk config", __func__);
return 0;
}
@@ -554,7 +554,7 @@ static int __maybe_unused rt715_dev_resume(struct device *dev)
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT715_PROBE_TIMEOUT));
if (!time) {
- dev_err(&slave->dev, "Initialization not complete, timed out\n");
+ dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
sdw_show_ping_status(slave->bus, true);
return -ETIMEDOUT;
diff --git a/sound/soc/codecs/rt715.c b/sound/soc/codecs/rt715.c
index 9f732a5abd53f3..299c9b12377c6a 100644
--- a/sound/soc/codecs/rt715.c
+++ b/sound/soc/codecs/rt715.c
@@ -40,8 +40,8 @@ static int rt715_index_write(struct regmap *regmap, unsigned int reg,
ret = regmap_write(regmap, addr, value);
if (ret < 0) {
- pr_err("Failed to set private value: %08x <= %04x %d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %08x <= %04x %d\n",
+ __func__, addr, value, ret);
}
return ret;
@@ -55,8 +55,8 @@ static int rt715_index_write_nid(struct regmap *regmap,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -70,8 +70,8 @@ static int rt715_index_read_nid(struct regmap *regmap,
*value = 0;
ret = regmap_read(regmap, addr, value);
if (ret < 0)
- pr_err("Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -862,14 +862,14 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream,
rt715_index_write(rt715->regmap, RT715_SDW_INPUT_SEL, 0xa000);
break;
default:
- dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+ dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
return -EINVAL;
}
retval = sdw_stream_add_slave(rt715->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
@@ -883,8 +883,8 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream,
val |= 0x0 << 8;
break;
default:
- dev_err(component->dev, "Unsupported sample rate %d\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Unsupported sample rate %d\n",
+ __func__, params_rate(params));
return -EINVAL;
}
@@ -892,8 +892,8 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream,
/* bit 3:0 Number of Channel */
val |= (params_channels(params) - 1);
} else {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c
index eb76f4c675b67f..65d584c1886e81 100644
--- a/sound/soc/codecs/rt722-sdca-sdw.c
+++ b/sound/soc/codecs/rt722-sdca-sdw.c
@@ -467,13 +467,13 @@ static int __maybe_unused rt722_sdca_dev_resume(struct device *dev)
return 0;
if (!slave->unattach_request) {
+ mutex_lock(&rt722->disable_irq_lock);
if (rt722->disable_irq == true) {
- mutex_lock(&rt722->disable_irq_lock);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_6);
sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
rt722->disable_irq = false;
- mutex_unlock(&rt722->disable_irq_lock);
}
+ mutex_unlock(&rt722->disable_irq_lock);
goto regmap_sync;
}
diff --git a/sound/soc/codecs/rt722-sdca.c b/sound/soc/codecs/rt722-sdca.c
index 0e1c65a20392ad..e0ea3a23f7cc68 100644
--- a/sound/soc/codecs/rt722-sdca.c
+++ b/sound/soc/codecs/rt722-sdca.c
@@ -35,8 +35,8 @@ int rt722_sdca_index_write(struct rt722_sdca_priv *rt722,
ret = regmap_write(regmap, addr, value);
if (ret < 0)
dev_err(&rt722->slave->dev,
- "Failed to set private value: %06x <= %04x ret=%d\n",
- addr, value, ret);
+ "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+ __func__, addr, value, ret);
return ret;
}
@@ -51,8 +51,8 @@ int rt722_sdca_index_read(struct rt722_sdca_priv *rt722,
ret = regmap_read(regmap, addr, value);
if (ret < 0)
dev_err(&rt722->slave->dev,
- "Failed to get private value: %06x => %04x ret=%d\n",
- addr, *value, ret);
+ "%s: Failed to get private value: %06x => %04x ret=%d\n",
+ __func__, addr, *value, ret);
return ret;
}
@@ -663,7 +663,8 @@ static int rt722_sdca_dmic_set_gain_put(struct snd_kcontrol *kcontrol,
for (i = 0; i < p->count; i++) {
err = regmap_write(rt722->mbq_regmap, p->reg_base + i, gain_val[i]);
if (err < 0)
- dev_err(&rt722->slave->dev, "%#08x can't be set\n", p->reg_base + i);
+ dev_err(&rt722->slave->dev, "%s: %#08x can't be set\n",
+ __func__, p->reg_base + i);
}
return changed;
@@ -1211,13 +1212,13 @@ static int rt722_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
retval = sdw_stream_add_slave(rt722->slave, &stream_config,
&port_config, 1, sdw_stream);
if (retval) {
- dev_err(dai->dev, "Unable to configure port\n");
+ dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
return retval;
}
if (params_channels(params) > 16) {
- dev_err(component->dev, "Unsupported channels %d\n",
- params_channels(params));
+ dev_err(component->dev, "%s: Unsupported channels %d\n",
+ __func__, params_channels(params));
return -EINVAL;
}
@@ -1236,8 +1237,8 @@ static int rt722_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
sampling_rate = RT722_SDCA_RATE_192000HZ;
break;
default:
- dev_err(component->dev, "Rate %d is not supported\n",
- params_rate(params));
+ dev_err(component->dev, "%s: Rate %d is not supported\n",
+ __func__, params_rate(params));
return -EINVAL;
}
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index e451c009f2d999..7d5c096e06cd32 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -683,11 +683,12 @@ static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl)
int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type,
unsigned int alg, void *buf, size_t len)
{
- struct cs_dsp_coeff_ctl *cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
+ struct cs_dsp_coeff_ctl *cs_ctl;
struct wm_coeff_ctl *ctl;
int ret;
mutex_lock(&dsp->cs_dsp.pwr_lock);
+ cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len);
mutex_unlock(&dsp->cs_dsp.pwr_lock);
diff --git a/sound/soc/intel/avs/boards/da7219.c b/sound/soc/intel/avs/boards/da7219.c
index c018f84fe02529..fc072dc58968cb 100644
--- a/sound/soc/intel/avs/boards/da7219.c
+++ b/sound/soc/intel/avs/boards/da7219.c
@@ -296,5 +296,6 @@ static struct platform_driver avs_da7219_driver = {
module_platform_driver(avs_da7219_driver);
+MODULE_DESCRIPTION("Intel da7219 machine driver");
MODULE_AUTHOR("Cezary Rojewski <cezary.rojewski@intel.com>");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/dmic.c b/sound/soc/intel/avs/boards/dmic.c
index ba2bc7f689eb60..d9e5e85f523358 100644
--- a/sound/soc/intel/avs/boards/dmic.c
+++ b/sound/soc/intel/avs/boards/dmic.c
@@ -96,4 +96,5 @@ static struct platform_driver avs_dmic_driver = {
module_platform_driver(avs_dmic_driver);
+MODULE_DESCRIPTION("Intel DMIC machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/es8336.c b/sound/soc/intel/avs/boards/es8336.c
index 1090082e7d5bfc..5c90a600757734 100644
--- a/sound/soc/intel/avs/boards/es8336.c
+++ b/sound/soc/intel/avs/boards/es8336.c
@@ -326,4 +326,5 @@ static struct platform_driver avs_es8336_driver = {
module_platform_driver(avs_es8336_driver);
+MODULE_DESCRIPTION("Intel es8336 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/i2s_test.c b/sound/soc/intel/avs/boards/i2s_test.c
index 28f254eb0d03fc..027373d6a16d60 100644
--- a/sound/soc/intel/avs/boards/i2s_test.c
+++ b/sound/soc/intel/avs/boards/i2s_test.c
@@ -204,4 +204,5 @@ static struct platform_driver avs_i2s_test_driver = {
module_platform_driver(avs_i2s_test_driver);
+MODULE_DESCRIPTION("Intel i2s test machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/max98357a.c b/sound/soc/intel/avs/boards/max98357a.c
index a83b95f25129f9..1ff85e4d8e160b 100644
--- a/sound/soc/intel/avs/boards/max98357a.c
+++ b/sound/soc/intel/avs/boards/max98357a.c
@@ -154,4 +154,5 @@ static struct platform_driver avs_max98357a_driver = {
module_platform_driver(avs_max98357a_driver)
+MODULE_DESCRIPTION("Intel max98357a machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/max98373.c b/sound/soc/intel/avs/boards/max98373.c
index 3b980a025e6f69..8d31586b73eaec 100644
--- a/sound/soc/intel/avs/boards/max98373.c
+++ b/sound/soc/intel/avs/boards/max98373.c
@@ -211,4 +211,5 @@ static struct platform_driver avs_max98373_driver = {
module_platform_driver(avs_max98373_driver)
+MODULE_DESCRIPTION("Intel max98373 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/max98927.c b/sound/soc/intel/avs/boards/max98927.c
index 86dd2b228df3a5..572ec58073d06b 100644
--- a/sound/soc/intel/avs/boards/max98927.c
+++ b/sound/soc/intel/avs/boards/max98927.c
@@ -208,4 +208,5 @@ static struct platform_driver avs_max98927_driver = {
module_platform_driver(avs_max98927_driver)
+MODULE_DESCRIPTION("Intel max98927 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/nau8825.c b/sound/soc/intel/avs/boards/nau8825.c
index 1c1e2083f474df..55db75efae4142 100644
--- a/sound/soc/intel/avs/boards/nau8825.c
+++ b/sound/soc/intel/avs/boards/nau8825.c
@@ -313,4 +313,5 @@ static struct platform_driver avs_nau8825_driver = {
module_platform_driver(avs_nau8825_driver)
+MODULE_DESCRIPTION("Intel nau8825 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/probe.c b/sound/soc/intel/avs/boards/probe.c
index a9469b5ecb402f..8be6887bbc6e81 100644
--- a/sound/soc/intel/avs/boards/probe.c
+++ b/sound/soc/intel/avs/boards/probe.c
@@ -69,4 +69,5 @@ static struct platform_driver avs_probe_mb_driver = {
module_platform_driver(avs_probe_mb_driver);
+MODULE_DESCRIPTION("Intel probe machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt274.c b/sound/soc/intel/avs/boards/rt274.c
index bfcb8845fd15d0..1cf52421608753 100644
--- a/sound/soc/intel/avs/boards/rt274.c
+++ b/sound/soc/intel/avs/boards/rt274.c
@@ -276,4 +276,5 @@ static struct platform_driver avs_rt274_driver = {
module_platform_driver(avs_rt274_driver);
+MODULE_DESCRIPTION("Intel rt274 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt286.c b/sound/soc/intel/avs/boards/rt286.c
index 28d7d86b1cc99d..4740bba1057032 100644
--- a/sound/soc/intel/avs/boards/rt286.c
+++ b/sound/soc/intel/avs/boards/rt286.c
@@ -247,4 +247,5 @@ static struct platform_driver avs_rt286_driver = {
module_platform_driver(avs_rt286_driver);
+MODULE_DESCRIPTION("Intel rt286 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt298.c b/sound/soc/intel/avs/boards/rt298.c
index 80f490b9e11842..6e409e29f69746 100644
--- a/sound/soc/intel/avs/boards/rt298.c
+++ b/sound/soc/intel/avs/boards/rt298.c
@@ -266,4 +266,5 @@ static struct platform_driver avs_rt298_driver = {
module_platform_driver(avs_rt298_driver);
+MODULE_DESCRIPTION("Intel rt298 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt5514.c b/sound/soc/intel/avs/boards/rt5514.c
index 60105f453ae235..097ae5f73241ef 100644
--- a/sound/soc/intel/avs/boards/rt5514.c
+++ b/sound/soc/intel/avs/boards/rt5514.c
@@ -192,4 +192,5 @@ static struct platform_driver avs_rt5514_driver = {
module_platform_driver(avs_rt5514_driver);
+MODULE_DESCRIPTION("Intel rt5514 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt5663.c b/sound/soc/intel/avs/boards/rt5663.c
index b4762c2a7bf2d1..1880c315cc4d1f 100644
--- a/sound/soc/intel/avs/boards/rt5663.c
+++ b/sound/soc/intel/avs/boards/rt5663.c
@@ -265,4 +265,5 @@ static struct platform_driver avs_rt5663_driver = {
module_platform_driver(avs_rt5663_driver);
+MODULE_DESCRIPTION("Intel rt5663 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/rt5682.c b/sound/soc/intel/avs/boards/rt5682.c
index 243f979fda98a4..594a971ded9eb2 100644
--- a/sound/soc/intel/avs/boards/rt5682.c
+++ b/sound/soc/intel/avs/boards/rt5682.c
@@ -341,5 +341,6 @@ static struct platform_driver avs_rt5682_driver = {
module_platform_driver(avs_rt5682_driver)
+MODULE_DESCRIPTION("Intel rt5682 machine driver");
MODULE_AUTHOR("Cezary Rojewski <cezary.rojewski@intel.com>");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/intel/avs/boards/ssm4567.c b/sound/soc/intel/avs/boards/ssm4567.c
index 4a0e136835ff5d..d6f7f046c24e5d 100644
--- a/sound/soc/intel/avs/boards/ssm4567.c
+++ b/sound/soc/intel/avs/boards/ssm4567.c
@@ -200,4 +200,5 @@ static struct platform_driver avs_ssm4567_driver = {
module_platform_driver(avs_ssm4567_driver)
+MODULE_DESCRIPTION("Intel ssm4567 machine driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index 2d25748ca70662..b27e89ff6a1673 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -263,7 +263,7 @@ int snd_soc_get_volsw(struct snd_kcontrol *kcontrol,
int max = mc->max;
int min = mc->min;
int sign_bit = mc->sign_bit;
- unsigned int mask = (1 << fls(max)) - 1;
+ unsigned int mask = (1ULL << fls(max)) - 1;
unsigned int invert = mc->invert;
int val;
int ret;
diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c
index be7dc1e02284ab..c12c7f82052947 100644
--- a/sound/soc/sof/amd/acp.c
+++ b/sound/soc/sof/amd/acp.c
@@ -704,6 +704,10 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev)
goto unregister_dev;
}
+ ret = acp_init(sdev);
+ if (ret < 0)
+ goto free_smn_dev;
+
sdev->ipc_irq = pci->irq;
ret = request_threaded_irq(sdev->ipc_irq, acp_irq_handler, acp_irq_thread,
IRQF_SHARED, "AudioDSP", sdev);
@@ -713,10 +717,6 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev)
goto free_smn_dev;
}
- ret = acp_init(sdev);
- if (ret < 0)
- goto free_ipc_irq;
-
/* scan SoundWire capabilities exposed by DSDT */
ret = acp_sof_scan_sdw_devices(sdev, chip->sdw_acpi_dev_addr);
if (ret < 0) {
diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
index 9b00ede2a486a2..cc84d4c81be9d3 100644
--- a/sound/soc/sof/core.c
+++ b/sound/soc/sof/core.c
@@ -339,8 +339,7 @@ static int sof_init_environment(struct snd_sof_dev *sdev)
ret = snd_sof_probe(sdev);
if (ret < 0) {
dev_err(sdev->dev, "failed to probe DSP %d\n", ret);
- sof_ops_free(sdev);
- return ret;
+ goto err_sof_probe;
}
/* check machine info */
@@ -358,15 +357,18 @@ static int sof_init_environment(struct snd_sof_dev *sdev)
ret = validate_sof_ops(sdev);
if (ret < 0) {
snd_sof_remove(sdev);
+ snd_sof_remove_late(sdev);
return ret;
}
}
+ return 0;
+
err_machine_check:
- if (ret) {
- snd_sof_remove(sdev);
- sof_ops_free(sdev);
- }
+ snd_sof_remove(sdev);
+err_sof_probe:
+ snd_sof_remove_late(sdev);
+ sof_ops_free(sdev);
return ret;
}
diff --git a/sound/soc/sof/intel/hda-common-ops.c b/sound/soc/sof/intel/hda-common-ops.c
index 2b385cddc385c5..d71bb66b999116 100644
--- a/sound/soc/sof/intel/hda-common-ops.c
+++ b/sound/soc/sof/intel/hda-common-ops.c
@@ -57,6 +57,9 @@ struct snd_sof_dsp_ops sof_hda_common_ops = {
.pcm_pointer = hda_dsp_pcm_pointer,
.pcm_ack = hda_dsp_pcm_ack,
+ .get_dai_frame_counter = hda_dsp_get_stream_llp,
+ .get_host_byte_counter = hda_dsp_get_stream_ldp,
+
/* firmware loading */
.load_firmware = snd_sof_load_firmware_raw,
diff --git a/sound/soc/sof/intel/hda-dai-ops.c b/sound/soc/sof/intel/hda-dai-ops.c
index c50ca9e72d3738..b073720b4cf432 100644
--- a/sound/soc/sof/intel/hda-dai-ops.c
+++ b/sound/soc/sof/intel/hda-dai-ops.c
@@ -7,6 +7,7 @@
#include <sound/pcm_params.h>
#include <sound/hdaudio_ext.h>
+#include <sound/hda_register.h>
#include <sound/hda-mlink.h>
#include <sound/sof/ipc4/header.h>
#include <uapi/sound/sof/header.h>
@@ -362,6 +363,16 @@ static int hda_trigger(struct snd_sof_dev *sdev, struct snd_soc_dai *cpu_dai,
case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
snd_hdac_ext_stream_clear(hext_stream);
+
+ /*
+ * Save the LLP registers in case the stream is
+ * restarting due PAUSE_RELEASE, or START without a pcm
+ * close/open since in this case the LLP register is not reset
+ * to 0 and the delay calculation will return with invalid
+ * results.
+ */
+ hext_stream->pplcllpl = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPL);
+ hext_stream->pplcllpu = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPU);
break;
default:
dev_err(sdev->dev, "unknown trigger command %d\n", cmd);
diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c
index 31ffa1a8f2ac04..ef5c915db8ffb4 100644
--- a/sound/soc/sof/intel/hda-dsp.c
+++ b/sound/soc/sof/intel/hda-dsp.c
@@ -681,17 +681,27 @@ static int hda_suspend(struct snd_sof_dev *sdev, bool runtime_suspend)
struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata;
const struct sof_intel_dsp_desc *chip = hda->desc;
struct hdac_bus *bus = sof_to_bus(sdev);
+ bool imr_lost = false;
int ret, j;
/*
- * The memory used for IMR boot loses its content in deeper than S3 state
- * We must not try IMR boot on next power up (as it will fail).
- *
+ * The memory used for IMR boot loses its content in deeper than S3
+ * state on CAVS platforms.
+ * On ACE platforms due to the system architecture the IMR content is
+ * lost at S3 state already, they are tailored for s2idle use.
+ * We must not try IMR boot on next power up in these cases as it will
+ * fail.
+ */
+ if (sdev->system_suspend_target > SOF_SUSPEND_S3 ||
+ (chip->hw_ip_version >= SOF_INTEL_ACE_1_0 &&
+ sdev->system_suspend_target == SOF_SUSPEND_S3))
+ imr_lost = true;
+
+ /*
* In case of firmware crash or boot failure set the skip_imr_boot to true
* as well in order to try to re-load the firmware to do a 'cold' boot.
*/
- if (sdev->system_suspend_target > SOF_SUSPEND_S3 ||
- sdev->fw_state == SOF_FW_CRASHED ||
+ if (imr_lost || sdev->fw_state == SOF_FW_CRASHED ||
sdev->fw_state == SOF_FW_BOOT_FAILED)
hda->skip_imr_boot = true;
diff --git a/sound/soc/sof/intel/hda-pcm.c b/sound/soc/sof/intel/hda-pcm.c
index 18f07364d21984..d7b446f3f973e3 100644
--- a/sound/soc/sof/intel/hda-pcm.c
+++ b/sound/soc/sof/intel/hda-pcm.c
@@ -259,8 +259,37 @@ int hda_dsp_pcm_open(struct snd_sof_dev *sdev,
snd_pcm_hw_constraint_mask64(substream->runtime, SNDRV_PCM_HW_PARAM_FORMAT,
SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S32);
+ /*
+ * The dsp_max_burst_size_in_ms is the length of the maximum burst size
+ * of the host DMA in the ALSA buffer.
+ *
+ * On playback start the DMA will transfer dsp_max_burst_size_in_ms
+ * amount of data in one initial burst to fill up the host DMA buffer.
+ * Consequent DMA burst sizes are shorter and their length can vary.
+ * To make sure that userspace allocate large enough ALSA buffer we need
+ * to place a constraint on the buffer time.
+ *
+ * On capture the DMA will transfer 1ms chunks.
+ *
+ * Exact dsp_max_burst_size_in_ms constraint is racy, so set the
+ * constraint to a minimum of 2x dsp_max_burst_size_in_ms.
+ */
+ if (spcm->stream[direction].dsp_max_burst_size_in_ms)
+ snd_pcm_hw_constraint_minmax(substream->runtime,
+ SNDRV_PCM_HW_PARAM_BUFFER_TIME,
+ spcm->stream[direction].dsp_max_burst_size_in_ms * USEC_PER_MSEC * 2,
+ UINT_MAX);
+
/* binding pcm substream to hda stream */
substream->runtime->private_data = &dsp_stream->hstream;
+
+ /*
+ * Reset the llp cache values (they are used for LLP compensation in
+ * case the counter is not reset)
+ */
+ dsp_stream->pplcllpl = 0;
+ dsp_stream->pplcllpu = 0;
+
return 0;
}
diff --git a/sound/soc/sof/intel/hda-stream.c b/sound/soc/sof/intel/hda-stream.c
index b387b1a69d7ea3..0c189d3b19c1af 100644
--- a/sound/soc/sof/intel/hda-stream.c
+++ b/sound/soc/sof/intel/hda-stream.c
@@ -1063,3 +1063,73 @@ snd_pcm_uframes_t hda_dsp_stream_get_position(struct hdac_stream *hstream,
return pos;
}
+
+#define merge_u64(u32_u, u32_l) (((u64)(u32_u) << 32) | (u32_l))
+
+/**
+ * hda_dsp_get_stream_llp - Retrieve the LLP (Linear Link Position) of the stream
+ * @sdev: SOF device
+ * @component: ASoC component
+ * @substream: PCM substream
+ *
+ * Returns the raw Linear Link Position value
+ */
+u64 hda_dsp_get_stream_llp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ struct hdac_stream *hstream = substream->runtime->private_data;
+ struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream);
+ u32 llp_l, llp_u;
+
+ /*
+ * The pplc_addr have been calculated during probe in
+ * hda_dsp_stream_init():
+ * pplc_addr = sdev->bar[HDA_DSP_PP_BAR] +
+ * SOF_HDA_PPLC_BASE +
+ * SOF_HDA_PPLC_MULTI * total_stream +
+ * SOF_HDA_PPLC_INTERVAL * stream_index
+ *
+ * Use this pre-calculated address to avoid repeated re-calculation.
+ */
+ llp_l = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPL);
+ llp_u = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPU);
+
+ /* Compensate the LLP counter with the saved offset */
+ if (hext_stream->pplcllpl || hext_stream->pplcllpu)
+ return merge_u64(llp_u, llp_l) -
+ merge_u64(hext_stream->pplcllpu, hext_stream->pplcllpl);
+
+ return merge_u64(llp_u, llp_l);
+}
+
+/**
+ * hda_dsp_get_stream_ldp - Retrieve the LDP (Linear DMA Position) of the stream
+ * @sdev: SOF device
+ * @component: ASoC component
+ * @substream: PCM substream
+ *
+ * Returns the raw Linear Link Position value
+ */
+u64 hda_dsp_get_stream_ldp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ struct hdac_stream *hstream = substream->runtime->private_data;
+ struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream);
+ u32 ldp_l, ldp_u;
+
+ /*
+ * The pphc_addr have been calculated during probe in
+ * hda_dsp_stream_init():
+ * pphc_addr = sdev->bar[HDA_DSP_PP_BAR] +
+ * SOF_HDA_PPHC_BASE +
+ * SOF_HDA_PPHC_INTERVAL * stream_index
+ *
+ * Use this pre-calculated address to avoid repeated re-calculation.
+ */
+ ldp_l = readl(hext_stream->pphc_addr + AZX_REG_PPHCLDPL);
+ ldp_u = readl(hext_stream->pphc_addr + AZX_REG_PPHCLDPU);
+
+ return ((u64)ldp_u << 32) | ldp_l;
+}
diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h
index b36eb7c7891335..81a1d4606d3cde 100644
--- a/sound/soc/sof/intel/hda.h
+++ b/sound/soc/sof/intel/hda.h
@@ -662,6 +662,12 @@ bool hda_dsp_check_stream_irq(struct snd_sof_dev *sdev);
snd_pcm_uframes_t hda_dsp_stream_get_position(struct hdac_stream *hstream,
int direction, bool can_sleep);
+u64 hda_dsp_get_stream_llp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream);
+u64 hda_dsp_get_stream_ldp(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream);
struct hdac_ext_stream *
hda_dsp_stream_get(struct snd_sof_dev *sdev, int direction, u32 flags);
diff --git a/sound/soc/sof/intel/lnl.c b/sound/soc/sof/intel/lnl.c
index 7ae017a00184e5..aeb4350cce6bba 100644
--- a/sound/soc/sof/intel/lnl.c
+++ b/sound/soc/sof/intel/lnl.c
@@ -29,15 +29,17 @@ static const struct snd_sof_debugfs_map lnl_dsp_debugfs[] = {
};
/* this helps allows the DSP to setup DMIC/SSP */
-static int hdac_bus_offload_dmic_ssp(struct hdac_bus *bus)
+static int hdac_bus_offload_dmic_ssp(struct hdac_bus *bus, bool enable)
{
int ret;
- ret = hdac_bus_eml_enable_offload(bus, true, AZX_REG_ML_LEPTR_ID_INTEL_SSP, true);
+ ret = hdac_bus_eml_enable_offload(bus, true,
+ AZX_REG_ML_LEPTR_ID_INTEL_SSP, enable);
if (ret < 0)
return ret;
- ret = hdac_bus_eml_enable_offload(bus, true, AZX_REG_ML_LEPTR_ID_INTEL_DMIC, true);
+ ret = hdac_bus_eml_enable_offload(bus, true,
+ AZX_REG_ML_LEPTR_ID_INTEL_DMIC, enable);
if (ret < 0)
return ret;
@@ -52,7 +54,19 @@ static int lnl_hda_dsp_probe(struct snd_sof_dev *sdev)
if (ret < 0)
return ret;
- return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+ return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
+}
+
+static void lnl_hda_dsp_remove(struct snd_sof_dev *sdev)
+{
+ int ret;
+
+ ret = hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), false);
+ if (ret < 0)
+ dev_warn(sdev->dev,
+ "Failed to disable offload for DMIC/SSP: %d\n", ret);
+
+ hda_dsp_remove(sdev);
}
static int lnl_hda_dsp_resume(struct snd_sof_dev *sdev)
@@ -63,7 +77,7 @@ static int lnl_hda_dsp_resume(struct snd_sof_dev *sdev)
if (ret < 0)
return ret;
- return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+ return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
}
static int lnl_hda_dsp_runtime_resume(struct snd_sof_dev *sdev)
@@ -74,7 +88,7 @@ static int lnl_hda_dsp_runtime_resume(struct snd_sof_dev *sdev)
if (ret < 0)
return ret;
- return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+ return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
}
static int lnl_dsp_post_fw_run(struct snd_sof_dev *sdev)
@@ -97,9 +111,11 @@ int sof_lnl_ops_init(struct snd_sof_dev *sdev)
/* common defaults */
memcpy(&sof_lnl_ops, &sof_hda_common_ops, sizeof(struct snd_sof_dsp_ops));
- /* probe */
- if (!sdev->dspless_mode_selected)
+ /* probe/remove */
+ if (!sdev->dspless_mode_selected) {
sof_lnl_ops.probe = lnl_hda_dsp_probe;
+ sof_lnl_ops.remove = lnl_hda_dsp_remove;
+ }
/* shutdown */
sof_lnl_ops.shutdown = hda_dsp_shutdown;
@@ -134,8 +150,6 @@ int sof_lnl_ops_init(struct snd_sof_dev *sdev)
sof_lnl_ops.runtime_resume = lnl_hda_dsp_runtime_resume;
}
- sof_lnl_ops.get_stream_position = mtl_dsp_get_stream_hda_link_position;
-
/* dsp core get/put */
sof_lnl_ops.core_get = mtl_dsp_core_get;
sof_lnl_ops.core_put = mtl_dsp_core_put;
diff --git a/sound/soc/sof/intel/mtl.c b/sound/soc/sof/intel/mtl.c
index df05dc77b8d5e3..060c34988e90d1 100644
--- a/sound/soc/sof/intel/mtl.c
+++ b/sound/soc/sof/intel/mtl.c
@@ -626,18 +626,6 @@ static int mtl_dsp_disable_interrupts(struct snd_sof_dev *sdev)
return mtl_enable_interrupts(sdev, false);
}
-u64 mtl_dsp_get_stream_hda_link_position(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream)
-{
- struct hdac_stream *hstream = substream->runtime->private_data;
- u32 llp_l, llp_u;
-
- llp_l = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, MTL_PPLCLLPL(hstream->index));
- llp_u = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, MTL_PPLCLLPU(hstream->index));
- return ((u64)llp_u << 32) | llp_l;
-}
-
int mtl_dsp_core_get(struct snd_sof_dev *sdev, int core)
{
const struct sof_ipc_pm_ops *pm_ops = sdev->ipc->ops->pm;
@@ -707,8 +695,6 @@ int sof_mtl_ops_init(struct snd_sof_dev *sdev)
sof_mtl_ops.core_get = mtl_dsp_core_get;
sof_mtl_ops.core_put = mtl_dsp_core_put;
- sof_mtl_ops.get_stream_position = mtl_dsp_get_stream_hda_link_position;
-
sdev->private = kzalloc(sizeof(struct sof_ipc4_fw_data), GFP_KERNEL);
if (!sdev->private)
return -ENOMEM;
diff --git a/sound/soc/sof/intel/mtl.h b/sound/soc/sof/intel/mtl.h
index cc5a1f46fd0956..ea8c1b83f7127d 100644
--- a/sound/soc/sof/intel/mtl.h
+++ b/sound/soc/sof/intel/mtl.h
@@ -6,12 +6,6 @@
* Copyright(c) 2020-2022 Intel Corporation. All rights reserved.
*/
-/* HDA Registers */
-#define MTL_PPLCLLPL_BASE 0x948
-#define MTL_PPLCLLPU_STRIDE 0x10
-#define MTL_PPLCLLPL(x) (MTL_PPLCLLPL_BASE + (x) * MTL_PPLCLLPU_STRIDE)
-#define MTL_PPLCLLPU(x) (MTL_PPLCLLPL_BASE + 0x4 + (x) * MTL_PPLCLLPU_STRIDE)
-
/* DSP Registers */
#define MTL_HFDSSCS 0x1000
#define MTL_HFDSSCS_SPA_MASK BIT(16)
@@ -103,9 +97,5 @@ int mtl_dsp_ipc_get_window_offset(struct snd_sof_dev *sdev, u32 id);
void mtl_ipc_dump(struct snd_sof_dev *sdev);
-u64 mtl_dsp_get_stream_hda_link_position(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream);
-
int mtl_dsp_core_get(struct snd_sof_dev *sdev, int core);
int mtl_dsp_core_put(struct snd_sof_dev *sdev, int core);
diff --git a/sound/soc/sof/ipc4-mtrace.c b/sound/soc/sof/ipc4-mtrace.c
index 9f1e33ee882612..0e04bea9432dda 100644
--- a/sound/soc/sof/ipc4-mtrace.c
+++ b/sound/soc/sof/ipc4-mtrace.c
@@ -4,6 +4,7 @@
#include <linux/debugfs.h>
#include <linux/sched/signal.h>
+#include <linux/sched/clock.h>
#include <sound/sof/ipc4/header.h>
#include "sof-priv.h"
#include "ipc4-priv.h"
@@ -412,7 +413,6 @@ static int ipc4_mtrace_enable(struct snd_sof_dev *sdev)
const struct sof_ipc_ops *iops = sdev->ipc->ops;
struct sof_ipc4_msg msg;
u64 system_time;
- ktime_t kt;
int ret;
if (priv->mtrace_state != SOF_MTRACE_DISABLED)
@@ -424,9 +424,12 @@ static int ipc4_mtrace_enable(struct snd_sof_dev *sdev)
msg.primary |= SOF_IPC4_MOD_INSTANCE(SOF_IPC4_MOD_INIT_BASEFW_INSTANCE_ID);
msg.extension = SOF_IPC4_MOD_EXT_MSG_PARAM_ID(SOF_IPC4_FW_PARAM_SYSTEM_TIME);
- /* The system time is in usec, UTC, epoch is 1601-01-01 00:00:00 */
- kt = ktime_add_us(ktime_get_real(), FW_EPOCH_DELTA * USEC_PER_SEC);
- system_time = ktime_to_us(kt);
+ /*
+ * local_clock() is used to align with dmesg, so both kernel and firmware logs have
+ * the same base and a minor delta due to the IPC. system time is in us format but
+ * local_clock() returns the time in ns, so convert to ns.
+ */
+ system_time = div64_u64(local_clock(), NSEC_PER_USEC);
msg.data_size = sizeof(system_time);
msg.data_ptr = &system_time;
ret = iops->set_get_data(sdev, &msg, msg.data_size, true);
diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c
index 0f332c8cdbe6af..e915f9f87a6c35 100644
--- a/sound/soc/sof/ipc4-pcm.c
+++ b/sound/soc/sof/ipc4-pcm.c
@@ -15,6 +15,28 @@
#include "ipc4-topology.h"
#include "ipc4-fw-reg.h"
+/**
+ * struct sof_ipc4_timestamp_info - IPC4 timestamp info
+ * @host_copier: the host copier of the pcm stream
+ * @dai_copier: the dai copier of the pcm stream
+ * @stream_start_offset: reported by fw in memory window (converted to frames)
+ * @stream_end_offset: reported by fw in memory window (converted to frames)
+ * @llp_offset: llp offset in memory window
+ * @boundary: wrap boundary should be used for the LLP frame counter
+ * @delay: Calculated and stored in pointer callback. The stored value is
+ * returned in the delay callback.
+ */
+struct sof_ipc4_timestamp_info {
+ struct sof_ipc4_copier *host_copier;
+ struct sof_ipc4_copier *dai_copier;
+ u64 stream_start_offset;
+ u64 stream_end_offset;
+ u32 llp_offset;
+
+ u64 boundary;
+ snd_pcm_sframes_t delay;
+};
+
static int sof_ipc4_set_multi_pipeline_state(struct snd_sof_dev *sdev, u32 state,
struct ipc4_pipeline_set_state_data *trigger_list)
{
@@ -423,8 +445,19 @@ static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component,
}
/* return if this is the final state */
- if (state == SOF_IPC4_PIPE_PAUSED)
+ if (state == SOF_IPC4_PIPE_PAUSED) {
+ struct sof_ipc4_timestamp_info *time_info;
+
+ /*
+ * Invalidate the stream_start_offset to make sure that it is
+ * going to be updated if the stream resumes
+ */
+ time_info = spcm->stream[substream->stream].private;
+ if (time_info)
+ time_info->stream_start_offset = SOF_IPC4_INVALID_STREAM_POSITION;
+
goto free;
+ }
skip_pause_transition:
/* else set the RUNNING/RESET state in the DSP */
ret = sof_ipc4_set_multi_pipeline_state(sdev, state, trigger_list);
@@ -464,14 +497,12 @@ static int sof_ipc4_pcm_trigger(struct snd_soc_component *component,
/* determine the pipeline state */
switch (cmd) {
- case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
- state = SOF_IPC4_PIPE_PAUSED;
- break;
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
case SNDRV_PCM_TRIGGER_RESUME:
case SNDRV_PCM_TRIGGER_START:
state = SOF_IPC4_PIPE_RUNNING;
break;
+ case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_STOP:
state = SOF_IPC4_PIPE_PAUSED;
@@ -703,6 +734,10 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm
if (abi_version < SOF_IPC4_FW_REGS_ABI_VER)
support_info = false;
+ /* For delay reporting the get_host_byte_counter callback is needed */
+ if (!sof_ops(sdev) || !sof_ops(sdev)->get_host_byte_counter)
+ support_info = false;
+
for_each_pcm_streams(stream) {
pipeline_list = &spcm->stream[stream].pipeline_list;
@@ -835,7 +870,6 @@ static int sof_ipc4_get_stream_start_offset(struct snd_sof_dev *sdev,
struct sof_ipc4_copier *host_copier = time_info->host_copier;
struct sof_ipc4_copier *dai_copier = time_info->dai_copier;
struct sof_ipc4_pipeline_registers ppl_reg;
- u64 stream_start_position;
u32 dai_sample_size;
u32 ch, node_index;
u32 offset;
@@ -852,38 +886,51 @@ static int sof_ipc4_get_stream_start_offset(struct snd_sof_dev *sdev,
if (ppl_reg.stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION)
return -EINVAL;
- stream_start_position = ppl_reg.stream_start_offset;
ch = dai_copier->data.out_format.fmt_cfg;
ch = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(ch);
dai_sample_size = (dai_copier->data.out_format.bit_depth >> 3) * ch;
- /* convert offset to sample count */
- do_div(stream_start_position, dai_sample_size);
- time_info->stream_start_offset = stream_start_position;
+
+ /* convert offsets to frame count */
+ time_info->stream_start_offset = ppl_reg.stream_start_offset;
+ do_div(time_info->stream_start_offset, dai_sample_size);
+ time_info->stream_end_offset = ppl_reg.stream_end_offset;
+ do_div(time_info->stream_end_offset, dai_sample_size);
+
+ /*
+ * Calculate the wrap boundary need to be used for delay calculation
+ * The host counter is in bytes, it will wrap earlier than the frames
+ * based link counter.
+ */
+ time_info->boundary = div64_u64(~((u64)0),
+ frames_to_bytes(substream->runtime, 1));
+ /* Initialize the delay value to 0 (no delay) */
+ time_info->delay = 0;
return 0;
}
-static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component,
- struct snd_pcm_substream *substream)
+static int sof_ipc4_pcm_pointer(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream,
+ snd_pcm_uframes_t *pointer)
{
struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component);
struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
struct sof_ipc4_timestamp_info *time_info;
struct sof_ipc4_llp_reading_slot llp;
- snd_pcm_uframes_t head_ptr, tail_ptr;
+ snd_pcm_uframes_t head_cnt, tail_cnt;
struct snd_sof_pcm_stream *stream;
+ u64 dai_cnt, host_cnt, host_ptr;
struct snd_sof_pcm *spcm;
- u64 tmp_ptr;
int ret;
spcm = snd_sof_find_spcm_dai(component, rtd);
if (!spcm)
- return 0;
+ return -EOPNOTSUPP;
stream = &spcm->stream[substream->stream];
time_info = stream->private;
if (!time_info)
- return 0;
+ return -EOPNOTSUPP;
/*
* stream_start_offset is updated to memory window by FW based on
@@ -893,45 +940,116 @@ static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component,
if (time_info->stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION) {
ret = sof_ipc4_get_stream_start_offset(sdev, substream, stream, time_info);
if (ret < 0)
- return 0;
+ return -EOPNOTSUPP;
}
+ /* For delay calculation we need the host counter */
+ host_cnt = snd_sof_pcm_get_host_byte_counter(sdev, component, substream);
+ host_ptr = host_cnt;
+
+ /* convert the host_cnt to frames */
+ host_cnt = div64_u64(host_cnt, frames_to_bytes(substream->runtime, 1));
+
/*
- * HDaudio links don't support the LLP counter reported by firmware
- * the link position is read directly from hardware registers.
+ * If the LLP counter is not reported by firmware in the SRAM window
+ * then read the dai (link) counter via host accessible means if
+ * available.
*/
if (!time_info->llp_offset) {
- tmp_ptr = snd_sof_pcm_get_stream_position(sdev, component, substream);
- if (!tmp_ptr)
- return 0;
+ dai_cnt = snd_sof_pcm_get_dai_frame_counter(sdev, component, substream);
+ if (!dai_cnt)
+ return -EOPNOTSUPP;
} else {
sof_mailbox_read(sdev, time_info->llp_offset, &llp, sizeof(llp));
- tmp_ptr = ((u64)llp.reading.llp_u << 32) | llp.reading.llp_l;
+ dai_cnt = ((u64)llp.reading.llp_u << 32) | llp.reading.llp_l;
}
+ dai_cnt += time_info->stream_end_offset;
- /* In two cases dai dma position is not accurate
+ /* In two cases dai dma counter is not accurate
* (1) dai pipeline is started before host pipeline
- * (2) multiple streams mixed into one. Each stream has the same dai dma position
+ * (2) multiple streams mixed into one. Each stream has the same dai dma
+ * counter
*
- * Firmware calculates correct stream_start_offset for all cases including above two.
- * Driver subtracts stream_start_offset from dai dma position to get accurate one
+ * Firmware calculates correct stream_start_offset for all cases
+ * including above two.
+ * Driver subtracts stream_start_offset from dai dma counter to get
+ * accurate one
*/
- tmp_ptr -= time_info->stream_start_offset;
- /* Calculate the delay taking into account that both pointer can wrap */
- div64_u64_rem(tmp_ptr, substream->runtime->boundary, &tmp_ptr);
+ /*
+ * On stream start the dai counter might not yet have reached the
+ * stream_start_offset value which means that no frames have left the
+ * DSP yet from the audio stream (on playback, capture streams have
+ * offset of 0 as we start capturing right away).
+ * In this case we need to adjust the distance between the counters by
+ * increasing the host counter by (offset - dai_counter).
+ * Otherwise the dai_counter needs to be adjusted to reflect the number
+ * of valid frames passed on the DAI side.
+ *
+ * The delay is the difference between the counters on the two
+ * sides of the DSP.
+ */
+ if (dai_cnt < time_info->stream_start_offset) {
+ host_cnt += time_info->stream_start_offset - dai_cnt;
+ dai_cnt = 0;
+ } else {
+ dai_cnt -= time_info->stream_start_offset;
+ }
+
+ /* Wrap the dai counter at the boundary where the host counter wraps */
+ div64_u64_rem(dai_cnt, time_info->boundary, &dai_cnt);
+
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
- head_ptr = substream->runtime->status->hw_ptr;
- tail_ptr = tmp_ptr;
+ head_cnt = host_cnt;
+ tail_cnt = dai_cnt;
} else {
- head_ptr = tmp_ptr;
- tail_ptr = substream->runtime->status->hw_ptr;
+ head_cnt = dai_cnt;
+ tail_cnt = host_cnt;
+ }
+
+ if (head_cnt < tail_cnt) {
+ time_info->delay = time_info->boundary - tail_cnt + head_cnt;
+ goto out;
}
- if (head_ptr < tail_ptr)
- return substream->runtime->boundary - tail_ptr + head_ptr;
+ time_info->delay = head_cnt - tail_cnt;
+
+out:
+ /*
+ * Convert the host byte counter to PCM pointer which wraps in buffer
+ * and it is in frames
+ */
+ div64_u64_rem(host_ptr, snd_pcm_lib_buffer_bytes(substream), &host_ptr);
+ *pointer = bytes_to_frames(substream->runtime, host_ptr);
+
+ return 0;
+}
+
+static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
+ struct sof_ipc4_timestamp_info *time_info;
+ struct snd_sof_pcm_stream *stream;
+ struct snd_sof_pcm *spcm;
+
+ spcm = snd_sof_find_spcm_dai(component, rtd);
+ if (!spcm)
+ return 0;
+
+ stream = &spcm->stream[substream->stream];
+ time_info = stream->private;
+ /*
+ * Report the stored delay value calculated in the pointer callback.
+ * In the unlikely event that the calculation was skipped/aborted, the
+ * default 0 delay returned.
+ */
+ if (time_info)
+ return time_info->delay;
+
+ /* No delay information available, report 0 as delay */
+ return 0;
- return head_ptr - tail_ptr;
}
const struct sof_ipc_pcm_ops ipc4_pcm_ops = {
@@ -941,6 +1059,7 @@ const struct sof_ipc_pcm_ops ipc4_pcm_ops = {
.dai_link_fixup = sof_ipc4_pcm_dai_link_fixup,
.pcm_setup = sof_ipc4_pcm_setup,
.pcm_free = sof_ipc4_pcm_free,
+ .pointer = sof_ipc4_pcm_pointer,
.delay = sof_ipc4_pcm_delay,
.ipc_first_on_start = true,
.platform_stop_during_hw_free = true,
diff --git a/sound/soc/sof/ipc4-priv.h b/sound/soc/sof/ipc4-priv.h
index f3b908b093f956..afed618a15f061 100644
--- a/sound/soc/sof/ipc4-priv.h
+++ b/sound/soc/sof/ipc4-priv.h
@@ -92,20 +92,6 @@ struct sof_ipc4_fw_data {
struct mutex pipeline_state_mutex; /* protect pipeline triggers, ref counts and states */
};
-/**
- * struct sof_ipc4_timestamp_info - IPC4 timestamp info
- * @host_copier: the host copier of the pcm stream
- * @dai_copier: the dai copier of the pcm stream
- * @stream_start_offset: reported by fw in memory window
- * @llp_offset: llp offset in memory window
- */
-struct sof_ipc4_timestamp_info {
- struct sof_ipc4_copier *host_copier;
- struct sof_ipc4_copier *dai_copier;
- u64 stream_start_offset;
- u32 llp_offset;
-};
-
extern const struct sof_ipc_fw_loader_ops ipc4_loader_ops;
extern const struct sof_ipc_tplg_ops ipc4_tplg_ops;
extern const struct sof_ipc_tplg_control_ops tplg_ipc4_control_ops;
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index da4a83afb87a8a..5cca0584212609 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -412,8 +412,9 @@ static int sof_ipc4_widget_setup_pcm(struct snd_sof_widget *swidget)
struct sof_ipc4_available_audio_format *available_fmt;
struct snd_soc_component *scomp = swidget->scomp;
struct sof_ipc4_copier *ipc4_copier;
+ struct snd_sof_pcm *spcm;
int node_type = 0;
- int ret;
+ int ret, dir;
ipc4_copier = kzalloc(sizeof(*ipc4_copier), GFP_KERNEL);
if (!ipc4_copier)
@@ -447,6 +448,25 @@ static int sof_ipc4_widget_setup_pcm(struct snd_sof_widget *swidget)
}
dev_dbg(scomp->dev, "host copier '%s' node_type %u\n", swidget->widget->name, node_type);
+ spcm = snd_sof_find_spcm_comp(scomp, swidget->comp_id, &dir);
+ if (!spcm)
+ goto skip_gtw_cfg;
+
+ if (dir == SNDRV_PCM_STREAM_PLAYBACK) {
+ struct snd_sof_pcm_stream *sps = &spcm->stream[dir];
+
+ sof_update_ipc_object(scomp, &sps->dsp_max_burst_size_in_ms,
+ SOF_COPIER_DEEP_BUFFER_TOKENS,
+ swidget->tuples,
+ swidget->num_tuples, sizeof(u32), 1);
+ /* Set default DMA buffer size if it is not specified in topology */
+ if (!sps->dsp_max_burst_size_in_ms)
+ sps->dsp_max_burst_size_in_ms = SOF_IPC4_MIN_DMA_BUFFER_SIZE;
+ } else {
+ /* Capture data is copied from DSP to host in 1ms bursts */
+ spcm->stream[dir].dsp_max_burst_size_in_ms = 1;
+ }
+
skip_gtw_cfg:
ipc4_copier->gtw_attr = kzalloc(sizeof(*ipc4_copier->gtw_attr), GFP_KERNEL);
if (!ipc4_copier->gtw_attr) {
@@ -1356,6 +1376,7 @@ static int snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_s
int sample_rate, channel_count;
int bit_depth, ret;
u32 nhlt_type;
+ int dev_type = 0;
/* convert to NHLT type */
switch (linktype) {
@@ -1371,18 +1392,30 @@ static int snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_s
&bit_depth);
if (ret < 0)
return ret;
+
+ /*
+ * We need to know the type of the external device attached to a SSP
+ * port to retrieve the blob from NHLT. However, device type is not
+ * specified in topology.
+ * Query the type for the port and then pass that information back
+ * to the blob lookup function.
+ */
+ dev_type = intel_nhlt_ssp_device_type(sdev->dev, ipc4_data->nhlt,
+ dai_index);
+ if (dev_type < 0)
+ return dev_type;
break;
default:
return 0;
}
- dev_dbg(sdev->dev, "dai index %d nhlt type %d direction %d\n",
- dai_index, nhlt_type, dir);
+ dev_dbg(sdev->dev, "dai index %d nhlt type %d direction %d dev type %d\n",
+ dai_index, nhlt_type, dir, dev_type);
/* find NHLT blob with matching params */
cfg = intel_nhlt_get_endpoint_blob(sdev->dev, ipc4_data->nhlt, dai_index, nhlt_type,
bit_depth, bit_depth, channel_count, sample_rate,
- dir, 0);
+ dir, dev_type);
if (!cfg) {
dev_err(sdev->dev,
diff --git a/sound/soc/sof/ops.h b/sound/soc/sof/ops.h
index 6cf21e829e0727..3cd748e1346091 100644
--- a/sound/soc/sof/ops.h
+++ b/sound/soc/sof/ops.h
@@ -523,12 +523,26 @@ static inline int snd_sof_pcm_platform_ack(struct snd_sof_dev *sdev,
return 0;
}
-static inline u64 snd_sof_pcm_get_stream_position(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream)
+static inline u64
+snd_sof_pcm_get_dai_frame_counter(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
{
- if (sof_ops(sdev) && sof_ops(sdev)->get_stream_position)
- return sof_ops(sdev)->get_stream_position(sdev, component, substream);
+ if (sof_ops(sdev) && sof_ops(sdev)->get_dai_frame_counter)
+ return sof_ops(sdev)->get_dai_frame_counter(sdev, component,
+ substream);
+
+ return 0;
+}
+
+static inline u64
+snd_sof_pcm_get_host_byte_counter(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream)
+{
+ if (sof_ops(sdev) && sof_ops(sdev)->get_host_byte_counter)
+ return sof_ops(sdev)->get_host_byte_counter(sdev, component,
+ substream);
return 0;
}
diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c
index 33d576b1764783..f03cee94bce626 100644
--- a/sound/soc/sof/pcm.c
+++ b/sound/soc/sof/pcm.c
@@ -388,13 +388,21 @@ static snd_pcm_uframes_t sof_pcm_pointer(struct snd_soc_component *component,
{
struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component);
+ const struct sof_ipc_pcm_ops *pcm_ops = sof_ipc_get_ops(sdev, pcm);
struct snd_sof_pcm *spcm;
snd_pcm_uframes_t host, dai;
+ int ret = -EOPNOTSUPP;
/* nothing to do for BE */
if (rtd->dai_link->no_pcm)
return 0;
+ if (pcm_ops && pcm_ops->pointer)
+ ret = pcm_ops->pointer(component, substream, &host);
+
+ if (ret != -EOPNOTSUPP)
+ return ret ? ret : host;
+
/* use dsp ops pointer callback directly if set */
if (sof_ops(sdev)->pcm_pointer)
return sof_ops(sdev)->pcm_pointer(sdev, substream);
diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h
index 9ea2ac5adac79e..86bbb531e142c7 100644
--- a/sound/soc/sof/sof-audio.h
+++ b/sound/soc/sof/sof-audio.h
@@ -103,7 +103,10 @@ struct snd_sof_dai_config_data {
* additional memory in the SOF PCM stream structure
* @pcm_free: Function pointer for PCM free that can be used for freeing any
* additional memory in the SOF PCM stream structure
- * @delay: Function pointer for pcm delay calculation
+ * @pointer: Function pointer for pcm pointer
+ * Note: the @pointer callback may return -EOPNOTSUPP which should be
+ * handled in a same way as if the callback is not provided
+ * @delay: Function pointer for pcm delay reporting
* @reset_hw_params_during_stop: Flag indicating whether the hw_params should be reset during the
* STOP pcm trigger
* @ipc_first_on_start: Send IPC before invoking platform trigger during
@@ -124,6 +127,9 @@ struct sof_ipc_pcm_ops {
int (*dai_link_fixup)(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params);
int (*pcm_setup)(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm);
void (*pcm_free)(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm);
+ int (*pointer)(struct snd_soc_component *component,
+ struct snd_pcm_substream *substream,
+ snd_pcm_uframes_t *pointer);
snd_pcm_sframes_t (*delay)(struct snd_soc_component *component,
struct snd_pcm_substream *substream);
bool reset_hw_params_during_stop;
@@ -322,6 +328,7 @@ struct snd_sof_pcm_stream {
struct work_struct period_elapsed_work;
struct snd_soc_dapm_widget_list *list; /* list of connected DAPM widgets */
bool d0i3_compatible; /* DSP can be in D0I3 when this pcm is opened */
+ unsigned int dsp_max_burst_size_in_ms; /* The maximum size of the host DMA burst in ms */
/*
* flag to indicate that the DSP pipelines should be kept
* active or not while suspending the stream
diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h
index d453a4ce3b219d..d3c436f826046b 100644
--- a/sound/soc/sof/sof-priv.h
+++ b/sound/soc/sof/sof-priv.h
@@ -262,13 +262,25 @@ struct snd_sof_dsp_ops {
int (*pcm_ack)(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream); /* optional */
/*
- * optional callback to retrieve the link DMA position for the substream
- * when the position is not reported in the shared SRAM windows but
- * instead from a host-accessible hardware counter.
+ * optional callback to retrieve the number of frames left/arrived from/to
+ * the DSP on the DAI side (link/codec/DMIC/etc).
+ *
+ * The callback is used when the firmware does not provide this information
+ * via the shared SRAM window and it can be retrieved by host.
*/
- u64 (*get_stream_position)(struct snd_sof_dev *sdev,
- struct snd_soc_component *component,
- struct snd_pcm_substream *substream); /* optional */
+ u64 (*get_dai_frame_counter)(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream); /* optional */
+
+ /*
+ * Optional callback to retrieve the number of bytes left/arrived from/to
+ * the DSP on the host side (bytes between host ALSA buffer and DSP).
+ *
+ * The callback is needed for ALSA delay reporting.
+ */
+ u64 (*get_host_byte_counter)(struct snd_sof_dev *sdev,
+ struct snd_soc_component *component,
+ struct snd_pcm_substream *substream); /* optional */
/* host read DSP stream data */
int (*ipc_msg_data)(struct snd_sof_dev *sdev,
diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c
index b67617b68e509d..f4437015d43a75 100644
--- a/sound/usb/line6/driver.c
+++ b/sound/usb/line6/driver.c
@@ -202,7 +202,7 @@ int line6_send_raw_message_async(struct usb_line6 *line6, const char *buffer,
struct urb *urb;
/* create message: */
- msg = kmalloc(sizeof(struct message), GFP_ATOMIC);
+ msg = kzalloc(sizeof(struct message), GFP_ATOMIC);
if (msg == NULL)
return -ENOMEM;
@@ -688,7 +688,7 @@ static int line6_init_cap_control(struct usb_line6 *line6)
int ret;
/* initialize USB buffers: */
- line6->buffer_listen = kmalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL);
+ line6->buffer_listen = kzalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL);
if (!line6->buffer_listen)
return -ENOMEM;
@@ -697,7 +697,7 @@ static int line6_init_cap_control(struct usb_line6 *line6)
return -ENOMEM;
if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) {
- line6->buffer_message = kmalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL);
+ line6->buffer_message = kzalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL);
if (!line6->buffer_message)
return -ENOMEM;
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 4fa4ade1ce7445..540c0f2c4fda07 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -121,7 +121,7 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
int i, n;
/* recognize hard coded LLVM section name */
- if (strcmp(sec_name, ".arena.1") == 0) {
+ if (strcmp(sec_name, ".addr_space.1") == 0) {
/* this is the name to use in skeleton */
snprintf(buf, buf_sz, "arena");
return true;
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 318e2dad27e048..ae57bf69ad4af3 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -76,6 +76,12 @@ enum {
DNS
};
+enum {
+ IPV4 = 1,
+ IPV6,
+ IP_TYPE_MAX
+};
+
static int in_hand_shake;
static char *os_name = "";
@@ -102,6 +108,11 @@ static struct utsname uts_buf;
#define MAX_FILE_NAME 100
#define ENTRIES_PER_BLOCK 50
+/*
+ * Change this entry if the number of addresses increases in future
+ */
+#define MAX_IP_ENTRIES 64
+#define OUTSTR_BUF_SIZE ((INET6_ADDRSTRLEN + 1) * MAX_IP_ENTRIES)
struct kvp_record {
char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
@@ -1171,6 +1182,18 @@ static int process_ip_string(FILE *f, char *ip_string, int type)
return 0;
}
+int ip_version_check(const char *input_addr)
+{
+ struct in6_addr addr;
+
+ if (inet_pton(AF_INET, input_addr, &addr))
+ return IPV4;
+ else if (inet_pton(AF_INET6, input_addr, &addr))
+ return IPV6;
+
+ return -EINVAL;
+}
+
/*
* Only IPv4 subnet strings needs to be converted to plen
* For IPv6 the subnet is already privided in plen format
@@ -1197,14 +1220,75 @@ static int kvp_subnet_to_plen(char *subnet_addr_str)
return plen;
}
+static int process_dns_gateway_nm(FILE *f, char *ip_string, int type,
+ int ip_sec)
+{
+ char addr[INET6_ADDRSTRLEN], *output_str;
+ int ip_offset = 0, error = 0, ip_ver;
+ char *param_name;
+
+ if (type == DNS)
+ param_name = "dns";
+ else if (type == GATEWAY)
+ param_name = "gateway";
+ else
+ return -EINVAL;
+
+ output_str = (char *)calloc(OUTSTR_BUF_SIZE, sizeof(char));
+ if (!output_str)
+ return -ENOMEM;
+
+ while (1) {
+ memset(addr, 0, sizeof(addr));
+
+ if (!parse_ip_val_buffer(ip_string, &ip_offset, addr,
+ (MAX_IP_ADDR_SIZE * 2)))
+ break;
+
+ ip_ver = ip_version_check(addr);
+ if (ip_ver < 0)
+ continue;
+
+ if ((ip_ver == IPV4 && ip_sec == IPV4) ||
+ (ip_ver == IPV6 && ip_sec == IPV6)) {
+ /*
+ * do a bound check to avoid out-of bound writes
+ */
+ if ((OUTSTR_BUF_SIZE - strlen(output_str)) >
+ (strlen(addr) + 1)) {
+ strncat(output_str, addr,
+ OUTSTR_BUF_SIZE -
+ strlen(output_str) - 1);
+ strncat(output_str, ",",
+ OUTSTR_BUF_SIZE -
+ strlen(output_str) - 1);
+ }
+ } else {
+ continue;
+ }
+ }
+
+ if (strlen(output_str)) {
+ /*
+ * This is to get rid of that extra comma character
+ * in the end of the string
+ */
+ output_str[strlen(output_str) - 1] = '\0';
+ error = fprintf(f, "%s=%s\n", param_name, output_str);
+ }
+
+ free(output_str);
+ return error;
+}
+
static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
- int is_ipv6)
+ int ip_sec)
{
char addr[INET6_ADDRSTRLEN];
char subnet_addr[INET6_ADDRSTRLEN];
- int error, i = 0;
+ int error = 0, i = 0;
int ip_offset = 0, subnet_offset = 0;
- int plen;
+ int plen, ip_ver;
memset(addr, 0, sizeof(addr));
memset(subnet_addr, 0, sizeof(subnet_addr));
@@ -1216,10 +1300,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
subnet_addr,
(MAX_IP_ADDR_SIZE *
2))) {
- if (!is_ipv6)
+ ip_ver = ip_version_check(addr);
+ if (ip_ver < 0)
+ continue;
+
+ if (ip_ver == IPV4 && ip_sec == IPV4)
plen = kvp_subnet_to_plen((char *)subnet_addr);
- else
+ else if (ip_ver == IPV6 && ip_sec == IPV6)
plen = atoi(subnet_addr);
+ else
+ continue;
if (plen < 0)
return plen;
@@ -1233,17 +1323,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
memset(subnet_addr, 0, sizeof(subnet_addr));
}
- return 0;
+ return error;
}
static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
{
- int error = 0;
+ int error = 0, ip_ver;
char if_filename[PATH_MAX];
char nm_filename[PATH_MAX];
FILE *ifcfg_file, *nmfile;
char cmd[PATH_MAX];
- int is_ipv6 = 0;
char *mac_addr;
int str_len;
@@ -1421,52 +1510,94 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
if (error)
goto setval_error;
- if (new_val->addr_family & ADDR_FAMILY_IPV6) {
- error = fprintf(nmfile, "\n[ipv6]\n");
- if (error < 0)
- goto setval_error;
- is_ipv6 = 1;
- } else {
- error = fprintf(nmfile, "\n[ipv4]\n");
- if (error < 0)
- goto setval_error;
- }
-
/*
* Now we populate the keyfile format
+ *
+ * The keyfile format expects the IPv6 and IPv4 configuration in
+ * different sections. Therefore we iterate through the list twice,
+ * once to populate the IPv4 section and the next time for IPv6
*/
+ ip_ver = IPV4;
+ do {
+ if (ip_ver == IPV4) {
+ error = fprintf(nmfile, "\n[ipv4]\n");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = fprintf(nmfile, "\n[ipv6]\n");
+ if (error < 0)
+ goto setval_error;
+ }
- if (new_val->dhcp_enabled) {
- error = kvp_write_file(nmfile, "method", "", "auto");
- if (error < 0)
- goto setval_error;
- } else {
- error = kvp_write_file(nmfile, "method", "", "manual");
+ /*
+ * Write the configuration for ipaddress, netmask, gateway and
+ * name services
+ */
+ error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
+ (char *)new_val->sub_net,
+ ip_ver);
if (error < 0)
goto setval_error;
- }
- /*
- * Write the configuration for ipaddress, netmask, gateway and
- * name services
- */
- error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
- (char *)new_val->sub_net, is_ipv6);
- if (error < 0)
- goto setval_error;
+ /*
+ * As dhcp_enabled is only valid for ipv4, we do not set dhcp
+ * methods for ipv6 based on dhcp_enabled flag.
+ *
+ * For ipv4, set method to manual only when dhcp_enabled is
+ * false and specific ipv4 addresses are configured. If neither
+ * dhcp_enabled is true and no ipv4 addresses are configured,
+ * set method to 'disabled'.
+ *
+ * For ipv6, set method to manual when we configure ipv6
+ * addresses. Otherwise set method to 'auto' so that SLAAC from
+ * RA may be used.
+ */
+ if (ip_ver == IPV4) {
+ if (new_val->dhcp_enabled) {
+ error = kvp_write_file(nmfile, "method", "",
+ "auto");
+ if (error < 0)
+ goto setval_error;
+ } else if (error) {
+ error = kvp_write_file(nmfile, "method", "",
+ "manual");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = kvp_write_file(nmfile, "method", "",
+ "disabled");
+ if (error < 0)
+ goto setval_error;
+ }
+ } else if (ip_ver == IPV6) {
+ if (error) {
+ error = kvp_write_file(nmfile, "method", "",
+ "manual");
+ if (error < 0)
+ goto setval_error;
+ } else {
+ error = kvp_write_file(nmfile, "method", "",
+ "auto");
+ if (error < 0)
+ goto setval_error;
+ }
+ }
- /* we do not want ipv4 addresses in ipv6 section and vice versa */
- if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) {
- error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
+ error = process_dns_gateway_nm(nmfile,
+ (char *)new_val->gate_way,
+ GATEWAY, ip_ver);
if (error < 0)
goto setval_error;
- }
- if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) {
- error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
+ error = process_dns_gateway_nm(nmfile,
+ (char *)new_val->dns_addr, DNS,
+ ip_ver);
if (error < 0)
goto setval_error;
- }
+
+ ip_ver++;
+ } while (ip_ver < IP_TYPE_MAX);
+
fclose(nmfile);
fclose(ifcfg_file);
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 72535f00572f6e..72ea363d434db0 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -3,6 +3,8 @@
#ifndef _LINUX_BTF_IDS_H
#define _LINUX_BTF_IDS_H
+#include <linux/types.h> /* for u32 */
+
struct btf_id_set {
u32 cnt;
u32 ids[];
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 4b0673bf52c2e6..07cfad817d5390 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -8,6 +8,7 @@
#include <linux/build_bug.h>
#include <linux/compiler.h>
#include <linux/math.h>
+#include <linux/panic.h>
#include <endian.h>
#include <byteswap.h>
diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h
index f3c82ab5b14cd7..7d73da0980473f 100644
--- a/tools/include/linux/mm.h
+++ b/tools/include/linux/mm.h
@@ -37,4 +37,9 @@ static inline void totalram_pages_add(long count)
{
}
+static inline int early_pfn_to_nid(unsigned long pfn)
+{
+ return 0;
+}
+
#endif
diff --git a/tools/include/linux/panic.h b/tools/include/linux/panic.h
new file mode 100644
index 00000000000000..9c8f17a41ce8ed
--- /dev/null
+++ b/tools/include/linux/panic.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_PANIC_H
+#define _TOOLS_LINUX_PANIC_H
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static inline void panic(const char *fmt, ...)
+{
+ va_list argp;
+
+ va_start(argp, fmt);
+ vfprintf(stderr, fmt, argp);
+ va_end(argp);
+ exit(-1);
+}
+
+#endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index efab29b8935bd9..a2061fcd612d7f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -498,7 +498,7 @@ struct bpf_struct_ops {
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
-#define ARENA_SEC ".arena.1"
+#define ARENA_SEC ".addr_space.1"
enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
@@ -1650,6 +1650,10 @@ static int sys_memfd_create(const char *name, unsigned flags)
return syscall(__NR_memfd_create, name, flags);
}
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
+
static int create_placeholder_fd(void)
{
int fd;
@@ -5352,8 +5356,8 @@ retry:
goto err_out;
}
if (map->def.type == BPF_MAP_TYPE_ARENA) {
- map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
- PROT_READ | PROT_WRITE,
+ map->mmaped = mmap((void *)(long)map->map_extra,
+ bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
map->fd, 0);
if (map->mmaped == MAP_FAILED) {
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 6b7eb2d2aaf188..a451cbfbd781d9 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -228,8 +228,11 @@ class Type(SpecAttr):
presence = ''
for i in range(0, len(ref)):
presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}"
- if self.presence_type() == 'bit':
- code.append(presence + ' = 1;')
+ # Every layer below last is a nest, so we know it uses bit presence
+ # last layer is "self" and may be a complex type
+ if i == len(ref) - 1 and self.presence_type() != 'bit':
+ continue
+ code.append(presence + ' = 1;')
code += self._setter_lines(ri, member, presence)
func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}"
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 0b10ad00866867..0a33d9195b7a91 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -585,7 +585,7 @@ static int add_dead_ends(struct objtool_file *file)
struct section *rsec;
struct reloc *reloc;
struct instruction *insn;
- unsigned long offset;
+ uint64_t offset;
/*
* Check for manually annotated dead ends.
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 8f08c3fd498d5b..0d3672e5d9ed15 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -67,6 +67,10 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
.PP
\fB--quiet\fP Do not decode and print the system configuration header information.
.PP
++\fB--no-msr\fP Disable all the uses of the MSR driver.
++.PP
++\fB--no-perf\fP Disable all the uses of the perf API.
++.PP
\fB--interval seconds\fP overrides the default 5.0 second measurement interval.
.PP
\fB--num_iterations num\fP number of the measurement iterations.
@@ -125,9 +129,17 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
.PP
-\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
.PP
-\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
+\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBGFXAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
+.PP
+\fBSAM%mc6\fP The percentage of time the SA Media is in the "module C6" state, mc6, during the measurement interval. From /sys/class/drm/card0/gt/gt1/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
+.PP
+\fBSAMMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBSAMAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
.PP
\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters.
.PP
@@ -370,7 +382,7 @@ below the processor's base frequency.
Busy% = MPERF_delta/TSC_delta
-Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval
+Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval
Note that these calculations depend on TSC_delta, so they
are not reliable during intervals when TSC_MHz is not running at the base frequency.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 7a334377f92b97..98256468e24806 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3,7 +3,7 @@
* turbostat -- show CPU frequency and C-state residency
* on modern Intel and AMD processors.
*
- * Copyright (c) 2023 Intel Corporation.
+ * Copyright (c) 2024 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
@@ -36,6 +36,8 @@
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <stdbool.h>
+#include <assert.h>
+#include <linux/kernel.h>
#define UNUSED(x) (void)(x)
@@ -53,9 +55,13 @@
#define NAME_BYTES 20
#define PATH_BYTES 128
+#define MAX_NOFILE 0x8000
+
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
+enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR };
+enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR };
struct msr_counter {
unsigned int msr_num;
@@ -127,6 +133,9 @@ struct msr_counter bic[] = {
{ 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
{ 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
{ 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAM%mc6", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAMMHz", "", 0, 0, 0, NULL, 0 },
+ { 0x0, "SAMAMHz", "", 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -185,11 +194,14 @@ struct msr_counter bic[] = {
#define BIC_IPC (1ULL << 52)
#define BIC_CORE_THROT_CNT (1ULL << 53)
#define BIC_UNCORE_MHZ (1ULL << 54)
+#define BIC_SAM_mc6 (1ULL << 55)
+#define BIC_SAMMHz (1ULL << 56)
+#define BIC_SAMACTMHz (1ULL << 57)
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
+#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
+#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6)
#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
@@ -204,10 +216,13 @@ unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
+struct amperf_group_fd;
+
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
int *fd_instr_count_percpu;
+struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. */
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };
@@ -242,11 +257,8 @@ char *output_buffer, *outp;
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
-unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
-unsigned int gfx_cur_mhz;
-unsigned int gfx_act_mhz;
unsigned int tj_max;
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
@@ -263,6 +275,28 @@ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;
int ignore_stdin;
+bool no_msr;
+bool no_perf;
+enum amperf_source amperf_source;
+
+enum gfx_sysfs_idx {
+ GFX_rc6,
+ GFX_MHz,
+ GFX_ACTMHz,
+ SAM_mc6,
+ SAM_MHz,
+ SAM_ACTMHz,
+ GFX_MAX
+};
+
+struct gfx_sysfs_info {
+ const char *path;
+ FILE *fp;
+ unsigned int val;
+ unsigned long long val_ull;
+};
+
+static struct gfx_sysfs_info gfx_info[GFX_MAX];
int get_msr(int cpu, off_t offset, unsigned long long *msr);
@@ -652,6 +686,7 @@ static const struct platform_features icx_features = {
.bclk_freq = BCLK_100MHZ,
.supported_cstates = CC1 | CC6 | PC2 | PC6,
.cst_limit = CST_LIMIT_ICX,
+ .has_msr_core_c1_res = 1,
.has_irtl_msrs = 1,
.has_cst_prewake_bit = 1,
.trl_msrs = TRL_BASE | TRL_CORECOUNT,
@@ -948,6 +983,175 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
+/* Indexes used to map data read from perf and MSRs into global variables */
+enum rapl_rci_index {
+ RAPL_RCI_INDEX_ENERGY_PKG = 0,
+ RAPL_RCI_INDEX_ENERGY_CORES = 1,
+ RAPL_RCI_INDEX_DRAM = 2,
+ RAPL_RCI_INDEX_GFX = 3,
+ RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
+ RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
+ RAPL_RCI_INDEX_CORE_ENERGY = 6,
+ NUM_RAPL_COUNTERS,
+};
+
+enum rapl_unit {
+ RAPL_UNIT_INVALID,
+ RAPL_UNIT_JOULES,
+ RAPL_UNIT_WATTS,
+};
+
+struct rapl_counter_info_t {
+ unsigned long long data[NUM_RAPL_COUNTERS];
+ enum rapl_source source[NUM_RAPL_COUNTERS];
+ unsigned long long flags[NUM_RAPL_COUNTERS];
+ double scale[NUM_RAPL_COUNTERS];
+ enum rapl_unit unit[NUM_RAPL_COUNTERS];
+
+ union {
+ /* Active when source == RAPL_SOURCE_MSR */
+ struct {
+ unsigned long long msr[NUM_RAPL_COUNTERS];
+ unsigned long long msr_mask[NUM_RAPL_COUNTERS];
+ int msr_shift[NUM_RAPL_COUNTERS];
+ };
+ };
+
+ int fd_perf;
+};
+
+/* struct rapl_counter_info_t for each RAPL domain */
+struct rapl_counter_info_t *rapl_counter_info_perdomain;
+
+#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)
+
+struct rapl_counter_arch_info {
+ int feature_mask; /* Mask for testing if the counter is supported on host */
+ const char *perf_subsys;
+ const char *perf_name;
+ unsigned long long msr;
+ unsigned long long msr_mask;
+ int msr_shift; /* Positive mean shift right, negative mean shift left */
+ double *platform_rapl_msr_scale; /* Scale applied to values read by MSR (platform dependent, filled at runtime) */
+ unsigned int rci_index; /* Maps data from perf counters to global variables */
+ unsigned long long bic;
+ double compat_scale; /* Some counters require constant scaling to be in the same range as other, similar ones */
+ unsigned long long flags;
+};
+
+static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
+ {
+ .feature_mask = RAPL_PKG,
+ .perf_subsys = "power",
+ .perf_name = "energy-pkg",
+ .msr = MSR_PKG_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+ .bic = BIC_PkgWatt | BIC_Pkg_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_AMD_F17H,
+ .perf_subsys = "power",
+ .perf_name = "energy-pkg",
+ .msr = MSR_PKG_ENERGY_STAT,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+ .bic = BIC_PkgWatt | BIC_Pkg_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_CORE_ENERGY_STATUS,
+ .perf_subsys = "power",
+ .perf_name = "energy-cores",
+ .msr = MSR_PP0_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
+ .bic = BIC_CorWatt | BIC_Cor_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_DRAM,
+ .perf_subsys = "power",
+ .perf_name = "energy-ram",
+ .msr = MSR_DRAM_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_dram_energy_units,
+ .rci_index = RAPL_RCI_INDEX_DRAM,
+ .bic = BIC_RAMWatt | BIC_RAM_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_GFX,
+ .perf_subsys = "power",
+ .perf_name = "energy-gpu",
+ .msr = MSR_PP1_ENERGY_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_GFX,
+ .bic = BIC_GFXWatt | BIC_GFX_J,
+ .compat_scale = 1.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_PKG_PERF_STATUS,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_PKG_PERF_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_time_units,
+ .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
+ .bic = BIC_PKG__,
+ .compat_scale = 100.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_DRAM_PERF_STATUS,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_DRAM_PERF_STATUS,
+ .msr_mask = 0xFFFFFFFFFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_time_units,
+ .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
+ .bic = BIC_RAM__,
+ .compat_scale = 100.0,
+ .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+ },
+ {
+ .feature_mask = RAPL_AMD_F17H,
+ .perf_subsys = NULL,
+ .perf_name = NULL,
+ .msr = MSR_CORE_ENERGY_STAT,
+ .msr_mask = 0xFFFFFFFF,
+ .msr_shift = 0,
+ .platform_rapl_msr_scale = &rapl_energy_units,
+ .rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
+ .bic = BIC_CorWatt | BIC_Cor_J,
+ .compat_scale = 1.0,
+ .flags = 0,
+ },
+};
+
+struct rapl_counter {
+ unsigned long long raw_value;
+ enum rapl_unit unit;
+ double scale;
+};
+
struct thread_data {
struct timeval tv_begin;
struct timeval tv_end;
@@ -974,7 +1178,7 @@ struct core_data {
unsigned long long c7;
unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */
unsigned int core_temp_c;
- unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */
+ struct rapl_counter core_energy; /* MSR_CORE_ENERGY_STAT */
unsigned int core_id;
unsigned long long core_throt_cnt;
unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -989,8 +1193,8 @@ struct pkg_data {
unsigned long long pc8;
unsigned long long pc9;
unsigned long long pc10;
- unsigned long long cpu_lpi;
- unsigned long long sys_lpi;
+ long long cpu_lpi;
+ long long sys_lpi;
unsigned long long pkg_wtd_core_c0;
unsigned long long pkg_any_core_c0;
unsigned long long pkg_any_gfxe_c0;
@@ -998,13 +1202,16 @@ struct pkg_data {
long long gfx_rc6_ms;
unsigned int gfx_mhz;
unsigned int gfx_act_mhz;
+ long long sam_mc6_ms;
+ unsigned int sam_mhz;
+ unsigned int sam_act_mhz;
unsigned int package_id;
- unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */
- unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
- unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */
- unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */
- unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
- unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
+ struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */
+ struct rapl_counter energy_dram; /* MSR_DRAM_ENERGY_STATUS */
+ struct rapl_counter energy_cores; /* MSR_PP0_ENERGY_STATUS */
+ struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */
+ struct rapl_counter rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
+ struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
unsigned int pkg_temp_c;
unsigned int uncore_mhz;
unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -1150,6 +1357,38 @@ struct sys_counters {
struct msr_counter *pp;
} sys;
+void free_sys_counters(void)
+{
+ struct msr_counter *p = sys.tp, *pnext = NULL;
+
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ p = sys.cp, pnext = NULL;
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ p = sys.pp, pnext = NULL;
+ while (p) {
+ pnext = p->next;
+ free(p);
+ p = pnext;
+ }
+
+ sys.added_thread_counters = 0;
+ sys.added_core_counters = 0;
+ sys.added_package_counters = 0;
+ sys.tp = NULL;
+ sys.cp = NULL;
+ sys.pp = NULL;
+}
+
struct system_summary {
struct thread_data threads;
struct core_data cores;
@@ -1280,34 +1519,60 @@ int get_msr_fd(int cpu)
sprintf(pathname, "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY);
if (fd < 0)
- err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+ err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
+ "or run with --no-msr, or run as root", pathname);
fd_percpu[cpu] = fd;
return fd;
}
+static void bic_disable_msr_access(void)
+{
+ const unsigned long bic_msrs =
+ BIC_SMI |
+ BIC_CPU_c1 |
+ BIC_CPU_c3 |
+ BIC_CPU_c6 |
+ BIC_CPU_c7 |
+ BIC_Mod_c6 |
+ BIC_CoreTmp |
+ BIC_Totl_c0 |
+ BIC_Any_c0 |
+ BIC_GFX_c0 |
+ BIC_CPUGFX |
+ BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_PkgTmp;
+
+ bic_enabled &= ~bic_msrs;
+
+ free_sys_counters();
+}
+
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
+ assert(!no_perf);
+
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
}
-static int perf_instr_count_open(int cpu_num)
+static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
{
- struct perf_event_attr pea;
- int fd;
+ struct perf_event_attr attr;
+ const pid_t pid = -1;
+ const unsigned long flags = 0;
- memset(&pea, 0, sizeof(struct perf_event_attr));
- pea.type = PERF_TYPE_HARDWARE;
- pea.size = sizeof(struct perf_event_attr);
- pea.config = PERF_COUNT_HW_INSTRUCTIONS;
+ assert(!no_perf);
- /* counter for cpu_num, including user + kernel and all processes */
- fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
- if (fd == -1) {
- warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
- BIC_NOT_PRESENT(BIC_IPC);
- }
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+
+ attr.type = type;
+ attr.size = sizeof(struct perf_event_attr);
+ attr.config = config;
+ attr.disabled = 0;
+ attr.sample_type = PERF_SAMPLE_IDENTIFIER;
+ attr.read_format = read_format;
+
+ const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
return fd;
}
@@ -1317,7 +1582,7 @@ int get_instr_count_fd(int cpu)
if (fd_instr_count_percpu[cpu])
return fd_instr_count_percpu[cpu];
- fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
+ fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
return fd_instr_count_percpu[cpu];
}
@@ -1326,6 +1591,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
ssize_t retval;
+ assert(!no_msr);
+
retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
if (retval != sizeof *msr)
@@ -1334,6 +1601,21 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
return 0;
}
+int probe_msr(int cpu, off_t offset)
+{
+ ssize_t retval;
+ unsigned long long dummy;
+
+ assert(!no_msr);
+
+ retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
+
+ if (retval != sizeof(dummy))
+ return 1;
+
+ return 0;
+}
+
#define MAX_DEFERRED 16
char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
@@ -1369,6 +1651,8 @@ void help(void)
" Override default 5-second measurement interval\n"
" -J, --Joules displays energy in Joules instead of Watts\n"
" -l, --list list column headers only\n"
+ " -M, --no-msr Disable all uses of the MSR driver\n"
+ " -P, --no-perf Disable all uses of the perf API\n"
" -n, --num_iterations num\n"
" number of the measurement iterations\n"
" -N, --header_iterations num\n"
@@ -1573,6 +1857,15 @@ void print_header(char *delim)
if (DO_BIC(BIC_GFXACTMHz))
outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_SAM_mc6))
+ outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_SAMMHz))
+ outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : ""));
+
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
if (DO_BIC(BIC_Any_c0))
@@ -1671,26 +1964,35 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
+ outp +=
+ sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ t->counter[i], mp->path);
}
}
- if (c) {
+ if (c && is_cpu_first_thread_in_core(t, c, p)) {
outp += sprintf(outp, "core: %d\n", c->core_id);
outp += sprintf(outp, "c3: %016llX\n", c->c3);
outp += sprintf(outp, "c6: %016llX\n", c->c6);
outp += sprintf(outp, "c7: %016llX\n", c->c7);
outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
- outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
+
+ const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
+ const double energy_scale = c->core_energy.scale;
+
+ if (c->core_energy.unit == RAPL_UNIT_JOULES)
+ outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
+ outp +=
+ sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ c->counter[i], mp->path);
}
outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
}
- if (p) {
+ if (p && is_cpu_first_core_in_package(t, c, p)) {
outp += sprintf(outp, "package: %d\n", p->package_id);
outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
@@ -1710,16 +2012,18 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
- outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
- outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
- outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
- outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
- outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
- outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
+ outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
+ outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
+ outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
+ outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
+ outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
+ outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
+ outp +=
+ sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+ p->counter[i], mp->path);
}
}
@@ -1728,6 +2032,23 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
return 0;
}
+double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval)
+{
+ assert(desired_unit != RAPL_UNIT_INVALID);
+
+ /*
+ * For now we don't expect anything other than joules,
+ * so just simplify the logic.
+ */
+ assert(c->unit == RAPL_UNIT_JOULES);
+
+ const double scaled = c->raw_value * c->scale;
+
+ if (desired_unit == RAPL_UNIT_WATTS)
+ return scaled / interval;
+ return scaled;
+}
+
/*
* column formatting convention & formats
*/
@@ -1921,9 +2242,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));
/* print per-package data only for 1st core in package */
if (!is_cpu_first_core_in_package(t, c, p))
@@ -1951,6 +2274,24 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_GFXACTMHz))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
+ /* SAMmc6 */
+ if (DO_BIC(BIC_SAM_mc6)) {
+ if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */
+ outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
+ } else {
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ p->sam_mc6_ms / 10.0 / interval_float);
+ }
+ }
+
+ /* SAMMHz */
+ if (DO_BIC(BIC_SAMMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz);
+
+ /* SAMACTMHz */
+ if (DO_BIC(BIC_SAMACTMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz);
+
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
@@ -1976,43 +2317,59 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
if (DO_BIC(BIC_Pkgpc10))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
- if (DO_BIC(BIC_CPU_LPI))
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
- if (DO_BIC(BIC_SYS_LPI))
- outp +=
- sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ if (DO_BIC(BIC_CPU_LPI)) {
+ if (p->cpu_lpi >= 0)
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+ else
+ outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+ }
+ if (DO_BIC(BIC_SYS_LPI)) {
+ if (p->sys_lpi >= 0)
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ else
+ outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+ }
if (DO_BIC(BIC_PkgWatt))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
-
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_GFXWatt))
outp +=
- sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAMWatt))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- p->energy_dram * rapl_dram_energy_units / interval_float);
+ rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_Pkg_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_GFX_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_RAM_J))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+ rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
if (DO_BIC(BIC_PKG__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+ rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
if (DO_BIC(BIC_RAM__))
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""),
- 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+ rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
/* UncMHz */
if (DO_BIC(BIC_UNCORE_MHZ))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
@@ -2121,12 +2478,22 @@ int delta_package(struct pkg_data *new, struct pkg_data *old)
old->gfx_mhz = new->gfx_mhz;
old->gfx_act_mhz = new->gfx_act_mhz;
- old->energy_pkg = new->energy_pkg - old->energy_pkg;
- old->energy_cores = new->energy_cores - old->energy_cores;
- old->energy_gfx = new->energy_gfx - old->energy_gfx;
- old->energy_dram = new->energy_dram - old->energy_dram;
- old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
- old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
+ /* flag an error when mc6 counter resets/wraps */
+ if (old->sam_mc6_ms > new->sam_mc6_ms)
+ old->sam_mc6_ms = -1;
+ else
+ old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms;
+
+ old->sam_mhz = new->sam_mhz;
+ old->sam_act_mhz = new->sam_act_mhz;
+
+ old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value;
+ old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value;
+ old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
+ old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
+ old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
+ old->rapl_dram_perf_status.raw_value =
+ new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2150,7 +2517,7 @@ void delta_core(struct core_data *new, struct core_data *old)
old->core_throt_cnt = new->core_throt_cnt;
old->mc6_us = new->mc6_us - old->mc6_us;
- DELTA_WRAP32(new->core_energy, old->core_energy);
+ DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2277,6 +2644,13 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
return retval;
}
+void rapl_counter_clear(struct rapl_counter *c)
+{
+ c->raw_value = 0;
+ c->scale = 0.0;
+ c->unit = RAPL_UNIT_INVALID;
+}
+
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
@@ -2304,7 +2678,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
c->c7 = 0;
c->mc6_us = 0;
c->core_temp_c = 0;
- c->core_energy = 0;
+ rapl_counter_clear(&c->core_energy);
c->core_throt_cnt = 0;
p->pkg_wtd_core_c0 = 0;
@@ -2325,18 +2699,21 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->cpu_lpi = 0;
p->sys_lpi = 0;
- p->energy_pkg = 0;
- p->energy_dram = 0;
- p->energy_cores = 0;
- p->energy_gfx = 0;
- p->rapl_pkg_perf_status = 0;
- p->rapl_dram_perf_status = 0;
+ rapl_counter_clear(&p->energy_pkg);
+ rapl_counter_clear(&p->energy_dram);
+ rapl_counter_clear(&p->energy_cores);
+ rapl_counter_clear(&p->energy_gfx);
+ rapl_counter_clear(&p->rapl_pkg_perf_status);
+ rapl_counter_clear(&p->rapl_dram_perf_status);
p->pkg_temp_c = 0;
p->gfx_rc6_ms = 0;
p->uncore_mhz = 0;
p->gfx_mhz = 0;
p->gfx_act_mhz = 0;
+ p->sam_mc6_ms = 0;
+ p->sam_mhz = 0;
+ p->sam_act_mhz = 0;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
t->counter[i] = 0;
@@ -2347,6 +2724,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->counter[i] = 0;
}
+void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
+{
+ /* Copy unit and scale from src if dst is not initialized */
+ if (dst->unit == RAPL_UNIT_INVALID) {
+ dst->unit = src->unit;
+ dst->scale = src->scale;
+ }
+
+ assert(dst->unit == src->unit);
+ assert(dst->scale == src->scale);
+
+ dst->raw_value += src->raw_value;
+}
+
int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
@@ -2393,7 +2784,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
- average.cores.core_energy += c->core_energy;
+ rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -2428,25 +2819,29 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
average.packages.cpu_lpi = p->cpu_lpi;
average.packages.sys_lpi = p->sys_lpi;
- average.packages.energy_pkg += p->energy_pkg;
- average.packages.energy_dram += p->energy_dram;
- average.packages.energy_cores += p->energy_cores;
- average.packages.energy_gfx += p->energy_gfx;
+ rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg);
+ rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram);
+ rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores);
+ rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx);
average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
average.packages.uncore_mhz = p->uncore_mhz;
average.packages.gfx_mhz = p->gfx_mhz;
average.packages.gfx_act_mhz = p->gfx_act_mhz;
+ average.packages.sam_mc6_ms = p->sam_mc6_ms;
+ average.packages.sam_mhz = p->sam_mhz;
+ average.packages.sam_act_mhz = p->sam_act_mhz;
average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
- average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
- average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
+ rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
+ rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- if (mp->format == FORMAT_RAW)
- continue;
- average.packages.counter[i] += p->counter[i];
+ if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
+ average.packages.counter[i] = p->counter[i];
+ else
+ average.packages.counter[i] += p->counter[i];
}
return 0;
}
@@ -2578,6 +2973,7 @@ unsigned long long snapshot_sysfs_counter(char *path)
int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
{
if (mp->msr_num != 0) {
+ assert(!no_msr);
if (get_msr(cpu, mp->msr_num, counterp))
return -1;
} else {
@@ -2599,7 +2995,7 @@ unsigned long long get_uncore_mhz(int package, int die)
{
char path[128];
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package,
+ sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package,
die);
return (snapshot_sysfs_counter(path) / 1000);
@@ -2627,6 +3023,9 @@ int get_epb(int cpu)
return epb;
msr_fallback:
+ if (no_msr)
+ return -1;
+
get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
return msr & 0xf;
@@ -2700,6 +3099,351 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt)
return 0;
}
+struct amperf_group_fd {
+ int aperf; /* Also the group descriptor */
+ int mperf;
+};
+
+static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
+{
+ int fdmt;
+ int bytes_read;
+ char buf[64];
+ int ret = -1;
+
+ fdmt = open(path, O_RDONLY, 0);
+ if (fdmt == -1) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ bytes_read = read(fdmt, buf, sizeof(buf) - 1);
+ if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ buf[bytes_read] = '\0';
+
+ if (sscanf(buf, parse_format, value_ptr) != 1) {
+ if (debug)
+ fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+ ret = -1;
+ goto cleanup_and_exit;
+ }
+
+ ret = 0;
+
+cleanup_and_exit:
+ close(fdmt);
+ return ret;
+}
+
+static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
+{
+ unsigned int v;
+ int status;
+
+ status = read_perf_counter_info(path, parse_format, &v);
+ if (status)
+ v = -1;
+
+ return v;
+}
+
+static unsigned int read_msr_type(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/type";
+ const char *const format = "%u";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_aperf_config(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/events/aperf";
+ const char *const format = "event=%x";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_mperf_config(void)
+{
+ const char *const path = "/sys/bus/event_source/devices/msr/events/mperf";
+ const char *const format = "event=%x";
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_perf_type(const char *subsys)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/type";
+ const char *const format = "%u";
+ char path[128];
+
+ snprintf(path, sizeof(path), path_format, subsys);
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_rapl_config(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s";
+ const char *const format = "event=%x";
+ char path[128];
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ return read_perf_counter_info_n(path, format);
+}
+
+static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit";
+ const char *const format = "%s";
+ char path[128];
+ char unit_buffer[16];
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ read_perf_counter_info(path, format, &unit_buffer);
+ if (strcmp("Joules", unit_buffer) == 0)
+ return RAPL_UNIT_JOULES;
+
+ return RAPL_UNIT_INVALID;
+}
+
+static double read_perf_rapl_scale(const char *subsys, const char *event_name)
+{
+ const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale";
+ const char *const format = "%lf";
+ char path[128];
+ double scale;
+
+ snprintf(path, sizeof(path), path_format, subsys, event_name);
+
+ if (read_perf_counter_info(path, format, &scale))
+ return 0.0;
+
+ return scale;
+}
+
+static struct amperf_group_fd open_amperf_fd(int cpu)
+{
+ const unsigned int msr_type = read_msr_type();
+ const unsigned int aperf_config = read_aperf_config();
+ const unsigned int mperf_config = read_mperf_config();
+ struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 };
+
+ fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP);
+ fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP);
+
+ return fds;
+}
+
+static int get_amperf_fd(int cpu)
+{
+ assert(fd_amperf_percpu);
+
+ if (fd_amperf_percpu[cpu].aperf)
+ return fd_amperf_percpu[cpu].aperf;
+
+ fd_amperf_percpu[cpu] = open_amperf_fd(cpu);
+
+ return fd_amperf_percpu[cpu].aperf;
+}
+
+/* Read APERF, MPERF and TSC using the perf API. */
+static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu)
+{
+ union {
+ struct {
+ unsigned long nr_entries;
+ unsigned long aperf;
+ unsigned long mperf;
+ };
+
+ unsigned long as_array[3];
+ } cnt;
+
+ const int fd_amperf = get_amperf_fd(cpu);
+
+ /*
+ * Read the TSC with rdtsc, because we want the absolute value and not
+ * the offset from the start of the counter.
+ */
+ t->tsc = rdtsc();
+
+ const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array));
+
+ if (n != sizeof(cnt.as_array))
+ return -2;
+
+ t->aperf = cnt.aperf * aperf_mperf_multiplier;
+ t->mperf = cnt.mperf * aperf_mperf_multiplier;
+
+ return 0;
+}
+
+/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */
+static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu)
+{
+ unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+ int aperf_mperf_retry_count = 0;
+
+ /*
+ * The TSC, APERF and MPERF must be read together for
+ * APERF/MPERF and MPERF/TSC to give accurate results.
+ *
+ * Unfortunately, APERF and MPERF are read by
+ * individual system call, so delays may occur
+ * between them. If the time to read them
+ * varies by a large amount, we re-read them.
+ */
+
+ /*
+ * This initial dummy APERF read has been seen to
+ * reduce jitter in the subsequent reads.
+ */
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+retry:
+ t->tsc = rdtsc(); /* re-read close to APERF */
+
+ tsc_before = t->tsc;
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+ tsc_between = rdtsc();
+
+ if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
+ return -4;
+
+ tsc_after = rdtsc();
+
+ aperf_time = tsc_between - tsc_before;
+ mperf_time = tsc_after - tsc_between;
+
+ /*
+ * If the system call latency to read APERF and MPERF
+ * differ by more than 2x, then try again.
+ */
+ if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
+ aperf_mperf_retry_count++;
+ if (aperf_mperf_retry_count < 5)
+ goto retry;
+ else
+ warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
+ }
+ aperf_mperf_retry_count = 0;
+
+ t->aperf = t->aperf * aperf_mperf_multiplier;
+ t->mperf = t->mperf * aperf_mperf_multiplier;
+
+ return 0;
+}
+
+size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci)
+{
+ size_t ret = 0;
+
+ for (int i = 0; i < NUM_RAPL_COUNTERS; ++i)
+ if (rci->source[i] == RAPL_SOURCE_PERF)
+ ++ret;
+
+ return ret;
+}
+
+void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx)
+{
+ rc->raw_value = rci->data[idx];
+ rc->unit = rci->unit[idx];
+ rc->scale = rci->scale[idx];
+}
+
+int get_rapl_counters(int cpu, int domain, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long perf_data[NUM_RAPL_COUNTERS + 1];
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain];
+
+ if (debug)
+ fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain);
+
+ assert(rapl_counter_info_perdomain);
+
+ /*
+ * If we have any perf counters to read, read them all now, in bulk
+ */
+ if (rci->fd_perf != -1) {
+ size_t num_perf_counters = rapl_counter_info_count_perf(rci);
+ const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
+ const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));
+
+ if (actual_read_size != expected_read_size)
+ err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
+ actual_read_size);
+ }
+
+ for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
+ switch (rci->source[i]) {
+ case RAPL_SOURCE_NONE:
+ break;
+
+ case RAPL_SOURCE_PERF:
+ assert(pi < ARRAY_SIZE(perf_data));
+ assert(rci->fd_perf != -1);
+
+ if (debug)
+ fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
+ i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]);
+
+ rci->data[i] = perf_data[pi];
+
+ ++pi;
+ break;
+
+ case RAPL_SOURCE_MSR:
+ if (debug)
+ fprintf(stderr, "Reading rapl counter via msr at %u\n", i);
+
+ assert(!no_msr);
+ if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) {
+ if (get_msr_sum(cpu, rci->msr[i], &rci->data[i]))
+ return -13 - i;
+ } else {
+ if (get_msr(cpu, rci->msr[i], &rci->data[i]))
+ return -13 - i;
+ }
+
+ rci->data[i] &= rci->msr_mask[i];
+ if (rci->msr_shift[i] >= 0)
+ rci->data[i] >>= abs(rci->msr_shift[i]);
+ else
+ rci->data[i] <<= abs(rci->msr_shift[i]);
+
+ break;
+ }
+ }
+
+ _Static_assert(NUM_RAPL_COUNTERS == 7);
+ write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG);
+ write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES);
+ write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM);
+ write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX);
+ write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS);
+ write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS);
+ write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY);
+
+ return 0;
+}
+
/*
* get_counters(...)
* migrate to cpu
@@ -2709,12 +3453,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int cpu = t->cpu_id;
unsigned long long msr;
- int aperf_mperf_retry_count = 0;
struct msr_counter *mp;
int i;
+ int status;
if (cpu_migrate(cpu)) {
- fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
return -1;
}
@@ -2722,63 +3466,26 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (first_counter_read)
get_apic_id(t);
-retry:
+
t->tsc = rdtsc(); /* we are running on local CPU of interest */
if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
|| soft_c1_residency_display(BIC_Avg_MHz)) {
- unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
-
- /*
- * The TSC, APERF and MPERF must be read together for
- * APERF/MPERF and MPERF/TSC to give accurate results.
- *
- * Unfortunately, APERF and MPERF are read by
- * individual system call, so delays may occur
- * between them. If the time to read them
- * varies by a large amount, we re-read them.
- */
-
- /*
- * This initial dummy APERF read has been seen to
- * reduce jitter in the subsequent reads.
- */
-
- if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
- return -3;
-
- t->tsc = rdtsc(); /* re-read close to APERF */
-
- tsc_before = t->tsc;
+ int status = -1;
- if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
- return -3;
+ assert(!no_perf || !no_msr);
- tsc_between = rdtsc();
-
- if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
- return -4;
-
- tsc_after = rdtsc();
-
- aperf_time = tsc_between - tsc_before;
- mperf_time = tsc_after - tsc_between;
-
- /*
- * If the system call latency to read APERF and MPERF
- * differ by more than 2x, then try again.
- */
- if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
- aperf_mperf_retry_count++;
- if (aperf_mperf_retry_count < 5)
- goto retry;
- else
- warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
+ switch (amperf_source) {
+ case AMPERF_SOURCE_PERF:
+ status = read_aperf_mperf_tsc_perf(t, cpu);
+ break;
+ case AMPERF_SOURCE_MSR:
+ status = read_aperf_mperf_tsc_msr(t, cpu);
+ break;
}
- aperf_mperf_retry_count = 0;
- t->aperf = t->aperf * aperf_mperf_multiplier;
- t->mperf = t->mperf * aperf_mperf_multiplier;
+ if (status != 0)
+ return status;
}
if (DO_BIC(BIC_IPC))
@@ -2806,6 +3513,12 @@ retry:
if (!is_cpu_first_thread_in_core(t, c, p))
goto done;
+ if (platform->has_per_core_rapl) {
+ status = get_rapl_counters(cpu, c->core_id, c, p);
+ if (status != 0)
+ return status;
+ }
+
if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
@@ -2846,12 +3559,6 @@ retry:
if (DO_BIC(BIC_CORE_THROT_CNT))
get_core_throt_cnt(cpu, &c->core_throt_cnt);
- if (platform->rapl_msrs & RAPL_AMD_F17H) {
- if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
- return -14;
- c->core_energy = msr & 0xFFFFFFFF;
- }
-
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &c->counter[i]))
return -10;
@@ -2911,59 +3618,39 @@ retry:
if (DO_BIC(BIC_SYS_LPI))
p->sys_lpi = cpuidle_cur_sys_lpi_us;
- if (platform->rapl_msrs & RAPL_PKG) {
- if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
- return -13;
- p->energy_pkg = msr;
- }
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
- if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
- return -14;
- p->energy_cores = msr;
- }
- if (platform->rapl_msrs & RAPL_DRAM) {
- if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
- return -15;
- p->energy_dram = msr;
- }
- if (platform->rapl_msrs & RAPL_GFX) {
- if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
- return -16;
- p->energy_gfx = msr;
- }
- if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
- if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
- return -16;
- p->rapl_pkg_perf_status = msr;
- }
- if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
- if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
- return -16;
- p->rapl_dram_perf_status = msr;
- }
- if (platform->rapl_msrs & RAPL_AMD_F17H) {
- if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
- return -13;
- p->energy_pkg = msr;
+ if (!platform->has_per_core_rapl) {
+ status = get_rapl_counters(cpu, p->package_id, c, p);
+ if (status != 0)
+ return status;
}
+
if (DO_BIC(BIC_PkgTmp)) {
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
return -17;
p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
}
- if (DO_BIC(BIC_GFX_rc6))
- p->gfx_rc6_ms = gfx_cur_rc6_ms;
-
/* n.b. assume die0 uncore frequency applies to whole package */
if (DO_BIC(BIC_UNCORE_MHZ))
p->uncore_mhz = get_uncore_mhz(p->package_id, 0);
+ if (DO_BIC(BIC_GFX_rc6))
+ p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;
+
if (DO_BIC(BIC_GFXMHz))
- p->gfx_mhz = gfx_cur_mhz;
+ p->gfx_mhz = gfx_info[GFX_MHz].val;
if (DO_BIC(BIC_GFXACTMHz))
- p->gfx_act_mhz = gfx_act_mhz;
+ p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;
+
+ if (DO_BIC(BIC_SAM_mc6))
+ p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;
+
+ if (DO_BIC(BIC_SAMMHz))
+ p->sam_mhz = gfx_info[SAM_MHz].val;
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &p->counter[i]))
@@ -3053,7 +3740,7 @@ void probe_cst_limit(void)
unsigned long long msr;
int *pkg_cstate_limits;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
switch (platform->cst_limit) {
@@ -3097,7 +3784,7 @@ static void dump_platform_info(void)
unsigned long long msr;
unsigned int ratio;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
@@ -3115,7 +3802,7 @@ static void dump_power_ctl(void)
{
unsigned long long msr;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
@@ -3321,7 +4008,7 @@ static void dump_cst_cfg(void)
{
unsigned long long msr;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
@@ -3393,7 +4080,7 @@ void print_irtl(void)
{
unsigned long long msr;
- if (!platform->has_irtl_msrs)
+ if (!platform->has_irtl_msrs || no_msr)
return;
if (platform->supported_cstates & PC3) {
@@ -3443,12 +4130,64 @@ void free_fd_percpu(void)
{
int i;
+ if (!fd_percpu)
+ return;
+
for (i = 0; i < topo.max_cpu_num + 1; ++i) {
if (fd_percpu[i] != 0)
close(fd_percpu[i]);
}
free(fd_percpu);
+ fd_percpu = NULL;
+}
+
+void free_fd_amperf_percpu(void)
+{
+ int i;
+
+ if (!fd_amperf_percpu)
+ return;
+
+ for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_amperf_percpu[i].mperf != 0)
+ close(fd_amperf_percpu[i].mperf);
+
+ if (fd_amperf_percpu[i].aperf != 0)
+ close(fd_amperf_percpu[i].aperf);
+ }
+
+ free(fd_amperf_percpu);
+ fd_amperf_percpu = NULL;
+}
+
+void free_fd_instr_count_percpu(void)
+{
+ if (!fd_instr_count_percpu)
+ return;
+
+ for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_instr_count_percpu[i] != 0)
+ close(fd_instr_count_percpu[i]);
+ }
+
+ free(fd_instr_count_percpu);
+ fd_instr_count_percpu = NULL;
+}
+
+void free_fd_rapl_percpu(void)
+{
+ if (!rapl_counter_info_perdomain)
+ return;
+
+ const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+
+ for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+ if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
+ close(rapl_counter_info_perdomain[domain_id].fd_perf);
+ }
+
+ free(rapl_counter_info_perdomain);
}
void free_all_buffers(void)
@@ -3492,6 +4231,9 @@ void free_all_buffers(void)
outp = NULL;
free_fd_percpu();
+ free_fd_instr_count_percpu();
+ free_fd_amperf_percpu();
+ free_fd_rapl_percpu();
free(irq_column_2_cpu);
free(irqs_per_cpu);
@@ -3825,11 +4567,17 @@ static void update_effective_set(bool startup)
err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
}
+void linux_perf_init(void);
+void rapl_perf_init(void);
+
void re_initialize(void)
{
free_all_buffers();
setup_all_buffers(false);
- fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
+ linux_perf_init();
+ rapl_perf_init();
+ fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
+ topo.allowed_cpus);
}
void set_max_cpu_num(void)
@@ -3940,85 +4688,43 @@ int snapshot_proc_interrupts(void)
}
/*
- * snapshot_gfx_rc6_ms()
+ * snapshot_graphics()
*
- * record snapshot of
- * /sys/class/drm/card0/power/rc6_residency_ms
+ * record snapshot of specified graphics sysfs knob
*
* return 1 if config change requires a restart, else return 0
*/
-int snapshot_gfx_rc6_ms(void)
+int snapshot_graphics(int idx)
{
FILE *fp;
int retval;
- fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
-
- retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
- if (retval != 1)
- err(1, "GFX rc6");
-
- fclose(fp);
-
- return 0;
-}
-
-/*
- * snapshot_gfx_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
- * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_mhz(void)
-{
- static FILE *fp;
- int retval;
-
- if (fp == NULL) {
- fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
- if (!fp)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
- } else {
- rewind(fp);
- fflush(fp);
- }
-
- retval = fscanf(fp, "%d", &gfx_cur_mhz);
- if (retval != 1)
- err(1, "GFX MHz");
-
- return 0;
-}
-
-/*
- * snapshot_gfx_cur_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
- * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_act_mhz(void)
-{
- static FILE *fp;
- int retval;
-
- if (fp == NULL) {
- fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
- if (!fp)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
- } else {
- rewind(fp);
- fflush(fp);
+ switch (idx) {
+ case GFX_rc6:
+ case SAM_mc6:
+ fp = fopen_or_die(gfx_info[idx].path, "r");
+ retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull);
+ if (retval != 1)
+ err(1, "rc6");
+ fclose(fp);
+ return 0;
+ case GFX_MHz:
+ case GFX_ACTMHz:
+ case SAM_MHz:
+ case SAM_ACTMHz:
+ if (gfx_info[idx].fp == NULL) {
+ gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r");
+ } else {
+ rewind(gfx_info[idx].fp);
+ fflush(gfx_info[idx].fp);
+ }
+ retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
+ if (retval != 1)
+ err(1, "MHz");
+ return 0;
+ default:
+ return -EINVAL;
}
-
- retval = fscanf(fp, "%d", &gfx_act_mhz);
- if (retval != 1)
- err(1, "GFX ACT MHz");
-
- return 0;
}
/*
@@ -4083,13 +4789,22 @@ int snapshot_proc_sysfs_files(void)
return 1;
if (DO_BIC(BIC_GFX_rc6))
- snapshot_gfx_rc6_ms();
+ snapshot_graphics(GFX_rc6);
if (DO_BIC(BIC_GFXMHz))
- snapshot_gfx_mhz();
+ snapshot_graphics(GFX_MHz);
if (DO_BIC(BIC_GFXACTMHz))
- snapshot_gfx_act_mhz();
+ snapshot_graphics(GFX_ACTMHz);
+
+ if (DO_BIC(BIC_SAM_mc6))
+ snapshot_graphics(SAM_mc6);
+
+ if (DO_BIC(BIC_SAMMHz))
+ snapshot_graphics(SAM_MHz);
+
+ if (DO_BIC(BIC_SAMACTMHz))
+ snapshot_graphics(SAM_ACTMHz);
if (DO_BIC(BIC_CPU_LPI))
snapshot_cpu_lpi_us();
@@ -4173,6 +4888,8 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
int ret, idx;
unsigned long long msr_cur, msr_last;
+ assert(!no_msr);
+
if (!per_cpu_msr_sum)
return 1;
@@ -4201,6 +4918,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
UNUSED(c);
UNUSED(p);
+ assert(!no_msr);
+
for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
unsigned long long msr_cur, msr_last;
off_t offset;
@@ -4280,7 +4999,8 @@ release_msr:
/*
* set_my_sched_priority(pri)
- * return previous
+ * return previous priority on success
+ * return value < -20 on failure
*/
int set_my_sched_priority(int priority)
{
@@ -4290,16 +5010,16 @@ int set_my_sched_priority(int priority)
errno = 0;
original_priority = getpriority(PRIO_PROCESS, 0);
if (errno && (original_priority == -1))
- err(errno, "getpriority");
+ return -21;
retval = setpriority(PRIO_PROCESS, 0, priority);
if (retval)
- errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname);
+ return -21;
errno = 0;
retval = getpriority(PRIO_PROCESS, 0);
if (retval != priority)
- err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
+ return -21;
return original_priority;
}
@@ -4314,6 +5034,9 @@ void turbostat_loop()
/*
* elevate own priority for interval mode
+ *
+ * ignore on error - we probably don't have permission to set it, but
+ * it's not a big deal
*/
set_my_sched_priority(-20);
@@ -4399,10 +5122,13 @@ void check_dev_msr()
struct stat sb;
char pathname[32];
+ if (no_msr)
+ return;
+
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (stat(pathname, &sb))
if (system("/sbin/modprobe msr > /dev/null 2>&1"))
- err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+ no_msr = 1;
}
/*
@@ -4414,47 +5140,51 @@ int check_for_cap_sys_rawio(void)
{
cap_t caps;
cap_flag_value_t cap_flag_value;
+ int ret = 0;
caps = cap_get_proc();
if (caps == NULL)
- err(-6, "cap_get_proc\n");
+ return 1;
- if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
- err(-6, "cap_get\n");
+ if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
+ ret = 1;
+ goto free_and_exit;
+ }
if (cap_flag_value != CAP_SET) {
- warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
- return 1;
+ ret = 1;
+ goto free_and_exit;
}
+free_and_exit:
if (cap_free(caps) == -1)
err(-6, "cap_free\n");
- return 0;
+ return ret;
}
-void check_permissions(void)
+void check_msr_permission(void)
{
- int do_exit = 0;
+ int failed = 0;
char pathname[32];
+ if (no_msr)
+ return;
+
/* check for CAP_SYS_RAWIO */
- do_exit += check_for_cap_sys_rawio();
+ failed += check_for_cap_sys_rawio();
/* test file permissions */
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (euidaccess(pathname, R_OK)) {
- do_exit++;
- warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
+ failed++;
}
- /* if all else fails, thell them to be root */
- if (do_exit)
- if (getuid() != 0)
- warnx("... or simply run as root");
-
- if (do_exit)
- exit(-6);
+ if (failed) {
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
+ no_msr = 1;
+ }
}
void probe_bclk(void)
@@ -4462,7 +5192,7 @@ void probe_bclk(void)
unsigned long long msr;
unsigned int base_ratio;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
if (platform->bclk_freq == BCLK_100MHZ)
@@ -4502,7 +5232,7 @@ static void dump_turbo_ratio_info(void)
if (!has_turbo)
return;
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
return;
if (platform->trl_msrs & TRL_LIMIT2)
@@ -4567,20 +5297,15 @@ static void dump_sysfs_file(char *path)
static void probe_intel_uncore_frequency(void)
{
int i, j;
- char path[128];
+ char path[256];
if (!genuine_intel)
return;
- if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
- return;
-
- /* Cluster level sysfs not supported yet. */
- if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
- return;
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+ goto probe_cluster;
- if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
- BIC_PRESENT(BIC_UNCORE_MHZ);
+ BIC_PRESENT(BIC_UNCORE_MHZ);
if (quiet)
return;
@@ -4588,40 +5313,178 @@ static void probe_intel_uncore_frequency(void)
for (i = 0; i < topo.num_packages; ++i) {
for (j = 0; j < topo.num_die; ++j) {
int k, l;
+ char path_base[128];
+
+ sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
+ j);
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
- i, j);
+ sprintf(path, "%s/min_freq_khz", path_base);
k = read_sysfs_int(path);
- sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
- i, j);
+ sprintf(path, "%s/max_freq_khz", path_base);
l = read_sysfs_int(path);
- fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
+ fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
- sprintf(path,
- "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
- i, j);
+ sprintf(path, "%s/initial_min_freq_khz", path_base);
k = read_sysfs_int(path);
- sprintf(path,
- "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
- i, j);
+ sprintf(path, "%s/initial_max_freq_khz", path_base);
l = read_sysfs_int(path);
- fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
+ fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+ sprintf(path, "%s/current_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ fprintf(outf, " %d MHz\n", k / 1000);
}
}
+ return;
+
+probe_cluster:
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
+ return;
+
+ if (quiet)
+ return;
+
+ for (i = 0;; ++i) {
+ int k, l;
+ char path_base[128];
+ int package_id, domain_id, cluster_id;
+
+ sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);
+
+ if (access(path_base, R_OK))
+ break;
+
+ sprintf(path, "%s/package_id", path_base);
+ package_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/domain_id", path_base);
+ domain_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/fabric_cluster_id", path_base);
+ cluster_id = read_sysfs_int(path);
+
+ sprintf(path, "%s/min_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ sprintf(path, "%s/max_freq_khz", path_base);
+ l = read_sysfs_int(path);
+ fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
+ cluster_id, k / 1000, l / 1000);
+
+ sprintf(path, "%s/initial_min_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ sprintf(path, "%s/initial_max_freq_khz", path_base);
+ l = read_sysfs_int(path);
+ fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+ sprintf(path, "%s/current_freq_khz", path_base);
+ k = read_sysfs_int(path);
+ fprintf(outf, " %d MHz\n", k / 1000);
+ }
}
static void probe_graphics(void)
{
+ /* Xe graphics sysfs knobs */
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
+ FILE *fp;
+ char buf[8];
+ bool gt0_is_gt;
+ int idx;
+
+ fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
+ if (!fp)
+ goto next;
+
+ if (!fread(buf, sizeof(char), 7, fp)) {
+ fclose(fp);
+ goto next;
+ }
+ fclose(fp);
+
+ if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
+ gt0_is_gt = true;
+ else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
+ gt0_is_gt = false;
+ else
+ goto next;
+
+ idx = gt0_is_gt ? GFX_rc6 : SAM_mc6;
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms";
+
+ idx = gt0_is_gt ? GFX_MHz : SAM_MHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq";
+
+ idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq";
+
+ idx = gt0_is_gt ? SAM_mc6 : GFX_rc6;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms";
+
+ idx = gt0_is_gt ? SAM_MHz : GFX_MHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq";
+
+ idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz;
+ if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK))
+ gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq";
+
+ goto end;
+ }
+
+next:
+ /* New i915 graphics sysfs knobs */
+ if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) {
+ gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK))
+ gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK))
+ gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz";
+
+ if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK))
+ gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz";
+
+ goto end;
+ }
+
+ /* Fall back to traditional i915 graphics sysfs knobs */
if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
- BIC_PRESENT(BIC_GFX_rc6);
+ gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms";
+
+ if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz";
+ else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+ gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz";
- if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
- !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
- BIC_PRESENT(BIC_GFXMHz);
- if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
- !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz";
+ else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz";
+
+end:
+ if (gfx_info[GFX_rc6].path)
+ BIC_PRESENT(BIC_GFX_rc6);
+ if (gfx_info[GFX_MHz].path)
+ BIC_PRESENT(BIC_GFXMHz);
+ if (gfx_info[GFX_ACTMHz].path)
BIC_PRESENT(BIC_GFXACTMHz);
+ if (gfx_info[SAM_mc6].path)
+ BIC_PRESENT(BIC_SAM_mc6);
+ if (gfx_info[SAM_MHz].path)
+ BIC_PRESENT(BIC_SAMMHz);
+ if (gfx_info[SAM_ACTMHz].path)
+ BIC_PRESENT(BIC_SAMACTMHz);
}
static void dump_sysfs_cstate_config(void)
@@ -4783,6 +5646,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
if (!has_hwp)
return 0;
@@ -4869,6 +5735,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
cpu = t->cpu_id;
/* per-package */
@@ -4983,31 +5852,18 @@ void rapl_probe_intel(void)
unsigned long long msr;
unsigned int time_unit;
double tdp;
+ const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt;
+ const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J;
- if (rapl_joules) {
- if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
- BIC_PRESENT(BIC_Pkg_J);
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
- BIC_PRESENT(BIC_Cor_J);
- if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
- BIC_PRESENT(BIC_RAM_J);
- if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
- BIC_PRESENT(BIC_GFX_J);
- } else {
- if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
- BIC_PRESENT(BIC_PkgWatt);
- if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
- BIC_PRESENT(BIC_CorWatt);
- if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
- BIC_PRESENT(BIC_RAMWatt);
- if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
- BIC_PRESENT(BIC_GFXWatt);
- }
+ if (rapl_joules)
+ bic_enabled &= ~bic_watt_bits;
+ else
+ bic_enabled &= ~bic_joules_bits;
- if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
- BIC_PRESENT(BIC_PKG__);
- if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
- BIC_PRESENT(BIC_RAM__);
+ if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
+ bic_enabled &= ~BIC_PKG__;
+ if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
+ bic_enabled &= ~BIC_RAM__;
/* units on package 0, verify later other packages match */
if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
@@ -5041,14 +5897,13 @@ void rapl_probe_amd(void)
{
unsigned long long msr;
double tdp;
+ const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt;
+ const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- }
+ if (rapl_joules)
+ bic_enabled &= ~bic_watt_bits;
+ else
+ bic_enabled &= ~bic_joules_bits;
if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
return;
@@ -5202,7 +6057,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
*/
void probe_rapl(void)
{
- if (!platform->rapl_msrs)
+ if (!platform->rapl_msrs || no_msr)
return;
if (genuine_intel)
@@ -5258,7 +6113,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
}
/* Temperature Target MSR is Nehalem and newer only */
- if (!platform->has_nhm_msrs)
+ if (!platform->has_nhm_msrs || no_msr)
goto guess;
if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
@@ -5305,6 +6160,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
UNUSED(c);
UNUSED(p);
+ if (no_msr)
+ return 0;
+
if (!(do_dts || do_ptm))
return 0;
@@ -5402,6 +6260,9 @@ void decode_feature_control_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
@@ -5411,6 +6272,9 @@ void decode_misc_enable_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!genuine_intel)
return;
@@ -5428,6 +6292,9 @@ void decode_misc_feature_control(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_misc_feature_control)
return;
@@ -5449,6 +6316,9 @@ void decode_misc_pwr_mgmt_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_misc_pwr_mgmt)
return;
@@ -5468,6 +6338,9 @@ void decode_c6_demotion_policy_msr(void)
{
unsigned long long msr;
+ if (no_msr)
+ return;
+
if (!platform->has_msr_c6_demotion_policy_config)
return;
@@ -5489,7 +6362,8 @@ void print_dev_latency(void)
fd = open(path, O_RDONLY);
if (fd < 0) {
- warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
+ if (debug)
+ warnx("Read %s failed", path);
return;
}
@@ -5504,23 +6378,260 @@ void print_dev_latency(void)
close(fd);
}
+static int has_instr_count_access(void)
+{
+ int fd;
+ int has_access;
+
+ if (no_perf)
+ return 0;
+
+ fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
+ has_access = fd != -1;
+
+ if (fd != -1)
+ close(fd);
+
+ if (!has_access)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "instructions retired perf counter", "--no-perf");
+
+ return has_access;
+}
+
+bool is_aperf_access_required(void)
+{
+ return BIC_IS_ENABLED(BIC_Avg_MHz)
+ || BIC_IS_ENABLED(BIC_Busy)
+ || BIC_IS_ENABLED(BIC_Bzy_MHz)
+ || BIC_IS_ENABLED(BIC_IPC);
+}
+
+int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale_, enum rapl_unit *unit_)
+{
+ if (no_perf)
+ return -1;
+
+ const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name);
+
+ if (scale == 0.0)
+ return -1;
+
+ const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);
+
+ if (unit == RAPL_UNIT_INVALID)
+ return -1;
+
+ const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
+ const unsigned int rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name);
+
+ const int fd_counter =
+ open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
+ if (fd_counter == -1)
+ return -1;
+
+ /* If it's the first counter opened, make it a group descriptor */
+ if (rci->fd_perf == -1)
+ rci->fd_perf = fd_counter;
+
+ *scale_ = scale;
+ *unit_ = unit;
+ return fd_counter;
+}
+
+int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+ double *scale, enum rapl_unit *unit)
+{
+ int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);
+
+ if (debug)
+ fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
+
+ return ret;
+}
+
/*
* Linux-perf manages the HW instructions-retired counter
* by enabling when requested, and hiding rollover
*/
void linux_perf_init(void)
{
- if (!BIC_IS_ENABLED(BIC_IPC))
- return;
-
if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
return;
- fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
- if (fd_instr_count_percpu == NULL)
- err(-1, "calloc fd_instr_count_percpu");
+ if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
+ fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+ if (fd_instr_count_percpu == NULL)
+ err(-1, "calloc fd_instr_count_percpu");
+ }
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) {
+ fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu));
+ if (fd_amperf_percpu == NULL)
+ err(-1, "calloc fd_amperf_percpu");
+ }
+}
+
+void rapl_perf_init(void)
+{
+ const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+ bool *domain_visited = calloc(num_domains, sizeof(bool));
+
+ rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
+ if (rapl_counter_info_perdomain == NULL)
+ err(-1, "calloc rapl_counter_info_percpu");
+
+ /*
+ * Initialize rapl_counter_info_percpu
+ */
+ for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];
+
+ rci->fd_perf = -1;
+ for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
+ rci->data[i] = 0;
+ rci->source[i] = RAPL_SOURCE_NONE;
+ }
+ }
- BIC_PRESENT(BIC_IPC);
+ /*
+ * Open/probe the counters
+ * If can't get it via perf, fallback to MSR
+ */
+ for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {
+
+ const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
+ bool has_counter = 0;
+ double scale;
+ enum rapl_unit unit;
+ int next_domain;
+
+ memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
+
+ for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
+
+ if (cpu_is_not_allowed(cpu))
+ continue;
+
+ /* Skip already seen and handled RAPL domains */
+ next_domain =
+ platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;
+
+ if (domain_visited[next_domain])
+ continue;
+
+ domain_visited[next_domain] = 1;
+
+ struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
+
+ /* Check if the counter is enabled and accessible */
+ if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {
+
+ /* Use perf API for this counter */
+ if (!no_perf && cai->perf_name
+ && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
+ rci->source[cai->rci_index] = RAPL_SOURCE_PERF;
+ rci->scale[cai->rci_index] = scale * cai->compat_scale;
+ rci->unit[cai->rci_index] = unit;
+ rci->flags[cai->rci_index] = cai->flags;
+
+ /* Use MSR for this counter */
+ } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+ rci->source[cai->rci_index] = RAPL_SOURCE_MSR;
+ rci->msr[cai->rci_index] = cai->msr;
+ rci->msr_mask[cai->rci_index] = cai->msr_mask;
+ rci->msr_shift[cai->rci_index] = cai->msr_shift;
+ rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
+ rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
+ rci->flags[cai->rci_index] = cai->flags;
+ }
+ }
+
+ if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE)
+ has_counter = 1;
+ }
+
+ /* If any CPU has access to the counter, make it present */
+ if (has_counter)
+ BIC_PRESENT(cai->bic);
+ }
+
+ free(domain_visited);
+}
+
+static int has_amperf_access_via_msr(void)
+{
+ if (no_msr)
+ return 0;
+
+ if (probe_msr(base_cpu, MSR_IA32_APERF))
+ return 0;
+
+ if (probe_msr(base_cpu, MSR_IA32_MPERF))
+ return 0;
+
+ return 1;
+}
+
+static int has_amperf_access_via_perf(void)
+{
+ struct amperf_group_fd fds;
+
+ /*
+ * Cache the last result, so we don't warn the user multiple times
+ *
+ * Negative means cached, no access
+ * Zero means not cached
+ * Positive means cached, has access
+ */
+ static int has_access_cached;
+
+ if (no_perf)
+ return 0;
+
+ if (has_access_cached != 0)
+ return has_access_cached > 0;
+
+ fds = open_amperf_fd(base_cpu);
+ has_access_cached = (fds.aperf != -1) && (fds.mperf != -1);
+
+ if (fds.aperf == -1)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "APERF perf counter", "--no-perf");
+ else
+ close(fds.aperf);
+
+ if (fds.mperf == -1)
+ warnx("Failed to access %s. Some of the counters may not be available\n"
+ "\tRun as root to enable them or use %s to disable the access explicitly",
+ "MPERF perf counter", "--no-perf");
+ else
+ close(fds.mperf);
+
+ if (has_access_cached == 0)
+ has_access_cached = -1;
+
+ return has_access_cached > 0;
+}
+
+/* Check if we can access APERF and MPERF */
+static int has_amperf_access(void)
+{
+ if (!is_aperf_access_required())
+ return 0;
+
+ if (!no_msr && has_amperf_access_via_msr())
+ return 1;
+
+ if (!no_perf && has_amperf_access_via_perf())
+ return 1;
+
+ return 0;
}
void probe_cstates(void)
@@ -5563,7 +6674,7 @@ void probe_cstates(void)
if (platform->has_msr_module_c6_res_ms)
BIC_PRESENT(BIC_Mod_c6);
- if (platform->has_ext_cst_msrs) {
+ if (platform->has_ext_cst_msrs && !no_msr) {
BIC_PRESENT(BIC_Totl_c0);
BIC_PRESENT(BIC_Any_c0);
BIC_PRESENT(BIC_GFX_c0);
@@ -5623,6 +6734,7 @@ void process_cpuid()
unsigned int eax, ebx, ecx, edx;
unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
unsigned long long ucode_patch = 0;
+ bool ucode_patch_valid = false;
eax = ebx = ecx = edx = 0;
@@ -5650,8 +6762,12 @@ void process_cpuid()
ecx_flags = ecx;
edx_flags = edx;
- if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
- warnx("get_msr(UCODE)");
+ if (!no_msr) {
+ if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
+ warnx("get_msr(UCODE)");
+ else
+ ucode_patch_valid = true;
+ }
/*
* check max extended function levels of CPUID.
@@ -5662,9 +6778,12 @@ void process_cpuid()
__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
if (!quiet) {
- fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
- family, model, stepping, family, model, stepping,
- (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+ fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
+ family, model, stepping, family, model, stepping);
+ if (ucode_patch_valid)
+ fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+ fputc('\n', outf);
+
fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
ecx_flags & (1 << 0) ? "SSE3" : "-",
@@ -5700,10 +6819,11 @@ void process_cpuid()
__cpuid(0x6, eax, ebx, ecx, edx);
has_aperf = ecx & (1 << 0);
- if (has_aperf) {
+ if (has_aperf && has_amperf_access()) {
BIC_PRESENT(BIC_Avg_MHz);
BIC_PRESENT(BIC_Busy);
BIC_PRESENT(BIC_Bzy_MHz);
+ BIC_PRESENT(BIC_IPC);
}
do_dts = eax & (1 << 0);
if (do_dts)
@@ -5786,6 +6906,15 @@ void process_cpuid()
base_mhz = max_mhz = bus_mhz = edx = 0;
__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
+
+ bclk = bus_mhz;
+
+ base_hz = base_mhz * 1000000;
+ has_base_hz = 1;
+
+ if (platform->enable_tsc_tweak)
+ tsc_tweak = base_hz / tsc_hz;
+
if (!quiet)
fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
base_mhz, max_mhz, bus_mhz);
@@ -5814,7 +6943,7 @@ void probe_pm_features(void)
probe_thermal();
- if (platform->has_nhm_msrs)
+ if (platform->has_nhm_msrs && !no_msr)
BIC_PRESENT(BIC_SMI);
if (!quiet)
@@ -6142,6 +7271,7 @@ void topology_update(void)
topo.allowed_packages = 0;
for_all_cpus(update_topo, ODD_COUNTERS);
}
+
void setup_all_buffers(bool startup)
{
topology_probe(startup);
@@ -6169,21 +7299,129 @@ void set_base_cpu(void)
err(-ENODEV, "No valid cpus found");
}
+static void set_amperf_source(void)
+{
+ amperf_source = AMPERF_SOURCE_PERF;
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (no_perf || !aperf_required || !has_amperf_access_via_perf())
+ amperf_source = AMPERF_SOURCE_MSR;
+
+ if (quiet || !debug)
+ return;
+
+ fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf");
+}
+
+bool has_added_counters(void)
+{
+ /*
+ * It only makes sense to call this after the command line is parsed,
+ * otherwise sys structure is not populated.
+ */
+
+ return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters;
+}
+
+bool is_msr_access_required(void)
+{
+ if (no_msr)
+ return false;
+
+ if (has_added_counters())
+ return true;
+
+ return BIC_IS_ENABLED(BIC_SMI)
+ || BIC_IS_ENABLED(BIC_CPU_c1)
+ || BIC_IS_ENABLED(BIC_CPU_c3)
+ || BIC_IS_ENABLED(BIC_CPU_c6)
+ || BIC_IS_ENABLED(BIC_CPU_c7)
+ || BIC_IS_ENABLED(BIC_Mod_c6)
+ || BIC_IS_ENABLED(BIC_CoreTmp)
+ || BIC_IS_ENABLED(BIC_Totl_c0)
+ || BIC_IS_ENABLED(BIC_Any_c0)
+ || BIC_IS_ENABLED(BIC_GFX_c0)
+ || BIC_IS_ENABLED(BIC_CPUGFX)
+ || BIC_IS_ENABLED(BIC_Pkgpc3)
+ || BIC_IS_ENABLED(BIC_Pkgpc6)
+ || BIC_IS_ENABLED(BIC_Pkgpc2)
+ || BIC_IS_ENABLED(BIC_Pkgpc7)
+ || BIC_IS_ENABLED(BIC_Pkgpc8)
+ || BIC_IS_ENABLED(BIC_Pkgpc9)
+ || BIC_IS_ENABLED(BIC_Pkgpc10)
+ /* TODO: Multiplex access with perf */
+ || BIC_IS_ENABLED(BIC_CorWatt)
+ || BIC_IS_ENABLED(BIC_Cor_J)
+ || BIC_IS_ENABLED(BIC_PkgWatt)
+ || BIC_IS_ENABLED(BIC_CorWatt)
+ || BIC_IS_ENABLED(BIC_GFXWatt)
+ || BIC_IS_ENABLED(BIC_RAMWatt)
+ || BIC_IS_ENABLED(BIC_Pkg_J)
+ || BIC_IS_ENABLED(BIC_Cor_J)
+ || BIC_IS_ENABLED(BIC_GFX_J)
+ || BIC_IS_ENABLED(BIC_RAM_J)
+ || BIC_IS_ENABLED(BIC_PKG__)
+ || BIC_IS_ENABLED(BIC_RAM__)
+ || BIC_IS_ENABLED(BIC_PkgTmp)
+ || (is_aperf_access_required() && !has_amperf_access_via_perf());
+}
+
+void check_msr_access(void)
+{
+ if (!is_msr_access_required())
+ no_msr = 1;
+
+ check_dev_msr();
+ check_msr_permission();
+
+ if (no_msr)
+ bic_disable_msr_access();
+}
+
+void check_perf_access(void)
+{
+ const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC);
+
+ if (no_perf || !intrcount_required || !has_instr_count_access())
+ bic_enabled &= ~BIC_IPC;
+
+ const bool aperf_required = is_aperf_access_required();
+
+ if (!aperf_required || !has_amperf_access()) {
+ bic_enabled &= ~BIC_Avg_MHz;
+ bic_enabled &= ~BIC_Busy;
+ bic_enabled &= ~BIC_Bzy_MHz;
+ bic_enabled &= ~BIC_IPC;
+ }
+}
+
void turbostat_init()
{
setup_all_buffers(true);
set_base_cpu();
- check_dev_msr();
- check_permissions();
+ check_msr_access();
+ check_perf_access();
process_cpuid();
probe_pm_features();
+ set_amperf_source();
linux_perf_init();
+ rapl_perf_init();
for_all_cpus(get_cpu_type, ODD_COUNTERS);
for_all_cpus(get_cpu_type, EVEN_COUNTERS);
if (DO_BIC(BIC_IPC))
(void)get_instr_count_fd(base_cpu);
+
+ /*
+ * If TSC tweak is needed, but couldn't get it,
+ * disable more BICs, since it can't be reported accurately.
+ */
+ if (platform->enable_tsc_tweak && !has_base_hz) {
+ bic_enabled &= ~BIC_Busy;
+ bic_enabled &= ~BIC_Bzy_MHz;
+ }
}
int fork_it(char **argv)
@@ -6259,7 +7497,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2024.04.08 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
@@ -6291,6 +7529,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
{
struct msr_counter *msrp;
+ if (no_msr && msr_num)
+ errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);
+
msrp = calloc(1, sizeof(struct msr_counter));
if (msrp == NULL) {
perror("calloc");
@@ -6595,6 +7836,8 @@ void cmdline(int argc, char **argv)
{ "list", no_argument, 0, 'l' },
{ "out", required_argument, 0, 'o' },
{ "quiet", no_argument, 0, 'q' },
+ { "no-msr", no_argument, 0, 'M' },
+ { "no-perf", no_argument, 0, 'P' },
{ "show", required_argument, 0, 's' },
{ "Summary", no_argument, 0, 'S' },
{ "TCC", required_argument, 0, 'T' },
@@ -6604,7 +7847,25 @@ void cmdline(int argc, char **argv)
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
+ /*
+ * Parse some options early, because they may make other options invalid,
+ * like adding the MSR counter with --add and at the same time using --no-msr.
+ */
+ while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 'M':
+ no_msr = 1;
+ break;
+ case 'P':
+ no_perf = 1;
+ break;
+ default:
+ break;
+ }
+ }
+ optind = 0;
+
+ while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
switch (opt) {
case 'a':
parse_add_command(optarg);
@@ -6662,6 +7923,10 @@ void cmdline(int argc, char **argv)
case 'q':
quiet = 1;
break;
+ case 'M':
+ case 'P':
+ /* Parsed earlier */
+ break;
case 'n':
num_iterations = strtod(optarg, NULL);
@@ -6704,6 +7969,22 @@ void cmdline(int argc, char **argv)
}
}
+void set_rlimit(void)
+{
+ struct rlimit limit;
+
+ if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
+ err(1, "Failed to get rlimit");
+
+ if (limit.rlim_max < MAX_NOFILE)
+ limit.rlim_max = MAX_NOFILE;
+ if (limit.rlim_cur < MAX_NOFILE)
+ limit.rlim_cur = MAX_NOFILE;
+
+ if (setrlimit(RLIMIT_NOFILE, &limit) < 0)
+ err(1, "Failed to set rlimit");
+}
+
int main(int argc, char **argv)
{
int fd, ret;
@@ -6729,9 +8010,13 @@ skip_cgroup_setting:
probe_sysfs();
+ if (!getuid())
+ set_rlimit();
+
turbostat_init();
- msr_sum_record();
+ if (!no_msr)
+ msr_sum_record();
/* dump counters and exit */
if (dump_only)
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 908e0d0839369c..61c69297e7978f 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -986,10 +986,12 @@ static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range,
{
dpa_perf->qos_class = FAKE_QTG_ID;
dpa_perf->dpa_range = *range;
- dpa_perf->coord.read_latency = 500;
- dpa_perf->coord.write_latency = 500;
- dpa_perf->coord.read_bandwidth = 1000;
- dpa_perf->coord.write_bandwidth = 1000;
+ for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+ dpa_perf->coord[i].read_latency = 500;
+ dpa_perf->coord[i].write_latency = 500;
+ dpa_perf->coord[i].read_bandwidth = 1000;
+ dpa_perf->coord[i].write_bandwidth = 1000;
+ }
}
static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index aa5ec149f96c16..b3b00269a52aac 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -28,6 +28,8 @@ CONFIG_MCTP_FLOWS=y
CONFIG_INET=y
CONFIG_MPTCP=y
+CONFIG_NETDEVICES=y
+CONFIG_WLAN=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
CONFIG_WLAN_VENDOR_INTEL=y
@@ -38,6 +40,7 @@ CONFIG_DAMON_VADDR=y
CONFIG_DAMON_PADDR=y
CONFIG_DEBUG_FS=y
CONFIG_DAMON_DBGFS=y
+CONFIG_DAMON_DBGFS_DEPRECATED=y
CONFIG_REGMAP_BUILD=y
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
index bcf195c64a45c1..567491f3e1b51b 100644
--- a/tools/testing/selftests/bpf/bpf_arena_common.h
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -32,7 +32,7 @@
*/
#endif
-#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
#define __arena __attribute__((address_space(1)))
#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_htab.c b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
index 0766702de84657..d69fd2465f5367 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_htab.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
@@ -3,12 +3,14 @@
#include <test_progs.h>
#include <sys/mman.h>
#include <network_helpers.h>
-
+#include <sys/user.h>
+#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
#include "arena_htab_asm.skel.h"
#include "arena_htab.skel.h"
-#define PAGE_SIZE 4096
-
#include "bpf_arena_htab.h"
static void test_arena_htab_common(struct htab *htab)
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c
index e61886debab127..d15867cddde06a 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c
@@ -3,8 +3,11 @@
#include <test_progs.h>
#include <sys/mman.h>
#include <network_helpers.h>
-
-#define PAGE_SIZE 4096
+#include <sys/user.h>
+#ifndef PAGE_SIZE /* on some archs it comes in sys/user.h */
+#include <unistd.h>
+#define PAGE_SIZE getpagesize()
+#endif
#include "bpf_arena_list.h"
#include "arena_list.skel.h"
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
index 053f4d6da77a48..cc184e4420f6e3 100644
--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2021 Facebook */
#include <sys/syscall.h>
+#include <limits.h>
#include <test_progs.h>
#include "bloom_filter_map.skel.h"
@@ -21,6 +22,11 @@ static void test_fail_cases(void)
if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0"))
close(fd);
+ /* Invalid value size: too big */
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, INT32_MAX, 100, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value too large"))
+ close(fd);
+
/* Invalid max entries size */
fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL);
if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size"))
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 985273832f891c..c4f9f306646ed3 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -5,6 +5,7 @@
#include "cap_helpers.h"
#include "verifier_and.skel.h"
#include "verifier_arena.skel.h"
+#include "verifier_arena_large.skel.h"
#include "verifier_array_access.skel.h"
#include "verifier_basic_stack.skel.h"
#include "verifier_bitfield_write.skel.h"
@@ -120,6 +121,7 @@ static void run_tests_aux(const char *skel_name,
void test_verifier_and(void) { RUN(verifier_and); }
void test_verifier_arena(void) { RUN(verifier_arena); }
+void test_verifier_arena_large(void) { RUN(verifier_arena_large); }
void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
void test_verifier_bounds(void) { RUN(verifier_bounds); }
diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c
index b7bb712cacfdcc..1e6ac187a6a0ce 100644
--- a/tools/testing/selftests/bpf/progs/arena_htab.c
+++ b/tools/testing/selftests/bpf/progs/arena_htab.c
@@ -22,7 +22,7 @@ int zero = 0;
SEC("syscall")
int arena_htab_llvm(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM)
struct htab __arena *htab;
__u64 i;
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
index cd35b844843560..c0422c58cee2c5 100644
--- a/tools/testing/selftests/bpf/progs/arena_list.c
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -30,13 +30,13 @@ int list_sum;
int cnt;
bool skip = false;
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
long __arena arena_sum;
int __arena test_val = 1;
struct arena_list_head __arena global_head;
#else
-long arena_sum SEC(".arena.1");
-int test_val SEC(".arena.1");
+long arena_sum SEC(".addr_space.1");
+int test_val SEC(".addr_space.1");
#endif
int zero;
@@ -44,7 +44,7 @@ int zero;
SEC("syscall")
int arena_list_add(void *ctx)
{
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
__u64 i;
list_head = &global_head;
@@ -66,7 +66,7 @@ int arena_list_add(void *ctx)
SEC("syscall")
int arena_list_del(void *ctx)
{
-#ifdef __BPF_FEATURE_ARENA_CAST
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
struct elem __arena *n;
int sum = 0;
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
index 5540b05ff9ee13..93144ae6df7412 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -12,14 +12,18 @@ struct {
__uint(type, BPF_MAP_TYPE_ARENA);
__uint(map_flags, BPF_F_MMAPABLE);
__uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/
- __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, (1ull << 32) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#else
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+#endif
} arena SEC(".maps");
SEC("syscall")
__success __retval(0)
int basic_alloc1(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile int __arena *page1, *page2, *no_page, *page3;
page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
@@ -58,7 +62,7 @@ SEC("syscall")
__success __retval(0)
int basic_alloc2(void *ctx)
{
-#if defined(__BPF_FEATURE_ARENA_CAST)
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile char __arena *page1, *page2, *page3, *page4;
page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
new file mode 100644
index 00000000000000..ef66ea460264c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+#define ARENA_SIZE (1ull << 32)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, ARENA_SIZE / PAGE_SIZE);
+} arena SEC(".maps");
+
+SEC("syscall")
+__success __retval(0)
+int big_alloc1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page1, *page2, *no_page, *page3;
+ void __arena *base;
+
+ page1 = base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ *page1 = 1;
+ page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE,
+ 1, NUMA_NO_NODE, 0);
+ if (!page2)
+ return 2;
+ *page2 = 2;
+ no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE,
+ 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 3;
+ if (*page1 != 1)
+ return 4;
+ if (*page2 != 2)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)page1, 1);
+ if (*page2 != 2)
+ return 6;
+ if (*page1 != 0) /* use-after-free should return 0 */
+ return 7;
+ page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page3)
+ return 8;
+ *page3 = 3;
+ if (page1 != page3)
+ return 9;
+ if (*page2 != 2)
+ return 10;
+ if (*(page1 + PAGE_SIZE) != 0)
+ return 11;
+ if (*(page1 - PAGE_SIZE) != 0)
+ return 12;
+ if (*(page2 + PAGE_SIZE) != 0)
+ return 13;
+ if (*(page2 - PAGE_SIZE) != 0)
+ return 14;
+#endif
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/dmabuf-heaps/config b/tools/testing/selftests/dmabuf-heaps/config
new file mode 100644
index 00000000000000..be091f1cdfa04e
--- /dev/null
+++ b/tools/testing/selftests/dmabuf-heaps/config
@@ -0,0 +1,3 @@
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
+CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/drivers/net/netdevsim/settings b/tools/testing/selftests/drivers/net/netdevsim/settings
new file mode 100644
index 00000000000000..a62d2fa1275c6b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/settings
@@ -0,0 +1 @@
+timeout=600
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
index b1ede624986676..b7c8f29c09a978 100644
--- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -18,7 +18,7 @@ echo 'sched:*' > set_event
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
@@ -29,7 +29,7 @@ echo 1 > events/sched/enable
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -lt 3 ]; then
fail "at least fork, exec and exit events should be recorded"
fi
@@ -40,7 +40,7 @@ echo 0 > events/sched/enable
yield
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
if [ $count -ne 0 ]; then
fail "any of scheduler events should not be recorded"
fi
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index 2de7c61d1ae308..3f74c09c56b624 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -24,7 +24,7 @@ echo 0 > events/enable
echo "Get the most frequently calling function"
sample_events
-target_func=`cut -d: -f3 trace | sed 's/call_site=\([^+]*\)+0x.*/\1/' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
+target_func=`cat trace | grep -o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
if [ -z "$target_func" ]; then
exit_fail
fi
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 541bf192e30e6b..14bbab0cce1352 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -51,6 +51,7 @@
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
+#include <sys/utsname.h>
#endif
#ifndef ARRAY_SIZE
@@ -79,6 +80,9 @@
#define KSFT_XPASS 3
#define KSFT_SKIP 4
+#ifndef __noreturn
+#define __noreturn __attribute__((__noreturn__))
+#endif
#define __printf(a, b) __attribute__((format(printf, a, b)))
/* counters */
@@ -288,24 +292,26 @@ void ksft_test_result_code(int exit_code, const char *test_name,
}
/* Docs seem to call for double space if directive is absent */
- if (!directive[0] && msg[0])
+ if (!directive[0] && msg)
directive = " # ";
- va_start(args, msg);
printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive);
errno = saved_errno;
- vprintf(msg, args);
+ if (msg) {
+ va_start(args, msg);
+ vprintf(msg, args);
+ va_end(args);
+ }
printf("\n");
- va_end(args);
}
-static inline int ksft_exit_pass(void)
+static inline __noreturn int ksft_exit_pass(void)
{
ksft_print_cnts();
exit(KSFT_PASS);
}
-static inline int ksft_exit_fail(void)
+static inline __noreturn int ksft_exit_fail(void)
{
ksft_print_cnts();
exit(KSFT_FAIL);
@@ -332,7 +338,7 @@ static inline int ksft_exit_fail(void)
ksft_cnt.ksft_xfail + \
ksft_cnt.ksft_xskip)
-static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -347,19 +353,19 @@ static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
exit(KSFT_FAIL);
}
-static inline int ksft_exit_xfail(void)
+static inline __noreturn int ksft_exit_xfail(void)
{
ksft_print_cnts();
exit(KSFT_XFAIL);
}
-static inline int ksft_exit_xpass(void)
+static inline __noreturn int ksft_exit_xpass(void)
{
ksft_print_cnts();
exit(KSFT_XPASS);
}
-static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -388,4 +394,21 @@ static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
exit(KSFT_SKIP);
}
+static inline int ksft_min_kernel_version(unsigned int min_major,
+ unsigned int min_minor)
+{
+#ifdef NOLIBC
+ ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n");
+ return 0;
+#else
+ unsigned int major, minor;
+ struct utsname info;
+
+ if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
+ ksft_exit_fail_msg("Can't parse kernel version\n");
+
+ return major > min_major || (major == min_major && minor >= min_minor);
+#endif
+}
+
#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 4fd735e48ee7ee..ba3ddeda24bf52 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -383,6 +383,7 @@
FIXTURE_DATA(fixture_name) self; \
pid_t child = 1; \
int status = 0; \
+ bool jmp = false; \
memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
if (setjmp(_metadata->env) == 0) { \
/* Use the same _metadata. */ \
@@ -399,8 +400,10 @@
_metadata->exit_code = KSFT_FAIL; \
} \
} \
+ else \
+ jmp = true; \
if (child == 0) { \
- if (_metadata->setup_completed && !_metadata->teardown_parent) \
+ if (_metadata->setup_completed && !_metadata->teardown_parent && !jmp) \
fixture_name##_teardown(_metadata, &self, variant->data); \
_exit(0); \
} \
@@ -1202,7 +1205,7 @@ void __run_test(struct __fixture_metadata *f,
diagnostic = "unknown";
ksft_test_result_code(t->exit_code, test_name,
- diagnostic ? "%s" : "", diagnostic);
+ diagnostic ? "%s" : NULL, diagnostic);
}
static int test_harness_run(int argc, char **argv)
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index ddba2c2fb5deb1..4eaba83cdcf3d2 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -135,8 +135,8 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
irq_iter = READ_ONCE(shared_data->nr_iter);
__GUEST_ASSERT(config_iter + 1 == irq_iter,
- "config_iter + 1 = 0x%lx, irq_iter = 0x%lx.\n"
- " Guest timer interrupt was not trigged within the specified\n"
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
" interval, try to increase the error margin by [-e] option.\n",
config_iter + 1, irq_iter);
}
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 3bd03b088dda60..81ce37ec407dd1 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -1037,8 +1037,19 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_property property,
uint32_t value);
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+
+static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+ return *((&entry->eax) + feature.reg) & BIT(feature.bit);
+}
+
void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_feature feature,
bool set);
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index e22848f747c015..0f9cabd99fd451 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -60,7 +60,7 @@ static void guest_run(struct test_vcpu_shared_data *shared_data)
irq_iter = READ_ONCE(shared_data->nr_iter);
__GUEST_ASSERT(config_iter + 1 == irq_iter,
"config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
- " Guest timer interrupt was not trigged within the specified\n"
+ " Guest timer interrupt was not triggered within the specified\n"
" interval, try to increase the error margin by [-e] option.\n",
config_iter + 1, irq_iter);
}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 9e2879af7c201f..40cc59f4e65013 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -133,6 +133,43 @@ static void enter_guest(struct kvm_vcpu *vcpu)
}
}
+static void test_pv_unhalt(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_cpuid_entry2 *ent;
+ u32 kvm_sig_old;
+
+ pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+
+ TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS);
+
+ /* KVM_PV_UNHALT test */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+
+ TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
+
+ /* Make sure KVM clears vcpu->arch.kvm_cpuid */
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ kvm_sig_old = ent->ebx;
+ ent->ebx = 0xdeadbeef;
+ vcpu_set_cpuid(vcpu);
+
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+ ent->ebx = kvm_sig_old;
+ vcpu_set_cpuid(vcpu);
+
+ TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+ "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+
+ /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
+
+ kvm_vm_free(vm);
+}
+
int main(void)
{
struct kvm_vcpu *vcpu;
@@ -151,4 +188,6 @@ int main(void)
enter_guest(vcpu);
kvm_vm_free(vm);
+
+ test_pv_unhalt();
}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index c02990bbd56f4c..9007c420d52c52 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -3,7 +3,7 @@
#include <stdbool.h>
#include <sys/mman.h>
#include <err.h>
-#include <string.h> /* ffsl() */
+#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */
#define BIT_ULL(nr) (1ULL << (nr))
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index a2662348cdb1a2..b7b54d646b937c 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -6,7 +6,9 @@
#include "../kselftest_harness.h"
-struct in6_addr in6addr_v4mapped_any = {
+static const __u32 in4addr_any = INADDR_ANY;
+static const __u32 in4addr_loopback = INADDR_LOOPBACK;
+static const struct in6_addr in6addr_v4mapped_any = {
.s6_addr = {
0, 0, 0, 0,
0, 0, 0, 0,
@@ -14,8 +16,7 @@ struct in6_addr in6addr_v4mapped_any = {
0, 0, 0, 0
}
};
-
-struct in6_addr in6addr_v4mapped_loopback = {
+static const struct in6_addr in6addr_v4mapped_loopback = {
.s6_addr = {
0, 0, 0, 0,
0, 0, 0, 0,
@@ -24,137 +25,785 @@ struct in6_addr in6addr_v4mapped_loopback = {
}
};
+#define NR_SOCKETS 8
+
FIXTURE(bind_wildcard)
{
- struct sockaddr_in addr4;
- struct sockaddr_in6 addr6;
+ int fd[NR_SOCKETS];
+ socklen_t addrlen[NR_SOCKETS];
+ union {
+ struct sockaddr addr;
+ struct sockaddr_in addr4;
+ struct sockaddr_in6 addr6;
+ } addr[NR_SOCKETS];
};
FIXTURE_VARIANT(bind_wildcard)
{
- const __u32 addr4_const;
- const struct in6_addr *addr6_const;
- int expected_errno;
+ sa_family_t family[2];
+ const void *addr[2];
+ bool ipv6_only[2];
+
+ /* 6 bind() calls below follow two bind() for the defined 2 addresses:
+ *
+ * 0.0.0.0
+ * 127.0.0.1
+ * ::
+ * ::1
+ * ::ffff:0.0.0.0
+ * ::ffff:127.0.0.1
+ */
+ int expected_errno[NR_SOCKETS];
+ int expected_reuse_errno[NR_SOCKETS];
+};
+
+/* (IPv4, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v4_local)
+{
+ .family = {AF_INET, AF_INET},
+ .addr = {&in4addr_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v4_any)
+{
+ .family = {AF_INET, AF_INET},
+ .addr = {&in4addr_loopback, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
+/* (IPv4, IPv6) */
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any_only)
+{
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_loopback,
- .expected_errno = 0,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_v4mapped_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_v4mapped_loopback,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any_only)
+{
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_loopback,
- .expected_errno = 0,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_v4mapped_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_v4mapped_loopback,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_loopback, &in4addr_any},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_loopback, &in4addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_any, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_loopback, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_loopback, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv6) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {true, true},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+static void setup_addr(FIXTURE_DATA(bind_wildcard) *self, int i,
+ int family, const void *addr_const)
+{
+ if (family == AF_INET) {
+ struct sockaddr_in *addr4 = &self->addr[i].addr4;
+ const __u32 *addr4_const = addr_const;
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(0);
+ addr4->sin_addr.s_addr = htonl(*addr4_const);
+
+ self->addrlen[i] = sizeof(struct sockaddr_in);
+ } else {
+ struct sockaddr_in6 *addr6 = &self->addr[i].addr6;
+ const struct in6_addr *addr6_const = addr_const;
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(0);
+ addr6->sin6_addr = *addr6_const;
+
+ self->addrlen[i] = sizeof(struct sockaddr_in6);
+ }
+}
+
FIXTURE_SETUP(bind_wildcard)
{
- self->addr4.sin_family = AF_INET;
- self->addr4.sin_port = htons(0);
- self->addr4.sin_addr.s_addr = htonl(variant->addr4_const);
+ setup_addr(self, 0, variant->family[0], variant->addr[0]);
+ setup_addr(self, 1, variant->family[1], variant->addr[1]);
+
+ setup_addr(self, 2, AF_INET, &in4addr_any);
+ setup_addr(self, 3, AF_INET, &in4addr_loopback);
- self->addr6.sin6_family = AF_INET6;
- self->addr6.sin6_port = htons(0);
- self->addr6.sin6_addr = *variant->addr6_const;
+ setup_addr(self, 4, AF_INET6, &in6addr_any);
+ setup_addr(self, 5, AF_INET6, &in6addr_loopback);
+ setup_addr(self, 6, AF_INET6, &in6addr_v4mapped_any);
+ setup_addr(self, 7, AF_INET6, &in6addr_v4mapped_loopback);
}
FIXTURE_TEARDOWN(bind_wildcard)
{
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ close(self->fd[i]);
}
-void bind_sockets(struct __test_metadata *_metadata,
- FIXTURE_DATA(bind_wildcard) *self,
- int expected_errno,
- struct sockaddr *addr1, socklen_t addrlen1,
- struct sockaddr *addr2, socklen_t addrlen2)
+void bind_socket(struct __test_metadata *_metadata,
+ FIXTURE_DATA(bind_wildcard) *self,
+ const FIXTURE_VARIANT(bind_wildcard) *variant,
+ int i, int reuse)
{
- int fd[2];
int ret;
- fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0);
- ASSERT_GT(fd[0], 0);
+ self->fd[i] = socket(self->addr[i].addr.sa_family, SOCK_STREAM, 0);
+ ASSERT_GT(self->fd[i], 0);
- ret = bind(fd[0], addr1, addrlen1);
- ASSERT_EQ(ret, 0);
+ if (i < 2 && variant->ipv6_only[i]) {
+ ret = setsockopt(self->fd[i], SOL_IPV6, IPV6_V6ONLY, &(int){1}, sizeof(int));
+ ASSERT_EQ(ret, 0);
+ }
- ret = getsockname(fd[0], addr1, &addrlen1);
- ASSERT_EQ(ret, 0);
+ if (i < 2 && reuse) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, reuse, &(int){1}, sizeof(int));
+ ASSERT_EQ(ret, 0);
+ }
- ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port;
+ self->addr[i].addr4.sin_port = self->addr[0].addr4.sin_port;
- fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0);
- ASSERT_GT(fd[1], 0);
+ ret = bind(self->fd[i], &self->addr[i].addr, self->addrlen[i]);
- ret = bind(fd[1], addr2, addrlen2);
- if (expected_errno) {
- ASSERT_EQ(ret, -1);
- ASSERT_EQ(errno, expected_errno);
+ if (reuse) {
+ if (variant->expected_reuse_errno[i]) {
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, variant->expected_reuse_errno[i]);
+ } else {
+ ASSERT_EQ(ret, 0);
+ }
} else {
+ if (variant->expected_errno[i]) {
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, variant->expected_errno[i]);
+ } else {
+ ASSERT_EQ(ret, 0);
+ }
+ }
+
+ if (i == 0) {
+ ret = getsockname(self->fd[0], &self->addr[0].addr, &self->addrlen[0]);
ASSERT_EQ(ret, 0);
}
+}
- close(fd[1]);
- close(fd[0]);
+TEST_F(bind_wildcard, plain)
+{
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, 0);
}
-TEST_F(bind_wildcard, v4_v6)
+TEST_F(bind_wildcard, reuseaddr)
{
- bind_sockets(_metadata, self, variant->expected_errno,
- (struct sockaddr *)&self->addr4, sizeof(self->addr4),
- (struct sockaddr *)&self->addr6, sizeof(self->addr6));
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, SO_REUSEADDR);
}
-TEST_F(bind_wildcard, v6_v4)
+TEST_F(bind_wildcard, reuseport)
{
- bind_sockets(_metadata, self, variant->expected_errno,
- (struct sockaddr *)&self->addr6, sizeof(self->addr6),
- (struct sockaddr *)&self->addr4, sizeof(self->addr4));
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, SO_REUSEPORT);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 4c424855482629..4131f3263a4826 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -383,12 +383,14 @@ do_transfer()
local stat_cookierx_last
local stat_csum_err_s
local stat_csum_err_c
+ local stat_tcpfb_last_l
stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
@@ -457,11 +459,13 @@ do_transfer()
local stat_cookietx_now
local stat_cookierx_now
local stat_ooo_now
+ local stat_tcpfb_now_l
stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
@@ -508,6 +512,11 @@ do_transfer()
fi
fi
+ if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+ mptcp_lib_pr_fail "unexpected fallback to TCP"
+ rets=1
+ fi
+
if [ $cookies -eq 2 ];then
if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
extra+=" WARN: CookieSent: did not advance"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 5e9211e8982568..e4403236f65548 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -729,7 +729,7 @@ pm_nl_check_endpoint()
[ -n "$_flags" ]; flags="flags $_flags"
shift
elif [ $1 = "dev" ]; then
- [ -n "$2" ]; dev="dev $1"
+ [ -n "$2" ]; dev="dev $2"
shift
elif [ $1 = "id" ]; then
_id=$2
@@ -3610,6 +3610,8 @@ endpoint_tests()
local tests_pid=$!
wait_mpj $ns2
+ pm_nl_check_endpoint "creation" \
+ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
chk_subflow_nr "before delete" 2
chk_mptcp_info subflows 1 subflows 1
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
index 7c5b12664b03b0..bfb07dc495186d 100644
--- a/tools/testing/selftests/net/reuseaddr_conflict.c
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -109,6 +109,6 @@ int main(void)
fd1 = open_port(0, 1);
if (fd1 >= 0)
error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
- fprintf(stderr, "Success");
+ fprintf(stderr, "Success\n");
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c
index 2fb6dd8adba694..8b984fa042869e 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/proc.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c
@@ -86,7 +86,7 @@ static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line)
pos = strchr(line, ' ') + 1;
- if (fscanf(fnetstat, type->header_name) == EOF)
+ if (fscanf(fnetstat, "%[^ :]", type->header_name) == EOF)
test_error("fscanf(%s)", type->header_name);
if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':')
test_error("Unexpected netstat format (%c)", tmp);
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
index 92276f916f2f30..e408b9243b2c5a 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/setup.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -17,37 +17,37 @@ static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER;
void __test_msg(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_print_msg(buf);
+ ksft_print_msg("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_ok(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_pass(buf);
+ ksft_test_result_pass("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_fail(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_fail(buf);
+ ksft_test_result_fail("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_xfail(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_xfail(buf);
+ ksft_test_result_xfail("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_error(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_error(buf);
+ ksft_test_result_error("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
void __test_skip(const char *buf)
{
pthread_mutex_lock(&ksft_print_lock);
- ksft_test_result_skip(buf);
+ ksft_test_result_skip("%s", buf);
pthread_mutex_unlock(&ksft_print_lock);
}
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
index 7df8b8700e39e9..a2fe88d35ac06e 100644
--- a/tools/testing/selftests/net/tcp_ao/rst.c
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -256,8 +256,6 @@ static int test_wait_fds(int sk[], size_t nr, bool is_writable[],
static void test_client_active_rst(unsigned int port)
{
- /* one in queue, another accept()ed */
- unsigned int wait_for = backlog + 2;
int i, sk[3], err;
bool is_writable[ARRAY_SIZE(sk)] = {false};
unsigned int last = ARRAY_SIZE(sk) - 1;
@@ -275,16 +273,20 @@ static void test_client_active_rst(unsigned int port)
for (i = 0; i < last; i++) {
err = _test_connect_socket(sk[i], this_ip_dest, port,
(i == 0) ? TEST_TIMEOUT_SEC : -1);
-
if (err < 0)
test_error("failed to connect()");
}
- synchronize_threads(); /* 2: connection accept()ed, another queued */
- err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC);
+ synchronize_threads(); /* 2: two connections: one accept()ed, another queued */
+ err = test_wait_fds(sk, last, is_writable, last, TEST_TIMEOUT_SEC);
if (err < 0)
test_error("test_wait_fds(): %d", err);
+ /* async connect() with third sk to get into request_sock_queue */
+ err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+ if (err < 0)
+ test_error("failed to connect()");
+
synchronize_threads(); /* 3: close listen socket */
if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
test_fail("Failed to send data on connected socket");
@@ -292,13 +294,14 @@ static void test_client_active_rst(unsigned int port)
test_ok("Verified established tcp connection");
synchronize_threads(); /* 4: finishing up */
- err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
- if (err < 0)
- test_error("failed to connect()");
synchronize_threads(); /* 5: closed active sk */
- err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL,
- wait_for, TEST_TIMEOUT_SEC);
+ /*
+ * Wait for 2 connections: one accepted, another in the accept queue,
+ * the one in request_sock_queue won't get fully established, so
+ * doesn't receive an active RST, see inet_csk_listen_stop().
+ */
+ err = test_wait_fds(sk, last, NULL, last, TEST_TIMEOUT_SEC);
if (err < 0)
test_error("select(): %d", err);
diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
index 452de131fa3a9c..517930f9721bd9 100644
--- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
+++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
@@ -21,7 +21,7 @@ static void make_listen(int sk)
static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
const char *tst)
{
- struct tcp_ao_info_opt tmp;
+ struct tcp_ao_info_opt tmp = {};
socklen_t len = sizeof(tmp);
if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len))
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
index 74ff9fb2a6f0e1..58da5de99ac451 100755
--- a/tools/testing/selftests/net/test_vxlan_mdb.sh
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -1177,6 +1177,7 @@ encap_params_common()
local plen=$1; shift
local enc_ethtype=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
@@ -1195,11 +1196,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - no match"
@@ -1212,20 +1213,20 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - no match"
run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - match"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - no match"
@@ -1238,11 +1239,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - no match"
@@ -1250,11 +1251,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - no match"
@@ -1272,6 +1273,7 @@ encap_params_ipv4_ipv4()
local plen=32
local enc_ethtype="ip"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1279,7 +1281,7 @@ encap_params_ipv4_ipv4()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}
encap_params_ipv6_ipv4()
@@ -1291,6 +1293,7 @@ encap_params_ipv6_ipv4()
local plen=32
local enc_ethtype="ip"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1298,7 +1301,7 @@ encap_params_ipv6_ipv4()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}
encap_params_ipv4_ipv6()
@@ -1310,6 +1313,7 @@ encap_params_ipv4_ipv6()
local plen=128
local enc_ethtype="ipv6"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1317,7 +1321,7 @@ encap_params_ipv4_ipv6()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}
encap_params_ipv6_ipv6()
@@ -1329,6 +1333,7 @@ encap_params_ipv6_ipv6()
local plen=128
local enc_ethtype="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1336,7 +1341,7 @@ encap_params_ipv6_ipv6()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}
starg_exclude_ir_common()
@@ -1347,6 +1352,7 @@ starg_exclude_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1368,14 +1374,14 @@ starg_exclude_ir_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"
# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1385,14 +1391,14 @@ starg_exclude_ir_common()
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"
# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1407,6 +1413,7 @@ starg_exclude_ir_ipv4_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1415,7 +1422,7 @@ starg_exclude_ir_ipv4_ipv4()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_exclude_ir_ipv6_ipv4()
@@ -1426,6 +1433,7 @@ starg_exclude_ir_ipv6_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1434,7 +1442,7 @@ starg_exclude_ir_ipv6_ipv4()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_exclude_ir_ipv4_ipv6()
@@ -1445,6 +1453,7 @@ starg_exclude_ir_ipv4_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1453,7 +1462,7 @@ starg_exclude_ir_ipv4_ipv6()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_exclude_ir_ipv6_ipv6()
@@ -1464,6 +1473,7 @@ starg_exclude_ir_ipv6_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1472,7 +1482,7 @@ starg_exclude_ir_ipv6_ipv6()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_include_ir_common()
@@ -1483,6 +1493,7 @@ starg_include_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1504,14 +1515,14 @@ starg_include_ir_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"
# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1521,14 +1532,14 @@ starg_include_ir_common()
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"
# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1543,6 +1554,7 @@ starg_include_ir_ipv4_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1551,7 +1563,7 @@ starg_include_ir_ipv4_ipv4()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_include_ir_ipv6_ipv4()
@@ -1562,6 +1574,7 @@ starg_include_ir_ipv6_ipv4()
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1570,7 +1583,7 @@ starg_include_ir_ipv6_ipv4()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_include_ir_ipv4_ipv6()
@@ -1581,6 +1594,7 @@ starg_include_ir_ipv4_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1589,7 +1603,7 @@ starg_include_ir_ipv4_ipv6()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_include_ir_ipv6_ipv6()
@@ -1600,6 +1614,7 @@ starg_include_ir_ipv6_ipv6()
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1608,7 +1623,7 @@ starg_include_ir_ipv6_ipv6()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_exclude_p2mp_common()
@@ -1618,6 +1633,7 @@ starg_exclude_p2mp_common()
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1635,12 +1651,12 @@ starg_exclude_p2mp_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0"
# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"
# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"
@@ -1648,7 +1664,7 @@ starg_exclude_p2mp_common()
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
@@ -1660,6 +1676,7 @@ starg_exclude_p2mp_ipv4_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1667,7 +1684,7 @@ starg_exclude_p2mp_ipv4_ipv4()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1678,6 +1695,7 @@ starg_exclude_p2mp_ipv6_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1685,7 +1703,7 @@ starg_exclude_p2mp_ipv6_ipv4()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1696,6 +1714,7 @@ starg_exclude_p2mp_ipv4_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1703,7 +1722,7 @@ starg_exclude_p2mp_ipv4_ipv6()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1714,6 +1733,7 @@ starg_exclude_p2mp_ipv6_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1721,7 +1741,7 @@ starg_exclude_p2mp_ipv6_ipv6()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1732,6 +1752,7 @@ starg_include_p2mp_common()
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1749,12 +1770,12 @@ starg_include_p2mp_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0"
# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"
# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"
@@ -1762,7 +1783,7 @@ starg_include_p2mp_common()
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
@@ -1774,6 +1795,7 @@ starg_include_p2mp_ipv4_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1781,7 +1803,7 @@ starg_include_p2mp_ipv4_ipv4()
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1792,6 +1814,7 @@ starg_include_p2mp_ipv6_ipv4()
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1799,7 +1822,7 @@ starg_include_p2mp_ipv6_ipv4()
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1810,6 +1833,7 @@ starg_include_p2mp_ipv4_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1817,7 +1841,7 @@ starg_include_p2mp_ipv4_ipv6()
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
@@ -1828,6 +1852,7 @@ starg_include_p2mp_ipv6_ipv6()
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1835,7 +1860,7 @@ starg_include_p2mp_ipv6_ipv6()
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1847,6 +1872,7 @@ egress_vni_translation_common()
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
@@ -1882,20 +1908,20 @@ egress_vni_translation_common()
# Make sure that packets sent from the first VTEP over VLAN 10 are
# received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on
# the second VTEP, since it is configured as PVID.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - PVID configured"
# Remove PVID flag from VLAN 4000 on the second VTEP and make sure
# packets are no longer received by the SVI interface.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - no PVID configured"
# Reconfigure the PVID and make sure packets are received again.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2
log_test $? 0 "Egress VNI translation - PVID reconfigured"
}
@@ -1908,6 +1934,7 @@ egress_vni_translation_ipv4_ipv4()
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1915,7 +1942,7 @@ egress_vni_translation_ipv4_ipv4()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}
egress_vni_translation_ipv6_ipv4()
@@ -1926,6 +1953,7 @@ egress_vni_translation_ipv6_ipv4()
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1933,7 +1961,7 @@ egress_vni_translation_ipv6_ipv4()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}
egress_vni_translation_ipv4_ipv6()
@@ -1944,6 +1972,7 @@ egress_vni_translation_ipv4_ipv6()
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1951,7 +1980,7 @@ egress_vni_translation_ipv4_ipv6()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}
egress_vni_translation_ipv6_ipv6()
@@ -1962,6 +1991,7 @@ egress_vni_translation_ipv6_ipv6()
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1969,7 +1999,7 @@ egress_vni_translation_ipv6_ipv6()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}
all_zeros_mdb_common()
@@ -1982,12 +2012,18 @@ all_zeros_mdb_common()
local vtep4_ip=$1; shift
local plen=$1; shift
local ipv4_grp=239.1.1.1
+ local ipv4_grp_dmac=01:00:5e:01:01:01
local ipv4_unreg_grp=239.2.2.2
+ local ipv4_unreg_grp_dmac=01:00:5e:02:02:02
local ipv4_ll_grp=224.0.0.100
+ local ipv4_ll_grp_dmac=01:00:5e:00:00:64
local ipv4_src=192.0.2.129
local ipv6_grp=ff0e::1
+ local ipv6_grp_dmac=33:33:00:00:00:01
local ipv6_unreg_grp=ff0e::2
+ local ipv6_unreg_grp_dmac=33:33:00:00:00:02
local ipv6_ll_grp=ff02::1
+ local ipv6_ll_grp_dmac=33:33:00:00:00:01
local ipv6_src=2001:db8:100::1
# Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic
@@ -2023,7 +2059,7 @@ all_zeros_mdb_common()
# Send registered IPv4 multicast and make sure it only arrives to the
# first VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Registered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
@@ -2031,7 +2067,7 @@ all_zeros_mdb_common()
# Send unregistered IPv4 multicast that is not link-local and make sure
# it arrives to the first and second VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Unregistered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -2039,7 +2075,7 @@ all_zeros_mdb_common()
# Send IPv4 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Link-local IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -2074,7 +2110,7 @@ all_zeros_mdb_common()
# Send registered IPv6 multicast and make sure it only arrives to the
# third VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 1
log_test $? 0 "Registered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 0
@@ -2082,7 +2118,7 @@ all_zeros_mdb_common()
# Send unregistered IPv6 multicast that is not link-local and make sure
# it arrives to the third and fourth VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Unregistered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
@@ -2090,7 +2126,7 @@ all_zeros_mdb_common()
# Send IPv6 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Link-local IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
@@ -2165,6 +2201,7 @@ mdb_fdb_common()
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
@@ -2188,7 +2225,7 @@ mdb_fdb_common()
# Send IP multicast traffic and make sure it is forwarded by the MDB
# and only arrives to the first VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
@@ -2205,7 +2242,7 @@ mdb_fdb_common()
# Remove the MDB entry and make sure that IP multicast is now forwarded
# by the FDB to the second VTEP.
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 2
@@ -2221,14 +2258,15 @@ mdb_fdb_ipv4_ipv4()
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}
mdb_fdb_ipv6_ipv4()
@@ -2240,14 +2278,15 @@ mdb_fdb_ipv6_ipv4()
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}
mdb_fdb_ipv4_ipv6()
@@ -2259,14 +2298,15 @@ mdb_fdb_ipv4_ipv6()
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}
mdb_fdb_ipv6_ipv6()
@@ -2278,14 +2318,15 @@ mdb_fdb_ipv6_ipv6()
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}
mdb_grp1_loop()
@@ -2320,7 +2361,9 @@ mdb_torture_common()
local vtep1_ip=$1; shift
local vtep2_ip=$1; shift
local grp1=$1; shift
+ local grp1_dmac=$1; shift
local grp2=$1; shift
+ local grp2_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
local pid1
@@ -2345,9 +2388,9 @@ mdb_torture_common()
pid1=$!
mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 &
pid2=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid3=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid4=$!
sleep 30
@@ -2363,15 +2406,17 @@ mdb_torture_ipv4_ipv4()
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129
echo
echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}
mdb_torture_ipv6_ipv4()
@@ -2380,15 +2425,17 @@ mdb_torture_ipv6_ipv4()
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1
echo
echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}
mdb_torture_ipv4_ipv6()
@@ -2397,15 +2444,17 @@ mdb_torture_ipv4_ipv6()
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129
echo
echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}
mdb_torture_ipv6_ipv6()
@@ -2414,15 +2463,17 @@ mdb_torture_ipv6_ipv6()
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1
echo
echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}
################################################################################
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index c6eda21cefb6b8..f27a12d2a2c997 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -1615,6 +1615,40 @@ TEST_F(tls, getsockopt)
EXPECT_EQ(errno, EINVAL);
}
+TEST_F(tls, recv_efault)
+{
+ char *rec1 = "1111111111";
+ char *rec2 = "2222222222";
+ struct msghdr hdr = {};
+ struct iovec iov[2];
+ char recv_mem[12];
+ int ret;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ EXPECT_EQ(send(self->fd, rec1, 10, 0), 10);
+ EXPECT_EQ(send(self->fd, rec2, 10, 0), 10);
+
+ iov[0].iov_base = recv_mem;
+ iov[0].iov_len = sizeof(recv_mem);
+ iov[1].iov_base = NULL; /* broken iov to make process_rx_list fail */
+ iov[1].iov_len = 1;
+
+ hdr.msg_iovlen = 2;
+ hdr.msg_iov = iov;
+
+ EXPECT_EQ(recv(self->cfd, recv_mem, 1, 0), 1);
+ EXPECT_EQ(recv_mem[0], rec1[0]);
+
+ ret = recvmsg(self->cfd, &hdr, 0);
+ EXPECT_LE(ret, sizeof(recv_mem));
+ EXPECT_GE(ret, 9);
+ EXPECT_EQ(memcmp(rec1, recv_mem, 9), 0);
+ if (ret > 9)
+ EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0);
+}
+
FIXTURE(tls_err)
{
int fd, cfd;
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 380cb15e942e42..83ed987cff340e 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -244,7 +244,7 @@ for family in 4 6; do
create_vxlan_pair
ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
- run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10
cleanup
# use NAT to circumvent GRO FWD check
@@ -258,13 +258,7 @@ for family in 4 6; do
# load arp cache before running the test to reduce the amount of
# stray traffic on top of the UDP tunnel
ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
- run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
- cleanup
-
- create_vxlan_pair
- run_bench "UDP tunnel fwd perf" $OL_NET$DST
- ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
- run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST
cleanup
done
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 1d975bf52af339..85b3baa3f7f341 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -34,7 +34,7 @@
#endif
#ifndef UDP_MAX_SEGMENTS
-#define UDP_MAX_SEGMENTS (1 << 6UL)
+#define UDP_MAX_SEGMENTS (1 << 7UL)
#endif
#define CONST_MTU_TEST 1500
diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings
index 6091b45d226baf..a953c96aa16e1e 100644
--- a/tools/testing/selftests/seccomp/settings
+++ b/tools/testing/selftests/seccomp/settings
@@ -1 +1 @@
-timeout=120
+timeout=180
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index d49dd3ffd0d96a..c001dd79179d5d 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end)
diff = end.tv_usec - start.tv_usec;
diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;
- if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+ if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
printf("Diff too high: %lld..", diff);
return -1;
}
@@ -184,80 +184,71 @@ static int check_timer_create(int which)
return 0;
}
-int remain;
-__thread int got_signal;
+static pthread_t ctd_thread;
+static volatile int ctd_count, ctd_failed;
-static void *distribution_thread(void *arg)
+static void ctd_sighandler(int sig)
{
- while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
- return NULL;
+ if (pthread_self() != ctd_thread)
+ ctd_failed = 1;
+ ctd_count--;
}
-static void distribution_handler(int nr)
+static void *ctd_thread_func(void *arg)
{
- if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED))
- __atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED);
-}
-
-/*
- * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
- * timer signals. This primarily tests that the kernel does not favour any one.
- */
-static int check_timer_distribution(void)
-{
- int err, i;
- timer_t id;
- const int nthreads = 10;
- pthread_t threads[nthreads];
struct itimerspec val = {
.it_value.tv_sec = 0,
.it_value.tv_nsec = 1000 * 1000,
.it_interval.tv_sec = 0,
.it_interval.tv_nsec = 1000 * 1000,
};
+ timer_t id;
- remain = nthreads + 1; /* worker threads + this thread */
- signal(SIGALRM, distribution_handler);
- err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
- if (err < 0) {
- ksft_perror("Can't create timer");
- return -1;
- }
- err = timer_settime(id, 0, &val, NULL);
- if (err < 0) {
- ksft_perror("Can't set timer");
- return -1;
- }
+ /* 1/10 seconds to ensure the leader sleeps */
+ usleep(10000);
- for (i = 0; i < nthreads; i++) {
- err = pthread_create(&threads[i], NULL, distribution_thread,
- NULL);
- if (err) {
- ksft_print_msg("Can't create thread: %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
- }
+ ctd_count = 100;
+ if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
+ return "Can't create timer\n";
+ if (timer_settime(id, 0, &val, NULL))
+ return "Can't set timer\n";
- /* Wait for all threads to receive the signal. */
- while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
+ while (ctd_count > 0 && !ctd_failed)
+ ;
- for (i = 0; i < nthreads; i++) {
- err = pthread_join(threads[i], NULL);
- if (err) {
- ksft_print_msg("Can't join thread: %s (%d)\n",
- strerror(errno), errno);
- return -1;
- }
- }
+ if (timer_delete(id))
+ return "Can't delete timer\n";
- if (timer_delete(id)) {
- ksft_perror("Can't delete timer");
- return -1;
- }
+ return NULL;
+}
+
+/*
+ * Test that only the running thread receives the timer signal.
+ */
+static int check_timer_distribution(void)
+{
+ const char *errmsg;
- ksft_test_result_pass("check_timer_distribution\n");
+ signal(SIGALRM, ctd_sighandler);
+
+ errmsg = "Can't create thread\n";
+ if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
+ goto err;
+
+ errmsg = "Can't join thread\n";
+ if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
+ goto err;
+
+ if (!ctd_failed)
+ ksft_test_result_pass("check signal distribution\n");
+ else if (ksft_min_kernel_version(6, 3))
+ ksft_test_result_fail("check signal distribution\n");
+ else
+ ksft_test_result_skip("check signal distribution (old kernel)\n");
return 0;
+err:
+ ksft_print_msg("%s", errmsg);
+ return -1;
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index 48b9a803235a80..d13ebde203221a 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -21,9 +21,6 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
-
-
-
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
@@ -62,45 +59,47 @@ int clear_time_state(void)
#define NUM_FREQ_OUTOFRANGE 4
#define NUM_FREQ_INVALID 2
+#define SHIFTED_PPM (1 << 16)
+
long valid_freq[NUM_FREQ_VALID] = {
- -499<<16,
- -450<<16,
- -400<<16,
- -350<<16,
- -300<<16,
- -250<<16,
- -200<<16,
- -150<<16,
- -100<<16,
- -75<<16,
- -50<<16,
- -25<<16,
- -10<<16,
- -5<<16,
- -1<<16,
+ -499 * SHIFTED_PPM,
+ -450 * SHIFTED_PPM,
+ -400 * SHIFTED_PPM,
+ -350 * SHIFTED_PPM,
+ -300 * SHIFTED_PPM,
+ -250 * SHIFTED_PPM,
+ -200 * SHIFTED_PPM,
+ -150 * SHIFTED_PPM,
+ -100 * SHIFTED_PPM,
+ -75 * SHIFTED_PPM,
+ -50 * SHIFTED_PPM,
+ -25 * SHIFTED_PPM,
+ -10 * SHIFTED_PPM,
+ -5 * SHIFTED_PPM,
+ -1 * SHIFTED_PPM,
-1000,
- 1<<16,
- 5<<16,
- 10<<16,
- 25<<16,
- 50<<16,
- 75<<16,
- 100<<16,
- 150<<16,
- 200<<16,
- 250<<16,
- 300<<16,
- 350<<16,
- 400<<16,
- 450<<16,
- 499<<16,
+ 1 * SHIFTED_PPM,
+ 5 * SHIFTED_PPM,
+ 10 * SHIFTED_PPM,
+ 25 * SHIFTED_PPM,
+ 50 * SHIFTED_PPM,
+ 75 * SHIFTED_PPM,
+ 100 * SHIFTED_PPM,
+ 150 * SHIFTED_PPM,
+ 200 * SHIFTED_PPM,
+ 250 * SHIFTED_PPM,
+ 300 * SHIFTED_PPM,
+ 350 * SHIFTED_PPM,
+ 400 * SHIFTED_PPM,
+ 450 * SHIFTED_PPM,
+ 499 * SHIFTED_PPM,
};
long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
- -1000<<16,
- -550<<16,
- 550<<16,
- 1000<<16,
+ -1000 * SHIFTED_PPM,
+ -550 * SHIFTED_PPM,
+ 550 * SHIFTED_PPM,
+ 1000 * SHIFTED_PPM,
};
#define LONG_MAX (~0UL>>1)
diff --git a/tools/testing/selftests/turbostat/defcolumns.py b/tools/testing/selftests/turbostat/defcolumns.py
new file mode 100755
index 00000000000000..d9b042097da7ad
--- /dev/null
+++ b/tools/testing/selftests/turbostat/defcolumns.py
@@ -0,0 +1,60 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+from shutil import which
+
+turbostat = which('turbostat')
+if turbostat is None:
+ print('Could not find turbostat binary')
+ exit(1)
+
+timeout = which('timeout')
+if timeout is None:
+ print('Could not find timeout binary')
+ exit(1)
+
+proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+
+#
+# By default --list reports also "usec" and "Time_Of_Day_Seconds" columns
+# which are only visible when running with --debug.
+#
+expected_columns_debug = proc_turbostat.stdout.replace(b',', b'\t').strip()
+expected_columns = expected_columns_debug.replace(b'usec\t', b'').replace(b'Time_Of_Day_Seconds\t', b'').replace(b'X2APIC\t', b'').replace(b'APIC\t', b'')
+
+#
+# Run turbostat with no options for 10 seconds and send SIGINT
+#
+timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '1s']
+turbostat_argv = [turbostat, '-i', '0.250']
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}')
+ exit(1)
+print('OK')
+
+#
+# Same, but with --debug
+#
+turbostat_argv.append('--debug')
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns_debug != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}')
+ exit(1)
+print('OK')