aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2024-01-08 09:33:34 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2024-01-08 09:33:34 +1100
commit41b81a87747024d4bacbaa36d7a85a5699508d92 (patch)
tree851a11f137be9464b91d5254ab595897f50a30ab
parentd59a04b8e254c13955af6f72a767574e6e432bae (diff)
parentdb32cf8e280b46726065c518e90761bb0229bacf (diff)
downloadlinux-next-41b81a87747024d4bacbaa36d7a85a5699508d92.tar.gz
Merge branch 'for-next/core' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
-rw-r--r--Documentation/admin-guide/perf/dwc_pcie_pmu.rst94
-rw-r--r--Documentation/admin-guide/perf/imx-ddr.rst45
-rw-r--r--Documentation/admin-guide/perf/index.rst1
-rw-r--r--Documentation/arch/arm64/arm-acpi.rst2
-rw-r--r--Documentation/arch/arm64/perf.rst72
-rw-r--r--Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml3
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/arm/kernel/perf_event_v6.c28
-rw-r--r--arch/arm/kernel/perf_event_v7.c50
-rw-r--r--arch/arm/kernel/perf_event_xscale.c44
-rw-r--r--arch/arm64/Kconfig2
-rw-r--r--arch/arm64/Makefile2
-rw-r--r--arch/arm64/boot/Makefile2
-rwxr-xr-xarch/arm64/boot/install.sh3
-rw-r--r--arch/arm64/include/asm/assembler.h2
-rw-r--r--arch/arm64/include/asm/cache.h6
-rw-r--r--arch/arm64/include/asm/cpufeature.h6
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h8
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h27
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h7
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h2
-rw-r--r--arch/arm64/include/asm/memory.h7
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h2
-rw-r--r--arch/arm64/include/asm/processor.h3
-rw-r--r--arch/arm64/include/asm/simd.h11
-rw-r--r--arch/arm64/include/asm/stacktrace/common.h19
-rw-r--r--arch/arm64/include/asm/stacktrace/nvhe.h2
-rw-r--r--arch/arm64/include/asm/sysreg.h25
-rw-r--r--arch/arm64/include/asm/thread_info.h1
-rw-r--r--arch/arm64/include/asm/tlb.h15
-rw-r--r--arch/arm64/include/asm/tlbflush.h100
-rw-r--r--arch/arm64/kernel/cpufeature.c160
-rw-r--r--arch/arm64/kernel/cpuinfo.c5
-rw-r--r--arch/arm64/kernel/fpsimd.c169
-rw-r--r--arch/arm64/kernel/head.S2
-rw-r--r--arch/arm64/kernel/idreg-override.c153
-rw-r--r--arch/arm64/kernel/irq.c7
-rw-r--r--arch/arm64/kernel/kaslr.c7
-rw-r--r--arch/arm64/kernel/pi/Makefile1
-rw-r--r--arch/arm64/kernel/smp.c12
-rw-r--r--arch/arm64/kernel/stacktrace.c146
-rw-r--r--arch/arm64/kernel/vdso32/Makefile8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c61
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c13
-rw-r--r--arch/arm64/kvm/pmu-emul.c8
-rw-r--r--arch/arm64/kvm/sys_regs.c4
-rw-r--r--arch/arm64/lib/copy_page.S11
-rw-r--r--arch/arm64/mm/mmu.c6
-rw-r--r--arch/arm64/tools/cpucaps2
-rw-r--r--arch/arm64/tools/sysreg325
-rw-r--r--drivers/base/arch_numa.c2
-rw-r--r--drivers/firmware/efi/libstub/Makefile.zboot4
-rw-r--r--drivers/infiniband/hw/erdma/erdma_hw.h2
-rw-r--r--drivers/pci/access.c12
-rw-r--r--drivers/pci/pcie/aspm.c65
-rw-r--r--drivers/perf/Kconfig7
-rw-r--r--drivers/perf/Makefile1
-rw-r--r--drivers/perf/apple_m1_cpu_pmu.c6
-rw-r--r--drivers/perf/arm-cmn.c2
-rw-r--r--drivers/perf/arm_dsu_pmu.c6
-rw-r--r--drivers/perf/arm_pmu.c12
-rw-r--r--drivers/perf/arm_pmuv3.c242
-rw-r--r--drivers/perf/arm_spe_pmu.c22
-rw-r--r--drivers/perf/dwc_pcie_pmu.c792
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c45
-rw-r--r--drivers/perf/fsl_imx9_ddr_perf.c6
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_uc_pmu.c4
-rw-r--r--include/asm-generic/numa.h2
-rw-r--r--include/linux/pci.h2
-rw-r--r--include/linux/pci_ids.h2
-rw-r--r--include/linux/perf/arm_pmu.h28
-rw-r--r--include/linux/perf/arm_pmuv3.h34
-rw-r--r--tools/include/perf/arm_pmuv3.h43
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c18
-rw-r--r--tools/testing/selftests/arm64/fp/sve-test.S10
-rw-r--r--tools/testing/selftests/arm64/fp/vec-syscfg.c14
-rw-r--r--tools/testing/selftests/arm64/fp/za-test.S6
-rw-r--r--tools/testing/selftests/arm64/fp/zt-test.S5
-rw-r--r--tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c5
80 files changed, 2292 insertions, 807 deletions
diff --git a/Documentation/admin-guide/perf/dwc_pcie_pmu.rst b/Documentation/admin-guide/perf/dwc_pcie_pmu.rst
new file mode 100644
index 00000000000000..d47cd229d7106f
--- /dev/null
+++ b/Documentation/admin-guide/perf/dwc_pcie_pmu.rst
@@ -0,0 +1,94 @@
+======================================================================
+Synopsys DesignWare Cores (DWC) PCIe Performance Monitoring Unit (PMU)
+======================================================================
+
+DesignWare Cores (DWC) PCIe PMU
+===============================
+
+The PMU is a PCIe configuration space register block provided by each PCIe Root
+Port in a Vendor-Specific Extended Capability named RAS D.E.S (Debug, Error
+injection, and Statistics).
+
+As the name indicates, the RAS DES capability supports system level
+debugging, AER error injection, and collection of statistics. To facilitate
+collection of statistics, Synopsys DesignWare Cores PCIe controller
+provides the following two features:
+
+- one 64-bit counter for Time Based Analysis (RX/TX data throughput and
+ time spent in each low-power LTSSM state) and
+- one 32-bit counter for Event Counting (error and non-error events for
+ a specified lane)
+
+Note: There is no interrupt for counter overflow.
+
+Time Based Analysis
+-------------------
+
+Using this feature you can obtain information regarding RX/TX data
+throughput and time spent in each low-power LTSSM state by the controller.
+The PMU measures data in two categories:
+
+- Group#0: Percentage of time the controller stays in LTSSM states.
+- Group#1: Amount of data processed (Units of 16 bytes).
+
+Lane Event counters
+-------------------
+
+Using this feature you can obtain Error and Non-Error information in
+specific lane by the controller. The PMU event is selected by all of:
+
+- Group i
+- Event j within the Group i
+- Lane k
+
+Some of the events only exist for specific configurations.
+
+DesignWare Cores (DWC) PCIe PMU Driver
+=======================================
+
+This driver adds PMU devices for each PCIe Root Port named based on the BDF of
+the Root Port. For example,
+
+ 30:03.0 PCI bridge: Device 1ded:8000 (rev 01)
+
+the PMU device name for this Root Port is dwc_rootport_3018.
+
+The DWC PCIe PMU driver registers a perf PMU driver, which provides
+description of available events and configuration options in sysfs, see
+/sys/bus/event_source/devices/dwc_rootport_{bdf}.
+
+The "format" directory describes format of the config fields of the
+perf_event_attr structure. The "events" directory provides configuration
+templates for all documented events. For example,
+"Rx_PCIe_TLP_Data_Payload" is an equivalent of "eventid=0x22,type=0x1".
+
+The "perf list" command shall list the available events from sysfs, e.g.::
+
+ $# perf list | grep dwc_rootport
+ <...>
+ dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/ [Kernel PMU event]
+ <...>
+ dwc_rootport_3018/rx_memory_read,lane=?/ [Kernel PMU event]
+
+Time Based Analysis Event Usage
+-------------------------------
+
+Example usage of counting PCIe RX TLP data payload (Units of bytes)::
+
+ $# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/
+
+The average RX/TX bandwidth can be calculated using the following formula:
+
+ PCIe RX Bandwidth = Rx_PCIe_TLP_Data_Payload / Measure_Time_Window
+ PCIe TX Bandwidth = Tx_PCIe_TLP_Data_Payload / Measure_Time_Window
+
+Lane Event Usage
+-------------------------------
+
+Each lane has the same event set and to avoid generating a list of hundreds
+of events, the user need to specify the lane ID explicitly, e.g.::
+
+ $# perf stat -a -e dwc_rootport_3018/rx_memory_read,lane=4/
+
+The driver does not support sampling, therefore "perf record" will not
+work. Per-task (without "-a") perf sessions are not supported.
diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst
index 90926d0fb8eca4..77418ae5a29075 100644
--- a/Documentation/admin-guide/perf/imx-ddr.rst
+++ b/Documentation/admin-guide/perf/imx-ddr.rst
@@ -13,8 +13,8 @@ is one register for each counter. Counter 0 is special in that it always counts
interrupt is raised. If any other counter overflows, it continues counting, and
no interrupt is raised.
-The "format" directory describes format of the config (event ID) and config1
-(AXI filtering) fields of the perf_event_attr structure, see /sys/bus/event_source/
+The "format" directory describes format of the config (event ID) and config1/2
+(AXI filter setting) fields of the perf_event_attr structure, see /sys/bus/event_source/
devices/imx8_ddr0/format/. The "events" directory describes the events types
hardware supported that can be used with perf tool, see /sys/bus/event_source/
devices/imx8_ddr0/events/. The "caps" directory describes filter features implemented
@@ -28,12 +28,11 @@ in DDR PMU, see /sys/bus/events_source/devices/imx8_ddr0/caps/.
AXI filtering is only used by CSV modes 0x41 (axid-read) and 0x42 (axid-write)
to count reading or writing matches filter setting. Filter setting is various
from different DRAM controller implementations, which is distinguished by quirks
-in the driver. You also can dump info from userspace, filter in "caps" directory
-indicates whether PMU supports AXI ID filter or not; enhanced_filter indicates
-whether PMU supports enhanced AXI ID filter or not. Value 0 for un-supported, and
-value 1 for supported.
+in the driver. You also can dump info from userspace, "caps" directory show the
+type of AXI filter (filter, enhanced_filter and super_filter). Value 0 for
+un-supported, and value 1 for supported.
-* With DDR_CAP_AXI_ID_FILTER quirk(filter: 1, enhanced_filter: 0).
+* With DDR_CAP_AXI_ID_FILTER quirk(filter: 1, enhanced_filter: 0, super_filter: 0).
Filter is defined with two configuration parts:
--AXI_ID defines AxID matching value.
--AXI_MASKING defines which bits of AxID are meaningful for the matching.
@@ -65,7 +64,37 @@ value 1 for supported.
perf stat -a -e imx8_ddr0/axid-read,axi_id=0x12/ cmd, which will monitor ARID=0x12
-* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk(filter: 1, enhanced_filter: 1).
+* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk(filter: 1, enhanced_filter: 1, super_filter: 0).
This is an extension to the DDR_CAP_AXI_ID_FILTER quirk which permits
counting the number of bytes (as opposed to the number of bursts) from DDR
read and write transactions concurrently with another set of data counters.
+
+* With DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER quirk(filter: 0, enhanced_filter: 0, super_filter: 1).
+ There is a limitation in previous AXI filter, it cannot filter different IDs
+ at the same time as the filter is shared between counters. This quirk is the
+ extension of AXI ID filter. One improvement is that counter 1-3 has their own
+ filter, means that it supports concurrently filter various IDs. Another
+ improvement is that counter 1-3 supports AXI PORT and CHANNEL selection. Support
+ selecting address channel or data channel.
+
+ Filter is defined with 2 configuration registers per counter 1-3.
+ --Counter N MASK COMP register - including AXI_ID and AXI_MASKING.
+ --Counter N MUX CNTL register - including AXI CHANNEL and AXI PORT.
+
+ - 0: address channel
+ - 1: data channel
+
+ PMU in DDR subsystem, only one single port0 exists, so axi_port is reserved
+ which should be 0.
+
+ .. code-block:: bash
+
+ perf stat -a -e imx8_ddr0/axid-read,axi_mask=0xMMMM,axi_id=0xDDDD,axi_channel=0xH/ cmd
+ perf stat -a -e imx8_ddr0/axid-write,axi_mask=0xMMMM,axi_id=0xDDDD,axi_channel=0xH/ cmd
+
+ .. note::
+
+ axi_channel is inverted in userspace, and it will be reverted in driver
+ automatically. So that users do not need specify axi_channel if want to
+ monitor data channel from DDR transactions, since data channel is more
+ meaningful.
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index a2e6f2c81146b0..f4a4513c526f05 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -19,6 +19,7 @@ Performance monitor support
arm_dsu_pmu
thunderx2-pmu
alibaba_pmu
+ dwc_pcie_pmu
nvidia-pmu
meson-ddr-pmu
cxl
diff --git a/Documentation/arch/arm64/arm-acpi.rst b/Documentation/arch/arm64/arm-acpi.rst
index a46c34fa96044c..e59e4505d0d999 100644
--- a/Documentation/arch/arm64/arm-acpi.rst
+++ b/Documentation/arch/arm64/arm-acpi.rst
@@ -130,7 +130,7 @@ When an Arm system boots, it can either have DT information, ACPI tables,
or in some very unusual cases, both. If no command line parameters are used,
the kernel will try to use DT for device enumeration; if there is no DT
present, the kernel will try to use ACPI tables, but only if they are present.
-In neither is available, the kernel will not boot. If acpi=force is used
+If neither is available, the kernel will not boot. If acpi=force is used
on the command line, the kernel will attempt to use ACPI tables first, but
fall back to DT if there are no ACPI tables present. The basic idea is that
the kernel will not fail to boot unless it absolutely has no other choice.
diff --git a/Documentation/arch/arm64/perf.rst b/Documentation/arch/arm64/perf.rst
index 1f87b57c233240..997fd716b82f76 100644
--- a/Documentation/arch/arm64/perf.rst
+++ b/Documentation/arch/arm64/perf.rst
@@ -164,3 +164,75 @@ and should be used to mask the upper bits as needed.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/arch/arm64/tests/user-events.c
.. _tools/lib/perf/tests/test-evsel.c:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/perf/tests/test-evsel.c
+
+Event Counting Threshold
+==========================================
+
+Overview
+--------
+
+FEAT_PMUv3_TH (Armv8.8) permits a PMU counter to increment only on
+events whose count meets a specified threshold condition. For example if
+threshold_compare is set to 2 ('Greater than or equal'), and the
+threshold is set to 2, then the PMU counter will now only increment by
+when an event would have previously incremented the PMU counter by 2 or
+more on a single processor cycle.
+
+To increment by 1 after passing the threshold condition instead of the
+number of events on that cycle, add the 'threshold_count' option to the
+commandline.
+
+How-to
+------
+
+These are the parameters for controlling the feature:
+
+.. list-table::
+ :header-rows: 1
+
+ * - Parameter
+ - Description
+ * - threshold
+ - Value to threshold the event by. A value of 0 means that
+ thresholding is disabled and the other parameters have no effect.
+ * - threshold_compare
+ - | Comparison function to use, with the following values supported:
+ |
+ | 0: Not-equal
+ | 1: Equals
+ | 2: Greater-than-or-equal
+ | 3: Less-than
+ * - threshold_count
+ - If this is set, count by 1 after passing the threshold condition
+ instead of the value of the event on this cycle.
+
+The threshold, threshold_compare and threshold_count values can be
+provided per event, for example:
+
+.. code-block:: sh
+
+ perf stat -e stall_slot/threshold=2,threshold_compare=2/ \
+ -e dtlb_walk/threshold=10,threshold_compare=3,threshold_count/
+
+In this example the stall_slot event will count by 2 or more on every
+cycle where 2 or more stalls happen. And dtlb_walk will count by 1 on
+every cycle where the number of dtlb walks were less than 10.
+
+The maximum supported threshold value can be read from the caps of each
+PMU, for example:
+
+.. code-block:: sh
+
+ cat /sys/bus/event_source/devices/armv8_pmuv3/caps/threshold_max
+
+ 0x000000ff
+
+If a value higher than this is given, then opening the event will result
+in an error. The highest possible maximum is 4095, as the config field
+for threshold is limited to 12 bits, and the Perf tool will refuse to
+parse higher values.
+
+If the PMU doesn't support FEAT_PMUv3_TH, then threshold_max will read
+0, and attempting to set a threshold value will also result in an error.
+threshold_max will also read as 0 on aarch32 guests, even if the host
+is running on hardware with the feature.
diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
index e9fad4b3de6840..6c96a4204e5d68 100644
--- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
+++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
@@ -27,6 +27,9 @@ properties:
- fsl,imx8mq-ddr-pmu
- fsl,imx8mp-ddr-pmu
- const: fsl,imx8m-ddr-pmu
+ - items:
+ - const: fsl,imx8dxl-ddr-pmu
+ - const: fsl,imx8-ddr-pmu
reg:
maxItems: 1
diff --git a/MAINTAINERS b/MAINTAINERS
index 880cda026e6345..a2e352a29165d5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21016,6 +21016,13 @@ L: linux-mmc@vger.kernel.org
S: Maintained
F: drivers/mmc/host/dw_mmc*
+SYNOPSYS DESIGNWARE PCIE PMU DRIVER
+M: Shuai Xue <xueshuai@linux.alibaba.com>
+M: Jing Zhang <renyu.zj@linux.alibaba.com>
+S: Supported
+F: Documentation/admin-guide/perf/dwc_pcie_pmu.rst
+F: drivers/perf/dwc_pcie_pmu.c
+
SYNOPSYS HSDK RESET CONTROLLER DRIVER
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
S: Supported
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 1ae99deeec5491..8fc080c9e4fb18 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -268,10 +268,8 @@ static inline void armv6pmu_write_counter(struct perf_event *event, u64 value)
static void armv6pmu_enable_event(struct perf_event *event)
{
- unsigned long val, mask, evt, flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long val, mask, evt;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
@@ -294,12 +292,10 @@ static void armv6pmu_enable_event(struct perf_event *event)
* Mask out the current event and set the counter to count the event
* that we're interested in.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static irqreturn_t
@@ -362,26 +358,20 @@ armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
static void armv6pmu_start(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
val |= ARMV6_PMCR_ENABLE;
armv6_pmcr_write(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~ARMV6_PMCR_ENABLE;
armv6_pmcr_write(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static int
@@ -419,10 +409,8 @@ static void armv6pmu_clear_event_idx(struct pmu_hw_events *cpuc,
static void armv6pmu_disable_event(struct perf_event *event)
{
- unsigned long val, mask, evt, flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long val, mask, evt;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
@@ -444,20 +432,16 @@ static void armv6pmu_disable_event(struct perf_event *event)
* of ETM bus signal assertion cycles. The external reporting should
* be disabled and so this should never increment.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv6mpcore_pmu_disable_event(struct perf_event *event)
{
- unsigned long val, mask, flags, evt = 0;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long val, mask, evt = 0;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
if (ARMV6_CYCLE_COUNTER == idx) {
@@ -475,12 +459,10 @@ static void armv6mpcore_pmu_disable_event(struct perf_event *event)
* Unlike UP ARMv6, we don't have a way of stopping the counters. We
* simply disable the interrupt reporting.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = armv6_pmcr_read();
val &= ~mask;
val |= evt;
armv6_pmcr_write(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static int armv6_map_event(struct perf_event *event)
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index eb2190477da10a..a3322e2b3ea44d 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -870,10 +870,8 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
static void armv7pmu_enable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
@@ -886,7 +884,6 @@ static void armv7pmu_enable_event(struct perf_event *event)
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/*
* Disable counter
@@ -910,16 +907,12 @@ static void armv7pmu_enable_event(struct perf_event *event)
* Enable counter
*/
armv7_pmnc_enable_counter(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv7pmu_disable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
@@ -931,7 +924,6 @@ static void armv7pmu_disable_event(struct perf_event *event)
/*
* Disable counter and interrupt
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/*
* Disable counter
@@ -942,8 +934,6 @@ static void armv7pmu_disable_event(struct perf_event *event)
* Disable interrupt for this counter
*/
armv7_pmnc_disable_intens(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
@@ -1009,24 +999,14 @@ static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
static void armv7pmu_start(struct arm_pmu *cpu_pmu)
{
- unsigned long flags;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Enable all counters */
armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
{
- unsigned long flags;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable all counters */
armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc,
@@ -1072,8 +1052,10 @@ static int armv7pmu_set_event_filter(struct hw_perf_event *event,
{
unsigned long config_base = 0;
- if (attr->exclude_idle)
- return -EPERM;
+ if (attr->exclude_idle) {
+ pr_debug("ARM performance counters do not support mode exclusion\n");
+ return -EOPNOTSUPP;
+ }
if (attr->exclude_user)
config_base |= ARMV7_EXCLUDE_USER;
if (attr->exclude_kernel)
@@ -1492,14 +1474,10 @@ static void krait_clearpmu(u32 config_base)
static void krait_pmu_disable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
/* Disable counter and interrupt */
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable counter */
armv7_pmnc_disable_counter(idx);
@@ -1512,23 +1490,17 @@ static void krait_pmu_disable_event(struct perf_event *event)
/* Disable interrupt for this counter */
armv7_pmnc_disable_intens(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void krait_pmu_enable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
/*
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable counter */
armv7_pmnc_disable_counter(idx);
@@ -1548,8 +1520,6 @@ static void krait_pmu_enable_event(struct perf_event *event)
/* Enable counter */
armv7_pmnc_enable_counter(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void krait_pmu_reset(void *info)
@@ -1825,14 +1795,10 @@ static void scorpion_clearpmu(u32 config_base)
static void scorpion_pmu_disable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
/* Disable counter and interrupt */
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable counter */
armv7_pmnc_disable_counter(idx);
@@ -1845,23 +1811,17 @@ static void scorpion_pmu_disable_event(struct perf_event *event)
/* Disable interrupt for this counter */
armv7_pmnc_disable_intens(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void scorpion_pmu_enable_event(struct perf_event *event)
{
- unsigned long flags;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
/*
* Enable counter and interrupt, and set the counter to count
* the event that we're interested in.
*/
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable counter */
armv7_pmnc_disable_counter(idx);
@@ -1881,8 +1841,6 @@ static void scorpion_pmu_enable_event(struct perf_event *event)
/* Enable counter */
armv7_pmnc_enable_counter(idx);
-
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void scorpion_pmu_reset(void *info)
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index f6cdcacfb96dbc..7a2ba1c689a7f5 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -203,10 +203,8 @@ xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu)
static void xscale1pmu_enable_event(struct perf_event *event)
{
- unsigned long val, mask, evt, flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long val, mask, evt;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
switch (idx) {
@@ -229,20 +227,16 @@ static void xscale1pmu_enable_event(struct perf_event *event)
return;
}
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~mask;
val |= evt;
xscale1pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void xscale1pmu_disable_event(struct perf_event *event)
{
- unsigned long val, mask, evt, flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long val, mask, evt;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
switch (idx) {
@@ -263,12 +257,10 @@ static void xscale1pmu_disable_event(struct perf_event *event)
return;
}
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~mask;
val |= evt;
xscale1pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static int
@@ -300,26 +292,20 @@ static void xscalepmu_clear_event_idx(struct pmu_hw_events *cpuc,
static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val |= XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static inline u64 xscale1pmu_read_counter(struct perf_event *event)
@@ -549,10 +535,8 @@ xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu)
static void xscale2pmu_enable_event(struct perf_event *event)
{
- unsigned long flags, ien, evtsel;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long ien, evtsel;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
ien = xscale2pmu_read_int_enable();
@@ -587,18 +571,14 @@ static void xscale2pmu_enable_event(struct perf_event *event)
return;
}
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
xscale2pmu_write_event_select(evtsel);
xscale2pmu_write_int_enable(ien);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void xscale2pmu_disable_event(struct perf_event *event)
{
- unsigned long flags, ien, evtsel, of_flags;
- struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+ unsigned long ien, evtsel, of_flags;
struct hw_perf_event *hwc = &event->hw;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
ien = xscale2pmu_read_int_enable();
@@ -638,11 +618,9 @@ static void xscale2pmu_disable_event(struct perf_event *event)
return;
}
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
xscale2pmu_write_event_select(evtsel);
xscale2pmu_write_int_enable(ien);
xscale2pmu_write_overflow_flags(of_flags);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static int
@@ -663,26 +641,20 @@ out:
static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
val |= XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
{
- unsigned long flags, val;
- struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+ unsigned long val;
- raw_spin_lock_irqsave(&events->pmu_lock, flags);
val = xscale2pmu_read_pmnc();
val &= ~XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(val);
- raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static inline u64 xscale2pmu_read_counter(struct perf_event *event)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 442539fd06fea3..c5321223ac67ba 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1550,7 +1550,7 @@ config ARCH_FORCE_MAX_ORDER
Don't change if unsure.
config UNMAP_KERNEL_AT_EL0
- bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT
+ bool "Unmap kernel when running in userspace (KPTI)" if EXPERT
default y
help
Speculation attacks against some high-performance processors can
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 9a2d3723cd0fa9..47ecc4cff9d25b 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -200,7 +200,7 @@ endif
endif
vdso-install-y += arch/arm64/kernel/vdso/vdso.so.dbg
-vdso-install-$(CONFIG_COMPAT_VDSO) += arch/arm64/kernel/vdso32/vdso.so.dbg:vdso32.so
+vdso-install-$(CONFIG_COMPAT_VDSO) += arch/arm64/kernel/vdso32/vdso32.so.dbg
include $(srctree)/scripts/Makefile.defconf
diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile
index 1761f5972443fc..a5a7873711173b 100644
--- a/arch/arm64/boot/Makefile
+++ b/arch/arm64/boot/Makefile
@@ -44,7 +44,7 @@ EFI_ZBOOT_BFD_TARGET := elf64-littleaarch64
EFI_ZBOOT_MACH_TYPE := ARM64
EFI_ZBOOT_FORWARD_CFI := $(CONFIG_ARM64_BTI_KERNEL)
-EFI_ZBOOT_OBJCOPY_FLAGS = --add-symbol zboot_code_size=0x$(shell \
+EFI_ZBOOT_OBJCOPY_FLAGS = --add-symbol zboot_code_size=0x$$( \
$(NM) vmlinux|grep _kernel_codesize|cut -d' ' -f1)
include $(srctree)/drivers/firmware/efi/libstub/Makefile.zboot
diff --git a/arch/arm64/boot/install.sh b/arch/arm64/boot/install.sh
index 7399d706967a4f..9b7a09808a3dda 100755
--- a/arch/arm64/boot/install.sh
+++ b/arch/arm64/boot/install.sh
@@ -17,7 +17,8 @@
# $3 - kernel map file
# $4 - default install path (blank if root directory)
-if [ "$(basename $2)" = "Image.gz" ]; then
+if [ "$(basename $2)" = "Image.gz" ] || [ "$(basename $2)" = "vmlinuz.efi" ]
+then
# Compressed install
echo "Installing compressed kernel"
base=vmlinuz
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 376a980f2bad08..7b1975bf4b90e7 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -12,7 +12,7 @@
#ifndef __ASM_ASSEMBLER_H
#define __ASM_ASSEMBLER_H
-#include <asm-generic/export.h>
+#include <linux/export.h>
#include <asm/alternative.h>
#include <asm/asm-bug.h>
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index ceb368d33bf4ea..06a4670bdb0b9b 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -58,7 +58,6 @@ static inline unsigned int arch_slab_minalign(void)
#define CTR_L1IP(ctr) SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
#define ICACHEF_ALIASING 0
-#define ICACHEF_VPIPT 1
extern unsigned long __icache_flags;
/*
@@ -70,11 +69,6 @@ static inline int icache_is_aliasing(void)
return test_bit(ICACHEF_ALIASING, &__icache_flags);
}
-static __always_inline int icache_is_vpipt(void)
-{
- return test_bit(ICACHEF_VPIPT, &__icache_flags);
-}
-
static inline u32 cache_type_cwg(void)
{
return SYS_FIELD_GET(CTR_EL0, CWG, read_cpuid_cachetype());
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index f6d416fe49b097..21c824edf8ce4a 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -617,6 +617,7 @@ static inline bool id_aa64pfr1_mte(u64 pfr1)
return val >= ID_AA64PFR1_EL1_MTE_MTE2;
}
+void __init setup_boot_cpu_features(void);
void __init setup_system_features(void);
void __init setup_user_features(void);
@@ -819,6 +820,11 @@ static inline bool system_supports_tlb_range(void)
return alternative_has_cap_unlikely(ARM64_HAS_TLB_RANGE);
}
+static inline bool system_supports_lpa2(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_LPA2);
+}
+
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index cdf6a35e399440..cda81d009c9bd7 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -242,14 +242,6 @@
| (\nx << 5)
.endm
-/*
- * Zero the entire ZA array
- * ZERO ZA
- */
-.macro zero_za
- .inst 0xc00800ff
-.endm
-
.macro __for from:req, to:req
.if (\from) == (\to)
_for__body %\from
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 85d26143faa598..83ddb14b95a5e3 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -37,27 +37,12 @@
/*
- * If KASLR is enabled, then an offset K is added to the kernel address
- * space. The bottom 21 bits of this offset are zero to guarantee 2MB
- * alignment for PA and VA.
- *
- * For each pagetable level of the swapper, we know that the shift will
- * be larger than 21 (for the 4KB granule case we use section maps thus
- * the smallest shift is actually 30) thus there is the possibility that
- * KASLR can increase the number of pagetable entries by 1, so we make
- * room for this extra entry.
- *
- * Note KASLR cannot increase the number of required entries for a level
- * by more than one because it increments both the virtual start and end
- * addresses equally (the extra entry comes from the case where the end
- * address is just pushed over a boundary and the start address isn't).
+ * A relocatable kernel may execute from an address that differs from the one at
+ * which it was linked. In the worst case, its runtime placement may intersect
+ * with two adjacent PGDIR entries, which means that an additional page table
+ * may be needed at each subordinate level.
*/
-
-#ifdef CONFIG_RANDOMIZE_BASE
-#define EARLY_KASLR (1)
-#else
-#define EARLY_KASLR (0)
-#endif
+#define EXTRA_PAGE __is_defined(CONFIG_RELOCATABLE)
#define SPAN_NR_ENTRIES(vstart, vend, shift) \
((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1)
@@ -83,7 +68,7 @@
+ EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \
+ EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \
+ EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */
-#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR))
+#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE))
/* the initial ID map may need two extra pages if it needs to be extended */
#if VA_BITS < 48
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 49e0d4b36bd009..e3e793d0ec3041 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -244,13 +244,6 @@ static inline size_t __invalidate_icache_max_range(void)
static inline void __invalidate_icache_guest_page(void *va, size_t size)
{
/*
- * VPIPT I-cache maintenance must be done from EL2. See comment in the
- * nVHE flavor of __kvm_tlb_flush_vmid_ipa().
- */
- if (icache_is_vpipt() && read_sysreg(CurrentEL) != CurrentEL_EL2)
- return;
-
- /*
* Blow the whole I-cache if it is aliasing (i.e. VIPT) or the
* invalidation range exceeds our arbitrary limit on invadations by
* cache line.
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index d3e354bb8351d7..10068500d60194 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -25,6 +25,8 @@
#define KVM_PGTABLE_MIN_BLOCK_LEVEL 2U
#endif
+#define kvm_lpa2_is_enabled() false
+
static inline u64 kvm_get_parange(u64 mmfr0)
{
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0f139cb4467b10..d82305ab420f74 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -208,6 +208,7 @@
#include <linux/types.h>
#include <asm/boot.h>
#include <asm/bug.h>
+#include <asm/sections.h>
#if VA_BITS > 48
extern u64 vabits_actual;
@@ -219,15 +220,12 @@ extern s64 memstart_addr;
/* PHYS_OFFSET - the physical address of the start of memory. */
#define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
-/* the virtual base of the kernel image */
-extern u64 kimage_vaddr;
-
/* the offset between the kernel virtual and physical mappings */
extern u64 kimage_voffset;
static inline unsigned long kaslr_offset(void)
{
- return kimage_vaddr - KIMAGE_VADDR;
+ return (u64)&_text - KIMAGE_VADDR;
}
#ifdef CONFIG_RANDOMIZE_BASE
@@ -433,6 +431,5 @@ void dump_mem_limit(void);
#define INIT_MEMBLOCK_MEMORY_REGIONS (INIT_MEMBLOCK_REGIONS * 8)
#endif
-#include <asm-generic/memory_model.h>
#endif /* __ASM_MEMORY_H */
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index e9624f6326dde8..483dbfa39c4c9c 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -71,6 +71,8 @@ extern bool arm64_use_ng_mappings;
#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0)
#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0)
+#define lpa2_is_enabled() false
+
/*
* If we have userspace only BTI we don't want to mark kernel pages
* guarded even if the system does support BTI.
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index e5bc54522e7112..5b0a04810b236b 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -167,6 +167,9 @@ struct thread_struct {
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
+
+ struct user_fpsimd_state kernel_fpsimd_state;
+ unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 6a75d7ecdcaa2d..8e86c9e70e4831 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -12,8 +12,6 @@
#include <linux/preempt.h>
#include <linux/types.h>
-DECLARE_PER_CPU(bool, fpsimd_context_busy);
-
#ifdef CONFIG_KERNEL_MODE_NEON
/*
@@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void)
/*
* We must make sure that the SVE has been initialized properly
* before using the SIMD in kernel.
- * fpsimd_context_busy is only set while preemption is disabled,
- * and is clear whenever preemption is enabled. Since
- * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
- * cannot change under our feet -- if it's set we cannot be
- * migrated, and if it's clear we cannot be migrated to a CPU
- * where it is set.
*/
return !WARN_ON(!system_capabilities_finalized()) &&
system_supports_fpsimd() &&
- !in_hardirq() && !irqs_disabled() && !in_nmi() &&
- !this_cpu_read(fpsimd_context_busy);
+ !in_hardirq() && !irqs_disabled() && !in_nmi();
}
#else /* ! CONFIG_KERNEL_MODE_NEON */
diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
index 508f734de46ee2..f63dc654e545f4 100644
--- a/arch/arm64/include/asm/stacktrace/common.h
+++ b/arch/arm64/include/asm/stacktrace/common.h
@@ -9,7 +9,6 @@
#ifndef __ASM_STACKTRACE_COMMON_H
#define __ASM_STACKTRACE_COMMON_H
-#include <linux/kprobes.h>
#include <linux/types.h>
struct stack_info {
@@ -23,12 +22,6 @@ struct stack_info {
* @fp: The fp value in the frame record (or the real fp)
* @pc: The lr value in the frame record (or the real lr)
*
- * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
- * associated with the most recently encountered replacement lr
- * value.
- *
- * @task: The task being unwound.
- *
* @stack: The stack currently being unwound.
* @stacks: An array of stacks which can be unwound.
* @nr_stacks: The number of stacks in @stacks.
@@ -36,10 +29,6 @@ struct stack_info {
struct unwind_state {
unsigned long fp;
unsigned long pc;
-#ifdef CONFIG_KRETPROBES
- struct llist_node *kr_cur;
-#endif
- struct task_struct *task;
struct stack_info stack;
struct stack_info *stacks;
@@ -66,14 +55,8 @@ static inline bool stackinfo_on_stack(const struct stack_info *info,
return true;
}
-static inline void unwind_init_common(struct unwind_state *state,
- struct task_struct *task)
+static inline void unwind_init_common(struct unwind_state *state)
{
- state->task = task;
-#ifdef CONFIG_KRETPROBES
- state->kr_cur = NULL;
-#endif
-
state->stack = stackinfo_get_unknown();
}
diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h
index 25ab83a315a76d..44759281d0d437 100644
--- a/arch/arm64/include/asm/stacktrace/nvhe.h
+++ b/arch/arm64/include/asm/stacktrace/nvhe.h
@@ -31,7 +31,7 @@ static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
unsigned long fp,
unsigned long pc)
{
- unwind_init_common(state, NULL);
+ unwind_init_common(state);
state->fp = fp;
state->pc = pc;
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 5e65f51c10d244..c3b19b376c8672 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -645,6 +645,7 @@
#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E1A sys_insn(AT_Op0, 0, AT_CRn, 9, 2)
#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
@@ -781,10 +782,16 @@
#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6)
/* Misc instructions */
+#define OP_GCSPUSHX sys_insn(1, 0, 7, 7, 4)
+#define OP_GCSPOPCX sys_insn(1, 0, 7, 7, 5)
+#define OP_GCSPOPX sys_insn(1, 0, 7, 7, 6)
+#define OP_GCSPUSHM sys_insn(1, 3, 7, 7, 0)
+
#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4)
#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5)
#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4)
#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5)
+#define OP_COSP_RCTX sys_insn(1, 3, 7, 3, 6)
#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
/* Common SCTLR_ELx flags. */
@@ -871,10 +878,12 @@
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN 0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN 0x0
#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX 0x7
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN 0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX 0xf
#define ARM64_MIN_PARANGE_BITS 32
@@ -882,6 +891,7 @@
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT 0x0
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE 0x1
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN 0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2 0x3
#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX 0x7
#ifdef CONFIG_ARM64_PA_BITS_52
@@ -892,11 +902,13 @@
#if defined(CONFIG_ARM64_4K_PAGES)
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN4_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
#elif defined(CONFIG_ARM64_16K_PAGES)
#define ID_AA64MMFR0_EL1_TGRAN_SHIFT ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2 ID_AA64MMFR0_EL1_TGRAN16_52_BIT
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
@@ -1039,6 +1051,19 @@
#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
+/*
+ * Permission Overlay Extension (POE) permission encodings.
+ */
+#define POE_NONE UL(0x0)
+#define POE_R UL(0x1)
+#define POE_X UL(0x2)
+#define POE_RX UL(0x3)
+#define POE_W UL(0x4)
+#define POE_RW UL(0x5)
+#define POE_XW UL(0x6)
+#define POE_RXW UL(0x7)
+#define POE_MASK UL(0xf)
+
#define ARM64_FEATURE_FIELD_BITS 4
/* Defined for compatibility only, do not add new users. */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 553d1bc559c603..e72a3bf9e56348 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -80,6 +80,7 @@ void arch_setup_new_exec(void);
#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */
#define TIF_SME 27 /* SME in use */
#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */
+#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 846c563689a818..0150deb332afd7 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -22,15 +22,15 @@ static void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
/*
- * get the tlbi levels in arm64. Default value is 0 if more than one
- * of cleared_* is set or neither is set.
- * Arm64 doesn't support p4ds now.
+ * get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than
+ * one of cleared_* is set or neither is set - this elides the level hinting to
+ * the hardware.
*/
static inline int tlb_get_level(struct mmu_gather *tlb)
{
/* The TTL field is only valid for the leaf entry. */
if (tlb->freed_tables)
- return 0;
+ return TLBI_TTL_UNKNOWN;
if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
tlb->cleared_puds ||
@@ -47,7 +47,12 @@ static inline int tlb_get_level(struct mmu_gather *tlb)
tlb->cleared_p4ds))
return 1;
- return 0;
+ if (tlb->cleared_p4ds && !(tlb->cleared_ptes ||
+ tlb->cleared_pmds ||
+ tlb->cleared_puds))
+ return 0;
+
+ return TLBI_TTL_UNKNOWN;
}
static inline void tlb_flush(struct mmu_gather *tlb)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bb2c2833a98722..1deb5d789c2e23 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -94,19 +94,22 @@ static inline unsigned long get_trans_granule(void)
* When ARMv8.4-TTL exists, TLBI operations take an additional hint for
* the level at which the invalidation must take place. If the level is
* wrong, no invalidation may take place. In the case where the level
- * cannot be easily determined, a 0 value for the level parameter will
- * perform a non-hinted invalidation.
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform
+ * a non-hinted invalidation. Any provided level outside the hint range
+ * will also cause fall-back to non-hinted invalidation.
*
* For Stage-2 invalidation, use the level values provided to that effect
* in asm/stage2_pgtable.h.
*/
#define TLBI_TTL_MASK GENMASK_ULL(47, 44)
+#define TLBI_TTL_UNKNOWN INT_MAX
+
#define __tlbi_level(op, addr, level) do { \
u64 arg = addr; \
\
if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \
- level) { \
+ level >= 0 && level <= 3) { \
u64 ttl = level & 3; \
ttl |= get_trans_granule() << 2; \
arg &= ~TLBI_TTL_MASK; \
@@ -122,28 +125,34 @@ static inline unsigned long get_trans_granule(void)
} while (0)
/*
- * This macro creates a properly formatted VA operand for the TLB RANGE.
- * The value bit assignments are:
+ * This macro creates a properly formatted VA operand for the TLB RANGE. The
+ * value bit assignments are:
*
* +----------+------+-------+-------+-------+----------------------+
* | ASID | TG | SCALE | NUM | TTL | BADDR |
* +-----------------+-------+-------+-------+----------------------+
* |63 48|47 46|45 44|43 39|38 37|36 0|
*
- * The address range is determined by below formula:
- * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
+ * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) *
+ * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR
+ * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI
+ * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA,
+ * EL1, Inner Shareable".
*
*/
-#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \
- ({ \
- unsigned long __ta = (addr) >> PAGE_SHIFT; \
- __ta &= GENMASK_ULL(36, 0); \
- __ta |= (unsigned long)(ttl) << 37; \
- __ta |= (unsigned long)(num) << 39; \
- __ta |= (unsigned long)(scale) << 44; \
- __ta |= get_trans_granule() << 46; \
- __ta |= (unsigned long)(asid) << 48; \
- __ta; \
+#define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \
+ ({ \
+ unsigned long __ta = (baddr); \
+ unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \
+ __ta &= GENMASK_ULL(36, 0); \
+ __ta |= __ttl << 37; \
+ __ta |= (unsigned long)(num) << 39; \
+ __ta |= (unsigned long)(scale) << 44; \
+ __ta |= get_trans_granule() << 46; \
+ __ta |= (unsigned long)(asid) << 48; \
+ __ta; \
})
/* These macros are used by the TLBI RANGE feature. */
@@ -216,12 +225,16 @@ static inline unsigned long get_trans_granule(void)
* CPUs, ensuring that any walk-cache entries associated with the
* translation are also invalidated.
*
- * __flush_tlb_range(vma, start, end, stride, last_level)
+ * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level)
* Invalidate the virtual-address range '[start, end)' on all
* CPUs for the user address space corresponding to 'vma->mm'.
* The invalidation operations are issued at a granularity
* determined by 'stride' and only affect any walk-cache entries
- * if 'last_level' is equal to false.
+ * if 'last_level' is equal to false. tlb_level is the level at
+ * which the invalidation must take place. If the level is wrong,
+ * no invalidation may take place. In the case where the level
+ * cannot be easily determined, the value TLBI_TTL_UNKNOWN will
+ * perform a non-hinted invalidation.
*
*
* Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
@@ -345,34 +358,44 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
* @tlb_level: Translation Table level hint, if known
* @tlbi_user: If 'true', call an additional __tlbi_user()
* (typically for user ASIDs). 'flase' for IPA instructions
+ * @lpa2: If 'true', the lpa2 scheme is used as set out below
*
* When the CPU does not support TLB range operations, flush the TLB
* entries one by one at the granularity of 'stride'. If the TLB
* range ops are supported, then:
*
- * 1. If 'pages' is odd, flush the first page through non-range
- * operations;
+ * 1. If FEAT_LPA2 is in use, the start address of a range operation must be
+ * 64KB aligned, so flush pages one by one until the alignment is reached
+ * using the non-range operations. This step is skipped if LPA2 is not in
+ * use.
+ *
+ * 2. The minimum range granularity is decided by 'scale', so multiple range
+ * TLBI operations may be required. Start from scale = 3, flush the largest
+ * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the
+ * requested range, then decrement scale and continue until one or zero pages
+ * are left. We must start from highest scale to ensure 64KB start alignment
+ * is maintained in the LPA2 case.
*
- * 2. For remaining pages: the minimum range granularity is decided
- * by 'scale', so multiple range TLBI operations may be required.
- * Start from scale = 0, flush the corresponding number of pages
- * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
- * until no pages left.
+ * 3. If there is 1 page remaining, flush it through non-range operations. Range
+ * operations can only span an even number of pages. We save this for last to
+ * ensure 64KB start alignment is maintained for the LPA2 case.
*
* Note that certain ranges can be represented by either num = 31 and
* scale or num = 0 and scale + 1. The loop below favours the latter
* since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
*/
#define __flush_tlb_range_op(op, start, pages, stride, \
- asid, tlb_level, tlbi_user) \
+ asid, tlb_level, tlbi_user, lpa2) \
do { \
int num = 0; \
- int scale = 0; \
+ int scale = 3; \
+ int shift = lpa2 ? 16 : PAGE_SHIFT; \
unsigned long addr; \
\
while (pages > 0) { \
if (!system_supports_tlb_range() || \
- pages % 2 == 1) { \
+ pages == 1 || \
+ (lpa2 && start != ALIGN(start, SZ_64K))) { \
addr = __TLBI_VADDR(start, asid); \
__tlbi_level(op, addr, tlb_level); \
if (tlbi_user) \
@@ -384,20 +407,20 @@ do { \
\
num = __TLBI_RANGE_NUM(pages, scale); \
if (num >= 0) { \
- addr = __TLBI_VADDR_RANGE(start, asid, scale, \
- num, tlb_level); \
+ addr = __TLBI_VADDR_RANGE(start >> shift, asid, \
+ scale, num, tlb_level); \
__tlbi(r##op, addr); \
if (tlbi_user) \
__tlbi_user(r##op, addr); \
start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
pages -= __TLBI_RANGE_PAGES(num, scale); \
} \
- scale++; \
+ scale--; \
} \
} while (0)
#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
- __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
@@ -427,9 +450,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
asid = ASID(vma->vm_mm);
if (last_level)
- __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
+ __flush_tlb_range_op(vale1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
else
- __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
+ __flush_tlb_range_op(vae1is, start, pages, stride, asid,
+ tlb_level, true, lpa2_is_enabled());
dsb(ish);
mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
@@ -441,9 +466,10 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
/*
* We cannot use leaf-only invalidation here, since we may be invalidating
* table entries as part of collapsing hugepages or moving page tables.
- * Set the tlb_level to 0 because we can not get enough information here.
+ * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough
+ * information here.
*/
- __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
}
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 91d2d671496911..01a4c1d7fc09a8 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1081,25 +1081,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
-
- /*
- * Initialize the indirect array of CPU capabilities pointers before we
- * handle the boot CPU below.
- */
- init_cpucap_indirect_list();
-
- /*
- * Detect broken pseudo-NMI. Must be called _before_ the call to
- * setup_boot_cpu_capabilities() since it interacts with
- * can_use_gic_priorities().
- */
- detect_system_supports_pseudo_nmi();
-
- /*
- * Detect and enable early CPU capabilities based on the boot CPU,
- * after we have initialised the CPU feature infrastructure.
- */
- setup_boot_cpu_capabilities();
}
static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -1584,16 +1565,6 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry,
return has_sre;
}
-static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused)
-{
- u32 midr = read_cpuid_id();
-
- /* Cavium ThunderX pass 1.x and 2.x */
- return midr_is_cpu_model_range(midr, MIDR_THUNDERX,
- MIDR_CPU_VAR_REV(0, 0),
- MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
-}
-
static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
int scope)
{
@@ -1768,6 +1739,39 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
return !meltdown_safe;
}
+#if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
+static bool has_lpa2_at_stage1(u64 mmfr0)
+{
+ unsigned int tgran;
+
+ tgran = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+ return tgran == ID_AA64MMFR0_EL1_TGRAN_LPA2;
+}
+
+static bool has_lpa2_at_stage2(u64 mmfr0)
+{
+ unsigned int tgran;
+
+ tgran = cpuid_feature_extract_unsigned_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_2_SHIFT);
+ return tgran == ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2;
+}
+
+static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 mmfr0;
+
+ mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+ return has_lpa2_at_stage1(mmfr0) && has_lpa2_at_stage2(mmfr0);
+}
+#else
+static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
@@ -1840,7 +1844,7 @@ static int __init __kpti_install_ng_mappings(void *__unused)
static void __init kpti_install_ng_mappings(void)
{
/* Check whether KPTI is going to be used */
- if (!cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0))
+ if (!arm64_kernel_unmapped_at_el0())
return;
/*
@@ -2326,12 +2330,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
},
#endif /* CONFIG_ARM64_LSE_ATOMICS */
{
- .desc = "Software prefetching using PRFM",
- .capability = ARM64_HAS_NO_HW_PREFETCH,
- .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
- .matches = has_no_hw_prefetch,
- },
- {
.desc = "Virtualization Host Extensions",
.capability = ARM64_HAS_VIRT_HOST_EXTN,
.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
@@ -2735,6 +2733,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
},
+ {
+ .desc = "52-bit Virtual Addressing for KVM (LPA2)",
+ .capability = ARM64_HAS_LPA2,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_lpa2,
+ },
{},
};
@@ -3275,14 +3279,6 @@ void check_local_cpu_capabilities(void)
verify_local_cpu_capabilities();
}
-static void __init setup_boot_cpu_capabilities(void)
-{
- /* Detect capabilities with either SCOPE_BOOT_CPU or SCOPE_LOCAL_CPU */
- update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU);
- /* Enable the SCOPE_BOOT_CPU capabilities alone right away */
- enable_cpu_capabilities(SCOPE_BOOT_CPU);
-}
-
bool this_cpu_has_cap(unsigned int n)
{
if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) {
@@ -3338,37 +3334,52 @@ unsigned long cpu_get_elf_hwcap2(void)
return elf_hwcap[1];
}
-void __init setup_system_features(void)
+static void __init setup_boot_cpu_capabilities(void)
{
- int i;
/*
- * The system-wide safe feature feature register values have been
- * finalized. Finalize and log the available system capabilities.
+ * The boot CPU's feature register values have been recorded. Detect
+ * boot cpucaps and local cpucaps for the boot CPU, then enable and
+ * patch alternatives for the available boot cpucaps.
*/
- update_cpu_capabilities(SCOPE_SYSTEM);
- if (IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
- !cpus_have_cap(ARM64_HAS_PAN))
- pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
+ update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU);
+ enable_cpu_capabilities(SCOPE_BOOT_CPU);
+ apply_boot_alternatives();
+}
+void __init setup_boot_cpu_features(void)
+{
/*
- * Enable all the available capabilities which have not been enabled
- * already.
+ * Initialize the indirect array of CPU capabilities pointers before we
+ * handle the boot CPU.
*/
- enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
+ init_cpucap_indirect_list();
- kpti_install_ng_mappings();
+ /*
+ * Detect broken pseudo-NMI. Must be called _before_ the call to
+ * setup_boot_cpu_capabilities() since it interacts with
+ * can_use_gic_priorities().
+ */
+ detect_system_supports_pseudo_nmi();
- sve_setup();
- sme_setup();
+ setup_boot_cpu_capabilities();
+}
+static void __init setup_system_capabilities(void)
+{
/*
- * Check for sane CTR_EL0.CWG value.
+ * The system-wide safe feature register values have been finalized.
+ * Detect, enable, and patch alternatives for the available system
+ * cpucaps.
*/
- if (!cache_type_cwg())
- pr_warn("No Cache Writeback Granule information, assuming %d\n",
- ARCH_DMA_MINALIGN);
+ update_cpu_capabilities(SCOPE_SYSTEM);
+ enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
+ apply_alternatives_all();
- for (i = 0; i < ARM64_NCAPS; i++) {
+ /*
+ * Log any cpucaps with a cpumask as these aren't logged by
+ * update_cpu_capabilities().
+ */
+ for (int i = 0; i < ARM64_NCAPS; i++) {
const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
if (caps && caps->cpus && caps->desc &&
@@ -3376,6 +3387,29 @@ void __init setup_system_features(void)
pr_info("detected: %s on CPU%*pbl\n",
caps->desc, cpumask_pr_args(caps->cpus));
}
+
+ /*
+ * TTBR0 PAN doesn't have its own cpucap, so log it manually.
+ */
+ if (system_uses_ttbr0_pan())
+ pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
+}
+
+void __init setup_system_features(void)
+{
+ setup_system_capabilities();
+
+ kpti_install_ng_mappings();
+
+ sve_setup();
+ sme_setup();
+
+ /*
+ * Check for sane CTR_EL0.CWG value.
+ */
+ if (!cache_type_cwg())
+ pr_warn("No Cache Writeback Granule information, assuming %d\n",
+ ARCH_DMA_MINALIGN);
}
void __init setup_user_features(void)
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index a257da7b56fe6a..47043c0d95ec3d 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -36,8 +36,6 @@ static struct cpuinfo_arm64 boot_cpu_data;
static inline const char *icache_policy_str(int l1ip)
{
switch (l1ip) {
- case CTR_EL0_L1Ip_VPIPT:
- return "VPIPT";
case CTR_EL0_L1Ip_VIPT:
return "VIPT";
case CTR_EL0_L1Ip_PIPT:
@@ -388,9 +386,6 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
switch (l1ip) {
case CTR_EL0_L1Ip_PIPT:
break;
- case CTR_EL0_L1Ip_VPIPT:
- set_bit(ICACHEF_VPIPT, &__icache_flags);
- break;
case CTR_EL0_L1Ip_VIPT:
default:
/* Assume aliasing */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 1559c706d32d1d..505f389be3e0df 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -85,13 +85,13 @@
* softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
* flag the register state as invalid.
*
- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
- * save the task's FPSIMD context back to task_struct from softirq context.
- * To prevent this from racing with the manipulation of the task's FPSIMD state
- * from task context and thereby corrupting the state, it is necessary to
- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
- * run but prevent them to use FPSIMD.
+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
+ * called from softirq context, which will save the task's FPSIMD context back
+ * to task_struct. To prevent this from racing with the manipulation of the
+ * task's FPSIMD state from task context and thereby corrupting the state, it
+ * is necessary to protect any manipulation of a task's fpsimd_state or
+ * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
+ * softirq servicing entirely until put_cpu_fpsimd_context() is called.
*
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_cpu field
@@ -209,27 +209,14 @@ static inline void sme_free(struct task_struct *t) { }
#endif
-DEFINE_PER_CPU(bool, fpsimd_context_busy);
-EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
-
static void fpsimd_bind_task_to_cpu(void);
-static void __get_cpu_fpsimd_context(void)
-{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
-
- WARN_ON(busy);
-}
-
/*
* Claim ownership of the CPU FPSIMD context for use by the calling context.
*
* The caller may freely manipulate the FPSIMD context metadata until
* put_cpu_fpsimd_context() is called.
*
- * The double-underscore version must only be called if you know the task
- * can't be preempted.
- *
* On RT kernels local_bh_disable() is not sufficient because it only
* serializes soft interrupt related sections via a local lock, but stays
* preemptible. Disabling preemption is the right choice here as bottom
@@ -242,14 +229,6 @@ static void get_cpu_fpsimd_context(void)
local_bh_disable();
else
preempt_disable();
- __get_cpu_fpsimd_context();
-}
-
-static void __put_cpu_fpsimd_context(void)
-{
- bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
-
- WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
}
/*
@@ -261,18 +240,12 @@ static void __put_cpu_fpsimd_context(void)
*/
static void put_cpu_fpsimd_context(void)
{
- __put_cpu_fpsimd_context();
if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_bh_enable();
else
preempt_enable();
}
-static bool have_cpu_fpsimd_context(void)
-{
- return !preemptible() && __this_cpu_read(fpsimd_context_busy);
-}
-
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
{
return task->thread.vl[type];
@@ -383,7 +356,8 @@ static void task_fpsimd_load(void)
bool restore_ffr;
WARN_ON(!system_supports_fpsimd());
- WARN_ON(!have_cpu_fpsimd_context());
+ WARN_ON(preemptible());
+ WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
@@ -406,7 +380,7 @@ static void task_fpsimd_load(void)
default:
/*
* This indicates either a bug in
- * fpsimd_save() or memory corruption, we
+ * fpsimd_save_user_state() or memory corruption, we
* should always record an explicit format
* when we save. We always at least have the
* memory allocated for FPSMID registers so
@@ -457,7 +431,7 @@ static void task_fpsimd_load(void)
* than via current, if we are saving KVM state then it will have
* ensured that the type of registers to save is set in last->to_save.
*/
-static void fpsimd_save(void)
+static void fpsimd_save_user_state(void)
{
struct cpu_fp_state const *last =
this_cpu_ptr(&fpsimd_last_state);
@@ -467,7 +441,7 @@ static void fpsimd_save(void)
unsigned int vl;
WARN_ON(!system_supports_fpsimd());
- WARN_ON(!have_cpu_fpsimd_context());
+ WARN_ON(preemptible());
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
@@ -888,7 +862,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
if (task == current) {
get_cpu_fpsimd_context();
- fpsimd_save();
+ fpsimd_save_user_state();
}
fpsimd_flush_task_state(task);
@@ -1171,7 +1145,7 @@ void __init sve_setup(void)
unsigned long b;
int max_bit;
- if (!cpus_have_cap(ARM64_SVE))
+ if (!system_supports_sve())
return;
/*
@@ -1301,7 +1275,7 @@ void __init sme_setup(void)
struct vl_info *info = &vl_info[ARM64_VEC_SME];
int min_bit, max_bit;
- if (!cpus_have_cap(ARM64_SME))
+ if (!system_supports_sme())
return;
/*
@@ -1500,6 +1474,34 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
current);
}
+static void fpsimd_load_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
+
+ /*
+ * Elide the load if this CPU holds the most recent kernel mode
+ * FPSIMD context of the current task.
+ */
+ if (last->st == &task->thread.kernel_fpsimd_state &&
+ task->thread.kernel_fpsimd_cpu == smp_processor_id())
+ return;
+
+ fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+}
+
+static void fpsimd_save_kernel_state(struct task_struct *task)
+{
+ struct cpu_fp_state cpu_fp_state = {
+ .st = &task->thread.kernel_fpsimd_state,
+ .to_save = FP_STATE_FPSIMD,
+ };
+
+ fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_bind_state_to_cpu(&cpu_fp_state);
+
+ task->thread.kernel_fpsimd_cpu = smp_processor_id();
+}
+
void fpsimd_thread_switch(struct task_struct *next)
{
bool wrong_task, wrong_cpu;
@@ -1507,24 +1509,31 @@ void fpsimd_thread_switch(struct task_struct *next)
if (!system_supports_fpsimd())
return;
- __get_cpu_fpsimd_context();
+ WARN_ON_ONCE(!irqs_disabled());
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
-
- /*
- * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
- * state. For kernel threads, FPSIMD registers are never loaded
- * and wrong_task and wrong_cpu will always be true.
- */
- wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
- &next->thread.uw.fpsimd_state;
- wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE))
+ fpsimd_save_kernel_state(current);
+ else
+ fpsimd_save_user_state();
- update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
- wrong_task || wrong_cpu);
+ if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
+ fpsimd_load_kernel_state(next);
+ set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+ } else {
+ /*
+ * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+ * state. For kernel threads, FPSIMD registers are never
+ * loaded with user mode FPSIMD state and so wrong_task and
+ * wrong_cpu will always be true.
+ */
+ wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+ &next->thread.uw.fpsimd_state;
+ wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
- __put_cpu_fpsimd_context();
+ update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+ wrong_task || wrong_cpu);
+ }
}
static void fpsimd_flush_thread_vl(enum vec_type type)
@@ -1614,7 +1623,7 @@ void fpsimd_preserve_current_state(void)
return;
get_cpu_fpsimd_context();
- fpsimd_save();
+ fpsimd_save_user_state();
put_cpu_fpsimd_context();
}
@@ -1826,13 +1835,15 @@ static void fpsimd_flush_cpu_state(void)
*/
void fpsimd_save_and_flush_cpu_state(void)
{
+ unsigned long flags;
+
if (!system_supports_fpsimd())
return;
WARN_ON(preemptible());
- __get_cpu_fpsimd_context();
- fpsimd_save();
+ local_irq_save(flags);
+ fpsimd_save_user_state();
fpsimd_flush_cpu_state();
- __put_cpu_fpsimd_context();
+ local_irq_restore(flags);
}
#ifdef CONFIG_KERNEL_MODE_NEON
@@ -1864,10 +1875,37 @@ void kernel_neon_begin(void)
get_cpu_fpsimd_context();
/* Save unsaved fpsimd state, if any: */
- fpsimd_save();
+ if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
+ BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
+ fpsimd_save_kernel_state(current);
+ } else {
+ fpsimd_save_user_state();
+
+ /*
+ * Set the thread flag so that the kernel mode FPSIMD state
+ * will be context switched along with the rest of the task
+ * state.
+ *
+ * On non-PREEMPT_RT, softirqs may interrupt task level kernel
+ * mode FPSIMD, but the task will not be preemptible so setting
+ * TIF_KERNEL_FPSTATE for those would be both wrong (as it
+ * would mark the task context FPSIMD state as requiring a
+ * context switch) and unnecessary.
+ *
+ * On PREEMPT_RT, softirqs are serviced from a separate thread,
+ * which is scheduled as usual, and this guarantees that these
+ * softirqs are not interrupting use of the FPSIMD in kernel
+ * mode in task context. So in this case, setting the flag here
+ * is always appropriate.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
+
+ put_cpu_fpsimd_context();
}
EXPORT_SYMBOL_GPL(kernel_neon_begin);
@@ -1885,7 +1923,16 @@ void kernel_neon_end(void)
if (!system_supports_fpsimd())
return;
- put_cpu_fpsimd_context();
+ /*
+ * If we are returning from a nested use of kernel mode FPSIMD, restore
+ * the task context kernel mode FPSIMD state. This can only happen when
+ * running in softirq context on non-PREEMPT_RT.
+ */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
+ test_thread_flag(TIF_KERNEL_FPSTATE))
+ fpsimd_load_kernel_state(current);
+ else
+ clear_thread_flag(TIF_KERNEL_FPSTATE);
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 7b236994f0e150..cab7f91949d8f5 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -482,7 +482,7 @@ SYM_FUNC_START_LOCAL(__primary_switched)
str_l x21, __fdt_pointer, x5 // Save FDT pointer
- ldr_l x4, kimage_vaddr // Save the offset between
+ adrp x4, _text // Save the offset between
sub x4, x4, x0 // the kernel virtual and
str_l x4, kimage_voffset, x5 // physical mappings
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 3addc09f874615..e30fd9e32ef3a1 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -21,14 +21,25 @@
static u64 __boot_status __initdata;
+// temporary __prel64 related definitions
+// to be removed when this code is moved under pi/
+
+#define __prel64_initconst __initconst
+
+#define PREL64(type, name) union { type *name; }
+
+#define prel64_pointer(__d) (__d)
+
+typedef bool filter_t(u64 val);
+
struct ftr_set_desc {
char name[FTR_DESC_NAME_LEN];
- struct arm64_ftr_override *override;
+ PREL64(struct arm64_ftr_override, override);
struct {
char name[FTR_DESC_FIELD_LEN];
u8 shift;
u8 width;
- bool (*filter)(u64 val);
+ PREL64(filter_t, filter);
} fields[];
};
@@ -46,7 +57,7 @@ static bool __init mmfr1_vh_filter(u64 val)
val == 0);
}
-static const struct ftr_set_desc mmfr1 __initconst = {
+static const struct ftr_set_desc mmfr1 __prel64_initconst = {
.name = "id_aa64mmfr1",
.override = &id_aa64mmfr1_override,
.fields = {
@@ -70,7 +81,7 @@ static bool __init pfr0_sve_filter(u64 val)
return true;
}
-static const struct ftr_set_desc pfr0 __initconst = {
+static const struct ftr_set_desc pfr0 __prel64_initconst = {
.name = "id_aa64pfr0",
.override = &id_aa64pfr0_override,
.fields = {
@@ -94,7 +105,7 @@ static bool __init pfr1_sme_filter(u64 val)
return true;
}
-static const struct ftr_set_desc pfr1 __initconst = {
+static const struct ftr_set_desc pfr1 __prel64_initconst = {
.name = "id_aa64pfr1",
.override = &id_aa64pfr1_override,
.fields = {
@@ -105,7 +116,7 @@ static const struct ftr_set_desc pfr1 __initconst = {
},
};
-static const struct ftr_set_desc isar1 __initconst = {
+static const struct ftr_set_desc isar1 __prel64_initconst = {
.name = "id_aa64isar1",
.override = &id_aa64isar1_override,
.fields = {
@@ -117,7 +128,7 @@ static const struct ftr_set_desc isar1 __initconst = {
},
};
-static const struct ftr_set_desc isar2 __initconst = {
+static const struct ftr_set_desc isar2 __prel64_initconst = {
.name = "id_aa64isar2",
.override = &id_aa64isar2_override,
.fields = {
@@ -128,7 +139,7 @@ static const struct ftr_set_desc isar2 __initconst = {
},
};
-static const struct ftr_set_desc smfr0 __initconst = {
+static const struct ftr_set_desc smfr0 __prel64_initconst = {
.name = "id_aa64smfr0",
.override = &id_aa64smfr0_override,
.fields = {
@@ -149,7 +160,7 @@ static bool __init hvhe_filter(u64 val)
ID_AA64MMFR1_EL1_VH_SHIFT));
}
-static const struct ftr_set_desc sw_features __initconst = {
+static const struct ftr_set_desc sw_features __prel64_initconst = {
.name = "arm64_sw",
.override = &arm64_sw_feature_override,
.fields = {
@@ -159,22 +170,23 @@ static const struct ftr_set_desc sw_features __initconst = {
},
};
-static const struct ftr_set_desc * const regs[] __initconst = {
- &mmfr1,
- &pfr0,
- &pfr1,
- &isar1,
- &isar2,
- &smfr0,
- &sw_features,
+static const
+PREL64(const struct ftr_set_desc, reg) regs[] __prel64_initconst = {
+ { &mmfr1 },
+ { &pfr0 },
+ { &pfr1 },
+ { &isar1 },
+ { &isar2 },
+ { &smfr0 },
+ { &sw_features },
};
static const struct {
char alias[FTR_ALIAS_NAME_LEN];
char feature[FTR_ALIAS_OPTION_LEN];
} aliases[] __initconst = {
- { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
- { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
+ { "kvm_arm.mode=protected", "id_aa64mmfr1.vh=0" },
{ "arm64.nosve", "id_aa64pfr0.sve=0" },
{ "arm64.nosme", "id_aa64pfr1.sme=0" },
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
@@ -187,45 +199,61 @@ static const struct {
{ "nokaslr", "arm64_sw.nokaslr=1" },
};
-static int __init parse_nokaslr(char *unused)
+static int __init parse_hexdigit(const char *p, u64 *v)
{
- /* nokaslr param handling is done by early cpufeature code */
+ // skip "0x" if it comes next
+ if (p[0] == '0' && tolower(p[1]) == 'x')
+ p += 2;
+
+ // check whether the RHS is a single hex digit
+ if (!isxdigit(p[0]) || (p[1] && !isspace(p[1])))
+ return -EINVAL;
+
+ *v = tolower(*p) - (isdigit(*p) ? '0' : 'a' - 10);
return 0;
}
-early_param("nokaslr", parse_nokaslr);
-static int __init find_field(const char *cmdline,
+static int __init find_field(const char *cmdline, char *opt, int len,
const struct ftr_set_desc *reg, int f, u64 *v)
{
- char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
- int len;
+ int flen = strlen(reg->fields[f].name);
- len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=",
- reg->name, reg->fields[f].name);
+ // append '<fieldname>=' to obtain '<name>.<fieldname>='
+ memcpy(opt + len, reg->fields[f].name, flen);
+ len += flen;
+ opt[len++] = '=';
- if (!parameqn(cmdline, opt, len))
+ if (memcmp(cmdline, opt, len))
return -1;
- return kstrtou64(cmdline + len, 0, v);
+ return parse_hexdigit(cmdline + len, v);
}
static void __init match_options(const char *cmdline)
{
+ char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
int i;
for (i = 0; i < ARRAY_SIZE(regs); i++) {
+ const struct ftr_set_desc *reg = prel64_pointer(regs[i].reg);
+ struct arm64_ftr_override *override;
+ int len = strlen(reg->name);
int f;
- if (!regs[i]->override)
- continue;
+ override = prel64_pointer(reg->override);
- for (f = 0; strlen(regs[i]->fields[f].name); f++) {
- u64 shift = regs[i]->fields[f].shift;
- u64 width = regs[i]->fields[f].width ?: 4;
+ // set opt[] to '<name>.'
+ memcpy(opt, reg->name, len);
+ opt[len++] = '.';
+
+ for (f = 0; reg->fields[f].name[0] != '\0'; f++) {
+ u64 shift = reg->fields[f].shift;
+ u64 width = reg->fields[f].width ?: 4;
u64 mask = GENMASK_ULL(shift + width - 1, shift);
+ bool (*filter)(u64 val);
u64 v;
- if (find_field(cmdline, regs[i], f, &v))
+ if (find_field(cmdline, opt, len, reg, f, &v))
continue;
/*
@@ -233,16 +261,16 @@ static void __init match_options(const char *cmdline)
* it by setting the value to the all-ones while
* clearing the mask... Yes, this is fragile.
*/
- if (regs[i]->fields[f].filter &&
- !regs[i]->fields[f].filter(v)) {
- regs[i]->override->val |= mask;
- regs[i]->override->mask &= ~mask;
+ filter = prel64_pointer(reg->fields[f].filter);
+ if (filter && !filter(v)) {
+ override->val |= mask;
+ override->mask &= ~mask;
continue;
}
- regs[i]->override->val &= ~mask;
- regs[i]->override->val |= (v << shift) & mask;
- regs[i]->override->mask |= mask;
+ override->val &= ~mask;
+ override->val |= (v << shift) & mask;
+ override->mask |= mask;
return;
}
@@ -258,23 +286,29 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
cmdline = skip_spaces(cmdline);
- for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++);
- if (!len)
+ /* terminate on "--" appearing on the command line by itself */
+ if (cmdline[0] == '-' && cmdline[1] == '-' && isspace(cmdline[2]))
return;
- len = min(len, ARRAY_SIZE(buf) - 1);
- memcpy(buf, cmdline, len);
- buf[len] = '\0';
-
- if (strcmp(buf, "--") == 0)
+ for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++) {
+ if (len >= sizeof(buf) - 1)
+ break;
+ if (cmdline[len] == '-')
+ buf[len] = '_';
+ else
+ buf[len] = cmdline[len];
+ }
+ if (!len)
return;
+ buf[len] = 0;
+
cmdline += len;
match_options(buf);
for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++)
- if (parameq(buf, aliases[i].alias))
+ if (!memcmp(buf, aliases[i].alias, len + 1))
__parse_cmdline(aliases[i].feature, false);
} while (1);
}
@@ -316,13 +350,16 @@ void init_feature_override(u64 boot_status);
asmlinkage void __init init_feature_override(u64 boot_status)
{
+ struct arm64_ftr_override *override;
+ const struct ftr_set_desc *reg;
int i;
for (i = 0; i < ARRAY_SIZE(regs); i++) {
- if (regs[i]->override) {
- regs[i]->override->val = 0;
- regs[i]->override->mask = 0;
- }
+ reg = prel64_pointer(regs[i].reg);
+ override = prel64_pointer(reg->override);
+
+ override->val = 0;
+ override->mask = 0;
}
__boot_status = boot_status;
@@ -330,9 +367,9 @@ asmlinkage void __init init_feature_override(u64 boot_status)
parse_cmdline();
for (i = 0; i < ARRAY_SIZE(regs); i++) {
- if (regs[i]->override)
- dcache_clean_inval_poc((unsigned long)regs[i]->override,
- (unsigned long)regs[i]->override +
- sizeof(*regs[i]->override));
+ reg = prel64_pointer(regs[i].reg);
+ override = prel64_pointer(reg->override);
+ dcache_clean_inval_poc((unsigned long)override,
+ (unsigned long)(override + 1));
}
}
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 6ad5c6ef532962..85087e2df56498 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -22,6 +22,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/exception.h>
+#include <asm/numa.h>
#include <asm/softirq_stack.h>
#include <asm/stacktrace.h>
#include <asm/vmap_stack.h>
@@ -47,17 +48,17 @@ static void init_irq_scs(void)
for_each_possible_cpu(cpu)
per_cpu(irq_shadow_call_stack_ptr, cpu) =
- scs_alloc(cpu_to_node(cpu));
+ scs_alloc(early_cpu_to_node(cpu));
}
#ifdef CONFIG_VMAP_STACK
-static void init_irq_stacks(void)
+static void __init init_irq_stacks(void)
{
int cpu;
unsigned long *p;
for_each_possible_cpu(cpu) {
- p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
+ p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, early_cpu_to_node(cpu));
per_cpu(irq_stack_ptr, cpu) = p;
}
}
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 94a269cd1f07a0..12c7f3c8ba764f 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -36,3 +36,10 @@ void __init kaslr_init(void)
pr_info("KASLR enabled\n");
__kaslr_is_enabled = true;
}
+
+static int __init parse_nokaslr(char *unused)
+{
+ /* nokaslr param handling is done by early cpufeature code */
+ return 0;
+}
+early_param("nokaslr", parse_nokaslr);
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index 4c0ea3cd4ea406..c844a0546d7f0e 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -3,6 +3,7 @@
KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
-Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+ $(DISABLE_LATENT_ENTROPY_PLUGIN) \
$(call cc-option,-mbranch-protection=none) \
-I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
-include $(srctree)/include/linux/hidden.h \
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index defbab84e9e5c7..4ced34f62dab59 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -439,9 +439,8 @@ static void __init hyp_mode_check(void)
void __init smp_cpus_done(unsigned int max_cpus)
{
pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
- setup_system_features();
hyp_mode_check();
- apply_alternatives_all();
+ setup_system_features();
setup_user_features();
mark_linear_text_alias_ro();
}
@@ -454,14 +453,9 @@ void __init smp_prepare_boot_cpu(void)
* freed shortly, so we must move over to the runtime per-cpu area.
*/
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
- cpuinfo_store_boot_cpu();
- /*
- * We now know enough about the boot CPU to apply the
- * alternatives that cannot wait until interrupt handling
- * and/or scheduling is enabled.
- */
- apply_boot_alternatives();
+ cpuinfo_store_boot_cpu();
+ setup_boot_cpu_features();
/* Conditionally switch to GIC PMR for interrupt masking */
if (system_uses_irq_prio_masking())
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 17f66a74c745c8..7f88028a00c02c 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -8,6 +8,7 @@
#include <linux/efi.h>
#include <linux/export.h>
#include <linux/ftrace.h>
+#include <linux/kprobes.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
@@ -19,6 +20,31 @@
#include <asm/stacktrace.h>
/*
+ * Kernel unwind state
+ *
+ * @common: Common unwind state.
+ * @task: The task being unwound.
+ * @kr_cur: When KRETPROBES is selected, holds the kretprobe instance
+ * associated with the most recently encountered replacement lr
+ * value.
+ */
+struct kunwind_state {
+ struct unwind_state common;
+ struct task_struct *task;
+#ifdef CONFIG_KRETPROBES
+ struct llist_node *kr_cur;
+#endif
+};
+
+static __always_inline void
+kunwind_init(struct kunwind_state *state,
+ struct task_struct *task)
+{
+ unwind_init_common(&state->common);
+ state->task = task;
+}
+
+/*
* Start an unwind from a pt_regs.
*
* The unwind will begin at the PC within the regs.
@@ -26,13 +52,13 @@
* The regs must be on a stack currently owned by the calling task.
*/
static __always_inline void
-unwind_init_from_regs(struct unwind_state *state,
- struct pt_regs *regs)
+kunwind_init_from_regs(struct kunwind_state *state,
+ struct pt_regs *regs)
{
- unwind_init_common(state, current);
+ kunwind_init(state, current);
- state->fp = regs->regs[29];
- state->pc = regs->pc;
+ state->common.fp = regs->regs[29];
+ state->common.pc = regs->pc;
}
/*
@@ -44,12 +70,12 @@ unwind_init_from_regs(struct unwind_state *state,
* The function which invokes this must be noinline.
*/
static __always_inline void
-unwind_init_from_caller(struct unwind_state *state)
+kunwind_init_from_caller(struct kunwind_state *state)
{
- unwind_init_common(state, current);
+ kunwind_init(state, current);
- state->fp = (unsigned long)__builtin_frame_address(1);
- state->pc = (unsigned long)__builtin_return_address(0);
+ state->common.fp = (unsigned long)__builtin_frame_address(1);
+ state->common.pc = (unsigned long)__builtin_return_address(0);
}
/*
@@ -63,35 +89,38 @@ unwind_init_from_caller(struct unwind_state *state)
* call this for the current task.
*/
static __always_inline void
-unwind_init_from_task(struct unwind_state *state,
- struct task_struct *task)
+kunwind_init_from_task(struct kunwind_state *state,
+ struct task_struct *task)
{
- unwind_init_common(state, task);
+ kunwind_init(state, task);
- state->fp = thread_saved_fp(task);
- state->pc = thread_saved_pc(task);
+ state->common.fp = thread_saved_fp(task);
+ state->common.pc = thread_saved_pc(task);
}
static __always_inline int
-unwind_recover_return_address(struct unwind_state *state)
+kunwind_recover_return_address(struct kunwind_state *state)
{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (state->task->ret_stack &&
- (state->pc == (unsigned long)return_to_handler)) {
+ (state->common.pc == (unsigned long)return_to_handler)) {
unsigned long orig_pc;
- orig_pc = ftrace_graph_ret_addr(state->task, NULL, state->pc,
- (void *)state->fp);
- if (WARN_ON_ONCE(state->pc == orig_pc))
+ orig_pc = ftrace_graph_ret_addr(state->task, NULL,
+ state->common.pc,
+ (void *)state->common.fp);
+ if (WARN_ON_ONCE(state->common.pc == orig_pc))
return -EINVAL;
- state->pc = orig_pc;
+ state->common.pc = orig_pc;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#ifdef CONFIG_KRETPROBES
- if (is_kretprobe_trampoline(state->pc)) {
- state->pc = kretprobe_find_ret_addr(state->task,
- (void *)state->fp,
- &state->kr_cur);
+ if (is_kretprobe_trampoline(state->common.pc)) {
+ unsigned long orig_pc;
+ orig_pc = kretprobe_find_ret_addr(state->task,
+ (void *)state->common.fp,
+ &state->kr_cur);
+ state->common.pc = orig_pc;
}
#endif /* CONFIG_KRETPROBES */
@@ -106,38 +135,40 @@ unwind_recover_return_address(struct unwind_state *state)
* and the location (but not the fp value) of B.
*/
static __always_inline int
-unwind_next(struct unwind_state *state)
+kunwind_next(struct kunwind_state *state)
{
struct task_struct *tsk = state->task;
- unsigned long fp = state->fp;
+ unsigned long fp = state->common.fp;
int err;
/* Final frame; nothing to unwind */
if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
return -ENOENT;
- err = unwind_next_frame_record(state);
+ err = unwind_next_frame_record(&state->common);
if (err)
return err;
- state->pc = ptrauth_strip_kernel_insn_pac(state->pc);
+ state->common.pc = ptrauth_strip_kernel_insn_pac(state->common.pc);
- return unwind_recover_return_address(state);
+ return kunwind_recover_return_address(state);
}
+typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);
+
static __always_inline void
-unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry,
- void *cookie)
+do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
+ void *cookie)
{
- if (unwind_recover_return_address(state))
+ if (kunwind_recover_return_address(state))
return;
while (1) {
int ret;
- if (!consume_entry(cookie, state->pc))
+ if (!consume_state(state, cookie))
break;
- ret = unwind_next(state);
+ ret = kunwind_next(state);
if (ret < 0)
break;
}
@@ -172,9 +203,10 @@ unwind(struct unwind_state *state, stack_trace_consume_fn consume_entry,
: stackinfo_get_unknown(); \
})
-noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
- void *cookie, struct task_struct *task,
- struct pt_regs *regs)
+static __always_inline void
+kunwind_stack_walk(kunwind_consume_fn consume_state,
+ void *cookie, struct task_struct *task,
+ struct pt_regs *regs)
{
struct stack_info stacks[] = {
stackinfo_get_task(task),
@@ -190,22 +222,48 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
STACKINFO_EFI,
#endif
};
- struct unwind_state state = {
- .stacks = stacks,
- .nr_stacks = ARRAY_SIZE(stacks),
+ struct kunwind_state state = {
+ .common = {
+ .stacks = stacks,
+ .nr_stacks = ARRAY_SIZE(stacks),
+ },
};
if (regs) {
if (task != current)
return;
- unwind_init_from_regs(&state, regs);
+ kunwind_init_from_regs(&state, regs);
} else if (task == current) {
- unwind_init_from_caller(&state);
+ kunwind_init_from_caller(&state);
} else {
- unwind_init_from_task(&state, task);
+ kunwind_init_from_task(&state, task);
}
- unwind(&state, consume_entry, cookie);
+ do_kunwind(&state, consume_state, cookie);
+}
+
+struct kunwind_consume_entry_data {
+ stack_trace_consume_fn consume_entry;
+ void *cookie;
+};
+
+static bool
+arch_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
+{
+ struct kunwind_consume_entry_data *data = cookie;
+ return data->consume_entry(data->cookie, state->common.pc);
+}
+
+noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task,
+ struct pt_regs *regs)
+{
+ struct kunwind_consume_entry_data data = {
+ .consume_entry = consume_entry,
+ .cookie = cookie,
+ };
+
+ kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
}
static bool dump_backtrace_entry(void *arg, unsigned long where)
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 1f911a76c5af39..2266fcdff78a07 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -118,7 +118,7 @@ endif
VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
# Build rules
-targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw
+targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso32.so.dbg vdso.so.raw
c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso))
c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
@@ -127,15 +127,15 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
-include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
+include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
$(call if_changed,vdsosym)
# Strip rule for vdso.so
$(obj)/vdso.so: OBJCOPYFLAGS := -S
-$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE
+$(obj)/vdso.so: $(obj)/vdso32.so.dbg FORCE
$(call if_changed,objcopy)
-$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
+$(obj)/vdso32.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
$(call if_changed,vdsomunge)
# Link rule for the .so file, .lds has to be first
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 9d23a51d7f7525..b29f15418c0aff 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -12,7 +12,7 @@
#include <nvhe/pkvm.h>
#include <nvhe/trap_handler.h>
-/* Used by icache_is_vpipt(). */
+/* Used by icache_is_aliasing(). */
unsigned long __icache_flags;
/* Used by kvm_get_vttbr(). */
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 1b265713d6bede..a60fb13e21924f 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -105,28 +105,6 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- /*
- * If the host is running at EL1 and we have a VPIPT I-cache,
- * then we must perform I-cache maintenance at EL2 in order for
- * it to have an effect on the guest. Since the guest cannot hit
- * I-cache lines allocated with a different VMID, we don't need
- * to worry about junk out of guest reset (we nuke the I-cache on
- * VMID rollover), but we do need to be careful when remapping
- * executable pages for the same guest. This can happen when KSM
- * takes a CoW fault on an executable page, copies the page into
- * a page that was previously mapped in the guest and then needs
- * to invalidate the guest view of the I-cache for that page
- * from EL1. To solve this, we invalidate the entire I-cache when
- * unmapping a page from a guest if we have a VPIPT I-cache but
- * the host is running at EL1. As above, we could do better if
- * we had the VA.
- *
- * The moral of this story is: if you have a VPIPT I-cache, then
- * you should be running with VHE enabled.
- */
- if (icache_is_vpipt())
- icache_inval_all_pou();
-
__tlb_switch_to_host(&cxt);
}
@@ -157,28 +135,6 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- /*
- * If the host is running at EL1 and we have a VPIPT I-cache,
- * then we must perform I-cache maintenance at EL2 in order for
- * it to have an effect on the guest. Since the guest cannot hit
- * I-cache lines allocated with a different VMID, we don't need
- * to worry about junk out of guest reset (we nuke the I-cache on
- * VMID rollover), but we do need to be careful when remapping
- * executable pages for the same guest. This can happen when KSM
- * takes a CoW fault on an executable page, copies the page into
- * a page that was previously mapped in the guest and then needs
- * to invalidate the guest view of the I-cache for that page
- * from EL1. To solve this, we invalidate the entire I-cache when
- * unmapping a page from a guest if we have a VPIPT I-cache but
- * the host is running at EL1. As above, we could do better if
- * we had the VA.
- *
- * The moral of this story is: if you have a VPIPT I-cache, then
- * you should be running with VHE enabled.
- */
- if (icache_is_vpipt())
- icache_inval_all_pou();
-
__tlb_switch_to_host(&cxt);
}
@@ -205,10 +161,6 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- /* See the comment in __kvm_tlb_flush_vmid_ipa() */
- if (icache_is_vpipt())
- icache_inval_all_pou();
-
__tlb_switch_to_host(&cxt);
}
@@ -246,18 +198,5 @@ void __kvm_flush_vm_context(void)
/* Same remark as in __tlb_switch_to_guest() */
dsb(ish);
__tlbi(alle1is);
-
- /*
- * VIPT and PIPT caches are not affected by VMID, so no maintenance
- * is necessary across a VMID rollover.
- *
- * VPIPT caches constrain lookup and maintenance to the active VMID,
- * so we need to invalidate lines with a stale VMID to avoid an ABA
- * race after multiple rollovers.
- *
- */
- if (icache_is_vpipt())
- asm volatile("ic ialluis");
-
dsb(ish);
}
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index b636b4111dbf50..b32e2940df7dc8 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -216,18 +216,5 @@ void __kvm_flush_vm_context(void)
{
dsb(ishst);
__tlbi(alle1is);
-
- /*
- * VIPT and PIPT caches are not affected by VMID, so no maintenance
- * is necessary across a VMID rollover.
- *
- * VPIPT caches constrain lookup and maintenance to the active VMID,
- * so we need to invalidate lines with a stale VMID to avoid an ABA
- * race after multiple rollovers.
- *
- */
- if (icache_is_vpipt())
- asm volatile("ic ialluis");
-
dsb(ish);
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index fe99b3dab6ce5d..3d9467ff73bcbf 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -267,9 +267,8 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
- u64 val = kvm_vcpu_read_pmcr(vcpu) >> ARMV8_PMU_PMCR_N_SHIFT;
+ u64 val = FIELD_GET(ARMV8_PMU_PMCR_N, kvm_vcpu_read_pmcr(vcpu));
- val &= ARMV8_PMU_PMCR_N_MASK;
if (val == 0)
return BIT(ARMV8_PMU_CYCLE_IDX);
else
@@ -1136,8 +1135,7 @@ u8 kvm_arm_pmu_get_pmuver_limit(void)
*/
u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
{
- u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0) &
- ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
+ u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);
- return pmcr | ((u64)vcpu->kvm->arch.pmcr_n << ARMV8_PMU_PMCR_N_SHIFT);
+ return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 4735e1b37fb3e0..ff45d688bd7daa 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -877,7 +877,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
u64 pmcr, val;
pmcr = kvm_vcpu_read_pmcr(vcpu);
- val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+ val = FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) {
kvm_inject_undefined(vcpu);
return false;
@@ -1143,7 +1143,7 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
u64 val)
{
- u8 new_n = (val >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+ u8 new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
struct kvm *kvm = vcpu->kvm;
mutex_lock(&kvm->arch.config_lock);
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index c336d2ffdec559..6a56d7cf309da5 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -18,13 +18,6 @@
* x1 - src
*/
SYM_FUNC_START(__pi_copy_page)
-alternative_if ARM64_HAS_NO_HW_PREFETCH
- // Prefetch three cache lines ahead.
- prfm pldl1strm, [x1, #128]
- prfm pldl1strm, [x1, #256]
- prfm pldl1strm, [x1, #384]
-alternative_else_nop_endif
-
ldp x2, x3, [x1]
ldp x4, x5, [x1, #16]
ldp x6, x7, [x1, #32]
@@ -39,10 +32,6 @@ alternative_else_nop_endif
1:
tst x0, #(PAGE_SIZE - 1)
-alternative_if ARM64_HAS_NO_HW_PREFETCH
- prfm pldl1strm, [x1, #384]
-alternative_else_nop_endif
-
stnp x2, x3, [x0, #-256]
ldp x2, x3, [x1]
stnp x4, x5, [x0, #16 - 256]
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 15f6347d23b690..1ac7467d34c9c3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -52,9 +52,6 @@ u64 vabits_actual __ro_after_init = VA_BITS_MIN;
EXPORT_SYMBOL(vabits_actual);
#endif
-u64 kimage_vaddr __ro_after_init = (u64)&_text;
-EXPORT_SYMBOL(kimage_vaddr);
-
u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
@@ -674,6 +671,9 @@ static int __init map_entry_trampoline(void)
{
int i;
+ if (!arm64_kernel_unmapped_at_el0())
+ return 0;
+
pgprot_t prot = kernel_exec_prot();
phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index b98c38288a9d35..1e07d74d7a6c93 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -37,10 +37,10 @@ HAS_GIC_PRIO_MASKING
HAS_GIC_PRIO_RELAXED_SYNC
HAS_HCX
HAS_LDAPR
+HAS_LPA2
HAS_LSE_ATOMICS
HAS_MOPS
HAS_NESTED_VIRT
-HAS_NO_HW_PREFETCH
HAS_PAN
HAS_S1PIE
HAS_RAS_EXTN
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 96cbeeab4eec33..4c9b679343674a 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1002,6 +1002,27 @@ UnsignedEnum 3:0 BT
EndEnum
EndSysreg
+Sysreg ID_AA64PFR2_EL1 3 0 0 4 2
+Res0 63:36
+UnsignedEnum 35:32 FPMR
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+Res0 31:12
+UnsignedEnum 11:8 MTEFAR
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 7:4 MTESTOREONLY
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 3:0 MTEPERM
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+EndSysreg
+
Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4
Res0 63:60
UnsignedEnum 59:56 F64MM
@@ -1058,7 +1079,11 @@ UnsignedEnum 63 FA64
0b0 NI
0b1 IMP
EndEnum
-Res0 62:60
+Res0 62:61
+UnsignedEnum 60 LUTv2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
UnsignedEnum 59:56 SMEver
0b0000 SME
0b0001 SME2
@@ -1086,7 +1111,14 @@ UnsignedEnum 42 F16F16
0b0 NI
0b1 IMP
EndEnum
-Res0 41:40
+UnsignedEnum 41 F8F16
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 40 F8F32
+ 0b0 NI
+ 0b1 IMP
+EndEnum
UnsignedEnum 39:36 I8I32
0b0000 NI
0b1111 IMP
@@ -1107,7 +1139,49 @@ UnsignedEnum 32 F32F32
0b0 NI
0b1 IMP
EndEnum
-Res0 31:0
+Res0 31
+UnsignedEnum 30 SF8FMA
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 29 SF8DP4
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 28 SF8DP2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 27:0
+EndSysreg
+
+Sysreg ID_AA64FPFR0_EL1 3 0 0 4 7
+Res0 63:32
+UnsignedEnum 31 F8CVT
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 30 F8FMA
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 29 F8DP4
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 28 F8DP2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+Res0 27:2
+UnsignedEnum 1 F8E4M3
+ 0b0 NI
+ 0b1 IMP
+EndEnum
+UnsignedEnum 0 F8E5M2
+ 0b0 NI
+ 0b1 IMP
+EndEnum
EndSysreg
Sysreg ID_AA64DFR0_EL1 3 0 0 5 0
@@ -1115,7 +1189,10 @@ Enum 63:60 HPMN0
0b0000 UNPREDICTABLE
0b0001 DEF
EndEnum
-Res0 59:56
+UnsignedEnum 59:56 ExtTrcBuff
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 55:52 BRBE
0b0000 NI
0b0001 IMP
@@ -1327,6 +1404,7 @@ UnsignedEnum 11:8 API
0b0011 PAuth2
0b0100 FPAC
0b0101 FPACCOMBINE
+ 0b0110 PAuth_LR
EndEnum
UnsignedEnum 7:4 APA
0b0000 NI
@@ -1335,6 +1413,7 @@ UnsignedEnum 7:4 APA
0b0011 PAuth2
0b0100 FPAC
0b0101 FPACCOMBINE
+ 0b0110 PAuth_LR
EndEnum
UnsignedEnum 3:0 DPB
0b0000 NI
@@ -1344,7 +1423,14 @@ EndEnum
EndSysreg
Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2
-Res0 63:56
+UnsignedEnum 63:60 ATS1A
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 59:56 LUT
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 55:52 CSSC
0b0000 NI
0b0001 IMP
@@ -1353,7 +1439,19 @@ UnsignedEnum 51:48 RPRFM
0b0000 NI
0b0001 IMP
EndEnum
-Res0 47:32
+Res0 47:44
+UnsignedEnum 43:40 PRFMSLC
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 39:36 SYSINSTR_128
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 35:32 SYSREG_128
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
UnsignedEnum 31:28 CLRBHB
0b0000 NI
0b0001 IMP
@@ -1377,6 +1475,7 @@ UnsignedEnum 15:12 APA3
0b0011 PAuth2
0b0100 FPAC
0b0101 FPACCOMBINE
+ 0b0110 PAuth_LR
EndEnum
UnsignedEnum 11:8 GPA3
0b0000 NI
@@ -1392,6 +1491,23 @@ UnsignedEnum 3:0 WFxT
EndEnum
EndSysreg
+Sysreg ID_AA64ISAR3_EL1 3 0 0 6 3
+Res0 63:12
+UnsignedEnum 11:8 TLBIW
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 7:4 FAMINMAX
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
+UnsignedEnum 3:0 CPA
+ 0b0000 NI
+ 0b0001 IMP
+ 0b0010 CPA2
+EndEnum
+EndSysreg
+
Sysreg ID_AA64MMFR0_EL1 3 0 0 7 0
UnsignedEnum 63:60 ECV
0b0000 NI
@@ -1680,7 +1796,8 @@ Field 63 TIDCP
Field 62 SPINTMASK
Field 61 NMI
Field 60 EnTP2
-Res0 59:58
+Field 59 TCSO
+Field 58 TCSO0
Field 57 EPAN
Field 56 EnALS
Field 55 EnAS0
@@ -1709,7 +1826,7 @@ EndEnum
Field 37 ITFSB
Field 36 BT1
Field 35 BT0
-Res0 34
+Field 34 EnFPM
Field 33 MSCEn
Field 32 CMOW
Field 31 EnIA
@@ -1747,7 +1864,8 @@ Field 0 M
EndSysreg
SysregFields CPACR_ELx
-Res0 63:29
+Res0 63:30
+Field 29 E0POE
Field 28 TTA
Res0 27:26
Field 25:24 SMEN
@@ -1790,6 +1908,41 @@ Sysreg SMCR_EL1 3 0 1 2 6
Fields SMCR_ELx
EndSysreg
+SysregFields GCSCR_ELx
+Res0 63:10
+Field 9 STREn
+Field 8 PUSHMEn
+Res0 7
+Field 6 EXLOCKEN
+Field 5 RVCHKEN
+Res0 4:1
+Field 0 PCRSEL
+EndSysregFields
+
+Sysreg GCSCR_EL1 3 0 2 5 0
+Fields GCSCR_ELx
+EndSysreg
+
+SysregFields GCSPR_ELx
+Field 63:3 PTR
+Res0 2:0
+EndSysregFields
+
+Sysreg GCSPR_EL1 3 0 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
+Sysreg GCSCRE0_EL1 3 0 2 5 2
+Res0 63:11
+Field 10 nTR
+Field 9 STREn
+Field 8 PUSHMEn
+Res0 7:6
+Field 5 RVCHKEN
+Res0 4:1
+Field 0 PCRSEL
+EndSysreg
+
Sysreg ALLINT 3 0 4 3 0
Res0 63:14
Field 13 ALLINT
@@ -1933,10 +2086,18 @@ Sysreg CONTEXTIDR_EL1 3 0 13 0 1
Fields CONTEXTIDR_ELx
EndSysreg
+Sysreg RCWSMASK_EL1 3 0 13 0 3
+Field 63:0 RCWSMASK
+EndSysreg
+
Sysreg TPIDR_EL1 3 0 13 0 4
Field 63:0 ThreadID
EndSysreg
+Sysreg RCWMASK_EL1 3 0 13 0 6
+Field 63:0 RCWMASK
+EndSysreg
+
Sysreg SCXTNUM_EL1 3 0 13 0 7
Field 63:0 SoftwareContextNumber
EndSysreg
@@ -2004,9 +2165,10 @@ Field 27:24 CWG
Field 23:20 ERG
Field 19:16 DminLine
Enum 15:14 L1Ip
- 0b00 VPIPT
+ # This was named as VPIPT in the ARM but now documented as reserved
+ 0b00 RESERVED_VPIPT
# This is named as AIVIVT in the ARM but documented as reserved
- 0b01 RESERVED
+ 0b01 RESERVED_AIVIVT
0b10 VIPT
0b11 PIPT
EndEnum
@@ -2020,12 +2182,39 @@ Field 4 DZP
Field 3:0 BS
EndSysreg
+Sysreg GCSPR_EL0 3 3 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
Sysreg SVCR 3 3 4 2 2
Res0 63:2
Field 1 ZA
Field 0 SM
EndSysreg
+Sysreg FPMR 3 3 4 4 2
+Res0 63:38
+Field 37:32 LSCALE2
+Field 31:24 NSCALE
+Res0 23
+Field 22:16 LSCALE
+Field 15 OSC
+Field 14 OSM
+Res0 13:9
+UnsignedEnum 8:6 F8D
+ 0b000 E5M2
+ 0b001 E4M3
+EndEnum
+UnsignedEnum 5:3 F8S2
+ 0b000 E5M2
+ 0b001 E4M3
+EndEnum
+UnsignedEnum 2:0 F8S1
+ 0b000 E5M2
+ 0b001 E4M3
+EndEnum
+EndSysreg
+
SysregFields HFGxTR_EL2
Field 63 nAMAIR2_EL1
Field 62 nMAIR2_EL1
@@ -2102,7 +2291,9 @@ Fields HFGxTR_EL2
EndSysreg
Sysreg HFGITR_EL2 3 4 1 1 6
-Res0 63:61
+Res0 63
+Field 62 ATS1E1A
+Res0 61
Field 60 COSPRCTX
Field 59 nGCSEPP
Field 58 nGCSSTR_EL1
@@ -2295,12 +2486,57 @@ Field 1 DBGBVRn_EL1
Field 0 DBGBCRn_EL1
EndSysreg
+Sysreg HAFGRTR_EL2 3 4 3 1 6
+Res0 63:50
+Field 49 AMEVTYPER115_EL0
+Field 48 AMEVCNTR115_EL0
+Field 47 AMEVTYPER114_EL0
+Field 46 AMEVCNTR114_EL0
+Field 45 AMEVTYPER113_EL0
+Field 44 AMEVCNTR113_EL0
+Field 43 AMEVTYPER112_EL0
+Field 42 AMEVCNTR112_EL0
+Field 41 AMEVTYPER111_EL0
+Field 40 AMEVCNTR111_EL0
+Field 39 AMEVTYPER110_EL0
+Field 38 AMEVCNTR110_EL0
+Field 37 AMEVTYPER19_EL0
+Field 36 AMEVCNTR19_EL0
+Field 35 AMEVTYPER18_EL0
+Field 34 AMEVCNTR18_EL0
+Field 33 AMEVTYPER17_EL0
+Field 32 AMEVCNTR17_EL0
+Field 31 AMEVTYPER16_EL0
+Field 30 AMEVCNTR16_EL0
+Field 29 AMEVTYPER15_EL0
+Field 28 AMEVCNTR15_EL0
+Field 27 AMEVTYPER14_EL0
+Field 26 AMEVCNTR14_EL0
+Field 25 AMEVTYPER13_EL0
+Field 24 AMEVCNTR13_EL0
+Field 23 AMEVTYPER12_EL0
+Field 22 AMEVCNTR12_EL0
+Field 21 AMEVTYPER11_EL0
+Field 20 AMEVCNTR11_EL0
+Field 19 AMEVTYPER10_EL0
+Field 18 AMEVCNTR10_EL0
+Field 17 AMCNTEN1
+Res0 16:5
+Field 4 AMEVCNTR03_EL0
+Field 3 AMEVCNTR02_EL0
+Field 2 AMEVCNTR01_EL0
+Field 1 AMEVCNTR00_EL0
+Field 0 AMCNTEN0
+EndSysreg
+
Sysreg ZCR_EL2 3 4 1 2 0
Fields ZCR_ELx
EndSysreg
Sysreg HCRX_EL2 3 4 1 2 2
-Res0 63:23
+Res0 63:25
+Field 24 PACMEn
+Field 23 EnFPM
Field 22 GCSEn
Field 21 EnIDCP128
Field 20 EnSDERR
@@ -2348,6 +2584,14 @@ Sysreg SMCR_EL2 3 4 1 2 6
Fields SMCR_ELx
EndSysreg
+Sysreg GCSCR_EL2 3 4 2 5 0
+Fields GCSCR_ELx
+EndSysreg
+
+Sysreg GCSPR_EL2 3 4 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
Sysreg DACR32_EL2 3 4 3 0 0
Res0 63:32
Field 31:30 D15
@@ -2407,6 +2651,14 @@ Sysreg SMCR_EL12 3 5 1 2 6
Fields SMCR_ELx
EndSysreg
+Sysreg GCSCR_EL12 3 5 2 5 0
+Fields GCSCR_ELx
+EndSysreg
+
+Sysreg GCSPR_EL12 3 5 2 5 1
+Fields GCSPR_ELx
+EndSysreg
+
Sysreg FAR_EL12 3 5 6 0 0
Field 63:0 ADDR
EndSysreg
@@ -2471,6 +2723,33 @@ Field 1 PIE
Field 0 PnCH
EndSysreg
+SysregFields MAIR2_ELx
+Field 63:56 Attr7
+Field 55:48 Attr6
+Field 47:40 Attr5
+Field 39:32 Attr4
+Field 31:24 Attr3
+Field 23:16 Attr2
+Field 15:8 Attr1
+Field 7:0 Attr0
+EndSysregFields
+
+Sysreg MAIR2_EL1 3 0 10 2 1
+Fields MAIR2_ELx
+EndSysreg
+
+Sysreg MAIR2_EL2 3 4 10 1 1
+Fields MAIR2_ELx
+EndSysreg
+
+Sysreg AMAIR2_EL1 3 0 10 3 1
+Field 63:0 ImpDef
+EndSysreg
+
+Sysreg AMAIR2_EL2 3 4 10 3 1
+Field 63:0 ImpDef
+EndSysreg
+
SysregFields PIRx_ELx
Field 63:60 Perm15
Field 59:56 Perm14
@@ -2510,6 +2789,26 @@ Sysreg PIR_EL2 3 4 10 2 3
Fields PIRx_ELx
EndSysreg
+Sysreg POR_EL0 3 3 10 2 4
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg POR_EL1 3 0 10 2 4
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg POR_EL12 3 5 10 2 4
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg S2POR_EL1 3 0 10 2 5
+Fields PIRx_ELx
+EndSysreg
+
+Sysreg S2PIR_EL2 3 4 10 2 5
+Fields PIRx_ELx
+EndSysreg
+
Sysreg LORSA_EL1 3 0 10 4 0
Res0 63:52
Field 51:16 SA
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index eaa31e567d1ece..5b59d133b6af4f 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -144,7 +144,7 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid)
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
-static int __init early_cpu_to_node(int cpu)
+int __init early_cpu_to_node(int cpu)
{
return cpu_to_node_map[cpu];
}
diff --git a/drivers/firmware/efi/libstub/Makefile.zboot b/drivers/firmware/efi/libstub/Makefile.zboot
index 2c489627a80789..65ffd0b760b2fb 100644
--- a/drivers/firmware/efi/libstub/Makefile.zboot
+++ b/drivers/firmware/efi/libstub/Makefile.zboot
@@ -5,8 +5,8 @@
# EFI_ZBOOT_FORWARD_CFI
quiet_cmd_copy_and_pad = PAD $@
- cmd_copy_and_pad = cp $< $@ && \
- truncate -s $(shell hexdump -s16 -n4 -e '"%u"' $<) $@
+ cmd_copy_and_pad = cp $< $@; \
+ truncate -s $$(hexdump -s16 -n4 -e '"%u"' $<) $@
# Pad the file to the size of the uncompressed image in memory, including BSS
$(obj)/vmlinux.bin: $(obj)/$(EFI_ZBOOT_PAYLOAD) FORCE
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index 9d316fdc6f9a55..a155519a862f83 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -11,8 +11,6 @@
#include <linux/types.h>
/* PCIe device related definition. */
-#define PCI_VENDOR_ID_ALIBABA 0x1ded
-
#define ERDMA_PCI_WIDTH 64
#define ERDMA_FUNC_BAR 0
#define ERDMA_MISX_BAR 2
diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 6554a2e89d3612..6449056b57dd30 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -598,3 +598,15 @@ int pci_write_config_dword(const struct pci_dev *dev, int where,
return pci_bus_write_config_dword(dev->bus, dev->devfn, where, val);
}
EXPORT_SYMBOL(pci_write_config_dword);
+
+void pci_clear_and_set_config_dword(const struct pci_dev *dev, int pos,
+ u32 clear, u32 set)
+{
+ u32 val;
+
+ pci_read_config_dword(dev, pos, &val);
+ val &= ~clear;
+ val |= set;
+ pci_write_config_dword(dev, pos, val);
+}
+EXPORT_SYMBOL(pci_clear_and_set_config_dword);
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 060f4b3c8698fd..5a0066ecc3c5ad 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -426,17 +426,6 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
}
}
-static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos,
- u32 clear, u32 set)
-{
- u32 val;
-
- pci_read_config_dword(pdev, pos, &val);
- val &= ~clear;
- val |= set;
- pci_write_config_dword(pdev, pos, val);
-}
-
/* Calculate L1.2 PM substate timing parameters */
static void aspm_calc_l12_info(struct pcie_link_state *link,
u32 parent_l1ss_cap, u32 child_l1ss_cap)
@@ -501,10 +490,12 @@ static void aspm_calc_l12_info(struct pcie_link_state *link,
cl1_2_enables = cctl1 & PCI_L1SS_CTL1_L1_2_MASK;
if (pl1_2_enables || cl1_2_enables) {
- pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
- PCI_L1SS_CTL1_L1_2_MASK, 0);
- pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
- PCI_L1SS_CTL1_L1_2_MASK, 0);
+ pci_clear_and_set_config_dword(child,
+ child->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_L1_2_MASK, 0);
+ pci_clear_and_set_config_dword(parent,
+ parent->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_L1_2_MASK, 0);
}
/* Program T_POWER_ON times in both ports */
@@ -512,22 +503,26 @@ static void aspm_calc_l12_info(struct pcie_link_state *link,
pci_write_config_dword(child, child->l1ss + PCI_L1SS_CTL2, ctl2);
/* Program Common_Mode_Restore_Time in upstream device */
- pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
- PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1);
+ pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_CM_RESTORE_TIME, ctl1);
/* Program LTR_L1.2_THRESHOLD time in both ports */
- pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
- PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
- PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1);
- pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
- PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
- PCI_L1SS_CTL1_LTR_L12_TH_SCALE, ctl1);
+ pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
+ PCI_L1SS_CTL1_LTR_L12_TH_SCALE,
+ ctl1);
+ pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_LTR_L12_TH_VALUE |
+ PCI_L1SS_CTL1_LTR_L12_TH_SCALE,
+ ctl1);
if (pl1_2_enables || cl1_2_enables) {
- pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1, 0,
- pl1_2_enables);
- pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1, 0,
- cl1_2_enables);
+ pci_clear_and_set_config_dword(parent,
+ parent->l1ss + PCI_L1SS_CTL1, 0,
+ pl1_2_enables);
+ pci_clear_and_set_config_dword(child,
+ child->l1ss + PCI_L1SS_CTL1, 0,
+ cl1_2_enables);
}
}
@@ -687,10 +682,10 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
*/
/* Disable all L1 substates */
- pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
- PCI_L1SS_CTL1_L1SS_MASK, 0);
- pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
- PCI_L1SS_CTL1_L1SS_MASK, 0);
+ pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_L1SS_MASK, 0);
+ pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_L1SS_MASK, 0);
/*
* If needed, disable L1, and it gets enabled later
* in pcie_config_aspm_link().
@@ -713,10 +708,10 @@ static void pcie_config_aspm_l1ss(struct pcie_link_state *link, u32 state)
val |= PCI_L1SS_CTL1_PCIPM_L1_2;
/* Enable what we need to enable */
- pci_clear_and_set_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
- PCI_L1SS_CTL1_L1SS_MASK, val);
- pci_clear_and_set_dword(child, child->l1ss + PCI_L1SS_CTL1,
- PCI_L1SS_CTL1_L1SS_MASK, val);
+ pci_clear_and_set_config_dword(parent, parent->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_L1SS_MASK, val);
+ pci_clear_and_set_config_dword(child, child->l1ss + PCI_L1SS_CTL1,
+ PCI_L1SS_CTL1_L1SS_MASK, val);
}
static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val)
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 273d67ecf6d253..ec6e0d9194a1c5 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -217,6 +217,13 @@ config MARVELL_CN10K_DDR_PMU
Enable perf support for Marvell DDR Performance monitoring
event on CN10K platform.
+config DWC_PCIE_PMU
+ tristate "Synopsys DesignWare PCIe PMU"
+ depends on PCI
+ help
+ Enable perf support for Synopsys DesignWare PCIe PMU Performance
+ monitoring event on platform including the Alibaba Yitian 710.
+
source "drivers/perf/arm_cspmu/Kconfig"
source "drivers/perf/amlogic/Kconfig"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 16b3ec4db916d0..a06338e3401c9f 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
+obj-$(CONFIG_DWC_PCIE_PMU) += dwc_pcie_pmu.o
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
obj-$(CONFIG_CXL_PMU) += cxl_pmu.o
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
index cd2de44b61b91f..f322e5ca1114b9 100644
--- a/drivers/perf/apple_m1_cpu_pmu.c
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -524,8 +524,10 @@ static int m1_pmu_set_event_filter(struct hw_perf_event *event,
{
unsigned long config_base = 0;
- if (!attr->exclude_guest)
- return -EINVAL;
+ if (!attr->exclude_guest) {
+ pr_debug("ARM performance counters do not support mode exclusion\n");
+ return -EOPNOTSUPP;
+ }
if (!attr->exclude_kernel)
config_base |= M1_PMU_CFG_COUNT_KERNEL;
if (!attr->exclude_user)
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 847b0dc41293d2..c584165b13babd 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -811,7 +811,7 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
#define CMN_EVENT_HNF_OCC(_model, _name, _event) \
CMN_EVENT_HN_OCC(_model, hnf_##_name, CMN_TYPE_HNF, _event)
#define CMN_EVENT_HNF_CLS(_model, _name, _event) \
- CMN_EVENT_HN_CLS(_model, hnf_##_name, CMN_TYPE_HNS, _event)
+ CMN_EVENT_HN_CLS(_model, hnf_##_name, CMN_TYPE_HNF, _event)
#define CMN_EVENT_HNF_SNT(_model, _name, _event) \
CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event)
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index 8223c49bd082c2..7ec4498e312f73 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -371,7 +371,7 @@ static inline u32 dsu_pmu_get_reset_overflow(void)
return __dsu_pmu_get_reset_overflow();
}
-/**
+/*
* dsu_pmu_set_event_period: Set the period for the counter.
*
* All DSU PMU event counters, except the cycle counter are 32bit
@@ -602,7 +602,7 @@ static struct dsu_pmu *dsu_pmu_alloc(struct platform_device *pdev)
return dsu_pmu;
}
-/**
+/*
* dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster
* from device tree.
*/
@@ -632,7 +632,7 @@ static int dsu_pmu_dt_get_cpus(struct device *dev, cpumask_t *mask)
return 0;
}
-/**
+/*
* dsu_pmu_acpi_get_cpus: Get the list of CPUs in the cluster
* from ACPI.
*/
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index d712a19e47ac15..8458fe2cebb4fb 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -445,7 +445,7 @@ __hw_perf_event_init(struct perf_event *event)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- int mapping;
+ int mapping, ret;
hwc->flags = 0;
mapping = armpmu->map_event(event);
@@ -470,11 +470,10 @@ __hw_perf_event_init(struct perf_event *event)
/*
* Check whether we need to exclude the counter from certain modes.
*/
- if (armpmu->set_event_filter &&
- armpmu->set_event_filter(hwc, &event->attr)) {
- pr_debug("ARM performance counters do not support "
- "mode exclusion\n");
- return -EOPNOTSUPP;
+ if (armpmu->set_event_filter) {
+ ret = armpmu->set_event_filter(hwc, &event->attr);
+ if (ret)
+ return ret;
}
/*
@@ -893,7 +892,6 @@ struct arm_pmu *armpmu_alloc(void)
struct pmu_hw_events *events;
events = per_cpu_ptr(pmu->hw_events, cpu);
- raw_spin_lock_init(&events->pmu_lock);
events->percpu_pmu = pmu;
}
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 6ca7be05229c10..23fa6c5da82c4d 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -15,6 +15,7 @@
#include <clocksource/arm_arch_timer.h>
#include <linux/acpi.h>
+#include <linux/bitfield.h>
#include <linux/clocksource.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
@@ -169,7 +170,11 @@ armv8pmu_events_sysfs_show(struct device *dev,
PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config)
static struct attribute *armv8_pmuv3_event_attrs[] = {
- ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
+ /*
+ * Don't expose the sw_incr event in /sys. It's not usable as writes to
+ * PMSWINC_EL0 will trap as PMUSERENR.{SW,EN}=={0,0} and event rotation
+ * means we don't have a fixed event<->counter relationship regardless.
+ */
ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
@@ -294,26 +299,66 @@ static const struct attribute_group armv8_pmuv3_events_attr_group = {
.is_visible = armv8pmu_event_attr_is_visible,
};
-PMU_FORMAT_ATTR(event, "config:0-15");
-PMU_FORMAT_ATTR(long, "config1:0");
-PMU_FORMAT_ATTR(rdpmc, "config1:1");
+/* User ABI */
+#define ATTR_CFG_FLD_event_CFG config
+#define ATTR_CFG_FLD_event_LO 0
+#define ATTR_CFG_FLD_event_HI 15
+#define ATTR_CFG_FLD_long_CFG config1
+#define ATTR_CFG_FLD_long_LO 0
+#define ATTR_CFG_FLD_long_HI 0
+#define ATTR_CFG_FLD_rdpmc_CFG config1
+#define ATTR_CFG_FLD_rdpmc_LO 1
+#define ATTR_CFG_FLD_rdpmc_HI 1
+#define ATTR_CFG_FLD_threshold_count_CFG config1 /* PMEVTYPER.TC[0] */
+#define ATTR_CFG_FLD_threshold_count_LO 2
+#define ATTR_CFG_FLD_threshold_count_HI 2
+#define ATTR_CFG_FLD_threshold_compare_CFG config1 /* PMEVTYPER.TC[2:1] */
+#define ATTR_CFG_FLD_threshold_compare_LO 3
+#define ATTR_CFG_FLD_threshold_compare_HI 4
+#define ATTR_CFG_FLD_threshold_CFG config1 /* PMEVTYPER.TH */
+#define ATTR_CFG_FLD_threshold_LO 5
+#define ATTR_CFG_FLD_threshold_HI 16
+
+GEN_PMU_FORMAT_ATTR(event);
+GEN_PMU_FORMAT_ATTR(long);
+GEN_PMU_FORMAT_ATTR(rdpmc);
+GEN_PMU_FORMAT_ATTR(threshold_count);
+GEN_PMU_FORMAT_ATTR(threshold_compare);
+GEN_PMU_FORMAT_ATTR(threshold);
static int sysctl_perf_user_access __read_mostly;
-static inline bool armv8pmu_event_is_64bit(struct perf_event *event)
+static bool armv8pmu_event_is_64bit(struct perf_event *event)
+{
+ return ATTR_CFG_GET_FLD(&event->attr, long);
+}
+
+static bool armv8pmu_event_want_user_access(struct perf_event *event)
{
- return event->attr.config1 & 0x1;
+ return ATTR_CFG_GET_FLD(&event->attr, rdpmc);
}
-static inline bool armv8pmu_event_want_user_access(struct perf_event *event)
+static u8 armv8pmu_event_threshold_control(struct perf_event_attr *attr)
{
- return event->attr.config1 & 0x2;
+ u8 th_compare = ATTR_CFG_GET_FLD(attr, threshold_compare);
+ u8 th_count = ATTR_CFG_GET_FLD(attr, threshold_count);
+
+ /*
+ * The count bit is always the bottom bit of the full control field, and
+ * the comparison is the upper two bits, but it's not explicitly
+ * labelled in the Arm ARM. For the Perf interface we split it into two
+ * fields, so reconstruct it here.
+ */
+ return (th_compare << 1) | th_count;
}
static struct attribute *armv8_pmuv3_format_attrs[] = {
&format_attr_event.attr,
&format_attr_long.attr,
&format_attr_rdpmc.attr,
+ &format_attr_threshold.attr,
+ &format_attr_threshold_compare.attr,
+ &format_attr_threshold_count.attr,
NULL,
};
@@ -327,7 +372,7 @@ static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
{
struct pmu *pmu = dev_get_drvdata(dev);
struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
- u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK;
+ u32 slots = FIELD_GET(ARMV8_PMU_SLOTS, cpu_pmu->reg_pmmir);
return sysfs_emit(page, "0x%08x\n", slots);
}
@@ -339,8 +384,7 @@ static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr,
{
struct pmu *pmu = dev_get_drvdata(dev);
struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
- u32 bus_slots = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_SLOTS_SHIFT)
- & ARMV8_PMU_BUS_SLOTS_MASK;
+ u32 bus_slots = FIELD_GET(ARMV8_PMU_BUS_SLOTS, cpu_pmu->reg_pmmir);
return sysfs_emit(page, "0x%08x\n", bus_slots);
}
@@ -352,8 +396,7 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
{
struct pmu *pmu = dev_get_drvdata(dev);
struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
- u32 bus_width = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_WIDTH_SHIFT)
- & ARMV8_PMU_BUS_WIDTH_MASK;
+ u32 bus_width = FIELD_GET(ARMV8_PMU_BUS_WIDTH, cpu_pmu->reg_pmmir);
u32 val = 0;
/* Encoded as Log2(number of bytes), plus one */
@@ -365,10 +408,38 @@ static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR_RO(bus_width);
+static u32 threshold_max(struct arm_pmu *cpu_pmu)
+{
+ /*
+ * PMMIR.THWIDTH is readable and non-zero on aarch32, but it would be
+ * impossible to write the threshold in the upper 32 bits of PMEVTYPER.
+ */
+ if (IS_ENABLED(CONFIG_ARM))
+ return 0;
+
+ /*
+ * The largest value that can be written to PMEVTYPER<n>_EL0.TH is
+ * (2 ^ PMMIR.THWIDTH) - 1.
+ */
+ return (1 << FIELD_GET(ARMV8_PMU_THWIDTH, cpu_pmu->reg_pmmir)) - 1;
+}
+
+static ssize_t threshold_max_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+
+ return sysfs_emit(page, "0x%08x\n", threshold_max(cpu_pmu));
+}
+
+static DEVICE_ATTR_RO(threshold_max);
+
static struct attribute *armv8_pmuv3_caps_attrs[] = {
&dev_attr_slots.attr,
&dev_attr_bus_slots.attr,
&dev_attr_bus_width.attr,
+ &dev_attr_threshold_max.attr,
NULL,
};
@@ -397,7 +468,7 @@ static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
return (IS_ENABLED(CONFIG_ARM64) && is_pmuv3p5(cpu_pmu->pmuver));
}
-static inline bool armv8pmu_event_has_user_read(struct perf_event *event)
+static bool armv8pmu_event_has_user_read(struct perf_event *event)
{
return event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT;
}
@@ -407,7 +478,7 @@ static inline bool armv8pmu_event_has_user_read(struct perf_event *event)
* except when we have allocated the 64bit cycle counter (for CPU
* cycles event) or when user space counter access is enabled.
*/
-static inline bool armv8pmu_event_is_chained(struct perf_event *event)
+static bool armv8pmu_event_is_chained(struct perf_event *event)
{
int idx = event->hw.idx;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
@@ -428,36 +499,36 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event)
#define ARMV8_IDX_TO_COUNTER(x) \
(((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK)
-static inline u64 armv8pmu_pmcr_read(void)
+static u64 armv8pmu_pmcr_read(void)
{
return read_pmcr();
}
-static inline void armv8pmu_pmcr_write(u64 val)
+static void armv8pmu_pmcr_write(u64 val)
{
val &= ARMV8_PMU_PMCR_MASK;
isb();
write_pmcr(val);
}
-static inline int armv8pmu_has_overflowed(u32 pmovsr)
+static int armv8pmu_has_overflowed(u32 pmovsr)
{
return pmovsr & ARMV8_PMU_OVERFLOWED_MASK;
}
-static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
+static int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
{
return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
}
-static inline u64 armv8pmu_read_evcntr(int idx)
+static u64 armv8pmu_read_evcntr(int idx)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
return read_pmevcntrn(counter);
}
-static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
+static u64 armv8pmu_read_hw_counter(struct perf_event *event)
{
int idx = event->hw.idx;
u64 val = armv8pmu_read_evcntr(idx);
@@ -519,14 +590,14 @@ static u64 armv8pmu_read_counter(struct perf_event *event)
return armv8pmu_unbias_long_counter(event, value);
}
-static inline void armv8pmu_write_evcntr(int idx, u64 value)
+static void armv8pmu_write_evcntr(int idx, u64 value)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
write_pmevcntrn(counter, value);
}
-static inline void armv8pmu_write_hw_counter(struct perf_event *event,
+static void armv8pmu_write_hw_counter(struct perf_event *event,
u64 value)
{
int idx = event->hw.idx;
@@ -552,15 +623,22 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
armv8pmu_write_hw_counter(event, value);
}
-static inline void armv8pmu_write_evtype(int idx, u32 val)
+static void armv8pmu_write_evtype(int idx, unsigned long val)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
+ unsigned long mask = ARMV8_PMU_EVTYPE_EVENT |
+ ARMV8_PMU_INCLUDE_EL2 |
+ ARMV8_PMU_EXCLUDE_EL0 |
+ ARMV8_PMU_EXCLUDE_EL1;
- val &= ARMV8_PMU_EVTYPE_MASK;
+ if (IS_ENABLED(CONFIG_ARM64))
+ mask |= ARMV8_PMU_EVTYPE_TC | ARMV8_PMU_EVTYPE_TH;
+
+ val &= mask;
write_pmevtypern(counter, val);
}
-static inline void armv8pmu_write_event_type(struct perf_event *event)
+static void armv8pmu_write_event_type(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
@@ -594,7 +672,7 @@ static u32 armv8pmu_event_cnten_mask(struct perf_event *event)
return mask;
}
-static inline void armv8pmu_enable_counter(u32 mask)
+static void armv8pmu_enable_counter(u32 mask)
{
/*
* Make sure event configuration register writes are visible before we
@@ -604,7 +682,7 @@ static inline void armv8pmu_enable_counter(u32 mask)
write_pmcntenset(mask);
}
-static inline void armv8pmu_enable_event_counter(struct perf_event *event)
+static void armv8pmu_enable_event_counter(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
u32 mask = armv8pmu_event_cnten_mask(event);
@@ -616,7 +694,7 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event)
armv8pmu_enable_counter(mask);
}
-static inline void armv8pmu_disable_counter(u32 mask)
+static void armv8pmu_disable_counter(u32 mask)
{
write_pmcntenclr(mask);
/*
@@ -626,7 +704,7 @@ static inline void armv8pmu_disable_counter(u32 mask)
isb();
}
-static inline void armv8pmu_disable_event_counter(struct perf_event *event)
+static void armv8pmu_disable_event_counter(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
u32 mask = armv8pmu_event_cnten_mask(event);
@@ -638,18 +716,18 @@ static inline void armv8pmu_disable_event_counter(struct perf_event *event)
armv8pmu_disable_counter(mask);
}
-static inline void armv8pmu_enable_intens(u32 mask)
+static void armv8pmu_enable_intens(u32 mask)
{
write_pmintenset(mask);
}
-static inline void armv8pmu_enable_event_irq(struct perf_event *event)
+static void armv8pmu_enable_event_irq(struct perf_event *event)
{
u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
armv8pmu_enable_intens(BIT(counter));
}
-static inline void armv8pmu_disable_intens(u32 mask)
+static void armv8pmu_disable_intens(u32 mask)
{
write_pmintenclr(mask);
isb();
@@ -658,13 +736,13 @@ static inline void armv8pmu_disable_intens(u32 mask)
isb();
}
-static inline void armv8pmu_disable_event_irq(struct perf_event *event)
+static void armv8pmu_disable_event_irq(struct perf_event *event)
{
u32 counter = ARMV8_IDX_TO_COUNTER(event->hw.idx);
armv8pmu_disable_intens(BIT(counter));
}
-static inline u32 armv8pmu_getreset_flags(void)
+static u32 armv8pmu_getreset_flags(void)
{
u32 value;
@@ -672,7 +750,7 @@ static inline u32 armv8pmu_getreset_flags(void)
value = read_pmovsclr();
/* Write to clear flags */
- value &= ARMV8_PMU_OVSR_MASK;
+ value &= ARMV8_PMU_OVERFLOWED_MASK;
write_pmovsclr(value);
return value;
@@ -914,9 +992,15 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
struct perf_event_attr *attr)
{
unsigned long config_base = 0;
-
- if (attr->exclude_idle)
- return -EPERM;
+ struct perf_event *perf_event = container_of(attr, struct perf_event,
+ attr);
+ struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
+ u32 th;
+
+ if (attr->exclude_idle) {
+ pr_debug("ARM performance counters do not support mode exclusion\n");
+ return -EOPNOTSUPP;
+ }
/*
* If we're running in hyp mode, then we *are* the hypervisor.
@@ -946,6 +1030,22 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
config_base |= ARMV8_PMU_EXCLUDE_EL0;
/*
+ * If FEAT_PMUv3_TH isn't implemented, then THWIDTH (threshold_max) will
+ * be 0 and will also trigger this check, preventing it from being used.
+ */
+ th = ATTR_CFG_GET_FLD(attr, threshold);
+ if (th > threshold_max(cpu_pmu)) {
+ pr_debug("PMU event threshold exceeds max value\n");
+ return -EINVAL;
+ }
+
+ if (IS_ENABLED(CONFIG_ARM64) && th) {
+ config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TH, th);
+ config_base |= FIELD_PREP(ARMV8_PMU_EVTYPE_TC,
+ armv8pmu_event_threshold_control(attr));
+ }
+
+ /*
* Install the filter into config_base as this is used to
* construct the event type.
*/
@@ -1107,8 +1207,7 @@ static void __armv8pmu_probe_pmu(void *info)
probe->present = true;
/* Read the nb of CNTx counters supported from PMNC */
- cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT)
- & ARMV8_PMU_PMCR_N_MASK;
+ cpu_pmu->num_events = FIELD_GET(ARMV8_PMU_PMCR_N, armv8pmu_pmcr_read());
/* Add the CPU cycles counter */
cpu_pmu->num_events += 1;
@@ -1221,6 +1320,12 @@ static int name##_pmu_init(struct arm_pmu *cpu_pmu) \
return armv8_pmu_init(cpu_pmu, #name, armv8_pmuv3_map_event); \
}
+#define PMUV3_INIT_MAP_EVENT(name, map_event) \
+static int name##_pmu_init(struct arm_pmu *cpu_pmu) \
+{ \
+ return armv8_pmu_init(cpu_pmu, #name, map_event); \
+}
+
PMUV3_INIT_SIMPLE(armv8_pmuv3)
PMUV3_INIT_SIMPLE(armv8_cortex_a34)
@@ -1247,51 +1352,24 @@ PMUV3_INIT_SIMPLE(armv8_neoverse_v1)
PMUV3_INIT_SIMPLE(armv8_nvidia_carmel)
PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
-static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", armv8_a53_map_event);
-}
-
-static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", armv8_a53_map_event);
-}
-
-static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", armv8_a57_map_event);
-}
-
-static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", armv8_a57_map_event);
-}
-
-static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", armv8_a73_map_event);
-}
-
-static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", armv8_thunder_map_event);
-}
-
-static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
-{
- return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", armv8_vulcan_map_event);
-}
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a35, armv8_a53_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a53, armv8_a53_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a57, armv8_a57_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a72, armv8_a57_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cortex_a73, armv8_a73_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_cavium_thunder, armv8_thunder_map_event)
+PMUV3_INIT_MAP_EVENT(armv8_brcm_vulcan, armv8_vulcan_map_event)
static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_pmu_init},
{.compatible = "arm,cortex-a34-pmu", .data = armv8_cortex_a34_pmu_init},
- {.compatible = "arm,cortex-a35-pmu", .data = armv8_a35_pmu_init},
- {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
+ {.compatible = "arm,cortex-a35-pmu", .data = armv8_cortex_a35_pmu_init},
+ {.compatible = "arm,cortex-a53-pmu", .data = armv8_cortex_a53_pmu_init},
{.compatible = "arm,cortex-a55-pmu", .data = armv8_cortex_a55_pmu_init},
- {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
+ {.compatible = "arm,cortex-a57-pmu", .data = armv8_cortex_a57_pmu_init},
{.compatible = "arm,cortex-a65-pmu", .data = armv8_cortex_a65_pmu_init},
- {.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init},
- {.compatible = "arm,cortex-a73-pmu", .data = armv8_a73_pmu_init},
+ {.compatible = "arm,cortex-a72-pmu", .data = armv8_cortex_a72_pmu_init},
+ {.compatible = "arm,cortex-a73-pmu", .data = armv8_cortex_a73_pmu_init},
{.compatible = "arm,cortex-a75-pmu", .data = armv8_cortex_a75_pmu_init},
{.compatible = "arm,cortex-a76-pmu", .data = armv8_cortex_a76_pmu_init},
{.compatible = "arm,cortex-a77-pmu", .data = armv8_cortex_a77_pmu_init},
@@ -1309,8 +1387,8 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,neoverse-n1-pmu", .data = armv8_neoverse_n1_pmu_init},
{.compatible = "arm,neoverse-n2-pmu", .data = armv9_neoverse_n2_pmu_init},
{.compatible = "arm,neoverse-v1-pmu", .data = armv8_neoverse_v1_pmu_init},
- {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
- {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init},
+ {.compatible = "cavium,thunder-pmu", .data = armv8_cavium_thunder_pmu_init},
+ {.compatible = "brcm,vulcan-pmu", .data = armv8_brcm_vulcan_pmu_init},
{.compatible = "nvidia,carmel-pmu", .data = armv8_nvidia_carmel_pmu_init},
{.compatible = "nvidia,denver-pmu", .data = armv8_nvidia_denver_pmu_init},
{},
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index d2b0cbf0e0c41e..b622d75d8c9e43 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -206,28 +206,6 @@ static const struct attribute_group arm_spe_pmu_cap_group = {
#define ATTR_CFG_FLD_inv_event_filter_LO 0
#define ATTR_CFG_FLD_inv_event_filter_HI 63
-/* Why does everything I do descend into this? */
-#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
- (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi
-
-#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
- __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)
-
-#define GEN_PMU_FORMAT_ATTR(name) \
- PMU_FORMAT_ATTR(name, \
- _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \
- ATTR_CFG_FLD_##name##_LO, \
- ATTR_CFG_FLD_##name##_HI))
-
-#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \
- ((((attr)->cfg) >> lo) & GENMASK(hi - lo, 0))
-
-#define ATTR_CFG_GET_FLD(attr, name) \
- _ATTR_CFG_GET_FLD(attr, \
- ATTR_CFG_FLD_##name##_CFG, \
- ATTR_CFG_FLD_##name##_LO, \
- ATTR_CFG_FLD_##name##_HI)
-
GEN_PMU_FORMAT_ATTR(ts_enable);
GEN_PMU_FORMAT_ATTR(pa_enable);
GEN_PMU_FORMAT_ATTR(pct_enable);
diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c
new file mode 100644
index 00000000000000..957058ad0099e2
--- /dev/null
+++ b/drivers/perf/dwc_pcie_pmu.c
@@ -0,0 +1,792 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Synopsys DesignWare PCIe PMU driver
+ *
+ * Copyright (C) 2021-2023 Alibaba Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#define DWC_PCIE_VSEC_RAS_DES_ID 0x02
+#define DWC_PCIE_EVENT_CNT_CTL 0x8
+
+/*
+ * Event Counter Data Select includes two parts:
+ * - 27-24: Group number(4-bit: 0..0x7)
+ * - 23-16: Event number(8-bit: 0..0x13) within the Group
+ *
+ * Put them together as in TRM.
+ */
+#define DWC_PCIE_CNT_EVENT_SEL GENMASK(27, 16)
+#define DWC_PCIE_CNT_LANE_SEL GENMASK(11, 8)
+#define DWC_PCIE_CNT_STATUS BIT(7)
+#define DWC_PCIE_CNT_ENABLE GENMASK(4, 2)
+#define DWC_PCIE_PER_EVENT_OFF 0x1
+#define DWC_PCIE_PER_EVENT_ON 0x3
+#define DWC_PCIE_EVENT_CLEAR GENMASK(1, 0)
+#define DWC_PCIE_EVENT_PER_CLEAR 0x1
+
+#define DWC_PCIE_EVENT_CNT_DATA 0xC
+
+#define DWC_PCIE_TIME_BASED_ANAL_CTL 0x10
+#define DWC_PCIE_TIME_BASED_REPORT_SEL GENMASK(31, 24)
+#define DWC_PCIE_TIME_BASED_DURATION_SEL GENMASK(15, 8)
+#define DWC_PCIE_DURATION_MANUAL_CTL 0x0
+#define DWC_PCIE_DURATION_1MS 0x1
+#define DWC_PCIE_DURATION_10MS 0x2
+#define DWC_PCIE_DURATION_100MS 0x3
+#define DWC_PCIE_DURATION_1S 0x4
+#define DWC_PCIE_DURATION_2S 0x5
+#define DWC_PCIE_DURATION_4S 0x6
+#define DWC_PCIE_DURATION_4US 0xFF
+#define DWC_PCIE_TIME_BASED_TIMER_START BIT(0)
+#define DWC_PCIE_TIME_BASED_CNT_ENABLE 0x1
+
+#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW 0x14
+#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH 0x18
+
+/* Event attributes */
+#define DWC_PCIE_CONFIG_EVENTID GENMASK(15, 0)
+#define DWC_PCIE_CONFIG_TYPE GENMASK(19, 16)
+#define DWC_PCIE_CONFIG_LANE GENMASK(27, 20)
+
+#define DWC_PCIE_EVENT_ID(event) FIELD_GET(DWC_PCIE_CONFIG_EVENTID, (event)->attr.config)
+#define DWC_PCIE_EVENT_TYPE(event) FIELD_GET(DWC_PCIE_CONFIG_TYPE, (event)->attr.config)
+#define DWC_PCIE_EVENT_LANE(event) FIELD_GET(DWC_PCIE_CONFIG_LANE, (event)->attr.config)
+
+enum dwc_pcie_event_type {
+ DWC_PCIE_TIME_BASE_EVENT,
+ DWC_PCIE_LANE_EVENT,
+ DWC_PCIE_EVENT_TYPE_MAX,
+};
+
+#define DWC_PCIE_LANE_EVENT_MAX_PERIOD GENMASK_ULL(31, 0)
+#define DWC_PCIE_MAX_PERIOD GENMASK_ULL(63, 0)
+
+struct dwc_pcie_pmu {
+ struct pmu pmu;
+ struct pci_dev *pdev; /* Root Port device */
+ u16 ras_des_offset;
+ u32 nr_lanes;
+
+ struct list_head pmu_node;
+ struct hlist_node cpuhp_node;
+ struct perf_event *event[DWC_PCIE_EVENT_TYPE_MAX];
+ int on_cpu;
+};
+
+#define to_dwc_pcie_pmu(p) (container_of(p, struct dwc_pcie_pmu, pmu))
+
+static int dwc_pcie_pmu_hp_state;
+static struct list_head dwc_pcie_dev_info_head =
+ LIST_HEAD_INIT(dwc_pcie_dev_info_head);
+static bool notify;
+
+struct dwc_pcie_dev_info {
+ struct platform_device *plat_dev;
+ struct pci_dev *pdev;
+ struct list_head dev_node;
+};
+
+struct dwc_pcie_vendor_id {
+ int vendor_id;
+};
+
+static const struct dwc_pcie_vendor_id dwc_pcie_vendor_ids[] = {
+ {.vendor_id = PCI_VENDOR_ID_ALIBABA },
+ {} /* terminator */
+};
+
+static ssize_t cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(dev_get_drvdata(dev));
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(pcie_pmu->on_cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *dwc_pcie_pmu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL
+};
+
+static struct attribute_group dwc_pcie_cpumask_attr_group = {
+ .attrs = dwc_pcie_pmu_cpumask_attrs,
+};
+
+struct dwc_pcie_format_attr {
+ struct device_attribute attr;
+ u64 field;
+ int config;
+};
+
+PMU_FORMAT_ATTR(eventid, "config:0-15");
+PMU_FORMAT_ATTR(type, "config:16-19");
+PMU_FORMAT_ATTR(lane, "config:20-27");
+
+static struct attribute *dwc_pcie_format_attrs[] = {
+ &format_attr_type.attr,
+ &format_attr_eventid.attr,
+ &format_attr_lane.attr,
+ NULL,
+};
+
+static struct attribute_group dwc_pcie_format_attrs_group = {
+ .name = "format",
+ .attrs = dwc_pcie_format_attrs,
+};
+
+struct dwc_pcie_event_attr {
+ struct device_attribute attr;
+ enum dwc_pcie_event_type type;
+ u16 eventid;
+ u8 lane;
+};
+
+static ssize_t dwc_pcie_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dwc_pcie_event_attr *eattr;
+
+ eattr = container_of(attr, typeof(*eattr), attr);
+
+ if (eattr->type == DWC_PCIE_LANE_EVENT)
+ return sysfs_emit(buf, "eventid=0x%x,type=0x%x,lane=?\n",
+ eattr->eventid, eattr->type);
+ else if (eattr->type == DWC_PCIE_TIME_BASE_EVENT)
+ return sysfs_emit(buf, "eventid=0x%x,type=0x%x\n",
+ eattr->eventid, eattr->type);
+
+ return 0;
+}
+
+#define DWC_PCIE_EVENT_ATTR(_name, _type, _eventid, _lane) \
+ (&((struct dwc_pcie_event_attr[]) {{ \
+ .attr = __ATTR(_name, 0444, dwc_pcie_event_show, NULL), \
+ .type = _type, \
+ .eventid = _eventid, \
+ .lane = _lane, \
+ }})[0].attr.attr)
+
+#define DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(_name, _eventid) \
+ DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_TIME_BASE_EVENT, _eventid, 0)
+#define DWC_PCIE_PMU_LANE_EVENT_ATTR(_name, _eventid) \
+ DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_LANE_EVENT, _eventid, 0)
+
+static struct attribute *dwc_pcie_pmu_time_event_attrs[] = {
+ /* Group #0 */
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(one_cycle, 0x00),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_L0S, 0x01),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(RX_L0S, 0x02),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L0, 0x03),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1, 0x04),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_1, 0x05),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_2, 0x06),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(CFG_RCVRY, 0x07),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x08),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x09),
+
+ /* Group #1 */
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_PCIe_TLP_Data_Payload, 0x20),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_PCIe_TLP_Data_Payload, 0x21),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_CCIX_TLP_Data_Payload, 0x22),
+ DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_CCIX_TLP_Data_Payload, 0x23),
+
+ /*
+ * Leave it to the user to specify the lane ID to avoid generating
+ * a list of hundreds of events.
+ */
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ack_dllp, 0x600),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_update_fc_dllp, 0x601),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ack_dllp, 0x602),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_update_fc_dllp, 0x603),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nulified_tlp, 0x604),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nulified_tlp, 0x605),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tl, 0x606),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_write, 0x700),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_read, 0x701),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_write, 0x702),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_read, 0x703),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_write, 0x704),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_read, 0x705),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_without_data, 0x706),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_with_data, 0x707),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_message_tlp, 0x708),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_atomic, 0x709),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_tlp_with_prefix, 0x70A),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_write, 0x70B),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_read, 0x70C),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_write, 0x70F),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_read, 0x710),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_without_data, 0x711),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_with_data, 0x712),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_message_tlp, 0x713),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_atomic, 0x714),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_tlp_with_prefix, 0x715),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ccix_tlp, 0x716),
+ DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ccix_tlp, 0x717),
+ NULL
+};
+
+static const struct attribute_group dwc_pcie_event_attrs_group = {
+ .name = "events",
+ .attrs = dwc_pcie_pmu_time_event_attrs,
+};
+
+static const struct attribute_group *dwc_pcie_attr_groups[] = {
+ &dwc_pcie_event_attrs_group,
+ &dwc_pcie_format_attrs_group,
+ &dwc_pcie_cpumask_attr_group,
+ NULL
+};
+
+static void dwc_pcie_pmu_lane_event_enable(struct dwc_pcie_pmu *pcie_pmu,
+ bool enable)
+{
+ struct pci_dev *pdev = pcie_pmu->pdev;
+ u16 ras_des_offset = pcie_pmu->ras_des_offset;
+
+ if (enable)
+ pci_clear_and_set_config_dword(pdev,
+ ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON);
+ else
+ pci_clear_and_set_config_dword(pdev,
+ ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF);
+}
+
+static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu,
+ bool enable)
+{
+ struct pci_dev *pdev = pcie_pmu->pdev;
+ u16 ras_des_offset = pcie_pmu->ras_des_offset;
+
+ pci_clear_and_set_config_dword(pdev,
+ ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL,
+ DWC_PCIE_TIME_BASED_TIMER_START, enable);
+}
+
+static u64 dwc_pcie_pmu_read_lane_event_counter(struct perf_event *event)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ struct pci_dev *pdev = pcie_pmu->pdev;
+ u16 ras_des_offset = pcie_pmu->ras_des_offset;
+ u32 val;
+
+ pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_DATA, &val);
+
+ return val;
+}
+
+static u64 dwc_pcie_pmu_read_time_based_counter(struct perf_event *event)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ struct pci_dev *pdev = pcie_pmu->pdev;
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ u16 ras_des_offset = pcie_pmu->ras_des_offset;
+ u32 lo, hi, ss;
+ u64 val;
+
+ /*
+ * The 64-bit value of the data counter is spread across two
+ * registers that are not synchronized. In order to read them
+ * atomically, ensure that the high 32 bits match before and after
+ * reading the low 32 bits.
+ */
+ pci_read_config_dword(pdev,
+ ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH, &hi);
+ do {
+ /* snapshot the high 32 bits */
+ ss = hi;
+
+ pci_read_config_dword(
+ pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW,
+ &lo);
+ pci_read_config_dword(
+ pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH,
+ &hi);
+ } while (hi != ss);
+
+ val = ((u64)hi << 32) | lo;
+ /*
+ * The Group#1 event measures the amount of data processed in 16-byte
+ * units. Simplify the end-user interface by multiplying the counter
+ * at the point of read.
+ */
+ if (event_id >= 0x20 && event_id <= 0x23)
+ val *= 16;
+
+ return val;
+}
+
+static void dwc_pcie_pmu_event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+ u64 delta, prev, now = 0;
+
+ do {
+ prev = local64_read(&hwc->prev_count);
+
+ if (type == DWC_PCIE_LANE_EVENT)
+ now = dwc_pcie_pmu_read_lane_event_counter(event);
+ else if (type == DWC_PCIE_TIME_BASE_EVENT)
+ now = dwc_pcie_pmu_read_time_based_counter(event);
+
+ } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+ delta = (now - prev) & DWC_PCIE_MAX_PERIOD;
+ /* 32-bit counter for Lane Event Counting */
+ if (type == DWC_PCIE_LANE_EVENT)
+ delta &= DWC_PCIE_LANE_EVENT_MAX_PERIOD;
+
+ local64_add(delta, &event->count);
+}
+
+static int dwc_pcie_pmu_event_init(struct perf_event *event)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+ struct perf_event *sibling;
+ u32 lane;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* We don't support sampling */
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ /* We cannot support task bound events */
+ if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ if (event->group_leader != event &&
+ !is_software_event(event->group_leader))
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, event->group_leader) {
+ if (sibling->pmu != event->pmu && !is_software_event(sibling))
+ return -EINVAL;
+ }
+
+ if (type < 0 || type >= DWC_PCIE_EVENT_TYPE_MAX)
+ return -EINVAL;
+
+ if (type == DWC_PCIE_LANE_EVENT) {
+ lane = DWC_PCIE_EVENT_LANE(event);
+ if (lane < 0 || lane >= pcie_pmu->nr_lanes)
+ return -EINVAL;
+ }
+
+ event->cpu = pcie_pmu->on_cpu;
+
+ return 0;
+}
+
+static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+
+ hwc->state = 0;
+ local64_set(&hwc->prev_count, 0);
+
+ if (type == DWC_PCIE_LANE_EVENT)
+ dwc_pcie_pmu_lane_event_enable(pcie_pmu, true);
+ else if (type == DWC_PCIE_TIME_BASE_EVENT)
+ dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true);
+}
+
+static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ if (type == DWC_PCIE_LANE_EVENT)
+ dwc_pcie_pmu_lane_event_enable(pcie_pmu, false);
+ else if (type == DWC_PCIE_TIME_BASE_EVENT)
+ dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false);
+
+ dwc_pcie_pmu_event_update(event);
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ struct pci_dev *pdev = pcie_pmu->pdev;
+ struct hw_perf_event *hwc = &event->hw;
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ int lane = DWC_PCIE_EVENT_LANE(event);
+ u16 ras_des_offset = pcie_pmu->ras_des_offset;
+ u32 ctrl;
+
+ /* one counter for each type and it is in use */
+ if (pcie_pmu->event[type])
+ return -ENOSPC;
+
+ pcie_pmu->event[type] = event;
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+ if (type == DWC_PCIE_LANE_EVENT) {
+ /* EVENT_COUNTER_DATA_REG needs clear manually */
+ ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) |
+ FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) |
+ FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF) |
+ FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR);
+ pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL,
+ ctrl);
+ } else if (type == DWC_PCIE_TIME_BASE_EVENT) {
+ /*
+ * TIME_BASED_ANAL_DATA_REG is a 64 bit register, we can safely
+ * use it with any manually controlled duration. And it is
+ * cleared when next measurement starts.
+ */
+ ctrl = FIELD_PREP(DWC_PCIE_TIME_BASED_REPORT_SEL, event_id) |
+ FIELD_PREP(DWC_PCIE_TIME_BASED_DURATION_SEL,
+ DWC_PCIE_DURATION_MANUAL_CTL) |
+ DWC_PCIE_TIME_BASED_CNT_ENABLE;
+ pci_write_config_dword(
+ pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL, ctrl);
+ }
+
+ if (flags & PERF_EF_START)
+ dwc_pcie_pmu_event_start(event, PERF_EF_RELOAD);
+
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static void dwc_pcie_pmu_event_del(struct perf_event *event, int flags)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+
+ dwc_pcie_pmu_event_stop(event, flags | PERF_EF_UPDATE);
+ perf_event_update_userpage(event);
+ pcie_pmu->event[type] = NULL;
+}
+
+static void dwc_pcie_pmu_remove_cpuhp_instance(void *hotplug_node)
+{
+ cpuhp_state_remove_instance_nocalls(dwc_pcie_pmu_hp_state, hotplug_node);
+}
+
+/*
+ * Find the binded DES capability device info of a PCI device.
+ * @pdev: The PCI device.
+ */
+static struct dwc_pcie_dev_info *dwc_pcie_find_dev_info(struct pci_dev *pdev)
+{
+ struct dwc_pcie_dev_info *dev_info;
+
+ list_for_each_entry(dev_info, &dwc_pcie_dev_info_head, dev_node)
+ if (dev_info->pdev == pdev)
+ return dev_info;
+
+ return NULL;
+}
+
+static void dwc_pcie_unregister_pmu(void *data)
+{
+ struct dwc_pcie_pmu *pcie_pmu = data;
+
+ perf_pmu_unregister(&pcie_pmu->pmu);
+}
+
+static bool dwc_pcie_match_des_cap(struct pci_dev *pdev)
+{
+ const struct dwc_pcie_vendor_id *vid;
+ u16 vsec = 0;
+ u32 val;
+
+ if (!pci_is_pcie(pdev) || !(pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT))
+ return false;
+
+ for (vid = dwc_pcie_vendor_ids; vid->vendor_id; vid++) {
+ vsec = pci_find_vsec_capability(pdev, vid->vendor_id,
+ DWC_PCIE_VSEC_RAS_DES_ID);
+ if (vsec)
+ break;
+ }
+ if (!vsec)
+ return false;
+
+ pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
+ if (PCI_VNDR_HEADER_REV(val) != 0x04)
+ return false;
+
+ pci_dbg(pdev,
+ "Detected PCIe Vendor-Specific Extended Capability RAS DES\n");
+ return true;
+}
+
+static void dwc_pcie_unregister_dev(struct dwc_pcie_dev_info *dev_info)
+{
+ platform_device_unregister(dev_info->plat_dev);
+ list_del(&dev_info->dev_node);
+ kfree(dev_info);
+}
+
+static int dwc_pcie_register_dev(struct pci_dev *pdev)
+{
+ struct platform_device *plat_dev;
+ struct dwc_pcie_dev_info *dev_info;
+ u32 bdf;
+
+ bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
+ plat_dev = platform_device_register_data(NULL, "dwc_pcie_pmu", bdf,
+ pdev, sizeof(*pdev));
+
+ if (IS_ERR(plat_dev))
+ return PTR_ERR(plat_dev);
+
+ dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL);
+ if (!dev_info)
+ return -ENOMEM;
+
+ /* Cache platform device to handle pci device hotplug */
+ dev_info->plat_dev = plat_dev;
+ dev_info->pdev = pdev;
+ list_add(&dev_info->dev_node, &dwc_pcie_dev_info_head);
+
+ return 0;
+}
+
+static int dwc_pcie_pmu_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct dwc_pcie_dev_info *dev_info;
+
+ switch (action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ if (!dwc_pcie_match_des_cap(pdev))
+ return NOTIFY_DONE;
+ if (dwc_pcie_register_dev(pdev))
+ return NOTIFY_BAD;
+ break;
+ case BUS_NOTIFY_DEL_DEVICE:
+ dev_info = dwc_pcie_find_dev_info(pdev);
+ if (!dev_info)
+ return NOTIFY_DONE;
+ dwc_pcie_unregister_dev(dev_info);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block dwc_pcie_pmu_nb = {
+ .notifier_call = dwc_pcie_pmu_notifier,
+};
+
+static int dwc_pcie_pmu_probe(struct platform_device *plat_dev)
+{
+ struct pci_dev *pdev = plat_dev->dev.platform_data;
+ struct dwc_pcie_pmu *pcie_pmu;
+ char *name;
+ u32 bdf, val;
+ u16 vsec;
+ int ret;
+
+ vsec = pci_find_vsec_capability(pdev, pdev->vendor,
+ DWC_PCIE_VSEC_RAS_DES_ID);
+ pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
+ bdf = PCI_DEVID(pdev->bus->number, pdev->devfn);
+ name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", bdf);
+ if (!name)
+ return -ENOMEM;
+
+ pcie_pmu = devm_kzalloc(&plat_dev->dev, sizeof(*pcie_pmu), GFP_KERNEL);
+ if (!pcie_pmu)
+ return -ENOMEM;
+
+ pcie_pmu->pdev = pdev;
+ pcie_pmu->ras_des_offset = vsec;
+ pcie_pmu->nr_lanes = pcie_get_width_cap(pdev);
+ pcie_pmu->on_cpu = -1;
+ pcie_pmu->pmu = (struct pmu){
+ .name = name,
+ .parent = &pdev->dev,
+ .module = THIS_MODULE,
+ .attr_groups = dwc_pcie_attr_groups,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = dwc_pcie_pmu_event_init,
+ .add = dwc_pcie_pmu_event_add,
+ .del = dwc_pcie_pmu_event_del,
+ .start = dwc_pcie_pmu_event_start,
+ .stop = dwc_pcie_pmu_event_stop,
+ .read = dwc_pcie_pmu_event_update,
+ };
+
+ /* Add this instance to the list used by the offline callback */
+ ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state,
+ &pcie_pmu->cpuhp_node);
+ if (ret) {
+ pci_err(pdev, "Error %d registering hotplug @%x\n", ret, bdf);
+ return ret;
+ }
+
+ /* Unwind when platform driver removes */
+ ret = devm_add_action_or_reset(&plat_dev->dev,
+ dwc_pcie_pmu_remove_cpuhp_instance,
+ &pcie_pmu->cpuhp_node);
+ if (ret)
+ return ret;
+
+ ret = perf_pmu_register(&pcie_pmu->pmu, name, -1);
+ if (ret) {
+ pci_err(pdev, "Error %d registering PMU @%x\n", ret, bdf);
+ return ret;
+ }
+ ret = devm_add_action_or_reset(&plat_dev->dev, dwc_pcie_unregister_pmu,
+ pcie_pmu);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int dwc_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct dwc_pcie_pmu *pcie_pmu;
+
+ pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
+ if (pcie_pmu->on_cpu == -1)
+ pcie_pmu->on_cpu = cpumask_local_spread(
+ 0, dev_to_node(&pcie_pmu->pdev->dev));
+
+ return 0;
+}
+
+static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node)
+{
+ struct dwc_pcie_pmu *pcie_pmu;
+ struct pci_dev *pdev;
+ int node;
+ cpumask_t mask;
+ unsigned int target;
+
+ pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node);
+ /* Nothing to do if this CPU doesn't own the PMU */
+ if (cpu != pcie_pmu->on_cpu)
+ return 0;
+
+ pcie_pmu->on_cpu = -1;
+ pdev = pcie_pmu->pdev;
+ node = dev_to_node(&pdev->dev);
+ if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) &&
+ cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
+ target = cpumask_any(&mask);
+ else
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ if (target >= nr_cpu_ids) {
+ pci_err(pdev, "There is no CPU to set\n");
+ return 0;
+ }
+
+ /* This PMU does NOT support interrupt, just migrate context. */
+ perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target);
+ pcie_pmu->on_cpu = target;
+
+ return 0;
+}
+
+static struct platform_driver dwc_pcie_pmu_driver = {
+ .probe = dwc_pcie_pmu_probe,
+ .driver = {.name = "dwc_pcie_pmu",},
+};
+
+static int __init dwc_pcie_pmu_init(void)
+{
+ struct pci_dev *pdev = NULL;
+ bool found = false;
+ int ret;
+
+ for_each_pci_dev(pdev) {
+ if (!dwc_pcie_match_des_cap(pdev))
+ continue;
+
+ ret = dwc_pcie_register_dev(pdev);
+ if (ret) {
+ pci_dev_put(pdev);
+ return ret;
+ }
+
+ found = true;
+ }
+ if (!found)
+ return -ENODEV;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "perf/dwc_pcie_pmu:online",
+ dwc_pcie_pmu_online_cpu,
+ dwc_pcie_pmu_offline_cpu);
+ if (ret < 0)
+ return ret;
+
+ dwc_pcie_pmu_hp_state = ret;
+
+ ret = platform_driver_register(&dwc_pcie_pmu_driver);
+ if (ret)
+ goto platform_driver_register_err;
+
+ ret = bus_register_notifier(&pci_bus_type, &dwc_pcie_pmu_nb);
+ if (ret)
+ goto platform_driver_register_err;
+ notify = true;
+
+ return 0;
+
+platform_driver_register_err:
+ cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state);
+
+ return ret;
+}
+
+static void __exit dwc_pcie_pmu_exit(void)
+{
+ struct dwc_pcie_dev_info *dev_info, *tmp;
+
+ if (notify)
+ bus_unregister_notifier(&pci_bus_type, &dwc_pcie_pmu_nb);
+ list_for_each_entry_safe(dev_info, tmp, &dwc_pcie_dev_info_head, dev_node)
+ dwc_pcie_unregister_dev(dev_info);
+ platform_driver_unregister(&dwc_pcie_pmu_driver);
+ cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state);
+}
+
+module_init(dwc_pcie_pmu_init);
+module_exit(dwc_pcie_pmu_exit);
+
+MODULE_DESCRIPTION("PMU driver for DesignWare Cores PCI Express Controller");
+MODULE_AUTHOR("Shuai Xue <xueshuai@linux.alibaba.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 92611c98120f12..7dbfaee372c762 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -19,6 +19,8 @@
#define COUNTER_READ 0x20
#define COUNTER_DPCR1 0x30
+#define COUNTER_MUX_CNTL 0x50
+#define COUNTER_MASK_COMP 0x54
#define CNTL_OVER 0x1
#define CNTL_CLEAR 0x2
@@ -32,6 +34,13 @@
#define CNTL_CSV_SHIFT 24
#define CNTL_CSV_MASK (0xFFU << CNTL_CSV_SHIFT)
+#define READ_PORT_SHIFT 0
+#define READ_PORT_MASK (0x7 << READ_PORT_SHIFT)
+#define READ_CHANNEL_REVERT 0x00000008 /* bit 3 for read channel select */
+#define WRITE_PORT_SHIFT 8
+#define WRITE_PORT_MASK (0x7 << WRITE_PORT_SHIFT)
+#define WRITE_CHANNEL_REVERT 0x00000800 /* bit 11 for write channel select */
+
#define EVENT_CYCLES_ID 0
#define EVENT_CYCLES_COUNTER 0
#define NUM_COUNTERS 4
@@ -50,6 +59,7 @@ static DEFINE_IDA(ddr_ida);
/* DDR Perf hardware feature */
#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */
#define DDR_CAP_AXI_ID_FILTER_ENHANCED 0x3 /* support enhanced AXI ID filter */
+#define DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER 0x4 /* support AXI ID PORT CHANNEL filter */
struct fsl_ddr_devtype_data {
unsigned int quirks; /* quirks needed for different DDR Perf core */
@@ -82,6 +92,11 @@ static const struct fsl_ddr_devtype_data imx8mp_devtype_data = {
.identifier = "i.MX8MP",
};
+static const struct fsl_ddr_devtype_data imx8dxl_devtype_data = {
+ .quirks = DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER,
+ .identifier = "i.MX8DXL",
+};
+
static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},
@@ -89,6 +104,7 @@ static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
{ .compatible = "fsl,imx8mm-ddr-pmu", .data = &imx8mm_devtype_data},
{ .compatible = "fsl,imx8mn-ddr-pmu", .data = &imx8mn_devtype_data},
{ .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data},
+ { .compatible = "fsl,imx8dxl-ddr-pmu", .data = &imx8dxl_devtype_data},
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
@@ -144,6 +160,7 @@ static const struct attribute_group ddr_perf_identifier_attr_group = {
enum ddr_perf_filter_capabilities {
PERF_CAP_AXI_ID_FILTER = 0,
PERF_CAP_AXI_ID_FILTER_ENHANCED,
+ PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER,
PERF_CAP_AXI_ID_FEAT_MAX,
};
@@ -157,6 +174,8 @@ static u32 ddr_perf_filter_cap_get(struct ddr_pmu *pmu, int cap)
case PERF_CAP_AXI_ID_FILTER_ENHANCED:
quirks &= DDR_CAP_AXI_ID_FILTER_ENHANCED;
return quirks == DDR_CAP_AXI_ID_FILTER_ENHANCED;
+ case PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER:
+ return !!(quirks & DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER);
default:
WARN(1, "unknown filter cap %d\n", cap);
}
@@ -187,6 +206,7 @@ static ssize_t ddr_perf_filter_cap_show(struct device *dev,
static struct attribute *ddr_perf_filter_cap_attr[] = {
PERF_FILTER_EXT_ATTR_ENTRY(filter, PERF_CAP_AXI_ID_FILTER),
PERF_FILTER_EXT_ATTR_ENTRY(enhanced_filter, PERF_CAP_AXI_ID_FILTER_ENHANCED),
+ PERF_FILTER_EXT_ATTR_ENTRY(super_filter, PERF_CAP_AXI_ID_PORT_CHANNEL_FILTER),
NULL,
};
@@ -272,11 +292,15 @@ static const struct attribute_group ddr_perf_events_attr_group = {
PMU_FORMAT_ATTR(event, "config:0-7");
PMU_FORMAT_ATTR(axi_id, "config1:0-15");
PMU_FORMAT_ATTR(axi_mask, "config1:16-31");
+PMU_FORMAT_ATTR(axi_port, "config2:0-2");
+PMU_FORMAT_ATTR(axi_channel, "config2:3-3");
static struct attribute *ddr_perf_format_attrs[] = {
&format_attr_event.attr,
&format_attr_axi_id.attr,
&format_attr_axi_mask.attr,
+ &format_attr_axi_port.attr,
+ &format_attr_axi_channel.attr,
NULL,
};
@@ -530,6 +554,7 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
int counter;
int cfg = event->attr.config;
int cfg1 = event->attr.config1;
+ int cfg2 = event->attr.config2;
if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) {
int i;
@@ -553,6 +578,26 @@ static int ddr_perf_event_add(struct perf_event *event, int flags)
return -EOPNOTSUPP;
}
+ if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_PORT_CHANNEL_FILTER) {
+ if (ddr_perf_is_filtered(event)) {
+ /* revert axi id masking(axi_mask) value */
+ cfg1 ^= AXI_MASKING_REVERT;
+ writel(cfg1, pmu->base + COUNTER_MASK_COMP + ((counter - 1) << 4));
+
+ if (cfg == 0x41) {
+ /* revert axi read channel(axi_channel) value */
+ cfg2 ^= READ_CHANNEL_REVERT;
+ cfg2 |= FIELD_PREP(READ_PORT_MASK, cfg2);
+ } else {
+ /* revert axi write channel(axi_channel) value */
+ cfg2 ^= WRITE_CHANNEL_REVERT;
+ cfg2 |= FIELD_PREP(WRITE_PORT_MASK, cfg2);
+ }
+
+ writel(cfg2, pmu->base + COUNTER_MUX_CNTL + ((counter - 1) << 4));
+ }
+ }
+
pmu->events[counter] = event;
hwc->idx = counter;
diff --git a/drivers/perf/fsl_imx9_ddr_perf.c b/drivers/perf/fsl_imx9_ddr_perf.c
index 5cf770a1bc3124..9685645bfe04d9 100644
--- a/drivers/perf/fsl_imx9_ddr_perf.c
+++ b/drivers/perf/fsl_imx9_ddr_perf.c
@@ -617,7 +617,7 @@ static int ddr_perf_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, pmu);
- pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
+ pmu->id = ida_alloc(&ddr_ida, GFP_KERNEL);
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", pmu->id);
if (!name) {
ret = -ENOMEM;
@@ -674,7 +674,7 @@ cpuhp_instance_err:
cpuhp_remove_multi_state(pmu->cpuhp_state);
cpuhp_state_err:
format_string_err:
- ida_simple_remove(&ddr_ida, pmu->id);
+ ida_free(&ddr_ida, pmu->id);
dev_warn(&pdev->dev, "i.MX9 DDR Perf PMU failed (%d), disabled\n", ret);
return ret;
}
@@ -688,7 +688,7 @@ static int ddr_perf_remove(struct platform_device *pdev)
perf_pmu_unregister(&pmu->pmu);
- ida_simple_remove(&ddr_ida, pmu->id);
+ ida_free(&ddr_ida, pmu->id);
return 0;
}
diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
index 63da05e5831c1f..636fb79647c8cc 100644
--- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c
@@ -383,8 +383,8 @@ static struct attribute *hisi_uc_pmu_events_attr[] = {
HISI_PMU_EVENT_ATTR(cpu_rd, 0x10),
HISI_PMU_EVENT_ATTR(cpu_rd64, 0x17),
HISI_PMU_EVENT_ATTR(cpu_rs64, 0x19),
- HISI_PMU_EVENT_ATTR(cpu_mru, 0x1a),
- HISI_PMU_EVENT_ATTR(cycles, 0x9c),
+ HISI_PMU_EVENT_ATTR(cpu_mru, 0x1c),
+ HISI_PMU_EVENT_ATTR(cycles, 0x95),
HISI_PMU_EVENT_ATTR(spipe_hit, 0xb3),
HISI_PMU_EVENT_ATTR(hpipe_hit, 0xdb),
HISI_PMU_EVENT_ATTR(cring_rxdat_cnt, 0xfa),
diff --git a/include/asm-generic/numa.h b/include/asm-generic/numa.h
index 1a3ad6d2983308..c32e0cf23c9096 100644
--- a/include/asm-generic/numa.h
+++ b/include/asm-generic/numa.h
@@ -35,6 +35,7 @@ int __init numa_add_memblk(int nodeid, u64 start, u64 end);
void __init numa_set_distance(int from, int to, int distance);
void __init numa_free_distance(void);
void __init early_map_cpu_to_node(unsigned int cpu, int nid);
+int __init early_cpu_to_node(int cpu);
void numa_store_cpu_info(unsigned int cpu);
void numa_add_cpu(unsigned int cpu);
void numa_remove_cpu(unsigned int cpu);
@@ -46,6 +47,7 @@ static inline void numa_add_cpu(unsigned int cpu) { }
static inline void numa_remove_cpu(unsigned int cpu) { }
static inline void arch_numa_init(void) { }
static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { }
+static inline int early_cpu_to_node(int cpu) { return 0; }
#endif /* CONFIG_NUMA */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index dea043bc1e383a..58a4c976c39bde 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1239,6 +1239,8 @@ int pci_read_config_dword(const struct pci_dev *dev, int where, u32 *val);
int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val);
int pci_write_config_word(const struct pci_dev *dev, int where, u16 val);
int pci_write_config_dword(const struct pci_dev *dev, int where, u32 val);
+void pci_clear_and_set_config_dword(const struct pci_dev *dev, int pos,
+ u32 clear, u32 set);
int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val);
int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val);
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 275799b5f535cf..844ffdac8d7d16 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2605,6 +2605,8 @@
#define PCI_VENDOR_ID_TEKRAM 0x1de1
#define PCI_DEVICE_ID_TEKRAM_DC290 0xdc29
+#define PCI_VENDOR_ID_ALIBABA 0x1ded
+
#define PCI_VENDOR_ID_TEHUTI 0x1fc9
#define PCI_DEVICE_ID_TEHUTI_3009 0x3009
#define PCI_DEVICE_ID_TEHUTI_3010 0x3010
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 143fbc10ecfe03..b3b34f6670cfb9 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -60,12 +60,6 @@ struct pmu_hw_events {
DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS);
/*
- * Hardware lock to serialize accesses to PMU registers. Needed for the
- * read/modify/write sequences.
- */
- raw_spinlock_t pmu_lock;
-
- /*
* When using percpu IRQs, we need a percpu dev_id. Place it here as we
* already have to allocate this struct per cpu.
*/
@@ -189,4 +183,26 @@ void armpmu_free_irq(int irq, int cpu);
#define ARMV8_SPE_PDEV_NAME "arm,spe-v1"
#define ARMV8_TRBE_PDEV_NAME "arm,trbe"
+/* Why does everything I do descend into this? */
+#define __GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
+ (lo) == (hi) ? #cfg ":" #lo "\n" : #cfg ":" #lo "-" #hi
+
+#define _GEN_PMU_FORMAT_ATTR(cfg, lo, hi) \
+ __GEN_PMU_FORMAT_ATTR(cfg, lo, hi)
+
+#define GEN_PMU_FORMAT_ATTR(name) \
+ PMU_FORMAT_ATTR(name, \
+ _GEN_PMU_FORMAT_ATTR(ATTR_CFG_FLD_##name##_CFG, \
+ ATTR_CFG_FLD_##name##_LO, \
+ ATTR_CFG_FLD_##name##_HI))
+
+#define _ATTR_CFG_GET_FLD(attr, cfg, lo, hi) \
+ ((((attr)->cfg) >> lo) & GENMASK_ULL(hi - lo, 0))
+
+#define ATTR_CFG_GET_FLD(attr, name) \
+ _ATTR_CFG_GET_FLD(attr, \
+ ATTR_CFG_FLD_##name##_CFG, \
+ ATTR_CFG_FLD_##name##_LO, \
+ ATTR_CFG_FLD_##name##_HI)
+
#endif /* __ARM_PMU_H__ */
diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
index 9c226adf938a2a..46377e134d67cb 100644
--- a/include/linux/perf/arm_pmuv3.h
+++ b/include/linux/perf/arm_pmuv3.h
@@ -215,21 +215,27 @@
#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */
-#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
-#define ARMV8_PMU_PMCR_N_MASK 0x1f
-#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */
+#define ARMV8_PMU_PMCR_N GENMASK(15, 11) /* Number of counters supported */
+/* Mask for writable bits */
+#define ARMV8_PMU_PMCR_MASK (ARMV8_PMU_PMCR_E | ARMV8_PMU_PMCR_P | \
+ ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_D | \
+ ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \
+ ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP)
/*
* PMOVSR: counters overflow flag status reg
*/
-#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */
-#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK
+#define ARMV8_PMU_OVSR_P GENMASK(30, 0)
+#define ARMV8_PMU_OVSR_C BIT(31)
+/* Mask for writable bits is both P and C fields */
+#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C)
/*
* PMXEVTYPER: Event selection reg
*/
-#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
-#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
+#define ARMV8_PMU_EVTYPE_EVENT GENMASK(15, 0) /* Mask for EVENT bits */
+#define ARMV8_PMU_EVTYPE_TH GENMASK_ULL(43, 32) /* arm64 only */
+#define ARMV8_PMU_EVTYPE_TC GENMASK_ULL(63, 61) /* arm64 only */
/*
* Event filters for PMUv3
@@ -244,19 +250,19 @@
/*
* PMUSERENR: user enable reg
*/
-#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */
#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */
#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
+/* Mask for writable bits */
+#define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \
+ ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER)
/* PMMIR_EL1.SLOTS mask */
-#define ARMV8_PMU_SLOTS_MASK 0xff
-
-#define ARMV8_PMU_BUS_SLOTS_SHIFT 8
-#define ARMV8_PMU_BUS_SLOTS_MASK 0xff
-#define ARMV8_PMU_BUS_WIDTH_SHIFT 16
-#define ARMV8_PMU_BUS_WIDTH_MASK 0xf
+#define ARMV8_PMU_SLOTS GENMASK(7, 0)
+#define ARMV8_PMU_BUS_SLOTS GENMASK(15, 8)
+#define ARMV8_PMU_BUS_WIDTH GENMASK(19, 16)
+#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
/*
* This code is really good
diff --git a/tools/include/perf/arm_pmuv3.h b/tools/include/perf/arm_pmuv3.h
index e822d49fb5b88e..1e397d55384ed9 100644
--- a/tools/include/perf/arm_pmuv3.h
+++ b/tools/include/perf/arm_pmuv3.h
@@ -218,45 +218,54 @@
#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */
-#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */
-#define ARMV8_PMU_PMCR_N_MASK 0x1f
-#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */
+#define ARMV8_PMU_PMCR_N GENMASK(15, 11) /* Number of counters supported */
+/* Mask for writable bits */
+#define ARMV8_PMU_PMCR_MASK (ARMV8_PMU_PMCR_E | ARMV8_PMU_PMCR_P | \
+ ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_D | \
+ ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \
+ ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP)
/*
* PMOVSR: counters overflow flag status reg
*/
-#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */
-#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK
+#define ARMV8_PMU_OVSR_P GENMASK(30, 0)
+#define ARMV8_PMU_OVSR_C BIT(31)
+/* Mask for writable bits is both P and C fields */
+#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C)
/*
* PMXEVTYPER: Event selection reg
*/
-#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
-#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
+#define ARMV8_PMU_EVTYPE_EVENT GENMASK(15, 0) /* Mask for EVENT bits */
+#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32)
+#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61)
/*
* Event filters for PMUv3
*/
-#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31)
-#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30)
-#define ARMV8_PMU_INCLUDE_EL2 (1U << 27)
+#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31)
+#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30)
+#define ARMV8_PMU_EXCLUDE_NS_EL1 (1U << 29)
+#define ARMV8_PMU_EXCLUDE_NS_EL0 (1U << 28)
+#define ARMV8_PMU_INCLUDE_EL2 (1U << 27)
+#define ARMV8_PMU_EXCLUDE_EL3 (1U << 26)
/*
* PMUSERENR: user enable reg
*/
-#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */
#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */
#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
+/* Mask for writable bits */
+#define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \
+ ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER)
/* PMMIR_EL1.SLOTS mask */
-#define ARMV8_PMU_SLOTS_MASK 0xff
-
-#define ARMV8_PMU_BUS_SLOTS_SHIFT 8
-#define ARMV8_PMU_BUS_SLOTS_MASK 0xff
-#define ARMV8_PMU_BUS_WIDTH_SHIFT 16
-#define ARMV8_PMU_BUS_WIDTH_MASK 0xf
+#define ARMV8_PMU_SLOTS GENMASK(7, 0)
+#define ARMV8_PMU_BUS_SLOTS GENMASK(15, 8)
+#define ARMV8_PMU_BUS_WIDTH GENMASK(19, 16)
+#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
/*
* This code is really good
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
index 351a098b503ad2..02ee3a91b780aa 100644
--- a/tools/testing/selftests/arm64/abi/tpidr2.c
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -254,6 +254,12 @@ static int write_clone_read(void)
putnum(++tests_run); \
putstr(" " #name "\n");
+#define skip_test(name) \
+ tests_skipped++; \
+ putstr("ok "); \
+ putnum(++tests_run); \
+ putstr(" # SKIP " #name "\n");
+
int main(int argc, char **argv)
{
int ret, i;
@@ -283,13 +289,11 @@ int main(int argc, char **argv)
} else {
putstr("# SME support not present\n");
- for (i = 0; i < EXPECTED_TESTS; i++) {
- putstr("ok ");
- putnum(i);
- putstr(" skipped, TPIDR2 not supported\n");
- }
-
- tests_skipped += EXPECTED_TESTS;
+ skip_test(default_value);
+ skip_test(write_read);
+ skip_test(write_sleep_read);
+ skip_test(write_fork_read);
+ skip_test(write_clone_read);
}
print_summary();
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index 547d077e351727..fff60e2a25addf 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -515,6 +515,10 @@ function barf
mov x11, x1 // actual data
mov x12, x2 // data size
+#ifdef SSVE
+ mrs x13, S3_3_C4_C2_2
+#endif
+
puts "Mismatch: PID="
mov x0, x20
bl putdec
@@ -534,6 +538,12 @@ function barf
bl dumphex
puts "]\n"
+#ifdef SSVE
+ puts "\tSVCR: "
+ mov x0, x13
+ bl putdecn
+#endif
+
mov x8, #__NR_getpid
svc #0
// fpsimd.c acitivty log dump hack
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
index 5f648b97a06fea..ea9c7d47790f9d 100644
--- a/tools/testing/selftests/arm64/fp/vec-syscfg.c
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -66,6 +66,11 @@ static struct vec_data vec_data[] = {
},
};
+static bool vec_type_supported(struct vec_data *data)
+{
+ return getauxval(data->hwcap_type) & data->hwcap;
+}
+
static int stdio_read_integer(FILE *f, const char *what, int *val)
{
int n = 0;
@@ -564,8 +569,11 @@ static void prctl_set_all_vqs(struct vec_data *data)
return;
}
- for (i = 0; i < ARRAY_SIZE(vec_data); i++)
+ for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+ if (!vec_type_supported(&vec_data[i]))
+ continue;
orig_vls[i] = vec_data[i].rdvl();
+ }
for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
vl = sve_vl_from_vq(vq);
@@ -594,7 +602,7 @@ static void prctl_set_all_vqs(struct vec_data *data)
if (&vec_data[i] == data)
continue;
- if (!(getauxval(vec_data[i].hwcap_type) & vec_data[i].hwcap))
+ if (!vec_type_supported(&vec_data[i]))
continue;
if (vec_data[i].rdvl() != orig_vls[i]) {
@@ -765,7 +773,7 @@ int main(void)
struct vec_data *data = &vec_data[i];
unsigned long supported;
- supported = getauxval(data->hwcap_type) & data->hwcap;
+ supported = vec_type_supported(data);
if (!supported)
all_supported = false;
diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S
index 9dcd7091139754..095b4553164096 100644
--- a/tools/testing/selftests/arm64/fp/za-test.S
+++ b/tools/testing/selftests/arm64/fp/za-test.S
@@ -333,6 +333,9 @@ function barf
// mov w8, #__NR_exit
// svc #0
// end hack
+
+ mrs x13, S3_3_C4_C2_2
+
smstop
mov x10, x0 // expected data
mov x11, x1 // actual data
@@ -356,6 +359,9 @@ function barf
mov x1, x12
bl dumphex
puts "]\n"
+ puts "\tSVCR: "
+ mov x0, x13
+ bl putdecn
mov x8, #__NR_getpid
svc #0
diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S
index d632863976380b..b5c81e81a37946 100644
--- a/tools/testing/selftests/arm64/fp/zt-test.S
+++ b/tools/testing/selftests/arm64/fp/zt-test.S
@@ -267,6 +267,8 @@ function barf
// mov w8, #__NR_exit
// svc #0
// end hack
+
+ mrs x13, S3_3_C4_C2_2
smstop
mov x10, x0 // expected data
mov x11, x1 // actual data
@@ -287,6 +289,9 @@ function barf
mov x1, x12
bl dumphex
puts "]\n"
+ puts "\tSVCR: "
+ mov x0, x13
+ bl putdecn
mov x8, #__NR_getpid
svc #0
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
index 5ea78986e665f5..9d51b56913496e 100644
--- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
+++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
@@ -42,13 +42,12 @@ struct pmreg_sets {
static uint64_t get_pmcr_n(uint64_t pmcr)
{
- return (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+ return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
}
static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
{
- *pmcr = *pmcr & ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
- *pmcr |= (pmcr_n << ARMV8_PMU_PMCR_N_SHIFT);
+ u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
}
static uint64_t get_counters_mask(uint64_t n)