From 724a75ac9542fe1f8aaa587da4d3863d8ea292fc Mon Sep 17 00:00:00 2001 From: Jamie Cunliffe Date: Fri, 20 Oct 2023 16:50:56 +0100 Subject: arm64: rust: Enable Rust support for AArch64 This commit provides the build flags for Rust for AArch64. The core Rust support already in the kernel does the rest. This enables the PAC ret and BTI options in the Rust build flags to match the options that are used when building C. The Rust samples have been tested with this commit. Signed-off-by: Jamie Cunliffe Acked-by: Will Deacon Tested-by: Dirk Behme Tested-by: Boqun Feng Acked-by: Miguel Ojeda Acked-by: Catalin Marinas Tested-by: Alice Ryhl Tested-by: Fabien Parent Link: https://lore.kernel.org/r/20231020155056.3495121-3-Jamie.Cunliffe@arm.com Signed-off-by: Catalin Marinas --- Documentation/rust/arch-support.rst | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst index 73203ba1e9011..5c4fa9f5d1cdb 100644 --- a/Documentation/rust/arch-support.rst +++ b/Documentation/rust/arch-support.rst @@ -15,6 +15,7 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file. ============= ================ ============================================== Architecture Level of support Constraints ============= ================ ============================================== +``arm64`` Maintained Little Endian only. ``loongarch`` Maintained - ``um`` Maintained ``x86_64`` only. ``x86`` Maintained ``x86_64`` only. -- cgit 1.2.3-korg From 7255cfb19941b4681e545be47b9f13b61b1b4cb6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 6 Feb 2024 10:27:57 +0000 Subject: dt-bindings/perf: Add Arm CoreSight PMU Add a binding for implementations of the Arm CoreSight Performance Monitoring Unit Architecture. Not to be confused with CoreSight debug and trace, the PMU architecture defines a standard MMIO interface for event counters following a similar design to the CPU PMU architecture, where the implementation and most of its features are discoverable through ID registers. Reviewed-by: Rob Herring Signed-off-by: Robin Murphy Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/c62a86ef177bec5c6d12176c605de900e9e40c87.1706718007.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- .../bindings/perf/arm,coresight-pmu.yaml | 39 ++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 Documentation/devicetree/bindings/perf/arm,coresight-pmu.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/perf/arm,coresight-pmu.yaml b/Documentation/devicetree/bindings/perf/arm,coresight-pmu.yaml new file mode 100644 index 0000000000000..985b62990f802 --- /dev/null +++ b/Documentation/devicetree/bindings/perf/arm,coresight-pmu.yaml @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/perf/arm,coresight-pmu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Arm Coresight Performance Monitoring Unit Architecture + +maintainers: + - Robin Murphy + +properties: + compatible: + const: arm,coresight-pmu + + reg: + items: + - description: Register page 0 + - description: Register page 1, if the PMU implements the dual-page extension + minItems: 1 + + interrupts: + items: + - description: Overflow interrupt + + cpus: + description: If the PMU is associated with a particular CPU or subset of CPUs, + array of phandles to the appropriate CPU node(s) + + reg-io-width: + description: Granularity at which PMU register accesses are single-copy atomic + default: 4 + enum: [4, 8] + +required: + - compatible + - reg + +additionalProperties: false -- cgit 1.2.3-korg From 8c10cc104b73abdfd87a23ae50b1c90c2c917027 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 9 Feb 2024 18:39:16 +0000 Subject: arm64: errata: Don't enable workarounds for "rare" errata by default Arm classifies some of its CPU errata as "rare", indicating that the hardware error is unlikely to occur in practice. Given that the cost of errata workarounds can often be significant in terms of power and performance, don't enable workarounds for "rare" errata by default and update our documentation to reflect that. Cc: James Morse Signed-off-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20240209183916.25860-1-will@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/silicon-errata.rst | 5 +++-- arch/arm64/Kconfig | 25 +++++++++++-------------- 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index e8c2ce1f9df68..fa16b895c997c 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -35,8 +35,9 @@ can be triggered by Linux). For software workarounds that may adversely impact systems unaffected by the erratum in question, a Kconfig entry is added under "Kernel Features" -> "ARM errata workarounds via the alternatives framework". -These are enabled by default and patched in at runtime when an affected -CPU is detected. For less-intrusive workarounds, a Kconfig option is not +With the exception of workarounds for errata deemed "rare" by Arm, these +are enabled by default and patched in at runtime when an affected CPU is +detected. For less-intrusive workarounds, a Kconfig option is not available and the code is structured (preferably with a comment) in such a way that the erratum will not be hit. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index aa7c1d4351396..430fabf20f173 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -547,9 +547,8 @@ config ARM64_ERRATUM_832075 If unsure, say Y. config ARM64_ERRATUM_834220 - bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault" + bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault (rare)" depends on KVM - default y help This option adds an alternative code sequence to work around ARM erratum 834220 on Cortex-A57 parts up to r1p2. @@ -565,7 +564,7 @@ config ARM64_ERRATUM_834220 as it depends on the alternative framework, which will only patch the kernel if an affected CPU is detected. - If unsure, say Y. + If unsure, say N. config ARM64_ERRATUM_1742098 bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence" @@ -692,8 +691,7 @@ config ARM64_WORKAROUND_REPEAT_TLBI bool config ARM64_ERRATUM_2441007 - bool "Cortex-A55: Completion of affected memory accesses might not be guaranteed by completion of a TLBI" - default y + bool "Cortex-A55: Completion of affected memory accesses might not be guaranteed by completion of a TLBI (rare)" select ARM64_WORKAROUND_REPEAT_TLBI help This option adds a workaround for ARM Cortex-A55 erratum #2441007. @@ -706,11 +704,10 @@ config ARM64_ERRATUM_2441007 Work around this by adding the affected CPUs to the list that needs TLB sequences to be done twice. - If unsure, say Y. + If unsure, say N. config ARM64_ERRATUM_1286807 - bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation" - default y + bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation (rare)" select ARM64_WORKAROUND_REPEAT_TLBI help This option adds a workaround for ARM Cortex-A76 erratum 1286807. @@ -724,6 +721,8 @@ config ARM64_ERRATUM_1286807 invalidated has been observed by other observers. The workaround repeats the TLBI+DSB operation. + If unsure, say N. + config ARM64_ERRATUM_1463225 bool "Cortex-A76: Software Step might prevent interrupt recognition" default y @@ -743,8 +742,7 @@ config ARM64_ERRATUM_1463225 If unsure, say Y. config ARM64_ERRATUM_1542419 - bool "Neoverse-N1: workaround mis-ordering of instruction fetches" - default y + bool "Neoverse-N1: workaround mis-ordering of instruction fetches (rare)" help This option adds a workaround for ARM Neoverse-N1 erratum 1542419. @@ -756,7 +754,7 @@ config ARM64_ERRATUM_1542419 Workaround the issue by hiding the DIC feature from EL0. This forces user-space to perform cache maintenance. - If unsure, say Y. + If unsure, say N. config ARM64_ERRATUM_1508412 bool "Cortex-A77: 1508412: workaround deadlock on sequence of NC/Device load and store exclusive or PAR read" @@ -931,8 +929,7 @@ config ARM64_ERRATUM_2224489 If unsure, say Y. config ARM64_ERRATUM_2441009 - bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI" - default y + bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI (rare)" select ARM64_WORKAROUND_REPEAT_TLBI help This option adds a workaround for ARM Cortex-A510 erratum #2441009. @@ -945,7 +942,7 @@ config ARM64_ERRATUM_2441009 Work around this by adding the affected CPUs to the list that needs TLB sequences to be done twice. - If unsure, say Y. + If unsure, say N. config ARM64_ERRATUM_2064142 bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled" -- cgit 1.2.3-korg From b4725d3e460349aa55836a2a128009c578215b90 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 18:12:35 +0000 Subject: arm64/sve: Remove bitrotted comment about syscall behaviour When we documented that we always clear state not shared with FPSIMD we didn't catch all of the places that mentioned that state might not be cleared, remove a lingering reference. Reported-by: Edmund Grimley-Evans Reviewed-by: Dave Martin Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240124-arm64-sve-sme-doc-v2-1-fe3964fb3c19@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/sve.rst | 5 ----- 1 file changed, 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/arch/arm64/sve.rst b/Documentation/arch/arm64/sve.rst index 0d9a426e9f858..b45a2da19bf1d 100644 --- a/Documentation/arch/arm64/sve.rst +++ b/Documentation/arch/arm64/sve.rst @@ -117,11 +117,6 @@ the SVE instruction set architecture. * The SVE registers are not used to pass arguments to or receive results from any syscall. -* In practice the affected registers/bits will be preserved or will be replaced - with zeros on return from a syscall, but userspace should not make - assumptions about this. The kernel behaviour may vary on a case-by-case - basis. - * All other SVE state of a thread, including the currently configured vector length, the state of the PR_SVE_VL_INHERIT flag, and the deferred vector length (if any), is preserved across all syscalls, subject to the specific -- cgit 1.2.3-korg From ae35792764bcba1dc13d2de62480eae8b97c2c41 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 18:12:36 +0000 Subject: arm64/sme: Fix cut'n'paste in ABI document The ABI for SME is very like that for SVE so bits of the ABI were copied but not adequately search and replaced, fix that. Reported-by: Edmund Grimley-Evans Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20240124-arm64-sve-sme-doc-v2-2-fe3964fb3c19@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/sme.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst index 3d0e53ecac4fe..3133d0e91b484 100644 --- a/Documentation/arch/arm64/sme.rst +++ b/Documentation/arch/arm64/sme.rst @@ -238,12 +238,12 @@ prctl(PR_SME_SET_VL, unsigned long arg) bits of Z0..Z31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become unspecified, including both streaming and non-streaming SVE state. Calling PR_SME_SET_VL with vl equal to the thread's current vector - length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag, + length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag, does not constitute a change to the vector length for this purpose. * Changing the vector length causes PSTATE.ZA and PSTATE.SM to be cleared. Calling PR_SME_SET_VL with vl equal to the thread's current vector - length, or calling PR_SME_SET_VL with the PR_SVE_SET_VL_ONEXEC flag, + length, or calling PR_SME_SET_VL with the PR_SME_SET_VL_ONEXEC flag, does not constitute a change to the vector length for this purpose. -- cgit 1.2.3-korg From 3fd97cf3234c5ffbd420319d1d510e4940183843 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 18:12:37 +0000 Subject: arm64/fp: Clarify effect of setting an unsupported system VL The documentation for system vector length configuration does not cover all cases where unsupported values are written, tighten it up. Reported-by: Edmund Grimley-Evans Signed-off-by: Mark Brown Reviewed-by: Dave Martin Link: https://lore.kernel.org/r/20240124-arm64-sve-sme-doc-v2-3-fe3964fb3c19@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/sme.rst | 5 ++--- Documentation/arch/arm64/sve.rst | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst index 3133d0e91b484..f4376c06f4472 100644 --- a/Documentation/arch/arm64/sme.rst +++ b/Documentation/arch/arm64/sme.rst @@ -379,9 +379,8 @@ The regset data starts with struct user_za_header, containing: /proc/sys/abi/sme_default_vector_length Writing the text representation of an integer to this file sets the system - default vector length to the specified value, unless the value is greater - than the maximum vector length supported by the system in which case the - default vector length is set to that maximum. + default vector length to the specified value rounded to a supported value + using the same rules as for setting vector length via PR_SME_SET_VL. The result can be determined by reopening the file and reading its contents. diff --git a/Documentation/arch/arm64/sve.rst b/Documentation/arch/arm64/sve.rst index b45a2da19bf1d..8d8837fc39ec7 100644 --- a/Documentation/arch/arm64/sve.rst +++ b/Documentation/arch/arm64/sve.rst @@ -423,9 +423,8 @@ The regset data starts with struct user_sve_header, containing: /proc/sys/abi/sve_default_vector_length Writing the text representation of an integer to this file sets the system - default vector length to the specified value, unless the value is greater - than the maximum vector length supported by the system in which case the - default vector length is set to that maximum. + default vector length to the specified value rounded to a supported value + using the same rules as for setting vector length via PR_SVE_SET_VL. The result can be determined by reopening the file and reading its contents. -- cgit 1.2.3-korg From e47c18c3b25e63807005d0cb40f7743cb7512388 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Jan 2024 18:12:38 +0000 Subject: arm64/sme: Remove spurious 'is' in SME documentation Just a typographical error. Reported-by: Edmund Grimley-Evans Reviewed-by: Dave Martin Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240124-arm64-sve-sme-doc-v2-4-fe3964fb3c19@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/sme.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst index f4376c06f4472..be317d4574174 100644 --- a/Documentation/arch/arm64/sme.rst +++ b/Documentation/arch/arm64/sme.rst @@ -75,7 +75,7 @@ model features for SME is included in Appendix A. 2. Vector lengths ------------------ -SME defines a second vector length similar to the SVE vector length which is +SME defines a second vector length similar to the SVE vector length which controls the size of the streaming mode SVE vectors and the ZA matrix array. The ZA matrix is square with each side having as many bytes as a streaming mode SVE vector. -- cgit 1.2.3-korg From 89a032923d4ba23907405744aa86265822f057f8 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 23 Feb 2024 18:33:59 +0800 Subject: docs: perf: Update usage for target filter of hisi-pcie-pmu One of the "port" and "bdf" target filter interface must be set, and the related events should preferably used in the same group. Update the usage in the documentation. Signed-off-by: Junhao He Signed-off-by: Yicong Yang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240223103359.18669-9-yangyicong@huawei.com Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/hisi-pcie-pmu.rst | 31 ++++++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst index 7e863662e2d42..678d3865560c3 100644 --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst @@ -37,9 +37,20 @@ Example usage of perf:: hisi_pcie0_core0/rx_mwr_cnt/ [kernel PMU event] ------------------------------------------ - $# perf stat -e hisi_pcie0_core0/rx_mwr_latency/ - $# perf stat -e hisi_pcie0_core0/rx_mwr_cnt/ - $# perf stat -g -e hisi_pcie0_core0/rx_mwr_latency/ -e hisi_pcie0_core0/rx_mwr_cnt/ + $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0xffff/ + $# perf stat -e hisi_pcie0_core0/rx_mwr_cnt,port=0xffff/ + +The related events usually used to calculate the bandwidth, latency or others. +They need to start and end counting at the same time, therefore related events +are best used in the same event group to get the expected value. There are two +ways to know if they are related events: +a) By event name, such as the latency events "xxx_latency, xxx_cnt" or + bandwidth events "xxx_flux, xxx_time". +b) By event type, such as "event=0xXXXX, event=0x1XXXX". + +Example usage of perf group:: + + $# perf stat -e "{hisi_pcie0_core0/rx_mwr_latency,port=0xffff/,hisi_pcie0_core0/rx_mwr_cnt,port=0xffff/}" The current driver does not support sampling. So "perf record" is unsupported. Also attach to a task is unsupported for PCIe PMU. @@ -51,8 +62,12 @@ Filter options PMU could only monitor the performance of traffic downstream target Root Ports or downstream target Endpoint. PCIe PMU driver support "port" and - "bdf" interfaces for users, and these two interfaces aren't supported at the - same time. + "bdf" interfaces for users. + Please notice that, one of these two interfaces must be set, and these two + interfaces aren't supported at the same time. If they are both set, only + "port" filter is valid. + If "port" filter not being set or is set explicitly to zero (default), the + "bdf" filter will be in effect, because "bdf=0" meaning 0000:000:00.0. - port @@ -95,7 +110,7 @@ Filter options Example usage of perf:: - $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,trig_len=0x4,trig_mode=1/ sleep 5 3. Threshold filter @@ -109,7 +124,7 @@ Filter options Example usage of perf:: - $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,thr_len=0x4,thr_mode=1/ sleep 5 4. TLP Length filter @@ -127,4 +142,4 @@ Filter options Example usage of perf:: - $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5 + $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,port=0xffff,len_mode=0x1/ sleep 5 -- cgit 1.2.3-korg From 66461b43b0c05da2e7c606b9eea7f1f3b565b9c3 Mon Sep 17 00:00:00 2001 From: Ji Sheng Teoh Date: Thu, 29 Feb 2024 15:27:18 +0800 Subject: dt-bindings: perf: starfive: Add JH8100 StarLink PMU Add device tree binding for StarFive's JH8100 StarLink PMU (Performance Monitor Unit). Signed-off-by: Ji Sheng Teoh Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20240229072720.3987876-3-jisheng.teoh@starfivetech.com Signed-off-by: Will Deacon --- .../perf/starfive,jh8100-starlink-pmu.yaml | 46 ++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 Documentation/devicetree/bindings/perf/starfive,jh8100-starlink-pmu.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/perf/starfive,jh8100-starlink-pmu.yaml b/Documentation/devicetree/bindings/perf/starfive,jh8100-starlink-pmu.yaml new file mode 100644 index 0000000000000..915c6b8140262 --- /dev/null +++ b/Documentation/devicetree/bindings/perf/starfive,jh8100-starlink-pmu.yaml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/perf/starfive,jh8100-starlink-pmu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: StarFive JH8100 StarLink PMU + +maintainers: + - Ji Sheng Teoh + +description: + StarFive's JH8100 StarLink PMU integrates one or more CPU cores with a + shared L3 memory system. The PMU support overflow interrupt, up to + 16 programmable 64bit event counters, and an independent 64bit cycle + counter. StarFive's JH8100 StarLink PMU is accessed via MMIO. + +properties: + compatible: + const: starfive,jh8100-starlink-pmu + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + +additionalProperties: false + +examples: + - | + soc { + #address-cells = <2>; + #size-cells = <2>; + + pmu@12900000 { + compatible = "starfive,jh8100-starlink-pmu"; + reg = <0x0 0x12900000 0x0 0x10000>; + interrupts = <34>; + }; + }; -- cgit 1.2.3-korg From 49925c1c5a6c93a857b3dffcce3a7fb48ec72cbb Mon Sep 17 00:00:00 2001 From: Ji Sheng Teoh Date: Thu, 29 Feb 2024 15:27:19 +0800 Subject: docs: perf: Add description for StarFive's StarLink PMU StarFive StarLink PMU support monitoring L3 memory system PMU events. Add documentation to describe StarFive StarLink PMU support and it's usage. Signed-off-by: Ji Sheng Teoh Link: https://lore.kernel.org/r/20240229072720.3987876-4-jisheng.teoh@starfivetech.com Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/index.rst | 1 + .../admin-guide/perf/starfive_starlink_pmu.rst | 46 ++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 Documentation/admin-guide/perf/starfive_starlink_pmu.rst (limited to 'Documentation') diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst index f4a4513c526f0..7eb3dcd6f4da6 100644 --- a/Documentation/admin-guide/perf/index.rst +++ b/Documentation/admin-guide/perf/index.rst @@ -13,6 +13,7 @@ Performance monitor support imx-ddr qcom_l2_pmu qcom_l3_pmu + starfive_starlink_pmu arm-ccn arm-cmn xgene-pmu diff --git a/Documentation/admin-guide/perf/starfive_starlink_pmu.rst b/Documentation/admin-guide/perf/starfive_starlink_pmu.rst new file mode 100644 index 0000000000000..2932ddb4eb765 --- /dev/null +++ b/Documentation/admin-guide/perf/starfive_starlink_pmu.rst @@ -0,0 +1,46 @@ +================================================ +StarFive StarLink Performance Monitor Unit (PMU) +================================================ + +StarFive StarLink Performance Monitor Unit (PMU) exists within the +StarLink Coherent Network on Chip (CNoC) that connects multiple CPU +clusters with an L3 memory system. + +The uncore PMU supports overflow interrupt, up to 16 programmable 64bit +event counters, and an independent 64bit cycle counter. +The PMU can only be accessed via Memory Mapped I/O and are common to the +cores connected to the same PMU. + +Driver exposes supported PMU events in sysfs "events" directory under:: + + /sys/bus/event_source/devices/starfive_starlink_pmu/events/ + +Driver exposes cpu used to handle PMU events in sysfs "cpumask" directory +under:: + + /sys/bus/event_source/devices/starfive_starlink_pmu/cpumask/ + +Driver describes the format of config (event ID) in sysfs "format" directory +under:: + + /sys/bus/event_source/devices/starfive_starlink_pmu/format/ + +Example of perf usage:: + + $ perf list + + starfive_starlink_pmu/cycles/ [Kernel PMU event] + starfive_starlink_pmu/read_hit/ [Kernel PMU event] + starfive_starlink_pmu/read_miss/ [Kernel PMU event] + starfive_starlink_pmu/read_request/ [Kernel PMU event] + starfive_starlink_pmu/release_request/ [Kernel PMU event] + starfive_starlink_pmu/write_hit/ [Kernel PMU event] + starfive_starlink_pmu/write_miss/ [Kernel PMU event] + starfive_starlink_pmu/write_request/ [Kernel PMU event] + starfive_starlink_pmu/writeback/ [Kernel PMU event] + + + $ perf stat -a -e /starfive_starlink_pmu/cycles/ sleep 1 + +Sampling is not supported. As a result, "perf record" is not supported. +Attaching to a task is not supported, only system-wide counting is supported. -- cgit 1.2.3-korg From b037e40a6af2b056f7f15d9aabe7e9a9a7149ff3 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 5 Mar 2024 20:25:17 +0800 Subject: docs: perf: Fix build warning of hisi-pcie-pmu.rst `make htmldocs SPHINXDIRS="admin-guide"` shows below warnings: Documentation/admin-guide/perf/hisi-pcie-pmu.rst:48: ERROR: Unexpected indentation. Documentation/admin-guide/perf/hisi-pcie-pmu.rst:49: WARNING: Block quote ends without a blank line; unexpected unindent. Fix this. Closes: https://lore.kernel.org/lkml/20231011172250.5a6498e5@canb.auug.org.au/ Fixes: 89a032923d4b ("docs: perf: Update usage for target filter of hisi-pcie-pmu") Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20240305122517.12179-1-yangyicong@huawei.com Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/hisi-pcie-pmu.rst | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst index 678d3865560c3..5541ff40e06a8 100644 --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst @@ -44,6 +44,7 @@ The related events usually used to calculate the bandwidth, latency or others. They need to start and end counting at the same time, therefore related events are best used in the same event group to get the expected value. There are two ways to know if they are related events: + a) By event name, such as the latency events "xxx_latency, xxx_cnt" or bandwidth events "xxx_flux, xxx_time". b) By event type, such as "event=0xXXXX, event=0x1XXXX". -- cgit 1.2.3-korg From c1932cac7902a8b0f7355515917dedc5412eb15d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 6 Mar 2024 23:14:51 +0000 Subject: arm64/hwcap: Define hwcaps for 2023 DPISA features The 2023 architecture extensions include a large number of floating point features, most of which simply add new instructions. Add hwcaps so that userspace can enumerate these features. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240306-arm64-2023-dpisa-v5-6-c568edc8ed7f@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/elf_hwcaps.rst | 49 +++++++++++++++++++++++++++++++++ arch/arm64/include/asm/hwcap.h | 15 ++++++++++ arch/arm64/include/uapi/asm/hwcap.h | 15 ++++++++++ arch/arm64/kernel/cpufeature.c | 35 +++++++++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 15 ++++++++++ 5 files changed, 129 insertions(+) (limited to 'Documentation') diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst index ced7b335e2e00..448c1664879bc 100644 --- a/Documentation/arch/arm64/elf_hwcaps.rst +++ b/Documentation/arch/arm64/elf_hwcaps.rst @@ -317,6 +317,55 @@ HWCAP2_LRCPC3 HWCAP2_LSE128 Functionality implied by ID_AA64ISAR0_EL1.Atomic == 0b0011. +HWCAP2_FPMR + Functionality implied by ID_AA64PFR2_EL1.FMR == 0b0001. + +HWCAP2_LUT + Functionality implied by ID_AA64ISAR2_EL1.LUT == 0b0001. + +HWCAP2_FAMINMAX + Functionality implied by ID_AA64ISAR3_EL1.FAMINMAX == 0b0001. + +HWCAP2_F8CVT + Functionality implied by ID_AA64FPFR0_EL1.F8CVT == 0b1. + +HWCAP2_F8FMA + Functionality implied by ID_AA64FPFR0_EL1.F8FMA == 0b1. + +HWCAP2_F8DP4 + Functionality implied by ID_AA64FPFR0_EL1.F8DP4 == 0b1. + +HWCAP2_F8DP2 + Functionality implied by ID_AA64FPFR0_EL1.F8DP2 == 0b1. + +HWCAP2_F8E4M3 + Functionality implied by ID_AA64FPFR0_EL1.F8E4M3 == 0b1. + +HWCAP2_F8E5M2 + Functionality implied by ID_AA64FPFR0_EL1.F8E5M2 == 0b1. + +HWCAP2_SME_LUTV2 + Functionality implied by ID_AA64SMFR0_EL1.LUTv2 == 0b1. + +HWCAP2_SME_F8F16 + Functionality implied by ID_AA64SMFR0_EL1.F8F16 == 0b1. + +HWCAP2_SME_F8F32 + Functionality implied by ID_AA64SMFR0_EL1.F8F32 == 0b1. + +HWCAP2_SME_SF8FMA + Functionality implied by ID_AA64SMFR0_EL1.SF8FMA == 0b1. + +HWCAP2_SME_SF8DP4 + Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1. + +HWCAP2_SME_SF8DP2 + Functionality implied by ID_AA64SMFR0_EL1.SF8DP2 == 0b1. + +HWCAP2_SME_SF8DP4 + Functionality implied by ID_AA64SMFR0_EL1.SF8DP4 == 0b1. + + 4. Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index cd71e09ea14d0..4edd3b61df112 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -142,6 +142,21 @@ #define KERNEL_HWCAP_SVE_B16B16 __khwcap2_feature(SVE_B16B16) #define KERNEL_HWCAP_LRCPC3 __khwcap2_feature(LRCPC3) #define KERNEL_HWCAP_LSE128 __khwcap2_feature(LSE128) +#define KERNEL_HWCAP_FPMR __khwcap2_feature(FPMR) +#define KERNEL_HWCAP_LUT __khwcap2_feature(LUT) +#define KERNEL_HWCAP_FAMINMAX __khwcap2_feature(FAMINMAX) +#define KERNEL_HWCAP_F8CVT __khwcap2_feature(F8CVT) +#define KERNEL_HWCAP_F8FMA __khwcap2_feature(F8FMA) +#define KERNEL_HWCAP_F8DP4 __khwcap2_feature(F8DP4) +#define KERNEL_HWCAP_F8DP2 __khwcap2_feature(F8DP2) +#define KERNEL_HWCAP_F8E4M3 __khwcap2_feature(F8E4M3) +#define KERNEL_HWCAP_F8E5M2 __khwcap2_feature(F8E5M2) +#define KERNEL_HWCAP_SME_LUTV2 __khwcap2_feature(SME_LUTV2) +#define KERNEL_HWCAP_SME_F8F16 __khwcap2_feature(SME_F8F16) +#define KERNEL_HWCAP_SME_F8F32 __khwcap2_feature(SME_F8F32) +#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA) +#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4) +#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 5023599fa278f..285610e626f5f 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -107,5 +107,20 @@ #define HWCAP2_SVE_B16B16 (1UL << 45) #define HWCAP2_LRCPC3 (1UL << 46) #define HWCAP2_LSE128 (1UL << 47) +#define HWCAP2_FPMR (1UL << 48) +#define HWCAP2_LUT (1UL << 49) +#define HWCAP2_FAMINMAX (1UL << 50) +#define HWCAP2_F8CVT (1UL << 51) +#define HWCAP2_F8FMA (1UL << 52) +#define HWCAP2_F8DP4 (1UL << 53) +#define HWCAP2_F8DP2 (1UL << 54) +#define HWCAP2_F8E4M3 (1UL << 55) +#define HWCAP2_F8E5M2 (1UL << 56) +#define HWCAP2_SME_LUTV2 (1UL << 57) +#define HWCAP2_SME_F8F16 (1UL << 58) +#define HWCAP2_SME_F8F32 (1UL << 59) +#define HWCAP2_SME_SF8FMA (1UL << 60) +#define HWCAP2_SME_SF8DP4 (1UL << 61) +#define HWCAP2_SME_SF8DP2 (1UL << 62) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0263565f617ae..aefda789f5104 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -220,6 +220,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_LUT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0), @@ -235,6 +236,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar3[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FAMINMAX_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -303,6 +305,8 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_LUTv2_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), @@ -315,6 +319,10 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F16_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), @@ -325,10 +333,22 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8FMA_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP4_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), + FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP2_SHIFT, 1, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64fpfr0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8CVT_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8FMA_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP4_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP2_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E4M3_SHIFT, 1, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E5M2_SHIFT, 1, 0), ARM64_FTR_END, }; @@ -2859,6 +2879,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD), HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, FP16, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), HWCAP_CAP(ID_AA64PFR0_EL1, DIT, IMP, CAP_HWCAP, KERNEL_HWCAP_DIT), + HWCAP_CAP(ID_AA64PFR2_EL1, FPMR, IMP, CAP_HWCAP, KERNEL_HWCAP_FPMR), HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, IMP, CAP_HWCAP, KERNEL_HWCAP_DCPOP), HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, DPB2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), HWCAP_CAP(ID_AA64ISAR1_EL1, JSCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_JSCVT), @@ -2872,6 +2893,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16), HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH), HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(ID_AA64ISAR2_EL1, LUT, IMP, CAP_HWCAP, KERNEL_HWCAP_LUT), + HWCAP_CAP(ID_AA64ISAR3_EL1, FAMINMAX, IMP, CAP_HWCAP, KERNEL_HWCAP_FAMINMAX), HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE), @@ -2912,6 +2935,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), + HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), @@ -2919,12 +2943,23 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), + HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), + HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), + HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), + HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), + HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), #endif /* CONFIG_ARM64_SME */ + HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP4), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2), {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 12b1920601568..f0abb150f73ef 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -128,6 +128,21 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SVE_B16B16] = "sveb16b16", [KERNEL_HWCAP_LRCPC3] = "lrcpc3", [KERNEL_HWCAP_LSE128] = "lse128", + [KERNEL_HWCAP_FPMR] = "fpmr", + [KERNEL_HWCAP_LUT] = "lut", + [KERNEL_HWCAP_FAMINMAX] = "faminmax", + [KERNEL_HWCAP_F8CVT] = "f8cvt", + [KERNEL_HWCAP_F8FMA] = "f8fma", + [KERNEL_HWCAP_F8DP4] = "f8dp4", + [KERNEL_HWCAP_F8DP2] = "f8dp2", + [KERNEL_HWCAP_F8E4M3] = "f8e4m3", + [KERNEL_HWCAP_F8E5M2] = "f8e5m2", + [KERNEL_HWCAP_SME_LUTV2] = "smelutv2", + [KERNEL_HWCAP_SME_F8F16] = "smef8f16", + [KERNEL_HWCAP_SME_F8F32] = "smef8f32", + [KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma", + [KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4", + [KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2", }; #ifdef CONFIG_COMPAT -- cgit 1.2.3-korg