aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2021-11-08 06:49:26 -0700
committerJens Axboe <axboe@kernel.dk>2021-11-08 06:49:26 -0700
commit2970c1a5c492f364a9b95c5fa1c005cf60b0fe35 (patch)
tree1b1865f8633b8ea24daf43769f1dddb4fde6148a
parent6b75d88fa81b122cce37ebf17428a849ccd3d0f1 (diff)
parenta15058eaefffc37c31326b59fa08b267b2def603 (diff)
downloadlinux-block-pgo.tar.gz
Merge branch 'for-next/clang/pgo' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux into pgopgo
* 'for-next/clang/pgo' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: pgo: rectify comment to proper kernel-doc syntax pgo: Clean up prf_open() error paths pgo: Fix sleep in atomic section in prf_open() pgo: Limit allocate_node() to vmlinux sections pgo: rename the raw profile file to vmlinux.profraw MAINTAINERS: Expand and relocate PGO entry pgo: Add Clang's Profile Guided Optimization infrastructure Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--Documentation/dev-tools/index.rst1
-rw-r--r--Documentation/dev-tools/pgo.rst127
-rw-r--r--MAINTAINERS13
-rw-r--r--Makefile3
-rw-r--r--arch/Kconfig1
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/boot/Makefile1
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/crypto/Makefile3
-rw-r--r--arch/x86/entry/vdso/Makefile1
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/vmlinux.lds.S2
-rw-r--r--arch/x86/platform/efi/Makefile1
-rw-r--r--arch/x86/purgatory/Makefile1
-rw-r--r--arch/x86/realmode/rm/Makefile1
-rw-r--r--arch/x86/um/vdso/Makefile1
-rw-r--r--drivers/firmware/efi/libstub/Makefile1
-rw-r--r--include/asm-generic/vmlinux.lds.h32
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/pgo/Kconfig37
-rw-r--r--kernel/pgo/Makefile5
-rw-r--r--kernel/pgo/fs.c413
-rw-r--r--kernel/pgo/instrument.c188
-rw-r--r--kernel/pgo/pgo.h211
-rw-r--r--scripts/Makefile.lib10
25 files changed, 1059 insertions, 0 deletions
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
index 010a2af1e7d9eb..f15630bb01e279 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -32,6 +32,7 @@ Documentation/dev-tools/testing-overview.rst
kgdb
kselftest
kunit/index
+ pgo
.. only:: subproject and html
diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst
new file mode 100644
index 00000000000000..0200449c4843e8
--- /dev/null
+++ b/Documentation/dev-tools/pgo.rst
@@ -0,0 +1,127 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+Using PGO with the Linux kernel
+===============================
+
+Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel
+when building with Clang. The profiling data is exported via the ``pgo``
+debugfs directory.
+
+.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization
+
+
+Preparation
+===========
+
+Configure the kernel with:
+
+.. code-block:: make
+
+ CONFIG_DEBUG_FS=y
+ CONFIG_PGO_CLANG=y
+
+Note that kernels compiled with profiling flags will be significantly larger
+and run slower.
+
+Profiling data will only become accessible once debugfs has been mounted:
+
+.. code-block:: sh
+
+ mount -t debugfs none /sys/kernel/debug
+
+
+Customization
+=============
+
+You can enable or disable profiling for individual file and directories by
+adding a line similar to the following to the respective kernel Makefile:
+
+- For a single file (e.g. main.o)
+
+ .. code-block:: make
+
+ PGO_PROFILE_main.o := y
+
+- For all files in one directory
+
+ .. code-block:: make
+
+ PGO_PROFILE := y
+
+To exclude files from being profiled use
+
+ .. code-block:: make
+
+ PGO_PROFILE_main.o := n
+
+and
+
+ .. code-block:: make
+
+ PGO_PROFILE := n
+
+Only files which are linked to the main kernel image or are compiled as kernel
+modules are supported by this mechanism.
+
+
+Files
+=====
+
+The PGO kernel support creates the following files in debugfs:
+
+``/sys/kernel/debug/pgo``
+ Parent directory for all PGO-related files.
+
+``/sys/kernel/debug/pgo/reset``
+ Global reset file: resets all coverage data to zero when written to.
+
+``/sys/kernel/debug/pgo/vmlinux.profraw``
+ The raw PGO data that must be processed with ``llvm_profdata``.
+
+
+Workflow
+========
+
+The PGO kernel can be run on the host or test machines. The data though should
+be analyzed with Clang's tools from the same Clang version as the kernel was
+compiled. Clang's tolerant of version skew, but it's easier to use the same
+Clang version.
+
+The profiling data is useful for optimizing the kernel, analyzing coverage,
+etc. Clang offers tools to perform these tasks.
+
+Here is an example workflow for profiling an instrumented kernel with PGO and
+using the result to optimize the kernel:
+
+1) Install the kernel on the TEST machine.
+
+2) Reset the data counters right before running the load tests
+
+ .. code-block:: sh
+
+ $ echo 1 > /sys/kernel/debug/pgo/reset
+
+3) Run the load tests.
+
+4) Collect the raw profile data
+
+ .. code-block:: sh
+
+ $ cp -a /sys/kernel/debug/pgo/vmlinux.profraw /tmp/vmlinux.profraw
+
+5) (Optional) Download the raw profile data to the HOST machine.
+
+6) Process the raw profile data
+
+ .. code-block:: sh
+
+ $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw
+
+ Note that multiple raw profile data files can be merged during this step.
+
+7) Rebuild the kernel using the processed profile data (PGO disabled)
+
+ .. code-block:: sh
+
+ $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ...
diff --git a/MAINTAINERS b/MAINTAINERS
index 9096c64d8d099b..b042a7e6fae866 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4625,6 +4625,19 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/cla
F: include/linux/cfi.h
F: kernel/cfi.c
+CLANG PROFILE GUIDED OPTIMIZATION SUPPORT
+M: Sami Tolvanen <samitolvanen@google.com>
+M: Bill Wendling <wcw@google.com>
+M: Kees Cook <keescook@chromium.org>
+R: Nathan Chancellor <nathan@kernel.org>
+R: Nick Desaulniers <ndesaulniers@google.com>
+L: clang-built-linux@googlegroups.com
+S: Supported
+B: https://github.com/ClangBuiltLinux/linux/issues
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/clang/features
+F: Documentation/dev-tools/pgo.rst
+F: kernel/pgo/
+
CLEANCACHE API
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
L: linux-kernel@vger.kernel.org
diff --git a/Makefile b/Makefile
index ccae1485281413..27891044f8d916 100644
--- a/Makefile
+++ b/Makefile
@@ -677,6 +677,9 @@ endif # KBUILD_EXTMOD
# Defaults to vmlinux, but the arch makefile usually adds further targets
all: vmlinux
+CFLAGS_PGO_CLANG := -fprofile-generate
+export CFLAGS_PGO_CLANG
+
CFLAGS_GCOV := -fprofile-arcs -ftest-coverage
ifdef CONFIG_CC_IS_GCC
CFLAGS_GCOV += -fno-tree-loop-im
diff --git a/arch/Kconfig b/arch/Kconfig
index 26b8ed11639da4..fa69deeb147485 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1303,6 +1303,7 @@ config DYNAMIC_SIGFRAME
bool
source "kernel/gcov/Kconfig"
+source "kernel/pgo/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 95dd1ee01546ac..2553380322eabe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -108,6 +108,7 @@ config X86
select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096
select ARCH_SUPPORTS_LTO_CLANG
select ARCH_SUPPORTS_LTO_CLANG_THIN
+ select ARCH_SUPPORTS_PGO_CLANG if X86_64
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index b5aecb524a8aa6..5e0816f5c367ac 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -71,6 +71,7 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
GCOV_PROFILE := n
+PGO_PROFILE := n
UBSAN_SANITIZE := n
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 431bf7f846c3ca..218626b8acc534 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -56,6 +56,7 @@ CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
+PGO_PROFILE := n
UBSAN_SANITIZE :=n
KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index f307c93fc90a7a..a177ff8ad782a8 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -86,6 +86,9 @@ nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
+# Disable PGO for curve25519-x86_64. With PGO enabled, clang runs out of
+# registers for some of the functions.
+PGO_PROFILE_curve25519-x86_64.o := n
obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index a2dddcc189f692..73e207ad3257e2 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -180,6 +180,7 @@ quiet_cmd_vdso = VDSO $@
VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \
$(call ld-option, --eh-frame-hdr) -Bsymbolic
GCOV_PROFILE := n
+PGO_PROFILE := n
quiet_cmd_vdso_and_check = VDSO $@
cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2ff3e600f4269c..5cd1e99519fd72 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -32,6 +32,9 @@ KASAN_SANITIZE_paravirt.o := n
KASAN_SANITIZE_sev.o := n
KASAN_SANITIZE_cc_platform.o := n
+# Cannot write to profiling regions before the page tables are set up.
+PGO_PROFILE_head$(BITS).o := n
+
# With some compiler versions the generated code results in boot hangs, caused
# by several compilation units. To be safe, disable all instrumentation.
KCSAN_SANITIZE := n
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3d6dc12d198f7c..ea720b5c050abb 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -184,6 +184,8 @@ SECTIONS
BUG_TABLE
+ PGO_CLANG_DATA
+
ORC_UNWIND_TABLE
. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index 84b09c230cbd5f..5f22b31446ad4a 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -2,6 +2,7 @@
OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
KASAN_SANITIZE := n
GCOV_PROFILE := n
+PGO_PROFILE := n
obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o
obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 95ea17a9d20cb7..36f20e99da0bce 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -23,6 +23,7 @@ targets += purgatory.ro purgatory.chk
# Sanitizer, etc. runtimes are unavailable and cannot be linked here.
GCOV_PROFILE := n
+PGO_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCSAN_SANITIZE := n
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 83f1b6a56449fe..21797192f958fb 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -76,4 +76,5 @@ KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
GCOV_PROFILE := n
+PGO_PROFILE := n
UBSAN_SANITIZE := n
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 5943387e3f3570..54f5768f585303 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -64,6 +64,7 @@ quiet_cmd_vdso = VDSO $@
VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv
GCOV_PROFILE := n
+PGO_PROFILE := n
#
# Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index d0537573501e93..2c20e503cccb28 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -43,6 +43,7 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
GCOV_PROFILE := n
+PGO_PROFILE := n
# Sanitizer runtimes are unavailable and cannot be linked here.
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 42f3866bca6978..2189839fe895f7 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -331,6 +331,37 @@
#define DTPM_TABLE()
#endif
+#ifdef CONFIG_PGO_CLANG
+#define PGO_CLANG_DATA \
+ __llvm_prf_data : AT(ADDR(__llvm_prf_data) - LOAD_OFFSET) { \
+ __llvm_prf_data_start = .; \
+ *(__llvm_prf_data) \
+ __llvm_prf_data_end = .; \
+ } \
+ __llvm_prf_cnts : AT(ADDR(__llvm_prf_cnts) - LOAD_OFFSET) { \
+ __llvm_prf_cnts_start = .; \
+ *(__llvm_prf_cnts) \
+ __llvm_prf_cnts_end = .; \
+ } \
+ __llvm_prf_names : AT(ADDR(__llvm_prf_names) - LOAD_OFFSET) { \
+ __llvm_prf_names_start = .; \
+ *(__llvm_prf_names) \
+ __llvm_prf_names_end = .; \
+ } \
+ __llvm_prf_vals : AT(ADDR(__llvm_prf_vals) - LOAD_OFFSET) { \
+ __llvm_prf_vals_start = .; \
+ *(__llvm_prf_vals) \
+ __llvm_prf_vals_end = .; \
+ } \
+ __llvm_prf_vnds : AT(ADDR(__llvm_prf_vnds) - LOAD_OFFSET) { \
+ __llvm_prf_vnds_start = .; \
+ *(__llvm_prf_vnds) \
+ __llvm_prf_vnds_end = .; \
+ }
+#else
+#define PGO_CLANG_DATA
+#endif
+
#define KERNEL_DTB() \
STRUCT_ALIGN(); \
__dtb_start = .; \
@@ -1148,6 +1179,7 @@
CONSTRUCTORS \
} \
BUG_TABLE \
+ PGO_CLANG_DATA
#define INIT_TEXT_SECTION(inittext_align) \
. = ALIGN(inittext_align); \
diff --git a/kernel/Makefile b/kernel/Makefile
index 186c49582f45b6..2c8143232df648 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -114,6 +114,7 @@ obj-$(CONFIG_KCSAN) += kcsan/
obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o
obj-$(CONFIG_CFI_CLANG) += cfi.o
+obj-$(CONFIG_PGO_CLANG) += pgo/
obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/kernel/pgo/Kconfig b/kernel/pgo/Kconfig
new file mode 100644
index 00000000000000..ce7fe04f303d9d
--- /dev/null
+++ b/kernel/pgo/Kconfig
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0-only
+menu "Profile Guided Optimization (PGO) (EXPERIMENTAL)"
+
+config ARCH_SUPPORTS_PGO_CLANG
+ bool
+
+config PGO_CLANG
+ bool "Enable clang's PGO-based kernel profiling"
+ depends on DEBUG_FS
+ depends on ARCH_SUPPORTS_PGO_CLANG
+ depends on CC_IS_CLANG
+ depends on !ARCH_WANTS_NO_INSTR || CC_HAS_NO_PROFILE_FN_ATTR
+ help
+ This option enables clang's PGO (Profile Guided Optimization) based
+ code profiling to better optimize the kernel.
+
+ If unsure, say N.
+
+ Run a representative workload for your application on a kernel
+ compiled with this option and download the raw profile file from
+ /sys/kernel/debug/pgo/vmlinux.profraw. This file needs to be
+ processed with llvm-profdata. It may be merged with other collected
+ raw profiles.
+
+ Copy the processed profile file into vmlinux.profdata, and enable
+ KCFLAGS=-fprofile-use=vmlinux.profdata to produce an optimized
+ kernel.
+
+ Note that a kernel compiled with profiling flags will be
+ significantly larger and run slower. Also be sure to exclude files
+ from profiling which are not linked to the kernel image to prevent
+ linker errors.
+
+ Note that the debugfs filesystem has to be mounted to access
+ profiling data.
+
+endmenu
diff --git a/kernel/pgo/Makefile b/kernel/pgo/Makefile
new file mode 100644
index 00000000000000..41e27cefd9a47b
--- /dev/null
+++ b/kernel/pgo/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+GCOV_PROFILE := n
+PGO_PROFILE := n
+
+obj-y += fs.o instrument.o
diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c
new file mode 100644
index 00000000000000..3c5aa7c2a4cefa
--- /dev/null
+++ b/kernel/pgo/fs.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Google, Inc.
+ *
+ * Author:
+ * Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) "pgo: " fmt
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include "pgo.h"
+
+static struct dentry *directory;
+
+struct prf_private_data {
+ void *buffer;
+ size_t size;
+};
+
+/*
+ * Raw profile data format:
+ *
+ * - llvm_prf_header
+ * - __llvm_prf_data
+ * - __llvm_prf_cnts
+ * - __llvm_prf_names
+ * - zero padding to 8 bytes
+ * - for each llvm_prf_data in __llvm_prf_data:
+ * - llvm_prf_value_data
+ * - llvm_prf_value_record + site count array
+ * - llvm_prf_value_node_data
+ * ...
+ * ...
+ * ...
+ */
+
+static void prf_fill_header(void **buffer)
+{
+ struct llvm_prf_header *header = *(struct llvm_prf_header **)buffer;
+
+#ifdef CONFIG_64BIT
+ header->magic = LLVM_INSTR_PROF_RAW_MAGIC_64;
+#else
+ header->magic = LLVM_INSTR_PROF_RAW_MAGIC_32;
+#endif
+ header->version = LLVM_VARIANT_MASK_IR_PROF | LLVM_INSTR_PROF_RAW_VERSION;
+ header->data_size = prf_data_count();
+ header->padding_bytes_before_counters = 0;
+ header->counters_size = prf_cnts_count();
+ header->padding_bytes_after_counters = 0;
+ header->names_size = prf_names_count();
+ header->counters_delta = (u64)__llvm_prf_cnts_start;
+ header->names_delta = (u64)__llvm_prf_names_start;
+ header->value_kind_last = LLVM_INSTR_PROF_IPVK_LAST;
+
+ *buffer += sizeof(*header);
+}
+
+/*
+ * Copy the source into the buffer, incrementing the pointer into buffer in the
+ * process.
+ */
+static void prf_copy_to_buffer(void **buffer, void *src, unsigned long size)
+{
+ memcpy(*buffer, src, size);
+ *buffer += size;
+}
+
+static u32 __prf_get_value_size(struct llvm_prf_data *p, u32 *value_kinds)
+{
+ struct llvm_prf_value_node **nodes =
+ (struct llvm_prf_value_node **)p->values;
+ u32 kinds = 0;
+ u32 size = 0;
+ unsigned int kind;
+ unsigned int n;
+ unsigned int s = 0;
+
+ for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
+ unsigned int sites = p->num_value_sites[kind];
+
+ if (!sites)
+ continue;
+
+ /* Record + site count array */
+ size += prf_get_value_record_size(sites);
+ kinds++;
+
+ if (!nodes)
+ continue;
+
+ for (n = 0; n < sites; n++) {
+ u32 count = 0;
+ struct llvm_prf_value_node *site = nodes[s + n];
+
+ while (site && ++count <= U8_MAX)
+ site = site->next;
+
+ size += count *
+ sizeof(struct llvm_prf_value_node_data);
+ }
+
+ s += sites;
+ }
+
+ if (size)
+ size += sizeof(struct llvm_prf_value_data);
+
+ if (value_kinds)
+ *value_kinds = kinds;
+
+ return size;
+}
+
+static u32 prf_get_value_size(void)
+{
+ u32 size = 0;
+ struct llvm_prf_data *p;
+
+ for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++)
+ size += __prf_get_value_size(p, NULL);
+
+ return size;
+}
+
+/* Serialize the profiling's value. */
+static void prf_serialize_value(struct llvm_prf_data *p, void **buffer)
+{
+ struct llvm_prf_value_data header;
+ struct llvm_prf_value_node **nodes =
+ (struct llvm_prf_value_node **)p->values;
+ unsigned int kind;
+ unsigned int n;
+ unsigned int s = 0;
+
+ header.total_size = __prf_get_value_size(p, &header.num_value_kinds);
+
+ if (!header.num_value_kinds)
+ /* Nothing to write. */
+ return;
+
+ prf_copy_to_buffer(buffer, &header, sizeof(header));
+
+ for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) {
+ struct llvm_prf_value_record *record;
+ u8 *counts;
+ unsigned int sites = p->num_value_sites[kind];
+
+ if (!sites)
+ continue;
+
+ /* Profiling value record. */
+ record = *(struct llvm_prf_value_record **)buffer;
+ *buffer += prf_get_value_record_header_size();
+
+ record->kind = kind;
+ record->num_value_sites = sites;
+
+ /* Site count array. */
+ counts = *(u8 **)buffer;
+ *buffer += prf_get_value_record_site_count_size(sites);
+
+ /*
+ * If we don't have nodes, we can skip updating the site count
+ * array, because the buffer is zero filled.
+ */
+ if (!nodes)
+ continue;
+
+ for (n = 0; n < sites; n++) {
+ u32 count = 0;
+ struct llvm_prf_value_node *site = nodes[s + n];
+
+ while (site && ++count <= U8_MAX) {
+ prf_copy_to_buffer(buffer, site,
+ sizeof(struct llvm_prf_value_node_data));
+ site = site->next;
+ }
+
+ counts[n] = (u8)count;
+ }
+
+ s += sites;
+ }
+}
+
+static void prf_serialize_values(void **buffer)
+{
+ struct llvm_prf_data *p;
+
+ for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++)
+ prf_serialize_value(p, buffer);
+}
+
+static inline unsigned long prf_get_padding(unsigned long size)
+{
+ return 7 & (sizeof(u64) - size % sizeof(u64));
+}
+
+/* Note: caller *must* hold pgo_lock */
+static unsigned long prf_buffer_size(void)
+{
+ return sizeof(struct llvm_prf_header) +
+ prf_data_size() +
+ prf_cnts_size() +
+ prf_names_size() +
+ prf_get_padding(prf_names_size()) +
+ prf_get_value_size();
+}
+
+/*
+ * Serialize the profiling data into a format LLVM's tools can understand.
+ * Returns actual buffer size in p->size.
+ * Note: p->buffer must point into vzalloc()'d
+ * area of at least prf_buffer_size() in size.
+ * Note: caller *must* hold pgo_lock.
+ */
+static int prf_serialize(struct prf_private_data *p, size_t buf_size)
+{
+ void *buffer;
+
+ /* get buffer size, again. */
+ p->size = prf_buffer_size();
+
+ /* check for unlikely overflow. */
+ if (p->size > buf_size)
+ return -EAGAIN;
+
+ buffer = p->buffer;
+
+ prf_fill_header(&buffer);
+ prf_copy_to_buffer(&buffer, __llvm_prf_data_start, prf_data_size());
+ prf_copy_to_buffer(&buffer, __llvm_prf_cnts_start, prf_cnts_size());
+ prf_copy_to_buffer(&buffer, __llvm_prf_names_start, prf_names_size());
+ buffer += prf_get_padding(prf_names_size());
+
+ prf_serialize_values(&buffer);
+
+ return 0;
+}
+
+/* open() implementation for PGO. Creates a copy of the profiling data set. */
+static int prf_open(struct inode *inode, struct file *file)
+{
+ struct prf_private_data *data;
+ unsigned long flags;
+ size_t buf_size;
+ int err = -EINVAL;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* Get initial buffer size. */
+ flags = prf_lock();
+ data->size = prf_buffer_size();
+ prf_unlock(flags);
+
+ do {
+ vfree(data->buffer);
+
+ /* Allocate, round up to page size. */
+ buf_size = PAGE_ALIGN(data->size);
+ data->buffer = vzalloc(buf_size);
+
+ if (!data->buffer) {
+ err = -ENOMEM;
+ break;
+ }
+
+ /*
+ * Try serialize and get actual
+ * data length in data->size.
+ */
+ flags = prf_lock();
+ err = prf_serialize(data, buf_size);
+ prf_unlock(flags);
+ /* In unlikely case, try again. */
+ } while (err == -EAGAIN);
+
+ if (err < 0) {
+ if (data)
+ vfree(data->buffer);
+ kfree(data);
+ } else {
+ file->private_data = data;
+ }
+
+ return err;
+}
+
+/* read() implementation for PGO. */
+static ssize_t prf_read(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct prf_private_data *data = file->private_data;
+
+ if (WARN_ON_ONCE(!data))
+ return -ENOMEM;
+
+ return simple_read_from_buffer(buf, count, ppos, data->buffer,
+ data->size);
+}
+
+/* release() implementation for PGO. Release resources allocated by open(). */
+static int prf_release(struct inode *inode, struct file *file)
+{
+ struct prf_private_data *data = file->private_data;
+
+ if (data) {
+ vfree(data->buffer);
+ kfree(data);
+ }
+
+ return 0;
+}
+
+static const struct file_operations prf_fops = {
+ .owner = THIS_MODULE,
+ .open = prf_open,
+ .read = prf_read,
+ .llseek = default_llseek,
+ .release = prf_release
+};
+
+/* write() implementation for resetting PGO's profile data. */
+static ssize_t reset_write(struct file *file, const char __user *addr,
+ size_t len, loff_t *pos)
+{
+ struct llvm_prf_data *data;
+
+ memset(__llvm_prf_cnts_start, 0, prf_cnts_size());
+
+ for (data = __llvm_prf_data_start; data < __llvm_prf_data_end; data++) {
+ struct llvm_prf_value_node **vnodes;
+ u64 current_vsite_count;
+ u32 i;
+
+ if (!data->values)
+ continue;
+
+ current_vsite_count = 0;
+ vnodes = (struct llvm_prf_value_node **)data->values;
+
+ for (i = LLVM_INSTR_PROF_IPVK_FIRST; i <= LLVM_INSTR_PROF_IPVK_LAST; i++)
+ current_vsite_count += data->num_value_sites[i];
+
+ for (i = 0; i < current_vsite_count; i++) {
+ struct llvm_prf_value_node *current_vnode = vnodes[i];
+
+ while (current_vnode) {
+ current_vnode->count = 0;
+ current_vnode = current_vnode->next;
+ }
+ }
+ }
+
+ return len;
+}
+
+static const struct file_operations prf_reset_fops = {
+ .owner = THIS_MODULE,
+ .write = reset_write,
+ .llseek = noop_llseek,
+};
+
+/* Create debugfs entries. */
+static int __init pgo_init(void)
+{
+ directory = debugfs_create_dir("pgo", NULL);
+ if (!directory)
+ goto err_remove;
+
+ if (!debugfs_create_file("vmlinux.profraw", 0600, directory, NULL,
+ &prf_fops))
+ goto err_remove;
+
+ if (!debugfs_create_file("reset", 0200, directory, NULL,
+ &prf_reset_fops))
+ goto err_remove;
+
+ return 0;
+
+err_remove:
+ pr_err("initialization failed\n");
+ return -EIO;
+}
+
+/* Remove debugfs entries. */
+static void __exit pgo_exit(void)
+{
+ debugfs_remove_recursive(directory);
+}
+
+module_init(pgo_init);
+module_exit(pgo_exit);
diff --git a/kernel/pgo/instrument.c b/kernel/pgo/instrument.c
new file mode 100644
index 00000000000000..8b54fb6be336d4
--- /dev/null
+++ b/kernel/pgo/instrument.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Google, Inc.
+ *
+ * Author:
+ * Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define pr_fmt(fmt) "pgo: " fmt
+
+#include <asm/sections.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include "pgo.h"
+
+/*
+ * This lock guards both profile count updating and serialization of the
+ * profiling data. Keeping both of these activities separate via locking
+ * ensures that we don't try to serialize data that's only partially updated.
+ */
+static DEFINE_SPINLOCK(pgo_lock);
+static int current_node;
+
+unsigned long prf_lock(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pgo_lock, flags);
+
+ return flags;
+}
+
+void prf_unlock(unsigned long flags)
+{
+ spin_unlock_irqrestore(&pgo_lock, flags);
+}
+
+/*
+ * Return a newly allocated profiling value node which contains the tracked
+ * value by the value profiler.
+ * Note: caller *must* hold pgo_lock.
+ */
+static struct llvm_prf_value_node *allocate_node(struct llvm_prf_data *p,
+ u32 index, u64 value)
+{
+ const int max_vnds = prf_vnds_count();
+
+ /*
+ * Check that p is within vmlinux __llvm_prf_data section.
+ * If not, don't allocate since we can't handle modules yet.
+ */
+ if (!memory_contains(__llvm_prf_data_start,
+ __llvm_prf_data_end, p, sizeof(*p)))
+ return NULL;
+
+ if (WARN_ON_ONCE(current_node >= max_vnds))
+ return NULL; /* Out of nodes */
+
+ /* reserve vnode for vmlinux */
+ return &__llvm_prf_vnds_start[current_node++];
+}
+
+/*
+ * Counts the number of times a target value is seen.
+ *
+ * Records the target value for the index if not seen before. Otherwise,
+ * increments the counter associated w/ the target value.
+ */
+void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index)
+{
+ struct llvm_prf_data *p = (struct llvm_prf_data *)data;
+ struct llvm_prf_value_node **counters;
+ struct llvm_prf_value_node *curr;
+ struct llvm_prf_value_node *min = NULL;
+ struct llvm_prf_value_node *prev = NULL;
+ u64 min_count = U64_MAX;
+ u8 values = 0;
+ unsigned long flags;
+
+ if (!p || !p->values)
+ return;
+
+ counters = (struct llvm_prf_value_node **)p->values;
+ curr = counters[index];
+
+ while (curr) {
+ if (target_value == curr->value) {
+ curr->count++;
+ return;
+ }
+
+ if (curr->count < min_count) {
+ min_count = curr->count;
+ min = curr;
+ }
+
+ prev = curr;
+ curr = curr->next;
+ values++;
+ }
+
+ if (values >= LLVM_INSTR_PROF_MAX_NUM_VAL_PER_SITE) {
+ if (!min->count || !(--min->count)) {
+ curr = min;
+ curr->value = target_value;
+ curr->count++;
+ }
+ return;
+ }
+
+ /* Lock when updating the value node structure. */
+ flags = prf_lock();
+
+ curr = allocate_node(p, index, target_value);
+ if (!curr)
+ goto out;
+
+ curr->value = target_value;
+ curr->count++;
+
+ if (!counters[index])
+ counters[index] = curr;
+ else if (prev && !prev->next)
+ prev->next = curr;
+
+out:
+ prf_unlock(flags);
+}
+EXPORT_SYMBOL(__llvm_profile_instrument_target);
+
+/* Counts the number of times a range of targets values are seen. */
+void __llvm_profile_instrument_range(u64 target_value, void *data,
+ u32 index, s64 precise_start,
+ s64 precise_last, s64 large_value)
+{
+ if (large_value != S64_MIN && (s64)target_value >= large_value)
+ target_value = large_value;
+ else if ((s64)target_value < precise_start ||
+ (s64)target_value > precise_last)
+ target_value = precise_last + 1;
+
+ __llvm_profile_instrument_target(target_value, data, index);
+}
+EXPORT_SYMBOL(__llvm_profile_instrument_range);
+
+static u64 inst_prof_get_range_rep_value(u64 value)
+{
+ if (value <= 8)
+ /* The first ranges are individually tracked, use it as is. */
+ return value;
+ else if (value >= 513)
+ /* The last range is mapped to its lowest value. */
+ return 513;
+ else if (hweight64(value) == 1)
+ /* If it's a power of two, use it as is. */
+ return value;
+
+ /* Otherwise, take to the previous power of two + 1. */
+ return ((u64)1 << (64 - __builtin_clzll(value) - 1)) + 1;
+}
+
+/*
+ * The target values are partitioned into multiple ranges. The range spec is
+ * defined in compiler-rt/include/profile/InstrProfData.inc.
+ */
+void __llvm_profile_instrument_memop(u64 target_value, void *data,
+ u32 counter_index)
+{
+ u64 rep_value;
+
+ /* Map the target value to the representative value of its range. */
+ rep_value = inst_prof_get_range_rep_value(target_value);
+ __llvm_profile_instrument_target(rep_value, data, counter_index);
+}
+EXPORT_SYMBOL(__llvm_profile_instrument_memop);
diff --git a/kernel/pgo/pgo.h b/kernel/pgo/pgo.h
new file mode 100644
index 00000000000000..04fbf3bcde1ec9
--- /dev/null
+++ b/kernel/pgo/pgo.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Google, Inc.
+ *
+ * Author:
+ * Sami Tolvanen <samitolvanen@google.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _PGO_H
+#define _PGO_H
+
+/*
+ * Note: These internal LLVM definitions must match the compiler version.
+ * See llvm/include/llvm/ProfileData/InstrProfData.inc in LLVM's source code.
+ */
+
+#define LLVM_INSTR_PROF_RAW_MAGIC_64 \
+ ((u64)255 << 56 | \
+ (u64)'l' << 48 | \
+ (u64)'p' << 40 | \
+ (u64)'r' << 32 | \
+ (u64)'o' << 24 | \
+ (u64)'f' << 16 | \
+ (u64)'r' << 8 | \
+ (u64)129)
+#define LLVM_INSTR_PROF_RAW_MAGIC_32 \
+ ((u64)255 << 56 | \
+ (u64)'l' << 48 | \
+ (u64)'p' << 40 | \
+ (u64)'r' << 32 | \
+ (u64)'o' << 24 | \
+ (u64)'f' << 16 | \
+ (u64)'R' << 8 | \
+ (u64)129)
+
+#define LLVM_INSTR_PROF_RAW_VERSION 5
+#define LLVM_INSTR_PROF_DATA_ALIGNMENT 8
+#define LLVM_INSTR_PROF_IPVK_FIRST 0
+#define LLVM_INSTR_PROF_IPVK_LAST 1
+#define LLVM_INSTR_PROF_MAX_NUM_VAL_PER_SITE 255
+
+#define LLVM_VARIANT_MASK_IR_PROF (0x1ULL << 56)
+#define LLVM_VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
+
+/**
+ * struct llvm_prf_header - represents the raw profile header data structure.
+ * @magic: the magic token for the file format.
+ * @version: the version of the file format.
+ * @data_size: the number of entries in the profile data section.
+ * @padding_bytes_before_counters: the number of padding bytes before the
+ * counters.
+ * @counters_size: the size in bytes of the LLVM profile section containing the
+ * counters.
+ * @padding_bytes_after_counters: the number of padding bytes after the
+ * counters.
+ * @names_size: the size in bytes of the LLVM profile section containing the
+ * counters' names.
+ * @counters_delta: the beginning of the LLMV profile counters section.
+ * @names_delta: the beginning of the LLMV profile names section.
+ * @value_kind_last: the last profile value kind.
+ */
+struct llvm_prf_header {
+ u64 magic;
+ u64 version;
+ u64 data_size;
+ u64 padding_bytes_before_counters;
+ u64 counters_size;
+ u64 padding_bytes_after_counters;
+ u64 names_size;
+ u64 counters_delta;
+ u64 names_delta;
+ u64 value_kind_last;
+};
+
+/**
+ * struct llvm_prf_data - represents the per-function control structure.
+ * @name_ref: the reference to the function's name.
+ * @func_hash: the hash value of the function.
+ * @counter_ptr: a pointer to the profile counter.
+ * @function_ptr: a pointer to the function.
+ * @values: the profiling values associated with this function.
+ * @num_counters: the number of counters in the function.
+ * @num_value_sites: the number of value profile sites.
+ */
+struct llvm_prf_data {
+ const u64 name_ref;
+ const u64 func_hash;
+ const void *counter_ptr;
+ const void *function_ptr;
+ void *values;
+ const u32 num_counters;
+ const u16 num_value_sites[LLVM_INSTR_PROF_IPVK_LAST + 1];
+} __aligned(LLVM_INSTR_PROF_DATA_ALIGNMENT);
+
+/**
+ * struct llvm_prf_value_node_data - represents the data part of the struct
+ * llvm_prf_value_node data structure.
+ * @value: the value counters.
+ * @count: the counters' count.
+ */
+struct llvm_prf_value_node_data {
+ u64 value;
+ u64 count;
+};
+
+/**
+ * struct llvm_prf_value_node - represents an internal data structure used by
+ * the value profiler.
+ * @value: the value counters.
+ * @count: the counters' count.
+ * @next: the next value node.
+ */
+struct llvm_prf_value_node {
+ u64 value;
+ u64 count;
+ struct llvm_prf_value_node *next;
+};
+
+/**
+ * struct llvm_prf_value_data - represents the value profiling data in indexed
+ * format.
+ * @total_size: the total size in bytes including this field.
+ * @num_value_kinds: the number of value profile kinds that has value profile
+ * data.
+ */
+struct llvm_prf_value_data {
+ u32 total_size;
+ u32 num_value_kinds;
+};
+
+/**
+ * struct llvm_prf_value_record - represents the on-disk layout of the value
+ * profile data of a particular kind for one function.
+ * @kind: the kind of the value profile record.
+ * @num_value_sites: the number of value profile sites.
+ * @site_count_array: the first element of the array that stores the number
+ * of profiled values for each value site.
+ */
+struct llvm_prf_value_record {
+ u32 kind;
+ u32 num_value_sites;
+ u8 site_count_array[];
+};
+
+#define prf_get_value_record_header_size() \
+ offsetof(struct llvm_prf_value_record, site_count_array)
+#define prf_get_value_record_site_count_size(sites) \
+ roundup((sites), 8)
+#define prf_get_value_record_size(sites) \
+ (prf_get_value_record_header_size() + \
+ prf_get_value_record_site_count_size((sites)))
+
+/* Data sections */
+extern struct llvm_prf_data __llvm_prf_data_start[];
+extern struct llvm_prf_data __llvm_prf_data_end[];
+
+extern u64 __llvm_prf_cnts_start[];
+extern u64 __llvm_prf_cnts_end[];
+
+extern char __llvm_prf_names_start[];
+extern char __llvm_prf_names_end[];
+
+extern struct llvm_prf_value_node __llvm_prf_vnds_start[];
+extern struct llvm_prf_value_node __llvm_prf_vnds_end[];
+
+/* Locking for vnodes */
+extern unsigned long prf_lock(void);
+extern void prf_unlock(unsigned long flags);
+
+/* Declarations for LLVM instrumentation. */
+void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index);
+void __llvm_profile_instrument_range(u64 target_value, void *data,
+ u32 index, s64 precise_start,
+ s64 precise_last, s64 large_value);
+void __llvm_profile_instrument_memop(u64 target_value, void *data,
+ u32 counter_index);
+
+#define __DEFINE_PRF_SIZE(s) \
+ static inline unsigned long prf_ ## s ## _size(void) \
+ { \
+ unsigned long start = \
+ (unsigned long)__llvm_prf_ ## s ## _start; \
+ unsigned long end = \
+ (unsigned long)__llvm_prf_ ## s ## _end; \
+ return roundup(end - start, \
+ sizeof(__llvm_prf_ ## s ## _start[0])); \
+ } \
+ static inline unsigned long prf_ ## s ## _count(void) \
+ { \
+ return prf_ ## s ## _size() / \
+ sizeof(__llvm_prf_ ## s ## _start[0]); \
+ }
+
+__DEFINE_PRF_SIZE(data);
+__DEFINE_PRF_SIZE(cnts);
+__DEFINE_PRF_SIZE(names);
+__DEFINE_PRF_SIZE(vnds);
+
+#undef __DEFINE_PRF_SIZE
+
+#endif /* _PGO_H */
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 56d50eb0cd8004..8b24ede279c3cd 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -151,6 +151,16 @@ _c_flags += $(if $(patsubst n%,, \
endif
#
+# Enable clang's PGO profiling flags for a file or directory depending on
+# variables PGO_PROFILE_obj.o and PGO_PROFILE.
+#
+ifeq ($(CONFIG_PGO_CLANG),y)
+_c_flags += $(if $(patsubst n%,, \
+ $(PGO_PROFILE_$(basetarget).o)$(PGO_PROFILE)y), \
+ $(CFLAGS_PGO_CLANG))
+endif
+
+#
# Enable address sanitizer flags for kernel except some files or directories
# we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE)
#