author      Andrew Morton <akpm@linux-foundation.org>  2024-04-26 12:14:25 -0700
committer   Andrew Morton <akpm@linux-foundation.org>  2024-04-26 12:14:25 -0700
commit      e8ced5e0b21645db3c6662e0654ea9bccdffe341 (patch)
tree        7a61c41a58bc8a3bd451692dab334d2c1c19d657
parent      8b6b345ef694e544a7866d82e397bde667607d48 (diff)
download    25-new-e8ced5e0b21645db3c6662e0654ea9bccdffe341.tar.gz
foo
-rw-r--r--  patches/bitops-optimize-fns-for-improved-performance.patch          125
-rw-r--r--  patches/kmsan-compiler_types-declare-__no_sanitize_or_inline.patch   46
-rw-r--r--  patches/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.patch    34
-rw-r--r--  pc/bitops-optimize-fns-for-improved-performance.pc                     1
-rw-r--r--  pc/devel-series                                                        7
-rw-r--r--  pc/kmsan-compiler_types-declare-__no_sanitize_or_inline.pc             1
-rw-r--r--  pc/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.pc             1
-rw-r--r--  txt/bitops-optimize-fns-for-improved-performance.txt                  95
-rw-r--r--  txt/kmsan-compiler_types-declare-__no_sanitize_or_inline.txt          19
-rw-r--r--  txt/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.txt          16
10 files changed, 344 insertions, 1 deletion
diff --git a/patches/bitops-optimize-fns-for-improved-performance.patch b/patches/bitops-optimize-fns-for-improved-performance.patch
new file mode 100644
index 000000000..edfd0c9bb
--- /dev/null
+++ b/patches/bitops-optimize-fns-for-improved-performance.patch
@@ -0,0 +1,125 @@
+From: Kuan-Wei Chiu <visitorckw@gmail.com>
+Subject: bitops: optimize fns() for improved performance
+Date: Fri, 26 Apr 2024 11:51:52 +0800
+
+The current fns() repeatedly uses __ffs() to find the index of the least
+significant bit and then clears the corresponding bit using __clear_bit().
+The method for clearing the least significant bit can be optimized by
+using word &= word - 1 instead.
+
+Typically, the execution time of one __ffs() plus one __clear_bit() is
+longer than that of a bitwise AND operation and a subtraction.  To improve
+performance, the loop that clears the least significant bit is replaced
+with word &= word - 1, followed by a single __ffs() operation to obtain
+the answer.  This change reduces the number of __ffs() calls from n to
+just one.
+
+The following microbenchmark, run on my x86-64 machine, shows the
+execution time (in microseconds) taken by fns() to process 1000000
+test values generated by get_random_u64(), for each of the following
+values of n:
+
++-----+---------------+---------------+
+| n | time_old | time_new |
++-----+---------------+---------------+
+| 0 | 29194 | 25878 |
+| 1 | 25510 | 25497 |
+| 2 | 27836 | 25721 |
+| 3 | 30140 | 25673 |
+| 4 | 32569 | 25426 |
+| 5 | 34792 | 25690 |
+| 6 | 37117 | 25651 |
+| 7 | 39742 | 25383 |
+| 8 | 42360 | 25657 |
+| 9 | 44672 | 25897 |
+| 10 | 47237 | 25819 |
+| 11 | 49884 | 26530 |
+| 12 | 51864 | 26647 |
+| 13 | 54265 | 28915 |
+| 14 | 56440 | 28373 |
+| 15 | 58839 | 28616 |
+| 16 | 62383 | 29128 |
+| 17 | 64257 | 30041 |
+| 18 | 66805 | 29773 |
+| 19 | 69368 | 33203 |
+| 20 | 72942 | 33688 |
+| 21 | 77006 | 34518 |
+| 22 | 80926 | 34298 |
+| 23 | 85723 | 35586 |
+| 24 | 90324 | 36376 |
+| 25 | 95992 | 37465 |
+| 26 | 101101 | 37599 |
+| 27 | 106520 | 37466 |
+| 28 | 113287 | 38163 |
+| 29 | 120552 | 38810 |
+| 30 | 128040 | 39373 |
+| 31 | 135624 | 40500 |
+| 32 | 142580 | 40343 |
+| 33 | 148915 | 40460 |
+| 34 | 154005 | 41294 |
+| 35 | 157996 | 41730 |
+| 36 | 160806 | 41523 |
+| 37 | 162975 | 42088 |
+| 38 | 163426 | 41530 |
+| 39 | 164872 | 41789 |
+| 40 | 164477 | 42505 |
+| 41 | 164758 | 41879 |
+| 42 | 164182 | 41415 |
+| 43 | 164842 | 42119 |
+| 44 | 164881 | 42297 |
+| 45 | 164870 | 42145 |
+| 46 | 164673 | 42066 |
+| 47 | 164616 | 42051 |
+| 48 | 165055 | 41902 |
+| 49 | 164847 | 41862 |
+| 50 | 165171 | 41960 |
+| 51 | 164851 | 42089 |
+| 52 | 164763 | 41717 |
+| 53 | 164635 | 42154 |
+| 54 | 164757 | 41983 |
+| 55 | 165095 | 41419 |
+| 56 | 164641 | 42381 |
+| 57 | 164601 | 41654 |
+| 58 | 164864 | 41834 |
+| 59 | 164594 | 41920 |
+| 60 | 165207 | 42020 |
+| 61 | 165056 | 41185 |
+| 62 | 165160 | 41722 |
+| 63 | 164923 | 41702 |
+| 64 | 164777 | 41880 |
++-----+---------------+---------------+
+
+Link: https://lkml.kernel.org/r/20240426035152.956702-1-visitorckw@gmail.com
+Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
+Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
+Cc: Yury Norov <yury.norov@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ include/linux/bitops.h | 12 ++++--------
+ 1 file changed, 4 insertions(+), 8 deletions(-)
+
+--- a/include/linux/bitops.h~bitops-optimize-fns-for-improved-performance
++++ a/include/linux/bitops.h
+@@ -254,16 +254,12 @@ static inline unsigned long __ffs64(u64
+ */
+ static inline unsigned long fns(unsigned long word, unsigned int n)
+ {
+- unsigned int bit;
++ unsigned int i;
+
+- while (word) {
+- bit = __ffs(word);
+- if (n-- == 0)
+- return bit;
+- __clear_bit(bit, &word);
+- }
++ for (i = 0; word && i < n; i++)
++ word &= word - 1;
+
+- return BITS_PER_LONG;
++ return word ? __ffs(word) : BITS_PER_LONG;
+ }
+
+ /**
+_
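
The win is easy to reproduce outside the tree.  Below is a minimal
user-space sketch of the two fns() variants, cross-checked on random
words.  It uses __builtin_ctzl() as a stand-in for the kernel's __ffs()
and a plain mask for __clear_bit(), so it is an illustration of the
technique rather than the kernel implementation itself.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define BITS_PER_LONG 64

/* Old variant: one find plus one clear per loop iteration. */
static unsigned long fns_old(unsigned long word, unsigned int n)
{
	unsigned int bit;

	while (word) {
		bit = __builtin_ctzl(word);	/* stand-in for __ffs() */
		if (n-- == 0)
			return bit;
		word &= ~(1UL << bit);		/* stand-in for __clear_bit() */
	}

	return BITS_PER_LONG;
}

/* New variant: strip the n lowest set bits, then do a single find. */
static unsigned long fns_new(unsigned long word, unsigned int n)
{
	unsigned int i;

	for (i = 0; word && i < n; i++)
		word &= word - 1;	/* clears the least significant set bit */

	return word ? __builtin_ctzl(word) : BITS_PER_LONG;
}

int main(void)
{
	unsigned int t;

	for (t = 0; t < 1000000; t++) {
		unsigned long w = ((unsigned long)rand() << 33) ^
				  ((unsigned long)rand() << 2) ^ rand();

		assert(fns_old(w, t % 66) == fns_new(w, t % 66));
	}
	printf("fns_old() and fns_new() agree\n");

	return 0;
}

The reason a single find suffices: after clearing the n lowest set bits,
the n-th set bit of the original word (if it exists) is simply the lowest
set bit of what remains.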
diff --git a/patches/kmsan-compiler_types-declare-__no_sanitize_or_inline.patch b/patches/kmsan-compiler_types-declare-__no_sanitize_or_inline.patch
new file mode 100644
index 000000000..eefee79aa
--- /dev/null
+++ b/patches/kmsan-compiler_types-declare-__no_sanitize_or_inline.patch
@@ -0,0 +1,46 @@
+From: Alexander Potapenko <glider@google.com>
+Subject: kmsan: compiler_types: declare __no_sanitize_or_inline
+Date: Fri, 26 Apr 2024 11:16:22 +0200
+
+It turned out that KMSAN instruments READ_ONCE_NOCHECK(), resulting in false
+positive reports, because __no_sanitize_or_inline fell back to __always_inline.
+
+Properly declare __no_sanitize_or_inline under __SANITIZE_MEMORY__, so
+that it does not __always_inline the annotated function.
+
+Link: https://lkml.kernel.org/r/20240426091622.3846771-1-glider@google.com
+Fixes: 5de0ce85f5a4 ("kmsan: mark noinstr as __no_sanitize_memory")
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Reported-by: syzbot+355c5bb8c1445c871ee8@syzkaller.appspotmail.com
+Link: https://lkml.kernel.org/r/000000000000826ac1061675b0e3@google.com
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Marco Elver <elver@google.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Miguel Ojeda <ojeda@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ include/linux/compiler_types.h | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/include/linux/compiler_types.h~kmsan-compiler_types-declare-__no_sanitize_or_inline
++++ a/include/linux/compiler_types.h
+@@ -278,6 +278,17 @@ struct ftrace_likely_data {
+ # define __no_kcsan
+ #endif
+
++#ifdef __SANITIZE_MEMORY__
++/*
++ * Similarly to KASAN and KCSAN, KMSAN loses function attributes of inlined
++ * functions, therefore disabling KMSAN checks also requires disabling inlining.
++ *
++ * __no_sanitize_or_inline effectively prevents KMSAN from reporting errors
++ * within the function and marks all its outputs as initialized.
++ */
++# define __no_sanitize_or_inline __no_kmsan_checks notrace __maybe_unused
++#endif
++
+ #ifndef __no_sanitize_or_inline
+ #define __no_sanitize_or_inline __always_inline
+ #endif
+_
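
As an illustration of what the new definition buys, here is a hypothetical
helper in the READ_ONCE_NOCHECK() mould; read_word_nocheck() is an invented
name used only for this sketch, not a kernel function:

/*
 * With the KMSAN definition above, this helper is no longer forced
 * inline: KMSAN does not instrument the load, and the value it returns
 * is treated as initialized.  With only the old __always_inline
 * fallback, the body would be inlined into an instrumented caller and
 * checked anyway, producing the false positives described above.
 */
static __no_sanitize_or_inline unsigned long
read_word_nocheck(const unsigned long *p)
{
	return *p;
}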
diff --git a/patches/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.patch b/patches/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.patch
new file mode 100644
index 000000000..edf288128
--- /dev/null
+++ b/patches/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.patch
@@ -0,0 +1,34 @@
+From: "Hailong.Liu" <hailong.liu@oppo.com>
+Subject: mm/vmalloc: fix return value of vb_alloc if size is 0
+Date: Fri, 26 Apr 2024 10:41:49 +0800
+
+vm_map_ram() uses IS_ERR() to validate the return value of vb_alloc().  If
+vm_map_ram(page, 0, 0) is executed, vb_alloc(0, GFP_KERNEL) returns NULL,
+which IS_ERR() does not recognize as an error pointer; the NULL is passed
+on and eventually triggers a kernel panic in vmap_pages_range_noflush().
+To resolve this issue, return ERR_PTR(-EINVAL) if the size is 0.
+
+Link: https://lkml.kernel.org/r/20240426024149.21176-1-hailong.liu@oppo.com
+Reviewed-by: Barry Song <baohua@kernel.org>
+Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Signed-off-by: Hailong.Liu <hailong.liu@oppo.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Cc: Lorenzo Stoakes <lstoakes@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ mm/vmalloc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/vmalloc.c~mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0
++++ a/mm/vmalloc.c
+@@ -2710,7 +2710,7 @@ static void *vb_alloc(unsigned long size
+ * get_order(0) returns funny result. Just warn and terminate
+ * early.
+ */
+- return NULL;
++ return ERR_PTR(-EINVAL);
+ }
+ order = get_order(size);
+
+_
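
The failure mode can be demonstrated in user space.  The sketch below
re-declares simplified versions of the IS_ERR() machinery from
include/linux/err.h and shows that NULL is not treated as an error
pointer, while ERR_PTR(-EINVAL) is:

#include <stdio.h>

/* Simplified re-declarations of the include/linux/err.h helpers. */
#define MAX_ERRNO	4095
#define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline int IS_ERR(const void *ptr)
{
	return IS_ERR_VALUE(ptr);
}

int main(void)
{
	void *before = NULL;		/* what vb_alloc(0, GFP_KERNEL) returned */
	void *after = ERR_PTR(-22);	/* ERR_PTR(-EINVAL), the fixed return */

	printf("IS_ERR(NULL)             = %d\n", IS_ERR(before)); /* 0: slips through */
	printf("IS_ERR(ERR_PTR(-EINVAL)) = %d\n", IS_ERR(after));  /* 1: caught */

	return 0;
}

Only pointer values in the topmost MAX_ERRNO bytes of the address space
count as error pointers, which is why the NULL return slipped past the
check in vm_map_ram().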
diff --git a/pc/bitops-optimize-fns-for-improved-performance.pc b/pc/bitops-optimize-fns-for-improved-performance.pc
new file mode 100644
index 000000000..2ceed3e3f
--- /dev/null
+++ b/pc/bitops-optimize-fns-for-improved-performance.pc
@@ -0,0 +1 @@
+include/linux/bitops.h
diff --git a/pc/devel-series b/pc/devel-series
index bf2e48f0d..6be538c22 100644
--- a/pc/devel-series
+++ b/pc/devel-series
@@ -141,9 +141,12 @@ maintainers-update-urls-for-keys-keyrings_integrity-and-tpm-device-driver.patch
tools-fix-userspace-compilation-with-new-test_xarray-changes.patch
lib-test_xarrayc-fix-error-assumptions-on-check_xa_multi_store_adv_add.patch
#
+kmsan-compiler_types-declare-__no_sanitize_or_inline.patch
#
mm-use-memalloc_nofs_save-in-page_cache_ra_order.patch
#
+mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.patch
+#
### hfe
#
#ENDBRANCH mm-hotfixes-unstable
@@ -691,7 +694,7 @@ mm-correct-calculation-of-wbs-bg_thresh-in-cgroup-domain.patch
mm-call-__wb_calc_thresh-instead-of-wb_calc_thresh-in-wb_over_bg_thresh.patch
mm-remove-stale-comment-__folio_mark_dirty.patch
#
-#mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch: https://lkml.kernel.org/r/922F6794-1C32-4862-8A67-90D4F7DBC474@nvidia.com
+#mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch: https://lkml.kernel.org/r/922F6794-1C32-4862-8A67-90D4F7DBC474@nvidia.com check review
mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch
#
mm-simplify-thp_vma_allowable_order.patch
@@ -879,4 +882,6 @@ scripts-gdb-fix-parameter-handling-in-lx_per_cpu.patch
scripts-gdb-make-get_thread_info-accept-pointers.patch
scripts-gdb-fix-detection-of-current-cpu-in-kgdb.patch
#
+bitops-optimize-fns-for-improved-performance.patch
+#
#ENDBRANCH mm-nonmm-unstable
diff --git a/pc/kmsan-compiler_types-declare-__no_sanitize_or_inline.pc b/pc/kmsan-compiler_types-declare-__no_sanitize_or_inline.pc
new file mode 100644
index 000000000..32ec11627
--- /dev/null
+++ b/pc/kmsan-compiler_types-declare-__no_sanitize_or_inline.pc
@@ -0,0 +1 @@
+include/linux/compiler_types.h
diff --git a/pc/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.pc b/pc/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.pc
new file mode 100644
index 000000000..ba2d8ce46
--- /dev/null
+++ b/pc/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.pc
@@ -0,0 +1 @@
+mm/vmalloc.c
diff --git a/txt/bitops-optimize-fns-for-improved-performance.txt b/txt/bitops-optimize-fns-for-improved-performance.txt
new file mode 100644
index 000000000..40c10ca9b
--- /dev/null
+++ b/txt/bitops-optimize-fns-for-improved-performance.txt
@@ -0,0 +1,95 @@
+From: Kuan-Wei Chiu <visitorckw@gmail.com>
+Subject: bitops: optimize fns() for improved performance
+Date: Fri, 26 Apr 2024 11:51:52 +0800
+
+The current fns() repeatedly uses __ffs() to find the index of the least
+significant bit and then clears the corresponding bit using __clear_bit().
+The method for clearing the least significant bit can be optimized by
+using word &= word - 1 instead.
+
+Typically, the execution time of one __ffs() plus one __clear_bit() is
+longer than that of a bitwise AND operation and a subtraction.  To improve
+performance, the loop that clears the least significant bit is replaced
+with word &= word - 1, followed by a single __ffs() operation to obtain
+the answer.  This change reduces the number of __ffs() calls from n to
+just one.
+
+The following microbenchmark, run on my x86-64 machine, shows the
+execution time (in microseconds) taken by fns() to process 1000000
+test values generated by get_random_u64(), for each of the following
+values of n:
+
++-----+---------------+---------------+
+| n | time_old | time_new |
++-----+---------------+---------------+
+| 0 | 29194 | 25878 |
+| 1 | 25510 | 25497 |
+| 2 | 27836 | 25721 |
+| 3 | 30140 | 25673 |
+| 4 | 32569 | 25426 |
+| 5 | 34792 | 25690 |
+| 6 | 37117 | 25651 |
+| 7 | 39742 | 25383 |
+| 8 | 42360 | 25657 |
+| 9 | 44672 | 25897 |
+| 10 | 47237 | 25819 |
+| 11 | 49884 | 26530 |
+| 12 | 51864 | 26647 |
+| 13 | 54265 | 28915 |
+| 14 | 56440 | 28373 |
+| 15 | 58839 | 28616 |
+| 16 | 62383 | 29128 |
+| 17 | 64257 | 30041 |
+| 18 | 66805 | 29773 |
+| 19 | 69368 | 33203 |
+| 20 | 72942 | 33688 |
+| 21 | 77006 | 34518 |
+| 22 | 80926 | 34298 |
+| 23 | 85723 | 35586 |
+| 24 | 90324 | 36376 |
+| 25 | 95992 | 37465 |
+| 26 | 101101 | 37599 |
+| 27 | 106520 | 37466 |
+| 28 | 113287 | 38163 |
+| 29 | 120552 | 38810 |
+| 30 | 128040 | 39373 |
+| 31 | 135624 | 40500 |
+| 32 | 142580 | 40343 |
+| 33 | 148915 | 40460 |
+| 34 | 154005 | 41294 |
+| 35 | 157996 | 41730 |
+| 36 | 160806 | 41523 |
+| 37 | 162975 | 42088 |
+| 38 | 163426 | 41530 |
+| 39 | 164872 | 41789 |
+| 40 | 164477 | 42505 |
+| 41 | 164758 | 41879 |
+| 42 | 164182 | 41415 |
+| 43 | 164842 | 42119 |
+| 44 | 164881 | 42297 |
+| 45 | 164870 | 42145 |
+| 46 | 164673 | 42066 |
+| 47 | 164616 | 42051 |
+| 48 | 165055 | 41902 |
+| 49 | 164847 | 41862 |
+| 50 | 165171 | 41960 |
+| 51 | 164851 | 42089 |
+| 52 | 164763 | 41717 |
+| 53 | 164635 | 42154 |
+| 54 | 164757 | 41983 |
+| 55 | 165095 | 41419 |
+| 56 | 164641 | 42381 |
+| 57 | 164601 | 41654 |
+| 58 | 164864 | 41834 |
+| 59 | 164594 | 41920 |
+| 60 | 165207 | 42020 |
+| 61 | 165056 | 41185 |
+| 62 | 165160 | 41722 |
+| 63 | 164923 | 41702 |
+| 64 | 164777 | 41880 |
++-----+---------------+---------------+
+
+Link: https://lkml.kernel.org/r/20240426035152.956702-1-visitorckw@gmail.com
+Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
+Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
+Cc: Yury Norov <yury.norov@gmail.com>
diff --git a/txt/kmsan-compiler_types-declare-__no_sanitize_or_inline.txt b/txt/kmsan-compiler_types-declare-__no_sanitize_or_inline.txt
new file mode 100644
index 000000000..28332b784
--- /dev/null
+++ b/txt/kmsan-compiler_types-declare-__no_sanitize_or_inline.txt
@@ -0,0 +1,19 @@
+From: Alexander Potapenko <glider@google.com>
+Subject: kmsan: compiler_types: declare __no_sanitize_or_inline
+Date: Fri, 26 Apr 2024 11:16:22 +0200
+
+It turned out that KMSAN instruments READ_ONCE_NOCHECK(), resulting in false
+positive reports, because __no_sanitize_or_inline fell back to __always_inline.
+
+Properly declare __no_sanitize_or_inline under __SANITIZE_MEMORY__, so
+that it does not __always_inline the annotated function.
+
+Link: https://lkml.kernel.org/r/20240426091622.3846771-1-glider@google.com
+Fixes: 5de0ce85f5a4 ("kmsan: mark noinstr as __no_sanitize_memory")
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Reported-by: syzbot+355c5bb8c1445c871ee8@syzkaller.appspotmail.com
+Link: https://lkml.kernel.org/r/000000000000826ac1061675b0e3@google.com
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Marco Elver <elver@google.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Miguel Ojeda <ojeda@kernel.org>
diff --git a/txt/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.txt b/txt/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.txt
new file mode 100644
index 000000000..3029b5d0c
--- /dev/null
+++ b/txt/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.txt
@@ -0,0 +1,16 @@
+From: "Hailong.Liu" <hailong.liu@oppo.com>
+Subject: mm/vmalloc: fix return value of vb_alloc if size is 0
+Date: Fri, 26 Apr 2024 10:41:49 +0800
+
+vm_map_ram() uses IS_ERR() to validate the return value of vb_alloc().  If
+vm_map_ram(page, 0, 0) is executed, vb_alloc(0, GFP_KERNEL) returns NULL,
+which IS_ERR() does not recognize as an error pointer; the NULL is passed
+on and eventually triggers a kernel panic in vmap_pages_range_noflush().
+To resolve this issue, return ERR_PTR(-EINVAL) if the size is 0.
+
+Link: https://lkml.kernel.org/r/20240426024149.21176-1-hailong.liu@oppo.com
+Reviewed-by: Barry Song <baohua@kernel.org>
+Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Signed-off-by: Hailong.Liu <hailong.liu@oppo.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Cc: Lorenzo Stoakes <lstoakes@gmail.com>