diff options
author | Andrew Morton <akpm@linux-foundation.org> | 2024-04-26 12:14:25 -0700 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2024-04-26 12:14:25 -0700 |
commit | e8ced5e0b21645db3c6662e0654ea9bccdffe341 (patch) | |
tree | 7a61c41a58bc8a3bd451692dab334d2c1c19d657 | |
parent | 8b6b345ef694e544a7866d82e397bde667607d48 (diff) | |
download | 25-new-e8ced5e0b21645db3c6662e0654ea9bccdffe341.tar.gz |
foo
10 files changed, 344 insertions, 1 deletion
diff --git a/patches/bitops-optimize-fns-for-improved-performance.patch b/patches/bitops-optimize-fns-for-improved-performance.patch new file mode 100644 index 000000000..edfd0c9bb --- /dev/null +++ b/patches/bitops-optimize-fns-for-improved-performance.patch @@ -0,0 +1,125 @@ +From: Kuan-Wei Chiu <visitorckw@gmail.com> +Subject: bitops: optimize fns() for improved performance +Date: Fri, 26 Apr 2024 11:51:52 +0800 + +The current fns() repeatedly uses __ffs() to find the index of the least +significant bit and then clears the corresponding bit using __clear_bit(). +The method for clearing the least significant bit can be optimized by +using word &= word - 1 instead. + +Typically, the execution time of one __ffs() plus one __clear_bit() is +longer than that of a bitwise AND operation and a subtraction. To improve +performance, the loop for clearing the least significant bit has been +replaced with word &= word - 1, followed by a single __ffs() operation to +obtain the answer. This change reduces the number of __ffs() iterations +from n to just one, enhancing overall performance. 
+ +The following microbenchmark data, conducted on my x86-64 machine, shows +the execution time (in microseconds) required for 1000000 test data +generated by get_random_u64() and executed by fns() under different values +of n: + ++-----+---------------+---------------+ +| n | time_old | time_new | ++-----+---------------+---------------+ +| 0 | 29194 | 25878 | +| 1 | 25510 | 25497 | +| 2 | 27836 | 25721 | +| 3 | 30140 | 25673 | +| 4 | 32569 | 25426 | +| 5 | 34792 | 25690 | +| 6 | 37117 | 25651 | +| 7 | 39742 | 25383 | +| 8 | 42360 | 25657 | +| 9 | 44672 | 25897 | +| 10 | 47237 | 25819 | +| 11 | 49884 | 26530 | +| 12 | 51864 | 26647 | +| 13 | 54265 | 28915 | +| 14 | 56440 | 28373 | +| 15 | 58839 | 28616 | +| 16 | 62383 | 29128 | +| 17 | 64257 | 30041 | +| 18 | 66805 | 29773 | +| 19 | 69368 | 33203 | +| 20 | 72942 | 33688 | +| 21 | 77006 | 34518 | +| 22 | 80926 | 34298 | +| 23 | 85723 | 35586 | +| 24 | 90324 | 36376 | +| 25 | 95992 | 37465 | +| 26 | 101101 | 37599 | +| 27 | 106520 | 37466 | +| 28 | 113287 | 38163 | +| 29 | 120552 | 38810 | +| 30 | 128040 | 39373 | +| 31 | 135624 | 40500 | +| 32 | 142580 | 40343 | +| 33 | 148915 | 40460 | +| 34 | 154005 | 41294 | +| 35 | 157996 | 41730 | +| 36 | 160806 | 41523 | +| 37 | 162975 | 42088 | +| 38 | 163426 | 41530 | +| 39 | 164872 | 41789 | +| 40 | 164477 | 42505 | +| 41 | 164758 | 41879 | +| 42 | 164182 | 41415 | +| 43 | 164842 | 42119 | +| 44 | 164881 | 42297 | +| 45 | 164870 | 42145 | +| 46 | 164673 | 42066 | +| 47 | 164616 | 42051 | +| 48 | 165055 | 41902 | +| 49 | 164847 | 41862 | +| 50 | 165171 | 41960 | +| 51 | 164851 | 42089 | +| 52 | 164763 | 41717 | +| 53 | 164635 | 42154 | +| 54 | 164757 | 41983 | +| 55 | 165095 | 41419 | +| 56 | 164641 | 42381 | +| 57 | 164601 | 41654 | +| 58 | 164864 | 41834 | +| 59 | 164594 | 41920 | +| 60 | 165207 | 42020 | +| 61 | 165056 | 41185 | +| 62 | 165160 | 41722 | +| 63 | 164923 | 41702 | +| 64 | 164777 | 41880 | ++-----+---------------+---------------+ + +Link: 
https://lkml.kernel.org/r/20240426035152.956702-1-visitorckw@gmail.com +Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com> +Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw> +Cc: Yury Norov <yury.norov@gmail.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + include/linux/bitops.h | 12 ++++-------- + 1 file changed, 4 insertions(+), 8 deletions(-) + +--- a/include/linux/bitops.h~bitops-optimize-fns-for-improved-performance ++++ a/include/linux/bitops.h +@@ -254,16 +254,12 @@ static inline unsigned long __ffs64(u64 + */ + static inline unsigned long fns(unsigned long word, unsigned int n) + { +- unsigned int bit; ++ unsigned int i; + +- while (word) { +- bit = __ffs(word); +- if (n-- == 0) +- return bit; +- __clear_bit(bit, &word); +- } ++ for (i = 0; word && i < n; i++) ++ word &= word - 1; + +- return BITS_PER_LONG; ++ return word ? __ffs(word) : BITS_PER_LONG; + } + + /** +_ diff --git a/patches/kmsan-compiler_types-declare-__no_sanitize_or_inline.patch b/patches/kmsan-compiler_types-declare-__no_sanitize_or_inline.patch new file mode 100644 index 000000000..eefee79aa --- /dev/null +++ b/patches/kmsan-compiler_types-declare-__no_sanitize_or_inline.patch @@ -0,0 +1,46 @@ +From: Alexander Potapenko <glider@google.com> +Subject: kmsan: compiler_types: declare __no_sanitize_or_inline +Date: Fri, 26 Apr 2024 11:16:22 +0200 + +It turned out that KMSAN instruments READ_ONCE_NOCHECK(), resulting in +false positive reports, because __no_sanitize_or_inline enforced inlining. + +Properly declare __no_sanitize_or_inline under __SANITIZE_MEMORY__, so +that it does not __always_inline the annotated function. 
+ +Link: https://lkml.kernel.org/r/20240426091622.3846771-1-glider@google.com +Fixes: 5de0ce85f5a4 ("kmsan: mark noinstr as __no_sanitize_memory") +Signed-off-by: Alexander Potapenko <glider@google.com> +Reported-by: syzbot+355c5bb8c1445c871ee8@syzkaller.appspotmail.com +Link: https://lkml.kernel.org/r/000000000000826ac1061675b0e3@google.com +Cc: <stable@vger.kernel.org> +Reviewed-by: Marco Elver <elver@google.com> +Cc: Dmitry Vyukov <dvyukov@google.com> +Cc: Miguel Ojeda <ojeda@kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + include/linux/compiler_types.h | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +--- a/include/linux/compiler_types.h~kmsan-compiler_types-declare-__no_sanitize_or_inline ++++ a/include/linux/compiler_types.h +@@ -278,6 +278,17 @@ struct ftrace_likely_data { + # define __no_kcsan + #endif + ++#ifdef __SANITIZE_MEMORY__ ++/* ++ * Similarly to KASAN and KCSAN, KMSAN loses function attributes of inlined ++ * functions, therefore disabling KMSAN checks also requires disabling inlining. ++ * ++ * __no_sanitize_or_inline effectively prevents KMSAN from reporting errors ++ * within the function and marks all its outputs as initialized. ++ */ ++# define __no_sanitize_or_inline __no_kmsan_checks notrace __maybe_unused ++#endif ++ + #ifndef __no_sanitize_or_inline + #define __no_sanitize_or_inline __always_inline + #endif +_ diff --git a/patches/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.patch b/patches/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.patch new file mode 100644 index 000000000..edf288128 --- /dev/null +++ b/patches/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.patch @@ -0,0 +1,34 @@ +From: "Hailong.Liu" <hailong.liu@oppo.com> +Subject: mm/vmalloc: fix return value of vb_alloc if size is 0 +Date: Fri, 26 Apr 2024 10:41:49 +0800 + +vm_map_ram() uses IS_ERR() to validate the return value of vb_alloc(). 
If +vm_map_ram(page, 0, 0) is executed, vb_alloc(0, GFP_KERNEL) would return +NULL. In such a case, IS_ERR() cannot handle the return value and lead to +kernel panic by vmap_pages_range_noflush() at last. To resolve this +issue, return ERR_PTR(-EINVAL) if the size is 0. + +Link: https://lkml.kernel.org/r/20240426024149.21176-1-hailong.liu@oppo.com +Reviewed-by: Barry Song <baohua@kernel.org> +Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com> +Signed-off-by: Hailong.Liu <hailong.liu@oppo.com> +Reviewed-by: Christoph Hellwig <hch@lst.de> +Cc: Lorenzo Stoakes <lstoakes@gmail.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/vmalloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/vmalloc.c~mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0 ++++ a/mm/vmalloc.c +@@ -2710,7 +2710,7 @@ static void *vb_alloc(unsigned long size + * get_order(0) returns funny result. Just warn and terminate + * early. + */ +- return NULL; ++ return ERR_PTR(-EINVAL); + } + order = get_order(size); + +_ diff --git a/pc/bitops-optimize-fns-for-improved-performance.pc b/pc/bitops-optimize-fns-for-improved-performance.pc new file mode 100644 index 000000000..2ceed3e3f --- /dev/null +++ b/pc/bitops-optimize-fns-for-improved-performance.pc @@ -0,0 +1 @@ +include/linux/bitops.h diff --git a/pc/devel-series b/pc/devel-series index bf2e48f0d..6be538c22 100644 --- a/pc/devel-series +++ b/pc/devel-series @@ -141,9 +141,12 @@ maintainers-update-urls-for-keys-keyrings_integrity-and-tpm-device-driver.patch tools-fix-userspace-compilation-with-new-test_xarray-changes.patch lib-test_xarrayc-fix-error-assumptions-on-check_xa_multi_store_adv_add.patch # +kmsan-compiler_types-declare-__no_sanitize_or_inline.patch # mm-use-memalloc_nofs_save-in-page_cache_ra_order.patch # +mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.patch +# ### hfe # #ENDBRANCH mm-hotfixes-unstable @@ -691,7 +694,7 @@ mm-correct-calculation-of-wbs-bg_thresh-in-cgroup-domain.patch 
mm-call-__wb_calc_thresh-instead-of-wb_calc_thresh-in-wb_over_bg_thresh.patch mm-remove-stale-comment-__folio_mark_dirty.patch # -#mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch: https://lkml.kernel.org/r/922F6794-1C32-4862-8A67-90D4F7DBC474@nvidia.com +#mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch: https://lkml.kernel.org/r/922F6794-1C32-4862-8A67-90D4F7DBC474@nvidia.com check review mm-fix-race-between-__split_huge_pmd_locked-and-gup-fast.patch # mm-simplify-thp_vma_allowable_order.patch @@ -879,4 +882,6 @@ scripts-gdb-fix-parameter-handling-in-lx_per_cpu.patch scripts-gdb-make-get_thread_info-accept-pointers.patch scripts-gdb-fix-detection-of-current-cpu-in-kgdb.patch # +bitops-optimize-fns-for-improved-performance.patch +# #ENDBRANCH mm-nonmm-unstable diff --git a/pc/kmsan-compiler_types-declare-__no_sanitize_or_inline.pc b/pc/kmsan-compiler_types-declare-__no_sanitize_or_inline.pc new file mode 100644 index 000000000..32ec11627 --- /dev/null +++ b/pc/kmsan-compiler_types-declare-__no_sanitize_or_inline.pc @@ -0,0 +1 @@ +include/linux/compiler_types.h diff --git a/pc/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.pc b/pc/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.pc new file mode 100644 index 000000000..ba2d8ce46 --- /dev/null +++ b/pc/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.pc @@ -0,0 +1 @@ +mm/vmalloc.c diff --git a/txt/bitops-optimize-fns-for-improved-performance.txt b/txt/bitops-optimize-fns-for-improved-performance.txt new file mode 100644 index 000000000..40c10ca9b --- /dev/null +++ b/txt/bitops-optimize-fns-for-improved-performance.txt @@ -0,0 +1,95 @@ +From: Kuan-Wei Chiu <visitorckw@gmail.com> +Subject: bitops: optimize fns() for improved performance +Date: Fri, 26 Apr 2024 11:51:52 +0800 + +The current fns() repeatedly uses __ffs() to find the index of the least +significant bit and then clears the corresponding bit using __clear_bit(). 
+The method for clearing the least significant bit can be optimized by +using word &= word - 1 instead. + +Typically, the execution time of one __ffs() plus one __clear_bit() is +longer than that of a bitwise AND operation and a subtraction. To improve +performance, the loop for clearing the least significant bit has been +replaced with word &= word - 1, followed by a single __ffs() operation to +obtain the answer. This change reduces the number of __ffs() iterations +from n to just one, enhancing overall performance. + +The following microbenchmark data, conducted on my x86-64 machine, shows +the execution time (in microseconds) required for 1000000 test data +generated by get_random_u64() and executed by fns() under different values +of n: + ++-----+---------------+---------------+ +| n | time_old | time_new | ++-----+---------------+---------------+ +| 0 | 29194 | 25878 | +| 1 | 25510 | 25497 | +| 2 | 27836 | 25721 | +| 3 | 30140 | 25673 | +| 4 | 32569 | 25426 | +| 5 | 34792 | 25690 | +| 6 | 37117 | 25651 | +| 7 | 39742 | 25383 | +| 8 | 42360 | 25657 | +| 9 | 44672 | 25897 | +| 10 | 47237 | 25819 | +| 11 | 49884 | 26530 | +| 12 | 51864 | 26647 | +| 13 | 54265 | 28915 | +| 14 | 56440 | 28373 | +| 15 | 58839 | 28616 | +| 16 | 62383 | 29128 | +| 17 | 64257 | 30041 | +| 18 | 66805 | 29773 | +| 19 | 69368 | 33203 | +| 20 | 72942 | 33688 | +| 21 | 77006 | 34518 | +| 22 | 80926 | 34298 | +| 23 | 85723 | 35586 | +| 24 | 90324 | 36376 | +| 25 | 95992 | 37465 | +| 26 | 101101 | 37599 | +| 27 | 106520 | 37466 | +| 28 | 113287 | 38163 | +| 29 | 120552 | 38810 | +| 30 | 128040 | 39373 | +| 31 | 135624 | 40500 | +| 32 | 142580 | 40343 | +| 33 | 148915 | 40460 | +| 34 | 154005 | 41294 | +| 35 | 157996 | 41730 | +| 36 | 160806 | 41523 | +| 37 | 162975 | 42088 | +| 38 | 163426 | 41530 | +| 39 | 164872 | 41789 | +| 40 | 164477 | 42505 | +| 41 | 164758 | 41879 | +| 42 | 164182 | 41415 | +| 43 | 164842 | 42119 | +| 44 | 164881 | 42297 | +| 45 | 164870 | 42145 | +| 46 | 164673 | 
42066 | +| 47 | 164616 | 42051 | +| 48 | 165055 | 41902 | +| 49 | 164847 | 41862 | +| 50 | 165171 | 41960 | +| 51 | 164851 | 42089 | +| 52 | 164763 | 41717 | +| 53 | 164635 | 42154 | +| 54 | 164757 | 41983 | +| 55 | 165095 | 41419 | +| 56 | 164641 | 42381 | +| 57 | 164601 | 41654 | +| 58 | 164864 | 41834 | +| 59 | 164594 | 41920 | +| 60 | 165207 | 42020 | +| 61 | 165056 | 41185 | +| 62 | 165160 | 41722 | +| 63 | 164923 | 41702 | +| 64 | 164777 | 41880 | ++-----+---------------+---------------+ + +Link: https://lkml.kernel.org/r/20240426035152.956702-1-visitorckw@gmail.com +Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com> +Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw> +Cc: Yury Norov <yury.norov@gmail.com> diff --git a/txt/kmsan-compiler_types-declare-__no_sanitize_or_inline.txt b/txt/kmsan-compiler_types-declare-__no_sanitize_or_inline.txt new file mode 100644 index 000000000..28332b784 --- /dev/null +++ b/txt/kmsan-compiler_types-declare-__no_sanitize_or_inline.txt @@ -0,0 +1,19 @@ +From: Alexander Potapenko <glider@google.com> +Subject: kmsan: compiler_types: declare __no_sanitize_or_inline +Date: Fri, 26 Apr 2024 11:16:22 +0200 + +It turned out that KMSAN instruments READ_ONCE_NOCHECK(), resulting in +false positive reports, because __no_sanitize_or_inline enforced inlining. + +Properly declare __no_sanitize_or_inline under __SANITIZE_MEMORY__, so +that it does not __always_inline the annotated function. 
+ +Link: https://lkml.kernel.org/r/20240426091622.3846771-1-glider@google.com +Fixes: 5de0ce85f5a4 ("kmsan: mark noinstr as __no_sanitize_memory") +Signed-off-by: Alexander Potapenko <glider@google.com> +Reported-by: syzbot+355c5bb8c1445c871ee8@syzkaller.appspotmail.com +Link: https://lkml.kernel.org/r/000000000000826ac1061675b0e3@google.com +Cc: <stable@vger.kernel.org> +Reviewed-by: Marco Elver <elver@google.com> +Cc: Dmitry Vyukov <dvyukov@google.com> +Cc: Miguel Ojeda <ojeda@kernel.org> diff --git a/txt/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.txt b/txt/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.txt new file mode 100644 index 000000000..3029b5d0c --- /dev/null +++ b/txt/mm-vmalloc-fix-return-value-of-vb_alloc-if-size-is-0.txt @@ -0,0 +1,16 @@ +From: "Hailong.Liu" <hailong.liu@oppo.com> +Subject: mm/vmalloc: fix return value of vb_alloc if size is 0 +Date: Fri, 26 Apr 2024 10:41:49 +0800 + +vm_map_ram() uses IS_ERR() to validate the return value of vb_alloc(). If +vm_map_ram(page, 0, 0) is executed, vb_alloc(0, GFP_KERNEL) would return +NULL. In such a case, IS_ERR() cannot handle the return value and lead to +kernel panic by vmap_pages_range_noflush() at last. To resolve this +issue, return ERR_PTR(-EINVAL) if the size is 0. + +Link: https://lkml.kernel.org/r/20240426024149.21176-1-hailong.liu@oppo.com +Reviewed-by: Barry Song <baohua@kernel.org> +Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com> +Signed-off-by: Hailong.Liu <hailong.liu@oppo.com> +Reviewed-by: Christoph Hellwig <hch@lst.de> +Cc: Lorenzo Stoakes <lstoakes@gmail.com> |