author     Andrew Morton <akpm@linux-foundation.org>  2024-04-18 13:42:04 -0700
committer  Andrew Morton <akpm@linux-foundation.org>  2024-04-18 13:42:04 -0700
commit     578a2c2d8e5c25cc32ea3ab3515c903b7c45ba04 (patch)
tree       44ef1c7e567c0114204658f181b4dd971daede14
parent     c65c0e14247748216c988a1b18897d1258afaaf7 (diff)
download   25-new-578a2c2d8e5c25cc32ea3ab3515c903b7c45ba04.tar.gz
foo
-rw-r--r--  patches/crash-add-prefix-for-crash-dumping-messages.patch | 56
-rw-r--r--  patches/hugetlb-check-for-anon_vma-prior-to-folio-allocation.patch | 58
-rw-r--r--  patches/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.patch | 20
-rw-r--r--  patches/init-fix-allocated-page-overlapping-with-ptr_err.patch | 66
-rw-r--r--  patches/mm-arm64-override-clear_young_dirty_ptes-batch-helper.patch | 154
-rw-r--r--  patches/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.patch | 51
-rw-r--r--  patches/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.patch | 190
-rw-r--r--  patches/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.patch | 172
-rw-r--r--  patches/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.patch | 142
-rw-r--r--  patches/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.patch | 58
-rw-r--r--  patches/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.patch | 76
-rw-r--r--  patches/old/null-pointer-dereference-while-shrinking-zswap.patch (renamed from patches/null-pointer-dereference-while-shrinking-zswap.patch) | 0
-rw-r--r--  patches/stackdepot-respect-__gfp_nolockdep-allocation-flag.patch | 90
-rw-r--r--  pc/crash-add-prefix-for-crash-dumping-messages.pc | 2
-rw-r--r--  pc/devel-series | 33
-rw-r--r--  pc/hugetlb-check-for-anon_vma-prior-to-folio-allocation.pc | 1
-rw-r--r--  pc/init-fix-allocated-page-overlapping-with-ptr_err.pc | 1
-rw-r--r--  pc/mm-arm64-override-clear_young_dirty_ptes-batch-helper.pc | 2
-rw-r--r--  pc/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.pc | 1
-rw-r--r--  pc/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.pc | 3
-rw-r--r--  pc/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.pc | 1
-rw-r--r--  pc/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.pc | 3
-rw-r--r--  pc/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.pc | 1
-rw-r--r--  pc/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.pc (renamed from pc/null-pointer-dereference-while-shrinking-zswap.pc) | 0
-rw-r--r--  pc/stackdepot-respect-__gfp_nolockdep-allocation-flag.pc | 1
-rw-r--r--  txt/crash-add-prefix-for-crash-dumping-messages.txt | 17
-rw-r--r--  txt/hugetlb-check-for-anon_vma-prior-to-folio-allocation.txt | 19
-rw-r--r--  txt/init-fix-allocated-page-overlapping-with-ptr_err.txt | 49
-rw-r--r--  txt/mm-arm64-override-clear_young_dirty_ptes-batch-helper.txt | 23
-rw-r--r--  txt/mm-create-folio_flag_false-and-folio_type_ops-macros.txt | 2
-rw-r--r--  txt/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.txt | 15
-rw-r--r--  txt/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.txt | 61
-rw-r--r--  txt/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.txt | 47
-rw-r--r--  txt/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.txt | 24
-rw-r--r--  txt/mm-page_table_check-support-userfault-wr-protect-entries.txt | 2
-rw-r--r--  txt/mm-support-page_mapcount-on-page_has_type-pages.txt | 2
-rw-r--r--  txt/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.txt | 18
-rw-r--r--  txt/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.txt | 35
-rw-r--r--  txt/old/null-pointer-dereference-while-shrinking-zswap.txt (renamed from txt/null-pointer-dereference-while-shrinking-zswap.txt) | 0
-rw-r--r--  txt/stackdepot-respect-__gfp_nolockdep-allocation-flag.txt | 68
40 files changed, 1548 insertions, 16 deletions
diff --git a/patches/crash-add-prefix-for-crash-dumping-messages.patch b/patches/crash-add-prefix-for-crash-dumping-messages.patch
new file mode 100644
index 000000000..8e0449aa0
--- /dev/null
+++ b/patches/crash-add-prefix-for-crash-dumping-messages.patch
@@ -0,0 +1,56 @@
+From: Baoquan He <bhe@redhat.com>
+Subject: crash: add prefix for crash dumping messages
+Date: Thu, 18 Apr 2024 11:58:43 +0800
+
+Add pr_fmt() to kernel/crash_core.c so that the module name is printed as a
+prefix on its debugging messages.
+
+Also add the 'crashkernel:' prefix to two message-printing lines in
+kernel/crash_reserve.c. In that file, almost all debugging messages already
+carry a 'crashkernel:' prefix, or contain the keyword crashkernel at the
+beginning or in the middle of the text, so adding pr_fmt() there would make
+those prefixes redundant.
+
+Link: https://lkml.kernel.org/r/20240418035843.1562887-1-bhe@redhat.com
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Cc: Dave Young <dyoung@redhat.com>
+Cc: Jiri Slaby <jirislaby@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ kernel/crash_core.c | 2 ++
+ kernel/crash_reserve.c | 4 ++--
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/kernel/crash_core.c~crash-add-prefix-for-crash-dumping-messages
++++ a/kernel/crash_core.c
+@@ -4,6 +4,8 @@
+ * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
+ */
+
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
+ #include <linux/buildid.h>
+ #include <linux/init.h>
+ #include <linux/utsname.h>
+--- a/kernel/crash_reserve.c~crash-add-prefix-for-crash-dumping-messages
++++ a/kernel/crash_reserve.c
+@@ -109,7 +109,7 @@ static int __init parse_crashkernel_mem(
+
+ size = memparse(cur, &tmp);
+ if (cur == tmp) {
+- pr_warn("Memory value expected\n");
++ pr_warn("crashkernel: Memory value expected\n");
+ return -EINVAL;
+ }
+ cur = tmp;
+@@ -132,7 +132,7 @@ static int __init parse_crashkernel_mem(
+ cur++;
+ *crash_base = memparse(cur, &tmp);
+ if (cur == tmp) {
+- pr_warn("Memory value expected after '@'\n");
++ pr_warn("crahskernel: Memory value expected after '@'\n");
+ return -EINVAL;
+ }
+ }
+_
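
For context, a minimal sketch (not part of the patch) of how the pr_fmt()
prefixing used above takes effect: pr_warn(fmt, ...) expands to
printk(KERN_WARNING pr_fmt(fmt), ...), and <linux/printk.h> only supplies the
identity pr_fmt() when a file has not defined its own, so a definition placed
before the includes prefixes every pr_*() message emitted from that file.

/* Illustrative only; mirrors the pattern added to kernel/crash_core.c. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>

static void crash_report_example(void)
{
        /* KBUILD_MODNAME is "crash_core" for crash_core.o, so this prints
         * "crash_core: cannot reserve crashkernel region" instead of the
         * bare string. */
        pr_warn("cannot reserve crashkernel region\n");
}
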
diff --git a/patches/hugetlb-check-for-anon_vma-prior-to-folio-allocation.patch b/patches/hugetlb-check-for-anon_vma-prior-to-folio-allocation.patch
new file mode 100644
index 000000000..8448eb11a
--- /dev/null
+++ b/patches/hugetlb-check-for-anon_vma-prior-to-folio-allocation.patch
@@ -0,0 +1,58 @@
+From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
+Subject: hugetlb: check for anon_vma prior to folio allocation
+Date: Mon, 15 Apr 2024 14:17:47 -0700
+
+Commit 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of
+anon_vma_prepare()") may bail out after allocating a folio if we do not
+hold the mmap lock. When this occurs, vmf_anon_prepare() will release the
+vma lock. Hugetlb then attempts to call restore_reserve_on_error(), which
+depends on the vma lock being held.
+
+We can move vmf_anon_prepare() prior to the folio allocation in order to
+avoid calling restore_reserve_on_error() without the vma lock.
+
+Link: https://lkml.kernel.org/r/ZiFqSrSRLhIV91og@fedora
+Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()")
+Reported-by: syzbot+ad1b592fc4483655438b@syzkaller.appspotmail.com
+Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ mm/hugetlb.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/mm/hugetlb.c~hugetlb-check-for-anon_vma-prior-to-folio-allocation
++++ a/mm/hugetlb.c
+@@ -6261,6 +6261,12 @@ static vm_fault_t hugetlb_no_page(struct
+ VM_UFFD_MISSING);
+ }
+
++ if (!(vma->vm_flags & VM_MAYSHARE)) {
++ ret = vmf_anon_prepare(vmf);
++ if (unlikely(ret))
++ goto out;
++ }
++
+ folio = alloc_hugetlb_folio(vma, haddr, 0);
+ if (IS_ERR(folio)) {
+ /*
+@@ -6297,15 +6303,12 @@ static vm_fault_t hugetlb_no_page(struct
+ */
+ restore_reserve_on_error(h, vma, haddr, folio);
+ folio_put(folio);
++ ret = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ new_pagecache_folio = true;
+ } else {
+ folio_lock(folio);
+-
+- ret = vmf_anon_prepare(vmf);
+- if (unlikely(ret))
+- goto backout_unlocked;
+ anon_rmap = 1;
+ }
+ } else {
+_
diff --git a/patches/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.patch b/patches/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.patch
index 88611c178..8d3c318e0 100644
--- a/patches/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.patch
+++ b/patches/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.patch
@@ -64,8 +64,8 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
ret = 0;
goto out;
}
-@@ -6256,7 +6252,7 @@ static vm_fault_t hugetlb_no_page(struct
- VM_UFFD_MISSING);
+@@ -6262,7 +6258,7 @@ static vm_fault_t hugetlb_no_page(struct
+ goto out;
}
- folio = alloc_hugetlb_folio(vma, haddr, 0);
@@ -73,7 +73,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (IS_ERR(folio)) {
/*
* Returning error will result in faulting task being
-@@ -6270,18 +6266,20 @@ static vm_fault_t hugetlb_no_page(struct
+@@ -6276,18 +6272,20 @@ static vm_fault_t hugetlb_no_page(struct
* here. Before returning error, get ptl and make
* sure there really is no pte entry.
*/
@@ -97,7 +97,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
if (err) {
/*
* err can't be -EEXIST which implies someone
-@@ -6290,7 +6288,8 @@ static vm_fault_t hugetlb_no_page(struct
+@@ -6296,7 +6294,8 @@ static vm_fault_t hugetlb_no_page(struct
* to the page cache. So it's safe to call
* restore_reserve_on_error() here.
*/
@@ -105,9 +105,9 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+ restore_reserve_on_error(h, vma, vmf->address,
+ folio);
folio_put(folio);
+ ret = VM_FAULT_SIGBUS;
goto out;
- }
-@@ -6320,7 +6319,7 @@ static vm_fault_t hugetlb_no_page(struct
+@@ -6323,7 +6322,7 @@ static vm_fault_t hugetlb_no_page(struct
folio_unlock(folio);
folio_put(folio);
/* See comment in userfaultfd_missing() block above */
@@ -116,7 +116,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
ret = 0;
goto out;
}
-@@ -6335,23 +6334,23 @@ static vm_fault_t hugetlb_no_page(struct
+@@ -6338,23 +6337,23 @@ static vm_fault_t hugetlb_no_page(struct
* any allocations necessary to record that reservation occur outside
* the spinlock.
*/
@@ -146,7 +146,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
else
hugetlb_add_file_rmap(folio);
new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE)
-@@ -6360,17 +6359,18 @@ static vm_fault_t hugetlb_no_page(struct
+@@ -6363,17 +6362,18 @@ static vm_fault_t hugetlb_no_page(struct
* If this pte was previously wr-protected, keep it wr-protected even
* if populated.
*/
@@ -170,7 +170,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
/*
* Only set hugetlb_migratable in newly allocated pages. Existing pages
-@@ -6387,10 +6387,10 @@ out:
+@@ -6390,10 +6390,10 @@ out:
return ret;
backout:
@@ -183,7 +183,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
folio_unlock(folio);
folio_put(folio);
-@@ -6486,8 +6486,7 @@ vm_fault_t hugetlb_fault(struct mm_struc
+@@ -6489,8 +6489,7 @@ vm_fault_t hugetlb_fault(struct mm_struc
* hugetlb_no_page will drop vma lock and hugetlb fault
* mutex internally, which make us return immediately.
*/
diff --git a/patches/init-fix-allocated-page-overlapping-with-ptr_err.patch b/patches/init-fix-allocated-page-overlapping-with-ptr_err.patch
new file mode 100644
index 000000000..0e839c0ee
--- /dev/null
+++ b/patches/init-fix-allocated-page-overlapping-with-ptr_err.patch
@@ -0,0 +1,66 @@
+From: Nam Cao <namcao@linutronix.de>
+Subject: init: fix allocated page overlapping with PTR_ERR
+Date: Thu, 18 Apr 2024 12:29:43 +0200
+
+There is nothing preventing kernel memory allocators from allocating a
+page that overlaps with PTR_ERR(), except for architecture-specific code
+that setup memblock.
+
+It was discovered that the RISC-V architecture doesn't set up memblock
+correctly, leading to a page overlapping with PTR_ERR() being allocated and
+subsequently crashing the kernel (see the Closes: link below).
+
+The reported crash has nothing to do with PTR_ERR(): the last page (at
+address 0xfffff000) being allocated leads to an unexpected arithmetic
+overflow in ext4; but still, this page shouldn't be allocated in the first
+place.
+
+Because PTR_ERR() is architecture-independent, we shouldn't ask every single
+architecture to set this up. There may be other architectures besides RISC-V
+that have the same problem.
+
+Fix this once and for all by reserving the physical memory page that may
+be mapped to the last virtual memory page as part of low memory.
+
+Unfortunately, this means if there is actual memory at this reserved
+location, that memory will become inaccessible. However, if this page is
+not reserved, it can only be accessed as high memory, so this doesn't
+matter if high memory is not supported. Even if high memory is supported,
+it is still only one page.
+
+Closes: https://lore.kernel.org/linux-riscv/878r1ibpdn.fsf@all.your.base.are.belong.to.us
+Link: https://lkml.kernel.org/r/20240418102943.180510-1-namcao@linutronix.de
+Signed-off-by: Nam Cao <namcao@linutronix.de>
+Reported-by: Björn Töpel <bjorn@kernel.org>
+Tested-by: Björn Töpel <bjorn@kernel.org>
+Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
+Cc: Andreas Dilger <adilger@dilger.ca>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Changbin Du <changbin.du@huawei.com>
+Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
+Cc: Geert Uytterhoeven <geert+renesas@glider.be>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Krister Johansen <kjlx@templeofstupid.com>
+Cc: Luis Chamberlain <mcgrof@kernel.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: Stephen Rothwell <sfr@canb.auug.org.au>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ init/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/init/main.c~init-fix-allocated-page-overlapping-with-ptr_err
++++ a/init/main.c
+@@ -900,6 +900,7 @@ void start_kernel(void)
+ page_address_init();
+ pr_notice("%s", linux_banner);
+ early_security_init();
++ memblock_reserve(__pa(-PAGE_SIZE), PAGE_SIZE); /* reserve last page for ERR_PTR */
+ setup_arch(&command_line);
+ setup_boot_config();
+ setup_command_line(command_line);
+_
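
A reminder of why that last page is special (a sketch based on the
MAX_ERRNO/IS_ERR_VALUE definitions in include/linux/err.h, not code from the
patch): error pointers are negative errno values cast to pointers, so they
occupy the top MAX_ERRNO bytes of the address space, which lie inside the
final virtual page.

#include <linux/err.h>

/* ERR_PTR(-EINVAL) is just (void *)-22; IS_ERR() treats every address in
 * [-MAX_ERRNO, -1] as an error value, and that range sits entirely within
 * the last page of the address space. */
static bool overlaps_err_ptr_range(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* An object handed out from the page reserved above by
 * memblock_reserve(__pa(-PAGE_SIZE), PAGE_SIZE) would make this check
 * (and hence IS_ERR()) return true for a perfectly valid pointer. */
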
diff --git a/patches/mm-arm64-override-clear_young_dirty_ptes-batch-helper.patch b/patches/mm-arm64-override-clear_young_dirty_ptes-batch-helper.patch
new file mode 100644
index 000000000..6e777bb5a
--- /dev/null
+++ b/patches/mm-arm64-override-clear_young_dirty_ptes-batch-helper.patch
@@ -0,0 +1,154 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/arm64: override clear_young_dirty_ptes() batch helper
+Date: Thu, 18 Apr 2024 21:44:33 +0800
+
+The per-pte get_and_clear/modify/set approach would result in
+unfolding/refolding for contpte mappings on arm64. So we need to override
+clear_young_dirty_ptes() for arm64 to avoid it.
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-3-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Suggested-by: Barry Song <21cnbao@gmail.com>
+Suggested-by: Ryan Roberts <ryan.roberts@arm.com>
+Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ arch/arm64/include/asm/pgtable.h | 55 +++++++++++++++++++++++++++++
+ arch/arm64/mm/contpte.c | 29 +++++++++++++++
+ 2 files changed, 84 insertions(+)
+
+--- a/arch/arm64/include/asm/pgtable.h~mm-arm64-override-clear_young_dirty_ptes-batch-helper
++++ a/arch/arm64/include/asm/pgtable.h
+@@ -1223,6 +1223,46 @@ static inline void __wrprotect_ptes(stru
+ __ptep_set_wrprotect(mm, address, ptep);
+ }
+
++static inline void __clear_young_dirty_pte(struct vm_area_struct *vma,
++ unsigned long addr, pte_t *ptep,
++ pte_t pte, cydp_t flags)
++{
++ pte_t old_pte;
++
++ do {
++ old_pte = pte;
++
++ if (flags & CYDP_CLEAR_YOUNG)
++ pte = pte_mkold(pte);
++ if (flags & CYDP_CLEAR_DIRTY)
++ pte = pte_mkclean(pte);
++
++ pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
++ pte_val(old_pte), pte_val(pte));
++ } while (pte_val(pte) != pte_val(old_pte));
++}
++
++static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma,
++ unsigned long addr, pte_t *ptep,
++ unsigned int nr, cydp_t flags)
++{
++ pte_t pte;
++
++ for (;;) {
++ pte = __ptep_get(ptep);
++
++ if (flags == (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY))
++ __set_pte(ptep, pte_mkclean(pte_mkold(pte)));
++ else
++ __clear_young_dirty_pte(vma, addr, ptep, pte, flags);
++
++ if (--nr == 0)
++ break;
++ ptep++;
++ addr += PAGE_SIZE;
++ }
++}
++
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ #define __HAVE_ARCH_PMDP_SET_WRPROTECT
+ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+@@ -1379,6 +1419,9 @@ extern void contpte_wrprotect_ptes(struc
+ extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t entry, int dirty);
++extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
++ unsigned long addr, pte_t *ptep,
++ unsigned int nr, cydp_t flags);
+
+ static __always_inline void contpte_try_fold(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, pte_t pte)
+@@ -1603,6 +1646,17 @@ static inline int ptep_set_access_flags(
+ return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty);
+ }
+
++#define clear_young_dirty_ptes clear_young_dirty_ptes
++static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
++ unsigned long addr, pte_t *ptep,
++ unsigned int nr, cydp_t flags)
++{
++ if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
++ __clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
++ else
++ contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
++}
++
+ #else /* CONFIG_ARM64_CONTPTE */
+
+ #define ptep_get __ptep_get
+@@ -1622,6 +1676,7 @@ static inline int ptep_set_access_flags(
+ #define wrprotect_ptes __wrprotect_ptes
+ #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+ #define ptep_set_access_flags __ptep_set_access_flags
++#define clear_young_dirty_ptes __clear_young_dirty_ptes
+
+ #endif /* CONFIG_ARM64_CONTPTE */
+
+--- a/arch/arm64/mm/contpte.c~mm-arm64-override-clear_young_dirty_ptes-batch-helper
++++ a/arch/arm64/mm/contpte.c
+@@ -361,6 +361,35 @@ void contpte_wrprotect_ptes(struct mm_st
+ }
+ EXPORT_SYMBOL_GPL(contpte_wrprotect_ptes);
+
++void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
++ unsigned long addr, pte_t *ptep,
++ unsigned int nr, cydp_t flags)
++{
++ /*
++ * We can safely clear access/dirty without needing to unfold from
++ * the architecture's perspective, even when contpte is set. If the
++ * range starts or ends midway through a contpte block, we can just
++ * expand to include the full contpte block. While this is not
++ * exactly what the core-mm asked for, it tracks access/dirty per
++ * folio, not per page. And since we only create a contpte block
++ * when it is covered by a single folio, we can get away with
++ * clearing access/dirty for the whole block.
++ */
++ unsigned long start = addr;
++ unsigned long end = start + nr;
++
++ if (pte_cont(__ptep_get(ptep + nr - 1)))
++ end = ALIGN(end, CONT_PTE_SIZE);
++
++ if (pte_cont(__ptep_get(ptep))) {
++ start = ALIGN_DOWN(start, CONT_PTE_SIZE);
++ ptep = contpte_align_down(ptep);
++ }
++
++ __clear_young_dirty_ptes(vma, start, ptep, end - start, flags);
++}
++EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes);
++
+ int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t entry, int dirty)
+_
diff --git a/patches/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.patch b/patches/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.patch
new file mode 100644
index 000000000..6158d9b29
--- /dev/null
+++ b/patches/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.patch
@@ -0,0 +1,51 @@
+From: David Hildenbrand <david@redhat.com>
+Subject: mm/huge_memory: improve split_huge_page_to_list_to_order() return value documentation
+Date: Thu, 18 Apr 2024 17:18:34 +0200
+
+The documentation is wrong and relying on it almost resulted in BUGs in
+new callers: we return -EAGAIN on unexpected folio references, not -EBUSY.
+
+Let's fix that and also document which other return values we can
+currently see and why they could happen.
+
+Link: https://lkml.kernel.org/r/20240418151834.216557-1-david@redhat.com
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ mm/huge_memory.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/mm/huge_memory.c~mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation
++++ a/mm/huge_memory.c
+@@ -2956,7 +2956,7 @@ bool can_split_folio(struct folio *folio
+ *
+ * 3) The folio must not be pinned. Any unexpected folio references, including
+ * GUP pins, will result in the folio not getting split; instead, the caller
+- * will receive an -EBUSY.
++ * will receive an -EAGAIN.
+ *
+ * 4) @new_order > 1, usually. Splitting to order-1 anonymous folios is not
+ * supported for non-file-backed folios, because folio->_deferred_list, which
+@@ -2975,8 +2975,15 @@ bool can_split_folio(struct folio *folio
+ *
+ * Returns 0 if the huge page was split successfully.
+ *
+- * Returns -EBUSY if @page's folio is pinned, or if the anon_vma disappeared
+- * from under us.
++ * Returns -EAGAIN if the folio has unexpected reference (e.g., GUP).
++ *
++ * Returns -EBUSY when trying to split the huge zeropage, if the folio is
++ * under writeback, if fs-specific folio metadata cannot currently be
++ * released, or if some unexpected race happened (e.g., anon VMA disappeared,
++ * truncation).
++ *
++ * Returns -EINVAL when trying to split to an order that is incompatible
++ * with the folio. Splitting to order 0 is compatible with all folios.
+ */
+ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+ unsigned int new_order)
+_
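
A hedged caller sketch reflecting the documented return values; the function
name and signature come from the hunk above, while the handling policy is
purely illustrative and assumes the caller already holds the folio lock as
the function requires.

static int try_split_to_order(struct page *page, unsigned int new_order)
{
        int ret = split_huge_page_to_list_to_order(page, NULL, new_order);

        switch (ret) {
        case 0:
                return 0;       /* split succeeded */
        case -EAGAIN:
                return ret;     /* unexpected references, e.g. GUP pins; may retry later */
        case -EBUSY:
                return ret;     /* zeropage, writeback, fs metadata, or a race; back off */
        case -EINVAL:
        default:
                return ret;     /* requested order incompatible with this folio */
        }
}
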
diff --git a/patches/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.patch b/patches/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.patch
new file mode 100644
index 000000000..38a1b498d
--- /dev/null
+++ b/patches/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.patch
@@ -0,0 +1,190 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/madvise: introduce clear_young_dirty_ptes() batch helper
+Date: Thu, 18 Apr 2024 21:44:32 +0800
+
+Patch series "mm/madvise: enhance lazyfreeing with mTHP in madvise_free",
+v10.
+
+This patchset adds support for lazyfreeing multi-size THP (mTHP) without
+needing to first split the large folio via split_folio(). However, we
+still need to split a large folio that is not fully mapped within the
+target range.
+
+If a large folio is locked or shared, or if we fail to split it, we just
+leave it in place and advance to the next PTE in the range. But note that
+the behavior is changed; previously, any failure of this sort would cause
+the entire operation to give up. As large folios become more common,
+sticking to the old way could result in wasted opportunities.
+
+Performance Testing
+===================
+
+On an Intel I5 CPU, lazyfreeing a 1GiB VMA backed by PTE-mapped folios of
+the same size results in the following runtimes for madvise(MADV_FREE) in
+seconds (shorter is better):
+
+Folio Size | Old | New | Change
+------------------------------------------
+ 4KiB | 0.590251 | 0.590259 | 0%
+ 16KiB | 2.990447 | 0.185655 | -94%
+ 32KiB | 2.547831 | 0.104870 | -95%
+ 64KiB | 2.457796 | 0.052812 | -97%
+ 128KiB | 2.281034 | 0.032777 | -99%
+ 256KiB | 2.230387 | 0.017496 | -99%
+ 512KiB | 2.189106 | 0.010781 | -99%
+ 1024KiB | 2.183949 | 0.007753 | -99%
+ 2048KiB | 0.002799 | 0.002804 | 0%
+
+
+This patch (of 4):
+
+This commit introduces clear_young_dirty_ptes() to replace mkold_ptes().
+By doing so, we can use the same function for both use cases
+(madvise_pageout and madvise_free), and it also provides the flexibility
+to only clear the dirty flag in the future if needed.
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-1-ioworker0@gmail.com
+Link: https://lkml.kernel.org/r/20240418134435.6092-2-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Suggested-by: Ryan Roberts <ryan.roberts@arm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ include/linux/mm_types.h | 9 ++++
+ include/linux/pgtable.h | 74 ++++++++++++++++++++++---------------
+ mm/madvise.c | 3 +
+ 3 files changed, 55 insertions(+), 31 deletions(-)
+
+--- a/include/linux/mm_types.h~mm-madvise-introduce-clear_young_dirty_ptes-batch-helper
++++ a/include/linux/mm_types.h
+@@ -1368,6 +1368,15 @@ enum fault_flag {
+
+ typedef unsigned int __bitwise zap_flags_t;
+
++/* Flags for clear_young_dirty_ptes(). */
++typedef int __bitwise cydp_t;
++
++/* Clear the access bit */
++#define CYDP_CLEAR_YOUNG ((__force cydp_t)BIT(0))
++
++/* Clear the dirty bit */
++#define CYDP_CLEAR_DIRTY ((__force cydp_t)BIT(1))
++
+ /*
+ * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
+ * other. Here is what they mean, and how to use them:
+--- a/include/linux/pgtable.h~mm-madvise-introduce-clear_young_dirty_ptes-batch-helper
++++ a/include/linux/pgtable.h
+@@ -361,36 +361,6 @@ static inline int ptep_test_and_clear_yo
+ }
+ #endif
+
+-#ifndef mkold_ptes
+-/**
+- * mkold_ptes - Mark PTEs that map consecutive pages of the same folio as old.
+- * @vma: VMA the pages are mapped into.
+- * @addr: Address the first page is mapped at.
+- * @ptep: Page table pointer for the first entry.
+- * @nr: Number of entries to mark old.
+- *
+- * May be overridden by the architecture; otherwise, implemented as a simple
+- * loop over ptep_test_and_clear_young().
+- *
+- * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+- * some PTEs might be write-protected.
+- *
+- * Context: The caller holds the page table lock. The PTEs map consecutive
+- * pages that belong to the same folio. The PTEs are all in the same PMD.
+- */
+-static inline void mkold_ptes(struct vm_area_struct *vma, unsigned long addr,
+- pte_t *ptep, unsigned int nr)
+-{
+- for (;;) {
+- ptep_test_and_clear_young(vma, addr, ptep);
+- if (--nr == 0)
+- break;
+- ptep++;
+- addr += PAGE_SIZE;
+- }
+-}
+-#endif
+-
+ #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+ #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
+ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+@@ -489,6 +459,50 @@ static inline pte_t ptep_get_and_clear(s
+ }
+ #endif
+
++#ifndef clear_young_dirty_ptes
++/**
++ * clear_young_dirty_ptes - Mark PTEs that map consecutive pages of the
++ * same folio as old/clean.
++ * @vma: VMA the pages are mapped into.
++ * @addr: Address the first page is mapped at.
++ * @ptep: Page table pointer for the first entry.
++ * @nr: Number of entries to mark old/clean.
++ * @flags: Flags to modify the PTE batch semantics.
++ *
++ * May be overridden by the architecture; otherwise, implemented by
++ * get_and_clear/modify/set for each pte in the range.
++ *
++ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
++ * some PTEs might be write-protected.
++ *
++ * Context: The caller holds the page table lock. The PTEs map consecutive
++ * pages that belong to the same folio. The PTEs are all in the same PMD.
++ */
++static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
++ unsigned long addr, pte_t *ptep,
++ unsigned int nr, cydp_t flags)
++{
++ pte_t pte;
++
++ for (;;) {
++ if (flags == CYDP_CLEAR_YOUNG)
++ ptep_test_and_clear_young(vma, addr, ptep);
++ else {
++ pte = ptep_get_and_clear(vma->vm_mm, addr, ptep);
++ if (flags & CYDP_CLEAR_YOUNG)
++ pte = pte_mkold(pte);
++ if (flags & CYDP_CLEAR_DIRTY)
++ pte = pte_mkclean(pte);
++ set_pte_at(vma->vm_mm, addr, ptep, pte);
++ }
++ if (--nr == 0)
++ break;
++ ptep++;
++ addr += PAGE_SIZE;
++ }
++}
++#endif
++
+ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+ {
+--- a/mm/madvise.c~mm-madvise-introduce-clear_young_dirty_ptes-batch-helper
++++ a/mm/madvise.c
+@@ -507,7 +507,8 @@ restart:
+ continue;
+
+ if (!pageout && pte_young(ptent)) {
+- mkold_ptes(vma, addr, pte, nr);
++ clear_young_dirty_ptes(vma, addr, pte, nr,
++ CYDP_CLEAR_YOUNG);
+ tlb_remove_tlb_entries(tlb, pte, nr, addr);
+ }
+
+_
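
The two call patterns this series relies on look like the sketch below; both
lines are assembled from the madvise hunks in this and the following patches
rather than being new API.

/* madvise_cold_or_pageout_pte_range(): age the whole PTE batch, keep the
 * dirty bits so reclaim still knows the pages need writeback. */
clear_young_dirty_ptes(vma, addr, pte, nr, CYDP_CLEAR_YOUNG);

/* madvise_free_pte_range(): mark the batch old and clean so MADV_FREE'd
 * folios can be discarded instead of swapped out. */
clear_young_dirty_ptes(vma, addr, pte, nr,
                       CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY);
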
diff --git a/patches/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.patch b/patches/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.patch
new file mode 100644
index 000000000..ce8bf7ebe
--- /dev/null
+++ b/patches/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.patch
@@ -0,0 +1,172 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/madvise: optimize lazyfreeing with mTHP in madvise_free
+Date: Thu, 18 Apr 2024 21:44:35 +0800
+
+This patch optimizes lazyfreeing with PTE-mapped mTHP[1] (inspired by
+David Hildenbrand[2]). We aim to avoid unnecessary folio splitting if the
+large folio is fully mapped within the target range.
+
+If a large folio is locked or shared, or if we fail to split it, we just
+leave it in place and advance to the next PTE in the range. But note that
+the behavior is changed; previously, any failure of this sort would cause
+the entire operation to give up. As large folios become more common,
+sticking to the old way could result in wasted opportunities.
+
+On an Intel I5 CPU, lazyfreeing a 1GiB VMA backed by PTE-mapped folios of
+the same size results in the following runtimes for madvise(MADV_FREE) in
+seconds (shorter is better):
+
+Folio Size | Old | New | Change
+------------------------------------------
+ 4KiB | 0.590251 | 0.590259 | 0%
+ 16KiB | 2.990447 | 0.185655 | -94%
+ 32KiB | 2.547831 | 0.104870 | -95%
+ 64KiB | 2.457796 | 0.052812 | -97%
+ 128KiB | 2.281034 | 0.032777 | -99%
+ 256KiB | 2.230387 | 0.017496 | -99%
+ 512KiB | 2.189106 | 0.010781 | -99%
+ 1024KiB | 2.183949 | 0.007753 | -99%
+ 2048KiB | 0.002799 | 0.002804 | 0%
+
+[1] https://lkml.kernel.org/r/20231207161211.2374093-5-ryan.roberts@arm.com
+[2] https://lore.kernel.org/linux-mm/20240214204435.167852-1-david@redhat.com
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-5-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ mm/madvise.c | 85 +++++++++++++++++++++++++------------------------
+ 1 file changed, 44 insertions(+), 41 deletions(-)
+
+--- a/mm/madvise.c~mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free
++++ a/mm/madvise.c
+@@ -643,6 +643,7 @@ static int madvise_free_pte_range(pmd_t
+ unsigned long end, struct mm_walk *walk)
+
+ {
++ const cydp_t cydp_flags = CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY;
+ struct mmu_gather *tlb = walk->private;
+ struct mm_struct *mm = tlb->mm;
+ struct vm_area_struct *vma = walk->vma;
+@@ -697,44 +698,57 @@ static int madvise_free_pte_range(pmd_t
+ continue;
+
+ /*
+- * If pmd isn't transhuge but the folio is large and
+- * is owned by only this process, split it and
+- * deactivate all pages.
++ * If we encounter a large folio, only split it if it is not
++ * fully mapped within the range we are operating on. Otherwise
++ * leave it as is so that it can be marked as lazyfree. If we
++ * fail to split a folio, leave it in place and advance to the
++ * next pte in the range.
+ */
+ if (folio_test_large(folio)) {
+- int err;
++ bool any_young, any_dirty;
+
+- if (folio_likely_mapped_shared(folio))
+- break;
+- if (!folio_trylock(folio))
+- break;
+- folio_get(folio);
+- arch_leave_lazy_mmu_mode();
+- pte_unmap_unlock(start_pte, ptl);
+- start_pte = NULL;
+- err = split_folio(folio);
+- folio_unlock(folio);
+- folio_put(folio);
+- if (err)
+- break;
+- start_pte = pte =
+- pte_offset_map_lock(mm, pmd, addr, &ptl);
+- if (!start_pte)
+- break;
+- arch_enter_lazy_mmu_mode();
+- pte--;
+- addr -= PAGE_SIZE;
+- continue;
++ nr = madvise_folio_pte_batch(addr, end, folio, pte,
++ ptent, &any_young, &any_dirty);
++
++ if (nr < folio_nr_pages(folio)) {
++ int err;
++
++ if (folio_likely_mapped_shared(folio))
++ continue;
++ if (!folio_trylock(folio))
++ continue;
++ folio_get(folio);
++ arch_leave_lazy_mmu_mode();
++ pte_unmap_unlock(start_pte, ptl);
++ start_pte = NULL;
++ err = split_folio(folio);
++ folio_unlock(folio);
++ folio_put(folio);
++ pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
++ start_pte = pte;
++ if (!start_pte)
++ break;
++ arch_enter_lazy_mmu_mode();
++ if (!err)
++ nr = 0;
++ continue;
++ }
++
++ if (any_young)
++ ptent = pte_mkyoung(ptent);
++ if (any_dirty)
++ ptent = pte_mkdirty(ptent);
+ }
+
+ if (folio_test_swapcache(folio) || folio_test_dirty(folio)) {
+ if (!folio_trylock(folio))
+ continue;
+ /*
+- * If folio is shared with others, we mustn't clear
+- * the folio's dirty flag.
++ * If we have a large folio at this point, we know it is
++ * fully mapped so if its mapcount is the same as its
++ * number of pages, it must be exclusive.
+ */
+- if (folio_mapcount(folio) != 1) {
++ if (folio_mapcount(folio) != folio_nr_pages(folio)) {
+ folio_unlock(folio);
+ continue;
+ }
+@@ -750,19 +764,8 @@ static int madvise_free_pte_range(pmd_t
+ }
+
+ if (pte_young(ptent) || pte_dirty(ptent)) {
+- /*
+- * Some of architecture(ex, PPC) don't update TLB
+- * with set_pte_at and tlb_remove_tlb_entry so for
+- * the portability, remap the pte with old|clean
+- * after pte clearing.
+- */
+- ptent = ptep_get_and_clear_full(mm, addr, pte,
+- tlb->fullmm);
+-
+- ptent = pte_mkold(ptent);
+- ptent = pte_mkclean(ptent);
+- set_pte_at(mm, addr, pte, ptent);
+- tlb_remove_tlb_entry(tlb, pte, addr);
++ clear_young_dirty_ptes(vma, addr, pte, nr, cydp_flags);
++ tlb_remove_tlb_entries(tlb, pte, nr, addr);
+ }
+ folio_mark_lazyfree(folio);
+ }
+_
diff --git a/patches/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.patch b/patches/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.patch
new file mode 100644
index 000000000..64221bb7d
--- /dev/null
+++ b/patches/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.patch
@@ -0,0 +1,142 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/memory: add any_dirty optional pointer to folio_pte_batch()
+Date: Thu, 18 Apr 2024 21:44:34 +0800
+
+This commit adds the any_dirty pointer as an optional parameter to the
+folio_pte_batch() function. By using both the any_young and any_dirty
+pointers, madvise_free can make smarter decisions about whether to clear
+the PTEs when marking large folios as lazyfree.
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-4-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ mm/internal.h | 12 ++++++++++--
+ mm/madvise.c | 19 ++++++++++++++-----
+ mm/memory.c | 4 ++--
+ 3 files changed, 26 insertions(+), 9 deletions(-)
+
+--- a/mm/internal.h~mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch
++++ a/mm/internal.h
+@@ -134,6 +134,8 @@ static inline pte_t __pte_batch_clear_ig
+ * first one is writable.
+ * @any_young: Optional pointer to indicate whether any entry except the
+ * first one is young.
++ * @any_dirty: Optional pointer to indicate whether any entry except the
++ * first one is dirty.
+ *
+ * Detect a PTE batch: consecutive (present) PTEs that map consecutive
+ * pages of the same large folio.
+@@ -149,18 +151,20 @@ static inline pte_t __pte_batch_clear_ig
+ */
+ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
+ pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags,
+- bool *any_writable, bool *any_young)
++ bool *any_writable, bool *any_young, bool *any_dirty)
+ {
+ unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio);
+ const pte_t *end_ptep = start_ptep + max_nr;
+ pte_t expected_pte, *ptep;
+- bool writable, young;
++ bool writable, young, dirty;
+ int nr;
+
+ if (any_writable)
+ *any_writable = false;
+ if (any_young)
+ *any_young = false;
++ if (any_dirty)
++ *any_dirty = false;
+
+ VM_WARN_ON_FOLIO(!pte_present(pte), folio);
+ VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
+@@ -176,6 +180,8 @@ static inline int folio_pte_batch(struct
+ writable = !!pte_write(pte);
+ if (any_young)
+ young = !!pte_young(pte);
++ if (any_dirty)
++ dirty = !!pte_dirty(pte);
+ pte = __pte_batch_clear_ignored(pte, flags);
+
+ if (!pte_same(pte, expected_pte))
+@@ -193,6 +199,8 @@ static inline int folio_pte_batch(struct
+ *any_writable |= writable;
+ if (any_young)
+ *any_young |= young;
++ if (any_dirty)
++ *any_dirty |= dirty;
+
+ nr = pte_batch_hint(ptep, pte);
+ expected_pte = pte_advance_pfn(expected_pte, nr);
+--- a/mm/madvise.c~mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch
++++ a/mm/madvise.c
+@@ -321,6 +321,18 @@ static inline bool can_do_file_pageout(s
+ file_permission(vma->vm_file, MAY_WRITE) == 0;
+ }
+
++static inline int madvise_folio_pte_batch(unsigned long addr, unsigned long end,
++ struct folio *folio, pte_t *ptep,
++ pte_t pte, bool *any_young,
++ bool *any_dirty)
++{
++ const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
++ int max_nr = (end - addr) / PAGE_SIZE;
++
++ return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
++ any_young, any_dirty);
++}
++
+ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+@@ -456,13 +468,10 @@ restart:
+ * next pte in the range.
+ */
+ if (folio_test_large(folio)) {
+- const fpb_t fpb_flags = FPB_IGNORE_DIRTY |
+- FPB_IGNORE_SOFT_DIRTY;
+- int max_nr = (end - addr) / PAGE_SIZE;
+ bool any_young;
+
+- nr = folio_pte_batch(folio, addr, pte, ptent, max_nr,
+- fpb_flags, NULL, &any_young);
++ nr = madvise_folio_pte_batch(addr, end, folio, pte,
++ ptent, &any_young, NULL);
+ if (any_young)
+ ptent = pte_mkyoung(ptent);
+
+--- a/mm/memory.c~mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch
++++ a/mm/memory.c
+@@ -989,7 +989,7 @@ copy_present_ptes(struct vm_area_struct
+ flags |= FPB_IGNORE_SOFT_DIRTY;
+
+ nr = folio_pte_batch(folio, addr, src_pte, pte, max_nr, flags,
+- &any_writable, NULL);
++ &any_writable, NULL, NULL);
+ folio_ref_add(folio, nr);
+ if (folio_test_anon(folio)) {
+ if (unlikely(folio_try_dup_anon_rmap_ptes(folio, page,
+@@ -1558,7 +1558,7 @@ static inline int zap_present_ptes(struc
+ */
+ if (unlikely(folio_test_large(folio) && max_nr != 1)) {
+ nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, fpb_flags,
+- NULL, NULL);
++ NULL, NULL, NULL);
+
+ zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, nr,
+ addr, details, rss, force_flush,
+_
diff --git a/patches/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.patch b/patches/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.patch
new file mode 100644
index 000000000..e1aeb16b7
--- /dev/null
+++ b/patches/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.patch
@@ -0,0 +1,58 @@
+From: Kefeng Wang <wangkefeng.wang@huawei.com>
+Subject: mm: swapfile: check usable swap device in __folio_throttle_swaprate()
+Date: Thu, 18 Apr 2024 21:56:44 +0800
+
+Skip blk_cgroup_congested() if there is no usable swap device, since no
+swapin/out will occur; this also avoids taking swap_lock. The difference is
+shown below in perf data from a CoW page fault workload:
+
+ perf report -g -i perf.data.swapoff | egrep "blk_cgroup_congested|__folio_throttle_swaprate"
+ 1.01% 0.16% page_fault2_pro [kernel.kallsyms] [k] __folio_throttle_swaprate
+ 0.83% 0.80% page_fault2_pro [kernel.kallsyms] [k] blk_cgroup_congested
+
+ perf report -g -i perf.data.swapon | egrep "blk_cgroup_congested|__folio_throttle_swaprate"
+ 0.15% 0.15% page_fault2_pro [kernel.kallsyms] [k] __folio_throttle_swaprate
+
+Link: https://lkml.kernel.org/r/20240418135644.2736748-1-wangkefeng.wang@huawei.com
+Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Tejun Heo <tj@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ mm/swapfile.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/mm/swapfile.c~mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate
++++ a/mm/swapfile.c
+@@ -2444,13 +2444,17 @@ static void reinsert_swap_info(struct sw
+ spin_unlock(&swap_lock);
+ }
+
++static bool __has_usable_swap(void)
++{
++ return !plist_head_empty(&swap_active_head);
++}
++
+ bool has_usable_swap(void)
+ {
+- bool ret = true;
++ bool ret;
+
+ spin_lock(&swap_lock);
+- if (plist_head_empty(&swap_active_head))
+- ret = false;
++ ret = __has_usable_swap();
+ spin_unlock(&swap_lock);
+ return ret;
+ }
+@@ -3710,6 +3714,9 @@ void __folio_throttle_swaprate(struct fo
+ if (!(gfp & __GFP_IO))
+ return;
+
++ if (!__has_usable_swap())
++ return;
++
+ if (!blk_cgroup_congested())
+ return;
+
+_
diff --git a/patches/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.patch b/patches/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.patch
new file mode 100644
index 000000000..21eaef699
--- /dev/null
+++ b/patches/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.patch
@@ -0,0 +1,76 @@
+From: Johannes Weiner <hannes@cmpxchg.org>
+Subject: mm: zswap: fix shrinker NULL crash with cgroup_disable=memory
+Date: Thu, 18 Apr 2024 08:26:28 -0400
+
+Christian reports a NULL deref in zswap that he bisected down to the zswap
+shrinker. The issue also cropped up in the bug trackers of libguestfs [1]
+and the Red Hat bugzilla [2].
+
+The problem is that when memcg is disabled with the boot time flag, the
+zswap shrinker might get called with sc->memcg == NULL. This is okay in
+many places, like the lruvec operations. But it crashes in
+memcg_page_state() - which is only used because the cgroup's zswap memory
+is accounted per-memcg rather than per-node to begin with.
+
+Nhat spotted that the memcg can be NULL in the memcg-disabled case, and I
+was then able to reproduce the crash locally as well.
+
+[1] https://github.com/libguestfs/libguestfs/issues/139
+[2] https://bugzilla.redhat.com/show_bug.cgi?id=2275252
+
+Link: https://lkml.kernel.org/r/20240418124043.GC1055428@cmpxchg.org
+Link: https://lkml.kernel.org/r/20240417143324.GA1055428@cmpxchg.org
+Fixes: b5ba474f3f51 ("zswap: shrink zswap pool based on memory pressure")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Christian Heusel <christian@heusel.eu>
+Debugged-by: Nhat Pham <nphamcs@gmail.com>
+Suggested-by: Nhat Pham <nphamcs@gmail.com>
+Tested-by: Christian Heusel <christian@heusel.eu>
+Cc: Chengming Zhou <chengming.zhou@linux.dev>
+Cc: Dan Streetman <ddstreet@ieee.org>
+Cc: Richard W.M. Jones <rjones@redhat.com>
+Cc: Seth Jennings <sjenning@redhat.com>
+Cc: Vitaly Wool <vitaly.wool@konsulko.com>
+Cc: Yosry Ahmed <yosryahmed@google.com>
+Cc: <stable@vger.kernel.org> [v6.8]
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ mm/zswap.c | 25 ++++++++++++++++---------
+ 1 file changed, 16 insertions(+), 9 deletions(-)
+
+--- a/mm/zswap.c~mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory
++++ a/mm/zswap.c
+@@ -1331,15 +1331,22 @@ static unsigned long zswap_shrinker_coun
+ if (!gfp_has_io_fs(sc->gfp_mask))
+ return 0;
+
+-#ifdef CONFIG_MEMCG_KMEM
+- mem_cgroup_flush_stats(memcg);
+- nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
+- nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
+-#else
+- /* use pool stats instead of memcg stats */
+- nr_backing = zswap_pool_total_size >> PAGE_SHIFT;
+- nr_stored = atomic_read(&zswap_nr_stored);
+-#endif
++ /*
++ * For memcg, use the cgroup-wide ZSWAP stats since we don't
++ * have them per-node and thus per-lruvec. Careful if memcg is
++ * runtime-disabled: we can get sc->memcg == NULL, which is ok
++ * for the lruvec, but not for memcg_page_state().
++ *
++ * Without memcg, use the zswap pool-wide metrics.
++ */
++ if (!mem_cgroup_disabled()) {
++ mem_cgroup_flush_stats(memcg);
++ nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
++ nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
++ } else {
++ nr_backing = zswap_pool_total_size >> PAGE_SHIFT;
++ nr_stored = atomic_read(&zswap_nr_stored);
++ }
+
+ if (!nr_stored)
+ return 0;
+_
diff --git a/patches/null-pointer-dereference-while-shrinking-zswap.patch b/patches/old/null-pointer-dereference-while-shrinking-zswap.patch
index a94d69056..a94d69056 100644
--- a/patches/null-pointer-dereference-while-shrinking-zswap.patch
+++ b/patches/old/null-pointer-dereference-while-shrinking-zswap.patch
diff --git a/patches/stackdepot-respect-__gfp_nolockdep-allocation-flag.patch b/patches/stackdepot-respect-__gfp_nolockdep-allocation-flag.patch
new file mode 100644
index 000000000..09dab83ca
--- /dev/null
+++ b/patches/stackdepot-respect-__gfp_nolockdep-allocation-flag.patch
@@ -0,0 +1,90 @@
+From: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+Subject: stackdepot: respect __GFP_NOLOCKDEP allocation flag
+Date: Thu, 18 Apr 2024 16:11:33 +0200
+
+If stack_depot_save_flags() allocates memory it always drops the
+__GFP_NOLOCKDEP flag. So when KASAN tries to track a __GFP_NOLOCKDEP
+allocation we may end up with a lockdep splat like the one below:
+
+======================================================
+ WARNING: possible circular locking dependency detected
+ 6.9.0-rc3+ #49 Not tainted
+ ------------------------------------------------------
+ kswapd0/149 is trying to acquire lock:
+ ffff88811346a920
+(&xfs_nondir_ilock_class){++++}-{4:4}, at: xfs_reclaim_inode+0x3ac/0x590
+[xfs]
+
+ but task is already holding lock:
+ ffffffff8bb33100 (fs_reclaim){+.+.}-{0:0}, at:
+balance_pgdat+0x5d9/0xad0
+
+ which lock already depends on the new lock.
+
+ the existing dependency chain (in reverse order) is:
+ -> #1 (fs_reclaim){+.+.}-{0:0}:
+ __lock_acquire+0x7da/0x1030
+ lock_acquire+0x15d/0x400
+ fs_reclaim_acquire+0xb5/0x100
+ prepare_alloc_pages.constprop.0+0xc5/0x230
+ __alloc_pages+0x12a/0x3f0
+ alloc_pages_mpol+0x175/0x340
+ stack_depot_save_flags+0x4c5/0x510
+ kasan_save_stack+0x30/0x40
+ kasan_save_track+0x10/0x30
+ __kasan_slab_alloc+0x83/0x90
+ kmem_cache_alloc+0x15e/0x4a0
+ __alloc_object+0x35/0x370
+ __create_object+0x22/0x90
+ __kmalloc_node_track_caller+0x477/0x5b0
+ krealloc+0x5f/0x110
+ xfs_iext_insert_raw+0x4b2/0x6e0 [xfs]
+ xfs_iext_insert+0x2e/0x130 [xfs]
+ xfs_iread_bmbt_block+0x1a9/0x4d0 [xfs]
+ xfs_btree_visit_block+0xfb/0x290 [xfs]
+ xfs_btree_visit_blocks+0x215/0x2c0 [xfs]
+ xfs_iread_extents+0x1a2/0x2e0 [xfs]
+ xfs_buffered_write_iomap_begin+0x376/0x10a0 [xfs]
+ iomap_iter+0x1d1/0x2d0
+ iomap_file_buffered_write+0x120/0x1a0
+ xfs_file_buffered_write+0x128/0x4b0 [xfs]
+ vfs_write+0x675/0x890
+ ksys_write+0xc3/0x160
+ do_syscall_64+0x94/0x170
+ entry_SYSCALL_64_after_hwframe+0x71/0x79
+
+Always preserve __GFP_NOLOCKDEP to fix this.
+
+Link: https://lkml.kernel.org/r/20240418141133.22950-1-ryabinin.a.a@gmail.com
+Fixes: cd11016e5f52 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB")
+Signed-off-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+Reported-by: Xiubo Li <xiubli@redhat.com>
+Closes: https://lore.kernel.org/all/a0caa289-ca02-48eb-9bf2-d86fd47b71f4@redhat.com/
+Reported-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Closes: https://lore.kernel.org/all/f9ff999a-e170-b66b-7caf-293f2b147ac2@opensource.wdc.com/
+Suggested-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+
+ lib/stackdepot.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/lib/stackdepot.c~stackdepot-respect-__gfp_nolockdep-allocation-flag
++++ a/lib/stackdepot.c
+@@ -627,10 +627,10 @@ depot_stack_handle_t stack_depot_save_fl
+ /*
+ * Zero out zone modifiers, as we don't have specific zone
+ * requirements. Keep the flags related to allocation in atomic
+- * contexts and I/O.
++ * contexts, I/O, nolockdep.
+ */
+ alloc_flags &= ~GFP_ZONEMASK;
+- alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
++ alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP);
+ alloc_flags |= __GFP_NOWARN;
+ page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER);
+ if (page)
+_
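
The fix itself is pure flag arithmetic; a small sketch of why the old mask
lost the caller's intent (the helper name is made up for illustration, the
GFP flags are the real ones):

static gfp_t depot_gfp_sketch(gfp_t alloc_flags)
{
        /* Zone modifiers are irrelevant for the depot pools. */
        alloc_flags &= ~GFP_ZONEMASK;
        /* The old mask was (GFP_ATOMIC | GFP_KERNEL); __GFP_NOLOCKDEP is not
         * contained in either, so a reclaim-context caller's flag was
         * silently cleared.  Adding it to the mask preserves it. */
        alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP);
        return alloc_flags | __GFP_NOWARN;
}
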
diff --git a/pc/crash-add-prefix-for-crash-dumping-messages.pc b/pc/crash-add-prefix-for-crash-dumping-messages.pc
new file mode 100644
index 000000000..76b71fde8
--- /dev/null
+++ b/pc/crash-add-prefix-for-crash-dumping-messages.pc
@@ -0,0 +1,2 @@
+kernel/crash_core.c
+kernel/crash_reserve.c
diff --git a/pc/devel-series b/pc/devel-series
index 96756605d..c7370b154 100644
--- a/pc/devel-series
+++ b/pc/devel-series
@@ -109,10 +109,24 @@ selftests-harness-remove-use-of-line_max-fix-fix-fix.patch
#
selftests-mm-fix-unused-and-uninitialized-variable-warning.patch
#
-null-pointer-dereference-while-shrinking-zswap.patch
#
mm-hugetlb-fix-missing-hugetlb_lock-for-resv-uncharge.patch
#
+mm-create-folio_flag_false-and-folio_type_ops-macros.patch
+mm-support-page_mapcount-on-page_has_type-pages.patch
+mm-turn-folio_test_hugetlb-into-a-pagetype.patch
+mm-turn-folio_test_hugetlb-into-a-pagetype-fix.patch
+#
+#mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.patch: https://lkml.kernel.org/r/CAJD7tkaPMQqQtfxcLWraz-vnbAxZKxuJRJ7vKuDOCCXtpBSF1A@mail.gmail.com
+mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.patch
+#
+#hugetlb-check-for-anon_vma-prior-to-folio-allocation.patch: syzbot testing
+hugetlb-check-for-anon_vma-prior-to-folio-allocation.patch
+#
+stackdepot-respect-__gfp_nolockdep-allocation-flag.patch
+#
+init-fix-allocated-page-overlapping-with-ptr_err.patch
+#
### hfe
#
#ENDBRANCH mm-hotfixes-unstable
@@ -234,11 +248,7 @@ mm-change-inlined-allocation-helpers-to-account-at-the-call-site.patch
#
mm-always-initialise-folio-_deferred_list.patch
mm-always-initialise-folio-_deferred_list-fix.patch
-mm-create-folio_flag_false-and-folio_type_ops-macros.patch
mm-remove-folio_prep_large_rmappable.patch
-mm-support-page_mapcount-on-page_has_type-pages.patch
-mm-turn-folio_test_hugetlb-into-a-pagetype.patch
-mm-turn-folio_test_hugetlb-into-a-pagetype-fix.patch
mm-remove-a-call-to-compound_head-from-is_page_hwpoison.patch
#mm-free-up-pg_slab.patch: check review https://lkml.kernel.org/r/202403312344.c0d273ab-oliver.sang@intel.com
mm-free-up-pg_slab.patch
@@ -606,10 +616,12 @@ mm-filemap-batch-mm-counter-updating-in-filemap_map_pages.patch
#
mm-page_alloc-allowing-mthp-compaction-to-capture-the-freed-page-directly.patch
#
+#mseal-wire-up-mseal-syscall.patch: https://lkml.kernel.org/r/CAJuCfpFLwJg4n7wPpT+u9vC4XHoLE_BPPZ0tDKf7W45hGky4_Q@mail.gmail.com
mseal-wire-up-mseal-syscall.patch
mseal-add-mseal-syscall.patch
selftest-mm-mseal-memory-sealing.patch
mseal-add-documentation.patch
+#selftest-mm-mseal-read-only-elf-memory-segment.patch: https://lkml.kernel.org/r/CA+G9fYvacWNZsmizotfcwD35xBq0999_EAV0wZgwjdi46yivgg@mail.gmail.com
selftest-mm-mseal-read-only-elf-memory-segment.patch
selftest-mm-mseal-read-only-elf-memory-segment-fix.patch
#
@@ -645,6 +657,15 @@ mm-hugetlb-assert-hugetlb_lock-in-__hugetlb_cgroup_commit_charge.patch
#
mm-page_table_check-support-userfault-wr-protect-entries.patch
#
+mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.patch
+#
+mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.patch
+#
+mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.patch
+mm-arm64-override-clear_young_dirty_ptes-batch-helper.patch
+mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.patch
+mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.patch
+#
#
#
#
@@ -797,4 +818,6 @@ selftests-exec-make-binaries-position-independent.patch
#
cpumask-delete-unused-reset_cpu_possible_mask.patch
#
+crash-add-prefix-for-crash-dumping-messages.patch
+#
#ENDBRANCH mm-nonmm-unstable
diff --git a/pc/hugetlb-check-for-anon_vma-prior-to-folio-allocation.pc b/pc/hugetlb-check-for-anon_vma-prior-to-folio-allocation.pc
new file mode 100644
index 000000000..6dc98425d
--- /dev/null
+++ b/pc/hugetlb-check-for-anon_vma-prior-to-folio-allocation.pc
@@ -0,0 +1 @@
+mm/hugetlb.c
diff --git a/pc/init-fix-allocated-page-overlapping-with-ptr_err.pc b/pc/init-fix-allocated-page-overlapping-with-ptr_err.pc
new file mode 100644
index 000000000..1b7210806
--- /dev/null
+++ b/pc/init-fix-allocated-page-overlapping-with-ptr_err.pc
@@ -0,0 +1 @@
+init/main.c
diff --git a/pc/mm-arm64-override-clear_young_dirty_ptes-batch-helper.pc b/pc/mm-arm64-override-clear_young_dirty_ptes-batch-helper.pc
new file mode 100644
index 000000000..ba3c28b5d
--- /dev/null
+++ b/pc/mm-arm64-override-clear_young_dirty_ptes-batch-helper.pc
@@ -0,0 +1,2 @@
+arch/arm64/include/asm/pgtable.h
+arch/arm64/mm/contpte.c
diff --git a/pc/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.pc b/pc/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.pc
new file mode 100644
index 000000000..b35bccbe3
--- /dev/null
+++ b/pc/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.pc
@@ -0,0 +1 @@
+mm/huge_memory.c
diff --git a/pc/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.pc b/pc/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.pc
new file mode 100644
index 000000000..006118fb2
--- /dev/null
+++ b/pc/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.pc
@@ -0,0 +1,3 @@
+include/linux/mm_types.h
+include/linux/pgtable.h
+mm/madvise.c
diff --git a/pc/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.pc b/pc/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.pc
new file mode 100644
index 000000000..74d58a564
--- /dev/null
+++ b/pc/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.pc
@@ -0,0 +1 @@
+mm/madvise.c
diff --git a/pc/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.pc b/pc/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.pc
new file mode 100644
index 000000000..8491d45ba
--- /dev/null
+++ b/pc/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.pc
@@ -0,0 +1,3 @@
+mm/internal.h
+mm/madvise.c
+mm/memory.c
diff --git a/pc/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.pc b/pc/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.pc
new file mode 100644
index 000000000..b6b7df785
--- /dev/null
+++ b/pc/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.pc
@@ -0,0 +1 @@
+mm/swapfile.c
diff --git a/pc/null-pointer-dereference-while-shrinking-zswap.pc b/pc/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.pc
index 7f1f05d5c..7f1f05d5c 100644
--- a/pc/null-pointer-dereference-while-shrinking-zswap.pc
+++ b/pc/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.pc
diff --git a/pc/stackdepot-respect-__gfp_nolockdep-allocation-flag.pc b/pc/stackdepot-respect-__gfp_nolockdep-allocation-flag.pc
new file mode 100644
index 000000000..0e968ad16
--- /dev/null
+++ b/pc/stackdepot-respect-__gfp_nolockdep-allocation-flag.pc
@@ -0,0 +1 @@
+lib/stackdepot.c
diff --git a/txt/crash-add-prefix-for-crash-dumping-messages.txt b/txt/crash-add-prefix-for-crash-dumping-messages.txt
new file mode 100644
index 000000000..bf92d3291
--- /dev/null
+++ b/txt/crash-add-prefix-for-crash-dumping-messages.txt
@@ -0,0 +1,17 @@
+From: Baoquan He <bhe@redhat.com>
+Subject: crash: add prefix for crash dumping messages
+Date: Thu, 18 Apr 2024 11:58:43 +0800
+
+Add pr_fmt() to kernel/crash_core.c so that the module name is prefixed
+to its debugging messages.
+
+Also add a 'crashkernel:' prefix to two message-printing lines in
+kernel/crash_reserve.c. In that file, almost all debugging messages
+already carry a 'crashkernel:' prefix, or contain the keyword
+crashkernel at the beginning or in the middle, so adding pr_fmt() there
+would be redundant.
+
+Link: https://lkml.kernel.org/r/20240418035843.1562887-1-bhe@redhat.com
+Signed-off-by: Baoquan He <bhe@redhat.com>
+Cc: Dave Young <dyoung@redhat.com>
+Cc: Jiri Slaby <jirislaby@kernel.org>
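
For reference, pr_fmt() works by being defined before any printk-related
include, so every pr_*() call in the file picks up the prefix. A minimal
sketch follows; the exact prefix string used by the patch is an assumption
here, not a quote of it:

/* Sketch only: pr_fmt() must be defined before any include that pulls in
 * <linux/printk.h>, so every pr_*() call in the file gets the prefix. */
#define pr_fmt(fmt)	"crash: " fmt

#include <linux/printk.h>

static void crash_example(void)
{
	pr_warn("memory range not found\n");	/* printed as "crash: memory range not found" */
}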
diff --git a/txt/hugetlb-check-for-anon_vma-prior-to-folio-allocation.txt b/txt/hugetlb-check-for-anon_vma-prior-to-folio-allocation.txt
new file mode 100644
index 000000000..a21998914
--- /dev/null
+++ b/txt/hugetlb-check-for-anon_vma-prior-to-folio-allocation.txt
@@ -0,0 +1,19 @@
+From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
+Subject: hugetlb: check for anon_vma prior to folio allocation
+Date: Mon, 15 Apr 2024 14:17:47 -0700
+
+Commit 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of
+anon_vma_prepare()") may bail out after allocating a folio if we do not
+hold the mmap lock. When this occurs, vmf_anon_prepare() will release the
+vma lock. Hugetlb then attempts to call restore_reserve_on_error(), which
+depends on the vma lock being held.
+
+We can move vmf_anon_prepare() prior to the folio allocation in order to
+avoid calling restore_reserve_on_error() without the vma lock.
+
+Link: https://lkml.kernel.org/r/ZiFqSrSRLhIV91og@fedora
+Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()")
+Reported-by: syzbot+ad1b592fc4483655438b@syzkaller.appspotmail.com
+Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: <stable@vger.kernel.org>
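
A minimal sketch of the reordering described above; the control flow is
simplified, the error paths of hugetlb_no_page() are elided, and the
surrounding variables (vmf, vma, haddr, ret, folio) are assumed from
context rather than quoted from the patch:

	/* Prepare the anon_vma first: if this bails out it may drop the
	 * vma lock, but at this point no folio has been allocated, so
	 * restore_reserve_on_error() is never needed without the lock. */
	if (!(vma->vm_flags & VM_MAYSHARE)) {
		ret = vmf_anon_prepare(vmf);
		if (unlikely(ret))
			goto out;
	}

	/* Only now allocate the hugetlb folio. */
	folio = alloc_hugetlb_folio(vma, haddr, 0);
	if (IS_ERR(folio))
		goto out;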
diff --git a/txt/init-fix-allocated-page-overlapping-with-ptr_err.txt b/txt/init-fix-allocated-page-overlapping-with-ptr_err.txt
new file mode 100644
index 000000000..5d68e1b92
--- /dev/null
+++ b/txt/init-fix-allocated-page-overlapping-with-ptr_err.txt
@@ -0,0 +1,49 @@
+From: Nam Cao <namcao@linutronix.de>
+Subject: init: fix allocated page overlapping with PTR_ERR
+Date: Thu, 18 Apr 2024 12:29:43 +0200
+
+There is nothing preventing kernel memory allocators from allocating a
+page that overlaps with PTR_ERR(), except for the architecture-specific
+code that sets up memblock.
+
+It was discovered that the RISC-V architecture doesn't set up memblock
+correctly, leading to a page overlapping with PTR_ERR() being allocated
+and subsequently crashing the kernel (see the Closes: link below).
+
+The reported crash has nothing to do with PTR_ERR(): the last page (at
+address 0xfffff000) being allocated leads to an unexpected arithmetic
+overflow in ext4; but still, this page shouldn't be allocated in the first
+place.
+
+Because PTR_ERR() is architecture-independent, we shouldn't ask every
+single architecture to set this up. There may be other architectures
+besides RISC-V that have the same problem.
+
+Fix this once and for all by reserving the physical memory page that may
+be mapped to the last virtual memory page as part of low memory.
+
+Unfortunately, this means if there is actual memory at this reserved
+location, that memory will become inaccessible. However, if this page is
+not reserved, it can only be accessed as high memory, so this doesn't
+matter if high memory is not supported. Even if high memory is supported,
+it is still only one page.
+
+Closes: https://lore.kernel.org/linux-riscv/878r1ibpdn.fsf@all.your.base.are.belong.to.us
+Link: https://lkml.kernel.org/r/20240418102943.180510-1-namcao@linutronix.de
+Signed-off-by: Nam Cao <namcao@linutronix.de>
+Reported-by: Björn Töpel <bjorn@kernel.org>
+Tested-by: Björn Töpel <bjorn@kernel.org>
+Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
+Cc: Andreas Dilger <adilger@dilger.ca>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Changbin Du <changbin.du@huawei.com>
+Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
+Cc: Geert Uytterhoeven <geert+renesas@glider.be>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Krister Johansen <kjlx@templeofstupid.com>
+Cc: Luis Chamberlain <mcgrof@kernel.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: Stephen Rothwell <sfr@canb.auug.org.au>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
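
Conceptually, the fix boils down to reserving, early in boot, the physical
page that the last virtual page (the range aliasing ERR_PTR() values) would
map to when it falls inside low memory. A rough sketch under those
assumptions; the function name is hypothetical and this is not the literal
patch:

static void __init reserve_err_ptr_page(void)
{
	/* The last virtual page covers the addresses -PAGE_SIZE..-1 that
	 * alias ERR_PTR() values. If it maps low memory, reserve the
	 * backing physical page so allocators can never hand it out. */
	void *err_page = (void *)(unsigned long)-PAGE_SIZE;

	if (virt_addr_valid(err_page))
		memblock_reserve(__pa(err_page), PAGE_SIZE);
}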
diff --git a/txt/mm-arm64-override-clear_young_dirty_ptes-batch-helper.txt b/txt/mm-arm64-override-clear_young_dirty_ptes-batch-helper.txt
new file mode 100644
index 000000000..43fe8f8d4
--- /dev/null
+++ b/txt/mm-arm64-override-clear_young_dirty_ptes-batch-helper.txt
@@ -0,0 +1,23 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/arm64: override clear_young_dirty_ptes() batch helper
+Date: Thu, 18 Apr 2024 21:44:33 +0800
+
+The per-pte get_and_clear/modify/set approach would result in
+unfolding/refolding for contpte mappings on arm64. So we need to override
+clear_young_dirty_ptes() for arm64 to avoid it.
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-3-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Suggested-by: Barry Song <21cnbao@gmail.com>
+Suggested-by: Ryan Roberts <ryan.roberts@arm.com>
+Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
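
The override follows the usual pattern for arch-provided pgtable batch
helpers: define the name as a macro so the generic per-PTE fallback in
<linux/pgtable.h> is not used, and dispatch to a contpte-aware routine.
A sketch with assumed helper and flag-type names (contpte_clear_young_dirty_ptes,
cydp_t), not a quote of the patch:

/* arch/arm64/include/asm/pgtable.h -- illustrative shape only */
#define clear_young_dirty_ptes clear_young_dirty_ptes
static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep,
					  unsigned int nr, cydp_t flags)
{
	/* A contpte-aware implementation (name assumed) can clear the
	 * young/dirty bits across a contiguous-PTE block in place,
	 * instead of unfolding and refolding it one PTE at a time. */
	contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
}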
diff --git a/txt/mm-create-folio_flag_false-and-folio_type_ops-macros.txt b/txt/mm-create-folio_flag_false-and-folio_type_ops-macros.txt
index acacd697a..46a9f7b37 100644
--- a/txt/mm-create-folio_flag_false-and-folio_type_ops-macros.txt
+++ b/txt/mm-create-folio_flag_false-and-folio_type_ops-macros.txt
@@ -7,9 +7,11 @@ FOLIO_FLAG_FALSE from PAGEFLAG_FALSE and FOLIO_TYPE_OPS from
PAGE_TYPE_OPS.
Link: https://lkml.kernel.org/r/20240321142448.1645400-3-willy@infradead.org
+Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
+Cc: <stable@vger.kernel.org>
diff --git a/txt/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.txt b/txt/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.txt
new file mode 100644
index 000000000..528263ad1
--- /dev/null
+++ b/txt/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.txt
@@ -0,0 +1,15 @@
+From: David Hildenbrand <david@redhat.com>
+Subject: mm/huge_memory: improve split_huge_page_to_list_to_order() return value documentation
+Date: Thu, 18 Apr 2024 17:18:34 +0200
+
+The documentation is wrong and relying on it almost resulted in BUGs in
+new callers: we return -EAGAIN on unexpected folio references, not -EBUSY.
+
+Let's fix that and also document which other return values we can
+currently see and why they could happen.
+
+Link: https://lkml.kernel.org/r/20240418151834.216557-1-david@redhat.com
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
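
For callers, the practical consequence is to check for -EAGAIN (rather than
-EBUSY) when a split fails because of unexpected extra folio references.
A hedged sketch of such a call site, with the surrounding folio assumed:

	int ret;

	/* new_order == 0 requests a split all the way down to order-0 pages */
	ret = split_huge_page_to_list_to_order(&folio->page, NULL, 0);
	if (ret == -EAGAIN) {
		/* Unexpected extra references on the folio: back off and
		 * retry later rather than treating this as -EBUSY. */
	} else if (ret) {
		/* Other errors: the folio cannot be split at all. */
	}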
diff --git a/txt/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.txt b/txt/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.txt
new file mode 100644
index 000000000..9d5731b50
--- /dev/null
+++ b/txt/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.txt
@@ -0,0 +1,61 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/madvise: introduce clear_young_dirty_ptes() batch helper
+Date: Thu, 18 Apr 2024 21:44:32 +0800
+
+Patch series "mm/madvise: enhance lazyfreeing with mTHP in madvise_free",
+v10.
+
+This patchset adds support for lazyfreeing multi-size THP (mTHP) without
+needing to first split the large folio via split_folio(). However, we
+still need to split a large folio that is not fully mapped within the
+target range.
+
+If a large folio is locked or shared, or if we fail to split it, we just
+leave it in place and advance to the next PTE in the range. But note that
+the behavior is changed; previously, any failure of this sort would cause
+the entire operation to give up. As large folios become more common,
+sticking to the old way could result in wasted opportunities.
+
+Performance Testing
+===================
+
+On an Intel I5 CPU, lazyfreeing a 1GiB VMA backed by PTE-mapped folios of
+the same size results in the following runtimes for madvise(MADV_FREE) in
+seconds (shorter is better):
+
+Folio Size | Old | New | Change
+------------------------------------------
+ 4KiB | 0.590251 | 0.590259 | 0%
+ 16KiB | 2.990447 | 0.185655 | -94%
+ 32KiB | 2.547831 | 0.104870 | -95%
+ 64KiB | 2.457796 | 0.052812 | -97%
+ 128KiB | 2.281034 | 0.032777 | -99%
+ 256KiB | 2.230387 | 0.017496 | -99%
+ 512KiB | 2.189106 | 0.010781 | -99%
+ 1024KiB | 2.183949 | 0.007753 | -99%
+ 2048KiB | 0.002799 | 0.002804 | 0%
+
+
+This patch (of 4):
+
+This commit introduces clear_young_dirty_ptes() to replace mkold_ptes().
+By doing so, we can use the same function for both use cases
+(madvise_pageout and madvise_free), and it also provides the flexibility
+to only clear the dirty flag in the future if needed.
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-1-ioworker0@gmail.com
+Link: https://lkml.kernel.org/r/20240418134435.6092-2-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Suggested-by: Ryan Roberts <ryan.roberts@arm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
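
A generic per-PTE fallback for such a batch helper might look roughly like
the sketch below; CYDP_CLEAR_YOUNG/CYDP_CLEAR_DIRTY are assumed flag names,
and architectures such as arm64 can override the whole helper (see the next
patch in the series):

static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep,
					  unsigned int nr, unsigned int flags)
{
	for (; nr; nr--, ptep++, addr += PAGE_SIZE) {
		pte_t pte;

		if (flags == CYDP_CLEAR_YOUNG) {
			/* Young-only: the cheap accessed-bit helper is enough. */
			ptep_test_and_clear_young(vma, addr, ptep);
			continue;
		}

		/* Otherwise do the full get_and_clear/modify/set per PTE. */
		pte = ptep_get_and_clear(vma->vm_mm, addr, ptep);
		if (flags & CYDP_CLEAR_YOUNG)
			pte = pte_mkold(pte);
		if (flags & CYDP_CLEAR_DIRTY)
			pte = pte_mkclean(pte);
		set_pte_at(vma->vm_mm, addr, ptep, pte);
	}
}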
diff --git a/txt/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.txt b/txt/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.txt
new file mode 100644
index 000000000..51f30897c
--- /dev/null
+++ b/txt/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.txt
@@ -0,0 +1,47 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/madvise: optimize lazyfreeing with mTHP in madvise_free
+Date: Thu, 18 Apr 2024 21:44:35 +0800
+
+This patch optimizes lazyfreeing with PTE-mapped mTHP[1] (Inspired by
+David Hildenbrand[2]). We aim to avoid unnecessary folio splitting if the
+large folio is fully mapped within the target range.
+
+If a large folio is locked or shared, or if we fail to split it, we just
+leave it in place and advance to the next PTE in the range. But note that
+the behavior is changed; previously, any failure of this sort would cause
+the entire operation to give up. As large folios become more common,
+sticking to the old way could result in wasted opportunities.
+
+On an Intel I5 CPU, lazyfreeing a 1GiB VMA backed by PTE-mapped folios of
+the same size results in the following runtimes for madvise(MADV_FREE) in
+seconds (shorter is better):
+
+Folio Size | Old | New | Change
+------------------------------------------
+ 4KiB | 0.590251 | 0.590259 | 0%
+ 16KiB | 2.990447 | 0.185655 | -94%
+ 32KiB | 2.547831 | 0.104870 | -95%
+ 64KiB | 2.457796 | 0.052812 | -97%
+ 128KiB | 2.281034 | 0.032777 | -99%
+ 256KiB | 2.230387 | 0.017496 | -99%
+ 512KiB | 2.189106 | 0.010781 | -99%
+ 1024KiB | 2.183949 | 0.007753 | -99%
+ 2048KiB | 0.002799 | 0.002804 | 0%
+
+[1] https://lkml.kernel.org/r/20231207161211.2374093-5-ryan.roberts@arm.com
+[2] https://lore.kernel.org/linux-mm/20240214204435.167852-1-david@redhat.com
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-5-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
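
For reference, the userspace side of the workload above is just a plain
madvise(MADV_FREE) on a populated anonymous mapping. A minimal, runnable
reproduction; the 1 GiB size and the touch pattern are assumptions, not the
exact benchmark program:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 1UL << 30;	/* 1 GiB */
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	memset(buf, 1, len);			/* fault in (possibly mTHP-backed) folios */

	if (madvise(buf, len, MADV_FREE))	/* the operation being timed above */
		perror("madvise(MADV_FREE)");

	munmap(buf, len);
	return 0;
}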
diff --git a/txt/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.txt b/txt/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.txt
new file mode 100644
index 000000000..5fdfd3742
--- /dev/null
+++ b/txt/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.txt
@@ -0,0 +1,24 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/memory: add any_dirty optional pointer to folio_pte_batch()
+Date: Thu, 18 Apr 2024 21:44:34 +0800
+
+This commit adds the any_dirty pointer as an optional parameter to the
+folio_pte_batch() function. By using both the any_young and any_dirty
+pointers, madvise_free can make smarter decisions about whether to clear
+the PTEs when marking large folios as lazyfree.
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-4-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
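
A sketch of how a caller like madvise_free might consume the new
out-parameter; the parameter order and the surrounding variables (pte,
ptent, fpb_flags, end, addr) are assumptions based on this description,
not a quote of the patch:

	bool any_young = false, any_dirty = false;
	int max_nr = (end - addr) / PAGE_SIZE;
	int nr;

	/* Batch the PTEs mapping this large folio and learn, in one pass,
	 * whether any of them were young and whether any were dirty. */
	nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, fpb_flags,
			     NULL, &any_young, &any_dirty);

	/* With any_dirty in hand, madvise_free can decide for the whole
	 * batch whether dirty state needs clearing before marking the
	 * folio lazyfree, instead of inspecting each PTE separately. */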
diff --git a/txt/mm-page_table_check-support-userfault-wr-protect-entries.txt b/txt/mm-page_table_check-support-userfault-wr-protect-entries.txt
index 046beb6cb..180b38c6a 100644
--- a/txt/mm-page_table_check-support-userfault-wr-protect-entries.txt
+++ b/txt/mm-page_table_check-support-userfault-wr-protect-entries.txt
@@ -48,7 +48,7 @@ better now.
Link: https://lkml.kernel.org/r/20240417212549.2766883-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
-Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
+Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
diff --git a/txt/mm-support-page_mapcount-on-page_has_type-pages.txt b/txt/mm-support-page_mapcount-on-page_has_type-pages.txt
index f226b6859..beafd94ce 100644
--- a/txt/mm-support-page_mapcount-on-page_has_type-pages.txt
+++ b/txt/mm-support-page_mapcount-on-page_has_type-pages.txt
@@ -7,9 +7,11 @@ works. It is more convenient for users to not have to filter out these
pages.
Link: https://lkml.kernel.org/r/20240321142448.1645400-5-willy@infradead.org
+Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
+Cc: <stable@vger.kernel.org>
diff --git a/txt/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.txt b/txt/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.txt
new file mode 100644
index 000000000..65cc6c631
--- /dev/null
+++ b/txt/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.txt
@@ -0,0 +1,18 @@
+From: Kefeng Wang <wangkefeng.wang@huawei.com>
+Subject: mm: swapfile: check usable swap device in __folio_throttle_swaprate()
+Date: Thu, 18 Apr 2024 21:56:44 +0800
+
+Skip blk_cgroup_congested() if there is no usable swap device, since no
+swapin/out will occur, thereby avoiding taking swap_lock. The difference
+is shown in the perf data below, from a CoW page-fault test:
+
+ perf report -g -i perf.data.swapoff | egrep "blk_cgroup_congested|__folio_throttle_swaprate"
+ 1.01% 0.16% page_fault2_pro [kernel.kallsyms] [k] __folio_throttle_swaprate
+ 0.83% 0.80% page_fault2_pro [kernel.kallsyms] [k] blk_cgroup_congested
+
+ perf report -g -i perf.data.swapon | egrep "blk_cgroup_congested|__folio_throttle_swaprate"
+ 0.15% 0.15% page_fault2_pro [kernel.kallsyms] [k] __folio_throttle_swaprate
+
+Link: https://lkml.kernel.org/r/20240418135644.2736748-1-wangkefeng.wang@huawei.com
+Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Tejun Heo <tj@kernel.org>
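
The shape of the change is simply an early return before the cgroup
congestion probe when no swap can be used. A sketch; the exact predicate
used upstream may differ from the get_nr_swap_pages() check assumed here:

void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
{
	/* Sketch: with no usable swap, no swapin/swapout can follow, so
	 * don't bother asking blk_cgroup_congested() and never take
	 * swap_lock below. */
	if (!get_nr_swap_pages())
		return;

	if (!blk_cgroup_congested())
		return;

	/* ... walk swap devices and throttle as before ... */
}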
diff --git a/txt/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.txt b/txt/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.txt
new file mode 100644
index 000000000..09615713a
--- /dev/null
+++ b/txt/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.txt
@@ -0,0 +1,35 @@
+From: Johannes Weiner <hannes@cmpxchg.org>
+Subject: mm: zswap: fix shrinker NULL crash with cgroup_disable=memory
+Date: Thu, 18 Apr 2024 08:26:28 -0400
+
+Christian reports a NULL deref in zswap that he bisected down to the zswap
+shrinker. The issue also cropped up in the bug trackers of libguestfs [1]
+and the Red Hat bugzilla [2].
+
+The problem is that when memcg is disabled with the boot time flag, the
+zswap shrinker might get called with sc->memcg == NULL. This is okay in
+many places, like the lruvec operations. But it crashes in
+memcg_page_state() - which is only used because of the non-node
+accounting of the cgroup's zswap memory to begin with.
+
+Nhat spotted that the memcg can be NULL in the memcg-disabled case, and I
+was then able to reproduce the crash locally as well.
+
+[1] https://github.com/libguestfs/libguestfs/issues/139
+[2] https://bugzilla.redhat.com/show_bug.cgi?id=2275252
+
+Link: https://lkml.kernel.org/r/20240418124043.GC1055428@cmpxchg.org
+Link: https://lkml.kernel.org/r/20240417143324.GA1055428@cmpxchg.org
+Fixes: b5ba474f3f51 ("zswap: shrink zswap pool based on memory pressure")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Christian Heusel <christian@heusel.eu>
+Debugged-by: Nhat Pham <nphamcs@gmail.com>
+Suggested-by: Nhat Pham <nphamcs@gmail.com>
+Tested-by: Christian Heusel <christian@heusel.eu>
+Acked-by: Yosry Ahmed <yosryahmed@google.com>
+Cc: Chengming Zhou <chengming.zhou@linux.dev>
+Cc: Dan Streetman <ddstreet@ieee.org>
+Cc: Richard W.M. Jones <rjones@redhat.com>
+Cc: Seth Jennings <sjenning@redhat.com>
+Cc: Vitaly Wool <vitaly.wool@konsulko.com>
+Cc: <stable@vger.kernel.org> [v6.8]
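
The shape of the fix is a guard around the memcg-specific accounting when
memcg is disabled on the command line; roughly as below, illustrative only,
with the global-counter fallback elided rather than quoted:

	struct mem_cgroup *memcg = sc->memcg;	/* NULL with cgroup_disable=memory */
	unsigned long nr_backing, nr_stored;

	if (!mem_cgroup_disabled()) {
		mem_cgroup_flush_stats(memcg);
		nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
		nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
	} else {
		/* cgroup_disable=memory: fall back to zswap's global
		 * counters instead of dereferencing a NULL memcg. */
		nr_backing = 0;		/* placeholder for the global totals */
		nr_stored = 0;
	}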
diff --git a/txt/null-pointer-dereference-while-shrinking-zswap.txt b/txt/old/null-pointer-dereference-while-shrinking-zswap.txt
index f437585b2..f437585b2 100644
--- a/txt/null-pointer-dereference-while-shrinking-zswap.txt
+++ b/txt/old/null-pointer-dereference-while-shrinking-zswap.txt
diff --git a/txt/stackdepot-respect-__gfp_nolockdep-allocation-flag.txt b/txt/stackdepot-respect-__gfp_nolockdep-allocation-flag.txt
new file mode 100644
index 000000000..824f42ed9
--- /dev/null
+++ b/txt/stackdepot-respect-__gfp_nolockdep-allocation-flag.txt
@@ -0,0 +1,68 @@
+From: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+Subject: stackdepot: respect __GFP_NOLOCKDEP allocation flag
+Date: Thu, 18 Apr 2024 16:11:33 +0200
+
+If stack_depot_save_flags() allocates memory it always drops the
+__GFP_NOLOCKDEP flag. So when KASAN tries to track a __GFP_NOLOCKDEP
+allocation we may end up with a lockdep splat like the one below:
+
+======================================================
+ WARNING: possible circular locking dependency detected
+ 6.9.0-rc3+ #49 Not tainted
+ ------------------------------------------------------
+ kswapd0/149 is trying to acquire lock:
+ ffff88811346a920 (&xfs_nondir_ilock_class){++++}-{4:4}, at: xfs_reclaim_inode+0x3ac/0x590 [xfs]
+
+ but task is already holding lock:
+ ffffffff8bb33100 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x5d9/0xad0
+
+ which lock already depends on the new lock.
+
+ the existing dependency chain (in reverse order) is:
+ -> #1 (fs_reclaim){+.+.}-{0:0}:
+ __lock_acquire+0x7da/0x1030
+ lock_acquire+0x15d/0x400
+ fs_reclaim_acquire+0xb5/0x100
+ prepare_alloc_pages.constprop.0+0xc5/0x230
+ __alloc_pages+0x12a/0x3f0
+ alloc_pages_mpol+0x175/0x340
+ stack_depot_save_flags+0x4c5/0x510
+ kasan_save_stack+0x30/0x40
+ kasan_save_track+0x10/0x30
+ __kasan_slab_alloc+0x83/0x90
+ kmem_cache_alloc+0x15e/0x4a0
+ __alloc_object+0x35/0x370
+ __create_object+0x22/0x90
+ __kmalloc_node_track_caller+0x477/0x5b0
+ krealloc+0x5f/0x110
+ xfs_iext_insert_raw+0x4b2/0x6e0 [xfs]
+ xfs_iext_insert+0x2e/0x130 [xfs]
+ xfs_iread_bmbt_block+0x1a9/0x4d0 [xfs]
+ xfs_btree_visit_block+0xfb/0x290 [xfs]
+ xfs_btree_visit_blocks+0x215/0x2c0 [xfs]
+ xfs_iread_extents+0x1a2/0x2e0 [xfs]
+ xfs_buffered_write_iomap_begin+0x376/0x10a0 [xfs]
+ iomap_iter+0x1d1/0x2d0
+ iomap_file_buffered_write+0x120/0x1a0
+ xfs_file_buffered_write+0x128/0x4b0 [xfs]
+ vfs_write+0x675/0x890
+ ksys_write+0xc3/0x160
+ do_syscall_64+0x94/0x170
+ entry_SYSCALL_64_after_hwframe+0x71/0x79
+
+Always preserve __GFP_NOLOCKDEP to fix this.
+
+Link: https://lkml.kernel.org/r/20240418141133.22950-1-ryabinin.a.a@gmail.com
+Fixes: cd11016e5f52 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB")
+Signed-off-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+Reported-by: Xiubo Li <xiubli@redhat.com>
+Closes: https://lore.kernel.org/all/a0caa289-ca02-48eb-9bf2-d86fd47b71f4@redhat.com/
+Reported-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Closes: https://lore.kernel.org/all/f9ff999a-e170-b66b-7caf-293f2b147ac2@opensource.wdc.com/
+Suggested-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: <stable@vger.kernel.org>
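
The fix amounts to including __GFP_NOLOCKDEP in the set of flags that
survives the gfp sanitization stack depot performs before its own page
allocation. Schematically, with a hypothetical helper name and not the
verbatim diff:

/* Sketch of sanitizing the caller's gfp flags for the depot's own
 * allocation while keeping __GFP_NOLOCKDEP, so lockdep keeps ignoring
 * the nested allocation just as it ignores the caller's. */
static gfp_t depot_gfp(gfp_t alloc_flags)
{
	alloc_flags &= ~GFP_ZONEMASK;
	alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP);
	alloc_flags |= __GFP_NOWARN;
	return alloc_flags;
}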