author     Andrew Morton <akpm@linux-foundation.org>   2024-04-18 13:42:04 -0700
committer  Andrew Morton <akpm@linux-foundation.org>   2024-04-18 13:42:04 -0700
commit     578a2c2d8e5c25cc32ea3ab3515c903b7c45ba04 (patch)
tree       44ef1c7e567c0114204658f181b4dd971daede14
parent     c65c0e14247748216c988a1b18897d1258afaaf7 (diff)
download   25-new-578a2c2d8e5c25cc32ea3ab3515c903b7c45ba04.tar.gz
foo
40 files changed, 1548 insertions, 16 deletions
diff --git a/patches/crash-add-prefix-for-crash-dumping-messages.patch b/patches/crash-add-prefix-for-crash-dumping-messages.patch new file mode 100644 index 000000000..8e0449aa0 --- /dev/null +++ b/patches/crash-add-prefix-for-crash-dumping-messages.patch @@ -0,0 +1,56 @@ +From: Baoquan He <bhe@redhat.com> +Subject: crash: add prefix for crash dumping messages +Date: Thu, 18 Apr 2024 11:58:43 +0800 + +Add pr_fmt() to kernel/crash_core.c so that the module name is printed +as a prefix on its debugging messages. + +Also add the 'crashkernel:' prefix to two message-printing lines in +kernel/crash_reserve.c. In that file, almost all debugging messages +already have the 'crashkernel:' prefix, or contain the keyword +crashkernel at the beginning or in the middle, so adding pr_fmt() there +would be redundant. + +Link: https://lkml.kernel.org/r/20240418035843.1562887-1-bhe@redhat.com +Signed-off-by: Baoquan He <bhe@redhat.com> +Cc: Dave Young <dyoung@redhat.com> +Cc: Jiri Slaby <jirislaby@kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + kernel/crash_core.c | 2 ++ + kernel/crash_reserve.c | 4 ++-- + 2 files changed, 4 insertions(+), 2 deletions(-) + +--- a/kernel/crash_core.c~crash-add-prefix-for-crash-dumping-messages ++++ a/kernel/crash_core.c +@@ -4,6 +4,8 @@ + * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> + */ + ++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt ++ + #include <linux/buildid.h> + #include <linux/init.h> + #include <linux/utsname.h> +--- a/kernel/crash_reserve.c~crash-add-prefix-for-crash-dumping-messages ++++ a/kernel/crash_reserve.c +@@ -109,7 +109,7 @@ static int __init parse_crashkernel_mem( + + size = memparse(cur, &tmp); + if (cur == tmp) { +- pr_warn("Memory value expected\n"); ++ pr_warn("crashkernel: Memory value expected\n"); + return -EINVAL; + } + cur = tmp; +@@ -132,7 +132,7 @@ static int __init parse_crashkernel_mem( + cur++; + *crash_base = memparse(cur, &tmp); + if (cur == tmp) { +- pr_warn("Memory value expected after '@'\n"); ++ pr_warn("crashkernel: Memory value expected after '@'\n"); + return -EINVAL; + } + } +_ diff --git a/patches/hugetlb-check-for-anon_vma-prior-to-folio-allocation.patch b/patches/hugetlb-check-for-anon_vma-prior-to-folio-allocation.patch new file mode 100644 index 000000000..8448eb11a --- /dev/null +++ b/patches/hugetlb-check-for-anon_vma-prior-to-folio-allocation.patch @@ -0,0 +1,58 @@ +From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com> +Subject: hugetlb: check for anon_vma prior to folio allocation +Date: Mon, 15 Apr 2024 14:17:47 -0700 + +Commit 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of +anon_vma_prepare()") may bail out after allocating a folio if we do not +hold the mmap lock. When this occurs, vmf_anon_prepare() will release the +vma lock. Hugetlb then attempts to call restore_reserve_on_error(), which +depends on the vma lock being held. + +We can move vmf_anon_prepare() prior to the folio allocation in order to +avoid calling restore_reserve_on_error() without the vma lock. 
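As a minimal sketch of the resulting ordering (condensed from the hugetlb_no_page() hunk below; the locals, labels and error handling are those of the surrounding function), the anon_vma preparation now runs before anything is allocated, so its failure path has nothing to restore:

	if (!(vma->vm_flags & VM_MAYSHARE)) {
		ret = vmf_anon_prepare(vmf);	/* may fail and drop the vma lock */
		if (unlikely(ret))
			goto out;		/* nothing allocated yet, nothing to restore */
	}
	folio = alloc_hugetlb_folio(vma, haddr, 0);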
+ +Link: https://lkml.kernel.org/r/ZiFqSrSRLhIV91og@fedora +Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()") +Reported-by: syzbot+ad1b592fc4483655438b@syzkaller.appspotmail.com +Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com> +Cc: Muchun Song <muchun.song@linux.dev> +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/hugetlb.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/mm/hugetlb.c~hugetlb-check-for-anon_vma-prior-to-folio-allocation ++++ a/mm/hugetlb.c +@@ -6261,6 +6261,12 @@ static vm_fault_t hugetlb_no_page(struct + VM_UFFD_MISSING); + } + ++ if (!(vma->vm_flags & VM_MAYSHARE)) { ++ ret = vmf_anon_prepare(vmf); ++ if (unlikely(ret)) ++ goto out; ++ } ++ + folio = alloc_hugetlb_folio(vma, haddr, 0); + if (IS_ERR(folio)) { + /* +@@ -6297,15 +6303,12 @@ static vm_fault_t hugetlb_no_page(struct + */ + restore_reserve_on_error(h, vma, haddr, folio); + folio_put(folio); ++ ret = VM_FAULT_SIGBUS; + goto out; + } + new_pagecache_folio = true; + } else { + folio_lock(folio); +- +- ret = vmf_anon_prepare(vmf); +- if (unlikely(ret)) +- goto backout_unlocked; + anon_rmap = 1; + } + } else { +_ diff --git a/patches/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.patch b/patches/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.patch index 88611c178..8d3c318e0 100644 --- a/patches/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.patch +++ b/patches/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.patch @@ -64,8 +64,8 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org> ret = 0; goto out; } -@@ -6256,7 +6252,7 @@ static vm_fault_t hugetlb_no_page(struct - VM_UFFD_MISSING); +@@ -6262,7 +6258,7 @@ static vm_fault_t hugetlb_no_page(struct + goto out; } - folio = alloc_hugetlb_folio(vma, haddr, 0); @@ -73,7 +73,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org> if (IS_ERR(folio)) { /* * Returning error will result in faulting task being -@@ -6270,18 +6266,20 @@ static vm_fault_t hugetlb_no_page(struct +@@ -6276,18 +6272,20 @@ static vm_fault_t hugetlb_no_page(struct * here. Before returning error, get ptl and make * sure there really is no pte entry. */ @@ -97,7 +97,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org> if (err) { /* * err can't be -EEXIST which implies someone -@@ -6290,7 +6288,8 @@ static vm_fault_t hugetlb_no_page(struct +@@ -6296,7 +6294,8 @@ static vm_fault_t hugetlb_no_page(struct * to the page cache. So it's safe to call * restore_reserve_on_error() here. */ @@ -105,9 +105,9 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org> + restore_reserve_on_error(h, vma, vmf->address, + folio); folio_put(folio); + ret = VM_FAULT_SIGBUS; goto out; - } -@@ -6320,7 +6319,7 @@ static vm_fault_t hugetlb_no_page(struct +@@ -6323,7 +6322,7 @@ static vm_fault_t hugetlb_no_page(struct folio_unlock(folio); folio_put(folio); /* See comment in userfaultfd_missing() block above */ @@ -116,7 +116,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org> ret = 0; goto out; } -@@ -6335,23 +6334,23 @@ static vm_fault_t hugetlb_no_page(struct +@@ -6338,23 +6337,23 @@ static vm_fault_t hugetlb_no_page(struct * any allocations necessary to record that reservation occur outside * the spinlock. 
*/ @@ -146,7 +146,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org> else hugetlb_add_file_rmap(folio); new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE) -@@ -6360,17 +6359,18 @@ static vm_fault_t hugetlb_no_page(struct + * If this pte was previously wr-protected, keep it wr-protected even + * if populated. + */ @@ -170,7 +170,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org> /* * Only set hugetlb_migratable in newly allocated pages. Existing pages -@@ -6387,10 +6387,10 @@ out: +@@ -6390,10 +6390,10 @@ out: return ret; backout: @@ -183,7 +183,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org> folio_unlock(folio); folio_put(folio); -@@ -6486,8 +6486,7 @@ vm_fault_t hugetlb_fault(struct mm_struc +@@ -6489,8 +6489,7 @@ vm_fault_t hugetlb_fault(struct mm_struc * hugetlb_no_page will drop vma lock and hugetlb fault * mutex internally, which make us return immediately. */ diff --git a/patches/init-fix-allocated-page-overlapping-with-ptr_err.patch b/patches/init-fix-allocated-page-overlapping-with-ptr_err.patch new file mode 100644 index 000000000..0e839c0ee --- /dev/null +++ b/patches/init-fix-allocated-page-overlapping-with-ptr_err.patch @@ -0,0 +1,66 @@ +From: Nam Cao <namcao@linutronix.de> +Subject: init: fix allocated page overlapping with PTR_ERR +Date: Thu, 18 Apr 2024 12:29:43 +0200 + +There is nothing preventing kernel memory allocators from allocating a +page that overlaps with PTR_ERR(), except for architecture-specific code +that sets up memblock. + +It was discovered that the RISCV architecture doesn't set up memblock +correctly, leading to a page overlapping with PTR_ERR() being allocated, +and subsequently crashing the kernel (link in the Closes: tag below). + +The reported crash has nothing to do with PTR_ERR(): the last page (at +address 0xfffff000) being allocated leads to an unexpected arithmetic +overflow in ext4; but still, this page shouldn't be allocated in the first +place. + +Because PTR_ERR() is an architecture-independent thing, we shouldn't ask +every single architecture to set this up. There may be other +architectures besides RISCV that have the same problem. + +Fix this once and for all by reserving the physical memory page that may +be mapped to the last virtual memory page as part of low memory. + +Unfortunately, this means if there is actual memory at this reserved +location, that memory will become inaccessible. However, if this page is +not reserved, it can only be accessed as high memory, so this doesn't +matter if high memory is not supported. Even if high memory is supported, +it is still only one page. 
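The reservation works because error pointers occupy only the top MAX_ERRNO (4095) values of the address space, i.e. exactly the last virtual page. A small sketch of the encoding being protected, mirroring the kernel's IS_ERR_VALUE() logic (simplified; compilable on its own):

	#define MAX_ERRNO	4095

	/* An address is mistakable for an encoded -errno iff it lies in the
	 * final page, e.g. 0xfffff000..0xffffffff with 32-bit pointers and
	 * 4 KiB pages; hence only that one page must never be handed out. */
	static inline int is_err_value(unsigned long x)
	{
		return x >= (unsigned long)-MAX_ERRNO;
	}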
+ +Closes: https://lore.kernel.org/linux-riscv/878r1ibpdn.fsf@all.your.base.are.belong.to.us +Link: https://lkml.kernel.org/r/20240418102943.180510-1-namcao@linutronix.de +Signed-off-by: Nam Cao <namcao@linutronix.de> +Reported-by: Björn Töpel <bjorn@kernel.org> +Tested-by: Björn Töpel <bjorn@kernel.org> +Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org> +Cc: Andreas Dilger <adilger@dilger.ca> +Cc: Arnd Bergmann <arnd@arndb.de> +Cc: Changbin Du <changbin.du@huawei.com> +Cc: Christophe Leroy <christophe.leroy@csgroup.eu> +Cc: Geert Uytterhoeven <geert+renesas@glider.be> +Cc: Ingo Molnar <mingo@kernel.org> +Cc: Krister Johansen <kjlx@templeofstupid.com> +Cc: Luis Chamberlain <mcgrof@kernel.org> +Cc: Nick Desaulniers <ndesaulniers@google.com> +Cc: Stephen Rothwell <sfr@canb.auug.org.au> +Cc: Tejun Heo <tj@kernel.org> +Cc: Thomas Gleixner <tglx@linutronix.de> +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + init/main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/init/main.c~init-fix-allocated-page-overlapping-with-ptr_err ++++ a/init/main.c +@@ -900,6 +900,7 @@ void start_kernel(void) + page_address_init(); + pr_notice("%s", linux_banner); + early_security_init(); ++ memblock_reserve(__pa(-PAGE_SIZE), PAGE_SIZE); /* reserve last page for ERR_PTR */ + setup_arch(&command_line); + setup_boot_config(); + setup_command_line(command_line); +_ diff --git a/patches/mm-arm64-override-clear_young_dirty_ptes-batch-helper.patch b/patches/mm-arm64-override-clear_young_dirty_ptes-batch-helper.patch new file mode 100644 index 000000000..6e777bb5a --- /dev/null +++ b/patches/mm-arm64-override-clear_young_dirty_ptes-batch-helper.patch @@ -0,0 +1,154 @@ +From: Lance Yang <ioworker0@gmail.com> +Subject: mm/arm64: override clear_young_dirty_ptes() batch helper +Date: Thu, 18 Apr 2024 21:44:33 +0800 + +The per-pte get_and_clear/modify/set approach would result in +unfolding/refolding for contpte mappings on arm64. So we need to override +clear_young_dirty_ptes() for arm64 to avoid it. 
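The override keeps the common case cheap. A condensed sketch of the dispatch added to pgtable.h below: a single, non-contpte PTE stays on the per-PTE fast path, and anything else takes the contpte-aware batch path:

	if (likely(nr == 1 && !pte_cont(__ptep_get(ptep))))
		__clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
	else
		contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);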
+ +Link: https://lkml.kernel.org/r/20240418134435.6092-3-ioworker0@gmail.com +Signed-off-by: Lance Yang <ioworker0@gmail.com> +Suggested-by: Barry Song <21cnbao@gmail.com> +Suggested-by: Ryan Roberts <ryan.roberts@arm.com> +Reviewed-by: Ryan Roberts <ryan.roberts@arm.com> +Cc: David Hildenbrand <david@redhat.com> +Cc: Jeff Xie <xiehuan09@gmail.com> +Cc: Kefeng Wang <wangkefeng.wang@huawei.com> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Minchan Kim <minchan@kernel.org> +Cc: Muchun Song <songmuchun@bytedance.com> +Cc: Peter Xu <peterx@redhat.com> +Cc: Yang Shi <shy828301@gmail.com> +Cc: Yin Fengwei <fengwei.yin@intel.com> +Cc: Zach O'Keefe <zokeefe@google.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + arch/arm64/include/asm/pgtable.h | 55 +++++++++++++++++++++++++++++ + arch/arm64/mm/contpte.c | 29 +++++++++++++++ + 2 files changed, 84 insertions(+) + +--- a/arch/arm64/include/asm/pgtable.h~mm-arm64-override-clear_young_dirty_ptes-batch-helper ++++ a/arch/arm64/include/asm/pgtable.h +@@ -1223,6 +1223,46 @@ static inline void __wrprotect_ptes(stru + __ptep_set_wrprotect(mm, address, ptep); + } + ++static inline void __clear_young_dirty_pte(struct vm_area_struct *vma, ++ unsigned long addr, pte_t *ptep, ++ pte_t pte, cydp_t flags) ++{ ++ pte_t old_pte; ++ ++ do { ++ old_pte = pte; ++ ++ if (flags & CYDP_CLEAR_YOUNG) ++ pte = pte_mkold(pte); ++ if (flags & CYDP_CLEAR_DIRTY) ++ pte = pte_mkclean(pte); ++ ++ pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), ++ pte_val(old_pte), pte_val(pte)); ++ } while (pte_val(pte) != pte_val(old_pte)); ++} ++ ++static inline void __clear_young_dirty_ptes(struct vm_area_struct *vma, ++ unsigned long addr, pte_t *ptep, ++ unsigned int nr, cydp_t flags) ++{ ++ pte_t pte; ++ ++ for (;;) { ++ pte = __ptep_get(ptep); ++ ++ if (flags == (CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY)) ++ __set_pte(ptep, pte_mkclean(pte_mkold(pte))); ++ else ++ __clear_young_dirty_pte(vma, addr, ptep, pte, flags); ++ ++ if (--nr == 0) ++ break; ++ ptep++; ++ addr += PAGE_SIZE; ++ } ++} ++ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + #define __HAVE_ARCH_PMDP_SET_WRPROTECT + static inline void pmdp_set_wrprotect(struct mm_struct *mm, +@@ -1379,6 +1419,9 @@ extern void contpte_wrprotect_ptes(struc + extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t entry, int dirty); ++extern void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma, ++ unsigned long addr, pte_t *ptep, ++ unsigned int nr, cydp_t flags); + + static __always_inline void contpte_try_fold(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pte) +@@ -1603,6 +1646,17 @@ static inline int ptep_set_access_flags( + return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty); + } + ++#define clear_young_dirty_ptes clear_young_dirty_ptes ++static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, ++ unsigned long addr, pte_t *ptep, ++ unsigned int nr, cydp_t flags) ++{ ++ if (likely(nr == 1 && !pte_cont(__ptep_get(ptep)))) ++ __clear_young_dirty_ptes(vma, addr, ptep, nr, flags); ++ else ++ contpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags); ++} ++ + #else /* CONFIG_ARM64_CONTPTE */ + + #define ptep_get __ptep_get +@@ -1622,6 +1676,7 @@ static inline int ptep_set_access_flags( + #define wrprotect_ptes __wrprotect_ptes + #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS + #define ptep_set_access_flags __ptep_set_access_flags ++#define clear_young_dirty_ptes __clear_young_dirty_ptes + + #endif /* CONFIG_ARM64_CONTPTE */ + +--- 
a/arch/arm64/mm/contpte.c~mm-arm64-override-clear_young_dirty_ptes-batch-helper ++++ a/arch/arm64/mm/contpte.c +@@ -361,6 +361,35 @@ void contpte_wrprotect_ptes(struct mm_st + } + EXPORT_SYMBOL_GPL(contpte_wrprotect_ptes); + ++void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma, ++ unsigned long addr, pte_t *ptep, ++ unsigned int nr, cydp_t flags) ++{ ++ /* ++ * We can safely clear access/dirty without needing to unfold from ++ * the architectures perspective, even when contpte is set. If the ++ * range starts or ends midway through a contpte block, we can just ++ * expand to include the full contpte block. While this is not ++ * exactly what the core-mm asked for, it tracks access/dirty per ++ * folio, not per page. And since we only create a contpte block ++ * when it is covered by a single folio, we can get away with ++ * clearing access/dirty for the whole block. ++ */ ++ unsigned long start = addr; ++ unsigned long end = start + nr; ++ ++ if (pte_cont(__ptep_get(ptep + nr - 1))) ++ end = ALIGN(end, CONT_PTE_SIZE); ++ ++ if (pte_cont(__ptep_get(ptep))) { ++ start = ALIGN_DOWN(start, CONT_PTE_SIZE); ++ ptep = contpte_align_down(ptep); ++ } ++ ++ __clear_young_dirty_ptes(vma, start, ptep, end - start, flags); ++} ++EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes); ++ + int contpte_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t entry, int dirty) +_ diff --git a/patches/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.patch b/patches/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.patch new file mode 100644 index 000000000..6158d9b29 --- /dev/null +++ b/patches/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.patch @@ -0,0 +1,51 @@ +From: David Hildenbrand <david@redhat.com> +Subject: mm/huge_memory: improve split_huge_page_to_list_to_order() return value documentation +Date: Thu, 18 Apr 2024 17:18:34 +0200 + +The documentation is wrong and relying on it almost resulted in BUGs in +new callers: we return -EAGAIN on unexpected folio references, not -EBUSY. + +Let's fix that and also document which other return values we can +currently see and why they could happen. + +Link: https://lkml.kernel.org/r/20240418151834.216557-1-david@redhat.com +Signed-off-by: David Hildenbrand <david@redhat.com> +Cc: John Hubbard <jhubbard@nvidia.com> +Cc: Zi Yan <ziy@nvidia.com> +Cc: Matthew Wilcox <willy@infradead.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/huge_memory.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +--- a/mm/huge_memory.c~mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation ++++ a/mm/huge_memory.c +@@ -2956,7 +2956,7 @@ bool can_split_folio(struct folio *folio + * + * 3) The folio must not be pinned. Any unexpected folio references, including + * GUP pins, will result in the folio not getting split; instead, the caller +- * will receive an -EBUSY. ++ * will receive an -EAGAIN. + * + * 4) @new_order > 1, usually. Splitting to order-1 anonymous folios is not + * supported for non-file-backed folios, because folio->_deferred_list, which +@@ -2975,8 +2975,15 @@ bool can_split_folio(struct folio *folio + * + * Returns 0 if the huge page was split successfully. + * +- * Returns -EBUSY if @page's folio is pinned, or if the anon_vma disappeared +- * from under us. ++ * Returns -EAGAIN if the folio has unexpected reference (e.g., GUP). 
++ * ++ * Returns -EBUSY when trying to split the huge zeropage, if the folio is ++ * under writeback, if fs-specific folio metadata cannot currently be ++ * released, or if some unexpected race happened (e.g., anon VMA disappeared, ++ * truncation). ++ * ++ * Returns -EINVAL when trying to split to an order that is incompatible ++ * with the folio. Splitting to order 0 is compatible with all folios. + */ + int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order) +_ diff --git a/patches/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.patch b/patches/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.patch new file mode 100644 index 000000000..38a1b498d --- /dev/null +++ b/patches/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.patch @@ -0,0 +1,190 @@ +From: Lance Yang <ioworker0@gmail.com> +Subject: mm/madvise: introduce clear_young_dirty_ptes() batch helper +Date: Thu, 18 Apr 2024 21:44:32 +0800 + +Patch series "mm/madvise: enhance lazyfreeing with mTHP in madvise_free", +v10. + +This patchset adds support for lazyfreeing multi-size THP (mTHP) without +needing to first split the large folio via split_folio(). However, we +still need to split a large folio that is not fully mapped within the +target range. + +If a large folio is locked or shared, or if we fail to split it, we just +leave it in place and advance to the next PTE in the range. But note that +the behavior is changed; previously, any failure of this sort would cause +the entire operation to give up. As large folios become more common, +sticking to the old way could result in wasted opportunities. + +Performance Testing +=================== + +On an Intel I5 CPU, lazyfreeing a 1GiB VMA backed by PTE-mapped folios of +the same size results in the following runtimes for madvise(MADV_FREE) in +seconds (shorter is better): + +Folio Size | Old | New | Change +------------------------------------------ + 4KiB | 0.590251 | 0.590259 | 0% + 16KiB | 2.990447 | 0.185655 | -94% + 32KiB | 2.547831 | 0.104870 | -95% + 64KiB | 2.457796 | 0.052812 | -97% + 128KiB | 2.281034 | 0.032777 | -99% + 256KiB | 2.230387 | 0.017496 | -99% + 512KiB | 2.189106 | 0.010781 | -99% + 1024KiB | 2.183949 | 0.007753 | -99% + 2048KiB | 0.002799 | 0.002804 | 0% + + +This patch (of 4): + +This commit introduces clear_young_dirty_ptes() to replace mkold_ptes(). +By doing so, we can use the same function for both use cases +(madvise_pageout and madvise_free), and it also provides the flexibility +to only clear the dirty flag in the future if needed. 
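As a usage sketch, condensed from the call sites in this series (madvise_pageout keeps the old mkold_ptes() semantics, while madvise_free clears both bits in one pass):

	/* age nr consecutive PTEs of one folio, as mkold_ptes() used to */
	clear_young_dirty_ptes(vma, addr, ptep, nr, CYDP_CLEAR_YOUNG);

	/* lazyfree path: mark the whole batch old and clean in one call */
	clear_young_dirty_ptes(vma, addr, ptep, nr,
			       CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY);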
+ +Link: https://lkml.kernel.org/r/20240418134435.6092-1-ioworker0@gmail.com +Link: https://lkml.kernel.org/r/20240418134435.6092-2-ioworker0@gmail.com +Signed-off-by: Lance Yang <ioworker0@gmail.com> +Suggested-by: Ryan Roberts <ryan.roberts@arm.com> +Acked-by: David Hildenbrand <david@redhat.com> +Reviewed-by: Ryan Roberts <ryan.roberts@arm.com> +Cc: Barry Song <21cnbao@gmail.com> +Cc: Jeff Xie <xiehuan09@gmail.com> +Cc: Kefeng Wang <wangkefeng.wang@huawei.com> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Minchan Kim <minchan@kernel.org> +Cc: Muchun Song <songmuchun@bytedance.com> +Cc: Peter Xu <peterx@redhat.com> +Cc: Yang Shi <shy828301@gmail.com> +Cc: Yin Fengwei <fengwei.yin@intel.com> +Cc: Zach O'Keefe <zokeefe@google.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + include/linux/mm_types.h | 9 ++++ + include/linux/pgtable.h | 74 ++++++++++++++++++++++--------------- + mm/madvise.c | 3 + + 3 files changed, 55 insertions(+), 31 deletions(-) + +--- a/include/linux/mm_types.h~mm-madvise-introduce-clear_young_dirty_ptes-batch-helper ++++ a/include/linux/mm_types.h +@@ -1368,6 +1368,15 @@ enum fault_flag { + + typedef unsigned int __bitwise zap_flags_t; + ++/* Flags for clear_young_dirty_ptes(). */ ++typedef int __bitwise cydp_t; ++ ++/* Clear the access bit */ ++#define CYDP_CLEAR_YOUNG ((__force cydp_t)BIT(0)) ++ ++/* Clear the dirty bit */ ++#define CYDP_CLEAR_DIRTY ((__force cydp_t)BIT(1)) ++ + /* + * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each + * other. Here is what they mean, and how to use them: +--- a/include/linux/pgtable.h~mm-madvise-introduce-clear_young_dirty_ptes-batch-helper ++++ a/include/linux/pgtable.h +@@ -361,36 +361,6 @@ static inline int ptep_test_and_clear_yo + } + #endif + +-#ifndef mkold_ptes +-/** +- * mkold_ptes - Mark PTEs that map consecutive pages of the same folio as old. +- * @vma: VMA the pages are mapped into. +- * @addr: Address the first page is mapped at. +- * @ptep: Page table pointer for the first entry. +- * @nr: Number of entries to mark old. +- * +- * May be overridden by the architecture; otherwise, implemented as a simple +- * loop over ptep_test_and_clear_young(). +- * +- * Note that PTE bits in the PTE range besides the PFN can differ. For example, +- * some PTEs might be write-protected. +- * +- * Context: The caller holds the page table lock. The PTEs map consecutive +- * pages that belong to the same folio. The PTEs are all in the same PMD. +- */ +-static inline void mkold_ptes(struct vm_area_struct *vma, unsigned long addr, +- pte_t *ptep, unsigned int nr) +-{ +- for (;;) { +- ptep_test_and_clear_young(vma, addr, ptep); +- if (--nr == 0) +- break; +- ptep++; +- addr += PAGE_SIZE; +- } +-} +-#endif +- + #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) + static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, +@@ -489,6 +459,50 @@ static inline pte_t ptep_get_and_clear(s + } + #endif + ++#ifndef clear_young_dirty_ptes ++/** ++ * clear_young_dirty_ptes - Mark PTEs that map consecutive pages of the ++ * same folio as old/clean. ++ * @mm: Address space the pages are mapped into. ++ * @addr: Address the first page is mapped at. ++ * @ptep: Page table pointer for the first entry. ++ * @nr: Number of entries to mark old/clean. ++ * @flags: Flags to modify the PTE batch semantics. 
++ * ++ * May be overridden by the architecture; otherwise, implemented by ++ * get_and_clear/modify/set for each pte in the range. ++ * ++ * Note that PTE bits in the PTE range besides the PFN can differ. For example, ++ * some PTEs might be write-protected. ++ * ++ * Context: The caller holds the page table lock. The PTEs map consecutive ++ * pages that belong to the same folio. The PTEs are all in the same PMD. ++ */ ++static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, ++ unsigned long addr, pte_t *ptep, ++ unsigned int nr, cydp_t flags) ++{ ++ pte_t pte; ++ ++ for (;;) { ++ if (flags == CYDP_CLEAR_YOUNG) ++ ptep_test_and_clear_young(vma, addr, ptep); ++ else { ++ pte = ptep_get_and_clear(vma->vm_mm, addr, ptep); ++ if (flags & CYDP_CLEAR_YOUNG) ++ pte = pte_mkold(pte); ++ if (flags & CYDP_CLEAR_DIRTY) ++ pte = pte_mkclean(pte); ++ set_pte_at(vma->vm_mm, addr, ptep, pte); ++ } ++ if (--nr == 0) ++ break; ++ ptep++; ++ addr += PAGE_SIZE; ++ } ++} ++#endif ++ + static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) + { +--- a/mm/madvise.c~mm-madvise-introduce-clear_young_dirty_ptes-batch-helper ++++ a/mm/madvise.c +@@ -507,7 +507,8 @@ restart: + continue; + + if (!pageout && pte_young(ptent)) { +- mkold_ptes(vma, addr, pte, nr); ++ clear_young_dirty_ptes(vma, addr, pte, nr, ++ CYDP_CLEAR_YOUNG); + tlb_remove_tlb_entries(tlb, pte, nr, addr); + } + +_ diff --git a/patches/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.patch b/patches/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.patch new file mode 100644 index 000000000..ce8bf7ebe --- /dev/null +++ b/patches/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.patch @@ -0,0 +1,172 @@ +From: Lance Yang <ioworker0@gmail.com> +Subject: mm/madvise: optimize lazyfreeing with mTHP in madvise_free +Date: Thu, 18 Apr 2024 21:44:35 +0800 + +This patch optimizes lazyfreeing with PTE-mapped mTHP[1] (Inspired by +David Hildenbrand[2]). We aim to avoid unnecessary folio splitting if the +large folio is fully mapped within the target range. + +If a large folio is locked or shared, or if we fail to split it, we just +leave it in place and advance to the next PTE in the range. But note that +the behavior is changed; previously, any failure of this sort would cause +the entire operation to give up. As large folios become more common, +sticking to the old way could result in wasted opportunities. 
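The skip-instead-of-abort policy amounts to the following loop shape (condensed from the madvise_free_pte_range() hunk below; split and unlock details elided):

	if (folio_test_large(folio)) {
		nr = madvise_folio_pte_batch(addr, end, folio, pte, ptent,
					     &any_young, &any_dirty);
		if (nr < folio_nr_pages(folio)) {	/* not fully mapped */
			if (folio_likely_mapped_shared(folio))
				continue;		/* shared: leave in place */
			if (!folio_trylock(folio))
				continue;		/* locked: leave in place */
			/* otherwise try split_folio(); on failure, also just
			 * advance to the next pte instead of giving up */
		}
	}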
+ +On an Intel I5 CPU, lazyfreeing a 1GiB VMA backed by PTE-mapped folios of +the same size results in the following runtimes for madvise(MADV_FREE) in +seconds (shorter is better): + +Folio Size | Old | New | Change +------------------------------------------ + 4KiB | 0.590251 | 0.590259 | 0% + 16KiB | 2.990447 | 0.185655 | -94% + 32KiB | 2.547831 | 0.104870 | -95% + 64KiB | 2.457796 | 0.052812 | -97% + 128KiB | 2.281034 | 0.032777 | -99% + 256KiB | 2.230387 | 0.017496 | -99% + 512KiB | 2.189106 | 0.010781 | -99% + 1024KiB | 2.183949 | 0.007753 | -99% + 2048KiB | 0.002799 | 0.002804 | 0% + +[1] https://lkml.kernel.org/r/20231207161211.2374093-5-ryan.roberts@arm.com +[2] https://lore.kernel.org/linux-mm/20240214204435.167852-1-david@redhat.com + +Link: https://lkml.kernel.org/r/20240418134435.6092-5-ioworker0@gmail.com +Signed-off-by: Lance Yang <ioworker0@gmail.com> +Reviewed-by: Ryan Roberts <ryan.roberts@arm.com> +Acked-by: David Hildenbrand <david@redhat.com> +Cc: Barry Song <21cnbao@gmail.com> +Cc: Jeff Xie <xiehuan09@gmail.com> +Cc: Kefeng Wang <wangkefeng.wang@huawei.com> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Minchan Kim <minchan@kernel.org> +Cc: Muchun Song <songmuchun@bytedance.com> +Cc: Peter Xu <peterx@redhat.com> +Cc: Yang Shi <shy828301@gmail.com> +Cc: Yin Fengwei <fengwei.yin@intel.com> +Cc: Zach O'Keefe <zokeefe@google.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/madvise.c | 85 +++++++++++++++++++++++++------------------------ + 1 file changed, 44 insertions(+), 41 deletions(-) + +--- a/mm/madvise.c~mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free ++++ a/mm/madvise.c +@@ -643,6 +643,7 @@ static int madvise_free_pte_range(pmd_t + unsigned long end, struct mm_walk *walk) + + { ++ const cydp_t cydp_flags = CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY; + struct mmu_gather *tlb = walk->private; + struct mm_struct *mm = tlb->mm; + struct vm_area_struct *vma = walk->vma; +@@ -697,44 +698,57 @@ static int madvise_free_pte_range(pmd_t + continue; + + /* +- * If pmd isn't transhuge but the folio is large and +- * is owned by only this process, split it and +- * deactivate all pages. ++ * If we encounter a large folio, only split it if it is not ++ * fully mapped within the range we are operating on. Otherwise ++ * leave it as is so that it can be marked as lazyfree. If we ++ * fail to split a folio, leave it in place and advance to the ++ * next pte in the range. 
+ */ + if (folio_test_large(folio)) { +- int err; ++ bool any_young, any_dirty; + +- if (folio_likely_mapped_shared(folio)) +- break; +- if (!folio_trylock(folio)) +- break; +- folio_get(folio); +- arch_leave_lazy_mmu_mode(); +- pte_unmap_unlock(start_pte, ptl); +- start_pte = NULL; +- err = split_folio(folio); +- folio_unlock(folio); +- folio_put(folio); +- if (err) +- break; +- start_pte = pte = +- pte_offset_map_lock(mm, pmd, addr, &ptl); +- if (!start_pte) +- break; +- arch_enter_lazy_mmu_mode(); +- pte--; +- addr -= PAGE_SIZE; +- continue; ++ nr = madvise_folio_pte_batch(addr, end, folio, pte, ++ ptent, &any_young, &any_dirty); ++ ++ if (nr < folio_nr_pages(folio)) { ++ int err; ++ ++ if (folio_likely_mapped_shared(folio)) ++ continue; ++ if (!folio_trylock(folio)) ++ continue; ++ folio_get(folio); ++ arch_leave_lazy_mmu_mode(); ++ pte_unmap_unlock(start_pte, ptl); ++ start_pte = NULL; ++ err = split_folio(folio); ++ folio_unlock(folio); ++ folio_put(folio); ++ pte = pte_offset_map_lock(mm, pmd, addr, &ptl); ++ start_pte = pte; ++ if (!start_pte) ++ break; ++ arch_enter_lazy_mmu_mode(); ++ if (!err) ++ nr = 0; ++ continue; ++ } ++ ++ if (any_young) ++ ptent = pte_mkyoung(ptent); ++ if (any_dirty) ++ ptent = pte_mkdirty(ptent); + } + + if (folio_test_swapcache(folio) || folio_test_dirty(folio)) { + if (!folio_trylock(folio)) + continue; + /* +- * If folio is shared with others, we mustn't clear +- * the folio's dirty flag. ++ * If we have a large folio at this point, we know it is ++ * fully mapped so if its mapcount is the same as its ++ * number of pages, it must be exclusive. + */ +- if (folio_mapcount(folio) != 1) { ++ if (folio_mapcount(folio) != folio_nr_pages(folio)) { + folio_unlock(folio); + continue; + } +@@ -750,19 +764,8 @@ static int madvise_free_pte_range(pmd_t + } + + if (pte_young(ptent) || pte_dirty(ptent)) { +- /* +- * Some of architecture(ex, PPC) don't update TLB +- * with set_pte_at and tlb_remove_tlb_entry so for +- * the portability, remap the pte with old|clean +- * after pte clearing. +- */ +- ptent = ptep_get_and_clear_full(mm, addr, pte, +- tlb->fullmm); +- +- ptent = pte_mkold(ptent); +- ptent = pte_mkclean(ptent); +- set_pte_at(mm, addr, pte, ptent); +- tlb_remove_tlb_entry(tlb, pte, addr); ++ clear_young_dirty_ptes(vma, addr, pte, nr, cydp_flags); ++ tlb_remove_tlb_entries(tlb, pte, nr, addr); + } + folio_mark_lazyfree(folio); + } +_ diff --git a/patches/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.patch b/patches/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.patch new file mode 100644 index 000000000..64221bb7d --- /dev/null +++ b/patches/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.patch @@ -0,0 +1,142 @@ +From: Lance Yang <ioworker0@gmail.com> +Subject: mm/memory: add any_dirty optional pointer to folio_pte_batch() +Date: Thu, 18 Apr 2024 21:44:34 +0800 + +This commit adds the any_dirty pointer as an optional parameter to +folio_pte_batch() function. By using both the any_young and any_dirty +pointers, madvise_free can make smarter decisions about whether to clear +the PTEs when marking large folios as lazyfree. 
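A condensed caller's view of the extended interface (the madvise_folio_pte_batch() wrapper added below packages exactly this up; ptent and the other locals are those of the surrounding page-table walk):

	bool any_young, any_dirty;
	int nr;

	/* batch across one large folio's PTEs, learning whether any entry
	 * in the batch was young or dirty, so the whole batch can then be
	 * treated uniformly */
	nr = folio_pte_batch(folio, addr, ptep, ptent, max_nr, fpb_flags,
			     NULL, &any_young, &any_dirty);
	if (any_young)
		ptent = pte_mkyoung(ptent);
	if (any_dirty)
		ptent = pte_mkdirty(ptent);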
+ +Link: https://lkml.kernel.org/r/20240418134435.6092-4-ioworker0@gmail.com +Signed-off-by: Lance Yang <ioworker0@gmail.com> +Suggested-by: David Hildenbrand <david@redhat.com> +Acked-by: David Hildenbrand <david@redhat.com> +Cc: Barry Song <21cnbao@gmail.com> +Cc: Jeff Xie <xiehuan09@gmail.com> +Cc: Kefeng Wang <wangkefeng.wang@huawei.com> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Minchan Kim <minchan@kernel.org> +Cc: Muchun Song <songmuchun@bytedance.com> +Cc: Peter Xu <peterx@redhat.com> +Cc: Ryan Roberts <ryan.roberts@arm.com> +Cc: Yang Shi <shy828301@gmail.com> +Cc: Yin Fengwei <fengwei.yin@intel.com> +Cc: Zach O'Keefe <zokeefe@google.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/internal.h | 12 ++++++++++-- + mm/madvise.c | 19 ++++++++++++++----- + mm/memory.c | 4 ++-- + 3 files changed, 26 insertions(+), 9 deletions(-) + +--- a/mm/internal.h~mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch ++++ a/mm/internal.h +@@ -134,6 +134,8 @@ static inline pte_t __pte_batch_clear_ig + * first one is writable. + * @any_young: Optional pointer to indicate whether any entry except the + * first one is young. ++ * @any_dirty: Optional pointer to indicate whether any entry except the ++ * first one is dirty. + * + * Detect a PTE batch: consecutive (present) PTEs that map consecutive + * pages of the same large folio. +@@ -149,18 +151,20 @@ static inline pte_t __pte_batch_clear_ig + */ + static inline int folio_pte_batch(struct folio *folio, unsigned long addr, + pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags, +- bool *any_writable, bool *any_young) ++ bool *any_writable, bool *any_young, bool *any_dirty) + { + unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio); + const pte_t *end_ptep = start_ptep + max_nr; + pte_t expected_pte, *ptep; +- bool writable, young; ++ bool writable, young, dirty; + int nr; + + if (any_writable) + *any_writable = false; + if (any_young) + *any_young = false; ++ if (any_dirty) ++ *any_dirty = false; + + VM_WARN_ON_FOLIO(!pte_present(pte), folio); + VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio); +@@ -176,6 +180,8 @@ static inline int folio_pte_batch(struct + writable = !!pte_write(pte); + if (any_young) + young = !!pte_young(pte); ++ if (any_dirty) ++ dirty = !!pte_dirty(pte); + pte = __pte_batch_clear_ignored(pte, flags); + + if (!pte_same(pte, expected_pte)) +@@ -193,6 +199,8 @@ static inline int folio_pte_batch(struct + *any_writable |= writable; + if (any_young) + *any_young |= young; ++ if (any_dirty) ++ *any_dirty |= dirty; + + nr = pte_batch_hint(ptep, pte); + expected_pte = pte_advance_pfn(expected_pte, nr); +--- a/mm/madvise.c~mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch ++++ a/mm/madvise.c +@@ -321,6 +321,18 @@ static inline bool can_do_file_pageout(s + file_permission(vma->vm_file, MAY_WRITE) == 0; + } + ++static inline int madvise_folio_pte_batch(unsigned long addr, unsigned long end, ++ struct folio *folio, pte_t *ptep, ++ pte_t pte, bool *any_young, ++ bool *any_dirty) ++{ ++ const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY; ++ int max_nr = (end - addr) / PAGE_SIZE; ++ ++ return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL, ++ any_young, any_dirty); ++} ++ + static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +@@ -456,13 +468,10 @@ restart: + * next pte in the range. 
+ */ + if (folio_test_large(folio)) { +- const fpb_t fpb_flags = FPB_IGNORE_DIRTY | +- FPB_IGNORE_SOFT_DIRTY; +- int max_nr = (end - addr) / PAGE_SIZE; + bool any_young; + +- nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, +- fpb_flags, NULL, &any_young); ++ nr = madvise_folio_pte_batch(addr, end, folio, pte, ++ ptent, &any_young, NULL); + if (any_young) + ptent = pte_mkyoung(ptent); + +--- a/mm/memory.c~mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch ++++ a/mm/memory.c +@@ -989,7 +989,7 @@ copy_present_ptes(struct vm_area_struct + flags |= FPB_IGNORE_SOFT_DIRTY; + + nr = folio_pte_batch(folio, addr, src_pte, pte, max_nr, flags, +- &any_writable, NULL); ++ &any_writable, NULL, NULL); + folio_ref_add(folio, nr); + if (folio_test_anon(folio)) { + if (unlikely(folio_try_dup_anon_rmap_ptes(folio, page, +@@ -1558,7 +1558,7 @@ static inline int zap_present_ptes(struc + */ + if (unlikely(folio_test_large(folio) && max_nr != 1)) { + nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, fpb_flags, +- NULL, NULL); ++ NULL, NULL, NULL); + + zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, nr, + addr, details, rss, force_flush, +_ diff --git a/patches/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.patch b/patches/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.patch new file mode 100644 index 000000000..e1aeb16b7 --- /dev/null +++ b/patches/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.patch @@ -0,0 +1,58 @@ +From: Kefeng Wang <wangkefeng.wang@huawei.com> +Subject: mm: swapfile: check usable swap device in __folio_throttle_swaprate() +Date: Thu, 18 Apr 2024 21:56:44 +0800 + +Skip blk_cgroup_congested() if there is no usable swap device since no +swapin/out will occur, Thereby avoid taking swap_lock. 
The difference is +shown below in the perf data of CoW pagefault: + + perf report -g -i perf.data.swapoff | egrep "blk_cgroup_congested|__folio_throttle_swaprate" + 1.01% 0.16% page_fault2_pro [kernel.kallsyms] [k] __folio_throttle_swaprate + 0.83% 0.80% page_fault2_pro [kernel.kallsyms] [k] blk_cgroup_congested + + perf report -g -i perf.data.swapon | egrep "blk_cgroup_congested|__folio_throttle_swaprate" + 0.15% 0.15% page_fault2_pro [kernel.kallsyms] [k] __folio_throttle_swaprate + +Link: https://lkml.kernel.org/r/20240418135644.2736748-1-wangkefeng.wang@huawei.com +Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com> +Cc: Tejun Heo <tj@kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/swapfile.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +--- a/mm/swapfile.c~mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate ++++ a/mm/swapfile.c +@@ -2444,13 +2444,17 @@ static void reinsert_swap_info(struct sw + spin_unlock(&swap_lock); + } + ++static bool __has_usable_swap(void) ++{ ++ return !plist_head_empty(&swap_active_head); ++} ++ + bool has_usable_swap(void) + { +- bool ret = true; ++ bool ret; + + spin_lock(&swap_lock); +- if (plist_head_empty(&swap_active_head)) +- ret = false; ++ ret = __has_usable_swap(); + spin_unlock(&swap_lock); + return ret; + } +@@ -3710,6 +3714,9 @@ void __folio_throttle_swaprate(struct fo + if (!(gfp & __GFP_IO)) + return; + ++ if (!__has_usable_swap()) ++ return; ++ + if (!blk_cgroup_congested()) + return; + +_ diff --git a/patches/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.patch b/patches/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.patch new file mode 100644 index 000000000..21eaef699 --- /dev/null +++ b/patches/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.patch @@ -0,0 +1,76 @@ +From: Johannes Weiner <hannes@cmpxchg.org> +Subject: mm: zswap: fix shrinker NULL crash with cgroup_disable=memory +Date: Thu, 18 Apr 2024 08:26:28 -0400 + +Christian reports a NULL deref in zswap that he bisected down to the zswap +shrinker. The issue also cropped up in the bug trackers of libguestfs [1] +and the Red Hat bugzilla [2]. + +The problem is that when memcg is disabled with the boot time flag, the +zswap shrinker might get called with sc->memcg == NULL. This is okay in +many places, like the lruvec operations. But it crashes in +memcg_page_state() - which is only used due to the non-node accounting of +the cgroup's zswap memory to begin with. + +Nhat spotted that the memcg can be NULL in the memcg-disabled case, and I +was then able to reproduce the crash locally as well. + +[1] https://github.com/libguestfs/libguestfs/issues/139 +[2] https://bugzilla.redhat.com/show_bug.cgi?id=2275252 + +Link: https://lkml.kernel.org/r/20240418124043.GC1055428@cmpxchg.org +Link: https://lkml.kernel.org/r/20240417143324.GA1055428@cmpxchg.org +Fixes: b5ba474f3f51 ("zswap: shrink zswap pool based on memory pressure") +Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> +Reported-by: Christian Heusel <christian@heusel.eu> +Debugged-by: Nhat Pham <nphamcs@gmail.com> +Suggested-by: Nhat Pham <nphamcs@gmail.com> +Tested-by: Christian Heusel <christian@heusel.eu> +Cc: Chengming Zhou <chengming.zhou@linux.dev> +Cc: Dan Streetman <ddstreet@ieee.org> +Cc: Richard W.M. 
Jones <rjones@redhat.com> +Cc: Seth Jennings <sjenning@redhat.com> +Cc: Vitaly Wool <vitaly.wool@konsulko.com> +Cc: Yosry Ahmed <yosryahmed@google.com> +Cc: <stable@vger.kernel.org> [v6.8] +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/zswap.c | 25 ++++++++++++++++--------- + 1 file changed, 16 insertions(+), 9 deletions(-) + +--- a/mm/zswap.c~mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory ++++ a/mm/zswap.c +@@ -1331,15 +1331,22 @@ static unsigned long zswap_shrinker_coun + if (!gfp_has_io_fs(sc->gfp_mask)) + return 0; + +-#ifdef CONFIG_MEMCG_KMEM +- mem_cgroup_flush_stats(memcg); +- nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; +- nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); +-#else +- /* use pool stats instead of memcg stats */ +- nr_backing = zswap_pool_total_size >> PAGE_SHIFT; +- nr_stored = atomic_read(&zswap_nr_stored); +-#endif ++ /* ++ * For memcg, use the cgroup-wide ZSWAP stats since we don't ++ * have them per-node and thus per-lruvec. Careful if memcg is ++ * runtime-disabled: we can get sc->memcg == NULL, which is ok ++ * for the lruvec, but not for memcg_page_state(). ++ * ++ * Without memcg, use the zswap pool-wide metrics. ++ */ ++ if (!mem_cgroup_disabled()) { ++ mem_cgroup_flush_stats(memcg); ++ nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT; ++ nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); ++ } else { ++ nr_backing = zswap_pool_total_size >> PAGE_SHIFT; ++ nr_stored = atomic_read(&zswap_nr_stored); ++ } + + if (!nr_stored) + return 0; +_ diff --git a/patches/null-pointer-dereference-while-shrinking-zswap.patch b/patches/old/null-pointer-dereference-while-shrinking-zswap.patch index a94d69056..a94d69056 100644 --- a/patches/null-pointer-dereference-while-shrinking-zswap.patch +++ b/patches/old/null-pointer-dereference-while-shrinking-zswap.patch diff --git a/patches/stackdepot-respect-__gfp_nolockdep-allocation-flag.patch b/patches/stackdepot-respect-__gfp_nolockdep-allocation-flag.patch new file mode 100644 index 000000000..09dab83ca --- /dev/null +++ b/patches/stackdepot-respect-__gfp_nolockdep-allocation-flag.patch @@ -0,0 +1,90 @@ +From: Andrey Ryabinin <ryabinin.a.a@gmail.com> +Subject: stackdepot: respect __GFP_NOLOCKDEP allocation flag +Date: Thu, 18 Apr 2024 16:11:33 +0200 + +If stack_depot_save_flags() allocates memory, it always drops the +__GFP_NOLOCKDEP flag. So when KASAN tries to track a __GFP_NOLOCKDEP +allocation, we may end up with a lockdep splat like the one below: + +====================================================== + WARNING: possible circular locking dependency detected + 6.9.0-rc3+ #49 Not tainted + ------------------------------------------------------ + kswapd0/149 is trying to acquire lock: + ffff88811346a920 +(&xfs_nondir_ilock_class){++++}-{4:4}, at: xfs_reclaim_inode+0x3ac/0x590 +[xfs] + + but task is already holding lock: + ffffffff8bb33100 (fs_reclaim){+.+.}-{0:0}, at: +balance_pgdat+0x5d9/0xad0 + + which lock already depends on the new lock. 
+ + the existing dependency chain (in reverse order) is: + -> #1 (fs_reclaim){+.+.}-{0:0}: + __lock_acquire+0x7da/0x1030 + lock_acquire+0x15d/0x400 + fs_reclaim_acquire+0xb5/0x100 + prepare_alloc_pages.constprop.0+0xc5/0x230 + __alloc_pages+0x12a/0x3f0 + alloc_pages_mpol+0x175/0x340 + stack_depot_save_flags+0x4c5/0x510 + kasan_save_stack+0x30/0x40 + kasan_save_track+0x10/0x30 + __kasan_slab_alloc+0x83/0x90 + kmem_cache_alloc+0x15e/0x4a0 + __alloc_object+0x35/0x370 + __create_object+0x22/0x90 + __kmalloc_node_track_caller+0x477/0x5b0 + krealloc+0x5f/0x110 + xfs_iext_insert_raw+0x4b2/0x6e0 [xfs] + xfs_iext_insert+0x2e/0x130 [xfs] + xfs_iread_bmbt_block+0x1a9/0x4d0 [xfs] + xfs_btree_visit_block+0xfb/0x290 [xfs] + xfs_btree_visit_blocks+0x215/0x2c0 [xfs] + xfs_iread_extents+0x1a2/0x2e0 [xfs] + xfs_buffered_write_iomap_begin+0x376/0x10a0 [xfs] + iomap_iter+0x1d1/0x2d0 + iomap_file_buffered_write+0x120/0x1a0 + xfs_file_buffered_write+0x128/0x4b0 [xfs] + vfs_write+0x675/0x890 + ksys_write+0xc3/0x160 + do_syscall_64+0x94/0x170 + entry_SYSCALL_64_after_hwframe+0x71/0x79 + +Always preserve __GFP_NOLOCKDEP to fix this. + +Link: https://lkml.kernel.org/r/20240418141133.22950-1-ryabinin.a.a@gmail.com +Fixes: cd11016e5f52 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB") +Signed-off-by: Andrey Ryabinin <ryabinin.a.a@gmail.com> +Reported-by: Xiubo Li <xiubli@redhat.com> +Closes: https://lore.kernel.org/all/a0caa289-ca02-48eb-9bf2-d86fd47b71f4@redhat.com/ +Reported-by: Damien Le Moal <damien.lemoal@opensource.wdc.com> +Closes: https://lore.kernel.org/all/f9ff999a-e170-b66b-7caf-293f2b147ac2@opensource.wdc.com/ +Suggested-by: Dave Chinner <david@fromorbit.com> +Cc: Christoph Hellwig <hch@infradead.org> +Cc: Alexander Potapenko <glider@google.com> +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + lib/stackdepot.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/lib/stackdepot.c~stackdepot-respect-__gfp_nolockdep-allocation-flag ++++ a/lib/stackdepot.c +@@ -627,10 +627,10 @@ depot_stack_handle_t stack_depot_save_fl + /* + * Zero out zone modifiers, as we don't have specific zone + * requirements. Keep the flags related to allocation in atomic +- * contexts and I/O. ++ * contexts, I/O, nolockdep. 
+ */ + alloc_flags &= ~GFP_ZONEMASK; +- alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); ++ alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP); + alloc_flags |= __GFP_NOWARN; + page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER); + if (page) +_ diff --git a/pc/crash-add-prefix-for-crash-dumping-messages.pc b/pc/crash-add-prefix-for-crash-dumping-messages.pc new file mode 100644 index 000000000..76b71fde8 --- /dev/null +++ b/pc/crash-add-prefix-for-crash-dumping-messages.pc @@ -0,0 +1,2 @@ +kernel/crash_core.c +kernel/crash_reserve.c diff --git a/pc/devel-series b/pc/devel-series index 96756605d..c7370b154 100644 --- a/pc/devel-series +++ b/pc/devel-series @@ -109,10 +109,24 @@ selftests-harness-remove-use-of-line_max-fix-fix-fix.patch # selftests-mm-fix-unused-and-uninitialized-variable-warning.patch # -null-pointer-dereference-while-shrinking-zswap.patch # mm-hugetlb-fix-missing-hugetlb_lock-for-resv-uncharge.patch # +mm-create-folio_flag_false-and-folio_type_ops-macros.patch +mm-support-page_mapcount-on-page_has_type-pages.patch +mm-turn-folio_test_hugetlb-into-a-pagetype.patch +mm-turn-folio_test_hugetlb-into-a-pagetype-fix.patch +# +#mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.patch: https://lkml.kernel.org/r/CAJD7tkaPMQqQtfxcLWraz-vnbAxZKxuJRJ7vKuDOCCXtpBSF1A@mail.gmail.com +mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.patch +# +#hugetlb-check-for-anon_vma-prior-to-folio-allocation.patch: syzbot testing +hugetlb-check-for-anon_vma-prior-to-folio-allocation.patch +# +stackdepot-respect-__gfp_nolockdep-allocation-flag.patch +# +init-fix-allocated-page-overlapping-with-ptr_err.patch +# ### hfe # #ENDBRANCH mm-hotfixes-unstable @@ -234,11 +248,7 @@ mm-change-inlined-allocation-helpers-to-account-at-the-call-site.patch # mm-always-initialise-folio-_deferred_list.patch mm-always-initialise-folio-_deferred_list-fix.patch -mm-create-folio_flag_false-and-folio_type_ops-macros.patch mm-remove-folio_prep_large_rmappable.patch -mm-support-page_mapcount-on-page_has_type-pages.patch -mm-turn-folio_test_hugetlb-into-a-pagetype.patch -mm-turn-folio_test_hugetlb-into-a-pagetype-fix.patch mm-remove-a-call-to-compound_head-from-is_page_hwpoison.patch #mm-free-up-pg_slab.patch: check review https://lkml.kernel.org/r/202403312344.c0d273ab-oliver.sang@intel.com mm-free-up-pg_slab.patch @@ -606,10 +616,12 @@ mm-filemap-batch-mm-counter-updating-in-filemap_map_pages.patch # mm-page_alloc-allowing-mthp-compaction-to-capture-the-freed-page-directly.patch # +#mseal-wire-up-mseal-syscall.patch: https://lkml.kernel.org/r/CAJuCfpFLwJg4n7wPpT+u9vC4XHoLE_BPPZ0tDKf7W45hGky4_Q@mail.gmail.com mseal-wire-up-mseal-syscall.patch mseal-add-mseal-syscall.patch selftest-mm-mseal-memory-sealing.patch mseal-add-documentation.patch +#selftest-mm-mseal-read-only-elf-memory-segment.patch: https://lkml.kernel.org/r/CA+G9fYvacWNZsmizotfcwD35xBq0999_EAV0wZgwjdi46yivgg@mail.gmail.com selftest-mm-mseal-read-only-elf-memory-segment.patch selftest-mm-mseal-read-only-elf-memory-segment-fix.patch # @@ -645,6 +657,15 @@ mm-hugetlb-assert-hugetlb_lock-in-__hugetlb_cgroup_commit_charge.patch # mm-page_table_check-support-userfault-wr-protect-entries.patch # +mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.patch +# +mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.patch +# +mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.patch +mm-arm64-override-clear_young_dirty_ptes-batch-helper.patch +mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.patch 
+mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.patch +# # # # @@ -797,4 +818,6 @@ selftests-exec-make-binaries-position-independent.patch # cpumask-delete-unused-reset_cpu_possible_mask.patch # +crash-add-prefix-for-crash-dumping-messages.patch +# #ENDBRANCH mm-nonmm-unstable diff --git a/pc/hugetlb-check-for-anon_vma-prior-to-folio-allocation.pc b/pc/hugetlb-check-for-anon_vma-prior-to-folio-allocation.pc new file mode 100644 index 000000000..6dc98425d --- /dev/null +++ b/pc/hugetlb-check-for-anon_vma-prior-to-folio-allocation.pc @@ -0,0 +1 @@ +mm/hugetlb.c diff --git a/pc/init-fix-allocated-page-overlapping-with-ptr_err.pc b/pc/init-fix-allocated-page-overlapping-with-ptr_err.pc new file mode 100644 index 000000000..1b7210806 --- /dev/null +++ b/pc/init-fix-allocated-page-overlapping-with-ptr_err.pc @@ -0,0 +1 @@ +init/main.c diff --git a/pc/mm-arm64-override-clear_young_dirty_ptes-batch-helper.pc b/pc/mm-arm64-override-clear_young_dirty_ptes-batch-helper.pc new file mode 100644 index 000000000..ba3c28b5d --- /dev/null +++ b/pc/mm-arm64-override-clear_young_dirty_ptes-batch-helper.pc @@ -0,0 +1,2 @@ +arch/arm64/include/asm/pgtable.h +arch/arm64/mm/contpte.c diff --git a/pc/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.pc b/pc/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.pc new file mode 100644 index 000000000..b35bccbe3 --- /dev/null +++ b/pc/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.pc @@ -0,0 +1 @@ +mm/huge_memory.c diff --git a/pc/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.pc b/pc/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.pc new file mode 100644 index 000000000..006118fb2 --- /dev/null +++ b/pc/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.pc @@ -0,0 +1,3 @@ +include/linux/mm_types.h +include/linux/pgtable.h +mm/madvise.c diff --git a/pc/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.pc b/pc/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.pc new file mode 100644 index 000000000..74d58a564 --- /dev/null +++ b/pc/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.pc @@ -0,0 +1 @@ +mm/madvise.c diff --git a/pc/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.pc b/pc/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.pc new file mode 100644 index 000000000..8491d45ba --- /dev/null +++ b/pc/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.pc @@ -0,0 +1,3 @@ +mm/internal.h +mm/madvise.c +mm/memory.c diff --git a/pc/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.pc b/pc/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.pc new file mode 100644 index 000000000..b6b7df785 --- /dev/null +++ b/pc/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.pc @@ -0,0 +1 @@ +mm/swapfile.c diff --git a/pc/null-pointer-dereference-while-shrinking-zswap.pc b/pc/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.pc index 7f1f05d5c..7f1f05d5c 100644 --- a/pc/null-pointer-dereference-while-shrinking-zswap.pc +++ b/pc/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.pc diff --git a/pc/stackdepot-respect-__gfp_nolockdep-allocation-flag.pc b/pc/stackdepot-respect-__gfp_nolockdep-allocation-flag.pc new file mode 100644 index 000000000..0e968ad16 --- /dev/null +++ b/pc/stackdepot-respect-__gfp_nolockdep-allocation-flag.pc @@ -0,0 +1 @@ +lib/stackdepot.c diff --git a/txt/crash-add-prefix-for-crash-dumping-messages.txt 
b/txt/crash-add-prefix-for-crash-dumping-messages.txt new file mode 100644 index 000000000..bf92d3291 --- /dev/null +++ b/txt/crash-add-prefix-for-crash-dumping-messages.txt @@ -0,0 +1,17 @@ +From: Baoquan He <bhe@redhat.com> +Subject: crash: add prefix for crash dumping messages +Date: Thu, 18 Apr 2024 11:58:43 +0800 + +Add pr_fmt() to kernel/crash_core.c so that the module name is printed +as a prefix on its debugging messages. + +Also add the 'crashkernel:' prefix to two message-printing lines in +kernel/crash_reserve.c. In that file, almost all debugging messages +already have the 'crashkernel:' prefix, or contain the keyword +crashkernel at the beginning or in the middle, so adding pr_fmt() there +would be redundant. + +Link: https://lkml.kernel.org/r/20240418035843.1562887-1-bhe@redhat.com +Signed-off-by: Baoquan He <bhe@redhat.com> +Cc: Dave Young <dyoung@redhat.com> +Cc: Jiri Slaby <jirislaby@kernel.org> diff --git a/txt/hugetlb-check-for-anon_vma-prior-to-folio-allocation.txt b/txt/hugetlb-check-for-anon_vma-prior-to-folio-allocation.txt new file mode 100644 index 000000000..a21998914 --- /dev/null +++ b/txt/hugetlb-check-for-anon_vma-prior-to-folio-allocation.txt @@ -0,0 +1,19 @@ +From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com> +Subject: hugetlb: check for anon_vma prior to folio allocation +Date: Mon, 15 Apr 2024 14:17:47 -0700 + +Commit 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of +anon_vma_prepare()") may bail out after allocating a folio if we do not +hold the mmap lock. When this occurs, vmf_anon_prepare() will release the +vma lock. Hugetlb then attempts to call restore_reserve_on_error(), which +depends on the vma lock being held. + +We can move vmf_anon_prepare() prior to the folio allocation in order to +avoid calling restore_reserve_on_error() without the vma lock. + +Link: https://lkml.kernel.org/r/ZiFqSrSRLhIV91og@fedora +Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()") +Reported-by: syzbot+ad1b592fc4483655438b@syzkaller.appspotmail.com +Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com> +Cc: Muchun Song <muchun.song@linux.dev> +Cc: <stable@vger.kernel.org> diff --git a/txt/init-fix-allocated-page-overlapping-with-ptr_err.txt b/txt/init-fix-allocated-page-overlapping-with-ptr_err.txt new file mode 100644 index 000000000..5d68e1b92 --- /dev/null +++ b/txt/init-fix-allocated-page-overlapping-with-ptr_err.txt @@ -0,0 +1,49 @@ +From: Nam Cao <namcao@linutronix.de> +Subject: init: fix allocated page overlapping with PTR_ERR +Date: Thu, 18 Apr 2024 12:29:43 +0200 + +There is nothing preventing kernel memory allocators from allocating a +page that overlaps with PTR_ERR(), except for architecture-specific code +that sets up memblock. + +It was discovered that the RISCV architecture doesn't set up memblock +correctly, leading to a page overlapping with PTR_ERR() being allocated, +and subsequently crashing the kernel (link in the Closes: tag below). + +The reported crash has nothing to do with PTR_ERR(): the last page (at +address 0xfffff000) being allocated leads to an unexpected arithmetic +overflow in ext4; but still, this page shouldn't be allocated in the first +place. + +Because PTR_ERR() is an architecture-independent thing, we shouldn't ask +every single architecture to set this up. There may be other +architectures besides RISCV that have the same problem. + +Fix this once and for all by reserving the physical memory page that may +be mapped to the last virtual memory page as part of low memory. 
diff --git a/txt/hugetlb-check-for-anon_vma-prior-to-folio-allocation.txt b/txt/hugetlb-check-for-anon_vma-prior-to-folio-allocation.txt
new file mode 100644
index 000000000..a21998914
--- /dev/null
+++ b/txt/hugetlb-check-for-anon_vma-prior-to-folio-allocation.txt
@@ -0,0 +1,19 @@
+From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
+Subject: hugetlb: check for anon_vma prior to folio allocation
+Date: Mon, 15 Apr 2024 14:17:47 -0700
+
+Commit 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of
+anon_vma_prepare()") may bail out after allocating a folio if we do not
+hold the mmap lock. When this occurs, vmf_anon_prepare() will release the
+vma lock. Hugetlb then attempts to call restore_reserve_on_error(), which
+depends on the vma lock being held.
+
+We can move vmf_anon_prepare() prior to the folio allocation in order to
+avoid calling restore_reserve_on_error() without the vma lock.
+
+Link: https://lkml.kernel.org/r/ZiFqSrSRLhIV91og@fedora
+Fixes: 9acad7ba3e25 ("hugetlb: use vmf_anon_prepare() instead of anon_vma_prepare()")
+Reported-by: syzbot+ad1b592fc4483655438b@syzkaller.appspotmail.com
+Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: <stable@vger.kernel.org>
diff --git a/txt/init-fix-allocated-page-overlapping-with-ptr_err.txt b/txt/init-fix-allocated-page-overlapping-with-ptr_err.txt
new file mode 100644
index 000000000..5d68e1b92
--- /dev/null
+++ b/txt/init-fix-allocated-page-overlapping-with-ptr_err.txt
@@ -0,0 +1,49 @@
+From: Nam Cao <namcao@linutronix.de>
+Subject: init: fix allocated page overlapping with PTR_ERR
+Date: Thu, 18 Apr 2024 12:29:43 +0200
+
+There is nothing preventing kernel memory allocators from allocating a
+page that overlaps with PTR_ERR(), except for architecture-specific code
+that sets up memblock.
+
+It was discovered that the RISC-V architecture doesn't set up memblock
+correctly, leading to a page overlapping with PTR_ERR() being allocated,
+and subsequently crashing the kernel (see the Closes: link below).
+
+The reported crash has nothing to do with PTR_ERR(): the last page (at
+address 0xfffff000) being allocated leads to an unexpected arithmetic
+overflow in ext4; but still, this page shouldn't be allocated in the
+first place.
+
+Because PTR_ERR() is an architecture-independent thing, we shouldn't ask
+every single architecture to set this up. There may be other
+architectures besides RISC-V that have the same problem.
+
+Fix this once and for all by reserving the physical memory page that may
+be mapped to the last virtual memory page as part of low memory.
+
+Unfortunately, this means that if there is actual memory at this reserved
+location, that memory will become inaccessible. However, if this page is
+not reserved, it can only be accessed as high memory, so this doesn't
+matter if high memory is not supported. Even if high memory is
+supported, it is still only one page.
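To make the overlap concrete: IS_ERR() treats the top MAX_ERRNO (4095)
values of the address space as encoded error numbers, so almost any
pointer into the last virtual page passes IS_ERR(). A hedged sketch of
the aliasing (illustrative only; the constants follow the usual
include/linux/err.h definitions):

    #include <linux/err.h>
    #include <linux/printk.h>

    /* A pointer that lands inside the last virtual page ... */
    static void demo_ptr_err_alias(void)
    {
            void *p = (void *)(-MAX_ERRNO + 0x20UL);

            /* ... satisfies IS_ERR() although it was never an error. */
            if (IS_ERR(p))
                    pr_warn("valid pointer misread as errno %ld\n",
                            PTR_ERR(p));
    }

Reserving that page in memblock means no allocator can ever hand out
memory whose virtual address falls in this ambiguous range.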
+
+Closes: https://lore.kernel.org/linux-riscv/878r1ibpdn.fsf@all.your.base.are.belong.to.us
+Link: https://lkml.kernel.org/r/20240418102943.180510-1-namcao@linutronix.de
+Signed-off-by: Nam Cao <namcao@linutronix.de>
+Reported-by: Björn Töpel <bjorn@kernel.org>
+Tested-by: Björn Töpel <bjorn@kernel.org>
+Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
+Cc: Andreas Dilger <adilger@dilger.ca>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Cc: Changbin Du <changbin.du@huawei.com>
+Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
+Cc: Geert Uytterhoeven <geert+renesas@glider.be>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Krister Johansen <kjlx@templeofstupid.com>
+Cc: Luis Chamberlain <mcgrof@kernel.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: Stephen Rothwell <sfr@canb.auug.org.au>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
diff --git a/txt/mm-arm64-override-clear_young_dirty_ptes-batch-helper.txt b/txt/mm-arm64-override-clear_young_dirty_ptes-batch-helper.txt
new file mode 100644
index 000000000..43fe8f8d4
--- /dev/null
+++ b/txt/mm-arm64-override-clear_young_dirty_ptes-batch-helper.txt
@@ -0,0 +1,23 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/arm64: override clear_young_dirty_ptes() batch helper
+Date: Thu, 18 Apr 2024 21:44:33 +0800
+
+The per-pte get_and_clear/modify/set approach would result in
+unfolding/refolding for contpte mappings on arm64. So we need to
+override clear_young_dirty_ptes() for arm64 to avoid it.
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-3-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Suggested-by: Barry Song <21cnbao@gmail.com>
+Suggested-by: Ryan Roberts <ryan.roberts@arm.com>
+Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
diff --git a/txt/mm-create-folio_flag_false-and-folio_type_ops-macros.txt b/txt/mm-create-folio_flag_false-and-folio_type_ops-macros.txt
index acacd697a..46a9f7b37 100644
--- a/txt/mm-create-folio_flag_false-and-folio_type_ops-macros.txt
+++ b/txt/mm-create-folio_flag_false-and-folio_type_ops-macros.txt
@@ -7,9 +7,11 @@ FOLIO_FLAG_FALSE from PAGEFLAG_FALSE and FOLIO_TYPE_OPS from
 PAGE_TYPE_OPS.
 
 Link: https://lkml.kernel.org/r/20240321142448.1645400-3-willy@infradead.org
+Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR")
 Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
 Reviewed-by: David Hildenbrand <david@redhat.com>
 Acked-by: Vlastimil Babka <vbabka@suse.cz>
 Cc: Miaohe Lin <linmiaohe@huawei.com>
 Cc: Muchun Song <muchun.song@linux.dev>
 Cc: Oscar Salvador <osalvador@suse.de>
+Cc: <stable@vger.kernel.org>
diff --git a/txt/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.txt b/txt/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.txt
new file mode 100644
index 000000000..528263ad1
--- /dev/null
+++ b/txt/mm-huge_memory-improve-split_huge_page_to_list_to_order-return-value-documentation.txt
@@ -0,0 +1,15 @@
+From: David Hildenbrand <david@redhat.com>
+Subject: mm/huge_memory: improve split_huge_page_to_list_to_order() return value documentation
+Date: Thu, 18 Apr 2024 17:18:34 +0200
+
+The documentation is wrong and relying on it almost resulted in BUGs in
+new callers: we return -EAGAIN on unexpected folio references, not -EBUSY.
+
+Let's fix that and also document which other return values we can
+currently see and why they could happen.
+
+Link: https://lkml.kernel.org/r/20240418151834.216557-1-david@redhat.com
+Signed-off-by: David Hildenbrand <david@redhat.com>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Matthew Wilcox <willy@infradead.org>
diff --git a/txt/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.txt b/txt/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.txt
new file mode 100644
index 000000000..9d5731b50
--- /dev/null
+++ b/txt/mm-madvise-introduce-clear_young_dirty_ptes-batch-helper.txt
@@ -0,0 +1,61 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/madvise: introduce clear_young_dirty_ptes() batch helper
+Date: Thu, 18 Apr 2024 21:44:32 +0800
+
+Patch series "mm/madvise: enhance lazyfreeing with mTHP in madvise_free",
+v10.
+
+This patchset adds support for lazyfreeing multi-size THP (mTHP) without
+needing to first split the large folio via split_folio(). However, we
+still need to split a large folio that is not fully mapped within the
+target range.
+
+If a large folio is locked or shared, or if we fail to split it, we just
+leave it in place and advance to the next PTE in the range. But note that
+the behavior is changed; previously, any failure of this sort would cause
+the entire operation to give up. As large folios become more common,
+sticking to the old way could result in wasted opportunities.
+
+Performance Testing
+===================
+
+On an Intel I5 CPU, lazyfreeing a 1GiB VMA backed by PTE-mapped folios of
+the same size results in the following runtimes for madvise(MADV_FREE) in
+seconds (shorter is better):
+
+Folio Size |   Old    |   New    | Change
+------------------------------------------
+      4KiB | 0.590251 | 0.590259 |    0%
+     16KiB | 2.990447 | 0.185655 |  -94%
+     32KiB | 2.547831 | 0.104870 |  -95%
+     64KiB | 2.457796 | 0.052812 |  -97%
+    128KiB | 2.281034 | 0.032777 |  -99%
+    256KiB | 2.230387 | 0.017496 |  -99%
+    512KiB | 2.189106 | 0.010781 |  -99%
+   1024KiB | 2.183949 | 0.007753 |  -99%
+   2048KiB | 0.002799 | 0.002804 |    0%
+
+
+This patch (of 4):
+
+This commit introduces clear_young_dirty_ptes() to replace mkold_ptes().
+By doing so, we can use the same function for both use cases
+(madvise_pageout and madvise_free), and it also provides the flexibility
+to only clear the dirty flag in the future if needed.
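As an aside for readers following the series, the generic fallback for
such a batch helper plausibly looks like the per-PTE loop below (a sketch
based on this series' description; the CYDP_* flag names and the exact
signature are taken on trust from the patches and may differ in detail):

    /* Generic fallback: apply young/dirty clearing to nr consecutive
     * PTEs, one at a time. Architectures (e.g. arm64 contpte) can
     * override this with a real batched implementation. */
    static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
                    unsigned long addr, pte_t *ptep, unsigned int nr,
                    cydp_t flags)
    {
            pte_t pte;

            for (;;) {
                    if (flags == CYDP_CLEAR_YOUNG) {
                            ptep_test_and_clear_young(vma, addr, ptep);
                    } else {
                            pte = ptep_get_and_clear(vma->vm_mm, addr, ptep);
                            if (flags & CYDP_CLEAR_YOUNG)
                                    pte = pte_mkold(pte);
                            if (flags & CYDP_CLEAR_DIRTY)
                                    pte = pte_mkclean(pte);
                            set_pte_at(vma->vm_mm, addr, ptep, pte);
                    }
                    if (--nr == 0)
                            break;
                    ptep++;
                    addr += PAGE_SIZE;
            }
    }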
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-1-ioworker0@gmail.com
+Link: https://lkml.kernel.org/r/20240418134435.6092-2-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Suggested-by: Ryan Roberts <ryan.roberts@arm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
diff --git a/txt/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.txt b/txt/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.txt
new file mode 100644
index 000000000..51f30897c
--- /dev/null
+++ b/txt/mm-madvise-optimize-lazyfreeing-with-mthp-in-madvise_free.txt
@@ -0,0 +1,47 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/madvise: optimize lazyfreeing with mTHP in madvise_free
+Date: Thu, 18 Apr 2024 21:44:35 +0800
+
+This patch optimizes lazyfreeing with PTE-mapped mTHP[1] (inspired by
+David Hildenbrand[2]). We aim to avoid unnecessary folio splitting if the
+large folio is fully mapped within the target range.
+
+If a large folio is locked or shared, or if we fail to split it, we just
+leave it in place and advance to the next PTE in the range. But note that
+the behavior is changed; previously, any failure of this sort would cause
+the entire operation to give up. As large folios become more common,
+sticking to the old way could result in wasted opportunities.
+
+On an Intel I5 CPU, lazyfreeing a 1GiB VMA backed by PTE-mapped folios of
+the same size results in the following runtimes for madvise(MADV_FREE) in
+seconds (shorter is better):
+
+Folio Size |   Old    |   New    | Change
+------------------------------------------
+      4KiB | 0.590251 | 0.590259 |    0%
+     16KiB | 2.990447 | 0.185655 |  -94%
+     32KiB | 2.547831 | 0.104870 |  -95%
+     64KiB | 2.457796 | 0.052812 |  -97%
+    128KiB | 2.281034 | 0.032777 |  -99%
+    256KiB | 2.230387 | 0.017496 |  -99%
+    512KiB | 2.189106 | 0.010781 |  -99%
+   1024KiB | 2.183949 | 0.007753 |  -99%
+   2048KiB | 0.002799 | 0.002804 |    0%
+
+[1] https://lkml.kernel.org/r/20231207161211.2374093-5-ryan.roberts@arm.com
+[2] https://lore.kernel.org/linux-mm/20240214204435.167852-1-david@redhat.com
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-5-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
diff --git a/txt/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.txt b/txt/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.txt
new file mode 100644
index 000000000..5fdfd3742
--- /dev/null
+++ b/txt/mm-memory-add-any_dirty-optional-pointer-to-folio_pte_batch.txt
@@ -0,0 +1,24 @@
+From: Lance Yang <ioworker0@gmail.com>
+Subject: mm/memory: add any_dirty optional pointer to folio_pte_batch()
+Date: Thu, 18 Apr 2024 21:44:34 +0800
+
+This commit adds the any_dirty pointer as an optional parameter to the
+folio_pte_batch() function. By using both the any_young and any_dirty
+pointers, madvise_free can make smarter decisions about whether to clear
+the PTEs when marking large folios as lazyfree.
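A hedged usage sketch of the extended helper (the signature follows this
series' description; names like FPB_IGNORE_DIRTY are taken on trust from
the mm batching code, and the wrapper below is hypothetical, not a
literal hunk):

    /* Batch over the PTEs mapping a large folio and learn in one pass
     * whether any entry in the batch was young or dirty; callers like
     * madvise_free can then decide whether the folio may be lazyfreed
     * without touching the PTEs. */
    static int probe_batch(struct folio *folio, unsigned long addr,
                           pte_t *pte, pte_t ptent, int max_nr,
                           bool *young, bool *dirty)
    {
            *young = *dirty = false;
            return folio_pte_batch(folio, addr, pte, ptent, max_nr,
                                   FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY,
                                   NULL, young, dirty);
    }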
+
+Link: https://lkml.kernel.org/r/20240418134435.6092-4-ioworker0@gmail.com
+Signed-off-by: Lance Yang <ioworker0@gmail.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: Jeff Xie <xiehuan09@gmail.com>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: Yin Fengwei <fengwei.yin@intel.com>
+Cc: Zach O'Keefe <zokeefe@google.com>
diff --git a/txt/mm-page_table_check-support-userfault-wr-protect-entries.txt b/txt/mm-page_table_check-support-userfault-wr-protect-entries.txt
index 046beb6cb..180b38c6a 100644
--- a/txt/mm-page_table_check-support-userfault-wr-protect-entries.txt
+++ b/txt/mm-page_table_check-support-userfault-wr-protect-entries.txt
@@ -48,7 +48,7 @@ better now.
 
 Link: https://lkml.kernel.org/r/20240417212549.2766883-1-peterx@redhat.com
 Signed-off-by: Peter Xu <peterx@redhat.com>
-Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
+Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
 Cc: Axel Rasmussen <axelrasmussen@google.com>
 Cc: David Hildenbrand <david@redhat.com>
 Cc: Nadav Amit <nadav.amit@gmail.com>
diff --git a/txt/mm-support-page_mapcount-on-page_has_type-pages.txt b/txt/mm-support-page_mapcount-on-page_has_type-pages.txt
index f226b6859..beafd94ce 100644
--- a/txt/mm-support-page_mapcount-on-page_has_type-pages.txt
+++ b/txt/mm-support-page_mapcount-on-page_has_type-pages.txt
@@ -7,9 +7,11 @@ works. It is more convenient for users to not have to filter out these
 pages.
 
 Link: https://lkml.kernel.org/r/20240321142448.1645400-5-willy@infradead.org
+Fixes: 9c5ccf2db04b ("mm: remove HUGETLB_PAGE_DTOR")
 Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
 Reviewed-by: David Hildenbrand <david@redhat.com>
 Acked-by: Vlastimil Babka <vbabka@suse.cz>
 Cc: Miaohe Lin <linmiaohe@huawei.com>
 Cc: Muchun Song <muchun.song@linux.dev>
 Cc: Oscar Salvador <osalvador@suse.de>
+Cc: <stable@vger.kernel.org>
diff --git a/txt/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.txt b/txt/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.txt
new file mode 100644
index 000000000..65cc6c631
--- /dev/null
+++ b/txt/mm-swapfile-check-usable-swap-device-in-__folio_throttle_swaprate.txt
@@ -0,0 +1,18 @@
+From: Kefeng Wang <wangkefeng.wang@huawei.com>
+Subject: mm: swapfile: check usable swap device in __folio_throttle_swaprate()
+Date: Thu, 18 Apr 2024 21:56:44 +0800
+
+Skip blk_cgroup_congested() if there is no usable swap device, since no
+swapin/out will occur, thereby avoiding taking swap_lock. The difference
+is shown below in perf data from a CoW pagefault:
+
+ perf report -g -i perf.data.swapoff | egrep "blk_cgroup_congested|__folio_throttle_swaprate"
+ 1.01% 0.16% page_fault2_pro [kernel.kallsyms] [k] __folio_throttle_swaprate
+ 0.83% 0.80% page_fault2_pro [kernel.kallsyms] [k] blk_cgroup_congested
+
+ perf report -g -i perf.data.swapon | egrep "blk_cgroup_congested|__folio_throttle_swaprate"
+ 0.15% 0.15% page_fault2_pro [kernel.kallsyms] [k] __folio_throttle_swaprate
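A sketch of the approach (hedged: the usable-swap predicate below is a
stand-in for illustration, not necessarily the exact check the patch
uses):

    /* Bail out before blk_cgroup_congested() when no swap device is
     * usable, so the swap lock is never taken on this path. */
    void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
    {
            if (!(gfp & __GFP_IO))
                    return;

            /* No usable swap device means no swapin/out can occur. */
            if (!get_nr_swap_pages())
                    return;

            if (!blk_cgroup_congested())
                    return;

            /* ... throttle against the swap disk, as before ... */
    }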
+
+Link: https://lkml.kernel.org/r/20240418135644.2736748-1-wangkefeng.wang@huawei.com
+Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Tejun Heo <tj@kernel.org>
diff --git a/txt/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.txt b/txt/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.txt
new file mode 100644
index 000000000..09615713a
--- /dev/null
+++ b/txt/mm-zswap-fix-shrinker-null-crash-with-cgroup_disable=memory.txt
@@ -0,0 +1,35 @@
+From: Johannes Weiner <hannes@cmpxchg.org>
+Subject: mm: zswap: fix shrinker NULL crash with cgroup_disable=memory
+Date: Thu, 18 Apr 2024 08:26:28 -0400
+
+Christian reports a NULL deref in zswap that he bisected down to the zswap
+shrinker. The issue also cropped up in the bug trackers of libguestfs [1]
+and the Red Hat bugzilla [2].
+
+The problem is that when memcg is disabled with the boot time flag, the
+zswap shrinker might get called with sc->memcg == NULL. This is okay in
+many places, like the lruvec operations. But it crashes in
+memcg_page_state() - which is only used due to the non-node accounting of
+the cgroup's zswap memory to begin with.
+
+Nhat spotted that the memcg can be NULL in the memcg-disabled case, and I
+was then able to reproduce the crash locally as well.
+
+[1] https://github.com/libguestfs/libguestfs/issues/139
+[2] https://bugzilla.redhat.com/show_bug.cgi?id=2275252
+
+Link: https://lkml.kernel.org/r/20240418124043.GC1055428@cmpxchg.org
+Link: https://lkml.kernel.org/r/20240417143324.GA1055428@cmpxchg.org
+Fixes: b5ba474f3f51 ("zswap: shrink zswap pool based on memory pressure")
+Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
+Reported-by: Christian Heusel <christian@heusel.eu>
+Debugged-by: Nhat Pham <nphamcs@gmail.com>
+Suggested-by: Nhat Pham <nphamcs@gmail.com>
+Tested-by: Christian Heusel <christian@heusel.eu>
+Acked-by: Yosry Ahmed <yosryahmed@google.com>
+Cc: Chengming Zhou <chengming.zhou@linux.dev>
+Cc: Dan Streetman <ddstreet@ieee.org>
+Cc: Richard W.M. Jones <rjones@redhat.com>
+Cc: Seth Jennings <sjenning@redhat.com>
+Cc: Vitaly Wool <vitaly.wool@konsulko.com>
+Cc: <stable@vger.kernel.org> [v6.8]
diff --git a/txt/null-pointer-dereference-while-shrinking-zswap.txt b/txt/old/null-pointer-dereference-while-shrinking-zswap.txt
index f437585b2..f437585b2 100644
--- a/txt/null-pointer-dereference-while-shrinking-zswap.txt
+++ b/txt/old/null-pointer-dereference-while-shrinking-zswap.txt
diff --git a/txt/stackdepot-respect-__gfp_nolockdep-allocation-flag.txt b/txt/stackdepot-respect-__gfp_nolockdep-allocation-flag.txt
new file mode 100644
index 000000000..824f42ed9
--- /dev/null
+++ b/txt/stackdepot-respect-__gfp_nolockdep-allocation-flag.txt
@@ -0,0 +1,68 @@
+From: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+Subject: stackdepot: respect __GFP_NOLOCKDEP allocation flag
+Date: Thu, 18 Apr 2024 16:11:33 +0200
+
+If stack_depot_save_flags() allocates memory, it always drops the
+__GFP_NOLOCKDEP flag. So when KASAN tries to track a __GFP_NOLOCKDEP
+allocation, we may end up with a lockdep splat like the one below:
+
+======================================================
+ WARNING: possible circular locking dependency detected
+ 6.9.0-rc3+ #49 Not tainted
+ ------------------------------------------------------
+ kswapd0/149 is trying to acquire lock:
+ ffff88811346a920 (&xfs_nondir_ilock_class){++++}-{4:4}, at: xfs_reclaim_inode+0x3ac/0x590 [xfs]
+
+ but task is already holding lock:
+ ffffffff8bb33100 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x5d9/0xad0
+
+ which lock already depends on the new lock.
+
+ the existing dependency chain (in reverse order) is:
+ -> #1 (fs_reclaim){+.+.}-{0:0}:
+        __lock_acquire+0x7da/0x1030
+        lock_acquire+0x15d/0x400
+        fs_reclaim_acquire+0xb5/0x100
+        prepare_alloc_pages.constprop.0+0xc5/0x230
+        __alloc_pages+0x12a/0x3f0
+        alloc_pages_mpol+0x175/0x340
+        stack_depot_save_flags+0x4c5/0x510
+        kasan_save_stack+0x30/0x40
+        kasan_save_track+0x10/0x30
+        __kasan_slab_alloc+0x83/0x90
+        kmem_cache_alloc+0x15e/0x4a0
+        __alloc_object+0x35/0x370
+        __create_object+0x22/0x90
+        __kmalloc_node_track_caller+0x477/0x5b0
+        krealloc+0x5f/0x110
+        xfs_iext_insert_raw+0x4b2/0x6e0 [xfs]
+        xfs_iext_insert+0x2e/0x130 [xfs]
+        xfs_iread_bmbt_block+0x1a9/0x4d0 [xfs]
+        xfs_btree_visit_block+0xfb/0x290 [xfs]
+        xfs_btree_visit_blocks+0x215/0x2c0 [xfs]
+        xfs_iread_extents+0x1a2/0x2e0 [xfs]
+        xfs_buffered_write_iomap_begin+0x376/0x10a0 [xfs]
+        iomap_iter+0x1d1/0x2d0
+        iomap_file_buffered_write+0x120/0x1a0
+        xfs_file_buffered_write+0x128/0x4b0 [xfs]
+        vfs_write+0x675/0x890
+        ksys_write+0xc3/0x160
+        do_syscall_64+0x94/0x170
+        entry_SYSCALL_64_after_hwframe+0x71/0x79
+
+Always preserve __GFP_NOLOCKDEP to fix this.
+
+Link: https://lkml.kernel.org/r/20240418141133.22950-1-ryabinin.a.a@gmail.com
+Fixes: cd11016e5f52 ("mm, kasan: stackdepot implementation. Enable stackdepot for SLAB")
+Signed-off-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+Reported-by: Xiubo Li <xiubli@redhat.com>
+Closes: https://lore.kernel.org/all/a0caa289-ca02-48eb-9bf2-d86fd47b71f4@redhat.com/
+Reported-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Closes: https://lore.kernel.org/all/f9ff999a-e170-b66b-7caf-293f2b147ac2@opensource.wdc.com/
+Suggested-by: Dave Chinner <david@fromorbit.com>
+Cc: Christoph Hellwig <hch@infradead.org>
+Cc: Alexander Potapenko <glider@google.com>
+Cc: <stable@vger.kernel.org>
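A minimal sketch of the fix's shape (hedged: the helper below is
hypothetical - in lib/stackdepot.c the masking is done inline in
stack_depot_save_flags() - and the exact mask may differ in mainline):

    /* Sanitize the caller's gfp mask for the depot's own page
     * allocation, but keep __GFP_NOLOCKDEP so lockdep does not track
     * the nested allocation. */
    static gfp_t depot_alloc_gfp(gfp_t alloc_flags)
    {
            alloc_flags &= ~GFP_ZONEMASK;
            alloc_flags &= (GFP_ATOMIC | GFP_KERNEL | __GFP_NOLOCKDEP);
            alloc_flags |= __GFP_NOWARN;
            return alloc_flags;
    }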