mm/compaction: enable compacting >0 order folios.

migrate_pages() supports >0 order folio migration, and during compaction, even if compaction_alloc() cannot provide >0 order free pages, migrate_pages() can split the source page and try to migrate the base pages from the split. This can serve as a baseline and starting point for adding support for compacting >0 order folios.

Link: https://lkml.kernel.org/r/20240220183220.1451315-3-zi.yan@sent.com
Signed-off-by: Zi Yan <ziy@nvidia.com>
Suggested-by: Huang Ying <ying.huang@intel.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Adam Manzanares <a.manzanares@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
author: Zi Yan <ziy@nvidia.com> 2024-02-20 13:32:18 -0500
committer: Andrew Morton <akpm@linux-foundation.org> 2024-02-23 17:48:33 -0800
commit: ee6f62fd34f0bb99ef93f799bcf5fc6a6b24945b (patch)
tree: ae3eb3dc218048d9314296e102a0531945417544 /mm
parent: 5267fe5d092e80a83740e5a1f6d5638d88ac7309 (diff)
download: linux-ee6f62fd34f0bb99ef93f799bcf5fc6a6b24945b.tar.gz
1 files changed, 76 insertions, 25 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 52ff6b9344c707..2ba9ba49b0e9ff 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -40,9 +40,22 @@ static inline void count_compact_events(enum vm_event_item item, long delta)
 {
 	count_vm_events(item, delta);
 }
+
+/*
+ * True iff order == -1, the value expected when compacting proactively via
+ * 1. /proc/sys/vm/compact_memory
+ * 2. /sys/devices/system/node/nodex/compact
+ * 3. /proc/sys/vm/compaction_proactiveness
+ */
+static inline bool is_via_compact_memory(int order)
+{
+	return order == -1;
+}
+
 #else
 #define count_compact_event(item) do { } while (0)
 #define count_compact_events(item, delta) do { } while (0)
+static inline bool is_via_compact_memory(int order) { return false; }
 #endif
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
@@ -817,6 +830,32 @@ static bool too_many_isolated(struct compact_control *cc)
 }
 
 /**
+ * skip_isolation_on_order() - determine when to skip folio isolation based on
+ *			       folio order and compaction target order
+ * @order:		to-be-isolated folio order
+ * @target_order:	compaction target order
+ *
+ * Return: true if the folio should be skipped to avoid a needless isolation.
+ */
+static bool skip_isolation_on_order(int order, int target_order)
+{
+	/*
+	 * Unless we are performing global compaction (i.e.,
+	 * is_via_compact_memory), skip any folio whose order is at least the
+	 * target order: we wouldn't be here if we'd have a free folio with
+	 * the desired target_order, so migrating this folio would likely fail
+	 * later.
+	 */
+	if (!is_via_compact_memory(target_order) && order >= target_order)
+		return true;
+	/*
+	 * Compaction is limited to pageblocks and won't try creating larger
+	 * free blocks, so folios of at least pageblock order are skipped too.
+	 */
+	return order >= pageblock_order;
+}
+
+/**
  * isolate_migratepages_block() - isolate all migrate-able pages within
  *				  a single pageblock
  * @cc:		Compaction control structure.
@@ -947,7 +986,22 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			valid_page = page;
 		}
 
-		if (PageHuge(page) && cc->alloc_contig) {
+		if (PageHuge(page)) {
+			/*
+			 * skip hugetlbfs if we are not compacting for pages
+			 * bigger than its order. THPs and other compound pages
+			 * are handled below.
+			 */
+			if (!cc->alloc_contig) {
+				const unsigned int order = compound_order(page);
+
+				if (order <= MAX_PAGE_ORDER) {
+					low_pfn += (1UL << order) - 1;
+					nr_scanned += (1UL << order) - 1;
+				}
+				goto isolate_fail;
+			}
+			/* for alloc_contig case */
 			if (locked) {
 				unlock_page_lruvec_irqrestore(locked, flags);
 				locked = NULL;
@@ -1008,21 +1062,24 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		}
 
 		/*
-		 * Regardless of being on LRU, compound pages such as THP and
-		 * hugetlbfs are not to be compacted unless we are attempting
-		 * an allocation much larger than the huge page size (eg CMA).
-		 * We can potentially save a lot of iterations if we skip them
-		 * at once. The check is racy, but we can consider only valid
-		 * values and the only danger is skipping too much.
+		 * Regardless of being on LRU, compound pages such as THP
+		 * (hugetlbfs is handled above) are not to be compacted unless
+		 * we are attempting an allocation larger than the compound
+		 * page size. We can potentially save a lot of iterations if we
+		 * skip them at once. The check is racy, but we can consider
+		 * only valid values and the only danger is skipping too much.
 		 */
 		if (PageCompound(page) && !cc->alloc_contig) {
 			const unsigned int order = compound_order(page);
 
-			if (likely(order <= MAX_PAGE_ORDER)) {
-				low_pfn += (1UL << order) - 1;
-				nr_scanned += (1UL << order) - 1;
+			/* Skip based on page order and compaction target order. */
+			if (skip_isolation_on_order(order, cc->order)) {
+				if (order <= MAX_PAGE_ORDER) {
+					low_pfn += (1UL << order) - 1;
+					nr_scanned += (1UL << order) - 1;
+				}
+				goto isolate_fail;
 			}
-			goto isolate_fail;
 		}
 
 		/*
@@ -1165,10 +1222,11 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			}
 
 			/*
-			 * folio become large since the non-locked check,
-			 * and it's on LRU.
+			 * Check LRU folio order under the lock
 			 */
-			if (unlikely(folio_test_large(folio) && !cc->alloc_contig)) {
+			if (unlikely(skip_isolation_on_order(folio_order(folio),
+							     cc->order) &&
+				     !cc->alloc_contig)) {
 				low_pfn += folio_nr_pages(folio) - 1;
 				nr_scanned += folio_nr_pages(folio) - 1;
 				folio_set_lru(folio);
@@ -1788,6 +1846,10 @@ static struct folio *compaction_alloc(struct folio *src, unsigned long data)
 	struct compact_control *cc = (struct compact_control *)data;
 	struct folio *dst;
 
+	/* this makes migrate_pages() split the source page and retry */
+	if (folio_test_large(src))
+		return NULL;
+
 	if (list_empty(&cc->freepages)) {
 		isolate_freepages(cc);
 
@@ -2091,17 +2153,6 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
 }
 
 /*
- * order == -1 is expected when compacting proactively via
- * 1. /proc/sys/vm/compact_memory
- * 2. /sys/devices/system/node/nodex/compact
- * 3. /proc/sys/vm/compaction_proactiveness
- */
-static inline bool is_via_compact_memory(int order)
-{
-	return order == -1;
-}
-
-/*
  * Determine whether kswapd is (or recently was!) running on this node.
  *
  * pgdat_kswapd_lock() pins pgdat->kswapd, so a concurrent kswapd_stop() can't
author	Zi Yan <ziy@nvidia.com>	2024-02-20 13:32:18 -0500
committer	Andrew Morton <akpm@linux-foundation.org>	2024-02-23 17:48:33 -0800
commit	ee6f62fd34f0bb99ef93f799bcf5fc6a6b24945b (patch)
tree	ae3eb3dc218048d9314296e102a0531945417544 /mm
parent	5267fe5d092e80a83740e5a1f6d5638d88ac7309 (diff)
download	linux-ee6f62fd34f0bb99ef93f799bcf5fc6a6b24945b.tar.gz