author		Andrew Morton <akpm@linux-foundation.org>	2024-04-05 13:29:33 -0700
committer	Andrew Morton <akpm@linux-foundation.org>	2024-04-05 13:29:33 -0700
commit		e460e6d35e659ea5506474de220e638365211271
tree		ee8a6cf4a5d1f04306cb2002ce5f95b9d9559709
parent		e606de60f3fac927d616ee7539d1852a2e9a1cfa
download	25-new-e460e6d35e659ea5506474de220e638365211271.tar.gz
foo
76 files changed, 1935 insertions, 384 deletions
diff --git a/patches/maintainers-change-vmwarecom-addresses-to-broadcomcom.patch b/patches/maintainers-change-vmwarecom-addresses-to-broadcomcom.patch deleted file mode 100644 index dd30dd634..000000000 --- a/patches/maintainers-change-vmwarecom-addresses-to-broadcomcom.patch +++ /dev/null @@ -1,169 +0,0 @@ -From: Alexey Makhalov <alexey.makhalov@broadcom.com> -Subject: MAINTAINERS: change vmware.com addresses to broadcom.com -Date: Tue, 2 Apr 2024 16:23:34 -0700 - -Update all remaining vmware.com email addresses to actual broadcom.com. - -Add corresponding .mailmap entries for maintainers who contributed in the -past as the vmware.com address will start bouncing soon. - -Maintainership update. Jeff Sipek has left VMware, Nick Shi will be -maintaining VMware PTP. - -Link: https://lkml.kernel.org/r/20240402232334.33167-1-alexey.makhalov@broadcom.com -Signed-off-by: Alexey Makhalov <alexey.makhalov@broadcom.com> -Acked-by: Florian Fainelli <florian.fainelli@broadcom.com> -Acked-by: Ajay Kaher <ajay.kaher@broadcom.com> -Acked-by: Ronak Doshi <ronak.doshi@broadcom.com> -Acked-by: Nick Shi <nick.shi@broadcom.com> -Acked-by: Bryan Tan <bryan-bt.tan@broadcom.com> -Acked-by: Vishnu Dasa <vishnu.dasa@broadcom.com> -Acked-by: Vishal Bhakta <vishal.bhakta@broadcom.com> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> -Signed-off-by: Andrew Morton <akpm@linux-foundation.org> ---- - - .mailmap | 5 +++++ - MAINTAINERS | 46 +++++++++++++++++++++++----------------------- - 2 files changed, 28 insertions(+), 23 deletions(-) - ---- a/.mailmap~maintainers-change-vmwarecom-addresses-to-broadcomcom -+++ a/.mailmap -@@ -20,6 +20,7 @@ Adam Oldham <oldhamca@gmail.com> - Adam Radford <aradford@gmail.com> - Adriana Reus <adi.reus@gmail.com> <adriana.reus@intel.com> - Adrian Bunk <bunk@stusta.de> -+Ajay Kaher <ajay.kaher@broadcom.com> <akaher@vmware.com> - Akhil P Oommen <quic_akhilpo@quicinc.com> <akhilpo@codeaurora.org> - Alan Cox <alan@lxorguk.ukuu.org.uk> - Alan Cox <root@hraefn.swansea.linux.org.uk> -@@ -36,6 +37,7 @@ Alexei Avshalom Lazar <quic_ailizaro@qui - Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com> - Alexei Starovoitov <ast@kernel.org> <ast@fb.com> - Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com> -+Alexey Makhalov <alexey.amakhalov@broadcom.com> <amakhalov@vmware.com> - Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com> - Alex Shi <alexs@kernel.org> <alex.shi@intel.com> - Alex Shi <alexs@kernel.org> <alex.shi@linaro.org> -@@ -110,6 +112,7 @@ Brendan Higgins <brendan.higgins@linux.d - Brian Avery <b.avery@hp.com> - Brian King <brking@us.ibm.com> - Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com> -+Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com> - Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com> - Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org> - Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org> -@@ -529,6 +532,7 @@ Rocky Liao <quic_rjliao@quicinc.com> <rj - Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com> - Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com> - Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru> -+Ronak Doshi <ronak.doshi@broadcom.com> <doshir@vmware.com> - Muchun Song <muchun.song@linux.dev> <songmuchun@bytedance.com> - Muchun Song <muchun.song@linux.dev> <smuchun@gmail.com> - Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com> -@@ -651,6 +655,7 @@ Viresh Kumar <vireshk@kernel.org> <vires - Viresh Kumar <vireshk@kernel.org> 
<viresh.linux@gmail.com> - Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.org> - Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.com> -+Vishnu Dasa <vishnu.dasa@broadcom.com> <vdasa@vmware.com> - Vivek Aknurwar <quic_viveka@quicinc.com> <viveka@codeaurora.org> - Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com> - Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com> ---- a/MAINTAINERS~maintainers-change-vmwarecom-addresses-to-broadcomcom -+++ a/MAINTAINERS -@@ -16731,9 +16731,9 @@ F: include/uapi/linux/ppdev.h - - PARAVIRT_OPS INTERFACE - M: Juergen Gross <jgross@suse.com> --R: Ajay Kaher <akaher@vmware.com> --R: Alexey Makhalov <amakhalov@vmware.com> --R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -+R: Ajay Kaher <ajay.kaher@broadcom.com> -+R: Alexey Makhalov <alexey.amakhalov@broadcom.com> -+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> - L: virtualization@lists.linux.dev - L: x86@kernel.org - S: Supported -@@ -23652,9 +23652,9 @@ S: Supported - F: drivers/misc/vmw_balloon.c - - VMWARE HYPERVISOR INTERFACE --M: Ajay Kaher <akaher@vmware.com> --M: Alexey Makhalov <amakhalov@vmware.com> --R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -+M: Ajay Kaher <ajay.kaher@broadcom.com> -+M: Alexey Makhalov <alexey.amakhalov@broadcom.com> -+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> - L: virtualization@lists.linux.dev - L: x86@kernel.org - S: Supported -@@ -23663,34 +23663,34 @@ F: arch/x86/include/asm/vmware.h - F: arch/x86/kernel/cpu/vmware.c - - VMWARE PVRDMA DRIVER --M: Bryan Tan <bryantan@vmware.com> --M: Vishnu Dasa <vdasa@vmware.com> --R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -+M: Bryan Tan <bryan-bt.tan@broadcom.com> -+M: Vishnu Dasa <vishnu.dasa@broadcom.com> -+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> - L: linux-rdma@vger.kernel.org - S: Supported - F: drivers/infiniband/hw/vmw_pvrdma/ - - VMWARE PVSCSI DRIVER --M: Vishal Bhakta <vbhakta@vmware.com> --R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -+M: Vishal Bhakta <vishal.bhakta@broadcom.com> -+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> - L: linux-scsi@vger.kernel.org - S: Supported - F: drivers/scsi/vmw_pvscsi.c - F: drivers/scsi/vmw_pvscsi.h - - VMWARE VIRTUAL PTP CLOCK DRIVER --M: Jeff Sipek <jsipek@vmware.com> --R: Ajay Kaher <akaher@vmware.com> --R: Alexey Makhalov <amakhalov@vmware.com> --R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -+M: Nick Shi <nick.shi@broadcom.com> -+R: Ajay Kaher <ajay.kaher@broadcom.com> -+R: Alexey Makhalov <alexey.amakhalov@broadcom.com> -+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> - L: netdev@vger.kernel.org - S: Supported - F: drivers/ptp/ptp_vmw.c - - VMWARE VMCI DRIVER --M: Bryan Tan <bryantan@vmware.com> --M: Vishnu Dasa <vdasa@vmware.com> --R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -+M: Bryan Tan <bryan-bt.tan@broadcom.com> -+M: Vishnu Dasa <vishnu.dasa@broadcom.com> -+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> - L: linux-kernel@vger.kernel.org - S: Supported - F: drivers/misc/vmw_vmci/ -@@ -23705,16 +23705,16 @@ F: drivers/input/mouse/vmmouse.c - F: drivers/input/mouse/vmmouse.h - - VMWARE VMXNET3 ETHERNET DRIVER --M: Ronak Doshi <doshir@vmware.com> --R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -+M: Ronak Doshi <ronak.doshi@broadcom.com> -+R: Broadcom 
internal kernel review list <bcm-kernel-feedback-list@broadcom.com> - L: netdev@vger.kernel.org - S: Supported - F: drivers/net/vmxnet3/ - - VMWARE VSOCK VMCI TRANSPORT DRIVER --M: Bryan Tan <bryantan@vmware.com> --M: Vishnu Dasa <vdasa@vmware.com> --R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -+M: Bryan Tan <bryan-bt.tan@broadcom.com> -+M: Vishnu Dasa <vishnu.dasa@broadcom.com> -+R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> - L: linux-kernel@vger.kernel.org - S: Supported - F: net/vmw_vsock/vmci_transport* -_ diff --git a/patches/memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info.patch b/patches/memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info.patch new file mode 100644 index 000000000..e203796cd --- /dev/null +++ b/patches/memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info.patch @@ -0,0 +1,256 @@ +From: "Ho-Ren (Jack) Chuang" <horenchuang@bytedance.com> +Subject: memory tier: create CPUless memory tiers after obtaining HMAT info +Date: Fri, 5 Apr 2024 00:07:06 +0000 + +The current implementation treats emulated memory devices, such as CXL1.1 +type3 memory, as normal DRAM when they are emulated as normal memory +(E820_TYPE_RAM). However, these emulated devices have different +characteristics than traditional DRAM, making it important to distinguish +them. Thus, we modify the tiered memory initialization process to +introduce a delay specifically for CPUless NUMA nodes. This delay ensures +that the memory tier initialization for these nodes is deferred until HMAT +information is obtained during the boot process. Finally, demotion tables +are recalculated at the end. + +* late_initcall(memory_tier_late_init); + Some device drivers may have initialized memory tiers between + `memory_tier_init()` and `memory_tier_late_init()`, potentially bringing + online memory nodes and configuring memory tiers. They should be + excluded in the late init. + +* Handle cases where there is no HMAT when creating memory tiers + There is a scenario where a CPUless node does not provide HMAT + information. If no HMAT is specified, it falls back to using the + default DRAM tier. + +* Introduce another new lock `default_dram_perf_lock` for adist + calculation In the current implementation, iterating through CPUlist + nodes requires holding the `memory_tier_lock`. However, + `mt_calc_adistance()` will end up trying to acquire the same lock, + leading to a potential deadlock. Therefore, we propose introducing a + standalone `default_dram_perf_lock` to protect `default_dram_perf_*`. + This approach not only avoids deadlock but also prevents holding a large + lock simultaneously. + +* Upgrade `set_node_memory_tier` to support additional cases, including + default DRAM, late CPUless, and hot-plugged initializations. To cover + hot-plugged memory nodes, `mt_calc_adistance()` and + `mt_find_alloc_memory_type()` are moved into `set_node_memory_tier()` to + handle cases where memtype is not initialized and where HMAT information + is available. + +* Introduce `default_memory_types` for those memory types that are not + initialized by device drivers. Because late initialized memory and + default DRAM memory need to be managed, a default memory type is created + for storing all memory types that are not initialized by device drivers + and as a fallback. 
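
To make the `default_dram_perf_lock` bullet above concrete, here is a minimal sketch of the AA deadlock it avoids and the shape of the fix. The function names are hypothetical; only the locking structure mirrors the patch.

	#include <linux/cleanup.h>
	#include <linux/mutex.h>

	static DEFINE_MUTEX(memory_tier_lock);		/* guards tier topology */
	static DEFINE_MUTEX(default_dram_perf_lock);	/* new: guards default_dram_perf_* */

	static int calc_adistance_sketch(int nid, int *adist)
	{
		/*
		 * Before the split this took memory_tier_lock, which the
		 * caller below already holds -- a self-deadlock, since kernel
		 * mutexes are not recursive. The narrower lock breaks the cycle.
		 */
		guard(mutex)(&default_dram_perf_lock);
		*adist = 0;	/* stand-in for the real perf-based computation */
		return 0;
	}

	static void set_node_tier_sketch(int nid)
	{
		int adist;

		guard(mutex)(&memory_tier_lock);	/* held across tier updates */
		calc_adistance_sketch(nid, &adist);	/* safe: different lock */
	}
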
+ +Link: https://lkml.kernel.org/r/20240405000707.2670063-3-horenchuang@bytedance.com +Signed-off-by: Ho-Ren (Jack) Chuang <horenchuang@bytedance.com> +Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> +Reviewed-by: "Huang, Ying" <ying.huang@intel.com> +Cc: Alistair Popple <apopple@nvidia.com> +Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Jiang <dave.jiang@intel.com> +Cc: Gregory Price <gourry.memverge@gmail.com> +Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Ravi Jonnalagadda <ravis.opensrc@micron.com> +Cc: SeongJae Park <sj@kernel.org> +Cc: Tejun Heo <tj@kernel.org> +Cc: Vishal Verma <vishal.l.verma@intel.com> +Cc: Jonathan Cameron <Jonathan.Cameron@huawie.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/memory-tiers.c | 94 ++++++++++++++++++++++++++++++++------------ + 1 file changed, 70 insertions(+), 24 deletions(-) + +--- a/mm/memory-tiers.c~memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info ++++ a/mm/memory-tiers.c +@@ -36,6 +36,11 @@ struct node_memory_type_map { + + static DEFINE_MUTEX(memory_tier_lock); + static LIST_HEAD(memory_tiers); ++/* ++ * The list is used to store all memory types that are not created ++ * by a device driver. ++ */ ++static LIST_HEAD(default_memory_types); + static struct node_memory_type_map node_memory_types[MAX_NUMNODES]; + struct memory_dev_type *default_dram_type; + +@@ -108,6 +113,8 @@ static struct demotion_nodes *node_demot + + static BLOCKING_NOTIFIER_HEAD(mt_adistance_algorithms); + ++/* The lock is used to protect `default_dram_perf*` info and nid. */ ++static DEFINE_MUTEX(default_dram_perf_lock); + static bool default_dram_perf_error; + static struct access_coordinate default_dram_perf; + static int default_dram_perf_ref_nid = NUMA_NO_NODE; +@@ -505,7 +512,8 @@ static inline void __init_node_memory_ty + static struct memory_tier *set_node_memory_tier(int node) + { + struct memory_tier *memtier; +- struct memory_dev_type *memtype; ++ struct memory_dev_type *memtype = default_dram_type; ++ int adist = MEMTIER_ADISTANCE_DRAM; + pg_data_t *pgdat = NODE_DATA(node); + + +@@ -514,7 +522,16 @@ static struct memory_tier *set_node_memo + if (!node_state(node, N_MEMORY)) + return ERR_PTR(-EINVAL); + +- __init_node_memory_type(node, default_dram_type); ++ mt_calc_adistance(node, &adist); ++ if (!node_memory_types[node].memtype) { ++ memtype = mt_find_alloc_memory_type(adist, &default_memory_types); ++ if (IS_ERR(memtype)) { ++ memtype = default_dram_type; ++ pr_info("Failed to allocate a memory type. Fall back.\n"); ++ } ++ } ++ ++ __init_node_memory_type(node, memtype); + + memtype = node_memory_types[node].memtype; + node_set(node, memtype->nodes); +@@ -652,6 +669,35 @@ void mt_put_memory_types(struct list_hea + } + EXPORT_SYMBOL_GPL(mt_put_memory_types); + ++/* ++ * This is invoked via `late_initcall()` to initialize memory tiers for ++ * CPU-less memory nodes after driver initialization, which is ++ * expected to provide `adistance` algorithms. ++ */ ++static int __init memory_tier_late_init(void) ++{ ++ int nid; ++ ++ guard(mutex)(&memory_tier_lock); ++ for_each_node_state(nid, N_MEMORY) { ++ /* ++ * Some device drivers may have initialized memory tiers ++ * between `memory_tier_init()` and `memory_tier_late_init()`, ++ * potentially bringing online memory nodes and ++ * configuring memory tiers. Exclude them here. 
++ */ ++ if (node_memory_types[nid].memtype) ++ continue; ++ ++ set_node_memory_tier(nid); ++ } ++ ++ establish_demotion_targets(); ++ ++ return 0; ++} ++late_initcall(memory_tier_late_init); ++ + static void dump_hmem_attrs(struct access_coordinate *coord, const char *prefix) + { + pr_info( +@@ -663,25 +709,19 @@ static void dump_hmem_attrs(struct acces + int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, + const char *source) + { +- int rc = 0; +- +- mutex_lock(&memory_tier_lock); +- if (default_dram_perf_error) { +- rc = -EIO; +- goto out; +- } ++ guard(mutex)(&default_dram_perf_lock); ++ if (default_dram_perf_error) ++ return -EIO; + + if (perf->read_latency + perf->write_latency == 0 || +- perf->read_bandwidth + perf->write_bandwidth == 0) { +- rc = -EINVAL; +- goto out; +- } ++ perf->read_bandwidth + perf->write_bandwidth == 0) ++ return -EINVAL; + + if (default_dram_perf_ref_nid == NUMA_NO_NODE) { + default_dram_perf = *perf; + default_dram_perf_ref_nid = nid; + default_dram_perf_ref_source = kstrdup(source, GFP_KERNEL); +- goto out; ++ return 0; + } + + /* +@@ -709,27 +749,25 @@ int mt_set_default_dram_perf(int nid, st + pr_info( + " disable default DRAM node performance based abstract distance algorithm.\n"); + default_dram_perf_error = true; +- rc = -EINVAL; ++ return -EINVAL; + } + +-out: +- mutex_unlock(&memory_tier_lock); +- return rc; ++ return 0; + } + + int mt_perf_to_adistance(struct access_coordinate *perf, int *adist) + { ++ guard(mutex)(&default_dram_perf_lock); + if (default_dram_perf_error) + return -EIO; + +- if (default_dram_perf_ref_nid == NUMA_NO_NODE) +- return -ENOENT; +- + if (perf->read_latency + perf->write_latency == 0 || + perf->read_bandwidth + perf->write_bandwidth == 0) + return -EINVAL; + +- mutex_lock(&memory_tier_lock); ++ if (default_dram_perf_ref_nid == NUMA_NO_NODE) ++ return -ENOENT; ++ + /* + * The abstract distance of a memory node is in direct proportion to + * its memory latency (read + write) and inversely proportional to its +@@ -742,7 +780,6 @@ int mt_perf_to_adistance(struct access_c + (default_dram_perf.read_latency + default_dram_perf.write_latency) * + (default_dram_perf.read_bandwidth + default_dram_perf.write_bandwidth) / + (perf->read_bandwidth + perf->write_bandwidth); +- mutex_unlock(&memory_tier_lock); + + return 0; + } +@@ -855,7 +892,8 @@ static int __init memory_tier_init(void) + * For now we can have 4 faster memory tiers with smaller adistance + * than default DRAM tier. + */ +- default_dram_type = alloc_memory_type(MEMTIER_ADISTANCE_DRAM); ++ default_dram_type = mt_find_alloc_memory_type(MEMTIER_ADISTANCE_DRAM, ++ &default_memory_types); + if (IS_ERR(default_dram_type)) + panic("%s() failed to allocate default DRAM tier\n", __func__); + +@@ -865,6 +903,14 @@ static int __init memory_tier_init(void) + * types assigned. + */ + for_each_node_state(node, N_MEMORY) { ++ if (!node_state(node, N_CPU)) ++ /* ++ * Defer memory tier initialization on ++ * CPUless numa nodes. These will be initialized ++ * after firmware and devices are initialized. 
++ */ ++ continue; ++ + memtier = set_node_memory_tier(node); + if (IS_ERR(memtier)) + /* +_ diff --git a/patches/memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types.patch b/patches/memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types.patch new file mode 100644 index 000000000..304fb7d0b --- /dev/null +++ b/patches/memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types.patch @@ -0,0 +1,171 @@ +From: "Ho-Ren (Jack) Chuang" <horenchuang@bytedance.com> +Subject: memory tier: dax/kmem: introduce an abstract layer for finding, allocating, and putting memory types +Date: Fri, 5 Apr 2024 00:07:05 +0000 + +Patch series "Improved Memory Tier Creation for CPUless NUMA Nodes", v11. + +When a memory device, such as CXL1.1 type3 memory, is emulated as normal +memory (E820_TYPE_RAM), the memory device is indistinguishable from normal +DRAM in terms of memory tiering with the current implementation. The +current memory tiering assigns all detected normal memory nodes to the +same DRAM tier. This results in normal memory devices with different +attributions being unable to be assigned to the correct memory tier, +leading to the inability to migrate pages between different types of +memory. +https://lore.kernel.org/linux-mm/PH0PR08MB7955E9F08CCB64F23963B5C3A860A@PH0PR08MB7955.namprd08.prod.outlook.com/T/ + +This patchset automatically resolves the issues. It delays the +initialization of memory tiers for CPUless NUMA nodes until they obtain +HMAT information and after all devices are initialized at boot time, +eliminating the need for user intervention. If no HMAT is specified, it +falls back to using `default_dram_type`. + +Example usecase: +We have CXL memory on the host, and we create VMs with a new system memory +device backed by host CXL memory. We inject CXL memory performance +attributes through QEMU, and the guest now sees memory nodes with +performance attributes in HMAT. With this change, we enable the guest +kernel to construct the correct memory tiering for the memory nodes. + + +This patch (of 2): + +Since different memory devices require finding, allocating, and putting +memory types, these common steps are abstracted in this patch, enhancing +the scalability and conciseness of the code. 
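
As a usage sketch, a driver that manages its own list of memory types wraps the two new helpers with its own lock, modelled on the dax/kmem conversion below; the `mydrv_*` names here are hypothetical.

	#include <linux/cleanup.h>
	#include <linux/list.h>
	#include <linux/memory-tiers.h>
	#include <linux/mutex.h>

	static DEFINE_MUTEX(mydrv_memory_type_lock);
	static LIST_HEAD(mydrv_memory_types);

	/* Reuse a memory_dev_type with this adistance, or allocate and list it. */
	static struct memory_dev_type *mydrv_find_alloc_memory_type(int adist)
	{
		guard(mutex)(&mydrv_memory_type_lock);
		return mt_find_alloc_memory_type(adist, &mydrv_memory_types);
	}

	/* Unlist and drop every type we accumulated, e.g. on driver teardown. */
	static void mydrv_put_memory_types(void)
	{
		guard(mutex)(&mydrv_memory_type_lock);
		mt_put_memory_types(&mydrv_memory_types);
	}
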
+ +Link: https://lkml.kernel.org/r/20240405000707.2670063-1-horenchuang@bytedance.com +Link: https://lkml.kernel.org/r/20240405000707.2670063-2-horenchuang@bytedance.com +Signed-off-by: Ho-Ren (Jack) Chuang <horenchuang@bytedance.com> +Reviewed-by: "Huang, Ying" <ying.huang@intel.com> +Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawie.com> +Cc: Alistair Popple <apopple@nvidia.com> +Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Jiang <dave.jiang@intel.com> +Cc: Gregory Price <gourry.memverge@gmail.com> +Cc: Hao Xiang <hao.xiang@bytedance.com> +Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Ravi Jonnalagadda <ravis.opensrc@micron.com> +Cc: SeongJae Park <sj@kernel.org> +Cc: Tejun Heo <tj@kernel.org> +Cc: Vishal Verma <vishal.l.verma@intel.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + drivers/dax/kmem.c | 30 ++++-------------------------- + include/linux/memory-tiers.h | 13 +++++++++++++ + mm/memory-tiers.c | 29 +++++++++++++++++++++++++++++ + 3 files changed, 46 insertions(+), 26 deletions(-) + +--- a/drivers/dax/kmem.c~memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types ++++ a/drivers/dax/kmem.c +@@ -55,36 +55,14 @@ static LIST_HEAD(kmem_memory_types); + + static struct memory_dev_type *kmem_find_alloc_memory_type(int adist) + { +- bool found = false; +- struct memory_dev_type *mtype; +- +- mutex_lock(&kmem_memory_type_lock); +- list_for_each_entry(mtype, &kmem_memory_types, list) { +- if (mtype->adistance == adist) { +- found = true; +- break; +- } +- } +- if (!found) { +- mtype = alloc_memory_type(adist); +- if (!IS_ERR(mtype)) +- list_add(&mtype->list, &kmem_memory_types); +- } +- mutex_unlock(&kmem_memory_type_lock); +- +- return mtype; ++ guard(mutex)(&kmem_memory_type_lock); ++ return mt_find_alloc_memory_type(adist, &kmem_memory_types); + } + + static void kmem_put_memory_types(void) + { +- struct memory_dev_type *mtype, *mtn; +- +- mutex_lock(&kmem_memory_type_lock); +- list_for_each_entry_safe(mtype, mtn, &kmem_memory_types, list) { +- list_del(&mtype->list); +- put_memory_type(mtype); +- } +- mutex_unlock(&kmem_memory_type_lock); ++ guard(mutex)(&kmem_memory_type_lock); ++ mt_put_memory_types(&kmem_memory_types); + } + + static int dev_dax_kmem_probe(struct dev_dax *dev_dax) +--- a/include/linux/memory-tiers.h~memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types ++++ a/include/linux/memory-tiers.h +@@ -48,6 +48,9 @@ int mt_calc_adistance(int node, int *adi + int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, + const char *source); + int mt_perf_to_adistance(struct access_coordinate *perf, int *adist); ++struct memory_dev_type *mt_find_alloc_memory_type(int adist, ++ struct list_head *memory_types); ++void mt_put_memory_types(struct list_head *memory_types); + #ifdef CONFIG_MIGRATION + int next_demotion_node(int node); + void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); +@@ -136,5 +139,15 @@ static inline int mt_perf_to_adistance(s + { + return -EIO; + } ++ ++static inline struct memory_dev_type *mt_find_alloc_memory_type(int adist, ++ struct list_head *memory_types) ++{ ++ return NULL; ++} ++ ++static inline void mt_put_memory_types(struct list_head *memory_types) ++{ ++} + #endif /* CONFIG_NUMA */ + #endif /* _LINUX_MEMORY_TIERS_H */ +--- 
a/mm/memory-tiers.c~memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types ++++ a/mm/memory-tiers.c +@@ -623,6 +623,35 @@ void clear_node_memory_type(int node, st + } + EXPORT_SYMBOL_GPL(clear_node_memory_type); + ++struct memory_dev_type *mt_find_alloc_memory_type(int adist, struct list_head *memory_types) ++{ ++ struct memory_dev_type *mtype; ++ ++ list_for_each_entry(mtype, memory_types, list) ++ if (mtype->adistance == adist) ++ return mtype; ++ ++ mtype = alloc_memory_type(adist); ++ if (IS_ERR(mtype)) ++ return mtype; ++ ++ list_add(&mtype->list, memory_types); ++ ++ return mtype; ++} ++EXPORT_SYMBOL_GPL(mt_find_alloc_memory_type); ++ ++void mt_put_memory_types(struct list_head *memory_types) ++{ ++ struct memory_dev_type *mtype, *mtn; ++ ++ list_for_each_entry_safe(mtype, mtn, memory_types, list) { ++ list_del(&mtype->list); ++ put_memory_type(mtype); ++ } ++} ++EXPORT_SYMBOL_GPL(mt_put_memory_types); ++ + static void dump_hmem_attrs(struct access_coordinate *coord, const char *prefix) + { + pr_info( +_ diff --git a/patches/mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters.patch b/patches/mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters.patch new file mode 100644 index 000000000..7ad2424fe --- /dev/null +++ b/patches/mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters.patch @@ -0,0 +1,172 @@ +From: Barry Song <v-songbaohua@oppo.com> +Subject: mm: add per-order mTHP anon_alloc and anon_alloc_fallback counters +Date: Fri, 5 Apr 2024 23:27:03 +1300 + +Patch series "mm: add per-order mTHP alloc and swpout counters", v4. + +The patchset introduces a framework to facilitate mTHP counters, starting +with the allocation and swap-out counters. Currently, only four new nodes +are appended to the stats directory for each mTHP size. + +/sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats + anon_alloc + anon_alloc_fallback + anon_swpout + anon_swpout_fallback + +These four nodes are crucial for us to monitor the fragmentation levels of +both the buddy system and the swapfile. In the future, we may consider +adding additional nodes for further insights. + + +This patch (of 2): + +Profiling a system blindly with mTHP has become challenging due to the +lack of visibility into its operations. Presenting the success rate of +mTHP allocations appears to be pressing need. + +Recently, I've been experiencing significant difficulty debugging +performance improvements and regressions without these figures. It's +crucial for us to understand the true effectiveness of mTHP in real-world +scenarios, especially in systems with fragmented memory. + +This patch sets up the framework for per-order mTHP counters, starting +with the introduction of anon_alloc and anon_alloc_fallback counters. +Incorporating additional counters should now be straightforward as well. 
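
The counters follow the standard per-CPU statistics idiom: a lock-free increment on the allocation path and a fold over CPUs only when userspace reads the sysfs file. Distilled to a single counter (a sketch, not the patch's code):

	#include <linux/cpumask.h>
	#include <linux/percpu.h>

	static DEFINE_PER_CPU(unsigned long, ex_stat);

	static inline void ex_count(void)
	{
		this_cpu_inc(ex_stat);		/* no lock, no shared cache line */
	}

	static unsigned long ex_read(void)	/* e.g. from a sysfs ->show() */
	{
		unsigned long sum = 0;
		int cpu;

		for_each_online_cpu(cpu)	/* racy vs. writers; fine for stats */
			sum += per_cpu(ex_stat, cpu);
		return sum;
	}
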
+ +Link: https://lkml.kernel.org/r/20240405102704.77559-1-21cnbao@gmail.com +Link: https://lkml.kernel.org/r/20240405102704.77559-2-21cnbao@gmail.com +Signed-off-by: Barry Song <v-songbaohua@oppo.com> +Cc: Chris Li <chrisl@kernel.org> +Cc: David Hildenbrand <david@redhat.com> +Cc: Domenico Cerasuolo <cerasuolodomenico@gmail.com> +Cc: Kairui Song <kasong@tencent.com> +Cc: Matthew Wilcox (Oracle) <willy@infradead.org> +Cc: Peter Xu <peterx@redhat.com> +Cc: Ryan Roberts <ryan.roberts@arm.com> +Cc: Suren Baghdasaryan <surenb@google.com> +Cc: Yosry Ahmed <yosryahmed@google.com> +Cc: Yu Zhao <yuzhao@google.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + include/linux/huge_mm.h | 19 +++++++++++++++ + mm/huge_memory.c | 48 ++++++++++++++++++++++++++++++++++++++ + mm/memory.c | 2 + + 3 files changed, 69 insertions(+) + +--- a/include/linux/huge_mm.h~mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters ++++ a/include/linux/huge_mm.h +@@ -264,6 +264,25 @@ unsigned long thp_vma_allowable_orders(s + enforce_sysfs, orders); + } + ++enum mthp_stat_item { ++ MTHP_STAT_ANON_ALLOC, ++ MTHP_STAT_ANON_ALLOC_FALLBACK, ++ __MTHP_STAT_COUNT ++}; ++ ++struct mthp_stat { ++ unsigned long stats[PMD_ORDER + 1][__MTHP_STAT_COUNT]; ++}; ++ ++DECLARE_PER_CPU(struct mthp_stat, mthp_stats); ++ ++static inline void count_mthp_stat(int order, enum mthp_stat_item item) ++{ ++ if (unlikely(order > PMD_ORDER)) ++ return; ++ this_cpu_inc(mthp_stats.stats[order][item]); ++} ++ + #define transparent_hugepage_use_zero_page() \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) +--- a/mm/huge_memory.c~mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters ++++ a/mm/huge_memory.c +@@ -526,6 +526,46 @@ static const struct kobj_type thpsize_kt + .sysfs_ops = &kobj_sysfs_ops, + }; + ++DEFINE_PER_CPU(struct mthp_stat, mthp_stats) = {{{0}}}; ++ ++static unsigned long sum_mthp_stat(int order, enum mthp_stat_item item) ++{ ++ unsigned long sum = 0; ++ int cpu; ++ ++ for_each_online_cpu(cpu) { ++ struct mthp_stat *this = &per_cpu(mthp_stats, cpu); ++ ++ sum += this->stats[order][item]; ++ } ++ ++ return sum; ++} ++ ++#define DEFINE_MTHP_STAT_ATTR(_name, _index) \ ++static ssize_t _name##_show(struct kobject *kobj, \ ++ struct kobj_attribute *attr, char *buf) \ ++{ \ ++ int order = to_thpsize(kobj)->order; \ ++ \ ++ return sysfs_emit(buf, "%lu\n", sum_mthp_stat(order, _index)); \ ++} \ ++static struct kobj_attribute _name##_attr = __ATTR_RO(_name) ++ ++DEFINE_MTHP_STAT_ATTR(anon_alloc, MTHP_STAT_ANON_ALLOC); ++DEFINE_MTHP_STAT_ATTR(anon_alloc_fallback, MTHP_STAT_ANON_ALLOC_FALLBACK); ++ ++static struct attribute *stats_attrs[] = { ++ &anon_alloc_attr.attr, ++ &anon_alloc_fallback_attr.attr, ++ NULL, ++}; ++ ++static struct attribute_group stats_attr_group = { ++ .name = "stats", ++ .attrs = stats_attrs, ++}; ++ + static struct thpsize *thpsize_create(int order, struct kobject *parent) + { + unsigned long size = (PAGE_SIZE << order) / SZ_1K; +@@ -549,6 +589,12 @@ static struct thpsize *thpsize_create(in + return ERR_PTR(ret); + } + ++ ret = sysfs_create_group(&thpsize->kobj, &stats_attr_group); ++ if (ret) { ++ kobject_put(&thpsize->kobj); ++ return ERR_PTR(ret); ++ } ++ + thpsize->order = order; + return thpsize; + } +@@ -1050,8 +1096,10 @@ vm_fault_t do_huge_pmd_anonymous_page(st + folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, vma, haddr, true); + if (unlikely(!folio)) { + count_vm_event(THP_FAULT_FALLBACK); ++ count_mthp_stat(HPAGE_PMD_ORDER, 
MTHP_STAT_ANON_ALLOC_FALLBACK); + return VM_FAULT_FALLBACK; + } ++ count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_ALLOC); + return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp); + } + +--- a/mm/memory.c~mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters ++++ a/mm/memory.c +@@ -4374,8 +4374,10 @@ static struct folio *alloc_anon_folio(st + } + folio_throttle_swaprate(folio, gfp); + clear_huge_page(&folio->page, vmf->address, 1 << order); ++ count_mthp_stat(order, MTHP_STAT_ANON_ALLOC); + return folio; + } ++ count_mthp_stat(order, MTHP_STAT_ANON_ALLOC_FALLBACK); + next: + order = next_order(&orders, order); + } +_ diff --git a/patches/mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters.patch b/patches/mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters.patch new file mode 100644 index 000000000..85dbca063 --- /dev/null +++ b/patches/mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters.patch @@ -0,0 +1,93 @@ +From: Barry Song <v-songbaohua@oppo.com> +Subject: mm: add per-order mTHP anon_swpout and anon_swpout_fallback counters +Date: Fri, 5 Apr 2024 23:27:04 +1300 + +This helps to display the fragmentation situation of the swapfile, knowing +the proportion of how much we haven't split large folios. So far, we only +support non-split swapout for anon memory, with the possibility of +expanding to shmem in the future. So, we add the "anon" prefix to the +counter names. + +Link: https://lkml.kernel.org/r/20240405102704.77559-3-21cnbao@gmail.com +Signed-off-by: Barry Song <v-songbaohua@oppo.com> +Cc: Chris Li <chrisl@kernel.org> +Cc: David Hildenbrand <david@redhat.com> +Cc: Domenico Cerasuolo <cerasuolodomenico@gmail.com> +Cc: Kairui Song <kasong@tencent.com> +Cc: Matthew Wilcox (Oracle) <willy@infradead.org> +Cc: Peter Xu <peterx@redhat.com> +Cc: Ryan Roberts <ryan.roberts@arm.com> +Cc: Suren Baghdasaryan <surenb@google.com> +Cc: Yosry Ahmed <yosryahmed@google.com> +Cc: Yu Zhao <yuzhao@google.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + include/linux/huge_mm.h | 2 ++ + mm/huge_memory.c | 4 ++++ + mm/page_io.c | 6 +++++- + mm/vmscan.c | 3 +++ + 4 files changed, 14 insertions(+), 1 deletion(-) + +--- a/include/linux/huge_mm.h~mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters ++++ a/include/linux/huge_mm.h +@@ -267,6 +267,8 @@ unsigned long thp_vma_allowable_orders(s + enum mthp_stat_item { + MTHP_STAT_ANON_ALLOC, + MTHP_STAT_ANON_ALLOC_FALLBACK, ++ MTHP_STAT_ANON_SWPOUT, ++ MTHP_STAT_ANON_SWPOUT_FALLBACK, + __MTHP_STAT_COUNT + }; + +--- a/mm/huge_memory.c~mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters ++++ a/mm/huge_memory.c +@@ -554,10 +554,14 @@ static struct kobj_attribute _name##_att + + DEFINE_MTHP_STAT_ATTR(anon_alloc, MTHP_STAT_ANON_ALLOC); + DEFINE_MTHP_STAT_ATTR(anon_alloc_fallback, MTHP_STAT_ANON_ALLOC_FALLBACK); ++DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT); ++DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK); + + static struct attribute *stats_attrs[] = { + &anon_alloc_attr.attr, + &anon_alloc_fallback_attr.attr, ++ &anon_swpout_attr.attr, ++ &anon_swpout_fallback_attr.attr, + NULL, + }; + +--- a/mm/page_io.c~mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters ++++ a/mm/page_io.c +@@ -212,13 +212,17 @@ int swap_writepage(struct page *page, st + + static inline void count_swpout_vm_event(struct folio *folio) + { ++ long nr_pages = folio_nr_pages(folio); ++ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + if 
(unlikely(folio_test_pmd_mappable(folio))) { + count_memcg_folio_events(folio, THP_SWPOUT, 1); + count_vm_event(THP_SWPOUT); + } ++ if (nr_pages > 0) ++ count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_SWPOUT); + #endif +- count_vm_events(PSWPOUT, folio_nr_pages(folio)); ++ count_vm_events(PSWPOUT, nr_pages); + } + + #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) +--- a/mm/vmscan.c~mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters ++++ a/mm/vmscan.c +@@ -1230,6 +1230,9 @@ retry: + count_vm_event( + THP_SWPOUT_FALLBACK); + } ++ if (nr_pages > 0) ++ count_mthp_stat(get_order(nr_pages * PAGE_SIZE), ++ MTHP_STAT_ANON_SWPOUT_FALLBACK); + #endif + if (!add_to_swap(folio)) + goto activate_locked_split; +_ diff --git a/patches/mm-combine-__folio_put_small-__folio_put_large-and-__folio_put.patch b/patches/mm-combine-__folio_put_small-__folio_put_large-and-__folio_put.patch new file mode 100644 index 000000000..2a5c813d6 --- /dev/null +++ b/patches/mm-combine-__folio_put_small-__folio_put_large-and-__folio_put.patch @@ -0,0 +1,68 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: combine __folio_put_small, __folio_put_large and __folio_put +Date: Fri, 5 Apr 2024 16:32:26 +0100 + +It's now obvious that __folio_put_small() and __folio_put_large() do +almost exactly the same thing. Inline them both into __folio_put(). + +Link: https://lkml.kernel.org/r/20240405153228.2563754-5-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Zi Yan <ziy@nvidia.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/swap.c | 32 ++++++-------------------------- + 1 file changed, 6 insertions(+), 26 deletions(-) + +--- a/mm/swap.c~mm-combine-__folio_put_small-__folio_put_large-and-__folio_put ++++ a/mm/swap.c +@@ -112,42 +112,22 @@ static void page_cache_release(struct fo + unlock_page_lruvec_irqrestore(lruvec, flags); + } + +-static void __folio_put_small(struct folio *folio) +-{ +- page_cache_release(folio); +- mem_cgroup_uncharge(folio); +- free_unref_page(&folio->page, 0); +-} +- +-static void __folio_put_large(struct folio *folio) ++void __folio_put(struct folio *folio) + { +- /* +- * __page_cache_release() is supposed to be called for thp, not for +- * hugetlb. This is because hugetlb page does never have PageLRU set +- * (it's never listed to any LRU lists) and no memcg routines should +- * be called for hugetlb (it has a separate hugetlb_cgroup.) 
+- */ +- if (folio_test_hugetlb(folio)) { ++ if (unlikely(folio_is_zone_device(folio))) { ++ free_zone_device_page(&folio->page); ++ return; ++ } else if (folio_test_hugetlb(folio)) { + free_huge_folio(folio); + return; + } + + page_cache_release(folio); +- if (folio_test_large_rmappable(folio)) ++ if (folio_test_large(folio) && folio_test_large_rmappable(folio)) + folio_undo_large_rmappable(folio); + mem_cgroup_uncharge(folio); + free_unref_page(&folio->page, folio_order(folio)); + } +- +-void __folio_put(struct folio *folio) +-{ +- if (unlikely(folio_is_zone_device(folio))) +- free_zone_device_page(&folio->page); +- else if (unlikely(folio_test_large(folio))) +- __folio_put_large(folio); +- else +- __folio_put_small(folio); +-} + EXPORT_SYMBOL(__folio_put); + + /** +_ diff --git a/patches/mm-combine-free_the_page-and-free_unref_page.patch b/patches/mm-combine-free_the_page-and-free_unref_page.patch new file mode 100644 index 000000000..334ce13cc --- /dev/null +++ b/patches/mm-combine-free_the_page-and-free_unref_page.patch @@ -0,0 +1,96 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: combine free_the_page() and free_unref_page() +Date: Fri, 5 Apr 2024 16:32:24 +0100 + +The pcp_allowed_order() check in free_the_page() was only being skipped by +__folio_put_small() which is about to be rearranged. + +Link: https://lkml.kernel.org/r/20240405153228.2563754-3-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Zi Yan <ziy@nvidia.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/page_alloc.c | 25 +++++++++++-------------- + 1 file changed, 11 insertions(+), 14 deletions(-) + +--- a/mm/page_alloc.c~mm-combine-free_the_page-and-free_unref_page ++++ a/mm/page_alloc.c +@@ -541,14 +541,6 @@ static inline bool pcp_allowed_order(uns + return false; + } + +-static inline void free_the_page(struct page *page, unsigned int order) +-{ +- if (pcp_allowed_order(order)) /* Via pcp? */ +- free_unref_page(page, order); +- else +- __free_pages_ok(page, order, FPI_NONE); +-} +- + /* + * Higher-order pages are called "compound pages". 
They are structured thusly: + * +@@ -584,7 +576,7 @@ void destroy_large_folio(struct folio *f + folio_undo_large_rmappable(folio); + + mem_cgroup_uncharge(folio); +- free_the_page(&folio->page, folio_order(folio)); ++ free_unref_page(&folio->page, folio_order(folio)); + } + + static inline void set_buddy_order(struct page *page, unsigned int order) +@@ -2578,6 +2570,11 @@ void free_unref_page(struct page *page, + unsigned long pfn = page_to_pfn(page); + int migratetype; + ++ if (!pcp_allowed_order(order)) { ++ __free_pages_ok(page, order, FPI_NONE); ++ return; ++ } ++ + if (!free_pages_prepare(page, order)) + return; + +@@ -4760,11 +4757,11 @@ void __free_pages(struct page *page, uns + struct alloc_tag *tag = pgalloc_tag_get(page); + + if (put_page_testzero(page)) +- free_the_page(page, order); ++ free_unref_page(page, order); + else if (!head) { + pgalloc_tag_sub_pages(tag, (1 << order) - 1); + while (order-- > 0) +- free_the_page(page + (1 << order), order); ++ free_unref_page(page + (1 << order), order); + } + } + EXPORT_SYMBOL(__free_pages); +@@ -4826,7 +4823,7 @@ void __page_frag_cache_drain(struct page + VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); + + if (page_ref_sub_and_test(page, count)) +- free_the_page(page, compound_order(page)); ++ free_unref_page(page, compound_order(page)); + } + EXPORT_SYMBOL(__page_frag_cache_drain); + +@@ -4867,7 +4864,7 @@ refill: + goto refill; + + if (unlikely(nc->pfmemalloc)) { +- free_the_page(page, compound_order(page)); ++ free_unref_page(page, compound_order(page)); + goto refill; + } + +@@ -4911,7 +4908,7 @@ void page_frag_free(void *addr) + struct page *page = virt_to_head_page(addr); + + if (unlikely(put_page_testzero(page))) +- free_the_page(page, compound_order(page)); ++ free_unref_page(page, compound_order(page)); + } + EXPORT_SYMBOL(page_frag_free); + +_ diff --git a/patches/mm-convert-free_zone_device_page-to-free_zone_device_folio.patch b/patches/mm-convert-free_zone_device_page-to-free_zone_device_folio.patch new file mode 100644 index 000000000..0885f9b09 --- /dev/null +++ b/patches/mm-convert-free_zone_device_page-to-free_zone_device_folio.patch @@ -0,0 +1,111 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: convert free_zone_device_page to free_zone_device_folio +Date: Fri, 5 Apr 2024 16:32:27 +0100 + +Both callers already have a folio; pass it in and save a few calls to +compound_head(). 
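
The saving is easiest to see side by side; the helpers below are illustrative only. A page-based entry point must renormalise to the head page (page_folio() is a compound_head() lookup), while a folio argument is already the head:

	#include <linux/memcontrol.h>
	#include <linux/mm.h>

	static void release_via_page(struct page *page)
	{
		/* compound_head() hidden in page_folio(), repeated at each use */
		mem_cgroup_uncharge(page_folio(page));
	}

	static void release_via_folio(struct folio *folio)
	{
		mem_cgroup_uncharge(folio);	/* head resolved once by the caller */
	}
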
+ +Link: https://lkml.kernel.org/r/20240405153228.2563754-6-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Zi Yan <ziy@nvidia.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/internal.h | 2 +- + mm/memremap.c | 30 ++++++++++++++++-------------- + mm/swap.c | 4 ++-- + 3 files changed, 19 insertions(+), 17 deletions(-) + +--- a/mm/internal.h~mm-convert-free_zone_device_page-to-free_zone_device_folio ++++ a/mm/internal.h +@@ -1149,7 +1149,7 @@ void __vunmap_range_noflush(unsigned lon + int numa_migrate_prep(struct folio *folio, struct vm_fault *vmf, + unsigned long addr, int page_nid, int *flags); + +-void free_zone_device_page(struct page *page); ++void free_zone_device_folio(struct folio *folio); + int migrate_device_coherent_page(struct page *page); + + /* +--- a/mm/memremap.c~mm-convert-free_zone_device_page-to-free_zone_device_folio ++++ a/mm/memremap.c +@@ -456,21 +456,23 @@ struct dev_pagemap *get_dev_pagemap(unsi + } + EXPORT_SYMBOL_GPL(get_dev_pagemap); + +-void free_zone_device_page(struct page *page) ++void free_zone_device_folio(struct folio *folio) + { +- if (WARN_ON_ONCE(!page->pgmap->ops || !page->pgmap->ops->page_free)) ++ if (WARN_ON_ONCE(!folio->page.pgmap->ops || ++ !folio->page.pgmap->ops->page_free)) + return; + +- mem_cgroup_uncharge(page_folio(page)); ++ mem_cgroup_uncharge(folio); + + /* + * Note: we don't expect anonymous compound pages yet. Once supported + * and we could PTE-map them similar to THP, we'd have to clear + * PG_anon_exclusive on all tail pages. + */ +- VM_BUG_ON_PAGE(PageAnon(page) && PageCompound(page), page); +- if (PageAnon(page)) +- __ClearPageAnonExclusive(page); ++ if (folio_test_anon(folio)) { ++ VM_BUG_ON_FOLIO(folio_test_large(folio), folio); ++ __ClearPageAnonExclusive(folio_page(folio, 0)); ++ } + + /* + * When a device managed page is freed, the folio->mapping field +@@ -481,20 +483,20 @@ void free_zone_device_page(struct page * + * + * For other types of ZONE_DEVICE pages, migration is either + * handled differently or not done at all, so there is no need +- * to clear page->mapping. ++ * to clear folio->mapping. + */ +- page->mapping = NULL; +- page->pgmap->ops->page_free(page); ++ folio->mapping = NULL; ++ folio->page.pgmap->ops->page_free(folio_page(folio, 0)); + +- if (page->pgmap->type != MEMORY_DEVICE_PRIVATE && +- page->pgmap->type != MEMORY_DEVICE_COHERENT) ++ if (folio->page.pgmap->type != MEMORY_DEVICE_PRIVATE && ++ folio->page.pgmap->type != MEMORY_DEVICE_COHERENT) + /* +- * Reset the page count to 1 to prepare for handing out the page ++ * Reset the refcount to 1 to prepare for handing out the page + * again. 
+ */ +- set_page_count(page, 1); ++ folio_set_count(folio, 1); + else +- put_dev_pagemap(page->pgmap); ++ put_dev_pagemap(folio->page.pgmap); + } + + void zone_device_page_init(struct page *page) +--- a/mm/swap.c~mm-convert-free_zone_device_page-to-free_zone_device_folio ++++ a/mm/swap.c +@@ -115,7 +115,7 @@ static void page_cache_release(struct fo + void __folio_put(struct folio *folio) + { + if (unlikely(folio_is_zone_device(folio))) { +- free_zone_device_page(&folio->page); ++ free_zone_device_folio(folio); + return; + } else if (folio_test_hugetlb(folio)) { + free_huge_folio(folio); +@@ -984,7 +984,7 @@ void folios_put_refs(struct folio_batch + if (put_devmap_managed_page_refs(&folio->page, nr_refs)) + continue; + if (folio_ref_sub_and_test(folio, nr_refs)) +- free_zone_device_page(&folio->page); ++ free_zone_device_folio(folio); + continue; + } + +_ diff --git a/patches/mm-convert-pagecache_isize_extended-to-use-a-folio.patch b/patches/mm-convert-pagecache_isize_extended-to-use-a-folio.patch new file mode 100644 index 000000000..9e3e0bebc --- /dev/null +++ b/patches/mm-convert-pagecache_isize_extended-to-use-a-folio.patch @@ -0,0 +1,84 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: convert pagecache_isize_extended to use a folio +Date: Fri, 5 Apr 2024 19:00:36 +0100 + +Remove four hidden calls to compound_head(). Also exit early if the +filesystem block size is >= PAGE_SIZE instead of just equal to PAGE_SIZE. + +Link: https://lkml.kernel.org/r/20240405180038.2618624-1-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Pankaj Raghav <p.raghav@samsung.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/truncate.c | 36 +++++++++++++++++------------------- + 1 file changed, 17 insertions(+), 19 deletions(-) + +--- a/mm/truncate.c~mm-convert-pagecache_isize_extended-to-use-a-folio ++++ a/mm/truncate.c +@@ -764,15 +764,15 @@ EXPORT_SYMBOL(truncate_setsize); + * @from: original inode size + * @to: new inode size + * +- * Handle extension of inode size either caused by extending truncate or by +- * write starting after current i_size. We mark the page straddling current +- * i_size RO so that page_mkwrite() is called on the nearest write access to +- * the page. This way filesystem can be sure that page_mkwrite() is called on +- * the page before user writes to the page via mmap after the i_size has been +- * changed. ++ * Handle extension of inode size either caused by extending truncate or ++ * by write starting after current i_size. We mark the page straddling ++ * current i_size RO so that page_mkwrite() is called on the first ++ * write access to the page. The filesystem will update its per-block ++ * information before user writes to the page via mmap after the i_size ++ * has been changed. + * + * The function must be called after i_size is updated so that page fault +- * coming after we unlock the page will already see the new i_size. ++ * coming after we unlock the folio will already see the new i_size. + * The function must be called while we still hold i_rwsem - this not only + * makes sure i_size is stable but also that userspace cannot observe new + * i_size value before we are prepared to store mmap writes at new inode size. 
+@@ -781,31 +781,29 @@ void pagecache_isize_extended(struct ino + { + int bsize = i_blocksize(inode); + loff_t rounded_from; +- struct page *page; +- pgoff_t index; ++ struct folio *folio; + + WARN_ON(to > inode->i_size); + +- if (from >= to || bsize == PAGE_SIZE) ++ if (from >= to || bsize >= PAGE_SIZE) + return; + /* Page straddling @from will not have any hole block created? */ + rounded_from = round_up(from, bsize); + if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1))) + return; + +- index = from >> PAGE_SHIFT; +- page = find_lock_page(inode->i_mapping, index); +- /* Page not cached? Nothing to do */ +- if (!page) ++ folio = filemap_lock_folio(inode->i_mapping, from / PAGE_SIZE); ++ /* Folio not cached? Nothing to do */ ++ if (IS_ERR(folio)) + return; + /* +- * See clear_page_dirty_for_io() for details why set_page_dirty() ++ * See folio_clear_dirty_for_io() for details why folio_mark_dirty() + * is needed. + */ +- if (page_mkclean(page)) +- set_page_dirty(page); +- unlock_page(page); +- put_page(page); ++ if (folio_mkclean(folio)) ++ folio_mark_dirty(folio); ++ folio_unlock(folio); ++ folio_put(folio); + } + EXPORT_SYMBOL(pagecache_isize_extended); + +_ diff --git a/patches/mm-free-non-hugetlb-large-folios-in-a-batch.patch b/patches/mm-free-non-hugetlb-large-folios-in-a-batch.patch new file mode 100644 index 000000000..4c2d9d591 --- /dev/null +++ b/patches/mm-free-non-hugetlb-large-folios-in-a-batch.patch @@ -0,0 +1,47 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: free non-hugetlb large folios in a batch +Date: Fri, 5 Apr 2024 16:32:23 +0100 + +Patch series "Clean up __folio_put()". + +With all the changes over the last few years, __folio_put_small and +__folio_put_large have become almost identical to each other ... except +you can't tell because they're spread over two files. Rearrange it all so +that you can tell, and then inline them both into __folio_put(). + + +This patch (of 5): + +free_unref_folios() can now handle non-hugetlb large folios, so keep +normal large folios in the batch. hugetlb folios still need to be handled +specially. I believe that folios freed using put_pages_list() cannot be +accounted to a memcg (or the small folios would trip the "page still +charged to cgroup" warning), but put an assertion in to check that. 
+ +Link: https://lkml.kernel.org/r/20240405153228.2563754-1-willy@infradead.org +Link: https://lkml.kernel.org/r/20240405153228.2563754-2-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Zi Yan <ziy@nvidia.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/swap.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/mm/swap.c~mm-free-non-hugetlb-large-folios-in-a-batch ++++ a/mm/swap.c +@@ -158,10 +158,11 @@ void put_pages_list(struct list_head *pa + list_for_each_entry_safe(folio, next, pages, lru) { + if (!folio_put_testzero(folio)) + continue; +- if (folio_test_large(folio)) { +- __folio_put_large(folio); ++ if (folio_test_hugetlb(folio)) { ++ free_huge_folio(folio); + continue; + } ++ VM_BUG_ON_FOLIO(folio_memcg(folio), folio); + /* LRU flag must be clear because it's passed using the lru */ + if (folio_batch_add(&fbatch, folio) > 0) + continue; +_ diff --git a/patches/mm-inline-destroy_large_folio-into-__folio_put_large.patch b/patches/mm-inline-destroy_large_folio-into-__folio_put_large.patch new file mode 100644 index 000000000..0b2a39a46 --- /dev/null +++ b/patches/mm-inline-destroy_large_folio-into-__folio_put_large.patch @@ -0,0 +1,74 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: inline destroy_large_folio() into __folio_put_large() +Date: Fri, 5 Apr 2024 16:32:25 +0100 + +destroy_large_folio() has only one caller, move its contents there. + +Link: https://lkml.kernel.org/r/20240405153228.2563754-4-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Zi Yan <ziy@nvidia.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + include/linux/mm.h | 2 -- + mm/page_alloc.c | 14 -------------- + mm/swap.c | 13 ++++++++++--- + 3 files changed, 10 insertions(+), 19 deletions(-) + +--- a/include/linux/mm.h~mm-inline-destroy_large_folio-into-__folio_put_large ++++ a/include/linux/mm.h +@@ -1318,8 +1318,6 @@ void folio_copy(struct folio *dst, struc + + unsigned long nr_free_buffer_pages(void); + +-void destroy_large_folio(struct folio *folio); +- + /* Returns the number of bytes in this potentially compound page. */ + static inline unsigned long page_size(struct page *page) + { +--- a/mm/page_alloc.c~mm-inline-destroy_large_folio-into-__folio_put_large ++++ a/mm/page_alloc.c +@@ -565,20 +565,6 @@ void prep_compound_page(struct page *pag + prep_compound_head(page, order); + } + +-void destroy_large_folio(struct folio *folio) +-{ +- if (folio_test_hugetlb(folio)) { +- free_huge_folio(folio); +- return; +- } +- +- if (folio_test_large_rmappable(folio)) +- folio_undo_large_rmappable(folio); +- +- mem_cgroup_uncharge(folio); +- free_unref_page(&folio->page, folio_order(folio)); +-} +- + static inline void set_buddy_order(struct page *page, unsigned int order) + { + set_page_private(page, order); +--- a/mm/swap.c~mm-inline-destroy_large_folio-into-__folio_put_large ++++ a/mm/swap.c +@@ -127,9 +127,16 @@ static void __folio_put_large(struct fol + * (it's never listed to any LRU lists) and no memcg routines should + * be called for hugetlb (it has a separate hugetlb_cgroup.) 
+ */ +- if (!folio_test_hugetlb(folio)) +- page_cache_release(folio); +- destroy_large_folio(folio); ++ if (folio_test_hugetlb(folio)) { ++ free_huge_folio(folio); ++ return; ++ } ++ ++ page_cache_release(folio); ++ if (folio_test_large_rmappable(folio)) ++ folio_undo_large_rmappable(folio); ++ mem_cgroup_uncharge(folio); ++ free_unref_page(&folio->page, folio_order(folio)); + } + + void __folio_put(struct folio *folio) +_ diff --git a/patches/mm-page_alloc-fix-freelist-movement-during-block-conversion-fix.patch b/patches/mm-page_alloc-fix-freelist-movement-during-block-conversion-fix.patch new file mode 100644 index 000000000..73e121fcf --- /dev/null +++ b/patches/mm-page_alloc-fix-freelist-movement-during-block-conversion-fix.patch @@ -0,0 +1,54 @@ +From: Baolin Wang <baolin.wang@linux.alibaba.com> +Subject: mm-page_alloc-fix-freelist-movement-during-block-conversion-fix +Date: Fri, 5 Apr 2024 20:11:47 +0800 + +fix allocation failures with CONFIG_CMA + +The original code logic was that if the 'migratetype' type allocation is +failed, it would first try CMA page allocation and then attempt to +fallback to other migratetype allocations. Now it has been changed so +that if CMA allocation fails, it will directly return. This change has +caused a regression when I running the thpcompact benchmark, resulting in +a significant reduction in the percentage of THPs like below: + +thpcompact Percentage Faults Huge + K6.9-rc2 K6.9-rc2 + this patch +Percentage huge-1 78.18 ( 0.00%) 42.49 ( -45.65%) +Percentage huge-3 86.70 ( 0.00%) 35.13 ( -59.49%) +Percentage huge-5 90.26 ( 0.00%) 52.35 ( -42.00%) +Percentage huge-7 92.34 ( 0.00%) 31.84 ( -65.52%) +Percentage huge-12 91.18 ( 0.00%) 45.85 ( -49.72%) +Percentage huge-18 89.00 ( 0.00%) 29.18 ( -67.22%) +Percentage huge-24 90.52 ( 0.00%) 46.68 ( -48.43%) +Percentage huge-30 94.44 ( 0.00%) 38.35 ( -59.39%) +Percentage huge-32 93.09 ( 0.00%) 39.37 ( -57.70%) + +After making the following modifications, the regression is gone. + +Link: https://lkml.kernel.org/r/a97697e0-45b0-4f71-b087-fdc7a1d43c0e@linux.alibaba.com +Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> +Cc: David Hildenbrand <david@redhat.com> +Cc: "Huang, Ying" <ying.huang@intel.com> +Cc: Johannes Weiner <hannes@cmpxchg.org> +Cc: Mel Gorman <mgorman@techsingularity.net> +Cc: Vlastimil Babka <vbabka@suse.cz> +Cc: Zi Yan <ziy@nvidia.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + mm/page_alloc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/page_alloc.c~mm-page_alloc-fix-freelist-movement-during-block-conversion-fix ++++ a/mm/page_alloc.c +@@ -2139,7 +2139,8 @@ __rmqueue(struct zone *zone, unsigned in + if (unlikely(!page)) { + if (alloc_flags & ALLOC_CMA) + page = __rmqueue_cma_fallback(zone, order); +- else ++ ++ if (!page) + page = __rmqueue_fallback(zone, order, migratetype, + alloc_flags); + } +_ diff --git a/patches/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.patch b/patches/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.patch deleted file mode 100644 index 8a5be764b..000000000 --- a/patches/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.patch +++ /dev/null @@ -1,56 +0,0 @@ -From: David Hildenbrand <david@redhat.com> -Subject: mm/secretmem: fix GUP-fast succeeding on secretmem folios -Date: Tue, 26 Mar 2024 15:32:08 +0100 - -folio_is_secretmem() currently relies on secretmem folios being LRU -folios, to save some cycles. 
- -However, folios might reside in a folio batch without the LRU flag set, or -temporarily have their LRU flag cleared. Consequently, the LRU flag is -unreliable for this purpose. - -In particular, this is the case when secretmem_fault() allocates a fresh -page and calls filemap_add_folio()->folio_add_lru(). The folio might be -added to the per-cpu folio batch and won't get the LRU flag set until the -batch was drained using e.g., lru_add_drain(). - -Consequently, folio_is_secretmem() might not detect secretmem folios and -GUP-fast can succeed in grabbing a secretmem folio, crashing the kernel -when we would later try reading/writing to the folio, because the folio -has been unmapped from the directmap. - -Fix it by removing that unreliable check. - -Link: https://lkml.kernel.org/r/20240326143210.291116-2-david@redhat.com -Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas") -Signed-off-by: David Hildenbrand <david@redhat.com> -Reported-by: xingwei lee <xrivendell7@gmail.com> -Reported-by: yue sun <samsun1006219@gmail.com> -Closes: https://lore.kernel.org/lkml/CABOYnLyevJeravW=QrH0JUPYEcDN160aZFb7kwndm-J2rmz0HQ@mail.gmail.com/ -Debugged-by: Miklos Szeredi <miklos@szeredi.hu> -Tested-by: Miklos Szeredi <mszeredi@redhat.com> -Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org> -Cc: Lorenzo Stoakes <lstoakes@gmail.com> -Cc: <stable@vger.kernel.org> -Signed-off-by: Andrew Morton <akpm@linux-foundation.org> ---- - - include/linux/secretmem.h | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/include/linux/secretmem.h~mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios -+++ a/include/linux/secretmem.h -@@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(st - /* - * Using folio_mapping() is quite slow because of the actual call - * instruction. -- * We know that secretmem pages are not compound and LRU so we can -+ * We know that secretmem pages are not compound, so we can - * save a couple of cycles here. - */ -- if (folio_test_large(folio) || !folio_test_lru(folio)) -+ if (folio_test_large(folio)) - return false; - - mapping = (struct address_space *) -_ diff --git a/patches/mm-set-pageblock_order-to-hpage_pmd_order-in-case-with-config_hugetlb_page-but-thp-enabled.patch b/patches/mm-set-pageblock_order-to-hpage_pmd_order-in-case-with-config_hugetlb_page-but-thp-enabled.patch new file mode 100644 index 000000000..987afec1f --- /dev/null +++ b/patches/mm-set-pageblock_order-to-hpage_pmd_order-in-case-with-config_hugetlb_page-but-thp-enabled.patch @@ -0,0 +1,46 @@ +From: Baolin Wang <baolin.wang@linux.alibaba.com> +Subject: mm: set pageblock_order to HPAGE_PMD_ORDER in case with !CONFIG_HUGETLB_PAGE but THP enabled +Date: Fri, 5 Apr 2024 20:24:16 +0800 + +As Vlastimil suggested in previous discussion[1], it doesn't make sense to +set pageblock_order as MAX_PAGE_ORDER when hugetlbfs is not enabled and +THP is enabled. Instead, it should be set to HPAGE_PMD_ORDER. 
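
For scale, a worked example of what the new definition evaluates to, assuming a typical x86-64 configuration (the values below are that architecture's, not part of the patch):

	#define PAGE_SHIFT	12				/* 4 KiB base pages */
	#define PMD_SHIFT	21				/* 2 MiB PMD mappings */
	#define HPAGE_PMD_ORDER	(PMD_SHIFT - PAGE_SHIFT)	/* = 9 */
	#define MAX_PAGE_ORDER	10

	/*
	 * pageblock_order = min(HPAGE_PMD_ORDER, MAX_PAGE_ORDER) = 9, so
	 * pageblocks become 2 MiB -- the THP size -- instead of the 4 MiB
	 * MAX_PAGE_ORDER grouping used when neither option is configured.
	 */
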
+ +[1] https://lore.kernel.org/all/76457ec5-d789-449b-b8ca-dcb6ceb12445@suse.cz/ +Link: https://lkml.kernel.org/r/3d57d253070035bdc0f6d6e5681ce1ed0e1934f7.1712286863.git.baolin.wang@linux.alibaba.com +Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> +Suggested-by: Vlastimil Babka <vbabka@suse.cz> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Reviewed-by: Zi Yan <ziy@nvidia.com> +Cc: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + include/linux/pageblock-flags.h | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/include/linux/pageblock-flags.h~mm-set-pageblock_order-to-hpage_pmd_order-in-case-with-config_hugetlb_page-but-thp-enabled ++++ a/include/linux/pageblock-flags.h +@@ -28,7 +28,7 @@ enum pageblock_bits { + NR_PAGEBLOCK_BITS + }; + +-#ifdef CONFIG_HUGETLB_PAGE ++#if defined(CONFIG_HUGETLB_PAGE) + + #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE + +@@ -45,7 +45,11 @@ extern unsigned int pageblock_order; + + #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ + +-#else /* CONFIG_HUGETLB_PAGE */ ++#elif defined(CONFIG_TRANSPARENT_HUGEPAGE) ++ ++#define pageblock_order min_t(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER) ++ ++#else /* CONFIG_TRANSPARENT_HUGEPAGE */ + + /* If huge pages are not used, group by MAX_ORDER_NR_PAGES */ + #define pageblock_order MAX_PAGE_ORDER +_ diff --git a/patches/init-open-output-files-from-cpio-unpacking-with-o_largefile.patch b/patches/old/init-open-output-files-from-cpio-unpacking-with-o_largefile.patch index dbae0d771..dbae0d771 100644 --- a/patches/init-open-output-files-from-cpio-unpacking-with-o_largefile.patch +++ b/patches/old/init-open-output-files-from-cpio-unpacking-with-o_largefile.patch diff --git a/patches/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.patch b/patches/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.patch index 7fdd3972c..dd30dd634 100644 --- a/patches/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.patch +++ b/patches/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.patch @@ -1,57 +1,169 @@ -From: Zack Rusin <zack.rusin@broadcom.com> +From: Alexey Makhalov <alexey.makhalov@broadcom.com> Subject: MAINTAINERS: change vmware.com addresses to broadcom.com -Date: Sun, 24 Dec 2023 00:20:36 -0500 +Date: Tue, 2 Apr 2024 16:23:34 -0700 -Update the email addresses for vmwgfx and vmmouse to reflect the fact that -VMware is now part of Broadcom. +Update all remaining vmware.com email addresses to actual broadcom.com. -Add a .mailmap entry because the vmware.com address will start bouncing -soon. +Add corresponding .mailmap entries for maintainers who contributed in the +past as the vmware.com address will start bouncing soon. -Link: https://lkml.kernel.org/r/20231224052036.603621-1-zack.rusin@broadcom.com -Signed-off-by: Zack Rusin <zack.rusin@broadcom.com> +Maintainership update. Jeff Sipek has left VMware, Nick Shi will be +maintaining VMware PTP. 
+ +Link: https://lkml.kernel.org/r/20240402232334.33167-1-alexey.makhalov@broadcom.com +Signed-off-by: Alexey Makhalov <alexey.makhalov@broadcom.com> Acked-by: Florian Fainelli <florian.fainelli@broadcom.com> -Cc: Ian Forbes <ian.forbes@broadcom.com> -Cc: Martin Krastev <martin.krastev@broadcom.com> -Cc: Maaz Mombasawala <maaz.mombasawala@broadcom.com> +Acked-by: Ajay Kaher <ajay.kaher@broadcom.com> +Acked-by: Ronak Doshi <ronak.doshi@broadcom.com> +Acked-by: Nick Shi <nick.shi@broadcom.com> +Acked-by: Bryan Tan <bryan-bt.tan@broadcom.com> +Acked-by: Vishnu Dasa <vishnu.dasa@broadcom.com> +Acked-by: Vishal Bhakta <vishal.bhakta@broadcom.com> +Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> --- - .mailmap | 1 + - MAINTAINERS | 9 ++++----- - 2 files changed, 5 insertions(+), 5 deletions(-) + .mailmap | 5 +++++ + MAINTAINERS | 46 +++++++++++++++++++++++----------------------- + 2 files changed, 28 insertions(+), 23 deletions(-) --- a/.mailmap~maintainers-change-vmwarecom-addresses-to-broadcomcom +++ a/.mailmap -@@ -631,4 +631,5 @@ Wolfram Sang <wsa@kernel.org> <w.sang@pe - Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de> - Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com> - Yusuke Goda <goda.yusuke@renesas.com> -+Zack Rusin <zack.rusin@broadcom.com> <zackr@vmware.com> - Zhu Yanjun <zyjzyj2000@gmail.com> <yanjunz@nvidia.com> +@@ -20,6 +20,7 @@ Adam Oldham <oldhamca@gmail.com> + Adam Radford <aradford@gmail.com> + Adriana Reus <adi.reus@gmail.com> <adriana.reus@intel.com> + Adrian Bunk <bunk@stusta.de> ++Ajay Kaher <ajay.kaher@broadcom.com> <akaher@vmware.com> + Akhil P Oommen <quic_akhilpo@quicinc.com> <akhilpo@codeaurora.org> + Alan Cox <alan@lxorguk.ukuu.org.uk> + Alan Cox <root@hraefn.swansea.linux.org.uk> +@@ -36,6 +37,7 @@ Alexei Avshalom Lazar <quic_ailizaro@qui + Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com> + Alexei Starovoitov <ast@kernel.org> <ast@fb.com> + Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com> ++Alexey Makhalov <alexey.amakhalov@broadcom.com> <amakhalov@vmware.com> + Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com> + Alex Shi <alexs@kernel.org> <alex.shi@intel.com> + Alex Shi <alexs@kernel.org> <alex.shi@linaro.org> +@@ -110,6 +112,7 @@ Brendan Higgins <brendan.higgins@linux.d + Brian Avery <b.avery@hp.com> + Brian King <brking@us.ibm.com> + Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com> ++Bryan Tan <bryan-bt.tan@broadcom.com> <bryantan@vmware.com> + Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com> + Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org> + Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org> +@@ -529,6 +532,7 @@ Rocky Liao <quic_rjliao@quicinc.com> <rj + Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com> + Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com> + Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru> ++Ronak Doshi <ronak.doshi@broadcom.com> <doshir@vmware.com> + Muchun Song <muchun.song@linux.dev> <songmuchun@bytedance.com> + Muchun Song <muchun.song@linux.dev> <smuchun@gmail.com> + Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com> +@@ -651,6 +655,7 @@ Viresh Kumar <vireshk@kernel.org> <vires + Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com> + Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.org> + Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.com> ++Vishnu Dasa <vishnu.dasa@broadcom.com> <vdasa@vmware.com> + Vivek 
Aknurwar <quic_viveka@quicinc.com> <viveka@codeaurora.org> + Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com> + Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com> --- a/MAINTAINERS~maintainers-change-vmwarecom-addresses-to-broadcomcom +++ a/MAINTAINERS -@@ -6903,8 +6903,8 @@ T: git git://anongit.freedesktop.org/drm - F: drivers/gpu/drm/vboxvideo/ +@@ -16731,9 +16731,9 @@ F: include/uapi/linux/ppdev.h + + PARAVIRT_OPS INTERFACE + M: Juergen Gross <jgross@suse.com> +-R: Ajay Kaher <akaher@vmware.com> +-R: Alexey Makhalov <amakhalov@vmware.com> +-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> ++R: Ajay Kaher <ajay.kaher@broadcom.com> ++R: Alexey Makhalov <alexey.amakhalov@broadcom.com> ++R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> + L: virtualization@lists.linux.dev + L: x86@kernel.org + S: Supported +@@ -23652,9 +23652,9 @@ S: Supported + F: drivers/misc/vmw_balloon.c + + VMWARE HYPERVISOR INTERFACE +-M: Ajay Kaher <akaher@vmware.com> +-M: Alexey Makhalov <amakhalov@vmware.com> +-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> ++M: Ajay Kaher <ajay.kaher@broadcom.com> ++M: Alexey Makhalov <alexey.amakhalov@broadcom.com> ++R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> + L: virtualization@lists.linux.dev + L: x86@kernel.org + S: Supported +@@ -23663,34 +23663,34 @@ F: arch/x86/include/asm/vmware.h + F: arch/x86/kernel/cpu/vmware.c + + VMWARE PVRDMA DRIVER +-M: Bryan Tan <bryantan@vmware.com> +-M: Vishnu Dasa <vdasa@vmware.com> +-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> ++M: Bryan Tan <bryan-bt.tan@broadcom.com> ++M: Vishnu Dasa <vishnu.dasa@broadcom.com> ++R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> + L: linux-rdma@vger.kernel.org + S: Supported + F: drivers/infiniband/hw/vmw_pvrdma/ - DRM DRIVER FOR VMWARE VIRTUAL GPU --M: Zack Rusin <zackr@vmware.com> --R: VMware Graphics Reviewers <linux-graphics-maintainer@vmware.com> -+M: Zack Rusin <zack.rusin@broadcom.com> + VMWARE PVSCSI DRIVER +-M: Vishal Bhakta <vbhakta@vmware.com> +-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> ++M: Vishal Bhakta <vishal.bhakta@broadcom.com> ++R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> + L: linux-scsi@vger.kernel.org + S: Supported + F: drivers/scsi/vmw_pvscsi.c + F: drivers/scsi/vmw_pvscsi.h + + VMWARE VIRTUAL PTP CLOCK DRIVER +-M: Jeff Sipek <jsipek@vmware.com> +-R: Ajay Kaher <akaher@vmware.com> +-R: Alexey Makhalov <amakhalov@vmware.com> +-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> ++M: Nick Shi <nick.shi@broadcom.com> ++R: Ajay Kaher <ajay.kaher@broadcom.com> ++R: Alexey Makhalov <alexey.amakhalov@broadcom.com> ++R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> + L: netdev@vger.kernel.org + S: Supported + F: drivers/ptp/ptp_vmw.c + + VMWARE VMCI DRIVER +-M: Bryan Tan <bryantan@vmware.com> +-M: Vishnu Dasa <vdasa@vmware.com> +-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> ++M: Bryan Tan <bryan-bt.tan@broadcom.com> ++M: Vishnu Dasa <vishnu.dasa@broadcom.com> ++R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> + L: linux-kernel@vger.kernel.org + S: Supported + F: drivers/misc/vmw_vmci/ +@@ -23705,16 +23705,16 @@ F: drivers/input/mouse/vmmouse.c + F: drivers/input/mouse/vmmouse.h + + VMWARE VMXNET3 ETHERNET DRIVER +-M: Ronak Doshi <doshir@vmware.com> +-R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> ++M: Ronak Doshi 
<ronak.doshi@broadcom.com> +R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> - L: dri-devel@lists.freedesktop.org + L: netdev@vger.kernel.org S: Supported - T: git git://anongit.freedesktop.org/drm/drm-misc -@@ -23203,9 +23203,8 @@ F: drivers/misc/vmw_vmci/ - F: include/linux/vmw_vmci* + F: drivers/net/vmxnet3/ - VMWARE VMMOUSE SUBDRIVER --M: Zack Rusin <zackr@vmware.com> --R: VMware Graphics Reviewers <linux-graphics-maintainer@vmware.com> + VMWARE VSOCK VMCI TRANSPORT DRIVER +-M: Bryan Tan <bryantan@vmware.com> +-M: Vishnu Dasa <vdasa@vmware.com> -R: VMware PV-Drivers Reviewers <pv-drivers@vmware.com> -+M: Zack Rusin <zack.rusin@broadcom.com> ++M: Bryan Tan <bryan-bt.tan@broadcom.com> ++M: Vishnu Dasa <vishnu.dasa@broadcom.com> +R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com> - L: linux-input@vger.kernel.org + L: linux-kernel@vger.kernel.org S: Supported - F: drivers/input/mouse/vmmouse.c + F: net/vmw_vsock/vmci_transport* _ diff --git a/patches/old/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.patch b/patches/old/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.patch index 065f79a89..8a5be764b 100644 --- a/patches/old/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.patch +++ b/patches/old/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.patch @@ -1,42 +1,55 @@ From: David Hildenbrand <david@redhat.com> Subject: mm/secretmem: fix GUP-fast succeeding on secretmem folios -Date: Mon, 25 Mar 2024 14:41:12 +0100 +Date: Tue, 26 Mar 2024 15:32:08 +0100 -folio_is_secretmem() states that secretmem folios cannot be LRU folios: so -we may only exit early if we find an LRU folio. Yet, we exit early if we -find a folio that is not a secretmem folio. +folio_is_secretmem() currently relies on secretmem folios being LRU +folios, to save some cycles. -Consequently, folio_is_secretmem() fails to detect secretmem folios and, -therefore, we can succeed in grabbing a secretmem folio during GUP-fast, -crashing the kernel when we later try reading/writing to the folio, -because the folio has been unmapped from the directmap. +However, folios might reside in a folio batch without the LRU flag set, or +temporarily have their LRU flag cleared. Consequently, the LRU flag is +unreliable for this purpose. -Link: https://lkml.kernel.org/r/20240325134114.257544-2-david@redhat.com +In particular, this is the case when secretmem_fault() allocates a fresh +page and calls filemap_add_folio()->folio_add_lru(). The folio might be +added to the per-cpu folio batch and won't get the LRU flag set until the +batch was drained using e.g., lru_add_drain(). + +Consequently, folio_is_secretmem() might not detect secretmem folios and +GUP-fast can succeed in grabbing a secretmem folio, crashing the kernel +when we would later try reading/writing to the folio, because the folio +has been unmapped from the directmap. + +Fix it by removing that unreliable check. 
+ +Link: https://lkml.kernel.org/r/20240326143210.291116-2-david@redhat.com Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas") Signed-off-by: David Hildenbrand <david@redhat.com> Reported-by: xingwei lee <xrivendell7@gmail.com> Reported-by: yue sun <samsun1006219@gmail.com> Closes: https://lore.kernel.org/lkml/CABOYnLyevJeravW=QrH0JUPYEcDN160aZFb7kwndm-J2rmz0HQ@mail.gmail.com/ Debugged-by: Miklos Szeredi <miklos@szeredi.hu> -Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org> Tested-by: Miklos Szeredi <mszeredi@redhat.com> +Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org> Cc: Lorenzo Stoakes <lstoakes@gmail.com> -Cc: "Mike Rapoport (IBM)" <rppt@kernel.org> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> --- - include/linux/secretmem.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) + include/linux/secretmem.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) --- a/include/linux/secretmem.h~mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios +++ a/include/linux/secretmem.h -@@ -16,7 +16,7 @@ static inline bool folio_is_secretmem(st - * We know that secretmem pages are not compound and LRU so we can +@@ -13,10 +13,10 @@ static inline bool folio_is_secretmem(st + /* + * Using folio_mapping() is quite slow because of the actual call + * instruction. +- * We know that secretmem pages are not compound and LRU so we can ++ * We know that secretmem pages are not compound, so we can * save a couple of cycles here. */ - if (folio_test_large(folio) || !folio_test_lru(folio)) -+ if (folio_test_large(folio) || folio_test_lru(folio)) ++ if (folio_test_large(folio)) return false; mapping = (struct address_space *) diff --git a/patches/mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.patch b/patches/old/mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.patch index 1e9427e71..1e9427e71 100644 --- a/patches/mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.patch +++ b/patches/old/mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.patch diff --git a/patches/mm-vmalloc-fix-lockdep-warning-fix.patch b/patches/old/mm-vmalloc-fix-lockdep-warning-fix.patch index 08356ec5b..08356ec5b 100644 --- a/patches/mm-vmalloc-fix-lockdep-warning-fix.patch +++ b/patches/old/mm-vmalloc-fix-lockdep-warning-fix.patch diff --git a/patches/mm-vmalloc-fix-lockdep-warning.patch b/patches/old/mm-vmalloc-fix-lockdep-warning.patch index 73c63e600..73c63e600 100644 --- a/patches/mm-vmalloc-fix-lockdep-warning.patch +++ b/patches/old/mm-vmalloc-fix-lockdep-warning.patch diff --git a/patches/selftests-mm-include-stringsh-for-ffsl.patch b/patches/old/selftests-mm-include-stringsh-for-ffsl.patch index f4366f47c..f4366f47c 100644 --- a/patches/selftests-mm-include-stringsh-for-ffsl.patch +++ b/patches/old/selftests-mm-include-stringsh-for-ffsl.patch diff --git a/patches/stackdepot-rename-pool_index-to-pool_index_plus_1.patch b/patches/old/stackdepot-rename-pool_index-to-pool_index_plus_1.patch index c3cb90832..c3cb90832 100644 --- a/patches/stackdepot-rename-pool_index-to-pool_index_plus_1.patch +++ b/patches/old/stackdepot-rename-pool_index-to-pool_index_plus_1.patch diff --git a/patches/x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.patch b/patches/old/x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.patch index cdfc9a974..cdfc9a974 100644 --- a/patches/x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.patch +++ b/patches/old/x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.patch diff 
--git a/patches/proc-rewrite-stable_page_flags-fix-2.patch b/patches/proc-rewrite-stable_page_flags-fix-2.patch new file mode 100644 index 000000000..ef2eb9180 --- /dev/null +++ b/patches/proc-rewrite-stable_page_flags-fix-2.patch @@ -0,0 +1,56 @@ +From: Matthew Wilcox <willy@infradead.org> +Subject: proc-rewrite-stable_page_flags-fix-2 +Date: Fri, 5 Apr 2024 20:23:32 +0100 + +fix warnings + +fs/proc/page.c:156 stable_page_flags() warn: bit shifter 'PG_lru' used for logical '&' +fs/proc/page.c:207 stable_page_flags() warn: bit shifter 'KPF_HUGE' used for logical '&' + +Link: https://lkml.kernel.org/r/ZhBPtCYfSuFuUMEz@casper.infradead.org +Signed-off-by: Matthew Wilcox <willy@infradead.org> +Reported-by: Dan Carpenter <dan.carpenter@linaro.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + + fs/proc/page.c | 3 ++- + tools/cgroup/memcg_slabinfo.py | 5 ++--- + 2 files changed, 4 insertions(+), 4 deletions(-) + +--- a/fs/proc/page.c~proc-rewrite-stable_page_flags-fix-2 ++++ a/fs/proc/page.c +@@ -175,6 +175,8 @@ u64 stable_page_flags(const struct page + u |= 1 << KPF_OFFLINE; + if (PageTable(page)) + u |= 1 << KPF_PGTABLE; ++ if (folio_test_slab(folio)) ++ u |= 1 << KPF_SLAB; + + #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) + u |= kpf_copy_bit(k, KPF_IDLE, PG_idle); +@@ -184,7 +186,6 @@ u64 stable_page_flags(const struct page + #endif + + u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); +- u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); + u |= kpf_copy_bit(k, KPF_ERROR, PG_error); + u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); + u |= kpf_copy_bit(k, KPF_UPTODATE, PG_uptodate); +--- a/tools/cgroup/memcg_slabinfo.py~proc-rewrite-stable_page_flags-fix-2 ++++ a/tools/cgroup/memcg_slabinfo.py +@@ -146,12 +146,11 @@ def detect_kernel_config(): + + + def for_each_slab(prog): +- PGSlab = 1 << prog.constant('PG_slab') +- PGHead = 1 << prog.constant('PG_head') ++ PGSlab = ~prog.constant('PG_slab') + + for page in for_each_page(prog): + try: +- if page.flags.value_() & PGSlab: ++ if page.page_type.value_() == PGSlab: + yield cast('struct slab *', page) + except FaultError: + pass +_ diff --git a/pc/devel-series b/pc/devel-series index e25323c2d..8a9b9746c 100644 --- a/pc/devel-series +++ b/pc/devel-series @@ -1,6 +1,6 @@ #linus.patch #BRANCH mm-hotfixes-stable -#mm-hotfixes-stable.patch +mm-hotfixes-stable.patch # # # @@ -52,22 +52,25 @@ ##crash-use-macro-to-add-crashk_res-into-iomem-early-for-specific-arch.patch ### # -mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.patch -# -init-open-output-files-from-cpio-unpacking-with-o_largefile.patch -# -mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.patch -# -mm-vmalloc-fix-lockdep-warning.patch -mm-vmalloc-fix-lockdep-warning-fix.patch -# -selftests-mm-include-stringsh-for-ffsl.patch -# -maintainers-change-vmwarecom-addresses-to-broadcomcom.patch +##mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.patch +### +##init-open-output-files-from-cpio-unpacking-with-o_largefile.patch +### +##mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.patch +### +##mm-vmalloc-fix-lockdep-warning.patch +### +##selftests-mm-include-stringsh-for-ffsl.patch +### +##maintainers-change-vmwarecom-addresses-to-broadcomcom.patch +### +##x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.patch +### +##stackdepot-rename-pool_index-to-pool_index_plus_1.patch +### +### # -x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.patch # -stackdepot-rename-pool_index-to-pool_index_plus_1.patch # 
#userfaultfd-change-src_folio-after-ensuring-its-unpinned-in-uffdio_move.patch: acks? userfaultfd-change-src_folio-after-ensuring-its-unpinned-in-uffdio_move.patch @@ -139,7 +142,7 @@ fix-missing-vmalloch-includes-fix-2.patch fix-missing-vmalloch-includes-fix-3.patch fix-missing-vmalloch-includes-fix-4.patch fix-missing-vmalloch-includes-fix-5.patch -#asm-generic-ioh-kill-vmalloch-dependency.patch: https://lkml.kernel.org/r/202403290536.7f9zGl5Q-lkp@intel.com https://lkml.kernel.org/r/202404031246.aq5Yr5KO-lkp@intel.com +#asm-generic-ioh-kill-vmalloch-dependency.patch: https://lkml.kernel.org/r/202403290536.7f9zGl5Q-lkp@intel.com https://lkml.kernel.org/r/202404031246.aq5Yr5KO-lkp@intel.com https://lkml.kernel.org/r/202404050934.bdQwGSAA-lkp@intel.com https://lkml.kernel.org/r/202404050828.5pKgmCLu-lkp@intel.com asm-generic-ioh-kill-vmalloch-dependency.patch mm-slub-mark-slab_free_freelist_hook-__always_inline.patch scripts-kallysms-always-include-__start-and-__stop-symbols.patch @@ -151,6 +154,7 @@ slab-objext-introduce-objext_flags-as-extension-to-page_memcg_data_flags.patch lib-code-tagging-framework.patch lib-code-tagging-module-support.patch lib-prevent-module-unloading-if-memory-is-not-freed.patch +#lib-add-allocation-tagging-support-for-memory-allocation-profiling.patch: https://lkml.kernel.org/r/6b5281d8-2998-4e66-b65e-45a0e68e5780@moroto.mountain lib-add-allocation-tagging-support-for-memory-allocation-profiling.patch lib-add-allocation-tagging-support-for-memory-allocation-profiling-fix.patch lib-add-allocation-tagging-support-for-memory-allocation-profiling-fix-2.patch @@ -226,6 +230,7 @@ mm-page_alloc-fix-up-block-types-when-merging-compatible-blocks.patch mm-page_alloc-move-free-pages-when-converting-block-during-isolation.patch mm-page_alloc-fix-move_freepages_block-range-error.patch mm-page_alloc-fix-freelist-movement-during-block-conversion.patch +mm-page_alloc-fix-freelist-movement-during-block-conversion-fix.patch mm-page_alloc-close-migratetype-race-between-freeing-and-stealing.patch mm-page_alloc-set-migratetype-inside-move_freepages.patch mm-page_isolation-prepare-for-hygienic-freelists.patch @@ -309,6 +314,7 @@ slub-remove-use-of-page-flags.patch remove-references-to-page-flags-in-documentation.patch proc-rewrite-stable_page_flags.patch proc-rewrite-stable_page_flags-fix.patch +proc-rewrite-stable_page_flags-fix-2.patch # # mm-slab-move-memcg-charging-to-post-alloc-hook.patch @@ -438,6 +444,7 @@ proc-convert-smaps_pmd_entry-to-use-a-folio.patch mm-page_alloc-use-the-correct-thp-order-for-thp-pcp.patch # mm-swap-remove-cluster_flag_huge-from-swap_cluster_info-flags.patch +#mm-swap-free_swap_and_cache_nr-as-batched-free_swap_and_cache.patch: https://lkml.kernel.org/r/051052af-3b56-4290-98d3-fd5a1eb11ce1@redhat.com https://lkml.kernel.org/r/7d5b2f03-dc36-477d-8d5c-4eb8d45db398@redhat.com mm-swap-free_swap_and_cache_nr-as-batched-free_swap_and_cache.patch mm-swap-simplify-struct-percpu_cluster.patch mm-swap-allow-storage-of-all-mthp-orders.patch @@ -476,6 +483,24 @@ selftests-mm-fix-additional-build-errors-for-selftests.patch mm-cma-drop-incorrect-alignment-check-in-cma_init_reserved_mem.patch mm-hugetlb-pass-correct-order_per_bit-to-cma_declare_contiguous_nid.patch # +mm-convert-pagecache_isize_extended-to-use-a-folio.patch +# +mm-free-non-hugetlb-large-folios-in-a-batch.patch +mm-combine-free_the_page-and-free_unref_page.patch +mm-inline-destroy_large_folio-into-__folio_put_large.patch +mm-combine-__folio_put_small-__folio_put_large-and-__folio_put.patch 
+mm-convert-free_zone_device_page-to-free_zone_device_folio.patch +# +mm-set-pageblock_order-to-hpage_pmd_order-in-case-with-config_hugetlb_page-but-thp-enabled.patch +# +#mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters.patch+1: docs? acks? +mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters.patch +mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters.patch +# +memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types.patch +#memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info.patch: https://lkml.kernel.org/r/20240405150244.00004b49@Huawei.com +memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info.patch +# # # # diff --git a/pc/init-open-output-files-from-cpio-unpacking-with-o_largefile.pc b/pc/init-open-output-files-from-cpio-unpacking-with-o_largefile.pc deleted file mode 100644 index c724db162..000000000 --- a/pc/init-open-output-files-from-cpio-unpacking-with-o_largefile.pc +++ /dev/null @@ -1 +0,0 @@ -init/initramfs.c diff --git a/pc/maintainers-change-vmwarecom-addresses-to-broadcomcom.pc b/pc/maintainers-change-vmwarecom-addresses-to-broadcomcom.pc deleted file mode 100644 index edc8c7f9d..000000000 --- a/pc/maintainers-change-vmwarecom-addresses-to-broadcomcom.pc +++ /dev/null @@ -1,2 +0,0 @@ -.mailmap -MAINTAINERS diff --git a/pc/memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info.pc b/pc/memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info.pc new file mode 100644 index 000000000..7610341a8 --- /dev/null +++ b/pc/memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info.pc @@ -0,0 +1 @@ +mm/memory-tiers.c diff --git a/pc/memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types.pc b/pc/memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types.pc new file mode 100644 index 000000000..e7f08abc4 --- /dev/null +++ b/pc/memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types.pc @@ -0,0 +1,3 @@ +drivers/dax/kmem.c +include/linux/memory-tiers.h +mm/memory-tiers.c diff --git a/pc/mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters.pc b/pc/mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters.pc new file mode 100644 index 000000000..c3c5586ce --- /dev/null +++ b/pc/mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters.pc @@ -0,0 +1,3 @@ +include/linux/huge_mm.h +mm/huge_memory.c +mm/memory.c diff --git a/pc/mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters.pc b/pc/mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters.pc new file mode 100644 index 000000000..f238d5812 --- /dev/null +++ b/pc/mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters.pc @@ -0,0 +1,4 @@ +include/linux/huge_mm.h +mm/huge_memory.c +mm/page_io.c +mm/vmscan.c diff --git a/pc/mm-combine-__folio_put_small-__folio_put_large-and-__folio_put.pc b/pc/mm-combine-__folio_put_small-__folio_put_large-and-__folio_put.pc new file mode 100644 index 000000000..edb691fab --- /dev/null +++ b/pc/mm-combine-__folio_put_small-__folio_put_large-and-__folio_put.pc @@ -0,0 +1 @@ +mm/swap.c diff --git a/pc/mm-combine-free_the_page-and-free_unref_page.pc b/pc/mm-combine-free_the_page-and-free_unref_page.pc new file mode 100644 index 000000000..5a02802e5 --- /dev/null +++ b/pc/mm-combine-free_the_page-and-free_unref_page.pc @@ -0,0 +1 @@ +mm/page_alloc.c diff --git 
a/pc/mm-convert-free_zone_device_page-to-free_zone_device_folio.pc b/pc/mm-convert-free_zone_device_page-to-free_zone_device_folio.pc new file mode 100644 index 000000000..5be226534 --- /dev/null +++ b/pc/mm-convert-free_zone_device_page-to-free_zone_device_folio.pc @@ -0,0 +1,3 @@ +mm/internal.h +mm/memremap.c +mm/swap.c diff --git a/pc/mm-convert-pagecache_isize_extended-to-use-a-folio.pc b/pc/mm-convert-pagecache_isize_extended-to-use-a-folio.pc new file mode 100644 index 000000000..f98ff6a5d --- /dev/null +++ b/pc/mm-convert-pagecache_isize_extended-to-use-a-folio.pc @@ -0,0 +1 @@ +mm/truncate.c diff --git a/pc/mm-free-non-hugetlb-large-folios-in-a-batch.pc b/pc/mm-free-non-hugetlb-large-folios-in-a-batch.pc new file mode 100644 index 000000000..edb691fab --- /dev/null +++ b/pc/mm-free-non-hugetlb-large-folios-in-a-batch.pc @@ -0,0 +1 @@ +mm/swap.c diff --git a/pc/mm-inline-destroy_large_folio-into-__folio_put_large.pc b/pc/mm-inline-destroy_large_folio-into-__folio_put_large.pc new file mode 100644 index 000000000..1e3544ced --- /dev/null +++ b/pc/mm-inline-destroy_large_folio-into-__folio_put_large.pc @@ -0,0 +1,3 @@ +include/linux/mm.h +mm/page_alloc.c +mm/swap.c diff --git a/pc/mm-page_alloc-fix-freelist-movement-during-block-conversion-fix.pc b/pc/mm-page_alloc-fix-freelist-movement-during-block-conversion-fix.pc new file mode 100644 index 000000000..5a02802e5 --- /dev/null +++ b/pc/mm-page_alloc-fix-freelist-movement-during-block-conversion-fix.pc @@ -0,0 +1 @@ +mm/page_alloc.c diff --git a/pc/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.pc b/pc/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.pc deleted file mode 100644 index c8d2228c3..000000000 --- a/pc/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.pc +++ /dev/null @@ -1 +0,0 @@ -include/linux/secretmem.h diff --git a/pc/mm-set-pageblock_order-to-hpage_pmd_order-in-case-with-config_hugetlb_page-but-thp-enabled.pc b/pc/mm-set-pageblock_order-to-hpage_pmd_order-in-case-with-config_hugetlb_page-but-thp-enabled.pc new file mode 100644 index 000000000..58b7a5531 --- /dev/null +++ b/pc/mm-set-pageblock_order-to-hpage_pmd_order-in-case-with-config_hugetlb_page-but-thp-enabled.pc @@ -0,0 +1 @@ +include/linux/pageblock-flags.h diff --git a/pc/mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.pc b/pc/mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.pc deleted file mode 100644 index ba2d8ce46..000000000 --- a/pc/mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.pc +++ /dev/null @@ -1 +0,0 @@ -mm/vmalloc.c diff --git a/pc/mm-vmalloc-fix-lockdep-warning-fix.pc b/pc/mm-vmalloc-fix-lockdep-warning-fix.pc deleted file mode 100644 index ba2d8ce46..000000000 --- a/pc/mm-vmalloc-fix-lockdep-warning-fix.pc +++ /dev/null @@ -1 +0,0 @@ -mm/vmalloc.c diff --git a/pc/mm-vmalloc-fix-lockdep-warning.pc b/pc/mm-vmalloc-fix-lockdep-warning.pc deleted file mode 100644 index ba2d8ce46..000000000 --- a/pc/mm-vmalloc-fix-lockdep-warning.pc +++ /dev/null @@ -1 +0,0 @@ -mm/vmalloc.c diff --git a/pc/proc-rewrite-stable_page_flags-fix-2.pc b/pc/proc-rewrite-stable_page_flags-fix-2.pc new file mode 100644 index 000000000..a0c1788ef --- /dev/null +++ b/pc/proc-rewrite-stable_page_flags-fix-2.pc @@ -0,0 +1,2 @@ +fs/proc/page.c +tools/cgroup/memcg_slabinfo.py diff --git a/pc/selftests-mm-include-stringsh-for-ffsl.pc b/pc/selftests-mm-include-stringsh-for-ffsl.pc deleted file mode 100644 index 374e2500f..000000000 --- a/pc/selftests-mm-include-stringsh-for-ffsl.pc +++ /dev/null @@ -1 +0,0 @@ 
-tools/testing/selftests/mm/vm_util.h diff --git a/pc/stackdepot-rename-pool_index-to-pool_index_plus_1.pc b/pc/stackdepot-rename-pool_index-to-pool_index_plus_1.pc deleted file mode 100644 index dd7d35a51..000000000 --- a/pc/stackdepot-rename-pool_index-to-pool_index_plus_1.pc +++ /dev/null @@ -1,2 +0,0 @@ -include/linux/stackdepot.h -lib/stackdepot.c diff --git a/pc/x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.pc b/pc/x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.pc deleted file mode 100644 index b0bab7009..000000000 --- a/pc/x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.pc +++ /dev/null @@ -1,2 +0,0 @@ -arch/x86/mm/pat/memtype.c -mm/memory.c diff --git a/txt/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.txt b/txt/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.txt index 8716ebaf7..d6850ae16 100644 --- a/txt/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.txt +++ b/txt/hugetlb-convert-hugetlb_no_page-to-use-struct-vm_fault.txt @@ -8,5 +8,6 @@ variables into a single struct. Link: https://lkml.kernel.org/r/20240401202651.31440-3-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com> +Reviewed-by: Oscar Salvador <osalvador@suse.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Muchun Song <muchun.song@linux.dev> diff --git a/txt/hugetlb-convert-hugetlb_wp-to-use-struct-vm_fault.txt b/txt/hugetlb-convert-hugetlb_wp-to-use-struct-vm_fault.txt index f4253c786..a438ac407 100644 --- a/txt/hugetlb-convert-hugetlb_wp-to-use-struct-vm_fault.txt +++ b/txt/hugetlb-convert-hugetlb_wp-to-use-struct-vm_fault.txt @@ -8,5 +8,6 @@ struct. Link: https://lkml.kernel.org/r/20240401202651.31440-4-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com> +Reviewed-by: Oscar Salvador <osalvador@suse.de> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Muchun Song <muchun.song@linux.dev> diff --git a/txt/maintainers-change-vmwarecom-addresses-to-broadcomcom.txt b/txt/maintainers-change-vmwarecom-addresses-to-broadcomcom.txt deleted file mode 100644 index a795c888c..000000000 --- a/txt/maintainers-change-vmwarecom-addresses-to-broadcomcom.txt +++ /dev/null @@ -1,22 +0,0 @@ -From: Alexey Makhalov <alexey.makhalov@broadcom.com> -Subject: MAINTAINERS: change vmware.com addresses to broadcom.com -Date: Tue, 2 Apr 2024 16:23:34 -0700 - -Update all remaining vmware.com email addresses to actual broadcom.com. - -Add corresponding .mailmap entries for maintainers who contributed in the -past as the vmware.com address will start bouncing soon. - -Maintainership update. Jeff Sipek has left VMware, Nick Shi will be -maintaining VMware PTP. 
- -Link: https://lkml.kernel.org/r/20240402232334.33167-1-alexey.makhalov@broadcom.com -Signed-off-by: Alexey Makhalov <alexey.makhalov@broadcom.com> -Acked-by: Florian Fainelli <florian.fainelli@broadcom.com> -Acked-by: Ajay Kaher <ajay.kaher@broadcom.com> -Acked-by: Ronak Doshi <ronak.doshi@broadcom.com> -Acked-by: Nick Shi <nick.shi@broadcom.com> -Acked-by: Bryan Tan <bryan-bt.tan@broadcom.com> -Acked-by: Vishnu Dasa <vishnu.dasa@broadcom.com> -Acked-by: Vishal Bhakta <vishal.bhakta@broadcom.com> -Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> diff --git a/txt/memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info.txt b/txt/memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info.txt new file mode 100644 index 000000000..fdcb26090 --- /dev/null +++ b/txt/memory-tier-create-cpuless-memory-tiers-after-obtaining-hmat-info.txt @@ -0,0 +1,63 @@ +From: "Ho-Ren (Jack) Chuang" <horenchuang@bytedance.com> +Subject: memory tier: create CPUless memory tiers after obtaining HMAT info +Date: Fri, 5 Apr 2024 00:07:06 +0000 + +The current implementation treats emulated memory devices, such as CXL1.1 +type3 memory, as normal DRAM when they are emulated as normal memory +(E820_TYPE_RAM). However, these emulated devices have different +characteristics than traditional DRAM, making it important to distinguish +them. Thus, we modify the tiered memory initialization process to +introduce a delay specifically for CPUless NUMA nodes. This delay ensures +that the memory tier initialization for these nodes is deferred until HMAT +information is obtained during the boot process. Finally, demotion tables +are recalculated at the end. + +* late_initcall(memory_tier_late_init); + Some device drivers may have initialized memory tiers between + `memory_tier_init()` and `memory_tier_late_init()`, potentially bringing + online memory nodes and configuring memory tiers. They should be + excluded in the late init. + +* Handle cases where there is no HMAT when creating memory tiers + There is a scenario where a CPUless node does not provide HMAT + information. If no HMAT is specified, it falls back to using the + default DRAM tier. + +* Introduce another new lock `default_dram_perf_lock` for adist + calculation In the current implementation, iterating through CPUlist + nodes requires holding the `memory_tier_lock`. However, + `mt_calc_adistance()` will end up trying to acquire the same lock, + leading to a potential deadlock. Therefore, we propose introducing a + standalone `default_dram_perf_lock` to protect `default_dram_perf_*`. + This approach not only avoids deadlock but also prevents holding a large + lock simultaneously. + +* Upgrade `set_node_memory_tier` to support additional cases, including + default DRAM, late CPUless, and hot-plugged initializations. To cover + hot-plugged memory nodes, `mt_calc_adistance()` and + `mt_find_alloc_memory_type()` are moved into `set_node_memory_tier()` to + handle cases where memtype is not initialized and where HMAT information + is available. + +* Introduce `default_memory_types` for those memory types that are not + initialized by device drivers. Because late initialized memory and + default DRAM memory need to be managed, a default memory type is created + for storing all memory types that are not initialized by device drivers + and as a fallback. 
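The deferral flow above, condensed into a sketch. memory_tier_late_init(), set_node_memory_tier(), and establish_demotion_targets() are named in this changelog or exist in mm/memory-tiers.c; the loop body and the node_has_memory_tier() helper are illustrative guesses, not the actual patch code:

	/* Sketch: defer tiering of CPUless nodes until HMAT is parsed. */
	static int __init memory_tier_late_init(void)
	{
		int nid;

		mutex_lock(&memory_tier_lock);
		for_each_node_state(nid, N_MEMORY) {
			/* Nodes a driver already tiered between
			 * memory_tier_init() and this point are excluded. */
			if (!node_has_memory_tier(nid))	/* hypothetical helper */
				set_node_memory_tier(nid); /* now sees HMAT adist */
		}
		establish_demotion_targets();	/* recalculate demotion tables */
		mutex_unlock(&memory_tier_lock);
		return 0;
	}
	late_initcall(memory_tier_late_init);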
+ +Link: https://lkml.kernel.org/r/20240405000707.2670063-3-horenchuang@bytedance.com +Signed-off-by: Ho-Ren (Jack) Chuang <horenchuang@bytedance.com> +Signed-off-by: Hao Xiang <hao.xiang@bytedance.com> +Reviewed-by: "Huang, Ying" <ying.huang@intel.com> +Cc: Alistair Popple <apopple@nvidia.com> +Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Jiang <dave.jiang@intel.com> +Cc: Gregory Price <gourry.memverge@gmail.com> +Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Ravi Jonnalagadda <ravis.opensrc@micron.com> +Cc: SeongJae Park <sj@kernel.org> +Cc: Tejun Heo <tj@kernel.org> +Cc: Vishal Verma <vishal.l.verma@intel.com> +Cc: Jonathan Cameron <Jonathan.Cameron@huawie.com> diff --git a/txt/memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types.txt b/txt/memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types.txt new file mode 100644 index 000000000..5239288dd --- /dev/null +++ b/txt/memory-tier-dax-kmem-introduce-an-abstract-layer-for-finding-allocating-and-putting-memory-types.txt @@ -0,0 +1,53 @@ +From: "Ho-Ren (Jack) Chuang" <horenchuang@bytedance.com> +Subject: memory tier: dax/kmem: introduce an abstract layer for finding, allocating, and putting memory types +Date: Fri, 5 Apr 2024 00:07:05 +0000 + +Patch series "Improved Memory Tier Creation for CPUless NUMA Nodes", v11. + +When a memory device, such as CXL1.1 type3 memory, is emulated as normal +memory (E820_TYPE_RAM), the memory device is indistinguishable from normal +DRAM in terms of memory tiering with the current implementation. The +current memory tiering assigns all detected normal memory nodes to the +same DRAM tier. This results in normal memory devices with different +attributions being unable to be assigned to the correct memory tier, +leading to the inability to migrate pages between different types of +memory. +https://lore.kernel.org/linux-mm/PH0PR08MB7955E9F08CCB64F23963B5C3A860A@PH0PR08MB7955.namprd08.prod.outlook.com/T/ + +This patchset automatically resolves the issues. It delays the +initialization of memory tiers for CPUless NUMA nodes until they obtain +HMAT information and after all devices are initialized at boot time, +eliminating the need for user intervention. If no HMAT is specified, it +falls back to using `default_dram_type`. + +Example usecase: +We have CXL memory on the host, and we create VMs with a new system memory +device backed by host CXL memory. We inject CXL memory performance +attributes through QEMU, and the guest now sees memory nodes with +performance attributes in HMAT. With this change, we enable the guest +kernel to construct the correct memory tiering for the memory nodes. + + +This patch (of 2): + +Since different memory devices require finding, allocating, and putting +memory types, these common steps are abstracted in this patch, enhancing +the scalability and conciseness of the code. 
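A plausible shape for the find-or-allocate helper this series factors out. mt_find_alloc_memory_type() is named in the previous changelog and alloc_memory_type() pre-exists in mm/memory-tiers.c; the exact signature and the 'list' linkage field are inferred, not copied from the patch:

	struct memory_dev_type *mt_find_alloc_memory_type(int adist,
					struct list_head *memory_types)
	{
		struct memory_dev_type *mtype;

		list_for_each_entry(mtype, memory_types, list) /* field assumed */
			if (mtype->adistance == adist)
				return mtype;	/* reuse an existing type */

		mtype = alloc_memory_type(adist);	/* pre-existing allocator */
		if (!IS_ERR(mtype))
			list_add(&mtype->list, memory_types);
		return mtype;
	}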
+ +Link: https://lkml.kernel.org/r/20240405000707.2670063-1-horenchuang@bytedance.com +Link: https://lkml.kernel.org/r/20240405000707.2670063-2-horenchuang@bytedance.com +Signed-off-by: Ho-Ren (Jack) Chuang <horenchuang@bytedance.com> +Reviewed-by: "Huang, Ying" <ying.huang@intel.com> +Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawie.com> +Cc: Alistair Popple <apopple@nvidia.com> +Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Dave Jiang <dave.jiang@intel.com> +Cc: Gregory Price <gourry.memverge@gmail.com> +Cc: Hao Xiang <hao.xiang@bytedance.com> +Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com> +Cc: Michal Hocko <mhocko@suse.com> +Cc: Ravi Jonnalagadda <ravis.opensrc@micron.com> +Cc: SeongJae Park <sj@kernel.org> +Cc: Tejun Heo <tj@kernel.org> +Cc: Vishal Verma <vishal.l.verma@intel.com> diff --git a/txt/mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters.txt b/txt/mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters.txt new file mode 100644 index 000000000..f1625f7c3 --- /dev/null +++ b/txt/mm-add-per-order-mthp-anon_alloc-and-anon_alloc_fallback-counters.txt @@ -0,0 +1,49 @@ +From: Barry Song <v-songbaohua@oppo.com> +Subject: mm: add per-order mTHP anon_alloc and anon_alloc_fallback counters +Date: Fri, 5 Apr 2024 23:27:03 +1300 + +Patch series "mm: add per-order mTHP alloc and swpout counters", v4. + +The patchset introduces a framework to facilitate mTHP counters, starting +with the allocation and swap-out counters. Currently, only four new nodes +are appended to the stats directory for each mTHP size. + +/sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats + anon_alloc + anon_alloc_fallback + anon_swpout + anon_swpout_fallback + +These four nodes are crucial for us to monitor the fragmentation levels of +both the buddy system and the swapfile. In the future, we may consider +adding additional nodes for further insights. + + +This patch (of 2): + +Profiling a system blindly with mTHP has become challenging due to the +lack of visibility into its operations. Presenting the success rate of +mTHP allocations appears to be pressing need. + +Recently, I've been experiencing significant difficulty debugging +performance improvements and regressions without these figures. It's +crucial for us to understand the true effectiveness of mTHP in real-world +scenarios, especially in systems with fragmented memory. + +This patch sets up the framework for per-order mTHP counters, starting +with the introduction of anon_alloc and anon_alloc_fallback counters. +Incorporating additional counters should now be straightforward as well. 
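A minimal sketch of such a per-order counter framework (identifier names and the per-CPU layout are this editor's guesses; the sysfs plumbing under .../hugepages-<size>/stats is omitted):

	enum mthp_stat_item {
		MTHP_STAT_ANON_ALLOC,
		MTHP_STAT_ANON_ALLOC_FALLBACK,
		__MTHP_STAT_COUNT,
	};

	struct mthp_stat {
		unsigned long stats[PMD_ORDER + 1][__MTHP_STAT_COUNT];
	};

	static DEFINE_PER_CPU(struct mthp_stat, mthp_stats);

	static inline void count_mthp_stat(int order, enum mthp_stat_item item)
	{
		if (order <= 0 || order > PMD_ORDER)
			return;
		this_cpu_inc(mthp_stats.stats[order][item]);
	}

An anonymous-fault path would then bump MTHP_STAT_ANON_ALLOC when a large folio of that order is obtained, and MTHP_STAT_ANON_ALLOC_FALLBACK when it falls back to a smaller order.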
+ +Link: https://lkml.kernel.org/r/20240405102704.77559-1-21cnbao@gmail.com +Link: https://lkml.kernel.org/r/20240405102704.77559-2-21cnbao@gmail.com +Signed-off-by: Barry Song <v-songbaohua@oppo.com> +Cc: Chris Li <chrisl@kernel.org> +Cc: David Hildenbrand <david@redhat.com> +Cc: Domenico Cerasuolo <cerasuolodomenico@gmail.com> +Cc: Kairui Song <kasong@tencent.com> +Cc: Matthew Wilcox (Oracle) <willy@infradead.org> +Cc: Peter Xu <peterx@redhat.com> +Cc: Ryan Roberts <ryan.roberts@arm.com> +Cc: Suren Baghdasaryan <surenb@google.com> +Cc: Yosry Ahmed <yosryahmed@google.com> +Cc: Yu Zhao <yuzhao@google.com> diff --git a/txt/mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters.txt b/txt/mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters.txt new file mode 100644 index 000000000..b21321f7f --- /dev/null +++ b/txt/mm-add-per-order-mthp-anon_swpout-and-anon_swpout_fallback-counters.txt @@ -0,0 +1,22 @@ +From: Barry Song <v-songbaohua@oppo.com> +Subject: mm: add per-order mTHP anon_swpout and anon_swpout_fallback counters +Date: Fri, 5 Apr 2024 23:27:04 +1300 + +This helps to display the fragmentation situation of the swapfile, knowing +the proportion of how much we haven't split large folios. So far, we only +support non-split swapout for anon memory, with the possibility of +expanding to shmem in the future. So, we add the "anon" prefix to the +counter names. + +Link: https://lkml.kernel.org/r/20240405102704.77559-3-21cnbao@gmail.com +Signed-off-by: Barry Song <v-songbaohua@oppo.com> +Cc: Chris Li <chrisl@kernel.org> +Cc: David Hildenbrand <david@redhat.com> +Cc: Domenico Cerasuolo <cerasuolodomenico@gmail.com> +Cc: Kairui Song <kasong@tencent.com> +Cc: Matthew Wilcox (Oracle) <willy@infradead.org> +Cc: Peter Xu <peterx@redhat.com> +Cc: Ryan Roberts <ryan.roberts@arm.com> +Cc: Suren Baghdasaryan <surenb@google.com> +Cc: Yosry Ahmed <yosryahmed@google.com> +Cc: Yu Zhao <yuzhao@google.com> diff --git a/txt/mm-combine-__folio_put_small-__folio_put_large-and-__folio_put.txt b/txt/mm-combine-__folio_put_small-__folio_put_large-and-__folio_put.txt new file mode 100644 index 000000000..9008d7abe --- /dev/null +++ b/txt/mm-combine-__folio_put_small-__folio_put_large-and-__folio_put.txt @@ -0,0 +1,10 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: combine __folio_put_small, __folio_put_large and __folio_put +Date: Fri, 5 Apr 2024 16:32:26 +0100 + +It's now obvious that __folio_put_small() and __folio_put_large() do +almost exactly the same thing. Inline them both into __folio_put(). + +Link: https://lkml.kernel.org/r/20240405153228.2563754-5-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Zi Yan <ziy@nvidia.com> diff --git a/txt/mm-combine-free_the_page-and-free_unref_page.txt b/txt/mm-combine-free_the_page-and-free_unref_page.txt new file mode 100644 index 000000000..4578fba17 --- /dev/null +++ b/txt/mm-combine-free_the_page-and-free_unref_page.txt @@ -0,0 +1,10 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: combine free_the_page() and free_unref_page() +Date: Fri, 5 Apr 2024 16:32:24 +0100 + +The pcp_allowed_order() check in free_the_page() was only being skipped by +__folio_put_small() which is about to be rearranged. 
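The fold this describes, sketched. The function names are real mm/page_alloc.c identifiers; the "after" body paraphrases the result rather than quoting the patch:

	/* Before: a wrapper decided between the pcp path and the slow path. */
	static void free_the_page(struct page *page, unsigned int order)
	{
		if (pcp_allowed_order(order))
			free_unref_page(page, order);	/* per-cpu page lists */
		else
			__free_pages_ok(page, order, FPI_NONE);
	}

	/* After (sketch): the check moves into free_unref_page() itself, so
	 * every caller, including the folio-put paths, gets it for free. */
	void free_unref_page(struct page *page, unsigned int order)
	{
		if (!pcp_allowed_order(order)) {
			__free_pages_ok(page, order, FPI_NONE);
			return;
		}
		/* ... existing per-cpu fast path ... */
	}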
+ +Link: https://lkml.kernel.org/r/20240405153228.2563754-3-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Zi Yan <ziy@nvidia.com> diff --git a/txt/mm-convert-free_zone_device_page-to-free_zone_device_folio.txt b/txt/mm-convert-free_zone_device_page-to-free_zone_device_folio.txt new file mode 100644 index 000000000..e65295435 --- /dev/null +++ b/txt/mm-convert-free_zone_device_page-to-free_zone_device_folio.txt @@ -0,0 +1,10 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: convert free_zone_device_page to free_zone_device_folio +Date: Fri, 5 Apr 2024 16:32:27 +0100 + +Both callers already have a folio; pass it in and save a few calls to +compound_head(). + +Link: https://lkml.kernel.org/r/20240405153228.2563754-6-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Zi Yan <ziy@nvidia.com> diff --git a/txt/mm-convert-pagecache_isize_extended-to-use-a-folio.txt b/txt/mm-convert-pagecache_isize_extended-to-use-a-folio.txt new file mode 100644 index 000000000..ac30a1912 --- /dev/null +++ b/txt/mm-convert-pagecache_isize_extended-to-use-a-folio.txt @@ -0,0 +1,10 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: convert pagecache_isize_extended to use a folio +Date: Fri, 5 Apr 2024 19:00:36 +0100 + +Remove four hidden calls to compound_head(). Also exit early if the +filesystem block size is >= PAGE_SIZE instead of just equal to PAGE_SIZE. + +Link: https://lkml.kernel.org/r/20240405180038.2618624-1-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Pankaj Raghav <p.raghav@samsung.com> diff --git a/txt/mm-free-non-hugetlb-large-folios-in-a-batch.txt b/txt/mm-free-non-hugetlb-large-folios-in-a-batch.txt new file mode 100644 index 000000000..2945f8d9f --- /dev/null +++ b/txt/mm-free-non-hugetlb-large-folios-in-a-batch.txt @@ -0,0 +1,24 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: free non-hugetlb large folios in a batch +Date: Fri, 5 Apr 2024 16:32:23 +0100 + +Patch series "Clean up __folio_put()". + +With all the changes over the last few years, __folio_put_small and +__folio_put_large have become almost identical to each other ... except +you can't tell because they're spread over two files. Rearrange it all so +that you can tell, and then inline them both into __folio_put(). + + +This patch (of 5): + +free_unref_folios() can now handle non-hugetlb large folios, so keep +normal large folios in the batch. hugetlb folios still need to be handled +specially. I believe that folios freed using put_pages_list() cannot be +accounted to a memcg (or the small folios would trip the "page still +charged to cgroup" warning), but put an assertion in to check that. 
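A sketch of the batched path this changelog describes. folio_batch and folios_put() are existing mm/swap.c facilities; the exact form of the "still charged to cgroup" assertion is paraphrased from the paragraph above, not quoted:

	void put_pages_list(struct list_head *pages)
	{
		struct folio_batch fbatch;
		struct folio *folio, *next;

		folio_batch_init(&fbatch);
		list_for_each_entry_safe(folio, next, pages, lru) {
			if (!folio_put_testzero(folio))
				continue;
			if (folio_test_hugetlb(folio)) {
				free_huge_folio(folio);	/* still special-cased */
				continue;
			}
			/* Non-hugetlb large folios now stay in the batch. */
			VM_BUG_ON_FOLIO(folio_memcg(folio), folio);
			if (folio_batch_add(&fbatch, folio) == 0)
				folios_put(&fbatch);	/* batch full: flush */
		}
		if (fbatch.nr)
			folios_put(&fbatch);
		INIT_LIST_HEAD(pages);
	}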
+ +Link: https://lkml.kernel.org/r/20240405153228.2563754-1-willy@infradead.org +Link: https://lkml.kernel.org/r/20240405153228.2563754-2-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Zi Yan <ziy@nvidia.com> diff --git a/txt/mm-inline-destroy_large_folio-into-__folio_put_large.txt b/txt/mm-inline-destroy_large_folio-into-__folio_put_large.txt new file mode 100644 index 000000000..e8ba7634c --- /dev/null +++ b/txt/mm-inline-destroy_large_folio-into-__folio_put_large.txt @@ -0,0 +1,9 @@ +From: "Matthew Wilcox (Oracle)" <willy@infradead.org> +Subject: mm: inline destroy_large_folio() into __folio_put_large() +Date: Fri, 5 Apr 2024 16:32:25 +0100 + +destroy_large_folio() has only one caller, move its contents there. + +Link: https://lkml.kernel.org/r/20240405153228.2563754-4-willy@infradead.org +Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> +Reviewed-by: Zi Yan <ziy@nvidia.com> diff --git a/txt/mm-page_alloc-fix-freelist-movement-during-block-conversion-fix.txt b/txt/mm-page_alloc-fix-freelist-movement-during-block-conversion-fix.txt new file mode 100644 index 000000000..a1985f349 --- /dev/null +++ b/txt/mm-page_alloc-fix-freelist-movement-during-block-conversion-fix.txt @@ -0,0 +1,35 @@ +From: Baolin Wang <baolin.wang@linux.alibaba.com> +Subject: mm-page_alloc-fix-freelist-movement-during-block-conversion-fix +Date: Fri, 5 Apr 2024 20:11:47 +0800 + +fix allocation failures with CONFIG_CMA + +The original code logic was that if the 'migratetype' type allocation is +failed, it would first try CMA page allocation and then attempt to +fallback to other migratetype allocations. Now it has been changed so +that if CMA allocation fails, it will directly return. This change has +caused a regression when I running the thpcompact benchmark, resulting in +a significant reduction in the percentage of THPs like below: + +thpcompact Percentage Faults Huge + K6.9-rc2 K6.9-rc2 + this patch +Percentage huge-1 78.18 ( 0.00%) 42.49 ( -45.65%) +Percentage huge-3 86.70 ( 0.00%) 35.13 ( -59.49%) +Percentage huge-5 90.26 ( 0.00%) 52.35 ( -42.00%) +Percentage huge-7 92.34 ( 0.00%) 31.84 ( -65.52%) +Percentage huge-12 91.18 ( 0.00%) 45.85 ( -49.72%) +Percentage huge-18 89.00 ( 0.00%) 29.18 ( -67.22%) +Percentage huge-24 90.52 ( 0.00%) 46.68 ( -48.43%) +Percentage huge-30 94.44 ( 0.00%) 38.35 ( -59.39%) +Percentage huge-32 93.09 ( 0.00%) 39.37 ( -57.70%) + +After making the following modifications, the regression is gone. 
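The resulting allocation order, condensed from the hunk in the corresponding patch file above; the surrounding __rmqueue_smallest() call is context as this editor recalls it, not part of the fix:

	/* __rmqueue(): preferred type first, then CMA when allowed, and --
	 * with this fix -- the regular fallback even after a CMA miss. */
	page = __rmqueue_smallest(zone, order, migratetype);
	if (unlikely(!page)) {
		if (alloc_flags & ALLOC_CMA)
			page = __rmqueue_cma_fallback(zone, order);

		if (!page)
			page = __rmqueue_fallback(zone, order, migratetype,
						  alloc_flags);
	}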
+ +Link: https://lkml.kernel.org/r/a97697e0-45b0-4f71-b087-fdc7a1d43c0e@linux.alibaba.com +Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> +Cc: David Hildenbrand <david@redhat.com> +Cc: "Huang, Ying" <ying.huang@intel.com> +Cc: Johannes Weiner <hannes@cmpxchg.org> +Cc: Mel Gorman <mgorman@techsingularity.net> +Cc: Vlastimil Babka <vbabka@suse.cz> +Cc: Zi Yan <ziy@nvidia.com> diff --git a/txt/mm-page_alloc-fix-freelist-movement-during-block-conversion.txt b/txt/mm-page_alloc-fix-freelist-movement-during-block-conversion.txt index 9a2b5b9c1..1157148f0 100644 --- a/txt/mm-page_alloc-fix-freelist-movement-during-block-conversion.txt +++ b/txt/mm-page_alloc-fix-freelist-movement-during-block-conversion.txt @@ -29,3 +29,4 @@ Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Cc: David Hildenbrand <david@redhat.com> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Zi Yan <ziy@nvidia.com> +Cc: Baolin Wang <baolin.wang@linux.alibaba.com> diff --git a/txt/mm-page_alloc-use-the-correct-thp-order-for-thp-pcp.txt b/txt/mm-page_alloc-use-the-correct-thp-order-for-thp-pcp.txt index 49461f4ec..c538f503b 100644 --- a/txt/mm-page_alloc-use-the-correct-thp-order-for-thp-pcp.txt +++ b/txt/mm-page_alloc-use-the-correct-thp-order-for-thp-pcp.txt @@ -17,3 +17,4 @@ Fixes: 44042b449872 ("mm/page_alloc: allow high-order pages to be stored on the Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Mel Gorman <mgorman@techsingularity.net> +Reviewed-by: Barry Song <baohua@kernel.org> diff --git a/txt/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.txt b/txt/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.txt deleted file mode 100644 index 3fb4c6650..000000000 --- a/txt/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.txt +++ /dev/null @@ -1,34 +0,0 @@ -From: David Hildenbrand <david@redhat.com> -Subject: mm/secretmem: fix GUP-fast succeeding on secretmem folios -Date: Tue, 26 Mar 2024 15:32:08 +0100 - -folio_is_secretmem() currently relies on secretmem folios being LRU -folios, to save some cycles. - -However, folios might reside in a folio batch without the LRU flag set, or -temporarily have their LRU flag cleared. Consequently, the LRU flag is -unreliable for this purpose. - -In particular, this is the case when secretmem_fault() allocates a fresh -page and calls filemap_add_folio()->folio_add_lru(). The folio might be -added to the per-cpu folio batch and won't get the LRU flag set until the -batch was drained using e.g., lru_add_drain(). - -Consequently, folio_is_secretmem() might not detect secretmem folios and -GUP-fast can succeed in grabbing a secretmem folio, crashing the kernel -when we would later try reading/writing to the folio, because the folio -has been unmapped from the directmap. - -Fix it by removing that unreliable check. 
- -Link: https://lkml.kernel.org/r/20240326143210.291116-2-david@redhat.com -Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas") -Signed-off-by: David Hildenbrand <david@redhat.com> -Reported-by: xingwei lee <xrivendell7@gmail.com> -Reported-by: yue sun <samsun1006219@gmail.com> -Closes: https://lore.kernel.org/lkml/CABOYnLyevJeravW=QrH0JUPYEcDN160aZFb7kwndm-J2rmz0HQ@mail.gmail.com/ -Debugged-by: Miklos Szeredi <miklos@szeredi.hu> -Tested-by: Miklos Szeredi <mszeredi@redhat.com> -Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org> -Cc: Lorenzo Stoakes <lstoakes@gmail.com> -Cc: <stable@vger.kernel.org> diff --git a/txt/mm-set-pageblock_order-to-hpage_pmd_order-in-case-with-config_hugetlb_page-but-thp-enabled.txt b/txt/mm-set-pageblock_order-to-hpage_pmd_order-in-case-with-config_hugetlb_page-but-thp-enabled.txt new file mode 100644 index 000000000..eac7c7335 --- /dev/null +++ b/txt/mm-set-pageblock_order-to-hpage_pmd_order-in-case-with-config_hugetlb_page-but-thp-enabled.txt @@ -0,0 +1,15 @@ +From: Baolin Wang <baolin.wang@linux.alibaba.com> +Subject: mm: set pageblock_order to HPAGE_PMD_ORDER in case with !CONFIG_HUGETLB_PAGE but THP enabled +Date: Fri, 5 Apr 2024 20:24:16 +0800 + +As Vlastimil suggested in previous discussion[1], it doesn't make sense to +set pageblock_order as MAX_PAGE_ORDER when hugetlbfs is not enabled and +THP is enabled. Instead, it should be set to HPAGE_PMD_ORDER. + +[1] https://lore.kernel.org/all/76457ec5-d789-449b-b8ca-dcb6ceb12445@suse.cz/ +Link: https://lkml.kernel.org/r/3d57d253070035bdc0f6d6e5681ce1ed0e1934f7.1712286863.git.baolin.wang@linux.alibaba.com +Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> +Suggested-by: Vlastimil Babka <vbabka@suse.cz> +Acked-by: Vlastimil Babka <vbabka@suse.cz> +Reviewed-by: Zi Yan <ziy@nvidia.com> +Cc: Mel Gorman <mgorman@techsingularity.net> diff --git a/txt/mm-swap-remove-cluster_flag_huge-from-swap_cluster_info-flags.txt b/txt/mm-swap-remove-cluster_flag_huge-from-swap_cluster_info-flags.txt index d2f64e9ee..793277203 100644 --- a/txt/mm-swap-remove-cluster_flag_huge-from-swap_cluster_info-flags.txt +++ b/txt/mm-swap-remove-cluster_flag_huge-from-swap_cluster_info-flags.txt @@ -86,8 +86,8 @@ Link: https://lkml.kernel.org/r/20240403114032.1162100-2-ryan.roberts@arm.com Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> Reviewed-by: "Huang, Ying" <ying.huang@intel.com> Acked-by: Chris Li <chrisl@kernel.org> +Acked-by: David Hildenbrand <david@redhat.com> Cc: Barry Song <21cnbao@gmail.com> -Cc: David Hildenbrand <david@redhat.com> Cc: Gao Xiang <xiang@kernel.org> Cc: Kefeng Wang <wangkefeng.wang@huawei.com> Cc: Lance Yang <ioworker0@gmail.com> diff --git a/txt/init-open-output-files-from-cpio-unpacking-with-o_largefile.txt b/txt/old/init-open-output-files-from-cpio-unpacking-with-o_largefile.txt index 517fd3dfb..517fd3dfb 100644 --- a/txt/init-open-output-files-from-cpio-unpacking-with-o_largefile.txt +++ b/txt/old/init-open-output-files-from-cpio-unpacking-with-o_largefile.txt diff --git a/txt/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.txt b/txt/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.txt index d0ece09e8..a795c888c 100644 --- a/txt/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.txt +++ b/txt/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.txt @@ -1,16 +1,22 @@ -From: Zack Rusin <zack.rusin@broadcom.com> +From: Alexey Makhalov <alexey.makhalov@broadcom.com> Subject: MAINTAINERS: change vmware.com addresses to broadcom.com 
diff --git a/txt/mm-swap-remove-cluster_flag_huge-from-swap_cluster_info-flags.txt b/txt/mm-swap-remove-cluster_flag_huge-from-swap_cluster_info-flags.txt
index d2f64e9ee..793277203 100644
--- a/txt/mm-swap-remove-cluster_flag_huge-from-swap_cluster_info-flags.txt
+++ b/txt/mm-swap-remove-cluster_flag_huge-from-swap_cluster_info-flags.txt
@@ -86,8 +86,8 @@ Link: https://lkml.kernel.org/r/20240403114032.1162100-2-ryan.roberts@arm.com
 Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
 Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
 Acked-by: Chris Li <chrisl@kernel.org>
+Acked-by: David Hildenbrand <david@redhat.com>
 Cc: Barry Song <21cnbao@gmail.com>
-Cc: David Hildenbrand <david@redhat.com>
 Cc: Gao Xiang <xiang@kernel.org>
 Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
 Cc: Lance Yang <ioworker0@gmail.com>
diff --git a/txt/init-open-output-files-from-cpio-unpacking-with-o_largefile.txt b/txt/old/init-open-output-files-from-cpio-unpacking-with-o_largefile.txt
index 517fd3dfb..517fd3dfb 100644
--- a/txt/init-open-output-files-from-cpio-unpacking-with-o_largefile.txt
+++ b/txt/old/init-open-output-files-from-cpio-unpacking-with-o_largefile.txt
diff --git a/txt/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.txt b/txt/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.txt
index d0ece09e8..a795c888c 100644
--- a/txt/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.txt
+++ b/txt/old/maintainers-change-vmwarecom-addresses-to-broadcomcom.txt
@@ -1,16 +1,22 @@
-From: Zack Rusin <zack.rusin@broadcom.com>
+From: Alexey Makhalov <alexey.makhalov@broadcom.com>
 Subject: MAINTAINERS: change vmware.com addresses to broadcom.com
-Date: Sun, 24 Dec 2023 00:20:36 -0500
+Date: Tue, 2 Apr 2024 16:23:34 -0700
 
-Update the email addresses for vmwgfx and vmmouse to reflect the fact that
-VMware is now part of Broadcom.
+Update all remaining vmware.com email addresses to broadcom.com ones.
 
-Add a .mailmap entry because the vmware.com address will start bouncing
-soon.
+Add corresponding .mailmap entries for maintainers who contributed in the
+past, as the vmware.com addresses will start bouncing soon.
 
-Link: https://lkml.kernel.org/r/20231224052036.603621-1-zack.rusin@broadcom.com
-Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
+Maintainership update: Jeff Sipek has left VMware; Nick Shi will be
+maintaining VMware PTP.
+
+Link: https://lkml.kernel.org/r/20240402232334.33167-1-alexey.makhalov@broadcom.com
+Signed-off-by: Alexey Makhalov <alexey.makhalov@broadcom.com>
 Acked-by: Florian Fainelli <florian.fainelli@broadcom.com>
-Cc: Ian Forbes <ian.forbes@broadcom.com>
-Cc: Martin Krastev <martin.krastev@broadcom.com>
-Cc: Maaz Mombasawala <maaz.mombasawala@broadcom.com>
+Acked-by: Ajay Kaher <ajay.kaher@broadcom.com>
+Acked-by: Ronak Doshi <ronak.doshi@broadcom.com>
+Acked-by: Nick Shi <nick.shi@broadcom.com>
+Acked-by: Bryan Tan <bryan-bt.tan@broadcom.com>
+Acked-by: Vishnu Dasa <vishnu.dasa@broadcom.com>
+Acked-by: Vishal Bhakta <vishal.bhakta@broadcom.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/txt/old/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.txt b/txt/old/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.txt
index 6c3d7d409..3fb4c6650 100644
--- a/txt/old/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.txt
+++ b/txt/old/mm-secretmem-fix-gup-fast-succeeding-on-secretmem-folios.txt
@@ -1,25 +1,34 @@
 From: David Hildenbrand <david@redhat.com>
 Subject: mm/secretmem: fix GUP-fast succeeding on secretmem folios
-Date: Mon, 25 Mar 2024 14:41:12 +0100
+Date: Tue, 26 Mar 2024 15:32:08 +0100
 
-folio_is_secretmem() states that secretmem folios cannot be LRU folios: so
-we may only exit early if we find an LRU folio.  Yet, we exit early if we
-find a folio that is not a secretmem folio.
+folio_is_secretmem() currently relies on secretmem folios being LRU
+folios, to save some cycles.
 
-Consequently, folio_is_secretmem() fails to detect secretmem folios and,
-therefore, we can succeed in grabbing a secretmem folio during GUP-fast,
-crashing the kernel when we later try reading/writing to the folio,
-because the folio has been unmapped from the directmap.
+However, folios might reside in a folio batch without the LRU flag set, or
+temporarily have their LRU flag cleared.  Consequently, the LRU flag is
+unreliable for this purpose.
 
-Link: https://lkml.kernel.org/r/20240325134114.257544-2-david@redhat.com
+In particular, this is the case when secretmem_fault() allocates a fresh
+page and calls filemap_add_folio()->folio_add_lru().  The folio might be
+added to the per-cpu folio batch and won't get the LRU flag set until the
+batch is drained using, e.g., lru_add_drain().
+
+Consequently, folio_is_secretmem() might not detect secretmem folios and
+GUP-fast can succeed in grabbing a secretmem folio, crashing the kernel
+when we would later try reading/writing to the folio, because the folio
+has been unmapped from the directmap.
+
+Fix it by removing that unreliable check.
+
+Link: https://lkml.kernel.org/r/20240326143210.291116-2-david@redhat.com
 Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas")
 Signed-off-by: David Hildenbrand <david@redhat.com>
 Reported-by: xingwei lee <xrivendell7@gmail.com>
 Reported-by: yue sun <samsun1006219@gmail.com>
 Closes: https://lore.kernel.org/lkml/CABOYnLyevJeravW=QrH0JUPYEcDN160aZFb7kwndm-J2rmz0HQ@mail.gmail.com/
 Debugged-by: Miklos Szeredi <miklos@szeredi.hu>
-Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
 Tested-by: Miklos Szeredi <mszeredi@redhat.com>
+Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
 Cc: Lorenzo Stoakes <lstoakes@gmail.com>
-Cc: "Mike Rapoport (IBM)" <rppt@kernel.org>
 Cc: <stable@vger.kernel.org>
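For background on the fix described above: a minimal sketch of
folio_is_secretmem() with the unreliable LRU shortcut removed, assuming the
helper sits in include/linux/secretmem.h as in mainline; only the
folio_test_lru() early exit is dropped, the cheap mapping comparison stays:

    static inline bool folio_is_secretmem(struct folio *folio)
    {
    	struct address_space *mapping;
    
    	/*
    	 * Previously: if (folio_test_large(folio) || !folio_test_lru(folio))
    	 * The LRU test is gone because a freshly faulted folio can still
    	 * sit in a per-CPU folio batch with its LRU flag not yet set.
    	 */
    	if (folio_test_large(folio))
    		return false;
    
    	mapping = (struct address_space *)
    		((unsigned long)folio->mapping & ~PAGE_MAPPING_FLAGS);
    	if (!mapping || mapping != folio->mapping)
    		return false;
    
    	return mapping->a_ops == &secretmem_aops;
    }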
diff --git a/txt/mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.txt b/txt/old/mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.txt
index 75351b174..75351b174 100644
--- a/txt/mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.txt
+++ b/txt/old/mm-vmalloc-bail-out-early-in-find_vmap_area-if-vmap-is-not-init.txt
diff --git a/txt/mm-vmalloc-fix-lockdep-warning-fix.txt b/txt/old/mm-vmalloc-fix-lockdep-warning-fix.txt
index c1c889261..c1c889261 100644
--- a/txt/mm-vmalloc-fix-lockdep-warning-fix.txt
+++ b/txt/old/mm-vmalloc-fix-lockdep-warning-fix.txt
diff --git a/txt/mm-vmalloc-fix-lockdep-warning.txt b/txt/old/mm-vmalloc-fix-lockdep-warning.txt
index a1543b263..a1543b263 100644
--- a/txt/mm-vmalloc-fix-lockdep-warning.txt
+++ b/txt/old/mm-vmalloc-fix-lockdep-warning.txt
diff --git a/txt/selftests-mm-include-stringsh-for-ffsl.txt b/txt/old/selftests-mm-include-stringsh-for-ffsl.txt
index 151cf0db1..151cf0db1 100644
--- a/txt/selftests-mm-include-stringsh-for-ffsl.txt
+++ b/txt/old/selftests-mm-include-stringsh-for-ffsl.txt
diff --git a/txt/stackdepot-rename-pool_index-to-pool_index_plus_1.txt b/txt/old/stackdepot-rename-pool_index-to-pool_index_plus_1.txt
index 35b3e10b5..35b3e10b5 100644
--- a/txt/stackdepot-rename-pool_index-to-pool_index_plus_1.txt
+++ b/txt/old/stackdepot-rename-pool_index-to-pool_index_plus_1.txt
diff --git a/txt/x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.txt b/txt/old/x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.txt
index 73816f472..73816f472 100644
--- a/txt/x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.txt
+++ b/txt/old/x86-mm-pat-fix-vm_pat-handling-in-cow-mappings.txt
diff --git a/txt/proc-rewrite-stable_page_flags-fix-2.txt b/txt/proc-rewrite-stable_page_flags-fix-2.txt
new file mode 100644
index 000000000..a48352c42
--- /dev/null
+++ b/txt/proc-rewrite-stable_page_flags-fix-2.txt
@@ -0,0 +1,12 @@
+From: Matthew Wilcox <willy@infradead.org>
+Subject: proc-rewrite-stable_page_flags-fix-2
+Date: Fri, 5 Apr 2024 20:23:32 +0100
+
+Fix the following warnings:
+
+fs/proc/page.c:156 stable_page_flags() warn: bit shifter 'PG_lru' used for logical '&'
+fs/proc/page.c:207 stable_page_flags() warn: bit shifter 'KPF_HUGE' used for logical '&'
+
+Link: https://lkml.kernel.org/r/ZhBPtCYfSuFuUMEz@casper.infradead.org
+Signed-off-by: Matthew Wilcox <willy@infradead.org>
+Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
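For background on the warnings above: PG_* page-flag and KPF_* kpageflags
enumerators are bit positions, not bit masks, so using one directly as the
right-hand side of '&' tests whichever low bits happen to match its numeric
value.  A hypothetical before/after sketch of the pattern being fixed (the
exact fs/proc/page.c lines may differ):

    	u64 u = 0;				/* /proc/kpageflags value */
    	unsigned long k = READ_ONCE(page->flags);	/* page flags word */
    
    	/*
    	 * Buggy form the checker flags:
    	 *	if (k & PG_lru)
    	 * PG_lru is a bit position, so this masks arbitrary low bits.
    	 */
    
    	/* Fixed: shift the position into a one-bit mask before testing. */
    	if (k & (1UL << PG_lru))
    		u |= 1ULL << KPF_LRU;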