From 25efb2ffdf991177e740b2f63e92b4ec7d310a92 Mon Sep 17 00:00:00 2001 From: Simon Gander Date: Fri, 10 Apr 2020 14:32:16 -0700 Subject: hfsplus: fix crash and filesystem corruption when deleting files When removing files containing extended attributes, the hfsplus driver may remove the wrong entries from the attributes b-tree, causing major filesystem damage and in some cases even kernel crashes. To remove a file, all its extended attributes have to be removed as well. The driver does this by looking up all keys in the attributes b-tree with the cnid of the file. Each of these entries then gets deleted using the key used for searching, which doesn't contain the attribute's name when it should. Since the key doesn't contain the name, the deletion routine will not find the correct entry and instead remove the one in front of it. If parent nodes have to be modified, these become corrupt as well. This causes invalid links and unsorted entries that not even macOS's fsck_hfs is able to fix. To fix this, modify the search key before an entry is deleted from the attributes b-tree by copying the found entry's key into the search key, therefore ensuring that the correct entry gets removed from the tree. Signed-off-by: Simon Gander Signed-off-by: Andrew Morton Reviewed-by: Anton Altaparmakov Cc: Link: http://lkml.kernel.org/r/20200327155541.1521-1-simon@tuxera.com Signed-off-by: Linus Torvalds --- fs/hfsplus/attributes.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index e6d554476db41c..eeebe80c6be4aa 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c @@ -292,6 +292,10 @@ static int __hfsplus_delete_attr(struct inode *inode, u32 cnid, return -ENOENT; } + /* Avoid btree corruption */ + hfs_bnode_read(fd->bnode, fd->search_key, + fd->keyoffset, fd->keylength); + err = hfs_brec_remove(fd); if (err) return err; -- cgit 1.2.3-korg From 9b8b17541f13809d06f6f873325305ddbb760e3e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 10 Apr 2020 14:32:19 -0700 Subject: mm, memcg: do not high throttle allocators based on wraparound If a cgroup violates its memory.high constraints, we may end up unduly penalising it. For example, for the following hierarchy: A: max high, 20 usage A/B: 9 high, 10 usage A/C: max high, 10 usage We would end up doing the following calculation below when calculating high delay for A/B: A/B: 10 - 9 = 1... A: 20 - PAGE_COUNTER_MAX = 21, so set max_overage to 21. This gets worse with higher disparities in usage in the parent. I have no idea how this disappeared from the final version of the patch, but it is certainly Not Good(tm). This wasn't obvious in testing because, for a simple cgroup hierarchy with only one child, the result is usually roughly the same. It's only in more complex hierarchies that things go really awry (although still, the effects are limited to a maximum of 2 seconds in schedule_timeout_killable at a maximum). [chris@chrisdown.name: changelog] Fixes: e26733e0d0ec ("mm, memcg: throttle allocators based on ancestral memory.high") Signed-off-by: Jakub Kicinski Signed-off-by: Chris Down Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: Johannes Weiner Cc: [5.4.x] Link: http://lkml.kernel.org/r/20200331152424.GA1019937@chrisdown.name Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 05b4ec2c6499da..5beea03dd58ad8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2336,6 +2336,9 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg, usage = page_counter_read(&memcg->memory); high = READ_ONCE(memcg->high); + if (usage <= high) + continue; + /* * Prevent division by 0 in overage calculation by acting as if * it was a threshold of 1 page -- cgit 1.2.3-korg From b991cee567bf045097d9426719d7f1477bd7dc59 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 10 Apr 2020 14:32:22 -0700 Subject: mm, slab_common: fix a typo in comment "eariler"->"earlier" There is a typo in comment, fix it. s/eariler/earlier/ Signed-off-by: Qiujun Huang Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Acked-by: Christoph Lameter Link: http://lkml.kernel.org/r/20200405160544.1246-1-hqjagain@gmail.com Signed-off-by: Linus Torvalds --- mm/slab_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 93ec4a574d8d5d..23c7500eea7d91 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -731,7 +731,7 @@ static void kmemcg_rcufn(struct rcu_head *head) /* * We need to grab blocking locks. Bounce to ->work. The * work item shares the space with the RCU head and can't be - * initialized eariler. + * initialized earlier. */ INIT_WORK(&s->memcg_params.work, kmemcg_workfn); queue_work(memcg_kmem_cache_wq, &s->memcg_params.work); -- cgit 1.2.3-korg From 2370ae4b1d5aa7eb70bd7539a420e791d4b0123b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 10 Apr 2020 14:32:25 -0700 Subject: docs: mm: slab.h: fix a broken cross-reference There is a typo at the cross-reference link, causing this warning: include/linux/slab.h:11: WARNING: undefined label: memory-allocation (if the link has no caption the label must precede a section header) Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Cc: Jonathan Corbet Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/0aeac24235d356ebd935d11e147dcc6edbb6465c.1586359676.git.mchehab+huawei@kernel.org Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 03a389358562ab..6d454886bcafbe 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -501,7 +501,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * :ref:`Documentation/core-api/mm-api.rst ` * * The recommended usage of the @flags is described at - * :ref:`Documentation/core-api/memory-allocation.rst ` + * :ref:`Documentation/core-api/memory-allocation.rst ` * * Below is a brief outline of the most useful GFP flags * -- cgit 1.2.3-korg From e6a0a7ad1c2b6608e294fbbce60c42ba5a1304ce Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 10 Apr 2020 14:32:29 -0700 Subject: mm/page_alloc.c: fix kernel-doc warning Add description of function parameter 'mt' to fix kernel-doc warning: mm/page_alloc.c:3246: warning: Function parameter or member 'mt' not described in '__putback_isolated_page' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Acked-by: Pankaj Gupta Link: http://lkml.kernel.org/r/02998bd4-0b82-2f15-2570-f86130304d1e@infradead.org Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 114c56c3685d9f..d98441ab103605 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3224,6 +3224,7 @@ int __isolate_free_page(struct page *page, unsigned int order) * __putback_isolated_page - Return a now-isolated page back where we got it * @page: Page that was isolated * @order: Order of the isolated page + * @mt: The page's pageblock's migratetype * * This function is meant to return a page pulled from the free lists via * __isolate_free_page back to the free lists they were pulled from. -- cgit 1.2.3-korg From 8b885f53b03e3b14003083daf64a6ed16bf561b3 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Apr 2020 14:32:32 -0700 Subject: mm/page_alloc: make pcpu_drain_mutex and pcpu_drain static Fix the following sparse warning: mm/page_alloc.c:106:1: warning: symbol 'pcpu_drain_mutex' was not declared. Should it be static? mm/page_alloc.c:107:1: warning: symbol '__pcpu_scope_pcpu_drain' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/20200407023925.46438-1-yanaijie@huawei.com Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d98441ab103605..69827d4fa0527d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -103,8 +103,8 @@ struct pcpu_drain { struct zone *zone; struct work_struct work; }; -DEFINE_MUTEX(pcpu_drain_mutex); -DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); +static DEFINE_MUTEX(pcpu_drain_mutex); +static DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY volatile unsigned long latent_entropy __latent_entropy; -- cgit 1.2.3-korg From 783fda856e1034dee90a873f7654c418212d12d7 Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Fri, 10 Apr 2020 14:32:38 -0700 Subject: ocfs2: no need try to truncate file beyond i_size Linux fallocate(2) with FALLOC_FL_PUNCH_HOLE mode set, its offset can exceed the inode size. Ocfs2 now doesn't allow that offset beyond inode size. This restriction is not necessary and violates fallocate(2) semantics. If fallocate(2) offset is beyond inode size, just return success and do nothing further. Otherwise, ocfs2 will crash the kernel. kernel BUG at fs/ocfs2//alloc.c:7264! ocfs2_truncate_inline+0x20f/0x360 [ocfs2] ocfs2_remove_inode_range+0x23c/0xcb0 [ocfs2] __ocfs2_change_file_space+0x4a5/0x650 [ocfs2] ocfs2_fallocate+0x83/0xa0 [ocfs2] vfs_fallocate+0x148/0x230 SyS_fallocate+0x48/0x80 do_syscall_64+0x79/0x170 Signed-off-by: Changwei Ge Signed-off-by: Andrew Morton Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Link: http://lkml.kernel.org/r/20200407082754.17565-1-chge@linux.alibaba.com Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 65b3abbcce4ef3..2f834add165bf0 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7402,6 +7402,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_inline_data *idata = &di->id2.i_data; + /* No need to punch hole beyond i_size. */ + if (start >= i_size_read(inode)) + return 0; + if (end > i_size_read(inode)) end = i_size_read(inode); -- cgit 1.2.3-korg From 8676af1ff2d28e64e5636147821bda7524cf007d Mon Sep 17 00:00:00 2001 From: Aslan Bakirov Date: Fri, 10 Apr 2020 14:32:42 -0700 Subject: mm: cma: NUMA node interface I've noticed that there is no interface exposed by CMA which would let me to declare contigous memory on particular NUMA node. This patchset adds the ability to try to allocate contiguous memory on a specific node. It will fallback to other nodes if the specified one doesn't work. Implement a new method for declaring contigous memory on particular node and keep cma_declare_contiguous() as a wrapper. [akpm@linux-foundation.org: build fix] Signed-off-by: Aslan Bakirov Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Acked-by: Michal Hocko Cc: Andreas Schaufler Cc: Mike Kravetz Cc: Rik van Riel Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200407163840.92263-2-guro@fb.com Signed-off-by: Linus Torvalds --- include/linux/cma.h | 14 ++++++++++++-- include/linux/memblock.h | 3 +++ mm/cma.c | 16 +++++++++------- mm/memblock.c | 2 +- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/include/linux/cma.h b/include/linux/cma.h index 190184b5ff32d8..6ff79fefd01fd2 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -4,6 +4,7 @@ #include #include +#include /* * There is always at least global CMA area and a few optional @@ -24,10 +25,19 @@ extern phys_addr_t cma_get_base(const struct cma *cma); extern unsigned long cma_get_size(const struct cma *cma); extern const char *cma_get_name(const struct cma *cma); -extern int __init cma_declare_contiguous(phys_addr_t base, +extern int __init cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - bool fixed, const char *name, struct cma **res_cma); + bool fixed, const char *name, struct cma **res_cma, + int nid); +static inline int __init cma_declare_contiguous(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + bool fixed, const char *name, struct cma **res_cma) +{ + return cma_declare_contiguous_nid(base, size, limit, alignment, + order_per_bit, fixed, name, res_cma, NUMA_NO_NODE); +} extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 079d17d964101e..6bc37a731d27bd 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -348,6 +348,9 @@ static inline int memblock_get_region_node(const struct memblock_region *r) phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end); +phys_addr_t memblock_alloc_range_nid(phys_addr_t size, + phys_addr_t align, phys_addr_t start, + phys_addr_t end, int nid, bool exact_nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, diff --git a/mm/cma.c b/mm/cma.c index be55d1988c6754..0463ad2ce06b75 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -220,7 +220,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, } /** - * cma_declare_contiguous() - reserve custom contiguous area + * cma_declare_contiguous_nid() - reserve custom contiguous area * @base: Base address of the reserved area optional, use 0 for any * @size: Size of the reserved area (in bytes), * @limit: End address of the reserved memory (optional, 0 for any). @@ -229,6 +229,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, * @fixed: hint about where to place the reserved area * @name: The name of the area. See function cma_init_reserved_mem() * @res_cma: Pointer to store the created cma region. + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node * * This function reserves memory from early allocator. It should be * called by arch specific code once the early allocator (memblock or bootmem) @@ -238,10 +239,11 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, * If @fixed is true, reserve contiguous area at exactly @base. If false, * reserve in range from @base to @limit. */ -int __init cma_declare_contiguous(phys_addr_t base, +int __init cma_declare_contiguous_nid(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - bool fixed, const char *name, struct cma **res_cma) + bool fixed, const char *name, struct cma **res_cma, + int nid) { phys_addr_t memblock_end = memblock_end_of_DRAM(); phys_addr_t highmem_start; @@ -336,14 +338,14 @@ int __init cma_declare_contiguous(phys_addr_t base, * memory in case of failure. */ if (base < highmem_start && limit > highmem_start) { - addr = memblock_phys_alloc_range(size, alignment, - highmem_start, limit); + addr = memblock_alloc_range_nid(size, alignment, + highmem_start, limit, nid, false); limit = highmem_start; } if (!addr) { - addr = memblock_phys_alloc_range(size, alignment, base, - limit); + addr = memblock_alloc_range_nid(size, alignment, base, + limit, nid, false); if (!addr) { ret = -ENOMEM; goto err; diff --git a/mm/memblock.c b/mm/memblock.c index 4d06bbaded0fd8..c79ba6f9920c5d 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1349,7 +1349,7 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, * Return: * Physical address of allocated memory block on success, %0 on failure. */ -static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, +phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, int nid, bool exact_nid) -- cgit 1.2.3-korg From cf11e85fc08cc6a4fe3ac2ba2e610c962bf20bc3 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 10 Apr 2020 14:32:45 -0700 Subject: mm: hugetlb: optionally allocate gigantic hugepages using cma Commit 944d9fec8d7a ("hugetlb: add support for gigantic page allocation at runtime") has added the run-time allocation of gigantic pages. However it actually works only at early stages of the system loading, when the majority of memory is free. After some time the memory gets fragmented by non-movable pages, so the chances to find a contiguous 1GB block are getting close to zero. Even dropping caches manually doesn't help a lot. At large scale rebooting servers in order to allocate gigantic hugepages is quite expensive and complex. At the same time keeping some constant percentage of memory in reserved hugepages even if the workload isn't using it is a big waste: not all workloads can benefit from using 1 GB pages. The following solution can solve the problem: 1) On boot time a dedicated cma area* is reserved. The size is passed as a kernel argument. 2) Run-time allocations of gigantic hugepages are performed using the cma allocator and the dedicated cma area In this case gigantic hugepages can be allocated successfully with a high probability, however the memory isn't completely wasted if nobody is using 1GB hugepages: it can be used for pagecache, anon memory, THPs, etc. * On a multi-node machine a per-node cma area is allocated on each node. Following gigantic hugetlb allocation are using the first available numa node if the mask isn't specified by a user. Usage: 1) configure the kernel to allocate a cma area for hugetlb allocations: pass hugetlb_cma=10G as a kernel argument 2) allocate hugetlb pages as usual, e.g. echo 10 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages If the option isn't enabled or the allocation of the cma area failed, the current behavior of the system is preserved. x86 and arm-64 are covered by this patch, other architectures can be trivially added later. The patch contains clean-ups and fixes proposed and implemented by Aslan Bakirov and Randy Dunlap. It also contains ideas and suggestions proposed by Rik van Riel, Michal Hocko and Mike Kravetz. Thanks! Signed-off-by: Roman Gushchin Signed-off-by: Andrew Morton Tested-by: Andreas Schaufler Acked-by: Mike Kravetz Acked-by: Michal Hocko Cc: Aslan Bakirov Cc: Randy Dunlap Cc: Rik van Riel Cc: Joonsoo Kim Link: http://lkml.kernel.org/r/20200407163840.92263-3-guro@fb.com Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 8 ++ arch/arm64/mm/init.c | 6 ++ arch/x86/kernel/setup.c | 4 + include/linux/hugetlb.h | 12 +++ mm/hugetlb.c | 109 ++++++++++++++++++++++++ 5 files changed, 139 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 86aae1fa099a7b..d7df9a8302c492 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1475,6 +1475,14 @@ hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET registers. Default set by CONFIG_HPET_MMAP_DEFAULT. + hugetlb_cma= [HW] The size of a cma area used for allocation + of gigantic hugepages. + Format: nn[KMGTPE] + + Reserve a cma area of given size and allocate gigantic + hugepages using the cma allocator. If enabled, the + boot-time allocation of gigantic hugepages is skipped. + hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages. On x86-64 and powerpc, this option can be specified diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index b65dffdfb20190..e42727e3568ea9 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -457,6 +458,11 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; dma_contiguous_reserve(arm64_dma32_phys_limit); + +#ifdef CONFIG_ARM64_4K_PAGES + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); +#endif + } void __init bootmem_init(void) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e6b545047f3811..4b3fa6cd3106d4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -1157,6 +1158,9 @@ void __init setup_arch(char **cmdline_p) initmem_init(); dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); + if (boot_cpu_has(X86_FEATURE_GBPAGES)) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); + /* * Reserve memory for crash kernel after SRAT is parsed so that it * won't consume hotpluggable memory. diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5ea05879a0a94a..43a1cef8f0f163 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -895,4 +895,16 @@ static inline spinlock_t *huge_pte_lock(struct hstate *h, return ptl; } +#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA) +extern void __init hugetlb_cma_reserve(int order); +extern void __init hugetlb_cma_check(void); +#else +static inline __init void hugetlb_cma_reserve(int order) +{ +} +static inline __init void hugetlb_cma_check(void) +{ +} +#endif + #endif /* _LINUX_HUGETLB_H */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f5fb53fdfa02d2..cd459155d28a7a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -44,6 +45,9 @@ int hugetlb_max_hstate __read_mostly; unsigned int default_hstate_idx; struct hstate hstates[HUGE_MAX_HSTATE]; + +static struct cma *hugetlb_cma[MAX_NUMNODES]; + /* * Minimum page order among possible hugepage sizes, set to a proper value * at boot time. @@ -1228,6 +1232,14 @@ static void destroy_compound_gigantic_page(struct page *page, static void free_gigantic_page(struct page *page, unsigned int order) { + /* + * If the page isn't allocated using the cma allocator, + * cma_release() returns false. + */ + if (IS_ENABLED(CONFIG_CMA) && + cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order)) + return; + free_contig_range(page_to_pfn(page), 1 << order); } @@ -1237,6 +1249,21 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask, { unsigned long nr_pages = 1UL << huge_page_order(h); + if (IS_ENABLED(CONFIG_CMA)) { + struct page *page; + int node; + + for_each_node_mask(node, *nodemask) { + if (!hugetlb_cma[node]) + continue; + + page = cma_alloc(hugetlb_cma[node], nr_pages, + huge_page_order(h), true); + if (page) + return page; + } + } + return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask); } @@ -1281,8 +1308,14 @@ static void update_and_free_page(struct hstate *h, struct page *page) set_compound_page_dtor(page, NULL_COMPOUND_DTOR); set_page_refcounted(page); if (hstate_is_gigantic(h)) { + /* + * Temporarily drop the hugetlb_lock, because + * we might block in free_gigantic_page(). + */ + spin_unlock(&hugetlb_lock); destroy_compound_gigantic_page(page, huge_page_order(h)); free_gigantic_page(page, huge_page_order(h)); + spin_lock(&hugetlb_lock); } else { __free_pages(page, huge_page_order(h)); } @@ -2539,6 +2572,10 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) for (i = 0; i < h->max_huge_pages; ++i) { if (hstate_is_gigantic(h)) { + if (IS_ENABLED(CONFIG_CMA) && hugetlb_cma[0]) { + pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n"); + break; + } if (!alloc_bootmem_huge_page(h)) break; } else if (!alloc_pool_huge_page(h, @@ -3194,6 +3231,7 @@ static int __init hugetlb_init(void) default_hstate.max_huge_pages = default_hstate_max_huge_pages; } + hugetlb_cma_check(); hugetlb_init_hstates(); gather_bootmem_prealloc(); report_hugepages(); @@ -5506,3 +5544,74 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason) spin_unlock(&hugetlb_lock); } } + +#ifdef CONFIG_CMA +static unsigned long hugetlb_cma_size __initdata; +static bool cma_reserve_called __initdata; + +static int __init cmdline_parse_hugetlb_cma(char *p) +{ + hugetlb_cma_size = memparse(p, &p); + return 0; +} + +early_param("hugetlb_cma", cmdline_parse_hugetlb_cma); + +void __init hugetlb_cma_reserve(int order) +{ + unsigned long size, reserved, per_node; + int nid; + + cma_reserve_called = true; + + if (!hugetlb_cma_size) + return; + + if (hugetlb_cma_size < (PAGE_SIZE << order)) { + pr_warn("hugetlb_cma: cma area should be at least %lu MiB\n", + (PAGE_SIZE << order) / SZ_1M); + return; + } + + /* + * If 3 GB area is requested on a machine with 4 numa nodes, + * let's allocate 1 GB on first three nodes and ignore the last one. + */ + per_node = DIV_ROUND_UP(hugetlb_cma_size, nr_online_nodes); + pr_info("hugetlb_cma: reserve %lu MiB, up to %lu MiB per node\n", + hugetlb_cma_size / SZ_1M, per_node / SZ_1M); + + reserved = 0; + for_each_node_state(nid, N_ONLINE) { + int res; + + size = min(per_node, hugetlb_cma_size - reserved); + size = round_up(size, PAGE_SIZE << order); + + res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order, + 0, false, "hugetlb", + &hugetlb_cma[nid], nid); + if (res) { + pr_warn("hugetlb_cma: reservation failed: err %d, node %d", + res, nid); + continue; + } + + reserved += size; + pr_info("hugetlb_cma: reserved %lu MiB on node %d\n", + size / SZ_1M, nid); + + if (reserved >= hugetlb_cma_size) + break; + } +} + +void __init hugetlb_cma_check(void) +{ + if (!hugetlb_cma_size || cma_reserve_called) + return; + + pr_warn("hugetlb_cma: the option isn't supported by current arch\n"); +} + +#endif /* CONFIG_CMA */ -- cgit 1.2.3-korg From 09ef5283fd96ac424ef0e569626f359bf9ab86c9 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Fri, 10 Apr 2020 14:32:48 -0700 Subject: mm/mmap.c: initialize align_offset explicitly for vm_unmapped_area On passing requirement to vm_unmapped_area, arch_get_unmapped_area and arch_get_unmapped_area_topdown did not set align_offset. Internally on both unmapped_area and unmapped_area_topdown, if info->align_mask is 0, then info->align_offset was meaningless. But commit df529cabb7a2 ("mm: mmap: add trace point of vm_unmapped_area") always prints info->align_offset even though it is uninitialized. Fix this uninitialized value issue by setting it to 0 explicitly. Before: vm_unmapped_area: addr=0x755b155000 err=0 total_vm=0x15aaf0 flags=0x1 len=0x109000 lo=0x8000 hi=0x75eed48000 mask=0x0 ofs=0x4022 After: vm_unmapped_area: addr=0x74a4ca1000 err=0 total_vm=0x168ab1 flags=0x1 len=0x9000 lo=0x8000 hi=0x753d94b000 mask=0x0 ofs=0x0 Signed-off-by: Jaewon Kim Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Cc: Matthew Wilcox (Oracle) Cc: Michel Lespinasse Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20200409094035.19457-1-jaewon31.kim@samsung.com Signed-off-by: Linus Torvalds --- mm/mmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 8d77dbbb80fe72..de07bbc0e21f63 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2123,6 +2123,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.low_limit = mm->mmap_base; info.high_limit = mmap_end; info.align_mask = 0; + info.align_offset = 0; return vm_unmapped_area(&info); } #endif @@ -2164,6 +2165,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, info.low_limit = max(PAGE_SIZE, mmap_min_addr); info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); info.align_mask = 0; + info.align_offset = 0; addr = vm_unmapped_area(&info); /* -- cgit 1.2.3-korg From 8efd6f5b1732c4ac88b4bb6908d481d95804fa1c Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:32:51 -0700 Subject: mm/memory.c: refactor insert_page to prepare for batched-lock insert Add helper methods for vm_insert_page()/insert_page() to prepare for vm_insert_pages(), which batch-inserts pages to reduce spinlock operations when inserting multiple consecutive pages into the user page table. The intention of this patch-set is to reduce atomic ops for tcp zerocopy receives, which normally hits the same spinlock multiple times consecutively. Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Andrew Morton Cc: David Miller Cc: Matthew Wilcox Cc: Jason Gunthorpe Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200128025958.43490-1-arjunroy.kdev@gmail.com Signed-off-by: Linus Torvalds --- mm/memory.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 19874d133a66fc..52a3303458cb6c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1442,6 +1442,27 @@ pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, return pte_alloc_map_lock(mm, pmd, addr, ptl); } +static int validate_page_before_insert(struct page *page) +{ + if (PageAnon(page) || PageSlab(page) || page_has_type(page)) + return -EINVAL; + flush_dcache_page(page); + return 0; +} + +static int insert_page_into_pte_locked(struct mm_struct *mm, pte_t *pte, + unsigned long addr, struct page *page, pgprot_t prot) +{ + if (!pte_none(*pte)) + return -EBUSY; + /* Ok, finally just insert the thing.. */ + get_page(page); + inc_mm_counter_fast(mm, mm_counter_file(page)); + page_add_file_rmap(page, false); + set_pte_at(mm, addr, pte, mk_pte(page, prot)); + return 0; +} + /* * This is the old fallback for page remapping. * @@ -1457,26 +1478,14 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr, pte_t *pte; spinlock_t *ptl; - retval = -EINVAL; - if (PageAnon(page) || PageSlab(page) || page_has_type(page)) + retval = validate_page_before_insert(page); + if (retval) goto out; retval = -ENOMEM; - flush_dcache_page(page); pte = get_locked_pte(mm, addr, &ptl); if (!pte) goto out; - retval = -EBUSY; - if (!pte_none(*pte)) - goto out_unlock; - - /* Ok, finally just insert the thing.. */ - get_page(page); - inc_mm_counter_fast(mm, mm_counter_file(page)); - page_add_file_rmap(page, false); - set_pte_at(mm, addr, pte, mk_pte(page, prot)); - - retval = 0; -out_unlock: + retval = insert_page_into_pte_locked(mm, pte, addr, page, prot); pte_unmap_unlock(pte, ptl); out: return retval; -- cgit 1.2.3-korg From 251a0ffeaeee2a900765d98d44880943dce1047d Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:32:54 -0700 Subject: mm: bring sparc pte_index() semantics inline with other platforms pte_index() on platforms other than sparc return a numerical index. On sparc, it returns a pte_t*. This presents an issue for vm_insert_pages(), which relies on pte_index() to find the offset for a pte within a pmd, for batched inserts. This patch: 1. Modifies pte_index() for sparc to return a numerical index, like other platforms, 2. Defines pte_entry() for sparc which returns a pte_t* (as pte_index() used to), 3. Converts existing sparc callers for pte_index() to use pte_entry(). [sfr@canb.auug.org.au: remove pte_entry and just directly modified pte_offset_kernel instead] Signed-off-by: Arjun Roy Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Reviewed-by: Mike Rapoport Cc: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: David Miller Cc: Matthew Wilcox Cc: Arjun Roy Cc: Jason Gunthorpe Link: http://lkml.kernel.org/r/20200227105045.6b421d9f@canb.auug.org.au Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pgtable_64.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 65494c3a420ef9..da527b27cf7dc3 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -907,11 +907,11 @@ static inline unsigned long pud_pfn(pud_t pud) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))) /* Find an entry in the third-level page table.. */ -#define pte_index(dir, address) \ - ((pte_t *) __pmd_page(*(dir)) + \ - ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) -#define pte_offset_kernel pte_index -#define pte_offset_map pte_index +#define pte_index(address) \ + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir, address) \ + ((pte_t *) __pmd_page(*(dir)) + pte_index(address)) +#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_unmap(pte) do { } while (0) /* We cannot include at this point yet: */ -- cgit 1.2.3-korg From c97078bd219cbe1a878b24bb4e61d312f19ece1f Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:32:58 -0700 Subject: mm: define pte_index as macro for x86 pte_index() is either defined as a macro (e.g. sparc64) or as an inlined function (e.g. x86). vm_insert_pages() depends on pte_index but it is not defined on all platforms (e.g. m68k). To fix compilation of vm_insert_pages() on architectures not providing pte_index(), we perform the following fix: 0. For platforms where it is meaningful, and defined as a macro, no change is needed. 1. For platforms where it is meaningful and defined as an inlined function, and we want to use it with vm_insert_pages(), we define a degenerate macro of the form: #define pte_index pte_index 2. vm_insert_pages() checks for the existence of a pte_index macro definition. If found, it implements a batched insert. If not found, it devolves to calling vm_insert_page() in a loop. This patch implements step 1 for x86. v3 of this patch fixes a compilation warning for an unused method. v2 of this patch moved a macro definition to a more readable location. Signed-off-by: Arjun Roy Signed-off-by: Andrew Morton Cc: David Miller Cc: Eric Dumazet Cc: Jason Gunthorpe Cc: Matthew Wilcox Cc: Soheil Hassas Yeganeh Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200228054714.204424-1-arjunroy.kdev@gmail.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 28838d79019156..abad0da0973a88 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -860,7 +860,10 @@ static inline unsigned long pmd_index(unsigned long address) * * this function returns the index of the entry in the pte page which would * control the given virtual address + * + * Also define macro so we can test if pte_index is defined for arch. */ +#define pte_index pte_index static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); -- cgit 1.2.3-korg From 8cd3984d81d5fd5e18bccb12d7d228a114ec2508 Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 10 Apr 2020 14:33:01 -0700 Subject: mm/memory.c: add vm_insert_pages() Add the ability to insert multiple pages at once to a user VM with lower PTE spinlock operations. The intention of this patch-set is to reduce atomic ops for tcp zerocopy receives, which normally hits the same spinlock multiple times consecutively. [akpm@linux-foundation.org: pte_alloc() no longer takes the `addr' argument] [arjunroy@google.com: add missing page_count() check to vm_insert_pages()] Link: http://lkml.kernel.org/r/20200214005929.104481-1-arjunroy.kdev@gmail.com [arjunroy@google.com: vm_insert_pages() checks if pte_index defined] Link: http://lkml.kernel.org/r/20200228054714.204424-2-arjunroy.kdev@gmail.com Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Andrew Morton Cc: David Miller Cc: Matthew Wilcox Cc: Jason Gunthorpe Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/20200128025958.43490-2-arjunroy.kdev@gmail.com Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 + mm/memory.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 129 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e2f938c5a9d8ca..ed896cedd4c417 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2689,6 +2689,8 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *); +int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num); int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num); int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, diff --git a/mm/memory.c b/mm/memory.c index 52a3303458cb6c..f703fe8c834602 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1419,8 +1419,7 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, } EXPORT_SYMBOL_GPL(zap_vma_ptes); -pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, - spinlock_t **ptl) +static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; p4d_t *p4d; @@ -1439,6 +1438,16 @@ pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, return NULL; VM_BUG_ON(pmd_trans_huge(*pmd)); + return pmd; +} + +pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl) +{ + pmd_t *pmd = walk_to_pmd(mm, addr); + + if (!pmd) + return NULL; return pte_alloc_map_lock(mm, pmd, addr, ptl); } @@ -1491,6 +1500,122 @@ out: return retval; } +#ifdef pte_index +static int insert_page_in_batch_locked(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, struct page *page, pgprot_t prot) +{ + int err; + + if (!page_count(page)) + return -EINVAL; + err = validate_page_before_insert(page); + return err ? err : insert_page_into_pte_locked( + mm, pte_offset_map(pmd, addr), addr, page, prot); +} + +/* insert_pages() amortizes the cost of spinlock operations + * when inserting pages in a loop. Arch *must* define pte_index. + */ +static int insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num, pgprot_t prot) +{ + pmd_t *pmd = NULL; + spinlock_t *pte_lock = NULL; + struct mm_struct *const mm = vma->vm_mm; + unsigned long curr_page_idx = 0; + unsigned long remaining_pages_total = *num; + unsigned long pages_to_write_in_pmd; + int ret; +more: + ret = -EFAULT; + pmd = walk_to_pmd(mm, addr); + if (!pmd) + goto out; + + pages_to_write_in_pmd = min_t(unsigned long, + remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); + + /* Allocate the PTE if necessary; takes PMD lock once only. */ + ret = -ENOMEM; + if (pte_alloc(mm, pmd)) + goto out; + pte_lock = pte_lockptr(mm, pmd); + + while (pages_to_write_in_pmd) { + int pte_idx = 0; + const int batch_size = min_t(int, pages_to_write_in_pmd, 8); + + spin_lock(pte_lock); + for (; pte_idx < batch_size; ++pte_idx) { + int err = insert_page_in_batch_locked(mm, pmd, + addr, pages[curr_page_idx], prot); + if (unlikely(err)) { + spin_unlock(pte_lock); + ret = err; + remaining_pages_total -= pte_idx; + goto out; + } + addr += PAGE_SIZE; + ++curr_page_idx; + } + spin_unlock(pte_lock); + pages_to_write_in_pmd -= batch_size; + remaining_pages_total -= batch_size; + } + if (remaining_pages_total) + goto more; + ret = 0; +out: + *num = remaining_pages_total; + return ret; +} +#endif /* ifdef pte_index */ + +/** + * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock. + * @vma: user vma to map to + * @addr: target start user address of these pages + * @pages: source kernel pages + * @num: in: number of pages to map. out: number of pages that were *not* + * mapped. (0 means all pages were successfully mapped). + * + * Preferred over vm_insert_page() when inserting multiple pages. + * + * In case of error, we may have mapped a subset of the provided + * pages. It is the caller's responsibility to account for this case. + * + * The same restrictions apply as in vm_insert_page(). + */ +int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, + struct page **pages, unsigned long *num) +{ +#ifdef pte_index + const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; + + if (addr < vma->vm_start || end_addr >= vma->vm_end) + return -EFAULT; + if (!(vma->vm_flags & VM_MIXEDMAP)) { + BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem)); + BUG_ON(vma->vm_flags & VM_PFNMAP); + vma->vm_flags |= VM_MIXEDMAP; + } + /* Defer page refcount checking till we're about to map that page. */ + return insert_pages(vma, addr, pages, num, vma->vm_page_prot); +#else + unsigned long idx = 0, pgcount = *num; + int err; + + for (; idx < pgcount; ++idx) { + err = vm_insert_page(vma, addr + (PAGE_SIZE * idx), pages[idx]); + if (err) + break; + } + *num = pgcount - idx; + return err; +#endif /* ifdef pte_index */ +} +EXPORT_SYMBOL(vm_insert_pages); + /** * vm_insert_page - insert single page into user vma * @vma: user vma to map to -- cgit 1.2.3-korg From c62da0c35d58518ddb26ff641d2485596567fd96 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:05 -0700 Subject: mm/vma: define a default value for VM_DATA_DEFAULT_FLAGS There are many platforms with exact same value for VM_DATA_DEFAULT_FLAGS This creates a default value for VM_DATA_DEFAULT_FLAGS in line with the existing VM_STACK_DEFAULT_FLAGS. While here, also define some more macros with standard VMA access flag combinations that are used frequently across many platforms. Apart from simplification, this reduces code duplication as well. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Acked-by: Geert Uytterhoeven Cc: Richard Henderson Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Mark Salter Cc: Guo Ren Cc: Yoshinori Sato Cc: Brian Cain Cc: Tony Luck Cc: Michal Simek Cc: Ralf Baechle Cc: Paul Burton Cc: Nick Hu Cc: Ley Foon Tan Cc: Jonas Bonn Cc: "James E.J. Bottomley" Cc: Michael Ellerman Cc: Paul Walmsley Cc: Heiko Carstens Cc: Rich Felker Cc: "David S. Miller" Cc: Guan Xuetao Cc: Thomas Gleixner Cc: Jeff Dike Cc: Chris Zankel Link: http://lkml.kernel.org/r/1583391014-8170-2-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/page.h | 3 --- arch/arc/include/asm/page.h | 2 +- arch/arm/include/asm/page.h | 4 +--- arch/arm64/include/asm/page.h | 4 +--- arch/c6x/include/asm/page.h | 5 +---- arch/csky/include/asm/page.h | 3 --- arch/h8300/include/asm/page.h | 2 -- arch/hexagon/include/asm/page.h | 3 +-- arch/ia64/include/asm/page.h | 5 +---- arch/m68k/include/asm/page.h | 3 --- arch/microblaze/include/asm/page.h | 2 -- arch/mips/include/asm/page.h | 5 +---- arch/nds32/include/asm/page.h | 3 --- arch/nios2/include/asm/page.h | 3 +-- arch/openrisc/include/asm/page.h | 5 ----- arch/parisc/include/asm/page.h | 3 --- arch/powerpc/include/asm/page.h | 9 ++------- arch/powerpc/include/asm/page_64.h | 7 ++----- arch/riscv/include/asm/page.h | 3 +-- arch/s390/include/asm/page.h | 3 +-- arch/sh/include/asm/page.h | 3 --- arch/sparc/include/asm/page_32.h | 3 --- arch/sparc/include/asm/page_64.h | 3 --- arch/unicore32/include/asm/page.h | 3 --- arch/x86/include/asm/page_types.h | 4 +--- arch/x86/um/asm/vm-flags.h | 10 ++-------- arch/xtensa/include/asm/page.h | 3 --- include/linux/mm.h | 14 ++++++++++++++ 28 files changed, 31 insertions(+), 89 deletions(-) diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h index f3fb2848470ab6..e241bd88880fdf 100644 --- a/arch/alpha/include/asm/page.h +++ b/arch/alpha/include/asm/page.h @@ -90,9 +90,6 @@ typedef struct page *pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #endif /* CONFIG_DISCONTIGMEM */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 0a32e8cfd074d7..b0dfed0f12be01 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -102,7 +102,7 @@ typedef pte_t * pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) /* Default Permissions for stack/heaps pages (Non Executable) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #define WANT_PAGE_VIRTUAL 1 diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index c2b75cba26df1d..11b058a72a5b83 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -161,9 +161,7 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 75d6cd23a67903..c01b52add37719 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -36,9 +36,7 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include diff --git a/arch/c6x/include/asm/page.h b/arch/c6x/include/asm/page.h index 70db1e7632bc0c..40079899084d1c 100644 --- a/arch/c6x/include/asm/page.h +++ b/arch/c6x/include/asm/page.h @@ -2,10 +2,7 @@ #ifndef _ASM_C6X_PAGE_H #define _ASM_C6X_PAGE_H -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 9738eacefdc7e4..9b98bf31d57cec 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -85,9 +85,6 @@ extern unsigned long va_pa_offset; PHYS_OFFSET_OFFSET) #define virt_to_page(x) (mem_map + MAP_NR(x)) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #define pfn_to_kaddr(x) __va(PFN_PHYS(x)) #include diff --git a/arch/h8300/include/asm/page.h b/arch/h8300/include/asm/page.h index 8da5124ad34402..53e03754423940 100644 --- a/arch/h8300/include/asm/page.h +++ b/arch/h8300/include/asm/page.h @@ -6,8 +6,6 @@ #include #define MAP_NR(addr) (((uintptr_t)(addr)-PAGE_OFFSET) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #ifndef __ASSEMBLY__ extern unsigned long rom_length; diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h index ee31f36f48f344..7cbf719c578ec4 100644 --- a/arch/hexagon/include/asm/page.h +++ b/arch/hexagon/include/asm/page.h @@ -93,8 +93,7 @@ struct page; #define virt_to_page(kaddr) pfn_to_page(PFN_DOWN(__pa(kaddr))) /* Default vm area behavior is non-executable. */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #define pfn_valid(pfn) ((pfn) < max_mapnr) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h index 5798bd2b462c46..b69a5499d75b8a 100644 --- a/arch/ia64/include/asm/page.h +++ b/arch/ia64/include/asm/page.h @@ -218,10 +218,7 @@ get_order (unsigned long size) #define PAGE_OFFSET RGN_BASE(RGN_KERNEL) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | \ - (((current->personality & READ_IMPLIES_EXEC) != 0) \ - ? VM_EXEC : 0)) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define GATE_ADDR RGN_BASE(RGN_GATE) diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h index da546487e177d5..2614a1206f2f9a 100644 --- a/arch/m68k/include/asm/page.h +++ b/arch/m68k/include/asm/page.h @@ -65,9 +65,6 @@ extern unsigned long _ramend; #define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) #define __pfn_to_phys(pfn) PFN_PHYS(pfn) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif /* _M68K_PAGE_H */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index ae7215c94706d8..b13463d39b38bc 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -194,8 +194,6 @@ extern int page_is_ram(unsigned long pfn); #ifdef CONFIG_MMU -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #endif /* CONFIG_MMU */ #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index 0ba4ce6e2bf3ae..e2f503fc7a847d 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -253,10 +253,7 @@ extern bool __virt_addr_valid(const volatile void *kaddr); #define virt_addr_valid(kaddr) \ __virt_addr_valid((const volatile void *) (kaddr)) -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #include #include diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h index 86b32014c5f915..add33a7f02c892 100644 --- a/arch/nds32/include/asm/page.h +++ b/arch/nds32/include/asm/page.h @@ -59,9 +59,6 @@ typedef struct page *pgtable_t; #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #endif /* __KERNEL__ */ #endif diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h index 79fcac61f6efb5..6a989819a7c1db 100644 --- a/arch/nios2/include/asm/page.h +++ b/arch/nios2/include/asm/page.h @@ -98,8 +98,7 @@ static inline bool pfn_valid(unsigned long pfn) # define virt_to_page(vaddr) pfn_to_page(PFN_DOWN(virt_to_phys(vaddr))) # define virt_addr_valid(vaddr) pfn_valid(PFN_DOWN(virt_to_phys(vaddr))) -# define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +# define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h index 01069db594547c..aab6e64d6db42e 100644 --- a/arch/openrisc/include/asm/page.h +++ b/arch/openrisc/include/asm/page.h @@ -86,11 +86,6 @@ typedef struct page *pgtable_t; #endif /* __ASSEMBLY__ */ - -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - - #include #include diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 796ae29e9b9a95..6b3f6740a6a678 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -180,9 +180,6 @@ extern int npmem_ranges; #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include #include diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 080a0bf8e54bb9..3ee8df0f66e036 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -240,13 +240,8 @@ static inline bool pfn_valid(unsigned long pfn) * and needs to be executable. This means the whole heap ends * up being executable. */ -#define VM_DATA_DEFAULT_FLAGS32 \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ - VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS32 VM_DATA_FLAGS_TSK_EXEC +#define VM_DATA_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC #ifdef __powerpc64__ #include diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 5962797f784ae7..79a9b7c6a132ca 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -94,11 +94,8 @@ extern u64 ppc64_pft_size; * stack by default, so in the absence of a PT_GNU_STACK program header * we turn execute permission off. */ -#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS32 VM_DATA_FLAGS_EXEC +#define VM_STACK_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC #define VM_STACK_DEFAULT_FLAGS \ (is_32bit_task() ? \ diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 8ca1930caa44ed..2d50f76efe4819 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -137,8 +137,7 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include #include diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index f2d4c1bd34291d..cc98f9b78fd4b5 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -181,8 +181,7 @@ int arch_make_page_accessible(struct page *page); #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include #include diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index 5eef8be3e59f9e..ea8d68f58e391d 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -182,9 +182,6 @@ typedef struct page *pgtable_t; #endif #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h index b76d59edec8c35..4782600028364a 100644 --- a/arch/sparc/include/asm/page_32.h +++ b/arch/sparc/include/asm/page_32.h @@ -133,9 +133,6 @@ extern unsigned long pfn_base; #define pfn_valid(pfn) (((pfn) >= (pfn_base)) && (((pfn)-(pfn_base)) < max_mapnr)) #define virt_addr_valid(kaddr) ((((unsigned long)(kaddr)-PAGE_OFFSET)>>PAGE_SHIFT) < max_mapnr) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #include diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index e80f2d5bf62f76..254dffd85fb147 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -158,9 +158,6 @@ extern unsigned long PAGE_OFFSET; #endif /* !(__ASSEMBLY__) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif /* _SPARC64_PAGE_H */ diff --git a/arch/unicore32/include/asm/page.h b/arch/unicore32/include/asm/page.h index 8a89335673f9e9..96d6bdf180bd37 100644 --- a/arch/unicore32/include/asm/page.h +++ b/arch/unicore32/include/asm/page.h @@ -69,9 +69,6 @@ extern int pfn_valid(unsigned long); #endif /* !__ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c85e15010f48da..e27aa6be63208d 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -35,9 +35,7 @@ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \ CONFIG_PHYSICAL_ALIGN) diff --git a/arch/x86/um/asm/vm-flags.h b/arch/x86/um/asm/vm-flags.h index 7c297e9e241387..df7a3896f5dd77 100644 --- a/arch/x86/um/asm/vm-flags.h +++ b/arch/x86/um/asm/vm-flags.h @@ -9,17 +9,11 @@ #ifdef CONFIG_X86_32 -#define VM_DATA_DEFAULT_FLAGS \ - (VM_READ | VM_WRITE | \ - ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC #else -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_READ | VM_WRITE | \ - VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS (VM_GROWSDOWN | VM_DATA_FLAGS_EXEC) #endif #endif diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index f4771c29c7e952..37ce25ef92d6f2 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -203,8 +203,5 @@ static inline unsigned long ___pa(unsigned long va) #endif /* __ASSEMBLY__ */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - #include #endif /* _XTENSA_PAGE_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index ed896cedd4c417..33076fa149c836 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -343,6 +343,20 @@ extern unsigned int kobjsize(const void *objp); /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) +#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) + +/* Common data flag combinations */ +#define VM_DATA_FLAGS_TSK_EXEC (VM_READ | VM_WRITE | TASK_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_NON_EXEC (VM_READ | VM_WRITE | VM_MAYREAD | \ + VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_FLAGS_EXEC (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#ifndef VM_DATA_DEFAULT_FLAGS /* arch can override this */ +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_EXEC +#endif + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS #endif -- cgit 1.2.3-korg From 6cb4d9a2870d2062e34c93bfef4d52fca3fe42d1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:09 -0700 Subject: mm/vma: introduce VM_ACCESS_FLAGS There are many places where all basic VMA access flags (read, write, exec) are initialized or checked against as a group. One such example is during page fault. Existing vma_is_accessible() wrapper already creates the notion of VMA accessibility as a group access permissions. Hence lets just create VM_ACCESS_FLAGS (VM_READ|VM_WRITE|VM_EXEC) which will not only reduce code duplication but also extend the VMA accessibility concept in general. Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Reviewed-by: Vlastimil Babka Cc: Russell King Cc: Catalin Marinas Cc: Mark Salter Cc: Nick Hu Cc: Ley Foon Tan Cc: Michael Ellerman Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Guan Xuetao Cc: Dave Hansen Cc: Thomas Gleixner Cc: Rob Springer Cc: Greg Kroah-Hartman Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/1583391014-8170-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/arm/mm/fault.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/nds32/mm/fault.c | 2 +- arch/powerpc/mm/book3s64/pkeys.c | 2 +- arch/s390/mm/fault.c | 2 +- arch/unicore32/mm/fault.c | 2 +- arch/x86/mm/pkeys.c | 2 +- drivers/staging/gasket/gasket_core.c | 2 +- include/linux/mm.h | 6 +++++- mm/mmap.c | 2 +- mm/mprotect.c | 4 ++-- 11 files changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index b598e6978b2995..2dd5c41cbb8d47 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -189,7 +189,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) */ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) { - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; if ((fsr & FSR_WRITE) && !(fsr & FSR_CM)) mask = VM_WRITE; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1027851d469adb..c9cedc0432d25d 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -445,7 +445,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned long vm_flags = VM_ACCESS_FLAGS; unsigned int mm_flags = FAULT_FLAG_DEFAULT; if (kprobe_page_fault(regs, esr)) diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 0cf0c08c7da21f..f331e533edc2b1 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -79,7 +79,7 @@ void do_page_fault(unsigned long entry, unsigned long addr, struct vm_area_struct *vma; int si_code; vm_fault_t fault; - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; unsigned int flags = FAULT_FLAG_DEFAULT; error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE); diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 07527f1ed1089f..1199fc2bfaec91 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -315,7 +315,7 @@ int __execute_only_pkey(struct mm_struct *mm) static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) { /* Do this check first since the vm_flags should be hot */ - if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC) return false; return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index d56f67745e3e31..9822a1fd1c6bea 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -580,7 +580,7 @@ void do_dat_exception(struct pt_regs *regs) int access; vm_fault_t fault; - access = VM_READ | VM_EXEC | VM_WRITE; + access = VM_ACCESS_FLAGS; fault = do_exception(regs, access); if (unlikely(fault)) do_fault_error(regs, access, fault); diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index a9bd08fbe5884a..3022104aa61372 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -149,7 +149,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) */ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma) { - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mask = VM_ACCESS_FLAGS; if (!(fsr ^ 0x12)) /* write? */ mask = VM_WRITE; diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index c6f84c0b5d7a5d..8873ed1438a978 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -63,7 +63,7 @@ int __execute_only_pkey(struct mm_struct *mm) static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) { /* Do this check first since the vm_flags should be hot */ - if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC) return false; if (vma_pkey(vma) != vma->vm_mm->context.execute_only_pkey) return false; diff --git a/drivers/staging/gasket/gasket_core.c b/drivers/staging/gasket/gasket_core.c index cd181a64f73785..8e0575fcb4c8c7 100644 --- a/drivers/staging/gasket/gasket_core.c +++ b/drivers/staging/gasket/gasket_core.c @@ -689,7 +689,7 @@ static bool gasket_mmap_has_permissions(struct gasket_dev *gasket_dev, /* Make sure that no wrong flags are set. */ requested_permissions = - (vma->vm_flags & (VM_WRITE | VM_READ | VM_EXEC)); + (vma->vm_flags & VM_ACCESS_FLAGS); if (requested_permissions & ~(bar_permissions)) { dev_dbg(gasket_dev->dev, "Attempting to map a region with requested permissions 0x%x, but region has permissions 0x%x.\n", diff --git a/include/linux/mm.h b/include/linux/mm.h index 33076fa149c836..4db1522d7c4884 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -369,6 +369,10 @@ extern unsigned int kobjsize(const void *objp); #define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) +/* VMA basic access permission flags */ +#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) + + /* * Special vmas that are non-mergable, non-mlock()able. */ @@ -646,7 +650,7 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) static inline bool vma_is_accessible(struct vm_area_struct *vma) { - return vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC); + return vma->vm_flags & VM_ACCESS_FLAGS; } #ifdef CONFIG_SHMEM diff --git a/mm/mmap.c b/mm/mmap.c index de07bbc0e21f63..f609e9ec4a253f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1224,7 +1224,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct * return a->vm_end == b->vm_start && mpol_equal(vma_policy(a), vma_policy(b)) && a->vm_file == b->vm_file && - !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) && + !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) && b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT); } diff --git a/mm/mprotect.c b/mm/mprotect.c index 1d823b0503299b..494192ca954b42 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -419,7 +419,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, */ if (arch_has_pfn_modify_check() && (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) && - (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) { + (newflags & VM_ACCESS_FLAGS) == 0) { pgprot_t new_pgprot = vm_get_page_prot(newflags); error = walk_page_range(current->mm, start, end, @@ -598,7 +598,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, newflags |= (vma->vm_flags & ~mask_off_old_flags); /* newflags >> 4 shift VM_MAY% in place of VM_% */ - if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) { + if ((newflags & ~(newflags >> 4)) & VM_ACCESS_FLAGS) { error = -EACCES; goto out; } -- cgit 1.2.3-korg From 78e7c5af080b86e9f28afac5a8307ddab1d2c1a3 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 10 Apr 2020 14:33:13 -0700 Subject: mm/special: create generic fallbacks for pte_special() and pte_mkspecial() Currently there are many platforms that dont enable ARCH_HAS_PTE_SPECIAL but required to define quite similar fallback stubs for special page table entry helpers such as pte_special() and pte_mkspecial(), as they get build in generic MM without a config check. This creates two generic fallback stub definitions for these helpers, eliminating much code duplication. mips platform has a special case where pte_special() and pte_mkspecial() visibility is wider than what ARCH_HAS_PTE_SPECIAL enablement requires. This restricts those symbol visibility in order to avoid redefinitions which is now exposed through this new generic stubs and subsequent build failure. arm platform set_pte_at() definition needs to be moved into a C file just to prevent a build failure. [anshuman.khandual@arm.com: use defined(CONFIG_ARCH_HAS_PTE_SPECIAL) in mips per Thomas] Link: http://lkml.kernel.org/r/1583851924-21603-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Signed-off-by: Andrew Morton Acked-by: Guo Ren [csky] Acked-by: Geert Uytterhoeven [m68k] Acked-by: Stafford Horne [openrisc] Acked-by: Helge Deller [parisc] Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Russell King Cc: Brian Cain Cc: Tony Luck Cc: Fenghua Yu Cc: Sam Creasey Cc: Michal Simek Cc: Ralf Baechle Cc: Paul Burton Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Ley Foon Tan Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: "James E.J. Bottomley" Cc: "David S. Miller" Cc: Jeff Dike Cc: Richard Weinberger Cc: Anton Ivanov Cc: Guan Xuetao Cc: Chris Zankel Cc: Max Filippov Cc: Thomas Bogendoerfer Link: http://lkml.kernel.org/r/1583802551-15406-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgtable.h | 2 -- arch/arm/include/asm/pgtable-2level.h | 2 -- arch/arm/include/asm/pgtable.h | 15 ++--------- arch/arm/mm/mmu.c | 14 ++++++++++ arch/csky/include/asm/pgtable.h | 3 --- arch/hexagon/include/asm/pgtable.h | 2 -- arch/ia64/include/asm/pgtable.h | 2 -- arch/m68k/include/asm/mcf_pgtable.h | 10 -------- arch/m68k/include/asm/motorola_pgtable.h | 2 -- arch/m68k/include/asm/sun3_pgtable.h | 2 -- arch/microblaze/include/asm/pgtable.h | 4 --- arch/mips/include/asm/pgtable.h | 44 ++++++++++++++++++++++---------- arch/nds32/include/asm/pgtable.h | 9 ------- arch/nios2/include/asm/pgtable.h | 3 --- arch/openrisc/include/asm/pgtable.h | 2 -- arch/parisc/include/asm/pgtable.h | 2 -- arch/sparc/include/asm/pgtable_32.h | 7 ----- arch/um/include/asm/pgtable.h | 10 -------- arch/unicore32/include/asm/pgtable.h | 3 --- arch/xtensa/include/asm/pgtable.h | 3 --- include/linux/mm.h | 12 +++++++++ 21 files changed, 58 insertions(+), 95 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 299791ce14b6b9..0267aa8a4f8647 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -268,7 +268,6 @@ extern inline void pud_clear(pud_t * pudp) { pud_val(*pudp) = 0; } extern inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_FOW); } extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -extern inline int pte_special(pte_t pte) { return 0; } extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_FOW; return pte; } extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(__DIRTY_BITS); return pte; } @@ -276,7 +275,6 @@ extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~(__ACCESS_BITS); ret extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return pte; } extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; } extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } -extern inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 0d3ea35c97fe37..9e084a464a9784 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -211,8 +211,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pmd_addr_end(addr,end) (end) #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext) -#define pte_special(pte) (0) -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * We don't have huge page support for short descriptors, for the moment diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 0483cf4133152f..befc8fcec98f75 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -243,19 +243,8 @@ static inline void __sync_icache_dcache(pte_t pteval) extern void __sync_icache_dcache(pte_t pteval); #endif -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) -{ - unsigned long ext = 0; - - if (addr < TASK_SIZE && pte_valid_user(pteval)) { - if (!pte_special(pteval)) - __sync_icache_dcache(pteval); - ext |= PTE_EXT_NG; - } - - set_pte_ext(ptep, pteval, ext); -} +void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) { diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 69a337df619f18..ec8d0008bfa1c8 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1646,3 +1646,17 @@ void __init early_mm_init(const struct machine_desc *mdesc) build_mem_type_table(); early_paging_init(mdesc); } + +void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + unsigned long ext = 0; + + if (addr < TASK_SIZE && pte_valid_user(pteval)) { + if (!pte_special(pteval)) + __sync_icache_dcache(pteval); + ext |= PTE_EXT_NG; + } + + set_pte_ext(ptep, pteval, ext); +} diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index 9b7764cb76450f..9ab4a445ad99c9 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -110,9 +110,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; extern void load_pgd(unsigned long pg_dir); extern pte_t invalid_pte_table[PTRS_PER_PTE]; -static inline int pte_special(pte_t pte) { return 0; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - static inline void set_pte(pte_t *p, pte_t pte) { *p = pte; diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 2fec20ad939eed..d383e8bea5b240 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -158,8 +158,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* located in head.S */ /* Seems to be zero even in architectures where the zero page is firewalled? */ #define FIRST_USER_ADDRESS 0UL -#define pte_special(pte) 0 -#define pte_mkspecial(pte) (pte) /* HUGETLB not working currently */ #ifdef CONFIG_HUGETLB_PAGE diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index d602e7c622dbf6..0e7b645b76c6f9 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -298,7 +298,6 @@ extern unsigned long VMALLOC_END; #define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0) #define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) #define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) -#define pte_special(pte) 0 /* * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the @@ -311,7 +310,6 @@ extern unsigned long VMALLOC_END; #define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) #define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) #define pte_mkhuge(pte) (__pte(pte_val(pte))) -#define pte_mkspecial(pte) (pte) /* * Because ia64's Icache and Dcache is not coherent (on a cpu), we need to diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index b9f45aeded2596..0031cd387b7559 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -235,11 +235,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & CF_PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) -{ - return 0; -} - static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~CF_PAGE_WRITABLE; @@ -312,11 +307,6 @@ static inline pte_t pte_mkcache(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return pte; -} - #define swapper_pg_dir kernel_pg_dir extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 4b91a470ad58ea..48f19f0ab1e7f8 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -174,7 +174,6 @@ static inline void pud_set(pud_t *pudp, pmd_t *pmdp) static inline int pte_write(pte_t pte) { return !(pte_val(pte) & _PAGE_RONLY); } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RONLY; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } @@ -192,7 +191,6 @@ static inline pte_t pte_mkcache(pte_t pte) pte_val(pte) = (pte_val(pte) & _CACHEMASK040) | m68k_supervisor_cachemode; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h index bc415526481078..0caa18a084370c 100644 --- a/arch/m68k/include/asm/sun3_pgtable.h +++ b/arch/m68k/include/asm/sun3_pgtable.h @@ -155,7 +155,6 @@ static inline void pmd_clear (pmd_t *pmdp) { pmd_val (*pmdp) = 0; } static inline int pte_write(pte_t pte) { return pte_val(pte) & SUN3_PAGE_WRITEABLE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & SUN3_PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & SUN3_PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~SUN3_PAGE_MODIFIED; return pte; } @@ -168,7 +167,6 @@ static inline pte_t pte_mknocache(pte_t pte) { pte_val(pte) |= SUN3_PAGE_NOCACHE //static inline pte_t pte_mkcache(pte_t pte) { pte_val(pte) &= SUN3_PAGE_NOCACHE; return pte; } // until then, use: static inline pte_t pte_mkcache(pte_t pte) { return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 45b30878fc17a8..6b056f6545d85d 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -77,10 +77,6 @@ extern pte_t *va_to_pte(unsigned long address); * Undefined behaviour if not.. */ -static inline int pte_special(pte_t pte) { return 0; } - -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - /* Start and end of the vmalloc area. */ /* Make sure to map the vmalloc area above the pinned kernel memory area of 32Mb. */ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index aef5378f909c1f..f1801e7a4b15e7 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -269,6 +269,36 @@ cache_sync_done: */ extern pgd_t swapper_pg_dir[]; +/* + * Platform specific pte_special() and pte_mkspecial() definitions + * are required only when ARCH_HAS_PTE_SPECIAL is enabled. + */ +#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) +#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) +static inline int pte_special(pte_t pte) +{ + return pte.pte_low & _PAGE_SPECIAL; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte.pte_low |= _PAGE_SPECIAL; + return pte; +} +#else +static inline int pte_special(pte_t pte) +{ + return pte_val(pte) & _PAGE_SPECIAL; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= _PAGE_SPECIAL; + return pte; +} +#endif +#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. @@ -277,7 +307,6 @@ extern pgd_t swapper_pg_dir[]; static inline int pte_write(pte_t pte) { return pte.pte_low & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte.pte_low & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte.pte_low & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return pte.pte_low & _PAGE_SPECIAL; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -338,17 +367,10 @@ static inline pte_t pte_mkyoung(pte_t pte) } return pte; } - -static inline pte_t pte_mkspecial(pte_t pte) -{ - pte.pte_low |= _PAGE_SPECIAL; - return pte; -} #else static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline pte_t pte_wrprotect(pte_t pte) { @@ -392,12 +414,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) -{ - pte_val(pte) |= _PAGE_SPECIAL; - return pte; -} - #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; } diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index 6abc58ac406dcd..476cc4dd17096d 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -286,15 +286,6 @@ PTE_BIT_FUNC(mkclean, &=~_PAGE_D); PTE_BIT_FUNC(mkdirty, |=_PAGE_D); PTE_BIT_FUNC(mkold, &=~_PAGE_YOUNG); PTE_BIT_FUNC(mkyoung, |=_PAGE_YOUNG); -static inline int pte_special(pte_t pte) -{ - return 0; -} - -static inline pte_t pte_mkspecial(pte_t pte) -{ - return pte; -} /* * Mark the prot value as uncacheable and unbufferable. diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 99985d8b71664a..f98b7f4519ba36 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -113,7 +113,6 @@ static inline int pte_dirty(pte_t pte) \ { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) \ { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } #define pgprot_noncached pgprot_noncached @@ -168,8 +167,6 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 248d22d8faa7b1..7f3fb9ceb0839f 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -236,8 +236,6 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } static inline pte_t pte_wrprotect(pte_t pte) { diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index f0a3659505362c..9832c73a70211d 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -377,7 +377,6 @@ static inline void pud_clear(pud_t *pud) { static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } @@ -385,7 +384,6 @@ static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; ret static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } /* * Huge pte definitions. diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 6d6f44c0cad9b9..0de659ae0ba428 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -223,11 +223,6 @@ static inline int pte_young(pte_t pte) return pte_val(pte) & SRMMU_REF; } -static inline int pte_special(pte_t pte) -{ - return 0; -} - static inline pte_t pte_wrprotect(pte_t pte) { return __pte(pte_val(pte) & ~SRMMU_WRITE); @@ -258,8 +253,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return __pte(pte_val(pte) | SRMMU_REF); } -#define pte_mkspecial(pte) (pte) - #define pfn_pte(pfn, prot) mk_pte(pfn_to_page(pfn), prot) static inline unsigned long pte_pfn(pte_t pte) diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 2daa58df2190fb..b5ddf5d98bd50e 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -167,11 +167,6 @@ static inline int pte_newprot(pte_t pte) return(pte_present(pte) && (pte_get_bits(pte, _PAGE_NEWPROT))); } -static inline int pte_special(pte_t pte) -{ - return 0; -} - /* * ================================= * Flags setting section. @@ -247,11 +242,6 @@ static inline pte_t pte_mknewpage(pte_t pte) return(pte); } -static inline pte_t pte_mkspecial(pte_t pte) -{ - return(pte); -} - static inline void set_pte(pte_t *pteptr, pte_t pteval) { pte_copy(*pteptr, pteval); diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index c8f7ba12f309b2..3b8731b3a93786 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -177,7 +177,6 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & PTE_YOUNG) #define pte_exec(pte) (pte_val(pte) & PTE_EXEC) -#define pte_special(pte) (0) #define PTE_BIT_FUNC(fn, op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } @@ -189,8 +188,6 @@ PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY); PTE_BIT_FUNC(mkold, &= ~PTE_YOUNG); PTE_BIT_FUNC(mkyoung, |= PTE_YOUNG); -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } - /* * Mark the prot value as uncacheable. */ diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 27ac17c9da0969..8be0c0568c50c1 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -266,7 +266,6 @@ static inline void paging_init(void) { } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITABLE; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_special(pte_t pte) { return 0; } static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~(_PAGE_WRITABLE | _PAGE_HW_WRITE); return pte; } @@ -280,8 +279,6 @@ static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITABLE; return pte; } -static inline pte_t pte_mkspecial(pte_t pte) - { return pte; } #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CA_MASK)) diff --git a/include/linux/mm.h b/include/linux/mm.h index 4db1522d7c4884..5a323422d783d0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1927,6 +1927,18 @@ static inline void sync_mm_rss(struct mm_struct *mm) } #endif +#ifndef CONFIG_ARCH_HAS_PTE_SPECIAL +static inline int pte_special(pte_t pte) +{ + return 0; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + return pte; +} +#endif + #ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { -- cgit 1.2.3-korg From 96c6b598135e7cec66161e8943823470c7c8954e Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:17 -0700 Subject: mm/memory_hotplug: drop the flags field from struct mhp_restrictions Patch series "Allow setting caching mode in arch_add_memory() for P2PDMA", v4. Currently, the page tables created using memremap_pages() are always created with the PAGE_KERNEL cacheing mode. However, the P2PDMA code is creating pages for PCI BAR memory which should never be accessed through the cache and instead use either WC or UC. This still works in most cases, on x86, because the MTRR registers typically override the caching settings in the page tables for all of the IO memory to be UC-. However, this tends not to work so well on other arches or some rare x86 machines that have firmware which does not setup the MTRR registers in this way. Instead of this, this series proposes a change to arch_add_memory() to take the pgprot required by the mapping which allows us to explicitly set pagetable entries for P2PDMA memory to UC. This changes is pretty routine for most of the arches: x86_64, arm64 and powerpc simply need to thread the pgprot through to where the page tables are setup. x86_32 unfortunately sets up the page tables at boot so must use _set_memory_prot() to change their caching mode. ia64, s390 and sh don't appear to have an easy way to change the page tables so, for now at least, we just return -EINVAL on such mappings and thus they will not support P2PDMA memory until the work for this is done. This should be fine as they don't yet support ZONE_DEVICE. This patch (of 7): This variable is not used anywhere and should therefore be removed from the structure. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Christoph Hellwig Cc: Catalin Marinas Cc: Will Deacon Cc: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Link: http://lkml.kernel.org/r/20200306170846.9333-2-logang@deltatee.com Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ef55115320fb91..7c1bcff11672aa 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -59,11 +59,9 @@ enum { /* * Restrictions for the memory hotplug: - * flags: MHP_ flags * altmap: alternative allocator for memmap array */ struct mhp_restrictions { - unsigned long flags; struct vmem_altmap *altmap; }; -- cgit 1.2.3-korg From f5637d3b42ab0465ef71d5fb8461bce97fba95e8 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:21 -0700 Subject: mm/memory_hotplug: rename mhp_restrictions to mhp_params The mhp_restrictions struct really doesn't specify anything resembling a restriction anymore so rename it to be mhp_params as it is a list of extended parameters. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dave Hansen Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-3-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 4 ++-- arch/ia64/mm/init.c | 4 ++-- arch/powerpc/mm/mem.c | 4 ++-- arch/s390/mm/init.c | 6 +++--- arch/sh/mm/init.c | 4 ++-- arch/x86/mm/init_32.c | 4 ++-- arch/x86/mm/init_64.c | 8 ++++---- include/linux/memory_hotplug.h | 16 ++++++++-------- mm/memory_hotplug.c | 8 ++++---- mm/memremap.c | 8 ++++---- 10 files changed, 33 insertions(+), 33 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9b08f7c7e6f0f7..6d4e9c2b4ed015 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1374,7 +1374,7 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) } int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { int ret, flags = 0; @@ -1387,7 +1387,7 @@ int arch_add_memory(int nid, u64 start, u64 size, memblock_clear_nomap(start, size); ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, - restrictions); + params); if (ret) __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b01d68a2d5d973..97bbc23ea1e3d8 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -670,13 +670,13 @@ mem_init (void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9b4f5fb719e0f2..e1cc581158163d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -122,7 +122,7 @@ static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, } int __ref arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -138,7 +138,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, return -EFAULT; } - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } void __ref arch_remove_memory(int nid, u64 start, u64 size, diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ac44bd76db4be1..e9e4a7abd0cc55 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -268,20 +268,20 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); int rc; - if (WARN_ON_ONCE(restrictions->altmap)) + if (WARN_ON_ONCE(params->altmap)) return -EINVAL; rc = vmem_add_mapping(start, size); if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, restrictions); + rc = __add_pages(nid, start_pfn, size_pages, params); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index d1b1ff2be17aa7..e5114c053364b1 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -406,14 +406,14 @@ void __init mem_init(void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; /* We only have ZONE_NORMAL, so this is easy.. */ - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index de73992b8432d0..d736c862550391 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -819,12 +819,12 @@ void __init mem_init(void) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } void arch_remove_memory(int nid, u64 start, u64 size, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0a14711d3a931e..faa86a9a3b0d82 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -843,11 +843,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) } int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, restrictions); + ret = __add_pages(nid, start_pfn, nr_pages, params); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -858,14 +858,14 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, } int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, restrictions); + return add_pages(nid, start_pfn, nr_pages, params); } #define PAGE_INUSE 0xFD diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 7c1bcff11672aa..75f0f6304735b8 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -58,10 +58,10 @@ enum { }; /* - * Restrictions for the memory hotplug: - * altmap: alternative allocator for memmap array + * Extended parameters for memory hotplug: + * altmap: alternative allocator for memmap array (optional) */ -struct mhp_restrictions { +struct mhp_params { struct vmem_altmap *altmap; }; @@ -112,7 +112,7 @@ extern int restore_online_page_callback(online_page_callback_t callback); extern int try_online_node(int nid); extern int arch_add_memory(int nid, u64 start, u64 size, - struct mhp_restrictions *restrictions); + struct mhp_params *params); extern u64 max_mem_size; extern int memhp_online_type_from_str(const char *str); @@ -133,17 +133,17 @@ extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions); + struct mhp_params *params); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, struct mhp_restrictions *restrictions) + unsigned long nr_pages, struct mhp_params *params) { - return __add_pages(nid, start_pfn, nr_pages, restrictions); + return __add_pages(nid, start_pfn, nr_pages, params); } #else /* ARCH_HAS_ADD_PAGES */ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions); + struct mhp_params *params); #endif /* ARCH_HAS_ADD_PAGES */ #ifdef CONFIG_NUMA diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 635e8e2865988e..fbfe7b40f552dd 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -304,12 +304,12 @@ static int check_hotplug_memory_addressable(unsigned long pfn, * add the new pages. */ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, - struct mhp_restrictions *restrictions) + struct mhp_params *params) { const unsigned long end_pfn = pfn + nr_pages; unsigned long cur_nr_pages; int err; - struct vmem_altmap *altmap = restrictions->altmap; + struct vmem_altmap *altmap = params->altmap; err = check_hotplug_memory_addressable(pfn, nr_pages); if (err) @@ -1002,7 +1002,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res) { - struct mhp_restrictions restrictions = {}; + struct mhp_params params = {}; u64 start, size; bool new_node = false; int ret; @@ -1030,7 +1030,7 @@ int __ref add_memory_resource(int nid, struct resource *res) new_node = ret; /* call arch's memory hotadd */ - ret = arch_add_memory(nid, start, size, &restrictions); + ret = arch_add_memory(nid, start, size, ¶ms); if (ret < 0) goto error; diff --git a/mm/memremap.c b/mm/memremap.c index bbf457c4f166bb..b0b5170843ffeb 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -184,7 +184,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) { struct resource *res = &pgmap->res; struct dev_pagemap *conflict_pgmap; - struct mhp_restrictions restrictions = { + struct mhp_params params = { /* * We do not want any optional features only our own memmap */ @@ -302,7 +302,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) */ if (pgmap->type == MEMORY_DEVICE_PRIVATE) { error = add_pages(nid, PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), &restrictions); + PHYS_PFN(resource_size(res)), ¶ms); } else { error = kasan_add_zero_shadow(__va(res->start), resource_size(res)); if (error) { @@ -311,7 +311,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) } error = arch_add_memory(nid, res->start, resource_size(res), - &restrictions); + ¶ms); } if (!error) { @@ -319,7 +319,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE]; move_pfn_range_to_zone(zone, PHYS_PFN(res->start), - PHYS_PFN(resource_size(res)), restrictions.altmap); + PHYS_PFN(resource_size(res)), params.altmap); } mem_hotplug_done(); -- cgit 1.2.3-korg From c164fbb40c43f8041f4d05ec9996d8ee343c92b1 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:24 -0700 Subject: x86/mm: thread pgprot_t through init_memory_mapping() In preparation to support a pgprot_t argument for arch_add_memory(). It's required to move the prototype of init_memory_mapping() seeing the original location came before the definition of pgprot_t. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Eric Badger Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-4-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_types.h | 3 --- arch/x86/include/asm/pgtable.h | 3 +++ arch/x86/kernel/amd_gart_64.c | 3 ++- arch/x86/mm/init.c | 9 +++++---- arch/x86/mm/init_32.c | 3 ++- arch/x86/mm/init_64.c | 32 ++++++++++++++++++-------------- arch/x86/mm/mm_internal.h | 3 ++- arch/x86/platform/uv/bios_uv.c | 3 ++- 8 files changed, 34 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index e27aa6be63208d..a506a411474d4a 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -71,9 +71,6 @@ static inline phys_addr_t get_max_mapped(void) bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - extern void initmem_init(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index abad0da0973a88..4d02e64af1b3f4 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1081,6 +1081,9 @@ static inline void __meminit init_trampoline_default(void) void __init poking_init(void); +unsigned long init_memory_mapping(unsigned long start, + unsigned long end, pgprot_t prot); + # ifdef CONFIG_RANDOMIZE_MEMORY void __meminit init_trampoline(void); # else diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 4e5f502360480e..16133819415c7d 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -744,7 +744,8 @@ int __init gart_iommu_init(void) start_pfn = PFN_DOWN(aper_base); if (!pfn_range_is_mapped(start_pfn, end_pfn)) - init_memory_mapping(start_pfn<> PAGE_SHIFT, ret >> PAGE_SHIFT); @@ -521,7 +522,7 @@ static unsigned long __init init_range_memory_mapping( */ can_use_brk_pgt = max(start, (u64)pgt_buf_end<= min(end, (u64)pgt_buf_top<> PAGE_SHIFT, - PAGE_KERNEL_LARGE), + prot), init); spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; @@ -669,7 +672,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, static unsigned long __meminit phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, - unsigned long page_size_mask, bool init) + unsigned long page_size_mask, pgprot_t prot, bool init) { unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; @@ -679,7 +682,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!pgtable_l5_enabled()) return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, - page_size_mask, init); + page_size_mask, prot, init); for (; vaddr < vaddr_end; vaddr = vaddr_next) { p4d_t *p4d = p4d_page + p4d_index(vaddr); @@ -702,13 +705,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!p4d_none(*p4d)) { pud = pud_offset(p4d, 0); paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); continue; } pud = alloc_low_page(); paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); p4d_populate_init(&init_mm, p4d, pud, init); @@ -722,7 +725,7 @@ static unsigned long __meminit __kernel_physical_mapping_init(unsigned long paddr_start, unsigned long paddr_end, unsigned long page_size_mask, - bool init) + pgprot_t prot, bool init) { bool pgd_changed = false; unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; @@ -743,13 +746,13 @@ __kernel_physical_mapping_init(unsigned long paddr_start, paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), page_size_mask, - init); + prot, init); continue; } p4d = alloc_low_page(); paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end), - page_size_mask, init); + page_size_mask, prot, init); spin_lock(&init_mm.page_table_lock); if (pgtable_l5_enabled()) @@ -778,10 +781,10 @@ __kernel_physical_mapping_init(unsigned long paddr_start, unsigned long __meminit kernel_physical_mapping_init(unsigned long paddr_start, unsigned long paddr_end, - unsigned long page_size_mask) + unsigned long page_size_mask, pgprot_t prot) { return __kernel_physical_mapping_init(paddr_start, paddr_end, - page_size_mask, true); + page_size_mask, prot, true); } /* @@ -796,7 +799,8 @@ kernel_physical_mapping_change(unsigned long paddr_start, unsigned long page_size_mask) { return __kernel_physical_mapping_init(paddr_start, paddr_end, - page_size_mask, false); + page_size_mask, PAGE_KERNEL, + false); } #ifndef CONFIG_NUMA @@ -863,7 +867,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - init_memory_mapping(start, start + size); + init_memory_mapping(start, start + size, PAGE_KERNEL); return add_pages(nid, start_pfn, nr_pages, params); } diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index eeae142062ed4b..3f37b5c80bb32f 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h @@ -12,7 +12,8 @@ void early_ioremap_page_table_range_init(void); unsigned long kernel_physical_mapping_init(unsigned long start, unsigned long end, - unsigned long page_size_mask); + unsigned long page_size_mask, + pgprot_t prot); unsigned long kernel_physical_mapping_change(unsigned long start, unsigned long end, unsigned long page_size_mask); diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 607f58147311c7..c60255da5a6cd0 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -352,7 +352,8 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, if (type == EFI_MEMORY_MAPPED_IO) return ioremap(phys_addr, size); - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size, + PAGE_KERNEL); if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { unsigned long top = last_map_pfn << PAGE_SHIFT; efi_ioremap(top, size - (top - phys_addr), type, attribute); -- cgit 1.2.3-korg From 30796e18c29942c4d64bf89c4135c975393ec1ad Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:28 -0700 Subject: x86/mm: introduce __set_memory_prot() For use in the 32bit arch_add_memory() to set the pgprot type of the memory to add. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christoph Hellwig Cc: David Hildenbrand Cc: Eric Badger Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Michal Hocko Cc: Paul Mackerras Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-5-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/set_memory.h | 1 + arch/x86/mm/pat/set_memory.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 950532ccbc4adc..ec2c0a094b5da3 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -34,6 +34,7 @@ * The caller is required to take care of these. */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot); int _set_memory_uc(unsigned long addr, int numpages); int _set_memory_wc(unsigned long addr, int numpages); int _set_memory_wt(unsigned long addr, int numpages); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 6d5424069e2b1c..59eca6a94ce796 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1795,6 +1795,19 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages, CPA_PAGES_ARRAY, pages); } +/* + * _set_memory_prot is an internal helper for callers that have been passed + * a pgprot_t value from upper layers and a reservation has already been taken. + * If you want to set the pgprot to a specific page protocol, use the + * set_memory_xx() functions. + */ +int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot) +{ + return change_page_attr_set_clr(&addr, numpages, prot, + __pgprot(~pgprot_val(prot)), 0, 0, + NULL); +} + int _set_memory_uc(unsigned long addr, int numpages) { /* -- cgit 1.2.3-korg From 4e00c5affdd4b04e6392001716333971932f3d0c Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:32 -0700 Subject: powerpc/mm: thread pgprot_t through create_section_mapping() In prepartion to support a pgprot_t argument for arch_add_memory(). Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Hansen Cc: David Hildenbrand Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Michal Hocko Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-6-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/book3s/64/hash.h | 3 ++- arch/powerpc/include/asm/book3s/64/radix.h | 3 ++- arch/powerpc/include/asm/sparsemem.h | 3 ++- arch/powerpc/mm/book3s64/hash_utils.c | 5 +++-- arch/powerpc/mm/book3s64/pgtable.c | 7 ++++--- arch/powerpc/mm/book3s64/radix_pgtable.c | 18 +++++++++++------- arch/powerpc/mm/mem.c | 5 +++-- 7 files changed, 27 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 2781ebf6add4fc..6fc4520092c7b5 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -251,7 +251,8 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start, extern void hash__vmemmap_remove_mapping(unsigned long start, unsigned long page_size); -int hash__create_section_mapping(unsigned long start, unsigned long end, int nid); +int hash__create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); int hash__remove_section_mapping(unsigned long start, unsigned long end); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index a1c60d5b50af7d..08c222d5b764b4 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -294,7 +294,8 @@ static inline unsigned long radix__get_tree_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int radix__create_section_mapping(unsigned long start, unsigned long end, int nid); +int radix__create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); int radix__remove_section_mapping(unsigned long start, unsigned long end); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 3192d454a733ad..c89b32443cff30 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -13,7 +13,8 @@ #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end, int nid); +extern int create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 7e5714a69a5842..8ed2411c3f3956 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -809,7 +809,8 @@ int resize_hpt_for_hotplug(unsigned long new_mem_size) return 0; } -int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) +int hash__create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { int rc; @@ -819,7 +820,7 @@ int hash__create_section_mapping(unsigned long start, unsigned long end, int nid } rc = htab_bolt_mapping(start, end, __pa(start), - pgprot_val(PAGE_KERNEL), mmu_linear_psize, + pgprot_val(prot), mmu_linear_psize, mmu_kernel_ssize); if (rc < 0) { diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 2bf7e1b4fd826c..e0bb69c616e4a2 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -171,12 +171,13 @@ void mmu_cleanup_all(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid) +int __meminit create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { if (radix_enabled()) - return radix__create_section_mapping(start, end, nid); + return radix__create_section_mapping(start, end, nid, prot); - return hash__create_section_mapping(start, end, nid); + return hash__create_section_mapping(start, end, nid, prot); } int __meminit remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 2a9a0cd7949009..8f9edf07063ad5 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -254,7 +254,7 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end) static int __meminit create_physical_mapping(unsigned long start, unsigned long end, - int nid) + int nid, pgprot_t _prot) { unsigned long vaddr, addr, mapping_size = 0; bool prev_exec, exec = false; @@ -290,7 +290,7 @@ static int __meminit create_physical_mapping(unsigned long start, prot = PAGE_KERNEL_X; exec = true; } else { - prot = PAGE_KERNEL; + prot = _prot; exec = false; } @@ -334,7 +334,7 @@ static void __init radix_init_pgtable(void) WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size, - -1)); + -1, PAGE_KERNEL)); } /* Find out how many PID bits are supported */ @@ -713,8 +713,10 @@ static int __meminit stop_machine_change_mapping(void *data) spin_unlock(&init_mm.page_table_lock); pte_clear(&init_mm, params->aligned_start, params->pte); - create_physical_mapping(__pa(params->aligned_start), __pa(params->start), -1); - create_physical_mapping(__pa(params->end), __pa(params->aligned_end), -1); + create_physical_mapping(__pa(params->aligned_start), + __pa(params->start), -1, PAGE_KERNEL); + create_physical_mapping(__pa(params->end), __pa(params->aligned_end), + -1, PAGE_KERNEL); spin_lock(&init_mm.page_table_lock); return 0; } @@ -871,14 +873,16 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) radix__flush_tlb_kernel_range(start, end); } -int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid) +int __meminit radix__create_section_mapping(unsigned long start, + unsigned long end, int nid, + pgprot_t prot) { if (end >= RADIX_VMALLOC_START) { pr_warn("Outside the supported range\n"); return -1; } - return create_physical_mapping(__pa(start), __pa(end), nid); + return create_physical_mapping(__pa(start), __pa(end), nid, prot); } int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e1cc581158163d..bf63ab04db63ed 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -90,7 +90,8 @@ int memory_add_physaddr_to_nid(u64 start) } #endif -int __weak create_section_mapping(unsigned long start, unsigned long end, int nid) +int __weak create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot) { return -ENODEV; } @@ -131,7 +132,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, resize_hpt_for_hotplug(memblock_phys_mem_size()); start = (unsigned long)__va(start); - rc = create_section_mapping(start, start + size, nid); + rc = create_section_mapping(start, start + size, nid, PAGE_KERNEL); if (rc) { pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); -- cgit 1.2.3-korg From bfeb022f8fe4c5afdcfd7a3d868fac9765f9bcad Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:36 -0700 Subject: mm/memory_hotplug: add pgprot_t to mhp_params devm_memremap_pages() is currently used by the PCI P2PDMA code to create struct page mappings for IO memory. At present, these mappings are created with PAGE_KERNEL which implies setting the PAT bits to be WB. However, on x86, an mtrr register will typically override this and force the cache type to be UC-. In the case firmware doesn't set this register it is effectively WB and will typically result in a machine check exception when it's accessed. Other arches are not currently likely to function correctly seeing they don't have any MTRR registers to fall back on. To solve this, provide a way to specify the pgprot value explicitly to arch_add_memory(). Of the arches that support MEMORY_HOTPLUG: x86_64, and arm64 need a simple change to pass the pgprot_t down to their respective functions which set up the page tables. For x86_32, set the page tables explicitly using _set_memory_prot() (seeing they are already mapped). For ia64, s390 and sh, reject anything but PAGE_KERNEL settings -- this should be fine, for now, seeing these architectures don't support ZONE_DEVICE. A check in __add_pages() is also added to ensure the pgprot parameter was set for all arches. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Acked-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Dan Williams Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Dave Hansen Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-7-logang@deltatee.com Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 3 ++- arch/ia64/mm/init.c | 3 +++ arch/powerpc/mm/mem.c | 3 ++- arch/s390/mm/init.c | 3 +++ arch/sh/mm/init.c | 3 +++ arch/x86/mm/init_32.c | 12 ++++++++++++ arch/x86/mm/init_64.c | 2 +- include/linux/memory_hotplug.h | 3 +++ mm/memory_hotplug.c | 5 ++++- mm/memremap.c | 6 +++--- 10 files changed, 36 insertions(+), 7 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6d4e9c2b4ed015..a374e4f51a6259 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1382,7 +1382,8 @@ int arch_add_memory(int nid, u64 start, u64 size, flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, PAGE_KERNEL, __pgd_pgtable_alloc, flags); + size, params->pgprot, __pgd_pgtable_alloc, + flags); memblock_clear_nomap(start, size); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 97bbc23ea1e3d8..d637b4ea314773 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -676,6 +676,9 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long nr_pages = size >> PAGE_SHIFT; int ret; + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + return -EINVAL; + ret = __add_pages(nid, start_pfn, nr_pages, params); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index bf63ab04db63ed..041ed7cfd341a2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -132,7 +132,8 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, resize_hpt_for_hotplug(memblock_phys_mem_size()); start = (unsigned long)__va(start); - rc = create_section_mapping(start, start + size, nid, PAGE_KERNEL); + rc = create_section_mapping(start, start + size, nid, + params->pgprot); if (rc) { pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index e9e4a7abd0cc55..87b2d024e75af4 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -277,6 +277,9 @@ int arch_add_memory(int nid, u64 start, u64 size, if (WARN_ON_ONCE(params->altmap)) return -EINVAL; + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + return -EINVAL; + rc = vmem_add_mapping(start, size); if (rc) return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index e5114c053364b1..b9de2d4fa57e34 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -412,6 +412,9 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long nr_pages = size >> PAGE_SHIFT; int ret; + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot) + return -EINVAL; + /* We only have ZONE_NORMAL, so this is easy.. */ ret = __add_pages(nid, start_pfn, nr_pages, params); if (unlikely(ret)) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ac75a839780463..4222a010057a9c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -824,6 +824,18 @@ int arch_add_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + int ret; + + /* + * The page tables were already mapped at boot so if the caller + * requests a different mapping type then we must change all the + * pages with __set_memory_prot(). + */ + if (params->pgprot.pgprot != PAGE_KERNEL.pgprot) { + ret = __set_memory_prot(start, nr_pages, params->pgprot); + if (ret) + return ret; + } return __add_pages(nid, start_pfn, nr_pages, params); } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7480de74310573..3b289c2f75cdf9 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -867,7 +867,7 @@ int arch_add_memory(int nid, u64 start, u64 size, unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - init_memory_mapping(start, start + size, PAGE_KERNEL); + init_memory_mapping(start, start + size, params->pgprot); return add_pages(nid, start_pfn, nr_pages, params); } diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 75f0f6304735b8..93d9ada74ddd69 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -60,9 +60,12 @@ enum { /* * Extended parameters for memory hotplug: * altmap: alternative allocator for memmap array (optional) + * pgprot: page protection flags to apply to newly created page tables + * (required) */ struct mhp_params { struct vmem_altmap *altmap; + pgprot_t pgprot; }; /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fbfe7b40f552dd..fc0aad0bc1f548 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -311,6 +311,9 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, int err; struct vmem_altmap *altmap = params->altmap; + if (WARN_ON_ONCE(!params->pgprot.pgprot)) + return -EINVAL; + err = check_hotplug_memory_addressable(pfn, nr_pages); if (err) return err; @@ -1002,7 +1005,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res) { - struct mhp_params params = {}; + struct mhp_params params = { .pgprot = PAGE_KERNEL }; u64 start, size; bool new_node = false; int ret; diff --git a/mm/memremap.c b/mm/memremap.c index b0b5170843ffeb..bc167cde32379b 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -189,8 +189,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) * We do not want any optional features only our own memmap */ .altmap = pgmap_altmap(pgmap), + .pgprot = PAGE_KERNEL, }; - pgprot_t pgprot = PAGE_KERNEL; int error, is_ram; bool need_devmap_managed = true; @@ -282,8 +282,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) if (nid < 0) nid = numa_mem_id(); - error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(res->start), 0, - resource_size(res)); + error = track_pfn_remap(NULL, ¶ms.pgprot, PHYS_PFN(res->start), + 0, resource_size(res)); if (error) goto err_pfn_remap; -- cgit 1.2.3-korg From a50d8d98a87f33efa07adfa20747e13a93839a4b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 10 Apr 2020 14:33:39 -0700 Subject: mm/memremap: set caching mode for PCI P2PDMA memory to WC PCI BAR IO memory should never be mapped as WB, however prior to this the PAT bits were set WB and it was typically overridden by MTRR registers set by the firmware. Set PCI P2PDMA memory to be UC as this is what it currently, typically, ends up being mapped as on x86 after the MTRR registers override the cache setting. Future use-cases may need to generalize this by adding flags to select the caching type, as some P2PDMA cases may not want UC. However, those use-cases are not upstream yet and this can be changed when they arrive. Signed-off-by: Logan Gunthorpe Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Andy Lutomirski Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dave Hansen Cc: David Hildenbrand Cc: Eric Badger Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Michael Ellerman Cc: Michal Hocko Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/20200306170846.9333-8-logang@deltatee.com Signed-off-by: Linus Torvalds --- mm/memremap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/memremap.c b/mm/memremap.c index bc167cde32379b..03e38b7a38f1a4 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -217,7 +217,10 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) } break; case MEMORY_DEVICE_DEVDAX: + need_devmap_managed = false; + break; case MEMORY_DEVICE_PCI_P2PDMA: + params.pgprot = pgprot_noncached(params.pgprot); need_devmap_managed = false; break; default: -- cgit 1.2.3-korg From d7d27cfc5cf0766a26a8f56868c5ad5434735126 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:43 -0700 Subject: kmod: make request_module() return an error when autoloading is disabled Patch series "module autoloading fixes and cleanups", v5. This series fixes a bug where request_module() was reporting success to kernel code when module autoloading had been completely disabled via 'echo > /proc/sys/kernel/modprobe'. It also addresses the issues raised on the original thread (https://lkml.kernel.org/lkml/20200310223731.126894-1-ebiggers@kernel.org/T/#u) bydocumenting the modprobe sysctl, adding a self-test for the empty path case, and downgrading a user-reachable WARN_ONCE(). This patch (of 4): It's long been possible to disable kernel module autoloading completely (while still allowing manual module insertion) by setting /proc/sys/kernel/modprobe to the empty string. This can be preferable to setting it to a nonexistent file since it avoids the overhead of an attempted execve(), avoids potential deadlocks, and avoids the call to security_kernel_module_request() and thus on SELinux-based systems eliminates the need to write SELinux rules to dontaudit module_request. However, when module autoloading is disabled in this way, request_module() returns 0. This is broken because callers expect 0 to mean that the module was successfully loaded. Apparently this was never noticed because this method of disabling module autoloading isn't used much, and also most callers don't use the return value of request_module() since it's always necessary to check whether the module registered its functionality or not anyway. But improperly returning 0 can indeed confuse a few callers, for example get_fs_type() in fs/filesystems.c where it causes a WARNING to be hit: if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); } This is easily reproduced with: echo > /proc/sys/kernel/modprobe mount -t NONEXISTENT none / It causes: request_module fs-NONEXISTENT succeeded, but still no fs? WARNING: CPU: 1 PID: 1106 at fs/filesystems.c:275 get_fs_type+0xd6/0xf0 [...] This should actually use pr_warn_once() rather than WARN_ONCE(), since it's also user-reachable if userspace immediately unloads the module. Regardless, request_module() should correctly return an error when it fails. So let's make it return -ENOENT, which matches the error when the modprobe binary doesn't exist. I've also sent patches to document and test this case. Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Reviewed-by: Kees Cook Reviewed-by: Jessica Yu Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Ben Hutchings Cc: Josh Triplett Cc: Link: http://lkml.kernel.org/r/20200310223731.126894-1-ebiggers@kernel.org Link: http://lkml.kernel.org/r/20200312202552.241885-1-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- kernel/kmod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kmod.c b/kernel/kmod.c index 8b2b311afa957d..37c3c4b97b8ea6 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -120,7 +120,7 @@ out: * invoke it. * * If module auto-loading support is disabled then this function - * becomes a no-operation. + * simply returns -ENOENT. */ int __request_module(bool wait, const char *fmt, ...) { @@ -137,7 +137,7 @@ int __request_module(bool wait, const char *fmt, ...) WARN_ON_ONCE(wait && current_is_async()); if (!modprobe_path[0]) - return 0; + return -ENOENT; va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); -- cgit 1.2.3-korg From 26c5d78c976ca298e59a56f6101a97b618ba3539 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:47 -0700 Subject: fs/filesystems.c: downgrade user-reachable WARN_ONCE() to pr_warn_once() After request_module(), nothing is stopping the module from being unloaded until someone takes a reference to it via try_get_module(). The WARN_ONCE() in get_fs_type() is thus user-reachable, via userspace running 'rmmod' concurrently. Since WARN_ONCE() is for kernel bugs only, not for user-reachable situations, downgrade this warning to pr_warn_once(). Keep it printed once only, since the intent of this warning is to detect a bug in modprobe at boot time. Printing the warning more than once wouldn't really provide any useful extra information. Fixes: 41124db869b7 ("fs: warn in case userspace lied about modprobe return") Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Reviewed-by: Jessica Yu Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: Luis Chamberlain Cc: NeilBrown Cc: [4.13+] Link: http://lkml.kernel.org/r/20200312202552.241885-3-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- fs/filesystems.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/filesystems.c b/fs/filesystems.c index 77bf5f95362da5..90b8d879fbaf3d 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -272,7 +272,9 @@ struct file_system_type *get_fs_type(const char *name) fs = __get_fs_type(name, len); if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); - WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); + if (!fs) + pr_warn_once("request_module fs-%.*s succeeded, but still no fs?\n", + len, name); } if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { -- cgit 1.2.3-korg From 6e71582506258c9b2efd8f164706f2af2256cf16 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:50 -0700 Subject: docs: admin-guide: document the kernel.modprobe sysctl Document the kernel.modprobe sysctl in the same place that all the other kernel.* sysctls are documented. Make sure to mention how to use this sysctl to completely disable module autoloading, and how this sysctl relates to CONFIG_STATIC_USERMODEHELPER. [ebiggers@google.com: v5] Link: http://lkml.kernel.org/r/20200318230515.171692-4-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: Luis Chamberlain Cc: NeilBrown Link: http://lkml.kernel.org/r/20200312202552.241885-4-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- Documentation/admin-guide/sysctl/kernel.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 335696d3360da6..39c95c0e13d30d 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -446,6 +446,27 @@ Notes: successful IPC object allocation. If an IPC object allocation syscall fails, it is undefined if the value remains unmodified or is reset to -1. +modprobe: +========= + +The path to the usermode helper for autoloading kernel modules, by +default "/sbin/modprobe". This binary is executed when the kernel +requests a module. For example, if userspace passes an unknown +filesystem type to mount(), then the kernel will automatically request +the corresponding filesystem module by executing this usermode helper. +This usermode helper should insert the needed module into the kernel. + +This sysctl only affects module autoloading. It has no effect on the +ability to explicitly insert modules. + +If this sysctl is set to the empty string, then module autoloading is +completely disabled. The kernel will not try to execute a usermode +helper at all, nor will it call the kernel_module_request LSM hook. + +If CONFIG_STATIC_USERMODEHELPER=y is set in the kernel configuration, +then the configured static usermode helper overrides this sysctl, +except that the empty string is still accepted to completely disable +module autoloading as described above. nmi_watchdog ============ -- cgit 1.2.3-korg From 6d573a07528308eb77ec072c010819c359bebf6e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:53 -0700 Subject: selftests: kmod: fix handling test numbers above 9 get_test_count() and get_test_enabled() were broken for test numbers above 9 due to awk interpreting a field specification like '$0010' as octal rather than decimal. Fix it by stripping the leading zeroes. Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: NeilBrown Link: http://lkml.kernel.org/r/20200318230515.171692-5-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- tools/testing/selftests/kmod/kmod.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 8b944cf042f6c8..315a43111e0467 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -505,18 +505,23 @@ function test_num() fi } -function get_test_count() +function get_test_data() { test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + local field_num=$(echo $1 | sed 's/^0*//') + echo $ALL_TESTS | awk '{print $'$field_num'}' +} + +function get_test_count() +{ + TEST_DATA=$(get_test_data $1) LAST_TWO=${TEST_DATA#*:*} echo ${LAST_TWO%:*} } function get_test_enabled() { - test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + TEST_DATA=$(get_test_data $1) echo ${TEST_DATA#*:*:} } -- cgit 1.2.3-korg From 23756e551f35aaa9400a7e8a2660494115221801 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Apr 2020 14:33:57 -0700 Subject: selftests: kmod: test disabling module autoloading Test that request_module() fails with -ENOENT when /proc/sys/kernel/modprobe contains (a) a nonexistent path, and (b) an empty path. Case (b) is a regression test for the patch "kmod: make request_module() return an error when autoloading is disabled". Tested with 'kmod.sh -t 0010 && kmod.sh -t 0011', and also simply with 'kmod.sh' to run all kmod tests. Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Acked-by: Luis Chamberlain Cc: Alexei Starovoitov Cc: Greg Kroah-Hartman Cc: Jeff Vander Stoep Cc: Jessica Yu Cc: Kees Cook Cc: NeilBrown Link: http://lkml.kernel.org/r/20200312202552.241885-5-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- tools/testing/selftests/kmod/kmod.sh | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 315a43111e0467..3702dbcc90a773 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -61,6 +61,8 @@ ALL_TESTS="$ALL_TESTS 0006:10:1" ALL_TESTS="$ALL_TESTS 0007:5:1" ALL_TESTS="$ALL_TESTS 0008:150:1" ALL_TESTS="$ALL_TESTS 0009:150:1" +ALL_TESTS="$ALL_TESTS 0010:1:1" +ALL_TESTS="$ALL_TESTS 0011:1:1" # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -149,6 +151,7 @@ function load_req_mod() test_finish() { + echo "$MODPROBE" > /proc/sys/kernel/modprobe echo "Test completed" } @@ -443,6 +446,30 @@ kmod_test_0009() config_expect_result ${FUNCNAME[0]} SUCCESS } +kmod_test_0010() +{ + kmod_defaults_driver + config_num_threads 1 + echo "/KMOD_TEST_NONEXISTENT" > /proc/sys/kernel/modprobe + config_trigger ${FUNCNAME[0]} + config_expect_result ${FUNCNAME[0]} -ENOENT + echo "$MODPROBE" > /proc/sys/kernel/modprobe +} + +kmod_test_0011() +{ + kmod_defaults_driver + config_num_threads 1 + # This causes the kernel to not even try executing modprobe. The error + # code is still -ENOENT like when modprobe doesn't exist, so we can't + # easily test for the exact difference. But this still is a useful test + # since there was a bug where request_module() returned 0 in this case. + echo > /proc/sys/kernel/modprobe + config_trigger ${FUNCNAME[0]} + config_expect_result ${FUNCNAME[0]} -ENOENT + echo "$MODPROBE" > /proc/sys/kernel/modprobe +} + list_tests() { echo "Test ID list:" @@ -460,6 +487,8 @@ list_tests() echo "0007 x $(get_test_count 0007) - multithreaded tests with default setup test request_module() and get_fs_type()" echo "0008 x $(get_test_count 0008) - multithreaded - push kmod_concurrent over max_modprobes for request_module()" echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()" + echo "0010 x $(get_test_count 0010) - test nonexistent modprobe path" + echo "0011 x $(get_test_count 0011) - test completely disabling module autoloading" } usage() @@ -616,6 +645,7 @@ test_reqs allow_user_defaults load_req_mod +MODPROBE=$( Date: Fri, 10 Apr 2020 14:34:00 -0700 Subject: change email address for Pali Rohár MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For security reasons I stopped using gmail account and kernel address is now up-to-date alias to my personal address. People periodically send me emails to address which they found in source code of drivers, so this change reflects state where people can contact me. [ Added .mailmap entry as per Joe Perches - Linus ] Signed-off-by: Pali Rohár Signed-off-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: Joe Perches Link: http://lkml.kernel.org/r/20200307104237.8199-1-pali@kernel.org Signed-off-by: Linus Torvalds --- .mailmap | 1 + Documentation/ABI/testing/sysfs-platform-dell-laptop | 8 ++++---- MAINTAINERS | 16 ++++++++-------- arch/arm/mach-omap2/omap-secure.c | 2 +- arch/arm/mach-omap2/omap-secure.h | 2 +- arch/arm/mach-omap2/omap-smc.S | 2 +- drivers/char/hw_random/omap3-rom-rng.c | 4 ++-- drivers/hwmon/dell-smm-hwmon.c | 4 ++-- drivers/platform/x86/dell-laptop.c | 4 ++-- drivers/platform/x86/dell-rbtn.c | 4 ++-- drivers/platform/x86/dell-rbtn.h | 2 +- drivers/platform/x86/dell-smbios-base.c | 4 ++-- drivers/platform/x86/dell-smbios-smm.c | 2 +- drivers/platform/x86/dell-smbios.h | 2 +- drivers/platform/x86/dell-smo8800.c | 2 +- drivers/platform/x86/dell-wmi.c | 4 ++-- drivers/power/supply/bq2415x_charger.c | 4 ++-- drivers/power/supply/bq27xxx_battery.c | 2 +- drivers/power/supply/isp1704_charger.c | 2 +- drivers/power/supply/rx51_battery.c | 4 ++-- fs/udf/ecma_167.h | 2 +- fs/udf/osta_udf.h | 2 +- include/linux/power/bq2415x_charger.h | 2 +- tools/laptop/freefall/freefall.c | 2 +- 24 files changed, 42 insertions(+), 41 deletions(-) diff --git a/.mailmap b/.mailmap index 9198a93c2f5cb0..893266d1f7b00c 100644 --- a/.mailmap +++ b/.mailmap @@ -210,6 +210,7 @@ Oleksij Rempel Oleksij Rempel Oleksij Rempel Oleksij Rempel +Pali Rohár Paolo 'Blaisorblade' Giarrusso Patrick Mochel Paul Burton diff --git a/Documentation/ABI/testing/sysfs-platform-dell-laptop b/Documentation/ABI/testing/sysfs-platform-dell-laptop index 8c6a0b8e113133..9b917c7453dee7 100644 --- a/Documentation/ABI/testing/sysfs-platform-dell-laptop +++ b/Documentation/ABI/testing/sysfs-platform-dell-laptop @@ -2,7 +2,7 @@ What: /sys/class/leds/dell::kbd_backlight/als_enabled Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This file allows to control the automatic keyboard illumination mode on some systems that have an ambient @@ -13,7 +13,7 @@ What: /sys/class/leds/dell::kbd_backlight/als_setting Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This file allows to specifiy the on/off threshold value, as reported by the ambient light sensor. @@ -22,7 +22,7 @@ What: /sys/class/leds/dell::kbd_backlight/start_triggers Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This file allows to control the input triggers that turn on the keyboard backlight illumination that is @@ -45,7 +45,7 @@ What: /sys/class/leds/dell::kbd_backlight/stop_timeout Date: December 2014 KernelVersion: 3.19 Contact: Gabriele Mazzotta , - Pali Rohár + Pali Rohár Description: This file allows to specify the interval after which the keyboard illumination is disabled because of inactivity. diff --git a/MAINTAINERS b/MAINTAINERS index d5b1878f28154a..ff043097ea0e4d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -727,7 +727,7 @@ L: linux-alpha@vger.kernel.org F: arch/alpha/ ALPS PS/2 TOUCHPAD DRIVER -R: Pali Rohár +R: Pali Rohár F: drivers/input/mouse/alps.* ALTERA I2C CONTROLLER DRIVER @@ -4774,23 +4774,23 @@ F: drivers/net/fddi/defza.* DELL LAPTOP DRIVER M: Matthew Garrett -M: Pali Rohár +M: Pali Rohár L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/dell-laptop.c DELL LAPTOP FREEFALL DRIVER -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/platform/x86/dell-smo8800.c DELL LAPTOP RBTN DRIVER -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/platform/x86/dell-rbtn.* DELL LAPTOP SMM DRIVER -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/hwmon/dell-smm-hwmon.c F: include/uapi/linux/i8k.h @@ -4802,7 +4802,7 @@ S: Maintained F: drivers/platform/x86/dell_rbu.c DELL SMBIOS DRIVER -M: Pali Rohár +M: Pali Rohár M: Mario Limonciello L: platform-driver-x86@vger.kernel.org S: Maintained @@ -4835,7 +4835,7 @@ F: drivers/platform/x86/dell-wmi-descriptor.c DELL WMI NOTIFICATIONS DRIVER M: Matthew Garrett -M: Pali Rohár +M: Pali Rohár S: Maintained F: drivers/platform/x86/dell-wmi.c @@ -11950,7 +11950,7 @@ F: drivers/media/i2c/et8ek8 F: drivers/media/i2c/ad5820.c NOKIA N900 POWER SUPPLY DRIVERS -R: Pali Rohár +R: Pali Rohár F: include/linux/power/bq2415x_charger.h F: include/linux/power/bq27xxx_battery.h F: drivers/power/supply/bq2415x_charger.c diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c index d00e3c72e37dd2..f70d561f37f713 100644 --- a/arch/arm/mach-omap2/omap-secure.c +++ b/arch/arm/mach-omap2/omap-secure.c @@ -5,7 +5,7 @@ * Copyright (C) 2011 Texas Instruments, Inc. * Santosh Shilimkar * Copyright (C) 2012 Ivaylo Dimitrov - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár */ #include diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index ba8c486c0454bd..4aaa95706d39f2 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -5,7 +5,7 @@ * Copyright (C) 2011 Texas Instruments, Inc. * Santosh Shilimkar * Copyright (C) 2012 Ivaylo Dimitrov - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár */ #ifndef OMAP_ARCH_OMAP_SECURE_H #define OMAP_ARCH_OMAP_SECURE_H diff --git a/arch/arm/mach-omap2/omap-smc.S b/arch/arm/mach-omap2/omap-smc.S index d4832845a4e8b7..7376f528034db1 100644 --- a/arch/arm/mach-omap2/omap-smc.S +++ b/arch/arm/mach-omap2/omap-smc.S @@ -6,7 +6,7 @@ * Written by Santosh Shilimkar * * Copyright (C) 2012 Ivaylo Dimitrov - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár */ #include diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index a431c5cbe2be9d..e0d77fa048fb6c 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -4,7 +4,7 @@ * Copyright (C) 2009 Nokia Corporation * Author: Juha Yrjola * - * Copyright (C) 2013 Pali Rohár + * Copyright (C) 2013 Pali Rohár * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -178,5 +178,5 @@ module_platform_driver(omap3_rom_rng_driver); MODULE_ALIAS("platform:omap3-rom-rng"); MODULE_AUTHOR("Juha Yrjola"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index d4c83009d62520..ab719d372b0dee 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -7,7 +7,7 @@ * Hwmon integration: * Copyright (C) 2011 Jean Delvare * Copyright (C) 2013, 2014 Guenter Roeck - * Copyright (C) 2014, 2015 Pali Rohár + * Copyright (C) 2014, 2015 Pali Rohár */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -86,7 +86,7 @@ static unsigned int auto_fan; #define I8K_HWMON_HAVE_FAN3 (1 << 12) MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Dell laptop SMM BIOS hwmon driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("i8k"); diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 74e988f839e854..f8d3e3bd1bb5d5 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. @@ -2295,6 +2295,6 @@ module_exit(dell_exit); MODULE_AUTHOR("Matthew Garrett "); MODULE_AUTHOR("Gabriele Mazzotta "); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Dell laptop driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-rbtn.c b/drivers/platform/x86/dell-rbtn.c index a6b856cd86bd62..a89fad47ff1394 100644 --- a/drivers/platform/x86/dell-rbtn.c +++ b/drivers/platform/x86/dell-rbtn.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* Dell Airplane Mode Switch driver - Copyright (C) 2014-2015 Pali Rohár + Copyright (C) 2014-2015 Pali Rohár */ @@ -495,5 +495,5 @@ MODULE_PARM_DESC(auto_remove_rfkill, "Automatically remove rfkill devices when " "(default true)"); MODULE_DEVICE_TABLE(acpi, rbtn_ids); MODULE_DESCRIPTION("Dell Airplane Mode Switch driver"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-rbtn.h b/drivers/platform/x86/dell-rbtn.h index 0fdc8164445892..5e030f926c589e 100644 --- a/drivers/platform/x86/dell-rbtn.h +++ b/drivers/platform/x86/dell-rbtn.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Dell Airplane Mode Switch driver - Copyright (C) 2014-2015 Pali Rohár + Copyright (C) 2014-2015 Pali Rohár */ diff --git a/drivers/platform/x86/dell-smbios-base.c b/drivers/platform/x86/dell-smbios-base.c index fe59b0ebff3149..2e2cd565926aac 100644 --- a/drivers/platform/x86/dell-smbios-base.c +++ b/drivers/platform/x86/dell-smbios-base.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. @@ -645,7 +645,7 @@ module_exit(dell_smbios_exit); MODULE_AUTHOR("Matthew Garrett "); MODULE_AUTHOR("Gabriele Mazzotta "); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_AUTHOR("Mario Limonciello "); MODULE_DESCRIPTION("Common functions for kernel modules using Dell SMBIOS"); MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/dell-smbios-smm.c b/drivers/platform/x86/dell-smbios-smm.c index d6854d1c41198d..97c52a839a3e26 100644 --- a/drivers/platform/x86/dell-smbios-smm.c +++ b/drivers/platform/x86/dell-smbios-smm.c @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * Copyright (c) 2017 Dell Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/platform/x86/dell-smbios.h b/drivers/platform/x86/dell-smbios.h index a7ff9803f41a76..75fa8ea0476dc5 100644 --- a/drivers/platform/x86/dell-smbios.h +++ b/drivers/platform/x86/dell-smbios.h @@ -4,7 +4,7 @@ * * Copyright (c) Red Hat * Copyright (c) 2014 Gabriele Mazzotta - * Copyright (c) 2014 Pali Rohár + * Copyright (c) 2014 Pali Rohár * * Based on documentation in the libsmbios package: * Copyright (C) 2005-2014 Dell Inc. diff --git a/drivers/platform/x86/dell-smo8800.c b/drivers/platform/x86/dell-smo8800.c index b531fe8ab7e0c9..5d9304a7de1b0c 100644 --- a/drivers/platform/x86/dell-smo8800.c +++ b/drivers/platform/x86/dell-smo8800.c @@ -3,7 +3,7 @@ * dell-smo8800.c - Dell Latitude ACPI SMO88XX freefall sensor driver * * Copyright (C) 2012 Sonal Santan - * Copyright (C) 2014 Pali Rohár + * Copyright (C) 2014 Pali Rohár * * This is loosely based on lis3lv02d driver. */ diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index 6669db2555fb0e..86e8dd6a8b33f4 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -3,7 +3,7 @@ * Dell WMI hotkeys * * Copyright (C) 2008 Red Hat - * Copyright (C) 2014-2015 Pali Rohár + * Copyright (C) 2014-2015 Pali Rohár * * Portions based on wistron_btns.c: * Copyright (C) 2005 Miloslav Trmac @@ -29,7 +29,7 @@ #include "dell-wmi-descriptor.h" MODULE_AUTHOR("Matthew Garrett "); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Dell laptop WMI hotkeys driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/power/supply/bq2415x_charger.c b/drivers/power/supply/bq2415x_charger.c index 532f6e4fcafbfa..a1f00ae1c18017 100644 --- a/drivers/power/supply/bq2415x_charger.c +++ b/drivers/power/supply/bq2415x_charger.c @@ -2,7 +2,7 @@ /* * bq2415x charger driver * - * Copyright (C) 2011-2013 Pali Rohár + * Copyright (C) 2011-2013 Pali Rohár * * Datasheets: * http://www.ti.com/product/bq24150 @@ -1788,6 +1788,6 @@ static struct i2c_driver bq2415x_driver = { }; module_i2c_driver(bq2415x_driver); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("bq2415x charger driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 664e50103eaaff..942c92127b6d56 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -4,7 +4,7 @@ * Copyright (C) 2008 Rodolfo Giometti * Copyright (C) 2008 Eurotech S.p.A. * Copyright (C) 2010-2011 Lars-Peter Clausen - * Copyright (C) 2011 Pali Rohár + * Copyright (C) 2011 Pali Rohár * Copyright (C) 2017 Liam Breck * * Based on a previous work by Copyright (C) 2008 Texas Instruments, Inc. diff --git a/drivers/power/supply/isp1704_charger.c b/drivers/power/supply/isp1704_charger.c index 4812ac1ff2dfc2..b6efc454e4f02d 100644 --- a/drivers/power/supply/isp1704_charger.c +++ b/drivers/power/supply/isp1704_charger.c @@ -3,7 +3,7 @@ * ISP1704 USB Charger Detection driver * * Copyright (C) 2010 Nokia Corporation - * Copyright (C) 2012 - 2013 Pali Rohár + * Copyright (C) 2012 - 2013 Pali Rohár */ #include diff --git a/drivers/power/supply/rx51_battery.c b/drivers/power/supply/rx51_battery.c index 8548b639ff2f6d..6e488ecf4dcb24 100644 --- a/drivers/power/supply/rx51_battery.c +++ b/drivers/power/supply/rx51_battery.c @@ -2,7 +2,7 @@ /* * Nokia RX-51 battery driver * - * Copyright (C) 2012 Pali Rohár + * Copyright (C) 2012 Pali Rohár */ #include @@ -278,6 +278,6 @@ static struct platform_driver rx51_battery_driver = { module_platform_driver(rx51_battery_driver); MODULE_ALIAS("platform:rx51-battery"); -MODULE_AUTHOR("Pali Rohár "); +MODULE_AUTHOR("Pali Rohár "); MODULE_DESCRIPTION("Nokia RX-51 battery driver"); MODULE_LICENSE("GPL"); diff --git a/fs/udf/ecma_167.h b/fs/udf/ecma_167.h index 3fd85464abd5f6..736ebc5dc4418b 100644 --- a/fs/udf/ecma_167.h +++ b/fs/udf/ecma_167.h @@ -5,7 +5,7 @@ * http://www.ecma.ch * * Copyright (c) 2001-2002 Ben Fennema - * Copyright (c) 2017-2019 Pali Rohár + * Copyright (c) 2017-2019 Pali Rohár * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/fs/udf/osta_udf.h b/fs/udf/osta_udf.h index 35e61b2cacfe51..d5fbfab3ddb68e 100644 --- a/fs/udf/osta_udf.h +++ b/fs/udf/osta_udf.h @@ -5,7 +5,7 @@ * http://www.osta.org * * Copyright (c) 2001-2004 Ben Fennema - * Copyright (c) 2017-2019 Pali Rohár + * Copyright (c) 2017-2019 Pali Rohár * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/include/linux/power/bq2415x_charger.h b/include/linux/power/bq2415x_charger.h index 7a91b357e3aca0..4ca08321e2516b 100644 --- a/include/linux/power/bq2415x_charger.h +++ b/include/linux/power/bq2415x_charger.h @@ -2,7 +2,7 @@ /* * bq2415x charger driver * - * Copyright (C) 2011-2013 Pali Rohár + * Copyright (C) 2011-2013 Pali Rohár */ #ifndef BQ2415X_CHARGER_H diff --git a/tools/laptop/freefall/freefall.c b/tools/laptop/freefall/freefall.c index d29a86cda87f32..d77d7861787ca5 100644 --- a/tools/laptop/freefall/freefall.c +++ b/tools/laptop/freefall/freefall.c @@ -4,7 +4,7 @@ * Copyright 2008 Eric Piel * Copyright 2009 Pavel Machek * Copyright 2012 Sonal Santan - * Copyright 2014 Pali Rohár + * Copyright 2014 Pali Rohár */ #include -- cgit 1.2.3-korg From cb8d9937e85559ea0ad1f4f83df8ad1288fed47d Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Fri, 10 Apr 2020 14:34:03 -0700 Subject: drivers/dma/tegra20-apb-dma.c: fix platform_get_irq.cocci warnings Remove dev_err() messages after platform_get_irq*() failures. platform_get_irq() already prints an error. Generated by: scripts/coccinelle/api/platform_get_irq.cocci Fixes: 6c41ac96ad92 ("dmaengine: tegra-apb: Support COMPILE_TEST") Signed-off-by: kbuild test robot Signed-off-by: Julia Lawall Signed-off-by: Andrew Morton Reviewed-by: Dmitry Osipenko Acked-by: Thierry Reding Cc: Laxman Dewangan Cc: Vinod Koul Cc: Stephen Warren Cc: Jon Hunter Link: http://lkml.kernel.org/r/alpine.DEB.2.21.2002271133450.2973@hadrien Signed-off-by: Linus Torvalds --- drivers/dma/tegra20-apb-dma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 265303a396caea..f6a2f42ffc5144 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -1493,7 +1493,6 @@ static int tegra_dma_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, i); if (irq < 0) { ret = irq; - dev_err(&pdev->dev, "No irq resource for chan %d\n", i); goto err_pm_disable; } -- cgit 1.2.3-korg From 3bfa7e141b0bbb818b25e0daafb65aee92e49ac4 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Apr 2020 14:34:06 -0700 Subject: fs/seq_file.c: seq_read(): add info message about buggy .next functions Patch series "seq_file .next functions should increase position index". In Aug 2018 NeilBrown noticed commit 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") "Some ->next functions do not increment *pos when they return NULL... Note that such ->next functions are buggy and should be fixed. A simple demonstration is dd if=/proc/swaps bs=1000 skip=1 Choose any block size larger than the size of /proc/swaps. This will always show the whole last line of /proc/swaps" Described problem is still actual. If you make lseek into middle of last output line following read will output end of last line and whole last line once again. $ dd if=/proc/swaps bs=1 # usual output Filename Type Size Used Priority /dev/dm-0 partition 4194812 97536 -2 104+0 records in 104+0 records out 104 bytes copied $ dd if=/proc/swaps bs=40 skip=1 # last line was generated twice dd: /proc/swaps: cannot skip to specified offset v/dm-0 partition 4194812 97536 -2 /dev/dm-0 partition 4194812 97536 -2 3+1 records in 3+1 records out 131 bytes copied There are lot of other affected files, I've found 30+ including /proc/net/ip_tables_matches and /proc/sysvipc/* I've sent patches into maillists of affected subsystems already, this patch-set fixes the problem in files related to pstore, tracing, gcov, sysvipc and other subsystems processed via linux-kernel@ mailing list directly https://bugzilla.kernel.org/show_bug.cgi?id=206283 This patch (of 4): Add debug code to seq_read() to detect missed or out-of-tree incorrect .next seq_file functions. [akpm@linux-foundation.org: s/pr_info/pr_info_ratelimited/, per Qian Cai] https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Cc: NeilBrown Cc: Al Viro Cc: Steven Rostedt Cc: Davidlohr Bueso Cc: Ingo Molnar Cc: Manfred Spraul Cc: Peter Oberparleiter Cc: Waiman Long Link: http://lkml.kernel.org/r/244674e5-760c-86bd-d08a-047042881748@virtuozzo.com Link: http://lkml.kernel.org/r/7c24087c-e280-e580-5b0c-0cdaeb14cd18@virtuozzo.com Signed-off-by: Linus Torvalds --- fs/seq_file.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 79781ebd2145e4..70f5fdf99bf690 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -232,9 +232,12 @@ Fill: loff_t pos = m->index; p = m->op->next(m, p, &m->index); - if (pos == m->index) - /* Buggy ->next function */ + if (pos == m->index) { + pr_info_ratelimited("buggy seq_file .next function %ps " + "did not updated position index\n", + m->op->next); m->index++; + } if (!p || IS_ERR(p)) { err = PTR_ERR(p); break; -- cgit 1.2.3-korg From f4d74ef6220c1eda0875da30457bef5c7111ab06 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Apr 2020 14:34:10 -0700 Subject: kernel/gcov/fs.c: gcov_seq_next() should increase position index If seq_file .next function does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Acked-by: Peter Oberparleiter Cc: Al Viro Cc: Davidlohr Bueso Cc: Ingo Molnar Cc: Manfred Spraul Cc: NeilBrown Cc: Steven Rostedt Cc: Waiman Long Link: http://lkml.kernel.org/r/f65c6ee7-bd00-f910-2f8a-37cc67e4ff88@virtuozzo.com Signed-off-by: Linus Torvalds --- kernel/gcov/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index 5e891c3c2d93b3..82babf5aa077b7 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -108,9 +108,9 @@ static void *gcov_seq_next(struct seq_file *seq, void *data, loff_t *pos) { struct gcov_iterator *iter = data; + (*pos)++; if (gcov_iter_next(iter)) return NULL; - (*pos)++; return iter; } -- cgit 1.2.3-korg From 89163f93c6f969da5811af5377cc10173583123b Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 10 Apr 2020 14:34:13 -0700 Subject: ipc/util.c: sysvipc_find_ipc() should increase position index If seq_file .next function does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: Andrew Morton Acked-by: Waiman Long Cc: Davidlohr Bueso Cc: Manfred Spraul Cc: Al Viro Cc: Ingo Molnar Cc: NeilBrown Cc: Peter Oberparleiter Cc: Steven Rostedt Link: http://lkml.kernel.org/r/b7a20945-e315-8bb0-21e6-3875c14a8494@virtuozzo.com Signed-off-by: Linus Torvalds --- ipc/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/util.c b/ipc/util.c index 97638eb2d7cb1c..7acccfded7cb03 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -764,13 +764,13 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos, total++; } + *new_pos = pos + 1; if (total >= ids->in_use) return NULL; for (; pos < ipc_mni; pos++) { ipc = idr_find(&ids->ipcs_idr, pos); if (ipc != NULL) { - *new_pos = pos + 1; rcu_read_lock(); ipc_lock_object(ipc); return ipc; -- cgit 1.2.3-korg