aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFeng Zhou <zhoufeng.zf@bytedance.com>2021-07-30 16:09:52 +0100
committerMark Brown <broonie@kernel.org>2021-07-30 16:09:52 +0100
commitc97ea4f827f0a9a8a3c5e0030ce4d76b51af02b1 (patch)
tree7092123ba75f2902a372580fedbbb5aaee066b8d
parentc0f93bc1b7a15a45bc3a8b6f414d93779c2766e3 (diff)
downloadlinux-2.6-c97ea4f827f0a9a8a3c5e0030ce4d76b51af02b1.tar.gz
fs/proc/kcore.c: add mmap interface
While monitoring the kernel with drgn (https://github.com/osandov/drgn) to access kernel data structures, we found that it issues a very large number of system calls. drgn is implemented by reading /proc/kcore. Looking at the kcore code, we found that kcore does not implement mmap, so every access goes through read(), which results in frequent context switches. Therefore, we want to add an mmap interface to improve performance. Because the vmalloc and module areas change as memory is allocated and released, their consistency cannot be guaranteed, so the mmap interface only maps KCORE_TEXT and KCORE_RAM.

The test results:

1. the default version of kcore

real 11.00 user 8.53 sys 3.59

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 99.64  128.578319          12  11168701           pread64
...
------ ----------- ----------- --------- --------- ----------------
100.00  129.042853              11193748       966 total

2. kcore with the mmap interface added

real 6.44 user 7.32 sys 0.24

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 32.94    0.130120          24      5317       315 futex
 11.66    0.046077          21      2231         1 lstat
  9.23    0.036449         177       206           mmap
...
------ ----------- ----------- --------- --------- ----------------
100.00    0.395077                 25435       971 total

The test results show that both the number of system calls and the time consumed are significantly reduced.

Link: https://lkml.kernel.org/r/20210704062208.7898-1-zhoufeng.zf@bytedance.com
Co-developed-by: Ying Chen <chenying.kernel@bytedance.com>
Signed-off-by: Ying Chen <chenying.kernel@bytedance.com>
Signed-off-by: Feng Zhou <zhoufeng.zf@bytedance.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Chengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
-rw-r--r--fs/proc/kcore.c73
1 files changed, 73 insertions, 0 deletions
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 982e694aae77de..3f148759a5fd1d 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -614,11 +614,84 @@ static int release_kcore(struct inode *inode, struct file *file)
return 0;
}
+static vm_fault_t mmap_kcore_fault(struct vm_fault *vmf) /* .fault callback referenced by kcore_mmap_ops */
+{ /* NOTE(review): presumably any fault is an error because mmap_kcore() maps the range up front with remap_pfn_range() - confirm */
+ return VM_FAULT_SIGBUS; /* unconditional: vmf is never examined */
+}
+
+static const struct vm_operations_struct kcore_mmap_ops = { /* assigned to vma->vm_ops by mmap_kcore() */
+ .fault = mmap_kcore_fault, /* handler always returns VM_FAULT_SIGBUS */
+};
+
+/*
+ * mmap_kcore - mmap handler for kcore (installed as .proc_mmap).
+ * @file: the opened file; not used here.
+ * @vma:  the mapping being set up; vm_pgoff selects the file offset.
+ *
+ * The file offset is converted to a kernel virtual address by subtracting
+ * the page-masked data offset reported by get_kcore_size() and passing the
+ * result to kc_offset_to_vaddr().  The requested range must lie entirely
+ * inside a single kclist entry, and only KCORE_RAM and KCORE_TEXT entries
+ * are mapped.  The mapping must be neither writable nor executable.
+ *
+ * Return: 0 on success; -EINVAL if the offset lies before the data area or
+ * the range is not contained in one entry; -EPERM if VM_WRITE or VM_EXEC is
+ * requested; -EFAULT if kern_addr_valid() rejects the start address or the
+ * entry type is not one of the two mappable types; otherwise the result of
+ * remap_pfn_range().
+ */
+static int mmap_kcore(struct file *file, struct vm_area_struct *vma)
+{
+	size_t size = vma->vm_end - vma->vm_start;
+	struct kcore_list *pos, *m = NULL;
+	size_t phdrs_len, notes_len;
+	size_t data_offset;
+	u64 start, pfn;
+	int nphdr;
+	int ret = 0;
+
+	/* Held across the lookup and the remap so the entry cannot change. */
+	down_read(&kclist_lock);
+
+	get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
+
+	data_offset &= PAGE_MASK;
+	start = (u64)vma->vm_pgoff << PAGE_SHIFT;
+	if (start < data_offset) {
+		ret = -EINVAL;
+		goto out;
+	}
+	start = kc_offset_to_vaddr(start - data_offset);
+
+	/*
+	 * Find the entry that fully contains [start, start + size).
+	 *
+	 * The comparison is done on offsets relative to the entry instead of
+	 * on absolute end addresses: both "start + size" and "addr + size"
+	 * can wrap around for addresses near the top of the address space,
+	 * which would let an oversized request pass the bounds check (or
+	 * reject a valid one).
+	 *
+	 * A separate result pointer is used so the loop cursor is never
+	 * touched after the loop has run to completion.
+	 */
+	list_for_each_entry(pos, &kclist_head, list) {
+		u64 off;
+
+		if (start < pos->addr)
+			continue;
+
+		off = start - pos->addr;
+		if (off <= pos->size && size <= pos->size - off) {
+			m = pos;
+			break;
+		}
+	}
+
+	if (!m) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (vma->vm_flags & (VM_WRITE | VM_EXEC)) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	/* Also forbid a later mprotect() from adding write/exec. */
+	vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
+	/*
+	 * NOTE(review): remap_pfn_range() marks the VMA as a PFN mapping on
+	 * its own; confirm that VM_MIXEDMAP is really wanted here.
+	 */
+	vma->vm_flags |= VM_MIXEDMAP;
+	vma->vm_ops = &kcore_mmap_ops;
+
+	/* NOTE(review): only the first address of the range is validated. */
+	if (!kern_addr_valid(start)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (m->type == KCORE_RAM) {
+		pfn = __pa(start) >> PAGE_SHIFT;
+	} else if (m->type == KCORE_TEXT) {
+		pfn = __pa_symbol(start) >> PAGE_SHIFT;
+	} else {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = remap_pfn_range(vma, vma->vm_start, pfn, size,
+			      vma->vm_page_prot);
+
+out:
+	up_read(&kclist_lock);
+	return ret;
+}
+
 static const struct proc_ops kcore_proc_ops = { /* proc_ops callbacks for kcore; where it is registered is outside this hunk */
 .proc_read = read_kcore,
 .proc_open = open_kcore,
 .proc_release = release_kcore,
 .proc_lseek = default_llseek,
+ .proc_mmap = mmap_kcore, /* added by this patch: mmap requests are handled by mmap_kcore() */
 };
/* just remember that we have to update kcore */