aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Arcangeli <aarcange@redhat.com>2019-11-04 18:00:01 -0500
committerPaolo Bonzini <pbonzini@redhat.com>2019-11-15 11:43:58 +0100
commit74c504a6d70ab29b2c28bee62f5f39e3dd847ea2 (patch)
treee182fad414ff5a6ea7d8d3e13996886c517f9bdf
parent3dcb2a3fa5a0c903fd754bfba2b8defb9f191974 (diff)
downloadpowerpc-74c504a6d70ab29b2c28bee62f5f39e3dd847ea2.tar.gz
x86: retpolines: eliminate retpoline from msr event handlers
It's enough to check the value and issue the direct call. After this commit is applied, here the most common retpolines executed under a high resolution timer workload in the guest on a VMX host: [..] @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 267 @[]: 2256 @[ trace_retpoline+1 __trace_retpoline+30 __x86_indirect_thunk_rax+33 __kvm_wait_lapic_expire+284 vmx_vcpu_run.part.97+1091 vcpu_enter_guest+377 kvm_arch_vcpu_ioctl_run+261 kvm_vcpu_ioctl+559 do_vfs_ioctl+164 ksys_ioctl+96 __x64_sys_ioctl+22 do_syscall_64+89 entry_SYSCALL_64_after_hwframe+68 ]: 2390 @[]: 33410 @total: 315707 Note the highest hit above is __delay so probably not worth optimizing even if it would be more frequent than 2k hits per sec. Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--arch/x86/events/intel/core.c11
1 files changed, 11 insertions, 0 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fcef678c342304..937363b803c19d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3323,8 +3323,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
return 0;
}
+#ifdef CONFIG_RETPOLINE
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr);
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr);
+#endif
+
struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
+#ifdef CONFIG_RETPOLINE
+ if (x86_pmu.guest_get_msrs == intel_guest_get_msrs)
+ return intel_guest_get_msrs(nr);
+ else if (x86_pmu.guest_get_msrs == core_guest_get_msrs)
+ return core_guest_get_msrs(nr);
+#endif
if (x86_pmu.guest_get_msrs)
return x86_pmu.guest_get_msrs(nr);
*nr = 0;