commit d2b2ea81b693098865444f199ec1e61e75747654 Merge: 29e3e17... 3f46faa... Author: Arnd Bergmann Date: Fri Jul 6 19:25:32 2007 +0200 Merge branches 'netdev-merge' and 'perfmon2' into perfmon2-merge commit 29e3e17760cc6add6c95aaeb76d11893c9f7229a Merge: 2f18e1c... fb8e39e... Author: Arnd Bergmann Date: Fri Jul 6 19:18:10 2007 +0200 Merge branch 'netdev-merge' into perfmon2-merge Conflicts: arch/powerpc/Kconfig include/asm-powerpc/systbl.h include/asm-powerpc/unistd.h commit 2f18e1caf74987f749eba3af99dc39e3e07f0e85 Author: Kevin Corry Date: Fri Jun 29 08:56:00 2007 -0500 Perfmon2: Add support for Cell PMU's hardware-sampling. commit fa0876715a1a02a5170e023ebb39e6cb68abacf7 Author: Stephane Eranian Date: Sun Jun 24 16:22:04 2007 -0500 Update to Perfmon2 version 070621: powerpc code. commit e332e9217b88f537cbf8b882398592f54c557b97 Author: Stephane Eranian Date: Sun Jun 24 15:26:17 2007 -0500 Update to Perfmon2 version 070621: x86-64 code. commit 9e5443f91fc5dd66469ab09331b5d2bee06bb1db Author: Stephane Eranian Date: Sun Jun 24 15:10:58 2007 -0500 Update to Perfmon2 version 070621: mips code. commit f06e44db987b70306896ff5cdfd84023b224949b Author: Stephane Eranian Date: Sat Jun 23 17:22:04 2007 -0500 Update to Perfmon2 version 070621: ia64 code. commit fb9f0a0e62583e51f242b19a817629e75e5961c0 Author: Kevin Corry Date: Sat Jun 23 17:19:55 2007 -0500 Update to Perfmon2 version 070621: i386 code. commit 8c649c0b5e0a210015b1eb45ec82d87b76e7c9d9 Author: Stephane Eranian Date: Sat Jun 23 17:06:31 2007 -0500 Update to Perfmon2 version 070621: core Perfmon2 code. commit 68971f94498ea868c6a3e6d6351ff7120edf14ad Merge: 90ec31a... 0864a4e... Author: Kevin Corry Date: Fri Jun 22 09:09:16 2007 -0500 Merge with git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git commit 90ec31acbcfa76df7ab5c7c98a0b9d6b3e7ec1ce Author: Kevin Corry Date: Thu Jun 21 11:57:54 2007 -0500 Add TIF_PERFMON_WORK and TIF_PERFMON_CTXSW flags. 
This patch has already been picked up in the powerpc tree, but not yet in mainline. This patch is required for the mainline-based Perfmon2 tree to compile on powerpc. commit 483a8ce2532afb62378abfe6758ba833be719a63 Author: Stephane Eranian Date: Thu Jun 21 11:28:50 2007 -0500 Perfmon2: powerpc modifications. Modifications to powerpc architecture code needed for Perfmon2. commit b1a5a6fc3dcb6bff4f19377785a21692fc47620f Author: Stephane Eranian Date: Wed Jun 20 16:12:37 2007 -0500 Perfmon2: new powerpc architecture code commit c4db69f1389171c8977534c8944d7128dee794d3 Author: Stephane Eranian Date: Wed Jun 20 14:45:03 2007 -0500 Perfmon2: x86-64 modifications. Modifications to x86-64 architecture code needed for Perfmon2. commit 77323002a9ffd5367f7d0288619733a15dd403ef Author: Stephane Eranian Date: Wed Jun 20 15:17:55 2007 -0500 Perfmon2: new x86-64 architecture code commit ba8ce1c95c79fafe561bafeff3c5e480a9104d1a Author: Stephane Eranian Date: Wed Jun 20 14:28:23 2007 -0500 Perfmon2: mips modifications. Modifications to mips architecture code needed for Perfmon2. commit 0af167473835e52268d7f5c004e2ec9463a357f4 Author: Stephane Eranian Date: Wed Jun 20 15:15:41 2007 -0500 Perfmon2: new mips architecture code commit 884b8c725e227901f0a548b55803a01cc6f156f6 Author: Stephane Eranian Date: Wed Jun 20 14:11:17 2007 -0500 Perfmon2: ia64 modifications. Modifications to ia64 architecture code needed for Perfmon2. commit af04b61bf1b16df0a1f2c96d2b02b12bf1c39cdd Author: Stephane Eranian Date: Wed Jun 20 15:14:10 2007 -0500 Perfmon2: new ia64 architecture code commit ff7f2d460449a9167c0389ab19d12be2c927aebd Author: Stephane Eranian Date: Thu Jun 21 11:25:24 2007 -0500 Perfmon2: i386 modifications. Modifications to i386 architecture code needed for Perfmon2. 
commit dedfc8b61584483c2511c7afba39bed6159ecee2 Author: Stephane Eranian Date: Wed Jun 20 15:06:45 2007 -0500 Perfmon2: new i386 architecture code commit 9e4067e8ce29fcec3e32a1d89339ada5904eaa7b Author: Stephane Eranian Date: Wed Jun 20 16:04:03 2007 -0500 Perfmon2 arch-independent modifications. Modifications to architecture-independent code needed for Perfmon2. commit 94b60799f0c2e4a1f320ab9025ccfadc2e57d6be Author: Stephane Eranian Date: Thu Jun 21 11:23:22 2007 -0500 Perfmon2: new core code Index: linux-2.6/Makefile =================================================================== --- linux-2.6.orig/Makefile +++ linux-2.6/Makefile @@ -553,7 +553,7 @@ export mod_strip_cmd ifeq ($(KBUILD_EXTMOD),) -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ perfmon/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ Index: linux-2.6/arch/i386/Kconfig =================================================================== --- linux-2.6.orig/arch/i386/Kconfig +++ linux-2.6/arch/i386/Kconfig @@ -910,6 +910,8 @@ config COMPAT_VDSO If unsure, say Y. 
+source "arch/i386/perfmon/Kconfig" + endmenu config ARCH_ENABLE_MEMORY_HOTPLUG Index: linux-2.6/arch/i386/Makefile =================================================================== --- linux-2.6.orig/arch/i386/Makefile +++ linux-2.6/arch/i386/Makefile @@ -99,6 +99,7 @@ mflags-y += -Iinclude/asm-i386/mach-defa head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o libs-y += arch/i386/lib/ +core-$(CONFIG_PERFMON) += arch/i386/perfmon/ core-y += arch/i386/kernel/ \ arch/i386/mm/ \ arch/i386/$(mcore-y)/ \ Index: linux-2.6/arch/i386/kernel/apic.c =================================================================== --- linux-2.6.orig/arch/i386/kernel/apic.c +++ linux-2.6/arch/i386/kernel/apic.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -562,6 +563,8 @@ static void local_apic_timer_interrupt(v per_cpu(irq_stat, cpu).apic_timer_irqs++; evt->event_handler(evt); + + pfm_handle_switch_timeout(); } /* @@ -1325,6 +1328,9 @@ void __init apic_intr_init(void) #ifdef CONFIG_X86_MCE_P4THERMAL set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif +#ifdef CONFIG_PERFMON + set_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt); +#endif } /** Index: linux-2.6/arch/i386/kernel/cpu/common.c =================================================================== --- linux-2.6.orig/arch/i386/kernel/cpu/common.c +++ linux-2.6/arch/i386/kernel/cpu/common.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -718,6 +719,8 @@ void __cpuinit cpu_init(void) current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); + + pfm_init_percpu(); } #ifdef CONFIG_HOTPLUG_CPU Index: linux-2.6/arch/i386/kernel/entry.S =================================================================== --- linux-2.6.orig/arch/i386/kernel/entry.S +++ linux-2.6/arch/i386/kernel/entry.S @@ -465,7 +465,7 @@ ENDPROC(system_call) ALIGN RING0_PTREGS_FRAME # can't unwind into user space anyway work_pending: - testb $_TIF_NEED_RESCHED, %cl 
+ testw $(_TIF_NEED_RESCHED|_TIF_PERFMON_WORK), %cx jz work_notifysig work_resched: call schedule Index: linux-2.6/arch/i386/kernel/process.c =================================================================== --- linux-2.6.orig/arch/i386/kernel/process.c +++ linux-2.6/arch/i386/kernel/process.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -384,6 +385,7 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + pfm_exit_thread(current); } void flush_thread(void) @@ -435,6 +437,8 @@ int copy_thread(int nr, unsigned long cl savesegment(gs,p->thread.gs); + pfm_copy_thread(p); + tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -538,8 +542,9 @@ int dump_task_regs(struct task_struct *t return 1; } -static noinline void __switch_to_xtra(struct task_struct *next_p, - struct tss_struct *tss) +static noinline void __switch_to_xtra(struct task_struct *prev_p, + struct task_struct *next_p, + struct tss_struct *tss) { struct thread_struct *next; @@ -555,6 +560,10 @@ static noinline void __switch_to_xtra(st set_debugreg(next->debugreg[7], 7); } + if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW) + || test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW)) + pfm_ctxsw(prev_p, next_p); + if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Disable the bitmap via an invalid offset. 
We still cache @@ -690,8 +699,8 @@ struct task_struct fastcall * __switch_t * Now maybe handle debug registers and/or IO bitmaps */ if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW) - || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))) - __switch_to_xtra(next_p, tss); + || (task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW))) + __switch_to_xtra(prev_p, next_p, tss); disable_tsc(prev_p, next_p); Index: linux-2.6/arch/i386/kernel/signal.c =================================================================== --- linux-2.6.orig/arch/i386/kernel/signal.c +++ linux-2.6/arch/i386/kernel/signal.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -652,6 +653,9 @@ void do_notify_resume(struct pt_regs *re clear_thread_flag(TIF_SINGLESTEP); } + if (thread_info_flags & _TIF_PERFMON_WORK) + pfm_handle_work(regs); + /* deal with pending signal delivery */ if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs); Index: linux-2.6/arch/i386/kernel/smpboot.c =================================================================== --- linux-2.6.orig/arch/i386/kernel/smpboot.c +++ linux-2.6/arch/i386/kernel/smpboot.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -1207,6 +1208,7 @@ int __cpu_disable(void) cpu_clear(cpu, map); fixup_irqs(map); + pfm_cpu_disable(); /* It's now safe to remove this processor from the online map */ cpu_clear(cpu, cpu_online_map); return 0; Index: linux-2.6/arch/i386/kernel/syscall_table.S =================================================================== --- linux-2.6.orig/arch/i386/kernel/syscall_table.S +++ linux-2.6/arch/i386/kernel/syscall_table.S @@ -323,3 +323,15 @@ ENTRY(sys_call_table) .long sys_signalfd .long sys_timerfd .long sys_eventfd + .long sys_pfm_create_context + .long sys_pfm_write_pmcs /* 325 */ + .long sys_pfm_write_pmds + .long sys_pfm_read_pmds + .long sys_pfm_load_context + .long sys_pfm_start + .long sys_pfm_stop /* 330 */ + .long 
sys_pfm_restart + .long sys_pfm_create_evtsets + .long sys_pfm_getinfo_evtsets + .long sys_pfm_delete_evtsets + .long sys_pfm_unload_context /* 335 */ Index: linux-2.6/arch/i386/oprofile/Makefile =================================================================== --- linux-2.6.orig/arch/i386/oprofile/Makefile +++ linux-2.6/arch/i386/oprofile/Makefile @@ -10,3 +10,4 @@ oprofile-y := $(DRIVER_OBJS) init.o b oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \ op_model_ppro.o op_model_p4.o oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o +oprofile-$(CONFIG_PERFMON) += perfmon.o Index: linux-2.6/arch/i386/oprofile/init.c =================================================================== --- linux-2.6.orig/arch/i386/oprofile/init.c +++ linux-2.6/arch/i386/oprofile/init.c @@ -15,9 +15,11 @@ * with the NMI mode driver. */ +extern int op_perfmon_init(struct oprofile_operations * ops); extern int op_nmi_init(struct oprofile_operations * ops); extern int op_nmi_timer_init(struct oprofile_operations * ops); extern void op_nmi_exit(void); +extern void op_perfmon_exit(void); extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); @@ -27,8 +29,12 @@ int __init oprofile_arch_init(struct opr ret = -ENODEV; +#ifdef CONFIG_PERFMON + ret = op_perfmon_init(ops); +#endif #ifdef CONFIG_X86_LOCAL_APIC - ret = op_nmi_init(ops); + if (ret < 0) + ret = op_nmi_init(ops); #endif #ifdef CONFIG_X86_IO_APIC if (ret < 0) @@ -42,6 +48,9 @@ int __init oprofile_arch_init(struct opr void oprofile_arch_exit(void) { +#ifdef CONFIG_PERFMON + op_perfmon_exit(); +#endif #ifdef CONFIG_X86_LOCAL_APIC op_nmi_exit(); #endif Index: linux-2.6/arch/i386/oprofile/nmi_int.c =================================================================== --- linux-2.6.orig/arch/i386/oprofile/nmi_int.c +++ linux-2.6/arch/i386/oprofile/nmi_int.c @@ -465,6 +465,7 @@ int __init op_nmi_init(struct oprofile_o ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; + 
ops->implementation = "oprofile"; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; } Index: linux-2.6/arch/i386/oprofile/nmi_timer_int.c =================================================================== --- linux-2.6.orig/arch/i386/oprofile/nmi_timer_int.c +++ linux-2.6/arch/i386/oprofile/nmi_timer_int.c @@ -64,6 +64,7 @@ int __init op_nmi_timer_init(struct opro ops->start = timer_start; ops->stop = timer_stop; ops->cpu_type = "timer"; + ops->implementation = "nmi_timer"; printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); return 0; } Index: linux-2.6/arch/i386/oprofile/perfmon.c =================================================================== --- /dev/null +++ linux-2.6/arch/i386/oprofile/perfmon.c @@ -0,0 +1,161 @@ +/** + * @file perfmon.c + * + * @remark Copyright 2003 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#include +#include +#include +#include + +static int allow_ints; + +static int +perfmon_handler(void *buf, struct pfm_ovfl_arg *arg, + unsigned long ip, u64 stamp, void *data) +{ + struct pt_regs * const regs = data; + int event = arg->pmd_eventid; + + PFM_DBG_ovfl("oprofile overflow ip=%lx, event=%d", + instruction_pointer(regs), event); + + arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET; + + /* the owner of the oprofile event buffer may have exited + * without perfmon being shutdown (e.g. 
SIGSEGV) + */ + if (allow_ints) + oprofile_add_sample(regs, event); + return 0; +} + + +static int perfmon_start(void) +{ + allow_ints = 1; + return 0; +} + + +static void perfmon_stop(void) +{ + allow_ints = 0; +} + +static struct pfm_smpl_fmt oprofile_fmt = { + .fmt_name = "OProfile", + .fmt_handler = perfmon_handler, + .fmt_flags = PFM_FMT_BUILTIN_FLAG, + .owner = THIS_MODULE +}; + +/* all the ops are handled via userspace for i386 oprofile using perfmon */ + +static int using_perfmon; + +static int __init ppro_init(char ** cpu_type) +{ + __u8 cpu_model = boot_cpu_data.x86_model; + + if (cpu_model == 14) + *cpu_type = "i386/core"; + else if (cpu_model == 15) + *cpu_type = "i386/core_2"; + else if (cpu_model > 0xd) + return 0; + else if (cpu_model == 9) { + *cpu_type = "i386/p6_mobile"; + } else if (cpu_model > 5) { + *cpu_type = "i386/piii"; + } else if (cpu_model > 2) { + *cpu_type = "i386/pii"; + } else { + *cpu_type = "i386/ppro"; + } + return 1; +} + +static int __init p4_init(char ** cpu_type) +{ +#ifndef CONFIG_SMP + *cpu_type = "i386/p4"; + return 1; +#else + switch (smp_num_siblings) { + case 1: + *cpu_type = "i386/p4"; + return 1; + + case 2: + *cpu_type = "i386/p4-ht"; + return 1; + } +#endif + return 0; +} + +static char *get_cpu_type(void) +{ + char *cpu_type = "??/??"; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + /* Needs to be at least an Athlon (or hammer in 32bit mode) */ + switch (boot_cpu_data.x86) { + case 6: + cpu_type = "i386/athlon"; + break; + case 0xf: + /* Actually it could be i386/hammer too, but give + user space a consistent name. 
*/ + cpu_type = "x86-64/hammer"; + break; + } + break; + case X86_VENDOR_INTEL: + switch (boot_cpu_data.x86) { + /* Pentium IV */ + case 0xf: + p4_init(&cpu_type); + break; + + /* A P6-class processor */ + case 6: + ppro_init(&cpu_type); + break; + } + break; + } + return cpu_type; +} + + +int __init op_perfmon_init(struct oprofile_operations * ops) +{ + int ret = pfm_fmt_register(&oprofile_fmt); + if (ret) + return -ENODEV; + + ops->cpu_type = get_cpu_type(); + ops->start = perfmon_start; + ops->stop = perfmon_stop; + ops->implementation = "perfmon2"; + using_perfmon = 1; + printk(KERN_INFO "oprofile: using perfmon.\n"); + return 0; +} + + +void __exit op_perfmon_exit(void) +{ + if (!using_perfmon) + return; + + pfm_fmt_unregister(&oprofile_fmt); +} + Index: linux-2.6/arch/i386/perfmon/Kconfig =================================================================== --- /dev/null +++ linux-2.6/arch/i386/perfmon/Kconfig @@ -0,0 +1,65 @@ +menu "Hardware Performance Monitoring support" +config PERFMON + bool "Perfmon2 performance monitoring interface" + select X86_LOCAL_APIC + default n + help + Enables the perfmon2 interface to access the hardware + performance counters. See <http://perfmon2.sf.net/> for + more details. + +config PERFMON_DEBUG + bool "Perfmon debugging" + default n + depends on PERFMON + help + Enables perfmon debugging support + +config PERFMON_P6 + tristate "Support for Intel P6/Pentium M processor hardware performance counters" + depends on PERFMON + default n + help + Enables support for Intel P6-style hardware performance counters. + To be used with Intel Pentium III, PentiumPro, Pentium M processors. + +config I386_PERFMON_P4 + tristate "Support for 32-bit Intel Pentium 4/Xeon hardware performance counters" + depends on PERFMON + default n + help + Enables support for 32-bit Intel Pentium 4/Xeon hardware performance + counters. 
+ +config I386_PERFMON_PEBS + tristate "Support for Intel Precise Event-Based Sampling (PEBS)" + depends on PERFMON + default n + help + Enables support for 32-bit Precise Event-Based Sampling (PEBS) on the Intel + Pentium 4, Xeon, and Core-based processors which support it. + +config I386_PERFMON_CORE + tristate "Support for Intel Core-based performance counters" + depends on PERFMON + default n + help + Enables 32-bit support for Intel Core-based performance counters. Enable + this option to support Intel Core 2 Duo processors. + +config I386_PERFMON_INTEL_ARCH + tristate "Support for Intel architectural performance counters" + depends on PERFMON + default n + help + Enables 32-bit support for Intel architectural performance counters. This + architecture was introduced by Intel Core Solo/Core Duo processors. + +config I386_PERFMON_K8 + tristate "Support 32-bit mode AMD Athlon64/Opteron64 hardware performance counters" + depends on PERFMON + default n + help + Enables support for 32-bit mode AMD Athlon64/Opteron64 hardware performance counters. +endmenu + Index: linux-2.6/arch/i386/perfmon/Makefile =================================================================== --- /dev/null +++ linux-2.6/arch/i386/perfmon/Makefile @@ -0,0 +1,14 @@ +# +# Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. 
+# Contributed by Stephane Eranian +# +obj-$(CONFIG_PERFMON) += perfmon.o +obj-$(CONFIG_PERFMON_P6) += perfmon_p6.o +obj-$(CONFIG_I386_PERFMON_P4) += perfmon_p4.o +obj-$(CONFIG_I386_PERFMON_CORE) += perfmon_core.o +obj-$(CONFIG_I386_PERFMON_INTEL_ARCH) += perfmon_intel_arch.o +obj-$(CONFIG_I386_PERFMON_PEBS) += perfmon_pebs_smpl.o +obj-$(CONFIG_I386_PERFMON_K8) += perfmon_k8.o + +perfmon_k8-$(subst m,y,$(CONFIG_I386_PERFMON_K8)) += ../../x86_64/perfmon/perfmon_k8.o +perfmon_core-$(subst m,y,$(CONFIG_I386_PERFMON_CORE)) += ../../x86_64/perfmon/perfmon_core.o Index: linux-2.6/arch/i386/perfmon/perfmon.c =================================================================== --- /dev/null +++ linux-2.6/arch/i386/perfmon/perfmon.c @@ -0,0 +1,1302 @@ +/* + * This file implements the X86 specific support for the perfmon2 interface + * + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include +#include +#include + +#include +#include +#include + +DEFINE_PER_CPU(unsigned long, real_iip); + +static int pfm_using_nmi; + +struct pfm_ds_area { + unsigned long bts_buf_base; + unsigned long bts_index; + unsigned long bts_abs_max; + unsigned long bts_intr_thres; + unsigned long pebs_buf_base; + unsigned long pebs_index; + unsigned long pebs_abs_max; + unsigned long pebs_intr_thres; + u64 pebs_cnt_reset; +}; + +static int (*pfm_has_ovfl)(struct pfm_context *); +static int (*pfm_stop_save)(struct pfm_context *ctx, + struct pfm_event_set *set); + +static inline int get_smt_id(void) +{ +#ifdef CONFIG_SMP + int cpu = smp_processor_id(); + return (cpu != first_cpu(cpu_sibling_map[cpu])); +#else + return 0; +#endif +} + +void __pfm_write_reg_p4(const struct pfm_arch_ext_reg *xreg, u64 val) +{ + u64 pmi; + int smt_id; + + smt_id = get_smt_id(); + /* + * HT is only supported by P4-style PMU + * + * Adjust for T1 if necessary: + * + * - move the T0_OS/T0_USR bits into T1 slots + * - move the OVF_PMI_T0 bits into T1 slot + * + * The P4/EM64T T1 is cleared by description table. + * User only works with T0. 
+ */ + if (smt_id) { + if (xreg->reg_type & PFM_REGT_ESCR) { + + /* copy T0_USR & T0_OS to T1 */ + val |= ((val & 0xc) >> 2); + + /* clear bits T0_USR & T0_OS */ + val &= ~0xc; + + } else if (xreg->reg_type & PFM_REGT_CCCR) { + pmi = (val >> 26) & 0x1; + if (pmi) { + val &=~(1UL<<26); + val |= 1UL<<27; + } + } + } + if (xreg->addrs[smt_id]) + wrmsrl(xreg->addrs[smt_id], val); +} + +void __pfm_read_reg_p4(const struct pfm_arch_ext_reg *xreg, u64 *val) +{ + int smt_id; + + smt_id = get_smt_id(); + + if (likely(xreg->addrs[smt_id])) { + rdmsrl(xreg->addrs[smt_id], *val); + /* + * HT is only supported by P4-style PMU + * + * move the Tx_OS and Tx_USR bits into + * T0 slots setting the T1 slots to zero + */ + if (xreg->reg_type & PFM_REGT_ESCR) { + if (smt_id) + *val |= (((*val) & 0x3) << 2); + + /* + * zero out bits that are reserved + * (including T1_OS and T1_USR) + */ + *val &= PFM_ESCR_RSVD; + } + } else + *val = 0; +} + +/* + * called from NMI interrupt handler + */ +static void __kprobes __pfm_arch_quiesce_pmu_percpu(void) +{ + struct pfm_arch_pmu_info *arch_info; + unsigned int i; + + arch_info = pfm_pmu_conf->arch_info; + + /* + * quiesce PMU by clearing registers that have enable bits + * (start/stop capabilities). + */ + for (i = 0; i < arch_info->max_ena; i++) + if (test_bit(i, cast_ulp(arch_info->enable_mask))) + pfm_arch_write_pmc(NULL, i, 0); +} + +/* + * unfreeze PMU from pfm_do_interrupt_handler(). + * ctx may be NULL for spurious interrupts. + * interrupts are masked. + */ +void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx) +{ + struct pfm_arch_context *ctx_arch; + + if (ctx == NULL) + return; + + PFM_DBG_ovfl("state=%d", ctx->state); + + ctx->flags.started = 1; + + if (ctx->state == PFM_CTX_MASKED) + return; + + ctx_arch = pfm_ctx_arch(ctx); + + pfm_arch_restore_pmcs(ctx, ctx->active_set); + + if (ctx_arch->flags & PFM_X86_USE_DS) + wrmsrl(MSR_IA32_DS_AREA, ctx_arch->ds_area); +} + +/* + * Called from pfm_ctxsw(). 
Task is guaranteed to be current. + * set cannot be NULL. Context is locked. Interrupts are masked. + * Caller has already restored all PMD and PMC registers. + * + * must reactivate monitoring + */ +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx, + struct pfm_event_set *set) +{ + struct pfm_arch_context *ctx_arch; + + ctx_arch = pfm_ctx_arch(ctx); + + /* + * reload DS management area pointer. Pointer + * not managed as a PMC thus it is not restored + * with the rest of the registers. + */ + if (ctx_arch->flags & PFM_X86_USE_DS) + wrmsrl(MSR_IA32_DS_AREA, ctx_arch->ds_area); + + if (set->npend_ovfls) + __get_cpu_var(real_iip) = ctx_arch->saved_real_iip; +} + +static int pfm_stop_save_p6(struct pfm_context *ctx, + struct pfm_event_set *set) +{ + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info; + u64 used_mask[PFM_PMC_BV]; + u64 *cnt_mask; + u64 val, wmask, ovfl_mask; + u32 i, count; + + if (ctx->state == PFM_CTX_MASKED) + return 1; + + wmask = 1ULL << pfm_pmu_conf->counter_width; + + bitmap_and(cast_ulp(used_mask), + cast_ulp(set->used_pmcs), + cast_ulp(arch_info->enable_mask), + arch_info->max_ena); + + count = bitmap_weight(cast_ulp(used_mask), pfm_pmu_conf->regs.max_pmc); + + /* + * stop monitoring + * Unfortunately, this is very expensive! + * wrmsrl() is serializing. + */ + for (i = 0; count; i++) { + if (test_bit(i, cast_ulp(used_mask))) { + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0); + count--; + } + } + + /* + * if we already have a pending overflow condition, we simply + * return to take care of this first. 
+ */ + if (set->npend_ovfls) { + __get_cpu_var(pfm_stats).ccnt6++; + return 1; + } + + ovfl_mask = pfm_pmu_conf->ovfl_mask; + cnt_mask = pfm_pmu_conf->regs.cnt_pmds; + + /* + * check for pending overflows and save PMDs (combo) + * Must check for counting PMDs because of virtual PMDs + */ + count = set->nused_pmds; + for (i = 0; count; i++) { + if (test_bit(i, cast_ulp(set->used_pmds))) { + val = pfm_arch_read_pmd(ctx, i); + if (likely(test_bit(i, cast_ulp(cnt_mask)))) { + if (!(val & wmask)) { + __set_bit(i, cast_ulp(set->povfl_pmds)); + set->npend_ovfls++; + } + val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask); + } + set->pmds[i].value = val; + count--; + } + } + /* 0 means: no need to save PMDs at upper level */ + return 0; +} + +static int pfm_stop_save_amd64(struct pfm_context *ctx, + struct pfm_event_set *set) +{ + return pfm_stop_save_p6(ctx, set); +} + +static int pfm_stop_save_intel_core(struct pfm_context *ctx, + struct pfm_event_set *set) +{ + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info; + struct pfm_arch_context *ctx_arch; + struct pfm_ds_area *ds; + u64 used_mask[PFM_PMC_BV]; + u64 *cnt_mask; + u64 val, wmask, ovfl_mask; + u32 i, count; + + if (ctx->state == PFM_CTX_MASKED) + return 1; + + ctx_arch = pfm_ctx_arch(ctx); + + /* + * if PEBS used, clear DS area pointer + */ + if (ctx_arch->flags & PFM_X86_USE_DS) + wrmsrl(MSR_IA32_DS_AREA, 0); + + wmask = 1ULL << pfm_pmu_conf->counter_width; + + /* + * used enable pmc bitmask + */ + bitmap_and(cast_ulp(used_mask), + cast_ulp(set->used_pmcs), + cast_ulp(arch_info->enable_mask), + arch_info->max_ena); + + count = bitmap_weight(cast_ulp(used_mask), arch_info->max_ena); + + /* + * stop monitoring + * Unfortunately, this is very expensive! + * wrmsrl() is serializing. 
+ */ + for (i = 0; count; i++) { + if (test_bit(i, cast_ulp(used_mask))) { + wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0); + count--; + } + } + + /* + * if we already have a pending overflow condition, we simply + * return to take care of this first. + */ + if (set->npend_ovfls) + return 1; + + ovfl_mask = pfm_pmu_conf->ovfl_mask; + cnt_mask = pfm_pmu_conf->regs.cnt_pmds; + + /* + * check for pending overflows and save PMDs (combo) + * Must check for counting PMDs because of virtual PMDs + * + * XXX: should use the ovf_status register instead, yet + * we would have to check if NMI is used and fall back + * to individual pmd inspection. + */ + count = set->nused_pmds; + for (i = 0; count; i++) { + if (test_bit(i, cast_ulp(set->used_pmds))) { + val = pfm_arch_read_pmd(ctx, i); + if (likely(test_bit(i, cast_ulp(cnt_mask)))) { + if (!(val & wmask)) { + __set_bit(i, cast_ulp(set->povfl_pmds)); + set->npend_ovfls++; + } + val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask); + } + set->pmds[i].value = val; + count--; + } + } + + /* + * check for PEBS buffer full and set the corresponding PMD overflow + */ + if (ctx_arch->flags & PFM_X86_USE_PEBS) { + + ds = (struct pfm_ds_area *)ctx_arch->ds_area; + + PFM_DBG("ds=%p pebs_idx=0x%lx thres=0x%lx", + ds, + ds->pebs_index, + ds->pebs_intr_thres); + + if (ds->pebs_index >= ds->pebs_intr_thres + && test_bit(arch_info->pebs_ctr_idx, + cast_ulp(set->used_pmds))) { + __set_bit(arch_info->pebs_ctr_idx, + cast_ulp(set->povfl_pmds)); + set->npend_ovfls++; + } + } + /* 0 means: no need to save PMDs at upper level */ + return 0; +} + +static int pfm_stop_save_p4(struct pfm_context *ctx, + struct pfm_event_set *set) +{ + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info; + struct pfm_arch_context *ctx_arch; + struct pfm_arch_ext_reg *xrc, *xrd; + u64 used_mask[PFM_PMC_BV]; + u32 i, j, count; + u16 max_pmc; + u64 cccr, ctr1, ctr2, ovfl_mask; + + if (ctx->state == PFM_CTX_MASKED) + return 1; + + ctx_arch = 
pfm_ctx_arch(ctx); + max_pmc = pfm_pmu_conf->regs.max_pmc; + xrc = arch_info->pmc_addrs; + xrd = arch_info->pmd_addrs; + ovfl_mask = pfm_pmu_conf->ovfl_mask; + + /* + * build used enable PMC bitmask + * if user did not set any CCCR, then mask is + * empty and there is nothing to do because nothing + * was started + */ + bitmap_and(cast_ulp(used_mask), + cast_ulp(set->used_pmcs), + cast_ulp(arch_info->enable_mask), + arch_info->max_ena); + + count = bitmap_weight(cast_ulp(used_mask), arch_info->max_ena); + + PFM_DBG_ovfl("npend=%u ena_mask=0x%llx u_pmcs=0x%llx count=%u num=%u", + set->npend_ovfls, + (unsigned long long)arch_info->enable_mask[0], + (unsigned long long)set->used_pmcs[0], + count, arch_info->max_ena); + /* + * if DS area is used, clear DS area pointer + */ + if (ctx_arch->flags & PFM_X86_USE_DS) + wrmsrl(MSR_IA32_DS_AREA, 0); + + /* + * ensures we do not destroy pending overflow + * information. If pended interrupts are already + * known, then we just stop monitoring. + */ + if (set->npend_ovfls) { + /* + * clear enable bit + * unfortunately, this is very expensive! + */ + for (i = 0; count; i++) { + if (test_bit(i, cast_ulp(used_mask))) { + __pfm_write_reg_p4(xrc+i, 0); + count--; + } + } + /* need to save PMDs at upper level */ + return 1; + } + + /* + * stop monitoring AND collect pending overflow information AND + * save pmds. + * + * We need to access the CCCR twice, once to get overflow info + * and a second time to stop monitoring (which destroys the OVF flag) + * Similarly, we need to read the counter twice to check whether + * it did overflow between the CCCR read and the CCCR write. 
+ */ + for (i = 0; count; i++) { + if (test_bit(i, cast_ulp(used_mask))) { + /* + * controlled counter + */ + j = xrc[i].ctr; + + /* read CCCR (PMC) value */ + __pfm_read_reg_p4(xrc+i, &cccr); + + /* read counter (PMD) controlled by PMC */ + __pfm_read_reg_p4(xrd+j, &ctr1); + + /* clear CCCR value: stop counter but destroy OVF */ + __pfm_write_reg_p4(xrc+i, 0); + + /* read counter controlled by CCCR again */ + __pfm_read_reg_p4(xrd+j, &ctr2); + + /* + * there is an overflow if either: + * - CCCR.ovf is set (and we just cleared it) + * - ctr2 < ctr1 + * in that case we set the bit corresponding to the + * overflowed PMD in povfl_pmds. + */ + if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) { + __set_bit(j, cast_ulp(set->povfl_pmds)); + set->npend_ovfls++; + } + ctr2 = (set->pmds[j].value & ~ovfl_mask) | (ctr2 & ovfl_mask); + set->pmds[j].value = ctr2; + count--; + } + } + /* + * check for PEBS buffer full and set the corresponding PMD overflow + */ + if (ctx_arch->flags & PFM_X86_USE_PEBS) { + struct pfm_ds_area *ds; + ds = (struct pfm_ds_area *)ctx_arch->ds_area; + PFM_DBG("ds=%p pebs_idx=0x%lx thres=0x%lx", ds, ds->pebs_index, ds->pebs_intr_thres); + if (ds->pebs_index >= ds->pebs_intr_thres + && test_bit(arch_info->pebs_ctr_idx, cast_ulp(set->used_pmds))) { + __set_bit(arch_info->pebs_ctr_idx, cast_ulp(set->povfl_pmds)); + set->npend_ovfls++; + } + } + + /* 0 means: no need to save the PMD at higher level */ + return 0; +} + +/* + * Called from pfm_stop() and idle notifier + * + * Interrupts are masked. Context is locked. Set is the active set. + * + * For per-thread: + * task is not necessarily current. If not current task, then + * task is guaranteed stopped and off any cpu. Access to PMU + * is not guaranteed. Interrupts are masked. Context is locked. + * Set is the active set. + * + * For system-wide: + * task is current + * + * must disable active monitoring. 
+ * ctx cannot be NULL
+ */
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx,
+		   struct pfm_event_set *set)
+{
+	/*
+	 * no need to go through stop_save()
+	 * if we are already stopped
+	 */
+	if (!ctx->flags.started)
+		return;
+	/*
+	 * on x86, masked is equivalent to stopped, thus we have
+	 * nothing to do here
+	 *
+	 * NOTE(review): no test of the masked state is visible here; the
+	 * only condition below is task == current, i.e. the PMU is stopped
+	 * and saved only when the target task is the one running this code.
+	 */
+	if (task == current)
+		pfm_stop_save(ctx, set);
+}
+
+/*
+ * Called from pfm_ctxsw(). Task is guaranteed to be current.
+ * Context is locked. Interrupts are masked. Monitoring may be active.
+ * PMU access is guaranteed. PMC and PMD registers are live in PMU.
+ *
+ * Must stop monitoring, save pending overflow information
+ *
+ * Return:
+ * 	non-zero : did not save PMDs (as part of stopping the PMU)
+ * 	       0 : saved PMDs (no need to save them in caller)
+ *
+ * The return value is whatever the model-specific pfm_stop_save() returns.
+ */
+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx,
+			     struct pfm_event_set *set)
+{
+	/*
+	 * disable lazy restore of PMCS on ctxswin because
+	 * we modify some of them.
+	 */
+	set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
+
+	if (set->npend_ovfls) {
+		struct pfm_arch_context *ctx_arch;
+		ctx_arch = pfm_ctx_arch(ctx);
+		ctx_arch->saved_real_iip = __get_cpu_var(real_iip);
+	}
+	return pfm_stop_save(ctx, set);
+}
+
+/*
+ * called from pfm_start() and idle notifier
+ *
+ * Interrupts are masked. Context is locked. Set is the active set.
+ *
+ * For per-thread:
+ * 	Task is not necessarily current. If not current task, then task
+ * 	is guaranteed stopped and off any cpu. No access to PMU if task
+ * 	is not current.
+ *
+ * For system-wide:
+ * 	task is always current
+ *
+ * must enable active monitoring.
+ */
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx,
+		    struct pfm_event_set *set)
+{
+	struct pfm_arch_context *ctx_arch;
+	u64 *mask;
+	u16 i, num;
+
+	/*
+	 * a pfm_start issued while the context is masked has no effect.
+	 * This comes from the fact that on x86, masking and stopping
+	 * use the same mechanism, i.e., clearing the enable bits
+	 * of the PMC registers.
+	 */
+	if (ctx->state == PFM_CTX_MASKED)
+		return;
+
+	/*
+	 * cannot restore PMC if no access to PMU. Will be done
+	 * when the thread is switched back in
+	 */
+	if (task != current)
+		return;
+
+	ctx_arch = pfm_ctx_arch(ctx);
+
+	/*
+	 * we must actually install all implemented pmcs registers because
+	 * until started, we do not write any PMC registers.
+	 * Note that registers used by other subsystems (e.g. NMI) are
+	 * removed from pmcs.
+	 *
+	 * The available registers that are actually not used get their default
+	 * value such that counters do not count anything. As such, we can
+	 * afford to write all of them but then stop only the one we use.
+	 *
+	 * XXX: we may be able to optimize this for non-P4 PMU as pmcs are
+	 * independent from each others.
+	 *
+	 * The loop is driven by the number of implemented PMCs (num), not
+	 * by an index bound: it ends once num set bits of the mask have
+	 * been visited.
+	 */
+	num = pfm_pmu_conf->regs.num_pmcs;
+	mask = pfm_pmu_conf->regs.pmcs;
+	for (i = 0; num; i++) {
+		if (test_bit(i, cast_ulp(mask))) {
+			pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
+			num--;
+		}
+	}
+
+	/*
+	 * reload DS area pointer.
+	 */
+	if (ctx_arch->flags & PFM_X86_USE_DS)
+		wrmsrl(MSR_IA32_DS_AREA, ctx_arch->ds_area);
+
+}
+
+/*
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
+ * pfm_context_load_sys(), pfm_ctxsw()
+ *
+ * context is locked. Interrupts are masked. Set cannot be NULL.
+ * Access to the PMU is guaranteed.
+ *
+ * function must restore PMD registers
+ */
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
+{
+	u64 *used_pmds;
+	u16 i, num;
+
+	used_pmds = set->used_pmds;
+	num = set->nused_pmds;
+
+	/*
+	 * we can restore only the PMD we use because:
+	 * 	- you can only read with pfm_read_pmds() the registers
+	 * 	  declared used via pfm_write_pmds(), smpl_pmds, reset_pmds
+	 *
+	 * 	- if cr4.pce=1, only counters are exposed to user. No
+	 * 	  address is ever exposed by counters.
+	 */
+	for (i = 0; num; i++) {
+		if (likely(test_bit(i, cast_ulp(used_pmds)))) {
+			pfm_write_pmd(ctx, i, set->pmds[i].value);
+			num--;
+		}
+	}
+}
+
+/*
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
+ * pfm_context_load_sys(), pfm_ctxsw().
+ * Context is locked. Interrupts are masked. set cannot be NULL.
+ * Access to the PMU is guaranteed.
+ *
+ * function must restore all PMC registers from set
+ */
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
+{
+	struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
+	u64 *mask;
+	u16 i, num;
+
+	/*
+	 * - by default, no PMC measures anything
+	 * - on ctxswout, all used PMCs are disabled (cccr cleared)
+	 *
+	 * we need to restore the PMC (incl enable bits) only if
+	 * not masked and user issued pfm_start()
+	 */
+	if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0)
+		return;
+
+	/*
+	 * In general, writing MSRs is very expensive, so try to be smart.
+	 *
+	 * P6-style, Core-style:
+	 * 	- pmc are totally independent of each other, there is
+	 * 	  no possible side-effect from stale pmcs. Therefore we only
+	 * 	  restore the registers we use
+	 * P4-style:
+	 * 	- must restore everything because there are some dependencies
+	 * 	(e.g., ESCR and CCCR)
+	 */
+	if (arch_info->pmu_style == PFM_X86_PMU_P4) {
+		num = pfm_pmu_conf->regs.num_pmcs;
+		mask = pfm_pmu_conf->regs.pmcs;
+	} else {
+		num = set->nused_pmcs;
+		mask = set->used_pmcs;
+	}
+	for (i = 0; num; i++) {
+		if (test_bit(i, cast_ulp(mask))) {
+			pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
+			num--;
+		}
+	}
+}
+
+/*
+ * invoked only when NMI is used. Called from the LOCAL_PERFMON_VECTOR
+ * handler to copy P4 overflow state captured when the NMI triggered.
+ * Given that on P4, stopping monitoring destroys the overflow information
+ * we save it in pfm_has_ovfl_p4() where monitoring is also stopped.
+ *
+ * Here we propagate the overflow state to current active set.
+ * The freeze_pmu() call will not overwrite this state because npend_ovfls
+ * is non-zero.
+ *
+ * Does nothing when no context is attached to this CPU or when the
+ * P4 state holds no pending overflow. After the copy, only the P4
+ * npend_ovfls counter is reset to zero.
+ */
+static void pfm_p4_copy_nmi_state(void)
+{
+	struct pfm_context *ctx;
+	struct pfm_arch_context *ctx_arch;
+	struct pfm_event_set *set;
+
+	ctx = __get_cpu_var(pmu_ctx);
+	if (!ctx)
+		return;
+
+	ctx_arch = pfm_ctx_arch(ctx);
+	set = ctx->active_set;
+
+	if (ctx_arch->p4->npend_ovfls) {
+		set->npend_ovfls = ctx_arch->p4->npend_ovfls;
+
+		bitmap_copy(cast_ulp(set->povfl_pmds),
+			    cast_ulp(ctx_arch->p4->povfl_pmds),
+			    pfm_pmu_conf->regs.max_pmd);
+
+		ctx_arch->p4->npend_ovfls = 0;
+	}
+}
+
+/*
+ * The PMU interrupt is handled through an interrupt gate, therefore
+ * the CPU automatically clears the EFLAGS.IF, i.e., masking interrupts.
+ *
+ * The perfmon interrupt handler MUST run with interrupts disabled due
+ * to possible race with other, higher priority interrupts, such as timer
+ * or IPI function calls.
+ *
+ * See description in IA-32 architecture manual, Vol 3 section 5.8.1
+ */
+fastcall void smp_pmu_interrupt(struct pt_regs *regs)
+{
+	struct pfm_arch_pmu_info *arch_info;
+	unsigned long iip;
+
+	ack_APIC_irq();
+
+	irq_enter();
+
+	/*
+	 * when using NMI, pfm_handle_nmi() gets called
+	 * first. It stops monitoring and records the
+	 * iip into real_iip, then it reposts the interrupt
+	 * using the lower priority vector LOCAL_PERFMON_VECTOR
+	 *
+	 * On P4, due to the difficulty of detecting overflows
+	 * and stopping the PMU, pfm_handle_nmi() needs to
+	 * record npend_ovfl and ovfl_pmds in ctx_arch. So
+	 * here we simply copy them back to the set.
+	 *
+	 * Without NMI, the interrupted instruction pointer is taken
+	 * directly from regs.
+	 */
+	if (pfm_using_nmi) {
+		arch_info = pfm_pmu_conf->arch_info;
+		iip = __get_cpu_var(real_iip);
+		if (arch_info->pmu_style == PFM_X86_PMU_P4)
+			pfm_p4_copy_nmi_state();
+	} else
+		iip = instruction_pointer(regs);
+
+	pfm_interrupt_handler(iip, regs);
+
+	/*
+	 * On Intel P6, Pentium M, P4, Intel Core:
+	 * 	- it is necessary to clear the MASK field for the LVTPC
+	 * 	  vector. Otherwise interrupts remain masked. See
+	 * 	  section 8.5.1
+	 * AMD X86-64:
+	 * 	- the documentation does not stipulate the behavior.
+	 * 	  To be safe, we also rewrite the vector to clear the
+	 * 	  mask field
+	 *
+	 * NOTE(review): the test below rewrites LVTPC for Intel only,
+	 * which does not match the AMD statement above. Confirm which
+	 * of the two is intended.
+	 */
+	if (cpu_data->x86_vendor == X86_VENDOR_INTEL)
+		apic_write(APIC_LVTPC, LOCAL_PERFMON_VECTOR);
+
+	irq_exit();
+}
+
+/*
+ * detect if counters have overflowed.
+ * return:
+ * 	0 : no overflow
+ * 	1 : at least one overflow
+ *
+ * used by AMD K8 and Intel architectural PMU
+ *
+ * Each counting PMD is read with rdmsrl(); a counter is reported as
+ * overflowed when the bit at position counter_width reads as zero.
+ * The scan stops at the first such counter. ctx is not used.
+ */
+static int __kprobes pfm_has_ovfl_p6(struct pfm_context *ctx)
+{
+	struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
+	struct pfm_arch_ext_reg *xrd;
+	u64 *cnt_mask;
+	u64 wmask, val;
+	u16 i, num;
+
+	cnt_mask = pfm_pmu_conf->regs.cnt_pmds;
+	num = pfm_pmu_conf->regs.num_counters;
+	wmask = 1ULL << pfm_pmu_conf->counter_width;
+	xrd = arch_info->pmd_addrs;
+
+	for (i = 0; num; i++) {
+		if (test_bit(i, cast_ulp(cnt_mask))) {
+			rdmsrl(xrd[i].addrs[0], val);
+			if (!(val & wmask))
+				return 1;
+			num--;
+		}
+	}
+	return 0;
+}
+
+static int __kprobes pfm_has_ovfl_amd64(struct pfm_context *ctx)
+{
+	return pfm_has_ovfl_p6(ctx);
+}
+
+/*
+ * detect if counters have overflowed.
+ * return:
+ * 	0 : no overflow
+ * 	1 : at least one overflow
+ *
+ * used by Intel P4
+ *
+ * Every CCCR selected by (implemented PMCs & enable_mask) is read and then
+ * cleared, which stops its counter. Overflowed PMDs are recorded in the
+ * P4 context (povfl_pmds, npend_ovfls) and the CCCR values that were read
+ * are kept in saved_cccrs[]. When this call detects no overflow, the
+ * saved CCCR values are written back so that monitoring resumes.
+ */
+static int __kprobes pfm_has_ovfl_p4(struct pfm_context *ctx)
+{
+	struct pfm_arch_ext_reg *xrc, *xrd;
+	struct pfm_arch_context *ctx_arch;
+	struct pfm_arch_p4_context *p4;
+	struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info;
+	u64 ena_mask[PFM_PMC_BV];
+	u64 cccr, ctr1, ctr2;
+	int nena, novfl = 0;
+	int n, i, j;
+
+	ctx_arch = pfm_ctx_arch(ctx);
+	xrc = arch_info->pmc_addrs;
+	xrd = arch_info->pmd_addrs;
+	p4 = ctx_arch->p4;
+
+	bitmap_and(cast_ulp(ena_mask),
+		   cast_ulp(pfm_pmu_conf->regs.pmcs),
+		   cast_ulp(arch_info->enable_mask),
+		   arch_info->max_ena);
+
+	/*
+	 * number of CCCRs to visit; kept separately because both loops
+	 * below consume their own copy of the count
+	 */
+	nena = bitmap_weight(cast_ulp(ena_mask), arch_info->max_ena);
+
+	for (i = 0, n = nena; n; i++) {
+		if (!test_bit(i, cast_ulp(ena_mask)))
+			continue;
+		/*
+		 * controlled counter
+		 */
+		j = xrc[i].ctr;
+
+		/* read CCCR (PMC) value */
+		__pfm_read_reg_p4(xrc+i, &cccr);
+
+		/* read counter (PMD) controlled by PMC */
+		__pfm_read_reg_p4(xrd+j, &ctr1);
+
+		/* clear CCCR value: stop counter but destroy OVF */
+		__pfm_write_reg_p4(xrc+i, 0);
+
+		/* read counter controlled by CCCR again */
+		__pfm_read_reg_p4(xrd+j, &ctr2);
+
+		/*
+		 * there is an overflow if either:
+		 * 	- CCCR.ovf is set (and we just cleared it)
+		 * 	- ctr2 < ctr1
+		 * in that case we set the bit corresponding to the
+		 * overflowed PMD in povfl_pmds.
+		 */
+		if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) {
+			__set_bit(j, cast_ulp(p4->povfl_pmds));
+			p4->npend_ovfls++;
+			novfl++;
+		}
+		p4->saved_cccrs[i] = cccr;
+		n--;
+	}
+
+	/*
+	 * at least one overflow: leave the counters stopped and tell the
+	 * caller, so that the NMI gets reposted on the perfmon vector
+	 */
+	if (novfl)
+		return 1;
+
+	/*
+	 * if there was no overflow, then it means the NMI was not really
+	 * for us, so we have to resume monitoring: put back the CCCR
+	 * values read above
+	 */
+	for (i = 0, n = nena; n; i++) {
+		if (!test_bit(i, cast_ulp(ena_mask)))
+			continue;
+		__pfm_write_reg_p4(xrc+i, p4->saved_cccrs[i]);
+		n--;
+	}
+	return 0;
+}
+
+/*
+ * detect if counters have overflowed.
+ * return:
+ * 	0 : no overflow
+ * 	1 : at least one overflow
+ *
+ * used by Intel Core-based processors
+ *
+ * ctx is not used: the scan covers every counting PMD of the PMU
+ * description.
+ */
+static int __kprobes pfm_has_ovfl_intel_core(struct pfm_context *ctx)
+{
+	struct pfm_arch_pmu_info *info = pfm_pmu_conf->arch_info;
+	struct pfm_arch_ext_reg *ctr_regs = info->pmd_addrs;
+	u64 *counters = pfm_pmu_conf->regs.cnt_pmds;
+	u64 top_bit = 1ULL << pfm_pmu_conf->counter_width;
+	u64 raw;
+	u16 left = pfm_pmu_conf->regs.num_counters;
+	u16 idx;
+
+	/*
+	 * visit each counting PMD once; a counter whose bit at position
+	 * counter_width reads as zero is reported as overflowed
+	 */
+	for (idx = 0; left; idx++) {
+		if (!test_bit(idx, cast_ulp(counters)))
+			continue;
+
+		rdmsrl(ctr_regs[idx].addrs[0], raw);
+		if (!(raw & top_bit))
+			return 1;
+
+		left--;
+	}
+	return 0;
+}
+
+/*
+ * called from notify_die() notifier from a trap handler path. We only
+ * care about NMI related callbacks, and ignore everything else.
+ *
+ * Cannot grab any locks, including the perfmon context lock
+ *
+ * Must detect if NMI interrupt comes from perfmon, and if so it must
+ * stop the PMU and repost a lower-priority interrupt. The perfmon interrupt
+ * handler needs to grab the context lock, thus it cannot be run directly
+ * from the NMI interrupt call path.
+ *
+ * return:
+ * 	NOTIFY_DONE : event is not DIE_NMI_IPI, no perfmon context is
+ * 		      attached to this CPU, or no counter overflow was found
+ * 	NOTIFY_STOP : the NMI came from the PMU and has been reposted on
+ * 		      LOCAL_PERFMON_VECTOR
+ */
+static int __kprobes pfm_handle_nmi(struct notifier_block *nb, unsigned long val,
+				    void *data)
+{
+	struct die_args *args = data;
+	struct pfm_context *ctx;
+
+	/*
+	 * only NMI related calls
+	 */
+	if (val != DIE_NMI_IPI)
+		return NOTIFY_DONE;
+
+	/*
+	 * perfmon not active on this processor
+	 */
+	ctx = __get_cpu_var(pmu_ctx);
+	if (ctx == NULL) {
+		PFM_DBG_ovfl("ctx NULL");
+		return NOTIFY_DONE;
+	}
+
+	/*
+	 * detect if we have overflows, i.e., NMI interrupt
+	 * caused by PMU
+	 *
+	 * pfm_has_ovfl is the model-specific routine selected in
+	 * pfm_arch_pmu_config_init()
+	 */
+	if (!pfm_has_ovfl(ctx)) {
+		PFM_DBG_ovfl("no ovfl");
+		return NOTIFY_DONE;
+	}
+
+	/*
+	 * we stop the PMU to avoid further overflow before this
+	 * one is treated by lower priority interrupt handler
+	 */
+	__pfm_arch_quiesce_pmu_percpu();
+
+	/*
+	 * record actual instruction pointer
+	 *
+	 * smp_pmu_interrupt() reads it back from real_iip, because the
+	 * registers it receives belong to the reposted interrupt
+	 */
+	__get_cpu_var(real_iip) = instruction_pointer(args->regs);
+
+	/*
+	 * post lower priority interrupt (LOCAL_PERFMON_VECTOR)
+	 */
+	pfm_arch_resend_irq();
+
+	__get_cpu_var(pfm_stats).ovfl_intr_nmi_count++;
+
+	/*
+	 * we need to rewrite the APIC vector on Intel
+	 */
+	if (cpu_data->x86_vendor == X86_VENDOR_INTEL)
+		apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+	/*
+	 * the notification was for us
+	 */
+	return NOTIFY_STOP;
+}
+
+static struct notifier_block pfm_nmi_nb={
+	.notifier_call = pfm_handle_nmi
+};
+
+/*
+ * called from pfm_register_pmu_config() after the new
+ * config has been validated. The pfm_session_lock
+ * is held.
+ *
+ * Selects the model-specific pfm_stop_save/pfm_has_ovfl routines from
+ * arch_info->pmu_style and, when the PMU description sets
+ * PFM_X86_FL_USE_NMI, registers the NMI die notifier.
+ *
+ * return:
+ * 	< 0 : if error (-EINVAL: NMI watchdog uses the counters but the
+ * 	      PMU description cannot handle NMI, or pmu_style is unknown)
+ * 	  0 : if success
+ */
+int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
+{
+	struct pfm_arch_pmu_info *arch_info = cfg->arch_info;
+
+	/*
+	 * ensure that PMU description is able to deal with NMI watchdog using
+	 * the performance counters
+	 */
+	if (  nmi_watchdog == NMI_LOCAL_APIC
+	    && !(arch_info->flags & PFM_X86_FL_USE_NMI)) {
+		PFM_INFO("NMI watchdog uses counters, PMU module cannot handle");
+		return -EINVAL;
+	}
+
+	/*
+	 * adjust stop routine based on PMU model
+	 *
+	 * P6  : P6, Pentium M, AMD K8, Intel architectural perfmon
+	 * P4  : Xeon, EM64T, P4
+	 * Core: Core 2,
+	 */
+	switch(arch_info->pmu_style) {
+	case PFM_X86_PMU_P4:
+		pfm_stop_save = pfm_stop_save_p4;
+		pfm_has_ovfl  = pfm_has_ovfl_p4;
+		break;
+	case PFM_X86_PMU_P6:
+		pfm_stop_save = pfm_stop_save_p6;
+		pfm_has_ovfl  = pfm_has_ovfl_p6;
+		break;
+	case PFM_X86_PMU_CORE:
+		pfm_stop_save = pfm_stop_save_intel_core;
+		pfm_has_ovfl  = pfm_has_ovfl_intel_core;
+		break;
+	case PFM_X86_PMU_AMD64:
+		pfm_stop_save = pfm_stop_save_amd64;
+		pfm_has_ovfl  = pfm_has_ovfl_amd64;
+		break;
+	default:
+		PFM_INFO("unknown pmu_style=%d", arch_info->pmu_style);
+		return -EINVAL;
+	}
+
+	/*
+	 * determine interrupt type to use
+	 *
+	 * pfm_using_nmi is what pfm_arch_pmu_config_remove() later tests
+	 * to decide whether the die notifier must be unregistered
+	 */
+	if (arch_info->flags & PFM_X86_FL_USE_NMI) {
+		register_die_notifier(&pfm_nmi_nb);
+		PFM_INFO("intr_type=NMI");
+		pfm_using_nmi = 1;
+	} else {
+		PFM_INFO("intr_type=regular");
+	}
+	return 0;
+}
+
+void pfm_arch_pmu_config_remove(void)
+{
+	if (pfm_using_nmi)
+		unregister_die_notifier(&pfm_nmi_nb);
+
+	pfm_using_nmi = 0;
+}
+
+char *pfm_arch_get_pmu_module_name(void)
+{
+	switch(cpu_data->x86) {
+	case 6:
+		switch(cpu_data->x86_model) {
+		case 3: /* Pentium II */
+		case 7 ... 11:
+		case 13:
+			return "perfmon_p6";
+		case 15:
+			return "perfmon_core";
+		default:
+			goto try_arch;
+		}
+	case 15:
+	case 16:
+		/*
+		 * All Opteron processors
+		 *
+		 * for any other vendor, family 15/16 models 0 to 6 map to
+		 * the P4 module; other models fall into the architectural
+		 * perfmon check below
+		 */
+		if (cpu_data->x86_vendor == X86_VENDOR_AMD)
+			return "perfmon_k8";
+
+		switch(cpu_data->x86_model) {
+		case 0 ... 6:
+			return "perfmon_p4";
+		}
+		/* FALL THROUGH */
+	default:
+try_arch:
+		if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
+			return "perfmon_intel_arch";
+		return NULL;
+	}
+	return NULL;
+}
+
+void pfm_arch_resend_irq(void)
+{
+	unsigned long val, dest;
+	/*
+	 * we cannot use hw_resend_irq() because it goes to
+	 * the I/O APIC. We need to go to the Local APIC.
+	 *
+	 * The "int vec" is not the right solution either
+	 * because it triggers a software intr. We need
+	 * to regenerate the interrupt and have it pended
+	 * until we unmask interrupts.
+	 *
+	 * Instead we send ourself an IPI on the perfmon
+	 * vector.
+	 */
+	val  = APIC_DEST_SELF|APIC_INT_ASSERT|
+	       APIC_DM_FIXED|LOCAL_PERFMON_VECTOR;
+
+	dest = apic_read(APIC_ID);
+	apic_write(APIC_ICR2, dest);
+	apic_write(APIC_ICR, val);
+}
+
+DEFINE_PER_CPU(unsigned long, saved_lvtpc);
+
+static void pfm_arch_pmu_acquire_percpu(void *data)
+{
+	int vec;
+
+	__get_cpu_var(saved_lvtpc) = apic_read(APIC_LVTPC);
+
+	vec = pfm_using_nmi ? APIC_DM_NMI : LOCAL_PERFMON_VECTOR;
+	apic_write(APIC_LVTPC, vec);
+
+	PFM_DBG("LTVPC=0x%lx saved=0x%lx",
+		(unsigned long)apic_read(APIC_LVTPC),
+		(unsigned long)__get_cpu_var(saved_lvtpc));
+}
+
+static void pfm_arch_pmu_release_percpu(void *data)
+{
+	PFM_DBG("restoring LVTPC=0x%lx", __get_cpu_var(saved_lvtpc));
+	apic_write(APIC_LVTPC, __get_cpu_var(saved_lvtpc));
+}
+
+
+/*
+ * called from pfm_acquire_pmu() with
+ * pfm_pmu_conf.regs copied from pfm_pmu_conf.full_regs
+ * needs to adjust regs to match current PMU availability
+ *
+ * Caller does recalculate all max/num/first limits on the
+ * pfm_pmu_conf.regs structure.
+ *
+ * interrupts are not masked
+ *
+ *
+ * XXX: until reserve_*_nmi() get fixed by Bjorn to work
+ * correctly whenever the NMI watchdog is not used. We skip
+ * the allocation. Yet we do the percpu initialization.
+ *
+ * return:
+ * 	     0 : success, the LVTPC entry has been programmed on each CPU
+ * 	-EBUSY : no PMC with enable capability could be reserved; every
+ * 		 reservation made so far is given back and regs is reset
+ * 		 to full_regs
+ */
+int pfm_arch_pmu_acquire(void)
+{
+	struct pfm_arch_pmu_info *arch_info;
+	struct pfm_regmap_desc *d;
+	struct pfm_arch_ext_reg *pc;
+	u16 i, n, ena = 0;
+
+	arch_info = pfm_pmu_conf->arch_info;
+	pc = arch_info->pmc_addrs;
+
+	/*
+	 * enable information is rebuilt from scratch on each acquire, so
+	 * that nothing is inherited from a previous acquire/release cycle
+	 */
+	bitmap_zero(cast_ulp(arch_info->enable_mask), PFM_MAX_PMCS);
+	arch_info->max_ena = 0;
+
+	d = pfm_pmu_conf->pmc_desc;
+	n = pfm_pmu_conf->regs.num_pmcs;
+	for(i=0; n; i++, d++) {
+		/*
+		 * skip not implemented registers (including those
+		 * already removed by the module)
+		 */
+		if (!(d->type & PFM_REG_I))
+			continue;
+
+		n--;
+
+		/* virtual registers are not reserved */
+		if (d->type & PFM_REG_V)
+			continue;
+
+		/*
+		 * reserve register with lower-level allocator
+		 */
+		if (!reserve_evntsel(d->hw_addr)) {
+			PFM_DBG("pmc%d (%s) in use elsewhere, disabling", i, d->desc);
+			__clear_bit(i, cast_ulp(pfm_pmu_conf->regs.pmcs));
+		} else {
+			if (pc[i].reg_type & PFM_REGT_EN) {
+				__set_bit(i, cast_ulp(arch_info->enable_mask));
+				ena++;
+				arch_info->max_ena = i + 1;
+			}
+		}
+	}
+
+	PFM_DBG("%u PMCs with enable capability", ena);
+	if (!ena) {
+		PFM_INFO("no registers with start/stop capability, "
+			 "try rebooting with nmi_watchdog=0");
+		goto undo;
+	}
+
+	d = pfm_pmu_conf->pmd_desc;
+	n = pfm_pmu_conf->regs.num_pmds;
+	for(i=0; n; i++, d++) {
+		if (!(d->type & PFM_REG_I))
+			continue;
+		n--;
+
+		if (d->type & PFM_REG_V)
+			continue;
+
+		if (!reserve_perfctr(d->hw_addr)) {
+			PFM_DBG("pmd%d (%s) in use elsewhere, disabling", i, d->desc);
+			__clear_bit(i, cast_ulp(pfm_pmu_conf->regs.pmds));
+			__clear_bit(i, cast_ulp(pfm_pmu_conf->regs.cnt_pmds));
+			__clear_bit(i, cast_ulp(pfm_pmu_conf->regs.rw_pmds));
+		}
+	}
+
+	/*
+	 * program APIC
+	 */
+	on_each_cpu(pfm_arch_pmu_acquire_percpu, NULL, 0, 1);
+
+	return 0;
+undo:
+	/*
+	 * give back the event select registers reserved by the first loop:
+	 * they are the implemented, non-virtual PMCs whose bit is still
+	 * set in regs.pmcs (a failed reservation cleared the bit). This
+	 * must happen before regs is overwritten below.
+	 */
+	d = pfm_pmu_conf->pmc_desc;
+	n = pfm_pmu_conf->regs.num_pmcs;
+	for(i=0; n; i++, d++) {
+		if (!(d->type & PFM_REG_I))
+			continue;
+		n--;
+
+		if (d->type & PFM_REG_V)
+			continue;
+
+		if (test_bit(i, cast_ulp(pfm_pmu_conf->regs.pmcs)))
+			release_evntsel(d->hw_addr);
+	}
+	pfm_pmu_conf->regs = pfm_pmu_conf->full_regs;
+	return -EBUSY;
+}
+
+/*
+ * called from pfm_pmu_release()
+ * interrupts are not masked
+ *
+ * Gives back every PMC/PMD still marked available in pfm_pmu_conf->regs
+ * to the lower-level allocator, then restores the saved LVTPC entry on
+ * each CPU. Virtual registers are skipped because pfm_arch_pmu_acquire()
+ * never reserves them.
+ */
+void pfm_arch_pmu_release(void)
+{
+	struct pfm_regmap_desc *d;
+	u16 i, n;
+
+	d = pfm_pmu_conf->pmc_desc;
+	n = pfm_pmu_conf->regs.num_pmcs;
+	for(i=0; n; i++, d++) {
+		if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs.pmcs)))
+			continue;
+		n--;
+		if (d->type & PFM_REG_V)
+			continue;
+		release_evntsel(d->hw_addr);
+		PFM_DBG("pmc%u released", i);
+	}
+	d = pfm_pmu_conf->pmd_desc;
+	n = pfm_pmu_conf->regs.num_pmds;
+	for(i=0; n; i++, d++) {
+		if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs.pmds)))
+			continue;
+		n--;
+		if (d->type & PFM_REG_V)
+			continue;
+		release_perfctr(d->hw_addr);
+		PFM_DBG("pmd%u released", i);
+	}
+	on_each_cpu(pfm_arch_pmu_release_percpu, NULL, 0, 1);
+}
Index: linux-2.6/arch/i386/perfmon/perfmon_gen_ia32.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/i386/perfmon/perfmon_gen_ia32.c
@@ -0,0 +1,290 @@
+/*
+ * This file contains the IA-32 architectural perfmon register description tables.
+ *
+ * The IA-32 architectural perfmon (PMU) was introduced with Intel Core Solo
+ * and Core Duo processors.
+ *
+ * Copyright (c) 2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("Generic IA-32 PMU description table"); +MODULE_LICENSE("GPL"); + +static int force; +MODULE_PARM_DESC(force, "bool: force module to load succesfully"); +module_param(force, bool, 0600); + +/* + * - upper 32 bits are reserved + * - INT: APIC enable bit is reserved (forced to 1) + * - bit 21 is reserved + * + * RSVD: reserved bits are 1 + */ +#define PFM_GEN_IA32_PMC_RSVD ((~((1ULL<<32)-1)) \ + | (1ULL<<20) \ + | (1ULL<<21)) + +/* + * force Local APIC interrupt on overflow + * disable with NO_EMUL64 + */ +#define PFM_GEN_IA32_PMC_VAL (1ULL<<20) +#define PFM_GEN_IA32_NO64 (1ULL<<20) + +/* + * architectuture specifies that: + * IA32_PMCx MSR starts at 0xc1 & occupy a contiguous block of MSR addr + * IA32_PERFEVTSELx MSR starts at 0x186 & occupy a contiguous block of MSR addr + */ +#define MSR_GEN_PERFEVTSEL_BASE MSR_P6_EVNTSEL0 +#define MSR_GEN_PMC_BASE MSR_P6_PERFCTR0 + +#define PFM_GEN_IA32_SEL(n) { \ + .addrs[0] = MSR_GEN_PERFEVTSEL_BASE+(n), \ + .addrs[1] = 0, \ + .ctr = n, \ + .reg_type = PFM_REGT_EN} + +#define PFM_GEN_IA32_CTR(n) { \ + .addrs[0] = MSR_GEN_PMC_BASE+(n), \ + .addrs[1] = 0, \ + .ctr = n, \ + .reg_type = PFM_REGT_CTR} + +struct pmu_eax { + unsigned int version:8; + unsigned int num_cnt:8; + unsigned int cnt_width:8; + unsigned int ebx_length:8; +}; + +/* + * physical addresses of MSR controlling the perfevtsel and counter registers + */ +struct pfm_arch_pmu_info pfm_gen_ia32_pmu_info={ + .pmc_addrs = { + PFM_GEN_IA32_SEL(0) , PFM_GEN_IA32_SEL(1), PFM_GEN_IA32_SEL(2), PFM_GEN_IA32_SEL(3), + PFM_GEN_IA32_SEL(4) , PFM_GEN_IA32_SEL(5), PFM_GEN_IA32_SEL(6), PFM_GEN_IA32_SEL(7), + PFM_GEN_IA32_SEL(8) , PFM_GEN_IA32_SEL(9), 
PFM_GEN_IA32_SEL(10), PFM_GEN_IA32_SEL(11), + PFM_GEN_IA32_SEL(12), PFM_GEN_IA32_SEL(13), PFM_GEN_IA32_SEL(14), PFM_GEN_IA32_SEL(15), + PFM_GEN_IA32_SEL(16), PFM_GEN_IA32_SEL(17), PFM_GEN_IA32_SEL(18), PFM_GEN_IA32_SEL(19), + PFM_GEN_IA32_SEL(20), PFM_GEN_IA32_SEL(21), PFM_GEN_IA32_SEL(22), PFM_GEN_IA32_SEL(23), + PFM_GEN_IA32_SEL(24), PFM_GEN_IA32_SEL(25), PFM_GEN_IA32_SEL(26), PFM_GEN_IA32_SEL(27), + PFM_GEN_IA32_SEL(28), PFM_GEN_IA32_SEL(29), PFM_GEN_IA32_SEL(30), PFM_GEN_IA32_SEL(31) + }, + .pmd_addrs = { + PFM_GEN_IA32_CTR(0) , PFM_GEN_IA32_CTR(1), PFM_GEN_IA32_CTR(2), PFM_GEN_IA32_CTR(3), + PFM_GEN_IA32_CTR(4) , PFM_GEN_IA32_CTR(5), PFM_GEN_IA32_CTR(6), PFM_GEN_IA32_CTR(7), + PFM_GEN_IA32_CTR(8) , PFM_GEN_IA32_CTR(9), PFM_GEN_IA32_CTR(10), PFM_GEN_IA32_CTR(11), + PFM_GEN_IA32_CTR(12), PFM_GEN_IA32_CTR(13), PFM_GEN_IA32_CTR(14), PFM_GEN_IA32_CTR(15), + PFM_GEN_IA32_CTR(16), PFM_GEN_IA32_CTR(17), PFM_GEN_IA32_CTR(18), PFM_GEN_IA32_CTR(19), + PFM_GEN_IA32_CTR(20), PFM_GEN_IA32_CTR(21), PFM_GEN_IA32_CTR(22), PFM_GEN_IA32_CTR(23), + PFM_GEN_IA32_CTR(24), PFM_GEN_IA32_CTR(25), PFM_GEN_IA32_CTR(26), PFM_GEN_IA32_CTR(27), + PFM_GEN_IA32_CTR(28), PFM_GEN_IA32_CTR(29), PFM_GEN_IA32_CTR(30), PFM_GEN_IA32_CTR(31) + }, + .pmu_style = PFM_X86_PMU_P6 +}; + +#define PFM_GEN_IA32_C(n) { \ + .type = PFM_REG_I64, \ + .desc = "PERFEVTSEL"#n, \ + .dfl_val = PFM_GEN_IA32_PMC_VAL, \ + .rsvd_msk = PFM_GEN_IA32_PMC_RSVD, \ + .no_emul64_msk = PFM_GEN_IA32_NO64, \ + .hw_addr = MSR_GEN_PERFEVTSEL_BASE+(n) \ + } + +#define PFM_GEN_IA32_D(n) { \ + .type = PFM_REG_C, \ + .desc = "PMC"#n, \ + .dfl_val = 0, \ + .rsvd_msk = 0, \ + .no_emul64_msk = 0, \ + .hw_addr = MSR_GEN_PMC_BASE+(n) \ + } + +static struct pfm_reg_desc pfm_gen_ia32_pmc_desc[]={ +/* pmc0 */ PFM_GEN_IA32_C(0), PFM_GEN_IA32_C(1), PFM_GEN_IA32_C(2), PFM_GEN_IA32_C(3), +/* pmc4 */ PFM_GEN_IA32_C(4), PFM_GEN_IA32_C(5), PFM_GEN_IA32_C(6), PFM_GEN_IA32_C(7), +/* pmc8 */ PFM_GEN_IA32_C(8), PFM_GEN_IA32_C(9), PFM_GEN_IA32_C(10), 
PFM_GEN_IA32_C(11), +/* pmc12 */ PFM_GEN_IA32_C(12), PFM_GEN_IA32_C(13), PFM_GEN_IA32_C(14), PFM_GEN_IA32_C(15), +/* pmc16 */ PFM_GEN_IA32_C(16), PFM_GEN_IA32_C(17), PFM_GEN_IA32_C(18), PFM_GEN_IA32_C(19), +/* pmc20 */ PFM_GEN_IA32_C(20), PFM_GEN_IA32_C(21), PFM_GEN_IA32_C(22), PFM_GEN_IA32_C(23), +/* pmc24 */ PFM_GEN_IA32_C(24), PFM_GEN_IA32_C(25), PFM_GEN_IA32_C(26), PFM_GEN_IA32_C(27), +/* pmc28 */ PFM_GEN_IA32_C(28), PFM_GEN_IA32_C(29), PFM_GEN_IA32_C(30), PFM_GEN_IA32_C(31) +}; + +static struct pfm_reg_desc pfm_gen_ia32_pmd_desc[]={ +/* pmd0 */ PFM_GEN_IA32_D(0), PFM_GEN_IA32_D(1), PFM_GEN_IA32_D(2), PFM_GEN_IA32_D(3), +/* pmd4 */ PFM_GEN_IA32_D(4), PFM_GEN_IA32_D(5), PFM_GEN_IA32_D(6), PFM_GEN_IA32_D(7), +/* pmd8 */ PFM_GEN_IA32_D(8), PFM_GEN_IA32_D(9), PFM_GEN_IA32_D(10), PFM_GEN_IA32_D(11), +/* pmd12 */ PFM_GEN_IA32_D(12), PFM_GEN_IA32_D(13), PFM_GEN_IA32_D(14), PFM_GEN_IA32_D(15), +/* pmd16 */ PFM_GEN_IA32_D(16), PFM_GEN_IA32_D(17), PFM_GEN_IA32_D(18), PFM_GEN_IA32_D(19), +/* pmd20 */ PFM_GEN_IA32_D(20), PFM_GEN_IA32_D(21), PFM_GEN_IA32_D(22), PFM_GEN_IA32_D(23), +/* pmd24 */ PFM_GEN_IA32_D(24), PFM_GEN_IA32_D(25), PFM_GEN_IA32_D(26), PFM_GEN_IA32_D(27), +/* pmd28 */ PFM_GEN_IA32_D(28), PFM_GEN_IA32_D(29), PFM_GEN_IA32_D(30), PFM_GEN_IA32_D(31) +}; +#define PFM_GEN_IA32_MAX_PMCS ARRAY_SIZE(pfm_gen_ia32_pmc_desc) + +#define MSR_IA32_MISC_ENABLE_PERF_AVAIL (1<<7) /* read-only status bit */ + +static struct pfm_pmu_config pfm_gen_ia32_pmu_conf; + +static int pfm_gen_ia32_probe_pmu(void) +{ + union { + unsigned int val; + struct pmu_eax eax; + } eax; + unsigned int ebx, ecx, edx; + unsigned int num_cnt; + + if (cpu_data->x86_vendor != X86_VENDOR_INTEL) { + PFM_INFO("not an Intel processor"); + return -1; + } + + /* + * ensure CPUID instruction exists + */ + if (cpu_data->x86 < 5) { + PFM_INFO("processor family too old"); + return -1; + } + + if (force == 0) { + /* + * check if CPU supports 0xa function of CPUID + * 0xa started with Core Duo/Solo. 
Needed to detect if + * architected PMU is present + */ + cpuid(0x0, &eax.val, &ebx, &ecx, &edx); + if (eax.val < 0xa) { + PFM_INFO("CPUID 0xa function not supported\n"); + return -1; + } + + cpuid(0xa, &eax.val, &ebx, &ecx, &edx); + if (eax.eax.version < 1) { + PFM_INFO("architectural perfmon not supported\n"); + return -1; + } + + /* + * ensure that when all moduels are linked in, we picked the right + * one for Intel Core-based processors, as they accept architectural + * perfmon, but implement extensions which are only visible with + * perfmon_core module + */ + if (cpu_data->x86 == 6 && cpu_data->x86_model == 15) { + PFM_INFO("use perfmon_core for Core-based processors"); + return -1; + } + } else { + eax.eax.num_cnt = 2; + eax.eax.cnt_width = 31; + } + + num_cnt = eax.eax.num_cnt; + + /* + * sanity check number of counters + */ + if (num_cnt == 0 || num_cnt >= PFM_MAX_HW_PMCS) { + PFM_INFO("invalid number of counters %u\n", eax.eax.num_cnt); + return -1; + } + /* + * instead of dynamically generating the description table + * and MSR addresses, we have a default description with a reasonably + * large number of counters (32). We believe this is plenty for quite + * some time. Thus allows us to have a much simpler probing and + * initialization routine, especially because we have no dynamic + * allocation, especially for the counter names. + * + * When HW supports more that what we haev prepared for, then we limit + * the number of counters we support and print a message. 
+ */ + if (num_cnt >= PFM_GEN_IA32_MAX_PMCS) { + printk(KERN_INFO "perfmon: Limiting number of counters to %zu," + "HW supports %u", PFM_GEN_IA32_MAX_PMCS, num_cnt); + num_cnt = PFM_GEN_IA32_MAX_PMCS; + } + + if (eax.eax.cnt_width > 63) { + PFM_INFO("invalid counter width %u\n", eax.eax.cnt_width); + return -1; + } + + if (!cpu_has_apic) { + PFM_INFO("no Local APIC, unsupported"); + return -1; + } + + if (nmi_watchdog == NMI_LOCAL_APIC) { + PFM_INFO("NMI watchdog using PERFEVTSEL0/PERTCTR0, disabling them for perfmon"); + pfm_gen_ia32_pmc_desc[0].type = PFM_REG_NA; + pfm_gen_ia32_pmd_desc[0].type = PFM_REG_NA; + pfm_gen_ia32_pmu_info.pmc_addrs[0].reg_type = PFM_REGT_NA; + pfm_gen_ia32_pmu_info.pmd_addrs[0].reg_type = PFM_REGT_NA; + } + pfm_gen_ia32_pmu_conf.num_pmc_entries = num_cnt; + pfm_gen_ia32_pmu_conf.num_pmd_entries = num_cnt; + + return 0; +} + +/* + * Counters may have model-specific width. Yet the documentation says + * that only the lower 32 bits can be written to. bits [w-32] + * are sign extensions of bit 31. As such the effective width of + * a counter is 31 bits only. + * See IA-32 Intel Architecture Software developer manual Vol 3b: + * system programming and section 18.17.2 in particular. 
+ */ +static struct pfm_pmu_config pfm_gen_ia32_pmu_conf={ + .pmu_name = "Intel architectural", + .pmd_desc = pfm_gen_ia32_pmd_desc, + .counter_width = 31, + .pmc_desc = pfm_gen_ia32_pmc_desc, + .probe_pmu = pfm_gen_ia32_probe_pmu, + .version = "1.0", + .flags = PFM_PMU_BUILTIN_FLAG, + .owner = THIS_MODULE, + .arch_info = &pfm_gen_ia32_pmu_info +}; + +static int __init pfm_gen_ia32_pmu_init_module(void) +{ + return pfm_pmu_register(&pfm_gen_ia32_pmu_conf); +} + +static void __exit pfm_gen_ia32_pmu_cleanup_module(void) +{ + pfm_pmu_unregister(&pfm_gen_ia32_pmu_conf); +} + +module_init(pfm_gen_ia32_pmu_init_module); +module_exit(pfm_gen_ia32_pmu_cleanup_module); Index: linux-2.6/arch/i386/perfmon/perfmon_intel_arch.c =================================================================== --- /dev/null +++ linux-2.6/arch/i386/perfmon/perfmon_intel_arch.c @@ -0,0 +1,261 @@ +/* + * This file contains the Intel architectural perfmon register v1 + * description tables. + * + * Architectural perfmon was introduced with Intel Core Solo and + * Core Duo processors. + * + * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("Intel architectural perfmon v1"); +MODULE_LICENSE("GPL"); + +static int force, force_nmi; +MODULE_PARM_DESC(force, "bool: force module to load succesfully"); +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); +module_param(force, bool, 0600); +module_param(force_nmi, bool, 0600); + +/* + * - upper 32 bits are reserved + * - INT: APIC enable bit is reserved (forced to 1) + * - bit 21 is reserved + * + * RSVD: reserved bits are 1 + */ +#define PFM_INTEL_ARCH_PMC_RSVD ((~((1ULL<<32)-1)) \ + | (1ULL<<20) \ + | (1ULL<<21)) + +/* + * force Local APIC interrupt on overflow + * disable with NO_EMUL64 + */ +#define PFM_INTEL_ARCH_PMC_VAL (1ULL<<20) +#define PFM_INTEL_ARCH_NO64 (1ULL<<20) + +/* + * architectuture specifies that: + * IA32_PMCx MSR : starts at 0xc1 & occupy a contiguous block of MSR + * IA32_PERFEVTSELx MSR : starts at 0x186 & occupy a contiguous block of MSR + */ +#define MSR_GEN_SEL_BASE MSR_P6_EVNTSEL0 +#define MSR_GEN_PMC_BASE MSR_P6_PERFCTR0 + +#define PFM_INTEL_ARCH_SEL(n) { \ + .addrs[0] = MSR_GEN_SEL_BASE+(n), \ + .addrs[1] = 0, \ + .ctr = n, \ + .reg_type = PFM_REGT_EN} + +#define PFM_INTEL_ARCH_CTR(n) { \ + .addrs[0] = MSR_GEN_PMC_BASE+(n), \ + .addrs[1] = 0, \ + .ctr = n, \ + .reg_type = PFM_REGT_CTR} + +struct pmu_eax { + unsigned int version:8; + unsigned int num_cnt:8; /* up to 256 counters? 
*/ + unsigned int cnt_width:8; + unsigned int ebx_length:8; +}; + +/* + * physical addresses of MSR controlling the perfevtsel and counter registers + */ +struct pfm_arch_pmu_info pfm_intel_arch_pmu_info={ + .pmc_addrs = { + PFM_INTEL_ARCH_SEL(0) , PFM_INTEL_ARCH_SEL(1), PFM_INTEL_ARCH_SEL(2), PFM_INTEL_ARCH_SEL(3), + PFM_INTEL_ARCH_SEL(4) , PFM_INTEL_ARCH_SEL(5), PFM_INTEL_ARCH_SEL(6), PFM_INTEL_ARCH_SEL(7), + PFM_INTEL_ARCH_SEL(8) , PFM_INTEL_ARCH_SEL(9), PFM_INTEL_ARCH_SEL(10), PFM_INTEL_ARCH_SEL(11), + PFM_INTEL_ARCH_SEL(12), PFM_INTEL_ARCH_SEL(13), PFM_INTEL_ARCH_SEL(14), PFM_INTEL_ARCH_SEL(15), + PFM_INTEL_ARCH_SEL(16), PFM_INTEL_ARCH_SEL(17), PFM_INTEL_ARCH_SEL(18), PFM_INTEL_ARCH_SEL(19), + PFM_INTEL_ARCH_SEL(20), PFM_INTEL_ARCH_SEL(21), PFM_INTEL_ARCH_SEL(22), PFM_INTEL_ARCH_SEL(23), + PFM_INTEL_ARCH_SEL(24), PFM_INTEL_ARCH_SEL(25), PFM_INTEL_ARCH_SEL(26), PFM_INTEL_ARCH_SEL(27), + PFM_INTEL_ARCH_SEL(28), PFM_INTEL_ARCH_SEL(29), PFM_INTEL_ARCH_SEL(30), PFM_INTEL_ARCH_SEL(31) + }, + .pmd_addrs = { + PFM_INTEL_ARCH_CTR(0) , PFM_INTEL_ARCH_CTR(1), PFM_INTEL_ARCH_CTR(2), PFM_INTEL_ARCH_CTR(3), + PFM_INTEL_ARCH_CTR(4) , PFM_INTEL_ARCH_CTR(5), PFM_INTEL_ARCH_CTR(6), PFM_INTEL_ARCH_CTR(7), + PFM_INTEL_ARCH_CTR(8) , PFM_INTEL_ARCH_CTR(9), PFM_INTEL_ARCH_CTR(10), PFM_INTEL_ARCH_CTR(11), + PFM_INTEL_ARCH_CTR(12), PFM_INTEL_ARCH_CTR(13), PFM_INTEL_ARCH_CTR(14), PFM_INTEL_ARCH_CTR(15), + PFM_INTEL_ARCH_CTR(16), PFM_INTEL_ARCH_CTR(17), PFM_INTEL_ARCH_CTR(18), PFM_INTEL_ARCH_CTR(19), + PFM_INTEL_ARCH_CTR(20), PFM_INTEL_ARCH_CTR(21), PFM_INTEL_ARCH_CTR(22), PFM_INTEL_ARCH_CTR(23), + PFM_INTEL_ARCH_CTR(24), PFM_INTEL_ARCH_CTR(25), PFM_INTEL_ARCH_CTR(26), PFM_INTEL_ARCH_CTR(27), + PFM_INTEL_ARCH_CTR(28), PFM_INTEL_ARCH_CTR(29), PFM_INTEL_ARCH_CTR(30), PFM_INTEL_ARCH_CTR(31) + }, + .pmu_style = PFM_X86_PMU_P6 +}; + +#define PFM_INTEL_ARCH_C(n) { \ + .type = PFM_REG_I64, \ + .desc = "PERFEVTSEL"#n, \ + .dfl_val = PFM_INTEL_ARCH_PMC_VAL, \ + .rsvd_msk = 
PFM_INTEL_ARCH_PMC_RSVD, \ + .no_emul64_msk = PFM_INTEL_ARCH_NO64, \ + .hw_addr = MSR_GEN_SEL_BASE+(n) \ + } + +#define PFM_INTEL_ARCH_D(n) { \ + .type = PFM_REG_C, \ + .desc = "PMC"#n, \ + .dfl_val = 0, \ + .rsvd_msk = 0, \ + .no_emul64_msk = 0, \ + .hw_addr = MSR_GEN_PMC_BASE+(n) \ + } + +static struct pfm_regmap_desc pfm_intel_arch_pmc_desc[]={ +/* pmc0 */ PFM_INTEL_ARCH_C(0), PFM_INTEL_ARCH_C(1), PFM_INTEL_ARCH_C(2), PFM_INTEL_ARCH_C(3), +/* pmc4 */ PFM_INTEL_ARCH_C(4), PFM_INTEL_ARCH_C(5), PFM_INTEL_ARCH_C(6), PFM_INTEL_ARCH_C(7), +/* pmc8 */ PFM_INTEL_ARCH_C(8), PFM_INTEL_ARCH_C(9), PFM_INTEL_ARCH_C(10), PFM_INTEL_ARCH_C(11), +/* pmc12 */ PFM_INTEL_ARCH_C(12), PFM_INTEL_ARCH_C(13), PFM_INTEL_ARCH_C(14), PFM_INTEL_ARCH_C(15), +/* pmc16 */ PFM_INTEL_ARCH_C(16), PFM_INTEL_ARCH_C(17), PFM_INTEL_ARCH_C(18), PFM_INTEL_ARCH_C(19), +/* pmc20 */ PFM_INTEL_ARCH_C(20), PFM_INTEL_ARCH_C(21), PFM_INTEL_ARCH_C(22), PFM_INTEL_ARCH_C(23), +/* pmc24 */ PFM_INTEL_ARCH_C(24), PFM_INTEL_ARCH_C(25), PFM_INTEL_ARCH_C(26), PFM_INTEL_ARCH_C(27), +/* pmc28 */ PFM_INTEL_ARCH_C(28), PFM_INTEL_ARCH_C(29), PFM_INTEL_ARCH_C(30), PFM_INTEL_ARCH_C(31) +}; + +static struct pfm_regmap_desc pfm_intel_arch_pmd_desc[]={ +/* pmd0 */ PFM_INTEL_ARCH_D(0), PFM_INTEL_ARCH_D(1), PFM_INTEL_ARCH_D(2), PFM_INTEL_ARCH_D(3), +/* pmd4 */ PFM_INTEL_ARCH_D(4), PFM_INTEL_ARCH_D(5), PFM_INTEL_ARCH_D(6), PFM_INTEL_ARCH_D(7), +/* pmd8 */ PFM_INTEL_ARCH_D(8), PFM_INTEL_ARCH_D(9), PFM_INTEL_ARCH_D(10), PFM_INTEL_ARCH_D(11), +/* pmd12 */ PFM_INTEL_ARCH_D(12), PFM_INTEL_ARCH_D(13), PFM_INTEL_ARCH_D(14), PFM_INTEL_ARCH_D(15), +/* pmd16 */ PFM_INTEL_ARCH_D(16), PFM_INTEL_ARCH_D(17), PFM_INTEL_ARCH_D(18), PFM_INTEL_ARCH_D(19), +/* pmd20 */ PFM_INTEL_ARCH_D(20), PFM_INTEL_ARCH_D(21), PFM_INTEL_ARCH_D(22), PFM_INTEL_ARCH_D(23), +/* pmd24 */ PFM_INTEL_ARCH_D(24), PFM_INTEL_ARCH_D(25), PFM_INTEL_ARCH_D(26), PFM_INTEL_ARCH_D(27), +/* pmd28 */ PFM_INTEL_ARCH_D(28), PFM_INTEL_ARCH_D(29), PFM_INTEL_ARCH_D(30), 
PFM_INTEL_ARCH_D(31) +}; +#define PFM_INTEL_ARCH_MAX_PMCS ARRAY_SIZE(pfm_intel_arch_pmc_desc) + +static struct pfm_pmu_config pfm_intel_arch_pmu_conf; + + +static int pfm_intel_arch_probe_pmu(void) +{ + union { + unsigned int val; + struct pmu_eax eax; + } eax; + unsigned int ebx, ecx, edx; + unsigned int num_cnt; + + if (!cpu_has_arch_perfmon) { + PFM_INFO("no support for Intel architectural PMU"); + return -1; + } + + if (force == 0) { + cpuid(0xa, &eax.val, &ebx, &ecx, &edx); + } else { + eax.eax.num_cnt = 2; + eax.eax.cnt_width = 31; + } + + /* number of counters */ + num_cnt = eax.eax.num_cnt; + + /* + * sanity check number of counters + */ + if (num_cnt == 0 || num_cnt >= PFM_MAX_PMCS) { + PFM_INFO("invalid number of counters %u\n", eax.eax.num_cnt); + return -1; + } + /* + * instead of dynamically generating the description table + * and MSR addresses, we have a default description with a reasonably + * large number of counters (32). We believe this is plenty for quite + * some time. This allows us to have a much simpler probing and + * initialization routine, especially because we have no dynamic + * allocation. + * + * When HW supports more than what we prepared for, then we limit + * the number of counters we support and print a message. + */ + if (num_cnt > PFM_INTEL_ARCH_MAX_PMCS) { + printk(KERN_INFO "perfmon: Limiting number of counters to %zu, " + "HW supports %u\n", PFM_INTEL_ARCH_MAX_PMCS, num_cnt); + num_cnt = PFM_INTEL_ARCH_MAX_PMCS; + } + + if (eax.eax.cnt_width > 63) { + PFM_INFO("invalid counter width %u\n", eax.eax.cnt_width); + return -1; + } + + if (!cpu_has_apic) { + PFM_INFO("no Local APIC, try rebooting with lapic"); + return -1; + } + + pfm_intel_arch_pmu_conf.num_pmc_entries = num_cnt; + pfm_intel_arch_pmu_conf.num_pmd_entries = num_cnt; + + PFM_INFO("nmi_watchdog=%d nmi_active=%d force_nmi=%d", + nmi_watchdog, atomic_read(&nmi_active), force_nmi); + + /* + * NMI using PMU? 
+ * Actual removal of NMI counter is done by pfm_pmu_acquire() + */ + if (nmi_watchdog == NMI_LOCAL_APIC || force_nmi) + pfm_intel_arch_pmu_info.flags |= PFM_X86_FL_USE_NMI; + + return 0; +} + +/* + * Counters may have model-specific width. Yet the documentation says + * that only the lower 32 bits can be written to. bits [w-32] + * are sign extensions of bit 31. As such the effective width of + * a counter is 31 bits only. + * See IA-32 Intel Architecture Software developer manual Vol 3B + */ +static struct pfm_pmu_config pfm_intel_arch_pmu_conf={ + .pmu_name = "Intel architectural v1", + .pmd_desc = pfm_intel_arch_pmd_desc, + .counter_width = 31, + .pmc_desc = pfm_intel_arch_pmc_desc, + .probe_pmu = pfm_intel_arch_probe_pmu, + .version = "1.0", + .flags = PFM_PMU_BUILTIN_FLAG, + .owner = THIS_MODULE, + .arch_info = &pfm_intel_arch_pmu_info +}; + +static int __init pfm_intel_arch_pmu_init_module(void) +{ + return pfm_pmu_register(&pfm_intel_arch_pmu_conf); +} + +static void __exit pfm_intel_arch_pmu_cleanup_module(void) +{ + pfm_pmu_unregister(&pfm_intel_arch_pmu_conf); +} + +module_init(pfm_intel_arch_pmu_init_module); +module_exit(pfm_intel_arch_pmu_cleanup_module); Index: linux-2.6/arch/i386/perfmon/perfmon_p4.c =================================================================== --- /dev/null +++ linux-2.6/arch/i386/perfmon/perfmon_p4.c @@ -0,0 +1,414 @@ +/* + * This file contains the P4/Xeon PMU register description tables + * for both 32 and 64 bit modes. + * + * Copyright (c) 2005 Intel Corporation + * Contributed by Bryan Wilkerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Bryan Wilkerson "); +MODULE_DESCRIPTION("P4/Xeon/EM64T PMU description table"); +MODULE_LICENSE("GPL"); + +static int force; +MODULE_PARM_DESC(force, "bool: force module to load succesfully"); +module_param(force, bool, 0600); + +static int force_nmi; +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); +module_param(force_nmi, bool, 0600); + +/* + * CCCR default value: + * - OVF_PMI_T0=1 (bit 26) + * - OVF_PMI_T1=0 (bit 27) (set if necessary in pfm_write_reg()) + * - bits 16-17 are set to 3; all other bits are zero + * + * OVF_PMI is forced to zero if PFM_REGFL_NO_EMUL64 is set on CCCR + */ +#define PFM_CCCR_DFL ((1ULL<<26) | (3ULL<<16)) + +/* + * CCCR reserved fields: + * - bits 0-11, 25-29, 31-63 + * - OVF_PMI (26-27), override with REGFL_NO_EMUL64 + * + * RSVD: reserved bits must be 1 + */ +#define PFM_CCCR_RSVD (~((0xfull<<12) \ + | (0x7full<<18) \ + | (0x1ull<<30))) + +#define PFM_P4_NO64 (3ULL<<26) /* use 3 even in non HT mode */ + +/* + * With HyperThreading enabled: + * + * The ESCRs and CCCRs are divided in half with the top half + * belonging to logical processor 0 and the bottom half going to + * logical processor 1. Thus only half of the PMU resources are + * accessible to applications. + * + * PEBS is not available due to the fact that: + * - MSR_PEBS_MATRIX_VERT is shared between the threads + * - IA32_PEBS_ENABLE is shared between the threads + * + * With HyperThreading disabled: + * + * The full set of PMU resources is exposed to applications. + * + * The mapping is chosen such that PMCxx -> MSR is the same + * in HT and non HT mode, if register is present in HT mode. 
+ * + */ +#define PFM_REGT_NHTESCR (PFM_REGT_ESCR|PFM_REGT_NOHT) +#define PFM_REGT_NHTCCCR (PFM_REGT_CCCR|PFM_REGT_NOHT|PFM_REGT_EN) +#define PFM_REGT_NHTPEBS (PFM_REGT_PEBS|PFM_REGT_NOHT|PFM_REGT_EN) +#define PFM_REGT_NHTCTR (PFM_REGT_CTR|PFM_REGT_NOHT) +#define PFM_REGT_ENAC (PFM_REGT_CCCR|PFM_REGT_EN) + +static struct pfm_arch_pmu_info pfm_p4_pmu_info={ + .pmc_addrs = { + /*pmc 0 */ {{MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1}, 0, PFM_REGT_ESCR}, /* BPU_ESCR0,1 */ + /*pmc 1 */ {{MSR_P4_IS_ESCR0, MSR_P4_IS_ESCR1}, 0, PFM_REGT_ESCR}, /* IS_ESCR0,1 */ + /*pmc 2 */ {{MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1}, 0, PFM_REGT_ESCR}, /* MOB_ESCR0,1 */ + /*pmc 3 */ {{MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1}, 0, PFM_REGT_ESCR}, /* ITLB_ESCR0,1 */ + /*pmc 4 */ {{MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1}, 0, PFM_REGT_ESCR}, /* PMH_ESCR0,1 */ + /*pmc 5 */ {{MSR_P4_IX_ESCR0, MSR_P4_IX_ESCR1}, 0, PFM_REGT_ESCR}, /* IX_ESCR0,1 */ + /*pmc 6 */ {{MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1}, 0, PFM_REGT_ESCR}, /* FSB_ESCR0,1 */ + /*pmc 7 */ {{MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1}, 0, PFM_REGT_ESCR}, /* BSU_ESCR0,1 */ + /*pmc 8 */ {{MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1}, 0, PFM_REGT_ESCR}, /* MS_ESCR0,1 */ + /*pmc 9 */ {{MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1}, 0, PFM_REGT_ESCR}, /* TC_ESCR0,1 */ + /*pmc 10*/ {{MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1}, 0, PFM_REGT_ESCR}, /* TBPU_ESCR0,1 */ + /*pmc 11*/ {{MSR_P4_FLAME_ESCR0, MSR_P4_FLAME_ESCR1}, 0, PFM_REGT_ESCR}, /* FLAME_ESCR0,1 */ + /*pmc 12*/ {{MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1}, 0, PFM_REGT_ESCR}, /* FIRM_ESCR0,1 */ + /*pmc 13*/ {{MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1}, 0, PFM_REGT_ESCR}, /* SAAT_ESCR0,1 */ + /*pmc 14*/ {{MSR_P4_U2L_ESCR0, MSR_P4_U2L_ESCR1}, 0, PFM_REGT_ESCR}, /* U2L_ESCR0,1 */ + /*pmc 15*/ {{MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1}, 0, PFM_REGT_ESCR}, /* DAC_ESCR0,1 */ + /*pmc 16*/ {{MSR_P4_IQ_ESCR0, MSR_P4_IQ_ESCR1}, 0, PFM_REGT_ESCR}, /* IQ_ESCR0,1 (only model 1 and 2) */ + /*pmc 17*/ {{MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1}, 0, PFM_REGT_ESCR}, /* ALF_ESCR0,1 
*/ + /*pmc 18*/ {{MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1}, 0, PFM_REGT_ESCR}, /* RAT_ESCR0,1 */ + /*pmc 19*/ {{MSR_P4_SSU_ESCR0, 0}, 0, PFM_REGT_ESCR}, /* SSU_ESCR0 */ + /*pmc 20*/ {{MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1}, 0, PFM_REGT_ESCR}, /* CRU_ESCR0,1 */ + /*pmc 21*/ {{MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3}, 0, PFM_REGT_ESCR}, /* CRU_ESCR2,3 */ + /*pmc 22*/ {{MSR_P4_CRU_ESCR4, MSR_P4_CRU_ESCR5}, 0, PFM_REGT_ESCR}, /* CRU_ESCR4,5 */ + + /*pmc 23*/ {{MSR_P4_BPU_CCCR0, MSR_P4_BPU_CCCR2}, 0, PFM_REGT_ENAC}, /* BPU_CCCR0,2 */ + /*pmc 24*/ {{MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3}, 1, PFM_REGT_ENAC}, /* BPU_CCCR1,3 */ + /*pmc 25*/ {{MSR_P4_MS_CCCR0, MSR_P4_MS_CCCR2}, 2, PFM_REGT_ENAC}, /* MS_CCCR0,2 */ + /*pmc 26*/ {{MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3}, 3, PFM_REGT_ENAC}, /* MS_CCCR1,3 */ + /*pmc 27*/ {{MSR_P4_FLAME_CCCR0, MSR_P4_FLAME_CCCR2}, 4, PFM_REGT_ENAC}, /* FLAME_CCCR0,2 */ + /*pmc 28*/ {{MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3}, 5, PFM_REGT_ENAC}, /* FLAME_CCCR1,3 */ + /*pmc 29*/ {{MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR2}, 6, PFM_REGT_ENAC}, /* IQ_CCCR0,2 */ + /*pmc 30*/ {{MSR_P4_IQ_CCCR1, MSR_P4_IQ_CCCR3}, 7, PFM_REGT_ENAC}, /* IQ_CCCR1,3 */ + /*pmc 31*/ {{MSR_P4_IQ_CCCR4, MSR_P4_IQ_CCCR5}, 8, PFM_REGT_ENAC}, /* IQ_CCCR4,5 */ + /* non HT extensions */ + /*pmc 32*/ {{MSR_P4_BPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BPU_ESCR1 */ + /*pmc 33*/ {{MSR_P4_IS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IS_ESCR1 */ + /*pmc 34*/ {{MSR_P4_MOB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MOB_ESCR1 */ + /*pmc 35*/ {{MSR_P4_ITLB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ITLB_ESCR1 */ + /*pmc 36*/ {{MSR_P4_PMH_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* PMH_ESCR1 */ + /*pmc 37*/ {{MSR_P4_IX_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IX_ESCR1 */ + /*pmc 38*/ {{MSR_P4_FSB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FSB_ESCR1 */ + /*pmc 39*/ {{MSR_P4_BSU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BSU_ESCR1 */ + /*pmc 40*/ {{MSR_P4_MS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MS_ESCR1 */ + /*pmc 41*/ {{MSR_P4_TC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TC_ESCR1 
*/ + /*pmc 42*/ {{MSR_P4_TBPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TBPU_ESCR1 */ + /*pmc 43*/ {{MSR_P4_FLAME_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FLAME_ESCR1 */ + /*pmc 44*/ {{MSR_P4_FIRM_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FIRM_ESCR1 */ + /*pmc 45*/ {{MSR_P4_SAAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* SAAT_ESCR1 */ + /*pmc 46*/ {{MSR_P4_U2L_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* U2L_ESCR1 */ + /*pmc 47*/ {{MSR_P4_DAC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* DAC_ESCR1 */ + /*pmc 48*/ {{MSR_P4_IQ_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IQ_ESCR1 (only model 1 and 2) */ + /*pmc 49*/ {{MSR_P4_ALF_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ALF_ESCR1 */ + /*pmc 50*/ {{MSR_P4_RAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* RAT_ESCR1 */ + /*pmc 51*/ {{MSR_P4_CRU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR1 */ + /*pmc 52*/ {{MSR_P4_CRU_ESCR3, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR3 */ + /*pmc 53*/ {{MSR_P4_CRU_ESCR5, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR5 */ + /*pmc 54*/ {{MSR_P4_BPU_CCCR2, 0}, 9, PFM_REGT_NHTCCCR}, /* BPU_CCCR2 */ + /*pmc 55*/ {{MSR_P4_BPU_CCCR3, 0},10, PFM_REGT_NHTCCCR}, /* BPU_CCCR3 */ + /*pmc 56*/ {{MSR_P4_MS_CCCR2, 0},11, PFM_REGT_NHTCCCR}, /* MS_CCCR2 */ + /*pmc 57*/ {{MSR_P4_MS_CCCR3, 0},12, PFM_REGT_NHTCCCR}, /* MS_CCCR3 */ + /*pmc 58*/ {{MSR_P4_FLAME_CCCR2, 0},13, PFM_REGT_NHTCCCR}, /* FLAME_CCCR2 */ + /*pmc 59*/ {{MSR_P4_FLAME_CCCR3, 0},14, PFM_REGT_NHTCCCR}, /* FLAME_CCCR3 */ + /*pmc 60*/ {{MSR_P4_IQ_CCCR2, 0},15, PFM_REGT_NHTCCCR}, /* IQ_CCCR2 */ + /*pmc 61*/ {{MSR_P4_IQ_CCCR3, 0},16, PFM_REGT_NHTCCCR}, /* IQ_CCCR3 */ + /*pmc 62*/ {{MSR_P4_IQ_CCCR5, 0},17, PFM_REGT_NHTCCCR}, /* IQ_CCCR5 */ + /*pmc 63*/ {{0x3f2, 0}, 0, PFM_REGT_NHTPEBS},/* PEBS_MATRIX_VERT */ + /*pmc 64*/ {{0x3f1, 0}, 0, PFM_REGT_NHTPEBS} /* PEBS_ENABLE */ + }, + + .pmd_addrs = { + /*pmd 0 */ {{MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_PERFCTR2}, 0, PFM_REGT_CTR}, /* BPU_CTR0,2 */ + /*pmd 1 */ {{MSR_P4_BPU_PERFCTR1, MSR_P4_BPU_PERFCTR3}, 0, PFM_REGT_CTR}, /* BPU_CTR1,3 */ + /*pmd 2 */ {{MSR_P4_MS_PERFCTR0, 
MSR_P4_MS_PERFCTR2}, 0, PFM_REGT_CTR}, /* MS_CTR0,2 */ + /*pmd 3 */ {{MSR_P4_MS_PERFCTR1, MSR_P4_MS_PERFCTR3}, 0, PFM_REGT_CTR}, /* MS_CTR1,3 */ + /*pmd 4 */ {{MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_PERFCTR2}, 0, PFM_REGT_CTR}, /* FLAME_CTR0,2 */ + /*pmd 5 */ {{MSR_P4_FLAME_PERFCTR1, MSR_P4_FLAME_PERFCTR3}, 0, PFM_REGT_CTR}, /* FLAME_CTR1,3 */ + /*pmd 6 */ {{MSR_P4_IQ_PERFCTR0, MSR_P4_IQ_PERFCTR2}, 0, PFM_REGT_CTR}, /* IQ_CTR0,2 */ + /*pmd 7 */ {{MSR_P4_IQ_PERFCTR1, MSR_P4_IQ_PERFCTR3}, 0, PFM_REGT_CTR}, /* IQ_CTR1,3 */ + /*pmd 8 */ {{MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_PERFCTR5}, 0, PFM_REGT_CTR}, /* IQ_CTR4,5 */ + /* + * non HT extensions + */ + /*pmd 9 */ {{MSR_P4_BPU_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR2 */ + /*pmd 10*/ {{MSR_P4_BPU_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR3 */ + /*pmd 11*/ {{MSR_P4_MS_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR2 */ + /*pmd 12*/ {{MSR_P4_MS_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR3 */ + /*pmd 13*/ {{MSR_P4_FLAME_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* FLAME_CTR2 */ + /*pmd 14*/ {{MSR_P4_FLAME_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* FLAME_CTR3 */ + /*pmd 15*/ {{MSR_P4_IQ_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR2 */ + /*pmd 16*/ {{MSR_P4_IQ_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR3 */ + /*pmd 17*/ {{MSR_P4_IQ_PERFCTR5, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR5 */ + }, + .pebs_ctr_idx = 8, /* thread0: IQ_CTR4, thread1: IQ_CTR5 */ + .pmu_style = PFM_X86_PMU_P4 +}; + +static struct pfm_regmap_desc pfm_p4_pmc_desc[]={ +/* pmc0 */ PMC_D(PFM_REG_I, "BPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR0), +/* pmc1 */ PMC_D(PFM_REG_I, "IS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IS_ESCR0), +/* pmc2 */ PMC_D(PFM_REG_I, "MOB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR0), +/* pmc3 */ PMC_D(PFM_REG_I, "ITLB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR0), +/* pmc4 */ PMC_D(PFM_REG_I, "PMH_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR0), +/* pmc5 */ PMC_D(PFM_REG_I, "IX_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR0), +/* pmc6 
*/ PMC_D(PFM_REG_I, "FSB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR0), +/* pmc7 */ PMC_D(PFM_REG_I, "BSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR0), +/* pmc8 */ PMC_D(PFM_REG_I, "MS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR0), +/* pmc9 */ PMC_D(PFM_REG_I, "TC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR0), +/* pmc10 */ PMC_D(PFM_REG_I, "TBPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR0), +/* pmc11 */ PMC_D(PFM_REG_I, "FLAME_ESCR0", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR0), +/* pmc12 */ PMC_D(PFM_REG_I, "FIRM_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR0), +/* pmc13 */ PMC_D(PFM_REG_I, "SAAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR0), +/* pmc14 */ PMC_D(PFM_REG_I, "U2L_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR0), +/* pmc15 */ PMC_D(PFM_REG_I, "DAC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR0), +/* pmc16 */ PMC_D(PFM_REG_I, "IQ_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR0), /* only model 1 and 2*/ +/* pmc17 */ PMC_D(PFM_REG_I, "ALF_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR0), +/* pmc18 */ PMC_D(PFM_REG_I, "RAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR0), +/* pmc19 */ PMC_D(PFM_REG_I, "SSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SSU_ESCR0), +/* pmc20 */ PMC_D(PFM_REG_I, "CRU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR0), +/* pmc21 */ PMC_D(PFM_REG_I, "CRU_ESCR2" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR2), +/* pmc22 */ PMC_D(PFM_REG_I, "CRU_ESCR4" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR4), +/* pmc23 */ PMC_D(PFM_REG_I64, "BPU_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR0), +/* pmc24 */ PMC_D(PFM_REG_I64, "BPU_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR1), +/* pmc25 */ PMC_D(PFM_REG_I64, "MS_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR0), +/* pmc26 */ PMC_D(PFM_REG_I64, "MS_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR1), +/* pmc27 */ PMC_D(PFM_REG_I64, "FLAME_CCCR0", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, 
MSR_P4_FLAME_CCCR0), +/* pmc28 */ PMC_D(PFM_REG_I64, "FLAME_CCCR1", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR1), +/* pmc29 */ PMC_D(PFM_REG_I64, "IQ_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR0), +/* pmc30 */ PMC_D(PFM_REG_I64, "IQ_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR1), +/* pmc31 */ PMC_D(PFM_REG_I64, "IQ_CCCR4" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR4), + /* No HT extension */ +/* pmc32 */ PMC_D(PFM_REG_I, "BPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR1), +/* pmc33 */ PMC_D(PFM_REG_I, "IS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IS_ESCR1), +/* pmc34 */ PMC_D(PFM_REG_I, "MOB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR1), +/* pmc35 */ PMC_D(PFM_REG_I, "ITLB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR1), +/* pmc36 */ PMC_D(PFM_REG_I, "PMH_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR1), +/* pmc37 */ PMC_D(PFM_REG_I, "IX_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR1), +/* pmc38 */ PMC_D(PFM_REG_I, "FSB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR1), +/* pmc39 */ PMC_D(PFM_REG_I, "BSU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR1), +/* pmc40 */ PMC_D(PFM_REG_I, "MS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR1), +/* pmc41 */ PMC_D(PFM_REG_I, "TC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR1), +/* pmc42 */ PMC_D(PFM_REG_I, "TBPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR1), +/* pmc43 */ PMC_D(PFM_REG_I, "FLAME_ESCR1", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR1), +/* pmc44 */ PMC_D(PFM_REG_I, "FIRM_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR1), +/* pmc45 */ PMC_D(PFM_REG_I, "SAAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR1), +/* pmc46 */ PMC_D(PFM_REG_I, "U2L_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR1), +/* pmc47 */ PMC_D(PFM_REG_I, "DAC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR1), +/* pmc48 */ PMC_D(PFM_REG_I, "IQ_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR1), /* only model 1 and 2 */ +/* pmc49 */ PMC_D(PFM_REG_I, "ALF_ESCR1" 
, 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR1), +/* pmc50 */ PMC_D(PFM_REG_I, "RAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR1), +/* pmc51 */ PMC_D(PFM_REG_I, "CRU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR1), +/* pmc52 */ PMC_D(PFM_REG_I, "CRU_ESCR3" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR3), +/* pmc53 */ PMC_D(PFM_REG_I, "CRU_ESCR5" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR5), +/* pmc54 */ PMC_D(PFM_REG_I64, "BPU_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR2), +/* pmc55 */ PMC_D(PFM_REG_I64, "BPU_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR3), +/* pmc56 */ PMC_D(PFM_REG_I64, "MS_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR2), +/* pmc57 */ PMC_D(PFM_REG_I64, "MS_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR3), +/* pmc58 */ PMC_D(PFM_REG_I64, "FLAME_CCCR2", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR2), +/* pmc59 */ PMC_D(PFM_REG_I64, "FLAME_CCCR3", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR3), +/* pmc60 */ PMC_D(PFM_REG_I64, "IQ_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR2), +/* pmc61 */ PMC_D(PFM_REG_I64, "IQ_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR3), +/* pmc62 */ PMC_D(PFM_REG_I64, "IQ_CCCR5" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR5), +/* pmc63 */ PMC_D(PFM_REG_I, "PEBS_MATRIX_VERT", 0, 0xffffffffffffffecULL, 0, 0x3f2), +/* pmc64 */ PMC_D(PFM_REG_I, "PEBS_ENABLE", 0, 0xfffffffff8ffe000ULL, 0, 0x3f1) +}; +#define PFM_P4_NUM_PMCS ARRAY_SIZE(pfm_p4_pmc_desc) + +/* + * See section 15.10.6.6 for details about the IQ block + */ +static struct pfm_regmap_desc pfm_p4_pmd_desc[]={ +/* pmd0 */ PMD_D(PFM_REG_C, "BPU_CTR0", MSR_P4_BPU_PERFCTR0), +/* pmd1 */ PMD_D(PFM_REG_C, "BPU_CTR1", MSR_P4_BPU_PERFCTR1), +/* pmd2 */ PMD_D(PFM_REG_C, "MS_CTR0", MSR_P4_MS_PERFCTR0), +/* pmd3 */ PMD_D(PFM_REG_C, "MS_CTR1", MSR_P4_MS_PERFCTR1), +/* pmd4 */ PMD_D(PFM_REG_C, "FLAME_CTR0", 
MSR_P4_FLAME_PERFCTR0), +/* pmd5 */ PMD_D(PFM_REG_C, "FLAME_CTR1", MSR_P4_FLAME_PERFCTR1), +/* pmd6 */ PMD_D(PFM_REG_C, "IQ_CTR0", MSR_P4_IQ_PERFCTR0), +/* pmd7 */ PMD_D(PFM_REG_C, "IQ_CTR1", MSR_P4_IQ_PERFCTR1), +/* pmd8 */ PMD_D(PFM_REG_C, "IQ_CTR4", MSR_P4_IQ_PERFCTR4), + /* no HT extension */ +/* pmd9 */ PMD_D(PFM_REG_C, "BPU_CTR2", MSR_P4_BPU_PERFCTR2), +/* pmd10 */ PMD_D(PFM_REG_C, "BPU_CTR3", MSR_P4_BPU_PERFCTR3), +/* pmd11 */ PMD_D(PFM_REG_C, "MS_CTR2", MSR_P4_MS_PERFCTR2), +/* pmd12 */ PMD_D(PFM_REG_C, "MS_CTR3", MSR_P4_MS_PERFCTR3), +/* pmd13 */ PMD_D(PFM_REG_C, "FLAME_CTR2", MSR_P4_FLAME_PERFCTR2), +/* pmd14 */ PMD_D(PFM_REG_C, "FLAME_CTR3", MSR_P4_FLAME_PERFCTR3), +/* pmd15 */ PMD_D(PFM_REG_C, "IQ_CTR2", MSR_P4_IQ_PERFCTR2), +/* pmd16 */ PMD_D(PFM_REG_C, "IQ_CTR3", MSR_P4_IQ_PERFCTR3), +/* pmd17 */ PMD_D(PFM_REG_C, "IQ_CTR5", MSR_P4_IQ_PERFCTR5) +}; +#define PFM_P4_NUM_PMDS ARRAY_SIZE(pfm_p4_pmd_desc) + +/* + * Due to hotplug CPU support, threads may not necessarily + * be activated at the time the module is inserted. We need + * to check whether they could be activated by looking at + * the present CPU (present != online). + */ +static int pfm_p4_probe_pmu(void) +{ + unsigned int i; + int ht_enabled; + + /* + * only works on Intel processors + */ + if (cpu_data->x86_vendor != X86_VENDOR_INTEL) { + PFM_INFO("not running on Intel processor"); + return -1; + } + + if (cpu_data->x86 != 15) { + PFM_INFO("unsupported family=%d", cpu_data->x86); + return -1; + } + + switch(cpu_data->x86_model) { + case 0 ... 2: + break; + case 3 ... 
6: + /* + * IQ_ESCR0, IQ_ESCR1 only present on model 1, 2 + */ + pfm_p4_pmc_desc[16].type = PFM_REG_NA; + pfm_p4_pmc_desc[48].type = PFM_REG_NA; + break; + default: + /* + * do not know if they all work the same, so reject + * for now + */ + if (!force) { + PFM_INFO("unsupported model %d", cpu_data->x86_model); + return -1; + } + } + + /* + * check for local APIC (required) + */ + if (!cpu_has_apic) { + PFM_INFO("no local APIC, unsupported"); + return -1; + } +#ifdef CONFIG_SMP + ht_enabled = (cpus_weight(cpu_core_map[smp_processor_id()]) + / cpu_data->x86_max_cores) > 1; +#else + ht_enabled = 0; +#endif + if (cpu_has_ht) { + + PFM_INFO("HyperThreading supported, status %s", + ht_enabled ? "on": "off"); + /* + * disable registers not supporting HT + */ + if (ht_enabled) { + PFM_INFO("disabling half the registers for HT"); + for (i = 0; i < PFM_P4_NUM_PMCS; i++) { + if (pfm_p4_pmu_info.pmc_addrs[(i)].reg_type & + PFM_REGT_NOHT) + pfm_p4_pmc_desc[i].type = PFM_REG_NA; + } + for (i = 0; i < PFM_P4_NUM_PMDS; i++) { + if (pfm_p4_pmu_info.pmd_addrs[(i)].reg_type & + PFM_REGT_NOHT) + pfm_p4_pmd_desc[i].type = PFM_REG_NA; + } + } + } + + if (cpu_has_ds) { + PFM_INFO("Data Save Area (DS) supported"); + + pfm_p4_pmu_info.flags = PFM_X86_FL_PMU_DS; + + if (cpu_has_pebs) { + /* + * PEBS does not work with HyperThreading enabled + */ + if (ht_enabled) { + PFM_INFO("PEBS supported, status off (because of HT)"); + } else { + pfm_p4_pmu_info.flags |= PFM_X86_FL_PMU_PEBS; + PFM_INFO("PEBS supported, status on"); + } + } + } + /* + * NMI using PMU? 
+ * Actual removal of NMI counter is done by pfm_pmu_acquire() + */ + if (nmi_watchdog == NMI_LOCAL_APIC || force_nmi) + pfm_p4_pmu_info.flags |= PFM_X86_FL_USE_NMI; + return 0; +} + +static struct pfm_pmu_config pfm_p4_pmu_conf={ + .pmu_name = "Intel P4", + .counter_width = 40, + .pmd_desc = pfm_p4_pmd_desc, + .pmc_desc = pfm_p4_pmc_desc, + .num_pmc_entries = PFM_P4_NUM_PMCS, + .num_pmd_entries = PFM_P4_NUM_PMDS, + .probe_pmu = pfm_p4_probe_pmu, + .version = "1.0", + .flags = PFM_PMU_BUILTIN_FLAG, + .owner = THIS_MODULE, + .arch_info = &pfm_p4_pmu_info +}; + +static int __init pfm_p4_pmu_init_module(void) +{ + return pfm_pmu_register(&pfm_p4_pmu_conf); +} + +static void __exit pfm_p4_pmu_cleanup_module(void) +{ + pfm_pmu_unregister(&pfm_p4_pmu_conf); +} + +module_init(pfm_p4_pmu_init_module); +module_exit(pfm_p4_pmu_cleanup_module); Index: linux-2.6/arch/i386/perfmon/perfmon_p6.c =================================================================== --- /dev/null +++ linux-2.6/arch/i386/perfmon/perfmon_p6.c @@ -0,0 +1,172 @@ +/* + * This file contains the P6 family processor PMU register description tables + * + * This module supports original P6 processors + * (Pentium II, Pentium Pro, Pentium III) and Pentium M. + * + * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include +#include +#include + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("P6 PMU description table"); +MODULE_LICENSE("GPL"); + +static int force_nmi; +MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); +module_param(force_nmi, bool, 0600); + +/* + * - upper 32 bits are reserved + * - INT: APIC enable bit is reserved (forced to 1) + * - bit 21 is reserved + * + * RSVD: reserved bits are 1 + */ +#define PFM_P6_PMC_RSVD ((~((1ULL<<32)-1)) \ + | (1ULL<<20) \ + | (1ULL<<21)) + +/* + * force Local APIC interrupt on overflow + * disable with NO_EMUL64 + */ +#define PFM_P6_PMC_VAL (1ULL<<20) +#define PFM_P6_NO64 (1ULL<<20) + +struct pfm_arch_pmu_info pfm_p6_pmu_info={ + .pmc_addrs = { + {{MSR_P6_EVNTSEL0, 0}, 0, PFM_REGT_EN}, /* has enable bit */ + {{MSR_P6_EVNTSEL1, 0}, 1, PFM_REGT_OTH} /* no enable bit */ + }, + .pmd_addrs = { + {{MSR_P6_PERFCTR0, 0}, 0, PFM_REGT_CTR}, + {{MSR_P6_PERFCTR1, 0}, 0, PFM_REGT_CTR} + }, + .pmu_style = PFM_X86_PMU_P6 +}; + +static struct pfm_regmap_desc pfm_p6_pmc_desc[]={ +/* pmc0 */ PMC_D(PFM_REG_I64, "PERFEVTSEL0", PFM_P6_PMC_VAL, PFM_P6_PMC_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL0), +/* pmc1 */ PMC_D(PFM_REG_I64, "PERFEVTSEL1", PFM_P6_PMC_VAL, PFM_P6_PMC_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL1) +}; +#define PFM_P6_NUM_PMCS ARRAY_SIZE(pfm_p6_pmc_desc) + +static struct pfm_regmap_desc pfm_p6_pmd_desc[]={ +/* pmd0 */ PMD_D(PFM_REG_C , "PERFCTR0", MSR_P6_PERFCTR0), +/* pmd1 */ PMD_D(PFM_REG_C , "PERFCTR1", MSR_P6_PERFCTR1) +}; +#define PFM_P6_NUM_PMDS ARRAY_SIZE(pfm_p6_pmd_desc) + +static int pfm_p6_probe_pmu(void) +{ + int high, low; + + if (cpu_data->x86_vendor != X86_VENDOR_INTEL) { + PFM_INFO("not an Intel processor"); + return -1; + } + + /* + * check for P6 processor family + */ + 
if (cpu_data->x86 != 6) { + PFM_INFO("unsupported family=%d", cpu_data->x86); + return -1; + } + + switch(cpu_data->x86_model) { + case 3: + case 5: /* Pentium II Deschutes */ + case 7 ... 11: + break; + case 13: + /* for Pentium M, we need to check if PMU exist */ + rdmsr(MSR_IA32_MISC_ENABLE, low, high); + if (low & (1U << 7)) + break; + default: /* also reached by fall-through from case 13 */ + PFM_INFO("unsupported CPU model %d", + cpu_data->x86_model); + return -1; + + } + + if (!cpu_has_apic) { + PFM_INFO("no Local APIC, try rebooting with lapic"); + return -1; + } + + PFM_INFO("nmi_watchdog=%d nmi_active=%d force_nmi=%d", + nmi_watchdog, atomic_read(&nmi_active), force_nmi); + + /* + * we cannot have perfmon/nmi_watchdog running together as there + * is only one enable bit for both counters. + */ + if (nmi_watchdog == NMI_LOCAL_APIC) { + PFM_INFO("NMI watchdog using performance counters. " + "perfmon cannot work correctly, reboot with nmi_watchdog=0"); + return -1; + } + + /* + * force NMI interrupt? + */ + if (force_nmi) + pfm_p6_pmu_info.flags |= PFM_X86_FL_USE_NMI; + + return 0; +} + +/* + * Counters have 40 bits implemented. However they are designed such + * that bits [32-39] are sign extensions of bit 31. As such the + * effective width of a counter for P6-like PMU is 31 bits only. 
+ * + * See IA-32 Intel Architecture Software developer manual Vol 3B + */ +static struct pfm_pmu_config pfm_p6_pmu_conf={ + .pmu_name = "Intel P6 processor Family", + .counter_width = 31, + .pmd_desc = pfm_p6_pmd_desc, + .pmc_desc = pfm_p6_pmc_desc, + .num_pmc_entries = PFM_P6_NUM_PMCS, + .num_pmd_entries = PFM_P6_NUM_PMDS, + .probe_pmu = pfm_p6_probe_pmu, + .version = "1.0", + .flags = PFM_PMU_BUILTIN_FLAG, + .owner = THIS_MODULE, + .arch_info = &pfm_p6_pmu_info +}; + +static int __init pfm_p6_pmu_init_module(void) +{ + return pfm_pmu_register(&pfm_p6_pmu_conf); +} + +static void __exit pfm_p6_pmu_cleanup_module(void) +{ + pfm_pmu_unregister(&pfm_p6_pmu_conf); +} + +module_init(pfm_p6_pmu_init_module); +module_exit(pfm_p6_pmu_cleanup_module); Index: linux-2.6/arch/i386/perfmon/perfmon_pebs_smpl.c =================================================================== --- /dev/null +++ linux-2.6/arch/i386/perfmon/perfmon_pebs_smpl.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This file implements the Precise Event Based Sampling (PEBS) + * sampling format. It supports the following processors: + * - 32-bit Pentium 4, Xeon, Core-based processors. + * - 64-bit Pentium 4, Xeon, Core-based processors. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("Intel Precise Event-Based Sampling (PEBS)"); +MODULE_LICENSE("GPL"); + +#define ALIGN_PEBS(a, order) \ + (((a)+(1UL<<(order))-1) & ~((1UL<<(order))-1)) /* round a up to a multiple of 2^order; outer parentheses keep the expansion safe inside larger expressions */ + +#define PEBS_PADDING_ORDER 8 /* log2(256) padding for PEBS alignment constraint */ + +static int pfm_pebs_fmt_validate(u32 flags, u16 npmds, void *data) +{ + struct pfm_pebs_smpl_arg *arg = data; + size_t min_buf_size; + + /* + * need to define at least the size of the buffer + */ + if (data == NULL) { + PFM_DBG("no argument passed"); + return -EINVAL; + } + + /* + * compute min buf size. npmds is the maximum number + * of implemented PMD registers. + */ + min_buf_size = sizeof(struct pfm_pebs_smpl_hdr) + + sizeof(struct pfm_pebs_smpl_entry) + + (1UL<buf_size); + + /* + * must hold at least the buffer header + one minimally sized entry + */ + if (arg->buf_size < min_buf_size) + return -EINVAL; + + return 0; +} + +static int pfm_pebs_fmt_get_size(unsigned int flags, void *data, size_t *size) +{ + struct pfm_pebs_smpl_arg *arg = data; + + /* + * size has been validated in pfm_pebs_fmt_validate() + */ + *size = arg->buf_size + (1UL<ds; + + /* + * align PEBS buffer base + */ + pebs_start = ALIGN_PEBS((unsigned long)(hdr+1), PEBS_PADDING_ORDER); + pebs_end = pebs_start + arg->buf_size + 1; + + hdr->version = PFM_PEBS_SMPL_VERSION; + hdr->buf_size = arg->buf_size; + hdr->overflows = 0; + + /* + * express PEBS buffer base as offset from the end of the header + */ + hdr->start_offs = pebs_start - (unsigned long)(hdr+1); + + /* + * PEBS buffer boundaries + */ + ds->pebs_buf_base = pebs_start; + ds->pebs_abs_max = pebs_end; + + /* + * PEBS starting position + */ +
ds->pebs_index = pebs_start; + + /* + * PEBS interrupt threshold + */ + ds->pebs_intr_thres = pebs_start + + arg->intr_thres + * sizeof(struct pfm_pebs_smpl_entry); + + /* + * save counter reset value for PEBS counter + */ + ds->pebs_cnt_reset = arg->cnt_reset; + + /* + * keep track of DS AREA + */ + ctx_arch->ds_area = (unsigned long)ds; + ctx_arch->flags |= PFM_X86_USE_PEBS; + + PFM_DBG("buffer=%p buf_size=%llu offs=%llu pebs_start=0x%lx " + "pebs_end=0x%lx ds=%p pebs_thres=0x%lx cnt_reset=0x%llx", + buf, + (unsigned long long)hdr->buf_size, + (unsigned long long)hdr->start_offs, + pebs_start, + pebs_end, + ds, + ds->pebs_intr_thres, + (unsigned long long)ds->pebs_cnt_reset); + + return 0; +} + +static int pfm_pebs_fmt_handler(void *buf, struct pfm_ovfl_arg *arg, + unsigned long ip, u64 tstamp, void *data) +{ + struct pfm_pebs_smpl_hdr *hdr; + + hdr = buf; + + PFM_DBG_ovfl("buffer full"); + /* + * increment number of buffer overflows. + * important to detect duplicate set of samples. + */ + hdr->overflows++; + + /* + * request notification and masking of monitoring. + * Notification is still subject to the overflowed + * register having the FL_NOTIFY flag set. 
+ */ + arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY| PFM_OVFL_CTRL_MASK; + + return -ENOBUFS; /* we are full, sorry */ +} + +static int pfm_pebs_fmt_restart(int is_active, u32 *ovfl_ctrl, + void *buf) +{ + struct pfm_pebs_smpl_hdr *hdr = buf; + + /* + * reset index to base of buffer + */ + hdr->ds.pebs_index = hdr->ds.pebs_buf_base; + + *ovfl_ctrl = PFM_OVFL_CTRL_RESET; + + return 0; +} + +static int pfm_pebs_fmt_exit(void *buf) +{ + return 0; +} + +static struct pfm_smpl_fmt pebs_fmt={ + .fmt_name = PFM_PEBS_SMPL_NAME, + .fmt_version = 0x1, + .fmt_arg_size = sizeof(struct pfm_pebs_smpl_arg), + .fmt_validate = pfm_pebs_fmt_validate, + .fmt_getsize = pfm_pebs_fmt_get_size, + .fmt_init = pfm_pebs_fmt_init, + .fmt_handler = pfm_pebs_fmt_handler, + .fmt_restart = pfm_pebs_fmt_restart, + .fmt_exit = pfm_pebs_fmt_exit, + .fmt_flags = PFM_FMT_BUILTIN_FLAG, + .owner = THIS_MODULE, +}; + +static int __init pfm_pebs_fmt_init_module(void) +{ + int ht_enabled; + + if (!cpu_has_pebs) { + PFM_INFO("processor does not have PEBS support"); + return -1; + } +#ifdef CONFIG_SMP + ht_enabled = cpus_weight(cpu_core_map[smp_processor_id()]) + / cpu_data->x86_max_cores > 1; +#else + ht_enabled = 0; +#endif + if (ht_enabled) { + PFM_INFO("PEBS not available because HyperThreading is on"); + return -1; + } + return pfm_fmt_register(&pebs_fmt); +} + +static void __exit pfm_pebs_fmt_cleanup_module(void) +{ + pfm_fmt_unregister(&pebs_fmt); +} + +module_init(pfm_pebs_fmt_init_module); +module_exit(pfm_pebs_fmt_cleanup_module); Index: linux-2.6/arch/ia64/Kconfig =================================================================== --- linux-2.6.orig/arch/ia64/Kconfig +++ linux-2.6/arch/ia64/Kconfig @@ -424,14 +424,6 @@ config COMPAT config IA64_MCA_RECOVERY tristate "MCA recovery from errors other than TLB." -config PERFMON - bool "Performance monitor support" - help - Selects whether support for the IA-64 performance monitor hardware - is included in the kernel. 
This makes some kernel data-structures a - little bigger and slows down execution a bit, but it is generally - a good idea to turn this on. If you're unsure, say Y. - config IA64_PALINFO tristate "/proc/pal support" help @@ -493,6 +485,8 @@ source "drivers/firmware/Kconfig" source "fs/Kconfig.binfmt" +source "arch/ia64/perfmon/Kconfig" + endmenu menu "Power management and ACPI" Index: linux-2.6/arch/ia64/Makefile =================================================================== --- linux-2.6.orig/arch/ia64/Makefile +++ linux-2.6/arch/ia64/Makefile @@ -55,6 +55,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia6 core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ +core-$(CONFIG_PERFMON) += arch/ia64/perfmon/ drivers-$(CONFIG_PCI) += arch/ia64/pci/ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ Index: linux-2.6/arch/ia64/defconfig =================================================================== --- linux-2.6.orig/arch/ia64/defconfig +++ linux-2.6/arch/ia64/defconfig @@ -162,7 +162,6 @@ CONFIG_HAVE_ARCH_NODEDATA_EXTENSION=y CONFIG_IA32_SUPPORT=y CONFIG_COMPAT=y CONFIG_IA64_MCA_RECOVERY=y -CONFIG_PERFMON=y CONFIG_IA64_PALINFO=y # CONFIG_MC_ERR_INJECT is not set CONFIG_SGI_SN=y @@ -184,6 +183,16 @@ CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m # +# Hardware Performance Monitoring support +# +CONFIG_PERFMON=y +CONFIG_IA64_PERFMON_COMPAT=y +CONFIG_IA64_PERFMON_GENERIC=m +CONFIG_IA64_PERFMON_ITANIUM=y +CONFIG_IA64_PERFMON_MCKINLEY=y +CONFIG_IA64_PERFMON_MONTECITO=y + +# # Power management and ACPI # CONFIG_PM=y Index: linux-2.6/arch/ia64/kernel/Makefile =================================================================== --- linux-2.6.orig/arch/ia64/kernel/Makefile +++ linux-2.6/arch/ia64/kernel/Makefile @@ -5,7 +5,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ - irq_lsapic.o ivt.o machvec.o pal.o 
patch.o process.o perfmon.o ptrace.o sal.o \ + irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o ptrace.o sal.o \ salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ unwind.o mca.o mca_asm.o topology.o @@ -23,7 +23,6 @@ obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_SMP) += smp.o smpboot.o obj-$(CONFIG_NUMA) += numa.o -obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o Index: linux-2.6/arch/ia64/kernel/entry.S =================================================================== --- linux-2.6.orig/arch/ia64/kernel/entry.S +++ linux-2.6/arch/ia64/kernel/entry.S @@ -1588,5 +1588,17 @@ sys_call_table: data8 sys_signalfd data8 sys_timerfd data8 sys_eventfd + data8 sys_pfm_create_context // 1310 + data8 sys_pfm_write_pmcs + data8 sys_pfm_write_pmds + data8 sys_pfm_read_pmds + data8 sys_pfm_load_context + data8 sys_pfm_start // 1315 + data8 sys_pfm_stop + data8 sys_pfm_restart + data8 sys_pfm_create_evtsets + data8 sys_pfm_getinfo_evtsets + data8 sys_pfm_delete_evtsets // 1320 + data8 sys_pfm_unload_context .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls Index: linux-2.6/arch/ia64/kernel/irq_ia64.c =================================================================== --- linux-2.6.orig/arch/ia64/kernel/irq_ia64.c +++ linux-2.6/arch/ia64/kernel/irq_ia64.c @@ -40,10 +40,6 @@ #include #include -#ifdef CONFIG_PERFMON -# include -#endif - #define IRQ_DEBUG 0 /* These can be overridden in platform_irq_init */ @@ -320,9 +316,6 @@ init_IRQ (void) register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction); #endif -#ifdef CONFIG_PERFMON - pfm_init_percpu(); -#endif platform_irq_init(); } Index: linux-2.6/arch/ia64/kernel/perfmon.c ===================================================================
--- linux-2.6.orig/arch/ia64/kernel/perfmon.c +++ /dev/null @@ -1,6879 +0,0 @@ -/* - * This file implements the perfmon-2 subsystem which is used - * to program the IA-64 Performance Monitoring Unit (PMU). - * - * The initial version of perfmon.c was written by - * Ganesh Venkitachalam, IBM Corp. - * - * Then it was modified for perfmon-1.x by Stephane Eranian and - * David Mosberger, Hewlett Packard Co. - * - * Version Perfmon-2.x is a rewrite of perfmon-1.x - * by Stephane Eranian, Hewlett Packard Co. - * - * Copyright (C) 1999-2005 Hewlett Packard Co - * Stephane Eranian - * David Mosberger-Tang - * - * More information about perfmon available at: - * http://www.hpl.hp.com/research/linux/perfmon - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_PERFMON -/* - * perfmon context state - */ -#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */ -#define PFM_CTX_LOADED 2 /* context is loaded onto a task */ -#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */ -#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */ - -#define PFM_INVALID_ACTIVATION (~0UL) - -#define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */ -#define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */ - -/* - * depth of message queue - */ -#define PFM_MAX_MSGS 32 -#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail) - -/* - * type of a PMU register (bitmask). 
- * bitmask structure: - * bit0 : register implemented - * bit1 : end marker - * bit2-3 : reserved - * bit4 : pmc has pmc.pm - * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter - * bit6-7 : register type - * bit8-31: reserved - */ -#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */ -#define PFM_REG_IMPL 0x1 /* register implemented */ -#define PFM_REG_END 0x2 /* end marker */ -#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ -#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */ -#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */ -#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ -#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ - -#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END) -#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END) - -#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) - -/* i assumed unsigned */ -#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL)) -#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL)) - -/* XXX: these assume that register i is implemented */ -#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) -#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) -#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) -#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) - -#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value -#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask -#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0] -#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0] - -#define PFM_NUM_IBRS IA64_NUM_DBG_REGS -#define PFM_NUM_DBRS IA64_NUM_DBG_REGS - 
-#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) -#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling) -#define PFM_CTX_TASK(h) (h)->ctx_task - -#define PMU_PMC_OI 5 /* position of pmc.oi bit */ - -/* XXX: does not support more than 64 PMDs */ -#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask) -#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL) - -#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask) - -#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64) -#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64) -#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1) -#define PFM_CODE_RR 0 /* requesting code range restriction */ -#define PFM_DATA_RR 1 /* requestion data range restriction */ - -#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v) -#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v) -#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info) - -#define RDEP(x) (1UL<<(x)) - -/* - * context protection macros - * in SMP: - * - we need to protect against CPU concurrency (spin_lock) - * - we need to protect against PMU overflow interrupts (local_irq_disable) - * in UP: - * - we need to protect against PMU overflow interrupts (local_irq_disable) - * - * spin_lock_irqsave()/spin_unlock_irqrestore(): - * in SMP: local_irq_disable + spin_lock - * in UP : local_irq_disable - * - * spin_lock()/spin_lock(): - * in UP : removed automatically - * in SMP: protect against context accesses from other CPU. interrupts - * are not masked. This is useful for the PMU interrupt handler - * because we know we will not get PMU concurrency in that code. 
- */ -#define PROTECT_CTX(c, f) \ - do { \ - DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \ - spin_lock_irqsave(&(c)->ctx_lock, f); \ - DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \ - } while(0) - -#define UNPROTECT_CTX(c, f) \ - do { \ - DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \ - spin_unlock_irqrestore(&(c)->ctx_lock, f); \ - } while(0) - -#define PROTECT_CTX_NOPRINT(c, f) \ - do { \ - spin_lock_irqsave(&(c)->ctx_lock, f); \ - } while(0) - - -#define UNPROTECT_CTX_NOPRINT(c, f) \ - do { \ - spin_unlock_irqrestore(&(c)->ctx_lock, f); \ - } while(0) - - -#define PROTECT_CTX_NOIRQ(c) \ - do { \ - spin_lock(&(c)->ctx_lock); \ - } while(0) - -#define UNPROTECT_CTX_NOIRQ(c) \ - do { \ - spin_unlock(&(c)->ctx_lock); \ - } while(0) - - -#ifdef CONFIG_SMP - -#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number) -#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++ -#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION() - -#else /* !CONFIG_SMP */ -#define SET_ACTIVATION(t) do {} while(0) -#define GET_ACTIVATION(t) do {} while(0) -#define INC_ACTIVATION(t) do {} while(0) -#endif /* CONFIG_SMP */ - -#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0) -#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner) -#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx) - -#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g) -#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g) - -#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0) - -/* - * cmp0 must be the value of pmc0 - */ -#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL) - -#define PFMFS_MAGIC 0xa0b4d889 - -/* - * debugging - */ -#define PFM_DEBUGGING 1 -#ifdef PFM_DEBUGGING -#define DPRINT(a) \ - do { \ - if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, 
smp_processor_id(), current->pid); printk a; } \ - } while (0) - -#define DPRINT_ovfl(a) \ - do { \ - if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \ - } while (0) -#endif - -/* - * 64-bit software counter structure - * - * the next_reset_type is applied to the next call to pfm_reset_regs() - */ -typedef struct { - unsigned long val; /* virtual 64bit counter value */ - unsigned long lval; /* last reset value */ - unsigned long long_reset; /* reset value on sampling overflow */ - unsigned long short_reset; /* reset value on overflow */ - unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */ - unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */ - unsigned long seed; /* seed for random-number generator */ - unsigned long mask; /* mask for random-number generator */ - unsigned int flags; /* notify/do not notify */ - unsigned long eventid; /* overflow event identifier */ -} pfm_counter_t; - -/* - * context flags - */ -typedef struct { - unsigned int block:1; /* when 1, task will blocked on user notifications */ - unsigned int system:1; /* do system wide monitoring */ - unsigned int using_dbreg:1; /* using range restrictions (debug registers) */ - unsigned int is_sampling:1; /* true if using a custom format */ - unsigned int excl_idle:1; /* exclude idle task in system wide session */ - unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */ - unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */ - unsigned int no_msg:1; /* no message sent on overflow */ - unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */ - unsigned int reserved:22; -} pfm_context_flags_t; - -#define PFM_TRAP_REASON_NONE 0x0 /* default value */ -#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */ -#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */ - 
- -/* - * perfmon context: encapsulates all the state of a monitoring session - */ - -typedef struct pfm_context { - spinlock_t ctx_lock; /* context protection */ - - pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */ - unsigned int ctx_state; /* state: active/inactive (no bitfield) */ - - struct task_struct *ctx_task; /* task to which context is attached */ - - unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ - - struct completion ctx_restart_done; /* use for blocking notification mode */ - - unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ - unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */ - unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */ - - unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */ - unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ - unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ - - unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */ - - unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ - unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ - unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ - unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ - - pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */ - - unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */ - unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */ - - u64 ctx_saved_psr_up; /* only contains psr.up value */ - - unsigned long ctx_last_activation; /* context last activation number for last_cpu */ - unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ - unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ - - int ctx_fd; /* file descriptor used my this context */ - 
pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */ - - pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ - void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */ - unsigned long ctx_smpl_size; /* size of sampling buffer */ - void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */ - - wait_queue_head_t ctx_msgq_wait; - pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; - int ctx_msgq_head; - int ctx_msgq_tail; - struct fasync_struct *ctx_async_queue; - - wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */ -} pfm_context_t; - -/* - * magic number used to verify that structure is really - * a perfmon context - */ -#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops) - -#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context) - -#ifdef CONFIG_SMP -#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v) -#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu -#else -#define SET_LAST_CPU(ctx, v) do {} while(0) -#define GET_LAST_CPU(ctx) do {} while(0) -#endif - - -#define ctx_fl_block ctx_flags.block -#define ctx_fl_system ctx_flags.system -#define ctx_fl_using_dbreg ctx_flags.using_dbreg -#define ctx_fl_is_sampling ctx_flags.is_sampling -#define ctx_fl_excl_idle ctx_flags.excl_idle -#define ctx_fl_going_zombie ctx_flags.going_zombie -#define ctx_fl_trap_reason ctx_flags.trap_reason -#define ctx_fl_no_msg ctx_flags.no_msg -#define ctx_fl_can_restart ctx_flags.can_restart - -#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0); -#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking - -/* - * global information about all sessions - * mostly used to synchronize between system wide and per-process - */ -typedef struct { - spinlock_t pfs_lock; /* lock the structure */ - - unsigned int pfs_task_sessions; /* number of per task sessions */ - unsigned int pfs_sys_sessions; /* number of per system wide sessions */ - unsigned int pfs_sys_use_dbregs; /* incremented when a 
system wide session uses debug regs */ - unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */ - struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */ -} pfm_session_t; - -/* - * information about a PMC or PMD. - * dep_pmd[]: a bitmask of dependent PMD registers - * dep_pmc[]: a bitmask of dependent PMC registers - */ -typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); -typedef struct { - unsigned int type; - int pm_pos; - unsigned long default_value; /* power-on default value */ - unsigned long reserved_mask; /* bitmask of reserved bits */ - pfm_reg_check_t read_check; - pfm_reg_check_t write_check; - unsigned long dep_pmd[4]; - unsigned long dep_pmc[4]; -} pfm_reg_desc_t; - -/* assume cnum is a valid monitor */ -#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1) - -/* - * This structure is initialized at boot time and contains - * a description of the PMU main characteristics. 
- * - * If the probe function is defined, detection is based - * on its return value: - * - 0 means recognized PMU - * - anything else means not supported - * When the probe function is not defined, then the pmu_family field - * is used and it must match the host CPU family such that: - * - cpu->family & config->pmu_family != 0 - */ -typedef struct { - unsigned long ovfl_val; /* overflow value for counters */ - - pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ - pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ - - unsigned int num_pmcs; /* number of PMCS: computed at init time */ - unsigned int num_pmds; /* number of PMDS: computed at init time */ - unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ - unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ - - char *pmu_name; /* PMU family name */ - unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ - unsigned int flags; /* pmu specific flags */ - unsigned int num_ibrs; /* number of IBRS: computed at init time */ - unsigned int num_dbrs; /* number of DBRS: computed at init time */ - unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ - int (*probe)(void); /* customized probe routine */ - unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ -} pmu_config_t; -/* - * PMU specific flags - */ -#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ - -/* - * debug register related type definitions - */ -typedef struct { - unsigned long ibr_mask:56; - unsigned long ibr_plm:4; - unsigned long ibr_ig:3; - unsigned long ibr_x:1; -} ibr_mask_reg_t; - -typedef struct { - unsigned long dbr_mask:56; - unsigned long dbr_plm:4; - unsigned long dbr_ig:2; - unsigned long dbr_w:1; - unsigned long dbr_r:1; -} dbr_mask_reg_t; - -typedef union { - unsigned long val; - ibr_mask_reg_t ibr; - dbr_mask_reg_t dbr; -} dbreg_t; - - -/* - * perfmon command descriptions - */ 
-typedef struct { - int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); - char *cmd_name; - int cmd_flags; - unsigned int cmd_narg; - size_t cmd_argsize; - int (*cmd_getsize)(void *arg, size_t *sz); -} pfm_cmd_desc_t; - -#define PFM_CMD_FD 0x01 /* command requires a file descriptor */ -#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */ -#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */ -#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ - - -#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name -#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ) -#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW) -#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD) -#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP) - -#define PFM_CMD_ARG_MANY -1 /* cannot be zero */ - -typedef struct { - unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ - unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ - unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ - unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */ - unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */ - unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */ - unsigned long pfm_smpl_handler_calls; - unsigned long pfm_smpl_handler_cycles; - char pad[SMP_CACHE_BYTES] ____cacheline_aligned; -} pfm_stats_t; - -/* - * perfmon internal variables - */ -static pfm_stats_t pfm_stats[NR_CPUS]; -static pfm_session_t pfm_sessions; /* global sessions information */ - -static DEFINE_SPINLOCK(pfm_alt_install_check); -static pfm_intr_handler_desc_t *pfm_alt_intr_handler; - -static struct proc_dir_entry *perfmon_dir; -static pfm_uuid_t pfm_null_uuid = {0,}; - -static spinlock_t 
pfm_buffer_fmt_lock; -static LIST_HEAD(pfm_buffer_fmt_list); - -static pmu_config_t *pmu_conf; - -/* sysctl() controls */ -pfm_sysctl_t pfm_sysctl; -EXPORT_SYMBOL(pfm_sysctl); - -static ctl_table pfm_ctl_table[]={ - { - .ctl_name = CTL_UNNUMBERED, - .procname = "debug", - .data = &pfm_sysctl.debug, - .maxlen = sizeof(int), - .mode = 0666, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "debug_ovfl", - .data = &pfm_sysctl.debug_ovfl, - .maxlen = sizeof(int), - .mode = 0666, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "fastctxsw", - .data = &pfm_sysctl.fastctxsw, - .maxlen = sizeof(int), - .mode = 0600, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "expert_mode", - .data = &pfm_sysctl.expert_mode, - .maxlen = sizeof(int), - .mode = 0600, - .proc_handler = &proc_dointvec, - }, - {} -}; -static ctl_table pfm_sysctl_dir[] = { - { - .ctl_name = CTL_UNNUMBERED, - .procname = "perfmon", - .mode = 0755, - .child = pfm_ctl_table, - }, - {} -}; -static ctl_table pfm_sysctl_root[] = { - { - .ctl_name = CTL_KERN, - .procname = "kernel", - .mode = 0755, - .child = pfm_sysctl_dir, - }, - {} -}; -static struct ctl_table_header *pfm_sysctl_header; - -static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); - -#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) -#define pfm_get_cpu_data(a,b) per_cpu(a, b) - -static inline void -pfm_put_task(struct task_struct *task) -{ - if (task != current) put_task_struct(task); -} - -static inline void -pfm_set_task_notify(struct task_struct *task) -{ - struct thread_info *info; - - info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE); - set_bit(TIF_NOTIFY_RESUME, &info->flags); -} - -static inline void -pfm_clear_task_notify(void) -{ - clear_thread_flag(TIF_NOTIFY_RESUME); -} - -static inline void -pfm_reserve_page(unsigned long a) -{ - SetPageReserved(vmalloc_to_page((void *)a)); 
-} -static inline void -pfm_unreserve_page(unsigned long a) -{ - ClearPageReserved(vmalloc_to_page((void*)a)); -} - -static inline unsigned long -pfm_protect_ctx_ctxsw(pfm_context_t *x) -{ - spin_lock(&(x)->ctx_lock); - return 0UL; -} - -static inline void -pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) -{ - spin_unlock(&(x)->ctx_lock); -} - -static inline unsigned int -pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct) -{ - return do_munmap(mm, addr, len); -} - -static inline unsigned long -pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec) -{ - return get_unmapped_area(file, addr, len, pgoff, flags); -} - - -static int -pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - struct vfsmount *mnt) -{ - return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); -} - -static struct file_system_type pfm_fs_type = { - .name = "pfmfs", - .get_sb = pfmfs_get_sb, - .kill_sb = kill_anon_super, -}; - -DEFINE_PER_CPU(unsigned long, pfm_syst_info); -DEFINE_PER_CPU(struct task_struct *, pmu_owner); -DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); -DEFINE_PER_CPU(unsigned long, pmu_activation_number); -EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info); - - -/* forward declaration */ -static const struct file_operations pfm_file_ops; - -/* - * forward declarations - */ -#ifndef CONFIG_SMP -static void pfm_lazy_save_regs (struct task_struct *ta); -#endif - -void dump_pmu_state(const char *); -static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); - -#include "perfmon_itanium.h" -#include "perfmon_mckinley.h" -#include "perfmon_montecito.h" -#include "perfmon_generic.h" - -static pmu_config_t *pmu_confs[]={ - &pmu_conf_mont, - &pmu_conf_mck, - &pmu_conf_ita, - &pmu_conf_gen, /* must be last */ - NULL -}; - - -static int pfm_end_notify_user(pfm_context_t *ctx); - -static inline 
void -pfm_clear_psr_pp(void) -{ - ia64_rsm(IA64_PSR_PP); - ia64_srlz_i(); -} - -static inline void -pfm_set_psr_pp(void) -{ - ia64_ssm(IA64_PSR_PP); - ia64_srlz_i(); -} - -static inline void -pfm_clear_psr_up(void) -{ - ia64_rsm(IA64_PSR_UP); - ia64_srlz_i(); -} - -static inline void -pfm_set_psr_up(void) -{ - ia64_ssm(IA64_PSR_UP); - ia64_srlz_i(); -} - -static inline unsigned long -pfm_get_psr(void) -{ - unsigned long tmp; - tmp = ia64_getreg(_IA64_REG_PSR); - ia64_srlz_i(); - return tmp; -} - -static inline void -pfm_set_psr_l(unsigned long val) -{ - ia64_setreg(_IA64_REG_PSR_L, val); - ia64_srlz_i(); -} - -static inline void -pfm_freeze_pmu(void) -{ - ia64_set_pmc(0,1UL); - ia64_srlz_d(); -} - -static inline void -pfm_unfreeze_pmu(void) -{ - ia64_set_pmc(0,0UL); - ia64_srlz_d(); -} - -static inline void -pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) -{ - int i; - - for (i=0; i < nibrs; i++) { - ia64_set_ibr(i, ibrs[i]); - ia64_dv_serialize_instruction(); - } - ia64_srlz_i(); -} - -static inline void -pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) -{ - int i; - - for (i=0; i < ndbrs; i++) { - ia64_set_dbr(i, dbrs[i]); - ia64_dv_serialize_data(); - } - ia64_srlz_d(); -} - -/* - * PMD[i] must be a counter. no check is made - */ -static inline unsigned long -pfm_read_soft_counter(pfm_context_t *ctx, int i) -{ - return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val); -} - -/* - * PMD[i] must be a counter. 
no check is made - */ -static inline void -pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) -{ - unsigned long ovfl_val = pmu_conf->ovfl_val; - - ctx->ctx_pmds[i].val = val & ~ovfl_val; - /* - * writing to unimplemented part is ignore, so we do not need to - * mask off top part - */ - ia64_set_pmd(i, val & ovfl_val); -} - -static pfm_msg_t * -pfm_get_new_msg(pfm_context_t *ctx) -{ - int idx, next; - - next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS; - - DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); - if (next == ctx->ctx_msgq_head) return NULL; - - idx = ctx->ctx_msgq_tail; - ctx->ctx_msgq_tail = next; - - DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx)); - - return ctx->ctx_msgq+idx; -} - -static pfm_msg_t * -pfm_get_next_msg(pfm_context_t *ctx) -{ - pfm_msg_t *msg; - - DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); - - if (PFM_CTXQ_EMPTY(ctx)) return NULL; - - /* - * get oldest message - */ - msg = ctx->ctx_msgq+ctx->ctx_msgq_head; - - /* - * and move forward - */ - ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS; - - DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type)); - - return msg; -} - -static void -pfm_reset_msgq(pfm_context_t *ctx) -{ - ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; - DPRINT(("ctx=%p msgq reset\n", ctx)); -} - -static void * -pfm_rvmalloc(unsigned long size) -{ - void *mem; - unsigned long addr; - - size = PAGE_ALIGN(size); - mem = vmalloc(size); - if (mem) { - //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem); - memset(mem, 0, size); - addr = (unsigned long)mem; - while (size > 0) { - pfm_reserve_page(addr); - addr+=PAGE_SIZE; - size-=PAGE_SIZE; - } - } - return mem; -} - -static void -pfm_rvfree(void *mem, unsigned long size) -{ - unsigned long addr; - - if (mem) { - DPRINT(("freeing physical buffer 
@%p size=%lu\n", mem, size)); - addr = (unsigned long) mem; - while ((long) size > 0) { - pfm_unreserve_page(addr); - addr+=PAGE_SIZE; - size-=PAGE_SIZE; - } - vfree(mem); - } - return; -} - -static pfm_context_t * -pfm_context_alloc(void) -{ - pfm_context_t *ctx; - - /* - * allocate context descriptor - * must be able to free with interrupts disabled - */ - ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL); - if (ctx) { - DPRINT(("alloc ctx @%p\n", ctx)); - } - return ctx; -} - -static void -pfm_context_free(pfm_context_t *ctx) -{ - if (ctx) { - DPRINT(("free ctx @%p\n", ctx)); - kfree(ctx); - } -} - -static void -pfm_mask_monitoring(struct task_struct *task) -{ - pfm_context_t *ctx = PFM_GET_CTX(task); - unsigned long mask, val, ovfl_mask; - int i; - - DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid)); - - ovfl_mask = pmu_conf->ovfl_val; - /* - * monitoring can only be masked as a result of a valid - * counter overflow. In UP, it means that the PMU still - * has an owner. Note that the owner can be different - * from the current task. However the PMU state belongs - * to the owner. - * In SMP, a valid overflow only happens when task is - * current. Therefore if we come here, we know that - * the PMU state belongs to the current task, therefore - * we can access the live registers. - * - * So in both cases, the live register contains the owner's - * state. We can ONLY touch the PMU registers and NOT the PSR. - * - * As a consequence to this call, the ctx->th_pmds[] array - * contains stale information which must be ignored - * when context is reloaded AND monitoring is active (see - * pfm_restart). 
- */ - mask = ctx->ctx_used_pmds[0]; - for (i = 0; mask; i++, mask>>=1) { - /* skip non used pmds */ - if ((mask & 0x1) == 0) continue; - val = ia64_get_pmd(i); - - if (PMD_IS_COUNTING(i)) { - /* - * we rebuild the full 64 bit value of the counter - */ - ctx->ctx_pmds[i].val += (val & ovfl_mask); - } else { - ctx->ctx_pmds[i].val = val; - } - DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", - i, - ctx->ctx_pmds[i].val, - val & ovfl_mask)); - } - /* - * mask monitoring by setting the privilege level to 0 - * we cannot use psr.pp/psr.up for this, it is controlled by - * the user - * - * if task is current, modify actual registers, otherwise modify - * thread save state, i.e., what will be restored in pfm_load_regs() - */ - mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; - for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { - if ((mask & 0x1) == 0UL) continue; - ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL); - ctx->th_pmcs[i] &= ~0xfUL; - DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); - } - /* - * make all of this visible - */ - ia64_srlz_d(); -} - -/* - * must always be done with task == current - * - * context must be in MASKED state when calling - */ -static void -pfm_restore_monitoring(struct task_struct *task) -{ - pfm_context_t *ctx = PFM_GET_CTX(task); - unsigned long mask, ovfl_mask; - unsigned long psr, val; - int i, is_system; - - is_system = ctx->ctx_fl_system; - ovfl_mask = pmu_conf->ovfl_val; - - if (task != current) { - printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid); - return; - } - if (ctx->ctx_state != PFM_CTX_MASKED) { - printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__, - task->pid, current->pid, ctx->ctx_state); - return; - } - psr = pfm_get_psr(); - /* - * monitoring is masked via the PMC. - * As we restore their value, we do not want each counter to - * restart right away. 
We stop monitoring using the PSR, - * restore the PMC (and PMD) and then re-establish the psr - * as it was. Note that there can be no pending overflow at - * this point, because monitoring was MASKED. - * - * system-wide session are pinned and self-monitoring - */ - if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { - /* disable dcr pp */ - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); - pfm_clear_psr_pp(); - } else { - pfm_clear_psr_up(); - } - /* - * first, we restore the PMD - */ - mask = ctx->ctx_used_pmds[0]; - for (i = 0; mask; i++, mask>>=1) { - /* skip non used pmds */ - if ((mask & 0x1) == 0) continue; - - if (PMD_IS_COUNTING(i)) { - /* - * we split the 64bit value according to - * counter width - */ - val = ctx->ctx_pmds[i].val & ovfl_mask; - ctx->ctx_pmds[i].val &= ~ovfl_mask; - } else { - val = ctx->ctx_pmds[i].val; - } - ia64_set_pmd(i, val); - - DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", - i, - ctx->ctx_pmds[i].val, - val)); - } - /* - * restore the PMCs - */ - mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; - for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { - if ((mask & 0x1) == 0UL) continue; - ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; - ia64_set_pmc(i, ctx->th_pmcs[i]); - DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i])); - } - ia64_srlz_d(); - - /* - * must restore DBR/IBR because could be modified while masked - * XXX: need to optimize - */ - if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); - } - - /* - * now restore PSR - */ - if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { - /* enable dcr pp */ - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); - ia64_srlz_i(); - } - pfm_set_psr_l(psr); -} - -static inline void -pfm_save_pmds(unsigned long *pmds, unsigned long mask) -{ - int i; - - ia64_srlz_d(); - - for (i=0; mask; i++, mask>>=1) { - if (mask & 0x1) pmds[i] = 
ia64_get_pmd(i); - } -} - -/* - * reload from thread state (used for ctxw only) - */ -static inline void -pfm_restore_pmds(unsigned long *pmds, unsigned long mask) -{ - int i; - unsigned long val, ovfl_val = pmu_conf->ovfl_val; - - for (i=0; mask; i++, mask>>=1) { - if ((mask & 0x1) == 0) continue; - val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i]; - ia64_set_pmd(i, val); - } - ia64_srlz_d(); -} - -/* - * propagate PMD from context to thread-state - */ -static inline void -pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) -{ - unsigned long ovfl_val = pmu_conf->ovfl_val; - unsigned long mask = ctx->ctx_all_pmds[0]; - unsigned long val; - int i; - - DPRINT(("mask=0x%lx\n", mask)); - - for (i=0; mask; i++, mask>>=1) { - - val = ctx->ctx_pmds[i].val; - - /* - * We break up the 64 bit value into 2 pieces - * the lower bits go to the machine state in the - * thread (will be reloaded on ctxsw in). - * The upper part stays in the soft-counter. - */ - if (PMD_IS_COUNTING(i)) { - ctx->ctx_pmds[i].val = val & ~ovfl_val; - val &= ovfl_val; - } - ctx->th_pmds[i] = val; - - DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", - i, - ctx->th_pmds[i], - ctx->ctx_pmds[i].val)); - } -} - -/* - * propagate PMC from context to thread-state - */ -static inline void -pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) -{ - unsigned long mask = ctx->ctx_all_pmcs[0]; - int i; - - DPRINT(("mask=0x%lx\n", mask)); - - for (i=0; mask; i++, mask>>=1) { - /* masking 0 with ovfl_val yields 0 */ - ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; - DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); - } -} - - - -static inline void -pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask) -{ - int i; - - for (i=0; mask; i++, mask>>=1) { - if ((mask & 0x1) == 0) continue; - ia64_set_pmc(i, pmcs[i]); - } - ia64_srlz_d(); -} - -static inline int -pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b) -{ - return memcmp(a, b, sizeof(pfm_uuid_t)); -} - -static inline int -pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, 
struct task_struct *task, void *buf, struct pt_regs *regs) -{ - int ret = 0; - if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs); - return ret; -} - -static inline int -pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size) -{ - int ret = 0; - if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size); - return ret; -} - - -static inline int -pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, - int cpu, void *arg) -{ - int ret = 0; - if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg); - return ret; -} - -static inline int -pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags, - int cpu, void *arg) -{ - int ret = 0; - if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg); - return ret; -} - -static inline int -pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) -{ - int ret = 0; - if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs); - return ret; -} - -static inline int -pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) -{ - int ret = 0; - if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs); - return ret; -} - -static pfm_buffer_fmt_t * -__pfm_find_buffer_fmt(pfm_uuid_t uuid) -{ - struct list_head * pos; - pfm_buffer_fmt_t * entry; - - list_for_each(pos, &pfm_buffer_fmt_list) { - entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); - if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0) - return entry; - } - return NULL; -} - -/* - * find a buffer format based on its uuid - */ -static pfm_buffer_fmt_t * -pfm_find_buffer_fmt(pfm_uuid_t uuid) -{ - pfm_buffer_fmt_t * fmt; - spin_lock(&pfm_buffer_fmt_lock); - fmt = __pfm_find_buffer_fmt(uuid); - 
spin_unlock(&pfm_buffer_fmt_lock); - return fmt; -} - -int -pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt) -{ - int ret = 0; - - /* some sanity checks */ - if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL; - - /* we need at least a handler */ - if (fmt->fmt_handler == NULL) return -EINVAL; - - /* - * XXX: need check validity of fmt_arg_size - */ - - spin_lock(&pfm_buffer_fmt_lock); - - if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) { - printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name); - ret = -EBUSY; - goto out; - } - list_add(&fmt->fmt_list, &pfm_buffer_fmt_list); - printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name); - -out: - spin_unlock(&pfm_buffer_fmt_lock); - return ret; -} -EXPORT_SYMBOL(pfm_register_buffer_fmt); - -int -pfm_unregister_buffer_fmt(pfm_uuid_t uuid) -{ - pfm_buffer_fmt_t *fmt; - int ret = 0; - - spin_lock(&pfm_buffer_fmt_lock); - - fmt = __pfm_find_buffer_fmt(uuid); - if (!fmt) { - printk(KERN_ERR "perfmon: cannot unregister format, not found\n"); - ret = -EINVAL; - goto out; - } - list_del_init(&fmt->fmt_list); - printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name); - -out: - spin_unlock(&pfm_buffer_fmt_lock); - return ret; - -} -EXPORT_SYMBOL(pfm_unregister_buffer_fmt); - -extern void update_pal_halt_status(int); - -static int -pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) -{ - unsigned long flags; - /* - * validity checks on cpu_mask have been done upstream - */ - LOCK_PFS(flags); - - DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", - pfm_sessions.pfs_sys_sessions, - pfm_sessions.pfs_task_sessions, - pfm_sessions.pfs_sys_use_dbregs, - is_syswide, - cpu)); - - if (is_syswide) { - /* - * cannot mix system wide and per-task sessions - */ - if (pfm_sessions.pfs_task_sessions > 0UL) { - DPRINT(("system wide not possible, %u conflicting task_sessions\n", - pfm_sessions.pfs_task_sessions)); - goto abort; - } - - if 
(pfm_sessions.pfs_sys_session[cpu]) goto error_conflict; - - DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id())); - - pfm_sessions.pfs_sys_session[cpu] = task; - - pfm_sessions.pfs_sys_sessions++ ; - - } else { - if (pfm_sessions.pfs_sys_sessions) goto abort; - pfm_sessions.pfs_task_sessions++; - } - - DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", - pfm_sessions.pfs_sys_sessions, - pfm_sessions.pfs_task_sessions, - pfm_sessions.pfs_sys_use_dbregs, - is_syswide, - cpu)); - - /* - * disable default_idle() to go to PAL_HALT - */ - update_pal_halt_status(0); - - UNLOCK_PFS(flags); - - return 0; - -error_conflict: - DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", - pfm_sessions.pfs_sys_session[cpu]->pid, - cpu)); -abort: - UNLOCK_PFS(flags); - - return -EBUSY; - -} - -static int -pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) -{ - unsigned long flags; - /* - * validity checks on cpu_mask have been done upstream - */ - LOCK_PFS(flags); - - DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", - pfm_sessions.pfs_sys_sessions, - pfm_sessions.pfs_task_sessions, - pfm_sessions.pfs_sys_use_dbregs, - is_syswide, - cpu)); - - - if (is_syswide) { - pfm_sessions.pfs_sys_session[cpu] = NULL; - /* - * would not work with perfmon+more than one bit in cpu_mask - */ - if (ctx && ctx->ctx_fl_using_dbreg) { - if (pfm_sessions.pfs_sys_use_dbregs == 0) { - printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx); - } else { - pfm_sessions.pfs_sys_use_dbregs--; - } - } - pfm_sessions.pfs_sys_sessions--; - } else { - pfm_sessions.pfs_task_sessions--; - } - DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", - pfm_sessions.pfs_sys_sessions, - pfm_sessions.pfs_task_sessions, - pfm_sessions.pfs_sys_use_dbregs, - is_syswide, - cpu)); - - /* - * if possible, enable default_idle() to go into 
PAL_HALT - */ - if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0) - update_pal_halt_status(1); - - UNLOCK_PFS(flags); - - return 0; -} - -/* - * removes virtual mapping of the sampling buffer. - * IMPORTANT: cannot be called with interrupts disable, e.g. inside - * a PROTECT_CTX() section. - */ -static int -pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size) -{ - int r; - - /* sanity checks */ - if (task->mm == NULL || size == 0UL || vaddr == NULL) { - printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm); - return -EINVAL; - } - - DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size)); - - /* - * does the actual unmapping - */ - down_write(&task->mm->mmap_sem); - - DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size)); - - r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0); - - up_write(&task->mm->mmap_sem); - if (r !=0) { - printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size); - } - - DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r)); - - return 0; -} - -/* - * free actual physical storage used by sampling buffer - */ -#if 0 -static int -pfm_free_smpl_buffer(pfm_context_t *ctx) -{ - pfm_buffer_fmt_t *fmt; - - if (ctx->ctx_smpl_hdr == NULL) goto invalid_free; - - /* - * we won't use the buffer format anymore - */ - fmt = ctx->ctx_buf_fmt; - - DPRINT(("sampling buffer @%p size %lu vaddr=%p\n", - ctx->ctx_smpl_hdr, - ctx->ctx_smpl_size, - ctx->ctx_smpl_vaddr)); - - pfm_buf_fmt_exit(fmt, current, NULL, NULL); - - /* - * free the buffer - */ - pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size); - - ctx->ctx_smpl_hdr = NULL; - ctx->ctx_smpl_size = 0UL; - - return 0; - -invalid_free: - printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid); - return -EINVAL; -} -#endif - -static inline void -pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) -{ - if (fmt == NULL) return; - - 
pfm_buf_fmt_exit(fmt, current, NULL, NULL); - -} - -/* - * pfmfs should _never_ be mounted by userland - too much of security hassle, - * no real gain from having the whole whorehouse mounted. So we don't need - * any operations on the root directory. However, we need a non-trivial - * d_name - pfm: will go nicely and kill the special-casing in procfs. - */ -static struct vfsmount *pfmfs_mnt; - -static int __init -init_pfm_fs(void) -{ - int err = register_filesystem(&pfm_fs_type); - if (!err) { - pfmfs_mnt = kern_mount(&pfm_fs_type); - err = PTR_ERR(pfmfs_mnt); - if (IS_ERR(pfmfs_mnt)) - unregister_filesystem(&pfm_fs_type); - else - err = 0; - } - return err; -} - -static void __exit -exit_pfm_fs(void) -{ - unregister_filesystem(&pfm_fs_type); - mntput(pfmfs_mnt); -} - -static ssize_t -pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) -{ - pfm_context_t *ctx; - pfm_msg_t *msg; - ssize_t ret; - unsigned long flags; - DECLARE_WAITQUEUE(wait, current); - if (PFM_IS_FILE(filp) == 0) { - printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid); - return -EINVAL; - } - - ctx = (pfm_context_t *)filp->private_data; - if (ctx == NULL) { - printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid); - return -EINVAL; - } - - /* - * check even when there is no message - */ - if (size < sizeof(pfm_msg_t)) { - DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t))); - return -EINVAL; - } - - PROTECT_CTX(ctx, flags); - - /* - * put ourselves on the wait queue - */ - add_wait_queue(&ctx->ctx_msgq_wait, &wait); - - - for(;;) { - /* - * check wait queue - */ - - set_current_state(TASK_INTERRUPTIBLE); - - DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); - - ret = 0; - if(PFM_CTXQ_EMPTY(ctx) == 0) break; - - UNPROTECT_CTX(ctx, flags); - - /* - * check non-blocking read - */ - ret = -EAGAIN; - if(filp->f_flags & O_NONBLOCK) break; - - /* - * check pending signals - */ - if(signal_pending(current)) { - 
ret = -EINTR; - break; - } - /* - * no message, so wait - */ - schedule(); - - PROTECT_CTX(ctx, flags); - } - DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret)); - set_current_state(TASK_RUNNING); - remove_wait_queue(&ctx->ctx_msgq_wait, &wait); - - if (ret < 0) goto abort; - - ret = -EINVAL; - msg = pfm_get_next_msg(ctx); - if (msg == NULL) { - printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid); - goto abort_locked; - } - - DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); - - ret = -EFAULT; - if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t); - -abort_locked: - UNPROTECT_CTX(ctx, flags); -abort: - return ret; -} - -static ssize_t -pfm_write(struct file *file, const char __user *ubuf, - size_t size, loff_t *ppos) -{ - DPRINT(("pfm_write called\n")); - return -EINVAL; -} - -static unsigned int -pfm_poll(struct file *filp, poll_table * wait) -{ - pfm_context_t *ctx; - unsigned long flags; - unsigned int mask = 0; - - if (PFM_IS_FILE(filp) == 0) { - printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid); - return 0; - } - - ctx = (pfm_context_t *)filp->private_data; - if (ctx == NULL) { - printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid); - return 0; - } - - - DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd)); - - poll_wait(filp, &ctx->ctx_msgq_wait, wait); - - PROTECT_CTX(ctx, flags); - - if (PFM_CTXQ_EMPTY(ctx) == 0) - mask = POLLIN | POLLRDNORM; - - UNPROTECT_CTX(ctx, flags); - - DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask)); - - return mask; -} - -static int -pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) -{ - DPRINT(("pfm_ioctl called\n")); - return -EINVAL; -} - -/* - * interrupt cannot be masked when coming here - */ -static inline int -pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on) -{ - int ret; - - ret = fasync_helper (fd, filp, on, 
&ctx->ctx_async_queue); - - DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n", - current->pid, - fd, - on, - ctx->ctx_async_queue, ret)); - - return ret; -} - -static int -pfm_fasync(int fd, struct file *filp, int on) -{ - pfm_context_t *ctx; - int ret; - - if (PFM_IS_FILE(filp) == 0) { - printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid); - return -EBADF; - } - - ctx = (pfm_context_t *)filp->private_data; - if (ctx == NULL) { - printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid); - return -EBADF; - } - /* - * we cannot mask interrupts during this call because this may - * may go to sleep if memory is not readily avalaible. - * - * We are protected from the conetxt disappearing by the get_fd()/put_fd() - * done in caller. Serialization of this function is ensured by caller. - */ - ret = pfm_do_fasync(fd, filp, ctx, on); - - - DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n", - fd, - on, - ctx->ctx_async_queue, ret)); - - return ret; -} - -#ifdef CONFIG_SMP -/* - * this function is exclusively called from pfm_close(). - * The context is not protected at that time, nor are interrupts - * on the remote CPU. That's necessary to avoid deadlocks. 
- */ -static void -pfm_syswide_force_stop(void *info) -{ - pfm_context_t *ctx = (pfm_context_t *)info; - struct pt_regs *regs = task_pt_regs(current); - struct task_struct *owner; - unsigned long flags; - int ret; - - if (ctx->ctx_cpu != smp_processor_id()) { - printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n", - ctx->ctx_cpu, - smp_processor_id()); - return; - } - owner = GET_PMU_OWNER(); - if (owner != ctx->ctx_task) { - printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n", - smp_processor_id(), - owner->pid, ctx->ctx_task->pid); - return; - } - if (GET_PMU_CTX() != ctx) { - printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n", - smp_processor_id(), - GET_PMU_CTX(), ctx); - return; - } - - DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid)); - /* - * the context is already protected in pfm_close(), we simply - * need to mask interrupts to avoid a PMU interrupt race on - * this CPU - */ - local_irq_save(flags); - - ret = pfm_context_unload(ctx, NULL, 0, regs); - if (ret) { - DPRINT(("context_unload returned %d\n", ret)); - } - - /* - * unmask interrupts, PMU interrupts are now spurious here - */ - local_irq_restore(flags); -} - -static void -pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx) -{ - int ret; - - DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu)); - ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1); - DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret)); -} -#endif /* CONFIG_SMP */ - -/* - * called for each close(). Partially free resources. - * When caller is self-monitoring, the context is unloaded. 
- */ -static int -pfm_flush(struct file *filp, fl_owner_t id) -{ - pfm_context_t *ctx; - struct task_struct *task; - struct pt_regs *regs; - unsigned long flags; - unsigned long smpl_buf_size = 0UL; - void *smpl_buf_vaddr = NULL; - int state, is_system; - - if (PFM_IS_FILE(filp) == 0) { - DPRINT(("bad magic for\n")); - return -EBADF; - } - - ctx = (pfm_context_t *)filp->private_data; - if (ctx == NULL) { - printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid); - return -EBADF; - } - - /* - * remove our file from the async queue, if we use this mode. - * This can be done without the context being protected. We come - * here when the context has become unreachable by other tasks. - * - * We may still have active monitoring at this point and we may - * end up in pfm_overflow_handler(). However, fasync_helper() - * operates with interrupts disabled and it cleans up the - * queue. If the PMU handler is called prior to entering - * fasync_helper() then it will send a signal. If it is - * invoked after, it will find an empty queue and no - * signal will be sent. In both case, we are safe - */ - if (filp->f_flags & FASYNC) { - DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue)); - pfm_do_fasync (-1, filp, ctx, 0); - } - - PROTECT_CTX(ctx, flags); - - state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - - task = PFM_CTX_TASK(ctx); - regs = task_pt_regs(task); - - DPRINT(("ctx_state=%d is_current=%d\n", - state, - task == current ? 1 : 0)); - - /* - * if state == UNLOADED, then task is NULL - */ - - /* - * we must stop and unload because we are losing access to the context. - */ - if (task == current) { -#ifdef CONFIG_SMP - /* - * the task IS the owner but it migrated to another CPU: that's bad - * but we must handle this cleanly. Unfortunately, the kernel does - * not provide a mechanism to block migration (while the context is loaded). - * - * We need to release the resource on the ORIGINAL cpu. 
- */ - if (is_system && ctx->ctx_cpu != smp_processor_id()) { - - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - /* - * keep context protected but unmask interrupt for IPI - */ - local_irq_restore(flags); - - pfm_syswide_cleanup_other_cpu(ctx); - - /* - * restore interrupt masking - */ - local_irq_save(flags); - - /* - * context is unloaded at this point - */ - } else -#endif /* CONFIG_SMP */ - { - - DPRINT(("forcing unload\n")); - /* - * stop and unload, returning with state UNLOADED - * and session unreserved. - */ - pfm_context_unload(ctx, NULL, 0, regs); - - DPRINT(("ctx_state=%d\n", ctx->ctx_state)); - } - } - - /* - * remove virtual mapping, if any, for the calling task. - * cannot reset ctx field until last user is calling close(). - * - * ctx_smpl_vaddr must never be cleared because it is needed - * by every task with access to the context - * - * When called from do_exit(), the mm context is gone already, therefore - * mm is NULL, i.e., the VMA is already gone and we do not have to - * do anything here - */ - if (ctx->ctx_smpl_vaddr && current->mm) { - smpl_buf_vaddr = ctx->ctx_smpl_vaddr; - smpl_buf_size = ctx->ctx_smpl_size; - } - - UNPROTECT_CTX(ctx, flags); - - /* - * if there was a mapping, then we systematically remove it - * at this point. Cannot be done inside critical section - * because some VM function reenables interrupts. - * - */ - if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size); - - return 0; -} -/* - * called either on explicit close() or from exit_files(). - * Only the LAST user of the file gets to this point, i.e., it is - * called only ONCE. - * - * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero - * (fput()),i.e, last task to access the file. Nobody else can access the - * file at this point. - * - * When called from exit_files(), the VMA has been freed because exit_mm() - * is executed before exit_files(). 
- * - * When called from exit_files(), the current task is not yet ZOMBIE but we - * flush the PMU state to the context. - */ -static int -pfm_close(struct inode *inode, struct file *filp) -{ - pfm_context_t *ctx; - struct task_struct *task; - struct pt_regs *regs; - DECLARE_WAITQUEUE(wait, current); - unsigned long flags; - unsigned long smpl_buf_size = 0UL; - void *smpl_buf_addr = NULL; - int free_possible = 1; - int state, is_system; - - DPRINT(("pfm_close called private=%p\n", filp->private_data)); - - if (PFM_IS_FILE(filp) == 0) { - DPRINT(("bad magic\n")); - return -EBADF; - } - - ctx = (pfm_context_t *)filp->private_data; - if (ctx == NULL) { - printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid); - return -EBADF; - } - - PROTECT_CTX(ctx, flags); - - state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - - task = PFM_CTX_TASK(ctx); - regs = task_pt_regs(task); - - DPRINT(("ctx_state=%d is_current=%d\n", - state, - task == current ? 1 : 0)); - - /* - * if task == current, then pfm_flush() unloaded the context - */ - if (state == PFM_CTX_UNLOADED) goto doit; - - /* - * context is loaded/masked and task != current, we need to - * either force an unload or go zombie - */ - - /* - * The task is currently blocked or will block after an overflow. - * we must force it to wakeup to get out of the - * MASKED state and transition to the unloaded state by itself. - * - * This situation is only possible for per-task mode - */ - if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) { - - /* - * set a "partial" zombie state to be checked - * upon return from down() in pfm_handle_work(). - * - * We cannot use the ZOMBIE state, because it is checked - * by pfm_load_regs() which is called upon wakeup from down(). - * In such case, it would free the context and then we would - * return to pfm_handle_work() which would access the - * stale context. Instead, we set a flag invisible to pfm_load_regs() - * but visible to pfm_handle_work(). 
- * - * For some window of time, we have a zombie context with - * ctx_state = MASKED and not ZOMBIE - */ - ctx->ctx_fl_going_zombie = 1; - - /* - * force task to wake up from MASKED state - */ - complete(&ctx->ctx_restart_done); - - DPRINT(("waking up ctx_state=%d\n", state)); - - /* - * put ourself to sleep waiting for the other - * task to report completion - * - * the context is protected by mutex, therefore there - * is no risk of being notified of completion before - * begin actually on the waitq. - */ - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&ctx->ctx_zombieq, &wait); - - UNPROTECT_CTX(ctx, flags); - - /* - * XXX: check for signals : - * - ok for explicit close - * - not ok when coming from exit_files() - */ - schedule(); - - - PROTECT_CTX(ctx, flags); - - - remove_wait_queue(&ctx->ctx_zombieq, &wait); - set_current_state(TASK_RUNNING); - - /* - * context is unloaded at this point - */ - DPRINT(("after zombie wakeup ctx_state=%d for\n", state)); - } - else if (task != current) { -#ifdef CONFIG_SMP - /* - * switch context to zombie state - */ - ctx->ctx_state = PFM_CTX_ZOMBIE; - - DPRINT(("zombie ctx for [%d]\n", task->pid)); - /* - * cannot free the context on the spot. deferred until - * the task notices the ZOMBIE state - */ - free_possible = 0; -#else - pfm_context_unload(ctx, NULL, 0, regs); -#endif - } - -doit: - /* reload state, may have changed during opening of critical section */ - state = ctx->ctx_state; - - /* - * the context is still attached to a task (possibly current) - * we cannot destroy it right now - */ - - /* - * we must free the sampling buffer right here because - * we cannot rely on it being cleaned up later by the - * monitored task. It is not possible to free vmalloc'ed - * memory in pfm_load_regs(). Instead, we remove the buffer - * now. should there be subsequent PMU overflow originally - * meant for sampling, the will be converted to spurious - * and that's fine because the monitoring tools is gone anyway. 
- */ - if (ctx->ctx_smpl_hdr) { - smpl_buf_addr = ctx->ctx_smpl_hdr; - smpl_buf_size = ctx->ctx_smpl_size; - /* no more sampling */ - ctx->ctx_smpl_hdr = NULL; - ctx->ctx_fl_is_sampling = 0; - } - - DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n", - state, - free_possible, - smpl_buf_addr, - smpl_buf_size)); - - if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt); - - /* - * UNLOADED that the session has already been unreserved. - */ - if (state == PFM_CTX_ZOMBIE) { - pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu); - } - - /* - * disconnect file descriptor from context must be done - * before we unlock. - */ - filp->private_data = NULL; - - /* - * if we free on the spot, the context is now completely unreachable - * from the callers side. The monitored task side is also cut, so we - * can freely cut. - * - * If we have a deferred free, only the caller side is disconnected. - */ - UNPROTECT_CTX(ctx, flags); - - /* - * All memory free operations (especially for vmalloc'ed memory) - * MUST be done with interrupts ENABLED. 
- */ - if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size); - - /* - * return the memory used by the context - */ - if (free_possible) pfm_context_free(ctx); - - return 0; -} - -static int -pfm_no_open(struct inode *irrelevant, struct file *dontcare) -{ - DPRINT(("pfm_no_open called\n")); - return -ENXIO; -} - - - -static const struct file_operations pfm_file_ops = { - .llseek = no_llseek, - .read = pfm_read, - .write = pfm_write, - .poll = pfm_poll, - .ioctl = pfm_ioctl, - .open = pfm_no_open, /* special open code to disallow open via /proc */ - .fasync = pfm_fasync, - .release = pfm_close, - .flush = pfm_flush -}; - -static int -pfmfs_delete_dentry(struct dentry *dentry) -{ - return 1; -} - -static struct dentry_operations pfmfs_dentry_operations = { - .d_delete = pfmfs_delete_dentry, -}; - - -static int -pfm_alloc_fd(struct file **cfile) -{ - int fd, ret = 0; - struct file *file = NULL; - struct inode * inode; - char name[32]; - struct qstr this; - - fd = get_unused_fd(); - if (fd < 0) return -ENFILE; - - ret = -ENFILE; - - file = get_empty_filp(); - if (!file) goto out; - - /* - * allocate a new inode - */ - inode = new_inode(pfmfs_mnt->mnt_sb); - if (!inode) goto out; - - DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode)); - - inode->i_mode = S_IFCHR|S_IRUGO; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - - sprintf(name, "[%lu]", inode->i_ino); - this.name = name; - this.len = strlen(name); - this.hash = inode->i_ino; - - ret = -ENOMEM; - - /* - * allocate a new dcache entry - */ - file->f_path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); - if (!file->f_path.dentry) goto out; - - file->f_path.dentry->d_op = &pfmfs_dentry_operations; - - d_add(file->f_path.dentry, inode); - file->f_path.mnt = mntget(pfmfs_mnt); - file->f_mapping = inode->i_mapping; - - file->f_op = &pfm_file_ops; - file->f_mode = FMODE_READ; - file->f_flags = O_RDONLY; - file->f_pos = 0; - - /* - * may have to delay until context is attached? 
- */ - fd_install(fd, file); - - /* - * the file structure we will use - */ - *cfile = file; - - return fd; -out: - if (file) put_filp(file); - put_unused_fd(fd); - return ret; -} - -static void -pfm_free_fd(int fd, struct file *file) -{ - struct files_struct *files = current->files; - struct fdtable *fdt; - - /* - * there ie no fd_uninstall(), so we do it here - */ - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - rcu_assign_pointer(fdt->fd[fd], NULL); - spin_unlock(&files->file_lock); - - if (file) - put_filp(file); - put_unused_fd(fd); -} - -static int -pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size) -{ - DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size)); - - while (size > 0) { - unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT; - - - if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY)) - return -ENOMEM; - - addr += PAGE_SIZE; - buf += PAGE_SIZE; - size -= PAGE_SIZE; - } - return 0; -} - -/* - * allocate a sampling buffer and remaps it into the user address space of the task - */ -static int -pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma = NULL; - unsigned long size; - void *smpl_buf; - - - /* - * the fixed header + requested size and align to page boundary - */ - size = PAGE_ALIGN(rsize); - - DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size)); - - /* - * check requested size to avoid Denial-of-service attacks - * XXX: may have to refine this test - * Check against address space limit. - * - * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) - * return -ENOMEM; - */ - if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) - return -ENOMEM; - - /* - * We do the easy to undo allocations first. 
- * - * pfm_rvmalloc(), clears the buffer, so there is no leak - */ - smpl_buf = pfm_rvmalloc(size); - if (smpl_buf == NULL) { - DPRINT(("Can't allocate sampling buffer\n")); - return -ENOMEM; - } - - DPRINT(("smpl_buf @%p\n", smpl_buf)); - - /* allocate vma */ - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); - if (!vma) { - DPRINT(("Cannot allocate vma\n")); - goto error_kmem; - } - - /* - * partially initialize the vma for the sampling buffer - */ - vma->vm_mm = mm; - vma->vm_file = filp; - vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; - vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ - - /* - * Now we have everything we need and we can initialize - * and connect all the data structures - */ - - ctx->ctx_smpl_hdr = smpl_buf; - ctx->ctx_smpl_size = size; /* aligned size */ - - /* - * Let's do the difficult operations next. - * - * now we atomically find some area in the address space and - * remap the buffer in it. - */ - down_write(&task->mm->mmap_sem); - - /* find some free area in address space, must have mmap sem held */ - vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0); - if (vma->vm_start == 0UL) { - DPRINT(("Cannot find unmapped area for size %ld\n", size)); - up_write(&task->mm->mmap_sem); - goto error; - } - vma->vm_end = vma->vm_start + size; - vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; - - DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); - - /* can only be applied to current task, need to have the mm semaphore held when called */ - if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { - DPRINT(("Can't remap buffer\n")); - up_write(&task->mm->mmap_sem); - goto error; - } - - get_file(filp); - - /* - * now insert the vma in the vm list for the process, must be - * done with mmap lock held - */ - insert_vm_struct(mm, vma); - - mm->total_vm += size >> PAGE_SHIFT; - vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, - 
vma_pages(vma)); - up_write(&task->mm->mmap_sem); - - /* - * keep track of user level virtual address - */ - ctx->ctx_smpl_vaddr = (void *)vma->vm_start; - *(unsigned long *)user_vaddr = vma->vm_start; - - return 0; - -error: - kmem_cache_free(vm_area_cachep, vma); -error_kmem: - pfm_rvfree(smpl_buf, size); - - return -ENOMEM; -} - -/* - * XXX: do something better here - */ -static int -pfm_bad_permissions(struct task_struct *task) -{ - /* inspired by ptrace_attach() */ - DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", - current->uid, - current->gid, - task->euid, - task->suid, - task->uid, - task->egid, - task->sgid)); - - return ((current->uid != task->euid) - || (current->uid != task->suid) - || (current->uid != task->uid) - || (current->gid != task->egid) - || (current->gid != task->sgid) - || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE); -} - -static int -pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx) -{ - int ctx_flags; - - /* valid signal */ - - ctx_flags = pfx->ctx_flags; - - if (ctx_flags & PFM_FL_SYSTEM_WIDE) { - - /* - * cannot block in this mode - */ - if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { - DPRINT(("cannot use blocking mode when in system wide monitoring\n")); - return -EINVAL; - } - } else { - } - /* probably more to add here */ - - return 0; -} - -static int -pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned int ctx_flags, - unsigned int cpu, pfarg_context_t *arg) -{ - pfm_buffer_fmt_t *fmt = NULL; - unsigned long size = 0UL; - void *uaddr = NULL; - void *fmt_arg = NULL; - int ret = 0; -#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1) - - /* invoke and lock buffer format, if found */ - fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id); - if (fmt == NULL) { - DPRINT(("[%d] cannot find buffer format\n", task->pid)); - return -EINVAL; - } - - /* - * buffer argument MUST be contiguous to pfarg_context_t - */ - if (fmt->fmt_arg_size) fmt_arg = 
PFM_CTXARG_BUF_ARG(arg); - - ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg); - - DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret)); - - if (ret) goto error; - - /* link buffer format and context */ - ctx->ctx_buf_fmt = fmt; - - /* - * check if buffer format wants to use perfmon buffer allocation/mapping service - */ - ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size); - if (ret) goto error; - - if (size) { - /* - * buffer is always remapped into the caller's address space - */ - ret = pfm_smpl_buffer_alloc(current, filp, ctx, size, &uaddr); - if (ret) goto error; - - /* keep track of user address of buffer */ - arg->ctx_smpl_vaddr = uaddr; - } - ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg); - -error: - return ret; -} - -static void -pfm_reset_pmu_state(pfm_context_t *ctx) -{ - int i; - - /* - * install reset values for PMC. - */ - for (i=1; PMC_IS_LAST(i) == 0; i++) { - if (PMC_IS_IMPL(i) == 0) continue; - ctx->ctx_pmcs[i] = PMC_DFL_VAL(i); - DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i])); - } - /* - * PMD registers are set to 0UL when the context in memset() - */ - - /* - * On context switched restore, we must restore ALL pmc and ALL pmd even - * when they are not actively used by the task. In UP, the incoming process - * may otherwise pick up left over PMC, PMD state from the previous process. - * As opposed to PMD, stale PMC can cause harm to the incoming - * process because they may change what is being measured. - * Therefore, we must systematically reinstall the entire - * PMC state. In SMP, the same thing is possible on the - * same CPU but also on between 2 CPUs. - * - * The problem with PMD is information leaking especially - * to user level when psr.sp=0 - * - * There is unfortunately no easy way to avoid this problem - * on either UP or SMP. This definitively slows down the - * pfm_load_regs() function. 
- */ - - /* - * bitmask of all PMCs accessible to this context - * - * PMC0 is treated differently. - */ - ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1; - - /* - * bitmask of all PMDs that are accessible to this context - */ - ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0]; - - DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0])); - - /* - * useful in case of re-enable after disable - */ - ctx->ctx_used_ibrs[0] = 0UL; - ctx->ctx_used_dbrs[0] = 0UL; -} - -static int -pfm_ctx_getsize(void *arg, size_t *sz) -{ - pfarg_context_t *req = (pfarg_context_t *)arg; - pfm_buffer_fmt_t *fmt; - - *sz = 0; - - if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0; - - fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id); - if (fmt == NULL) { - DPRINT(("cannot find buffer format\n")); - return -EINVAL; - } - /* get just enough to copy in user parameters */ - *sz = fmt->fmt_arg_size; - DPRINT(("arg_size=%lu\n", *sz)); - - return 0; -} - - - -/* - * cannot attach if : - * - kernel task - * - task not owned by caller - * - task incompatible with context mode - */ -static int -pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) -{ - /* - * no kernel task or task not owner by caller - */ - if (task->mm == NULL) { - DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid)); - return -EPERM; - } - if (pfm_bad_permissions(task)) { - DPRINT(("no permission to attach to [%d]\n", task->pid)); - return -EPERM; - } - /* - * cannot block in self-monitoring mode - */ - if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) { - DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid)); - return -EINVAL; - } - - if (task->exit_state == EXIT_ZOMBIE) { - DPRINT(("cannot attach to zombie task [%d]\n", task->pid)); - return -EBUSY; - } - - /* - * always ok for self - */ - if (task == current) return 0; - - if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { - DPRINT(("cannot 
attach to non-stopped task [%d] state=%ld\n", task->pid, task->state)); - return -EBUSY; - } - /* - * make sure the task is off any CPU - */ - wait_task_inactive(task); - - /* more to come... */ - - return 0; -} - -static int -pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) -{ - struct task_struct *p = current; - int ret; - - /* XXX: need to add more checks here */ - if (pid < 2) return -EPERM; - - if (pid != current->pid) { - - read_lock(&tasklist_lock); - - p = find_task_by_pid(pid); - - /* make sure task cannot go away while we operate on it */ - if (p) get_task_struct(p); - - read_unlock(&tasklist_lock); - - if (p == NULL) return -ESRCH; - } - - ret = pfm_task_incompatible(ctx, p); - if (ret == 0) { - *task = p; - } else if (p != current) { - pfm_put_task(p); - } - return ret; -} - - - -static int -pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - pfarg_context_t *req = (pfarg_context_t *)arg; - struct file *filp; - int ctx_flags; - int ret; - - /* let's check the arguments first */ - ret = pfarg_is_sane(current, req); - if (ret < 0) return ret; - - ctx_flags = req->ctx_flags; - - ret = -ENOMEM; - - ctx = pfm_context_alloc(); - if (!ctx) goto error; - - ret = pfm_alloc_fd(&filp); - if (ret < 0) goto error_file; - - req->ctx_fd = ctx->ctx_fd = ret; - - /* - * attach context to file - */ - filp->private_data = ctx; - - /* - * does the user want to sample? - */ - if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) { - ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req); - if (ret) goto buffer_error; - } - - /* - * init context protection lock - */ - spin_lock_init(&ctx->ctx_lock); - - /* - * context is unloaded - */ - ctx->ctx_state = PFM_CTX_UNLOADED; - - /* - * initialization of context's flags - */ - ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; - ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; - ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 
1 : 0; /* assume record() is defined */ - ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; - /* - * will move to set properties - * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0; - */ - - /* - * init restart semaphore to locked - */ - init_completion(&ctx->ctx_restart_done); - - /* - * activation is used in SMP only - */ - ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; - SET_LAST_CPU(ctx, -1); - - /* - * initialize notification message queue - */ - ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; - init_waitqueue_head(&ctx->ctx_msgq_wait); - init_waitqueue_head(&ctx->ctx_zombieq); - - DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n", - ctx, - ctx_flags, - ctx->ctx_fl_system, - ctx->ctx_fl_block, - ctx->ctx_fl_excl_idle, - ctx->ctx_fl_no_msg, - ctx->ctx_fd)); - - /* - * initialize soft PMU state - */ - pfm_reset_pmu_state(ctx); - - return 0; - -buffer_error: - pfm_free_fd(ctx->ctx_fd, filp); - - if (ctx->ctx_buf_fmt) { - pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs); - } -error_file: - pfm_context_free(ctx); - -error: - return ret; -} - -static inline unsigned long -pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) -{ - unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; - unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; - extern unsigned long carta_random32 (unsigned long seed); - - if (reg->flags & PFM_REGFL_RANDOM) { - new_seed = carta_random32(old_seed); - val -= (old_seed & mask); /* counter values are negative numbers! 
*/ - if ((mask >> 32) != 0) - /* construct a full 64-bit random value: */ - new_seed |= carta_random32(old_seed >> 32) << 32; - reg->seed = new_seed; - } - reg->lval = val; - return val; -} - -static void -pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) -{ - unsigned long mask = ovfl_regs[0]; - unsigned long reset_others = 0UL; - unsigned long val; - int i; - - /* - * now restore reset value on sampling overflowed counters - */ - mask >>= PMU_FIRST_COUNTER; - for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { - - if ((mask & 0x1UL) == 0UL) continue; - - ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); - reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; - - DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); - } - - /* - * Now take care of resetting the other registers - */ - for(i = 0; reset_others; i++, reset_others >>= 1) { - - if ((reset_others & 0x1) == 0) continue; - - ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); - - DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", - is_long_reset ? "long" : "short", i, val)); - } -} - -static void -pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) -{ - unsigned long mask = ovfl_regs[0]; - unsigned long reset_others = 0UL; - unsigned long val; - int i; - - DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); - - if (ctx->ctx_state == PFM_CTX_MASKED) { - pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset); - return; - } - - /* - * now restore reset value on sampling overflowed counters - */ - mask >>= PMU_FIRST_COUNTER; - for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { - - if ((mask & 0x1UL) == 0UL) continue; - - val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); - reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; - - DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? 
"long" : "short", i, val)); - - pfm_write_soft_counter(ctx, i, val); - } - - /* - * Now take care of resetting the other registers - */ - for(i = 0; reset_others; i++, reset_others >>= 1) { - - if ((reset_others & 0x1) == 0) continue; - - val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); - - if (PMD_IS_COUNTING(i)) { - pfm_write_soft_counter(ctx, i, val); - } else { - ia64_set_pmd(i, val); - } - DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", - is_long_reset ? "long" : "short", i, val)); - } - ia64_srlz_d(); -} - -static int -pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct task_struct *task; - pfarg_reg_t *req = (pfarg_reg_t *)arg; - unsigned long value, pmc_pm; - unsigned long smpl_pmds, reset_pmds, impl_pmds; - unsigned int cnum, reg_flags, flags, pmc_type; - int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; - int is_monitor, is_counting, state; - int ret = -EINVAL; - pfm_reg_check_t wr_func; -#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) - - state = ctx->ctx_state; - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; - is_system = ctx->ctx_fl_system; - task = ctx->ctx_task; - impl_pmds = pmu_conf->impl_pmds[0]; - - if (state == PFM_CTX_ZOMBIE) return -EINVAL; - - if (is_loaded) { - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (is_system && ctx->ctx_cpu != smp_processor_id()) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 
1 : 0; - } - expert_mode = pfm_sysctl.expert_mode; - - for (i = 0; i < count; i++, req++) { - - cnum = req->reg_num; - reg_flags = req->reg_flags; - value = req->reg_value; - smpl_pmds = req->reg_smpl_pmds[0]; - reset_pmds = req->reg_reset_pmds[0]; - flags = 0; - - - if (cnum >= PMU_MAX_PMCS) { - DPRINT(("pmc%u is invalid\n", cnum)); - goto error; - } - - pmc_type = pmu_conf->pmc_desc[cnum].type; - pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; - is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; - is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0; - - /* - * we reject all non implemented PMC as well - * as attempts to modify PMC[0-3] which are used - * as status registers by the PMU - */ - if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) { - DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); - goto error; - } - wr_func = pmu_conf->pmc_desc[cnum].write_check; - /* - * If the PMC is a monitor, then if the value is not the default: - * - system-wide session: PMCx.pm=1 (privileged monitor) - * - per-task : PMCx.pm=0 (user monitor) - */ - if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) { - DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n", - cnum, - pmc_pm, - is_system)); - goto error; - } - - if (is_counting) { - /* - * enforce generation of overflow interrupt. Necessary on all - * CPUs. 
- */ - value |= 1 << PMU_PMC_OI; - - if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { - flags |= PFM_REGFL_OVFL_NOTIFY; - } - - if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; - - /* verify validity of smpl_pmds */ - if ((smpl_pmds & impl_pmds) != smpl_pmds) { - DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum)); - goto error; - } - - /* verify validity of reset_pmds */ - if ((reset_pmds & impl_pmds) != reset_pmds) { - DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); - goto error; - } - } else { - if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { - DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum)); - goto error; - } - /* eventid on non-counting monitors are ignored */ - } - - /* - * execute write checker, if any - */ - if (likely(expert_mode == 0 && wr_func)) { - ret = (*wr_func)(task, ctx, cnum, &value, regs); - if (ret) goto error; - ret = -EINVAL; - } - - /* - * no error on this register - */ - PFM_REG_RETFLAG_SET(req->reg_flags, 0); - - /* - * Now we commit the changes to the software state - */ - - /* - * update overflow information - */ - if (is_counting) { - /* - * full flag update each time a register is programmed - */ - ctx->ctx_pmds[cnum].flags = flags; - - ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds; - ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds; - ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid; - - /* - * Mark all PMDS to be accessed as used. - * - * We do not keep track of PMC because we have to - * systematically restore ALL of them. - * - * We do not update the used_monitors mask, because - * if we have not programmed them, then will be in - * a quiescent state, therefore we will not need to - * mask/restore then when context is MASKED. 
- */ - CTX_USED_PMD(ctx, reset_pmds); - CTX_USED_PMD(ctx, smpl_pmds); - /* - * make sure we do not try to reset on - * restart because we have established new values - */ - if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; - } - /* - * Needed in case the user does not initialize the equivalent - * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no - * possible leak here. - */ - CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]); - - /* - * keep track of the monitor PMC that we are using. - * we save the value of the pmc in ctx_pmcs[] and if - * the monitoring is not stopped for the context we also - * place it in the saved state area so that it will be - * picked up later by the context switch code. - * - * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). - * - * The value in th_pmcs[] may be modified on overflow, i.e., when - * monitoring needs to be stopped. - */ - if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); - - /* - * update context state - */ - ctx->ctx_pmcs[cnum] = value; - - if (is_loaded) { - /* - * write thread state - */ - if (is_system == 0) ctx->th_pmcs[cnum] = value; - - /* - * write hardware register if we can - */ - if (can_access_pmu) { - ia64_set_pmc(cnum, value); - } -#ifdef CONFIG_SMP - else { - /* - * per-task SMP only here - * - * we are guaranteed that the task is not running on the other CPU, - * we indicate that this PMD will need to be reloaded if the task - * is rescheduled on the CPU it ran last on. 
- */ - ctx->ctx_reload_pmcs[0] |= 1UL << cnum; - } -#endif - } - - DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", - cnum, - value, - is_loaded, - can_access_pmu, - flags, - ctx->ctx_all_pmcs[0], - ctx->ctx_used_pmds[0], - ctx->ctx_pmds[cnum].eventid, - smpl_pmds, - reset_pmds, - ctx->ctx_reload_pmcs[0], - ctx->ctx_used_monitors[0], - ctx->ctx_ovfl_regs[0])); - } - - /* - * make sure the changes are visible - */ - if (can_access_pmu) ia64_srlz_d(); - - return 0; -error: - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); - return ret; -} - -static int -pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct task_struct *task; - pfarg_reg_t *req = (pfarg_reg_t *)arg; - unsigned long value, hw_value, ovfl_mask; - unsigned int cnum; - int i, can_access_pmu = 0, state; - int is_counting, is_loaded, is_system, expert_mode; - int ret = -EINVAL; - pfm_reg_check_t wr_func; - - - state = ctx->ctx_state; - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; - is_system = ctx->ctx_fl_system; - ovfl_mask = pmu_conf->ovfl_val; - task = ctx->ctx_task; - - if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; - - /* - * on both UP and SMP, we can only write to the PMC when the task is - * the owner of the local PMU. - */ - if (likely(is_loaded)) { - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 
1 : 0; - } - expert_mode = pfm_sysctl.expert_mode; - - for (i = 0; i < count; i++, req++) { - - cnum = req->reg_num; - value = req->reg_value; - - if (!PMD_IS_IMPL(cnum)) { - DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); - goto abort_mission; - } - is_counting = PMD_IS_COUNTING(cnum); - wr_func = pmu_conf->pmd_desc[cnum].write_check; - - /* - * execute write checker, if any - */ - if (unlikely(expert_mode == 0 && wr_func)) { - unsigned long v = value; - - ret = (*wr_func)(task, ctx, cnum, &v, regs); - if (ret) goto abort_mission; - - value = v; - ret = -EINVAL; - } - - /* - * no error on this register - */ - PFM_REG_RETFLAG_SET(req->reg_flags, 0); - - /* - * now commit changes to software state - */ - hw_value = value; - - /* - * update virtualized (64bits) counter - */ - if (is_counting) { - /* - * write context state - */ - ctx->ctx_pmds[cnum].lval = value; - - /* - * when context is load we use the split value - */ - if (is_loaded) { - hw_value = value & ovfl_mask; - value = value & ~ovfl_mask; - } - } - /* - * update reset values (not just for counters) - */ - ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset; - ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset; - - /* - * update randomization parameters (not just for counters) - */ - ctx->ctx_pmds[cnum].seed = req->reg_random_seed; - ctx->ctx_pmds[cnum].mask = req->reg_random_mask; - - /* - * update context value - */ - ctx->ctx_pmds[cnum].val = value; - - /* - * Keep track of what we use - * - * We do not keep track of PMC because we have to - * systematically restore ALL of them. 
- */ - CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum)); - - /* - * mark this PMD register used as well - */ - CTX_USED_PMD(ctx, RDEP(cnum)); - - /* - * make sure we do not try to reset on - * restart because we have established new values - */ - if (is_counting && state == PFM_CTX_MASKED) { - ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; - } - - if (is_loaded) { - /* - * write thread state - */ - if (is_system == 0) ctx->th_pmds[cnum] = hw_value; - - /* - * write hardware register if we can - */ - if (can_access_pmu) { - ia64_set_pmd(cnum, hw_value); - } else { -#ifdef CONFIG_SMP - /* - * we are guaranteed that the task is not running on the other CPU, - * we indicate that this PMD will need to be reloaded if the task - * is rescheduled on the CPU it ran last on. - */ - ctx->ctx_reload_pmds[0] |= 1UL << cnum; -#endif - } - } - - DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " - "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", - cnum, - value, - is_loaded, - can_access_pmu, - hw_value, - ctx->ctx_pmds[cnum].val, - ctx->ctx_pmds[cnum].short_reset, - ctx->ctx_pmds[cnum].long_reset, - PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N', - ctx->ctx_pmds[cnum].seed, - ctx->ctx_pmds[cnum].mask, - ctx->ctx_used_pmds[0], - ctx->ctx_pmds[cnum].reset_pmds[0], - ctx->ctx_reload_pmds[0], - ctx->ctx_all_pmds[0], - ctx->ctx_ovfl_regs[0])); - } - - /* - * make changes visible - */ - if (can_access_pmu) ia64_srlz_d(); - - return 0; - -abort_mission: - /* - * for now, we have only one possibility for error - */ - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); - return ret; -} - -/* - * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function. - * Therefore we know, we do not have to worry about the PMU overflow interrupt. 
If an - * interrupt is delivered during the call, it will be kept pending until we leave, making - * it appears as if it had been generated at the UNPROTECT_CONTEXT(). At least we are - * guaranteed to return consistent data to the user, it may simply be old. It is not - * trivial to treat the overflow while inside the call because you may end up in - * some module sampling buffer code causing deadlocks. - */ -static int -pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct task_struct *task; - unsigned long val = 0UL, lval, ovfl_mask, sval; - pfarg_reg_t *req = (pfarg_reg_t *)arg; - unsigned int cnum, reg_flags = 0; - int i, can_access_pmu = 0, state; - int is_loaded, is_system, is_counting, expert_mode; - int ret = -EINVAL; - pfm_reg_check_t rd_func; - - /* - * access is possible when loaded only for - * self-monitoring tasks or in UP mode - */ - - state = ctx->ctx_state; - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; - is_system = ctx->ctx_fl_system; - ovfl_mask = pmu_conf->ovfl_val; - task = ctx->ctx_task; - - if (state == PFM_CTX_ZOMBIE) return -EINVAL; - - if (likely(is_loaded)) { - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - /* - * this can be true when not self-monitoring only in UP - */ - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; - - if (can_access_pmu) ia64_srlz_d(); - } - expert_mode = pfm_sysctl.expert_mode; - - DPRINT(("ld=%d apmu=%d ctx_state=%d\n", - is_loaded, - can_access_pmu, - state)); - - /* - * on both UP and SMP, we can only read the PMD from the hardware register when - * the task is the owner of the local PMU. 
- */ - - for (i = 0; i < count; i++, req++) { - - cnum = req->reg_num; - reg_flags = req->reg_flags; - - if (unlikely(!PMD_IS_IMPL(cnum))) goto error; - /* - * we can only read the register that we use. That includes - * the one we explicitly initialize AND the one we want included - * in the sampling buffer (smpl_regs). - * - * Having this restriction allows optimization in the ctxsw routine - * without compromising security (leaks) - */ - if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error; - - sval = ctx->ctx_pmds[cnum].val; - lval = ctx->ctx_pmds[cnum].lval; - is_counting = PMD_IS_COUNTING(cnum); - - /* - * If the task is not the current one, then we check if the - * PMU state is still in the local live register due to lazy ctxsw. - * If true, then we read directly from the registers. - */ - if (can_access_pmu){ - val = ia64_get_pmd(cnum); - } else { - /* - * context has been saved - * if context is zombie, then task does not exist anymore. - * In this case, we use the full value saved in the context (pfm_flush_regs()). - */ - val = is_loaded ? ctx->th_pmds[cnum] : 0UL; - } - rd_func = pmu_conf->pmd_desc[cnum].read_check; - - if (is_counting) { - /* - * XXX: need to check for overflow when loaded - */ - val &= ovfl_mask; - val += sval; - } - - /* - * execute read checker, if any - */ - if (unlikely(expert_mode == 0 && rd_func)) { - unsigned long v = val; - ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); - if (ret) goto error; - val = v; - ret = -EINVAL; - } - - PFM_REG_RETFLAG_SET(reg_flags, 0); - - DPRINT(("pmd[%u]=0x%lx\n", cnum, val)); - - /* - * update register return value, abort all if problem during copy. - * we only modify the reg_flags field. no check mode is fine because - * access has been verified upfront in sys_perfmonctl(). 
- */ - req->reg_value = val; - req->reg_flags = reg_flags; - req->reg_last_reset_val = lval; - } - - return 0; - -error: - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); - return ret; -} - -int -pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) -{ - pfm_context_t *ctx; - - if (req == NULL) return -EINVAL; - - ctx = GET_PMU_CTX(); - - if (ctx == NULL) return -EINVAL; - - /* - * for now limit to current task, which is enough when calling - * from overflow handler - */ - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; - - return pfm_write_pmcs(ctx, req, nreq, regs); -} -EXPORT_SYMBOL(pfm_mod_write_pmcs); - -int -pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) -{ - pfm_context_t *ctx; - - if (req == NULL) return -EINVAL; - - ctx = GET_PMU_CTX(); - - if (ctx == NULL) return -EINVAL; - - /* - * for now limit to current task, which is enough when calling - * from overflow handler - */ - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; - - return pfm_read_pmds(ctx, req, nreq, regs); -} -EXPORT_SYMBOL(pfm_mod_read_pmds); - -/* - * Only call this function when a process it trying to - * write the debug registers (reading is always allowed) - */ -int -pfm_use_debug_registers(struct task_struct *task) -{ - pfm_context_t *ctx = task->thread.pfm_context; - unsigned long flags; - int ret = 0; - - if (pmu_conf->use_rr_dbregs == 0) return 0; - - DPRINT(("called for [%d]\n", task->pid)); - - /* - * do it only once - */ - if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0; - - /* - * Even on SMP, we do not need to use an atomic here because - * the only way in is via ptrace() and this is possible only when the - * process is stopped. Even in the case where the ctxsw out is not totally - * completed by the time we come here, there is no way the 'stopped' process - * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. 
- * So this is always safe. - */ - if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; - - LOCK_PFS(flags); - - /* - * We cannot allow setting breakpoints when system wide monitoring - * sessions are using the debug registers. - */ - if (pfm_sessions.pfs_sys_use_dbregs> 0) - ret = -1; - else - pfm_sessions.pfs_ptrace_use_dbregs++; - - DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", - pfm_sessions.pfs_ptrace_use_dbregs, - pfm_sessions.pfs_sys_use_dbregs, - task->pid, ret)); - - UNLOCK_PFS(flags); - - return ret; -} - -/* - * This function is called for every task that exits with the - * IA64_THREAD_DBG_VALID set. This indicates a task which was - * able to use the debug registers for debugging purposes via - * ptrace(). Therefore we know it was not using them for - * perfmormance monitoring, so we only decrement the number - * of "ptraced" debug register users to keep the count up to date - */ -int -pfm_release_debug_registers(struct task_struct *task) -{ - unsigned long flags; - int ret; - - if (pmu_conf->use_rr_dbregs == 0) return 0; - - LOCK_PFS(flags); - if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { - printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid); - ret = -1; - } else { - pfm_sessions.pfs_ptrace_use_dbregs--; - ret = 0; - } - UNLOCK_PFS(flags); - - return ret; -} - -static int -pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct task_struct *task; - pfm_buffer_fmt_t *fmt; - pfm_ovfl_ctrl_t rst_ctrl; - int state, is_system; - int ret = 0; - - state = ctx->ctx_state; - fmt = ctx->ctx_buf_fmt; - is_system = ctx->ctx_fl_system; - task = PFM_CTX_TASK(ctx); - - switch(state) { - case PFM_CTX_MASKED: - break; - case PFM_CTX_LOADED: - if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; - /* fall through */ - case PFM_CTX_UNLOADED: - case PFM_CTX_ZOMBIE: - DPRINT(("invalid state=%d\n", state)); - return -EBUSY; - default: - DPRINT(("state=%d, cannot operate (no 
active_restart handler)\n", state)); - return -EINVAL; - } - - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (is_system && ctx->ctx_cpu != smp_processor_id()) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - - /* sanity check */ - if (unlikely(task == NULL)) { - printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid); - return -EINVAL; - } - - if (task == current || is_system) { - - fmt = ctx->ctx_buf_fmt; - - DPRINT(("restarting self %d ovfl=0x%lx\n", - task->pid, - ctx->ctx_ovfl_regs[0])); - - if (CTX_HAS_SMPL(ctx)) { - - prefetch(ctx->ctx_smpl_hdr); - - rst_ctrl.bits.mask_monitoring = 0; - rst_ctrl.bits.reset_ovfl_pmds = 0; - - if (state == PFM_CTX_LOADED) - ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); - else - ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); - } else { - rst_ctrl.bits.mask_monitoring = 0; - rst_ctrl.bits.reset_ovfl_pmds = 1; - } - - if (ret == 0) { - if (rst_ctrl.bits.reset_ovfl_pmds) - pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); - - if (rst_ctrl.bits.mask_monitoring == 0) { - DPRINT(("resuming monitoring for [%d]\n", task->pid)); - - if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); - } else { - DPRINT(("keeping monitoring stopped for [%d]\n", task->pid)); - - // cannot use pfm_stop_monitoring(task, regs); - } - } - /* - * clear overflowed PMD mask to remove any stale information - */ - ctx->ctx_ovfl_regs[0] = 0UL; - - /* - * back to LOADED state - */ - ctx->ctx_state = PFM_CTX_LOADED; - - /* - * XXX: not really useful for self monitoring - */ - ctx->ctx_fl_can_restart = 0; - - return 0; - } - - /* - * restart another task - */ - - /* - * When PFM_CTX_MASKED, we cannot issue a restart before the previous - * one is seen by the 
task. - */ - if (state == PFM_CTX_MASKED) { - if (ctx->ctx_fl_can_restart == 0) return -EINVAL; - /* - * will prevent subsequent restart before this one is - * seen by other task - */ - ctx->ctx_fl_can_restart = 0; - } - - /* - * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e. - * the task is blocked or on its way to block. That's the normal - * restart path. If the monitoring is not masked, then the task - * can be actively monitoring and we cannot directly intervene. - * Therefore we use the trap mechanism to catch the task and - * force it to reset the buffer/reset PMDs. - * - * if non-blocking, then we ensure that the task will go into - * pfm_handle_work() before returning to user mode. - * - * We cannot explicitly reset another task, it MUST always - * be done by the task itself. This works for system wide because - * the tool that is controlling the session is logically doing - * "self-monitoring". - */ - if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { - DPRINT(("unblocking [%d] \n", task->pid)); - complete(&ctx->ctx_restart_done); - } else { - DPRINT(("[%d] armed exit trap\n", task->pid)); - - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; - - PFM_SET_WORK_PENDING(task, 1); - - pfm_set_task_notify(task); - - /* - * XXX: send reschedule if task runs on another CPU - */ - } - return 0; -} - -static int -pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - unsigned int m = *(unsigned int *)arg; - - pfm_sysctl.debug = m == 0 ? 0 : 1; - - printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? 
"on" : "off"); - - if (m == 0) { - memset(pfm_stats, 0, sizeof(pfm_stats)); - for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL; - } - return 0; -} - -/* - * arg can be NULL and count can be zero for this function - */ -static int -pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct thread_struct *thread = NULL; - struct task_struct *task; - pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; - unsigned long flags; - dbreg_t dbreg; - unsigned int rnum; - int first_time; - int ret = 0, state; - int i, can_access_pmu = 0; - int is_system, is_loaded; - - if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; - - state = ctx->ctx_state; - is_loaded = state == PFM_CTX_LOADED ? 1 : 0; - is_system = ctx->ctx_fl_system; - task = ctx->ctx_task; - - if (state == PFM_CTX_ZOMBIE) return -EINVAL; - - /* - * on both UP and SMP, we can only write to the PMC when the task is - * the owner of the local PMU. - */ - if (is_loaded) { - thread = &task->thread; - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. - */ - if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; - } - - /* - * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w - * ensuring that no real breakpoint can be installed via this call. 
- * - * IMPORTANT: regs can be NULL in this function - */ - - first_time = ctx->ctx_fl_using_dbreg == 0; - - /* - * don't bother if we are loaded and task is being debugged - */ - if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { - DPRINT(("debug registers already in use for [%d]\n", task->pid)); - return -EBUSY; - } - - /* - * check for debug registers in system wide mode - * - * If though a check is done in pfm_context_load(), - * we must repeat it here, in case the registers are - * written after the context is loaded - */ - if (is_loaded) { - LOCK_PFS(flags); - - if (first_time && is_system) { - if (pfm_sessions.pfs_ptrace_use_dbregs) - ret = -EBUSY; - else - pfm_sessions.pfs_sys_use_dbregs++; - } - UNLOCK_PFS(flags); - } - - if (ret != 0) return ret; - - /* - * mark ourself as user of the debug registers for - * perfmon purposes. - */ - ctx->ctx_fl_using_dbreg = 1; - - /* - * clear hardware registers to make sure we don't - * pick up stale state. - * - * for a system wide session, we do not use - * thread.dbr, thread.ibr because this process - * never leaves the current CPU and the state - * is shared by all processes running on it - */ - if (first_time && can_access_pmu) { - DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid)); - for (i=0; i < pmu_conf->num_ibrs; i++) { - ia64_set_ibr(i, 0UL); - ia64_dv_serialize_instruction(); - } - ia64_srlz_i(); - for (i=0; i < pmu_conf->num_dbrs; i++) { - ia64_set_dbr(i, 0UL); - ia64_dv_serialize_data(); - } - ia64_srlz_d(); - } - - /* - * Now install the values into the registers - */ - for (i = 0; i < count; i++, req++) { - - rnum = req->dbreg_num; - dbreg.val = req->dbreg_value; - - ret = -EINVAL; - - if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) { - DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", - rnum, dbreg.val, mode, i, count)); - - goto abort_mission; - } - - /* - * make sure we do not install enabled breakpoint - */ - if (rnum & 
0x1) { - if (mode == PFM_CODE_RR) - dbreg.ibr.ibr_x = 0; - else - dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; - } - - PFM_REG_RETFLAG_SET(req->dbreg_flags, 0); - - /* - * Debug registers, just like PMC, can only be modified - * by a kernel call. Moreover, perfmon() access to those - * registers are centralized in this routine. The hardware - * does not modify the value of these registers, therefore, - * if we save them as they are written, we can avoid having - * to save them on context switch out. This is made possible - * by the fact that when perfmon uses debug registers, ptrace() - * won't be able to modify them concurrently. - */ - if (mode == PFM_CODE_RR) { - CTX_USED_IBR(ctx, rnum); - - if (can_access_pmu) { - ia64_set_ibr(rnum, dbreg.val); - ia64_dv_serialize_instruction(); - } - - ctx->ctx_ibrs[rnum] = dbreg.val; - - DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", - rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); - } else { - CTX_USED_DBR(ctx, rnum); - - if (can_access_pmu) { - ia64_set_dbr(rnum, dbreg.val); - ia64_dv_serialize_data(); - } - ctx->ctx_dbrs[rnum] = dbreg.val; - - DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", - rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); - } - } - - return 0; - -abort_mission: - /* - * in case it was our first attempt, we undo the global modifications - */ - if (first_time) { - LOCK_PFS(flags); - if (ctx->ctx_fl_system) { - pfm_sessions.pfs_sys_use_dbregs--; - } - UNLOCK_PFS(flags); - ctx->ctx_fl_using_dbreg = 0; - } - /* - * install error return flag - */ - PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); - - return ret; -} - -static int -pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); -} - -static int -pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs); -} - 
-int -pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) -{ - pfm_context_t *ctx; - - if (req == NULL) return -EINVAL; - - ctx = GET_PMU_CTX(); - - if (ctx == NULL) return -EINVAL; - - /* - * for now limit to current task, which is enough when calling - * from overflow handler - */ - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; - - return pfm_write_ibrs(ctx, req, nreq, regs); -} -EXPORT_SYMBOL(pfm_mod_write_ibrs); - -int -pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) -{ - pfm_context_t *ctx; - - if (req == NULL) return -EINVAL; - - ctx = GET_PMU_CTX(); - - if (ctx == NULL) return -EINVAL; - - /* - * for now limit to current task, which is enough when calling - * from overflow handler - */ - if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; - - return pfm_write_dbrs(ctx, req, nreq, regs); -} -EXPORT_SYMBOL(pfm_mod_write_dbrs); - - -static int -pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - pfarg_features_t *req = (pfarg_features_t *)arg; - - req->ft_version = PFM_VERSION; - return 0; -} - -static int -pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct pt_regs *tregs; - struct task_struct *task = PFM_CTX_TASK(ctx); - int state, is_system; - - state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - - /* - * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) - */ - if (state == PFM_CTX_UNLOADED) return -EINVAL; - - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. 
- */ - if (is_system && ctx->ctx_cpu != smp_processor_id()) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - DPRINT(("task [%d] ctx_state=%d is_system=%d\n", - PFM_CTX_TASK(ctx)->pid, - state, - is_system)); - /* - * in system mode, we need to update the PMU directly - * and the user level state of the caller, which may not - * necessarily be the creator of the context. - */ - if (is_system) { - /* - * Update local PMU first - * - * disable dcr pp - */ - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); - ia64_srlz_i(); - - /* - * update local cpuinfo - */ - PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); - - /* - * stop monitoring, does srlz.i - */ - pfm_clear_psr_pp(); - - /* - * stop monitoring in the caller - */ - ia64_psr(regs)->pp = 0; - - return 0; - } - /* - * per-task mode - */ - - if (task == current) { - /* stop monitoring at kernel level */ - pfm_clear_psr_up(); - - /* - * stop monitoring at the user level - */ - ia64_psr(regs)->up = 0; - } else { - tregs = task_pt_regs(task); - - /* - * stop monitoring at the user level - */ - ia64_psr(tregs)->up = 0; - - /* - * monitoring disabled in kernel at next reschedule - */ - ctx->ctx_saved_psr_up = 0; - DPRINT(("task=[%d]\n", task->pid)); - } - return 0; -} - - -static int -pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct pt_regs *tregs; - int state, is_system; - - state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - - if (state != PFM_CTX_LOADED) return -EINVAL; - - /* - * In system wide and when the context is loaded, access can only happen - * when the caller is running on the CPU being monitored by the session. - * It does not have to be the owner (ctx_task) of the context per se. 
- */ - if (is_system && ctx->ctx_cpu != smp_processor_id()) { - DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); - return -EBUSY; - } - - /* - * in system mode, we need to update the PMU directly - * and the user level state of the caller, which may not - * necessarily be the creator of the context. - */ - if (is_system) { - - /* - * set user level psr.pp for the caller - */ - ia64_psr(regs)->pp = 1; - - /* - * now update the local PMU and cpuinfo - */ - PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); - - /* - * start monitoring at kernel level - */ - pfm_set_psr_pp(); - - /* enable dcr pp */ - ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); - ia64_srlz_i(); - - return 0; - } - - /* - * per-process mode - */ - - if (ctx->ctx_task == current) { - - /* start monitoring at kernel level */ - pfm_set_psr_up(); - - /* - * activate monitoring at user level - */ - ia64_psr(regs)->up = 1; - - } else { - tregs = task_pt_regs(ctx->ctx_task); - - /* - * start monitoring at the kernel level the next - * time the task is scheduled - */ - ctx->ctx_saved_psr_up = IA64_PSR_UP; - - /* - * activate monitoring at user level - */ - ia64_psr(tregs)->up = 1; - } - return 0; -} - -static int -pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - pfarg_reg_t *req = (pfarg_reg_t *)arg; - unsigned int cnum; - int i; - int ret = -EINVAL; - - for (i = 0; i < count; i++, req++) { - - cnum = req->reg_num; - - if (!PMC_IS_IMPL(cnum)) goto abort_mission; - - req->reg_value = PMC_DFL_VAL(cnum); - - PFM_REG_RETFLAG_SET(req->reg_flags, 0); - - DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value)); - } - return 0; - -abort_mission: - PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); - return ret; -} - -static int -pfm_check_task_exist(pfm_context_t *ctx) -{ - struct task_struct *g, *t; - int ret = -ESRCH; - - read_lock(&tasklist_lock); - - do_each_thread (g, t) { - if (t->thread.pfm_context == ctx) { - ret = 0; - break; - } - 
} while_each_thread (g, t); - - read_unlock(&tasklist_lock); - - DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); - - return ret; -} - -static int -pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct task_struct *task; - struct thread_struct *thread; - struct pfm_context_t *old; - unsigned long flags; -#ifndef CONFIG_SMP - struct task_struct *owner_task = NULL; -#endif - pfarg_load_t *req = (pfarg_load_t *)arg; - unsigned long *pmcs_source, *pmds_source; - int the_cpu; - int ret = 0; - int state, is_system, set_dbregs = 0; - - state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - /* - * can only load from unloaded or terminated state - */ - if (state != PFM_CTX_UNLOADED) { - DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", - req->load_pid, - ctx->ctx_state)); - return -EBUSY; - } - - DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); - - if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) { - DPRINT(("cannot use blocking mode on self\n")); - return -EINVAL; - } - - ret = pfm_get_task(ctx, req->load_pid, &task); - if (ret) { - DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret)); - return ret; - } - - ret = -EINVAL; - - /* - * system wide is self monitoring only - */ - if (is_system && task != current) { - DPRINT(("system wide is self monitoring only load_pid=%d\n", - req->load_pid)); - goto error; - } - - thread = &task->thread; - - ret = 0; - /* - * cannot load a context which is using range restrictions, - * into a task that is being debugged. 
- */ - if (ctx->ctx_fl_using_dbreg) { - if (thread->flags & IA64_THREAD_DBG_VALID) { - ret = -EBUSY; - DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid)); - goto error; - } - LOCK_PFS(flags); - - if (is_system) { - if (pfm_sessions.pfs_ptrace_use_dbregs) { - DPRINT(("cannot load [%d] dbregs in use\n", task->pid)); - ret = -EBUSY; - } else { - pfm_sessions.pfs_sys_use_dbregs++; - DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs)); - set_dbregs = 1; - } - } - - UNLOCK_PFS(flags); - - if (ret) goto error; - } - - /* - * SMP system-wide monitoring implies self-monitoring. - * - * The programming model expects the task to - * be pinned on a CPU throughout the session. - * Here we take note of the current CPU at the - * time the context is loaded. No call from - * another CPU will be allowed. - * - * The pinning via shed_setaffinity() - * must be done by the calling task prior - * to this call. - * - * systemwide: keep track of CPU this session is supposed to run on - */ - the_cpu = ctx->ctx_cpu = smp_processor_id(); - - ret = -EBUSY; - /* - * now reserve the session - */ - ret = pfm_reserve_session(current, is_system, the_cpu); - if (ret) goto error; - - /* - * task is necessarily stopped at this point. - * - * If the previous context was zombie, then it got removed in - * pfm_save_regs(). Therefore we should not see it here. 
- * If we see a context, then this is an active context - * - * XXX: needs to be atomic - */ - DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n", - thread->pfm_context, ctx)); - - ret = -EBUSY; - old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *)); - if (old != NULL) { - DPRINT(("load_pid [%d] already has a context\n", req->load_pid)); - goto error_unres; - } - - pfm_reset_msgq(ctx); - - ctx->ctx_state = PFM_CTX_LOADED; - - /* - * link context to task - */ - ctx->ctx_task = task; - - if (is_system) { - /* - * we load as stopped - */ - PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); - PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); - - if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); - } else { - thread->flags |= IA64_THREAD_PM_VALID; - } - - /* - * propagate into thread-state - */ - pfm_copy_pmds(task, ctx); - pfm_copy_pmcs(task, ctx); - - pmcs_source = ctx->th_pmcs; - pmds_source = ctx->th_pmds; - - /* - * always the case for system-wide - */ - if (task == current) { - - if (is_system == 0) { - - /* allow user level control */ - ia64_psr(regs)->sp = 0; - DPRINT(("clearing psr.sp for [%d]\n", task->pid)); - - SET_LAST_CPU(ctx, smp_processor_id()); - INC_ACTIVATION(); - SET_ACTIVATION(ctx); -#ifndef CONFIG_SMP - /* - * push the other task out, if any - */ - owner_task = GET_PMU_OWNER(); - if (owner_task) pfm_lazy_save_regs(owner_task); -#endif - } - /* - * load all PMD from ctx to PMU (as opposed to thread state) - * restore all PMC from ctx to PMU - */ - pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]); - pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]); - - ctx->ctx_reload_pmcs[0] = 0UL; - ctx->ctx_reload_pmds[0] = 0UL; - - /* - * guaranteed safe by earlier check against DBG_VALID - */ - if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); - } - /* - * set new ownership - */ - SET_PMU_OWNER(task, ctx); - - DPRINT(("context loaded on PMU for 
[%d]\n", task->pid)); - } else { - /* - * when not current, task MUST be stopped, so this is safe - */ - regs = task_pt_regs(task); - - /* force a full reload */ - ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; - SET_LAST_CPU(ctx, -1); - - /* initial saved psr (stopped) */ - ctx->ctx_saved_psr_up = 0UL; - ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; - } - - ret = 0; - -error_unres: - if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu); -error: - /* - * we must undo the dbregs setting (for system-wide) - */ - if (ret && set_dbregs) { - LOCK_PFS(flags); - pfm_sessions.pfs_sys_use_dbregs--; - UNLOCK_PFS(flags); - } - /* - * release task, there is now a link with the context - */ - if (is_system == 0 && task != current) { - pfm_put_task(task); - - if (ret == 0) { - ret = pfm_check_task_exist(ctx); - if (ret) { - ctx->ctx_state = PFM_CTX_UNLOADED; - ctx->ctx_task = NULL; - } - } - } - return ret; -} - -/* - * in this function, we do not need to increase the use count - * for the task via get_task_struct(), because we hold the - * context lock. If the task were to disappear while having - * a context attached, it would go through pfm_exit_thread() - * which also grabs the context lock and would therefore be blocked - * until we are here. - */ -static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx); - -static int -pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) -{ - struct task_struct *task = PFM_CTX_TASK(ctx); - struct pt_regs *tregs; - int prev_state, is_system; - int ret; - - DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? 
task->pid : -1)); - - prev_state = ctx->ctx_state; - is_system = ctx->ctx_fl_system; - - /* - * unload only when necessary - */ - if (prev_state == PFM_CTX_UNLOADED) { - DPRINT(("ctx_state=%d, nothing to do\n", prev_state)); - return 0; - } - - /* - * clear psr and dcr bits - */ - ret = pfm_stop(ctx, NULL, 0, regs); - if (ret) return ret; - - ctx->ctx_state = PFM_CTX_UNLOADED; - - /* - * in system mode, we need to update the PMU directly - * and the user level state of the caller, which may not - * necessarily be the creator of the context. - */ - if (is_system) { - - /* - * Update cpuinfo - * - * local PMU is taken care of in pfm_stop() - */ - PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE); - PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE); - - /* - * save PMDs in context - * release ownership - */ - pfm_flush_pmds(current, ctx); - - /* - * at this point we are done with the PMU - * so we can unreserve the resource. - */ - if (prev_state != PFM_CTX_ZOMBIE) - pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu); - - /* - * disconnect context from task - */ - task->thread.pfm_context = NULL; - /* - * disconnect task from context - */ - ctx->ctx_task = NULL; - - /* - * There is nothing more to cleanup here. - */ - return 0; - } - - /* - * per-task mode - */ - tregs = task == current ? regs : task_pt_regs(task); - - if (task == current) { - /* - * cancel user level control - */ - ia64_psr(regs)->sp = 1; - - DPRINT(("setting psr.sp for [%d]\n", task->pid)); - } - /* - * save PMDs to context - * release ownership - */ - pfm_flush_pmds(task, ctx); - - /* - * at this point we are done with the PMU - * so we can unreserve the resource. - * - * when state was ZOMBIE, we have already unreserved. 
- */ - if (prev_state != PFM_CTX_ZOMBIE) - pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu); - - /* - * reset activation counter and psr - */ - ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; - SET_LAST_CPU(ctx, -1); - - /* - * PMU state will not be restored - */ - task->thread.flags &= ~IA64_THREAD_PM_VALID; - - /* - * break links between context and task - */ - task->thread.pfm_context = NULL; - ctx->ctx_task = NULL; - - PFM_SET_WORK_PENDING(task, 0); - - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; - ctx->ctx_fl_can_restart = 0; - ctx->ctx_fl_going_zombie = 0; - - DPRINT(("disconnected [%d] from context\n", task->pid)); - - return 0; -} - - -/* - * called only from exit_thread(): task == current - * we come here only if current has a context attached (loaded or masked) - */ -void -pfm_exit_thread(struct task_struct *task) -{ - pfm_context_t *ctx; - unsigned long flags; - struct pt_regs *regs = task_pt_regs(task); - int ret, state; - int free_ok = 0; - - ctx = PFM_GET_CTX(task); - - PROTECT_CTX(ctx, flags); - - DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid)); - - state = ctx->ctx_state; - switch(state) { - case PFM_CTX_UNLOADED: - /* - * only comes to this function if pfm_context is not NULL, i.e., cannot - * be in unloaded state - */ - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid); - break; - case PFM_CTX_LOADED: - case PFM_CTX_MASKED: - ret = pfm_context_unload(ctx, NULL, 0, regs); - if (ret) { - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret); - } - DPRINT(("ctx unloaded for current state was %d\n", state)); - - pfm_end_notify_user(ctx); - break; - case PFM_CTX_ZOMBIE: - ret = pfm_context_unload(ctx, NULL, 0, regs); - if (ret) { - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret); - } - free_ok = 1; - break; - default: - printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state); - break; 
- } - UNPROTECT_CTX(ctx, flags); - - { u64 psr = pfm_get_psr(); - BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); - BUG_ON(GET_PMU_OWNER()); - BUG_ON(ia64_psr(regs)->up); - BUG_ON(ia64_psr(regs)->pp); - } - - /* - * All memory free operations (especially for vmalloc'ed memory) - * MUST be done with interrupts ENABLED. - */ - if (free_ok) pfm_context_free(ctx); -} - -/* - * functions MUST be listed in the increasing order of their index (see permfon.h) - */ -#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz } -#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL } -#define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP) -#define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW) -#define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL} - -static pfm_cmd_desc_t pfm_cmd_tab[]={ -/* 0 */PFM_CMD_NONE, -/* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), -/* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), -/* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), -/* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS), -/* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS), -/* 6 */PFM_CMD_NONE, -/* 7 */PFM_CMD_NONE, -/* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize), -/* 9 */PFM_CMD_NONE, -/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW), -/* 11 */PFM_CMD_NONE, -/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL), -/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL), -/* 14 */PFM_CMD_NONE, -/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), -/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL), -/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS), -/* 18 */PFM_CMD_NONE, -/* 19 */PFM_CMD_NONE, -/* 20 */PFM_CMD_NONE, -/* 21 */PFM_CMD_NONE, -/* 22 */PFM_CMD_NONE, -/* 23 */PFM_CMD_NONE, -/* 24 */PFM_CMD_NONE, -/* 25 
*/PFM_CMD_NONE, -/* 26 */PFM_CMD_NONE, -/* 27 */PFM_CMD_NONE, -/* 28 */PFM_CMD_NONE, -/* 29 */PFM_CMD_NONE, -/* 30 */PFM_CMD_NONE, -/* 31 */PFM_CMD_NONE, -/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL), -/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL) -}; -#define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t)) - -static int -pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags) -{ - struct task_struct *task; - int state, old_state; - -recheck: - state = ctx->ctx_state; - task = ctx->ctx_task; - - if (task == NULL) { - DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state)); - return 0; - } - - DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n", - ctx->ctx_fd, - state, - task->pid, - task->state, PFM_CMD_STOPPED(cmd))); - - /* - * self-monitoring always ok. - * - * for system-wide the caller can either be the creator of the - * context (to one to which the context is attached to) OR - * a task running on the same CPU as the session. - */ - if (task == current || ctx->ctx_fl_system) return 0; - - /* - * we are monitoring another thread - */ - switch(state) { - case PFM_CTX_UNLOADED: - /* - * if context is UNLOADED we are safe to go - */ - return 0; - case PFM_CTX_ZOMBIE: - /* - * no command can operate on a zombie context - */ - DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); - return -EINVAL; - case PFM_CTX_MASKED: - /* - * PMU state has been saved to software even though - * the thread may still be running. - */ - if (cmd != PFM_UNLOAD_CONTEXT) return 0; - } - - /* - * context is LOADED or MASKED. Some commands may need to have - * the task stopped. - * - * We could lift this restriction for UP but it would mean that - * the user has no guarantee the task would not run between - * two successive calls to perfmonctl(). That's probably OK. 
- * If this user wants to ensure the task does not run, then - * the task must be stopped. - */ - if (PFM_CMD_STOPPED(cmd)) { - if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { - DPRINT(("[%d] task not in stopped state\n", task->pid)); - return -EBUSY; - } - /* - * task is now stopped, wait for ctxsw out - * - * This is an interesting point in the code. - * We need to unprotect the context because - * the pfm_save_regs() routines needs to grab - * the same lock. There are danger in doing - * this because it leaves a window open for - * another task to get access to the context - * and possibly change its state. The one thing - * that is not possible is for the context to disappear - * because we are protected by the VFS layer, i.e., - * get_fd()/put_fd(). - */ - old_state = state; - - UNPROTECT_CTX(ctx, flags); - - wait_task_inactive(task); - - PROTECT_CTX(ctx, flags); - - /* - * we must recheck to verify if state has changed - */ - if (ctx->ctx_state != old_state) { - DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state)); - goto recheck; - } - } - return 0; -} - -/* - * system-call entry point (must return long) - */ -asmlinkage long -sys_perfmonctl (int fd, int cmd, void __user *arg, int count) -{ - struct file *file = NULL; - pfm_context_t *ctx = NULL; - unsigned long flags = 0UL; - void *args_k = NULL; - long ret; /* will expand int return types */ - size_t base_sz, sz, xtra_sz = 0; - int narg, completed_args = 0, call_made = 0, cmd_flags; - int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); - int (*getsize)(void *arg, size_t *sz); -#define PFM_MAX_ARGSIZE 4096 - - /* - * reject any call if perfmon was disabled at initialization - */ - if (unlikely(pmu_conf == NULL)) return -ENOSYS; - - if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) { - DPRINT(("invalid cmd=%d\n", cmd)); - return -EINVAL; - } - - func = pfm_cmd_tab[cmd].cmd_func; - narg = pfm_cmd_tab[cmd].cmd_narg; - base_sz = 
pfm_cmd_tab[cmd].cmd_argsize; - getsize = pfm_cmd_tab[cmd].cmd_getsize; - cmd_flags = pfm_cmd_tab[cmd].cmd_flags; - - if (unlikely(func == NULL)) { - DPRINT(("invalid cmd=%d\n", cmd)); - return -EINVAL; - } - - DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n", - PFM_CMD_NAME(cmd), - cmd, - narg, - base_sz, - count)); - - /* - * check if number of arguments matches what the command expects - */ - if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))) - return -EINVAL; - -restart_args: - sz = xtra_sz + base_sz*count; - /* - * limit abuse to min page size - */ - if (unlikely(sz > PFM_MAX_ARGSIZE)) { - printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz); - return -E2BIG; - } - - /* - * allocate default-sized argument buffer - */ - if (likely(count && args_k == NULL)) { - args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL); - if (args_k == NULL) return -ENOMEM; - } - - ret = -EFAULT; - - /* - * copy arguments - * - * assume sz = 0 for command without parameters - */ - if (sz && copy_from_user(args_k, arg, sz)) { - DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg)); - goto error_args; - } - - /* - * check if command supports extra parameters - */ - if (completed_args == 0 && getsize) { - /* - * get extra parameters size (based on main argument) - */ - ret = (*getsize)(args_k, &xtra_sz); - if (ret) goto error_args; - - completed_args = 1; - - DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz)); - - /* retry if necessary */ - if (likely(xtra_sz)) goto restart_args; - } - - if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd; - - ret = -EBADF; - - file = fget(fd); - if (unlikely(file == NULL)) { - DPRINT(("invalid fd %d\n", fd)); - goto error_args; - } - if (unlikely(PFM_IS_FILE(file) == 0)) { - DPRINT(("fd %d not related to perfmon\n", fd)); - goto error_args; - } - - ctx = (pfm_context_t *)file->private_data; - if (unlikely(ctx == NULL)) { - DPRINT(("no context for fd %d\n", fd)); - goto 
error_args; - } - prefetch(&ctx->ctx_state); - - PROTECT_CTX(ctx, flags); - - /* - * check task is stopped - */ - ret = pfm_check_task_state(ctx, cmd, flags); - if (unlikely(ret)) goto abort_locked; - -skip_fd: - ret = (*func)(ctx, args_k, count, task_pt_regs(current)); - - call_made = 1; - -abort_locked: - if (likely(ctx)) { - DPRINT(("context unlocked\n")); - UNPROTECT_CTX(ctx, flags); - } - - /* copy argument back to user, if needed */ - if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; - -error_args: - if (file) - fput(file); - - kfree(args_k); - - DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); - - return ret; -} - -static void -pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs) -{ - pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt; - pfm_ovfl_ctrl_t rst_ctrl; - int state; - int ret = 0; - - state = ctx->ctx_state; - /* - * Unlock sampling buffer and reset index atomically - * XXX: not really needed when blocking - */ - if (CTX_HAS_SMPL(ctx)) { - - rst_ctrl.bits.mask_monitoring = 0; - rst_ctrl.bits.reset_ovfl_pmds = 0; - - if (state == PFM_CTX_LOADED) - ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); - else - ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); - } else { - rst_ctrl.bits.mask_monitoring = 0; - rst_ctrl.bits.reset_ovfl_pmds = 1; - } - - if (ret == 0) { - if (rst_ctrl.bits.reset_ovfl_pmds) { - pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET); - } - if (rst_ctrl.bits.mask_monitoring == 0) { - DPRINT(("resuming monitoring\n")); - if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current); - } else { - DPRINT(("stopping monitoring\n")); - //pfm_stop_monitoring(current, regs); - } - ctx->ctx_state = PFM_CTX_LOADED; - } -} - -/* - * context MUST BE LOCKED when calling - * can only be called for current - */ -static void -pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) -{ - 
int ret; - - DPRINT(("entering for [%d]\n", current->pid)); - - ret = pfm_context_unload(ctx, NULL, 0, regs); - if (ret) { - printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret); - } - - /* - * and wakeup controlling task, indicating we are now disconnected - */ - wake_up_interruptible(&ctx->ctx_zombieq); - - /* - * given that context is still locked, the controlling - * task will only get access when we return from - * pfm_handle_work(). - */ -} - -static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); - /* - * pfm_handle_work() can be called with interrupts enabled - * (TIF_NEED_RESCHED) or disabled. The down_interruptible - * call may sleep, therefore we must re-enable interrupts - * to avoid deadlocks. It is safe to do so because this function - * is called ONLY when returning to user level (PUStk=1), in which case - * there is no risk of kernel stack overflow due to deep - * interrupt nesting. - */ -void -pfm_handle_work(void) -{ - pfm_context_t *ctx; - struct pt_regs *regs; - unsigned long flags, dummy_flags; - unsigned long ovfl_regs; - unsigned int reason; - int ret; - - ctx = PFM_GET_CTX(current); - if (ctx == NULL) { - printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid); - return; - } - - PROTECT_CTX(ctx, flags); - - PFM_SET_WORK_PENDING(current, 0); - - pfm_clear_task_notify(); - - regs = task_pt_regs(current); - - /* - * extract reason for being here and clear - */ - reason = ctx->ctx_fl_trap_reason; - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; - ovfl_regs = ctx->ctx_ovfl_regs[0]; - - DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state)); - - /* - * must be done before we check for simple-reset mode - */ - if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie; - - - //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; - if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking; - - /* - * restore interrupt mask to what it was on entry. 
- * Could be enabled/diasbled. - */ - UNPROTECT_CTX(ctx, flags); - - /* - * force interrupt enable because of down_interruptible() - */ - local_irq_enable(); - - DPRINT(("before block sleeping\n")); - - /* - * may go through without blocking on SMP systems - * if restart has been received already by the time we call down() - */ - ret = wait_for_completion_interruptible(&ctx->ctx_restart_done); - - DPRINT(("after block sleeping ret=%d\n", ret)); - - /* - * lock context and mask interrupts again - * We save flags into a dummy because we may have - * altered interrupts mask compared to entry in this - * function. - */ - PROTECT_CTX(ctx, dummy_flags); - - /* - * we need to read the ovfl_regs only after wake-up - * because we may have had pfm_write_pmds() in between - * and that can changed PMD values and therefore - * ovfl_regs is reset for these new PMD values. - */ - ovfl_regs = ctx->ctx_ovfl_regs[0]; - - if (ctx->ctx_fl_going_zombie) { -do_zombie: - DPRINT(("context is zombie, bailing out\n")); - pfm_context_force_terminate(ctx, regs); - goto nothing_to_do; - } - /* - * in case of interruption of down() we don't restart anything - */ - if (ret < 0) goto nothing_to_do; - -skip_blocking: - pfm_resume_after_ovfl(ctx, ovfl_regs, regs); - ctx->ctx_ovfl_regs[0] = 0UL; - -nothing_to_do: - /* - * restore flags as they were upon entry - */ - UNPROTECT_CTX(ctx, flags); -} - -static int -pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg) -{ - if (ctx->ctx_state == PFM_CTX_ZOMBIE) { - DPRINT(("ignoring overflow notification, owner is zombie\n")); - return 0; - } - - DPRINT(("waking up somebody\n")); - - if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait); - - /* - * safe, we are not in intr handler, nor in ctxsw when - * we come here - */ - kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN); - - return 0; -} - -static int -pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds) -{ - pfm_msg_t *msg = NULL; - - if (ctx->ctx_fl_no_msg == 0) { - msg = 
pfm_get_new_msg(ctx); - if (msg == NULL) { - printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n"); - return -1; - } - - msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; - msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd; - msg->pfm_ovfl_msg.msg_active_set = 0; - msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds; - msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL; - msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL; - msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL; - msg->pfm_ovfl_msg.msg_tstamp = 0UL; - } - - DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n", - msg, - ctx->ctx_fl_no_msg, - ctx->ctx_fd, - ovfl_pmds)); - - return pfm_notify_user(ctx, msg); -} - -static int -pfm_end_notify_user(pfm_context_t *ctx) -{ - pfm_msg_t *msg; - - msg = pfm_get_new_msg(ctx); - if (msg == NULL) { - printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n"); - return -1; - } - /* no leak */ - memset(msg, 0, sizeof(*msg)); - - msg->pfm_end_msg.msg_type = PFM_MSG_END; - msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd; - msg->pfm_ovfl_msg.msg_tstamp = 0UL; - - DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n", - msg, - ctx->ctx_fl_no_msg, - ctx->ctx_fd)); - - return pfm_notify_user(ctx, msg); -} - -/* - * main overflow processing routine. - * it can be called from the interrupt path or explicitly during the context switch code - */ -static void -pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs) -{ - pfm_ovfl_arg_t *ovfl_arg; - unsigned long mask; - unsigned long old_val, ovfl_val, new_val; - unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds; - unsigned long tstamp; - pfm_ovfl_ctrl_t ovfl_ctrl; - unsigned int i, has_smpl; - int must_notify = 0; - - if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring; - - /* - * sanity test. 
Should never happen - */ - if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; - - tstamp = ia64_get_itc(); - mask = pmc0 >> PMU_FIRST_COUNTER; - ovfl_val = pmu_conf->ovfl_val; - has_smpl = CTX_HAS_SMPL(ctx); - - DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " - "used_pmds=0x%lx\n", - pmc0, - task ? task->pid: -1, - (regs ? regs->cr_iip : 0), - CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", - ctx->ctx_used_pmds[0])); - - - /* - * first we update the virtual counters - * assume there was a prior ia64_srlz_d() issued - */ - for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { - - /* skip pmd which did not overflow */ - if ((mask & 0x1) == 0) continue; - - /* - * Note that the pmd is not necessarily 0 at this point as qualified events - * may have happened before the PMU was frozen. The residual count is not - * taken into consideration here but will be with any read of the pmd via - * pfm_read_pmds(). - */ - old_val = new_val = ctx->ctx_pmds[i].val; - new_val += 1 + ovfl_val; - ctx->ctx_pmds[i].val = new_val; - - /* - * check for overflow condition - */ - if (likely(old_val > new_val)) { - ovfl_pmds |= 1UL << i; - if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i; - } - - DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", - i, - new_val, - old_val, - ia64_get_pmd(i) & ovfl_val, - ovfl_pmds, - ovfl_notify)); - } - - /* - * there was no 64-bit overflow, nothing else to do - */ - if (ovfl_pmds == 0UL) return; - - /* - * reset all control bits - */ - ovfl_ctrl.val = 0; - reset_pmds = 0UL; - - /* - * if a sampling format module exists, then we "cache" the overflow by - * calling the module's handler() routine. 
- */ - if (has_smpl) { - unsigned long start_cycles, end_cycles; - unsigned long pmd_mask; - int j, k, ret = 0; - int this_cpu = smp_processor_id(); - - pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; - ovfl_arg = &ctx->ctx_ovfl_arg; - - prefetch(ctx->ctx_smpl_hdr); - - for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) { - - mask = 1UL << i; - - if ((pmd_mask & 0x1) == 0) continue; - - ovfl_arg->ovfl_pmd = (unsigned char )i; - ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0; - ovfl_arg->active_set = 0; - ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ - ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; - - ovfl_arg->pmd_value = ctx->ctx_pmds[i].val; - ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval; - ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid; - - /* - * copy values of pmds of interest. Sampling format may copy them - * into sampling buffer. - */ - if (smpl_pmds) { - for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) { - if ((smpl_pmds & 0x1) == 0) continue; - ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); - DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1])); - } - } - - pfm_stats[this_cpu].pfm_smpl_handler_calls++; - - start_cycles = ia64_get_itc(); - - /* - * call custom buffer format record (handler) routine - */ - ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp); - - end_cycles = ia64_get_itc(); - - /* - * For those controls, we take the union because they have - * an all or nothing behavior. 
- */ - ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user; - ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task; - ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring; - /* - * build the bitmask of pmds to reset now - */ - if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; - - pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; - } - /* - * when the module cannot handle the rest of the overflows, we abort right here - */ - if (ret && pmd_mask) { - DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n", - pmd_mask<ctx_ovfl_regs[0] = ovfl_pmds; - - /* - * check for blocking context - */ - if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) { - - ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK; - - /* - * set the perfmon specific checking pending work for the task - */ - PFM_SET_WORK_PENDING(task, 1); - - /* - * when coming from ctxsw, current still points to the - * previous task, therefore we must work with task and not current. - */ - pfm_set_task_notify(task); - } - /* - * defer until state is changed (shorten spin window). the context is locked - * anyway, so the signal receiver would come spin for nothing. - */ - must_notify = 1; - } - - DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n", - GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1, - PFM_GET_WORK_PENDING(task), - ctx->ctx_fl_trap_reason, - ovfl_pmds, - ovfl_notify, - ovfl_ctrl.bits.mask_monitoring ? 1 : 0)); - /* - * in case monitoring must be stopped, we toggle the psr bits - */ - if (ovfl_ctrl.bits.mask_monitoring) { - pfm_mask_monitoring(task); - ctx->ctx_state = PFM_CTX_MASKED; - ctx->ctx_fl_can_restart = 1; - } - - /* - * send notification now - */ - if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify); - - return; - -sanity_check: - printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n", - smp_processor_id(), - task ? 
task->pid : -1, - pmc0); - return; - -stop_monitoring: - /* - * in SMP, zombie context is never restored but reclaimed in pfm_load_regs(). - * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can - * come here as zombie only if the task is the current task. In which case, we - * can access the PMU hardware directly. - * - * Note that zombies do have PM_VALID set. So here we do the minimal. - * - * In case the context was zombified it could not be reclaimed at the time - * the monitoring program exited. At this point, the PMU reservation has been - * returned, the sampiing buffer has been freed. We must convert this call - * into a spurious interrupt. However, we must also avoid infinite overflows - * by stopping monitoring for this task. We can only come here for a per-task - * context. All we need to do is to stop monitoring using the psr bits which - * are always task private. By re-enabling secure montioring, we ensure that - * the monitored task will not be able to re-activate monitoring. - * The task will eventually be context switched out, at which point the context - * will be reclaimed (that includes releasing ownership of the PMU). - * - * So there might be a window of time where the number of per-task session is zero - * yet one PMU might have a owner and get at most one overflow interrupt for a zombie - * context. This is safe because if a per-task session comes in, it will push this one - * out and by the virtue on pfm_save_regs(), this one will disappear. If a system wide - * session is force on that CPU, given that we use task pinning, pfm_save_regs() will - * also push our zombie context out. - * - * Overall pretty hairy stuff.... - */ - DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? 
task->pid: -1)); - pfm_clear_psr_up(); - ia64_psr(regs)->up = 0; - ia64_psr(regs)->sp = 1; - return; -} - -static int -pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs) -{ - struct task_struct *task; - pfm_context_t *ctx; - unsigned long flags; - u64 pmc0; - int this_cpu = smp_processor_id(); - int retval = 0; - - pfm_stats[this_cpu].pfm_ovfl_intr_count++; - - /* - * srlz.d done before arriving here - */ - pmc0 = ia64_get_pmc(0); - - task = GET_PMU_OWNER(); - ctx = GET_PMU_CTX(); - - /* - * if we have some pending bits set - * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1 - */ - if (PMC0_HAS_OVFL(pmc0) && task) { - /* - * we assume that pmc0.fr is always set here - */ - - /* sanity check */ - if (!ctx) goto report_spurious1; - - if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) - goto report_spurious2; - - PROTECT_CTX_NOPRINT(ctx, flags); - - pfm_overflow_handler(task, ctx, pmc0, regs); - - UNPROTECT_CTX_NOPRINT(ctx, flags); - - } else { - pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++; - retval = -1; - } - /* - * keep it unfrozen at all times - */ - pfm_unfreeze_pmu(); - - return retval; - -report_spurious1: - printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n", - this_cpu, task->pid); - pfm_unfreeze_pmu(); - return -1; -report_spurious2: - printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", - this_cpu, - task->pid); - pfm_unfreeze_pmu(); - return -1; -} - -static irqreturn_t -pfm_interrupt_handler(int irq, void *arg) -{ - unsigned long start_cycles, total_cycles; - unsigned long min, max; - int this_cpu; - int ret; - struct pt_regs *regs = get_irq_regs(); - - this_cpu = get_cpu(); - if (likely(!pfm_alt_intr_handler)) { - min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; - max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; - - start_cycles = ia64_get_itc(); - - ret = pfm_do_interrupt_handler(irq, arg, regs); - - 
total_cycles = ia64_get_itc(); - - /* - * don't measure spurious interrupts - */ - if (likely(ret == 0)) { - total_cycles -= start_cycles; - - if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; - if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; - - pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; - } - } - else { - (*pfm_alt_intr_handler->handler)(irq, arg, regs); - } - - put_cpu_no_resched(); - return IRQ_HANDLED; -} - -/* - * /proc/perfmon interface, for debug only - */ - -#define PFM_PROC_SHOW_HEADER ((void *)NR_CPUS+1) - -static void * -pfm_proc_start(struct seq_file *m, loff_t *pos) -{ - if (*pos == 0) { - return PFM_PROC_SHOW_HEADER; - } - - while (*pos <= NR_CPUS) { - if (cpu_online(*pos - 1)) { - return (void *)*pos; - } - ++*pos; - } - return NULL; -} - -static void * -pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return pfm_proc_start(m, pos); -} - -static void -pfm_proc_stop(struct seq_file *m, void *v) -{ -} - -static void -pfm_proc_show_header(struct seq_file *m) -{ - struct list_head * pos; - pfm_buffer_fmt_t * entry; - unsigned long flags; - - seq_printf(m, - "perfmon version : %u.%u\n" - "model : %s\n" - "fastctxsw : %s\n" - "expert mode : %s\n" - "ovfl_mask : 0x%lx\n" - "PMU flags : 0x%x\n", - PFM_VERSION_MAJ, PFM_VERSION_MIN, - pmu_conf->pmu_name, - pfm_sysctl.fastctxsw > 0 ? "Yes": "No", - pfm_sysctl.expert_mode > 0 ? 
"Yes": "No", - pmu_conf->ovfl_val, - pmu_conf->flags); - - LOCK_PFS(flags); - - seq_printf(m, - "proc_sessions : %u\n" - "sys_sessions : %u\n" - "sys_use_dbregs : %u\n" - "ptrace_use_dbregs : %u\n", - pfm_sessions.pfs_task_sessions, - pfm_sessions.pfs_sys_sessions, - pfm_sessions.pfs_sys_use_dbregs, - pfm_sessions.pfs_ptrace_use_dbregs); - - UNLOCK_PFS(flags); - - spin_lock(&pfm_buffer_fmt_lock); - - list_for_each(pos, &pfm_buffer_fmt_list) { - entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); - seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n", - entry->fmt_uuid[0], - entry->fmt_uuid[1], - entry->fmt_uuid[2], - entry->fmt_uuid[3], - entry->fmt_uuid[4], - entry->fmt_uuid[5], - entry->fmt_uuid[6], - entry->fmt_uuid[7], - entry->fmt_uuid[8], - entry->fmt_uuid[9], - entry->fmt_uuid[10], - entry->fmt_uuid[11], - entry->fmt_uuid[12], - entry->fmt_uuid[13], - entry->fmt_uuid[14], - entry->fmt_uuid[15], - entry->fmt_name); - } - spin_unlock(&pfm_buffer_fmt_lock); - -} - -static int -pfm_proc_show(struct seq_file *m, void *v) -{ - unsigned long psr; - unsigned int i; - int cpu; - - if (v == PFM_PROC_SHOW_HEADER) { - pfm_proc_show_header(m); - return 0; - } - - /* show info for CPU (v - 1) */ - - cpu = (long)v - 1; - seq_printf(m, - "CPU%-2d overflow intrs : %lu\n" - "CPU%-2d overflow cycles : %lu\n" - "CPU%-2d overflow min : %lu\n" - "CPU%-2d overflow max : %lu\n" - "CPU%-2d smpl handler calls : %lu\n" - "CPU%-2d smpl handler cycles : %lu\n" - "CPU%-2d spurious intrs : %lu\n" - "CPU%-2d replay intrs : %lu\n" - "CPU%-2d syst_wide : %d\n" - "CPU%-2d dcr_pp : %d\n" - "CPU%-2d exclude idle : %d\n" - "CPU%-2d owner : %d\n" - "CPU%-2d context : %p\n" - "CPU%-2d activations : %lu\n", - cpu, pfm_stats[cpu].pfm_ovfl_intr_count, - cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles, - cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min, - cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max, - cpu, pfm_stats[cpu].pfm_smpl_handler_calls, - cpu, 
pfm_stats[cpu].pfm_smpl_handler_cycles, - cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count, - cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count, - cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0, - cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0, - cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0, - cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1, - cpu, pfm_get_cpu_data(pmu_ctx, cpu), - cpu, pfm_get_cpu_data(pmu_activation_number, cpu)); - - if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) { - - psr = pfm_get_psr(); - - ia64_srlz_d(); - - seq_printf(m, - "CPU%-2d psr : 0x%lx\n" - "CPU%-2d pmc0 : 0x%lx\n", - cpu, psr, - cpu, ia64_get_pmc(0)); - - for (i=0; PMC_IS_LAST(i) == 0; i++) { - if (PMC_IS_COUNTING(i) == 0) continue; - seq_printf(m, - "CPU%-2d pmc%u : 0x%lx\n" - "CPU%-2d pmd%u : 0x%lx\n", - cpu, i, ia64_get_pmc(i), - cpu, i, ia64_get_pmd(i)); - } - } - return 0; -} - -struct seq_operations pfm_seq_ops = { - .start = pfm_proc_start, - .next = pfm_proc_next, - .stop = pfm_proc_stop, - .show = pfm_proc_show -}; - -static int -pfm_proc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &pfm_seq_ops); -} - - -/* - * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens - * during pfm_enable() hence before pfm_start(). We cannot assume monitoring - * is active or inactive based on mode. We must rely on the value in - * local_cpu_data->pfm_syst_info - */ -void -pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) -{ - struct pt_regs *regs; - unsigned long dcr; - unsigned long dcr_pp; - - dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; - - /* - * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 - * on every CPU, so we can rely on the pid to identify the idle task. 
- */ - if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { - regs = task_pt_regs(task); - ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; - return; - } - /* - * if monitoring has started - */ - if (dcr_pp) { - dcr = ia64_getreg(_IA64_REG_CR_DCR); - /* - * context switching in? - */ - if (is_ctxswin) { - /* mask monitoring for the idle task */ - ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); - pfm_clear_psr_pp(); - ia64_srlz_i(); - return; - } - /* - * context switching out - * restore monitoring for next task - * - * Due to inlining this odd if-then-else construction generates - * better code. - */ - ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP); - pfm_set_psr_pp(); - ia64_srlz_i(); - } -} - -#ifdef CONFIG_SMP - -static void -pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) -{ - struct task_struct *task = ctx->ctx_task; - - ia64_psr(regs)->up = 0; - ia64_psr(regs)->sp = 1; - - if (GET_PMU_OWNER() == task) { - DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid)); - SET_PMU_OWNER(NULL, NULL); - } - - /* - * disconnect the task from the context and vice-versa - */ - PFM_SET_WORK_PENDING(task, 0); - - task->thread.pfm_context = NULL; - task->thread.flags &= ~IA64_THREAD_PM_VALID; - - DPRINT(("force cleanup for [%d]\n", task->pid)); -} - - -/* - * in 2.6, interrupts are masked when we come here and the runqueue lock is held - */ -void -pfm_save_regs(struct task_struct *task) -{ - pfm_context_t *ctx; - unsigned long flags; - u64 psr; - - - ctx = PFM_GET_CTX(task); - if (ctx == NULL) return; - - /* - * we always come here with interrupts ALREADY disabled by - * the scheduler. So we simply need to protect against concurrent - * access, not CPU concurrency. 
- */ - flags = pfm_protect_ctx_ctxsw(ctx); - - if (ctx->ctx_state == PFM_CTX_ZOMBIE) { - struct pt_regs *regs = task_pt_regs(task); - - pfm_clear_psr_up(); - - pfm_force_cleanup(ctx, regs); - - BUG_ON(ctx->ctx_smpl_hdr); - - pfm_unprotect_ctx_ctxsw(ctx, flags); - - pfm_context_free(ctx); - return; - } - - /* - * save current PSR: needed because we modify it - */ - ia64_srlz_d(); - psr = pfm_get_psr(); - - BUG_ON(psr & (IA64_PSR_I)); - - /* - * stop monitoring: - * This is the last instruction which may generate an overflow - * - * We do not need to set psr.sp because, it is irrelevant in kernel. - * It will be restored from ipsr when going back to user level - */ - pfm_clear_psr_up(); - - /* - * keep a copy of psr.up (for reload) - */ - ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; - - /* - * release ownership of this PMU. - * PM interrupts are masked, so nothing - * can happen. - */ - SET_PMU_OWNER(NULL, NULL); - - /* - * we systematically save the PMD as we have no - * guarantee we will be schedule at that same - * CPU again. - */ - pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); - - /* - * save pmc0 ia64_srlz_d() done in pfm_save_pmds() - * we will need it on the restore path to check - * for pending overflow. - */ - ctx->th_pmcs[0] = ia64_get_pmc(0); - - /* - * unfreeze PMU if had pending overflows - */ - if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); - - /* - * finally, allow context access. - * interrupts will still be masked after this call. - */ - pfm_unprotect_ctx_ctxsw(ctx, flags); -} - -#else /* !CONFIG_SMP */ -void -pfm_save_regs(struct task_struct *task) -{ - pfm_context_t *ctx; - u64 psr; - - ctx = PFM_GET_CTX(task); - if (ctx == NULL) return; - - /* - * save current PSR: needed because we modify it - */ - psr = pfm_get_psr(); - - BUG_ON(psr & (IA64_PSR_I)); - - /* - * stop monitoring: - * This is the last instruction which may generate an overflow - * - * We do not need to set psr.sp because, it is irrelevant in kernel. 
- * It will be restored from ipsr when going back to user level - */ - pfm_clear_psr_up(); - - /* - * keep a copy of psr.up (for reload) - */ - ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; -} - -static void -pfm_lazy_save_regs (struct task_struct *task) -{ - pfm_context_t *ctx; - unsigned long flags; - - { u64 psr = pfm_get_psr(); - BUG_ON(psr & IA64_PSR_UP); - } - - ctx = PFM_GET_CTX(task); - - /* - * we need to mask PMU overflow here to - * make sure that we maintain pmc0 until - * we save it. overflow interrupts are - * treated as spurious if there is no - * owner. - * - * XXX: I don't think this is necessary - */ - PROTECT_CTX(ctx,flags); - - /* - * release ownership of this PMU. - * must be done before we save the registers. - * - * after this call any PMU interrupt is treated - * as spurious. - */ - SET_PMU_OWNER(NULL, NULL); - - /* - * save all the pmds we use - */ - pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); - - /* - * save pmc0 ia64_srlz_d() done in pfm_save_pmds() - * it is needed to check for pended overflow - * on the restore path - */ - ctx->th_pmcs[0] = ia64_get_pmc(0); - - /* - * unfreeze PMU if had pending overflows - */ - if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); - - /* - * now get can unmask PMU interrupts, they will - * be treated as purely spurious and we will not - * lose any information - */ - UNPROTECT_CTX(ctx,flags); -} -#endif /* CONFIG_SMP */ - -#ifdef CONFIG_SMP -/* - * in 2.6, interrupts are masked when we come here and the runqueue lock is held - */ -void -pfm_load_regs (struct task_struct *task) -{ - pfm_context_t *ctx; - unsigned long pmc_mask = 0UL, pmd_mask = 0UL; - unsigned long flags; - u64 psr, psr_up; - int need_irq_resend; - - ctx = PFM_GET_CTX(task); - if (unlikely(ctx == NULL)) return; - - BUG_ON(GET_PMU_OWNER()); - - /* - * possible on unload - */ - if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return; - - /* - * we always come here with interrupts ALREADY disabled by - * the scheduler. 
So we simply need to protect against concurrent - * access, not CPU concurrency. - */ - flags = pfm_protect_ctx_ctxsw(ctx); - psr = pfm_get_psr(); - - need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; - - BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); - BUG_ON(psr & IA64_PSR_I); - - if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) { - struct pt_regs *regs = task_pt_regs(task); - - BUG_ON(ctx->ctx_smpl_hdr); - - pfm_force_cleanup(ctx, regs); - - pfm_unprotect_ctx_ctxsw(ctx, flags); - - /* - * this one (kmalloc'ed) is fine with interrupts disabled - */ - pfm_context_free(ctx); - - return; - } - - /* - * we restore ALL the debug registers to avoid picking up - * stale state. - */ - if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); - } - /* - * retrieve saved psr.up - */ - psr_up = ctx->ctx_saved_psr_up; - - /* - * if we were the last user of the PMU on that CPU, - * then nothing to do except restore psr - */ - if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) { - - /* - * retrieve partial reload masks (due to user modifications) - */ - pmc_mask = ctx->ctx_reload_pmcs[0]; - pmd_mask = ctx->ctx_reload_pmds[0]; - - } else { - /* - * To avoid leaking information to the user level when psr.sp=0, - * we must reload ALL implemented pmds (even the ones we don't use). - * In the kernel we only allow PFM_READ_PMDS on registers which - * we initialized or requested (sampling) so there is no risk there. - */ - pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; - - /* - * ALL accessible PMCs are systematically reloaded, unused registers - * get their default (from pfm_reset_pmu_state()) values to avoid picking - * up stale configuration. - * - * PMC0 is never in the mask. It is always restored separately. 
- */ - pmc_mask = ctx->ctx_all_pmcs[0]; - } - /* - * when context is MASKED, we will restore PMC with plm=0 - * and PMD with stale information, but that's ok, nothing - * will be captured. - * - * XXX: optimize here - */ - if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask); - if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); - - /* - * check for pending overflow at the time the state - * was saved. - */ - if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { - /* - * reload pmc0 with the overflow information - * On McKinley PMU, this will trigger a PMU interrupt - */ - ia64_set_pmc(0, ctx->th_pmcs[0]); - ia64_srlz_d(); - ctx->th_pmcs[0] = 0UL; - - /* - * will replay the PMU interrupt - */ - if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); - - pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; - } - - /* - * we just did a reload, so we reset the partial reload fields - */ - ctx->ctx_reload_pmcs[0] = 0UL; - ctx->ctx_reload_pmds[0] = 0UL; - - SET_LAST_CPU(ctx, smp_processor_id()); - - /* - * dump activation value for this PMU - */ - INC_ACTIVATION(); - /* - * record current activation for this context - */ - SET_ACTIVATION(ctx); - - /* - * establish new ownership. - */ - SET_PMU_OWNER(task, ctx); - - /* - * restore the psr.up bit. measurement - * is active again. - * no PMU interrupt can happen at this point - * because we still have interrupts disabled. 
- */ - if (likely(psr_up)) pfm_set_psr_up(); - - /* - * allow concurrent access to context - */ - pfm_unprotect_ctx_ctxsw(ctx, flags); -} -#else /* !CONFIG_SMP */ -/* - * reload PMU state for UP kernels - * in 2.5 we come here with interrupts disabled - */ -void -pfm_load_regs (struct task_struct *task) -{ - pfm_context_t *ctx; - struct task_struct *owner; - unsigned long pmd_mask, pmc_mask; - u64 psr, psr_up; - int need_irq_resend; - - owner = GET_PMU_OWNER(); - ctx = PFM_GET_CTX(task); - psr = pfm_get_psr(); - - BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); - BUG_ON(psr & IA64_PSR_I); - - /* - * we restore ALL the debug registers to avoid picking up - * stale state. - * - * This must be done even when the task is still the owner - * as the registers may have been modified via ptrace() - * (not perfmon) by the previous task. - */ - if (ctx->ctx_fl_using_dbreg) { - pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); - pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); - } - - /* - * retrieved saved psr.up - */ - psr_up = ctx->ctx_saved_psr_up; - need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; - - /* - * short path, our state is still there, just - * need to restore psr and we go - * - * we do not touch either PMC nor PMD. the psr is not touched - * by the overflow_handler. So we are safe w.r.t. to interrupt - * concurrency even without interrupt masking. - */ - if (likely(owner == task)) { - if (likely(psr_up)) pfm_set_psr_up(); - return; - } - - /* - * someone else is still using the PMU, first push it out and - * then we'll be able to install our stuff ! - * - * Upon return, there will be no owner for the current PMU - */ - if (owner) pfm_lazy_save_regs(owner); - - /* - * To avoid leaking information to the user level when psr.sp=0, - * we must reload ALL implemented pmds (even the ones we don't use). - * In the kernel we only allow PFM_READ_PMDS on registers which - * we initialized or requested (sampling) so there is no risk there. 
- */ - pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; - - /* - * ALL accessible PMCs are systematically reloaded, unused registers - * get their default (from pfm_reset_pmu_state()) values to avoid picking - * up stale configuration. - * - * PMC0 is never in the mask. It is always restored separately - */ - pmc_mask = ctx->ctx_all_pmcs[0]; - - pfm_restore_pmds(ctx->th_pmds, pmd_mask); - pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); - - /* - * check for pending overflow at the time the state - * was saved. - */ - if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { - /* - * reload pmc0 with the overflow information - * On McKinley PMU, this will trigger a PMU interrupt - */ - ia64_set_pmc(0, ctx->th_pmcs[0]); - ia64_srlz_d(); - - ctx->th_pmcs[0] = 0UL; - - /* - * will replay the PMU interrupt - */ - if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); - - pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; - } - - /* - * establish new ownership. - */ - SET_PMU_OWNER(task, ctx); - - /* - * restore the psr.up bit. measurement - * is active again. - * no PMU interrupt can happen at this point - * because we still have interrupts disabled. - */ - if (likely(psr_up)) pfm_set_psr_up(); -} -#endif /* CONFIG_SMP */ - -/* - * this function assumes monitoring is stopped - */ -static void -pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) -{ - u64 pmc0; - unsigned long mask2, val, pmd_val, ovfl_val; - int i, can_access_pmu = 0; - int is_self; - - /* - * is the caller the task being monitored (or which initiated the - * session for system wide measurements) - */ - is_self = ctx->ctx_task == task ? 1 : 0; - - /* - * can access PMU is task is the owner of the PMU state on the current CPU - * or if we are running on the CPU bound to the context in system-wide mode - * (that is not necessarily the task the context is attached to in this mode). 
- * In system-wide we always have can_access_pmu true because a task running on an - * invalid processor is flagged earlier in the call stack (see pfm_stop). - */ - can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id()); - if (can_access_pmu) { - /* - * Mark the PMU as not owned - * This will cause the interrupt handler to do nothing in case an overflow - * interrupt was in-flight - * This also guarantees that pmc0 will contain the final state - * It virtually gives us full control on overflow processing from that point - * on. - */ - SET_PMU_OWNER(NULL, NULL); - DPRINT(("releasing ownership\n")); - - /* - * read current overflow status: - * - * we are guaranteed to read the final stable state - */ - ia64_srlz_d(); - pmc0 = ia64_get_pmc(0); /* slow */ - - /* - * reset freeze bit, overflow status information destroyed - */ - pfm_unfreeze_pmu(); - } else { - pmc0 = ctx->th_pmcs[0]; - /* - * clear whatever overflow status bits there were - */ - ctx->th_pmcs[0] = 0; - } - ovfl_val = pmu_conf->ovfl_val; - /* - * we save all the used pmds - * we take care of overflows for counting PMDs - * - * XXX: sampling situation is not taken into account here - */ - mask2 = ctx->ctx_used_pmds[0]; - - DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2)); - - for (i = 0; mask2; i++, mask2>>=1) { - - /* skip non used pmds */ - if ((mask2 & 0x1) == 0) continue; - - /* - * can access PMU always true in system wide mode - */ - val = pmd_val = can_access_pmu ? 
ia64_get_pmd(i) : ctx->th_pmds[i]; - - if (PMD_IS_COUNTING(i)) { - DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", - task->pid, - i, - ctx->ctx_pmds[i].val, - val & ovfl_val)); - - /* - * we rebuild the full 64 bit value of the counter - */ - val = ctx->ctx_pmds[i].val + (val & ovfl_val); - - /* - * now everything is in ctx_pmds[] and we need - * to clear the saved context from save_regs() such that - * pfm_read_pmds() gets the correct value - */ - pmd_val = 0UL; - - /* - * take care of overflow inline - */ - if (pmc0 & (1UL << i)) { - val += 1 + ovfl_val; - DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i)); - } - } - - DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val)); - - if (is_self) ctx->th_pmds[i] = pmd_val; - - ctx->ctx_pmds[i].val = val; - } -} - -static struct irqaction perfmon_irqaction = { - .handler = pfm_interrupt_handler, - .flags = IRQF_DISABLED, - .name = "perfmon" -}; - -static void -pfm_alt_save_pmu_state(void *data) -{ - struct pt_regs *regs; - - regs = task_pt_regs(current); - - DPRINT(("called\n")); - - /* - * should not be necessary but - * let's take not risk - */ - pfm_clear_psr_up(); - pfm_clear_psr_pp(); - ia64_psr(regs)->pp = 0; - - /* - * This call is required - * May cause a spurious interrupt on some processors - */ - pfm_freeze_pmu(); - - ia64_srlz_d(); -} - -void -pfm_alt_restore_pmu_state(void *data) -{ - struct pt_regs *regs; - - regs = task_pt_regs(current); - - DPRINT(("called\n")); - - /* - * put PMU back in state expected - * by perfmon - */ - pfm_clear_psr_up(); - pfm_clear_psr_pp(); - ia64_psr(regs)->pp = 0; - - /* - * perfmon runs with PMU unfrozen at all times - */ - pfm_unfreeze_pmu(); - - ia64_srlz_d(); -} - -int -pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) -{ - int ret, i; - int reserve_cpu; - - /* some sanity checks */ - if (hdl == NULL || hdl->handler == NULL) return -EINVAL; - - /* do the easy test first */ - if (pfm_alt_intr_handler) return -EBUSY; - - /* one at a 
time in the install or remove, just fail the others */ - if (!spin_trylock(&pfm_alt_install_check)) { - return -EBUSY; - } - - /* reserve our session */ - for_each_online_cpu(reserve_cpu) { - ret = pfm_reserve_session(NULL, 1, reserve_cpu); - if (ret) goto cleanup_reserve; - } - - /* save the current system wide pmu states */ - ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1); - if (ret) { - DPRINT(("on_each_cpu() failed: %d\n", ret)); - goto cleanup_reserve; - } - - /* officially change to the alternate interrupt handler */ - pfm_alt_intr_handler = hdl; - - spin_unlock(&pfm_alt_install_check); - - return 0; - -cleanup_reserve: - for_each_online_cpu(i) { - /* don't unreserve more than we reserved */ - if (i >= reserve_cpu) break; - - pfm_unreserve_session(NULL, 1, i); - } - - spin_unlock(&pfm_alt_install_check); - - return ret; -} -EXPORT_SYMBOL_GPL(pfm_install_alt_pmu_interrupt); - -int -pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) -{ - int i; - int ret; - - if (hdl == NULL) return -EINVAL; - - /* cannot remove someone else's handler! 
*/ - if (pfm_alt_intr_handler != hdl) return -EINVAL; - - /* one at a time in the install or remove, just fail the others */ - if (!spin_trylock(&pfm_alt_install_check)) { - return -EBUSY; - } - - pfm_alt_intr_handler = NULL; - - ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1); - if (ret) { - DPRINT(("on_each_cpu() failed: %d\n", ret)); - } - - for_each_online_cpu(i) { - pfm_unreserve_session(NULL, 1, i); - } - - spin_unlock(&pfm_alt_install_check); - - return 0; -} -EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt); - -/* - * perfmon initialization routine, called from the initcall() table - */ -static int init_pfm_fs(void); - -static int __init -pfm_probe_pmu(void) -{ - pmu_config_t **p; - int family; - - family = local_cpu_data->family; - p = pmu_confs; - - while(*p) { - if ((*p)->probe) { - if ((*p)->probe() == 0) goto found; - } else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) { - goto found; - } - p++; - } - return -1; -found: - pmu_conf = *p; - return 0; -} - -static const struct file_operations pfm_proc_fops = { - .open = pfm_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -int __init -pfm_init(void) -{ - unsigned int n, n_counters, i; - - printk("perfmon: version %u.%u IRQ %u\n", - PFM_VERSION_MAJ, - PFM_VERSION_MIN, - IA64_PERFMON_VECTOR); - - if (pfm_probe_pmu()) { - printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", - local_cpu_data->family); - return -ENODEV; - } - - /* - * compute the number of implemented PMD/PMC from the - * description tables - */ - n = 0; - for (i=0; PMC_IS_LAST(i) == 0; i++) { - if (PMC_IS_IMPL(i) == 0) continue; - pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63); - n++; - } - pmu_conf->num_pmcs = n; - - n = 0; n_counters = 0; - for (i=0; PMD_IS_LAST(i) == 0; i++) { - if (PMD_IS_IMPL(i) == 0) continue; - pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63); - n++; - if (PMD_IS_COUNTING(i)) n_counters++; - } - pmu_conf->num_pmds = n; - 
pmu_conf->num_counters = n_counters; - - /* - * sanity checks on the number of debug registers - */ - if (pmu_conf->use_rr_dbregs) { - if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) { - printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs); - pmu_conf = NULL; - return -1; - } - if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) { - printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_ibrs); - pmu_conf = NULL; - return -1; - } - } - - printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n", - pmu_conf->pmu_name, - pmu_conf->num_pmcs, - pmu_conf->num_pmds, - pmu_conf->num_counters, - ffz(pmu_conf->ovfl_val)); - - /* sanity check */ - if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) { - printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); - pmu_conf = NULL; - return -1; - } - - /* - * create /proc/perfmon (mostly for debugging purposes) - */ - perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL); - if (perfmon_dir == NULL) { - printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); - pmu_conf = NULL; - return -1; - } - /* - * install customized file operations for /proc/perfmon entry - */ - perfmon_dir->proc_fops = &pfm_proc_fops; - - /* - * create /proc/sys/kernel/perfmon (for debugging purposes) - */ - pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root); - - /* - * initialize all our spinlocks - */ - spin_lock_init(&pfm_sessions.pfs_lock); - spin_lock_init(&pfm_buffer_fmt_lock); - - init_pfm_fs(); - - for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL; - - return 0; -} - -__initcall(pfm_init); - -/* - * this function is called before pfm_init() - */ -void -pfm_init_percpu (void) -{ - static int first_time=1; - /* - * make sure no measurement is active - * (may inherit programmed PMCs from EFI). 
- */ - pfm_clear_psr_pp(); - pfm_clear_psr_up(); - - /* - * we run with the PMU not frozen at all times - */ - pfm_unfreeze_pmu(); - - if (first_time) { - register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); - first_time=0; - } - - ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); - ia64_srlz_d(); -} - -/* - * used for debug purposes only - */ -void -dump_pmu_state(const char *from) -{ - struct task_struct *task; - struct pt_regs *regs; - pfm_context_t *ctx; - unsigned long psr, dcr, info, flags; - int i, this_cpu; - - local_irq_save(flags); - - this_cpu = smp_processor_id(); - regs = task_pt_regs(current); - info = PFM_CPUINFO_GET(); - dcr = ia64_getreg(_IA64_REG_CR_DCR); - - if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) { - local_irq_restore(flags); - return; - } - - printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", - this_cpu, - from, - current->pid, - regs->cr_iip, - current->comm); - - task = GET_PMU_OWNER(); - ctx = GET_PMU_CTX(); - - printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx); - - psr = pfm_get_psr(); - - printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n", - this_cpu, - ia64_get_pmc(0), - psr & IA64_PSR_PP ? 1 : 0, - psr & IA64_PSR_UP ? 1 : 0, - dcr & IA64_DCR_PP ? 
1 : 0, - info, - ia64_psr(regs)->up, - ia64_psr(regs)->pp); - - ia64_psr(regs)->up = 0; - ia64_psr(regs)->pp = 0; - - for (i=1; PMC_IS_LAST(i) == 0; i++) { - if (PMC_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]); - } - - for (i=1; PMD_IS_LAST(i) == 0; i++) { - if (PMD_IS_IMPL(i) == 0) continue; - printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]); - } - - if (ctx) { - printk("->CPU%d ctx_state=%d vaddr=%p addr=%p fd=%d ctx_task=[%d] saved_psr_up=0x%lx\n", - this_cpu, - ctx->ctx_state, - ctx->ctx_smpl_vaddr, - ctx->ctx_smpl_hdr, - ctx->ctx_msgq_head, - ctx->ctx_msgq_tail, - ctx->ctx_saved_psr_up); - } - local_irq_restore(flags); -} - -/* - * called from process.c:copy_thread(). task is new child. - */ -void -pfm_inherit(struct task_struct *task, struct pt_regs *regs) -{ - struct thread_struct *thread; - - DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid)); - - thread = &task->thread; - - /* - * cut links inherited from parent (current) - */ - thread->pfm_context = NULL; - - PFM_SET_WORK_PENDING(task, 0); - - /* - * the psr bits are already set properly in copy_threads() - */ -} -#else /* !CONFIG_PERFMON */ -asmlinkage long -sys_perfmonctl (int fd, int cmd, void *arg, int count) -{ - return -ENOSYS; -} -#endif /* CONFIG_PERFMON */ Index: linux-2.6/arch/ia64/kernel/perfmon_default_smpl.c =================================================================== --- linux-2.6.orig/arch/ia64/kernel/perfmon_default_smpl.c +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright (C) 2002-2003 Hewlett-Packard Co - * Stephane Eranian - * - * This file implements the default sampling buffer format - * for the Linux/ia64 perfmon-2 subsystem. 
- */ -#include -#include -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Stephane Eranian "); -MODULE_DESCRIPTION("perfmon default sampling format"); -MODULE_LICENSE("GPL"); - -#define DEFAULT_DEBUG 1 - -#ifdef DEFAULT_DEBUG -#define DPRINT(a) \ - do { \ - if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ - } while (0) - -#define DPRINT_ovfl(a) \ - do { \ - if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ - } while (0) - -#else -#define DPRINT(a) -#define DPRINT_ovfl(a) -#endif - -static int -default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data) -{ - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data; - int ret = 0; - - if (data == NULL) { - DPRINT(("[%d] no argument passed\n", task->pid)); - return -EINVAL; - } - - DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu)); - - /* - * must hold at least the buffer header + one minimally sized entry - */ - if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL; - - DPRINT(("buf_size=%lu\n", arg->buf_size)); - - return ret; -} - -static int -default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size) -{ - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; - - /* - * size has been validated in default_validate - */ - *size = arg->buf_size; - - return 0; -} - -static int -default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data) -{ - pfm_default_smpl_hdr_t *hdr; - pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; - - hdr = (pfm_default_smpl_hdr_t *)buf; - - hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION; - hdr->hdr_buf_size = arg->buf_size; - hdr->hdr_cur_offs = sizeof(*hdr); - hdr->hdr_overflows = 0UL; - hdr->hdr_count = 0UL; - - DPRINT(("[%d] buffer=%p 
buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n", - task->pid, - buf, - hdr->hdr_buf_size, - sizeof(*hdr), - hdr->hdr_version, - hdr->hdr_cur_offs)); - - return 0; -} - -static int -default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp) -{ - pfm_default_smpl_hdr_t *hdr; - pfm_default_smpl_entry_t *ent; - void *cur, *last; - unsigned long *e, entry_size; - unsigned int npmds, i; - unsigned char ovfl_pmd; - unsigned char ovfl_notify; - - if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) { - DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg)); - return -EINVAL; - } - - hdr = (pfm_default_smpl_hdr_t *)buf; - cur = buf+hdr->hdr_cur_offs; - last = buf+hdr->hdr_buf_size; - ovfl_pmd = arg->ovfl_pmd; - ovfl_notify = arg->ovfl_notify; - - /* - * precheck for sanity - */ - if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; - - npmds = hweight64(arg->smpl_pmds[0]); - - ent = (pfm_default_smpl_entry_t *)cur; - - prefetch(arg->smpl_pmds_values); - - entry_size = sizeof(*ent) + (npmds << 3); - - /* position for first pmd */ - e = (unsigned long *)(ent+1); - - hdr->hdr_count++; - - DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n", - task->pid, - hdr->hdr_count, - cur, last, - last-cur, - ovfl_pmd, - ovfl_notify, npmds)); - - /* - * current = task running at the time of the overflow. - * - * per-task mode: - * - this is ususally the task being monitored. 
- * Under certain conditions, it might be a different task - * - * system-wide: - * - this is not necessarily the task controlling the session - */ - ent->pid = current->pid; - ent->ovfl_pmd = ovfl_pmd; - ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val; - - /* - * where did the fault happen (includes slot number) - */ - ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3); - - ent->tstamp = stamp; - ent->cpu = smp_processor_id(); - ent->set = arg->active_set; - ent->tgid = current->tgid; - - /* - * selectively store PMDs in increasing index number - */ - if (npmds) { - unsigned long *val = arg->smpl_pmds_values; - for(i=0; i < npmds; i++) { - *e++ = *val++; - } - } - - /* - * update position for next entry - */ - hdr->hdr_cur_offs += entry_size; - cur += entry_size; - - /* - * post check to avoid losing the last sample - */ - if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; - - /* - * keep same ovfl_pmds, ovfl_notify - */ - arg->ovfl_ctrl.bits.notify_user = 0; - arg->ovfl_ctrl.bits.block_task = 0; - arg->ovfl_ctrl.bits.mask_monitoring = 0; - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */ - - return 0; -full: - DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify)); - - /* - * increment number of buffer overflow. - * important to detect duplicate set of samples. 
- */ - hdr->hdr_overflows++; - - /* - * if no notification requested, then we saturate the buffer - */ - if (ovfl_notify == 0) { - arg->ovfl_ctrl.bits.notify_user = 0; - arg->ovfl_ctrl.bits.block_task = 0; - arg->ovfl_ctrl.bits.mask_monitoring = 1; - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; - } else { - arg->ovfl_ctrl.bits.notify_user = 1; - arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */ - arg->ovfl_ctrl.bits.mask_monitoring = 1; - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */ - } - return -1; /* we are full, sorry */ -} - -static int -default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) -{ - pfm_default_smpl_hdr_t *hdr; - - hdr = (pfm_default_smpl_hdr_t *)buf; - - hdr->hdr_count = 0UL; - hdr->hdr_cur_offs = sizeof(*hdr); - - ctrl->bits.mask_monitoring = 0; - ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */ - - return 0; -} - -static int -default_exit(struct task_struct *task, void *buf, struct pt_regs *regs) -{ - DPRINT(("[%d] exit(%p)\n", task->pid, buf)); - return 0; -} - -static pfm_buffer_fmt_t default_fmt={ - .fmt_name = "default_format", - .fmt_uuid = PFM_DEFAULT_SMPL_UUID, - .fmt_arg_size = sizeof(pfm_default_smpl_arg_t), - .fmt_validate = default_validate, - .fmt_getsize = default_get_size, - .fmt_init = default_init, - .fmt_handler = default_handler, - .fmt_restart = default_restart, - .fmt_restart_active = default_restart, - .fmt_exit = default_exit, -}; - -static int __init -pfm_default_smpl_init_module(void) -{ - int ret; - - ret = pfm_register_buffer_fmt(&default_fmt); - if (ret == 0) { - printk("perfmon_default_smpl: %s v%u.%u registered\n", - default_fmt.fmt_name, - PFM_DEFAULT_SMPL_VERSION_MAJ, - PFM_DEFAULT_SMPL_VERSION_MIN); - } else { - printk("perfmon_default_smpl: %s cannot register ret=%d\n", - default_fmt.fmt_name, - ret); - } - - return ret; -} - -static void __exit -pfm_default_smpl_cleanup_module(void) -{ - int ret; - ret = 
pfm_unregister_buffer_fmt(default_fmt.fmt_uuid); - - printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret); -} - -module_init(pfm_default_smpl_init_module); -module_exit(pfm_default_smpl_cleanup_module); - Index: linux-2.6/arch/ia64/kernel/perfmon_generic.h =================================================================== --- linux-2.6.orig/arch/ia64/kernel/perfmon_generic.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * This file contains the generic PMU register description tables - * and pmc checker used by perfmon.c. - * - * Copyright (C) 2002-2003 Hewlett Packard Co - * Stephane Eranian - */ - -static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={ -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ -}; - -static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={ -/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, -/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, -/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, -/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, 
NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ -}; - -/* - * impl_pmcs, impl_pmds are computed at runtime to minimize errors! - */ -static pmu_config_t pmu_conf_gen={ - .pmu_name = "Generic", - .pmu_family = 0xff, /* any */ - .ovfl_val = (1UL << 32) - 1, - .num_ibrs = 0, /* does not use */ - .num_dbrs = 0, /* does not use */ - .pmd_desc = pfm_gen_pmd_desc, - .pmc_desc = pfm_gen_pmc_desc -}; - Index: linux-2.6/arch/ia64/kernel/perfmon_itanium.h =================================================================== --- linux-2.6.orig/arch/ia64/kernel/perfmon_itanium.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * This file contains the Itanium PMU register description tables - * and pmc checker used by perfmon.c. 
- * - * Copyright (C) 2002-2003 Hewlett Packard Co - * Stephane Eranian - */ -static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); - -static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={ -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker 
*/ -}; - -static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ -/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, -/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, -/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, -/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, -/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, -/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, -/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, -/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, -/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd14 */ { 
PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, - { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ -}; - -static int -pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) -{ - int ret; - int is_loaded; - - /* sanitfy check */ - if (ctx == NULL) return -EINVAL; - - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; - - /* - * we must clear the (instruction) debug registers if pmc13.ta bit is cleared - * before they are written (fl_using_dbreg==0) to avoid picking up stale information. - */ - if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) { - - DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val)); - - /* don't mix debug with perfmon */ - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; - - /* - * a count of 0 will mark the debug registers as in use and also - * ensure that they are properly cleared. - */ - ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs); - if (ret) return ret; - } - - /* - * we must clear the (data) debug registers if pmc11.pt bit is cleared - * before they are written (fl_using_dbreg==0) to avoid picking up stale information. 
- */ - if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) { - - DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val)); - - /* don't mix debug with perfmon */ - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; - - /* - * a count of 0 will mark the debug registers as in use and also - * ensure that they are properly cleared. - */ - ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs); - if (ret) return ret; - } - return 0; -} - -/* - * impl_pmcs, impl_pmds are computed at runtime to minimize errors! - */ -static pmu_config_t pmu_conf_ita={ - .pmu_name = "Itanium", - .pmu_family = 0x7, - .ovfl_val = (1UL << 32) - 1, - .pmd_desc = pfm_ita_pmd_desc, - .pmc_desc = pfm_ita_pmc_desc, - .num_ibrs = 8, - .num_dbrs = 8, - .use_rr_dbregs = 1, /* debug register are use for range retrictions */ -}; - - Index: linux-2.6/arch/ia64/kernel/perfmon_mckinley.h =================================================================== --- linux-2.6.orig/arch/ia64/kernel/perfmon_mckinley.h +++ /dev/null @@ -1,187 +0,0 @@ -/* - * This file contains the McKinley PMU register description tables - * and pmc checker used by perfmon.c. 
- * - * Copyright (C) 2002-2003 Hewlett Packard Co - * Stephane Eranian - */ -static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); - -static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={ -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc13 */ { 
PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ -}; - -static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={ -/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, -/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, -/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, -/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, -/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, 
NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, -/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, - { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ -}; - -/* - * PMC reserved fields must have their power-up values preserved - */ -static int -pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) -{ - unsigned long tmp1, tmp2, ival = *val; - - /* remove reserved areas from user value */ - tmp1 = ival & PMC_RSVD_MASK(cnum); - - /* get reserved fields values */ - tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); - - *val = tmp1 | tmp2; - - DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", - cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); - return 0; -} - -/* - * task can be NULL if the context is unloaded - */ -static int -pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) -{ - int ret = 0, check_case1 = 0; - unsigned long val8 = 
0, val14 = 0, val13 = 0; - int is_loaded; - - /* first preserve the reserved fields */ - pfm_mck_reserved(cnum, val, regs); - - /* sanitfy check */ - if (ctx == NULL) return -EINVAL; - - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; - - /* - * we must clear the debug registers if pmc13 has a value which enable - * memory pipeline event constraints. In this case we need to clear the - * the debug registers if they have not yet been accessed. This is required - * to avoid picking stale state. - * PMC13 is "active" if: - * one of the pmc13.cfg_dbrpXX field is different from 0x3 - * AND - * at the corresponding pmc13.ena_dbrpXX is set. - */ - DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded)); - - if (cnum == 13 && is_loaded - && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { - - DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val)); - - /* don't mix debug with perfmon */ - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; - - /* - * a count of 0 will mark the debug registers as in use and also - * ensure that they are properly cleared. - */ - ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); - if (ret) return ret; - } - /* - * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled - * before they are (fl_using_dbreg==0) to avoid picking up stale information. - */ - if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) { - - DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val)); - - /* don't mix debug with perfmon */ - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; - - /* - * a count of 0 will mark the debug registers as in use and also - * ensure that they are properly cleared. 
- */ - ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); - if (ret) return ret; - - } - - switch(cnum) { - case 4: *val |= 1UL << 23; /* force power enable bit */ - break; - case 8: val8 = *val; - val13 = ctx->ctx_pmcs[13]; - val14 = ctx->ctx_pmcs[14]; - check_case1 = 1; - break; - case 13: val8 = ctx->ctx_pmcs[8]; - val13 = *val; - val14 = ctx->ctx_pmcs[14]; - check_case1 = 1; - break; - case 14: val8 = ctx->ctx_pmcs[8]; - val13 = ctx->ctx_pmcs[13]; - val14 = *val; - check_case1 = 1; - break; - } - /* check illegal configuration which can produce inconsistencies in tagging - * i-side events in L1D and L2 caches - */ - if (check_case1) { - ret = ((val13 >> 45) & 0xf) == 0 - && ((val8 & 0x1) == 0) - && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0) - ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0)); - - if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n")); - } - - return ret ? -EINVAL : 0; -} - -/* - * impl_pmcs, impl_pmds are computed at runtime to minimize errors! - */ -static pmu_config_t pmu_conf_mck={ - .pmu_name = "Itanium 2", - .pmu_family = 0x1f, - .flags = PFM_PMU_IRQ_RESEND, - .ovfl_val = (1UL << 47) - 1, - .pmd_desc = pfm_mck_pmd_desc, - .pmc_desc = pfm_mck_pmc_desc, - .num_ibrs = 8, - .num_dbrs = 8, - .use_rr_dbregs = 1 /* debug register are use for range restrictions */ -}; - - Index: linux-2.6/arch/ia64/kernel/perfmon_montecito.h =================================================================== --- linux-2.6.orig/arch/ia64/kernel/perfmon_montecito.h +++ /dev/null @@ -1,269 +0,0 @@ -/* - * This file contains the Montecito PMU register description tables - * and pmc checker used by perfmon.c. - * - * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. 
- * Contributed by Stephane Eranian - */ -static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); - -#define RDEP_MONT_ETB (RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\ - RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63)) -#define RDEP_MONT_DEAR (RDEP(32)|RDEP(33)|RDEP(36)) -#define RDEP_MONT_IEAR (RDEP(34)|RDEP(35)) - -static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={ -/* pmc0 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc4 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}}, -/* pmc5 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}}, -/* pmc6 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}}, -/* pmc7 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}}, -/* pmc8 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}}, -/* pmc9 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}}, -/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}}, -/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}}, -/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}}, -/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, 
pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}}, -/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}}, -/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}}, -/* pmc16 */ { PFM_REG_NOTIMPL, }, -/* pmc17 */ { PFM_REG_NOTIMPL, }, -/* pmc18 */ { PFM_REG_NOTIMPL, }, -/* pmc19 */ { PFM_REG_NOTIMPL, }, -/* pmc20 */ { PFM_REG_NOTIMPL, }, -/* pmc21 */ { PFM_REG_NOTIMPL, }, -/* pmc22 */ { PFM_REG_NOTIMPL, }, -/* pmc23 */ { PFM_REG_NOTIMPL, }, -/* pmc24 */ { PFM_REG_NOTIMPL, }, -/* pmc25 */ { PFM_REG_NOTIMPL, }, -/* pmc26 */ { PFM_REG_NOTIMPL, }, -/* pmc27 */ { PFM_REG_NOTIMPL, }, -/* pmc28 */ { PFM_REG_NOTIMPL, }, -/* pmc29 */ { PFM_REG_NOTIMPL, }, -/* pmc30 */ { PFM_REG_NOTIMPL, }, -/* pmc31 */ { PFM_REG_NOTIMPL, }, -/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}}, -/* pmc38 */ { PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, -/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, -/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}}, -/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 
0, 0}}, -/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, - { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ -}; - -static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={ -/* pmd0 */ { PFM_REG_NOTIMPL, }, -/* pmd1 */ { PFM_REG_NOTIMPL, }, -/* pmd2 */ { PFM_REG_NOTIMPL, }, -/* pmd3 */ { PFM_REG_NOTIMPL, }, -/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}}, -/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}}, -/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}}, -/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}}, -/* pmd8 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}}, -/* pmd9 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}}, -/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}}, -/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}}, -/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(12),0, 0, 0}}, -/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}}, -/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}}, -/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}}, -/* pmd16 */ { PFM_REG_NOTIMPL, }, -/* pmd17 */ { PFM_REG_NOTIMPL, }, -/* pmd18 */ { PFM_REG_NOTIMPL, }, -/* pmd19 */ { PFM_REG_NOTIMPL, }, -/* pmd20 */ { PFM_REG_NOTIMPL, }, -/* pmd21 */ { PFM_REG_NOTIMPL, }, -/* pmd22 */ { PFM_REG_NOTIMPL, }, -/* pmd23 */ { PFM_REG_NOTIMPL, }, -/* pmd24 */ { PFM_REG_NOTIMPL, }, -/* pmd25 */ { PFM_REG_NOTIMPL, }, -/* pmd26 */ { PFM_REG_NOTIMPL, }, -/* pmd27 */ { PFM_REG_NOTIMPL, }, -/* pmd28 */ { PFM_REG_NOTIMPL, }, -/* pmd29 */ { PFM_REG_NOTIMPL, }, -/* pmd30 */ { PFM_REG_NOTIMPL, }, 
-/* pmd31 */ { PFM_REG_NOTIMPL, }, -/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}}, -/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}}, -/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}}, -/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}}, -/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}}, -/* pmd37 */ { PFM_REG_NOTIMPL, }, -/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd40 */ { PFM_REG_NOTIMPL, }, -/* pmd41 */ { PFM_REG_NOTIMPL, }, -/* pmd42 */ { PFM_REG_NOTIMPL, }, -/* pmd43 */ { PFM_REG_NOTIMPL, }, -/* pmd44 */ { PFM_REG_NOTIMPL, }, -/* pmd45 */ { PFM_REG_NOTIMPL, }, -/* pmd46 */ { PFM_REG_NOTIMPL, }, -/* pmd47 */ { PFM_REG_NOTIMPL, }, -/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd57 */ { PFM_REG_BUFFER, 
0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, - { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ -}; - -/* - * PMC reserved fields must have their power-up values preserved - */ -static int -pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) -{ - unsigned long tmp1, tmp2, ival = *val; - - /* remove reserved areas from user value */ - tmp1 = ival & PMC_RSVD_MASK(cnum); - - /* get reserved fields values */ - tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); - - *val = tmp1 | tmp2; - - DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", - cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); - return 0; -} - -/* - * task can be NULL if the context is unloaded - */ -static int -pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) -{ - int ret = 0; - unsigned long val32 = 0, val38 = 0, val41 = 0; - unsigned long tmpval; - int check_case1 = 0; - int is_loaded; - - /* first preserve the reserved fields */ - pfm_mont_reserved(cnum, val, regs); - - tmpval = *val; - - /* sanity check */ - if (ctx == NULL) return -EINVAL; - - is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; - - /* - * we must clear the debug registers if pmc41 has a value which enable - * memory pipeline event constraints. 
In this case we need to clear the - * the debug registers if they have not yet been accessed. This is required - * to avoid picking stale state. - * PMC41 is "active" if: - * one of the pmc41.cfg_dtagXX field is different from 0x3 - * AND - * at the corresponding pmc41.en_dbrpXX is set. - * AND - * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used) - */ - DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded)); - - if (cnum == 41 && is_loaded - && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { - - DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval)); - - /* don't mix debug with perfmon */ - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; - - /* - * a count of 0 will mark the debug registers if: - * AND - */ - ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); - if (ret) return ret; - } - /* - * we must clear the (instruction) debug registers if: - * pmc38.ig_ibrpX is 0 (enabled) - * AND - * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used) - */ - if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) { - - DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval)); - - /* don't mix debug with perfmon */ - if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; - - /* - * a count of 0 will mark the debug registers as in use and also - * ensure that they are properly cleared. 
- */ - ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); - if (ret) return ret; - - } - switch(cnum) { - case 32: val32 = *val; - val38 = ctx->ctx_pmcs[38]; - val41 = ctx->ctx_pmcs[41]; - check_case1 = 1; - break; - case 38: val38 = *val; - val32 = ctx->ctx_pmcs[32]; - val41 = ctx->ctx_pmcs[41]; - check_case1 = 1; - break; - case 41: val41 = *val; - val32 = ctx->ctx_pmcs[32]; - val38 = ctx->ctx_pmcs[38]; - check_case1 = 1; - break; - } - /* check illegal configuration which can produce inconsistencies in tagging - * i-side events in L1D and L2 caches - */ - if (check_case1) { - ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0) - && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0) - || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0)); - if (ret) { - DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32)); - return -EINVAL; - } - } - *val = tmpval; - return 0; -} - -/* - * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
- */ -static pmu_config_t pmu_conf_mont={ - .pmu_name = "Montecito", - .pmu_family = 0x20, - .flags = PFM_PMU_IRQ_RESEND, - .ovfl_val = (1UL << 47) - 1, - .pmd_desc = pfm_mont_pmd_desc, - .pmc_desc = pfm_mont_pmc_desc, - .num_ibrs = 8, - .num_dbrs = 8, - .use_rr_dbregs = 1 /* debug register are use for range retrictions */ -}; Index: linux-2.6/arch/ia64/kernel/process.c =================================================================== --- linux-2.6.orig/arch/ia64/kernel/process.c +++ linux-2.6/arch/ia64/kernel/process.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -44,10 +45,6 @@ #include "entry.h" -#ifdef CONFIG_PERFMON -# include -#endif - #include "sigframe.h" void (*ia64_mark_idle)(int); @@ -164,32 +161,23 @@ do_notify_resume_user (sigset_t *unused, return; } -#ifdef CONFIG_PERFMON - if (current->thread.pfm_needs_checking) - pfm_handle_work(); -#endif + if (test_thread_flag(TIF_PERFMON_WORK)) + pfm_handle_work(task_pt_regs(current)); /* deal with pending signal delivery */ if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK)) ia64_do_signal(scr, in_syscall); } -static int pal_halt = 1; static int can_do_pal_halt = 1; static int __init nohalt_setup(char * str) { - pal_halt = can_do_pal_halt = 0; + can_do_pal_halt = 0; return 1; } __setup("nohalt", nohalt_setup); -void -update_pal_halt_status(int status) -{ - can_do_pal_halt = pal_halt && status; -} - /* * We use this if we don't have any better idle routine.. */ @@ -198,10 +186,34 @@ default_idle (void) { local_irq_enable(); while (!need_resched()) { +#ifdef CONFIG_PERFMON + u64 psr = 0; + /* + * If requested, we stop the PMU to avoid + * measuring across the core idle loop. 
+ * + * dcr.pp is not modified on purpose + * it is used when coming out of + * safe_halt() via interrupt + */ + if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) { + psr = ia64_getreg(_IA64_REG_PSR); + if (psr & IA64_PSR_PP) + ia64_rsm(IA64_PSR_PP); + } +#endif + if (can_do_pal_halt) safe_halt(); else cpu_relax(); +#ifdef CONFIG_PERFMON + if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) { + if (psr & IA64_PSR_PP) + ia64_ssm(IA64_PSR_PP); + } +#endif + } } @@ -315,22 +327,9 @@ cpu_idle (void) void ia64_save_extra (struct task_struct *task) { -#ifdef CONFIG_PERFMON - unsigned long info; -#endif - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) ia64_save_debug_regs(&task->thread.dbr[0]); -#ifdef CONFIG_PERFMON - if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) - pfm_save_regs(task); - - info = __get_cpu_var(pfm_syst_info); - if (info & PFM_CPUINFO_SYST_WIDE) - pfm_syst_wide_update_task(task, info, 0); -#endif - #ifdef CONFIG_IA32_SUPPORT if (IS_IA32_PROCESS(task_pt_regs(task))) ia32_save_state(task); @@ -340,22 +339,9 @@ ia64_save_extra (struct task_struct *tas void ia64_load_extra (struct task_struct *task) { -#ifdef CONFIG_PERFMON - unsigned long info; -#endif - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) ia64_load_debug_regs(&task->thread.dbr[0]); -#ifdef CONFIG_PERFMON - if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) - pfm_load_regs(task); - - info = __get_cpu_var(pfm_syst_info); - if (info & PFM_CPUINFO_SYST_WIDE) - pfm_syst_wide_update_task(task, info, 1); -#endif - #ifdef CONFIG_IA32_SUPPORT if (IS_IA32_PROCESS(task_pt_regs(task))) ia32_load_state(task); @@ -481,8 +467,7 @@ copy_thread (int nr, unsigned long clone * call behavior where scratch registers are preserved across * system calls (unless used by the system call itself). 
*/ -# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \ - | IA64_THREAD_PM_VALID) +# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID) # define THREAD_FLAGS_TO_SET 0 p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR) | THREAD_FLAGS_TO_SET); @@ -503,10 +488,8 @@ copy_thread (int nr, unsigned long clone } #endif -#ifdef CONFIG_PERFMON - if (current->thread.pfm_context) - pfm_inherit(p, child_ptregs); -#endif + pfm_copy_thread(p); + return retval; } @@ -743,15 +726,13 @@ exit_thread (void) { ia64_drop_fpu(current); -#ifdef CONFIG_PERFMON - /* if needed, stop monitoring and flush state to perfmon context */ - if (current->thread.pfm_context) - pfm_exit_thread(current); - - /* free debug register resources */ - if (current->thread.flags & IA64_THREAD_DBG_VALID) - pfm_release_debug_registers(current); -#endif + + /* if needed, stop monitoring and flush state to perfmon context */ + pfm_exit_thread(current); + + /* free debug register resources */ + pfm_release_dbregs(current); + if (IS_IA32_PROCESS(task_pt_regs(current))) ia32_drop_partial_page_list(current); } Index: linux-2.6/arch/ia64/kernel/ptrace.c =================================================================== --- linux-2.6.orig/arch/ia64/kernel/ptrace.c +++ linux-2.6/arch/ia64/kernel/ptrace.c @@ -25,9 +25,6 @@ #include #include #include -#ifdef CONFIG_PERFMON -#include -#endif #include "entry.h" Index: linux-2.6/arch/ia64/kernel/setup.c =================================================================== --- linux-2.6.orig/arch/ia64/kernel/setup.c +++ linux-2.6/arch/ia64/kernel/setup.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -988,6 +989,8 @@ cpu_init (void) } platform_cpu_init(); pm_idle = default_idle; + + pfm_init_percpu(); } /* Index: linux-2.6/arch/ia64/kernel/smpboot.c =================================================================== --- linux-2.6.orig/arch/ia64/kernel/smpboot.c +++ 
linux-2.6/arch/ia64/kernel/smpboot.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -378,10 +379,6 @@ smp_callin (void) extern void ia64_init_itm(void); extern volatile int time_keeper_id; -#ifdef CONFIG_PERFMON - extern void pfm_init_percpu(void); -#endif - cpuid = smp_processor_id(); phys_id = hard_smp_processor_id(); itc_master = time_keeper_id; @@ -403,10 +400,6 @@ smp_callin (void) ia64_mca_cmc_vector_setup(); /* Setup vector on AP */ -#ifdef CONFIG_PERFMON - pfm_init_percpu(); -#endif - local_irq_enable(); if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) { @@ -738,6 +731,7 @@ int __cpu_disable(void) fixup_irqs(); local_flush_tlb_all(); cpu_clear(cpu, cpu_callin_map); + pfm_cpu_disable(); return 0; } Index: linux-2.6/arch/ia64/kernel/sys_ia64.c =================================================================== --- linux-2.6.orig/arch/ia64/kernel/sys_ia64.c +++ linux-2.6/arch/ia64/kernel/sys_ia64.c @@ -284,3 +284,10 @@ sys_pciconfig_write (unsigned long bus, } #endif /* CONFIG_PCI */ + +#ifndef CONFIG_PERFMON +asmlinkage long sys_perfmonctl (int fd, int cmd, void __user *arg, int count) +{ + return -ENOSYS; +} +#endif Index: linux-2.6/arch/ia64/kernel/time.c =================================================================== --- linux-2.6.orig/arch/ia64/kernel/time.c +++ linux-2.6/arch/ia64/kernel/time.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,8 @@ timer_interrupt (int irq, void *dev_id) profile_tick(CPU_PROFILING); + pfm_handle_switch_timeout(); + while (1) { update_process_times(user_mode(get_irq_regs())); Index: linux-2.6/arch/ia64/lib/Makefile =================================================================== --- linux-2.6.orig/arch/ia64/lib/Makefile +++ linux-2.6/arch/ia64/lib/Makefile @@ -13,7 +13,6 @@ lib-y := __divsi3.o __udivsi3.o __modsi3 lib-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o lib-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o 
-lib-$(CONFIG_PERFMON) += carta_random.o AFLAGS___divdi3.o = AFLAGS___udivdi3.o = -DUNSIGNED Index: linux-2.6/arch/ia64/oprofile/init.c =================================================================== --- linux-2.6.orig/arch/ia64/oprofile/init.c +++ linux-2.6/arch/ia64/oprofile/init.c @@ -12,8 +12,8 @@ #include #include -extern int perfmon_init(struct oprofile_operations * ops); -extern void perfmon_exit(void); +extern int op_perfmon_init(struct oprofile_operations * ops); +extern void op_perfmon_exit(void); extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth); int __init oprofile_arch_init(struct oprofile_operations * ops) @@ -22,7 +22,7 @@ int __init oprofile_arch_init(struct opr #ifdef CONFIG_PERFMON /* perfmon_init() can fail, but we have no way to report it */ - ret = perfmon_init(ops); + ret = op_perfmon_init(ops); #endif ops->backtrace = ia64_backtrace; @@ -33,6 +33,6 @@ int __init oprofile_arch_init(struct opr void oprofile_arch_exit(void) { #ifdef CONFIG_PERFMON - perfmon_exit(); + op_perfmon_exit(); #endif } Index: linux-2.6/arch/ia64/oprofile/perfmon.c =================================================================== --- linux-2.6.orig/arch/ia64/oprofile/perfmon.c +++ linux-2.6/arch/ia64/oprofile/perfmon.c @@ -10,19 +10,21 @@ #include #include #include -#include +#include +#include #include #include static int allow_ints; static int -perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, - struct pt_regs *regs, unsigned long stamp) +perfmon_handler(void *buf, struct pfm_ovfl_arg *arg, + unsigned long ip, u64 stamp, void *data) { + struct pt_regs *regs = data; int event = arg->pmd_eventid; - arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; + arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET; /* the owner of the oprofile event buffer may have exited * without perfmon being shutdown (e.g. 
SIGSEGV) @@ -45,17 +47,13 @@ static void perfmon_stop(void) allow_ints = 0; } - -#define OPROFILE_FMT_UUID { \ - 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c } - -static pfm_buffer_fmt_t oprofile_fmt = { - .fmt_name = "oprofile_format", - .fmt_uuid = OPROFILE_FMT_UUID, - .fmt_handler = perfmon_handler, +static struct pfm_smpl_fmt oprofile_fmt = { + .fmt_name = "OProfile", + .fmt_handler = perfmon_handler, + .fmt_flags = PFM_FMT_BUILTIN_FLAG, + .owner = THIS_MODULE }; - static char * get_cpu_type(void) { __u8 family = local_cpu_data->family; @@ -75,25 +73,26 @@ static char * get_cpu_type(void) static int using_perfmon; -int perfmon_init(struct oprofile_operations * ops) +int __init op_perfmon_init(struct oprofile_operations * ops) { - int ret = pfm_register_buffer_fmt(&oprofile_fmt); + int ret = pfm_fmt_register(&oprofile_fmt); if (ret) return -ENODEV; ops->cpu_type = get_cpu_type(); ops->start = perfmon_start; ops->stop = perfmon_stop; + ops->implementation = "perfmon2"; using_perfmon = 1; printk(KERN_INFO "oprofile: using perfmon.\n"); return 0; } -void perfmon_exit(void) +void __exit op_perfmon_exit(void) { if (!using_perfmon) return; - pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid); + pfm_fmt_unregister(&oprofile_fmt); } Index: linux-2.6/arch/ia64/perfmon/Kconfig =================================================================== --- /dev/null +++ linux-2.6/arch/ia64/perfmon/Kconfig @@ -0,0 +1,58 @@ +menu "Hardware Performance Monitoring support" +config PERFMON + bool "Perfmon2 performance monitoring interface" + default n + help + Enables the perfmon2 interface to access the hardware + performance counters. See for + more details. 
+ +config PERFMON_DEBUG + bool "Perfmon debugging" + default n + depends on PERFMON + help + Enables perfmon debugging support + +config IA64_PERFMON_COMPAT + bool "Enable old perfmon-2 compatibility mode" + default n + depends PERFMON + help + Enable this option to allow performance tools which used the old + perfmon-2 interface to continue to work. Old tools are those using + the obsolete commands and arguments. Check your programs and look + in include/asm-ia64/perfmon_compat.h for more information. + +config IA64_PERFMON_GENERIC + tristate "Generic IA-64 PMU support" + depends PERFMON + default n + help + Enables generic IA-64 PMU support. + The generic PMU is defined by the IA-64 architecture document. + This option should only be necessary when running with a PMU that + is not yet explicitly supported. Even then, there is no guarantee + that this support will work. + +config IA64_PERFMON_ITANIUM + tristate "Itanium (Merced) Performance Monitoring support" + depends PERFMON + default n + help + Enables Itanium (Merced) PMU support. + +config IA64_PERFMON_MCKINLEY + tristate "Itanium 2 (McKinley) Performance Monitoring support" + depends PERFMON + default n + help + Enables Itanium 2 (McKinley, Madison, Deerfield) PMU support. + +config IA64_PERFMON_MONTECITO + tristate "Itanium 2 9000 (Montecito) Performance Monitoring support" + depends PERFMON + default n + help + Enables support for Itanium 2 9000 (Montecito) PMU. +endmenu Index: linux-2.6/arch/ia64/perfmon/Makefile =================================================================== --- /dev/null +++ linux-2.6/arch/ia64/perfmon/Makefile @@ -0,0 +1,11 @@ +# +# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+# Contributed by Stephane Eranian +# +obj-$(CONFIG_PERFMON) += perfmon.o +obj-$(CONFIG_IA64_PERFMON_COMPAT) += perfmon_default_smpl.o \ + perfmon_compat.o +obj-$(CONFIG_IA64_PERFMON_GENERIC) += perfmon_generic.o +obj-$(CONFIG_IA64_PERFMON_ITANIUM) += perfmon_itanium.o +obj-$(CONFIG_IA64_PERFMON_MCKINLEY) += perfmon_mckinley.o +obj-$(CONFIG_IA64_PERFMON_MONTECITO) += perfmon_montecito.o Index: linux-2.6/arch/ia64/perfmon/perfmon.c =================================================================== --- /dev/null +++ linux-2.6/arch/ia64/perfmon/perfmon.c @@ -0,0 +1,951 @@ +/* + * This file implements the IA-64 specific + * support for the perfmon2 interface + * + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include +#include +#include + +struct pfm_arch_session { + u32 pfs_sys_use_dbr; /* syswide session uses dbr */ + u32 pfs_ptrace_use_dbr; /* a thread uses dbr via ptrace()*/ +}; + +static struct pfm_arch_session pfm_arch_sessions; +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_arch_sessions_lock); + +static inline void pfm_clear_psr_pp(void) +{ + ia64_rsm(IA64_PSR_PP); +} + +static inline void pfm_set_psr_pp(void) +{ + ia64_ssm(IA64_PSR_PP); +} + +static inline void pfm_clear_psr_up(void) +{ + ia64_rsm(IA64_PSR_UP); +} + +static inline void pfm_set_psr_up(void) +{ + ia64_ssm(IA64_PSR_UP); +} + +static inline void pfm_set_psr_l(u64 val) +{ + ia64_setreg(_IA64_REG_PSR_L, val); +} + +static inline void pfm_restore_ibrs(u64 *ibrs, unsigned int nibrs) +{ + unsigned int i; + + for (i = 0; i < nibrs; i++) { + ia64_set_ibr(i, ibrs[i]); + ia64_dv_serialize_instruction(); + } + ia64_srlz_i(); +} + +static inline void pfm_restore_dbrs(u64 *dbrs, unsigned int ndbrs) +{ + unsigned int i; + + for (i = 0; i < ndbrs; i++) { + ia64_set_dbr(i, dbrs[i]); + ia64_dv_serialize_data(); + } + ia64_srlz_d(); +} + +irqreturn_t pmu_interrupt_handler(int irq, void *arg) +{ + struct pt_regs *regs; + regs = get_irq_regs(); + irq_enter(); + pfm_interrupt_handler(instruction_pointer(regs), regs); + irq_exit(); + return IRQ_HANDLED; +} +static struct irqaction perfmon_irqaction = { + .handler = pmu_interrupt_handler, + .flags = IRQF_DISABLED, /* means keep interrupts masked */ + .name = "perfmon" +}; + +void pfm_arch_quiesce_pmu_percpu(void) +{ + u64 dcr; + /* + * make sure no measurement is active + * (may inherit programmed PMCs from EFI). 
+ */ + pfm_clear_psr_pp(); + pfm_clear_psr_up(); + + /* + * ensure dcr.pp is cleared + */ + dcr = ia64_getreg(_IA64_REG_CR_DCR); + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); + + /* + * we run with the PMU not frozen at all times + */ + ia64_set_pmc(0, 0); + ia64_srlz_d(); +} + +void pfm_arch_init_percpu(void) +{ + pfm_arch_quiesce_pmu_percpu(); + /* + * program PMU interrupt vector + */ + ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); + ia64_srlz_d(); +} + +int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags) +{ + struct pfm_arch_context *ctx_arch; + + ctx_arch = pfm_ctx_arch(ctx); + + ctx_arch->flags.use_dbr = 0; + ctx_arch->flags.insecure = (ctx_flags & PFM_ITA_FL_INSECURE) ? 1: 0; + + PFM_DBG("insecure=%d", ctx_arch->flags.insecure); + + return 0; +} + +/* + * Called from pfm_ctxsw(). Task is guaranteed to be current. + * Context is locked. Interrupts are masked. Monitoring may be active. + * PMU access is guaranteed. PMC and PMD registers are live in PMU. 
+ * + * Return: + * non-zero : did not save PMDs (as part of stopping the PMU) + * 0 : saved PMDs (no need to save them in caller) + */ +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx, + struct pfm_event_set *set) +{ + struct pfm_arch_context *ctx_arch; + u64 psr, tmp; + + ctx_arch = pfm_ctx_arch(ctx); + + /* + * save current PSR: needed because we modify it + */ + ia64_srlz_d(); + psr = ia64_getreg(_IA64_REG_PSR); + + /* + * stop monitoring: + * This is the last instruction which may generate an overflow + * + * we do not clear ipsr.up + */ + pfm_clear_psr_up(); + ia64_srlz_d(); + + /* + * extract overflow status bits + */ + tmp = ia64_get_pmc(0) & ~0xf; + + /* + * keep a copy of psr.up (for reload) + */ + ctx_arch->ctx_saved_psr_up = psr & IA64_PSR_UP; + + /* + * save overflow status bits + */ + set->povfl_pmds[0] = tmp; + + /* + * record how many pending overflows + * XXX: assume identity mapping for counters + */ + set->npend_ovfls = ia64_popcnt(tmp); + + /* + * make sure the PMU is unfrozen for the next task + */ + if (set->npend_ovfls) { + ia64_set_pmc(0, 0); + ia64_srlz_d(); + } + return 1; +} + +/* + * Called from pfm_ctxsw(). Task is guaranteed to be current. + * set cannot be NULL. Context is locked. Interrupts are masked. + * Caller has already restored all PMD and PMC registers. 
+ * + * must reactivate monitoring + */ +void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx, + struct pfm_event_set *set) +{ + struct pfm_arch_context *ctx_arch; + + ctx_arch = pfm_ctx_arch(ctx); + + /* + * when monitoring is not explicitly started + * then psr_up = 0, in which case we do not + * need to restore + */ + if (likely(ctx_arch->ctx_saved_psr_up)) { + pfm_set_psr_up(); + ia64_srlz_d(); + } +} + +int pfm_arch_reserve_session(struct pfm_context *ctx, u32 cpu) +{ + struct pfm_arch_context *ctx_arch; + int is_system; + int ret = 0; + + ctx_arch = pfm_ctx_arch(ctx); + is_system = ctx->flags.system; + + spin_lock(&pfm_arch_sessions_lock); + + if (is_system && ctx_arch->flags.use_dbr) { + PFM_DBG("syswide context uses dbregs"); + + if (pfm_arch_sessions.pfs_ptrace_use_dbr) { + PFM_DBG("cannot reserve syswide context: " + "dbregs in use by ptrace"); + ret = -EBUSY; + } else { + pfm_arch_sessions.pfs_sys_use_dbr++; + } + } + spin_unlock(&pfm_arch_sessions_lock); + + return ret; +} + +void pfm_arch_release_session(struct pfm_context *ctx, u32 cpu) +{ + struct pfm_arch_context *ctx_arch; + int is_system; + + ctx_arch = pfm_ctx_arch(ctx); + is_system = ctx->flags.system; + + spin_lock(&pfm_arch_sessions_lock); + + if (is_system && ctx_arch->flags.use_dbr) { + pfm_arch_sessions.pfs_sys_use_dbr--; + } + spin_unlock(&pfm_arch_sessions_lock); +} + +/* + * function called from pfm_load_context_*(). Task is not guaranteed to be + * current task. If not then other task is guaranteed stopped and off any CPU. + * context is locked and interrupts are masked. + * + * On PFM_LOAD_CONTEXT, the interface guarantees monitoring is stopped. 
+ * + * For system-wide task is NULL + */ +int pfm_arch_load_context(struct pfm_context *ctx, struct pfm_event_set *set, + struct task_struct *task) +{ + struct pfm_arch_context *ctx_arch; + struct pt_regs *regs; + int ret = 0; + + ctx_arch = pfm_ctx_arch(ctx); + + /* + * cannot load a context which is using range restrictions, + * into a thread that is being debugged. + * + * if one set out of several is using the debug registers, then + * we assume the context as whole is using them. + */ + if (ctx_arch->flags.use_dbr) { + if (ctx->flags.system) { + spin_lock(&pfm_arch_sessions_lock); + + if (pfm_arch_sessions.pfs_ptrace_use_dbr) { + PFM_DBG("cannot reserve syswide context: " + "dbregs in use by ptrace"); + ret = -EBUSY; + } else { + pfm_arch_sessions.pfs_sys_use_dbr++; + PFM_DBG("pfs_sys_use_dbr=%u", pfm_arch_sessions.pfs_sys_use_dbr); + } + spin_unlock(&pfm_arch_sessions_lock); + + } else if (task->thread.flags & IA64_THREAD_DBG_VALID) { + PFM_DBG("load_pid [%d] thread is debugged, cannot " + "use range restrictions", task->pid); + ret = -EBUSY; + } + if (ret) + return ret; + } + + /* + * We need to intervene on context switch to toggle the + * psr.pp bit in system-wide. 
As such, we set the TIF + * flag so that pfm_arch_ctxswout_sys() and the + * pfm_arch_ctxswin_sys() functions get called + * from pfm_ctxsw_sys(); + */ + if (ctx->flags.system) { + set_thread_flag(TIF_PERFMON_CTXSW); + PFM_DBG("[%d] set TIF", current->pid); + return 0; + } + + regs = task_pt_regs(task); + + /* + * self-monitoring systematically allows user level control + */ + if (task != current) { + /* + * when not current, task is stopped, so this is safe + */ + ctx_arch->ctx_saved_psr_up = 0; + ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; + } else + ctx_arch->flags.insecure = 1; + + /* + * allow user level control (start/stop/read pmd) if: + * - self-monitoring + * - requested at context creation (PFM_IA64_FL_INSECURE) + * + * There is not security hole with PFM_IA64_FL_INSECURE because + * when not self-monitored, the caller must have permissions to + * attached to the task. + */ + if (ctx_arch->flags.insecure) { + ia64_psr(regs)->sp = 0; + PFM_DBG("clearing psr.sp for [%d]", task->pid); + } + return 0; +} + +int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags) +{ +#define PFM_SETFL_BOTH_SWITCH (PFM_SETFL_OVFL_SWITCH|PFM_SETFL_TIME_SWITCH) +#define PFM_ITA_SETFL_BOTH_INTR (PFM_ITA_SETFL_INTR_ONLY|\ + PFM_ITA_SETFL_EXCL_INTR) + +/* exclude return value field */ +#define PFM_SETFL_ALL_MASK ( PFM_ITA_SETFL_BOTH_INTR \ + | PFM_SETFL_BOTH_SWITCH \ + | PFM_ITA_SETFL_IDLE_EXCL) + + if ((flags & ~PFM_SETFL_ALL_MASK)) { + PFM_DBG("invalid flags=0x%x", flags); + return -EINVAL; + } + + if ((flags & PFM_ITA_SETFL_BOTH_INTR) == PFM_ITA_SETFL_BOTH_INTR) { + PFM_DBG("both excl intr and ontr only are set"); + return -EINVAL; + } + + if ((flags & PFM_ITA_SETFL_IDLE_EXCL) && !ctx->flags.system) { + PFM_DBG("idle exclude flag only for system-wide context"); + return -EINVAL; + } + return 0; +} + +/* + * function called from pfm_unload_context_*(). Context is locked. + * interrupts are masked. task is not guaranteed to be current task. + * Access to PMU is not guaranteed. 
+ * + * function must do whatever arch-specific action is required on unload + * of a context. + * + * called for both system-wide and per-thread. task is NULL for system-wide + */ +int pfm_arch_unload_context(struct pfm_context *ctx, struct task_struct *task) +{ + struct pfm_arch_context *ctx_arch; + struct pt_regs *regs; + + ctx_arch = pfm_ctx_arch(ctx); + + if (ctx->flags.system) { + /* + * disable context switch hook + */ + clear_thread_flag(TIF_PERFMON_CTXSW); + + if (ctx_arch->flags.use_dbr) { + spin_lock(&pfm_arch_sessions_lock); + pfm_arch_sessions.pfs_sys_use_dbr--; + PFM_DBG("sys_use_dbr=%u", pfm_arch_sessions.pfs_sys_use_dbr); + spin_unlock(&pfm_arch_sessions_lock); + } + return 0; + } + + regs = task_pt_regs(task); + + /* + * cancel user level control for per-task context + */ + ia64_psr(regs)->sp = 1; + PFM_DBG("setting psr.sp for [%d]", task->pid); + return 0; +} + +/* + * mask monitoring by setting the privilege level to 0 + * we cannot use psr.pp/psr.up for this, it is controlled by + * the user + */ +void pfm_arch_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set) +{ + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info; + unsigned long mask; + unsigned int i; + + /* + * as an optimization we look at the first 64 PMC + * registers only starting at PMC4. + */ + mask = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR; + for(i = PFM_ITA_FCNTR; mask; i++, mask >>=1) { + if (likely(mask & 0x1)) + ia64_set_pmc(i, set->pmcs[i] & ~0xfUL); + } + /* + * make changes visible + */ + ia64_srlz_d(); +} + +/* + * function called from pfm_switch_sets(), pfm_context_load_thread(), + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() + * context is locked. Interrupts are masked. set cannot be NULL. + * Access to the PMU is guaranteed. + * + * function must restore all PMD registers from set.
+ */ +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) +{ + struct pfm_arch_context *ctx_arch; + unsigned long *mask; + u16 i, num; + + ctx_arch = pfm_ctx_arch(ctx); + + if (ctx_arch->flags.insecure) { + num = pfm_pmu_conf->regs.num_rw_pmd; + mask = pfm_pmu_conf->regs.rw_pmds; + } else { + num = set->nused_pmds; + mask = set->used_pmds; + } + PFM_DBG("num=%u mask=0x%lx", num, mask[0]); + /* + * must restore all implemented read-write PMDS to avoid leaking + * information especially when PFM_IA64_FL_INSECURE is set. + * + * XXX: should check PFM_IA64_FL_INSECURE==0 and use used_pmd instead + */ + for (i = 0; num; i++) { + if (likely(test_bit(i, mask))) { + pfm_arch_write_pmd(ctx, i, set->pmds[i].value); + num--; + } + } + ia64_srlz_d(); +} + +/* + * function called from pfm_switch_sets(), pfm_context_load_thread(), + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() + * context is locked. Interrupts are masked. set cannot be NULL. + * Access to the PMU is guaranteed. + * + * function must restore all PMC registers from set if needed + */ +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) +{ + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info; + u64 mask2 = 0, val, plm; + unsigned long impl_mask, mask_pmcs; + unsigned int i; + + /* + * as an optimization we only look at the first 64 + * PMC registers. In fact, we should never scan the + * entire impl_pmcs because ibr/dbr are implemented + * separately. + * + * always skip PMC0-PMC3. PMC0 taken care of when saving + * state. PMC1-PMC3 not used until we get counters in + * the 60 and above index range. + */ + impl_mask = pfm_pmu_conf->regs.pmcs[0] >> PFM_ITA_FCNTR; + mask_pmcs = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR; + plm = ctx->state == PFM_CTX_MASKED ? ~0xf : ~0x0; + + for (i = PFM_ITA_FCNTR; + impl_mask; + i++, impl_mask >>=1, mask_pmcs >>=1) { + if (likely(impl_mask & 0x1)) { + mask2 = mask_pmcs & 0x1 ? 
plm : ~0; + val = set->pmcs[i] & mask2; + ia64_set_pmc(i, val); + PFM_DBG_ovfl("pmc%u=0x%lx", i, val); + } + } + /* + * restore DBR/IBR + */ + if (set->priv_flags & PFM_ITA_SETFL_USE_DBR) { + pfm_restore_ibrs(set->pmcs+256, 8); + pfm_restore_dbrs(set->pmcs+264, 8); + } + ia64_srlz_d(); +} + +void pfm_arch_unmask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set) +{ + u64 psr; + int is_system; + + is_system = ctx->flags.system; + + psr = ia64_getreg(_IA64_REG_PSR); + + /* + * monitoring is masked via the PMC.plm + * + * As we restore their value, we do not want each counter to + * restart right away. We stop monitoring using the PSR, + * restore the PMC (and PMD) and then re-establish the psr + * as it was. Note that there can be no pending overflow at + * this point, because monitoring is still MASKED. + * + * Because interrupts are masked we can avoid changing + * DCR.pp. + */ + if (is_system) + pfm_clear_psr_pp(); + else + pfm_clear_psr_up(); + + ia64_srlz_d(); + + pfm_arch_restore_pmcs(ctx, set); + + /* + * restore psr + * + * monitoring may start right now but interrupts + * are still masked + */ + pfm_set_psr_l(psr); + ia64_srlz_d(); +} + +/* + * Called from pfm_stop() + * + * For per-thread: + * task is not necessarily current. If not current task, then + * task is guaranteed stopped and off any cpu. Access to PMU + * is not guaranteed. Interrupts are masked. Context is locked. + * Set is the active set. + * + * must disable active monitoring. 
ctx cannot be NULL + */ +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx, + struct pfm_event_set *set) +{ + struct pfm_arch_context *ctx_arch; + struct pt_regs *regs; + u64 dcr, psr; + + ctx_arch = pfm_ctx_arch(ctx); + regs = task_pt_regs(task); + + if (!ctx->flags.system) { + /* + * in ZOMBIE state we always have task == current due to + * pfm_exit_thread() + */ + ia64_psr(regs)->up = 0; + ctx_arch->ctx_saved_psr_up = 0; + + /* + * in case of ZOMBIE state, there is no unload to clear + * insecure monitoring, so we do it in stop instead. + */ + if (ctx->state == PFM_CTX_ZOMBIE) + ia64_psr(regs)->sp = 1; + + if (task == current) { + pfm_clear_psr_up(); + ia64_srlz_d(); + } + } else if (ctx->flags.started) { /* do not stop twice */ + dcr = ia64_getreg(_IA64_REG_CR_DCR); + psr = ia64_getreg(_IA64_REG_PSR); + + ia64_psr(regs)->pp = 0; + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); + pfm_clear_psr_pp(); + ia64_srlz_d(); + + if (set->flags & PFM_ITA_SETFL_IDLE_EXCL) { + PFM_DBG("disabling idle exclude"); + __get_cpu_var(pfm_syst_info) &= ~PFM_ITA_CPUINFO_IDLE_EXCL; + } + } +} + +/* + * called from pfm_start() + * + * Interrupts are masked. Context is locked. Set is the active set. + * + * For per-thread: + * Task is not necessarily current. If not current task, then task + * is guaranteed stopped and off any cpu. No access to PMU if task + * is not current. + * + * For system-wide: + * task is always current + * + * must enable active monitoring.
+ */ +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx, + struct pfm_event_set *set) +{ + struct pfm_arch_context *ctx_arch; + struct pt_regs *regs; + u64 dcr, dcr_pp, psr_pp; + u32 flags; + + ctx_arch = pfm_ctx_arch(ctx); + regs = task_pt_regs(task); + flags = set->flags; + + /* + * take care of per-thread mode + */ + if (!ctx->flags.system) { + + ia64_psr(regs)->up = 1; + + if (task == current) { + pfm_set_psr_up(); + ia64_srlz_d(); + } else { + /* + * start monitoring at the kernel level the next + * time the task is scheduled + */ + ctx_arch->ctx_saved_psr_up = IA64_PSR_UP; + } + return; + } + + /* + * take care of system-wide mode + */ + dcr = ia64_getreg(_IA64_REG_CR_DCR); + if (flags & PFM_ITA_SETFL_INTR_ONLY) { + dcr_pp = 1; + psr_pp = 0; + } else if (flags & PFM_ITA_SETFL_EXCL_INTR) { + dcr_pp = 0; + psr_pp = 1; + } else { + dcr_pp = psr_pp = 1; + } + PFM_DBG("dcr_pp=%lu psr_pp=%lu", dcr_pp, psr_pp); + + /* + * update dcr_pp and psr_pp + */ + if (dcr_pp) + ia64_setreg(_IA64_REG_CR_DCR, dcr | IA64_DCR_PP); + else + ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); + + if (psr_pp) { + pfm_set_psr_pp(); + ia64_psr(regs)->pp = 1; + } else { + pfm_clear_psr_pp(); + ia64_psr(regs)->pp = 0; + } + ia64_srlz_d(); + + if (set->flags & PFM_ITA_SETFL_IDLE_EXCL) { + PFM_DBG("enable idle exclude"); + __get_cpu_var(pfm_syst_info) |= PFM_ITA_CPUINFO_IDLE_EXCL; + } +} + +/* + * Only call this function when a process is trying to + * write the debug registers (reading is always allowed) + * called from arch/ia64/kernel/ptrace.c:access_uarea() + */ +int __pfm_use_dbregs(struct task_struct *task) +{ + struct pfm_arch_context *ctx_arch; + struct pfm_context *ctx; + unsigned long flags; + int ret = 0; + + PFM_DBG("called for [%d]", task->pid); + + ctx = task->pfm_context; + + /* + * do it only once + */ + if (task->thread.flags & IA64_THREAD_DBG_VALID) { + PFM_DBG("IA64_THREAD_DBG_VALID already set"); + return 0; + } + if (ctx) { + 
spin_lock_irqsave(&ctx->lock, flags); + ctx_arch = pfm_ctx_arch(ctx); + + if (ctx_arch->flags.use_dbr == 1) { + PFM_DBG("PMU using dbregs already, no ptrace access"); + ret = -1; + } + spin_unlock_irqrestore(&ctx->lock, flags); + if (ret) + return ret; + } + + spin_lock(&pfm_arch_sessions_lock); + + /* + * We cannot allow setting breakpoints when system wide monitoring + * sessions are using the debug registers. + */ + if (!pfm_arch_sessions.pfs_sys_use_dbr) + pfm_arch_sessions.pfs_ptrace_use_dbr++; + else + ret = -1; + + PFM_DBG("ptrace_use_dbr=%u sys_use_dbr=%u by [%d] ret = %d", + pfm_arch_sessions.pfs_ptrace_use_dbr, + pfm_arch_sessions.pfs_sys_use_dbr, + task->pid, ret); + + spin_unlock(&pfm_arch_sessions_lock); + if (ret) + return ret; +#ifndef CONFIG_SMP + /* + * in UP, we need to check whether the current + * owner of the PMU is not using the debug registers + * for monitoring. Because we are using a lazy + * save on ctxswout, we must force a save in this + * case because the debug registers are being + * modified by another task. We save the current + * PMD registers, and clear ownership. In ctxswin, + * full state will be reloaded. + * + * Note: we overwrite task. + */ + task = __get_cpu_var(pmu_owner); + ctx = __get_cpu_var(pmu_ctx); + + if (task == NULL) + return 0; + + ctx_arch = pfm_ctx_arch(ctx); + + if (ctx_arch->flags.use_dbr) + pfm_save_pmds_release(ctx); +#endif + return 0; +} + +/* + * This function is called for every task that exits with the + * IA64_THREAD_DBG_VALID set. This indicates a task which was + * able to use the debug registers for debugging purposes via + * ptrace(). 
Therefore we know it was not using them for + * performance monitoring, so we only decrement the number + * of "ptraced" debug register users to keep the count up to date + */ +int __pfm_release_dbregs(struct task_struct *task) +{ + int ret; + + spin_lock(&pfm_arch_sessions_lock); + + if (pfm_arch_sessions.pfs_ptrace_use_dbr == 0) { + PFM_ERR("invalid release for [%d] ptrace_use_dbr=0", task->pid); + ret = -1; + } else { + pfm_arch_sessions.pfs_ptrace_use_dbr--; + ret = 0; + } + spin_unlock(&pfm_arch_sessions_lock); + + return ret; +} + +int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx, + struct pfm_event_set *set) +{ + struct pfm_arch_context *ctx_arch; + struct task_struct *task; + struct thread_struct *thread; + int ret = 0, state; + int i, can_access_pmu = 0; + int is_loaded, is_system; + + ctx_arch = pfm_ctx_arch(ctx); + state = ctx->state; + task = ctx->task; + is_loaded = state == PFM_CTX_LOADED || state == PFM_CTX_MASKED; + is_system = ctx->flags.system; + can_access_pmu = __get_cpu_var(pmu_owner) == task || is_system; + + if (is_loaded == 0) + goto done; + + if (is_system == 0) { + thread = &(task->thread); + + /* + * cannot use debug registers for monitoring if they are + * already used for debugging + */ + if (thread->flags & IA64_THREAD_DBG_VALID) { + PFM_DBG("debug registers already in use for [%d]", + task->pid); + return -EBUSY; + } + } + + /* + * check for debug registers in system wide mode + */ + spin_lock(&pfm_arch_sessions_lock); + + if (is_system) { + if (pfm_arch_sessions.pfs_ptrace_use_dbr) + ret = -EBUSY; + else + pfm_arch_sessions.pfs_sys_use_dbr++; + } + + spin_unlock(&pfm_arch_sessions_lock); + + if (ret != 0) + return ret; + + /* + * clear hardware registers to make sure we don't + * pick up stale state.
+ */ + if (can_access_pmu) { + PFM_DBG("clearing ibrs, dbrs"); + for (i = 0; i < 8; i++) { + ia64_set_ibr(i, 0); + ia64_dv_serialize_instruction(); + } + ia64_srlz_i(); + for (i = 0; i < 8; i++) { + ia64_set_dbr(i, 0); + ia64_dv_serialize_data(); + } + ia64_srlz_d(); + } +done: + /* + * debug registers are now in use + */ + ctx_arch->flags.use_dbr = 1; + set->priv_flags |= PFM_ITA_SETFL_USE_DBR; + PFM_DBG("set%u use_dbr=1", set->id); + return 0; +} +EXPORT_SYMBOL(pfm_ia64_mark_dbregs_used); + +char *pfm_arch_get_pmu_module_name(void) +{ + switch(local_cpu_data->family) { + case 0x07: + return "perfmon_itanium"; + case 0x1f: + return "perfmon_mckinley"; + case 0x20: + return "perfmon_montecito"; + default: + return "perfmon_generic"; + } + return NULL; +} + +/* + * global arch-specific initialization, called only once + */ +int __init pfm_arch_init(void) +{ + int ret; + + spin_lock_init(&pfm_arch_sessions_lock); + +#ifdef CONFIG_IA64_PERFMON_COMPAT + ret = pfm_ia64_compat_init(); + if (ret) + return ret; +#endif + register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); + + + return 0; +} Index: linux-2.6/arch/ia64/perfmon/perfmon_compat.c =================================================================== --- /dev/null +++ linux-2.6/arch/ia64/perfmon/perfmon_compat.c @@ -0,0 +1,1166 @@ +/* + * This file implements the IA-64 specific + * support for the perfmon2 interface + * + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include +#include +#include + +asmlinkage long sys_pfm_stop(int fd); +asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *st); +asmlinkage long sys_pfm_unload_context(int fd); +asmlinkage long sys_pfm_restart(int fd); +asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ld); + +/* + * function providing some help for backward compatibility with old IA-64 + * applications. In the old model, certain attributes of a counter were + * passed via the PMC, now they are passed via the PMD. + */ +static int pfm_compat_update_pmd(struct pfm_context *ctx, u16 set_id, u16 cnum, + u32 rflags, + unsigned long *smpl_pmds, + unsigned long *reset_pmds, + u64 eventid) +{ + struct pfm_event_set *set; + int is_counting; + unsigned long *impl_pmds; + u32 flags = 0; + u16 max_pmd; + + impl_pmds = pfm_pmu_conf->regs.pmds; + max_pmd = pfm_pmu_conf->regs.max_pmd; + + /* + * given that we do not maintain PMC ->PMD dependencies + * we cannot figure out what to do in case PMCxx != PMDxx + */ + if (cnum > max_pmd) + return 0; + + /* + * assumes PMCxx controls PMDxx which is always true for counters + * on Itanium PMUs.
+ */ + is_counting = pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64; + set = pfm_find_set(ctx, set_id, 0); + + if (is_counting) { + if (rflags & PFM_REGFL_OVFL_NOTIFY) + flags |= PFM_REGFL_OVFL_NOTIFY; + if (rflags & PFM_REGFL_RANDOM) + flags |= PFM_REGFL_RANDOM; + /* + * verify validity of smpl_pmds + */ + if (unlikely(bitmap_subset(smpl_pmds, + impl_pmds, max_pmd) == 0)) { + PFM_DBG("invalid smpl_pmds=0x%llx for pmd%u", + (unsigned long long)smpl_pmds[0], cnum); + return -EINVAL; + } + /* + * verify validity of reset_pmds + */ + if (unlikely(bitmap_subset(reset_pmds, + impl_pmds, max_pmd) == 0)) { + PFM_DBG("invalid reset_pmds=0x%lx for pmd%u", + reset_pmds[0], cnum); + return -EINVAL; + } + /* + * ensures that a PFM_READ_PMDS succeeds with a + * corresponding PFM_WRITE_PMDS + */ + __set_bit(cnum, set->used_pmds); + + } else if (rflags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { + PFM_DBG("cannot set ovfl_notify or random on pmd%u", cnum); + return -EINVAL; + } + + set->pmds[cnum].flags = flags; + + if (is_counting) { + bitmap_copy(set->pmds[cnum].reset_pmds, + reset_pmds, + max_pmd); + + bitmap_copy(set->pmds[cnum].smpl_pmds, + smpl_pmds, + max_pmd); + + set->pmds[cnum].eventid = eventid; + + /* + * update ovfl_notify + */ + if (rflags & PFM_REGFL_OVFL_NOTIFY) + __set_bit(cnum, set->ovfl_notify); + else + __clear_bit(cnum, set->ovfl_notify); + + } + PFM_DBG("pmd%u flags=0x%x eventid=0x%lx r_pmds=0x%lx s_pmds=0x%lx", + cnum, flags, + eventid, + reset_pmds[0], + smpl_pmds[0]); + + return 0; +} + + +int __pfm_write_ibrs_old(struct pfm_context *ctx, void *arg, int count) +{ + struct pfarg_dbreg *req = arg; + struct pfarg_pmc pmc; + int i, ret = 0; + + memset(&pmc, 0, sizeof(pmc)); + + for (i = 0; i < count; i++, req++) { + pmc.reg_num = 256+req->dbreg_num; + pmc.reg_value = req->dbreg_value; + pmc.reg_flags = 0; + pmc.reg_set = req->dbreg_set; + + ret = __pfm_write_pmcs(ctx, &pmc, 1); + + req->dbreg_flags &= ~PFM_REG_RETFL_MASK; + req->dbreg_flags |= 
pmc.reg_flags; + + if (ret) + return ret; + } + return 0; +} + +static long pfm_write_ibrs_old(int fd, void __user *ureq, int count) +{ + struct pfm_context *ctx; + struct file *filp; + struct pfarg_dbreg *req = NULL; + void *fptr; + unsigned long flags; + size_t sz; + int ret, fput_needed; + + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) + return -EINVAL; + + sz = count*sizeof(*req); + + filp = fget_light(fd, &fput_needed); + if (unlikely(filp == NULL)) { + PFM_DBG("invalid fd %d", fd); + return -EBADF; + } + + ctx = filp->private_data; + ret = -EBADF; + + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { + PFM_DBG("fd %d not related to perfmon", fd); + goto error; + } + + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); + if (ret) + goto error; + + spin_lock_irqsave(&ctx->lock, flags); + + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags); + if (ret == 0) + ret = __pfm_write_ibrs_old(ctx, req, count); + + spin_unlock_irqrestore(&ctx->lock, flags); + + if (copy_to_user(ureq, req, sz)) + ret = -EFAULT; + + kfree(fptr); +error: + fput_light(filp, fput_needed); + return ret; +} + +int __pfm_write_dbrs_old(struct pfm_context *ctx, void *arg, int count) +{ + struct pfarg_dbreg *req = arg; + struct pfarg_pmc pmc; + int i, ret = 0; + + memset(&pmc, 0, sizeof(pmc)); + + for (i = 0; i < count; i++, req++) { + pmc.reg_num = 264+req->dbreg_num; + pmc.reg_value = req->dbreg_value; + pmc.reg_flags = 0; + pmc.reg_set = req->dbreg_set; + + ret = __pfm_write_pmcs(ctx, &pmc, 1); + + req->dbreg_flags &= ~PFM_REG_RETFL_MASK; + req->dbreg_flags |= pmc.reg_flags; + if (ret) + return ret; + } + return 0; +} + +static long pfm_write_dbrs_old(int fd, void __user *ureq, int count) +{ + struct pfm_context *ctx; + struct file *filp; + struct pfarg_dbreg *req = NULL; + void *fptr; + unsigned long flags; + size_t sz; + int ret, fput_needed; + + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) + return -EINVAL; + + sz = count*sizeof(*req); + + filp = fget_light(fd, 
&fput_needed); + if (unlikely(filp == NULL)) { + PFM_DBG("invalid fd %d", fd); + return -EBADF; + } + + ctx = filp->private_data; + ret = -EBADF; + + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { + PFM_DBG("fd %d not related to perfmon", fd); + goto error; + } + + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); + if (ret) + goto error; + + spin_lock_irqsave(&ctx->lock, flags); + + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags); + if (ret == 0) + ret = __pfm_write_dbrs_old(ctx, req, count); + + spin_unlock_irqrestore(&ctx->lock, flags); + + if (copy_to_user(ureq, req, sz)) + ret = -EFAULT; + + kfree(fptr); +error: + fput_light(filp, fput_needed); + return ret; +} + +int __pfm_write_pmcs_old(struct pfm_context *ctx, struct pfarg_reg *req_old, + int count) +{ + struct pfarg_pmc req; + unsigned int i; + int ret, error_code; + + memset(&req, 0, sizeof(req)); + + for (i = 0; i < count; i++, req_old++) { + req.reg_num = req_old->reg_num; + req.reg_set = req_old->reg_set; + req.reg_flags = 0; + req.reg_value = req_old->reg_value; + + ret = __pfm_write_pmcs(ctx, (void *)&req, 1); + req_old->reg_flags &= ~PFM_REG_RETFL_MASK; + req_old->reg_flags |= req.reg_flags; + + if (ret) + return ret; + + ret = pfm_compat_update_pmd(ctx, req_old->reg_set, + req_old->reg_num, + (u32)req_old->reg_flags, + req_old->reg_smpl_pmds, + req_old->reg_reset_pmds, + req_old->reg_smpl_eventid); + + error_code = ret ? 
PFM_REG_RETFL_EINVAL : 0; + req_old->reg_flags &= ~PFM_REG_RETFL_MASK; + req_old->reg_flags |= error_code; + + if (ret) + return ret; + } + return 0; +} + +static long pfm_write_pmcs_old(int fd, void __user *ureq, int count) +{ + struct pfm_context *ctx; + struct file *filp; + struct pfarg_reg *req = NULL; + void *fptr; + unsigned long flags; + size_t sz; + int ret, fput_needed; + + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) + return -EINVAL; + + sz = count*sizeof(*req); + + filp = fget_light(fd, &fput_needed); + if (unlikely(filp == NULL)) { + PFM_DBG("invalid fd %d", fd); + return -EBADF; + } + + ctx = filp->private_data; + ret = -EBADF; + + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { + PFM_DBG("fd %d not related to perfmon", fd); + goto error; + } + + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); + if (ret) + goto error; + + spin_lock_irqsave(&ctx->lock, flags); + + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags); + if (ret == 0) + ret = __pfm_write_pmcs_old(ctx, req, count); + + spin_unlock_irqrestore(&ctx->lock, flags); + + if (copy_to_user(ureq, req, sz)) + ret = -EFAULT; + + kfree(fptr); + +error: + fput_light(filp, fput_needed); + return ret; +} + +int __pfm_write_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old, + int count) +{ + struct pfarg_pmd req; + int i, ret; + + memset(&req, 0, sizeof(req)); + + for (i = 0; i < count; i++, req_old++) { + req.reg_num = req_old->reg_num; + req.reg_set = req_old->reg_set; + req.reg_value = req_old->reg_value; + req.reg_flags = req_old->reg_flags; + + req.reg_long_reset = req_old->reg_long_reset; + req.reg_short_reset = req_old->reg_short_reset; + req.reg_random_mask = req_old->reg_random_mask; + /* + * reg_random_seed is ignored since v2.3 + */ + + /* + * skip last_reset_val not used for writing + * skip smpl_pmds, reset_pmds, eventid, ovfl_swtch_cnt + * as set in pfm_write_pmcs_old. 
+ */ + req.reg_ovfl_switch_cnt = req_old->reg_ovfl_switch_cnt; + + ret = __pfm_write_pmds(ctx, (void *)&req, 1, 1); + + req_old->reg_flags &= ~PFM_REG_RETFL_MASK; + req_old->reg_flags |= req.reg_flags; + + if (ret) + return ret; + } + return 0; +} + +static long pfm_write_pmds_old(int fd, void __user *ureq, int count) +{ + struct pfm_context *ctx; + struct file *filp; + struct pfarg_reg *req = NULL; + void *fptr; + unsigned long flags; + size_t sz; + int ret, fput_needed; + + if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) + return -EINVAL; + + sz = count*sizeof(*req); + + filp = fget_light(fd, &fput_needed); + if (unlikely(filp == NULL)) { + PFM_DBG("invalid fd %d", fd); + return -EBADF; + } + + ctx = filp->private_data; + ret = -EBADF; + + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { + PFM_DBG("fd %d not related to perfmon", fd); + goto error; + } + + ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); + if (ret) + goto error; + + spin_lock_irqsave(&ctx->lock, flags); + + ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags); + if (ret == 0) + ret = __pfm_write_pmds_old(ctx, req, count); + + spin_unlock_irqrestore(&ctx->lock, flags); + + if (copy_to_user(ureq, req, sz)) + ret = -EFAULT; + + kfree(fptr); +error: + fput_light(filp, fput_needed); + return ret; +} + +int __pfm_read_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old, + int count) +{ + struct pfarg_pmd req; + int i, ret; + + memset(&req, 0, sizeof(req)); + + for (i = 0; i < count; i++, req_old++) { + req.reg_num = req_old->reg_num; + req.reg_set = req_old->reg_set; + + /* skip value not used for reading */ + req.reg_flags = req_old->reg_flags; + + /* skip short/long_reset not used for reading */ + /* skip last_reset_val not used for reading */ + /* skip ovfl_switch_cnt not used for reading */ + + ret = __pfm_read_pmds(ctx, (void *)&req, 1); + + req_old->reg_flags &= ~PFM_REG_RETFL_MASK; + req_old->reg_flags |= req.reg_flags; + if (ret) + return ret; + + /* update fields 
+		 */
+		req_old->reg_value = req.reg_value;
+
+		req_old->reg_last_reset_val = req.reg_last_reset_val;
+		req_old->reg_ovfl_switch_cnt = req.reg_ovfl_switch_cnt;
+	}
+	return 0;
+}
+
+/*
+ * legacy PFM_READ_PMDS command: fetch the vector of count pfarg_reg from
+ * user space, run __pfm_read_pmds_old() on it with the context locked and
+ * copy the vector (values and per-register return flags) back to the user.
+ */
+static long pfm_read_pmds_old(int fd, void __user *ureq, int count)
+{
+	struct pfm_context *ctx;
+	struct file *filp;
+	struct pfarg_reg *req = NULL;
+	void *fptr;
+	unsigned long flags;
+	size_t sz;
+	int ret, fput_needed;
+
+	if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
+		return -EINVAL;
+
+	sz = count*sizeof(*req);
+
+	filp = fget_light(fd, &fput_needed);
+	if (unlikely(filp == NULL)) {
+		PFM_DBG("invalid fd %d", fd);
+		return -EBADF;
+	}
+
+	ctx = filp->private_data;
+	ret = -EBADF;
+
+	if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
+		PFM_DBG("fd %d not related to perfmon", fd);
+		goto error;
+	}
+
+	ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
+	if (ret)
+		goto error;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags);
+	if (ret == 0)
+		ret = __pfm_read_pmds_old(ctx, req, count);
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	if (copy_to_user(ureq, req, sz))
+		ret = -EFAULT;
+
+	kfree(fptr);
+error:
+	fput_light(filp, fput_needed);
+	return ret;
+}
+
+/*
+ * OBSOLETE: use /proc/perfmon_map instead
+ *
+ * For each of the count pfarg_reg entries passed by the user, return the
+ * default value (dfl_val) of the PMC designated by reg_num. The first
+ * entry naming a register which is out of range or not flagged PFM_REG_I
+ * gets PFM_REG_RETFL_EINVAL in reg_flags, stops the scan and makes the
+ * call fail with -EINVAL. The vector is copied back in all cases so that
+ * the caller can locate the faulty entry.
+ *
+ * fd is not used, the information comes from the PMU description.
+ */
+static long pfm_get_default_pmcs_old(int fd, void __user *ureq, int count)
+{
+	struct pfarg_reg *req = NULL, *r;
+	void *fptr;
+	size_t sz;
+	int ret, i;
+	unsigned int cnum;
+
+	/* same bounds as the other legacy vector commands */
+	if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
+		return -EINVAL;
+
+	/*
+	 * ensure the pfm_pmu_conf does not disappear while
+	 * we use it
+	 */
+	ret = pfm_pmu_conf_get(1);
+	if (ret)
+		return ret;
+
+	/*
+	 * size of the vector of structures: ureq is a void pointer, so
+	 * sizeof(*ureq) would be 1 and only count bytes would be fetched
+	 */
+	sz = count*sizeof(*req);
+
+	ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
+	if (ret)
+		goto error;
+
+	/*
+	 * walk with a cursor: req must keep pointing to the start of the
+	 * vector for the copy back to user space
+	 */
+	for (i = 0, r = req; i < count; i++, r++) {
+		cnum = r->reg_num;
+
+		/*
+		 * cnum comes from the user and indexes pmc_desc[], which
+		 * holds num_pmc_entries descriptors
+		 */
+		if (cnum >= PFM_MAX_PMCS ||
+		    cnum >= pfm_pmu_conf->num_pmc_entries ||
+		    (pfm_pmu_conf->pmc_desc[cnum].type & PFM_REG_I) == 0) {
+			r->reg_flags = PFM_REG_RETFL_EINVAL;
+			ret = -EINVAL;
+			break;
+		}
+		r->reg_value = pfm_pmu_conf->pmc_desc[cnum].dfl_val;
+		r->reg_flags = 0;
+
+		PFM_DBG("pmc[%u]=0x%lx", cnum, r->reg_value);
+	}
+
+	if (copy_to_user(ureq, req, sz))
+		ret = -EFAULT;
+
+	kfree(fptr);
+error:
+	pfm_pmu_conf_put();
+
+	return ret;
+}
+
+/*
+ * allocate a sampling buffer and remaps it into the user address space of
+ * the task. This is only in compatibility mode
+ *
+ * function called ONLY on current task
+ *
+ * On success the user address of the buffer is stored in the arch
+ * specific part of the context (ctx_smpl_vaddr) and an extra reference
+ * is held on filp on behalf of the new vma. Returns 0 on success, the
+ * error of pfm_smpl_buffer_alloc(), or -ENOMEM when the mapping cannot be
+ * set up (the buffer is released in that case).
+ */
+int pfm_smpl_buffer_alloc_compat(struct pfm_context *ctx, size_t rsize,
+				 struct file *filp)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma = NULL;
+	struct pfm_arch_context *ctx_arch;
+	size_t size;
+	int ret;
+	extern struct vm_operations_struct pfm_buf_map_vm_ops;
+
+	ctx_arch = pfm_ctx_arch(ctx);
+
+	/*
+	 * allocate buffer + map desc
+	 */
+	ret = pfm_smpl_buffer_alloc(ctx, rsize);
+	if (ret)
+		return ret;
+
+	size = ctx->smpl_size;
+
+
+	/* allocate vma */
+	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	if (!vma) {
+		PFM_DBG("Cannot allocate vma");
+		goto error_kmem;
+	}
+	memset(vma, 0, sizeof(*vma));
+
+	/*
+	 * partially initialize the vma for the sampling buffer
+	 */
+	vma->vm_mm = mm;
+	vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED;
+	vma->vm_page_prot = PAGE_READONLY;
+	vma->vm_ops = &pfm_buf_map_vm_ops;
+	vma->vm_file = filp;
+	vma->vm_private_data = ctx;
+	vma->vm_pgoff = 0;
+
+	/*
+	 * simulate effect of mmap()
+	 */
+	get_file(filp);
+
+	/*
+	 * Let's do the difficult operations next.
+	 *
+	 * now we atomically find some area in the address space and
+	 * remap the buffer into it.
+	 */
+	down_write(&current->mm->mmap_sem);
+
+	/* find some free area in address space, must have mmap sem held */
+	vma->vm_start = get_unmapped_area(NULL, 0, size, 0,
+					  MAP_PRIVATE|MAP_ANONYMOUS);
+	/*
+	 * a failure is reported as a negative errno stored in the address,
+	 * i.e. a value which is not page aligned. 0 is rejected as before.
+	 */
+	if (vma->vm_start == 0 || (vma->vm_start & ~PAGE_MASK)) {
+		PFM_DBG("cannot find unmapped area of size %zu", size);
+		up_write(&current->mm->mmap_sem);
+		goto error;
+	}
+	vma->vm_end = vma->vm_start + size;
+
+	PFM_DBG("aligned_size=%zu mapped @0x%lx", size, vma->vm_start);
+	/*
+	 * now insert the vma in the vm list for the process, must be
+	 * done with mmap lock held
+	 */
+	if (insert_vm_struct(mm, vma)) {
+		PFM_DBG("cannot insert vma");
+		up_write(&current->mm->mmap_sem);
+		goto error;
+	}
+
+	mm->total_vm += size >> PAGE_SHIFT;
+
+	up_write(&current->mm->mmap_sem);
+
+	/*
+	 * IMPORTANT: we do not issue the fput()
+	 * because we want to increase the ref count
+	 * on the descriptor to simulate what mmap()
+	 * would do
+	 */
+
+	/*
+	 * used to propagate vaddr to syscall stub
+	 */
+	ctx_arch->ctx_smpl_vaddr = (void *)vma->vm_start;
+
+	return 0;
+error:
+	/*
+	 * the vma never made it into the address space: drop the
+	 * reference taken by get_file() on its behalf
+	 */
+	fput(filp);
+	kmem_cache_free(vm_area_cachep, vma);
+error_kmem:
+	pfm_release_buf_space(ctx, ctx->smpl_size);
+	vfree(ctx->smpl_addr);
+	return -ENOMEM;
+}
+
+#define PFM_DEFAULT_SMPL_UUID { \
+	0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82,\
+	0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
+
+static pfm_uuid_t old_default_uuid = PFM_DEFAULT_SMPL_UUID;
+static pfm_uuid_t null_uuid;
+
+/*
+ * function invoked in case, pfm_context_create fails
+ * at the last operation, copy_to_user. It needs to
+ * undo memory allocations and free the file descriptor
+ */
+static void pfm_undo_create_context_fd(int fd, struct pfm_context *ctx)
+{
+	struct files_struct *files = current->files;
+	struct file *file;
+	int fput_needed;
+
+	file = fget_light(fd, &fput_needed);
+	/*
+	 * there is no fd_uninstall(), so we do it
+	 * here. put_unused_fd() does not remove the
+	 * effect of fd_install().
+	 */
+
+	spin_lock(&files->file_lock);
+	files->fd_array[fd] = NULL;
+	spin_unlock(&files->file_lock);
+
+	fput_light(file, fput_needed);
+
+	/*
+	 * decrement ref count and kill file
+	 */
+	put_filp(file);
+
+	put_unused_fd(fd);
+
+	pfm_context_free(ctx);
+}
+
+/*
+ * fetch the sampling format argument of a legacy context creation.
+ *
+ * uuid    : format requested by the application. The null uuid means no
+ *           sampling format: 0 is returned and *arg, *fmt are left
+ *           untouched. Any uuid other than the old default one is
+ *           rejected with -EINVAL.
+ * fmt_uarg: user address of the format argument
+ * usize   : size announced by the user, -1 to skip the size check
+ * arg     : on success, kmalloc'ed copy of the argument (caller kfree's)
+ * fmt     : on success, the "default-old" format with its refcount
+ *           increased
+ */
+static int pfm_get_smpl_arg_old(pfm_uuid_t uuid, void __user *fmt_uarg,
+				size_t usize, void **arg,
+				struct pfm_smpl_fmt **fmt)
+{
+	struct pfm_smpl_fmt *f;
+	void *addr = NULL;
+	size_t sz;
+	int ret;
+
+	if (!memcmp(uuid, null_uuid, sizeof(pfm_uuid_t)))
+		return 0;
+
+	if (memcmp(uuid, old_default_uuid, sizeof(pfm_uuid_t))) {
+		PFM_DBG("compatibility mode supports only default sampling format");
+		return -EINVAL;
+	}
+	/*
+	 * find fmt and increase refcount
+	 */
+	f = pfm_smpl_fmt_get("default-old");
+	if (f == NULL) {
+		PFM_DBG("default-old buffer format not found");
+		return -EINVAL;
+	}
+
+	/*
+	 * expected format argument size
+	 */
+	sz = f->fmt_arg_size;
+
+	/*
+	 * check user size matches expected size
+	 * usize = -1 is for IA-64 backward compatibility
+	 */
+	ret = -EINVAL;
+	if (sz != usize && usize != -1) {
+		PFM_DBG("invalid arg size %zu, format expects %zu",
+			usize, sz);
+		goto error;
+	}
+
+	ret = -ENOMEM;
+	addr = kmalloc(sz, GFP_KERNEL);
+	if (addr == NULL)
+		goto error;
+
+	ret = -EFAULT;
+	if (copy_from_user(addr, fmt_uarg, sz))
+		goto error;
+
+	*arg = addr;
+	*fmt = f;
+	return 0;
+
+error:
+	kfree(addr);
+	pfm_smpl_fmt_put(f);
+	return ret;
+}
+
+/*
+ * legacy PFM_CREATE_CONTEXT command.
+ *
+ * ureq points to one pfarg_context immediately followed by the sampling
+ * format argument, if any. On success the new file descriptor and the
+ * user address of the sampling buffer are stored in the pfarg_context
+ * which is copied back to the user; the return value is the value
+ * returned by __pfm_create_context() (the file descriptor).
+ */
+static long pfm_create_context_old(int fd, void __user *ureq, int count)
+{
+	struct pfm_context *new_ctx;
+	struct pfm_arch_context *ctx_arch;
+	struct pfm_smpl_fmt *fmt = NULL;
+	struct pfarg_context req_old;
+	void __user *usmpl_arg;
+	void *smpl_arg = NULL;
+	struct pfarg_ctx req;
+	int ret;
+
+	if (count != 1)
+		return -EINVAL;
+
+	if (copy_from_user(&req_old, ureq, sizeof(req_old)))
+		return -EFAULT;
+
+	memset(&req, 0, sizeof(req));
+
+	/*
+	 * sampling format args are following pfarg_context
+	 */
+	usmpl_arg = ureq+sizeof(req_old);
+
+	ret = pfm_get_smpl_arg_old(req_old.ctx_smpl_buf_id, usmpl_arg, -1,
+				   &smpl_arg, &fmt);
+	if (ret)
+		return ret;
+
+	req.ctx_flags = req_old.ctx_flags;
+
+	/*
+	 * returns file descriptor if >=0, or error code
+	 */
+	ret = __pfm_create_context(&req, fmt, smpl_arg, PFM_COMPAT, &new_ctx);
+	if (ret < 0) {
+		/*
+		 * no context, no descriptor: there is nothing to report
+		 * and nothing to undo. new_ctx is not valid here and
+		 * req_old.ctx_fd still holds what the user passed in.
+		 */
+		goto done;
+	}
+
+	ctx_arch = pfm_ctx_arch(new_ctx);
+	req_old.ctx_fd = ret;
+	req_old.ctx_smpl_vaddr = ctx_arch->ctx_smpl_vaddr;
+
+	if (copy_to_user(ureq, &req_old, sizeof(req_old))) {
+		pfm_undo_create_context_fd(req_old.ctx_fd, new_ctx);
+		ret = -EFAULT;
+	}
+done:
+	kfree(smpl_arg);
+
+	return ret;
+}
+
+/*
+ * obsolete call: use /proc/perfmon
+ *
+ * returns a zeroed pfarg_features with only ft_version filled in
+ */
+static long pfm_get_features_old(int fd, void __user *arg, int count)
+{
+	struct pfarg_features req;
+	int ret = 0;
+
+	if (count != 1)
+		return -EINVAL;
+
+	memset(&req, 0, sizeof(req));
+
+	req.ft_version = PFM_VERSION;
+
+	if (copy_to_user(arg, &req, sizeof(req)))
+		ret = -EFAULT;
+
+	return ret;
+}
+
+static long pfm_debug_old(int fd, void __user *arg, int count)
+{
+	int m;
+
+	if (count != 1)
+		return -EINVAL;
+
+	if (get_user(m, (int __user*)arg))
+		return -EFAULT;
+
+
+	pfm_controls.debug = m == 0 ? 0 : 1;
+
+	PFM_INFO("debugging %s (timing reset)",
+		 pfm_controls.debug ?
"on" : "off"); + + if (m == 0) + for_each_online_cpu(m) { + memset(&per_cpu(pfm_stats,m), 0, + sizeof(struct pfm_stats)); + } + return 0; +} + +static long pfm_unload_context_old(int fd, void __user *arg, int count) +{ + if (count) + return -EINVAL; + + return sys_pfm_unload_context(fd); +} + +static long pfm_restart_old(int fd, void __user *arg, int count) +{ + if (count) + return -EINVAL; + + return sys_pfm_restart(fd); +} + +static long pfm_stop_old(int fd, void __user *arg, int count) +{ + if (count) + return -EINVAL; + + return sys_pfm_stop(fd); +} + +static long pfm_start_old(int fd, void __user *arg, int count) +{ + if (count > 1) + return -EINVAL; + + return sys_pfm_start(fd, arg); +} + +static long pfm_load_context_old(int fd, void __user *ureq, int count) +{ + struct pfm_context *ctx; + struct task_struct *task; + struct file *filp; + unsigned long flags; + struct pfarg_load req; + int ret, fput_needed; + + if (count != 1) + return -EINVAL; + + if (copy_from_user(&req, ureq, sizeof(req))) + return -EFAULT; + + filp = fget_light(fd, &fput_needed); + if (unlikely(filp == NULL)) { + PFM_DBG("invalid fd %d", fd); + return -EBADF; + } + + task = NULL; + ctx = filp->private_data; + ret = -EBADF; + + if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { + PFM_DBG("fd %d not related to perfmon", fd); + goto error; + } + + /* + * in per-thread mode (not self-monitoring), get a reference + * on task to monitor. This must be done with interrupts enabled + * Upon succesful return, refcount on task is increased. + * + * fget_light() is protecting the context. 
+ */ + if (!ctx->flags.system) { + if (req.load_pid != current->pid) { + ret = pfm_get_task(ctx, req.load_pid, &task); + if (ret) + goto error; + } else + task = current; + } + /* + * irqsave is required to avoid race in case context is already + * loaded or with switch timeout in the case of self-monitoring + */ + spin_lock_irqsave(&ctx->lock, flags); + + /* + * the new interface requires the desired CPU to be explicitely set + * in this field. the kernel then checks you are on the right CPU + */ + if (ctx->flags.system) + req.load_pid = smp_processor_id(); + + ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags); + if (!ret) + ret = __pfm_load_context(ctx, &req, task); + + spin_unlock_irqrestore(&ctx->lock, flags); + + /* + * in per-thread mode (not self-monitoring), we need + * to decrease refcount on task to monitor: + * - load successful: we have a reference to the task in ctx->task + * - load failed : undo the effect of pfm_get_task() + */ + if (task && task != current) + put_task_struct(task); + +error: + fput_light(filp, fput_needed); + return ret; +} + +/* + * perfmon command descriptions + */ +struct pfm_cmd_desc { + long (*cmd_func)(int fd, void __user *arg, int count); +}; + +/* + * functions MUST be listed in the increasing order of + * their index (see permfon.h) + */ +#define PFM_CMD(name) \ + { .cmd_func = name, \ + } +#define PFM_CMD_NONE \ + { .cmd_func = NULL \ + } + +static struct pfm_cmd_desc pfm_cmd_tab[]={ +/* 0 */PFM_CMD_NONE, +/* 1 */PFM_CMD(pfm_write_pmcs_old), +/* 2 */PFM_CMD(pfm_write_pmds_old), +/* 3 */PFM_CMD(pfm_read_pmds_old), +/* 4 */PFM_CMD(pfm_stop_old), +/* 5 */PFM_CMD(pfm_start_old), +/* 6 */PFM_CMD_NONE, +/* 7 */PFM_CMD_NONE, +/* 8 */PFM_CMD(pfm_create_context_old), +/* 9 */PFM_CMD_NONE, +/* 10 */PFM_CMD(pfm_restart_old), +/* 11 */PFM_CMD_NONE, +/* 12 */PFM_CMD(pfm_get_features_old), +/* 13 */PFM_CMD(pfm_debug_old), +/* 14 */PFM_CMD_NONE, +/* 15 */PFM_CMD(pfm_get_default_pmcs_old), +/* 16 */PFM_CMD(pfm_load_context_old), 
+/* 17 */PFM_CMD(pfm_unload_context_old), +/* 18 */PFM_CMD_NONE, +/* 19 */PFM_CMD_NONE, +/* 20 */PFM_CMD_NONE, +/* 21 */PFM_CMD_NONE, +/* 22 */PFM_CMD_NONE, +/* 23 */PFM_CMD_NONE, +/* 24 */PFM_CMD_NONE, +/* 25 */PFM_CMD_NONE, +/* 26 */PFM_CMD_NONE, +/* 27 */PFM_CMD_NONE, +/* 28 */PFM_CMD_NONE, +/* 29 */PFM_CMD_NONE, +/* 30 */PFM_CMD_NONE, +/* 31 */PFM_CMD_NONE, +/* 32 */PFM_CMD(pfm_write_ibrs_old), +/* 33 */PFM_CMD(pfm_write_dbrs_old), +}; +#define PFM_CMD_COUNT ARRAY_SIZE(pfm_cmd_tab) + +/* + * system-call entry point (must return long) + */ +asmlinkage long sys_perfmonctl (int fd, int cmd, void __user *arg, int count) +{ + if (perfmon_disabled) + return -ENOSYS; + + if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT + || pfm_cmd_tab[cmd].cmd_func == NULL)) { + PFM_DBG("invalid cmd=%d", cmd); + return -EINVAL; + } + return (long)pfm_cmd_tab[cmd].cmd_func(fd, arg, count); +} + +/* + * legacy /proc/perfmon simplified interface (we only maintain the + * global information (no more per-cpu stats, use + * /sys/devices/system/cpu/cpuXX/perfmon + */ +static struct proc_dir_entry *perfmon_proc; + +static void *pfm_proc_start(struct seq_file *m, loff_t *pos) +{ + if (*pos == 0) + return (void *)1; + + return NULL; +} + +static void *pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return pfm_proc_start(m, pos); +} + +static void pfm_proc_stop(struct seq_file *m, void *v) +{ +} + +/* + * this is a simplified version of the legacy /proc/perfmon. 
+ * We have retained ONLY the key information that tools are actually + * using + */ +static void pfm_proc_show_header(struct seq_file *m) +{ + char buf[128]; + + pfm_sysfs_session_show(buf, sizeof(buf), 3); + + seq_printf(m, "perfmon version : %u.%u\n", + PFM_VERSION_MAJ, PFM_VERSION_MIN); + + seq_printf(m, "model : %s", buf); +} + +static int pfm_proc_show(struct seq_file *m, void *v) +{ + pfm_proc_show_header(m); + return 0; +} + +struct seq_operations pfm_proc_seq_ops = { + .start = pfm_proc_start, + .next = pfm_proc_next, + .stop = pfm_proc_stop, + .show = pfm_proc_show +}; + +static int pfm_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &pfm_proc_seq_ops); +} + + +static struct file_operations pfm_proc_fops = { + .open = pfm_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* + * called from pfm_arch_init(), global initialization, called once + */ +int __init pfm_ia64_compat_init(void) +{ + /* + * create /proc/perfmon + */ + perfmon_proc = create_proc_entry("perfmon", S_IRUGO, NULL); + if (perfmon_proc == NULL) { + PFM_ERR("cannot create /proc entry, perfmon disabled"); + return -1; + } + perfmon_proc->proc_fops = &pfm_proc_fops; + return 0; +} Index: linux-2.6/arch/ia64/perfmon/perfmon_default_smpl.c =================================================================== --- /dev/null +++ linux-2.6/arch/ia64/perfmon/perfmon_default_smpl.c @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This file implements the old default sampling buffer format + * for the Linux/ia64 perfmon-2 subsystem. This is for backward + * compatibility only. use the new default format in perfmon/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef MODULE
+#define FMT_FLAGS 0
+#else
+#define FMT_FLAGS PFM_FMTFL_IS_BUILTIN
+#endif
+
+#include
+#include
+
+MODULE_AUTHOR("Stephane Eranian ");
+MODULE_DESCRIPTION("perfmon old default sampling format");
+MODULE_LICENSE("GPL");
+
+/*
+ * check the format argument passed at context creation: it must exist
+ * and announce a buffer able to hold the header plus one entry carrying
+ * npmds 64-bit values. Returns 0 if acceptable, -EINVAL otherwise.
+ */
+static int pfm_default_fmt_validate(u32 flags, u16 npmds, void *data)
+{
+	struct pfm_default_smpl_arg *arg = data;
+	size_t min_buf_size;
+
+	if (data == NULL) {
+		PFM_DBG("no argument passed");
+		return -EINVAL;
+	}
+
+	/*
+	 * compute min buf size. All PMD are manipulated as 64bit entities
+	 */
+	min_buf_size = sizeof(struct pfm_default_smpl_hdr)
+		     + (sizeof(struct pfm_default_smpl_entry)
+		     + (npmds*sizeof(u64)));
+
+	PFM_DBG("validate flags=0x%x npmds=%u min_buf_size=%zu "
+		"buf_size=%lu CPU%d", flags, npmds, min_buf_size,
+		arg->buf_size, smp_processor_id());
+
+	/*
+	 * must hold at least the buffer header + one minimally sized entry
+	 */
+	if (arg->buf_size < min_buf_size) return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * report the buffer size requested in the format argument
+ */
+static int pfm_default_fmt_get_size(unsigned int flags, void *data,
+				    size_t *size)
+{
+	struct pfm_default_smpl_arg *arg = data;
+
+	/*
+	 * size has been validated in default_validate
+	 */
+	*size = arg->buf_size;
+
+	return 0;
+}
+
+/*
+ * initialize the header at the start of the sampling buffer: the first
+ * entry will be stored right after the header
+ */
+static int pfm_default_fmt_init(struct pfm_context *ctx, void *buf,
+				u32 flags, u16 npmds, void *data)
+{
+	struct pfm_default_smpl_hdr *hdr;
+	struct pfm_default_smpl_arg *arg = data;
+
+	hdr = buf;
+
+	hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION;
+	hdr->hdr_buf_size = arg->buf_size;
+	hdr->hdr_cur_offs = sizeof(*hdr);
+	hdr->hdr_overflows = 0;
+	hdr->hdr_count = 0;
+
+	PFM_DBG("buffer=%p buf_size=%lu hdr_size=%zu "
+		"hdr_version=%u cur_offs=%lu",
+		buf,
+		hdr->hdr_buf_size,
+		sizeof(*hdr),
+		hdr->hdr_version,
+		hdr->hdr_cur_offs);
+
+	return 0;
+}
+
+/*
+ * overflow handler: append one entry (fixed part followed by the
+ * num_smpl_pmds sampled PMD values) at the current offset.
+ *
+ * Returns 0 and requests a reset of the counters when room is left for
+ * another maximum sized entry. Otherwise bumps hdr_overflows, requests
+ * notification and masking of monitoring and returns -ENOBUFS.
+ */
+static int pfm_default_fmt_handler(void *buf, struct pfm_ovfl_arg *arg,
+				   unsigned long ip, u64 tstamp, void *data)
+{
+	struct pfm_default_smpl_hdr *hdr;
+	struct pfm_default_smpl_entry *ent;
+	void *cur, *last;
+	u64 *e;
+	size_t entry_size;
+	u16 npmds, i, ovfl_pmd;
+
+	hdr = buf;
+	cur = buf+hdr->hdr_cur_offs;
+	last = buf+hdr->hdr_buf_size;
+	ovfl_pmd = arg->ovfl_pmd;
+
+	/*
+	 * precheck for sanity
+	 */
+	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
+
+	npmds = arg->num_smpl_pmds;
+
+	ent = cur;
+
+	prefetch(arg->smpl_pmds_values);
+
+	/* each sampled PMD is stored as a 64-bit value */
+	entry_size = sizeof(*ent) + (npmds << 3);
+
+	/* position for first pmd */
+	e = (u64 *)(ent+1);
+
+	hdr->hdr_count++;
+
+	PFM_DBG_ovfl("count=%lu cur=%p last=%p free_bytes=%lu "
+		     "ovfl_pmd=%d npmds=%u",
+		     hdr->hdr_count,
+		     cur, last,
+		     last-cur,
+		     ovfl_pmd,
+		     npmds);
+
+	/*
+	 * current = task running at the time of the overflow.
+	 *
+	 * per-task mode:
+	 * 	- this is usually the task being monitored.
+	 * 	  Under certain conditions, it might be a different task
+	 *
+	 * system-wide:
+	 * 	- this is not necessarily the task controlling the session
+	 */
+	ent->pid = current->pid;
+	ent->ovfl_pmd = ovfl_pmd;
+	ent->last_reset_val = arg->pmd_last_reset;
+
+	/*
+	 * where did the fault happen (includes slot number)
+	 */
+	ent->ip = ip;
+
+	ent->tstamp = tstamp;
+	ent->cpu = smp_processor_id();
+	ent->set = arg->active_set;
+	ent->tgid = current->tgid;
+
+	/*
+	 * selectively store PMDs in increasing index number
+	 */
+	if (npmds) {
+		u64 *val = arg->smpl_pmds_values;
+		for(i=0; i < npmds; i++) {
+			*e++ = *val++;
+		}
+	}
+
+	/*
+	 * update position for next entry
+	 */
+	hdr->hdr_cur_offs += entry_size;
+	cur += entry_size;
+
+	/*
+	 * post check to avoid losing the last sample
+	 */
+	if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
+
+	/*
+	 * reset before returning from interrupt handler
+	 */
+	arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET;
+	return 0;
+full:
+	PFM_DBG_ovfl("smpl buffer full free=%lu, count=%lu",
+		     last-cur, hdr->hdr_count);
+
+	/*
+	 * increment number of buffer overflow.
+	 * important to detect duplicate set of samples.
+	 */
+	hdr->hdr_overflows++;
+
+	/*
+	 * request notification and masking of monitoring.
+	 * Notification is still subject to the overflowed
+	 */
+	arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY| PFM_OVFL_CTRL_MASK;
+
+	return -ENOBUFS; /* we are full, sorry */
+}
+
+/*
+ * restart after a buffer full notification: rewind the buffer to the
+ * first entry and request a reset of the counters. hdr_overflows is
+ * left untouched.
+ */
+static int pfm_default_fmt_restart(int is_active, u32 *ovfl_ctrl, void *buf)
+{
+	struct pfm_default_smpl_hdr *hdr;
+
+	hdr = buf;
+
+	hdr->hdr_count = 0;
+	hdr->hdr_cur_offs = sizeof(*hdr);
+
+	*ovfl_ctrl = PFM_OVFL_CTRL_RESET;
+
+	return 0;
+}
+
+/* nothing to release for this format */
+static int pfm_default_fmt_exit(void *buf)
+{
+	return 0;
+}
+
+static struct pfm_smpl_fmt default_fmt={
+	.fmt_name = "default-old",
+	.fmt_version = 0x10000,
+	.fmt_arg_size = sizeof(struct pfm_default_smpl_arg),
+	.fmt_validate = pfm_default_fmt_validate,
+	.fmt_getsize = pfm_default_fmt_get_size,
+	.fmt_init = pfm_default_fmt_init,
+	.fmt_handler = pfm_default_fmt_handler,
+	.fmt_restart = pfm_default_fmt_restart,
+	.fmt_exit = pfm_default_fmt_exit,
+	.fmt_flags = FMT_FLAGS,
+	.owner= THIS_MODULE
+};
+
+/*
+ * register the format; the result of pfm_fmt_register() is the module
+ * load status
+ */
+static int __init pfm_default_fmt_init_module(void)
+{
+	return pfm_fmt_register(&default_fmt);
+}
+
+static void __exit pfm_default_fmt_cleanup_module(void)
+{
+	pfm_fmt_unregister(&default_fmt);
+}
+
+module_init(pfm_default_fmt_init_module);
+module_exit(pfm_default_fmt_cleanup_module);
Index: linux-2.6/arch/ia64/perfmon/perfmon_generic.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/ia64/perfmon/perfmon_generic.c
@@ -0,0 +1,148 @@
+/*
+ * This file contains the generic PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
+ * contributed by Stephane Eranian
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include +#include + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("Generic IA-64 PMU description tables"); +MODULE_LICENSE("GPL"); + +#define RDEP(x) (1UL << (x)) + +#define PFM_IA64GEN_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)) +#define PFM_IA64GEN_RSVD (0xffffffffffff0080UL) +#define PFM_IA64GEN_NO64 (1UL<<5) + +/* forward declaration */ +static struct pfm_pmu_config pfm_ia64gen_pmu_conf; + +static struct pfm_arch_pmu_info pfm_ia64gen_pmu_info={ + .mask_pmcs = {PFM_IA64GEN_MASK_PMCS,}, +}; + +static struct pfm_regmap_desc pfm_ia64gen_pmc_desc[]={ +/* pmc0 */ PMX_NA, +/* pmc1 */ PMX_NA, +/* pmc2 */ PMX_NA, +/* pmc3 */ PMX_NA, +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 4), +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 5), +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 6), +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 7) +}; +#define PFM_IA64GEN_NUM_PMCS ARRAY_SIZE(pfm_ia64gen_pmc_desc) + +static struct pfm_regmap_desc pfm_ia64gen_pmd_desc[]={ +/* pmd0 */ PMX_NA, +/* pmd1 */ PMX_NA, +/* pmd2 */ PMX_NA, +/* pmd3 */ PMX_NA, +/* pmd4 */ PMD_D(PFM_REG_C, "PMD4", 4), +/* pmd5 */ PMD_D(PFM_REG_C, "PMD5", 5), +/* pmd6 */ PMD_D(PFM_REG_C, "PMD6", 6), +/* pmd7 */ PMD_D(PFM_REG_C, "PMD7", 7) +}; +#define PFM_IA64GEN_NUM_PMDS ARRAY_SIZE(pfm_ia64gen_pmd_desc) + +static int pfm_ia64gen_pmc_check(struct pfm_context *ctx, + struct pfm_event_set *set, + 
struct pfarg_pmc *req) +{ +#define PFM_IA64GEN_PMC_PM_POS6 (1UL<< 6) + u64 tmpval; + int is_system; + + is_system = ctx->flags.system; + tmpval = req->reg_value; + + switch(req->reg_num) { + case 4: + case 5: + case 6: + case 7: + /* set pmc.oi for 64-bit emulation */ + tmpval |= 1UL << 5; + + if (is_system) + tmpval |= PFM_IA64GEN_PMC_PM_POS6; + else + tmpval &= ~PFM_IA64GEN_PMC_PM_POS6; + break; + + } + req->reg_value = tmpval; + + return 0; +} + +/* + * matches anything + */ +static int pfm_ia64gen_probe_pmu(void) +{ + u64 pm_buffer[16]; + pal_perf_mon_info_u_t pm_info; + + /* + * call PAL_PERFMON_INFO to retrieve counter width which + * is implementation specific + */ + if (ia64_pal_perf_mon_info(pm_buffer, &pm_info)) + return -1; + + pfm_ia64gen_pmu_conf.counter_width = pm_info.pal_perf_mon_info_s.width; + + return 0; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static struct pfm_pmu_config pfm_ia64gen_pmu_conf={ + .pmu_name = "Generic IA-64", + .counter_width = 0, /* computed from PAL_PERFMON_INFO */ + .pmd_desc = pfm_ia64gen_pmd_desc, + .pmc_desc = pfm_ia64gen_pmc_desc, + .probe_pmu = pfm_ia64gen_probe_pmu, + .num_pmc_entries = PFM_IA64GEN_NUM_PMCS, + .num_pmd_entries = PFM_IA64GEN_NUM_PMDS, + .pmc_write_check = pfm_ia64gen_pmc_check, + .version = "1.0", + .flags = PFM_PMU_BUILTIN_FLAG, + .owner = THIS_MODULE, + .arch_info = & pfm_ia64gen_pmu_info + /* no read/write checkers */ +}; + +static int __init pfm_gen_pmu_init_module(void) +{ + return pfm_pmu_register(&pfm_ia64gen_pmu_conf); +} + +static void __exit pfm_gen_pmu_cleanup_module(void) +{ + pfm_pmu_unregister(&pfm_ia64gen_pmu_conf); +} + +module_init(pfm_gen_pmu_init_module); +module_exit(pfm_gen_pmu_cleanup_module); Index: linux-2.6/arch/ia64/perfmon/perfmon_itanium.c =================================================================== --- /dev/null +++ linux-2.6/arch/ia64/perfmon/perfmon_itanium.c @@ -0,0 +1,229 @@ +/* + * This file contains the Itanium PMU 
register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("Itanium (Merced) PMU description tables"); +MODULE_LICENSE("GPL"); + +#define RDEP(x) (1ULL << (x)) + +#define PFM_ITA_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\ + RDEP(12)) + +#define PFM_ITA_NO64 (1ULL<<5) + +static struct pfm_arch_pmu_info pfm_ita_pmu_info={ + .mask_pmcs = {PFM_ITA_MASK_PMCS,}, +}; +/* reserved bits are 1 in the mask */ +#define PFM_ITA_RSVD 0xfffffffffc8000a0UL +/* + * For debug registers, writing xBR(y) means we use also xBR(y+1). Hence using + * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information + * but this is fine because they are handled separately in the IA-64 specific + * code. 
+ */ +static struct pfm_regmap_desc pfm_ita_pmc_desc[]={ +/* pmc0 */ PMX_NA, +/* pmc1 */ PMX_NA, +/* pmc2 */ PMX_NA, +/* pmc3 */ PMX_NA, +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 4), +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 5), +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 6), +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 7), +/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 8), +/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 9), +/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xfffffffff3f0ff30UL, 0, 10), +/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x10000000UL, 0xffffffffecf0ff30UL, 0, 11), +/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0030UL, 0, 12), +/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x3ffff00000001UL, 0xfffffffffffffffeUL, 0, 13), +/* pmc14 */ PMX_NA, +/* pmc15 */ PMX_NA, +/* pmc16 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc24 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc32 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc40 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc48 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc56 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc64 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc72 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc80 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc88 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc96 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc104 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc112 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc120 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* 
pmc128 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc136 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc144 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc152 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc160 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc168 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc176 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc184 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc192 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc200 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc208 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc216 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc224 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc232 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc240 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc248 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0), +/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1), +/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2), +/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3), +/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4), +/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5), +/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6), +/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 0x0, 0x8000000000000000UL, 0, 7), +/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0), +/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1), +/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2), +/* pmc267 */ PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3), +/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4), +/* pmc269 */ 
PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5), +/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6), +/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7) +}; +#define PFM_ITA_NUM_PMCS ARRAY_SIZE(pfm_ita_pmc_desc) + +static struct pfm_regmap_desc pfm_ita_pmd_desc[]={ +/* pmd0 */ PMD_D(PFM_REG_I , "PMD0", 0), +/* pmd1 */ PMD_D(PFM_REG_I , "PMD1", 1), +/* pmd2 */ PMD_D(PFM_REG_I , "PMD2", 2), +/* pmd3 */ PMD_D(PFM_REG_I , "PMD3", 3), +/* pmd4 */ PMD_D(PFM_REG_C , "PMD4", 4), +/* pmd5 */ PMD_D(PFM_REG_C , "PMD5", 5), +/* pmd6 */ PMD_D(PFM_REG_C , "PMD6", 6), +/* pmd7 */ PMD_D(PFM_REG_C , "PMD7", 7), +/* pmd8 */ PMD_D(PFM_REG_I , "PMD8", 8), +/* pmd9 */ PMD_D(PFM_REG_I , "PMD9", 9), +/* pmd10 */ PMD_D(PFM_REG_I , "PMD10", 10), +/* pmd11 */ PMD_D(PFM_REG_I , "PMD11", 11), +/* pmd12 */ PMD_D(PFM_REG_I , "PMD12", 12), +/* pmd13 */ PMD_D(PFM_REG_I , "PMD13", 13), +/* pmd14 */ PMD_D(PFM_REG_I , "PMD14", 14), +/* pmd15 */ PMD_D(PFM_REG_I , "PMD15", 15), +/* pmd16 */ PMD_D(PFM_REG_I , "PMD16", 16), +/* pmd17 */ PMD_D(PFM_REG_I , "PMD17", 17) +}; +#define PFM_ITA_NUM_PMDS ARRAY_SIZE(pfm_ita_pmd_desc) + +static int pfm_ita_pmc_check(struct pfm_context *ctx, + struct pfm_event_set *set, + struct pfarg_pmc *req) +{ +#define PFM_ITA_PMC_PM_POS6 (1UL<< 6) + struct pfm_arch_context *ctx_arch; + u64 tmpval; + u16 cnum; + int ret = 0, is_system; + + tmpval = req->reg_value; + cnum = req->reg_num; + ctx_arch = pfm_ctx_arch(ctx); + is_system = ctx->flags.system; + + switch(cnum) { + case 4: + case 5: + case 6: + case 7: + case 10: + case 11: + case 12: if (is_system) + tmpval |= PFM_ITA_PMC_PM_POS6; + else + tmpval &= ~PFM_ITA_PMC_PM_POS6; + break; + } + + /* + * we must clear the (instruction) debug registers if pmc13.ta bit is + * cleared before they are written (fl_using_dbreg==0) to avoid + * picking up stale information. 
+ */ + if (cnum == 13 && ((tmpval & 0x1) == 0) + && ctx_arch->flags.use_dbr == 0) { + PFM_DBG("pmc13 has pmc13.ta cleared, clearing ibr"); + ret = pfm_ia64_mark_dbregs_used(ctx, set); + if (ret) return ret; + } + + /* + * we must clear the (data) debug registers if pmc11.pt bit is cleared + * before they are written (fl_using_dbreg==0) to avoid picking up + * stale information. + */ + if (cnum == 11 && ((tmpval >> 28)& 0x1) == 0 + && ctx_arch->flags.use_dbr == 0) { + PFM_DBG("pmc11 has pmc11.pt cleared, clearing dbr"); + ret = pfm_ia64_mark_dbregs_used(ctx, set); + if (ret) return ret; + } + + req->reg_value = tmpval; + + return 0; +} + +static int pfm_ita_probe_pmu(void) +{ + return local_cpu_data->family == 0x7 && !ia64_platform_is("hpsim") + ? 0 : -1; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static struct pfm_pmu_config pfm_ita_pmu_conf={ + .pmu_name = "Itanium", + .counter_width = 32, + .pmd_desc = pfm_ita_pmd_desc, + .pmc_desc = pfm_ita_pmc_desc, + .pmc_write_check = pfm_ita_pmc_check, + .num_pmc_entries = PFM_ITA_NUM_PMCS, + .num_pmd_entries = PFM_ITA_NUM_PMDS, + .probe_pmu = pfm_ita_probe_pmu, + .version = "1.0", + .flags = PFM_PMU_BUILTIN_FLAG, + .owner = THIS_MODULE, + .arch_info = &pfm_ita_pmu_info +}; + +static int __init pfm_ita_pmu_init_module(void) +{ + return pfm_pmu_register(&pfm_ita_pmu_conf); +} + +static void __exit pfm_ita_pmu_cleanup_module(void) +{ + pfm_pmu_unregister(&pfm_ita_pmu_conf); +} + +module_init(pfm_ita_pmu_init_module); +module_exit(pfm_ita_pmu_cleanup_module); + Index: linux-2.6/arch/ia64/perfmon/perfmon_mckinley.c =================================================================== --- /dev/null +++ linux-2.6/arch/ia64/perfmon/perfmon_mckinley.c @@ -0,0 +1,285 @@ +/* + * This file contains the McKinley PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
+ * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("Itanium 2 (McKinley) PMU description tables"); +MODULE_LICENSE("GPL"); + +#define RDEP(x) (1UL << (x)) + +#define PFM_MCK_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\ + RDEP(12)) + +#define PFM_MCK_NO64 (1UL<<5) + +static struct pfm_arch_pmu_info pfm_mck_pmu_info={ + .mask_pmcs = {PFM_MCK_MASK_PMCS,}, +}; + +/* reserved bits are 1 in the mask */ +#define PFM_ITA2_RSVD 0xfffffffffc8000a0UL + +/* + * For debug registers, writing xBR(y) means we use also xBR(y+1). Hence using + * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information + * but this is fine because they are handled separately in the IA-64 specific + * code. 
+ */ +static struct pfm_regmap_desc pfm_mck_pmc_desc[]={ +/* pmc0 */ PMX_NA, +/* pmc1 */ PMX_NA, +/* pmc2 */ PMX_NA, +/* pmc3 */ PMX_NA, +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x800020UL, 0xfffffffffc8000a0, PFM_MCK_NO64, 4), +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 5), +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 6), +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 7), +/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xffffffff3fffffffUL, 0xc0000004UL, 0, 8), +/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xffffffff3ffffffcUL, 0xc0000004UL, 0, 9), +/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xffffffffffff0000UL, 0, 10), +/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x0, 0xfffffffffcf0fe30UL, 0, 11), +/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0000UL, 0, 12), +/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x2078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 13), +/* pmc14 */ PMC_D(PFM_REG_W , "PMC14", 0x0db60db60db60db6UL, 0xffffffffffffdb6dUL, 0, 14), +/* pmc15 */ PMC_D(PFM_REG_W , "PMC15", 0xfffffff0UL, 0xfffffffffffffff0UL, 0, 15), +/* pmc16 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc24 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc32 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc40 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc48 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc56 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc64 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc72 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc80 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc88 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc96 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc104 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc112 */ PMX_NA, 
PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc120 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc128 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc136 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc144 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc152 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc160 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc168 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc176 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc184 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc192 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc200 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc208 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc216 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc224 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc232 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc240 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc248 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0), +/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1), +/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2), +/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3), +/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4), +/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5), +/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6), +/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 0x0, 0x8000000000000000UL, 0, 7), +/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0), +/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1), +/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2), +/* pmc267 */ 
PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3), +/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4), +/* pmc269 */ PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5), +/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6), +/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7) +}; +#define PFM_MCK_NUM_PMCS ARRAY_SIZE(pfm_mck_pmc_desc) + +static struct pfm_regmap_desc pfm_mck_pmd_desc[]={ +/* pmd0 */ PMD_D(PFM_REG_I, "PMD0", 0), +/* pmd1 */ PMD_D(PFM_REG_I, "PMD1", 1), +/* pmd2 */ PMD_D(PFM_REG_I, "PMD2", 2), +/* pmd3 */ PMD_D(PFM_REG_I, "PMD3", 3), +/* pmd4 */ PMD_D(PFM_REG_C, "PMD4", 4), +/* pmd5 */ PMD_D(PFM_REG_C, "PMD5", 5), +/* pmd6 */ PMD_D(PFM_REG_C, "PMD6", 6), +/* pmd7 */ PMD_D(PFM_REG_C, "PMD7", 7), +/* pmd8 */ PMD_D(PFM_REG_I, "PMD8", 8), +/* pmd9 */ PMD_D(PFM_REG_I, "PMD9", 9), +/* pmd10 */ PMD_D(PFM_REG_I, "PMD10", 10), +/* pmd11 */ PMD_D(PFM_REG_I, "PMD11", 11), +/* pmd12 */ PMD_D(PFM_REG_I, "PMD12", 12), +/* pmd13 */ PMD_D(PFM_REG_I, "PMD13", 13), +/* pmd14 */ PMD_D(PFM_REG_I, "PMD14", 14), +/* pmd15 */ PMD_D(PFM_REG_I, "PMD15", 15), +/* pmd16 */ PMD_D(PFM_REG_I, "PMD16", 16), +/* pmd17 */ PMD_D(PFM_REG_I, "PMD17", 17) +}; +#define PFM_MCK_NUM_PMDS ARRAY_SIZE(pfm_mck_pmd_desc) + +static int pfm_mck_pmc_check(struct pfm_context *ctx, + struct pfm_event_set *set, + struct pfarg_pmc *req) +{ + struct pfm_arch_context *ctx_arch; + u64 val8 = 0, val14 = 0, val13 = 0; + u64 tmpval; + u16 cnum; + int ret = 0, check_case1 = 0; + int is_system; + + tmpval = req->reg_value; + cnum = req->reg_num; + ctx_arch = pfm_ctx_arch(ctx); + is_system = ctx->flags.system; + +#define PFM_MCK_PMC_PM_POS6 (1UL<< 6) +#define PFM_MCK_PMC_PM_POS4 (1UL<< 4) + + switch(cnum) { + case 4: + case 5: + case 6: + case 7: + case 11: + case 12: if (is_system) + tmpval |= PFM_MCK_PMC_PM_POS6; + else + tmpval &= ~PFM_MCK_PMC_PM_POS6; + break; + + case 8: val8 = tmpval; + val13 = set->pmcs[13]; + val14 = set->pmcs[14]; + check_case1 = 1; + break; + + 
case 10: if (is_system) + tmpval |= PFM_MCK_PMC_PM_POS4; + else + tmpval &= ~PFM_MCK_PMC_PM_POS4; + break; + + case 13: + val8 = set->pmcs[8]; + val13 = tmpval; + val14 = set->pmcs[14]; + check_case1 = 1; + break; + + case 14: + val8 = set->pmcs[8]; + val13 = set->pmcs[13]; + val14 = tmpval; + check_case1 = 1; + break; + } + + /* + * check illegal configuration which can produce inconsistencies + * in tagging i-side events in L1D and L2 caches + */ + if (check_case1) { + ret = (((val13 >> 45) & 0xf) == 0 && ((val8 & 0x1) == 0)) + && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0) + ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0)); + + if (ret) { + PFM_DBG("perfmon: invalid config pmc8=0x%lx " + "pmc13=0x%lx pmc14=0x%lx", + val8, val13, val14); + return -EINVAL; + } + } + + /* + * check if configuration implicitly activates the use of + * the debug registers. If true, then we ensure that this is + * possible and that we do not pick up stale values in the HW + * registers. + * + * We postpone the checks of pmc13 and pmc14 to avoid side effects + * in case of errors + */ + + /* + * pmc13 is "active" if: + * one of the pmc13.cfg_dbrpXX field is different from 0x3 + * AND + * the corresponding pmc13.ena_dbrpXX is set. + */ + if (cnum == 13 && (tmpval & 0x1e00000000000UL) + && (tmpval & 0x18181818UL) != 0x18181818UL + && ctx_arch->flags.use_dbr == 0) { + PFM_DBG("pmc13=0x%lx active", tmpval); + ret = pfm_ia64_mark_dbregs_used(ctx, set); + if (ret) return ret; + } + + /* + * if any pmc14.ibrpX bit is enabled we must clear the ibrs + */ + if (cnum == 14 && ((tmpval & 0x2222UL) != 0x2222UL) + && ctx_arch->flags.use_dbr == 0) { + PFM_DBG("pmc14=0x%lx active", tmpval); + ret = pfm_ia64_mark_dbregs_used(ctx, set); + if (ret) return ret; + } + + req->reg_value = tmpval; + + return 0; +} + +static int pfm_mck_probe_pmu(void) +{ + return local_cpu_data->family == 0x1f ? 0 : -1; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */ +static struct pfm_pmu_config pfm_mck_pmu_conf={ + .pmu_name = "Itanium 2", + .counter_width = 47, + .pmd_desc = pfm_mck_pmd_desc, + .pmc_desc = pfm_mck_pmc_desc, + .pmc_write_check = pfm_mck_pmc_check, + .num_pmc_entries = PFM_MCK_NUM_PMCS, + .num_pmd_entries = PFM_MCK_NUM_PMDS, + .probe_pmu = pfm_mck_probe_pmu, + .version = "1.0", + .flags = PFM_PMU_BUILTIN_FLAG, + .owner = THIS_MODULE, + .arch_info = &pfm_mck_pmu_info, +}; + +static int __init pfm_mck_pmu_init_module(void) +{ + return pfm_pmu_register(&pfm_mck_pmu_conf); +} + +static void __exit pfm_mck_pmu_cleanup_module(void) +{ + pfm_pmu_unregister(&pfm_mck_pmu_conf); +} + +module_init(pfm_mck_pmu_init_module); +module_exit(pfm_mck_pmu_cleanup_module); Index: linux-2.6/arch/ia64/perfmon/perfmon_montecito.c =================================================================== --- /dev/null +++ linux-2.6/arch/ia64/perfmon/perfmon_montecito.c @@ -0,0 +1,404 @@ +/* + * This file contains the Montecito PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include +#include + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("Dual-Core Itanium 2 (Montecito) PMU description table"); +MODULE_LICENSE("GPL"); + +#define RDEP(x) (1UL << (x)) + +#define PFM_MONT_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|\ + RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|\ + RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|\ + RDEP(37)|RDEP(39)|RDEP(40)|RDEP(42)) + +#define PFM_MONT_NO64 (1UL<<5) + +static struct pfm_arch_pmu_info pfm_mont_pmu_info={ + .mask_pmcs = {PFM_MONT_MASK_PMCS,}, +}; + +#define PFM_MONT_RSVD 0xffffffff838000a0UL +/* + * + * For debug registers, writing xBR(y) means we use also xBR(y+1). Hence using + * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information + * but this is fine because they are handled separately in the IA-64 specific + * code. 
+ * + * For PMC4-PMC15, PMC40: we force pmc.ism=2 (IA-64 mode only) + */ +static struct pfm_regmap_desc pfm_mont_pmc_desc[]={ +/* pmc0 */ PMX_NA, +/* pmc1 */ PMX_NA, +/* pmc2 */ PMX_NA, +/* pmc3 */ PMX_NA, +/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 4), +/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 5), +/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 6), +/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 7), +/* pmc8 */ PMC_D(PFM_REG_W64, "PMC8" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 8), +/* pmc9 */ PMC_D(PFM_REG_W64, "PMC9" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 9), +/* pmc10 */ PMC_D(PFM_REG_W64, "PMC10", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 10), +/* pmc11 */ PMC_D(PFM_REG_W64, "PMC11", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 11), +/* pmc12 */ PMC_D(PFM_REG_W64, "PMC12", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 12), +/* pmc13 */ PMC_D(PFM_REG_W64, "PMC13", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 13), +/* pmc14 */ PMC_D(PFM_REG_W64, "PMC14", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 14), +/* pmc15 */ PMC_D(PFM_REG_W64, "PMC15", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 15), +/* pmc16 */ PMX_NA, +/* pmc17 */ PMX_NA, +/* pmc18 */ PMX_NA, +/* pmc19 */ PMX_NA, +/* pmc20 */ PMX_NA, +/* pmc21 */ PMX_NA, +/* pmc22 */ PMX_NA, +/* pmc23 */ PMX_NA, +/* pmc24 */ PMX_NA, +/* pmc25 */ PMX_NA, +/* pmc26 */ PMX_NA, +/* pmc27 */ PMX_NA, +/* pmc28 */ PMX_NA, +/* pmc29 */ PMX_NA, +/* pmc30 */ PMX_NA, +/* pmc31 */ PMX_NA, +/* pmc32 */ PMC_D(PFM_REG_W , "PMC32", 0x30f01ffffffffffUL, 0xfcf0fe0000000000UL, 0, 32), +/* pmc33 */ PMC_D(PFM_REG_W , "PMC33", 0x0, 0xfffffe0000000000UL, 0, 33), +/* pmc34 */ PMC_D(PFM_REG_W , "PMC34", 0xf01ffffffffffUL, 0xfff0fe0000000000UL, 0, 34), +/* pmc35 */ PMC_D(PFM_REG_W , "PMC35", 0x0, 0x1ffffffffffUL, 0, 35), +/* pmc36 */ PMC_D(PFM_REG_W , "PMC36", 0xfffffff0UL, 0xfffffffffffffff0UL, 
0, 36), +/* pmc37 */ PMC_D(PFM_REG_W , "PMC37", 0x0, 0xffffffffffffc000UL, 0, 37), +/* pmc38 */ PMC_D(PFM_REG_W , "PMC38", 0xdb6UL, 0xffffffffffffdb6dUL, 0, 38), +/* pmc39 */ PMC_D(PFM_REG_W , "PMC39", 0x0, 0xffffffffffff0030UL, 0, 39), +/* pmc40 */ PMC_D(PFM_REG_W , "PMC40", 0x2000000UL, 0xfffffffffff0fe30UL, 0, 40), +/* pmc41 */ PMC_D(PFM_REG_W , "PMC41", 0x00002078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 41), +/* pmc42 */ PMC_D(PFM_REG_W , "PMC42", 0x0, 0xfff800b0UL, 0, 42), +/* pmc43 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc48 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc56 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc64 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc72 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc80 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc88 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc96 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc104 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc112 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc120 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc128 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc136 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc144 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc152 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc160 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc168 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc176 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc184 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc192 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc200 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc208 */ PMX_NA, 
PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc216 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc224 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc232 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc240 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc248 */ PMX_NA, PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA,PMX_NA, +/* pmc256 */ PMC_D(PFM_REG_W, "IBR0", 0x0, 0, 0, 0), +/* pmc257 */ PMC_D(PFM_REG_W, "IBR1", 0x0, 0x8000000000000000UL, 0, 1), +/* pmc258 */ PMC_D(PFM_REG_W, "IBR2", 0x0, 0, 0, 2), +/* pmc259 */ PMC_D(PFM_REG_W, "IBR3", 0x0, 0x8000000000000000UL, 0, 3), +/* pmc260 */ PMC_D(PFM_REG_W, "IBR4", 0x0, 0, 0, 4), +/* pmc261 */ PMC_D(PFM_REG_W, "IBR5", 0x0, 0x8000000000000000UL, 0, 5), +/* pmc262 */ PMC_D(PFM_REG_W, "IBR6", 0x0, 0, 0, 6), +/* pmc263 */ PMC_D(PFM_REG_W, "IBR7", 0x0, 0x8000000000000000UL, 0, 7), +/* pmc264 */ PMC_D(PFM_REG_W, "DBR0", 0x0, 0, 0, 0), +/* pmc265 */ PMC_D(PFM_REG_W, "DBR1", 0x0, 0xc000000000000000UL, 0, 1), +/* pmc266 */ PMC_D(PFM_REG_W, "DBR2", 0x0, 0, 0, 2), +/* pmc267 */ PMC_D(PFM_REG_W, "DBR3", 0x0, 0xc000000000000000UL, 0, 3), +/* pmc268 */ PMC_D(PFM_REG_W, "DBR4", 0x0, 0, 0, 4), +/* pmc269 */ PMC_D(PFM_REG_W, "DBR5", 0x0, 0xc000000000000000UL, 0, 5), +/* pmc270 */ PMC_D(PFM_REG_W, "DBR6", 0x0, 0, 0, 6), +/* pmc271 */ PMC_D(PFM_REG_W, "DBR7", 0x0, 0xc000000000000000UL, 0, 7) +}; +#define PFM_MONT_NUM_PMCS ARRAY_SIZE(pfm_mont_pmc_desc) + +static struct pfm_regmap_desc pfm_mont_pmd_desc[]={ +/* pmd0 */ PMX_NA, +/* pmd1 */ PMX_NA, +/* pmd2 */ PMX_NA, +/* pmd3 */ PMX_NA, +/* pmd4 */ PMD_D(PFM_REG_C, "PMD4", 4), +/* pmd5 */ PMD_D(PFM_REG_C, "PMD5", 5), +/* pmd6 */ PMD_D(PFM_REG_C, "PMD6", 6), +/* pmd7 */ PMD_D(PFM_REG_C, "PMD7", 7), +/* pmd8 */ PMD_D(PFM_REG_C, "PMD8", 8), +/* pmd9 */ PMD_D(PFM_REG_C, "PMD9", 9), +/* pmd10 */ PMD_D(PFM_REG_C, "PMD10", 10), +/* pmd11 */ PMD_D(PFM_REG_C, "PMD11", 11), +/* pmd12 */ PMD_D(PFM_REG_C, "PMD12", 12), 
+/* pmd13 */ PMD_D(PFM_REG_C, "PMD13", 13), +/* pmd14 */ PMD_D(PFM_REG_C, "PMD14", 14), +/* pmd15 */ PMD_D(PFM_REG_C, "PMD15", 15), +/* pmd16 */ PMX_NA, +/* pmd17 */ PMX_NA, +/* pmd18 */ PMX_NA, +/* pmd19 */ PMX_NA, +/* pmd20 */ PMX_NA, +/* pmd21 */ PMX_NA, +/* pmd22 */ PMX_NA, +/* pmd23 */ PMX_NA, +/* pmd24 */ PMX_NA, +/* pmd25 */ PMX_NA, +/* pmd26 */ PMX_NA, +/* pmd27 */ PMX_NA, +/* pmd28 */ PMX_NA, +/* pmd29 */ PMX_NA, +/* pmd30 */ PMX_NA, +/* pmd31 */ PMX_NA, +/* pmd32 */ PMD_D(PFM_REG_I, "PMD32", 32), +/* pmd33 */ PMD_D(PFM_REG_I, "PMD33", 33), +/* pmd34 */ PMD_D(PFM_REG_I, "PMD34", 34), +/* pmd35 */ PMD_D(PFM_REG_I, "PMD35", 35), +/* pmd36 */ PMD_D(PFM_REG_I, "PMD36", 36), +/* pmd37 */ PMX_NA, +/* pmd38 */ PMD_D(PFM_REG_I, "PMD38", 38), +/* pmd39 */ PMD_D(PFM_REG_I, "PMD39", 39), +/* pmd40 */ PMX_NA, +/* pmd41 */ PMX_NA, +/* pmd42 */ PMX_NA, +/* pmd43 */ PMX_NA, +/* pmd44 */ PMX_NA, +/* pmd45 */ PMX_NA, +/* pmd46 */ PMX_NA, +/* pmd47 */ PMX_NA, +/* pmd48 */ PMD_D(PFM_REG_I, "PMD48", 48), +/* pmd49 */ PMD_D(PFM_REG_I, "PMD49", 49), +/* pmd50 */ PMD_D(PFM_REG_I, "PMD50", 50), +/* pmd51 */ PMD_D(PFM_REG_I, "PMD51", 51), +/* pmd52 */ PMD_D(PFM_REG_I, "PMD52", 52), +/* pmd53 */ PMD_D(PFM_REG_I, "PMD53", 53), +/* pmd54 */ PMD_D(PFM_REG_I, "PMD54", 54), +/* pmd55 */ PMD_D(PFM_REG_I, "PMD55", 55), +/* pmd56 */ PMD_D(PFM_REG_I, "PMD56", 56), +/* pmd57 */ PMD_D(PFM_REG_I, "PMD57", 57), +/* pmd58 */ PMD_D(PFM_REG_I, "PMD58", 58), +/* pmd59 */ PMD_D(PFM_REG_I, "PMD59", 59), +/* pmd60 */ PMD_D(PFM_REG_I, "PMD60", 60), +/* pmd61 */ PMD_D(PFM_REG_I, "PMD61", 61), +/* pmd62 */ PMD_D(PFM_REG_I, "PMD62", 62), +/* pmd63 */ PMD_D(PFM_REG_I, "PMD63", 63) +}; +#define PFM_MONT_NUM_PMDS ARRAY_SIZE(pfm_mont_pmd_desc) + +static int pfm_mont_has_ht; + +static int pfm_mont_pmc_check(struct pfm_context *ctx, + struct pfm_event_set *set, + struct pfarg_pmc *req) +{ + struct pfm_arch_context *ctx_arch; + u64 val32 = 0, val38 = 0, val41 = 0; + u64 tmpval; + u16 cnum; + int ret = 0, 
check_case1 = 0; + int is_system; + + tmpval = req->reg_value; + cnum = req->reg_num; + ctx_arch = pfm_ctx_arch(ctx); + is_system = ctx->flags.system; + +#define PFM_MONT_PMC_PM_POS6 (1UL<<6) +#define PFM_MONT_PMC_PM_POS4 (1UL<<4) + + switch(cnum) { + case 4: + case 5: + case 6: + case 7: + case 8: + case 9: if (is_system) + tmpval |= PFM_MONT_PMC_PM_POS6; + else + tmpval &= ~PFM_MONT_PMC_PM_POS6; + break; + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: if ((req->reg_flags & PFM_REGFL_NO_EMUL64) == 0) { + if (pfm_mont_has_ht) { /* set by pfm_handle_errata() */ + PFM_INFO("perfmon: Errata 121 PMD10/PMD15 cannot be used to overflow " + "when threads are on"); + return -EINVAL; + } + } + if (is_system) + tmpval |= PFM_MONT_PMC_PM_POS6; + else + tmpval &= ~PFM_MONT_PMC_PM_POS6; + break; + case 39: + case 40: + case 42: if (pfm_mont_has_ht && ((req->reg_value >> 8) & 0x7) == 4) { + PFM_INFO("perfmon: Errata 120: IP-EAR not available when threads are on"); + return -EINVAL; + } + if (is_system) + tmpval |= PFM_MONT_PMC_PM_POS6; + else + tmpval &= ~PFM_MONT_PMC_PM_POS6; + break; + + case 32: val32 = tmpval; + val38 = set->pmcs[38]; + val41 = set->pmcs[41]; + check_case1 = 1; + break; + + case 37: + if (is_system) + tmpval |= PFM_MONT_PMC_PM_POS4; + else + tmpval &= ~PFM_MONT_PMC_PM_POS4; + break; + + case 38: val38 = tmpval; + val32 = set->pmcs[32]; + val41 = set->pmcs[41]; + check_case1 = 1; + break; + case 41: val41 = tmpval; + val32 = set->pmcs[32]; + val38 = set->pmcs[38]; + check_case1 = 1; + break; + } + + if (check_case1) { + ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0) + && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0) + || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0)); + if (ret) { + PFM_DBG("perfmon: invalid config pmc38=0x%lx " + "pmc41=0x%lx pmc32=0x%lx", + val38, val41, val32); + return -EINVAL; + } + } + + /* + * check if configuration implicitly activates the use of the + * debug registers.
If true, then we ensure that this is possible + * and that we do not pick up stale value in the HW registers. + */ + + /* + * + * pmc41 is "active" if: + * one of the pmc41.cfgdtagXX field is different from 0x3 + * AND + * the corsesponding pmc41.en_dbrpXX is set. + * AND + * ctx_fl_use_dbr (dbr not yet used) + */ + if (cnum == 41 + && (tmpval & 0x1e00000000000) + && (tmpval & 0x18181818) != 0x18181818 + && ctx_arch->flags.use_dbr == 0) { + PFM_DBG("pmc41=0x%lx active, clearing dbr", tmpval); + ret = pfm_ia64_mark_dbregs_used(ctx, set); + if (ret) return ret; + } + /* + * we must clear the (instruction) debug registers if: + * pmc38.ig_ibrpX is 0 (enabled) + * and + * fl_use_dbr == 0 (dbr not yet used) + */ + if (cnum == 38 && ((tmpval & 0x492) != 0x492) + && ctx_arch->flags.use_dbr == 0) { + PFM_DBG("pmc38=0x%lx active pmc38, clearing ibr", tmpval); + ret = pfm_ia64_mark_dbregs_used(ctx, set); + if (ret) return ret; + + } + req->reg_value = tmpval; + return 0; +} + +static void pfm_handle_errata(void) +{ + pfm_mont_has_ht = 1; + + PFM_INFO("activating workaround for errata 120 " + "(Disable IP-EAR when threads are on)"); + + PFM_INFO("activating workaround for Errata 121 " + "(PMC10-PMC15 cannot be used to overflow" + " when threads are on"); +} +static int pfm_mont_probe_pmu(void) +{ + if (local_cpu_data->family != 0x20) + return -1; + + /* + * the 2 errata must be activated when + * threads are/can be enabled + */ + if (is_multithreading_enabled()) + pfm_handle_errata(); + + return 0; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
+ */ +static struct pfm_pmu_config pfm_mont_pmu_conf={ + .pmu_name = "Montecito", + .counter_width = 47, + .pmd_desc = pfm_mont_pmd_desc, + .pmc_desc = pfm_mont_pmc_desc, + .num_pmc_entries = PFM_MONT_NUM_PMCS, + .num_pmd_entries = PFM_MONT_NUM_PMDS, + .pmc_write_check = pfm_mont_pmc_check, + .probe_pmu = pfm_mont_probe_pmu, + .version = "1.0", + .arch_info = &pfm_mont_pmu_info, + .flags = PFM_PMU_BUILTIN_FLAG, + .owner = THIS_MODULE +}; + +static int __init pfm_mont_pmu_init_module(void) +{ + return pfm_pmu_register(&pfm_mont_pmu_conf); +} + +static void __exit pfm_mont_pmu_cleanup_module(void) +{ + pfm_pmu_unregister(&pfm_mont_pmu_conf); +} + +module_init(pfm_mont_pmu_init_module); +module_exit(pfm_mont_pmu_cleanup_module); Index: linux-2.6/arch/mips/Kconfig =================================================================== --- linux-2.6.orig/arch/mips/Kconfig +++ linux-2.6/arch/mips/Kconfig @@ -1775,6 +1775,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "arch/mips/perfmon/Kconfig" + endmenu config RWSEM_GENERIC_SPINLOCK Index: linux-2.6/arch/mips/Makefile =================================================================== --- linux-2.6.orig/arch/mips/Makefile +++ linux-2.6/arch/mips/Makefile @@ -148,6 +148,12 @@ endif endif # +# Perfmon support +# + +core-$(CONFIG_PERFMON) += arch/mips/perfmon/ + +# # Firmware support # libs-$(CONFIG_ARC) += arch/mips/arc/ Index: linux-2.6/arch/mips/kernel/process.c =================================================================== --- linux-2.6.orig/arch/mips/kernel/process.c +++ linux-2.6/arch/mips/kernel/process.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -90,6 +91,7 @@ void start_thread(struct pt_regs * regs, void exit_thread(void) { + pfm_exit_thread(current); } void flush_thread(void) @@ -164,6 +166,8 @@ int copy_thread(int nr, unsigned long cl if (clone_flags & CLONE_SETTLS) ti->tp_value = regs->regs[7]; + pfm_copy_thread(p); + return 0; } Index: 
linux-2.6/arch/mips/kernel/scall32-o32.S =================================================================== --- linux-2.6.orig/arch/mips/kernel/scall32-o32.S +++ linux-2.6/arch/mips/kernel/scall32-o32.S @@ -662,6 +662,18 @@ einval: li v0, -EINVAL sys sys_signalfd 3 sys sys_timerfd 4 sys sys_eventfd 1 + sys sys_pfm_create_context 4 /* 4320 */ + sys sys_pfm_write_pmcs 3 + sys sys_pfm_write_pmds 4 + sys sys_pfm_read_pmds 3 + sys sys_pfm_load_context 2 + sys sys_pfm_start 2 /* 4325 */ + sys sys_pfm_stop 1 + sys sys_pfm_restart 1 + sys sys_pfm_create_evtsets 3 + sys sys_pfm_getinfo_evtsets 3 + sys sys_pfm_delete_evtsets 3 /* 4330 */ + sys sys_pfm_unload_context 1 .endm /* We pre-compute the number of _instruction_ bytes needed to Index: linux-2.6/arch/mips/kernel/scall64-64.S =================================================================== --- linux-2.6.orig/arch/mips/kernel/scall64-64.S +++ linux-2.6/arch/mips/kernel/scall64-64.S @@ -477,4 +477,16 @@ sys_call_table: PTR sys_signalfd PTR sys_timerfd PTR sys_eventfd + PTR sys_pfm_create_context + PTR sys_pfm_write_pmcs /* 5280 */ + PTR sys_pfm_write_pmds + PTR sys_pfm_read_pmds + PTR sys_pfm_load_context + PTR sys_pfm_start + PTR sys_pfm_stop /* 5285 */ + PTR sys_pfm_restart + PTR sys_pfm_create_evtsets + PTR sys_pfm_getinfo_evtsets + PTR sys_pfm_delete_evtsets + PTR sys_pfm_unload_context /* 5290 */ .size sys_call_table,.-sys_call_table Index: linux-2.6/arch/mips/kernel/scall64-n32.S =================================================================== --- linux-2.6.orig/arch/mips/kernel/scall64-n32.S +++ linux-2.6/arch/mips/kernel/scall64-n32.S @@ -400,7 +400,19 @@ EXPORT(sysn32_call_table) PTR sys_ioprio_set PTR sys_ioprio_get PTR compat_sys_utimensat - PTR compat_sys_signalfd /* 5280 */ + PTR compat_sys_signalfd /* 6280 */ PTR compat_sys_timerfd PTR sys_eventfd + PTR sys_pfm_create_context + PTR sys_pfm_write_pmcs + PTR sys_pfm_write_pmds /* 6285 */ + PTR sys_pfm_read_pmds + PTR sys_pfm_load_context + PTR
sys_pfm_start + PTR sys_pfm_stop + PTR sys_pfm_restart /* 6290 */ + PTR sys_pfm_create_evtsets + PTR sys_pfm_getinfo_evtsets + PTR sys_pfm_delete_evtsets + PTR sys_pfm_unload_context .size sysn32_call_table,.-sysn32_call_table Index: linux-2.6/arch/mips/kernel/scall64-o32.S =================================================================== --- linux-2.6.orig/arch/mips/kernel/scall64-o32.S +++ linux-2.6/arch/mips/kernel/scall64-o32.S @@ -525,4 +525,16 @@ sys_call_table: PTR compat_sys_signalfd PTR compat_sys_timerfd PTR sys_eventfd + PTR sys_pfm_create_context /* 4320 */ + PTR sys_pfm_write_pmcs + PTR sys_pfm_write_pmds + PTR sys_pfm_read_pmds + PTR sys_pfm_load_context + PTR sys_pfm_start /* 4325 */ + PTR sys_pfm_stop + PTR sys_pfm_restart + PTR sys_pfm_create_evtsets + PTR sys_pfm_getinfo_evtsets + PTR sys_pfm_delete_evtsets /* 4330 */ + PTR sys_pfm_unload_context .size sys_call_table,.-sys_call_table Index: linux-2.6/arch/mips/kernel/signal.c =================================================================== --- linux-2.6.orig/arch/mips/kernel/signal.c +++ linux-2.6/arch/mips/kernel/signal.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -696,6 +697,9 @@ static void do_signal(struct pt_regs *re asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { + if (thread_info_flags & _TIF_PERFMON_WORK) + pfm_handle_work(regs); + /* deal with pending signal delivery */ if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs); Index: linux-2.6/arch/mips/kernel/smp.c =================================================================== --- linux-2.6.orig/arch/mips/kernel/smp.c +++ linux-2.6/arch/mips/kernel/smp.c @@ -203,6 +203,52 @@ void smp_call_function_interrupt(void) } } +int smp_call_function_single (int cpu, void (*func) (void *info), void *info, int retry, + int wait) +{ + struct call_data_struct data; + int me = smp_processor_id(); + + /* + * Can die spectacularly if this 
CPU isn't yet marked online + */ + BUG_ON(!cpu_online(me)); + if (cpu == me) { + WARN_ON(1); + return -EBUSY; + } + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + spin_lock(&smp_call_lock); + call_data = &data; + mb(); + + /* Send a message to the other CPU */ + core_send_ipi(cpu, SMP_CALL_FUNCTION); + + /* Wait for response */ + /* FIXME: lock-up detection, backtrace on lock-up */ + while (atomic_read(&data.started) != 1) + barrier(); + + if (wait) + while (atomic_read(&data.finished) != 1) + barrier(); + call_data = NULL; + spin_unlock(&smp_call_lock); + + return 0; +} + static void stop_this_cpu(void *dummy) { /* Index: linux-2.6/arch/mips/kernel/time.c =================================================================== --- linux-2.6.orig/arch/mips/kernel/time.c +++ linux-2.6/arch/mips/kernel/time.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -140,6 +141,7 @@ static long last_rtc_update; void local_timer_interrupt(int irq, void *dev_id) { profile_tick(CPU_PROFILING); + pfm_handle_switch_timeout(); update_process_times(user_mode(get_irq_regs())); } Index: linux-2.6/arch/mips/mips-boards/generic/time.c =================================================================== --- linux-2.6.orig/arch/mips/mips-boards/generic/time.c +++ linux-2.6/arch/mips/mips-boards/generic/time.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include Index: linux-2.6/arch/mips/perfmon/Kconfig =================================================================== --- /dev/null +++ linux-2.6/arch/mips/perfmon/Kconfig @@ -0,0 +1,23 @@ +menu "Hardware Performance Monitoring support" +config PERFMON + bool "Perfmon2 performance monitoring interface" + default n + help + Enables the perfmon2 interface to access the hardware + performance counters. 
See for + more details. + +config PERFMON_DEBUG + bool "Perfmon debugging" + default n + depends on PERFMON + help + Enables perfmon debugging support + +config PERFMON_MIPS64 + tristate "Support for MIPS64 hardware performance counters" + depends on PERFMON + default n + help + Enables support for the MIPS64 hardware performance counters. +endmenu Index: linux-2.6/arch/mips/perfmon/Makefile =================================================================== --- /dev/null +++ linux-2.6/arch/mips/perfmon/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_PERFMON) += perfmon.o +obj-$(CONFIG_PERFMON_MIPS64) += perfmon_mips64.o Index: linux-2.6/arch/mips/perfmon/perfmon.c =================================================================== --- /dev/null +++ linux-2.6/arch/mips/perfmon/perfmon.c @@ -0,0 +1,299 @@ +/* + * This file implements the MIPS64 specific + * support for the perfmon2 interface + * + * Copyright (c) 2005 Philip J. Mucci + * + * based on versions for other architectures: + * Copyright (c) 2005 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include +#include + +/* + * collect pending overflowed PMDs. Called from pfm_ctxsw() + * and from PMU interrupt handler. Must fill in set->povfl_pmds[] + * and set->npend_ovfls.
Interrupts are masked + */ +static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set) +{ + u64 new_val, wmask; + u64 *used_mask, *cnt_pmds; + u64 mask[PFM_PMD_BV]; + unsigned int i, max; + + max = pfm_pmu_conf->regs.max_cnt_pmd; + cnt_pmds = pfm_pmu_conf->regs.cnt_pmds; + used_mask = set->used_pmds; + wmask = 1ULL << pfm_pmu_conf->counter_width; + bitmap_and(cast_ulp(mask), + cast_ulp(cnt_pmds), + cast_ulp(used_mask),max); + + for (i = 0; i < max; i++) { + /* assume all PMD are counters */ + if (test_bit(i, mask)) { + new_val = pfm_arch_read_pmd(ctx, i); + + PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n", + i, (unsigned long long)new_val, + (new_val&wmask) ? 1 : 0); + + if (new_val & wmask) { + __set_bit(i, set->povfl_pmds); + set->npend_ovfls++; + } + } + } +} + +static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx, + struct pfm_event_set *set) +{ + unsigned int i, max; + + max = pfm_pmu_conf->regs.max_pmc; + + /* + * clear enable bits + */ + for (i = 0; i < max; i++) { + if (test_bit(i, set->used_pmcs)) + pfm_arch_write_pmc(ctx, i,0); + } + + if (set->npend_ovfls) + return; + + __pfm_get_ovfl_pmds(ctx, set); +} + +/* + * Called from pfm_ctxsw(). Task is guaranteed to be current. + * Context is locked. Interrupts are masked. Monitoring is active. + * PMU access is guaranteed. PMC and PMD registers are live in PMU. + * + * for per-thread: + * must stop monitoring for the task + * + * Return: + * non-zero : did not save PMDs (as part of stopping the PMU) + * 0 : saved PMDs (no need to save them in caller) + */ +int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx, + struct pfm_event_set *set) +{ + /* + * disable lazy restore of PMC registers. + */ + set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS; + + pfm_stop_active(task, ctx, set); + + return 1; +} + +/* + * Called from pfm_stop() and pfm_ctxsw() when idle + * task and EXCL_IDLE is on. + * + * Interrupts are masked. Context is locked. 
Set is the active set. + * + * For per-thread: + * task is not necessarily current. If not current task, then + * task is guaranteed stopped and off any cpu. Access to PMU + * is not guaranteed. Interrupts are masked. Context is locked. + * Set is the active set. + * + * For system-wide: + * task is current + * + * must disable active monitoring. ctx cannot be NULL + */ +void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx, + struct pfm_event_set *set) +{ + /* + * no need to go through pfm_stop_active() + * if we are already stopped + */ + if (!ctx->flags.started) + return; + + /* + * stop live registers and collect pending overflow + */ + if (task == current) + pfm_stop_active(task, ctx, set); +} + +/* + * called from pfm_start() or pfm_ctxsw() when idle task and + * EXCL_IDLE is on. + * + * Interrupts are masked. Context is locked. Set is the active set. + * + * For per-thread: + * Task is not necessarily current. If not current task, then task + * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed. + * + * For system-wide: + * task is always current + * + * must enable active monitoring. + */ +void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx, + struct pfm_event_set *set) +{ + unsigned int i, max_pmc; + + if (task != current) + return; + + max_pmc = pfm_pmu_conf->regs.max_pmc; + + for (i = 0; i < max_pmc; i++) { + if (test_bit(i, set->used_pmcs)) + pfm_arch_write_pmc(ctx, i, set->pmcs[i]); + } +} + +/* + * function called from pfm_switch_sets(), pfm_context_load_thread(), + * pfm_context_load_sys(), pfm_ctxsw(). + * context is locked. Interrupts are masked. set cannot be NULL. + * Access to the PMU is guaranteed. + * + * function must restore all PMD registers from set.
+ */ +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) +{ + u64 ovfl_mask, val; + u64 *impl_pmds; + unsigned int i; + unsigned int max_pmd; + + max_pmd = pfm_pmu_conf->regs.max_pmd; + ovfl_mask = pfm_pmu_conf->ovfl_mask; + impl_pmds = pfm_pmu_conf->regs.pmds; + + /* + * must restore all pmds to avoid leaking + * information to user. + */ + for (i = 0; i < max_pmd; i++) { + + if (test_bit(i, impl_pmds) == 0) + continue; + + val = set->pmds[i].value; + + /* + * keep only the bits selected by ovfl_mask + * (NOTE(review): this clears, not sets, the upper bits - confirm) + */ + val &= ovfl_mask; + + pfm_arch_write_pmd(ctx, i, val); + } +} + +/* + * function called from pfm_switch_sets(), pfm_context_load_thread(), + * pfm_context_load_sys(), pfm_ctxsw(). + * Context is locked. Interrupts are masked. set cannot be NULL. + * Access to the PMU is guaranteed. + * + * function must restore all PMC registers from set, if needed. + */ +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) +{ + u64 *impl_pmcs; + unsigned int i, max_pmc; + + max_pmc = pfm_pmu_conf->regs.max_pmc; + impl_pmcs = pfm_pmu_conf->regs.pmcs; + + /* + * - by default no PMC measures anything + * - on ctxswout, all used PMCs are disabled (each used PMC is written with 0) + * hence when masked we do not need to restore anything + */ + if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0) + return; + + /* + * restore all pmcs + */ + for (i = 0; i < max_pmc; i++) + if (test_bit(i, impl_pmcs)) + pfm_arch_write_pmc(ctx, i, set->pmcs[i]); +} + +char *pfm_arch_get_pmu_module_name(void) +{ + switch(cpu_data->cputype) { +#ifndef CONFIG_SMP + case CPU_34K: +#if defined(CPU_74K) + case CPU_74K: +#endif +#endif + case CPU_SB1: + case CPU_SB1A: + case CPU_25KF: + case CPU_24K: + case CPU_20KC: + case CPU_5KC: + return "perfmon_mips64"; + default: + return NULL; + } + return NULL; +} + +int perfmon_perf_irq(void) +{ + /* BLATANTLY STOLEN FROM OPROFILE, then modified */ + struct pt_regs *regs; + unsigned int
counters = pfm_pmu_conf->regs.max_pmc; + unsigned int control; + unsigned int counter; + + regs = get_irq_regs(); + switch (counters) { +#define HANDLE_COUNTER(n) \ + case n + 1: \ + control = read_c0_perfctrl ## n(); \ + counter = read_c0_perfcntr ## n(); \ + if ((control & MIPS64_PMC_INT_ENABLE_MASK) && \ + (counter & MIPS64_PMD_INTERRUPT)) { \ + pfm_interrupt_handler(instruction_pointer(regs),\ + regs); \ + return(1); \ + } + HANDLE_COUNTER(3) + HANDLE_COUNTER(2) + HANDLE_COUNTER(1) + HANDLE_COUNTER(0) + } + + return 0; +} +EXPORT_SYMBOL(perfmon_perf_irq); Index: linux-2.6/arch/mips/perfmon/perfmon_mips64.c =================================================================== --- /dev/null +++ linux-2.6/arch/mips/perfmon/perfmon_mips64.c @@ -0,0 +1,187 @@ +/* + * This file contains the MIPS64 and descendant PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (c) 2005 Philip Mucci + * + * Based on perfmon_p6.c: + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include + +MODULE_AUTHOR("Philip Mucci "); +MODULE_DESCRIPTION("MIPS64 PMU description tables"); +MODULE_LICENSE("GPL"); + +/* + * reserved: + * - bits 63-9 and bit 4 + * RSVD: reserved bits must be 1 + */ +#define PFM_MIPS64_PMC_RSVD 0xfffffffffffffe10ULL +#define PFM_MIPS64_PMC_VAL (1ULL<<4) + +extern int null_perf_irq(struct pt_regs *regs); +extern int (*perf_irq)(struct pt_regs *regs); +extern int perfmon_perf_irq(struct pt_regs *regs); + +static struct pfm_arch_pmu_info pfm_mips64_pmu_info; + +static struct pfm_regmap_desc pfm_mips64_pmc_desc[]={ +/* pmc0 */ PMC_D(PFM_REG_I64, "CP0_25_0", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 0), +/* pmc1 */ PMC_D(PFM_REG_I64, "CP0_25_1", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 1), +/* pmc2 */ PMC_D(PFM_REG_I64, "CP0_25_2", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 2), +/* pmc3 */ PMC_D(PFM_REG_I64, "CP0_25_3", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 3) +}; +#define PFM_MIPS64_NUM_PMCS ARRAY_SIZE(pfm_mips64_pmc_desc) + +static struct pfm_regmap_desc pfm_mips64_pmd_desc[]={ +/* pmd0 */ PMD_D(PFM_REG_C, "CP0_25_0", 0), +/* pmd1 */ PMD_D(PFM_REG_C, "CP0_25_1", 1), +/* pmd2 */ PMD_D(PFM_REG_C, "CP0_25_2", 2), +/* pmd3 */ PMD_D(PFM_REG_C, "CP0_25_3", 3) +}; +#define PFM_MIPS64_NUM_PMDS ARRAY_SIZE(pfm_mips64_pmd_desc) + +static int pfm_mips64_probe_pmu(void) +{ + struct cpuinfo_mips *c = &current_cpu_data; + + switch (c->cputype) { +#ifndef CONFIG_SMP + case CPU_34K: +#if defined(CPU_74K) + case CPU_74K: +#endif +#endif + case CPU_SB1: + case CPU_SB1A: + case CPU_25KF: + case CPU_24K: + case CPU_20KC: + case CPU_5KC: + return 0; + break; + default: + PFM_INFO("Unknown cputype 0x%x",c->cputype); + } + return -1; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */ +static struct pfm_pmu_config pfm_mips64_pmu_conf = { + .pmu_name = "MIPS", /* placeholder */ + .counter_width = 31, + .pmd_desc = pfm_mips64_pmd_desc, + .pmc_desc = pfm_mips64_pmc_desc, + .num_pmc_entries = PFM_MIPS64_NUM_PMCS, + .num_pmd_entries = PFM_MIPS64_NUM_PMDS, + .probe_pmu = pfm_mips64_probe_pmu, + .flags = PFM_PMU_BUILTIN_FLAG, + .owner = THIS_MODULE, + .arch_info = &pfm_mips64_pmu_info +}; + +static inline int n_counters(void) +{ + if (!(read_c0_config1() & MIPS64_CONFIG_PMC_MASK)) + return 0; + if (!(read_c0_perfctrl0() & MIPS64_PMC_CTR_MASK)) + return 1; + if (!(read_c0_perfctrl1() & MIPS64_PMC_CTR_MASK)) + return 2; + if (!(read_c0_perfctrl2() & MIPS64_PMC_CTR_MASK)) + return 3; + return 4; +} + +static int __init pfm_mips64_pmu_init_module(void) +{ + struct cpuinfo_mips *c = &current_cpu_data; + int i, ret, num; + + switch (c->cputype) { + case CPU_5KC: + pfm_mips64_pmu_conf.pmu_name = "MIPS5KC"; + break; + case CPU_20KC: + pfm_mips64_pmu_conf.pmu_name = "MIPS20KC"; + break; + case CPU_24K: + pfm_mips64_pmu_conf.pmu_name = "MIPS24K"; + break; + case CPU_25KF: + pfm_mips64_pmu_conf.pmu_name = "MIPS25KF"; + break; + case CPU_SB1: + pfm_mips64_pmu_conf.pmu_name = "SB1"; + break; + case CPU_SB1A: + pfm_mips64_pmu_conf.pmu_name = "SB1A"; + break; +#ifndef CONFIG_SMP + case CPU_34K: + pfm_mips64_pmu_conf.pmu_name = "MIPS34K"; + break; +#if defined(CPU_74K) + case CPU_74K: + pfm_mips64_pmu_conf.pmu_name = "MIPS74K"; + break; +#endif +#endif + default: + PFM_INFO("Unknown cputype 0x%x",c->cputype); + return -1; + } + + num = n_counters(); + if (num == 0) { + PFM_INFO("cputype 0x%x has no counters",c->cputype); + return -1; + } + /* mark remaining counters unavailable */ + for(i=num; i < PFM_MIPS64_NUM_PMCS; i++) { + pfm_mips64_pmc_desc[i].type = PFM_REG_NA; + } + + for(i=num; i < PFM_MIPS64_NUM_PMDS; i++) { + pfm_mips64_pmd_desc[i].type = PFM_REG_NA; + } + + pfm_mips64_pmu_conf.num_pmc_entries = num; + pfm_mips64_pmu_conf.num_pmd_entries = num; + +
pfm_mips64_pmu_info.pmu_style = c->cputype; + + ret = pfm_pmu_register(&pfm_mips64_pmu_conf); + if (ret == 0) + perf_irq = perfmon_perf_irq; + return ret; +} + +static void __exit pfm_mips64_pmu_cleanup_module(void) +{ + pfm_pmu_unregister(&pfm_mips64_pmu_conf); + perf_irq = null_perf_irq; +} + +module_init(pfm_mips64_pmu_init_module); +module_exit(pfm_mips64_pmu_cleanup_module); Index: linux-2.6/arch/powerpc/Kconfig =================================================================== --- linux-2.6.orig/arch/powerpc/Kconfig +++ linux-2.6/arch/powerpc/Kconfig @@ -140,6 +140,8 @@ config PPC_OF_PLATFORM_PCI depends on PPC64 # not supported on 32 bits yet default n +source "arch/powerpc/perfmon/Kconfig" + source "init/Kconfig" source "arch/powerpc/platforms/Kconfig" Index: linux-2.6/arch/powerpc/Makefile =================================================================== --- linux-2.6.orig/arch/powerpc/Makefile +++ linux-2.6/arch/powerpc/Makefile @@ -137,6 +137,7 @@ core-y += arch/powerpc/kernel/ \ arch/powerpc/platforms/ core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/ core-$(CONFIG_XMON) += arch/powerpc/xmon/ +core-$(CONFIG_PERFMON) += arch/powerpc/perfmon/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ Index: linux-2.6/arch/powerpc/kernel/entry_32.S =================================================================== --- linux-2.6.orig/arch/powerpc/kernel/entry_32.S +++ linux-2.6/arch/powerpc/kernel/entry_32.S @@ -38,7 +38,7 @@ * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. 
*/ #if MSR_KERNEL >= 0x10000 -#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l +#define LOAD_MSR_KERNEL(r, x) lis r,(x)@ha; ori r,r,(x)@l #else #define LOAD_MSR_KERNEL(r, x) li r,(x) #endif Index: linux-2.6/arch/powerpc/kernel/entry_64.S =================================================================== --- linux-2.6.orig/arch/powerpc/kernel/entry_64.S +++ linux-2.6/arch/powerpc/kernel/entry_64.S @@ -588,6 +588,10 @@ user_work: b .ret_from_except_lite 1: bl .save_nvgprs +#ifdef CONFIG_PERFMON + addi r3,r1,STACK_FRAME_OVERHEAD + bl .pfm_handle_work +#endif /* CONFIG_PERFMON */ li r3,0 addi r4,r1,STACK_FRAME_OVERHEAD bl .do_signal Index: linux-2.6/arch/powerpc/kernel/process.c =================================================================== --- linux-2.6.orig/arch/powerpc/kernel/process.c +++ linux-2.6/arch/powerpc/kernel/process.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -324,6 +325,9 @@ struct task_struct *__switch_to(struct t new_thread->start_tb = current_tb; } #endif + if (test_tsk_thread_flag(new, TIF_PERFMON_CTXSW) + || test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) + pfm_ctxsw(prev, new); local_irq_save(flags); @@ -457,6 +461,7 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { discard_lazy_cpu_state(); + pfm_exit_thread(current); } void flush_thread(void) @@ -570,6 +575,7 @@ int copy_thread(int nr, unsigned long cl #else kregs->nip = (unsigned long)ret_from_fork; #endif + pfm_copy_thread(p); return 0; } Index: linux-2.6/arch/powerpc/kernel/time.c =================================================================== --- linux-2.6.orig/arch/powerpc/kernel/time.c +++ linux-2.6/arch/powerpc/kernel/time.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -651,6 +652,8 @@ void timer_interrupt(struct pt_regs * re profile_tick(CPU_PROFILING); calculate_steal_time(); + pfm_handle_switch_timeout(); + #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES)) 
get_lppaca()->int_dword.fields.decr_int = 0; Index: linux-2.6/arch/powerpc/perfmon/Kconfig =================================================================== --- /dev/null +++ linux-2.6/arch/powerpc/perfmon/Kconfig @@ -0,0 +1,50 @@ +menu "Hardware Performance Monitoring support" +config PERFMON + bool "Perfmon2 performance monitoring interface" + default n + help + Enables the perfmon2 interface to access the hardware + performance counters. See for + more details. + +config PERFMON_DEBUG + bool "Perfmon debugging" + default n + depends on PERFMON + help + Enables perfmon debugging support + +config PERFMON_POWER5 + tristate "Support for Power5 hardware performance counters" + depends on PERFMON && PPC64 + default n + help + Enables support for the Power 5 hardware performance counters + If unsure, say M. + +config PERFMON_PPC32 + tristate "Support for PPC32 hardware performance counters" + depends on PERFMON && PPC32 + default n + help + Enables support for the PPC32 hardware performance counters + If unsure, say M. + +config PERFMON_CELL + tristate "Support for Cell hardware performance counters" + depends on PERFMON && PPC_CELL + default n + help + Enables support for the Cell hardware performance counters. + If unsure, say M. + +config PERFMON_CELL_HW_SMPL + tristate "Support for Cell hardware counter sampling" + depends on PERFMON_CELL + default n + help + Enables support for the Cell hardware counter sampling modes using + the PMU trace-buffer. + If unsure, say M. 
+ +endmenu Index: linux-2.6/arch/powerpc/perfmon/Makefile =================================================================== --- /dev/null +++ linux-2.6/arch/powerpc/perfmon/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_PERFMON) += perfmon.o +obj-$(CONFIG_PERFMON_POWER5) += perfmon_power5.o +obj-$(CONFIG_PERFMON_PPC32) += perfmon_ppc32.o +obj-$(CONFIG_PERFMON_CELL) += perfmon_cell.o +obj-$(CONFIG_PERFMON_CELL_HW_SMPL) += perfmon_cell_hw_smpl.o Index: linux-2.6/arch/powerpc/perfmon/perfmon.c =================================================================== --- /dev/null +++ linux-2.6/arch/powerpc/perfmon/perfmon.c @@ -0,0 +1,282 @@ +/* + * This file implements the powerpc specific + * support for the perfmon2 interface + * + * Copyright (c) 2005 David Gibson, IBM Corporation. + * + * based on versions for other architectures: + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include + +static void pfm_stop_active(struct task_struct *task, + struct pfm_context *ctx, struct pfm_event_set *set) +{ + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info; + + BUG_ON(!arch_info->disable_counters || !arch_info->get_ovfl_pmds); + + arch_info->disable_counters(ctx, set); + + if (set->npend_ovfls) + return; + + arch_info->get_ovfl_pmds(ctx, set); +} + +/* + * Called from pfm_ctxsw(). Task is guaranteed to be current. + * Context is locked. Interrupts are masked. Monitoring is active. + * PMU access is guaranteed. PMC and PMD registers are live in PMU. + * + * for per-thread: + * must stop monitoring for the task + * Return: + * non-zero : did not save PMDs (as part of stopping the PMU) + * 0 : saved PMDs (no need to save them in caller) + */ +int pfm_arch_ctxswout_thread(struct task_struct *task, + struct pfm_context *ctx, struct pfm_event_set *set) +{ + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info; + + /* + * disable lazy restore of PMC registers. + */ + set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS; + + pfm_stop_active(task, ctx, set); + + if (arch_info->ctxswout_thread) { + arch_info->ctxswout_thread(task, ctx, set); + } + + return 1; +} + +/* + * Called from pfm_ctxsw + */ +void pfm_arch_ctxswin_thread(struct task_struct *task, + struct pfm_context *ctx, struct pfm_event_set *set) +{ + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info; + + if (ctx->state != PFM_CTX_MASKED && ctx->flags.started == 1) { + BUG_ON(!arch_info->enable_counters); + arch_info->enable_counters(ctx, set); + } + + if (arch_info->ctxswin_thread) { + arch_info->ctxswin_thread(task, ctx, set); + } +} + +/* + * Called from pfm_stop() and idle notifier + * + * Interrupts are masked. Context is locked. 
Set is the active set. + * + * For per-thread: + * task is not necessarily current. If not current task, then + * task is guaranteed stopped and off any cpu. Access to PMU + * is not guaranteed. Interrupts are masked. Context is locked. + * Set is the active set. + * + * For system-wide: + * task is current + * + * must disable active monitoring. ctx cannot be NULL + */ +void pfm_arch_stop(struct task_struct *task, + struct pfm_context *ctx, struct pfm_event_set *set) +{ + /* + * no need to go through stop_save() + * if we are already stopped + */ + if (!ctx->flags.started) + return; + + /* + * stop live registers and collect pending overflow + */ + if (task == current) + pfm_stop_active(task, ctx, set); +} + +/* + * called from pfm_start() and idle notifier + * + * Interrupts are masked. Context is locked. Set is the active set. + * + * For per-thread: + * Task is not necessarily current. If not current task, then task + * is guaranteed stopped and off any cpu. No access to PMU is task + * is not current. + * + * For system-wide: + * task is always current + * + * must enable active monitoring. + */ +void pfm_arch_start(struct task_struct *task, + struct pfm_context *ctx, struct pfm_event_set *set) +{ + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info; + + if (task != current) + return; + + BUG_ON(!arch_info->enable_counters); + + arch_info->enable_counters(ctx, set); +} + +/* + * function called from pfm_switch_sets(), pfm_context_load_thread(), + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() + * context is locked. Interrupts are masked. set cannot be NULL. + * Access to the PMU is guaranteed. + * + * function must restore all PMD registers from set. + */ +void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) +{ + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info; + u64 *used_pmds; + unsigned int i, max_pmd; + + /* The model-specific module can override the default + * restore-PMD method. 
+ */ + if (arch_info->restore_pmds) { + return arch_info->restore_pmds(set); + } + + max_pmd = pfm_pmu_conf->regs.max_pmd; + used_pmds = set->used_pmds; + + for (i = 0; i < max_pmd; i++) + if (test_bit(i, used_pmds) && + !(pfm_pmu_conf->pmd_desc[i].type & PFM_REG_RO)) + pfm_arch_write_pmd(ctx, i, set->pmds[i].value); +} + +/* + * function called from pfm_switch_sets(), pfm_context_load_thread(), + * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() + * context is locked. Interrupts are masked. set cannot be NULL. + * Access to the PMU is guaranteed. + * + * function must restore all PMC registers from set, if needed. + */ +void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) +{ + struct pfm_arch_pmu_info *arch_info; + u64 *impl_pmcs; + unsigned int i, max_pmc; + + /* The model-specific module can override the default + * restore-PMC method. + */ + arch_info = pfm_pmu_conf->arch_info; + if (arch_info->restore_pmcs) { + return arch_info->restore_pmcs(set); + } + + /* The "common" powerpc models enable the counters simply by writing + * all the control registers. Therefore, if we're masked or stopped we + * don't need to bother restoring the PMCs now. 
+ */ + if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0) + return; + + max_pmc = pfm_pmu_conf->regs.max_pmc; + impl_pmcs = pfm_pmu_conf->regs.pmcs; + + /* + * restore all pmcs + */ + for (i = 0; i < max_pmc; i++) + if (test_bit(i, impl_pmcs)) + pfm_arch_write_pmc(ctx, i, set->pmcs[i]); +} + +char *pfm_arch_get_pmu_module_name(void) +{ + unsigned int pvr = mfspr(SPRN_PVR); + + switch (PVR_VER(pvr)) { + case 0x0004: /* 604 */ + case 0x0009: /* 604e; */ + case 0x000A: /* 604ev */ + case 0x0008: /* 750/740 */ + case 0x7000: /* 750FX */ + case 0x7001: + case 0x7002: /* 750GX */ + case 0x000C: /* 7400 */ + case 0x800C: /* 7410 */ + case 0x8000: /* 7451/7441 */ + case 0x8001: /* 7455/7445 */ + case 0x8002: /* 7457/7447 */ + case 0x8003: /* 7447A */ + case 0x8004: /* 7448 */ + return("perfmon_ppc32"); + case PV_POWER4: + case PV_POWER4p: + return "perfmon_power4"; + case PV_POWER5: + case PV_POWER5p: + return "perfmon_power5"; + case PV_970: + case PV_970FX: + case PV_970MP: + return "perfmon_ppc970"; + case PV_BE: + return "perfmon_cell"; + } + return NULL; +} + +void pfm_arch_init_percpu(void) +{ +#ifdef CONFIG_PPC64 + extern void ppc64_enable_pmcs(void); + ppc64_enable_pmcs(); +#endif +} + +/** + * powerpc_irq_handler + * + * Get the perfmon context that belongs to the current CPU, and call the + * model-specific interrupt handler. + **/ +void powerpc_irq_handler(struct pt_regs *regs) +{ + struct pfm_arch_pmu_info *arch_info = pfm_pmu_conf->arch_info; + struct pfm_context *ctx; + + if (arch_info->irq_handler) { + ctx = __get_cpu_var(pmu_ctx); + if (likely(ctx)) + arch_info->irq_handler(regs, ctx); + } +} Index: linux-2.6/arch/powerpc/perfmon/perfmon_cell.c =================================================================== --- /dev/null +++ linux-2.6/arch/powerpc/perfmon/perfmon_cell.c @@ -0,0 +1,610 @@ +/* + * This file contains the Cell PMU register description tables + * and pmc checker used by perfmon.c. 
+ * + * Copyright IBM Corporation 2007 + * + * Based on other Perfmon2 PMU modules. + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include "../platforms/cell/cbe_regs.h" +#include + +MODULE_AUTHOR("Kevin Corry , " + "Carl Love "); +MODULE_DESCRIPTION("Cell PMU description table"); +MODULE_LICENSE("GPL"); + +/* + * Mapping from Perfmon logical control registers to Cell hardware registers. + */ +static struct pfm_regmap_desc pfm_cell_pmc_desc[] = { + /* Per-counter control registers. */ + PMC_D(PFM_REG_I, "pm0_control", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm1_control", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm2_control", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm3_control", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm4_control", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm5_control", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm6_control", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm7_control", 0, 0, 0, 0), + + /* Per-counter RTAS arguments. Each of these registers has three fields. 
+ * bits 63-48: debug-bus word + * bits 47-32: sub-unit + * bits 31-0 : full signal number + * (MSB = 63, LSB = 0) + */ + PMC_D(PFM_REG_I, "pm0_event", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm1_event", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm2_event", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm3_event", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm4_event", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm5_event", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm6_event", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm7_event", 0, 0, 0, 0), + + /* Global control registers. Same order as enum pm_reg_name. */ + PMC_D(PFM_REG_I, "group_control", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "debug_bus_control", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "trace_address", 0, 0, 0, 0), /* KMC: Not sure if user-space needs access to this one. */ + PMC_D(PFM_REG_I, "ext_trace_timer", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm_status", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm_control", 0, 0, 0, 0), + PMC_D(PFM_REG_I, "pm_interval", 0, 0, 0, 0), /* KMC: Does user-space also need read access to this one? */ + PMC_D(PFM_REG_I, "pm_start_stop", 0, 0, 0, 0), +}; +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_cell_pmc_desc) + +#define CELL_PMC_PM_STATUS 20 +/* + * Mapping from Perfmon logical data counters to Cell hardware counters. + */ +static struct pfm_regmap_desc pfm_cell_pmd_desc[] = { + PMD_D(PFM_REG_C, "pm0", 0), + PMD_D(PFM_REG_C, "pm1", 0), + PMD_D(PFM_REG_C, "pm2", 0), + PMD_D(PFM_REG_C, "pm3", 0), + PMD_D(PFM_REG_C, "pm4", 0), + PMD_D(PFM_REG_C, "pm5", 0), + PMD_D(PFM_REG_C, "pm6", 0), + PMD_D(PFM_REG_C, "pm7", 0), +}; +#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_cell_pmd_desc) + +/* The firmware only sees physical CPUs, so divide by 2 if SMT is on. 
*/ +#ifdef CONFIG_SCHED_SMT +#define RTAS_CPU(cpu) ((cpu) / 2) +#else +#define RTAS_CPU(cpu) (cpu) +#endif +#define RTAS_BUS_WORD(x) (u16)(((x) >> 48) & 0x0000ffff) +#define RTAS_SUB_UNIT(x) (u16)(((x) >> 32) & 0x0000ffff) +#define RTAS_SIGNAL_NUMBER(x) (s32)( (x) & 0xffffffff) + +#define subfunc_RESET 1 +#define subfunc_ACTIVATE 2 + +#define passthru_ENABLE 1 +#define passthru_DISABLE 2 + +/** + * struct cell_rtas_arg + * + * @cpu: Processor to modify. Linux numbers CPUs based on SMT IDs, but the + * firmware only sees the physical CPUs. So this value should be the + * SMT ID (from smp_processor_id() or get_cpu()) divided by 2. + * @sub_unit: Hardware subunit this applies to (if applicable). + * @signal_group: Signal group to enable/disable on the trace bus. + * @bus_word: For signal groups that propagate via the trace bus, this trace + * bus word will be used. This is a mask of (1 << TraceBusWord). + * For other signal groups, this specifies the trigger or event bus. + * @bit: Trigger/Event bit, if applicable for the signal group. + * + * An array of these structures are passed to rtas_call() to set up the + * signals on the debug bus. + **/ +struct cell_rtas_arg { + u16 cpu; + u16 sub_unit; + s16 signal_group; + u8 bus_word; + u8 bit; +}; + +/** + * rtas_reset_signals + * + * Set up the RTAS arguments for a RESET command. The buffer will be only + * the first entry in the rtas_args[cpu].signal[] array. + **/ +static int rtas_reset_signals(u32 cpu) +{ + struct cell_rtas_arg signal; + u64 real_addr = virt_to_phys(&signal); + int rc; + + memset(&signal, 0, sizeof(signal)); + signal.cpu = RTAS_CPU(cpu); + rc = rtas_call(rtas_token("ibm,cbe-perftools"), + 5, 1, NULL, + subfunc_RESET, + passthru_DISABLE, + real_addr >> 32, + real_addr & 0xffffffff, + sizeof(signal)); + + return rc; +} + +/** + * rtas_activate_signals + * + * Set up the RTAS arguments for an ACTIVATE command. 
The buffer will be the + * number of entries in the rtas_args[cpu].signal[] array that were filled + * in by attach_signal_to_counter(). + **/ +static int rtas_activate_signals(struct cell_rtas_arg *signals, + int num_signals) +{ + u64 real_addr = virt_to_phys(signals); + int rc; + + rc = rtas_call(rtas_token("ibm,cbe-perftools"), + 5, 1, NULL, + subfunc_ACTIVATE, + passthru_ENABLE, + real_addr >> 32, + real_addr & 0xffffffff, + num_signals * sizeof(*signals)); + + return rc; +} + +/** + * write_pm07_event + * + * Pull out the RTAS arguments from the 64-bit register value and make the + * RTAS activate-signals call. + **/ +static void write_pm07_event(int cpu, unsigned int ctr, u64 value) +{ + struct cell_rtas_arg signal; + int rc; + + signal.cpu = RTAS_CPU(cpu); + signal.bus_word = 1 << RTAS_BUS_WORD(value); + signal.sub_unit = RTAS_SUB_UNIT(value); + signal.signal_group = RTAS_SIGNAL_NUMBER(value) / 100; + signal.bit = RTAS_SIGNAL_NUMBER(value) % 100; + + rc = rtas_activate_signals(&signal, 1); + if (rc) { + PFM_WARN("%s(%d, %u, %lu): Error calling " + "rtas_activate_signal(): %d\n", __FUNCTION__, + cpu, ctr, (unsigned long)value, rc); + /* FIX: Could we change this routine to return an error? */ + } +} + +/** + * pfm_cell_probe_pmu + * + * Simply check the processor version register to see if we're currently + * on a Cell system. + **/ +static int pfm_cell_probe_pmu(void) +{ + unsigned long pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) != PV_BE) + return -1; + + return 0; +} + +/** + * pfm_cell_write_pmc + **/ +static void pfm_cell_write_pmc(unsigned int cnum, u64 value) +{ + int cpu = smp_processor_id(); + + if (cnum < NR_CTRS) { + cbe_write_pm07_control(cpu, cnum, value); + + } else if (cnum < NR_CTRS * 2) { + write_pm07_event(cpu, cnum - NR_CTRS, value); + + } else if (cnum == CELL_PMC_PM_STATUS) { + /* The pm_status register must be treated separately from + * the other "global" PMCs. 
This call will ensure that + * the interrupts are routed to the correct CPU, as well + * as writing the desired value to the pm_status register. + */ + cbe_enable_pm_interrupts(cpu, cbe_get_hw_thread_id(cpu), value); + + } else if (cnum < PFM_PM_NUM_PMCS) { + cbe_write_pm(cpu, cnum - (NR_CTRS * 2), value); + } +} + +/** + * pfm_cell_write_pmd + **/ +static void pfm_cell_write_pmd(unsigned int cnum, u64 value) +{ + int cpu = smp_processor_id(); + + if (cnum < NR_CTRS) { + cbe_write_ctr(cpu, cnum, value); + } +} + +/** + * pfm_cell_read_pmd + **/ +static u64 pfm_cell_read_pmd(unsigned int cnum) +{ + int cpu = smp_processor_id(); + + if (cnum < NR_CTRS) { + return cbe_read_ctr(cpu, cnum); + } + + return -EINVAL; +} + +/** + * pfm_cell_enable_counters + * + * Just need to turn on the global enable bit in pm_control (cbe_enable_pm()). + **/ +static void pfm_cell_enable_counters(struct pfm_context *ctx, + struct pfm_event_set *set) +{ + cbe_enable_pm(smp_processor_id()); +} + +/** + * pfm_cell_disable_counters + * + * Just need to turn off the global enable bit in pm_control (cbe_disable_pm()). + * + * Also, if we're using the hardware-sampling module, we need to empty the + * trace-buffer, since it cannot be restored to its current state when this + * event-set is enabled again. + **/ +static void pfm_cell_disable_counters(struct pfm_context *ctx, + struct pfm_event_set *set) +{ + struct pfm_smpl_fmt *smpl_fmt = ctx->smpl_fmt; + struct pt_regs *regs; + + cbe_disable_pm(smp_processor_id()); + + if (smpl_fmt && !strcmp(smpl_fmt->fmt_name, PFM_CELL_HW_SMPL_NAME)) { + ctx->ovfl_arg.ovfl_pmd = PFM_CELL_HW_SMPL_OVFL_PMD; + ctx->ovfl_arg.active_set = ctx->active_set->id; + regs = current->thread.regs; + smpl_fmt->fmt_handler(ctx->smpl_addr, &ctx->ovfl_arg, + instruction_pointer(regs), 0, regs); + } +} + +/** + * pfm_cell_restore_pmcs + * + * Write all control register values that are saved in the specified event + * set. 
We could use the pfm_arch_write_pmc() function to restore each PMC + * individually (as is done in other architectures), but that results in + * multiple RTAS calls. As an optimization, we will setup the RTAS argument + * array so we can do all event-control registers in one RTAS call. + **/ +void pfm_cell_restore_pmcs(struct pfm_event_set *set) +{ + struct cell_rtas_arg signals[NR_CTRS]; + u64 value, *used_pmcs = set->used_pmcs; + int i, rc, num_used = 0, cpu = smp_processor_id(); + + memset(signals, 0, sizeof(signals)); + + for (i = 0; i < NR_CTRS; i++) { + /* Write the per-counter control register. If the PMC is not + * in use, then it will simply clear the register, which will + * disable the associated counter. + */ + cbe_write_pm07_control(cpu, i, set->pmcs[i]); + + if (test_bit(i + NR_CTRS, used_pmcs)) { + /* Set up the next RTAS array entry for this counter. + * Only include pm07_event registers that are in use + * by this set so the RTAS call doesn't have to + * process blank array entries. + */ + value = set->pmcs[i + NR_CTRS]; + signals[num_used].cpu = RTAS_CPU(cpu); + signals[num_used].sub_unit = RTAS_SUB_UNIT(value); + signals[num_used].bus_word = 1 << RTAS_BUS_WORD(value); + signals[num_used].bit = RTAS_SIGNAL_NUMBER(value) % 100; + signals[num_used].signal_group = + RTAS_SIGNAL_NUMBER(value) / 100; + num_used++; + } + } + + rc = rtas_activate_signals(signals, num_used); + if (rc) { + PFM_WARN("Error calling rtas_activate_signal(): %d\n", rc); + /* FIX: We will also need this routine to be able to return + * an error if Stephane agrees to change pfm_arch_write_pmc + * to return an error. + */ + } + + /* Write all the global PMCs. Need to call pfm_cell_write_pmc() + * instead of cbe_write_pm() due to special handling for the + * pm_status register. 
+ */ + for (i *= 2; i < PFM_PM_NUM_PMCS; i++) + pfm_cell_write_pmc(i, set->pmcs[i]); +} + +/** + * pfm_cell_unload_context + * + * For system-wide contexts and self-monitored contexts, make the RTAS call + * to reset the debug-bus signals. + * + * For non-self-monitored contexts, the monitored thread will already have + * been taken off the CPU and we don't need to do anything additional. + **/ +static int pfm_cell_unload_context(struct pfm_context *ctx, + struct task_struct *task) +{ + if (task == current || ctx->flags.system) { + rtas_reset_signals(smp_processor_id()); + } + return 0; +} + +/** + * pfm_cell_ctxswout_thread + * + * When a monitored thread is switched out (self-monitored or externally + * monitored) we need to reset the debug-bus signals so the next context that + * gets switched in can start from a clean set of signals. + **/ +int pfm_cell_ctxswout_thread(struct task_struct *task, + struct pfm_context *ctx, struct pfm_event_set *set) +{ + rtas_reset_signals(smp_processor_id()); + return 0; +} + +/** + * pfm_cell_get_ovfl_pmds + * + * Determine which counters in this set have overflowed and fill in the + * set->povfl_pmds mask and set->npend_ovfls count. On Cell, the pm_status + * register contains a bit for each counter to indicate overflow. However, + * those 8 bits are in the reverse order than what Perfmon2 is expecting, + * so we need to reverse the order of the overflow bits. + **/ +static void pfm_cell_get_ovfl_pmds(struct pfm_context *ctx, + struct pfm_event_set *set) +{ + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx); + u32 pm_status, ovfl_ctrs; + u64 povfl_pmds = 0; + int i; + + if (!ctx_arch->last_read_updated) + /* This routine was not called via the interrupt handler. + * Need to start by getting interrupts and updating + * last_read_pm_status. + */ + ctx_arch->last_read_pm_status = + cbe_get_and_clear_pm_interrupts(smp_processor_id()); + + /* Reset the flag that the interrupt handler last read pm_status. 
*/ + ctx_arch->last_read_updated = 0; + + pm_status = ctx_arch->last_read_pm_status & + set->pmcs[CELL_PMC_PM_STATUS]; + ovfl_ctrs = CBE_PM_OVERFLOW_CTRS(pm_status); + + /* Reverse the order of the bits in ovfl_ctrs + * and store the result in povfl_pmds. + */ + for (i = 0; i < PFM_PM_NUM_PMDS; i++) { + povfl_pmds = (povfl_pmds << 1) | (ovfl_ctrs & 1); + ovfl_ctrs >>= 1; + } + + /* Mask povfl_pmds with set->used_pmds to get set->povfl_pmds. + * Count the bits set in set->povfl_pmds to get set->npend_ovfls. + */ + bitmap_and(set->povfl_pmds, &povfl_pmds, + set->used_pmds, PFM_PM_NUM_PMDS); + set->npend_ovfls = bitmap_weight(set->povfl_pmds, PFM_PM_NUM_PMDS); +} + +/** + * handle_trace_buffer_interrupts + * + * This routine is for processing just the interval timer and trace buffer + * overflow interrupts. Performance counter interrupts are handled by the + * perf_irq_handler() routine, which reads and saves the pm_status register. + * This routine should not read the actual pm_status register, but rather + * the value passed in. + **/ +static void handle_trace_buffer_interrupts(unsigned long iip, + struct pt_regs *regs, + struct pfm_context *ctx, + u32 pm_status) +{ + struct pfm_smpl_fmt *smpl_fmt; + + if (pm_status & CBE_PM_TRACE_BUFFER_FULL_INTR) { + /* The trace-buffer is full. Get the + * sampling-buffer address and call the handler. + */ + smpl_fmt = ctx->smpl_fmt; + + if (smpl_fmt && + !strcmp(smpl_fmt->fmt_name, PFM_CELL_HW_SMPL_NAME)) { + ctx->ovfl_arg.ovfl_pmd = PFM_CELL_HW_SMPL_OVFL_PMD; + ctx->ovfl_arg.active_set = ctx->active_set->id; + smpl_fmt->fmt_handler(ctx->smpl_addr, &ctx->ovfl_arg, + iip, 0, regs); + } + } + + /* Currently the trace buffer underflow and interval timer + * interrupts are ignored. + */ + + return; +} + +/** + * pfm_cell_irq_handler + * + * Handler for all Cell performance-monitor interrupts. 
+ **/ +static void pfm_cell_irq_handler(struct pt_regs *regs, struct pfm_context *ctx) +{ + struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx); + u32 last_read_pm_status; + int cpu = smp_processor_id(); + + /* Need to disable and reenable the performance counters to get the + * desired behavior from the hardware. This is specific to the Cell + * PMU hardware. + */ + cbe_disable_pm(cpu); + + /* Read the pm_status register to get the interrupt bits. If a + * performance counter overflow interrupt occurred, call the core + * perfmon interrupt handler to service the counter overflow. If the + * interrupt was for the interval timer or the trace_buffer, + * call the interval timer and trace buffer interrupt handler. + * + * The value read from the pm_status register is stored in the + * pfm_arch_context structure for use by other routines. Note that + * reading the pm_status register resets the interrupt flags to zero. + * Hence, it is important that the register is only read in one place. + * + * The pm_status reg interrupt reg format is: + * [pmd0:pmd1:pmd2:pmd3:pmd4:pmd5:pmd6:pmd7:intt:tbf:tbu:] + * - pmd0 to pmd7 are the perf counter overflow interrupts. + * - intt is the interval timer overflowed interrupt. + * - tbf is the trace buffer full interrupt. + * - tbu is the trace buffer underflow interrupt. + * - The pmd0 bit is the MSB of the 32 bit register. + */ + ctx_arch->last_read_pm_status = last_read_pm_status = + cbe_get_and_clear_pm_interrupts(cpu); + + /* Set flag for pfm_cell_get_ovfl_pmds() routine so it knows + * last_read_pm_status was updated by the interrupt handler. + */ + ctx_arch->last_read_updated = 1; + + if (last_read_pm_status & CBE_PM_ALL_OVERFLOW_INTR) + /* At least one counter overflowed. */ + pfm_interrupt_handler(instruction_pointer(regs), regs); + + if (last_read_pm_status & (CBE_PM_INTERVAL_INTR | + CBE_PM_TRACE_BUFFER_FULL_INTR | + CBE_PM_TRACE_BUFFER_UNDERFLOW_INTR)) + /* Trace buffer or interval timer overflow. 
*/ + handle_trace_buffer_interrupts(instruction_pointer(regs), + regs, ctx, last_read_pm_status); + + /* If the hardware-sampling module masked monitoring for this context, + * don't re-enable the PMU. ctx->state is a value, not a bit mask. + */ + if (ctx->state == PFM_CTX_MASKED) { + return; + } + + /* The interrupt setting is the value written to the pm_status + * register. It is saved in the context when the register is + * written. + */ + cbe_enable_pm_interrupts(cpu, cbe_get_hw_thread_id(cpu), + ctx->active_set->pmcs[CELL_PMC_PM_STATUS]); + + /* The writes to the various performance counters only write to a + * latch. The new values (interrupt setting bits, reset counter value + * etc.) are not copied to the actual registers until the performance + * monitor is enabled. In order to get this to work as desired, the + * performance monitor needs to be disabled while writing to the + * latches. This is a HW design issue. + */ + cbe_enable_pm(cpu); +} + +static struct pfm_arch_pmu_info pfm_cell_pmu_info = { + .pmu_style = PFM_POWERPC_PMU_CELL, + .write_pmc = pfm_cell_write_pmc, + .write_pmd = pfm_cell_write_pmd, + .read_pmd = pfm_cell_read_pmd, + .enable_counters = pfm_cell_enable_counters, + .disable_counters = pfm_cell_disable_counters, + .irq_handler = pfm_cell_irq_handler, + .get_ovfl_pmds = pfm_cell_get_ovfl_pmds, + .restore_pmcs = pfm_cell_restore_pmcs, + .ctxswout_thread = pfm_cell_ctxswout_thread, + .unload_context = pfm_cell_unload_context, +}; + +static struct pfm_pmu_config pfm_cell_pmu_conf = { + .pmu_name = "Cell", + .version = "0.1", + .counter_width = 32, + .pmd_desc = pfm_cell_pmd_desc, + .pmc_desc = pfm_cell_pmc_desc, + .num_pmc_entries = PFM_PM_NUM_PMCS, + .num_pmd_entries = PFM_PM_NUM_PMDS, + .probe_pmu = pfm_cell_probe_pmu, + .arch_info = &pfm_cell_pmu_info, + .flags = PFM_PMU_BUILTIN_FLAG, + .owner = THIS_MODULE, +}; + +static int __init pfm_cell_pmu_init_module(void) +{ + return pfm_pmu_register(&pfm_cell_pmu_conf); +} + +static void __exit pfm_cell_pmu_cleanup_module(void) +{ + 
pfm_pmu_unregister(&pfm_cell_pmu_conf); +} + +module_init(pfm_cell_pmu_init_module); +module_exit(pfm_cell_pmu_cleanup_module); Index: linux-2.6/arch/powerpc/perfmon/perfmon_cell_hw_smpl.c =================================================================== --- /dev/null +++ linux-2.6/arch/powerpc/perfmon/perfmon_cell_hw_smpl.c @@ -0,0 +1,298 @@ +/* + * Copyright IBM Corp 2007 + * + * Contributed by Carl Love + * and Kevin Corry + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + * + * + * This file implements the IBM Cell PMU hardware-sampling module. + */ +#include +#include +#include +#include + +MODULE_AUTHOR("Carl Love , " + "Kevin Corry "); +MODULE_DESCRIPTION("Perfmon2 CELL hardware sampling format"); +MODULE_LICENSE("GPL"); + +/** + * pfm_cell_hw_smpl_validate + * + * Validate the arguments passed from user-space for creating the + * sampling-buffer. The buffer must be large enough to hold the + * sampling-buffer header and at least one copy of the trace-buffer. + **/ +static int pfm_cell_hw_smpl_validate(u32 flags, u16 npmds, void *data) +{ + struct pfm_cell_hw_smpl_arg *arg = data; + + if (!arg) { + PFM_ERR("No argument passed."); + return -EINVAL; + } + + if (arg->buf_size < PFM_CELL_HW_SMPL_MIN_BUF_SIZE) { + PFM_ERR("Specified buffer size (%lu) too small. 
" + "Min size is %lu bytes.", + arg->buf_size, PFM_CELL_HW_SMPL_MIN_BUF_SIZE); + return -EINVAL; + } + + return 0; +} + +/** + * pfm_cell_hw_smpl_get_size + * + * Tell the Perfmon2 core how large a buffer we need to have allocated, and + * it will do the allocation for us. The size of the buffer has already been + * validated. + **/ +static int pfm_cell_hw_smpl_get_size(unsigned int flags, + void *data, size_t *size) +{ + struct pfm_cell_hw_smpl_arg *arg = data; + *size = arg->buf_size; + return 0; +} + +/** + * pfm_cell_hw_smpl_init + * + * Initialize the start of the sampling-buffer with a header structure. + * The buffer has already been allocated by the Perfmon2 core. + **/ +static int pfm_cell_hw_smpl_init(struct pfm_context *ctx, void *buf, + u32 flags, u16 npmds, void *data) +{ + struct pfm_cell_hw_smpl_hdr *hdr = buf; + struct pfm_cell_hw_smpl_arg *arg = data; + + hdr->count = 0; + hdr->cur_offset = sizeof(*hdr); + hdr->overflows = 0; + hdr->buf_size = arg->buf_size; + hdr->version = PFM_CELL_HW_SMPL_VERSION; + hdr->buf_flags = arg->buf_flags; + + return 0; +} + +/** + * pfm_cell_hw_smpl_notify_user + * + * Add a "buffer full" message to the context and wake up any user-space + * process that is polling on the context's file descriptor. That process + * can then read() from the file-descriptor to get a copy of the message. + **/ +static int pfm_cell_hw_smpl_notify_user(struct pfm_context *ctx) +{ + union pfarg_msg *msg; + + if (ctx->flags.no_msg) { + return 0; + } + + msg = pfm_get_new_msg(ctx); + if (msg == NULL) { + /* The message queue is full. The user must have called + * pfm_restart(), but didn't extract any messages. + */ + PFM_ERR("No notification messages available."); + return -EBUSY; + } + + msg->type = PFM_MSG_CELL_HW_SMPL_BUF_FULL; + + return pfm_notify_user(ctx); +} + +/** + * pfm_cell_hw_smpl_handler + * + * Create a new entry-header in the sampling-buffer and copy the current + * contents of the trace-buffer into the sampling-buffer. 
+ **/ +static int pfm_cell_hw_smpl_handler(void *buf, + struct pfm_ovfl_arg *arg, + unsigned long ip, + u64 tstamp, + void *data) +{ + struct pfm_cell_hw_smpl_hdr *hdr = buf; + struct pfm_cell_hw_smpl_entry_hdr *ent; + struct pfm_context *ctx; + void *cur, *end; + u64 *trace_buffer_lines; + u32 trace_addr; + + /* If this handler was called due to an actual PMD overflowing, do + * nothing. Only store the contents of the trace-buffer if the trace- + * buffer overflowed. + */ + if (arg->ovfl_pmd != PFM_CELL_HW_SMPL_OVFL_PMD) + return 0; + + cur = buf + hdr->cur_offset; + end = buf + hdr->buf_size; + ctx = __get_cpu_var(pmu_ctx); + + /* Check if the sampling-buffer is full. This should never happen, + * since we check if the buffer is full after adding the new entry. + */ + if ((end - cur) < PFM_CELL_HW_SMPL_MAX_ENTRY_SIZE) { + PFM_ERR("Cell HW Sampling: Buffer is full " + "before adding new entry."); + goto full; + } + + ent = cur; + + /* current = task running at the time of the overflow. + * + * per-task mode: + * - This is usually the task being monitored. + * Under certain conditions, it might be a different task + * + * system-wide: + * - This is not necessarily the task controlling the session + */ + ent->pid = current->pid; + ent->tgid = current->tgid; + ent->cpu = smp_processor_id(); + ent->set = arg->active_set; + ent->num_samples = 0; + ent->entry_num = hdr->count; + + /* Read at most 1024 lines from the trace-buffer. Note, lines could be + * added to the trace-buffer while it is being read. However, we only + * made sure we had space for up to 1024 lines. 
+ */ + + trace_buffer_lines = (u64 *)(ent + 1); + trace_addr = cbe_read_pm(ent->cpu, trace_address); + while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY) && + ent->num_samples < CBE_PM_TRACE_BUF_MAX_COUNT) { + cbe_read_trace_buffer(ent->cpu, trace_buffer_lines); + trace_buffer_lines += 2; + ent->num_samples++; + trace_addr = cbe_read_pm(ent->cpu, trace_address); + } + + /* Update the sampling-buffer header for the next entry. Since the + * hw_smpl_hdr and hw_smpl_entry_hdr structures are both padded to + * 128-bits, and each trace-buffer line is 128-bits, we know that + * every buffer entry will start on a 128-bit boundary. + */ + if (ent->num_samples) { + cur = trace_buffer_lines; + hdr->cur_offset = cur - (void *)hdr; + hdr->count++; + } + + /* Check the available size in the buffer again so we won't lose the + * next sample entry. + */ + if ((end - cur) < PFM_CELL_HW_SMPL_MAX_ENTRY_SIZE) + goto full; + + return 0; + +full: + PFM_DBG_ovfl("Sampling-buffer full. free bytes=%lu, count=%lu", + end-cur, hdr->count); + + /* Increment the number of sampling-buffer overflows. This + * is important for detecting duplicate sets of samples. + */ + hdr->overflows++; + + /* Add a message to the context's message queue and wake up any + * user-space programs that are polling on the context's file + * descriptor. + */ + pfm_cell_hw_smpl_notify_user(ctx); + + /* Mask monitoring until a pfm_restart() occurs. */ + pfm_mask_monitoring(ctx, ctx->active_set); + ctx->state = PFM_CTX_MASKED; + ctx->flags.can_restart = 1; + + return -ENOBUFS; +} + +/** + * pfm_cell_hw_smpl_restart + * + * Reinitialize the sampling-buffer header, effectively deleting all entries + * previously stored in the sampling-buffer. + * + * FIX: What is the "is_active" argument for? It's not used by any of the + * other sampling modules. 
+ **/ +static int pfm_cell_hw_smpl_restart(int is_active, u32 *ovfl_ctrl, void *buf) +{ + struct pfm_cell_hw_smpl_hdr *hdr = buf; + + hdr->count = 0; + hdr->cur_offset = sizeof(*hdr); + hdr->overflows = 0; + + return 0; +} + +/** + * pfm_cell_hw_smpl_exit + **/ +static int pfm_cell_hw_smpl_exit(void *buf) +{ + return 0; +} + +/** + * cell_hw_smpl_fmt + * + * Structure to describe the Cell hardware-sampling module to the Perfmon2 core. + **/ +static struct pfm_smpl_fmt cell_hw_smpl_fmt = { + .fmt_name = PFM_CELL_HW_SMPL_NAME, + .fmt_arg_size = sizeof(struct pfm_cell_hw_smpl_arg), + .fmt_flags = PFM_FMT_BUILTIN_FLAG, + .fmt_version = PFM_CELL_HW_SMPL_VERSION, + .fmt_validate = pfm_cell_hw_smpl_validate, + .fmt_getsize = pfm_cell_hw_smpl_get_size, + .fmt_init = pfm_cell_hw_smpl_init, + .fmt_handler = pfm_cell_hw_smpl_handler, + .fmt_restart = pfm_cell_hw_smpl_restart, + .fmt_exit = pfm_cell_hw_smpl_exit, + .owner = THIS_MODULE, +}; + +static int __init pfm_cell_hw_smpl_init_module(void) +{ + return pfm_fmt_register(&cell_hw_smpl_fmt); +} + +static void __exit pfm_cell_hw_smpl_exit_module(void) +{ + pfm_fmt_unregister(&cell_hw_smpl_fmt); +} + +module_init(pfm_cell_hw_smpl_init_module); +module_exit(pfm_cell_hw_smpl_exit_module); Index: linux-2.6/arch/powerpc/perfmon/perfmon_power5.c =================================================================== --- /dev/null +++ linux-2.6/arch/powerpc/perfmon/perfmon_power5.c @@ -0,0 +1,292 @@ +/* + * This file contains the POWER5 PMU register description tables + * and pmc checker used by perfmon.c. + * + * Copyright (c) 2005 David Gibson, IBM Corporation. + * + * Based on perfmon_p6.c: + * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include +#include + +MODULE_AUTHOR("David Gibson "); +MODULE_DESCRIPTION("POWER5 PMU description table"); +MODULE_LICENSE("GPL"); + +static struct pfm_regmap_desc pfm_power5_pmc_desc[]={ +/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0), +/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1), +/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA) +}; +#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power5_pmc_desc) + +/* The TB and PURR registers are read-only. Also, note that the TB register + * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers. + * For Perfmon2's purposes, we'll treat it as a single 64-bit register. 
+ */
+/* PMD index order: 0 = TB, 1-6 = PMC1-PMC6 (type PFM_REG_C), 7 = PURR. */
+static struct pfm_regmap_desc pfm_power5_pmd_desc[]={
+/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
+/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
+/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
+/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
+/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
+/* pmd5 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5),
+/* pmd6 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6),
+/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR),
+};
+/* Number of entries in the PMD table above. */
+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power5_pmd_desc)
+
+/**
+ * pfm_power5_probe_pmu
+ *
+ * Read the processor version register and accept only PV_POWER5.
+ * Returns 0 on a match and -1 otherwise.
+ **/
+static int pfm_power5_probe_pmu(void)
+{
+	unsigned long pvr = mfspr(SPRN_PVR);
+
+	if (PVR_VER(pvr) != PV_POWER5)
+		return -1;
+
+	return 0;
+}
+
+/**
+ * pfm_power5_write_pmc
+ *
+ * Write @value to the control register with index @cnum. The SPR is
+ * selected from the hw_addr recorded in pfm_pmu_conf->pmc_desc[]; any
+ * address other than MMCR0, MMCR1 or MMCRA hits BUG().
+ *
+ * Each case names its SPR literally - presumably because mtspr() needs a
+ * compile-time constant register number (TODO confirm).
+ **/
+static void pfm_power5_write_pmc(unsigned int cnum, u64 value)
+{
+	switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
+	case SPRN_MMCR0:
+		mtspr(SPRN_MMCR0, value);
+		break;
+	case SPRN_MMCR1:
+		mtspr(SPRN_MMCR1, value);
+		break;
+	case SPRN_MMCRA:
+		mtspr(SPRN_MMCRA, value);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/**
+ * pfm_power5_write_pmd
+ *
+ * Write @value to the data register with index @cnum, selected through
+ * pfm_pmu_conf->pmd_desc[cnum].hw_addr. Writes aimed at the TB or PURR
+ * entries are dropped silently; an unknown address hits BUG().
+ *
+ * PMC7 and PMC8 are handled here although pfm_power5_pmd_desc[] only
+ * lists PMC1-PMC6.
+ **/
+static void pfm_power5_write_pmd(unsigned int cnum, u64 value)
+{
+	switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
+	case SPRN_PMC1:
+		mtspr(SPRN_PMC1, value);
+		break;
+	case SPRN_PMC2:
+		mtspr(SPRN_PMC2, value);
+		break;
+	case SPRN_PMC3:
+		mtspr(SPRN_PMC3, value);
+		break;
+	case SPRN_PMC4:
+		mtspr(SPRN_PMC4, value);
+		break;
+	case SPRN_PMC5:
+		mtspr(SPRN_PMC5, value);
+		break;
+	case SPRN_PMC6:
+		mtspr(SPRN_PMC6, value);
+		break;
+	case SPRN_PMC7:
+		mtspr(SPRN_PMC7, value);
+		break;
+	case SPRN_PMC8:
+		mtspr(SPRN_PMC8, value);
+		break;
+	case SPRN_TBRL:
+	case SPRN_PURR:
+		/* Ignore writes to read-only registers. */
+		break;
+	default:
+		BUG();
+	}
+}
+
+/**
+ * pfm_power5_read_pmd
+ *
+ * Return the current value of the data register with index @cnum,
+ * selected through pfm_pmu_conf->pmd_desc[cnum].hw_addr. An unknown
+ * address hits BUG().
+ **/
+static u64 pfm_power5_read_pmd(unsigned int cnum)
+{
+	switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
+	case SPRN_PMC1:
+		return mfspr(SPRN_PMC1);
+	case SPRN_PMC2:
+		return mfspr(SPRN_PMC2);
+	case SPRN_PMC3:
+		return mfspr(SPRN_PMC3);
+	case SPRN_PMC4:
+		return mfspr(SPRN_PMC4);
+	case SPRN_PMC5:
+		return mfspr(SPRN_PMC5);
+	case SPRN_PMC6:
+		return mfspr(SPRN_PMC6);
+	case SPRN_PMC7:
+		return mfspr(SPRN_PMC7);
+	case SPRN_PMC8:
+		return mfspr(SPRN_PMC8);
+	case SPRN_TBRL:
+		/*
+		 * The table entry for TB carries SPRN_TBRL; the value handed
+		 * back is TBRU in the upper 32 bits OR-ed with TBRL.
+		 *
+		 * NOTE(review): the two halves are fetched by separate
+		 * mfspr() calls with no re-check, so a carry from the low
+		 * half between the reads could presumably yield an
+		 * inconsistent value - confirm whether that matters here.
+		 */
+		return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
+	case SPRN_PURR:
+		/* Report 0 when the CPU does not advertise CPU_FTR_PURR. */
+		if (cpu_has_feature(CPU_FTR_PURR))
+			return mfspr(SPRN_PURR);
+		else
+			return 0;
+	default:
+		BUG();
+	}
+}
+
+/**
+ * pfm_power5_enable_counters
+ *
+ * Just need to load the current values into the control registers.
+ * Only PMCs whose bit is set in set->used_pmcs are written, each with
+ * its saved value from set->pmcs[]. ctx is not used.
+ **/
+static void pfm_power5_enable_counters(struct pfm_context *ctx,
+				       struct pfm_event_set *set)
+{
+	unsigned int i, max_pmc;
+
+	max_pmc = pfm_pmu_conf->regs.max_pmc;
+
+	for (i = 0; i < max_pmc; i++)
+		if (test_bit(i, set->used_pmcs))
+			pfm_power5_write_pmc(i, set->pmcs[i]);
+}
+
+/**
+ * pfm_power5_disable_counters
+ *
+ * Write zero to every control register whose bit is set in
+ * set->used_pmcs; PMCs the set does not use are left alone. ctx is not
+ * used.
+ **/
+static void pfm_power5_disable_counters(struct pfm_context *ctx,
+					struct pfm_event_set *set)
+{
+	unsigned int i, max;
+
+	max = pfm_pmu_conf->regs.max_pmc;
+
+	for (i = 0; i < max; i++)
+		if (test_bit(i, set->used_pmcs))
+			pfm_power5_write_pmc(i, 0);
+}
+
+/**
+ * pfm_power5_get_ovfl_pmds
+ *
+ * Determine which counters in this set have overflowed and fill in the
+ * set->povfl_pmds mask and set->npend_ovfls count.
+ **/
+static void pfm_power5_get_ovfl_pmds(struct pfm_context *ctx,
+				      struct pfm_event_set *set)
+{
+	unsigned int i;
+	unsigned int max_pmd = pfm_pmu_conf->regs.max_cnt_pmd;
+	u64 *used_pmds = set->used_pmds;
+	u64 *cntr_pmds = pfm_pmu_conf->regs.cnt_pmds;
+	/*
+	 * A counter is treated as overflowed when bit <counter_width> of its
+	 * value is set. Build the mask from a 64-bit constant: with
+	 * counter_width == 31 a plain int "1 << 31" shifts into the sign
+	 * bit, which is undefined and in practice widens to
+	 * 0xffffffff80000000 instead of the single bit intended.
+	 */
+	u64 width_mask = 1ULL << pfm_pmu_conf->counter_width;
+	u64 new_val, mask[PFM_PMD_BV];
+
+	/* Restrict the scan to PMDs in both cnt_pmds and the set's used_pmds. */
+	bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds),
+		   cast_ulp(used_pmds), max_pmd);
+
+	for (i = 0; i < max_pmd; i++) {
+		if (test_bit(i, mask)) {
+			new_val = pfm_power5_read_pmd(i);
+			if (new_val & width_mask) {
+				set_bit(i, set->povfl_pmds);
+				set->npend_ovfls++;
+			}
+		}
+	}
+}
+
+/**
+ * pfm_power5_irq_handler
+ *
+ * Freeze the counters, set MSR_PMM, pass the interrupt on to
+ * pfm_interrupt_handler(), then rewrite MMCRA and re-arm MMCR0 (PMXE set,
+ * PMAO and FC cleared). ctx is not used here.
+ **/
+static void pfm_power5_irq_handler(struct pt_regs *regs,
+				   struct pfm_context *ctx)
+{
+	u32 mmcr0;
+	u64 mmcra;
+
+	/* Disable the counters (set the freeze bit) to not pollute
+	 * the counts.
+	 */
+	mmcr0 = mfspr(SPRN_MMCR0);
+	mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
+	mmcra = mfspr(SPRN_MMCRA);
+
+	/* Set the PMM bit (see comment below). */
+	mtmsrd(mfmsr() | MSR_PMM);
+
+	pfm_interrupt_handler(instruction_pointer(regs), regs);
+
+	/* Re-read MMCR0 after the handler has run. */
+	mmcr0 = mfspr(SPRN_MMCR0);
+	/* Reset the perfmon trigger. */
+	mmcr0 |= MMCR0_PMXE;
+
+	/*
+	 * We must clear the PMAO bit on some (GQ) chips. Just do it
+	 * all the time.
+	 */
+	mmcr0 &= ~MMCR0_PMAO;
+
+	/*
+	 * Clear the appropriate bits in the MMCRA.
+	 *
+	 * NOTE(review): as written this keeps only the THRM/OTHER bits of the
+	 * value sampled before pfm_interrupt_handler() and clears every other
+	 * bit, which is the opposite of what the sentence above describes. A
+	 * "~" may be missing, and the POWER6_* masks look out of place in the
+	 * POWER5 module - confirm against the hardware documentation before
+	 * changing.
+	 */
+	mmcra &= POWER6_MMCRA_THRM | POWER6_MMCRA_OTHER;
+	mtspr(SPRN_MMCRA, mmcra);
+
+	/*
+	 * Now clear the freeze bit, counting will not start until we
+	 * rfid from this exception, because only at that point will
+	 * the PMM bit be cleared.
+	 */
+	mmcr0 &= ~MMCR0_FC;
+	mtspr(SPRN_MMCR0, mmcr0);
+}
+
+/* POWER5 callbacks, attached to the PMU description through .arch_info. */
+struct pfm_arch_pmu_info pfm_power5_pmu_info = {
+	.pmu_style = PFM_POWERPC_PMU_POWER5,
+	.write_pmc = pfm_power5_write_pmc,
+	.write_pmd = pfm_power5_write_pmd,
+	.read_pmd = pfm_power5_read_pmd,
+	.irq_handler = pfm_power5_irq_handler,
+	.get_ovfl_pmds = pfm_power5_get_ovfl_pmds,
+	.enable_counters = pfm_power5_enable_counters,
+	.disable_counters = pfm_power5_disable_counters,
+};
+
+/*
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
+ */
+static struct pfm_pmu_config pfm_power5_pmu_conf = {
+	.pmu_name = "POWER5",
+	.counter_width = 31,
+	.pmd_desc = pfm_power5_pmd_desc,
+	.pmc_desc = pfm_power5_pmc_desc,
+	.num_pmc_entries = PFM_PM_NUM_PMCS,
+	.num_pmd_entries = PFM_PM_NUM_PMDS,
+	.probe_pmu = pfm_power5_probe_pmu,
+	.arch_info = &pfm_power5_pmu_info,
+	.flags = PFM_PMU_BUILTIN_FLAG,
+	.owner = THIS_MODULE
+};
+
+/* Module load: register the POWER5 description; returns the register result. */
+static int __init pfm_power5_pmu_init_module(void)
+{
+	return pfm_pmu_register(&pfm_power5_pmu_conf);
+}
+
+/* Module unload: unregister the POWER5 description. */
+static void __exit pfm_power5_pmu_cleanup_module(void)
+{
+	pfm_pmu_unregister(&pfm_power5_pmu_conf);
+}
+
+module_init(pfm_power5_pmu_init_module);
+module_exit(pfm_power5_pmu_cleanup_module);
Index: linux-2.6/arch/powerpc/perfmon/perfmon_ppc32.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/powerpc/perfmon/perfmon_ppc32.c
@@ -0,0 +1,340 @@
+/*
+ * This file contains the PPC32 PMU register description tables
+ * and pmc checker used by perfmon.c.
+ *
+ * Philip Mucci, mucci@cs.utk.edu
+ *
+ * Based on code from:
+ * Copyright (c) 2005 David Gibson, IBM Corporation.
+ *
+ * Based on perfmon_p6.c:
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ * 02111-1307 USA
+ */
+/*
+ * NOTE(review): the three #include directives below have no header name
+ * in this copy of the patch, and the MODULE_AUTHOR string ends in a blank.
+ * Presumably the <...> parts were stripped as markup - restore them from
+ * the original posting before applying.
+ */
+#include
+#include
+#include
+
+MODULE_AUTHOR("Philip Mucci ");
+MODULE_DESCRIPTION("PPC32 PMU description table");
+MODULE_LICENSE("GPL");
+
+/* Tentative definition; pfm_ppc32_probe_pmu() fills in fields at probe time. */
+static struct pfm_pmu_config pfm_ppc32_pmu_conf;
+
+/*
+ * PMC (control register) map for the largest supported layout: MMCR0,
+ * MMCR1, MMCR2. pfm_ppc32_probe_pmu() marks the entries a given CPU does
+ * not implement as PFM_REG_NA.
+ */
+static struct pfm_regmap_desc pfm_ppc32_pmc_desc[] = {
+/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", 0x0, 0, 0, SPRN_MMCR0),
+/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0x0, 0, 0, SPRN_MMCR1),
+/* mmcr2 */ PMC_D(PFM_REG_I, "MMCR2", 0x0, 0, 0, SPRN_MMCR2),
+};
+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_ppc32_pmc_desc)
+
+/*
+ * PMD (data register) map for the largest supported layout: PMC1-PMC6.
+ * Trimmed at probe time in the same way as the PMC table.
+ */
+static struct pfm_regmap_desc pfm_ppc32_pmd_desc[] = {
+/* pmd0 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
+/* pmd1 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
+/* pmd2 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
+/* pmd3 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
+/* pmd4 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5),
+/* pmd5 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6),
+};
+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_ppc32_pmd_desc)
+
+/**
+ * perfmon_perf_irq
+ *
+ * Interrupt callback passed to reserve_pmc_hardware() by
+ * pfm_ppc32_probe_pmu(). Sets MSR_PMM, lets pfm_interrupt_handler()
+ * process the event, then rewrites MMCR0 to unfreeze the counters and
+ * re-enable the interrupt.
+ **/
+static void perfmon_perf_irq(struct pt_regs *regs)
+{
+	u32 mmcr0;
+
+	/* BLATANTLY STOLEN FROM OPROFILE, then modified */
+
+	/* set the PMM bit (see comment below) */
+	mtmsr(mfmsr() | MSR_PMM);
+
+	pfm_interrupt_handler(instruction_pointer(regs), regs);
+
+	/* The freeze bit was set by the interrupt.
+	 * Clear the freeze bit, and reenable the interrupt.
+	 * The counters won't actually start until the rfi clears
+	 * the PMM bit.
+	 */
+
+	/* Unfreezes the counters on this CPU, enables the interrupt,
+	 * enables the counters to trigger the interrupt, and sets the
+	 * counters to only count when the mark bit is not set.
+	 */
+	mmcr0 = mfspr(SPRN_MMCR0);
+
+	mmcr0 &= ~(MMCR0_FC | MMCR0_FCM0);
+	mmcr0 |= (MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE);
+
+	mtspr(SPRN_MMCR0, mmcr0);
+}
+
+/**
+ * pfm_ppc32_probe_pmu
+ *
+ * Identify the CPU from PVR_VER(PVR) and pick its PMU name, PMU style and
+ * the number of MMCR and PMC registers it implements. Table entries past
+ * those counts are marked PFM_REG_NA. intsok records whether the model
+ * (and, for some, the revision) is expected to deliver PMU interrupts.
+ *
+ * Returns -1 for an unrecognized PVR version, otherwise whatever
+ * reserve_pmc_hardware() returns.
+ **/
+static int pfm_ppc32_probe_pmu(void)
+{
+	enum ppc32_pmu_type pm_type;
+	int nmmcr = 0, npmds = 0, intsok = 0, i;
+	unsigned int pvr;
+	char *str;
+
+	pvr = mfspr(SPRN_PVR);
+
+	switch (PVR_VER(pvr)) {
+	case 0x0004: /* 604 */
+		str = "PPC604";
+		pm_type = PFM_POWERPC_PMU_604;
+		nmmcr = 1;
+		npmds = 2;
+		break;
+	case 0x0009: /* 604e; */
+	case 0x000A: /* 604ev */
+		str = "PPC604e";
+		pm_type = PFM_POWERPC_PMU_604e;
+		nmmcr = 2;
+		npmds = 4;
+		break;
+	case 0x0008: /* 750/740 */
+		str = "PPC750";
+		pm_type = PFM_POWERPC_PMU_750;
+		nmmcr = 2;
+		npmds = 4;
+		break;
+	case 0x7000: /* 750FX */
+	case 0x7001:
+		str = "PPC750";
+		pm_type = PFM_POWERPC_PMU_750;
+		nmmcr = 2;
+		npmds = 4;
+		if ((pvr & 0xFF0F) >= 0x0203)
+			intsok = 1;
+		break;
+	case 0x7002: /* 750GX */
+		str = "PPC750";
+		pm_type = PFM_POWERPC_PMU_750;
+		nmmcr = 2;
+		npmds = 4;
+		intsok = 1;
+		break;
+	case 0x000C: /* 7400 */
+		str = "PPC7400";
+		pm_type = PFM_POWERPC_PMU_7400;
+		nmmcr = 3;
+		npmds = 4;
+		break;
+	case 0x800C: /* 7410 */
+		str = "PPC7410";
+		pm_type = PFM_POWERPC_PMU_7400;
+		nmmcr = 3;
+		npmds = 4;
+		if ((pvr & 0xFFFF) >= 0x01103)
+			intsok = 1;
+		break;
+	case 0x8000: /* 7451/7441 */
+	case 0x8001: /* 7455/7445 */
+	case 0x8002: /* 7457/7447 */
+	case 0x8003: /* 7447A */
+	case 0x8004: /* 7448 */
+		str = "PPC7450";
+		pm_type = PFM_POWERPC_PMU_7450;
+		nmmcr = 3; npmds = 6;
+		intsok = 1;
+		break;
+	default:
+		PFM_INFO("Unknown PVR_VER(0x%x)\n", PVR_VER(pvr));
+		return -1;
+	}
+
+	/*
+	 * deconfigure unimplemented registers
+	 */
+	for (i = npmds; i < PFM_PM_NUM_PMDS; i++)
+		pfm_ppc32_pmd_desc[i].type = PFM_REG_NA;
+
+	for (i = nmmcr; i <
PFM_PM_NUM_PMCS; i++) + pfm_ppc32_pmc_desc[i].type = PFM_REG_NA; + + /* + * update PMU description structure + */ + pfm_ppc32_pmu_conf.pmu_name = str; + pfm_ppc32_pmu_info.pmu_style = pm_type; + pfm_ppc32_pmu_conf.num_pmc_entries = nmmcr; + pfm_ppc32_pmu_conf.num_pmd_entries = npmds; + + if (intsok == 0) + PFM_INFO("Interrupts unlikely to work\n"); + + return reserve_pmc_hardware(perfmon_perf_irq); +} + +static void pfm_ppc32_write_pmc(unsigned int cnum, u64 value) +{ + switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { + case SPRN_MMCR0: + mtspr(SPRN_MMCR0, value); + break; + case SPRN_MMCR1: + mtspr(SPRN_MMCR1, value); + break; + case SPRN_MMCR2: + mtspr(SPRN_MMCR2, value); + break; + default: + BUG(); + } +} + +static void pfm_ppc32_write_pmd(unsigned int cnum, u64 value) +{ + switch (pfm_pmu