mirror of
https://github.com/openwrt/openwrt.git
synced 2025-01-07 06:18:54 +00:00
5931564285
SVN-Revision: 13202
31653 lines
902 KiB
Diff
31653 lines
902 KiB
Diff
diff --git a/Documentation/ABI/testing/sysfs-perfmon b/Documentation/ABI/testing/sysfs-perfmon
|
|
new file mode 100644
|
|
index 0000000..bde434c
|
|
--- /dev/null
|
|
+++ b/Documentation/ABI/testing/sysfs-perfmon
|
|
@@ -0,0 +1,87 @@
|
|
+What: /sys/kernel/perfmon
|
|
+Date: Nov 2007
|
|
+KernelVersion: 2.6.24
|
|
+Contact: eranian@gmail.com
|
|
+
|
|
+Description: provide the configuration interface for the perfmon2 subsystems.
|
|
+ The tree contains information about the detected hardware, current
|
|
+ state of the subsystem as well as some configuration parameters.
|
|
+
|
|
+ The tree consists of the following entries:
|
|
+
|
|
+ /sys/kernel/perfmon/debug (read-write):
|
|
+
|
|
+ Enable perfmon2 debugging output via klogd. Debug messages produced during
|
|
+ PMU interrupt handling are not controlled by this entry. The traces are rate-limited
|
|
+ to avoid flooding of the console. It is possible to change the throttling
|
|
+ via /proc/sys/kernel/printk_ratelimit. The value is interpreted as a bitmask.
|
|
+ Each bit enables a particular type of debug messages. Refer to the file
|
|
+ include/linux/perfmon_kern.h for more information.
|
|
+
|
|
+ /sys/kernel/perfmon/pmc_max_fast_arg (read-only):
|
|
+
|
|
+ Number of perfmon2 syscall arguments copied directly onto the
|
|
+ stack (copy_from_user) for pfm_write_pmcs(). Copying to the stack avoids
|
|
+ having to allocate a buffer. The unit is the number of pfarg_pmc_t
|
|
+ structures.
|
|
+
|
|
+ /sys/kernel/perfmon/pmd_max_fast_arg (read-only):
|
|
+
|
|
+ Number of perfmon2 syscall arguments copied directly onto the
|
|
+ stack (copy_from_user) for pfm_write_pmds()/pfm_read_pmds(). Copying
|
|
+ to the stack avoids having to allocate a buffer. The unit is the number
|
|
+ of pfarg_pmd_t structures.
|
|
+
|
|
+
|
|
+ /sys/kernel/perfmon/reset_stats (write-only):
|
|
+
|
|
+ Reset the statistics collected by perfmon2. Stats are available
|
|
+ per-cpu via debugfs.
|
|
+
|
|
+ /sys/kernel/perfmon/smpl_buffer_mem_cur (read-only):
|
|
+
|
|
+ Reports the amount of memory currently dedicated to sampling
|
|
+ buffers by the kernel. The unit is byte.
|
|
+
|
|
+ /sys/kernel/perfmon/smpl_buffer_mem_max (read-write):
|
|
+
|
|
+ Maximum amount of kernel memory usable for sampling buffers. -1 means
|
|
+ everything that is available. Unit is byte.
|
|
+
|
|
+ /sys/kernel/perfmon/smpl_buffer_mem_cur (read-only):
|
|
+
|
|
+ Current utilization of kernel memory in bytes.
|
|
+
|
|
+ /sys/kernel/perfmon/sys_group (read-write):
|
|
+
|
|
+ Users group allowed to create a system-wide perfmon2 context (session).
|
|
+ -1 means any group. This control will be kept until we find a package
|
|
+ able to control capabilities via PAM.
|
|
+
|
|
+ /sys/kernel/perfmon/task_group (read-write):
|
|
+
|
|
+ Users group allowed to create a per-thread context (session).
|
|
+ -1 means any group. This control will be kept until we find a
|
|
+ package able to control capabilities via PAM.
|
|
+
|
|
+ /sys/kernel/perfmon/sys_sessions_count (read-only):
|
|
+
|
|
+ Number of system-wide contexts currently attached to CPUs.
|
|
+
|
|
+ /sys/kernel/perfmon/task_sessions_count (read-only):
|
|
+
|
|
+ Number of per-thread contexts currently attached to threads.
|
|
+
|
|
+ /sys/kernel/perfmon/version (read-only):
|
|
+
|
|
+ Perfmon2 interface revision number.
|
|
+
|
|
+ /sys/kernel/perfmon/arg_mem_max (read-write):
|
|
+
|
|
+ Maximum size of vector arguments expressed in bytes. Can be modified.
|
|
+
|
|
+ /sys/kernel/perfmon/mode (read-write):
|
|
+
|
|
+ Bitmask to enable/disable certain perfmon2 features.
|
|
+ Currently defined:
|
|
+ - bit 0: if set, then reserved bitfields are ignored on PMC writes
|
|
diff --git a/Documentation/ABI/testing/sysfs-perfmon-fmt b/Documentation/ABI/testing/sysfs-perfmon-fmt
|
|
new file mode 100644
|
|
index 0000000..1b45270
|
|
--- /dev/null
|
|
+++ b/Documentation/ABI/testing/sysfs-perfmon-fmt
|
|
@@ -0,0 +1,18 @@
|
|
+What: /sys/kernel/perfmon/formats
|
|
+Date: 2007
|
|
+KernelVersion: 2.6.24
|
|
+Contact: eranian@gmail.com
|
|
+
|
|
+Description: provide description of available perfmon2 custom sampling buffer formats
|
|
+ which are implemented as independent kernel modules. Each format gets
|
|
+ a subdir with a few entries.
|
|
+
|
|
+ The name of the subdir is the name of the sampling format. The same name
|
|
+ must be passed to pfm_create_context() to use the format.
|
|
+
|
|
+ Each subdir XX contains the following entries:
|
|
+
|
|
+ /sys/kernel/perfmon/formats/XX/version (read-only):
|
|
+
|
|
+ Version number of the format in clear text and null terminated.
|
|
+
|
|
diff --git a/Documentation/ABI/testing/sysfs-perfmon-pmu b/Documentation/ABI/testing/sysfs-perfmon-pmu
|
|
new file mode 100644
|
|
index 0000000..a1afc7e
|
|
--- /dev/null
|
|
+++ b/Documentation/ABI/testing/sysfs-perfmon-pmu
|
|
@@ -0,0 +1,46 @@
|
|
+What: /sys/kernel/perfmon/pmu
|
|
+Date: Nov 2007
|
|
+KernelVersion: 2.6.24
|
|
+Contact: eranian@gmail.com
|
|
+
|
|
+Description: provide information about the currently loaded PMU description module.
|
|
+ The module contains the mapping of the actual performance counter registers
|
|
+ onto the logical PMU exposed by perfmon. There is at most one PMU description
|
|
+ module loaded at any time.
|
|
+
|
|
+ The sysfs PMU tree provides a description of the mapping for each register.
|
|
+ There is one subdir per config and data register, along with an entry for the
|
|
+ name of the PMU model.
|
|
+
|
|
+ The model entry is as follows:
|
|
+
|
|
+ /sys/kernel/perfmon/pmu_desc/model (read-only):
|
|
+
|
|
+ Name of the PMU model in clear text and zero terminated.
|
|
+
|
|
+ Then each logical PMU register, XX, gets a subtree with the following entries:
|
|
+
|
|
+ /sys/kernel/perfmon/pmu_desc/pm*XX/addr (read-only):
|
|
+
|
|
+ The physical address or index of the actual underlying hardware register.
|
|
+ On Itanium, it corresponds to the index. But on X86 processor, this is
|
|
+ the actual MSR address.
|
|
+
|
|
+ /sys/kernel/perfmon/pmu_desc/pm*XX/dfl_val (read-only):
|
|
+
|
|
+ The default value of the register in hexadecimal.
|
|
+
|
|
+ /sys/kernel/perfmon/pmu_desc/pm*XX/name (read-only):
|
|
+
|
|
+ The name of the hardware register.
|
|
+
|
|
+ /sys/kernel/perfmon/pmu_desc/pm*XX/rsvd_msk (read-only):
|
|
+
|
|
+ The bitmask of reserved bits, i.e., bits which cannot be changed by
|
|
+ applications. When a bit is set, it means the corresponding bit in the
|
|
+ actual register is reserved.
|
|
+
|
|
+ /sys/kernel/perfmon/pmu_desc/pm*XX/width (read-only):
|
|
+
|
|
+ the width in bits of the registers. This field is only relevant for counter
|
|
+ registers.
|
|
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
|
|
index 1150444..2652b6c 100644
|
|
--- a/Documentation/kernel-parameters.txt
|
|
+++ b/Documentation/kernel-parameters.txt
|
|
@@ -1643,6 +1643,9 @@ and is between 256 and 4096 characters. It is defined in the file
|
|
Format: { 0 | 1 }
|
|
See arch/parisc/kernel/pdc_chassis.c
|
|
|
|
+ perfmon_debug [PERFMON] Enables Perfmon debug messages. Needed
|
|
+ to see traces of the early startup phase.
|
|
+
|
|
pf. [PARIDE]
|
|
See Documentation/paride.txt.
|
|
|
|
diff --git a/Documentation/perfmon2-debugfs.txt b/Documentation/perfmon2-debugfs.txt
|
|
new file mode 100644
|
|
index 0000000..b30cae8
|
|
--- /dev/null
|
|
+++ b/Documentation/perfmon2-debugfs.txt
|
|
@@ -0,0 +1,126 @@
|
|
+ The perfmon2 debug and statistics interface
|
|
+ ------------------------------------------
|
|
+ Stephane Eranian
|
|
+ <eranian@gmail.com>
|
|
+
|
|
+The perfmon2 interface exports a set of statistics which are used to tune and
|
|
+debug the implementation. The data is composed of a set of very simple metrics
|
|
+mostly aggregated counts and durations. They instrument key points in the
|
|
+perfmon2 code, such as context switch and interrupt handling.
|
|
+
|
|
+The data is accessible via the debug filesystem (debugfs). Thus you need to
|
|
+have the filesystem support enabled in your kernel. Furthermore, since 2.6.25,
|
|
+the perfmon2 statistics interface is an optional component. It needs to be
|
|
+explicitly enabled in the kernel config file (CONFIG_PERFMON_DEBUG_FS).
|
|
+
|
|
+To access the data, the debugfs filesystem must be mounted. Supposing the mount
|
|
+point is /debugfs, you would need to do:
|
|
+ $ mount -t debugfs none /debugfs
|
|
+
|
|
+The data is located under the perfmon subdirectory and is organized per CPU.
|
|
+For each CPU, the same set of metrics is available, one metric per file in
|
|
+clear ASCII text.
|
|
+
|
|
+The metrics are as follows:
|
|
+
|
|
+ ctxswin_count (read-only):
|
|
+
|
|
+ Number of PMU context switch in.
|
|
+
|
|
+ ctxswin_ns (read-only):
|
|
+
|
|
+ Number of nanoseconds spent in the PMU context switch in
|
|
+ routine. Dividing this number by the value of ctxswin_count,
|
|
+ yields average cost of the PMU context switch in.
|
|
+
|
|
+ ctxswout_count (read-only):
|
|
+
|
|
+ Number of PMU context switch out.
|
|
+
|
|
+ ctxswout_ns (read-only):
|
|
+
|
|
+ Number of nanoseconds spent in the PMU context switch out
|
|
+ routine. Dividing this number by the value of ctxswout_count,
|
|
+ yields average cost of the PMU context switch out.
|
|
+
|
|
+ fmt_handler_calls (read-only):
|
|
+
|
|
+ Number of calls to the sampling format routine that handles
|
|
+ PMU interrupts, i.e., typically the routine that records a
|
|
+ sample.
|
|
+
|
|
+ fmt_handler_ns (read-only):
|
|
+
|
|
+ Number of nanoseconds spent in the routine that handles PMU
|
|
+ interrupt in the sampling format. Dividing this number by
|
|
+ the number of calls provided by fmt_handler_calls, yields
|
|
+ average time spent in this routine.
|
|
+
|
|
+ ovfl_intr_all_count (read-only):
|
|
+
|
|
+ Number of PMU interrupts received by the kernel.
|
|
+
|
|
+
|
|
+ ovfl_intr_nmi_count (read-only):
|
|
+
|
|
+ Number of Non-Maskable Interrupts (NMI) received by the kernel
|
|
+ for perfmon. This is relevant only on X86 hardware.
|
|
+
|
|
+ ovfl_intr_ns (read-only):
|
|
+
|
|
+ Number of nanoseconds spent in the perfmon2 PMU interrupt
|
|
+ handler routine. Dividing this number by ovfl_intr_all_count
|
|
+ yields the average time to handle one PMU interrupt.
|
|
+
|
|
+ ovfl_intr_regular_count (read-only):
|
|
+
|
|
+ Number of PMU interrupts which are actually processed by
|
|
+ the perfmon interrupt handler. There may be spurious or replay
|
|
+ interrupts.
|
|
+
|
|
+ ovfl_intr_replay_count (read-only):
|
|
+
|
|
+ Number of PMU interrupts which were replayed on context switch
|
|
+ in or on event set switching. Interrupts get replayed when they
|
|
+ were in flight at the time monitoring had to be stopped.
|
|
+
|
|
+ ovfl_intr_spurious_count (read-only):
|
|
+
|
|
+ Number of PMU interrupts which were dropped because there was
|
|
+ no active context (session).
|
|
+
|
|
+ ovfl_notify_count (read-only):
|
|
+
|
|
+ Number of user level notifications sent. Notifications are
|
|
+ appended as messages to the context queue. Notifications may
|
|
+ be sent on PMU interrupts.
|
|
+
|
|
+ pfm_restart_count (read-only):
|
|
+
|
|
+ Number of times pfm_restart() is called.
|
|
+
|
|
+ reset_pmds_count (read-only):
|
|
+
|
|
+ Number of times pfm_reset_pmds() is called.
|
|
+
|
|
+ set_switch_count (read-only):
|
|
+
|
|
+ Number of event set switches.
|
|
+
|
|
+ set_switch_ns (read-only):
|
|
+
|
|
+ Number of nanoseconds spent in the set switching routine.
|
|
+ Dividing this number by set_switch_count yields the average
|
|
+ cost of switching sets.
|
|
+
|
|
+ handle_timeout_count (read-only):
|
|
+
|
|
+ Number of times the pfm_handle_timeout() routine is called.
|
|
+ It is used for timeout-based set switching.
|
|
+
|
|
+ handle_work_count (read-only):
|
|
+
|
|
+ Number of times pfm_handle_work() is called. The routine
|
|
+ handles asynchronous perfmon2 work for per-thread contexts
|
|
+ (sessions).
|
|
+
|
|
diff --git a/Documentation/perfmon2.txt b/Documentation/perfmon2.txt
|
|
new file mode 100644
|
|
index 0000000..4a8fada
|
|
--- /dev/null
|
|
+++ b/Documentation/perfmon2.txt
|
|
@@ -0,0 +1,213 @@
|
|
+ The perfmon2 hardware monitoring interface
|
|
+ ------------------------------------------
|
|
+ Stephane Eranian
|
|
+ <eranian@gmail.com>
|
|
+
|
|
+I/ Introduction
|
|
+
|
|
+ The perfmon2 interface provides access to the hardware performance counters of
|
|
+ major processors. Nowadays, all processors implement some flavors of performance
|
|
+ counters which capture micro-architectural level information such as the number
|
|
+ of elapsed cycles, number of cache misses, and so on.
|
|
+
|
|
+ The interface is implemented as a set of new system calls and a set of config files
|
|
+ in /sys.
|
|
+
|
|
+ It is possible to monitor a single thread or a CPU. In either mode, applications
|
|
+ can count or collect samples. System-wide monitoring is supported by running a
|
|
+ monitoring session on each CPU. The interface supports event-based sampling where the
|
|
+ sampling period is expressed as the number of occurrences of an event, instead of just a
|
|
+ timeout. This approach provides a much better granularity and flexibility.
|
|
+
|
|
+ For performance reasons, it is possible to use a kernel-level sampling buffer to minimize
|
|
+ the overhead incurred by sampling. The format of the buffer, i.e., what is recorded, how
|
|
+ it is recorded, and how it is exported to user-land is controlled by a kernel module called
|
|
+ a custom sampling format. The current implementation comes with a default format but
|
|
+ it is possible to create additional formats. There is an in-kernel registration
|
|
+ interface for formats. Each format is identified by a simple string which a tool
|
|
+ can pass when a monitoring session is created.
|
|
+
|
|
+ The interface also provides support for event sets and multiplexing to work around
|
|
+ hardware limitations in the number of available counters or in how events can be
|
|
+ combined. Each set defines as many counters as the hardware can support. The kernel
|
|
+ then multiplexes the sets. The interface supports time-based switching but also
|
|
+ overflow based switching, i.e., after n overflows of designated counters.
|
|
+
|
|
+ Applications never manipulate the actual performance counter registers. Instead they see
|
|
+ a logical Performance Monitoring Unit (PMU) composed of a set of config registers (PMC)
|
|
+ and a set of data registers (PMD). Note that PMD are not necessarily counters, they
|
|
+ can be buffers. The logical PMU is then mapped onto the actual PMU using a mapping
|
|
+ table which is implemented as a kernel module. The mapping is chosen once for each
|
|
+ new processor. It is visible in /sys/kernel/perfmon/pmu_desc. The kernel module
|
|
+ is automatically loaded on first use.
|
|
+
|
|
+ A monitoring session, or context, is uniquely identified by a file descriptor
|
|
+ obtained when the context is created. File sharing semantics apply to access
|
|
+ the context inside a process. A context is never inherited across fork. The file
|
|
+ descriptor can be used to receive counter overflow notifications or when the
|
|
+ sampling buffer is full. It is possible to use poll/select on the descriptor
|
|
+ to wait for notifications from multiplex contexts. Similarly, the descriptor
|
|
+ supports asynchronous notification via SIGIO.
|
|
+
|
|
+ Counters are always exported as being 64-bit wide regardless of what the underlying
|
|
+ hardware implements.
|
|
+
|
|
+II/ Kernel compilation
|
|
+
|
|
+ To enable perfmon2, you need to enable CONFIG_PERFMON
|
|
+
|
|
+III/ OProfile interactions
|
|
+
|
|
+ The set of features offered by perfmon2 is rich enough to support migrating
|
|
+ Oprofile on top of it. That means that PMU programming and low-level interrupt
|
|
+ handling could be done by perfmon2. The Oprofile sampling buffer management code
|
|
+ in the kernel as well as how samples are exported to users could remain through
|
|
+ the use of a custom sampling buffer format. This is how Oprofile works on Itanium.
|
|
+
|
|
+ The current interactions with Oprofile are:
|
|
+ - on X86: Both subsystems can be compiled into the same kernel. There is enforced
|
|
+ mutual exclusion between the two subsystems. When there is an Oprofile
|
|
+ session, no perfmon2 session can exist and vice-versa. Perfmon2 session
|
|
+ encapsulates both per-thread and system-wide sessions here.
|
|
+
|
|
+ - On IA-64: Oprofile works on top of perfmon2. Oprofile being a system-wide monitoring
|
|
+ tool, the regular per-thread vs. system-wide session restrictions apply.
|
|
+
|
|
+ - on PPC: no integration yet. You need to enable/disable one of the two subsystems
|
|
+ - on MIPS: no integration yet. You need to enable/disable one of the two subsystems
|
|
+
|
|
+IV/ User tools
|
|
+
|
|
+ We have released a simple monitoring tool to demonstrate the features of the
|
|
+ interface. The tool is called pfmon and it comes with a simple helper library
|
|
+ called libpfm. The library comes with a set of examples to show how to use the
|
|
+ kernel perfmon2 interface. Visit http://perfmon2.sf.net for details.
|
|
+
|
|
+ There may be other tools available for perfmon2.
|
|
+
|
|
+V/ How to program?
|
|
+
|
|
+ The best way to learn how to program perfmon2, is to take a look at the source
|
|
+ code for the examples in libpfm. The source code is available from:
|
|
+ http://perfmon2.sf.net
|
|
+
|
|
+VI/ System calls overview
|
|
+
|
|
+ The interface is implemented by the following system calls:
|
|
+
|
|
+ * int pfm_create_context(pfarg_ctx_t *ctx, char *fmt, void *arg, size_t arg_size)
|
|
+
|
|
+ This function creates a perfmon2 context. The type of context is per-thread by
|
|
+ default unless PFM_FL_SYSTEM_WIDE is passed in ctx. The sampling format name
|
|
+ is passed in fmt. Arguments to the format are passed in arg which is of size
|
|
+ arg_size. Upon successful return, the file descriptor identifying the context
|
|
+ is returned.
|
|
+
|
|
+ * int pfm_write_pmds(int fd, pfarg_pmd_t *pmds, int n)
|
|
+
|
|
+ This function is used to program the PMD registers. It is possible to pass
|
|
+ vectors of PMDs.
|
|
+
|
|
+ * int pfm_write_pmcs(int fd, pfarg_pmc_t *pmcs, int n)
|
|
+
|
|
+ This function is used to program the PMC registers. It is possible to pass
|
|
+ vectors of PMCs.
|
|
+
|
|
+ * int pfm_read_pmds(int fd, pfarg_pmd_t *pmds, int n)
|
|
+
|
|
+ This function is used to read the PMD registers. It is possible to pass
|
|
+ vectors of PMDs.
|
|
+
|
|
+ * int pfm_load_context(int fd, pfarg_load_t *load)
|
|
+
|
|
+ This function is used to attach the context to a thread or CPU.
|
|
+ Thread means kernel-visible thread (NPTL). The thread identification
|
|
+ as obtained by gettid must be passed to load->load_target.
|
|
+
|
|
+ To operate on another thread (not self), it is mandatory that the thread
|
|
+ be stopped via ptrace().
|
|
+
|
|
+ To attach to a CPU, the CPU number must be specified in load->load_target
|
|
+ AND the call must be issued on that CPU. To monitor a CPU, a thread MUST
|
|
+ be pinned on that CPU.
|
|
+
|
|
+ Until the context is attached, the actual counters are not accessed.
|
|
+
|
|
+ * int pfm_unload_context(int fd)
|
|
+
|
|
+ The context is detached from the thread or CPU it was attached to.
|
|
+ As a consequence monitoring is stopped.
|
|
+
|
|
+ When monitoring another thread, the thread MUST be stopped via ptrace()
|
|
+ for this function to succeed.
|
|
+
|
|
+ * int pfm_start(int fd, pfarg_start_t *st)
|
|
+
|
|
+ Start monitoring. The context must be attached for this function to succeed.
|
|
+ Optionally, it is possible to specify the event set on which to start using the
|
|
+ st argument, otherwise just pass NULL.
|
|
+
|
|
+ When monitoring another thread, the thread MUST be stopped via ptrace()
|
|
+ for this function to succeed.
|
|
+
|
|
+ * int pfm_stop(int fd)
|
|
+
|
|
+ Stop monitoring. The context must be attached for this function to succeed.
|
|
+
|
|
+ When monitoring another thread, the thread MUST be stopped via ptrace()
|
|
+ for this function to succeed.
|
|
+
|
|
+
|
|
+ * int pfm_create_evtsets(int fd, pfarg_setdesc_t *sets, int n)
|
|
+
|
|
+ This function is used to create or change event sets. By default set 0 exists.
|
|
+ It is possible to create/change multiple sets in one call.
|
|
+
|
|
+ The context must be detached for this call to succeed.
|
|
+
|
|
+ Sets are identified by a 16-bit integer. They are sorted based on this
|
|
+ identifier and switching occurs in a round-robin fashion.
|
|
+
|
|
+ * int pfm_delete_evtsets(int fd, pfarg_setdesc_t *sets, int n)
|
|
+
|
|
+ Delete event sets. The context must be detached for this call to succeed.
|
|
+
|
|
+
|
|
+ * int pfm_getinfo_evtsets(int fd, pfarg_setinfo_t *sets, int n)
|
|
+
|
|
+ Retrieve information about event sets. In particular it is possible
|
|
+ to get the number of activations of a set. It is possible to retrieve
|
|
+ information about multiple sets in one call.
|
|
+
|
|
+
|
|
+ * int pfm_restart(int fd)
|
|
+
|
|
+ Indicate to the kernel that the application is done processing an overflow
|
|
+ notification. A consequence of this call could be that monitoring resumes.
|
|
+
|
|
+ * int read(fd, pfm_msg_t *msg, sizeof(pfm_msg_t))
|
|
+
|
|
+ the regular read() system call can be used with the context file descriptor to
|
|
+ receive overflow notification messages. Non-blocking read() is supported.
|
|
+
|
|
+ Each message carries information about the overflow such as which counter overflowed
|
|
+ and where the program was (interrupted instruction pointer).
|
|
+
|
|
+ * int close(int fd)
|
|
+
|
|
+ To destroy a context, the regular close() system call is used.
|
|
+
|
|
+
|
|
+VII/ /sys interface overview
|
|
+
|
|
+ Refer to Documentation/ABI/testing/sysfs-perfmon-* for a detailed description
|
|
+ of the sysfs interface of perfmon2.
|
|
+
|
|
+VIII/ debugfs interface overview
|
|
+
|
|
+ Refer to Documentation/perfmon2-debugfs.txt for a detailed description of the
|
|
+ debug and statistics interface of perfmon2.
|
|
+
|
|
+IX/ Documentation
|
|
+
|
|
+ Visit http://perfmon2.sf.net
|
|
diff --git a/MAINTAINERS b/MAINTAINERS
|
|
index 8dae455..fb38c2a 100644
|
|
--- a/MAINTAINERS
|
|
+++ b/MAINTAINERS
|
|
@@ -3239,6 +3239,14 @@ M: balbir@linux.vnet.ibm.com
|
|
L: linux-kernel@vger.kernel.org
|
|
S: Maintained
|
|
|
|
+PERFMON SUBSYSTEM
|
|
+P: Stephane Eranian
|
|
+M: eranian@gmail.com
|
|
+L: perfmon2-devel@lists.sf.net
|
|
+W: http://perfmon2.sf.net
|
|
+T: git kernel.org:/pub/scm/linux/kernel/git/eranian/linux-2.6
|
|
+S: Maintained
|
|
+
|
|
PERSONALITY HANDLING
|
|
P: Christoph Hellwig
|
|
M: hch@infradead.org
|
|
diff --git a/Makefile b/Makefile
|
|
index 16e3fbb..7bb1320 100644
|
|
--- a/Makefile
|
|
+++ b/Makefile
|
|
@@ -620,6 +620,7 @@ export mod_strip_cmd
|
|
|
|
ifeq ($(KBUILD_EXTMOD),)
|
|
core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
|
|
+core-$(CONFIG_PERFMON) += perfmon/
|
|
|
|
vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
|
|
$(core-y) $(core-m) $(drivers-y) $(drivers-m) \
|
|
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
|
|
index 48e496f..1d79b01 100644
|
|
--- a/arch/ia64/Kconfig
|
|
+++ b/arch/ia64/Kconfig
|
|
@@ -470,14 +470,6 @@ config COMPAT_FOR_U64_ALIGNMENT
|
|
config IA64_MCA_RECOVERY
|
|
tristate "MCA recovery from errors other than TLB."
|
|
|
|
-config PERFMON
|
|
- bool "Performance monitor support"
|
|
- help
|
|
- Selects whether support for the IA-64 performance monitor hardware
|
|
- is included in the kernel. This makes some kernel data-structures a
|
|
- little bigger and slows down execution a bit, but it is generally
|
|
- a good idea to turn this on. If you're unsure, say Y.
|
|
-
|
|
config IA64_PALINFO
|
|
tristate "/proc/pal support"
|
|
help
|
|
@@ -549,6 +541,8 @@ source "drivers/firmware/Kconfig"
|
|
|
|
source "fs/Kconfig.binfmt"
|
|
|
|
+source "arch/ia64/perfmon/Kconfig"
|
|
+
|
|
endmenu
|
|
|
|
menu "Power management and ACPI"
|
|
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
|
|
index 905d25b..9aa622d 100644
|
|
--- a/arch/ia64/Makefile
|
|
+++ b/arch/ia64/Makefile
|
|
@@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
|
|
core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
|
|
core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
|
|
core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
|
|
+core-$(CONFIG_PERFMON) += arch/ia64/perfmon/
|
|
core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/
|
|
core-$(CONFIG_KVM) += arch/ia64/kvm/
|
|
|
|
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
|
|
index 9f48397..ff9572a 100644
|
|
--- a/arch/ia64/configs/generic_defconfig
|
|
+++ b/arch/ia64/configs/generic_defconfig
|
|
@@ -209,7 +209,6 @@ CONFIG_IA32_SUPPORT=y
|
|
CONFIG_COMPAT=y
|
|
CONFIG_COMPAT_FOR_U64_ALIGNMENT=y
|
|
CONFIG_IA64_MCA_RECOVERY=y
|
|
-CONFIG_PERFMON=y
|
|
CONFIG_IA64_PALINFO=y
|
|
# CONFIG_IA64_MC_ERR_INJECT is not set
|
|
CONFIG_SGI_SN=y
|
|
@@ -234,6 +233,16 @@ CONFIG_BINFMT_ELF=y
|
|
CONFIG_BINFMT_MISC=m
|
|
|
|
#
|
|
+# Hardware Performance Monitoring support
|
|
+#
|
|
+CONFIG_PERFMON=y
|
|
+CONFIG_IA64_PERFMON_COMPAT=y
|
|
+CONFIG_IA64_PERFMON_GENERIC=m
|
|
+CONFIG_IA64_PERFMON_ITANIUM=y
|
|
+CONFIG_IA64_PERFMON_MCKINLEY=y
|
|
+CONFIG_IA64_PERFMON_MONTECITO=y
|
|
+
|
|
+#
|
|
# Power management and ACPI
|
|
#
|
|
CONFIG_PM=y
|
|
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
|
|
index ccbe8ae..cf64b3b 100644
|
|
--- a/arch/ia64/include/asm/Kbuild
|
|
+++ b/arch/ia64/include/asm/Kbuild
|
|
@@ -5,10 +5,12 @@ header-y += fpu.h
|
|
header-y += fpswa.h
|
|
header-y += ia64regs.h
|
|
header-y += intel_intrin.h
|
|
-header-y += perfmon_default_smpl.h
|
|
header-y += ptrace_offsets.h
|
|
header-y += rse.h
|
|
header-y += ucontext.h
|
|
+header-y += perfmon.h
|
|
+header-y += perfmon_compat.h
|
|
+header-y += perfmon_default_smpl.h
|
|
|
|
unifdef-y += gcc_intrin.h
|
|
unifdef-y += intrinsics.h
|
|
diff --git a/arch/ia64/include/asm/hw_irq.h b/arch/ia64/include/asm/hw_irq.h
|
|
index 5c99cbc..4a45cb0 100644
|
|
--- a/arch/ia64/include/asm/hw_irq.h
|
|
+++ b/arch/ia64/include/asm/hw_irq.h
|
|
@@ -67,9 +67,9 @@ extern int ia64_last_device_vector;
|
|
#define IA64_NUM_DEVICE_VECTORS (IA64_LAST_DEVICE_VECTOR - IA64_FIRST_DEVICE_VECTOR + 1)
|
|
|
|
#define IA64_MCA_RENDEZ_VECTOR 0xe8 /* MCA rendez interrupt */
|
|
-#define IA64_PERFMON_VECTOR 0xee /* performance monitor interrupt vector */
|
|
#define IA64_TIMER_VECTOR 0xef /* use highest-prio group 15 interrupt for timer */
|
|
#define IA64_MCA_WAKEUP_VECTOR 0xf0 /* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */
|
|
+#define IA64_PERFMON_VECTOR 0xf1 /* performance monitor interrupt vector */
|
|
#define IA64_IPI_LOCAL_TLB_FLUSH 0xfc /* SMP flush local TLB */
|
|
#define IA64_IPI_RESCHEDULE 0xfd /* SMP reschedule */
|
|
#define IA64_IPI_VECTOR 0xfe /* inter-processor interrupt vector */
|
|
diff --git a/arch/ia64/include/asm/perfmon.h b/arch/ia64/include/asm/perfmon.h
|
|
index 7f3333d..150c4b4 100644
|
|
--- a/arch/ia64/include/asm/perfmon.h
|
|
+++ b/arch/ia64/include/asm/perfmon.h
|
|
@@ -1,279 +1,59 @@
|
|
/*
|
|
- * Copyright (C) 2001-2003 Hewlett-Packard Co
|
|
- * Stephane Eranian <eranian@hpl.hp.com>
|
|
- */
|
|
-
|
|
-#ifndef _ASM_IA64_PERFMON_H
|
|
-#define _ASM_IA64_PERFMON_H
|
|
-
|
|
-/*
|
|
- * perfmon comamnds supported on all CPU models
|
|
- */
|
|
-#define PFM_WRITE_PMCS 0x01
|
|
-#define PFM_WRITE_PMDS 0x02
|
|
-#define PFM_READ_PMDS 0x03
|
|
-#define PFM_STOP 0x04
|
|
-#define PFM_START 0x05
|
|
-#define PFM_ENABLE 0x06 /* obsolete */
|
|
-#define PFM_DISABLE 0x07 /* obsolete */
|
|
-#define PFM_CREATE_CONTEXT 0x08
|
|
-#define PFM_DESTROY_CONTEXT 0x09 /* obsolete use close() */
|
|
-#define PFM_RESTART 0x0a
|
|
-#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */
|
|
-#define PFM_GET_FEATURES 0x0c
|
|
-#define PFM_DEBUG 0x0d
|
|
-#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */
|
|
-#define PFM_GET_PMC_RESET_VAL 0x0f
|
|
-#define PFM_LOAD_CONTEXT 0x10
|
|
-#define PFM_UNLOAD_CONTEXT 0x11
|
|
-
|
|
-/*
|
|
- * PMU model specific commands (may not be supported on all PMU models)
|
|
- */
|
|
-#define PFM_WRITE_IBRS 0x20
|
|
-#define PFM_WRITE_DBRS 0x21
|
|
-
|
|
-/*
|
|
- * context flags
|
|
- */
|
|
-#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user level notifications */
|
|
-#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */
|
|
-#define PFM_FL_OVFL_NO_MSG 0x80 /* do not post overflow/end messages for notification */
|
|
-
|
|
-/*
|
|
- * event set flags
|
|
- */
|
|
-#define PFM_SETFL_EXCL_IDLE 0x01 /* exclude idle task (syswide only) XXX: DO NOT USE YET */
|
|
-
|
|
-/*
|
|
- * PMC flags
|
|
- */
|
|
-#define PFM_REGFL_OVFL_NOTIFY 0x1 /* send notification on overflow */
|
|
-#define PFM_REGFL_RANDOM 0x2 /* randomize sampling interval */
|
|
-
|
|
-/*
|
|
- * PMD/PMC/IBR/DBR return flags (ignored on input)
|
|
+ * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
*
|
|
- * Those flags are used on output and must be checked in case EAGAIN is returned
|
|
- * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure.
|
|
- */
|
|
-#define PFM_REG_RETFL_NOTAVAIL (1UL<<31) /* set if register is implemented but not available */
|
|
-#define PFM_REG_RETFL_EINVAL (1UL<<30) /* set if register entry is invalid */
|
|
-#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL)
|
|
-
|
|
-#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0)
|
|
-
|
|
-typedef unsigned char pfm_uuid_t[16]; /* custom sampling buffer identifier type */
|
|
-
|
|
-/*
|
|
- * Request structure used to define a context
|
|
- */
|
|
-typedef struct {
|
|
- pfm_uuid_t ctx_smpl_buf_id; /* which buffer format to use (if needed) */
|
|
- unsigned long ctx_flags; /* noblock/block */
|
|
- unsigned short ctx_nextra_sets; /* number of extra event sets (you always get 1) */
|
|
- unsigned short ctx_reserved1; /* for future use */
|
|
- int ctx_fd; /* return arg: unique identification for context */
|
|
- void *ctx_smpl_vaddr; /* return arg: virtual address of sampling buffer, is used */
|
|
- unsigned long ctx_reserved2[11];/* for future use */
|
|
-} pfarg_context_t;
|
|
-
|
|
-/*
|
|
- * Request structure used to write/read a PMC or PMD
|
|
- */
|
|
-typedef struct {
|
|
- unsigned int reg_num; /* which register */
|
|
- unsigned short reg_set; /* event set for this register */
|
|
- unsigned short reg_reserved1; /* for future use */
|
|
-
|
|
- unsigned long reg_value; /* initial pmc/pmd value */
|
|
- unsigned long reg_flags; /* input: pmc/pmd flags, return: reg error */
|
|
-
|
|
- unsigned long reg_long_reset; /* reset after buffer overflow notification */
|
|
- unsigned long reg_short_reset; /* reset after counter overflow */
|
|
-
|
|
- unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */
|
|
- unsigned long reg_random_seed; /* seed value when randomization is used */
|
|
- unsigned long reg_random_mask; /* bitmask used to limit random value */
|
|
- unsigned long reg_last_reset_val;/* return: PMD last reset value */
|
|
-
|
|
- unsigned long reg_smpl_pmds[4]; /* which pmds are accessed when PMC overflows */
|
|
- unsigned long reg_smpl_eventid; /* opaque sampling event identifier */
|
|
-
|
|
- unsigned long reg_reserved2[3]; /* for future use */
|
|
-} pfarg_reg_t;
|
|
-
|
|
-typedef struct {
|
|
- unsigned int dbreg_num; /* which debug register */
|
|
- unsigned short dbreg_set; /* event set for this register */
|
|
- unsigned short dbreg_reserved1; /* for future use */
|
|
- unsigned long dbreg_value; /* value for debug register */
|
|
- unsigned long dbreg_flags; /* return: dbreg error */
|
|
- unsigned long dbreg_reserved2[1]; /* for future use */
|
|
-} pfarg_dbreg_t;
|
|
-
|
|
-typedef struct {
|
|
- unsigned int ft_version; /* perfmon: major [16-31], minor [0-15] */
|
|
- unsigned int ft_reserved; /* reserved for future use */
|
|
- unsigned long reserved[4]; /* for future use */
|
|
-} pfarg_features_t;
|
|
-
|
|
-typedef struct {
|
|
- pid_t load_pid; /* process to load the context into */
|
|
- unsigned short load_set; /* first event set to load */
|
|
- unsigned short load_reserved1; /* for future use */
|
|
- unsigned long load_reserved2[3]; /* for future use */
|
|
-} pfarg_load_t;
|
|
-
|
|
-typedef struct {
|
|
- int msg_type; /* generic message header */
|
|
- int msg_ctx_fd; /* generic message header */
|
|
- unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */
|
|
- unsigned short msg_active_set; /* active set at the time of overflow */
|
|
- unsigned short msg_reserved1; /* for future use */
|
|
- unsigned int msg_reserved2; /* for future use */
|
|
- unsigned long msg_tstamp; /* for perf tuning/debug */
|
|
-} pfm_ovfl_msg_t;
|
|
-
|
|
-typedef struct {
|
|
- int msg_type; /* generic message header */
|
|
- int msg_ctx_fd; /* generic message header */
|
|
- unsigned long msg_tstamp; /* for perf tuning */
|
|
-} pfm_end_msg_t;
|
|
-
|
|
-typedef struct {
|
|
- int msg_type; /* type of the message */
|
|
- int msg_ctx_fd; /* unique identifier for the context */
|
|
- unsigned long msg_tstamp; /* for perf tuning */
|
|
-} pfm_gen_msg_t;
|
|
-
|
|
-#define PFM_MSG_OVFL 1 /* an overflow happened */
|
|
-#define PFM_MSG_END 2 /* task to which context was attached ended */
|
|
-
|
|
-typedef union {
|
|
- pfm_ovfl_msg_t pfm_ovfl_msg;
|
|
- pfm_end_msg_t pfm_end_msg;
|
|
- pfm_gen_msg_t pfm_gen_msg;
|
|
-} pfm_msg_t;
|
|
-
|
|
-/*
|
|
- * Define the version numbers for both perfmon as a whole and the sampling buffer format.
|
|
+ * This file contains Itanium Processor Family specific definitions
|
|
+ * for the perfmon interface.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
*/
|
|
-#define PFM_VERSION_MAJ 2U
|
|
-#define PFM_VERSION_MIN 0U
|
|
-#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff))
|
|
-#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff)
|
|
-#define PFM_VERSION_MINOR(x) ((x) & 0xffff)
|
|
-
|
|
+#ifndef _ASM_IA64_PERFMON_H_
|
|
+#define _ASM_IA64_PERFMON_H_
|
|
|
|
/*
|
|
- * miscellaneous architected definitions
|
|
+ * arch-specific user visible interface definitions
|
|
*/
|
|
-#define PMU_FIRST_COUNTER 4 /* first counting monitor (PMC/PMD) */
|
|
-#define PMU_MAX_PMCS 256 /* maximum architected number of PMC registers */
|
|
-#define PMU_MAX_PMDS 256 /* maximum architected number of PMD registers */
|
|
-
|
|
-#ifdef __KERNEL__
|
|
-
|
|
-extern long perfmonctl(int fd, int cmd, void *arg, int narg);
|
|
-
|
|
-typedef struct {
|
|
- void (*handler)(int irq, void *arg, struct pt_regs *regs);
|
|
-} pfm_intr_handler_desc_t;
|
|
-
|
|
-extern void pfm_save_regs (struct task_struct *);
|
|
-extern void pfm_load_regs (struct task_struct *);
|
|
|
|
-extern void pfm_exit_thread(struct task_struct *);
|
|
-extern int pfm_use_debug_registers(struct task_struct *);
|
|
-extern int pfm_release_debug_registers(struct task_struct *);
|
|
-extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin);
|
|
-extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs);
|
|
-extern void pfm_init_percpu(void);
|
|
-extern void pfm_handle_work(void);
|
|
-extern int pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
|
|
-extern int pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h);
|
|
+#define PFM_ARCH_MAX_PMCS (256+64)
|
|
+#define PFM_ARCH_MAX_PMDS (256+64)
|
|
|
|
-
|
|
-
|
|
-/*
|
|
- * Reset PMD register flags
|
|
- */
|
|
-#define PFM_PMD_SHORT_RESET 0
|
|
-#define PFM_PMD_LONG_RESET 1
|
|
-
|
|
-typedef union {
|
|
- unsigned int val;
|
|
- struct {
|
|
- unsigned int notify_user:1; /* notify user program of overflow */
|
|
- unsigned int reset_ovfl_pmds:1; /* reset overflowed PMDs */
|
|
- unsigned int block_task:1; /* block monitored task on kernel exit */
|
|
- unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */
|
|
- unsigned int reserved:28; /* for future use */
|
|
- } bits;
|
|
-} pfm_ovfl_ctrl_t;
|
|
-
|
|
-typedef struct {
|
|
- unsigned char ovfl_pmd; /* index of overflowed PMD */
|
|
- unsigned char ovfl_notify; /* =1 if monitor requested overflow notification */
|
|
- unsigned short active_set; /* event set active at the time of the overflow */
|
|
- pfm_ovfl_ctrl_t ovfl_ctrl; /* return: perfmon controls to set by handler */
|
|
-
|
|
- unsigned long pmd_last_reset; /* last reset value of of the PMD */
|
|
- unsigned long smpl_pmds[4]; /* bitmask of other PMD of interest on overflow */
|
|
- unsigned long smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */
|
|
- unsigned long pmd_value; /* current 64-bit value of the PMD */
|
|
- unsigned long pmd_eventid; /* eventid associated with PMD */
|
|
-} pfm_ovfl_arg_t;
|
|
-
|
|
-
|
|
-typedef struct {
|
|
- char *fmt_name;
|
|
- pfm_uuid_t fmt_uuid;
|
|
- size_t fmt_arg_size;
|
|
- unsigned long fmt_flags;
|
|
-
|
|
- int (*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg);
|
|
- int (*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size);
|
|
- int (*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg);
|
|
- int (*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp);
|
|
- int (*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
|
|
- int (*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs);
|
|
- int (*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs);
|
|
-
|
|
- struct list_head fmt_list;
|
|
-} pfm_buffer_fmt_t;
|
|
-
|
|
-extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt);
|
|
-extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid);
|
|
+#define PFM_ARCH_PMD_STK_ARG 8
|
|
+#define PFM_ARCH_PMC_STK_ARG 8
|
|
|
|
/*
|
|
- * perfmon interface exported to modules
|
|
+ * Itanium specific context flags
|
|
+ *
|
|
+ * bits[00-15]: generic flags (see linux/perfmon.h)
|
|
+ * bits[16-31]: arch-specific flags
|
|
*/
|
|
-extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
|
|
-extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs);
|
|
-extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
|
|
-extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs);
|
|
+#define PFM_ITA_FL_INSECURE 0x10000 /* clear psr.sp on non system, non self */
|
|
|
|
/*
|
|
- * describe the content of the local_cpu_date->pfm_syst_info field
|
|
+ * Itanium specific public event set flags (set_flags)
|
|
+ *
|
|
+ * event set flags layout:
|
|
+ * bits[00-15] : generic flags
|
|
+ * bits[16-31] : arch-specific flags
|
|
*/
|
|
-#define PFM_CPUINFO_SYST_WIDE 0x1 /* if set a system wide session exists */
|
|
-#define PFM_CPUINFO_DCR_PP 0x2 /* if set the system wide session has started */
|
|
-#define PFM_CPUINFO_EXCL_IDLE 0x4 /* the system wide session excludes the idle task */
|
|
+#define PFM_ITA_SETFL_EXCL_INTR 0x10000 /* exclude interrupt execution */
|
|
+#define PFM_ITA_SETFL_INTR_ONLY 0x20000 /* include only interrupt execution */
|
|
+#define PFM_ITA_SETFL_IDLE_EXCL 0x40000 /* stop monitoring in idle loop */
|
|
|
|
/*
|
|
- * sysctl control structure. visible to sampling formats
|
|
+ * compatibility for version v2.0 of the interface
|
|
*/
|
|
-typedef struct {
|
|
- int debug; /* turn on/off debugging via syslog */
|
|
- int debug_ovfl; /* turn on/off debug printk in overflow handler */
|
|
- int fastctxsw; /* turn on/off fast (unsecure) ctxsw */
|
|
- int expert_mode; /* turn on/off value checking */
|
|
-} pfm_sysctl_t;
|
|
-extern pfm_sysctl_t pfm_sysctl;
|
|
-
|
|
-
|
|
-#endif /* __KERNEL__ */
|
|
+#include <asm/perfmon_compat.h>
|
|
|
|
-#endif /* _ASM_IA64_PERFMON_H */
|
|
+#endif /* _ASM_IA64_PERFMON_H_ */
|
|
diff --git a/arch/ia64/include/asm/perfmon_compat.h b/arch/ia64/include/asm/perfmon_compat.h
|
|
new file mode 100644
|
|
index 0000000..5c14514
|
|
--- /dev/null
|
|
+++ b/arch/ia64/include/asm/perfmon_compat.h
|
|
@@ -0,0 +1,167 @@
|
|
+/*
|
|
+ * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This header file contains perfmon interface definitions
|
|
+ * that are now obsolete and should be dropped in favor
|
|
+ * of their equivalent functions as explained below.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+
|
|
+#ifndef _ASM_IA64_PERFMON_COMPAT_H_
|
|
+#define _ASM_IA64_PERFMON_COMPAT_H_
|
|
+
|
|
+/*
|
|
+ * custom sampling buffer identifier type
|
|
+ */
|
|
+typedef __u8 pfm_uuid_t[16];
|
|
+
|
|
+/*
|
|
+ * obsolete perfmon commands. Supported only on IA-64 for
|
|
+ * backward compatibility reasons with perfmon v2.0.
|
|
+ */
|
|
+#define PFM_WRITE_PMCS 0x01 /* use pfm_write_pmcs */
|
|
+#define PFM_WRITE_PMDS 0x02 /* use pfm_write_pmds */
|
|
+#define PFM_READ_PMDS 0x03 /* use pfm_read_pmds */
|
|
+#define PFM_STOP 0x04 /* use pfm_stop */
|
|
+#define PFM_START 0x05 /* use pfm_start */
|
|
+#define PFM_ENABLE 0x06 /* obsolete */
|
|
+#define PFM_DISABLE 0x07 /* obsolete */
|
|
+#define PFM_CREATE_CONTEXT 0x08 /* use pfm_create_context */
|
|
+#define PFM_DESTROY_CONTEXT 0x09 /* use close() */
|
|
+#define PFM_RESTART 0x0a /* use pfm_restart */
|
|
+#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */
|
|
+#define PFM_GET_FEATURES 0x0c /* use /proc/sys/perfmon */
|
|
+#define PFM_DEBUG 0x0d /* use /sys/kernel/perfmon/debug */
|
|
+#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */
|
|
+#define PFM_GET_PMC_RESET_VAL 0x0f /* use /proc/perfmon_map */
|
|
+#define PFM_LOAD_CONTEXT 0x10 /* use pfm_load_context */
|
|
+#define PFM_UNLOAD_CONTEXT 0x11 /* use pfm_unload_context */
|
|
+
|
|
+/*
|
|
+ * PMU model specific commands (may not be supported on all PMU models)
|
|
+ */
|
|
+#define PFM_WRITE_IBRS 0x20 /* obsolete: use PFM_WRITE_PMCS[256-263]*/
|
|
+#define PFM_WRITE_DBRS 0x21 /* obsolete: use PFM_WRITE_PMCS[264-271]*/
|
|
+
|
|
+/*
|
|
+ * argument to PFM_CREATE_CONTEXT
|
|
+ */
|
|
+struct pfarg_context {
|
|
+ pfm_uuid_t ctx_smpl_buf_id; /* buffer format to use */
|
|
+ unsigned long ctx_flags; /* noblock/block */
|
|
+ unsigned int ctx_reserved1; /* for future use */
|
|
+ int ctx_fd; /* return: fildesc */
|
|
+ void *ctx_smpl_vaddr; /* return: vaddr of buffer */
|
|
+ unsigned long ctx_reserved3[11];/* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * argument structure for PFM_WRITE_PMCS/PFM_WRITE_PMDS/PFM_READ_PMDS
|
|
+ */
|
|
+struct pfarg_reg {
|
|
+ unsigned int reg_num; /* which register */
|
|
+ unsigned short reg_set; /* event set for this register */
|
|
+ unsigned short reg_reserved1; /* for future use */
|
|
+
|
|
+ unsigned long reg_value; /* initial pmc/pmd value */
|
|
+ unsigned long reg_flags; /* input: flags, ret: error */
|
|
+
|
|
+ unsigned long reg_long_reset; /* reset value after notification */
|
|
+ unsigned long reg_short_reset; /* reset after counter overflow */
|
|
+
|
|
+ unsigned long reg_reset_pmds[4]; /* registers to reset on overflow */
|
|
+ unsigned long reg_random_seed; /* seed for randomization */
|
|
+ unsigned long reg_random_mask; /* random range limit */
|
|
+ unsigned long reg_last_reset_val;/* return: PMD last reset value */
|
|
+
|
|
+ unsigned long reg_smpl_pmds[4]; /* pmds to be saved on overflow */
|
|
+ unsigned long reg_smpl_eventid; /* opaque sampling event id */
|
|
+ unsigned long reg_ovfl_switch_cnt;/* #overflows to switch */
|
|
+
|
|
+ unsigned long reg_reserved2[2]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * argument to PFM_WRITE_IBRS/PFM_WRITE_DBRS
|
|
+ */
|
|
+struct pfarg_dbreg {
|
|
+ unsigned int dbreg_num; /* which debug register */
|
|
+ unsigned short dbreg_set; /* event set */
|
|
+ unsigned short dbreg_reserved1; /* for future use */
|
|
+ unsigned long dbreg_value; /* value for debug register */
|
|
+ unsigned long dbreg_flags; /* return: dbreg error */
|
|
+ unsigned long dbreg_reserved2[1]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * argument to PFM_GET_FEATURES
|
|
+ */
|
|
+struct pfarg_features {
|
|
+ unsigned int ft_version; /* major [16-31], minor [0-15] */
|
|
+ unsigned int ft_reserved; /* reserved for future use */
|
|
+ unsigned long reserved[4]; /* for future use */
|
|
+};
|
|
+
|
|
+typedef struct {
|
|
+ int msg_type; /* generic message header */
|
|
+ int msg_ctx_fd; /* generic message header */
|
|
+ unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */
|
|
+ unsigned short msg_active_set; /* active set on overflow */
|
|
+ unsigned short msg_reserved1; /* for future use */
|
|
+ unsigned int msg_reserved2; /* for future use */
|
|
+ unsigned long msg_tstamp; /* for perf tuning/debug */
|
|
+} pfm_ovfl_msg_t;
|
|
+
|
|
+typedef struct {
|
|
+ int msg_type; /* generic message header */
|
|
+ int msg_ctx_fd; /* generic message header */
|
|
+ unsigned long msg_tstamp; /* for perf tuning */
|
|
+} pfm_end_msg_t;
|
|
+
|
|
+typedef struct {
|
|
+ int msg_type; /* type of the message */
|
|
+ int msg_ctx_fd; /* context file descriptor */
|
|
+ unsigned long msg_tstamp; /* for perf tuning */
|
|
+} pfm_gen_msg_t;
|
|
+
|
|
+typedef union {
|
|
+ int type;
|
|
+ pfm_ovfl_msg_t pfm_ovfl_msg;
|
|
+ pfm_end_msg_t pfm_end_msg;
|
|
+ pfm_gen_msg_t pfm_gen_msg;
|
|
+} pfm_msg_t;
|
|
+
|
|
+/*
|
|
+ * PMD/PMC return flags in case of error (ignored on input)
|
|
+ *
|
|
+ * reg_flags layout:
|
|
+ * bit 00-15 : generic flags
|
|
+ * bits[16-23] : arch-specific flags (see asm/perfmon.h)
|
|
+ * bit 24-31 : error codes
|
|
+ *
|
|
+ * Those flags are used on output and must be checked in case EINVAL is
|
|
+ * returned by a command accepting a vector of values and each has a flag
|
|
+ * field, such as pfarg_reg or pfarg_dbreg
|
|
+ */
|
|
+#define PFM_REG_RETFL_NOTAVAIL (1UL<<31) /* not implemented or inaccessible */
|
|
+#define PFM_REG_RETFL_EINVAL (1UL<<30) /* entry is invalid */
|
|
+#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|\
|
|
+ PFM_REG_RETFL_EINVAL)
|
|
+
|
|
+#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0)
|
|
+
|
|
+#endif /* _ASM_IA64_PERFMON_COMPAT_H_ */
|
|
diff --git a/arch/ia64/include/asm/perfmon_default_smpl.h b/arch/ia64/include/asm/perfmon_default_smpl.h
|
|
index 48822c0..8234f32 100644
|
|
--- a/arch/ia64/include/asm/perfmon_default_smpl.h
|
|
+++ b/arch/ia64/include/asm/perfmon_default_smpl.h
|
|
@@ -1,83 +1,106 @@
|
|
/*
|
|
- * Copyright (C) 2002-2003 Hewlett-Packard Co
|
|
- * Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
*
|
|
- * This file implements the default sampling buffer format
|
|
- * for Linux/ia64 perfmon subsystem.
|
|
+ * This file implements the old default sampling buffer format
|
|
+ * for the perfmon2 subsystem. For IA-64 only.
|
|
+ *
|
|
+ * It requires the use of the perfmon_compat.h header. It is recommended
|
|
+ * that applications be ported to the new format instead.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
*/
|
|
-#ifndef __PERFMON_DEFAULT_SMPL_H__
|
|
-#define __PERFMON_DEFAULT_SMPL_H__ 1
|
|
+#ifndef __ASM_IA64_PERFMON_DEFAULT_SMPL_H__
|
|
+#define __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ 1
|
|
+
|
|
+#ifndef __ia64__
|
|
+#error "this file must be used for compatibility reasons only on IA-64"
|
|
+#endif
|
|
|
|
#define PFM_DEFAULT_SMPL_UUID { \
|
|
- 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82, 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
|
|
+ 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82,\
|
|
+ 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
|
|
|
|
/*
|
|
* format specific parameters (passed at context creation)
|
|
*/
|
|
-typedef struct {
|
|
+struct pfm_default_smpl_arg {
|
|
unsigned long buf_size; /* size of the buffer in bytes */
|
|
unsigned int flags; /* buffer specific flags */
|
|
unsigned int res1; /* for future use */
|
|
unsigned long reserved[2]; /* for future use */
|
|
-} pfm_default_smpl_arg_t;
|
|
+};
|
|
|
|
/*
|
|
* combined context+format specific structure. Can be passed
|
|
- * to PFM_CONTEXT_CREATE
|
|
+ * to PFM_CONTEXT_CREATE (not PFM_CONTEXT_CREATE2)
|
|
*/
|
|
-typedef struct {
|
|
- pfarg_context_t ctx_arg;
|
|
- pfm_default_smpl_arg_t buf_arg;
|
|
-} pfm_default_smpl_ctx_arg_t;
|
|
+struct pfm_default_smpl_ctx_arg {
|
|
+ struct pfarg_context ctx_arg;
|
|
+ struct pfm_default_smpl_arg buf_arg;
|
|
+};
|
|
|
|
/*
|
|
* This header is at the beginning of the sampling buffer returned to the user.
|
|
* It is directly followed by the first record.
|
|
*/
|
|
-typedef struct {
|
|
- unsigned long hdr_count; /* how many valid entries */
|
|
- unsigned long hdr_cur_offs; /* current offset from top of buffer */
|
|
- unsigned long hdr_reserved2; /* reserved for future use */
|
|
+struct pfm_default_smpl_hdr {
|
|
+ u64 hdr_count; /* how many valid entries */
|
|
+ u64 hdr_cur_offs; /* current offset from top of buffer */
|
|
+ u64 hdr_reserved2; /* reserved for future use */
|
|
|
|
- unsigned long hdr_overflows; /* how many times the buffer overflowed */
|
|
- unsigned long hdr_buf_size; /* how many bytes in the buffer */
|
|
+ u64 hdr_overflows; /* how many times the buffer overflowed */
|
|
+ u64 hdr_buf_size; /* how many bytes in the buffer */
|
|
|
|
- unsigned int hdr_version; /* contains perfmon version (smpl format diffs) */
|
|
- unsigned int hdr_reserved1; /* for future use */
|
|
- unsigned long hdr_reserved[10]; /* for future use */
|
|
-} pfm_default_smpl_hdr_t;
|
|
+ u32 hdr_version; /* smpl format version*/
|
|
+ u32 hdr_reserved1; /* for future use */
|
|
+ u64 hdr_reserved[10]; /* for future use */
|
|
+};
|
|
|
|
/*
|
|
* Entry header in the sampling buffer. The header is directly followed
|
|
- * with the values of the PMD registers of interest saved in increasing
|
|
- * index order: PMD4, PMD5, and so on. How many PMDs are present depends
|
|
+ * with the values of the PMD registers of interest saved in increasing
|
|
+ * index order: PMD4, PMD5, and so on. How many PMDs are present depends
|
|
* on how the session was programmed.
|
|
*
|
|
* In the case where multiple counters overflow at the same time, multiple
|
|
* entries are written consecutively.
|
|
*
|
|
- * last_reset_value member indicates the initial value of the overflowed PMD.
|
|
+ * last_reset_value member indicates the initial value of the overflowed PMD.
|
|
*/
|
|
-typedef struct {
|
|
- int pid; /* thread id (for NPTL, this is gettid()) */
|
|
- unsigned char reserved1[3]; /* reserved for future use */
|
|
- unsigned char ovfl_pmd; /* index of overflowed PMD */
|
|
-
|
|
- unsigned long last_reset_val; /* initial value of overflowed PMD */
|
|
- unsigned long ip; /* where did the overflow interrupt happened */
|
|
- unsigned long tstamp; /* ar.itc when entering perfmon intr. handler */
|
|
-
|
|
- unsigned short cpu; /* cpu on which the overfow occured */
|
|
- unsigned short set; /* event set active when overflow ocurred */
|
|
- int tgid; /* thread group id (for NPTL, this is getpid()) */
|
|
-} pfm_default_smpl_entry_t;
|
|
+struct pfm_default_smpl_entry {
|
|
+ pid_t pid; /* thread id (for NPTL, this is gettid()) */
|
|
+ uint8_t reserved1[3]; /* for future use */
|
|
+ uint8_t ovfl_pmd; /* overflow pmd for this sample */
|
|
+ u64 last_reset_val; /* initial value of overflowed PMD */
|
|
+ unsigned long ip; /* where did the overflow interrupt happened */
|
|
+ u64 tstamp; /* overflow timestamp */
|
|
+ u16 cpu; /* cpu on which the overflow occurred */
|
|
+ u16 set; /* event set active when overflow occurred */
|
|
+ pid_t tgid; /* thread group id (for NPTL, this is getpid()) */
|
|
+};
|
|
|
|
-#define PFM_DEFAULT_MAX_PMDS 64 /* how many pmds supported by data structures (sizeof(unsigned long) */
|
|
-#define PFM_DEFAULT_MAX_ENTRY_SIZE (sizeof(pfm_default_smpl_entry_t)+(sizeof(unsigned long)*PFM_DEFAULT_MAX_PMDS))
|
|
-#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE (sizeof(pfm_default_smpl_hdr_t)+PFM_DEFAULT_MAX_ENTRY_SIZE)
|
|
+#define PFM_DEFAULT_MAX_PMDS 64 /* #pmds supported */
|
|
+#define PFM_DEFAULT_MAX_ENTRY_SIZE (sizeof(struct pfm_default_smpl_entry)+\
|
|
+ (sizeof(u64)*PFM_DEFAULT_MAX_PMDS))
|
|
+#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE (sizeof(struct pfm_default_smpl_hdr)+\
|
|
+ PFM_DEFAULT_MAX_ENTRY_SIZE)
|
|
|
|
#define PFM_DEFAULT_SMPL_VERSION_MAJ 2U
|
|
-#define PFM_DEFAULT_SMPL_VERSION_MIN 0U
|
|
-#define PFM_DEFAULT_SMPL_VERSION (((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|(PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff))
|
|
+#define PFM_DEFAULT_SMPL_VERSION_MIN 1U
|
|
+#define PFM_DEFAULT_SMPL_VERSION (((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|\
|
|
+ (PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff))
|
|
|
|
-#endif /* __PERFMON_DEFAULT_SMPL_H__ */
|
|
+#endif /* __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ */
|
|
diff --git a/arch/ia64/include/asm/perfmon_kern.h b/arch/ia64/include/asm/perfmon_kern.h
|
|
new file mode 100644
|
|
index 0000000..fb40459
|
|
--- /dev/null
|
|
+++ b/arch/ia64/include/asm/perfmon_kern.h
|
|
@@ -0,0 +1,356 @@
|
|
+/*
|
|
+ * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This file contains Itanium Processor Family specific definitions
|
|
+ * for the perfmon interface.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#ifndef _ASM_IA64_PERFMON_KERN_H_
|
|
+#define _ASM_IA64_PERFMON_KERN_H_
|
|
+
|
|
+#ifdef __KERNEL__
|
|
+
|
|
+#ifdef CONFIG_PERFMON
|
|
+#include <asm/unistd.h>
|
|
+#include <asm/hw_irq.h>
|
|
+
|
|
+/*
|
|
+ * describe the content of the pfm_syst_info field
|
|
+ * layout:
|
|
+ * bits[00-15] : generic flags
|
|
+ * bits[16-31] : arch-specific flags
|
|
+ */
|
|
+#define PFM_ITA_CPUINFO_IDLE_EXCL 0x10000 /* stop monitoring in idle loop */
|
|
+
|
|
+/*
|
|
+ * For some CPUs, the upper bits of a counter must be set in order for the
|
|
+ * overflow interrupt to happen. On overflow, the counter has wrapped around,
|
|
+ * and the upper bits are cleared. This function may be used to set them back.
|
|
+ */
|
|
+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
|
|
+ unsigned int cnum)
|
|
+{}
|
|
+
|
|
+/*
|
|
+ * called from __pfm_interrupt_handler(). ctx is not NULL.
|
|
+ * ctx is locked. PMU interrupt is masked.
|
|
+ *
|
|
+ * must stop all monitoring to ensure handler has consistent view.
|
|
+ * must collect overflowed PMDs bitmask into povfl_pmds and
|
|
+ * npend_ovfls. If no interrupt detected then npend_ovfls
|
|
+ * must be set to zero.
|
|
+ */
|
|
+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ u64 tmp;
|
|
+
|
|
+ /*
|
|
+ * do not overwrite existing value, must
|
|
+ * process those first (coming from context switch replay)
|
|
+ */
|
|
+ if (set->npend_ovfls)
|
|
+ return;
|
|
+
|
|
+ ia64_srlz_d();
|
|
+
|
|
+ tmp = ia64_get_pmc(0) & ~0xf;
|
|
+
|
|
+ set->povfl_pmds[0] = tmp;
|
|
+
|
|
+ set->npend_ovfls = ia64_popcnt(tmp);
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_init_pmu_config(void)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
|
|
+{
|
|
+ ia64_resend_irq(IA64_PERFMON_VECTOR);
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_serialize(void)
|
|
+{
|
|
+ ia64_srlz_d();
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
|
|
+{
|
|
+ PFM_DBG_ovfl("state=%d", ctx->state);
|
|
+ ia64_set_pmc(0, 0);
|
|
+ /* no serialization */
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
|
|
+ unsigned int cnum, u64 value)
|
|
+{
|
|
+ if (cnum < 256) {
|
|
+ ia64_set_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value);
|
|
+ } else if (cnum < 264) {
|
|
+ ia64_set_ibr(cnum-256, value);
|
|
+ ia64_dv_serialize_instruction();
|
|
+ } else {
|
|
+ ia64_set_dbr(cnum-264, value);
|
|
+ ia64_dv_serialize_instruction();
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * On IA-64, for per-thread context which have the ITA_FL_INSECURE
|
|
+ * flag, it is possible to start/stop monitoring directly from user level
|
|
+ * without calling pfm_start()/pfm_stop. This allows very lightweight
|
|
+ * control yet the kernel sometimes needs to know if monitoring is actually
|
|
+ * on or off.
|
|
+ *
|
|
+ * Tracking of this information is normally done by pfm_start/pfm_stop
|
|
+ * in flags.started. Here we need to compensate by checking actual
|
|
+ * psr bit.
|
|
+ */
|
|
+static inline int pfm_arch_is_active(struct pfm_context *ctx)
|
|
+{
|
|
+ return ctx->flags.started
|
|
+ || ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_UP|IA64_PSR_PP);
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
|
|
+ unsigned int cnum, u64 value)
|
|
+{
|
|
+ /*
|
|
+ * for a counting PMD, overflow bit must be cleared
|
|
+ */
|
|
+ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64)
|
|
+ value &= pfm_pmu_conf->ovfl_mask;
|
|
+
|
|
+ /*
|
|
+ * for counters, writes to upper bits are ignored, no need to mask
|
|
+ */
|
|
+ ia64_set_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value);
|
|
+}
|
|
+
|
|
+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ return ia64_get_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr);
|
|
+}
|
|
+
|
|
+static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ return ia64_get_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr);
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{
|
|
+ struct pt_regs *regs;
|
|
+
|
|
+ regs = task_pt_regs(task);
|
|
+ ia64_psr(regs)->pp = 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{
|
|
+ struct pt_regs *regs;
|
|
+
|
|
+ if (!(ctx->active_set->flags & PFM_ITA_SETFL_INTR_ONLY)) {
|
|
+ regs = task_pt_regs(task);
|
|
+ ia64_psr(regs)->pp = 1;
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * On IA-64, the PMDs are NOT saved by pfm_arch_freeze_pmu()
|
|
+ * when entering the PMU interrupt handler, thus, we need
|
|
+ * to save them in pfm_switch_sets_from_intr()
|
|
+ */
|
|
+static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ pfm_save_pmds(ctx, set);
|
|
+}
|
|
+
|
|
+int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags);
|
|
+
|
|
+static inline void pfm_arch_context_free(struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_ctxswin_thread(struct task_struct *task,
|
|
+ struct pfm_context *ctx);
|
|
+
|
|
+void pfm_arch_unload_context(struct pfm_context *ctx);
|
|
+int pfm_arch_load_context(struct pfm_context *ctx);
|
|
+int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags);
|
|
+
|
|
+void pfm_arch_mask_monitoring(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+
|
|
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+
|
|
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
|
|
+
|
|
+int pfm_arch_init(void);
|
|
+void pfm_arch_init_percpu(void);
|
|
+char *pfm_arch_get_pmu_module_name(void);
|
|
+
|
|
+int __pfm_use_dbregs(struct task_struct *task);
|
|
+int __pfm_release_dbregs(struct task_struct *task);
|
|
+int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+
|
|
+void pfm_arch_show_session(struct seq_file *m);
|
|
+
|
|
+static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_pmu_release(void)
|
|
+{}
|
|
+
|
|
+/* not necessary on IA-64 */
|
|
+static inline void pfm_cacheflush(void *addr, unsigned int len)
|
|
+{}
|
|
+
|
|
+/*
|
|
+ * miscellaneous architected definitions
|
|
+ */
|
|
+#define PFM_ITA_FCNTR 4 /* first counting monitor (PMC/PMD) */
|
|
+
|
|
+/*
|
|
+ * private event set flags (set_priv_flags)
|
|
+ */
|
|
+#define PFM_ITA_SETFL_USE_DBR 0x1000000 /* set uses debug registers */
|
|
+
|
|
+
|
|
+/*
|
|
+ * Itanium-specific data structures
|
|
+ */
|
|
+struct pfm_ia64_context_flags {
|
|
+ unsigned int use_dbr:1; /* use range restrictions (debug registers) */
|
|
+ unsigned int insecure:1; /* insecure monitoring for non-self session */
|
|
+ unsigned int reserved:30;/* for future use */
|
|
+};
|
|
+
|
|
+struct pfm_arch_context {
|
|
+ struct pfm_ia64_context_flags flags; /* arch specific ctx flags */
|
|
+ u64 ctx_saved_psr_up;/* storage for psr_up */
|
|
+#ifdef CONFIG_IA64_PERFMON_COMPAT
|
|
+ void *ctx_smpl_vaddr; /* vaddr of user mapping */
|
|
+#endif
|
|
+};
|
|
+
|
|
+#ifdef CONFIG_IA64_PERFMON_COMPAT
|
|
+ssize_t pfm_arch_compat_read(struct pfm_context *ctx,
|
|
+ char __user *buf,
|
|
+ int non_block,
|
|
+ size_t size);
|
|
+int pfm_ia64_compat_init(void);
|
|
+int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx,
|
|
+ size_t rsize, struct file *filp);
|
|
+#else
|
|
+static inline ssize_t pfm_arch_compat_read(struct pfm_context *ctx,
|
|
+ char __user *buf,
|
|
+ int non_block,
|
|
+ size_t size)
|
|
+{
|
|
+ return -EINVAL;
|
|
+}
|
|
+
|
|
+static inline int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx,
|
|
+ size_t rsize, struct file *filp)
|
|
+{
|
|
+ return -EINVAL;
|
|
+}
|
|
+#endif
|
|
+
|
|
+static inline void pfm_arch_arm_handle_work(struct task_struct *task)
|
|
+{
|
|
+ /*
|
|
+ * On IA-64, we ran out of bits in the bottom 7 bits of the
|
|
+ * threadinfo bitmask. Thus we used a 2-stage approach by piggybacking
|
|
+ * on NOTIFY_RESUME and then in do_notify_resume() we demultiplex and
|
|
+ * call pfm_handle_work() if needed
|
|
+ */
|
|
+ set_tsk_thread_flag(task, TIF_NOTIFY_RESUME);
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
|
|
+{
|
|
+ /*
|
|
+ * we cannot just clear TIF_NOTIFY_RESUME because other TIF flags are
|
|
+ * piggybacked onto it: TIF_PERFMON_WORK, TIF_RESTORE_RSE
|
|
+ *
|
|
+ * The tsk_clear_notify_resume() checks if any of those are set before
|
|
+ * clearing the bit
|
|
+ */
|
|
+ tsk_clear_notify_resume(task);
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+extern struct pfm_ia64_pmu_info *pfm_ia64_pmu_info;
|
|
+
|
|
+#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context))
|
|
+
|
|
+/*
|
|
+ * IA-64 does not need extra alignment requirements for the sampling buffer
|
|
+ */
|
|
+#define PFM_ARCH_SMPL_ALIGN_SIZE 0
|
|
+
|
|
+
|
|
+static inline void pfm_release_dbregs(struct task_struct *task)
|
|
+{
|
|
+ if (task->thread.flags & IA64_THREAD_DBG_VALID)
|
|
+ __pfm_release_dbregs(task);
|
|
+}
|
|
+
|
|
+#define pfm_use_dbregs(_t) __pfm_use_dbregs(_t)
|
|
+
|
|
+static inline int pfm_arch_get_base_syscall(void)
|
|
+{
|
|
+ return __NR_pfm_create_context;
|
|
+}
|
|
+
|
|
+struct pfm_arch_pmu_info {
|
|
+ unsigned long mask_pmcs[PFM_PMC_BV]; /* modify on when masking */
|
|
+};
|
|
+
|
|
+DECLARE_PER_CPU(u32, pfm_syst_info);
|
|
+#else /* !CONFIG_PERFMON */
|
|
+/*
|
|
+ * perfmon ia64-specific hooks
|
|
+ */
|
|
+#define pfm_release_dbregs(_t) do { } while (0)
|
|
+#define pfm_use_dbregs(_t) (0)
|
|
+
|
|
+#endif /* CONFIG_PERFMON */
|
|
+
|
|
+#endif /* __KERNEL__ */
|
|
+#endif /* _ASM_IA64_PERFMON_KERN_H_ */
|
|
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
|
|
index f88fa05..9d6af9c 100644
|
|
--- a/arch/ia64/include/asm/processor.h
|
|
+++ b/arch/ia64/include/asm/processor.h
|
|
@@ -42,7 +42,6 @@
|
|
|
|
#define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */
|
|
#define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */
|
|
-#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */
|
|
#define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */
|
|
#define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */
|
|
#define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration
|
|
@@ -321,14 +320,6 @@ struct thread_struct {
|
|
#else
|
|
# define INIT_THREAD_IA32
|
|
#endif /* CONFIG_IA32_SUPPORT */
|
|
-#ifdef CONFIG_PERFMON
|
|
- void *pfm_context; /* pointer to detailed PMU context */
|
|
- unsigned long pfm_needs_checking; /* when >0, pending perfmon work on kernel exit */
|
|
-# define INIT_THREAD_PM .pfm_context = NULL, \
|
|
- .pfm_needs_checking = 0UL,
|
|
-#else
|
|
-# define INIT_THREAD_PM
|
|
-#endif
|
|
__u64 dbr[IA64_NUM_DBG_REGS];
|
|
__u64 ibr[IA64_NUM_DBG_REGS];
|
|
struct ia64_fpreg fph[96]; /* saved/loaded on demand */
|
|
@@ -343,7 +334,6 @@ struct thread_struct {
|
|
.task_size = DEFAULT_TASK_SIZE, \
|
|
.last_fph_cpu = -1, \
|
|
INIT_THREAD_IA32 \
|
|
- INIT_THREAD_PM \
|
|
.dbr = {0, }, \
|
|
.ibr = {0, }, \
|
|
.fph = {{{{0}}}, } \
|
|
diff --git a/arch/ia64/include/asm/system.h b/arch/ia64/include/asm/system.h
|
|
index 927a381..ab5aeea 100644
|
|
--- a/arch/ia64/include/asm/system.h
|
|
+++ b/arch/ia64/include/asm/system.h
|
|
@@ -217,6 +217,7 @@ struct task_struct;
|
|
extern void ia64_save_extra (struct task_struct *task);
|
|
extern void ia64_load_extra (struct task_struct *task);
|
|
|
|
+
|
|
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
|
|
extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next);
|
|
# define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n)
|
|
@@ -224,16 +225,9 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct
|
|
# define IA64_ACCOUNT_ON_SWITCH(p,n)
|
|
#endif
|
|
|
|
-#ifdef CONFIG_PERFMON
|
|
- DECLARE_PER_CPU(unsigned long, pfm_syst_info);
|
|
-# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
|
|
-#else
|
|
-# define PERFMON_IS_SYSWIDE() (0)
|
|
-#endif
|
|
-
|
|
-#define IA64_HAS_EXTRA_STATE(t) \
|
|
- ((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID) \
|
|
- || IS_IA32_PROCESS(task_pt_regs(t)) || PERFMON_IS_SYSWIDE())
|
|
+#define IA64_HAS_EXTRA_STATE(t) \
|
|
+ (((t)->thread.flags & IA64_THREAD_DBG_VALID) \
|
|
+ || IS_IA32_PROCESS(task_pt_regs(t)))
|
|
|
|
#define __switch_to(prev,next,last) do { \
|
|
IA64_ACCOUNT_ON_SWITCH(prev, next); \
|
|
@@ -241,6 +235,10 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct
|
|
ia64_save_extra(prev); \
|
|
if (IA64_HAS_EXTRA_STATE(next)) \
|
|
ia64_load_extra(next); \
|
|
+ if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \
|
|
+ pfm_ctxsw_out(prev, next); \
|
|
+ if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \
|
|
+ pfm_ctxsw_in(prev, next); \
|
|
ia64_psr(task_pt_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \
|
|
(last) = ia64_switch_to((next)); \
|
|
} while (0)
|
|
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
|
|
index 7c60fcd..3355332 100644
|
|
--- a/arch/ia64/include/asm/thread_info.h
|
|
+++ b/arch/ia64/include/asm/thread_info.h
|
|
@@ -110,6 +110,8 @@ extern void tsk_clear_notify_resume(struct task_struct *tsk);
|
|
#define TIF_DB_DISABLED 19 /* debug trap disabled for fsyscall */
|
|
#define TIF_FREEZE 20 /* is freezing for suspend */
|
|
#define TIF_RESTORE_RSE 21 /* user RBS is newer than kernel RBS */
|
|
+#define TIF_PERFMON_CTXSW 22 /* perfmon needs ctxsw calls */
|
|
+#define TIF_PERFMON_WORK 23 /* work for pfm_handle_work() */
|
|
|
|
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
|
|
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
|
|
@@ -123,6 +125,8 @@ extern void tsk_clear_notify_resume(struct task_struct *tsk);
|
|
#define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED)
|
|
#define _TIF_FREEZE (1 << TIF_FREEZE)
|
|
#define _TIF_RESTORE_RSE (1 << TIF_RESTORE_RSE)
|
|
+#define _TIF_PERFMON_CTXSW (1 << TIF_PERFMON_CTXSW)
|
|
+#define _TIF_PERFMON_WORK (1 << TIF_PERFMON_WORK)
|
|
|
|
/* "work to do on user-return" bits */
|
|
#define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\
|
|
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
|
|
index d535833..29a43bc 100644
|
|
--- a/arch/ia64/include/asm/unistd.h
|
|
+++ b/arch/ia64/include/asm/unistd.h
|
|
@@ -308,11 +308,23 @@
|
|
#define __NR_dup3 1316
|
|
#define __NR_pipe2 1317
|
|
#define __NR_inotify_init1 1318
|
|
+#define __NR_pfm_create_context 1319
|
|
+#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
|
|
+#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
|
|
+#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
|
|
+#define __NR_pfm_load_context (__NR_pfm_create_context+4)
|
|
+#define __NR_pfm_start (__NR_pfm_create_context+5)
|
|
+#define __NR_pfm_stop (__NR_pfm_create_context+6)
|
|
+#define __NR_pfm_restart (__NR_pfm_create_context+7)
|
|
+#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
|
|
+#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
|
|
+#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
|
|
+#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
|
|
-#define NR_syscalls 295 /* length of syscall table */
|
|
+#define NR_syscalls 307 /* length of syscall table */
|
|
|
|
/*
|
|
* The following defines stop scripts/checksyscalls.sh from complaining about
|
|
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
|
|
index 87fea11..b5ac54c 100644
|
|
--- a/arch/ia64/kernel/Makefile
|
|
+++ b/arch/ia64/kernel/Makefile
|
|
@@ -5,7 +5,7 @@
|
|
extra-y := head.o init_task.o vmlinux.lds
|
|
|
|
obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
|
|
- irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
|
|
+ irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o ptrace.o sal.o \
|
|
salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
|
|
unwind.o mca.o mca_asm.o topology.o
|
|
|
|
@@ -23,7 +23,6 @@ obj-$(CONFIG_IOSAPIC) += iosapic.o
|
|
obj-$(CONFIG_MODULES) += module.o
|
|
obj-$(CONFIG_SMP) += smp.o smpboot.o
|
|
obj-$(CONFIG_NUMA) += numa.o
|
|
-obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
|
|
obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
|
|
obj-$(CONFIG_CPU_FREQ) += cpufreq/
|
|
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
|
|
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
|
|
index 0dd6c14..f1c3e41 100644
|
|
--- a/arch/ia64/kernel/entry.S
|
|
+++ b/arch/ia64/kernel/entry.S
|
|
@@ -1697,6 +1697,18 @@ sys_call_table:
|
|
data8 sys_dup3
|
|
data8 sys_pipe2
|
|
data8 sys_inotify_init1
|
|
+ data8 sys_pfm_create_context
|
|
+ data8 sys_pfm_write_pmcs // 1320
|
|
+ data8 sys_pfm_write_pmds
|
|
+ data8 sys_pfm_read_pmds
|
|
+ data8 sys_pfm_load_context
|
|
+ data8 sys_pfm_start
|
|
+ data8 sys_pfm_stop // 1325
|
|
+ data8 sys_pfm_restart
|
|
+ data8 sys_pfm_create_evtsets
|
|
+ data8 sys_pfm_getinfo_evtsets
|
|
+ data8 sys_pfm_delete_evtsets
|
|
+ data8 sys_pfm_unload_context // 1330
|
|
|
|
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
|
|
#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
|
|
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
|
|
index 28d3d48..ede8024 100644
|
|
--- a/arch/ia64/kernel/irq_ia64.c
|
|
+++ b/arch/ia64/kernel/irq_ia64.c
|
|
@@ -40,10 +40,6 @@
|
|
#include <asm/system.h>
|
|
#include <asm/tlbflush.h>
|
|
|
|
-#ifdef CONFIG_PERFMON
|
|
-# include <asm/perfmon.h>
|
|
-#endif
|
|
-
|
|
#define IRQ_DEBUG 0
|
|
|
|
#define IRQ_VECTOR_UNASSIGNED (0)
|
|
@@ -660,9 +656,6 @@ init_IRQ (void)
|
|
}
|
|
#endif
|
|
#endif
|
|
-#ifdef CONFIG_PERFMON
|
|
- pfm_init_percpu();
|
|
-#endif
|
|
platform_irq_init();
|
|
}
|
|
|
|
diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
|
|
deleted file mode 100644
|
|
index 5f637bb..0000000
|
|
--- a/arch/ia64/kernel/perfmon_default_smpl.c
|
|
+++ /dev/null
|
|
@@ -1,296 +0,0 @@
|
|
-/*
|
|
- * Copyright (C) 2002-2003 Hewlett-Packard Co
|
|
- * Stephane Eranian <eranian@hpl.hp.com>
|
|
- *
|
|
- * This file implements the default sampling buffer format
|
|
- * for the Linux/ia64 perfmon-2 subsystem.
|
|
- */
|
|
-#include <linux/kernel.h>
|
|
-#include <linux/types.h>
|
|
-#include <linux/module.h>
|
|
-#include <linux/init.h>
|
|
-#include <asm/delay.h>
|
|
-#include <linux/smp.h>
|
|
-
|
|
-#include <asm/perfmon.h>
|
|
-#include <asm/perfmon_default_smpl.h>
|
|
-
|
|
-MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
-MODULE_DESCRIPTION("perfmon default sampling format");
|
|
-MODULE_LICENSE("GPL");
|
|
-
|
|
-#define DEFAULT_DEBUG 1
|
|
-
|
|
-#ifdef DEFAULT_DEBUG
|
|
-#define DPRINT(a) \
|
|
- do { \
|
|
- if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
|
|
- } while (0)
|
|
-
|
|
-#define DPRINT_ovfl(a) \
|
|
- do { \
|
|
- if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
|
|
- } while (0)
|
|
-
|
|
-#else
|
|
-#define DPRINT(a)
|
|
-#define DPRINT_ovfl(a)
|
|
-#endif
|
|
-
|
|
-static int
|
|
-default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
|
|
-{
|
|
- pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data;
|
|
- int ret = 0;
|
|
-
|
|
- if (data == NULL) {
|
|
- DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
|
|
- return -EINVAL;
|
|
- }
|
|
-
|
|
- DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));
|
|
-
|
|
- /*
|
|
- * must hold at least the buffer header + one minimally sized entry
|
|
- */
|
|
- if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL;
|
|
-
|
|
- DPRINT(("buf_size=%lu\n", arg->buf_size));
|
|
-
|
|
- return ret;
|
|
-}
|
|
-
|
|
-static int
|
|
-default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size)
|
|
-{
|
|
- pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
|
|
-
|
|
- /*
|
|
- * size has been validated in default_validate
|
|
- */
|
|
- *size = arg->buf_size;
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static int
|
|
-default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data)
|
|
-{
|
|
- pfm_default_smpl_hdr_t *hdr;
|
|
- pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
|
|
-
|
|
- hdr = (pfm_default_smpl_hdr_t *)buf;
|
|
-
|
|
- hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION;
|
|
- hdr->hdr_buf_size = arg->buf_size;
|
|
- hdr->hdr_cur_offs = sizeof(*hdr);
|
|
- hdr->hdr_overflows = 0UL;
|
|
- hdr->hdr_count = 0UL;
|
|
-
|
|
- DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
|
|
- task_pid_nr(task),
|
|
- buf,
|
|
- hdr->hdr_buf_size,
|
|
- sizeof(*hdr),
|
|
- hdr->hdr_version,
|
|
- hdr->hdr_cur_offs));
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static int
|
|
-default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
|
|
-{
|
|
- pfm_default_smpl_hdr_t *hdr;
|
|
- pfm_default_smpl_entry_t *ent;
|
|
- void *cur, *last;
|
|
- unsigned long *e, entry_size;
|
|
- unsigned int npmds, i;
|
|
- unsigned char ovfl_pmd;
|
|
- unsigned char ovfl_notify;
|
|
-
|
|
- if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) {
|
|
- DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg));
|
|
- return -EINVAL;
|
|
- }
|
|
-
|
|
- hdr = (pfm_default_smpl_hdr_t *)buf;
|
|
- cur = buf+hdr->hdr_cur_offs;
|
|
- last = buf+hdr->hdr_buf_size;
|
|
- ovfl_pmd = arg->ovfl_pmd;
|
|
- ovfl_notify = arg->ovfl_notify;
|
|
-
|
|
- /*
|
|
- * precheck for sanity
|
|
- */
|
|
- if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
|
|
-
|
|
- npmds = hweight64(arg->smpl_pmds[0]);
|
|
-
|
|
- ent = (pfm_default_smpl_entry_t *)cur;
|
|
-
|
|
- prefetch(arg->smpl_pmds_values);
|
|
-
|
|
- entry_size = sizeof(*ent) + (npmds << 3);
|
|
-
|
|
- /* position for first pmd */
|
|
- e = (unsigned long *)(ent+1);
|
|
-
|
|
- hdr->hdr_count++;
|
|
-
|
|
- DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n",
|
|
- task->pid,
|
|
- hdr->hdr_count,
|
|
- cur, last,
|
|
- last-cur,
|
|
- ovfl_pmd,
|
|
- ovfl_notify, npmds));
|
|
-
|
|
- /*
|
|
- * current = task running at the time of the overflow.
|
|
- *
|
|
- * per-task mode:
|
|
- * - this is ususally the task being monitored.
|
|
- * Under certain conditions, it might be a different task
|
|
- *
|
|
- * system-wide:
|
|
- * - this is not necessarily the task controlling the session
|
|
- */
|
|
- ent->pid = current->pid;
|
|
- ent->ovfl_pmd = ovfl_pmd;
|
|
- ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;
|
|
-
|
|
- /*
|
|
- * where did the fault happen (includes slot number)
|
|
- */
|
|
- ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);
|
|
-
|
|
- ent->tstamp = stamp;
|
|
- ent->cpu = smp_processor_id();
|
|
- ent->set = arg->active_set;
|
|
- ent->tgid = current->tgid;
|
|
-
|
|
- /*
|
|
- * selectively store PMDs in increasing index number
|
|
- */
|
|
- if (npmds) {
|
|
- unsigned long *val = arg->smpl_pmds_values;
|
|
- for(i=0; i < npmds; i++) {
|
|
- *e++ = *val++;
|
|
- }
|
|
- }
|
|
-
|
|
- /*
|
|
- * update position for next entry
|
|
- */
|
|
- hdr->hdr_cur_offs += entry_size;
|
|
- cur += entry_size;
|
|
-
|
|
- /*
|
|
- * post check to avoid losing the last sample
|
|
- */
|
|
- if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
|
|
-
|
|
- /*
|
|
- * keep same ovfl_pmds, ovfl_notify
|
|
- */
|
|
- arg->ovfl_ctrl.bits.notify_user = 0;
|
|
- arg->ovfl_ctrl.bits.block_task = 0;
|
|
- arg->ovfl_ctrl.bits.mask_monitoring = 0;
|
|
- arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */
|
|
-
|
|
- return 0;
|
|
-full:
|
|
- DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));
|
|
-
|
|
- /*
|
|
- * increment number of buffer overflow.
|
|
- * important to detect duplicate set of samples.
|
|
- */
|
|
- hdr->hdr_overflows++;
|
|
-
|
|
- /*
|
|
- * if no notification requested, then we saturate the buffer
|
|
- */
|
|
- if (ovfl_notify == 0) {
|
|
- arg->ovfl_ctrl.bits.notify_user = 0;
|
|
- arg->ovfl_ctrl.bits.block_task = 0;
|
|
- arg->ovfl_ctrl.bits.mask_monitoring = 1;
|
|
- arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
|
|
- } else {
|
|
- arg->ovfl_ctrl.bits.notify_user = 1;
|
|
- arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */
|
|
- arg->ovfl_ctrl.bits.mask_monitoring = 1;
|
|
- arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
|
|
- }
|
|
- return -1; /* we are full, sorry */
|
|
-}
|
|
-
|
|
-static int
|
|
-default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
|
|
-{
|
|
- pfm_default_smpl_hdr_t *hdr;
|
|
-
|
|
- hdr = (pfm_default_smpl_hdr_t *)buf;
|
|
-
|
|
- hdr->hdr_count = 0UL;
|
|
- hdr->hdr_cur_offs = sizeof(*hdr);
|
|
-
|
|
- ctrl->bits.mask_monitoring = 0;
|
|
- ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static int
|
|
-default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
|
|
-{
|
|
- DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static pfm_buffer_fmt_t default_fmt={
|
|
- .fmt_name = "default_format",
|
|
- .fmt_uuid = PFM_DEFAULT_SMPL_UUID,
|
|
- .fmt_arg_size = sizeof(pfm_default_smpl_arg_t),
|
|
- .fmt_validate = default_validate,
|
|
- .fmt_getsize = default_get_size,
|
|
- .fmt_init = default_init,
|
|
- .fmt_handler = default_handler,
|
|
- .fmt_restart = default_restart,
|
|
- .fmt_restart_active = default_restart,
|
|
- .fmt_exit = default_exit,
|
|
-};
|
|
-
|
|
-static int __init
|
|
-pfm_default_smpl_init_module(void)
|
|
-{
|
|
- int ret;
|
|
-
|
|
- ret = pfm_register_buffer_fmt(&default_fmt);
|
|
- if (ret == 0) {
|
|
- printk("perfmon_default_smpl: %s v%u.%u registered\n",
|
|
- default_fmt.fmt_name,
|
|
- PFM_DEFAULT_SMPL_VERSION_MAJ,
|
|
- PFM_DEFAULT_SMPL_VERSION_MIN);
|
|
- } else {
|
|
- printk("perfmon_default_smpl: %s cannot register ret=%d\n",
|
|
- default_fmt.fmt_name,
|
|
- ret);
|
|
- }
|
|
-
|
|
- return ret;
|
|
-}
|
|
-
|
|
-static void __exit
|
|
-pfm_default_smpl_cleanup_module(void)
|
|
-{
|
|
- int ret;
|
|
- ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid);
|
|
-
|
|
- printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret);
|
|
-}
|
|
-
|
|
-module_init(pfm_default_smpl_init_module);
|
|
-module_exit(pfm_default_smpl_cleanup_module);
|
|
-
|
|
diff --git a/arch/ia64/kernel/perfmon_generic.h b/arch/ia64/kernel/perfmon_generic.h
|
|
deleted file mode 100644
|
|
index 6748947..0000000
|
|
--- a/arch/ia64/kernel/perfmon_generic.h
|
|
+++ /dev/null
|
|
@@ -1,45 +0,0 @@
|
|
-/*
|
|
- * This file contains the generic PMU register description tables
|
|
- * and pmc checker used by perfmon.c.
|
|
- *
|
|
- * Copyright (C) 2002-2003 Hewlett Packard Co
|
|
- * Stephane Eranian <eranian@hpl.hp.com>
|
|
- */
|
|
-
|
|
-static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={
|
|
-/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
- { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
|
|
-};
|
|
-
|
|
-static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={
|
|
-/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
|
|
-/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
|
|
-/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
|
|
-/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
|
|
-/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
|
|
-/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
|
|
-/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
|
|
-/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
|
|
- { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
|
|
-};
|
|
-
|
|
-/*
|
|
- * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
- */
|
|
-static pmu_config_t pmu_conf_gen={
|
|
- .pmu_name = "Generic",
|
|
- .pmu_family = 0xff, /* any */
|
|
- .ovfl_val = (1UL << 32) - 1,
|
|
- .num_ibrs = 0, /* does not use */
|
|
- .num_dbrs = 0, /* does not use */
|
|
- .pmd_desc = pfm_gen_pmd_desc,
|
|
- .pmc_desc = pfm_gen_pmc_desc
|
|
-};
|
|
-
|
|
diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h
|
|
deleted file mode 100644
|
|
index d1d508a..0000000
|
|
--- a/arch/ia64/kernel/perfmon_itanium.h
|
|
+++ /dev/null
|
|
@@ -1,115 +0,0 @@
|
|
-/*
|
|
- * This file contains the Itanium PMU register description tables
|
|
- * and pmc checker used by perfmon.c.
|
|
- *
|
|
- * Copyright (C) 2002-2003 Hewlett Packard Co
|
|
- * Stephane Eranian <eranian@hpl.hp.com>
|
|
- */
|
|
-static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
|
|
-
|
|
-static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={
|
|
-/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
- { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
|
|
-};
|
|
-
|
|
-static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={
|
|
-/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
|
|
-/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
|
|
-/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
|
|
-/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
|
|
-/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
|
|
-/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
|
|
-/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
|
|
-/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
|
|
-/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
|
|
- { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
|
|
-};
|
|
-
|
|
-static int
|
|
-pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
|
|
-{
|
|
- int ret;
|
|
- int is_loaded;
|
|
-
|
|
- /* sanitfy check */
|
|
- if (ctx == NULL) return -EINVAL;
|
|
-
|
|
- is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
|
|
-
|
|
- /*
|
|
- * we must clear the (instruction) debug registers if pmc13.ta bit is cleared
|
|
- * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
|
|
- */
|
|
- if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) {
|
|
-
|
|
- DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));
|
|
-
|
|
- /* don't mix debug with perfmon */
|
|
- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
|
|
-
|
|
- /*
|
|
- * a count of 0 will mark the debug registers as in use and also
|
|
- * ensure that they are properly cleared.
|
|
- */
|
|
- ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs);
|
|
- if (ret) return ret;
|
|
- }
|
|
-
|
|
- /*
|
|
- * we must clear the (data) debug registers if pmc11.pt bit is cleared
|
|
- * before they are written (fl_using_dbreg==0) to avoid picking up stale information.
|
|
- */
|
|
- if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) {
|
|
-
|
|
- DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));
|
|
-
|
|
- /* don't mix debug with perfmon */
|
|
- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
|
|
-
|
|
- /*
|
|
- * a count of 0 will mark the debug registers as in use and also
|
|
- * ensure that they are properly cleared.
|
|
- */
|
|
- ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs);
|
|
- if (ret) return ret;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
- */
|
|
-static pmu_config_t pmu_conf_ita={
|
|
- .pmu_name = "Itanium",
|
|
- .pmu_family = 0x7,
|
|
- .ovfl_val = (1UL << 32) - 1,
|
|
- .pmd_desc = pfm_ita_pmd_desc,
|
|
- .pmc_desc = pfm_ita_pmc_desc,
|
|
- .num_ibrs = 8,
|
|
- .num_dbrs = 8,
|
|
- .use_rr_dbregs = 1, /* debug register are use for range retrictions */
|
|
-};
|
|
-
|
|
-
|
|
diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h
|
|
deleted file mode 100644
|
|
index c4bec7a..0000000
|
|
--- a/arch/ia64/kernel/perfmon_mckinley.h
|
|
+++ /dev/null
|
|
@@ -1,187 +0,0 @@
|
|
-/*
|
|
- * This file contains the McKinley PMU register description tables
|
|
- * and pmc checker used by perfmon.c.
|
|
- *
|
|
- * Copyright (C) 2002-2003 Hewlett Packard Co
|
|
- * Stephane Eranian <eranian@hpl.hp.com>
|
|
- */
|
|
-static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
|
|
-
|
|
-static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={
|
|
-/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
-/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
|
|
- { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
|
|
-};
|
|
-
|
|
-static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={
|
|
-/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
|
|
-/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
|
|
-/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
|
|
-/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
|
|
-/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
|
|
-/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
|
|
-/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
|
|
-/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
|
|
-/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
|
|
-/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
|
|
- { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
|
|
-};
|
|
-
|
|
-/*
|
|
- * PMC reserved fields must have their power-up values preserved
|
|
- */
|
|
-static int
|
|
-pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
|
|
-{
|
|
- unsigned long tmp1, tmp2, ival = *val;
|
|
-
|
|
- /* remove reserved areas from user value */
|
|
- tmp1 = ival & PMC_RSVD_MASK(cnum);
|
|
-
|
|
- /* get reserved fields values */
|
|
- tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
|
|
-
|
|
- *val = tmp1 | tmp2;
|
|
-
|
|
- DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
|
|
- cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * task can be NULL if the context is unloaded
|
|
- */
|
|
-static int
|
|
-pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
|
|
-{
|
|
- int ret = 0, check_case1 = 0;
|
|
- unsigned long val8 = 0, val14 = 0, val13 = 0;
|
|
- int is_loaded;
|
|
-
|
|
- /* first preserve the reserved fields */
|
|
- pfm_mck_reserved(cnum, val, regs);
|
|
-
|
|
- /* sanitfy check */
|
|
- if (ctx == NULL) return -EINVAL;
|
|
-
|
|
- is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
|
|
-
|
|
- /*
|
|
- * we must clear the debug registers if pmc13 has a value which enable
|
|
- * memory pipeline event constraints. In this case we need to clear the
|
|
- * the debug registers if they have not yet been accessed. This is required
|
|
- * to avoid picking stale state.
|
|
- * PMC13 is "active" if:
|
|
- * one of the pmc13.cfg_dbrpXX field is different from 0x3
|
|
- * AND
|
|
- * at the corresponding pmc13.ena_dbrpXX is set.
|
|
- */
|
|
- DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded));
|
|
-
|
|
- if (cnum == 13 && is_loaded
|
|
- && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
|
|
-
|
|
- DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
|
|
-
|
|
- /* don't mix debug with perfmon */
|
|
- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
|
|
-
|
|
- /*
|
|
- * a count of 0 will mark the debug registers as in use and also
|
|
- * ensure that they are properly cleared.
|
|
- */
|
|
- ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
|
|
- if (ret) return ret;
|
|
- }
|
|
- /*
|
|
- * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled
|
|
- * before they are (fl_using_dbreg==0) to avoid picking up stale information.
|
|
- */
|
|
- if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) {
|
|
-
|
|
- DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
|
|
-
|
|
- /* don't mix debug with perfmon */
|
|
- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
|
|
-
|
|
- /*
|
|
- * a count of 0 will mark the debug registers as in use and also
|
|
- * ensure that they are properly cleared.
|
|
- */
|
|
- ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
|
|
- if (ret) return ret;
|
|
-
|
|
- }
|
|
-
|
|
- switch(cnum) {
|
|
- case 4: *val |= 1UL << 23; /* force power enable bit */
|
|
- break;
|
|
- case 8: val8 = *val;
|
|
- val13 = ctx->ctx_pmcs[13];
|
|
- val14 = ctx->ctx_pmcs[14];
|
|
- check_case1 = 1;
|
|
- break;
|
|
- case 13: val8 = ctx->ctx_pmcs[8];
|
|
- val13 = *val;
|
|
- val14 = ctx->ctx_pmcs[14];
|
|
- check_case1 = 1;
|
|
- break;
|
|
- case 14: val8 = ctx->ctx_pmcs[8];
|
|
- val13 = ctx->ctx_pmcs[13];
|
|
- val14 = *val;
|
|
- check_case1 = 1;
|
|
- break;
|
|
- }
|
|
- /* check illegal configuration which can produce inconsistencies in tagging
|
|
- * i-side events in L1D and L2 caches
|
|
- */
|
|
- if (check_case1) {
|
|
- ret = ((val13 >> 45) & 0xf) == 0
|
|
- && ((val8 & 0x1) == 0)
|
|
- && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
|
|
- ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
|
|
-
|
|
- if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
|
|
- }
|
|
-
|
|
- return ret ? -EINVAL : 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
- */
|
|
-static pmu_config_t pmu_conf_mck={
|
|
- .pmu_name = "Itanium 2",
|
|
- .pmu_family = 0x1f,
|
|
- .flags = PFM_PMU_IRQ_RESEND,
|
|
- .ovfl_val = (1UL << 47) - 1,
|
|
- .pmd_desc = pfm_mck_pmd_desc,
|
|
- .pmc_desc = pfm_mck_pmc_desc,
|
|
- .num_ibrs = 8,
|
|
- .num_dbrs = 8,
|
|
- .use_rr_dbregs = 1 /* debug register are use for range restrictions */
|
|
-};
|
|
-
|
|
-
|
|
diff --git a/arch/ia64/kernel/perfmon_montecito.h b/arch/ia64/kernel/perfmon_montecito.h
|
|
deleted file mode 100644
|
|
index 7f8da4c..0000000
|
|
--- a/arch/ia64/kernel/perfmon_montecito.h
|
|
+++ /dev/null
|
|
@@ -1,269 +0,0 @@
|
|
-/*
|
|
- * This file contains the Montecito PMU register description tables
|
|
- * and pmc checker used by perfmon.c.
|
|
- *
|
|
- * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
- * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
- */
|
|
-static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
|
|
-
|
|
-#define RDEP_MONT_ETB (RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\
|
|
- RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63))
|
|
-#define RDEP_MONT_DEAR (RDEP(32)|RDEP(33)|RDEP(36))
|
|
-#define RDEP_MONT_IEAR (RDEP(34)|RDEP(35))
|
|
-
|
|
-static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={
|
|
-/* pmc0 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc4 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc5 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc6 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc7 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc8 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc9 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc16 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc17 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc18 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc19 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc20 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc21 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc22 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc23 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc24 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc25 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc26 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc27 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc28 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc29 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc30 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc31 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}},
|
|
-/* pmc38 */ { PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}},
|
|
-/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}},
|
|
- { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
|
|
-};
|
|
-
|
|
-static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={
|
|
-/* pmd0 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd1 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd2 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd3 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}},
|
|
-/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}},
|
|
-/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}},
|
|
-/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}},
|
|
-/* pmd8 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}},
|
|
-/* pmd9 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}},
|
|
-/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}},
|
|
-/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}},
|
|
-/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(12),0, 0, 0}},
|
|
-/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}},
|
|
-/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}},
|
|
-/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}},
|
|
-/* pmd16 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd17 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd18 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd19 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd20 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd21 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd22 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd23 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd24 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd25 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd26 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd27 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd28 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd29 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd30 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd31 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
|
|
-/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}},
|
|
-/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}},
|
|
-/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}},
|
|
-/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}},
|
|
-/* pmd37 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd40 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd41 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd42 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd43 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd44 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd45 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd46 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd47 */ { PFM_REG_NOTIMPL, },
|
|
-/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd57 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
-/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}},
|
|
- { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */
|
|
-};
|
|
-
|
|
-/*
|
|
- * PMC reserved fields must have their power-up values preserved
|
|
- */
|
|
-static int
|
|
-pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
|
|
-{
|
|
- unsigned long tmp1, tmp2, ival = *val;
|
|
-
|
|
- /* remove reserved areas from user value */
|
|
- tmp1 = ival & PMC_RSVD_MASK(cnum);
|
|
-
|
|
- /* get reserved fields values */
|
|
- tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
|
|
-
|
|
- *val = tmp1 | tmp2;
|
|
-
|
|
- DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
|
|
- cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * task can be NULL if the context is unloaded
|
|
- */
|
|
-static int
|
|
-pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
|
|
-{
|
|
- int ret = 0;
|
|
- unsigned long val32 = 0, val38 = 0, val41 = 0;
|
|
- unsigned long tmpval;
|
|
- int check_case1 = 0;
|
|
- int is_loaded;
|
|
-
|
|
- /* first preserve the reserved fields */
|
|
- pfm_mont_reserved(cnum, val, regs);
|
|
-
|
|
- tmpval = *val;
|
|
-
|
|
- /* sanity check */
|
|
- if (ctx == NULL) return -EINVAL;
|
|
-
|
|
- is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
|
|
-
|
|
- /*
|
|
- * we must clear the debug registers if pmc41 has a value which enable
|
|
- * memory pipeline event constraints. In this case we need to clear the
|
|
- * the debug registers if they have not yet been accessed. This is required
|
|
- * to avoid picking stale state.
|
|
- * PMC41 is "active" if:
|
|
- * one of the pmc41.cfg_dtagXX field is different from 0x3
|
|
- * AND
|
|
- * at the corresponding pmc41.en_dbrpXX is set.
|
|
- * AND
|
|
- * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used)
|
|
- */
|
|
- DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded));
|
|
-
|
|
- if (cnum == 41 && is_loaded
|
|
- && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
|
|
-
|
|
- DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval));
|
|
-
|
|
- /* don't mix debug with perfmon */
|
|
- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
|
|
-
|
|
- /*
|
|
- * a count of 0 will mark the debug registers if:
|
|
- * AND
|
|
- */
|
|
- ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
|
|
- if (ret) return ret;
|
|
- }
|
|
- /*
|
|
- * we must clear the (instruction) debug registers if:
|
|
- * pmc38.ig_ibrpX is 0 (enabled)
|
|
- * AND
|
|
- * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used)
|
|
- */
|
|
- if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) {
|
|
-
|
|
- DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval));
|
|
-
|
|
- /* don't mix debug with perfmon */
|
|
- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
|
|
-
|
|
- /*
|
|
- * a count of 0 will mark the debug registers as in use and also
|
|
- * ensure that they are properly cleared.
|
|
- */
|
|
- ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
|
|
- if (ret) return ret;
|
|
-
|
|
- }
|
|
- switch(cnum) {
|
|
- case 32: val32 = *val;
|
|
- val38 = ctx->ctx_pmcs[38];
|
|
- val41 = ctx->ctx_pmcs[41];
|
|
- check_case1 = 1;
|
|
- break;
|
|
- case 38: val38 = *val;
|
|
- val32 = ctx->ctx_pmcs[32];
|
|
- val41 = ctx->ctx_pmcs[41];
|
|
- check_case1 = 1;
|
|
- break;
|
|
- case 41: val41 = *val;
|
|
- val32 = ctx->ctx_pmcs[32];
|
|
- val38 = ctx->ctx_pmcs[38];
|
|
- check_case1 = 1;
|
|
- break;
|
|
- }
|
|
- /* check illegal configuration which can produce inconsistencies in tagging
|
|
- * i-side events in L1D and L2 caches
|
|
- */
|
|
- if (check_case1) {
|
|
- ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0)
|
|
- && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0)
|
|
- || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0));
|
|
- if (ret) {
|
|
- DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32));
|
|
- return -EINVAL;
|
|
- }
|
|
- }
|
|
- *val = tmpval;
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
- */
|
|
-static pmu_config_t pmu_conf_mont={
|
|
- .pmu_name = "Montecito",
|
|
- .pmu_family = 0x20,
|
|
- .flags = PFM_PMU_IRQ_RESEND,
|
|
- .ovfl_val = (1UL << 47) - 1,
|
|
- .pmd_desc = pfm_mont_pmd_desc,
|
|
- .pmc_desc = pfm_mont_pmc_desc,
|
|
- .num_ibrs = 8,
|
|
- .num_dbrs = 8,
|
|
- .use_rr_dbregs = 1 /* debug register are use for range retrictions */
|
|
-};
|
|
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
|
|
index 3ab8373..a7dfb39 100644
|
|
--- a/arch/ia64/kernel/process.c
|
|
+++ b/arch/ia64/kernel/process.c
|
|
@@ -28,6 +28,7 @@
|
|
#include <linux/delay.h>
|
|
#include <linux/kdebug.h>
|
|
#include <linux/utsname.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/cpu.h>
|
|
#include <asm/delay.h>
|
|
@@ -45,10 +46,6 @@
|
|
|
|
#include "entry.h"
|
|
|
|
-#ifdef CONFIG_PERFMON
|
|
-# include <asm/perfmon.h>
|
|
-#endif
|
|
-
|
|
#include "sigframe.h"
|
|
|
|
void (*ia64_mark_idle)(int);
|
|
@@ -162,10 +159,8 @@ show_regs (struct pt_regs *regs)
|
|
|
|
void tsk_clear_notify_resume(struct task_struct *tsk)
|
|
{
|
|
-#ifdef CONFIG_PERFMON
|
|
- if (tsk->thread.pfm_needs_checking)
|
|
+ if (test_ti_thread_flag(task_thread_info(tsk), TIF_PERFMON_WORK))
|
|
return;
|
|
-#endif
|
|
if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE))
|
|
return;
|
|
clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME);
|
|
@@ -188,14 +183,9 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
|
|
return;
|
|
}
|
|
|
|
-#ifdef CONFIG_PERFMON
|
|
- if (current->thread.pfm_needs_checking)
|
|
- /*
|
|
- * Note: pfm_handle_work() allow us to call it with interrupts
|
|
- * disabled, and may enable interrupts within the function.
|
|
- */
|
|
- pfm_handle_work();
|
|
-#endif
|
|
+ /* process perfmon asynchronous work (e.g. block thread or reset) */
|
|
+ if (test_thread_flag(TIF_PERFMON_WORK))
|
|
+ pfm_handle_work(task_pt_regs(current));
|
|
|
|
/* deal with pending signal delivery */
|
|
if (test_thread_flag(TIF_SIGPENDING)) {
|
|
@@ -212,22 +202,15 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
|
|
local_irq_disable(); /* force interrupt disable */
|
|
}
|
|
|
|
-static int pal_halt = 1;
|
|
static int can_do_pal_halt = 1;
|
|
|
|
static int __init nohalt_setup(char * str)
|
|
{
|
|
- pal_halt = can_do_pal_halt = 0;
|
|
+ can_do_pal_halt = 0;
|
|
return 1;
|
|
}
|
|
__setup("nohalt", nohalt_setup);
|
|
|
|
-void
|
|
-update_pal_halt_status(int status)
|
|
-{
|
|
- can_do_pal_halt = pal_halt && status;
|
|
-}
|
|
-
|
|
/*
|
|
* We use this if we don't have any better idle routine..
|
|
*/
|
|
@@ -236,6 +219,22 @@ default_idle (void)
|
|
{
|
|
local_irq_enable();
|
|
while (!need_resched()) {
|
|
+#ifdef CONFIG_PERFMON
|
|
+ u64 psr = 0;
|
|
+ /*
|
|
+ * If requested, we stop the PMU to avoid
|
|
+ * measuring across the core idle loop.
|
|
+ *
|
|
+ * dcr.pp is not modified on purpose
|
|
+ * it is used when coming out of
|
|
+ * safe_halt() via interrupt
|
|
+ */
|
|
+ if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) {
|
|
+ psr = ia64_getreg(_IA64_REG_PSR);
|
|
+ if (psr & IA64_PSR_PP)
|
|
+ ia64_rsm(IA64_PSR_PP);
|
|
+ }
|
|
+#endif
|
|
if (can_do_pal_halt) {
|
|
local_irq_disable();
|
|
if (!need_resched()) {
|
|
@@ -244,6 +243,12 @@ default_idle (void)
|
|
local_irq_enable();
|
|
} else
|
|
cpu_relax();
|
|
+#ifdef CONFIG_PERFMON
|
|
+ if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) {
|
|
+ if (psr & IA64_PSR_PP)
|
|
+ ia64_ssm(IA64_PSR_PP);
|
|
+ }
|
|
+#endif
|
|
}
|
|
}
|
|
|
|
@@ -344,22 +349,9 @@ cpu_idle (void)
|
|
void
|
|
ia64_save_extra (struct task_struct *task)
|
|
{
|
|
-#ifdef CONFIG_PERFMON
|
|
- unsigned long info;
|
|
-#endif
|
|
-
|
|
if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
|
|
ia64_save_debug_regs(&task->thread.dbr[0]);
|
|
|
|
-#ifdef CONFIG_PERFMON
|
|
- if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
|
|
- pfm_save_regs(task);
|
|
-
|
|
- info = __get_cpu_var(pfm_syst_info);
|
|
- if (info & PFM_CPUINFO_SYST_WIDE)
|
|
- pfm_syst_wide_update_task(task, info, 0);
|
|
-#endif
|
|
-
|
|
#ifdef CONFIG_IA32_SUPPORT
|
|
if (IS_IA32_PROCESS(task_pt_regs(task)))
|
|
ia32_save_state(task);
|
|
@@ -369,22 +361,9 @@ ia64_save_extra (struct task_struct *task)
|
|
void
|
|
ia64_load_extra (struct task_struct *task)
|
|
{
|
|
-#ifdef CONFIG_PERFMON
|
|
- unsigned long info;
|
|
-#endif
|
|
-
|
|
if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
|
|
ia64_load_debug_regs(&task->thread.dbr[0]);
|
|
|
|
-#ifdef CONFIG_PERFMON
|
|
- if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
|
|
- pfm_load_regs(task);
|
|
-
|
|
- info = __get_cpu_var(pfm_syst_info);
|
|
- if (info & PFM_CPUINFO_SYST_WIDE)
|
|
- pfm_syst_wide_update_task(task, info, 1);
|
|
-#endif
|
|
-
|
|
#ifdef CONFIG_IA32_SUPPORT
|
|
if (IS_IA32_PROCESS(task_pt_regs(task)))
|
|
ia32_load_state(task);
|
|
@@ -510,8 +489,7 @@ copy_thread (int nr, unsigned long clone_flags,
|
|
* call behavior where scratch registers are preserved across
|
|
* system calls (unless used by the system call itself).
|
|
*/
|
|
-# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \
|
|
- | IA64_THREAD_PM_VALID)
|
|
+# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID)
|
|
# define THREAD_FLAGS_TO_SET 0
|
|
p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
|
|
| THREAD_FLAGS_TO_SET);
|
|
@@ -533,10 +511,8 @@ copy_thread (int nr, unsigned long clone_flags,
|
|
}
|
|
#endif
|
|
|
|
-#ifdef CONFIG_PERFMON
|
|
- if (current->thread.pfm_context)
|
|
- pfm_inherit(p, child_ptregs);
|
|
-#endif
|
|
+ pfm_copy_thread(p);
|
|
+
|
|
return retval;
|
|
}
|
|
|
|
@@ -745,15 +721,13 @@ exit_thread (void)
|
|
{
|
|
|
|
ia64_drop_fpu(current);
|
|
-#ifdef CONFIG_PERFMON
|
|
- /* if needed, stop monitoring and flush state to perfmon context */
|
|
- if (current->thread.pfm_context)
|
|
- pfm_exit_thread(current);
|
|
+
|
|
+ /* if needed, stop monitoring and flush state to perfmon context */
|
|
+ pfm_exit_thread();
|
|
|
|
/* free debug register resources */
|
|
- if (current->thread.flags & IA64_THREAD_DBG_VALID)
|
|
- pfm_release_debug_registers(current);
|
|
-#endif
|
|
+ pfm_release_dbregs(current);
|
|
+
|
|
if (IS_IA32_PROCESS(task_pt_regs(current)))
|
|
ia32_drop_ia64_partial_page_list(current);
|
|
}
|
|
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
|
|
index 2a9943b..bb1ca1e 100644
|
|
--- a/arch/ia64/kernel/ptrace.c
|
|
+++ b/arch/ia64/kernel/ptrace.c
|
|
@@ -20,6 +20,7 @@
|
|
#include <linux/security.h>
|
|
#include <linux/audit.h>
|
|
#include <linux/signal.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
#include <linux/regset.h>
|
|
#include <linux/elf.h>
|
|
|
|
@@ -30,9 +31,6 @@
|
|
#include <asm/system.h>
|
|
#include <asm/uaccess.h>
|
|
#include <asm/unwind.h>
|
|
-#ifdef CONFIG_PERFMON
|
|
-#include <asm/perfmon.h>
|
|
-#endif
|
|
|
|
#include "entry.h"
|
|
|
|
@@ -2124,7 +2122,6 @@ access_uarea(struct task_struct *child, unsigned long addr,
|
|
"address 0x%lx\n", addr);
|
|
return -1;
|
|
}
|
|
-#ifdef CONFIG_PERFMON
|
|
/*
|
|
* Check if debug registers are used by perfmon. This
|
|
* test must be done once we know that we can do the
|
|
@@ -2142,9 +2139,8 @@ access_uarea(struct task_struct *child, unsigned long addr,
|
|
* IA64_THREAD_DBG_VALID. The registers are restored
|
|
* by the PMU context switch code.
|
|
*/
|
|
- if (pfm_use_debug_registers(child))
|
|
+ if (pfm_use_dbregs(child))
|
|
return -1;
|
|
-#endif
|
|
|
|
if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
|
|
child->thread.flags |= IA64_THREAD_DBG_VALID;
|
|
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
|
|
index de636b2..677fa68 100644
|
|
--- a/arch/ia64/kernel/setup.c
|
|
+++ b/arch/ia64/kernel/setup.c
|
|
@@ -45,6 +45,7 @@
|
|
#include <linux/cpufreq.h>
|
|
#include <linux/kexec.h>
|
|
#include <linux/crash_dump.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/ia32.h>
|
|
#include <asm/machvec.h>
|
|
@@ -1051,6 +1052,8 @@ cpu_init (void)
|
|
}
|
|
platform_cpu_init();
|
|
pm_idle = default_idle;
|
|
+
|
|
+ pfm_init_percpu();
|
|
}
|
|
|
|
void __init
|
|
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
|
|
index d8f05e5..3d7a739 100644
|
|
--- a/arch/ia64/kernel/smpboot.c
|
|
+++ b/arch/ia64/kernel/smpboot.c
|
|
@@ -39,6 +39,7 @@
|
|
#include <linux/efi.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/bitops.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/atomic.h>
|
|
#include <asm/cache.h>
|
|
@@ -381,10 +382,6 @@ smp_callin (void)
|
|
extern void ia64_init_itm(void);
|
|
extern volatile int time_keeper_id;
|
|
|
|
-#ifdef CONFIG_PERFMON
|
|
- extern void pfm_init_percpu(void);
|
|
-#endif
|
|
-
|
|
cpuid = smp_processor_id();
|
|
phys_id = hard_smp_processor_id();
|
|
itc_master = time_keeper_id;
|
|
@@ -410,10 +407,6 @@ smp_callin (void)
|
|
|
|
ia64_mca_cmc_vector_setup(); /* Setup vector on AP */
|
|
|
|
-#ifdef CONFIG_PERFMON
|
|
- pfm_init_percpu();
|
|
-#endif
|
|
-
|
|
local_irq_enable();
|
|
|
|
if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
|
|
@@ -751,6 +744,7 @@ int __cpu_disable(void)
|
|
cpu_clear(cpu, cpu_online_map);
|
|
local_flush_tlb_all();
|
|
cpu_clear(cpu, cpu_callin_map);
|
|
+ pfm_cpu_disable();
|
|
return 0;
|
|
}
|
|
|
|
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
|
|
index bcbb6d8..a0ed33a 100644
|
|
--- a/arch/ia64/kernel/sys_ia64.c
|
|
+++ b/arch/ia64/kernel/sys_ia64.c
|
|
@@ -284,3 +284,11 @@ sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, un
|
|
}
|
|
|
|
#endif /* CONFIG_PCI */
|
|
+
|
|
+#ifndef CONFIG_IA64_PERFMON_COMPAT
|
|
+asmlinkage long
|
|
+sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
|
|
+{
|
|
+ return -ENOSYS;
|
|
+}
|
|
+#endif
|
|
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
|
|
index 98771e2..077fd09 100644
|
|
--- a/arch/ia64/lib/Makefile
|
|
+++ b/arch/ia64/lib/Makefile
|
|
@@ -13,7 +13,6 @@ lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
|
|
|
|
obj-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o
|
|
obj-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
|
|
-lib-$(CONFIG_PERFMON) += carta_random.o
|
|
|
|
AFLAGS___divdi3.o =
|
|
AFLAGS___udivdi3.o = -DUNSIGNED
|
|
diff --git a/arch/ia64/oprofile/init.c b/arch/ia64/oprofile/init.c
|
|
index 125a602..892de6a 100644
|
|
--- a/arch/ia64/oprofile/init.c
|
|
+++ b/arch/ia64/oprofile/init.c
|
|
@@ -12,8 +12,8 @@
|
|
#include <linux/init.h>
|
|
#include <linux/errno.h>
|
|
|
|
-extern int perfmon_init(struct oprofile_operations * ops);
|
|
-extern void perfmon_exit(void);
|
|
+extern int op_perfmon_init(struct oprofile_operations * ops);
|
|
+extern void op_perfmon_exit(void);
|
|
extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth);
|
|
|
|
int __init oprofile_arch_init(struct oprofile_operations * ops)
|
|
@@ -22,7 +22,7 @@ int __init oprofile_arch_init(struct oprofile_operations * ops)
|
|
|
|
#ifdef CONFIG_PERFMON
|
|
/* perfmon_init() can fail, but we have no way to report it */
|
|
- ret = perfmon_init(ops);
|
|
+ ret = op_perfmon_init(ops);
|
|
#endif
|
|
ops->backtrace = ia64_backtrace;
|
|
|
|
@@ -33,6 +33,6 @@ int __init oprofile_arch_init(struct oprofile_operations * ops)
|
|
void oprofile_arch_exit(void)
|
|
{
|
|
#ifdef CONFIG_PERFMON
|
|
- perfmon_exit();
|
|
+ op_perfmon_exit();
|
|
#endif
|
|
}
|
|
diff --git a/arch/ia64/oprofile/perfmon.c b/arch/ia64/oprofile/perfmon.c
|
|
index bc41dd3..6fa9d17 100644
|
|
--- a/arch/ia64/oprofile/perfmon.c
|
|
+++ b/arch/ia64/oprofile/perfmon.c
|
|
@@ -10,25 +10,30 @@
|
|
#include <linux/kernel.h>
|
|
#include <linux/oprofile.h>
|
|
#include <linux/sched.h>
|
|
-#include <asm/perfmon.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/errno.h>
|
|
|
|
static int allow_ints;
|
|
|
|
static int
|
|
-perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg,
|
|
- struct pt_regs *regs, unsigned long stamp)
|
|
+perfmon_handler(struct pfm_context *ctx,
|
|
+ unsigned long ip, u64 stamp, void *data)
|
|
{
|
|
- int event = arg->pmd_eventid;
|
|
+ struct pt_regs *regs;
|
|
+ struct pfm_ovfl_arg *arg;
|
|
+
|
|
+ regs = data;
|
|
+ arg = &ctx->ovfl_arg;
|
|
|
|
- arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
|
|
+ arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET;
|
|
|
|
/* the owner of the oprofile event buffer may have exited
|
|
* without perfmon being shutdown (e.g. SIGSEGV)
|
|
*/
|
|
if (allow_ints)
|
|
- oprofile_add_sample(regs, event);
|
|
+ oprofile_add_sample(regs, arg->pmd_eventid);
|
|
return 0;
|
|
}
|
|
|
|
@@ -45,17 +50,13 @@ static void perfmon_stop(void)
|
|
allow_ints = 0;
|
|
}
|
|
|
|
-
|
|
-#define OPROFILE_FMT_UUID { \
|
|
- 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c }
|
|
-
|
|
-static pfm_buffer_fmt_t oprofile_fmt = {
|
|
- .fmt_name = "oprofile_format",
|
|
- .fmt_uuid = OPROFILE_FMT_UUID,
|
|
- .fmt_handler = perfmon_handler,
|
|
+static struct pfm_smpl_fmt oprofile_fmt = {
|
|
+ .fmt_name = "OProfile",
|
|
+ .fmt_handler = perfmon_handler,
|
|
+ .fmt_flags = PFM_FMT_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE
|
|
};
|
|
|
|
-
|
|
static char * get_cpu_type(void)
|
|
{
|
|
__u8 family = local_cpu_data->family;
|
|
@@ -75,9 +76,9 @@ static char * get_cpu_type(void)
|
|
|
|
static int using_perfmon;
|
|
|
|
-int perfmon_init(struct oprofile_operations * ops)
|
|
+int __init op_perfmon_init(struct oprofile_operations * ops)
|
|
{
|
|
- int ret = pfm_register_buffer_fmt(&oprofile_fmt);
|
|
+ int ret = pfm_fmt_register(&oprofile_fmt);
|
|
if (ret)
|
|
return -ENODEV;
|
|
|
|
@@ -90,10 +91,10 @@ int perfmon_init(struct oprofile_operations * ops)
|
|
}
|
|
|
|
|
|
-void perfmon_exit(void)
|
|
+void __exit op_perfmon_exit(void)
|
|
{
|
|
if (!using_perfmon)
|
|
return;
|
|
|
|
- pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid);
|
|
+ pfm_fmt_unregister(&oprofile_fmt);
|
|
}
|
|
diff --git a/arch/ia64/perfmon/Kconfig b/arch/ia64/perfmon/Kconfig
|
|
new file mode 100644
|
|
index 0000000..99c68bd
|
|
--- /dev/null
|
|
+++ b/arch/ia64/perfmon/Kconfig
|
|
@@ -0,0 +1,67 @@
|
|
+menu "Hardware Performance Monitoring support"
|
|
+config PERFMON
|
|
+ bool "Perfmon2 performance monitoring interface"
|
|
+ default n
|
|
+ help
|
|
+ Enables the perfmon2 interface to access the hardware
|
|
+ performance counters. See <http://perfmon2.sf.net/> for
|
|
+ more details.
|
|
+
|
|
+config PERFMON_DEBUG
|
|
+ bool "Perfmon debugging"
|
|
+ default n
|
|
+ depends on PERFMON
|
|
+ help
|
|
+ Enables perfmon debugging support
|
|
+
|
|
+config PERFMON_DEBUG_FS
|
|
+ bool "Enable perfmon statistics reporting via debugfs"
|
|
+ default y
|
|
+ depends on PERFMON && DEBUG_FS
|
|
+ help
|
|
+ Enable collection and reporting of perfmon timing statistics under
|
|
+ debugfs. This is used for debugging and performance analysis of the
|
|
+ subsystem. The debugfs filesystem must be mounted.
|
|
+
|
|
+config IA64_PERFMON_COMPAT
|
|
+ bool "Enable old perfmon-2 compatbility mode"
|
|
+ default n
|
|
+ depends on PERFMON
|
|
+ help
|
|
+ Enable this option to allow performance tools which used the old
|
|
+ perfmon-2 interface to continue to work. Old tools are those using
|
|
+ the obsolete commands and arguments. Check your programs and look
|
|
+ in include/asm-ia64/perfmon_compat.h for more information.
|
|
+
|
|
+config IA64_PERFMON_GENERIC
|
|
+ tristate "Generic IA-64 PMU support"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ Enables generic IA-64 PMU support.
|
|
+ The generic PMU is defined by the IA-64 architecture document.
|
|
+ This option should only be necessary when running with a PMU that
|
|
+ is not yet explicitly supported. Even then, there is no guarantee
|
|
+ that this support will work.
|
|
+
|
|
+config IA64_PERFMON_ITANIUM
|
|
+ tristate "Itanium (Merced) Performance Monitoring support"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ Enables Itanium (Merced) PMU support.
|
|
+
|
|
+config IA64_PERFMON_MCKINLEY
|
|
+ tristate "Itanium 2 (McKinley) Performance Monitoring support"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ Enables Itanium 2 (McKinley, Madison, Deerfield) PMU support.
|
|
+
|
|
+config IA64_PERFMON_MONTECITO
|
|
+ tristate "Itanium 2 9000 (Montecito) Performance Monitoring support"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ Enables support for Itanium 2 9000 (Montecito) PMU.
|
|
+endmenu
|
|
diff --git a/arch/ia64/perfmon/Makefile b/arch/ia64/perfmon/Makefile
|
|
new file mode 100644
|
|
index 0000000..c9cdf9f
|
|
--- /dev/null
|
|
+++ b/arch/ia64/perfmon/Makefile
|
|
@@ -0,0 +1,11 @@
|
|
+#
|
|
+# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+# Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+#
|
|
+obj-$(CONFIG_PERFMON) += perfmon.o
|
|
+obj-$(CONFIG_IA64_PERFMON_COMPAT) += perfmon_default_smpl.o \
|
|
+ perfmon_compat.o
|
|
+obj-$(CONFIG_IA64_PERFMON_GENERIC) += perfmon_generic.o
|
|
+obj-$(CONFIG_IA64_PERFMON_ITANIUM) += perfmon_itanium.o
|
|
+obj-$(CONFIG_IA64_PERFMON_MCKINLEY) += perfmon_mckinley.o
|
|
+obj-$(CONFIG_IA64_PERFMON_MONTECITO) += perfmon_montecito.o
|
|
diff --git a/arch/ia64/perfmon/perfmon.c b/arch/ia64/perfmon/perfmon.c
|
|
new file mode 100644
|
|
index 0000000..3f59410
|
|
--- /dev/null
|
|
+++ b/arch/ia64/perfmon/perfmon.c
|
|
@@ -0,0 +1,946 @@
|
|
+/*
|
|
+ * This file implements the IA-64 specific
|
|
+ * support for the perfmon2 interface
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+struct pfm_arch_session {
|
|
+ u32 pfs_sys_use_dbr; /* syswide session uses dbr */
|
|
+ u32 pfs_ptrace_use_dbr; /* a thread uses dbr via ptrace()*/
|
|
+};
|
|
+
|
|
+DEFINE_PER_CPU(u32, pfm_syst_info);
|
|
+
|
|
+static struct pfm_arch_session pfm_arch_sessions;
|
|
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_arch_sessions_lock);
|
|
+
|
|
+static inline void pfm_clear_psr_pp(void)
|
|
+{
|
|
+ ia64_rsm(IA64_PSR_PP);
|
|
+}
|
|
+
|
|
+static inline void pfm_set_psr_pp(void)
|
|
+{
|
|
+ ia64_ssm(IA64_PSR_PP);
|
|
+}
|
|
+
|
|
+static inline void pfm_clear_psr_up(void)
|
|
+{
|
|
+ ia64_rsm(IA64_PSR_UP);
|
|
+}
|
|
+
|
|
+static inline void pfm_set_psr_up(void)
|
|
+{
|
|
+ ia64_ssm(IA64_PSR_UP);
|
|
+}
|
|
+
|
|
+static inline void pfm_set_psr_l(u64 val)
|
|
+{
|
|
+ ia64_setreg(_IA64_REG_PSR_L, val);
|
|
+}
|
|
+
|
|
+static inline void pfm_restore_ibrs(u64 *ibrs, unsigned int nibrs)
|
|
+{
|
|
+ unsigned int i;
|
|
+
|
|
+ for (i = 0; i < nibrs; i++) {
|
|
+ ia64_set_ibr(i, ibrs[i]);
|
|
+ ia64_dv_serialize_instruction();
|
|
+ }
|
|
+ ia64_srlz_i();
|
|
+}
|
|
+
|
|
+static inline void pfm_restore_dbrs(u64 *dbrs, unsigned int ndbrs)
|
|
+{
|
|
+ unsigned int i;
|
|
+
|
|
+ for (i = 0; i < ndbrs; i++) {
|
|
+ ia64_set_dbr(i, dbrs[i]);
|
|
+ ia64_dv_serialize_data();
|
|
+ }
|
|
+ ia64_srlz_d();
|
|
+}
|
|
+
|
|
+irqreturn_t pmu_interrupt_handler(int irq, void *arg)
|
|
+{
|
|
+ struct pt_regs *regs;
|
|
+ regs = get_irq_regs();
|
|
+ irq_enter();
|
|
+ pfm_interrupt_handler(instruction_pointer(regs), regs);
|
|
+ irq_exit();
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+static struct irqaction perfmon_irqaction = {
|
|
+ .handler = pmu_interrupt_handler,
|
|
+ .flags = IRQF_DISABLED, /* means keep interrupts masked */
|
|
+ .name = "perfmon"
|
|
+};
|
|
+
|
|
+void pfm_arch_quiesce_pmu_percpu(void)
|
|
+{
|
|
+ u64 dcr;
|
|
+ /*
|
|
+ * make sure no measurement is active
|
|
+ * (may inherit programmed PMCs from EFI).
|
|
+ */
|
|
+ pfm_clear_psr_pp();
|
|
+ pfm_clear_psr_up();
|
|
+
|
|
+ /*
|
|
+ * ensure dcr.pp is cleared
|
|
+ */
|
|
+ dcr = ia64_getreg(_IA64_REG_CR_DCR);
|
|
+ ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
|
|
+
|
|
+ /*
|
|
+ * we run with the PMU not frozen at all times
|
|
+ */
|
|
+ ia64_set_pmc(0, 0);
|
|
+ ia64_srlz_d();
|
|
+}
|
|
+
|
|
+void pfm_arch_init_percpu(void)
|
|
+{
|
|
+ pfm_arch_quiesce_pmu_percpu();
|
|
+ /*
|
|
+ * program PMU interrupt vector
|
|
+ */
|
|
+ ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
|
|
+ ia64_srlz_d();
|
|
+}
|
|
+
|
|
+int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ ctx_arch->flags.use_dbr = 0;
|
|
+ ctx_arch->flags.insecure = (ctx_flags & PFM_ITA_FL_INSECURE) ? 1: 0;
|
|
+
|
|
+ PFM_DBG("insecure=%d", ctx_arch->flags.insecure);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_ctxsw(). Task is guaranteed to be current.
|
|
+ * Context is locked. Interrupts are masked. Monitoring may be active.
|
|
+ * PMU access is guaranteed. PMC and PMD registers are live in PMU.
|
|
+ *
|
|
+ * Return:
|
|
+ * non-zero : did not save PMDs (as part of stopping the PMU)
|
|
+ * 0 : saved PMDs (no need to save them in caller)
|
|
+ */
|
|
+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct pfm_event_set *set;
|
|
+ u64 psr, tmp;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ set = ctx->active_set;
|
|
+
|
|
+ /*
|
|
+ * save current PSR: needed because we modify it
|
|
+ */
|
|
+ ia64_srlz_d();
|
|
+ psr = ia64_getreg(_IA64_REG_PSR);
|
|
+
|
|
+ /*
|
|
+ * stop monitoring:
|
|
+ * This is the last instruction which may generate an overflow
|
|
+ *
|
|
+ * we do not clear ipsr.up
|
|
+ */
|
|
+ pfm_clear_psr_up();
|
|
+ ia64_srlz_d();
|
|
+
|
|
+ /*
|
|
+ * extract overflow status bits
|
|
+ */
|
|
+ tmp = ia64_get_pmc(0) & ~0xf;
|
|
+
|
|
+ /*
|
|
+ * keep a copy of psr.up (for reload)
|
|
+ */
|
|
+ ctx_arch->ctx_saved_psr_up = psr & IA64_PSR_UP;
|
|
+
|
|
+ /*
|
|
+ * save overflow status bits
|
|
+ */
|
|
+ set->povfl_pmds[0] = tmp;
|
|
+
|
|
+ /*
|
|
+ * record how many pending overflows
|
|
+ * XXX: assume identity mapping for counters
|
|
+ */
|
|
+ set->npend_ovfls = ia64_popcnt(tmp);
|
|
+
|
|
+ /*
|
|
+ * make sure the PMU is unfrozen for the next task
|
|
+ */
|
|
+ if (set->npend_ovfls) {
|
|
+ ia64_set_pmc(0, 0);
|
|
+ ia64_srlz_d();
|
|
+ }
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_ctxsw(). Task is guaranteed to be current.
|
|
+ * set cannot be NULL. Context is locked. Interrupts are masked.
|
|
+ * Caller has already restored all PMD and PMC registers.
|
|
+ *
|
|
+ * must reactivate monitoring
|
|
+ */
|
|
+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ /*
|
|
+ * when monitoring is not explicitly started
|
|
+ * then psr_up = 0, in which case we do not
|
|
+ * need to restore
|
|
+ */
|
|
+ if (likely(ctx_arch->ctx_saved_psr_up)) {
|
|
+ pfm_set_psr_up();
|
|
+ ia64_srlz_d();
|
|
+ }
|
|
+}
|
|
+
|
|
+int pfm_arch_reserve_session(struct pfm_context *ctx, u32 cpu)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ int is_system;
|
|
+ int ret = 0;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ is_system = ctx->flags.system;
|
|
+
|
|
+ spin_lock(&pfm_arch_sessions_lock);
|
|
+
|
|
+ if (is_system && ctx_arch->flags.use_dbr) {
|
|
+ PFM_DBG("syswide context uses dbregs");
|
|
+
|
|
+ if (pfm_arch_sessions.pfs_ptrace_use_dbr) {
|
|
+ PFM_DBG("cannot reserve syswide context: "
|
|
+ "dbregs in use by ptrace");
|
|
+ ret = -EBUSY;
|
|
+ } else {
|
|
+ pfm_arch_sessions.pfs_sys_use_dbr++;
|
|
+ }
|
|
+ }
|
|
+ spin_unlock(&pfm_arch_sessions_lock);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+void pfm_arch_release_session(struct pfm_context *ctx, u32 cpu)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ int is_system;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ is_system = ctx->flags.system;
|
|
+
|
|
+ spin_lock(&pfm_arch_sessions_lock);
|
|
+
|
|
+ if (is_system && ctx_arch->flags.use_dbr)
|
|
+ pfm_arch_sessions.pfs_sys_use_dbr--;
|
|
+ spin_unlock(&pfm_arch_sessions_lock);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_load_context_*(). Task is not guaranteed to be
|
|
+ * current task. If not then other task is guaranteed stopped and off any CPU.
|
|
+ * context is locked and interrupts are masked.
|
|
+ *
|
|
+ * On PFM_LOAD_CONTEXT, the interface guarantees monitoring is stopped.
|
|
+ *
|
|
+ * For system-wide task is NULL
|
|
+ */
|
|
+int pfm_arch_load_context(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct pt_regs *regs;
|
|
+ int ret = 0;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ /*
|
|
+ * cannot load a context which is using range restrictions,
|
|
+ * into a thread that is being debugged.
|
|
+ *
|
|
+ * if one set out of several is using the debug registers, then
|
|
+ * we assume the context as whole is using them.
|
|
+ */
|
|
+ if (ctx_arch->flags.use_dbr) {
|
|
+ if (ctx->flags.system) {
|
|
+ spin_lock(&pfm_arch_sessions_lock);
|
|
+
|
|
+ if (pfm_arch_sessions.pfs_ptrace_use_dbr) {
|
|
+ PFM_DBG("cannot reserve syswide context: "
|
|
+ "dbregs in use by ptrace");
|
|
+ ret = -EBUSY;
|
|
+ } else {
|
|
+ pfm_arch_sessions.pfs_sys_use_dbr++;
|
|
+ PFM_DBG("pfs_sys_use_dbr=%u",
|
|
+ pfm_arch_sessions.pfs_sys_use_dbr);
|
|
+ }
|
|
+ spin_unlock(&pfm_arch_sessions_lock);
|
|
+
|
|
+ } else if (ctx->task->thread.flags & IA64_THREAD_DBG_VALID) {
|
|
+ PFM_DBG("load_pid [%d] thread is debugged, cannot "
|
|
+ "use range restrictions", ctx->task->pid);
|
|
+ ret = -EBUSY;
|
|
+ }
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * We need to intervene on context switch to toggle the
|
|
+ * psr.pp bit in system-wide. As such, we set the TIF
|
|
+ * flag so that pfm_arch_ctxswout_sys() and the
|
|
+ * pfm_arch_ctxswin_sys() functions get called
|
|
+ * from pfm_ctxsw_sys();
|
|
+ */
|
|
+ if (ctx->flags.system) {
|
|
+ set_thread_flag(TIF_PERFMON_CTXSW);
|
|
+ PFM_DBG("[%d] set TIF", current->pid);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ regs = task_pt_regs(ctx->task);
|
|
+
|
|
+ /*
|
|
+ * self-monitoring systematically allows user level control
|
|
+ */
|
|
+ if (ctx->task != current) {
|
|
+ /*
|
|
+ * when not current, task is stopped, so this is safe
|
|
+ */
|
|
+ ctx_arch->ctx_saved_psr_up = 0;
|
|
+ ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
|
|
+ } else
|
|
+ ctx_arch->flags.insecure = 1;
|
|
+
|
|
+ /*
|
|
+ * allow user level control (start/stop/read pmd) if:
|
|
+ * - self-monitoring
|
|
+ * - requested at context creation (PFM_IA64_FL_INSECURE)
|
|
+ *
|
|
+ * There is no security hole with PFM_IA64_FL_INSECURE because
|
|
+ * when not self-monitored, the caller must have permissions to
|
|
+ * attach to the task.
|
|
+ */
|
|
+ if (ctx_arch->flags.insecure) {
|
|
+ ia64_psr(regs)->sp = 0;
|
|
+ PFM_DBG("clearing psr.sp for [%d]", ctx->task->pid);
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
|
|
+{
|
|
+#define PFM_SETFL_BOTH_SWITCH (PFM_SETFL_OVFL_SWITCH|PFM_SETFL_TIME_SWITCH)
|
|
+#define PFM_ITA_SETFL_BOTH_INTR (PFM_ITA_SETFL_INTR_ONLY|\
|
|
+ PFM_ITA_SETFL_EXCL_INTR)
|
|
+
|
|
+/* exclude return value field */
|
|
+#define PFM_SETFL_ALL_MASK (PFM_ITA_SETFL_BOTH_INTR \
|
|
+ | PFM_SETFL_BOTH_SWITCH \
|
|
+ | PFM_ITA_SETFL_IDLE_EXCL)
|
|
+
|
|
+ if ((flags & ~PFM_SETFL_ALL_MASK)) {
|
|
+ PFM_DBG("invalid flags=0x%x", flags);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if ((flags & PFM_ITA_SETFL_BOTH_INTR) == PFM_ITA_SETFL_BOTH_INTR) {
|
|
+ PFM_DBG("both excl intr and ontr only are set");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if ((flags & PFM_ITA_SETFL_IDLE_EXCL) && !ctx->flags.system) {
|
|
+ PFM_DBG("idle exclude flag only for system-wide context");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_unload_context_*(). Context is locked.
|
|
+ * interrupts are masked. task is not guaranteed to be current task.
|
|
+ * Access to PMU is not guaranteed.
|
|
+ *
|
|
+ * function must do whatever arch-specific action is required on unload
|
|
+ * of a context.
|
|
+ *
|
|
+ * called for both system-wide and per-thread. task is NULL for system-wide
|
|
+ */
|
|
+void pfm_arch_unload_context(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct pt_regs *regs;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ if (ctx->flags.system) {
|
|
+ /*
|
|
+ * disable context switch hook
|
|
+ */
|
|
+ clear_thread_flag(TIF_PERFMON_CTXSW);
|
|
+
|
|
+ if (ctx_arch->flags.use_dbr) {
|
|
+ spin_lock(&pfm_arch_sessions_lock);
|
|
+ pfm_arch_sessions.pfs_sys_use_dbr--;
|
|
+ PFM_DBG("sys_use_dbr=%u", pfm_arch_sessions.pfs_sys_use_dbr);
|
|
+ spin_unlock(&pfm_arch_sessions_lock);
|
|
+ }
|
|
+ } else {
|
|
+ regs = task_pt_regs(ctx->task);
|
|
+
|
|
+ /*
|
|
+ * cancel user level control for per-task context
|
|
+ */
|
|
+ ia64_psr(regs)->sp = 1;
|
|
+ PFM_DBG("setting psr.sp for [%d]", ctx->task->pid);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * mask monitoring by setting the privilege level to 0
|
|
+ * we cannot use psr.pp/psr.up for this, it is controlled by
|
|
+ * the user
|
|
+ */
|
|
+void pfm_arch_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+ unsigned long mask;
|
|
+ unsigned int i;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ /*
|
|
+ * as an optimization we look at the first 64 PMC
|
|
+ * registers only starting at PMC4.
|
|
+ */
|
|
+ mask = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR;
|
|
+ for (i = PFM_ITA_FCNTR; mask; i++, mask >>= 1) {
|
|
+ if (likely(mask & 0x1))
|
|
+ ia64_set_pmc(i, set->pmcs[i] & ~0xfUL);
|
|
+ }
|
|
+ /*
|
|
+ * make changes visible
|
|
+ */
|
|
+ ia64_srlz_d();
|
|
+}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
|
|
+ * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets()
|
|
+ * context is locked. Interrupts are masked. set cannot be NULL.
|
|
+ * Access to the PMU is guaranteed.
|
|
+ *
|
|
+ * function must restore all PMD registers from set.
|
|
+ */
|
|
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ unsigned long *mask;
|
|
+ u16 i, num;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ if (ctx_arch->flags.insecure) {
|
|
+ num = ctx->regs.num_rw_pmd;
|
|
+ mask = ctx->regs.rw_pmds;
|
|
+ } else {
|
|
+ num = set->nused_pmds;
|
|
+ mask = set->used_pmds;
|
|
+ }
|
|
+ /*
|
|
+ * must restore all implemented read-write PMDS to avoid leaking
|
|
+ * information especially when PFM_IA64_FL_INSECURE is set.
|
|
+ *
|
|
+ * XXX: should check PFM_IA64_FL_INSECURE==0 and use used_pmd instead
|
|
+ */
|
|
+ for (i = 0; num; i++) {
|
|
+ if (likely(test_bit(i, mask))) {
|
|
+ pfm_arch_write_pmd(ctx, i, set->pmds[i].value);
|
|
+ num--;
|
|
+ }
|
|
+ }
|
|
+ ia64_srlz_d();
|
|
+}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
|
|
+ * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets()
|
|
+ * context is locked. Interrupts are masked. set cannot be NULL.
|
|
+ * Access to the PMU is guaranteed.
|
|
+ *
|
|
+ * function must restore all PMC registers from set if needed
|
|
+ */
|
|
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+ u64 mask2 = 0, val, plm;
|
|
+ unsigned long impl_mask, mask_pmcs;
|
|
+ unsigned int i;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ /*
|
|
+ * as an optimization we only look at the first 64
|
|
+ * PMC registers. In fact, we should never scan the
|
|
+ * entire impl_pmcs because ibr/dbr are implemented
|
|
+ * separately.
|
|
+ *
|
|
+ * always skip PMC0-PMC3. PMC0 taken care of when saving
|
|
+ * state. PMC1-PMC3 not used until we get counters in
|
|
+ * the 60 and above index range.
|
|
+ */
|
|
+ impl_mask = ctx->regs.pmcs[0] >> PFM_ITA_FCNTR;
|
|
+ mask_pmcs = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR;
|
|
+ plm = ctx->state == PFM_CTX_MASKED ? ~0xf : ~0x0;
|
|
+
|
|
+ for (i = PFM_ITA_FCNTR;
|
|
+ impl_mask;
|
|
+ i++, impl_mask >>= 1, mask_pmcs >>= 1) {
|
|
+ if (likely(impl_mask & 0x1)) {
|
|
+ mask2 = mask_pmcs & 0x1 ? plm : ~0;
|
|
+ val = set->pmcs[i] & mask2;
|
|
+ ia64_set_pmc(i, val);
|
|
+ PFM_DBG_ovfl("pmc%u=0x%lx", i, val);
|
|
+ }
|
|
+ }
|
|
+ /*
|
|
+ * restore DBR/IBR
|
|
+ */
|
|
+ if (set->priv_flags & PFM_ITA_SETFL_USE_DBR) {
|
|
+ pfm_restore_ibrs(set->pmcs+256, 8);
|
|
+ pfm_restore_dbrs(set->pmcs+264, 8);
|
|
+ }
|
|
+ ia64_srlz_d();
|
|
+}
|
|
+
|
|
+void pfm_arch_unmask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ u64 psr;
|
|
+ int is_system;
|
|
+
|
|
+ is_system = ctx->flags.system;
|
|
+
|
|
+ psr = ia64_getreg(_IA64_REG_PSR);
|
|
+
|
|
+ /*
|
|
+ * monitoring is masked via the PMC.plm
|
|
+ *
|
|
+ * As we restore their value, we do not want each counter to
|
|
+ * restart right away. We stop monitoring using the PSR,
|
|
+ * restore the PMC (and PMD) and then re-establish the psr
|
|
+ * as it was. Note that there can be no pending overflow at
|
|
+ * this point, because monitoring is still MASKED.
|
|
+ *
|
|
+ * Because interrupts are masked we can avoid changing
|
|
+ * DCR.pp.
|
|
+ */
|
|
+ if (is_system)
|
|
+ pfm_clear_psr_pp();
|
|
+ else
|
|
+ pfm_clear_psr_up();
|
|
+
|
|
+ ia64_srlz_d();
|
|
+
|
|
+ pfm_arch_restore_pmcs(ctx, set);
|
|
+
|
|
+ /*
|
|
+ * restore psr
|
|
+ *
|
|
+ * monitoring may start right now but interrupts
|
|
+ * are still masked
|
|
+ */
|
|
+ pfm_set_psr_l(psr);
|
|
+ ia64_srlz_d();
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_stop()
|
|
+ *
|
|
+ * For per-thread:
|
|
+ * task is not necessarily current. If not current task, then
|
|
+ * task is guaranteed stopped and off any cpu. Access to PMU
|
|
+ * is not guaranteed. Interrupts are masked. Context is locked.
|
|
+ * Set is the active set.
|
|
+ *
|
|
+ * must disable active monitoring. ctx cannot be NULL
|
|
+ */
|
|
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct pt_regs *regs;
|
|
+ u64 dcr, psr;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ regs = task_pt_regs(task);
|
|
+
|
|
+ if (!ctx->flags.system) {
|
|
+ /*
|
|
+ * in ZOMBIE state we always have task == current due to
|
|
+ * pfm_exit_thread()
|
|
+ */
|
|
+ ia64_psr(regs)->up = 0;
|
|
+ ctx_arch->ctx_saved_psr_up = 0;
|
|
+
|
|
+ /*
|
|
+ * in case of ZOMBIE state, there is no unload to clear
|
|
+ * insecure monitoring, so we do it in stop instead.
|
|
+ */
|
|
+ if (ctx->state == PFM_CTX_ZOMBIE)
|
|
+ ia64_psr(regs)->sp = 1;
|
|
+
|
|
+ if (task == current) {
|
|
+ pfm_clear_psr_up();
|
|
+ ia64_srlz_d();
|
|
+ }
|
|
+ } else if (ctx->flags.started) { /* do not stop twice */
|
|
+ dcr = ia64_getreg(_IA64_REG_CR_DCR);
|
|
+ psr = ia64_getreg(_IA64_REG_PSR);
|
|
+
|
|
+ ia64_psr(regs)->pp = 0;
|
|
+ ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
|
|
+ pfm_clear_psr_pp();
|
|
+ ia64_srlz_d();
|
|
+
|
|
+ if (ctx->active_set->flags & PFM_ITA_SETFL_IDLE_EXCL) {
|
|
+ PFM_DBG("disabling idle exclude");
|
|
+ __get_cpu_var(pfm_syst_info) &= ~PFM_ITA_CPUINFO_IDLE_EXCL;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * called from pfm_start()
|
|
+ *
|
|
+ * Interrupts are masked. Context is locked. Set is the active set.
|
|
+ *
|
|
+ * For per-thread:
|
|
+ * Task is not necessarily current. If not current task, then task
|
|
+ * is guaranteed stopped and off any cpu. No access to PMU if task
|
|
+ * is not current.
|
|
+ *
|
|
+ * For system-wide:
|
|
+ * task is always current
|
|
+ *
|
|
+ * must enable active monitoring.
|
|
+ */
|
|
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct pt_regs *regs;
|
|
+ u64 dcr, dcr_pp, psr_pp;
|
|
+ u32 flags;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ regs = task_pt_regs(task);
|
|
+ flags = ctx->active_set->flags;
|
|
+
|
|
+ /*
|
|
+ * per-thread mode
|
|
+ */
|
|
+ if (!ctx->flags.system) {
|
|
+
|
|
+ ia64_psr(regs)->up = 1;
|
|
+
|
|
+ if (task == current) {
|
|
+ pfm_set_psr_up();
|
|
+ ia64_srlz_d();
|
|
+ } else {
|
|
+ /*
|
|
+ * activate monitoring at next ctxswin
|
|
+ */
|
|
+ ctx_arch->ctx_saved_psr_up = IA64_PSR_UP;
|
|
+ }
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * system-wide mode
|
|
+ */
|
|
+ dcr = ia64_getreg(_IA64_REG_CR_DCR);
|
|
+ if (flags & PFM_ITA_SETFL_INTR_ONLY) {
|
|
+ dcr_pp = 1;
|
|
+ psr_pp = 0;
|
|
+ } else if (flags & PFM_ITA_SETFL_EXCL_INTR) {
|
|
+ dcr_pp = 0;
|
|
+ psr_pp = 1;
|
|
+ } else {
|
|
+ dcr_pp = psr_pp = 1;
|
|
+ }
|
|
+ PFM_DBG("dcr_pp=%lu psr_pp=%lu", dcr_pp, psr_pp);
|
|
+
|
|
+ /*
|
|
+ * update dcr_pp and psr_pp
|
|
+ */
|
|
+ if (dcr_pp)
|
|
+ ia64_setreg(_IA64_REG_CR_DCR, dcr | IA64_DCR_PP);
|
|
+ else
|
|
+ ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
|
|
+
|
|
+ if (psr_pp) {
|
|
+ pfm_set_psr_pp();
|
|
+ ia64_psr(regs)->pp = 1;
|
|
+ } else {
|
|
+ pfm_clear_psr_pp();
|
|
+ ia64_psr(regs)->pp = 0;
|
|
+ }
|
|
+ ia64_srlz_d();
|
|
+
|
|
+ if (ctx->active_set->flags & PFM_ITA_SETFL_IDLE_EXCL) {
|
|
+ PFM_DBG("enable idle exclude");
|
|
+ __get_cpu_var(pfm_syst_info) |= PFM_ITA_CPUINFO_IDLE_EXCL;
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Only call this function when a process is trying to
|
|
+ * write the debug registers (reading is always allowed)
|
|
+ * called from arch/ia64/kernel/ptrace.c:access_uarea()
|
|
+ */
|
|
+int __pfm_use_dbregs(struct task_struct *task)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct pfm_context *ctx;
|
|
+ unsigned long flags;
|
|
+ int ret = 0;
|
|
+
|
|
+ PFM_DBG("called for [%d]", task->pid);
|
|
+
|
|
+ ctx = task->pfm_context;
|
|
+
|
|
+ /*
|
|
+ * do it only once
|
|
+ */
|
|
+ if (task->thread.flags & IA64_THREAD_DBG_VALID) {
|
|
+ PFM_DBG("IA64_THREAD_DBG_VALID already set");
|
|
+ return 0;
|
|
+ }
|
|
+ if (ctx) {
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ if (ctx_arch->flags.use_dbr == 1) {
|
|
+ PFM_DBG("PMU using dbregs already, no ptrace access");
|
|
+ ret = -1;
|
|
+ }
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ spin_lock(&pfm_arch_sessions_lock);
|
|
+
|
|
+ /*
|
|
+ * We cannot allow setting breakpoints when system wide monitoring
|
|
+ * sessions are using the debug registers.
|
|
+ */
|
|
+ if (!pfm_arch_sessions.pfs_sys_use_dbr)
|
|
+ pfm_arch_sessions.pfs_ptrace_use_dbr++;
|
|
+ else
|
|
+ ret = -1;
|
|
+
|
|
+ PFM_DBG("ptrace_use_dbr=%u sys_use_dbr=%u by [%d] ret = %d",
|
|
+ pfm_arch_sessions.pfs_ptrace_use_dbr,
|
|
+ pfm_arch_sessions.pfs_sys_use_dbr,
|
|
+ task->pid, ret);
|
|
+
|
|
+ spin_unlock(&pfm_arch_sessions_lock);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+#ifndef CONFIG_SMP
|
|
+ /*
|
|
+ * in UP, we need to check whether the current
|
|
+ * owner of the PMU is not using the debug registers
|
|
+ * for monitoring. Because we are using a lazy
|
|
+ * save on ctxswout, we must force a save in this
|
|
+ * case because the debug registers are being
|
|
+ * modified by another task. We save the current
|
|
+ * PMD registers, and clear ownership. In ctxswin,
|
|
+ * full state will be reloaded.
|
|
+ *
|
|
+ * Note: we overwrite task.
|
|
+ */
|
|
+ task = __get_cpu_var(pmu_owner);
|
|
+ ctx = __get_cpu_var(pmu_ctx);
|
|
+
|
|
+ if (task == NULL)
|
|
+ return 0;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ if (ctx_arch->flags.use_dbr)
|
|
+ pfm_save_pmds_release(ctx);
|
|
+#endif
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This function is called for every task that exits with the
|
|
+ * IA64_THREAD_DBG_VALID set. This indicates a task which was
|
|
+ * able to use the debug registers for debugging purposes via
|
|
+ * ptrace(). Therefore we know it was not using them for
|
|
+ * performance monitoring, so we only decrement the number
|
|
+ * of "ptraced" debug register users to keep the count up to date
|
|
+ */
|
|
+int __pfm_release_dbregs(struct task_struct *task)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ spin_lock(&pfm_arch_sessions_lock);
|
|
+
|
|
+ if (pfm_arch_sessions.pfs_ptrace_use_dbr == 0) {
|
|
+ PFM_ERR("invalid release for [%d] ptrace_use_dbr=0", task->pid);
|
|
+ ret = -1;
|
|
+ } else {
|
|
+ pfm_arch_sessions.pfs_ptrace_use_dbr--;
|
|
+ ret = 0;
|
|
+ }
|
|
+ spin_unlock(&pfm_arch_sessions_lock);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct task_struct *task;
|
|
+ struct thread_struct *thread;
|
|
+ int ret = 0, state;
|
|
+ int i, can_access_pmu = 0;
|
|
+ int is_loaded, is_system;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ state = ctx->state;
|
|
+ task = ctx->task;
|
|
+ is_loaded = state == PFM_CTX_LOADED || state == PFM_CTX_MASKED;
|
|
+ is_system = ctx->flags.system;
|
|
+ can_access_pmu = __get_cpu_var(pmu_owner) == task || is_system;
|
|
+
|
|
+ if (is_loaded == 0)
|
|
+ goto done;
|
|
+
|
|
+ if (is_system == 0) {
|
|
+ thread = &(task->thread);
|
|
+
|
|
+ /*
|
|
+ * cannot use debug registers for monitoring if they are
|
|
+ * already used for debugging
|
|
+ */
|
|
+ if (thread->flags & IA64_THREAD_DBG_VALID) {
|
|
+ PFM_DBG("debug registers already in use for [%d]",
|
|
+ task->pid);
|
|
+ return -EBUSY;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * check for debug registers in system wide mode
|
|
+ */
|
|
+ spin_lock(&pfm_arch_sessions_lock);
|
|
+
|
|
+ if (is_system) {
|
|
+ if (pfm_arch_sessions.pfs_ptrace_use_dbr)
|
|
+ ret = -EBUSY;
|
|
+ else
|
|
+ pfm_arch_sessions.pfs_sys_use_dbr++;
|
|
+ }
|
|
+
|
|
+ spin_unlock(&pfm_arch_sessions_lock);
|
|
+
|
|
+ if (ret != 0)
|
|
+ return ret;
|
|
+
|
|
+ /*
|
|
+ * clear hardware registers to make sure we don't
|
|
+ * pick up stale state.
|
|
+ */
|
|
+ if (can_access_pmu) {
|
|
+ PFM_DBG("clearing ibrs, dbrs");
|
|
+ for (i = 0; i < 8; i++) {
|
|
+ ia64_set_ibr(i, 0);
|
|
+ ia64_dv_serialize_instruction();
|
|
+ }
|
|
+ ia64_srlz_i();
|
|
+ for (i = 0; i < 8; i++) {
|
|
+ ia64_set_dbr(i, 0);
|
|
+ ia64_dv_serialize_data();
|
|
+ }
|
|
+ ia64_srlz_d();
|
|
+ }
|
|
+done:
|
|
+ /*
|
|
+ * debug registers are now in use
|
|
+ */
|
|
+ ctx_arch->flags.use_dbr = 1;
|
|
+ set->priv_flags |= PFM_ITA_SETFL_USE_DBR;
|
|
+ PFM_DBG("set%u use_dbr=1", set->id);
|
|
+ return 0;
|
|
+}
|
|
+EXPORT_SYMBOL(pfm_ia64_mark_dbregs_used);
|
|
+
|
|
+char *pfm_arch_get_pmu_module_name(void)
|
|
+{
|
|
+ switch (local_cpu_data->family) {
|
|
+ case 0x07:
|
|
+ return "perfmon_itanium";
|
|
+ case 0x1f:
|
|
+ return "perfmon_mckinley";
|
|
+ case 0x20:
|
|
+ return "perfmon_montecito";
|
|
+ default:
|
|
+ return "perfmon_generic";
|
|
+ }
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * global arch-specific initialization, called only once
|
|
+ */
|
|
+int __init pfm_arch_init(void)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ spin_lock_init(&pfm_arch_sessions_lock);
|
|
+
|
|
+#ifdef CONFIG_IA64_PERFMON_COMPAT
|
|
+ ret = pfm_ia64_compat_init();
|
|
+ if (ret)
|
|
+ return ret;
|
|
+#endif
|
|
+ register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
|
|
+
|
|
+
|
|
+ return 0;
|
|
+}
|
|
diff --git a/arch/ia64/perfmon/perfmon_compat.c b/arch/ia64/perfmon/perfmon_compat.c
|
|
new file mode 100644
|
|
index 0000000..2fd3d3c
|
|
--- /dev/null
|
|
+++ b/arch/ia64/perfmon/perfmon_compat.c
|
|
@@ -0,0 +1,1210 @@
|
|
+/*
|
|
+ * This file implements the IA-64 specific
|
|
+ * support for the perfmon2 interface
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/file.h>
|
|
+#include <linux/fdtable.h>
|
|
+#include <linux/seq_file.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <linux/proc_fs.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <linux/uaccess.h>
|
|
+
|
|
+asmlinkage long sys_pfm_stop(int fd);
|
|
+asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *st);
|
|
+asmlinkage long sys_pfm_unload_context(int fd);
|
|
+asmlinkage long sys_pfm_restart(int fd);
|
|
+asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ld);
|
|
+
|
|
+ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what);
|
|
+
|
|
+extern ssize_t __pfm_read(struct pfm_context *ctx,
|
|
+ union pfarg_msg *msg_buf,
|
|
+ int non_block);
|
|
+/*
|
|
+ * function providing some help for backward compatibility with old IA-64
|
|
+ * applications. In the old model, certain attributes of a counter were
|
|
+ * passed via the PMC, now they are passed via the PMD.
|
|
+ */
|
|
+static int pfm_compat_update_pmd(struct pfm_context *ctx, u16 set_id, u16 cnum,
|
|
+ u32 rflags,
|
|
+ unsigned long *smpl_pmds,
|
|
+ unsigned long *reset_pmds,
|
|
+ u64 eventid)
|
|
+{
|
|
+ struct pfm_event_set *set;
|
|
+ int is_counting;
|
|
+ unsigned long *impl_pmds;
|
|
+ u32 flags = 0;
|
|
+ u16 max_pmd;
|
|
+
|
|
+ impl_pmds = ctx->regs.pmds;
|
|
+ max_pmd = ctx->regs.max_pmd;
|
|
+
|
|
+ /*
|
|
+ * given that we do not maintain PMC ->PMD dependencies
|
|
+ * we cannot figure out what to do in case PMCxx != PMDxx
|
|
+ */
|
|
+ if (cnum > max_pmd)
|
|
+ return 0;
|
|
+
|
|
+ /*
|
|
+ * assumes PMCxx controls PMDxx which is always true for counters
|
|
+ * on Itanium PMUs.
|
|
+ */
|
|
+ is_counting = pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64;
|
|
+ set = pfm_find_set(ctx, set_id, 0);
|
|
+
|
|
+ /*
|
|
+ * for v2.0, we only allowed counting PMD to generate
|
|
+ * user-level notifications. Same thing with randomization.
|
|
+ */
|
|
+ if (is_counting) {
|
|
+ if (rflags & PFM_REGFL_OVFL_NOTIFY)
|
|
+ flags |= PFM_REGFL_OVFL_NOTIFY;
|
|
+ if (rflags & PFM_REGFL_RANDOM)
|
|
+ flags |= PFM_REGFL_RANDOM;
|
|
+ /*
|
|
+ * verify validity of smpl_pmds
|
|
+ */
|
|
+ if (unlikely(bitmap_subset(smpl_pmds,
|
|
+ impl_pmds, max_pmd) == 0)) {
|
|
+ PFM_DBG("invalid smpl_pmds=0x%llx for pmd%u",
|
|
+ (unsigned long long)smpl_pmds[0], cnum);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ /*
|
|
+ * verify validity of reset_pmds
|
|
+ */
|
|
+ if (unlikely(bitmap_subset(reset_pmds,
|
|
+ impl_pmds, max_pmd) == 0)) {
|
|
+ PFM_DBG("invalid reset_pmds=0x%lx for pmd%u",
|
|
+ reset_pmds[0], cnum);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ /*
|
|
+ * ensures that a PFM_READ_PMDS succeeds with a
|
|
+ * corresponding PFM_WRITE_PMDS
|
|
+ */
|
|
+ __set_bit(cnum, set->used_pmds);
|
|
+
|
|
+ } else if (rflags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
|
|
+ PFM_DBG("cannot set ovfl_notify or random on pmd%u", cnum);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ set->pmds[cnum].flags = flags;
|
|
+
|
|
+ if (is_counting) {
|
|
+ bitmap_copy(set->pmds[cnum].reset_pmds,
|
|
+ reset_pmds,
|
|
+ max_pmd);
|
|
+
|
|
+ bitmap_copy(set->pmds[cnum].smpl_pmds,
|
|
+ smpl_pmds,
|
|
+ max_pmd);
|
|
+
|
|
+ set->pmds[cnum].eventid = eventid;
|
|
+
|
|
+ /*
|
|
+ * update ovfl_notify
|
|
+ */
|
|
+ if (rflags & PFM_REGFL_OVFL_NOTIFY)
|
|
+ __set_bit(cnum, set->ovfl_notify);
|
|
+ else
|
|
+ __clear_bit(cnum, set->ovfl_notify);
|
|
+
|
|
+ }
|
|
+ PFM_DBG("pmd%u flags=0x%x eventid=0x%lx r_pmds=0x%lx s_pmds=0x%lx",
|
|
+ cnum, flags,
|
|
+ eventid,
|
|
+ reset_pmds[0],
|
|
+ smpl_pmds[0]);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+int __pfm_write_ibrs_old(struct pfm_context *ctx, void *arg, int count)
|
|
+{
|
|
+ struct pfarg_dbreg *req = arg;
|
|
+ struct pfarg_pmc pmc;
|
|
+ int i, ret = 0;
|
|
+
|
|
+ memset(&pmc, 0, sizeof(pmc));
|
|
+
|
|
+ for (i = 0; i < count; i++, req++) {
|
|
+ pmc.reg_num = 256+req->dbreg_num;
|
|
+ pmc.reg_value = req->dbreg_value;
|
|
+ pmc.reg_flags = 0;
|
|
+ pmc.reg_set = req->dbreg_set;
|
|
+
|
|
+ ret = __pfm_write_pmcs(ctx, &pmc, 1);
|
|
+
|
|
+ req->dbreg_flags &= ~PFM_REG_RETFL_MASK;
|
|
+ req->dbreg_flags |= pmc.reg_flags;
|
|
+
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static long pfm_write_ibrs_old(int fd, void __user *ureq, int count)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ struct task_struct *task;
|
|
+ struct file *filp;
|
|
+ struct pfarg_dbreg *req = NULL;
|
|
+ void *fptr, *resume;
|
|
+ unsigned long flags;
|
|
+ size_t sz;
|
|
+ int ret, fput_needed;
|
|
+
|
|
+ if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
|
|
+ return -EINVAL;
|
|
+
|
|
+ sz = count*sizeof(*req);
|
|
+
|
|
+ filp = fget_light(fd, &fput_needed);
|
|
+ if (unlikely(filp == NULL)) {
|
|
+ PFM_DBG("invalid fd %d", fd);
|
|
+ return -EBADF;
|
|
+ }
|
|
+
|
|
+ ctx = filp->private_data;
|
|
+ ret = -EBADF;
|
|
+
|
|
+ if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
|
|
+ PFM_DBG("fd %d not related to perfmon", fd);
|
|
+ goto error;
|
|
+ }
|
|
+
|
|
+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
|
|
+ if (ret)
|
|
+ goto error;
|
|
+
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ task = ctx->task;
|
|
+
|
|
+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
|
|
+ if (ret == 0)
|
|
+ ret = __pfm_write_ibrs_old(ctx, req, count);
|
|
+
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ if (resume)
|
|
+ pfm_resume_task(task, resume);
|
|
+
|
|
+ if (copy_to_user(ureq, req, sz))
|
|
+ ret = -EFAULT;
|
|
+
|
|
+ kfree(fptr);
|
|
+error:
|
|
+ fput_light(filp, fput_needed);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __pfm_write_dbrs_old(struct pfm_context *ctx, void *arg, int count)
|
|
+{
|
|
+ struct pfarg_dbreg *req = arg;
|
|
+ struct pfarg_pmc pmc;
|
|
+ int i, ret = 0;
|
|
+
|
|
+ memset(&pmc, 0, sizeof(pmc));
|
|
+
|
|
+ for (i = 0; i < count; i++, req++) {
|
|
+ pmc.reg_num = 264+req->dbreg_num;
|
|
+ pmc.reg_value = req->dbreg_value;
|
|
+ pmc.reg_flags = 0;
|
|
+ pmc.reg_set = req->dbreg_set;
|
|
+
|
|
+ ret = __pfm_write_pmcs(ctx, &pmc, 1);
|
|
+
|
|
+ req->dbreg_flags &= ~PFM_REG_RETFL_MASK;
|
|
+ req->dbreg_flags |= pmc.reg_flags;
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static long pfm_write_dbrs_old(int fd, void __user *ureq, int count)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ struct task_struct *task;
|
|
+ struct file *filp;
|
|
+ struct pfarg_dbreg *req = NULL;
|
|
+ void *fptr, *resume;
|
|
+ unsigned long flags;
|
|
+ size_t sz;
|
|
+ int ret, fput_needed;
|
|
+
|
|
+ if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
|
|
+ return -EINVAL;
|
|
+
|
|
+ sz = count*sizeof(*req);
|
|
+
|
|
+ filp = fget_light(fd, &fput_needed);
|
|
+ if (unlikely(filp == NULL)) {
|
|
+ PFM_DBG("invalid fd %d", fd);
|
|
+ return -EBADF;
|
|
+ }
|
|
+
|
|
+ ctx = filp->private_data;
|
|
+ ret = -EBADF;
|
|
+
|
|
+ if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
|
|
+ PFM_DBG("fd %d not related to perfmon", fd);
|
|
+ goto error;
|
|
+ }
|
|
+
|
|
+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
|
|
+ if (ret)
|
|
+ goto error;
|
|
+
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ task = ctx->task;
|
|
+
|
|
+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
|
|
+ if (ret == 0)
|
|
+ ret = __pfm_write_dbrs_old(ctx, req, count);
|
|
+
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ if (resume)
|
|
+ pfm_resume_task(task, resume);
|
|
+
|
|
+ if (copy_to_user(ureq, req, sz))
|
|
+ ret = -EFAULT;
|
|
+
|
|
+ kfree(fptr);
|
|
+error:
|
|
+ fput_light(filp, fput_needed);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __pfm_write_pmcs_old(struct pfm_context *ctx, struct pfarg_reg *req_old,
|
|
+ int count)
|
|
+{
|
|
+ struct pfarg_pmc req;
|
|
+ unsigned int i;
|
|
+ int ret, error_code;
|
|
+
|
|
+ memset(&req, 0, sizeof(req));
|
|
+
|
|
+ for (i = 0; i < count; i++, req_old++) {
|
|
+ req.reg_num = req_old->reg_num;
|
|
+ req.reg_set = req_old->reg_set;
|
|
+ req.reg_flags = 0;
|
|
+ req.reg_value = req_old->reg_value;
|
|
+
|
|
+ ret = __pfm_write_pmcs(ctx, (void *)&req, 1);
|
|
+ req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
|
|
+ req_old->reg_flags |= req.reg_flags;
|
|
+
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ ret = pfm_compat_update_pmd(ctx, req_old->reg_set,
|
|
+ req_old->reg_num,
|
|
+ (u32)req_old->reg_flags,
|
|
+ req_old->reg_smpl_pmds,
|
|
+ req_old->reg_reset_pmds,
|
|
+ req_old->reg_smpl_eventid);
|
|
+
|
|
+ error_code = ret ? PFM_REG_RETFL_EINVAL : 0;
|
|
+ req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
|
|
+ req_old->reg_flags |= error_code;
|
|
+
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static long pfm_write_pmcs_old(int fd, void __user *ureq, int count)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ struct task_struct *task;
|
|
+ struct file *filp;
|
|
+ struct pfarg_reg *req = NULL;
|
|
+ void *fptr, *resume;
|
|
+ unsigned long flags;
|
|
+ size_t sz;
|
|
+ int ret, fput_needed;
|
|
+
|
|
+ if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
|
|
+ return -EINVAL;
|
|
+
|
|
+ sz = count*sizeof(*req);
|
|
+
|
|
+ filp = fget_light(fd, &fput_needed);
|
|
+ if (unlikely(filp == NULL)) {
|
|
+ PFM_DBG("invalid fd %d", fd);
|
|
+ return -EBADF;
|
|
+ }
|
|
+
|
|
+ ctx = filp->private_data;
|
|
+ ret = -EBADF;
|
|
+
|
|
+ if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
|
|
+ PFM_DBG("fd %d not related to perfmon", fd);
|
|
+ goto error;
|
|
+ }
|
|
+
|
|
+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
|
|
+ if (ret)
|
|
+ goto error;
|
|
+
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ task = ctx->task;
|
|
+
|
|
+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
|
|
+ if (ret == 0)
|
|
+ ret = __pfm_write_pmcs_old(ctx, req, count);
|
|
+
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ if (resume)
|
|
+ pfm_resume_task(task, resume);
|
|
+
|
|
+ if (copy_to_user(ureq, req, sz))
|
|
+ ret = -EFAULT;
|
|
+
|
|
+ kfree(fptr);
|
|
+
|
|
+error:
|
|
+ fput_light(filp, fput_needed);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __pfm_write_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old,
|
|
+ int count)
|
|
+{
|
|
+ struct pfarg_pmd req;
|
|
+ int i, ret;
|
|
+
|
|
+ memset(&req, 0, sizeof(req));
|
|
+
|
|
+ for (i = 0; i < count; i++, req_old++) {
|
|
+ req.reg_num = req_old->reg_num;
|
|
+ req.reg_set = req_old->reg_set;
|
|
+ req.reg_value = req_old->reg_value;
|
|
+ /* flags passed with pmcs in v2.0 */
|
|
+
|
|
+ req.reg_long_reset = req_old->reg_long_reset;
|
|
+ req.reg_short_reset = req_old->reg_short_reset;
|
|
+ req.reg_random_mask = req_old->reg_random_mask;
|
|
+ /*
|
|
+ * reg_random_seed is ignored since v2.3
|
|
+ */
|
|
+
|
|
+ /*
|
|
+ * skip last_reset_val not used for writing
|
|
+ * skip smpl_pmds, reset_pmds, eventid, ovfl_swtch_cnt
|
|
+ * as set in pfm_write_pmcs_old.
|
|
+ *
|
|
+ * ovfl_switch_cnt ignored, not implemented in v2.0
|
|
+ */
|
|
+ ret = __pfm_write_pmds(ctx, (void *)&req, 1, 1);
|
|
+
|
|
+ req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
|
|
+ req_old->reg_flags |= req.reg_flags;
|
|
+
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static long pfm_write_pmds_old(int fd, void __user *ureq, int count)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ struct task_struct *task;
|
|
+ struct file *filp;
|
|
+ struct pfarg_reg *req = NULL;
|
|
+ void *fptr, *resume;
|
|
+ unsigned long flags;
|
|
+ size_t sz;
|
|
+ int ret, fput_needed;
|
|
+
|
|
+ if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
|
|
+ return -EINVAL;
|
|
+
|
|
+ sz = count*sizeof(*req);
|
|
+
|
|
+ filp = fget_light(fd, &fput_needed);
|
|
+ if (unlikely(filp == NULL)) {
|
|
+ PFM_DBG("invalid fd %d", fd);
|
|
+ return -EBADF;
|
|
+ }
|
|
+
|
|
+ ctx = filp->private_data;
|
|
+ ret = -EBADF;
|
|
+
|
|
+ if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
|
|
+ PFM_DBG("fd %d not related to perfmon", fd);
|
|
+ goto error;
|
|
+ }
|
|
+
|
|
+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
|
|
+ if (ret)
|
|
+ goto error;
|
|
+
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ task = ctx->task;
|
|
+
|
|
+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
|
|
+ if (ret == 0)
|
|
+ ret = __pfm_write_pmds_old(ctx, req, count);
|
|
+
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ if (copy_to_user(ureq, req, sz))
|
|
+ ret = -EFAULT;
|
|
+
|
|
+ if (resume)
|
|
+ pfm_resume_task(task, resume);
|
|
+
|
|
+ kfree(fptr);
|
|
+error:
|
|
+ fput_light(filp, fput_needed);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __pfm_read_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old,
|
|
+ int count)
|
|
+{
|
|
+ struct pfarg_pmd req;
|
|
+ int i, ret;
|
|
+
|
|
+ memset(&req, 0, sizeof(req));
|
|
+
|
|
+ for (i = 0; i < count; i++, req_old++) {
|
|
+ req.reg_num = req_old->reg_num;
|
|
+ req.reg_set = req_old->reg_set;
|
|
+
|
|
+ /* skip value not used for reading */
|
|
+ req.reg_flags = req_old->reg_flags;
|
|
+
|
|
+ /* skip short/long_reset not used for reading */
|
|
+ /* skip last_reset_val not used for reading */
|
|
+ /* skip ovfl_switch_cnt not used for reading */
|
|
+
|
|
+ ret = __pfm_read_pmds(ctx, (void *)&req, 1);
|
|
+
|
|
+ req_old->reg_flags &= ~PFM_REG_RETFL_MASK;
|
|
+ req_old->reg_flags |= req.reg_flags;
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ /* update fields */
|
|
+ req_old->reg_value = req.reg_value;
|
|
+
|
|
+ req_old->reg_last_reset_val = req.reg_last_reset_val;
|
|
+ req_old->reg_ovfl_switch_cnt = req.reg_ovfl_switch_cnt;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static long pfm_read_pmds_old(int fd, void __user *ureq, int count)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ struct task_struct *task;
|
|
+ struct file *filp;
|
|
+ struct pfarg_reg *req = NULL;
|
|
+ void *fptr, *resume;
|
|
+ unsigned long flags;
|
|
+ size_t sz;
|
|
+ int ret, fput_needed;
|
|
+
|
|
+ if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
|
|
+ return -EINVAL;
|
|
+
|
|
+ sz = count*sizeof(*req);
|
|
+
|
|
+ filp = fget_light(fd, &fput_needed);
|
|
+ if (unlikely(filp == NULL)) {
|
|
+ PFM_DBG("invalid fd %d", fd);
|
|
+ return -EBADF;
|
|
+ }
|
|
+
|
|
+ ctx = filp->private_data;
|
|
+ ret = -EBADF;
|
|
+
|
|
+ if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) {
|
|
+ PFM_DBG("fd %d not related to perfmon", fd);
|
|
+ goto error;
|
|
+ }
|
|
+
|
|
+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
|
|
+ if (ret)
|
|
+ goto error;
|
|
+
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ task = ctx->task;
|
|
+
|
|
+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
|
|
+ if (ret == 0)
|
|
+ ret = __pfm_read_pmds_old(ctx, req, count);
|
|
+
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ if (resume)
|
|
+ pfm_resume_task(task, resume);
|
|
+
|
|
+ if (copy_to_user(ureq, req, sz))
|
|
+ ret = -EFAULT;
|
|
+
|
|
+ kfree(fptr);
|
|
+error:
|
|
+ fput_light(filp, fput_needed);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * OBSOLETE: use /proc/perfmon_map instead
|
|
+ *
|
|
+ * For each of the count pfarg_reg entries at ureq, return the default
|
|
+ * value (dfl_val) of the PMC named by reg_num. Processing stops at the first
|
|
+ * out-of-range PMC or one lacking PFM_REG_I, flagged PFM_REG_RETFL_EINVAL.
|
|
+ */
|
|
+static long pfm_get_default_pmcs_old(int fd, void __user *ureq, int count)
|
|
+{
|
|
+ struct pfarg_reg *req = NULL;
|
|
+ void *fptr;
|
|
+ size_t sz;
|
|
+ int ret, i;
|
|
+ unsigned int cnum;
|
|
+
|
|
+ /* same bound as the other compat entry points, keeps sz bounded */
|
|
+ if (count < 1 || count >= PFM_MAX_ARG_COUNT(req))
|
|
+ return -EINVAL;
|
|
+
|
|
+ /* ensure the pfm_pmu_conf does not disappear while we use it */
|
|
+ ret = pfm_pmu_conf_get(1);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ /* element size must come from req, ureq is only a void __user pointer */
|
|
+ sz = count*sizeof(*req);
|
|
+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
|
|
+ if (ret)
|
|
+ goto error;
|
|
+
|
|
+ /* index req[] so that req still points at entry 0 for copy_to_user() */
|
|
+ for (i = 0; i < count; i++) {
|
|
+ cnum = req[i].reg_num;
|
|
+ /* NOTE(review): assumes pmc_desc[] has PFM_MAX_PMCS entries, confirm */
|
|
+ if (cnum >= PFM_MAX_PMCS ||
|
|
+ (pfm_pmu_conf->pmc_desc[cnum].type & PFM_REG_I) == 0) {
|
|
+ req[i].reg_flags = PFM_REG_RETFL_EINVAL;
|
|
+ break;
|
|
+ }
|
|
+ req[i].reg_value = pfm_pmu_conf->pmc_desc[cnum].dfl_val;
|
|
+ req[i].reg_flags = 0;
|
|
+
|
|
+ PFM_DBG("pmc[%u]=0x%lx", cnum, req[i].reg_value);
|
|
+ }
|
|
+
|
|
+ if (copy_to_user(ureq, req, sz))
|
|
+ ret = -EFAULT;
|
|
+ kfree(fptr);
|
|
+error:
|
|
+ pfm_pmu_conf_put();
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * allocate a sampling buffer and remap it into the user address space of
|
|
+ * the task. This is only used in compatibility mode.
|
|
+ *
|
|
+ * function called ONLY on current task
|
|
+ *
|
|
+ * ctx: context owning the buffer, rsize: requested buffer size,
|
|
+ * filp: file recorded as vm_file of the mapping (extra reference on success)
|
|
+ *
|
|
+ * returns 0 on success, the pfm_smpl_buf_alloc() error code, or -ENOMEM
|
|
+ */
|
|
+int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx, size_t rsize,
|
|
+ struct file *filp)
|
|
+{
|
|
+ struct mm_struct *mm = current->mm;
|
|
+ struct vm_area_struct *vma = NULL;
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ size_t size;
|
|
+ int ret;
|
|
+ extern struct vm_operations_struct pfm_buf_map_vm_ops;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ /*
|
|
+ * allocate buffer + map desc
|
|
+ */
|
|
+ ret = pfm_smpl_buf_alloc(ctx, rsize);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ size = ctx->smpl_size;
|
|
+
|
|
+ /* allocate vma */
|
|
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
|
|
+ if (!vma) {
|
|
+ PFM_DBG("Cannot allocate vma");
|
|
+ goto error_kmem;
|
|
+ }
|
|
+ memset(vma, 0, sizeof(*vma));
|
|
+
|
|
+ /* partially initialize the vma for the sampling buffer */
|
|
+ vma->vm_mm = mm;
|
|
+ vma->vm_flags = VM_READ | VM_MAYREAD | VM_RESERVED;
|
|
+ vma->vm_page_prot = PAGE_READONLY;
|
|
+ vma->vm_ops = &pfm_buf_map_vm_ops;
|
|
+ vma->vm_file = filp;
|
|
+ vma->vm_private_data = ctx;
|
|
+ vma->vm_pgoff = 0;
|
|
+
|
|
+ /*
|
|
+ * simulate effect of mmap(): take a file reference for the vma. It is
|
|
+ * kept on success and dropped below if no address range can be found.
|
|
+ */
|
|
+ get_file(filp);
|
|
+
|
|
+ /*
|
|
+ * Let's do the difficult operations next.
|
|
+ *
|
|
+ * now we atomically find some area in the address space and
|
|
+ * remap the buffer into it.
|
|
+ */
|
|
+ down_write(&current->mm->mmap_sem);
|
|
+
|
|
+ /*
|
|
+ * find some free area in address space, must have mmap sem held.
|
|
+ * Failure comes back as a negative errno in an unsigned long, which
|
|
+ * is never page aligned; 0 is still rejected as before.
|
|
+ */
|
|
+ vma->vm_start = get_unmapped_area(NULL, 0, size, 0,
|
|
+ MAP_PRIVATE|MAP_ANONYMOUS);
|
|
+ if (vma->vm_start == 0 || (vma->vm_start & ~PAGE_MASK)) {
|
|
+ PFM_DBG("cannot find unmapped area of size %zu", size);
|
|
+ up_write(&current->mm->mmap_sem);
|
|
+ fput(filp);
|
|
+ goto error;
|
|
+ }
|
|
+ vma->vm_end = vma->vm_start + size;
|
|
+
|
|
+ PFM_DBG("aligned_size=%zu mapped @0x%lx", size, vma->vm_start);
|
|
+ /*
|
|
+ * now insert the vma in the vm list for the process, must be
|
|
+ * done with mmap lock held
|
|
+ * NOTE(review): the insert_vm_struct() result is not checked
|
|
+ */
|
|
+ insert_vm_struct(mm, vma);
|
|
+
|
|
+ mm->total_vm += size >> PAGE_SHIFT;
|
|
+
|
|
+ up_write(&current->mm->mmap_sem);
|
|
+
|
|
+ /* used to propagate vaddr to syscall stub */
|
|
+ ctx_arch->ctx_smpl_vaddr = (void *)vma->vm_start;
|
|
+
|
|
+ return 0;
|
|
+error:
|
|
+ kmem_cache_free(vm_area_cachep, vma);
|
|
+error_kmem:
|
|
+ pfm_smpl_buf_space_release(ctx, ctx->smpl_size);
|
|
+ vfree(ctx->smpl_addr);
|
|
+ return -ENOMEM;
|
|
+}
|
|
+
|
|
+#define PFM_DEFAULT_SMPL_UUID { \
|
|
+ 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82,\
|
|
+ 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97}
|
|
+
|
|
+static pfm_uuid_t old_default_uuid = PFM_DEFAULT_SMPL_UUID;
|
|
+static pfm_uuid_t null_uuid;
|
|
+
|
|
+/*
|
|
+ * function invoked in case, pfm_context_create fails
|
|
+ * at the last operation, copy_to_user. It needs to
|
|
+ * undo memory allocations and free the file descriptor
|
|
+ */
|
|
+static void pfm_undo_create_context_fd(int fd, struct pfm_context *ctx)
|
|
+{
|
|
+ struct files_struct *files = current->files;
|
|
+ struct file *file;
|
|
+ int fput_needed;
|
|
+
|
|
+ file = fget_light(fd, &fput_needed);
|
|
+ /*
|
|
+ * there is no fd_uninstall(), so we do it
|
|
+ * here. put_unused_fd() does not remove the
|
|
+ * effect of fd_install().
|
|
+ */
|
|
+
|
|
+ spin_lock(&files->file_lock);
|
|
+ files->fd_array[fd] = NULL;
|
|
+ spin_unlock(&files->file_lock);
|
|
+
|
|
+ fput_light(file, fput_needed);
|
|
+
|
|
+ /*
|
|
+ * decrement ref count and kill file
|
|
+ */
|
|
+ put_filp(file);
|
|
+
|
|
+ put_unused_fd(fd);
|
|
+
|
|
+ pfm_free_context(ctx);
|
|
+}
|
|
+
|
|
+static int pfm_get_smpl_arg_old(pfm_uuid_t uuid, void __user *fmt_uarg,
|
|
+ size_t usize, void **arg,
|
|
+ struct pfm_smpl_fmt **fmt)
|
|
+{
|
|
+ struct pfm_smpl_fmt *f;
|
|
+ void *addr = NULL;
|
|
+ size_t sz;
|
|
+ int ret;
|
|
+
|
|
+ if (!memcmp(uuid, null_uuid, sizeof(pfm_uuid_t)))
|
|
+ return 0;
|
|
+
|
|
+ if (memcmp(uuid, old_default_uuid, sizeof(pfm_uuid_t))) {
|
|
+ PFM_DBG("compatibility mode supports only default sampling format");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ /*
|
|
+ * find fmt and increase refcount
|
|
+ */
|
|
+ f = pfm_smpl_fmt_get("default-old");
|
|
+ if (f == NULL) {
|
|
+ PFM_DBG("default-old buffer format not found");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * expected format argument size
|
|
+ */
|
|
+ sz = f->fmt_arg_size;
|
|
+
|
|
+ /*
|
|
+ * check user size matches expected size
|
|
+ * usize = -1 is for IA-64 backward compatibility
|
|
+ */
|
|
+ ret = -EINVAL;
|
|
+ if (sz != usize && usize != -1) {
|
|
+ PFM_DBG("invalid arg size %zu, format expects %zu",
|
|
+ usize, sz);
|
|
+ goto error;
|
|
+ }
|
|
+
|
|
+ ret = -ENOMEM;
|
|
+ addr = kmalloc(sz, GFP_KERNEL);
|
|
+ if (addr == NULL)
|
|
+ goto error;
|
|
+
|
|
+ ret = -EFAULT;
|
|
+ if (copy_from_user(addr, fmt_uarg, sz))
|
|
+ goto error;
|
|
+
|
|
+ *arg = addr;
|
|
+ *fmt = f;
|
|
+ return 0;
|
|
+
|
|
+error:
|
|
+ kfree(addr);
|
|
+ pfm_smpl_fmt_put(f);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * compat context creation from one pfarg_context followed by the sampling
|
|
+ * format argument. Returns file descriptor if >=0, or error code.
|
|
+ */
|
|
+static long pfm_create_context_old(int fd, void __user *ureq, int count)
|
|
+{
|
|
+	struct pfm_context *new_ctx;
|
|
+	struct pfm_arch_context *ctx_arch;
|
|
+	struct pfm_smpl_fmt *fmt = NULL;
|
|
+	struct pfarg_context req_old;
|
|
+	void __user *usmpl_arg;
|
|
+	void *smpl_arg = NULL;
|
|
+	struct pfarg_ctx req;
|
|
+	int ret;
|
|
+
|
|
+	if (count != 1)
|
|
+		return -EINVAL;
|
|
+
|
|
+	if (copy_from_user(&req_old, ureq, sizeof(req_old)))
|
|
+		return -EFAULT;
|
|
+
|
|
+	memset(&req, 0, sizeof(req));
|
|
+
|
|
+	/* sampling format args are following pfarg_context */
|
|
+	usmpl_arg = ureq+sizeof(req_old);
|
|
+
|
|
+	ret = pfm_get_smpl_arg_old(req_old.ctx_smpl_buf_id, usmpl_arg, -1,
|
|
+				   &smpl_arg, &fmt);
|
|
+	if (ret)
|
|
+		return ret;
|
|
+
|
|
+	req.ctx_flags = req_old.ctx_flags;
|
|
+
|
|
+	ret = __pfm_create_context(&req, fmt, smpl_arg, PFM_COMPAT, &new_ctx);
|
|
+	if (ret >= 0) {
|
|
+		ctx_arch = pfm_ctx_arch(new_ctx);
|
|
+		req_old.ctx_fd = ret;
|
|
+		req_old.ctx_smpl_vaddr = ctx_arch->ctx_smpl_vaddr;
|
|
+	}
|
|
+	/* on error, new_ctx may be unset: no copy back, nothing to undo */
|
|
+	if (ret >= 0 && copy_to_user(ureq, &req_old, sizeof(req_old))) {
|
|
+		pfm_undo_create_context_fd(req_old.ctx_fd, new_ctx);
|
|
+		ret = -EFAULT;
|
|
+	}
|
|
+
|
|
+	kfree(smpl_arg);
|
|
+
|
|
+	return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * obsolete call: use /proc/perfmon
|
|
+ */
|
|
+static long pfm_get_features_old(int fd, void __user *arg, int count)
|
|
+{
|
|
+ struct pfarg_features req;
|
|
+ int ret = 0;
|
|
+
|
|
+ if (count != 1)
|
|
+ return -EINVAL;
|
|
+
|
|
+ memset(&req, 0, sizeof(req));
|
|
+
|
|
+ req.ft_version = PFM_VERSION;
|
|
+
|
|
+ if (copy_to_user(arg, &req, sizeof(req)))
|
|
+ ret = -EFAULT;
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static long pfm_debug_old(int fd, void __user *arg, int count)
|
|
+{
|
|
+ int m;
|
|
+
|
|
+ if (count != 1)
|
|
+ return -EINVAL;
|
|
+
|
|
+ if (get_user(m, (int __user *)arg))
|
|
+ return -EFAULT;
|
|
+
|
|
+
|
|
+ pfm_controls.debug = m == 0 ? 0 : 1;
|
|
+
|
|
+ PFM_INFO("debugging %s (timing reset)",
|
|
+ pfm_controls.debug ? "on" : "off");
|
|
+
|
|
+ if (m == 0)
|
|
+ for_each_online_cpu(m) {
|
|
+ memset(&per_cpu(pfm_stats, m), 0,
|
|
+ sizeof(struct pfm_stats));
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static long pfm_unload_context_old(int fd, void __user *arg, int count)
|
|
+{
|
|
+ if (count)
|
|
+ return -EINVAL;
|
|
+
|
|
+ return sys_pfm_unload_context(fd);
|
|
+}
|
|
+
|
|
+static long pfm_restart_old(int fd, void __user *arg, int count)
|
|
+{
|
|
+ if (count)
|
|
+ return -EINVAL;
|
|
+
|
|
+ return sys_pfm_restart(fd);
|
|
+}
|
|
+
|
|
+static long pfm_stop_old(int fd, void __user *arg, int count)
|
|
+{
|
|
+ if (count)
|
|
+ return -EINVAL;
|
|
+
|
|
+ return sys_pfm_stop(fd);
|
|
+}
|
|
+
|
|
+static long pfm_start_old(int fd, void __user *arg, int count)
|
|
+{
|
|
+ if (count > 1)
|
|
+ return -EINVAL;
|
|
+
|
|
+ return sys_pfm_start(fd, arg);
|
|
+}
|
|
+
|
|
+static long pfm_load_context_old(int fd, void __user *ureq, int count)
|
|
+{
|
|
+ if (count != 1)
|
|
+ return -EINVAL;
|
|
+
|
|
+ return sys_pfm_load_context(fd, ureq);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * perfmon command descriptions
|
|
+ */
|
|
+struct pfm_cmd_desc {
|
|
+ long (*cmd_func)(int fd, void __user *arg, int count);
|
|
+};
|
|
+
|
|
+/*
|
|
+ * functions MUST be listed in the increasing order of
|
|
+ * their index (see perfmon.h)
|
|
+ */
|
|
+#define PFM_CMD(name) \
|
|
+ { .cmd_func = name, \
|
|
+ }
|
|
+#define PFM_CMD_NONE \
|
|
+ { .cmd_func = NULL \
|
|
+ }
|
|
+
|
|
+static struct pfm_cmd_desc pfm_cmd_tab[] = {
|
|
+/* 0 */PFM_CMD_NONE,
|
|
+/* 1 */PFM_CMD(pfm_write_pmcs_old),
|
|
+/* 2 */PFM_CMD(pfm_write_pmds_old),
|
|
+/* 3 */PFM_CMD(pfm_read_pmds_old),
|
|
+/* 4 */PFM_CMD(pfm_stop_old),
|
|
+/* 5 */PFM_CMD(pfm_start_old),
|
|
+/* 6 */PFM_CMD_NONE,
|
|
+/* 7 */PFM_CMD_NONE,
|
|
+/* 8 */PFM_CMD(pfm_create_context_old),
|
|
+/* 9 */PFM_CMD_NONE,
|
|
+/* 10 */PFM_CMD(pfm_restart_old),
|
|
+/* 11 */PFM_CMD_NONE,
|
|
+/* 12 */PFM_CMD(pfm_get_features_old),
|
|
+/* 13 */PFM_CMD(pfm_debug_old),
|
|
+/* 14 */PFM_CMD_NONE,
|
|
+/* 15 */PFM_CMD(pfm_get_default_pmcs_old),
|
|
+/* 16 */PFM_CMD(pfm_load_context_old),
|
|
+/* 17 */PFM_CMD(pfm_unload_context_old),
|
|
+/* 18 */PFM_CMD_NONE,
|
|
+/* 19 */PFM_CMD_NONE,
|
|
+/* 20 */PFM_CMD_NONE,
|
|
+/* 21 */PFM_CMD_NONE,
|
|
+/* 22 */PFM_CMD_NONE,
|
|
+/* 23 */PFM_CMD_NONE,
|
|
+/* 24 */PFM_CMD_NONE,
|
|
+/* 25 */PFM_CMD_NONE,
|
|
+/* 26 */PFM_CMD_NONE,
|
|
+/* 27 */PFM_CMD_NONE,
|
|
+/* 28 */PFM_CMD_NONE,
|
|
+/* 29 */PFM_CMD_NONE,
|
|
+/* 30 */PFM_CMD_NONE,
|
|
+/* 31 */PFM_CMD_NONE,
|
|
+/* 32 */PFM_CMD(pfm_write_ibrs_old),
|
|
+/* 33 */PFM_CMD(pfm_write_dbrs_old),
|
|
+};
|
|
+#define PFM_CMD_COUNT ARRAY_SIZE(pfm_cmd_tab)
|
|
+
|
|
+/*
|
|
+ * system-call entry point (must return long)
|
|
+ */
|
|
+asmlinkage long sys_perfmonctl(int fd, int cmd, void __user *arg, int count)
|
|
+{
|
|
+ if (perfmon_disabled)
|
|
+ return -ENOSYS;
|
|
+
|
|
+ if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT
|
|
+ || pfm_cmd_tab[cmd].cmd_func == NULL)) {
|
|
+ PFM_DBG("invalid cmd=%d", cmd);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ return (long)pfm_cmd_tab[cmd].cmd_func(fd, arg, count);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_read() for a perfmon v2.0 context.
|
|
+ *
|
|
+ * compatibility mode pfm_read() routine. We need a separate routine
|
|
+ * because the pfm_msg and pfarg_msg structures are different: one new
|
|
+ * message is read, converted to the old layout and copied to buf.
|
|
+ *
|
|
+ * return: sizeof(pfm_msg_t) on success, -errno otherwise
|
|
+ */
|
|
+ssize_t pfm_arch_compat_read(struct pfm_context *ctx,
|
|
+			     char __user *buf,
|
|
+			     int non_block,
|
|
+			     size_t size)
|
|
+{
|
|
+	union pfarg_msg msg_buf;
|
|
+	pfm_msg_t old_msg_buf;
|
|
+	pfm_ovfl_msg_t *o_msg;
|
|
+	struct pfarg_ovfl_msg *n_msg;
|
|
+	int ret;
|
|
+
|
|
+	PFM_DBG("msg=%p size=%zu", buf, size);
|
|
+
|
|
+	/*
|
|
+	 * cannot extract partial messages.
|
|
+	 * check even when there is no message
|
|
+	 *
|
|
+	 * cannot extract more than one message per call. Bytes
|
|
+	 * above sizeof(msg) are ignored.
|
|
+	 */
|
|
+	if (size < sizeof(old_msg_buf)) {
|
|
+		PFM_DBG("message is too small size=%zu must be >=%zu)",
|
|
+			size, sizeof(old_msg_buf));
|
|
+		return -EINVAL;
|
|
+	}
|
|
+
|
|
+	ret = __pfm_read(ctx, &msg_buf, non_block);
|
|
+	if (ret < 1)
|
|
+		return ret;
|
|
+
|
|
+	/* force return value to old message size */
|
|
+	ret = sizeof(old_msg_buf);
|
|
+
|
|
+	/* zeroed: only some fields are set below, all are copied to user */
|
|
+	memset(&old_msg_buf, 0, sizeof(old_msg_buf));
|
|
+
|
|
+	o_msg = &old_msg_buf.pfm_ovfl_msg;
|
|
+	n_msg = &msg_buf.pfm_ovfl_msg;
|
|
+
|
|
+	switch (msg_buf.type) {
|
|
+	case PFM_MSG_OVFL:
|
|
+		o_msg->msg_type = PFM_MSG_OVFL;
|
|
+		o_msg->msg_ctx_fd = 0;
|
|
+		o_msg->msg_active_set = n_msg->msg_active_set;
|
|
+		o_msg->msg_tstamp = 0;
|
|
+
|
|
+		o_msg->msg_ovfl_pmds[0] = n_msg->msg_ovfl_pmds[0];
|
|
+		o_msg->msg_ovfl_pmds[1] = n_msg->msg_ovfl_pmds[1];
|
|
+		o_msg->msg_ovfl_pmds[2] = n_msg->msg_ovfl_pmds[2];
|
|
+		o_msg->msg_ovfl_pmds[3] = n_msg->msg_ovfl_pmds[3];
|
|
+		break;
|
|
+	case PFM_MSG_END:
|
|
+		o_msg->msg_type = PFM_MSG_END;
|
|
+		o_msg->msg_ctx_fd = 0;
|
|
+		o_msg->msg_tstamp = 0;
|
|
+		break;
|
|
+	default:
|
|
+		PFM_DBG("unknown msg type=%d", msg_buf.type);
|
|
+	}
|
|
+	if (copy_to_user(buf, &old_msg_buf, sizeof(old_msg_buf)))
|
|
+		ret = -EFAULT;
|
|
+	PFM_DBG_ovfl("ret=%d", ret);
|
|
+	return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * legacy /proc/perfmon simplified interface (we only maintain the
|
|
+ * global information (no more per-cpu stats, use
|
|
+ * /sys/devices/system/cpu/cpuXX/perfmon
|
|
+ */
|
|
+static struct proc_dir_entry *perfmon_proc;
|
|
+
|
|
+static void *pfm_proc_start(struct seq_file *m, loff_t *pos)
|
|
+{
|
|
+ if (*pos == 0)
|
|
+ return (void *)1;
|
|
+
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+static void *pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
|
|
+{
|
|
+ ++*pos;
|
|
+ return pfm_proc_start(m, pos);
|
|
+}
|
|
+
|
|
+static void pfm_proc_stop(struct seq_file *m, void *v)
|
|
+{
|
|
+}
|
|
+
|
|
+/*
|
|
+ * this is a simplified version of the legacy /proc/perfmon.
|
|
+ * We have retained ONLY the key information that tools are actually
|
|
+ * using
|
|
+ */
|
|
+static void pfm_proc_show_header(struct seq_file *m)
|
|
+{
|
|
+ char buf[128];
|
|
+
|
|
+ pfm_sysfs_res_show(buf, sizeof(buf), 3);
|
|
+
|
|
+ seq_printf(m, "perfmon version : %u.%u\n",
|
|
+ PFM_VERSION_MAJ, PFM_VERSION_MIN);
|
|
+
|
|
+ seq_printf(m, "model : %s", buf);
|
|
+}
|
|
+
|
|
+static int pfm_proc_show(struct seq_file *m, void *v)
|
|
+{
|
|
+ pfm_proc_show_header(m);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+struct seq_operations pfm_proc_seq_ops = {
|
|
+ .start = pfm_proc_start,
|
|
+ .next = pfm_proc_next,
|
|
+ .stop = pfm_proc_stop,
|
|
+ .show = pfm_proc_show
|
|
+};
|
|
+
|
|
+static int pfm_proc_open(struct inode *inode, struct file *file)
|
|
+{
|
|
+ return seq_open(file, &pfm_proc_seq_ops);
|
|
+}
|
|
+
|
|
+
|
|
+static struct file_operations pfm_proc_fops = {
|
|
+ .open = pfm_proc_open,
|
|
+ .read = seq_read,
|
|
+ .llseek = seq_lseek,
|
|
+ .release = seq_release,
|
|
+};
|
|
+
|
|
+/*
|
|
+ * called from pfm_arch_init(), global initialization, called once
|
|
+ */
|
|
+int __init pfm_ia64_compat_init(void)
|
|
+{
|
|
+ /*
|
|
+ * create /proc/perfmon
|
|
+ */
|
|
+ perfmon_proc = create_proc_entry("perfmon", S_IRUGO, NULL);
|
|
+ if (perfmon_proc == NULL) {
|
|
+ PFM_ERR("cannot create /proc entry, perfmon disabled");
|
|
+ return -1;
|
|
+ }
|
|
+ perfmon_proc->proc_fops = &pfm_proc_fops;
|
|
+ return 0;
|
|
+}
|
|
diff --git a/arch/ia64/perfmon/perfmon_default_smpl.c b/arch/ia64/perfmon/perfmon_default_smpl.c
|
|
new file mode 100644
|
|
index 0000000..b408a13
|
|
--- /dev/null
|
|
+++ b/arch/ia64/perfmon/perfmon_default_smpl.c
|
|
@@ -0,0 +1,273 @@
|
|
+/*
|
|
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This file implements the old default sampling buffer format
|
|
+ * for the Linux/ia64 perfmon-2 subsystem. This is for backward
|
|
+ * compatibility only. use the new default format in perfmon/
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/types.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/delay.h>
|
|
+#include <linux/smp.h>
|
|
+#include <linux/sysctl.h>
|
|
+
|
|
+#ifdef MODULE
|
|
+#define FMT_FLAGS 0
|
|
+#else
|
|
+#define FMT_FLAGS PFM_FMTFL_IS_BUILTIN
|
|
+#endif
|
|
+
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <asm/perfmon_default_smpl.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_DESCRIPTION("perfmon old default sampling format");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+static int pfm_default_fmt_validate(u32 flags, u16 npmds, void *data)
|
|
+{
|
|
+ struct pfm_default_smpl_arg *arg = data;
|
|
+ size_t min_buf_size;
|
|
+
|
|
+ if (data == NULL) {
|
|
+ PFM_DBG("no argument passed");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * compute min buf size. All PMD are manipulated as 64bit entities
|
|
+ */
|
|
+ min_buf_size = sizeof(struct pfm_default_smpl_hdr)
|
|
+ + (sizeof(struct pfm_default_smpl_entry) + (npmds*sizeof(u64)));
|
|
+
|
|
+ PFM_DBG("validate flags=0x%x npmds=%u min_buf_size=%lu "
|
|
+ "buf_size=%lu CPU%d", flags, npmds, min_buf_size,
|
|
+ arg->buf_size, smp_processor_id());
|
|
+
|
|
+ /*
|
|
+ * must hold at least the buffer header + one minimally sized entry
|
|
+ */
|
|
+ if (arg->buf_size < min_buf_size)
|
|
+ return -EINVAL;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_default_fmt_get_size(unsigned int flags, void *data,
|
|
+ size_t *size)
|
|
+{
|
|
+ struct pfm_default_smpl_arg *arg = data;
|
|
+
|
|
+ /*
|
|
+ * size has been validated in default_validate
|
|
+ */
|
|
+ *size = arg->buf_size;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_default_fmt_init(struct pfm_context *ctx, void *buf,
|
|
+ u32 flags, u16 npmds, void *data)
|
|
+{
|
|
+ struct pfm_default_smpl_hdr *hdr;
|
|
+ struct pfm_default_smpl_arg *arg = data;
|
|
+
|
|
+ hdr = buf;
|
|
+
|
|
+ hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION;
|
|
+ hdr->hdr_buf_size = arg->buf_size;
|
|
+ hdr->hdr_cur_offs = sizeof(*hdr);
|
|
+ hdr->hdr_overflows = 0;
|
|
+ hdr->hdr_count = 0;
|
|
+
|
|
+ PFM_DBG("buffer=%p buf_size=%lu hdr_size=%lu "
|
|
+ "hdr_version=%u cur_offs=%lu",
|
|
+ buf,
|
|
+ hdr->hdr_buf_size,
|
|
+ sizeof(*hdr),
|
|
+ hdr->hdr_version,
|
|
+ hdr->hdr_cur_offs);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_default_fmt_handler(struct pfm_context *ctx,
|
|
+ unsigned long ip, u64 tstamp, void *data)
|
|
+{
|
|
+ struct pfm_default_smpl_hdr *hdr;
|
|
+ struct pfm_default_smpl_entry *ent;
|
|
+ void *cur, *last, *buf;
|
|
+ u64 *e;
|
|
+ size_t entry_size;
|
|
+ u16 npmds, i, ovfl_pmd;
|
|
+ struct pfm_ovfl_arg *arg;
|
|
+
|
|
+ hdr = ctx->smpl_addr;
|
|
+ arg = &ctx->ovfl_arg;
|
|
+
|
|
+ buf = hdr;
|
|
+ cur = buf+hdr->hdr_cur_offs;
|
|
+ last = buf+hdr->hdr_buf_size;
|
|
+ ovfl_pmd = arg->ovfl_pmd;
|
|
+
|
|
+ /*
|
|
+ * precheck for sanity
|
|
+ */
|
|
+ if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE)
|
|
+ goto full;
|
|
+
|
|
+ npmds = arg->num_smpl_pmds;
|
|
+
|
|
+ ent = cur;
|
|
+
|
|
+ prefetch(arg->smpl_pmds_values);
|
|
+
|
|
+ entry_size = sizeof(*ent) + (npmds << 3);
|
|
+
|
|
+ /* position for first pmd */
|
|
+ e = (unsigned long *)(ent+1);
|
|
+
|
|
+ hdr->hdr_count++;
|
|
+
|
|
+ PFM_DBG_ovfl("count=%lu cur=%p last=%p free_bytes=%lu "
|
|
+ "ovfl_pmd=%d npmds=%u",
|
|
+ hdr->hdr_count,
|
|
+ cur, last,
|
|
+ last-cur,
|
|
+ ovfl_pmd,
|
|
+ npmds);
|
|
+
|
|
+ /*
|
|
+ * current = task running at the time of the overflow.
|
|
+ *
|
|
+ * per-task mode:
|
|
+ * - this is usually the task being monitored.
|
|
+ * Under certain conditions, it might be a different task
|
|
+ *
|
|
+ * system-wide:
|
|
+ * - this is not necessarily the task controlling the session
|
|
+ */
|
|
+ ent->pid = current->pid;
|
|
+ ent->ovfl_pmd = ovfl_pmd;
|
|
+ ent->last_reset_val = arg->pmd_last_reset;
|
|
+
|
|
+ /*
|
|
+ * where did the fault happen (includes slot number)
|
|
+ */
|
|
+ ent->ip = ip;
|
|
+
|
|
+ ent->tstamp = tstamp;
|
|
+ ent->cpu = smp_processor_id();
|
|
+ ent->set = arg->active_set;
|
|
+ ent->tgid = current->tgid;
|
|
+
|
|
+ /*
|
|
+ * selectively store PMDs in increasing index number
|
|
+ */
|
|
+ if (npmds) {
|
|
+ u64 *val = arg->smpl_pmds_values;
|
|
+ for (i = 0; i < npmds; i++)
|
|
+ *e++ = *val++;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * update position for next entry
|
|
+ */
|
|
+ hdr->hdr_cur_offs += entry_size;
|
|
+ cur += entry_size;
|
|
+
|
|
+ /*
|
|
+ * post check to avoid losing the last sample
|
|
+ */
|
|
+ if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE)
|
|
+ goto full;
|
|
+
|
|
+ /*
|
|
+ * reset before returning from interrupt handler
|
|
+ */
|
|
+ arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET;
|
|
+ return 0;
|
|
+full:
|
|
+ PFM_DBG_ovfl("smpl buffer full free=%lu, count=%lu",
|
|
+ last-cur, hdr->hdr_count);
|
|
+
|
|
+ /*
|
|
+ * increment number of buffer overflow.
|
|
+ * important to detect duplicate set of samples.
|
|
+ */
|
|
+ hdr->hdr_overflows++;
|
|
+
|
|
+ /*
|
|
+ * request notification and masking of monitoring.
|
|
+ * Notification is still subject to the overflowed
|
|
+ */
|
|
+ arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
|
|
+
|
|
+ return -ENOBUFS; /* we are full, sorry */
|
|
+}
|
|
+
|
|
+static int pfm_default_fmt_restart(int is_active, u32 *ovfl_ctrl, void *buf)
|
|
+{
|
|
+ struct pfm_default_smpl_hdr *hdr;
|
|
+
|
|
+ hdr = buf;
|
|
+
|
|
+ hdr->hdr_count = 0;
|
|
+ hdr->hdr_cur_offs = sizeof(*hdr);
|
|
+
|
|
+ *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_default_fmt_exit(void *buf)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static struct pfm_smpl_fmt default_fmt = {
|
|
+ .fmt_name = "default-old",
|
|
+ .fmt_version = 0x10000,
|
|
+ .fmt_arg_size = sizeof(struct pfm_default_smpl_arg),
|
|
+ .fmt_validate = pfm_default_fmt_validate,
|
|
+ .fmt_getsize = pfm_default_fmt_get_size,
|
|
+ .fmt_init = pfm_default_fmt_init,
|
|
+ .fmt_handler = pfm_default_fmt_handler,
|
|
+ .fmt_restart = pfm_default_fmt_restart,
|
|
+ .fmt_exit = pfm_default_fmt_exit,
|
|
+ .fmt_flags = FMT_FLAGS,
|
|
+ .owner = THIS_MODULE
|
|
+};
|
|
+
|
|
+static int __init pfm_default_fmt_init_module(void)
|
|
+{
|
|
+	/*
|
|
+	 * register the "default-old" sampling format description
|
|
+	 */
|
|
+	return pfm_fmt_register(&default_fmt);
|
|
+}
|
|
+
|
|
+static void pfm_default_fmt_cleanup_module(void)
|
|
+{
|
|
+ pfm_fmt_unregister(&default_fmt);
|
|
+}
|
|
+
|
|
+module_init(pfm_default_fmt_init_module);
|
|
+module_exit(pfm_default_fmt_cleanup_module);
|
|
diff --git a/arch/ia64/perfmon/perfmon_generic.c b/arch/ia64/perfmon/perfmon_generic.c
|
|
new file mode 100644
|
|
index 0000000..47b1870
|
|
--- /dev/null
|
|
+++ b/arch/ia64/perfmon/perfmon_generic.c
|
|
@@ -0,0 +1,148 @@
|
|
+/*
|
|
+ * This file contains the generic PMU register description tables
|
|
+ * and pmc checker used by perfmon.c.
|
|
+ *
|
|
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <asm/pal.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_DESCRIPTION("Generic IA-64 PMU description tables");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+#define RDEP(x) (1UL << (x))
|
|
+
|
|
+#define PFM_IA64GEN_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7))
|
|
+#define PFM_IA64GEN_RSVD (0xffffffffffff0080UL)
|
|
+#define PFM_IA64GEN_NO64 (1UL<<5)
|
|
+
|
|
+/* forward declaration */
|
|
+static struct pfm_pmu_config pfm_ia64gen_pmu_conf;
|
|
+
|
|
+static struct pfm_arch_pmu_info pfm_ia64gen_pmu_info = {
|
|
+ .mask_pmcs = {PFM_IA64GEN_MASK_PMCS,},
|
|
+};
|
|
+
|
|
+static struct pfm_regmap_desc pfm_ia64gen_pmc_desc[] = {
|
|
+/* pmc0 */ PMX_NA,
|
|
+/* pmc1 */ PMX_NA,
|
|
+/* pmc2 */ PMX_NA,
|
|
+/* pmc3 */ PMX_NA,
|
|
+/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 4),
|
|
+/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 5),
|
|
+/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 6),
|
|
+/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 7)
|
|
+};
|
|
+#define PFM_IA64GEN_NUM_PMCS ARRAY_SIZE(pfm_ia64gen_pmc_desc)
|
|
+
|
|
+static struct pfm_regmap_desc pfm_ia64gen_pmd_desc[] = {
|
|
+/* pmd0 */ PMX_NA,
|
|
+/* pmd1 */ PMX_NA,
|
|
+/* pmd2 */ PMX_NA,
|
|
+/* pmd3 */ PMX_NA,
|
|
+/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4),
|
|
+/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5),
|
|
+/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6),
|
|
+/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7)
|
|
+};
|
|
+#define PFM_IA64GEN_NUM_PMDS ARRAY_SIZE(pfm_ia64gen_pmd_desc)
|
|
+
|
|
+static int pfm_ia64gen_pmc_check(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ struct pfarg_pmc *req)
|
|
+{
|
|
+#define PFM_IA64GEN_PMC_PM_POS6 (1UL<<6)
|
|
+ u64 tmpval;
|
|
+ int is_system;
|
|
+
|
|
+ is_system = ctx->flags.system;
|
|
+ tmpval = req->reg_value;
|
|
+
|
|
+ switch (req->reg_num) {
|
|
+ case 4:
|
|
+ case 5:
|
|
+ case 6:
|
|
+ case 7:
|
|
+ /* set pmc.oi for 64-bit emulation */
|
|
+ tmpval |= 1UL << 5;
|
|
+
|
|
+ if (is_system)
|
|
+ tmpval |= PFM_IA64GEN_PMC_PM_POS6;
|
|
+ else
|
|
+ tmpval &= ~PFM_IA64GEN_PMC_PM_POS6;
|
|
+ break;
|
|
+
|
|
+ }
|
|
+ req->reg_value = tmpval;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * matches anything
|
|
+ */
|
|
+static int pfm_ia64gen_probe_pmu(void)
|
|
+{
|
|
+ u64 pm_buffer[16];
|
|
+ pal_perf_mon_info_u_t pm_info;
|
|
+
|
|
+ /*
|
|
+ * call PAL_PERFMON_INFO to retrieve counter width which
|
|
+ * is implementation specific
|
|
+ */
|
|
+ if (ia64_pal_perf_mon_info(pm_buffer, &pm_info))
|
|
+ return -1;
|
|
+
|
|
+ pfm_ia64gen_pmu_conf.counter_width = pm_info.pal_perf_mon_info_s.width;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
+ */
|
|
+static struct pfm_pmu_config pfm_ia64gen_pmu_conf = {
|
|
+ .pmu_name = "Generic IA-64",
|
|
+ .counter_width = 0, /* computed from PAL_PERFMON_INFO */
|
|
+ .pmd_desc = pfm_ia64gen_pmd_desc,
|
|
+ .pmc_desc = pfm_ia64gen_pmc_desc,
|
|
+ .probe_pmu = pfm_ia64gen_probe_pmu,
|
|
+ .num_pmc_entries = PFM_IA64GEN_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_IA64GEN_NUM_PMDS,
|
|
+ .pmc_write_check = pfm_ia64gen_pmc_check,
|
|
+ .version = "1.0",
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+ .pmu_info = &pfm_ia64gen_pmu_info
|
|
+ /* no read/write checkers */
|
|
+};
|
|
+
|
|
+static int __init pfm_gen_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_ia64gen_pmu_conf);
|
|
+}
|
|
+
|
|
+static void __exit pfm_gen_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_ia64gen_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_gen_pmu_init_module);
|
|
+module_exit(pfm_gen_pmu_cleanup_module);
|
|
diff --git a/arch/ia64/perfmon/perfmon_itanium.c b/arch/ia64/perfmon/perfmon_itanium.c
|
|
new file mode 100644
|
|
index 0000000..094b31b
|
|
--- /dev/null
|
|
+++ b/arch/ia64/perfmon/perfmon_itanium.c
|
|
@@ -0,0 +1,232 @@
|
|
+/*
|
|
+ * This file contains the Itanium PMU register description tables
|
|
+ * and pmc checker used by perfmon.c.
|
|
+ *
|
|
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_DESCRIPTION("Itanium (Merced) PMU description tables");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+#define RDEP(x) (1ULL << (x))
|
|
+
|
|
+#define PFM_ITA_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\
|
|
+ RDEP(12))
|
|
+
|
|
+#define PFM_ITA_NO64 (1ULL<<5)
|
|
+
|
|
+static struct pfm_arch_pmu_info pfm_ita_pmu_info = {
|
|
+ .mask_pmcs = {PFM_ITA_MASK_PMCS,},
|
|
+};
|
|
+/* reserved bits are 1 in the mask */
|
|
+#define PFM_ITA_RSVD 0xfffffffffc8000a0UL
|
|
+/*
|
|
+ * For debug registers, writing xBR(y) means we also use xBR(y+1). Hence using
|
|
+ * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information
|
|
+ * but this is fine because they are handled separately in the IA-64 specific
|
|
+ * code.
|
|
+ */
|
|
+static struct pfm_regmap_desc pfm_ita_pmc_desc[] = {
|
|
+/* pmc0 */ PMX_NA,
|
|
+/* pmc1 */ PMX_NA,
|
|
+/* pmc2 */ PMX_NA,
|
|
+/* pmc3 */ PMX_NA,
|
|
+/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 4),
|
|
+/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 5),
|
|
+/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 6),
|
|
+/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 7),
|
|
+/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 8),
|
|
+/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 9),
|
|
+/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xfffffffff3f0ff30UL, 0, 10),
|
|
+/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x10000000UL, 0xffffffffecf0ff30UL, 0, 11),
|
|
+/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0030UL, 0, 12),
|
|
+/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x3ffff00000001UL, 0xfffffffffffffffeUL, 0, 13),
|
|
+/* pmc14 */ PMX_NA,
|
|
+/* pmc15 */ PMX_NA,
|
|
+/* pmc16 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc24 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc32 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc40 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0),
|
|
+/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1),
|
|
+/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2),
|
|
+/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3),
|
|
+/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4),
|
|
+/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5),
|
|
+/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6),
|
|
+/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 0x0, 0x8000000000000000UL, 0, 7),
|
|
+/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0),
|
|
+/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1),
|
|
+/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2),
|
|
+/* pmc267 */ PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3),
|
|
+/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4),
|
|
+/* pmc269 */ PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5),
|
|
+/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6),
|
|
+/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7)
|
|
+};
|
|
+#define PFM_ITA_NUM_PMCS ARRAY_SIZE(pfm_ita_pmc_desc)
|
|
+
|
|
+static struct pfm_regmap_desc pfm_ita_pmd_desc[] = {
|
|
+/* pmd0 */ PMD_DP(PFM_REG_I , "PMD0", 0, 1ull << 10),
|
|
+/* pmd1 */ PMD_DP(PFM_REG_I , "PMD1", 1, 1ull << 10),
|
|
+/* pmd2 */ PMD_DP(PFM_REG_I , "PMD2", 2, 1ull << 11),
|
|
+/* pmd3 */ PMD_DP(PFM_REG_I , "PMD3", 3, 1ull << 11),
|
|
+/* pmd4 */ PMD_DP(PFM_REG_C , "PMD4", 4, 1ull << 4),
|
|
+/* pmd5 */ PMD_DP(PFM_REG_C , "PMD5", 5, 1ull << 5),
|
|
+/* pmd6 */ PMD_DP(PFM_REG_C , "PMD6", 6, 1ull << 6),
|
|
+/* pmd7 */ PMD_DP(PFM_REG_C , "PMD7", 7, 1ull << 7),
|
|
+/* pmd8 */ PMD_DP(PFM_REG_I , "PMD8", 8, 1ull << 12),
|
|
+/* pmd9 */ PMD_DP(PFM_REG_I , "PMD9", 9, 1ull << 12),
|
|
+/* pmd10 */ PMD_DP(PFM_REG_I , "PMD10", 10, 1ull << 12),
|
|
+/* pmd11 */ PMD_DP(PFM_REG_I , "PMD11", 11, 1ull << 12),
|
|
+/* pmd12 */ PMD_DP(PFM_REG_I , "PMD12", 12, 1ull << 12),
|
|
+/* pmd13 */ PMD_DP(PFM_REG_I , "PMD13", 13, 1ull << 12),
|
|
+/* pmd14 */ PMD_DP(PFM_REG_I , "PMD14", 14, 1ull << 12),
|
|
+/* pmd15 */ PMD_DP(PFM_REG_I , "PMD15", 15, 1ull << 12),
|
|
+/* pmd16 */ PMD_DP(PFM_REG_I , "PMD16", 16, 1ull << 12),
|
|
+/* pmd17 */ PMD_DP(PFM_REG_I , "PMD17", 17, 1ull << 11)
|
|
+};
|
|
+#define PFM_ITA_NUM_PMDS ARRAY_SIZE(pfm_ita_pmd_desc)
|
|
+
|
|
+static int pfm_ita_pmc_check(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ struct pfarg_pmc *req)
|
|
+{
|
|
+#define PFM_ITA_PMC_PM_POS6 (1UL<<6)
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ u64 tmpval;
|
|
+ u16 cnum;
|
|
+ int ret = 0, is_system;
|
|
+
|
|
+ tmpval = req->reg_value;
|
|
+ cnum = req->reg_num;
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ is_system = ctx->flags.system;
|
|
+
|
|
+ switch (cnum) {
|
|
+ case 4:
|
|
+ case 5:
|
|
+ case 6:
|
|
+ case 7:
|
|
+ case 10:
|
|
+ case 11:
|
|
+ case 12:
|
|
+ if (is_system)
|
|
+ tmpval |= PFM_ITA_PMC_PM_POS6;
|
|
+ else
|
|
+ tmpval &= ~PFM_ITA_PMC_PM_POS6;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * we must clear the (instruction) debug registers if pmc13.ta bit is
|
|
+ * cleared before they are written (flags.use_dbr == 0) to avoid
|
|
+ * picking up stale information.
|
|
+ */
|
|
+ if (cnum == 13 && ((tmpval & 0x1) == 0)
|
|
+ && ctx_arch->flags.use_dbr == 0) {
|
|
+ PFM_DBG("pmc13 has pmc13.ta cleared, clearing ibr");
|
|
+ ret = pfm_ia64_mark_dbregs_used(ctx, set);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * we must clear the (data) debug registers if pmc11.pt bit is cleared
|
|
+ * before they are written (flags.use_dbr == 0) to avoid picking up
|
|
+ * stale information.
|
|
+ */
|
|
+ if (cnum == 11 && ((tmpval >> 28) & 0x1) == 0
|
|
+ && ctx_arch->flags.use_dbr == 0) {
|
|
+ PFM_DBG("pmc11 has pmc11.pt cleared, clearing dbr");
|
|
+ ret = pfm_ia64_mark_dbregs_used(ctx, set);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ req->reg_value = tmpval;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_ita_probe_pmu(void)
|
|
+{
|
|
+ return local_cpu_data->family == 0x7 && !ia64_platform_is("hpsim")
|
|
+ ? 0 : -1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
+ */
|
|
+static struct pfm_pmu_config pfm_ita_pmu_conf = {
|
|
+ .pmu_name = "Itanium",
|
|
+ .counter_width = 32,
|
|
+ .pmd_desc = pfm_ita_pmd_desc,
|
|
+ .pmc_desc = pfm_ita_pmc_desc,
|
|
+ .pmc_write_check = pfm_ita_pmc_check,
|
|
+ .num_pmc_entries = PFM_ITA_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_ITA_NUM_PMDS,
|
|
+ .probe_pmu = pfm_ita_probe_pmu,
|
|
+ .version = "1.0",
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+ .pmu_info = &pfm_ita_pmu_info
|
|
+};
|
|
+
|
|
+static int __init pfm_ita_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_ita_pmu_conf);
|
|
+}
|
|
+
|
|
+static void __exit pfm_ita_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_ita_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_ita_pmu_init_module);
|
|
+module_exit(pfm_ita_pmu_cleanup_module);
|
|
+
|
|
diff --git a/arch/ia64/perfmon/perfmon_mckinley.c b/arch/ia64/perfmon/perfmon_mckinley.c
|
|
new file mode 100644
|
|
index 0000000..dc59092
|
|
--- /dev/null
|
|
+++ b/arch/ia64/perfmon/perfmon_mckinley.c
|
|
@@ -0,0 +1,290 @@
|
|
+/*
|
|
+ * This file contains the McKinley PMU register description tables
|
|
+ * and pmc checker used by perfmon.c.
|
|
+ *
|
|
+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_DESCRIPTION("Itanium 2 (McKinley) PMU description tables");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+#define RDEP(x) (1UL << (x))
|
|
+
|
|
+#define PFM_MCK_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\
|
|
+ RDEP(12))
|
|
+
|
|
+#define PFM_MCK_NO64 (1UL<<5)
|
|
+
|
|
+static struct pfm_arch_pmu_info pfm_mck_pmu_info = {
|
|
+ .mask_pmcs = {PFM_MCK_MASK_PMCS,},
|
|
+};
|
|
+
|
|
+/* reserved bits are 1 in the mask */
|
|
+#define PFM_ITA2_RSVD 0xfffffffffc8000a0UL
|
|
+
|
|
+/*
|
|
+ * For debug registers, writing xBR(y) means we also use xBR(y+1). Hence using
|
|
+ * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information
|
|
+ * but this is fine because they are handled separately in the IA-64 specific
|
|
+ * code.
|
|
+ */
|
|
+static struct pfm_regmap_desc pfm_mck_pmc_desc[] = {
|
|
+/* pmc0 */ PMX_NA,
|
|
+/* pmc1 */ PMX_NA,
|
|
+/* pmc2 */ PMX_NA,
|
|
+/* pmc3 */ PMX_NA,
|
|
+/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x800020UL, 0xfffffffffc8000a0, PFM_MCK_NO64, 4),
|
|
+/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 5),
|
|
+/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 6),
|
|
+/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 7),
|
|
+/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xffffffff3fffffffUL, 0xc0000004UL, 0, 8),
|
|
+/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xffffffff3ffffffcUL, 0xc0000004UL, 0, 9),
|
|
+/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xffffffffffff0000UL, 0, 10),
|
|
+/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x0, 0xfffffffffcf0fe30UL, 0, 11),
|
|
+/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0000UL, 0, 12),
|
|
+/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x2078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 13),
|
|
+/* pmc14 */ PMC_D(PFM_REG_W , "PMC14", 0x0db60db60db60db6UL, 0xffffffffffffdb6dUL, 0, 14),
|
|
+/* pmc15 */ PMC_D(PFM_REG_W , "PMC15", 0xfffffff0UL, 0xfffffffffffffff0UL, 0, 15),
|
|
+/* pmc16 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc24 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc32 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc40 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0),
|
|
+/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1),
|
|
+/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2),
|
|
+/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3),
|
|
+/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4),
|
|
+/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5),
|
|
+/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6),
|
|
+/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 0x0, 0x8000000000000000UL, 0, 7),
|
|
+/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0),
|
|
+/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1),
|
|
+/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2),
|
|
+/* pmc267 */ PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3),
|
|
+/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4),
|
|
+/* pmc269 */ PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5),
|
|
+/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6),
|
|
+/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7)
|
|
+};
|
|
+#define PFM_MCK_NUM_PMCS ARRAY_SIZE(pfm_mck_pmc_desc)
|
|
+
|
|
+static struct pfm_regmap_desc pfm_mck_pmd_desc[] = {
|
|
+/* pmd0 */ PMD_DP(PFM_REG_I, "PMD0", 0, 1ull << 10),
|
|
+/* pmd1 */ PMD_DP(PFM_REG_I, "PMD1", 1, 1ull << 10),
|
|
+/* pmd2 */ PMD_DP(PFM_REG_I, "PMD2", 2, 1ull << 11),
|
|
+/* pmd3 */ PMD_DP(PFM_REG_I, "PMD3", 3, 1ull << 11),
|
|
+/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4),
|
|
+/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5),
|
|
+/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6),
|
|
+/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7),
|
|
+/* pmd8 */ PMD_DP(PFM_REG_I, "PMD8", 8, 1ull << 12),
|
|
+/* pmd9 */ PMD_DP(PFM_REG_I, "PMD9", 9, 1ull << 12),
|
|
+/* pmd10 */ PMD_DP(PFM_REG_I, "PMD10", 10, 1ull << 12),
|
|
+/* pmd11 */ PMD_DP(PFM_REG_I, "PMD11", 11, 1ull << 12),
|
|
+/* pmd12 */ PMD_DP(PFM_REG_I, "PMD12", 12, 1ull << 12),
|
|
+/* pmd13 */ PMD_DP(PFM_REG_I, "PMD13", 13, 1ull << 12),
|
|
+/* pmd14 */ PMD_DP(PFM_REG_I, "PMD14", 14, 1ull << 12),
|
|
+/* pmd15 */ PMD_DP(PFM_REG_I, "PMD15", 15, 1ull << 12),
|
|
+/* pmd16 */ PMD_DP(PFM_REG_I, "PMD16", 16, 1ull << 12),
|
|
+/* pmd17 */ PMD_DP(PFM_REG_I, "PMD17", 17, 1ull << 11)
|
|
+};
|
|
+#define PFM_MCK_NUM_PMDS ARRAY_SIZE(pfm_mck_pmd_desc)
|
|
+
|
|
+static int pfm_mck_pmc_check(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ struct pfarg_pmc *req)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ u64 val8 = 0, val14 = 0, val13 = 0;
|
|
+ u64 tmpval;
|
|
+ u16 cnum;
|
|
+ int ret = 0, check_case1 = 0;
|
|
+ int is_system;
|
|
+
|
|
+ tmpval = req->reg_value;
|
|
+ cnum = req->reg_num;
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ is_system = ctx->flags.system;
|
|
+
|
|
+#define PFM_MCK_PMC_PM_POS6 (1UL<<6)
|
|
+#define PFM_MCK_PMC_PM_POS4 (1UL<<4)
|
|
+
|
|
+ switch (cnum) {
|
|
+ case 4:
|
|
+ case 5:
|
|
+ case 6:
|
|
+ case 7:
|
|
+ case 11:
|
|
+ case 12:
|
|
+ if (is_system)
|
|
+ tmpval |= PFM_MCK_PMC_PM_POS6;
|
|
+ else
|
|
+ tmpval &= ~PFM_MCK_PMC_PM_POS6;
|
|
+ break;
|
|
+
|
|
+ case 8:
|
|
+ val8 = tmpval;
|
|
+ val13 = set->pmcs[13];
|
|
+ val14 = set->pmcs[14];
|
|
+ check_case1 = 1;
|
|
+ break;
|
|
+
|
|
+ case 10:
|
|
+ if (is_system)
|
|
+ tmpval |= PFM_MCK_PMC_PM_POS4;
|
|
+ else
|
|
+ tmpval &= ~PFM_MCK_PMC_PM_POS4;
|
|
+ break;
|
|
+
|
|
+ case 13:
|
|
+ val8 = set->pmcs[8];
|
|
+ val13 = tmpval;
|
|
+ val14 = set->pmcs[14];
|
|
+ check_case1 = 1;
|
|
+ break;
|
|
+
|
|
+ case 14:
|
|
+ val8 = set->pmcs[8];
|
|
+ val13 = set->pmcs[13];
|
|
+ val14 = tmpval;
|
|
+ check_case1 = 1;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * check illegal configuration which can produce inconsistencies
|
|
+ * in tagging i-side events in L1D and L2 caches
|
|
+ */
|
|
+ if (check_case1) {
|
|
+ ret = (((val13 >> 45) & 0xf) == 0 && ((val8 & 0x1) == 0))
|
|
+ && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
|
|
+ || (((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
|
|
+
|
|
+ if (ret) {
|
|
+ PFM_DBG("perfmon: invalid config pmc8=0x%lx "
|
|
+ "pmc13=0x%lx pmc14=0x%lx",
|
|
+ val8, val13, val14);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * check if configuration implicitly activates the use of
|
|
+ * the debug registers. If true, then we ensure that this is
|
|
+ * possible and that we do not pick up stale value in the HW
|
|
+ * registers.
|
|
+ *
|
|
+ * We postpone the checks of pmc13 and pmc14 to avoid side effects
|
|
+ * in case of errors
|
|
+ */
|
|
+
|
|
+ /*
|
|
+ * pmc13 is "active" if:
|
|
+ * one of the pmc13.cfg_dbrpXX field is different from 0x3
|
|
+ * AND
|
|
+ * the corresponding pmc13.ena_dbrpXX is set.
|
|
+ */
|
|
+ if (cnum == 13 && (tmpval & 0x1e00000000000UL)
|
|
+ && (tmpval & 0x18181818UL) != 0x18181818UL
|
|
+ && ctx_arch->flags.use_dbr == 0) {
|
|
+ PFM_DBG("pmc13=0x%lx active", tmpval);
|
|
+ ret = pfm_ia64_mark_dbregs_used(ctx, set);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * if any pmc14.ibrpX bit is enabled we must clear the ibrs
|
|
+ */
|
|
+ if (cnum == 14 && ((tmpval & 0x2222UL) != 0x2222UL)
|
|
+ && ctx_arch->flags.use_dbr == 0) {
|
|
+ PFM_DBG("pmc14=0x%lx active", tmpval);
|
|
+ ret = pfm_ia64_mark_dbregs_used(ctx, set);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ req->reg_value = tmpval;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_mck_probe_pmu(void)
|
|
+{
|
|
+ return local_cpu_data->family == 0x1f ? 0 : -1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
+ */
|
|
+static struct pfm_pmu_config pfm_mck_pmu_conf = {
|
|
+ .pmu_name = "Itanium 2",
|
|
+ .counter_width = 47,
|
|
+ .pmd_desc = pfm_mck_pmd_desc,
|
|
+ .pmc_desc = pfm_mck_pmc_desc,
|
|
+ .pmc_write_check = pfm_mck_pmc_check,
|
|
+ .num_pmc_entries = PFM_MCK_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_MCK_NUM_PMDS,
|
|
+ .probe_pmu = pfm_mck_probe_pmu,
|
|
+ .version = "1.0",
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+ .pmu_info = &pfm_mck_pmu_info,
|
|
+};
|
|
+
|
|
+static int __init pfm_mck_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_mck_pmu_conf);
|
|
+}
|
|
+
|
|
+static void __exit pfm_mck_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_mck_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_mck_pmu_init_module);
|
|
+module_exit(pfm_mck_pmu_cleanup_module);
|
|
diff --git a/arch/ia64/perfmon/perfmon_montecito.c b/arch/ia64/perfmon/perfmon_montecito.c
|
|
new file mode 100644
|
|
index 0000000..3f76f73
|
|
--- /dev/null
|
|
+++ b/arch/ia64/perfmon/perfmon_montecito.c
|
|
@@ -0,0 +1,412 @@
|
|
+/*
|
|
+ * This file contains the Montecito PMU register description tables
|
|
+ * and pmc checker used by perfmon.c.
|
|
+ *
|
|
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/smp.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_DESCRIPTION("Dual-Core Itanium 2 (Montecito) PMU description table");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+#define RDEP(x) (1UL << (x))
|
|
+
|
|
+#define PFM_MONT_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|\
|
|
+ RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|\
|
|
+ RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|\
|
|
+ RDEP(37)|RDEP(39)|RDEP(40)|RDEP(42))
|
|
+
|
|
+#define PFM_MONT_NO64 (1UL<<5)
|
|
+
|
|
+static struct pfm_arch_pmu_info pfm_mont_pmu_info = {
|
|
+ .mask_pmcs = {PFM_MONT_MASK_PMCS,},
|
|
+};
|
|
+
|
|
+#define PFM_MONT_RSVD 0xffffffff838000a0UL
|
|
+/*
|
|
+ *
|
|
+ * For debug registers, writing xBR(y) means we use also xBR(y+1). Hence using
|
|
+ * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information
|
|
+ * but this is fine because they are handled separately in the IA-64 specific
|
|
+ * code.
|
|
+ *
|
|
+ * For PMC4-PMC15, PMC40: we force pmc.ism=2 (IA-64 mode only)
|
|
+ */
|
|
+static struct pfm_regmap_desc pfm_mont_pmc_desc[] = {
|
|
+/* pmc0 */ PMX_NA,
|
|
+/* pmc1 */ PMX_NA,
|
|
+/* pmc2 */ PMX_NA,
|
|
+/* pmc3 */ PMX_NA,
|
|
+/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 4),
|
|
+/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 5),
|
|
+/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 6),
|
|
+/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 7),
|
|
+/* pmc8 */ PMC_D(PFM_REG_W64, "PMC8" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 8),
|
|
+/* pmc9 */ PMC_D(PFM_REG_W64, "PMC9" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 9),
|
|
+/* pmc10 */ PMC_D(PFM_REG_W64, "PMC10", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 10),
|
|
+/* pmc11 */ PMC_D(PFM_REG_W64, "PMC11", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 11),
|
|
+/* pmc12 */ PMC_D(PFM_REG_W64, "PMC12", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 12),
|
|
+/* pmc13 */ PMC_D(PFM_REG_W64, "PMC13", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 13),
|
|
+/* pmc14 */ PMC_D(PFM_REG_W64, "PMC14", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 14),
|
|
+/* pmc15 */ PMC_D(PFM_REG_W64, "PMC15", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 15),
|
|
+/* pmc16 */ PMX_NA,
|
|
+/* pmc17 */ PMX_NA,
|
|
+/* pmc18 */ PMX_NA,
|
|
+/* pmc19 */ PMX_NA,
|
|
+/* pmc20 */ PMX_NA,
|
|
+/* pmc21 */ PMX_NA,
|
|
+/* pmc22 */ PMX_NA,
|
|
+/* pmc23 */ PMX_NA,
|
|
+/* pmc24 */ PMX_NA,
|
|
+/* pmc25 */ PMX_NA,
|
|
+/* pmc26 */ PMX_NA,
|
|
+/* pmc27 */ PMX_NA,
|
|
+/* pmc28 */ PMX_NA,
|
|
+/* pmc29 */ PMX_NA,
|
|
+/* pmc30 */ PMX_NA,
|
|
+/* pmc31 */ PMX_NA,
|
|
+/* pmc32 */ PMC_D(PFM_REG_W , "PMC32", 0x30f01ffffffffffUL, 0xfcf0fe0000000000UL, 0, 32),
|
|
+/* pmc33 */ PMC_D(PFM_REG_W , "PMC33", 0x0, 0xfffffe0000000000UL, 0, 33),
|
|
+/* pmc34 */ PMC_D(PFM_REG_W , "PMC34", 0xf01ffffffffffUL, 0xfff0fe0000000000UL, 0, 34),
|
|
+/* pmc35 */ PMC_D(PFM_REG_W , "PMC35", 0x0, 0x1ffffffffffUL, 0, 35),
|
|
+/* pmc36 */ PMC_D(PFM_REG_W , "PMC36", 0xfffffff0UL, 0xfffffffffffffff0UL, 0, 36),
|
|
+/* pmc37 */ PMC_D(PFM_REG_W , "PMC37", 0x0, 0xffffffffffffc000UL, 0, 37),
|
|
+/* pmc38 */ PMC_D(PFM_REG_W , "PMC38", 0xdb6UL, 0xffffffffffffdb6dUL, 0, 38),
|
|
+/* pmc39 */ PMC_D(PFM_REG_W , "PMC39", 0x0, 0xffffffffffff0030UL, 0, 39),
|
|
+/* pmc40 */ PMC_D(PFM_REG_W , "PMC40", 0x2000000UL, 0xfffffffffff0fe30UL, 0, 40),
|
|
+/* pmc41 */ PMC_D(PFM_REG_W , "PMC41", 0x00002078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 41),
|
|
+/* pmc42 */ PMC_D(PFM_REG_W , "PMC42", 0x0, 0xfff800b0UL, 0, 42),
|
|
+/* pmc43 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc256 */ PMC_D(PFM_REG_W, "IBR0", 0x0, 0, 0, 0),
|
|
+/* pmc257 */ PMC_D(PFM_REG_W, "IBR1", 0x0, 0x8000000000000000UL, 0, 1),
|
|
+/* pmc258 */ PMC_D(PFM_REG_W, "IBR2", 0x0, 0, 0, 2),
|
|
+/* pmc259 */ PMC_D(PFM_REG_W, "IBR3", 0x0, 0x8000000000000000UL, 0, 3),
|
|
+/* pmc260 */ PMC_D(PFM_REG_W, "IBR4", 0x0, 0, 0, 4),
|
|
+/* pmc261 */ PMC_D(PFM_REG_W, "IBR5", 0x0, 0x8000000000000000UL, 0, 5),
|
|
+/* pmc262 */ PMC_D(PFM_REG_W, "IBR6", 0x0, 0, 0, 6),
|
|
+/* pmc263 */ PMC_D(PFM_REG_W, "IBR7", 0x0, 0x8000000000000000UL, 0, 7),
|
|
+/* pmc264 */ PMC_D(PFM_REG_W, "DBR0", 0x0, 0, 0, 0),
|
|
+/* pmc265 */ PMC_D(PFM_REG_W, "DBR1", 0x0, 0xc000000000000000UL, 0, 1),
|
|
+/* pmc266 */ PMC_D(PFM_REG_W, "DBR2", 0x0, 0, 0, 2),
|
|
+/* pmc267 */ PMC_D(PFM_REG_W, "DBR3", 0x0, 0xc000000000000000UL, 0, 3),
|
|
+/* pmc268 */ PMC_D(PFM_REG_W, "DBR4", 0x0, 0, 0, 4),
|
|
+/* pmc269 */ PMC_D(PFM_REG_W, "DBR5", 0x0, 0xc000000000000000UL, 0, 5),
|
|
+/* pmc270 */ PMC_D(PFM_REG_W, "DBR6", 0x0, 0, 0, 6),
|
|
+/* pmc271 */ PMC_D(PFM_REG_W, "DBR7", 0x0, 0xc000000000000000UL, 0, 7)
|
|
+};
|
|
+#define PFM_MONT_NUM_PMCS ARRAY_SIZE(pfm_mont_pmc_desc)
|
|
+
|
|
+static struct pfm_regmap_desc pfm_mont_pmd_desc[] = {
|
|
+/* pmd0 */ PMX_NA,
|
|
+/* pmd1 */ PMX_NA,
|
|
+/* pmd2 */ PMX_NA,
|
|
+/* pmd3 */ PMX_NA,
|
|
+/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4),
|
|
+/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5),
|
|
+/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6),
|
|
+/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7),
|
|
+/* pmd8 */ PMD_DP(PFM_REG_C, "PMD8", 8, 1ull << 8),
|
|
+/* pmd9 */ PMD_DP(PFM_REG_C, "PMD9", 9, 1ull << 9),
|
|
+/* pmd10 */ PMD_DP(PFM_REG_C, "PMD10", 10, 1ull << 10),
|
|
+/* pmd11 */ PMD_DP(PFM_REG_C, "PMD11", 11, 1ull << 11),
|
|
+/* pmd12 */ PMD_DP(PFM_REG_C, "PMD12", 12, 1ull << 12),
|
|
+/* pmd13 */ PMD_DP(PFM_REG_C, "PMD13", 13, 1ull << 13),
|
|
+/* pmd14 */ PMD_DP(PFM_REG_C, "PMD14", 14, 1ull << 14),
|
|
+/* pmd15 */ PMD_DP(PFM_REG_C, "PMD15", 15, 1ull << 15),
|
|
+/* pmd16 */ PMX_NA,
|
|
+/* pmd17 */ PMX_NA,
|
|
+/* pmd18 */ PMX_NA,
|
|
+/* pmd19 */ PMX_NA,
|
|
+/* pmd20 */ PMX_NA,
|
|
+/* pmd21 */ PMX_NA,
|
|
+/* pmd22 */ PMX_NA,
|
|
+/* pmd23 */ PMX_NA,
|
|
+/* pmd24 */ PMX_NA,
|
|
+/* pmd25 */ PMX_NA,
|
|
+/* pmd26 */ PMX_NA,
|
|
+/* pmd27 */ PMX_NA,
|
|
+/* pmd28 */ PMX_NA,
|
|
+/* pmd29 */ PMX_NA,
|
|
+/* pmd30 */ PMX_NA,
|
|
+/* pmd31 */ PMX_NA,
|
|
+/* pmd32 */ PMD_DP(PFM_REG_I, "PMD32", 32, 1ull << 40),
|
|
+/* pmd33 */ PMD_DP(PFM_REG_I, "PMD33", 33, 1ull << 40),
|
|
+/* pmd34 */ PMD_DP(PFM_REG_I, "PMD34", 34, 1ull << 37),
|
|
+/* pmd35 */ PMD_DP(PFM_REG_I, "PMD35", 35, 1ull << 37),
|
|
+/* pmd36 */ PMD_DP(PFM_REG_I, "PMD36", 36, 1ull << 40),
|
|
+/* pmd37 */ PMX_NA,
|
|
+/* pmd38 */ PMD_DP(PFM_REG_I, "PMD38", 38, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd39 */ PMD_DP(PFM_REG_I, "PMD39", 39, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd40 */ PMX_NA,
|
|
+/* pmd41 */ PMX_NA,
|
|
+/* pmd42 */ PMX_NA,
|
|
+/* pmd43 */ PMX_NA,
|
|
+/* pmd44 */ PMX_NA,
|
|
+/* pmd45 */ PMX_NA,
|
|
+/* pmd46 */ PMX_NA,
|
|
+/* pmd47 */ PMX_NA,
|
|
+/* pmd48 */ PMD_DP(PFM_REG_I, "PMD48", 48, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd49 */ PMD_DP(PFM_REG_I, "PMD49", 49, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd50 */ PMD_DP(PFM_REG_I, "PMD50", 50, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd51 */ PMD_DP(PFM_REG_I, "PMD51", 51, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd52 */ PMD_DP(PFM_REG_I, "PMD52", 52, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd53 */ PMD_DP(PFM_REG_I, "PMD53", 53, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd54 */ PMD_DP(PFM_REG_I, "PMD54", 54, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd55 */ PMD_DP(PFM_REG_I, "PMD55", 55, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd56 */ PMD_DP(PFM_REG_I, "PMD56", 56, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd57 */ PMD_DP(PFM_REG_I, "PMD57", 57, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd58 */ PMD_DP(PFM_REG_I, "PMD58", 58, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd59 */ PMD_DP(PFM_REG_I, "PMD59", 59, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd60 */ PMD_DP(PFM_REG_I, "PMD60", 60, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd61 */ PMD_DP(PFM_REG_I, "PMD61", 61, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd62 */ PMD_DP(PFM_REG_I, "PMD62", 62, (1ull<<39)|(1ull<<42)),
|
|
+/* pmd63 */ PMD_DP(PFM_REG_I, "PMD63", 63, (1ull<<39)|(1ull<<42))
|
|
+};
|
|
+#define PFM_MONT_NUM_PMDS ARRAY_SIZE(pfm_mont_pmd_desc)
|
|
+
|
|
+static int pfm_mont_has_ht;
|
|
+
|
|
+static int pfm_mont_pmc_check(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ struct pfarg_pmc *req)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ u64 val32 = 0, val38 = 0, val41 = 0;
|
|
+ u64 tmpval;
|
|
+ u16 cnum;
|
|
+ int ret = 0, check_case1 = 0;
|
|
+ int is_system;
|
|
+
|
|
+ tmpval = req->reg_value;
|
|
+ cnum = req->reg_num;
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ is_system = ctx->flags.system;
|
|
+
|
|
+#define PFM_MONT_PMC_PM_POS6 (1UL<<6)
|
|
+#define PFM_MONT_PMC_PM_POS4 (1UL<<4)
|
|
+
|
|
+ switch (cnum) {
|
|
+ case 4:
|
|
+ case 5:
|
|
+ case 6:
|
|
+ case 7:
|
|
+ case 8:
|
|
+ case 9:
|
|
+ if (is_system)
|
|
+ tmpval |= PFM_MONT_PMC_PM_POS6;
|
|
+ else
|
|
+ tmpval &= ~PFM_MONT_PMC_PM_POS6;
|
|
+ break;
|
|
+ case 10:
|
|
+ case 11:
|
|
+ case 12:
|
|
+ case 13:
|
|
+ case 14:
|
|
+ case 15:
|
|
+ if ((req->reg_flags & PFM_REGFL_NO_EMUL64) == 0) {
|
|
+ if (pfm_mont_has_ht) {
|
|
+ PFM_INFO("perfmon: Errata 121 PMD10/PMD15 cannot be used to overflow"
|
|
+ "when threads on on");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ }
|
|
+ if (is_system)
|
|
+ tmpval |= PFM_MONT_PMC_PM_POS6;
|
|
+ else
|
|
+ tmpval &= ~PFM_MONT_PMC_PM_POS6;
|
|
+ break;
|
|
+ case 39:
|
|
+ case 40:
|
|
+ case 42:
|
|
+ if (pfm_mont_has_ht && ((req->reg_value >> 8) & 0x7) == 4) {
|
|
+ PFM_INFO("perfmon: Errata 120: IP-EAR not available when threads are on");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ if (is_system)
|
|
+ tmpval |= PFM_MONT_PMC_PM_POS6;
|
|
+ else
|
|
+ tmpval &= ~PFM_MONT_PMC_PM_POS6;
|
|
+ break;
|
|
+
|
|
+ case 32:
|
|
+ val32 = tmpval;
|
|
+ val38 = set->pmcs[38];
|
|
+ val41 = set->pmcs[41];
|
|
+ check_case1 = 1;
|
|
+ break;
|
|
+
|
|
+ case 37:
|
|
+ if (is_system)
|
|
+ tmpval |= PFM_MONT_PMC_PM_POS4;
|
|
+ else
|
|
+ tmpval &= ~PFM_MONT_PMC_PM_POS4;
|
|
+ break;
|
|
+
|
|
+ case 38:
|
|
+ val38 = tmpval;
|
|
+ val32 = set->pmcs[32];
|
|
+ val41 = set->pmcs[41];
|
|
+ check_case1 = 1;
|
|
+ break;
|
|
+ case 41:
|
|
+ val41 = tmpval;
|
|
+ val32 = set->pmcs[32];
|
|
+ val38 = set->pmcs[38];
|
|
+ check_case1 = 1;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (check_case1) {
|
|
+ ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0)
|
|
+ && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0)
|
|
+ || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0));
|
|
+ if (ret) {
|
|
+ PFM_DBG("perfmon: invalid config pmc38=0x%lx "
|
|
+ "pmc41=0x%lx pmc32=0x%lx",
|
|
+ val38, val41, val32);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * check if configuration implicitly activates the use of the
|
|
+ * debug registers. If true, then we ensure that this is possible
|
|
+ * and that we do not pick up stale value in the HW registers.
|
|
+ */
|
|
+
|
|
+ /*
|
|
+ *
|
|
+ * pmc41 is "active" if:
|
|
+ * one of the pmc41.cfgdtagXX field is different from 0x3
|
|
+ * AND
|
|
+ * the corresponding pmc41.en_dbrpXX is set.
|
|
+ * AND
|
|
+ * ctx_arch->flags.use_dbr == 0 (dbr not yet used)
|
|
+ */
|
|
+ if (cnum == 41
|
|
+ && (tmpval & 0x1e00000000000)
|
|
+ && (tmpval & 0x18181818) != 0x18181818
|
|
+ && ctx_arch->flags.use_dbr == 0) {
|
|
+ PFM_DBG("pmc41=0x%lx active, clearing dbr", tmpval);
|
|
+ ret = pfm_ia64_mark_dbregs_used(ctx, set);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+ /*
|
|
+ * we must clear the (instruction) debug registers if:
|
|
+ * pmc38.ig_ibrpX is 0 (enabled)
|
|
+ * and
|
|
+ * ctx_arch->flags.use_dbr == 0 (dbr not yet used)
|
|
+ */
|
|
+ if (cnum == 38 && ((tmpval & 0x492) != 0x492)
|
|
+ && ctx_arch->flags.use_dbr == 0) {
|
|
+ PFM_DBG("pmc38=0x%lx active pmc38, clearing ibr", tmpval);
|
|
+ ret = pfm_ia64_mark_dbregs_used(ctx, set);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ }
|
|
+ req->reg_value = tmpval;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void pfm_handle_errata(void)
|
|
+{
|
|
+ pfm_mont_has_ht = 1;
|
|
+
|
|
+ PFM_INFO("activating workaround for errata 120 "
|
|
+ "(Disable IP-EAR when threads are on)");
|
|
+
|
|
+ PFM_INFO("activating workaround for Errata 121 "
|
|
+ "(PMC10-PMC15 cannot be used to overflow"
|
|
+ " when threads are on");
|
|
+}
|
|
+static int pfm_mont_probe_pmu(void)
|
|
+{
|
|
+ if (local_cpu_data->family != 0x20)
|
|
+ return -1;
|
|
+
|
|
+ /*
|
|
+ * the 2 errata must be activated when
|
|
+ * threads are/can be enabled
|
|
+ */
|
|
+ if (is_multithreading_enabled())
|
|
+ pfm_handle_errata();
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
+ */
|
|
+static struct pfm_pmu_config pfm_mont_pmu_conf = {
|
|
+ .pmu_name = "Montecito",
|
|
+ .counter_width = 47,
|
|
+ .pmd_desc = pfm_mont_pmd_desc,
|
|
+ .pmc_desc = pfm_mont_pmc_desc,
|
|
+ .num_pmc_entries = PFM_MONT_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_MONT_NUM_PMDS,
|
|
+ .pmc_write_check = pfm_mont_pmc_check,
|
|
+ .probe_pmu = pfm_mont_probe_pmu,
|
|
+ .version = "1.0",
|
|
+ .pmu_info = &pfm_mont_pmu_info,
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE
|
|
+};
|
|
+
|
|
+static int __init pfm_mont_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_mont_pmu_conf);
|
|
+}
|
|
+
|
|
+static void __exit pfm_mont_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_mont_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_mont_pmu_init_module);
|
|
+module_exit(pfm_mont_pmu_cleanup_module);
|
|
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
|
|
index 1e06d23..b87f445 100644
|
|
--- a/arch/mips/Kconfig
|
|
+++ b/arch/mips/Kconfig
|
|
@@ -1857,6 +1857,8 @@ config SECCOMP
|
|
|
|
If unsure, say Y. Only embedded should say N here.
|
|
|
|
+source "arch/mips/perfmon/Kconfig"
|
|
+
|
|
endmenu
|
|
|
|
config RWSEM_GENERIC_SPINLOCK
|
|
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
|
|
index 9aab51c..712acf7 100644
|
|
--- a/arch/mips/Makefile
|
|
+++ b/arch/mips/Makefile
|
|
@@ -154,6 +154,12 @@ endif
|
|
endif
|
|
|
|
#
|
|
+# Perfmon support
|
|
+#
|
|
+
|
|
+core-$(CONFIG_PERFMON) += arch/mips/perfmon/
|
|
+
|
|
+#
|
|
# Firmware support
|
|
#
|
|
libs-$(CONFIG_ARC) += arch/mips/fw/arc/
|
|
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
|
|
index 22fc19b..4467361 100644
|
|
--- a/arch/mips/kernel/process.c
|
|
+++ b/arch/mips/kernel/process.c
|
|
@@ -27,6 +27,7 @@
|
|
#include <linux/completion.h>
|
|
#include <linux/kallsyms.h>
|
|
#include <linux/random.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/asm.h>
|
|
#include <asm/bootinfo.h>
|
|
@@ -94,6 +95,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
|
|
|
|
void exit_thread(void)
|
|
{
|
|
+ pfm_exit_thread();
|
|
}
|
|
|
|
void flush_thread(void)
|
|
@@ -162,6 +164,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
|
|
if (clone_flags & CLONE_SETTLS)
|
|
ti->tp_value = regs->regs[7];
|
|
|
|
+ pfm_copy_thread(p);
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
|
|
index 5e75a31..e96ddd6 100644
|
|
--- a/arch/mips/kernel/scall32-o32.S
|
|
+++ b/arch/mips/kernel/scall32-o32.S
|
|
@@ -653,6 +653,18 @@ einval: li v0, -EINVAL
|
|
sys sys_dup3 3
|
|
sys sys_pipe2 2
|
|
sys sys_inotify_init1 1
|
|
+ sys sys_pfm_create_context 4 /* 4330 */
|
|
+ sys sys_pfm_write_pmcs 3
|
|
+ sys sys_pfm_write_pmds 4
|
|
+ sys sys_pfm_read_pmds 3
|
|
+ sys sys_pfm_load_context 2
|
|
+ sys sys_pfm_start 2 /* 4335 */
|
|
+ sys sys_pfm_stop 1
|
|
+ sys sys_pfm_restart 1
|
|
+ sys sys_pfm_create_evtsets 3
|
|
+ sys sys_pfm_getinfo_evtsets 3
|
|
+ sys sys_pfm_delete_evtsets 3 /* 4340 */
|
|
+ sys sys_pfm_unload_context 1
|
|
.endm
|
|
|
|
/* We pre-compute the number of _instruction_ bytes needed to
|
|
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
|
|
index 3d58204..adb2ba9 100644
|
|
--- a/arch/mips/kernel/scall64-64.S
|
|
+++ b/arch/mips/kernel/scall64-64.S
|
|
@@ -487,4 +487,16 @@ sys_call_table:
|
|
PTR sys_dup3
|
|
PTR sys_pipe2
|
|
PTR sys_inotify_init1
|
|
+ PTR sys_pfm_create_context
|
|
+ PTR sys_pfm_write_pmcs /* 5290 */
|
|
+ PTR sys_pfm_write_pmds
|
|
+ PTR sys_pfm_read_pmds
|
|
+ PTR sys_pfm_load_context
|
|
+ PTR sys_pfm_start
|
|
+ PTR sys_pfm_stop /* 5295 */
|
|
+ PTR sys_pfm_restart
|
|
+ PTR sys_pfm_create_evtsets
|
|
+ PTR sys_pfm_getinfo_evtsets
|
|
+ PTR sys_pfm_delete_evtsets
|
|
+ PTR sys_pfm_unload_context /* 5300 */
|
|
.size sys_call_table,.-sys_call_table
|
|
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
|
|
index da7f1b6..6d12095 100644
|
|
--- a/arch/mips/kernel/scall64-n32.S
|
|
+++ b/arch/mips/kernel/scall64-n32.S
|
|
@@ -400,12 +400,12 @@ EXPORT(sysn32_call_table)
|
|
PTR sys_ioprio_set
|
|
PTR sys_ioprio_get
|
|
PTR compat_sys_utimensat
|
|
- PTR compat_sys_signalfd /* 5280 */
|
|
+ PTR compat_sys_signalfd /* 6280 */
|
|
PTR sys_ni_syscall
|
|
PTR sys_eventfd
|
|
PTR sys_fallocate
|
|
PTR sys_timerfd_create
|
|
- PTR sys_timerfd_gettime /* 5285 */
|
|
+ PTR sys_timerfd_gettime /* 6285 */
|
|
PTR sys_timerfd_settime
|
|
PTR sys_signalfd4
|
|
PTR sys_eventfd2
|
|
@@ -413,4 +413,16 @@ EXPORT(sysn32_call_table)
|
|
PTR sys_dup3 /* 5290 */
|
|
PTR sys_pipe2
|
|
PTR sys_inotify_init1
|
|
+ PTR sys_pfm_create_context
|
|
+ PTR sys_pfm_write_pmcs
|
|
+ PTR sys_pfm_write_pmds /* 6295 */
|
|
+ PTR sys_pfm_read_pmds
|
|
+ PTR sys_pfm_load_context
|
|
+ PTR sys_pfm_start
|
|
+ PTR sys_pfm_stop
|
|
+ PTR sys_pfm_restart /* 6300 */
|
|
+ PTR sys_pfm_create_evtsets
|
|
+ PTR sys_pfm_getinfo_evtsets
|
|
+ PTR sys_pfm_delete_evtsets
|
|
+ PTR sys_pfm_unload_context
|
|
.size sysn32_call_table,.-sysn32_call_table
|
|
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
|
|
index d7cd1aa..e77f55a 100644
|
|
--- a/arch/mips/kernel/scall64-o32.S
|
|
+++ b/arch/mips/kernel/scall64-o32.S
|
|
@@ -535,4 +535,16 @@ sys_call_table:
|
|
PTR sys_dup3
|
|
PTR sys_pipe2
|
|
PTR sys_inotify_init1
|
|
+ PTR sys_pfm_create_context /* 4330 */
|
|
+ PTR sys_pfm_write_pmcs
|
|
+ PTR sys_pfm_write_pmds
|
|
+ PTR sys_pfm_read_pmds
|
|
+ PTR sys_pfm_load_context
|
|
+ PTR sys_pfm_start /* 4335 */
|
|
+ PTR sys_pfm_stop
|
|
+ PTR sys_pfm_restart
|
|
+ PTR sys_pfm_create_evtsets
|
|
+ PTR sys_pfm_getinfo_evtsets
|
|
+ PTR sys_pfm_delete_evtsets /* 4340 */
|
|
+ PTR sys_pfm_unload_context
|
|
.size sys_call_table,.-sys_call_table
|
|
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
|
|
index a4e106c..6a7e60c 100644
|
|
--- a/arch/mips/kernel/signal.c
|
|
+++ b/arch/mips/kernel/signal.c
|
|
@@ -20,6 +20,7 @@
|
|
#include <linux/unistd.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/uaccess.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/abi.h>
|
|
#include <asm/asm.h>
|
|
@@ -694,8 +695,11 @@ static void do_signal(struct pt_regs *regs)
|
|
* - triggered by the TIF_WORK_MASK flags
|
|
*/
|
|
asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
|
|
- __u32 thread_info_flags)
|
|
+ __u32 thread_info_flags)
|
|
{
|
|
+ if (thread_info_flags & _TIF_PERFMON_WORK)
|
|
+ pfm_handle_work(regs);
|
|
+
|
|
/* deal with pending signal delivery */
|
|
if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
|
|
do_signal(regs);
|
|
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
|
|
index 1f467d5..163dfe4 100644
|
|
--- a/arch/mips/kernel/time.c
|
|
+++ b/arch/mips/kernel/time.c
|
|
@@ -49,10 +49,11 @@ int update_persistent_clock(struct timespec now)
|
|
return rtc_mips_set_mmss(now.tv_sec);
|
|
}
|
|
|
|
-static int null_perf_irq(void)
|
|
+int null_perf_irq(void)
|
|
{
|
|
return 0;
|
|
}
|
|
+EXPORT_SYMBOL(null_perf_irq);
|
|
|
|
int (*perf_irq)(void) = null_perf_irq;
|
|
|
|
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
|
|
index b602ac6..9cbd75f 100644
|
|
--- a/arch/mips/kernel/traps.c
|
|
+++ b/arch/mips/kernel/traps.c
|
|
@@ -92,17 +92,15 @@ static void show_raw_backtrace(unsigned long reg29)
|
|
#ifdef CONFIG_KALLSYMS
|
|
printk("\n");
|
|
#endif
|
|
- while (!kstack_end(sp)) {
|
|
- unsigned long __user *p =
|
|
- (unsigned long __user *)(unsigned long)sp++;
|
|
- if (__get_user(addr, p)) {
|
|
- printk(" (Bad stack address)");
|
|
- break;
|
|
+#define IS_KVA01(a) ((((unsigned long)a) & 0xc0000000) == 0x80000000)
|
|
+ if (IS_KVA01(sp)) {
|
|
+ while (!kstack_end(sp)) {
|
|
+ addr = *sp++;
|
|
+ if (__kernel_text_address(addr))
|
|
+ print_ip_sym(addr);
|
|
}
|
|
- if (__kernel_text_address(addr))
|
|
- print_ip_sym(addr);
|
|
+ printk("\n");
|
|
}
|
|
- printk("\n");
|
|
}
|
|
|
|
#ifdef CONFIG_KALLSYMS
|
|
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
|
|
index 0b97d47..d8f36b5 100644
|
|
--- a/arch/mips/mti-malta/malta-time.c
|
|
+++ b/arch/mips/mti-malta/malta-time.c
|
|
@@ -27,6 +27,7 @@
|
|
#include <linux/time.h>
|
|
#include <linux/timex.h>
|
|
#include <linux/mc146818rtc.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/mipsregs.h>
|
|
#include <asm/mipsmtregs.h>
|
|
diff --git a/arch/mips/perfmon/Kconfig b/arch/mips/perfmon/Kconfig
|
|
new file mode 100644
|
|
index 0000000..b426eea
|
|
--- /dev/null
|
|
+++ b/arch/mips/perfmon/Kconfig
|
|
@@ -0,0 +1,61 @@
|
|
+menu "Hardware Performance Monitoring support"
|
|
+config PERFMON
|
|
+ bool "Perfmon2 performance monitoring interface"
|
|
+ default n
|
|
+ help
|
|
+ Enables the perfmon2 interface to access the hardware
|
|
+ performance counters. See <http://perfmon2.sf.net/> for
|
|
+ more details.
|
|
+
|
|
+config PERFMON_DEBUG
|
|
+ bool "Perfmon debugging"
|
|
+ default n
|
|
+ depends on PERFMON
|
|
+ help
|
|
+ Enables perfmon debugging support
|
|
+
|
|
+config PERFMON_DEBUG_FS
|
|
+ bool "Enable perfmon statistics reporting via debugfs"
|
|
+ default y
|
|
+ depends on PERFMON && DEBUG_FS
|
|
+ help
|
|
+ Enable collection and reporting of perfmon timing statistics under
|
|
+ debugfs. This is used for debugging and performance analysis of the
|
|
+ subsystem. The debugfs filesystem must be mounted.
|
|
+
|
|
+config PERFMON_FLUSH
|
|
+ bool "Flush sampling buffer when modified"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ On some MIPS models, cache aliasing may cause invalid
|
|
+ data to be read from the perfmon sampling buffer. Use this option
|
|
+ to flush the buffer when it is modified to ensure valid data is
|
|
+ visible at the user level.
|
|
+
|
|
+config PERFMON_ALIGN
|
|
+ bool "Align sampling buffer to avoid cache aliasing"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ On some MIPS models, cache aliasing may cause invalid
|
|
+ data to be read from the perfmon sampling buffer. By forcing a bigger
|
|
+ page alignment (4-page), one can guarantee the buffer virtual address
|
|
+ will conflict in the cache with the user level mapping of the buffer
|
|
+ thereby ensuring a consistent view by user programs.
|
|
+
|
|
+config PERFMON_DEBUG
|
|
+ bool "Perfmon debugging"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ depends on PERFMON
|
|
+ help
|
|
+ Enables perfmon debugging support
|
|
+
|
|
+config PERFMON_MIPS64
|
|
+ tristate "Support for MIPS64 hardware performance counters"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ Enables support for the MIPS64 hardware performance counters
|
|
+endmenu
|
|
diff --git a/arch/mips/perfmon/Makefile b/arch/mips/perfmon/Makefile
|
|
new file mode 100644
|
|
index 0000000..153b83f
|
|
--- /dev/null
|
|
+++ b/arch/mips/perfmon/Makefile
|
|
@@ -0,0 +1,2 @@
|
|
+obj-$(CONFIG_PERFMON) += perfmon.o
|
|
+obj-$(CONFIG_PERFMON_MIPS64) += perfmon_mips64.o
|
|
diff --git a/arch/mips/perfmon/perfmon.c b/arch/mips/perfmon/perfmon.c
|
|
new file mode 100644
|
|
index 0000000..6615a77
|
|
--- /dev/null
|
|
+++ b/arch/mips/perfmon/perfmon.c
|
|
@@ -0,0 +1,313 @@
|
|
+/*
|
|
+ * This file implements the MIPS64 specific
|
|
+ * support for the perfmon2 interface
|
|
+ *
|
|
+ * Copyright (c) 2005 Philip J. Mucci
|
|
+ *
|
|
+ * based on versions for other architectures:
|
|
+ * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@htrpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+/*
|
|
+ * collect pending overflowed PMDs. Called from pfm_ctxsw()
|
|
+ * and from PMU interrupt handler. Must fill in set->povfl_pmds[]
|
|
+ * and set->npend_ovfls. Interrupts are masked
|
|
+ */
|
|
+static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ u64 new_val, wmask;
|
|
+ u64 *used_mask, *intr_pmds;
|
|
+ u64 mask[PFM_PMD_BV];
|
|
+ unsigned int i, max;
|
|
+
|
|
+ max = ctx->regs.max_intr_pmd;
|
|
+ intr_pmds = ctx->regs.intr_pmds;
|
|
+ used_mask = set->used_pmds;
|
|
+
|
|
+ wmask = 1ULL << pfm_pmu_conf->counter_width;
|
|
+
|
|
+ bitmap_and(cast_ulp(mask),
|
|
+ cast_ulp(intr_pmds),
|
|
+ cast_ulp(used_mask),
|
|
+ max);
|
|
+
|
|
+ /*
|
|
+ * check all PMD that can generate interrupts
|
|
+ * (that includes counters)
|
|
+ */
|
|
+ for (i = 0; i < max; i++) {
|
|
+ if (test_bit(i, mask)) {
|
|
+ new_val = pfm_arch_read_pmd(ctx, i);
|
|
+
|
|
+ PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n",
|
|
+ i, (unsigned long long)new_val,
|
|
+ (new_val&wmask) ? 1 : 0);
|
|
+
|
|
+ if (new_val & wmask) {
|
|
+ __set_bit(i, set->povfl_pmds);
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int i, max;
|
|
+
|
|
+ max = ctx->regs.max_pmc;
|
|
+
|
|
+ /*
|
|
+ * clear enable bits, assume all pmcs are enable pmcs
|
|
+ */
|
|
+ for (i = 0; i < max; i++) {
|
|
+ if (test_bit(i, set->used_pmcs))
|
|
+ pfm_arch_write_pmc(ctx, i, 0);
|
|
+ }
|
|
+
|
|
+ if (set->npend_ovfls)
|
|
+ return;
|
|
+
|
|
+ __pfm_get_ovfl_pmds(ctx, set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_ctxsw(). Task is guaranteed to be current.
|
|
+ * Context is locked. Interrupts are masked. Monitoring is active.
|
|
+ * PMU access is guaranteed. PMC and PMD registers are live in PMU.
|
|
+ *
|
|
+ * for per-thread:
|
|
+ * must stop monitoring for the task
|
|
+ *
|
|
+ * Return:
|
|
+ * non-zero : did not save PMDs (as part of stopping the PMU)
|
|
+ * 0 : saved PMDs (no need to save them in caller)
|
|
+ */
|
|
+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ /*
|
|
+ * disable lazy restore of PMC registers.
|
|
+ */
|
|
+ ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
|
|
+
|
|
+ /*
|
|
+ * if masked, monitoring is stopped, thus there is no
|
|
+ * need to stop the PMU again and there is no need to
|
|
+ * check for pending overflows. This is not just an
|
|
+ * optimization, this is also for correctness as you
|
|
+ * may end up detecting overflows twice.
|
|
+ */
|
|
+ if (ctx->state == PFM_CTX_MASKED)
|
|
+ return 1;
|
|
+
|
|
+ pfm_stop_active(task, ctx, ctx->active_set);
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_stop() and pfm_ctxsw()
|
|
+ * Interrupts are masked. Context is locked. Set is the active set.
|
|
+ *
|
|
+ * For per-thread:
|
|
+ * task is not necessarily current. If not current task, then
|
|
+ * task is guaranteed stopped and off any cpu. Access to PMU
|
|
+ * is not guaranteed. Interrupts are masked. Context is locked.
|
|
+ * Set is the active set.
|
|
+ *
|
|
+ * For system-wide:
|
|
+ * task is current
|
|
+ *
|
|
+ * must disable active monitoring. ctx cannot be NULL
|
|
+ */
|
|
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ /*
|
|
+ * no need to go through stop_save()
|
|
+ * if we are already stopped
|
|
+ */
|
|
+ if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * stop live registers and collect pending overflow
|
|
+ */
|
|
+ if (task == current)
|
|
+ pfm_stop_active(task, ctx, ctx->active_set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * called from pfm_start() or pfm_ctxsw() when idle task and
|
|
+ * EXCL_IDLE is on.
|
|
+ *
|
|
+ * Interrupts are masked. Context is locked. Set is the active set.
|
|
+ *
|
|
+ * For per-thread:
|
|
+ * Task is not necessarily current. If not current task, then task
|
|
+ * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed.
|
|
+ *
|
|
+ * For system-wide:
|
|
+ * task is always current
|
|
+ *
|
|
+ * must enable active monitoring.
|
|
+ */
|
|
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_event_set *set;
|
|
+ unsigned int i, max_pmc;
|
|
+
|
|
+ if (task != current)
|
|
+ return;
|
|
+
|
|
+ set = ctx->active_set;
|
|
+ max_pmc = ctx->regs.max_pmc;
|
|
+
|
|
+ for (i = 0; i < max_pmc; i++) {
|
|
+ if (test_bit(i, set->used_pmcs))
|
|
+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
|
|
+ * pfm_context_load_sys(), pfm_ctxsw().
|
|
+ * context is locked. Interrupts are masked. set cannot be NULL.
|
|
+ * Access to the PMU is guaranteed.
|
|
+ *
|
|
+ * function must restore all PMD registers from set.
|
|
+ */
|
|
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ u64 ovfl_mask, val;
|
|
+ u64 *impl_pmds;
|
|
+ unsigned int i;
|
|
+ unsigned int max_pmd;
|
|
+
|
|
+ max_pmd = ctx->regs.max_pmd;
|
|
+ ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+ impl_pmds = ctx->regs.pmds;
|
|
+
|
|
+ /*
|
|
+ * must restore all pmds to avoid leaking
|
|
+ * information to user.
|
|
+ */
|
|
+ for (i = 0; i < max_pmd; i++) {
|
|
+
|
|
+ if (test_bit(i, impl_pmds) == 0)
|
|
+ continue;
|
|
+
|
|
+ val = set->pmds[i].value;
|
|
+
|
|
+ /*
|
|
+ * set upper bits for counter to ensure
|
|
+ * overflow will trigger
|
|
+ */
|
|
+ val &= ovfl_mask;
|
|
+
|
|
+ pfm_arch_write_pmd(ctx, i, val);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
|
|
+ * pfm_context_load_sys(), pfm_ctxsw().
|
|
+ * Context is locked. Interrupts are masked. set cannot be NULL.
|
|
+ * Access to the PMU is guaranteed.
|
|
+ *
|
|
+ * function must restore all PMC registers from set, if needed.
|
|
+ */
|
|
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ u64 *impl_pmcs;
|
|
+ unsigned int i, max_pmc;
|
|
+
|
|
+ max_pmc = ctx->regs.max_pmc;
|
|
+ impl_pmcs = ctx->regs.pmcs;
|
|
+
|
|
+ /*
|
|
+ * - by default no PMC measures anything
|
|
+ * - on ctxswout, all used PMCs are disabled (cccr enable bit cleared)
|
|
+ * hence when masked we do not need to restore anything
|
|
+ */
|
|
+ if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * restore all pmcs
|
|
+ */
|
|
+ for (i = 0; i < max_pmc; i++)
|
|
+ if (test_bit(i, impl_pmcs))
|
|
+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
|
|
+}
|
|
+
|
|
+char *pfm_arch_get_pmu_module_name(void)
|
|
+{
|
|
+ switch (cpu_data->cputype) {
|
|
+#ifndef CONFIG_SMP
|
|
+ case CPU_34K:
|
|
+#if defined(CPU_74K)
|
|
+ case CPU_74K:
|
|
+#endif
|
|
+#endif
|
|
+ case CPU_SB1:
|
|
+ case CPU_SB1A:
|
|
+ case CPU_R12000:
|
|
+ case CPU_25KF:
|
|
+ case CPU_24K:
|
|
+ case CPU_20KC:
|
|
+ case CPU_5KC:
|
|
+ return "perfmon_mips64";
|
|
+ default:
|
|
+ return NULL;
|
|
+ }
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+int perfmon_perf_irq(void)
|
|
+{
|
|
+ /* BLATANTLY STOLEN FROM OPROFILE, then modified */
|
|
+ struct pt_regs *regs;
|
|
+ unsigned int counters = pfm_pmu_conf->regs_all.max_pmc;
|
|
+ unsigned int control;
|
|
+ unsigned int counter;
|
|
+
|
|
+ regs = get_irq_regs();
|
|
+ switch (counters) {
|
|
+#define HANDLE_COUNTER(n) \
|
|
+ case n + 1: \
|
|
+ control = read_c0_perfctrl ## n(); \
|
|
+ counter = read_c0_perfcntr ## n(); \
|
|
+ if ((control & MIPS64_PMC_INT_ENABLE_MASK) && \
|
|
+ (counter & MIPS64_PMD_INTERRUPT)) { \
|
|
+ pfm_interrupt_handler(instruction_pointer(regs),\
|
|
+ regs); \
|
|
+ return(1); \
|
|
+ }
|
|
+ HANDLE_COUNTER(3)
|
|
+ HANDLE_COUNTER(2)
|
|
+ HANDLE_COUNTER(1)
|
|
+ HANDLE_COUNTER(0)
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+EXPORT_SYMBOL(perfmon_perf_irq);
|
|
diff --git a/arch/mips/perfmon/perfmon_mips64.c b/arch/mips/perfmon/perfmon_mips64.c
|
|
new file mode 100644
|
|
index 0000000..78cb43d
|
|
--- /dev/null
|
|
+++ b/arch/mips/perfmon/perfmon_mips64.c
|
|
@@ -0,0 +1,218 @@
|
|
+/*
|
|
+ * This file contains the MIPS64 and descendant PMU register description tables
|
|
+ * and pmc checker used by perfmon.c.
|
|
+ *
|
|
+ * Copyright (c) 2005 Philip Mucci
|
|
+ *
|
|
+ * Based on perfmon_p6.c:
|
|
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+MODULE_AUTHOR("Philip Mucci <mucci@cs.utk.edu>");
|
|
+MODULE_DESCRIPTION("MIPS64 PMU description tables");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+/*
|
|
+ * reserved:
|
|
+ * - bit 63-9
|
|
+ * RSVD: reserved bits must be 1
|
|
+ */
|
|
+#define PFM_MIPS64_PMC_RSVD 0xfffffffffffff810ULL
|
|
+#define PFM_MIPS64_PMC_VAL (1ULL<<4)
|
|
+
|
|
+extern int null_perf_irq(struct pt_regs *regs);
|
|
+extern int (*perf_irq)(struct pt_regs *regs);
|
|
+extern int perfmon_perf_irq(struct pt_regs *regs);
|
|
+
|
|
+static struct pfm_arch_pmu_info pfm_mips64_pmu_info;
|
|
+
|
|
+static struct pfm_regmap_desc pfm_mips64_pmc_desc[] = {
|
|
+/* pmc0 */ PMC_D(PFM_REG_I64, "CP0_25_0", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 0),
|
|
+/* pmc1 */ PMC_D(PFM_REG_I64, "CP0_25_1", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 1),
|
|
+/* pmc2 */ PMC_D(PFM_REG_I64, "CP0_25_2", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 2),
|
|
+/* pmc3 */ PMC_D(PFM_REG_I64, "CP0_25_3", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 3)
|
|
+};
|
|
+#define PFM_MIPS64_NUM_PMCS ARRAY_SIZE(pfm_mips64_pmc_desc)
|
|
+
|
|
+static struct pfm_regmap_desc pfm_mips64_pmd_desc[] = {
|
|
+/* pmd0 */ PMD_D(PFM_REG_C, "CP0_25_0", 0),
|
|
+/* pmd1 */ PMD_D(PFM_REG_C, "CP0_25_1", 1),
|
|
+/* pmd2 */ PMD_D(PFM_REG_C, "CP0_25_2", 2),
|
|
+/* pmd3 */ PMD_D(PFM_REG_C, "CP0_25_3", 3)
|
|
+};
|
|
+#define PFM_MIPS64_NUM_PMDS ARRAY_SIZE(pfm_mips64_pmd_desc)
|
|
+
|
|
+static int pfm_mips64_probe_pmu(void)
|
|
+{
|
|
+ struct cpuinfo_mips *c = &current_cpu_data;
|
|
+
|
|
+ switch (c->cputype) {
|
|
+#ifndef CONFIG_SMP
|
|
+ case CPU_34K:
|
|
+#if defined(CPU_74K)
|
|
+ case CPU_74K:
|
|
+#endif
|
|
+#endif
|
|
+ case CPU_SB1:
|
|
+ case CPU_SB1A:
|
|
+ case CPU_R12000:
|
|
+ case CPU_25KF:
|
|
+ case CPU_24K:
|
|
+ case CPU_20KC:
|
|
+ case CPU_5KC:
|
|
+ return 0;
|
|
+ break;
|
|
+ default:
|
|
+ PFM_INFO("Unknown cputype 0x%x", c->cputype);
|
|
+ }
|
|
+ return -1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
+ */
|
|
+static struct pfm_pmu_config pfm_mips64_pmu_conf = {
|
|
+ .pmu_name = "MIPS", /* placeholder */
|
|
+ .counter_width = 31,
|
|
+ .pmd_desc = pfm_mips64_pmd_desc,
|
|
+ .pmc_desc = pfm_mips64_pmc_desc,
|
|
+ .num_pmc_entries = PFM_MIPS64_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_MIPS64_NUM_PMDS,
|
|
+ .probe_pmu = pfm_mips64_probe_pmu,
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+ .pmu_info = &pfm_mips64_pmu_info
|
|
+};
|
|
+
|
|
+static inline int n_counters(void)
|
|
+{
|
|
+ if (!(read_c0_config1() & MIPS64_CONFIG_PMC_MASK))
|
|
+ return 0;
|
|
+ if (!(read_c0_perfctrl0() & MIPS64_PMC_CTR_MASK))
|
|
+ return 1;
|
|
+ if (!(read_c0_perfctrl1() & MIPS64_PMC_CTR_MASK))
|
|
+ return 2;
|
|
+ if (!(read_c0_perfctrl2() & MIPS64_PMC_CTR_MASK))
|
|
+ return 3;
|
|
+ return 4;
|
|
+}
|
|
+
|
|
+static int __init pfm_mips64_pmu_init_module(void)
|
|
+{
|
|
+ struct cpuinfo_mips *c = &current_cpu_data;
|
|
+ int i, ret, num;
|
|
+ u64 temp_mask;
|
|
+
|
|
+ switch (c->cputype) {
|
|
+ case CPU_5KC:
|
|
+ pfm_mips64_pmu_conf.pmu_name = "MIPS5KC";
|
|
+ break;
|
|
+ case CPU_R12000:
|
|
+ pfm_mips64_pmu_conf.pmu_name = "MIPSR12000";
|
|
+ break;
|
|
+ case CPU_20KC:
|
|
+ pfm_mips64_pmu_conf.pmu_name = "MIPS20KC";
|
|
+ break;
|
|
+ case CPU_24K:
|
|
+ pfm_mips64_pmu_conf.pmu_name = "MIPS24K";
|
|
+ break;
|
|
+ case CPU_25KF:
|
|
+ pfm_mips64_pmu_conf.pmu_name = "MIPS25KF";
|
|
+ break;
|
|
+ case CPU_SB1:
|
|
+ pfm_mips64_pmu_conf.pmu_name = "SB1";
|
|
+ break;
|
|
+ case CPU_SB1A:
|
|
+ pfm_mips64_pmu_conf.pmu_name = "SB1A";
|
|
+ break;
|
|
+#ifndef CONFIG_SMP
|
|
+ case CPU_34K:
|
|
+ pfm_mips64_pmu_conf.pmu_name = "MIPS34K";
|
|
+ break;
|
|
+#if defined(CPU_74K)
|
|
+ case CPU_74K:
|
|
+ pfm_mips64_pmu_conf.pmu_name = "MIPS74K";
|
|
+ break;
|
|
+#endif
|
|
+#endif
|
|
+ default:
|
|
+ PFM_INFO("Unknown cputype 0x%x", c->cputype);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ /* The R14k and older performance counters have to */
|
|
+ /* be hard-coded, as there is no support for auto-detection */
|
|
+ if ((c->cputype == CPU_R12000) || (c->cputype == CPU_R14000))
|
|
+ num = 4;
|
|
+ else if (c->cputype == CPU_R10000)
|
|
+ num = 2;
|
|
+ else
|
|
+ num = n_counters();
|
|
+
|
|
+ if (num == 0) {
|
|
+ PFM_INFO("cputype 0x%x has no counters", c->cputype);
|
|
+ return -1;
|
|
+ }
|
|
+ /* mark remaining counters unavailable */
|
|
+ for (i = num; i < PFM_MIPS64_NUM_PMCS; i++)
|
|
+ pfm_mips64_pmc_desc[i].type = PFM_REG_NA;
|
|
+
|
|
+ for (i = num; i < PFM_MIPS64_NUM_PMDS; i++)
|
|
+ pfm_mips64_pmd_desc[i].type = PFM_REG_NA;
|
|
+
|
|
+ /* set the PMC_RSVD mask */
|
|
+ switch (c->cputype) {
|
|
+ case CPU_5KC:
|
|
+ case CPU_R10000:
|
|
+ case CPU_20KC:
|
|
+ /* 4-bits for event */
|
|
+ temp_mask = 0xfffffffffffffe10ULL;
|
|
+ break;
|
|
+ case CPU_R12000:
|
|
+ case CPU_R14000:
|
|
+ /* 5-bits for event */
|
|
+ temp_mask = 0xfffffffffffffc10ULL;
|
|
+ break;
|
|
+ default:
|
|
+ /* 6-bits for event */
|
|
+ temp_mask = 0xfffffffffffff810ULL;
|
|
+ }
|
|
+ for (i = 0; i < PFM_MIPS64_NUM_PMCS; i++)
|
|
+ pfm_mips64_pmc_desc[i].rsvd_msk = temp_mask;
|
|
+
|
|
+ pfm_mips64_pmu_conf.num_pmc_entries = num;
|
|
+ pfm_mips64_pmu_conf.num_pmd_entries = num;
|
|
+
|
|
+ pfm_mips64_pmu_info.pmu_style = c->cputype;
|
|
+
|
|
+ ret = pfm_pmu_register(&pfm_mips64_pmu_conf);
|
|
+ if (ret == 0)
|
|
+ perf_irq = perfmon_perf_irq;
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static void __exit pfm_mips64_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_mips64_pmu_conf);
|
|
+ perf_irq = null_perf_irq;
|
|
+}
|
|
+
|
|
+module_init(pfm_mips64_pmu_init_module);
|
|
+module_exit(pfm_mips64_pmu_cleanup_module);
|
|
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
|
|
index 587da5e..a411389 100644
|
|
--- a/arch/powerpc/Kconfig
|
|
+++ b/arch/powerpc/Kconfig
|
|
@@ -230,6 +230,8 @@ source "init/Kconfig"
|
|
source "arch/powerpc/sysdev/Kconfig"
|
|
source "arch/powerpc/platforms/Kconfig"
|
|
|
|
+source "arch/powerpc/perfmon/Kconfig"
|
|
+
|
|
menu "Kernel options"
|
|
|
|
config HIGHMEM
|
|
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
|
|
index c6be19e..7ea20cb 100644
|
|
--- a/arch/powerpc/Makefile
|
|
+++ b/arch/powerpc/Makefile
|
|
@@ -146,6 +146,7 @@ core-y += arch/powerpc/kernel/ \
|
|
arch/powerpc/platforms/
|
|
core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/
|
|
core-$(CONFIG_XMON) += arch/powerpc/xmon/
|
|
+core-$(CONFIG_PERFMON) += arch/powerpc/perfmon/
|
|
core-$(CONFIG_KVM) += arch/powerpc/kvm/
|
|
|
|
drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
|
|
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
|
|
index 5ab7d7f..88cb533 100644
|
|
--- a/arch/powerpc/include/asm/Kbuild
|
|
+++ b/arch/powerpc/include/asm/Kbuild
|
|
@@ -21,6 +21,7 @@ header-y += resource.h
|
|
header-y += sigcontext.h
|
|
header-y += statfs.h
|
|
header-y += ps3fb.h
|
|
+header-y += perfmon.h
|
|
|
|
unifdef-y += bootx.h
|
|
unifdef-y += byteorder.h
|
|
diff --git a/arch/powerpc/include/asm/cell-pmu.h b/arch/powerpc/include/asm/cell-pmu.h
|
|
index 8066eed..981db26 100644
|
|
--- a/arch/powerpc/include/asm/cell-pmu.h
|
|
+++ b/arch/powerpc/include/asm/cell-pmu.h
|
|
@@ -61,6 +61,11 @@
|
|
|
|
/* Macros for the pm_status register. */
|
|
#define CBE_PM_CTR_OVERFLOW_INTR(ctr) (1 << (31 - ((ctr) & 7)))
|
|
+#define CBE_PM_OVERFLOW_CTRS(pm_status) (((pm_status) >> 24) & 0xff)
|
|
+#define CBE_PM_ALL_OVERFLOW_INTR 0xff000000
|
|
+#define CBE_PM_INTERVAL_INTR 0x00800000
|
|
+#define CBE_PM_TRACE_BUFFER_FULL_INTR 0x00400000
|
|
+#define CBE_PM_TRACE_BUFFER_UNDERFLOW_INTR 0x00200000
|
|
|
|
enum pm_reg_name {
|
|
group_control,
|
|
diff --git a/arch/powerpc/include/asm/cell-regs.h b/arch/powerpc/include/asm/cell-regs.h
|
|
index fd6fd00..580786d 100644
|
|
--- a/arch/powerpc/include/asm/cell-regs.h
|
|
+++ b/arch/powerpc/include/asm/cell-regs.h
|
|
@@ -117,8 +117,9 @@ struct cbe_pmd_regs {
|
|
u8 pad_0x0c1c_0x0c20 [4]; /* 0x0c1c */
|
|
#define CBE_PMD_FIR_MODE_M8 0x00800
|
|
u64 fir_enable_mask; /* 0x0c20 */
|
|
-
|
|
- u8 pad_0x0c28_0x0ca8 [0x0ca8 - 0x0c28]; /* 0x0c28 */
|
|
+ u8 pad_0x0c28_0x0c98 [0x0c98 - 0x0c28]; /* 0x0c28 */
|
|
+ u64 on_ramp_trace; /* 0x0c98 */
|
|
+ u64 pad_0x0ca0; /* 0x0ca0 */
|
|
u64 ras_esc_0; /* 0x0ca8 */
|
|
u8 pad_0x0cb0_0x1000 [0x1000 - 0x0cb0]; /* 0x0cb0 */
|
|
};
|
|
@@ -218,7 +219,11 @@ extern struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu);
|
|
|
|
|
|
struct cbe_mic_tm_regs {
|
|
- u8 pad_0x0000_0x0040[0x0040 - 0x0000]; /* 0x0000 */
|
|
+ u8 pad_0x0000_0x0010[0x0010 - 0x0000]; /* 0x0000 */
|
|
+
|
|
+ u64 MBL_debug; /* 0x0010 */
|
|
+
|
|
+ u8 pad_0x0018_0x0040[0x0040 - 0x0018]; /* 0x0018 */
|
|
|
|
u64 mic_ctl_cnfg2; /* 0x0040 */
|
|
#define CBE_MIC_ENABLE_AUX_TRC 0x8000000000000000LL
|
|
@@ -303,6 +308,25 @@ struct cbe_mic_tm_regs {
|
|
extern struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np);
|
|
extern struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu);
|
|
|
|
+/*
|
|
+ *
|
|
+ * PPE Privileged MMIO Registers definition. (offset 0x500000 - 0x500fff)
|
|
+ *
|
|
+ */
|
|
+struct cbe_ppe_priv_regs {
|
|
+ u8 pad_0x0000_0x0858[0x0858 - 0x0000]; /* 0x0000 */
|
|
+
|
|
+ u64 L2_debug1; /* 0x0858 */
|
|
+
|
|
+ u8 pad_0x0860_0x0958[0x0958 - 0x0860]; /* 0x0860 */
|
|
+
|
|
+ u64 ciu_dr1; /* 0x0958 */
|
|
+
|
|
+ u8 pad_0x0960_0x1000[0x1000 - 0x0960]; /* 0x0960 */
|
|
+};
|
|
+
|
|
+extern struct cbe_ppe_priv_regs __iomem *cbe_get_cpu_ppe_priv_regs(int cpu);
|
|
+
|
|
/* some utility functions to deal with SMT */
|
|
extern u32 cbe_get_hw_thread_id(int cpu);
|
|
extern u32 cbe_cpu_to_node(int cpu);
|
|
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
|
|
index 6493a39..ba9ead4 100644
|
|
--- a/arch/powerpc/include/asm/paca.h
|
|
+++ b/arch/powerpc/include/asm/paca.h
|
|
@@ -97,6 +97,10 @@ struct paca_struct {
|
|
u8 soft_enabled; /* irq soft-enable flag */
|
|
u8 hard_enabled; /* set if irqs are enabled in MSR */
|
|
u8 io_sync; /* writel() needs spin_unlock sync */
|
|
+#ifdef CONFIG_PERFMON
|
|
+ u8 pmu_except_pending; /* PMU exception occurred while soft
|
|
+ * disabled */
|
|
+#endif
|
|
|
|
/* Stuff for accurate time accounting */
|
|
u64 user_time; /* accumulated usermode TB ticks */
|
|
diff --git a/arch/powerpc/include/asm/perfmon.h b/arch/powerpc/include/asm/perfmon.h
|
|
new file mode 100644
|
|
index 0000000..da0ae3b
|
|
--- /dev/null
|
|
+++ b/arch/powerpc/include/asm/perfmon.h
|
|
@@ -0,0 +1,33 @@
|
|
+/*
|
|
+ * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This file contains powerpc specific definitions for the perfmon
|
|
+ * interface.
|
|
+ *
|
|
+ * This file MUST never be included directly. Use linux/perfmon.h.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#ifndef _ASM_POWERPC_PERFMON_H_
|
|
+#define _ASM_POWERPC_PERFMON_H_
|
|
+
|
|
+/*
|
|
+ * arch-specific user visible interface definitions
|
|
+ */
|
|
+#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */
|
|
+#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */
|
|
+
|
|
+#endif /* _ASM_POWERPC_PERFMON_H_ */
|
|
diff --git a/arch/powerpc/include/asm/perfmon_kern.h b/arch/powerpc/include/asm/perfmon_kern.h
|
|
new file mode 100644
|
|
index 0000000..65ec984
|
|
--- /dev/null
|
|
+++ b/arch/powerpc/include/asm/perfmon_kern.h
|
|
@@ -0,0 +1,390 @@
|
|
+/*
|
|
+ * Copyright (c) 2005 David Gibson, IBM Corporation.
|
|
+ *
|
|
+ * Based on other versions:
|
|
+ * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This file contains powerpc specific definitions for the perfmon
|
|
+ * interface.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#ifndef _ASM_POWERPC_PERFMON_KERN_H_
|
|
+#define _ASM_POWERPC_PERFMON_KERN_H_
|
|
+
|
|
+#ifdef __KERNEL__
|
|
+
|
|
+#ifdef CONFIG_PERFMON
|
|
+
|
|
+#include <asm/pmc.h>
|
|
+#include <asm/unistd.h>
|
|
+
|
|
+#define HID0_PMC5_6_GR_MODE (1UL << (63 - 40))
|
|
+
|
|
+enum powerpc_pmu_type {
|
|
+ PFM_POWERPC_PMU_NONE,
|
|
+ PFM_POWERPC_PMU_604,
|
|
+ PFM_POWERPC_PMU_604e,
|
|
+ PFM_POWERPC_PMU_750, /* XXX: Minor event set diffs between IBM and Moto. */
|
|
+ PFM_POWERPC_PMU_7400,
|
|
+ PFM_POWERPC_PMU_7450,
|
|
+ PFM_POWERPC_PMU_POWER4,
|
|
+ PFM_POWERPC_PMU_POWER5,
|
|
+ PFM_POWERPC_PMU_POWER5p,
|
|
+ PFM_POWERPC_PMU_POWER6,
|
|
+ PFM_POWERPC_PMU_CELL,
|
|
+};
|
|
+
|
|
+struct pfm_arch_pmu_info {
|
|
+ enum powerpc_pmu_type pmu_style;
|
|
+
|
|
+ void (*write_pmc)(unsigned int cnum, u64 value);
|
|
+ void (*write_pmd)(unsigned int cnum, u64 value);
|
|
+
|
|
+ u64 (*read_pmd)(unsigned int cnum);
|
|
+
|
|
+ void (*enable_counters)(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+ void (*disable_counters)(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+
|
|
+ void (*irq_handler)(struct pt_regs *regs, struct pfm_context *ctx);
|
|
+ void (*get_ovfl_pmds)(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+
|
|
+ /* The following routines are optional. */
|
|
+ void (*restore_pmcs)(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+ void (*restore_pmds)(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+
|
|
+ int (*ctxswout_thread)(struct task_struct *task,
|
|
+ struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+ void (*ctxswin_thread)(struct task_struct *task,
|
|
+ struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+ int (*load_context)(struct pfm_context *ctx);
|
|
+ void (*unload_context)(struct pfm_context *ctx);
|
|
+ int (*acquire_pmu)(u64 *unavail_pmcs, u64 *unavail_pmds);
|
|
+ void (*release_pmu)(void);
|
|
+ void *platform_info;
|
|
+ void (*resend_irq)(struct pfm_context *ctx);
|
|
+};
|
|
+
|
|
+#ifdef CONFIG_PPC32
|
|
+#define PFM_ARCH_PMD_STK_ARG 6 /* conservative value */
|
|
+#define PFM_ARCH_PMC_STK_ARG 6 /* conservative value */
|
|
+#else
|
|
+#define PFM_ARCH_PMD_STK_ARG 8 /* conservative value */
|
|
+#define PFM_ARCH_PMC_STK_ARG 8 /* conservative value */
|
|
+#endif
|
|
+
|
|
+static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ arch_info->resend_irq(ctx);
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_serialize(void)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
|
|
+ unsigned int cnum,
|
|
+ u64 value)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+
|
|
+ /*
|
|
+ * we only write to the actual register when monitoring is
|
|
+ * active (pfm_start was issued)
|
|
+ */
|
|
+ if (ctx && ctx->flags.started == 0)
|
|
+ return;
|
|
+
|
|
+ BUG_ON(!arch_info->write_pmc);
|
|
+
|
|
+ arch_info->write_pmc(cnum, value);
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
|
|
+ unsigned int cnum, u64 value)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+
|
|
+ value &= pfm_pmu_conf->ovfl_mask;
|
|
+
|
|
+ BUG_ON(!arch_info->write_pmd);
|
|
+
|
|
+ arch_info->write_pmd(cnum, value);
|
|
+}
|
|
+
|
|
+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+
|
|
+ BUG_ON(!arch_info->read_pmd);
|
|
+
|
|
+ return arch_info->read_pmd(cnum);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * For some CPUs, the upper bits of a counter must be set in order for the
|
|
+ * overflow interrupt to happen. On overflow, the counter has wrapped around,
|
|
+ * and the upper bits are cleared. This function may be used to set them back.
|
|
+ */
|
|
+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
|
|
+ unsigned int cnum)
|
|
+{
|
|
+ u64 val = pfm_arch_read_pmd(ctx, cnum);
|
|
+
|
|
+ /* This masks out overflow bit 31 */
|
|
+ pfm_arch_write_pmd(ctx, cnum, val);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * At certain points, perfmon needs to know if monitoring has been
|
|
+ * explicitly started/stopped by user via pfm_start/pfm_stop. The
|
|
+ * information is tracked in flags.started. However on certain
|
|
+ * architectures, it may be possible to start/stop directly from
|
|
+ * user level with a single assembly instruction bypassing
|
|
+ * the kernel. This function must be used to determine by
|
|
+ * arch-specific means whether monitoring is actually started/stopped.
|
|
+ */
|
|
+static inline int pfm_arch_is_active(struct pfm_context *ctx)
|
|
+{
|
|
+ return ctx->flags.started;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+void pfm_arch_init_percpu(void);
|
|
+int pfm_arch_is_monitoring_active(struct pfm_context *ctx);
|
|
+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+int pfm_arch_get_ovfl_pmds(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+char *pfm_arch_get_pmu_module_name(void);
|
|
+/*
|
|
+ * called from __pfm_interrupt_handler(). ctx is not NULL.
|
|
+ * ctx is locked. PMU interrupt is masked.
|
|
+ *
|
|
+ * must stop all monitoring to ensure handler has consistent view.
|
|
+ * must collect overflowed PMDs bitmask into povfls_pmds and
|
|
+ * npend_ovfls. If no interrupt detected then npend_ovfls
|
|
+ * must be set to zero.
|
|
+ */
|
|
+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ pfm_arch_stop(current, ctx);
|
|
+}
|
|
+
|
|
+void powerpc_irq_handler(struct pt_regs *regs);
|
|
+
|
|
+/*
|
|
+ * unfreeze PMU from pfm_do_interrupt_handler()
|
|
+ * ctx may be NULL for spurious
|
|
+ */
|
|
+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+
|
|
+ if (!ctx)
|
|
+ return;
|
|
+
|
|
+ PFM_DBG_ovfl("state=%d", ctx->state);
|
|
+
|
|
+ ctx->flags.started = 1;
|
|
+
|
|
+ if (ctx->state == PFM_CTX_MASKED)
|
|
+ return;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ BUG_ON(!arch_info->enable_counters);
|
|
+ arch_info->enable_counters(ctx, ctx->active_set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * PowerPC does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus
|
|
+ * this routine needs to do it when switching sets on overflow
|
|
+ */
|
|
+static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ pfm_save_pmds(ctx, set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * this function is called from the PMU interrupt handler ONLY.
|
|
+ * On PPC, the PMU is frozen via arch_stop, masking would be implemented
|
|
+ * via arch-stop as well. Given that the PMU is already stopped when
|
|
+ * entering the interrupt handler, we do not need to stop it again, so
|
|
+ * this function is a nop.
|
|
+ */
|
|
+static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{}
|
|
+
|
|
+/*
|
|
+ * Simply need to start the context in order to unmask.
|
|
+ */
|
|
+static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ pfm_arch_start(current, ctx);
|
|
+}
|
|
+
|
|
+
|
|
+static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_context_create(struct pfm_context *ctx,
|
|
+ u32 ctx_flags)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_context_free(struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+/* not necessary on PowerPC */
|
|
+static inline void pfm_cacheflush(void *addr, unsigned int len)
|
|
+{}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_setfl_sane(). Context is locked
|
|
+ * and interrupts are masked.
|
|
+ * The value of flags is the value of ctx_flags as passed by
|
|
+ * user.
|
|
+ *
|
|
+ * function must check arch-specific set flags.
|
|
+ * Return:
|
|
+ * 1 when flags are valid
|
|
+ * 0 on error
|
|
+ */
|
|
+static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_init(void)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_load_context(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+ int rc = 0;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ if (arch_info->load_context)
|
|
+ rc = arch_info->load_context(ctx);
|
|
+
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_unload_context(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ if (arch_info->unload_context)
|
|
+ arch_info->unload_context(ctx);
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+ int rc = 0;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ if (arch_info->acquire_pmu) {
|
|
+ rc = arch_info->acquire_pmu(unavail_pmcs, unavail_pmds);
|
|
+ if (rc)
|
|
+ return rc;
|
|
+ }
|
|
+
|
|
+ return reserve_pmc_hardware(powerpc_irq_handler);
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_pmu_release(void)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ if (arch_info->release_pmu)
|
|
+ arch_info->release_pmu();
|
|
+
|
|
+ release_pmc_hardware();
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_arm_handle_work(struct task_struct *task)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
|
|
+{}
|
|
+
|
|
+static inline int pfm_arch_get_base_syscall(void)
|
|
+{
|
|
+ return __NR_pfm_create_context;
|
|
+}
|
|
+
|
|
+struct pfm_arch_context {
|
|
+ /* Cell: Most recent value of the pm_status
|
|
+ * register read by the interrupt handler.
|
|
+ *
|
|
+ * Interrupt handler sets last_read_updated if it
|
|
+ * just read and updated last_read_pm_status
|
|
+ */
|
|
+ u32 last_read_pm_status;
|
|
+ u32 last_read_updated;
|
|
+ u64 powergs_pmc5, powergs_pmc6;
|
|
+ u64 delta_tb, delta_tb_start;
|
|
+ u64 delta_purr, delta_purr_start;
|
|
+};
|
|
+
|
|
+#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context)
|
|
+/*
|
|
+ * PowerPC does not need extra alignment requirements for the sampling buffer
|
|
+ */
|
|
+#define PFM_ARCH_SMPL_ALIGN_SIZE 0
|
|
+
|
|
+#endif /* CONFIG_PERFMON */
|
|
+
|
|
+#endif /* __KERNEL__ */
|
|
+#endif /* _ASM_POWERPC_PERFMON_KERN_H_ */
|
|
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
|
|
index c6d1ab6..a9f3ad0 100644
|
|
--- a/arch/powerpc/include/asm/reg.h
|
|
+++ b/arch/powerpc/include/asm/reg.h
|
|
@@ -698,6 +698,7 @@
|
|
#define PV_POWER5 0x003A
|
|
#define PV_POWER5p 0x003B
|
|
#define PV_970FX 0x003C
|
|
+#define PV_POWER6 0x003E
|
|
#define PV_630 0x0040
|
|
#define PV_630p 0x0041
|
|
#define PV_970MP 0x0044
|
|
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
|
|
index f6cc7a4..0164841 100644
|
|
--- a/arch/powerpc/include/asm/systbl.h
|
|
+++ b/arch/powerpc/include/asm/systbl.h
|
|
@@ -322,3 +322,15 @@ SYSCALL_SPU(epoll_create1)
|
|
SYSCALL_SPU(dup3)
|
|
SYSCALL_SPU(pipe2)
|
|
SYSCALL(inotify_init1)
|
|
+SYSCALL(pfm_create_context)
|
|
+SYSCALL(pfm_write_pmcs)
|
|
+SYSCALL(pfm_write_pmds)
|
|
+SYSCALL(pfm_read_pmds)
|
|
+SYSCALL(pfm_load_context)
|
|
+SYSCALL(pfm_start)
|
|
+SYSCALL(pfm_stop)
|
|
+SYSCALL(pfm_restart)
|
|
+SYSCALL(pfm_create_evtsets)
|
|
+SYSCALL(pfm_getinfo_evtsets)
|
|
+SYSCALL(pfm_delete_evtsets)
|
|
+SYSCALL(pfm_unload_context)
|
|
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
|
|
index 9665a26..6cda9f9 100644
|
|
--- a/arch/powerpc/include/asm/thread_info.h
|
|
+++ b/arch/powerpc/include/asm/thread_info.h
|
|
@@ -130,10 +130,12 @@ static inline struct thread_info *current_thread_info(void)
|
|
#define _TIF_FREEZE (1<<TIF_FREEZE)
|
|
#define _TIF_RUNLATCH (1<<TIF_RUNLATCH)
|
|
#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
|
|
+#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
|
|
+#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
|
|
#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP)
|
|
|
|
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
|
|
- _TIF_NOTIFY_RESUME)
|
|
+ _TIF_NOTIFY_RESUME | _TIF_PERFMON_WORK)
|
|
#define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
|
|
|
|
/* Bits in local_flags */
|
|
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
|
|
index e07d0c7..6226cba 100644
|
|
--- a/arch/powerpc/include/asm/unistd.h
|
|
+++ b/arch/powerpc/include/asm/unistd.h
|
|
@@ -341,10 +341,22 @@
|
|
#define __NR_dup3 316
|
|
#define __NR_pipe2 317
|
|
#define __NR_inotify_init1 318
|
|
+#define __NR_pfm_create_context 319
|
|
+#define __NR_pfm_write_pmcs 320
|
|
+#define __NR_pfm_write_pmds 321
|
|
+#define __NR_pfm_read_pmds 322
|
|
+#define __NR_pfm_load_context 323
|
|
+#define __NR_pfm_start 324
|
|
+#define __NR_pfm_stop 325
|
|
+#define __NR_pfm_restart 326
|
|
+#define __NR_pfm_create_evtsets 327
|
|
+#define __NR_pfm_getinfo_evtsets 328
|
|
+#define __NR_pfm_delete_evtsets 329
|
|
+#define __NR_pfm_unload_context 330
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
-#define __NR_syscalls 319
|
|
+#define __NR_syscalls 331
|
|
|
|
#define __NR__exit __NR_exit
|
|
#define NR_syscalls __NR_syscalls
|
|
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
|
|
index 1cbbf70..198645f 100644
|
|
--- a/arch/powerpc/kernel/entry_32.S
|
|
+++ b/arch/powerpc/kernel/entry_32.S
|
|
@@ -39,7 +39,7 @@
|
|
* MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE.
|
|
*/
|
|
#if MSR_KERNEL >= 0x10000
|
|
-#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l
|
|
+#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l /* @h pairs with ori; @ha is carry-adjusted for sign-extending addi/loads and would be off by 0x10000 when bit 15 of x is set */
|
|
#else
|
|
#define LOAD_MSR_KERNEL(r, x) li r,(x)
|
|
#endif
|
|
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
|
|
index 2d802e9..77a090d 100644
|
|
--- a/arch/powerpc/kernel/entry_64.S
|
|
+++ b/arch/powerpc/kernel/entry_64.S
|
|
@@ -643,6 +643,10 @@ user_work:
|
|
b .ret_from_except_lite
|
|
|
|
1: bl .save_nvgprs
|
|
+#ifdef CONFIG_PERFMON
|
|
+ addi r3,r1,STACK_FRAME_OVERHEAD
|
|
+ bl .pfm_handle_work
|
|
+#endif /* CONFIG_PERFMON */
|
|
addi r3,r1,STACK_FRAME_OVERHEAD
|
|
bl .do_signal
|
|
b .ret_from_except
|
|
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
|
|
index d972dec..b255fba 100644
|
|
--- a/arch/powerpc/kernel/irq.c
|
|
+++ b/arch/powerpc/kernel/irq.c
|
|
@@ -104,6 +104,24 @@ static inline notrace void set_soft_enabled(unsigned long enable)
|
|
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
|
|
}
|
|
|
|
+#ifdef CONFIG_PERFMON
|
|
+static inline unsigned long get_pmu_except_pending(void)
|
|
+{
|
|
+ unsigned long pending;
|
|
+
|
|
+ __asm__ __volatile__("lbz %0,%1(13)"
|
|
+ : "=r" (pending) : "i" (offsetof(struct paca_struct, pmu_except_pending)));
|
|
+
|
|
+ return pending;
|
|
+}
|
|
+
|
|
+static inline void set_pmu_except_pending(unsigned long pending)
|
|
+{
|
|
+ __asm__ __volatile__("stb %0,%1(13)"
|
|
+ : : "r" (pending), "i" (offsetof(struct paca_struct, pmu_except_pending)));
|
|
+}
|
|
+#endif /* CONFIG_PERFMON */
|
|
+
|
|
notrace void raw_local_irq_restore(unsigned long en)
|
|
{
|
|
/*
|
|
@@ -162,6 +180,19 @@ notrace void raw_local_irq_restore(unsigned long en)
|
|
lv1_get_version_info(&tmp);
|
|
}
|
|
|
|
+#ifdef CONFIG_PERFMON
|
|
+ /*
|
|
+ * If a PMU exception occurred while interrupts were soft disabled,
|
|
+ * force a PMU exception.
|
|
+ */
|
|
+ if (get_pmu_except_pending()) {
|
|
+ set_pmu_except_pending(0);
|
|
+ /* Make sure we trigger the edge detection circuitry */
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
|
|
+ }
|
|
+#endif /* CONFIG_PERFMON */
|
|
+
|
|
__hard_irq_enable();
|
|
}
|
|
EXPORT_SYMBOL(raw_local_irq_restore);
|
|
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
|
|
index 957bded..32dbc8e 100644
|
|
--- a/arch/powerpc/kernel/process.c
|
|
+++ b/arch/powerpc/kernel/process.c
|
|
@@ -33,6 +33,7 @@
|
|
#include <linux/mqueue.h>
|
|
#include <linux/hardirq.h>
|
|
#include <linux/utsname.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
#include <asm/uaccess.h>
|
|
@@ -393,9 +394,14 @@ struct task_struct *__switch_to(struct task_struct *prev,
|
|
new_thread->start_tb = current_tb;
|
|
}
|
|
#endif
|
|
-
|
|
local_irq_save(flags);
|
|
|
|
+ if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW))
|
|
+ pfm_ctxsw_out(prev, new);
|
|
+
|
|
+ if (test_tsk_thread_flag(new, TIF_PERFMON_CTXSW))
|
|
+ pfm_ctxsw_in(prev, new);
|
|
+
|
|
account_system_vtime(current);
|
|
account_process_vtime(current);
|
|
calculate_steal_time();
|
|
@@ -544,6 +550,7 @@ void show_regs(struct pt_regs * regs)
|
|
void exit_thread(void)
|
|
{
|
|
discard_lazy_cpu_state();
|
|
+ pfm_exit_thread();
|
|
}
|
|
|
|
void flush_thread(void)
|
|
@@ -669,6 +676,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
|
|
#else
|
|
kregs->nip = (unsigned long)ret_from_fork;
|
|
#endif
|
|
+ pfm_copy_thread(p);
|
|
|
|
return 0;
|
|
}
|
|
diff --git a/arch/powerpc/perfmon/Kconfig b/arch/powerpc/perfmon/Kconfig
|
|
new file mode 100644
|
|
index 0000000..3f4bbf2
|
|
--- /dev/null
|
|
+++ b/arch/powerpc/perfmon/Kconfig
|
|
@@ -0,0 +1,67 @@
|
|
+menu "Hardware Performance Monitoring support"
|
|
+config PERFMON
|
|
+ bool "Perfmon2 performance monitoring interface"
|
|
+ default n
|
|
+ help
|
|
+ Enables the perfmon2 interface to access the hardware
|
|
+ performance counters. See <http://perfmon2.sf.net/> for
|
|
+ more details.
|
|
+
|
|
+config PERFMON_DEBUG
|
|
+ bool "Perfmon debugging"
|
|
+ default n
|
|
+ depends on PERFMON
|
|
+ help
|
|
+ Enables perfmon debugging support
|
|
+
|
|
+config PERFMON_DEBUG_FS
|
|
+ bool "Enable perfmon statistics reporting via debugfs"
|
|
+ default y
|
|
+ depends on PERFMON && DEBUG_FS
|
|
+ help
|
|
+ Enable collection and reporting of perfmon timing statistics under
|
|
+ debugfs. This is used for debugging and performance analysis of the
|
|
+ subsystem. The debugfs filesystem must be mounted.
|
|
+
|
|
+config PERFMON_POWER4
|
|
+ tristate "Support for Power4 hardware performance counters"
|
|
+ depends on PERFMON && PPC64
|
|
+ default n
|
|
+ help
|
|
+ Enables support for the Power 4 hardware performance counters
|
|
+ If unsure, say M.
|
|
+
|
|
+config PERFMON_POWER5
|
|
+ tristate "Support for Power5 hardware performance counters"
|
|
+ depends on PERFMON && PPC64
|
|
+ default n
|
|
+ help
|
|
+ Enables support for the Power 5 hardware performance counters
|
|
+ If unsure, say M.
|
|
+
|
|
+config PERFMON_POWER6
|
|
+ tristate "Support for Power6 hardware performance counters"
|
|
+ depends on PERFMON && PPC64
|
|
+ default n
|
|
+ help
|
|
+ Enables support for the Power 6 hardware performance counters
|
|
+ If unsure, say M.
|
|
+
|
|
+config PERFMON_PPC32
|
|
+ tristate "Support for PPC32 hardware performance counters"
|
|
+ depends on PERFMON && PPC32
|
|
+ default n
|
|
+ help
|
|
+ Enables support for the PPC32 hardware performance counters
|
|
+ If unsure, say M.
|
|
+
|
|
+config PERFMON_CELL
|
|
+ tristate "Support for Cell hardware performance counters"
|
|
+ depends on PERFMON && PPC_CELL
|
|
+ select PS3_LPM if PPC_PS3
|
|
+ default n
|
|
+ help
|
|
+ Enables support for the Cell hardware performance counters.
|
|
+ If unsure, say M.
|
|
+
|
|
+endmenu
|
|
diff --git a/arch/powerpc/perfmon/Makefile b/arch/powerpc/perfmon/Makefile
|
|
new file mode 100644
|
|
index 0000000..300661f
|
|
--- /dev/null
|
|
+++ b/arch/powerpc/perfmon/Makefile
|
|
@@ -0,0 +1,6 @@
|
|
+obj-$(CONFIG_PERFMON) += perfmon.o
|
|
+obj-$(CONFIG_PERFMON_POWER4) += perfmon_power4.o
|
|
+obj-$(CONFIG_PERFMON_POWER5) += perfmon_power5.o
|
|
+obj-$(CONFIG_PERFMON_POWER6) += perfmon_power6.o
|
|
+obj-$(CONFIG_PERFMON_PPC32) += perfmon_ppc32.o
|
|
+obj-$(CONFIG_PERFMON_CELL) += perfmon_cell.o
|
|
diff --git a/arch/powerpc/perfmon/perfmon.c b/arch/powerpc/perfmon/perfmon.c
|
|
new file mode 100644
|
|
index 0000000..51a8b6a
|
|
--- /dev/null
|
|
+++ b/arch/powerpc/perfmon/perfmon.c
|
|
@@ -0,0 +1,334 @@
|
|
+/*
|
|
+ * This file implements the powerpc specific
|
|
+ * support for the perfmon2 interface
|
|
+ *
|
|
+ * Copyright (c) 2005 David Gibson, IBM Corporation.
|
|
+ *
|
|
+ * based on versions for other architectures:
|
|
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+static void pfm_stop_active(struct task_struct *task,
|
|
+ struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ BUG_ON(!arch_info->disable_counters || !arch_info->get_ovfl_pmds);
|
|
+
|
|
+ arch_info->disable_counters(ctx, set);
|
|
+
|
|
+ if (set->npend_ovfls)
|
|
+ return;
|
|
+
|
|
+ arch_info->get_ovfl_pmds(ctx, set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_save_pmds(). Interrupts are masked. Registers are
|
|
+ * already saved away.
|
|
+ */
|
|
+void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ int i, num;
|
|
+ u64 *used_pmds, *intr_pmds;
|
|
+
|
|
+ num = set->nused_pmds;
|
|
+ used_pmds = set->used_pmds;
|
|
+ intr_pmds = ctx->regs.intr_pmds;
|
|
+
|
|
+ for (i = 0; num; i++)
|
|
+ if (likely(test_bit(i, used_pmds))) {
|
|
+ if (likely(test_bit(i, intr_pmds)))
|
|
+ pfm_write_pmd(ctx, i, 0);
|
|
+ num--;
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_ctxsw(). Task is guaranteed to be current.
|
|
+ * Context is locked. Interrupts are masked. Monitoring is active.
|
|
+ * PMU access is guaranteed. PMC and PMD registers are live in PMU.
|
|
+ *
|
|
+ * for per-thread:
|
|
+ * must stop monitoring for the task
|
|
+ * Return:
|
|
+ * non-zero : did not save PMDs (as part of stopping the PMU)
|
|
+ * 0 : saved PMDs (no need to save them in caller)
|
|
+ */
|
|
+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ /*
|
|
+ * disable lazy restore of the PMC/PMD registers.
|
|
+ */
|
|
+ ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH;
|
|
+
|
|
+ if (ctx->state == PFM_CTX_MASKED)
|
|
+ return 1;
|
|
+
|
|
+ pfm_stop_active(task, ctx, ctx->active_set);
|
|
+
|
|
+ if (arch_info->ctxswout_thread)
|
|
+ arch_info->ctxswout_thread(task, ctx, ctx->active_set);
|
|
+
|
|
+ return pfm_arch_is_active(ctx);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_ctxsw
|
|
+ */
|
|
+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ if (ctx->state != PFM_CTX_MASKED && ctx->flags.started == 1) {
|
|
+ BUG_ON(!arch_info->enable_counters);
|
|
+ arch_info->enable_counters(ctx, ctx->active_set);
|
|
+ }
|
|
+
|
|
+ if (arch_info->ctxswin_thread)
|
|
+ arch_info->ctxswin_thread(task, ctx, ctx->active_set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_stop() and idle notifier
|
|
+ *
|
|
+ * Interrupts are masked. Context is locked. Set is the active set.
|
|
+ *
|
|
+ * For per-thread:
|
|
+ * task is not necessarily current. If not current task, then
|
|
+ * task is guaranteed stopped and off any cpu. Access to PMU
|
|
+ * is not guaranteed. Interrupts are masked. Context is locked.
|
|
+ * Set is the active set.
|
|
+ *
|
|
+ * For system-wide:
|
|
+ * task is current
|
|
+ *
|
|
+ * must disable active monitoring. ctx cannot be NULL
|
|
+ */
|
|
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ /*
|
|
+ * no need to go through stop_save()
|
|
+ * if we are already stopped
|
|
+ */
|
|
+ if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * stop live registers and collect pending overflow
|
|
+ */
|
|
+ if (task == current)
|
|
+ pfm_stop_active(task, ctx, ctx->active_set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Enable active monitoring. Called from pfm_start() and
|
|
+ * pfm_arch_unmask_monitoring().
|
|
+ *
|
|
+ * Interrupts are masked. Context is locked. Set is the active set.
|
|
+ *
|
|
+ * For per-thread:
|
|
+ * Task is not necessarily current. If not current task, then task
|
|
+ * is guaranteed stopped and off any cpu. No access to PMU if task
|
|
+ * is not current.
|
|
+ *
|
|
+ * For system-wide:
|
|
+ * Task is always current
|
|
+ */
|
|
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ if (task != current)
|
|
+ return;
|
|
+
|
|
+ BUG_ON(!arch_info->enable_counters);
|
|
+
|
|
+ arch_info->enable_counters(ctx, ctx->active_set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
|
|
+ * pfm_context_load_sys(), pfm_ctxsw().
|
|
+ * context is locked. Interrupts are masked. set cannot be NULL.
|
|
+ * Access to the PMU is guaranteed.
|
|
+ *
|
|
+ * function must restore all PMD registers from set.
|
|
+ */
|
|
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+ u64 *used_pmds;
|
|
+ u16 i, num;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+
|
|
+ /* The model-specific module can override the default
|
|
+ * restore-PMD method.
|
|
+ */
|
|
+ if (arch_info->restore_pmds)
|
|
+ return arch_info->restore_pmds(ctx, set);
|
|
+
|
|
+ num = set->nused_pmds;
|
|
+ used_pmds = set->used_pmds;
|
|
+
|
|
+ for (i = 0; num; i++) {
|
|
+ if (likely(test_bit(i, used_pmds))) {
|
|
+ pfm_write_pmd(ctx, i, set->pmds[i].value);
|
|
+ num--;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
|
|
+ * pfm_context_load_sys(), pfm_ctxsw().
|
|
+ * context is locked. Interrupts are masked. set cannot be NULL.
|
|
+ * Access to the PMU is guaranteed.
|
|
+ *
|
|
+ * function must restore all PMC registers from set, if needed.
|
|
+ */
|
|
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+ u64 *impl_pmcs;
|
|
+ unsigned int i, max_pmc, reg;
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ /* The model-specific module can override the default
|
|
+ * restore-PMC method.
|
|
+ */
|
|
+ if (arch_info->restore_pmcs)
|
|
+ return arch_info->restore_pmcs(ctx, set);
|
|
+
|
|
+ /* The "common" powerpc models enable the counters simply by writing
|
|
+ * all the control registers. Therefore, if we're masked or stopped we
|
|
+ * don't need to bother restoring the PMCs now.
|
|
+ */
|
|
+ if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0)
|
|
+ return;
|
|
+
|
|
+ max_pmc = ctx->regs.max_pmc;
|
|
+ impl_pmcs = ctx->regs.pmcs;
|
|
+
|
|
+ /*
|
|
+ * Restore all pmcs in reverse order to ensure the counters aren't
|
|
+ * enabled before their event selectors are set correctly.
|
|
+ */
|
|
+ reg = max_pmc - 1;
|
|
+ for (i = 0; i < max_pmc; i++) {
|
|
+ if (test_bit(reg, impl_pmcs))
|
|
+ pfm_arch_write_pmc(ctx, reg, set->pmcs[reg]);
|
|
+ reg--;
|
|
+ }
|
|
+}
|
|
+
|
|
+char *pfm_arch_get_pmu_module_name(void)
|
|
+{
|
|
+ unsigned int pvr = mfspr(SPRN_PVR);
|
|
+
|
|
+ switch (PVR_VER(pvr)) {
|
|
+ case 0x0004: /* 604 */
|
|
+ case 0x0009: /* 604e */
|
|
+ case 0x000A: /* 604ev */
|
|
+ case 0x0008: /* 750/740 */
|
|
+ case 0x7000: /* 750FX */
|
|
+ case 0x7001:
|
|
+ case 0x7002: /* 750GX */
|
|
+ case 0x000C: /* 7400 */
|
|
+ case 0x800C: /* 7410 */
|
|
+ case 0x8000: /* 7451/7441 */
|
|
+ case 0x8001: /* 7455/7445 */
|
|
+ case 0x8002: /* 7457/7447 */
|
|
+ case 0x8003: /* 7447A */
|
|
+ case 0x8004: /* 7448 */
|
|
+ return("perfmon_ppc32");
|
|
+ case PV_POWER4:
|
|
+ case PV_POWER4p:
|
|
+ return "perfmon_power4";
|
|
+ case PV_POWER5:
|
|
+ return "perfmon_power5";
|
|
+ case PV_POWER5p:
|
|
+ if (PVR_REV(pvr) < 0x300)
|
|
+ /* PMU behaves like POWER5 */
|
|
+ return "perfmon_power5";
|
|
+ else
|
|
+ /* PMU behaves like POWER6 */
|
|
+ return "perfmon_power6";
|
|
+ case PV_POWER6:
|
|
+ return "perfmon_power6";
|
|
+ case PV_970:
|
|
+ case PV_970FX:
|
|
+ case PV_970MP:
|
|
+ return "perfmon_ppc970";
|
|
+ case PV_BE:
|
|
+ return "perfmon_cell";
|
|
+ }
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+void pfm_arch_init_percpu(void)
|
|
+{
|
|
+#ifdef CONFIG_PPC64
|
|
+ extern void ppc64_enable_pmcs(void);
|
|
+ ppc64_enable_pmcs();
|
|
+#endif
|
|
+}
|
|
+
|
|
+/**
|
|
+ * powerpc_irq_handler
|
|
+ *
|
|
+ * Get the perfmon context that belongs to the current CPU, and call the
|
|
+ * model-specific interrupt handler.
|
|
+ **/
|
|
+void powerpc_irq_handler(struct pt_regs *regs)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *arch_info;
|
|
+ struct pfm_context *ctx;
|
|
+
|
|
+ if (! regs->softe) {
|
|
+ /*
|
|
+ * We got a PMU interrupt while interrupts were soft
|
|
+ * disabled. Disable hardware interrupts by clearing
|
|
+ * MSR_EE and also clear PMAO because we will need to set
|
|
+ * that again later when interrupts are re-enabled and
|
|
+ * raw_local_irq_restore() sees that the pmu_except_pending
|
|
+ * flag is set.
|
|
+ */
|
|
+ regs->msr &= ~MSR_EE;
|
|
+ get_paca()->pmu_except_pending = 1;
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ arch_info = pfm_pmu_info();
|
|
+ if (arch_info->irq_handler) {
|
|
+ ctx = __get_cpu_var(pmu_ctx);
|
|
+ if (likely(ctx))
|
|
+ arch_info->irq_handler(regs, ctx);
|
|
+ }
|
|
+}
|
|
diff --git a/arch/powerpc/perfmon/perfmon_cell.c b/arch/powerpc/perfmon/perfmon_cell.c
|
|
new file mode 100644
|
|
index 0000000..e1ae12c
|
|
--- /dev/null
|
|
+++ b/arch/powerpc/perfmon/perfmon_cell.c
|
|
@@ -0,0 +1,1449 @@
|
|
+/*
|
|
+ * This file contains the Cell PMU register description tables
|
|
+ * and pmc checker used by perfmon.c.
|
|
+ *
|
|
+ * Copyright IBM Corporation 2007
|
|
+ * (C) Copyright 2007 TOSHIBA CORPORATION
|
|
+ *
|
|
+ * Based on other Perfmon2 PMU modules.
|
|
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <linux/io.h>
|
|
+#include <asm/cell-pmu.h>
|
|
+#include <asm/cell-regs.h>
|
|
+#include <asm/machdep.h>
|
|
+#include <asm/rtas.h>
|
|
+#include <asm/ps3.h>
|
|
+#include <asm/spu.h>
|
|
+
|
|
+MODULE_AUTHOR("Kevin Corry <kevcorry@us.ibm.com>, "
|
|
+ "Carl Love <carll@us.ibm.com>");
|
|
+MODULE_DESCRIPTION("Cell PMU description table");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+struct pfm_cell_platform_pmu_info {
|
|
+ u32 (*read_ctr)(u32 cpu, u32 ctr);
|
|
+ void (*write_ctr)(u32 cpu, u32 ctr, u32 val);
|
|
+ void (*write_pm07_control)(u32 cpu, u32 ctr, u32 val);
|
|
+ void (*write_pm)(u32 cpu, enum pm_reg_name reg, u32 val);
|
|
+ void (*enable_pm)(u32 cpu);
|
|
+ void (*disable_pm)(u32 cpu);
|
|
+ void (*enable_pm_interrupts)(u32 cpu, u32 thread, u32 mask);
|
|
+ u32 (*get_and_clear_pm_interrupts)(u32 cpu);
|
|
+ u32 (*get_hw_thread_id)(int cpu);
|
|
+ struct cbe_ppe_priv_regs __iomem *(*get_cpu_ppe_priv_regs)(int cpu);
|
|
+ struct cbe_pmd_regs __iomem *(*get_cpu_pmd_regs)(int cpu);
|
|
+ struct cbe_mic_tm_regs __iomem *(*get_cpu_mic_tm_regs)(int cpu);
|
|
+ int (*rtas_token)(const char *service);
|
|
+ int (*rtas_call)(int token, int param1, int param2, int *param3, ...);
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Mapping from Perfmon logical control registers to Cell hardware registers.
|
|
+ */
|
|
+static struct pfm_regmap_desc pfm_cell_pmc_desc[] = {
|
|
+ /* Per-counter control registers. */
|
|
+ PMC_D(PFM_REG_I, "pm0_control", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm1_control", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm2_control", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm3_control", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm4_control", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm5_control", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm6_control", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm7_control", 0, 0, 0, 0),
|
|
+
|
|
+ /* Per-counter RTAS arguments. Each of these registers has three fields.
|
|
+ * bits 63-48: debug-bus word
|
|
+ * bits 47-32: sub-unit
|
|
+ * bits 31-0 : full signal number
|
|
+ * (MSB = 63, LSB = 0)
|
|
+ */
|
|
+ PMC_D(PFM_REG_I, "pm0_event", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm1_event", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm2_event", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm3_event", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm4_event", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm5_event", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm6_event", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm7_event", 0, 0, 0, 0),
|
|
+
|
|
+ /* Global control registers. Same order as enum pm_reg_name. */
|
|
+ PMC_D(PFM_REG_I, "group_control", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "debug_bus_control", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "trace_address", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "ext_trace_timer", 0, 0, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm_status", 0, 0, 0, 0),
|
|
+ /* set the interrupt overflow bit for the four 32 bit counters
|
|
+ * that are currently supported. Will need to fix when 32 and 16
|
|
+ * bit counters are supported.
|
|
+ */
|
|
+ PMC_D(PFM_REG_I, "pm_control", 0xF0000000, 0xF0000000, 0, 0),
|
|
+ PMC_D(PFM_REG_I, "pm_interval", 0, 0, 0, 0), /* FIX: Does user-space also need read access to this one? */
|
|
+ PMC_D(PFM_REG_I, "pm_start_stop", 0, 0, 0, 0),
|
|
+};
|
|
+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_cell_pmc_desc)
|
|
+
|
|
+#define CELL_PMC_GROUP_CONTROL 16
|
|
+#define CELL_PMC_PM_STATUS 20
|
|
+#define CELL_PMC_PM_CONTROL 21
|
|
+#define CELL_PMC_PM_CONTROL_CNTR_MASK 0x01E00000UL
|
|
+#define CELL_PMC_PM_CONTROL_CNTR_16 0x01E00000UL
|
|
+
|
|
+/*
|
|
+ * Mapping from Perfmon logical data counters to Cell hardware counters.
|
|
+ */
|
|
+static struct pfm_regmap_desc pfm_cell_pmd_desc[] = {
|
|
+ PMD_D(PFM_REG_C, "pm0", 0),
|
|
+ PMD_D(PFM_REG_C, "pm1", 0),
|
|
+ PMD_D(PFM_REG_C, "pm2", 0),
|
|
+ PMD_D(PFM_REG_C, "pm3", 0),
|
|
+ PMD_D(PFM_REG_C, "pm4", 0),
|
|
+ PMD_D(PFM_REG_C, "pm5", 0),
|
|
+ PMD_D(PFM_REG_C, "pm6", 0),
|
|
+ PMD_D(PFM_REG_C, "pm7", 0),
|
|
+};
|
|
+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_cell_pmd_desc)
|
|
+
|
|
+#define PFM_EVENT_PMC_BUS_WORD(x) (((x) >> 48) & 0x00ff)
|
|
+#define PFM_EVENT_PMC_FULL_SIGNAL_NUMBER(x) ((x) & 0xffffffff)
|
|
+#define PFM_EVENT_PMC_SIGNAL_GROUP(x) (((x) & 0xffffffff) / 100)
|
|
+#define PFM_PM_CTR_INPUT_MUX_BIT(pm07_control) (((pm07_control) >> 26) & 0x1f)
|
|
+#define PFM_PM_CTR_INPUT_MUX_GROUP_INDEX(pm07_control) ((pm07_control) >> 31)
|
|
+#define PFM_GROUP_CONTROL_GROUP0_WORD(grp_ctrl) ((grp_ctrl) >> 30)
|
|
+#define PFM_GROUP_CONTROL_GROUP1_WORD(grp_ctrl) (((grp_ctrl) >> 28) & 0x3)
|
|
+#define PFM_NUM_OF_GROUPS 2
|
|
+#define PFM_PPU_IU1_THREAD1_BASE_BIT 19
|
|
+#define PFM_PPU_XU_THREAD1_BASE_BIT 16
|
|
+#define PFM_COUNTER_CTRL_PMC_PPU_TH0 0x100000000ULL
|
|
+#define PFM_COUNTER_CTRL_PMC_PPU_TH1 0x200000000ULL
|
|
+
|
|
+/*
|
|
+ * Debug-bus signal handling.
|
|
+ *
|
|
+ * Some Cell systems have firmware that can handle the debug-bus signal
|
|
+ * routing. For systems without this firmware, we have a minimal in-kernel
|
|
+ * implementation as well.
|
|
+ */
|
|
+
|
|
+/* The firmware only sees physical CPUs, so divide by 2 if SMT is on. */
|
|
+#ifdef CONFIG_SCHED_SMT
|
|
+#define RTAS_CPU(cpu) ((cpu) / 2)
|
|
+#else
|
|
+#define RTAS_CPU(cpu) (cpu)
|
|
+#endif
|
|
+#define RTAS_BUS_WORD(x) (u16)(((x) >> 48) & 0x0000ffff)
|
|
+#define RTAS_SUB_UNIT(x) (u16)(((x) >> 32) & 0x0000ffff)
|
|
+#define RTAS_SIGNAL_NUMBER(x) (s32)( (x) & 0xffffffff)
|
|
+#define RTAS_SIGNAL_GROUP(x) (RTAS_SIGNAL_NUMBER(x) / 100)
|
|
+
|
|
+#define subfunc_RESET 1
|
|
+#define subfunc_ACTIVATE 2
|
|
+
|
|
+#define passthru_ENABLE 1
|
|
+#define passthru_DISABLE 2
|
|
+
|
|
+/**
|
|
+ * struct cell_rtas_arg
|
|
+ *
|
|
+ * @cpu: Processor to modify. Linux numbers CPUs based on SMT IDs, but the
|
|
+ * firmware only sees the physical CPUs. So this value should be the
|
|
+ * SMT ID (from smp_processor_id() or get_cpu()) divided by 2.
|
|
+ * @sub_unit: Hardware subunit this applies to (if applicable).
|
|
+ * @signal_group: Signal group to enable/disable on the trace bus.
|
|
+ * @bus_word: For signal groups that propagate via the trace bus, this trace
|
|
+ * bus word will be used. This is a mask of (1 << TraceBusWord).
|
|
+ * For other signal groups, this specifies the trigger or event bus.
|
|
+ * @bit: Trigger/Event bit, if applicable for the signal group.
|
|
+ *
|
|
+ * An array of these structures is passed to rtas_call() to set up the
|
|
+ * signals on the debug bus.
|
|
+ **/
|
|
+struct cell_rtas_arg {
|
|
+ u16 cpu;
|
|
+ u16 sub_unit;
|
|
+ s16 signal_group;
|
|
+ u8 bus_word;
|
|
+ u8 bit;
|
|
+};
|
|
+
|
|
+/**
|
|
+ * rtas_reset_signals
|
|
+ *
|
|
+ * Use the firmware RTAS call to disable signal pass-thru and to reset the
|
|
+ * debug-bus signals.
|
|
+ **/
|
|
+static int rtas_reset_signals(u32 cpu)
|
|
+{
|
|
+ struct cell_rtas_arg signal;
|
|
+ u64 real_addr = virt_to_phys(&signal);
|
|
+ int rc;
|
|
+ struct pfm_cell_platform_pmu_info *info =
|
|
+ ((struct pfm_arch_pmu_info *)
|
|
+ (pfm_pmu_conf->pmu_info))->platform_info;
|
|
+
|
|
+ memset(&signal, 0, sizeof(signal));
|
|
+ signal.cpu = RTAS_CPU(cpu);
|
|
+ rc = info->rtas_call(info->rtas_token("ibm,cbe-perftools"),
|
|
+ 5, 1, NULL,
|
|
+ subfunc_RESET,
|
|
+ passthru_DISABLE,
|
|
+ real_addr >> 32,
|
|
+ real_addr & 0xffffffff,
|
|
+ sizeof(signal));
|
|
+
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * rtas_activate_signals
|
|
+ *
|
|
+ * Use the firmware RTAS call to enable signal pass-thru and to activate the
|
|
+ * desired signal groups on the debug-bus.
|
|
+ **/
|
|
+static int rtas_activate_signals(struct cell_rtas_arg *signals,
|
|
+ int num_signals)
|
|
+{
|
|
+ u64 real_addr = virt_to_phys(signals);
|
|
+ int rc;
|
|
+ struct pfm_cell_platform_pmu_info *info =
|
|
+ ((struct pfm_arch_pmu_info *)
|
|
+ (pfm_pmu_conf->pmu_info))->platform_info;
|
|
+
|
|
+ rc = info->rtas_call(info->rtas_token("ibm,cbe-perftools"),
|
|
+ 5, 1, NULL,
|
|
+ subfunc_ACTIVATE,
|
|
+ passthru_ENABLE,
|
|
+ real_addr >> 32,
|
|
+ real_addr & 0xffffffff,
|
|
+ num_signals * sizeof(*signals));
|
|
+
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+#define HID1_RESET_MASK (~0x00000001ffffffffUL)
|
|
+#define PPU_IU1_WORD0_HID1_EN_MASK (~0x00000001f0c0802cUL)
|
|
+#define PPU_IU1_WORD0_HID1_EN_WORD ( 0x00000001f0400000UL)
|
|
+#define PPU_IU1_WORD1_HID1_EN_MASK (~0x000000010fc08023UL)
|
|
+#define PPU_IU1_WORD1_HID1_EN_WORD ( 0x000000010f400001UL)
|
|
+#define PPU_XU_WORD0_HID1_EN_MASK (~0x00000001f038402cUL)
|
|
+#define PPU_XU_WORD0_HID1_EN_WORD ( 0x00000001f0080008UL)
|
|
+#define PPU_XU_WORD1_HID1_EN_MASK (~0x000000010f074023UL)
|
|
+#define PPU_XU_WORD1_HID1_EN_WORD ( 0x000000010f030002UL)
|
|
+
|
|
+/* The bus_word field in the cell_rtas_arg structure is a bit-mask
|
|
+ * indicating which debug-bus word(s) to use.
|
|
+ */
|
|
+enum {
|
|
+ BUS_WORD_0 = 1,
|
|
+ BUS_WORD_1 = 2,
|
|
+ BUS_WORD_2 = 4,
|
|
+ BUS_WORD_3 = 8,
|
|
+};
|
|
+
|
|
+/* Definitions of the signal-groups that the built-in signal-activation
|
|
+ * code can handle.
|
|
+ */
|
|
+enum {
|
|
+ SIG_GROUP_NONE = 0,
|
|
+
|
|
+ /* 2.x PowerPC Processor Unit (PPU) Signal Groups */
|
|
+ SIG_GROUP_PPU_BASE = 20,
|
|
+ SIG_GROUP_PPU_IU1 = 21,
|
|
+ SIG_GROUP_PPU_XU = 22,
|
|
+
|
|
+ /* 3.x PowerPC Storage Subsystem (PPSS) Signal Groups */
|
|
+ SIG_GROUP_PPSS_BASE = 30,
|
|
+
|
|
+ /* 4.x Synergistic Processor Unit (SPU) Signal Groups */
|
|
+ SIG_GROUP_SPU_BASE = 40,
|
|
+
|
|
+ /* 5.x Memory Flow Controller (MFC) Signal Groups */
|
|
+ SIG_GROUP_MFC_BASE = 50,
|
|
+
|
|
+ /* 6.x Element Interconnect Bus (EIB) Signal Groups */
|
|
+ SIG_GROUP_EIB_BASE = 60,
|
|
+
|
|
+ /* 7.x Memory Interface Controller (MIC) Signal Groups */
|
|
+ SIG_GROUP_MIC_BASE = 70,
|
|
+
|
|
+ /* 8.x Cell Broadband Engine Interface (BEI) Signal Groups */
|
|
+ SIG_GROUP_BEI_BASE = 80,
|
|
+};
|
|
+
|
|
+/**
|
|
+ * rmw_spr
|
|
+ *
|
|
+ * Read-modify-write for a special-purpose-register.
|
|
+ **/
|
|
+#define rmw_spr(spr_id, a_mask, o_mask) \
|
|
+ do { \
|
|
+ u64 value = mfspr(spr_id); \
|
|
+ value &= (u64)(a_mask); \
|
|
+ value |= (u64)(o_mask); \
|
|
+ mtspr((spr_id), value); \
|
|
+ } while (0)
|
|
+
|
|
+/**
|
|
+ * rmw_mmio_reg64
|
|
+ *
|
|
+ * Read-modify-write for a 64-bit MMIO register.
|
|
+ **/
|
|
+#define rmw_mmio_reg64(mem, a_mask, o_mask) \
|
|
+ do { \
|
|
+ u64 value = in_be64(&(mem)); \
|
|
+ value &= (u64)(a_mask); \
|
|
+ value |= (u64)(o_mask); \
|
|
+ out_be64(&(mem), value); \
|
|
+ } while (0)
|
|
+
|
|
+/**
|
|
+ * rmwb_mmio_reg64
|
|
+ *
|
|
+ * Set or unset a specified bit within a 64-bit MMIO register.
|
|
+ **/
|
|
+#define rmwb_mmio_reg64(mem, bit_num, set_bit) \
|
|
+ rmw_mmio_reg64((mem), ~(1UL << (63 - (bit_num))), \
|
|
+ ((u64)(set_bit) << (63 - (bit_num)))) /* widen before shifting: set_bit may be a plain int and bit_num counts from the MSB (bit 0 = 1 << 63) */
|
|
+
|
|
+/**
|
|
+ * passthru
|
|
+ *
|
|
+ * Enable or disable passthru mode in all the Cell signal islands.
|
|
+ **/
|
|
+static int passthru(u32 cpu, u64 enable)
|
|
+{
|
|
+ struct cbe_ppe_priv_regs __iomem *ppe_priv_regs;
|
|
+ struct cbe_pmd_regs __iomem *pmd_regs;
|
|
+ struct cbe_mic_tm_regs __iomem *mic_tm_regs;
|
|
+ struct pfm_cell_platform_pmu_info *info =
|
|
+ ((struct pfm_arch_pmu_info *)
|
|
+ (pfm_pmu_conf->pmu_info))->platform_info;
|
|
+
|
|
+ ppe_priv_regs = info->get_cpu_ppe_priv_regs(cpu);
|
|
+ pmd_regs = info->get_cpu_pmd_regs(cpu);
|
|
+ mic_tm_regs = info->get_cpu_mic_tm_regs(cpu);
|
|
+
|
|
+ if (!ppe_priv_regs || !pmd_regs || !mic_tm_regs) {
|
|
+ PFM_ERR("Error getting Cell PPE, PMD, and MIC "
|
|
+ "register maps: 0x%p, 0x%p, 0x%p",
|
|
+ ppe_priv_regs, pmd_regs, mic_tm_regs);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ rmwb_mmio_reg64(ppe_priv_regs->L2_debug1, 61, enable);
|
|
+ rmwb_mmio_reg64(ppe_priv_regs->ciu_dr1, 5, enable);
|
|
+ rmwb_mmio_reg64(pmd_regs->on_ramp_trace, 39, enable);
|
|
+ rmwb_mmio_reg64(mic_tm_regs->MBL_debug, 20, enable);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#define passthru_enable(cpu) passthru(cpu, 1)
|
|
+#define passthru_disable(cpu) passthru(cpu, 0)
|
|
+
|
|
+static inline void reset_signal_registers(u32 cpu)
|
|
+{
|
|
+ rmw_spr(SPRN_HID1, HID1_RESET_MASK, 0);
|
|
+}
|
|
+
|
|
+/**
+ * celleb_reset_signals
+ *
+ * Firmware-free (non-RTAS) reset of the debug-bus signals: leave passthru
+ * mode first and, only if that worked, reset the signal registers.
+ *
+ * Returns 0 on success or the error reported by passthru_disable().
+ **/
+static int celleb_reset_signals(u32 cpu)
+{
+	int err = passthru_disable(cpu);
+
+	if (err)
+		return err;
+
+	reset_signal_registers(cpu);
+	return 0;
+}
|
|
+
|
|
+/**
+ * ppu_selection
+ *
+ * Program the HID1 register so that the PPU signal-group named in @signal
+ * is connected to the debug-bus on the requested bus word.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported signal-group or an
+ * invalid bus word for a supported group.
+ **/
+static int ppu_selection(struct cell_rtas_arg *signal)
+{
+	u64 or_bits = 0;
+	u64 keep_mask = 0;
+
+	switch (signal->signal_group) {
+
+	case SIG_GROUP_PPU_IU1:	/* 2.1 PPU Instruction Unit - Group 1 */
+		switch (signal->bus_word) {
+		case BUS_WORD_0:
+			keep_mask = PPU_IU1_WORD0_HID1_EN_MASK;
+			or_bits = PPU_IU1_WORD0_HID1_EN_WORD;
+			break;
+		case BUS_WORD_1:
+			keep_mask = PPU_IU1_WORD1_HID1_EN_MASK;
+			or_bits = PPU_IU1_WORD1_HID1_EN_WORD;
+			break;
+		default:
+			goto bad_bus_word;
+		}
+		break;
+
+	case SIG_GROUP_PPU_XU:	/* 2.2 PPU Execution Unit */
+		switch (signal->bus_word) {
+		case BUS_WORD_0:
+			keep_mask = PPU_XU_WORD0_HID1_EN_MASK;
+			or_bits = PPU_XU_WORD0_HID1_EN_WORD;
+			break;
+		case BUS_WORD_1:
+			keep_mask = PPU_XU_WORD1_HID1_EN_MASK;
+			or_bits = PPU_XU_WORD1_HID1_EN_WORD;
+			break;
+		default:
+			goto bad_bus_word;
+		}
+		break;
+
+	default:
+		PFM_ERR("Signal-group %d not implemented.",
+			signal->signal_group);
+		return -EINVAL;
+	}
+
+	rmw_spr(SPRN_HID1, keep_mask, or_bits);
+	return 0;
+
+bad_bus_word:
+	/* Shared error exit for both supported signal-groups. */
+	PFM_ERR("Invalid bus-word (0x%x) for signal-group %d.",
+		signal->bus_word, signal->signal_group);
+	return -EINVAL;
+}
|
|
+
|
|
+/**
+ * celleb_activate_signals
+ *
+ * Firmware-free (non-RTAS) activation of debug-bus signals. Each entry of
+ * @signals is routed through ppu_selection(); once all of them succeeded,
+ * passthru mode is enabled on the CPU named by the first entry.
+ *
+ * Returns -EINVAL when @num_signals is not positive or a signal-group is
+ * unsupported, the error of ppu_selection()/passthru_enable() otherwise,
+ * and 0 on success.
+ **/
+static int celleb_activate_signals(struct cell_rtas_arg *signals,
+				   int num_signals)
+{
+	int idx, err;
+
+	/* Nothing to activate is reported as an error. */
+	if (num_signals <= 0)
+		return -EINVAL;
+
+	for (idx = 0; idx < num_signals; idx++) {
+		switch (signals[idx].signal_group) {
+
+		/* 2.x PowerPC Processor Unit (PPU) Signal Selection */
+		case SIG_GROUP_PPU_IU1:
+		case SIG_GROUP_PPU_XU:
+			err = ppu_selection(&signals[idx]);
+			if (err)
+				return err;
+			break;
+
+		default:
+			PFM_ERR("Signal-group %d not implemented.",
+				signals[idx].signal_group);
+			return -EINVAL;
+		}
+	}
+
+	return passthru_enable(signals[0].cpu);
+}
|
|
+
|
|
+/**
+ * ps3_reset_signals
+ *
+ * PS3 flavour of the debug-bus reset: a single ps3_set_signal() call with
+ * all-zero arguments. @cpu is not used. Without CONFIG_PPC_PS3 this is a
+ * no-op that reports success.
+ **/
+static int ps3_reset_signals(u32 cpu)
+{
+	int rc = 0;
+
+#ifdef CONFIG_PPC_PS3
+	rc = ps3_set_signal(0, 0, 0, 0);
+#endif
+	return rc;
+}
|
|
+
|
|
+/**
+ * ps3_activate_signals
+ *
+ * PS3 flavour of activating the debug-bus signals: every entry of @signals
+ * is handed to ps3_set_signal().
+ *
+ * All entries are attempted even if one of them fails, so a single bad
+ * signal does not prevent the others from being programmed.
+ *
+ * Returns 0 when every ps3_set_signal() call returned 0 (or when built
+ * without CONFIG_PPC_PS3), otherwise the first non-zero value returned.
+ * NOTE(review): assumes a non-zero result from ps3_set_signal() means
+ * failure, as ps3_reset_signals() already treats it - confirm.
+ **/
+static int ps3_activate_signals(struct cell_rtas_arg *signals,
+				int num_signals)
+{
+	int rc = 0;
+#ifdef CONFIG_PPC_PS3
+	int i, err;
+
+	for (i = 0; i < num_signals; i++) {
+		err = ps3_set_signal(signals[i].signal_group, signals[i].bit,
+				     signals[i].sub_unit, signals[i].bus_word);
+		/* Remember only the first failure; keep programming the rest. */
+		if (err && !rc)
+			rc = err;
+	}
+#endif
+	return rc;
+}
|
|
+
|
|
+
|
|
+/**
+ * reset_signals
+ *
+ * Reset the debug-bus signals using the method that fits the machine:
+ * the built-in register sequence on celleb, the PS3 call on ps3, and the
+ * RTAS firmware call everywhere else.
+ *
+ * Returns whatever the selected back-end returns.
+ **/
+int reset_signals(u32 cpu)
+{
+	if (machine_is(celleb))
+		return celleb_reset_signals(cpu);
+
+	if (machine_is(ps3))
+		return ps3_reset_signals(cpu);
+
+	return rtas_reset_signals(cpu);
+}
|
|
+
|
|
+/**
+ * activate_signals
+ *
+ * Activate @num_signals debug-bus signals using the method that fits the
+ * machine: the built-in register sequence on celleb, the PS3 call on ps3,
+ * and the RTAS firmware call everywhere else.
+ *
+ * Returns whatever the selected back-end returns.
+ **/
+int activate_signals(struct cell_rtas_arg *signals, int num_signals)
+{
+	if (machine_is(celleb))
+		return celleb_activate_signals(signals, num_signals);
+
+	if (machine_is(ps3))
+		return ps3_activate_signals(signals, num_signals);
+
+	return rtas_activate_signals(signals, num_signals);
+}
|
|
+
|
|
+/**
+ * pfm_cell_pmc_check
+ *
+ * Validate a value that is about to be written to a pmX_event PMC.
+ *
+ * The request is rejected with -EINVAL when the register is not one of the
+ * pmX_event PMCs (NR_CTRS .. 2*NR_CTRS-1), when the signal-group is not a
+ * supported PPU group, or when the bus word is neither 0 nor 1. It is
+ * rejected with -EBUSY when another in-use pmX_event PMC of @set already
+ * occupies the same bus word with a different signal-group.
+ *
+ * Returns 0 when the value is acceptable. @ctx is not used.
+ **/
+int pfm_cell_pmc_check(struct pfm_context *ctx,
+		       struct pfm_event_set *set,
+		       struct pfarg_pmc *req)
+{
+	u16 reg_num = req->reg_num;
+	u16 other;
+	s16 signal_group = RTAS_SIGNAL_GROUP(req->reg_value);
+	u8 bus_word = RTAS_BUS_WORD(req->reg_value);
+
+	if (!(reg_num >= NR_CTRS && reg_num < (NR_CTRS * 2)))
+		return -EINVAL;
+
+	switch (signal_group) {
+	case SIG_GROUP_PPU_IU1:
+	case SIG_GROUP_PPU_XU:
+		break;
+	default:
+		PFM_ERR("Signal-group %d not implemented.", signal_group);
+		return -EINVAL;
+	}
+
+	if (bus_word > 1) {
+		PFM_ERR("Invalid bus word (%d) for signal-group %d",
+			bus_word, signal_group);
+		return -EINVAL;
+	}
+
+	/* Look for an in-use event PMC that clashes on the same bus word. */
+	for (other = NR_CTRS; other < (NR_CTRS * 2); other++) {
+		if (!test_bit(other, cast_ulp(set->used_pmcs)))
+			continue;
+		if (bus_word != RTAS_BUS_WORD(set->pmcs[other]))
+			continue;
+		if (signal_group == RTAS_SIGNAL_GROUP(set->pmcs[other]))
+			continue;
+
+		PFM_ERR("Impossible signal-group combination: "
+			"(%u,%u,%d) (%u,%u,%d)",
+			reg_num, bus_word, signal_group, other,
+			RTAS_BUS_WORD(set->pmcs[other]),
+			RTAS_SIGNAL_GROUP(set->pmcs[other]));
+		return -EBUSY;
+	}
+
+	return 0;
+}
|
|
+
|
|
+/**
+ * write_pm07_event
+ *
+ * Decode the signal description packed into the 64-bit pmX_event @value
+ * and activate that single signal through activate_signals().
+ *
+ * A signal number of zero means the counter counts cycles; nothing is
+ * activated in that case. @ctr is only used in the warning that is logged
+ * when activation fails; the failure itself is not reported to the caller.
+ **/
+static void write_pm07_event(int cpu, unsigned int ctr, u64 value)
+{
+	struct cell_rtas_arg signal;
+	s32 signal_number = RTAS_SIGNAL_NUMBER(value);
+	int rc;
+
+	/* Don't include counters that are counting cycles. */
+	if (signal_number == 0)
+		return;
+
+	/* The signal number encodes group * 100 + bit. */
+	signal.signal_group = signal_number / 100;
+	signal.bit = abs(signal_number) % 100;
+	signal.sub_unit = RTAS_SUB_UNIT(value);
+	signal.bus_word = 1 << RTAS_BUS_WORD(value);
+	signal.cpu = RTAS_CPU(cpu);
+
+	rc = activate_signals(&signal, 1);
+	if (!rc)
+		return;
+
+	/* FIX: Could we change this routine to return an error? */
+	PFM_WARN("%s(%d, %u, %lu): Error calling "
+		 "activate_signals(): %d\n", __func__,
+		 cpu, ctr, (unsigned long)value, rc);
+}
|
|
+
|
|
+/**
+ * pfm_cell_probe_pmu
+ *
+ * Decide whether we are running on a Cell system by comparing the version
+ * field of the processor version register with PV_BE.
+ *
+ * Returns 0 on a match, -1 otherwise.
+ **/
+static int pfm_cell_probe_pmu(void)
+{
+	unsigned long version_reg = mfspr(SPRN_PVR);
+
+	return (PVR_VER(version_reg) == PV_BE) ? 0 : -1;
+}
|
|
+
|
|
+/**
+ * pfm_cell_write_pmc
+ *
+ * Write @value to PMC number @cnum on the current CPU. The PMC index
+ * selects the destination:
+ *   0 .. NR_CTRS-1           per-counter control register (pmX_control)
+ *   NR_CTRS .. 2*NR_CTRS-1   per-counter event (pmX_event)
+ *   CELL_PMC_PM_STATUS       pm_status, written via enable_pm_interrupts
+ *   up to PFM_PM_NUM_PMCS-1  remaining global PMCs
+ * Indices outside these ranges are silently ignored.
+ **/
+static void pfm_cell_write_pmc(unsigned int cnum, u64 value)
+{
+	int cpu = smp_processor_id();
+	struct pfm_arch_pmu_info *arch_info =
+		(struct pfm_arch_pmu_info *)(pfm_pmu_conf->pmu_info);
+	struct pfm_cell_platform_pmu_info *hooks = arch_info->platform_info;
+
+	if (cnum < NR_CTRS) {
+		hooks->write_pm07_control(cpu, cnum, value);
+		return;
+	}
+
+	if (cnum < NR_CTRS * 2) {
+		write_pm07_event(cpu, cnum - NR_CTRS, value);
+		return;
+	}
+
+	if (cnum == CELL_PMC_PM_STATUS) {
+		/* pm_status is special among the "global" PMCs: this call
+		 * routes the interrupts to the correct CPU in addition to
+		 * writing the requested value to the register.
+		 */
+		hooks->enable_pm_interrupts(cpu, hooks->get_hw_thread_id(cpu),
+					    value);
+		return;
+	}
+
+	if (cnum < PFM_PM_NUM_PMCS)
+		hooks->write_pm(cpu, cnum - (NR_CTRS * 2), value);
+}
|
|
+
|
|
+/**
+ * pfm_cell_write_pmd
+ *
+ * Write @value to counter @cnum on the current CPU. Indices at or above
+ * NR_CTRS are ignored.
+ **/
+static void pfm_cell_write_pmd(unsigned int cnum, u64 value)
+{
+	int cpu = smp_processor_id();
+	struct pfm_arch_pmu_info *arch_info =
+		(struct pfm_arch_pmu_info *)(pfm_pmu_conf->pmu_info);
+	struct pfm_cell_platform_pmu_info *hooks = arch_info->platform_info;
+
+	if (cnum >= NR_CTRS)
+		return;
+
+	hooks->write_ctr(cpu, cnum, value);
+}
|
|
+
|
|
+/**
+ * pfm_cell_read_pmd
+ *
+ * Read counter @cnum on the current CPU.
+ *
+ * For an index at or above NR_CTRS the function returns -EINVAL converted
+ * to u64, which callers cannot tell apart from a counter value.
+ **/
+static u64 pfm_cell_read_pmd(unsigned int cnum)
+{
+	int cpu = smp_processor_id();
+	struct pfm_arch_pmu_info *arch_info =
+		(struct pfm_arch_pmu_info *)(pfm_pmu_conf->pmu_info);
+	struct pfm_cell_platform_pmu_info *hooks = arch_info->platform_info;
+
+	if (cnum >= NR_CTRS)
+		return -EINVAL;
+
+	return hooks->read_ctr(cpu, cnum);
+}
|
|
+
|
|
+/**
+ * pfm_cell_enable_counters
+ *
+ * Start counting on the current CPU by calling the platform's enable_pm
+ * hook. @ctx and @set are not used.
+ **/
+static void pfm_cell_enable_counters(struct pfm_context *ctx,
+				     struct pfm_event_set *set)
+{
+	struct pfm_arch_pmu_info *arch_info =
+		(struct pfm_arch_pmu_info *)(pfm_pmu_conf->pmu_info);
+	struct pfm_cell_platform_pmu_info *hooks = arch_info->platform_info;
+
+	hooks->enable_pm(smp_processor_id());
+}
|
|
+
|
|
+/**
+ * pfm_cell_disable_counters
+ *
+ * Stop counting on the current CPU by calling the platform's disable_pm
+ * hook. On PS3 the debug-bus signals are reset as well. @ctx and @set are
+ * not used.
+ **/
+static void pfm_cell_disable_counters(struct pfm_context *ctx,
+				      struct pfm_event_set *set)
+{
+	struct pfm_arch_pmu_info *arch_info =
+		(struct pfm_arch_pmu_info *)(pfm_pmu_conf->pmu_info);
+	struct pfm_cell_platform_pmu_info *hooks = arch_info->platform_info;
+
+	hooks->disable_pm(smp_processor_id());
+
+	if (!machine_is(ps3))
+		return;
+
+	reset_signals(smp_processor_id());
+}
|
|
+
|
|
+/*
+ * Map a PPU signal (group, bit) to the hardware thread it belongs to.
+ * Bits below the group's THREAD1 base bit belong to thread 0; everything
+ * else, including any group other than IU1/XU, is reported as thread 1.
+ */
+static inline u32 get_target_ppu_thread_id(u32 group, u32 bit)
+{
+	if (group == SIG_GROUP_PPU_IU1 && bit < PFM_PPU_IU1_THREAD1_BASE_BIT)
+		return 0;
+
+	if (group == SIG_GROUP_PPU_XU && bit < PFM_PPU_XU_THREAD1_BASE_BIT)
+		return 0;
+
+	return 1;
+}
|
|
+
|
|
+/*
+ * Tell whether a counter is counting a PPU signal group: its control word
+ * must have CBE_PM_CTR_INPUT_CONTROL clear and CBE_PM_CTR_ENABLE set, and
+ * the signal group must be PPU IU1 or PPU XU. Returns 1 if so, else 0.
+ */
+static inline int is_counter_for_ppu_sig_grp(u32 counter_control, u32 sig_grp)
+{
+	if (counter_control & CBE_PM_CTR_INPUT_CONTROL)
+		return 0;
+
+	if (!(counter_control & CBE_PM_CTR_ENABLE))
+		return 0;
+
+	return (sig_grp == SIG_GROUP_PPU_IU1) || (sig_grp == SIG_GROUP_PPU_XU);
+}
|
|
+
|
|
+/*
+ * Find the PPU signal groups used by an event set.
+ *
+ * Scans the in-use pmX_event PMCs of @set. The first PPU IU1/XU group found
+ * on the bus word of group 0 (from the group-control PMC) is stored in
+ * *ppu_sig_grp0, the first one on the bus word of group 1 in *ppu_sig_grp1.
+ * Both outputs are zeroed first. Returns how many of the two were filled.
+ */
+static int get_ppu_signal_groups(struct pfm_event_set *set,
+				 u32 *ppu_sig_grp0, u32 *ppu_sig_grp1)
+{
+	u64 *used_pmcs = set->used_pmcs;
+	u64 event;
+	u32 word0, word1, word, group;
+	int ctr, found = 0;
+
+	*ppu_sig_grp0 = 0;
+	*ppu_sig_grp1 = 0;
+	word0 = PFM_GROUP_CONTROL_GROUP0_WORD(
+			set->pmcs[CELL_PMC_GROUP_CONTROL]);
+	word1 = PFM_GROUP_CONTROL_GROUP1_WORD(
+			set->pmcs[CELL_PMC_GROUP_CONTROL]);
+
+	for (ctr = 0; (ctr < NR_CTRS) && (found < PFM_NUM_OF_GROUPS); ctr++) {
+		if (!test_bit(ctr + NR_CTRS, used_pmcs))
+			continue;
+
+		event = set->pmcs[ctr + NR_CTRS];
+		word = PFM_EVENT_PMC_BUS_WORD(event);
+		group = PFM_EVENT_PMC_SIGNAL_GROUP(event);
+
+		if ((group != SIG_GROUP_PPU_IU1) &&
+		    (group != SIG_GROUP_PPU_XU))
+			continue;
+
+		if (word == word0 && *ppu_sig_grp0 == 0) {
+			*ppu_sig_grp0 = group;
+			found++;
+		} else if (word == word1 && *ppu_sig_grp1 == 0) {
+			*ppu_sig_grp1 = group;
+			found++;
+		}
+	}
+
+	return found;
+}
|
|
+
|
|
+/**
+ * pfm_cell_restore_pmcs
+ *
+ * Write all control register values saved in @set to the PMU of the
+ * current CPU:
+ *  - every pmX_control PMC (0 .. NR_CTRS-1) is written, used or not; writing
+ *    the saved value of an unused PMC simply clears the register, which
+ *    disables the associated counter;
+ *  - every in-use pmX_event PMC (NR_CTRS .. 2*NR_CTRS-1) is activated;
+ *  - the remaining global PMCs are written through pfm_cell_write_pmc()
+ *    rather than the raw write hook because pm_status needs special
+ *    handling there.
+ *
+ * In per-thread mode, the counter enable bit of a pmX_control PMC is only
+ * left set while the task runs on the hardware thread the counter was
+ * marked for (PFM_COUNTER_CTRL_PMC_PPU_TH0/TH1); otherwise it is cleared
+ * in the value that gets written.
+ *
+ * @ctx is not used.
+ **/
+void pfm_cell_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
+{
+	u64 ctr_ctrl;
+	u64 *used_pmcs = set->used_pmcs;
+	int i;
+	int cpu = smp_processor_id();
+	struct pfm_cell_platform_pmu_info *info =
+		((struct pfm_arch_pmu_info *)
+		 (pfm_pmu_conf->pmu_info))->platform_info;
+
+	for (i = 0; i < NR_CTRS; i++) {
+		ctr_ctrl = set->pmcs[i];
+
+		if (ctr_ctrl & PFM_COUNTER_CTRL_PMC_PPU_TH0) {
+			/* Counter belongs to HW thread 0: keep it disabled
+			 * while running on any other thread.
+			 */
+			if (info->get_hw_thread_id(cpu))
+				ctr_ctrl &= ~CBE_PM_CTR_ENABLE;
+
+		} else if (ctr_ctrl & PFM_COUNTER_CTRL_PMC_PPU_TH1) {
+			/* Counter belongs to HW thread 1: keep it disabled
+			 * while running on thread 0.
+			 */
+			if (!info->get_hw_thread_id(cpu))
+				ctr_ctrl &= ~CBE_PM_CTR_ENABLE;
+		}
+
+		info->write_pm07_control(cpu, i, ctr_ctrl);
+
+		/* Pass the real counter index so that a failure warning from
+		 * write_pm07_event() names the counter that failed.
+		 */
+		if (test_bit(i + NR_CTRS, used_pmcs))
+			write_pm07_event(cpu, i, set->pmcs[i + NR_CTRS]);
+	}
+
+	/* Global PMCs start right after the pmX_control/pmX_event pairs. */
+	for (i = NR_CTRS * 2; i < PFM_PM_NUM_PMCS; i++)
+		pfm_cell_write_pmc(i, set->pmcs[i]);
+}
|
|
+
|
|
+/**
+ * pfm_cell_restore_pmds
+ *
+ * Restore the counter values saved in @set on the current CPU.
+ *
+ * The pm_control register is written first (with CBE_PM_ENABLE_PERF_MON
+ * masked out) so that the counter width is decided before the counters
+ * themselves are written. Only in-use PMDs that are not read-only are
+ * restored.
+ **/
+void pfm_cell_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
+{
+	int cpu = smp_processor_id();
+	struct pfm_arch_pmu_info *arch_info =
+		(struct pfm_arch_pmu_info *)(pfm_pmu_conf->pmu_info);
+	struct pfm_cell_platform_pmu_info *hooks = arch_info->platform_info;
+	u64 *used_pmds;
+	unsigned int pmd, max_pmd;
+
+	/* pm_control goes first, with the monitor itself kept disabled. */
+	hooks->write_pm(cpu, pm_control,
+			set->pmcs[CELL_PMC_PM_CONTROL] &
+			~CBE_PM_ENABLE_PERF_MON);
+	PFM_DBG("restore pm_control(0x%lx) before restoring pmds",
+		set->pmcs[CELL_PMC_PM_CONTROL]);
+
+	max_pmd = ctx->regs.max_pmd;
+	used_pmds = set->used_pmds;
+
+	for (pmd = 0; pmd < max_pmd; pmd++) {
+		if (!test_bit(pmd, used_pmds))
+			continue;
+		if (pfm_pmu_conf->pmd_desc[pmd].type & PFM_REG_RO)
+			continue;
+
+		pfm_cell_write_pmd(pmd, set->pmds[pmd].value);
+	}
+}
|
|
+
|
|
+/*
+ * Decode the counter-width field of one set's pm_control PMC:
+ * 16 when all counters are 16 bit, 32 when the field is zero, 0 otherwise.
+ */
+static int pfm_cell_set_cntr_width(struct pfm_event_set *set)
+{
+	u64 cntr_field = set->pmcs[CELL_PMC_PM_CONTROL] &
+			 CELL_PMC_PM_CONTROL_CNTR_MASK;
+
+	if (cntr_field == CELL_PMC_PM_CONTROL_CNTR_16)
+		return 16;
+	if (cntr_field == 0x0)
+		return 32;
+	return 0;
+}
+
+/**
+ * pfm_cell_get_cntr_width
+ *
+ * Check the 16-bit counter field of the pm_control PMC. When the context
+ * switches sets (on overflow or on time), every set of the context is
+ * examined and they must all agree; otherwise only @s is examined.
+ *
+ * Return value
+ *  16 : all counters are 16 bit wide.
+ *  32 : all counters are 32 bit wide.
+ *  0  : several counter widths exist.
+ **/
+static int pfm_cell_get_cntr_width(struct pfm_context *ctx,
+				   struct pfm_event_set *s)
+{
+	struct pfm_event_set *cur;
+	int width = 0;
+	int set_width;
+
+	if (!(ctx->flags.switch_ovfl || ctx->flags.switch_time))
+		return pfm_cell_set_cntr_width(s);
+
+	list_for_each_entry(cur, &ctx->set_list, list) {
+		set_width = pfm_cell_set_cntr_width(cur);
+
+		/* Mixed widths inside one set, or between two sets. */
+		if (set_width == 0)
+			return 0;
+		if (width != 0 && set_width != width)
+			return 0;
+
+		width = set_width;
+	}
+
+	return width;
+}
|
|
+
|
|
+/**
+ * pfm_cell_check_cntr_ovfl
+ *
+ * Tell whether the counter-overflow interrupt is requested in the saved
+ * pm_status PMC. When the context switches sets (on overflow or on time),
+ * every set of the context is examined; otherwise only @s is.
+ *
+ * Return value
+ *  1 : cntr_ovfl interrupt is used.
+ *  0 : cntr_ovfl interrupt is not used.
+ **/
+static int pfm_cell_check_cntr_ovfl(struct pfm_context *ctx,
+				    struct pfm_event_set *s)
+{
+	struct pfm_event_set *cur;
+
+	if (!(ctx->flags.switch_ovfl || ctx->flags.switch_time)) {
+		if (CBE_PM_OVERFLOW_CTRS(s->pmcs[CELL_PMC_PM_STATUS]))
+			return 1;
+		return 0;
+	}
+
+	list_for_each_entry(cur, &ctx->set_list, list) {
+		if (CBE_PM_OVERFLOW_CTRS(cur->pmcs[CELL_PMC_PM_STATUS]))
+			return 1;
+	}
+
+	return 0;
+}
|
|
+
|
|
+#ifdef CONFIG_PPC_PS3
|
|
+/**
+ * update_sub_unit_field
+ *
+ * Return @pm_event with bits 32-47 cleared and @spe_id OR-ed in at bit 32.
+ * NOTE(review): @spe_id is not masked, so a value wider than 16 bits
+ * would spill into bits 48 and up - confirm callers never pass one.
+ **/
+static inline u64 update_sub_unit_field(u64 pm_event, u64 spe_id)
+{
+	u64 cleared = pm_event & 0xFFFF0000FFFFFFFF;
+
+	return cleared | (spe_id << 32);
+}
|
|
+
|
|
+/**
+ * pfm_get_spe_id
+ *
+ * Return the SPE id for the spu passed in @arg: on PS3 it is obtained from
+ * ps3_get_spe_id(), elsewhere it is the spe_id member of the struct spu.
+ **/
+static u64 pfm_get_spe_id(void *arg)
+{
+	struct spu *spu = arg;
+
+	if (machine_is(ps3))
+		return ps3_get_spe_id(arg);
+
+	return spu->spe_id;
+}
|
|
+
|
|
+/**
+ * pfm_spu_number_to_id
+ *
+ * Search the per-node SPU lists for the SPU whose number is @number and
+ * store its SPE id in *@spe_id.
+ *
+ * Returns 0 when found, -ENODEV otherwise (*@spe_id is then untouched).
+ **/
+static int pfm_spu_number_to_id(int number, u64 *spe_id)
+{
+	struct spu *spu;
+	int node;
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		/* Nodes without SPUs have nothing to search. */
+		if (cbe_spu_info[node].n_spus == 0)
+			continue;
+
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if (spu->number != number)
+				continue;
+
+			*spe_id = pfm_get_spe_id(spu);
+			return 0;
+		}
+	}
+
+	return -ENODEV;
+}
|
|
+
|
|
+/**
+ * pfm_update_pmX_event_subunit_field
+ *
+ * Used in system-wide mode. For every in-use pmX_event PMC of every set in
+ * @ctx that selects an SPE signal group, the sub-unit field (an SPU number
+ * on entry) is replaced by the matching SPE id.
+ *
+ * Returns 0 on success, or the error of pfm_spu_number_to_id() for the
+ * first SPU number that cannot be resolved; PMCs updated before that point
+ * stay updated.
+ **/
+static int pfm_update_pmX_event_subunit_field(struct pfm_context *ctx)
+{
+	struct pfm_event_set *set;
+	u64 *used_pmcs;
+	u64 signal_group, spe_id;
+	int sub_unit;
+	int pmc, err;
+	/* NOTE(review): presumably equals NR_CTRS * 2 as used elsewhere in
+	 * this file - confirm NR_CTRS is 8.
+	 */
+	int last_pmc = NR_CTRS + 8;
+
+	list_for_each_entry(set, &ctx->set_list, list) {
+		used_pmcs = set->used_pmcs;
+
+		for (pmc = NR_CTRS; pmc < last_pmc; pmc++) {
+			if (!test_bit(pmc, used_pmcs))
+				continue;
+
+			signal_group =
+				PFM_EVENT_PMC_SIGNAL_GROUP(set->pmcs[pmc]);
+
+			/* Only SPE signal groups carry an SPU number in the
+			 * sub_unit field; leave every other event alone.
+			 */
+			if (!(SIG_GROUP_SPU_BASE < signal_group &&
+			      signal_group < SIG_GROUP_EIB_BASE))
+				continue;
+
+			sub_unit = RTAS_SUB_UNIT(set->pmcs[pmc]);
+
+			err = pfm_spu_number_to_id(sub_unit, &spe_id);
+			if (err)
+				return err;
+
+			set->pmcs[pmc] = update_sub_unit_field(set->pmcs[pmc],
+							       spe_id);
+		}
+	}
+
+	return 0;
+}
|
|
+#endif
|
|
+
|
|
+/**
+ * pfm_cell_load_context
+ *
+ * Prepare a context for loading.
+ *
+ * If counter-overflow interrupts are in use, the counters must be 32 bit
+ * wide, because the perfmon core relies on pfm_cell_pmu_conf.counter_width
+ * to handle overflows and the width cannot be changed here; -EINVAL is
+ * returned otherwise.
+ *
+ * In system-wide mode, on PS3, the sub-unit fields of SPE events are
+ * converted and the result of that conversion is returned.
+ *
+ * In per-thread mode, the pmX_control PMCs which are used for PPU IU/XU
+ * events are marked with the thread id they target
+ * (PFM_COUNTER_CTRL_PMC_PPU_TH0/TH1), and 0 is returned.
+ **/
+static int pfm_cell_load_context(struct pfm_context *ctx)
+{
+	u32 groups[PFM_NUM_OF_GROUPS] = {SIG_GROUP_NONE, SIG_GROUP_NONE};
+	struct pfm_event_set *set;
+	u32 mux_bit;
+	u32 thread;
+	int found;
+	int grp;
+	int ctr;
+
+	if (pfm_cell_check_cntr_ovfl(ctx, ctx->active_set) &&
+	    pfm_cell_get_cntr_width(ctx, ctx->active_set) != 32)
+		return -EINVAL;
+
+	if (ctx->flags.system) {
+		int ret = 0;
+
+#ifdef CONFIG_PPC_PS3
+		if (machine_is(ps3))
+			ret = pfm_update_pmX_event_subunit_field(ctx);
+#endif
+		return ret;
+	}
+
+	list_for_each_entry(set, &ctx->set_list, list) {
+		found = get_ppu_signal_groups(set, &groups[0], &groups[1]);
+
+		for (ctr = 0; ctr < NR_CTRS; ctr++) {
+			grp = PFM_PM_CTR_INPUT_MUX_GROUP_INDEX(set->pmcs[ctr]);
+
+			/* Skip counters that do not count a PPU signal. */
+			if (!found)
+				continue;
+			if (groups[grp] == SIG_GROUP_NONE)
+				continue;
+			if (!is_counter_for_ppu_sig_grp(set->pmcs[ctr],
+							groups[grp]))
+				continue;
+
+			mux_bit = PFM_PM_CTR_INPUT_MUX_BIT(set->pmcs[ctr]);
+			thread = get_target_ppu_thread_id(groups[grp],
+							  mux_bit);
+			if (thread)
+				set->pmcs[ctr] |= PFM_COUNTER_CTRL_PMC_PPU_TH1;
+			else
+				set->pmcs[ctr] |= PFM_COUNTER_CTRL_PMC_PPU_TH0;
+
+			PFM_DBG("set:%d mark ctr:%d target_thread:%d",
+				set->id, ctr, thread);
+		}
+	}
+
+	return 0;
+}
|
|
+
|
|
+/**
+ * pfm_cell_unload_context
+ *
+ * Reset the debug-bus signals when a system-wide or self-monitored context
+ * is unloaded.
+ *
+ * For non-self-monitored contexts the monitored thread has already been
+ * taken off the CPU, so there is nothing left to do.
+ **/
+static void pfm_cell_unload_context(struct pfm_context *ctx)
+{
+	if (ctx->task != current && !ctx->flags.system)
+		return;
+
+	reset_signals(smp_processor_id());
+}
|
|
+
|
|
+/**
+ * pfm_cell_ctxswout_thread
+ *
+ * Called when a monitored thread (self-monitored or externally monitored)
+ * is switched out. The debug-bus signals are reset so that the next
+ * context switched in starts from a clean set of signals.
+ *
+ * Always returns 0; the result of the reset is not checked. @task, @ctx
+ * and @set are not used.
+ **/
+int pfm_cell_ctxswout_thread(struct task_struct *task,
+			     struct pfm_context *ctx, struct pfm_event_set *set)
+{
+	int cpu = smp_processor_id();
+
+	reset_signals(cpu);
+
+	return 0;
+}
|
|
+
|
|
+/**
+ * pfm_cell_get_ovfl_pmds
+ *
+ * Work out which counters of @set have overflowed and fill in the
+ * set->povfl_pmds mask and the set->npend_ovfls count.
+ *
+ * On Cell the pm_status register holds one overflow bit per counter, but
+ * in the reverse order of what Perfmon2 expects, so the bits are reversed
+ * before they are stored.
+ **/
+static void pfm_cell_get_ovfl_pmds(struct pfm_context *ctx,
+				   struct pfm_event_set *set)
+{
+	struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
+	struct pfm_arch_pmu_info *arch_info =
+		(struct pfm_arch_pmu_info *)(pfm_pmu_conf->pmu_info);
+	struct pfm_cell_platform_pmu_info *hooks = arch_info->platform_info;
+	u32 pm_status, ovfl_ctrs;
+	u64 povfl_pmds = 0;
+	int n;
+
+	if (!ctx_arch->last_read_updated) {
+		/* We did not get here through the interrupt handler, so
+		 * fetch (and clear) the interrupt bits ourselves to bring
+		 * last_read_pm_status up to date.
+		 */
+		ctx_arch->last_read_pm_status =
+			hooks->get_and_clear_pm_interrupts(smp_processor_id());
+	}
+
+	/* The status captured by the interrupt handler is consumed now. */
+	ctx_arch->last_read_updated = 0;
+
+	/* Keep only the interrupts this set asked for. */
+	pm_status = ctx_arch->last_read_pm_status &
+		    set->pmcs[CELL_PMC_PM_STATUS];
+	ovfl_ctrs = CBE_PM_OVERFLOW_CTRS(pm_status);
+
+	/* Bit-reverse ovfl_ctrs into povfl_pmds, one bit per PMD. */
+	for (n = 0; n < PFM_PM_NUM_PMDS; n++) {
+		povfl_pmds <<= 1;
+		povfl_pmds |= (ovfl_ctrs & 1);
+		ovfl_ctrs >>= 1;
+	}
+
+	/* Restrict the result to PMDs in use by this set, then count them. */
+	bitmap_and(set->povfl_pmds, &povfl_pmds,
+		   set->used_pmds, PFM_PM_NUM_PMDS);
+	set->npend_ovfls = bitmap_weight(set->povfl_pmds, PFM_PM_NUM_PMDS);
+}
|
|
+
|
|
+/**
+ * pfm_cell_acquire_pmu
+ *
+ * Acquire the PMU resource; this is done when the first context is
+ * created. Only PS3 needs work here: the logical performance monitor is
+ * opened with the internal trace buffer type.
+ *
+ * Returns 0 on success, -EFAULT if the PS3 lpm cannot be opened (the
+ * original error code is only logged). Both arguments are unused.
+ **/
+int pfm_cell_acquire_pmu(u64 *unavail_pmcs, u64 *unavail_pmds)
+{
+#ifdef CONFIG_PPC_PS3
+	int ret;
+
+	if (!machine_is(ps3))
+		return 0;
+
+	PFM_DBG("");
+	ret = ps3_lpm_open(PS3_LPM_TB_TYPE_INTERNAL, NULL, 0);
+	if (ret) {
+		PFM_ERR("Can't create PS3 lpm. error:%d", ret);
+		return -EFAULT;
+	}
+#endif
+	return 0;
+}
|
|
+
|
|
+/**
+ * pfm_cell_release_pmu
+ *
+ * Release the PMU resource; the actual release happens when the last
+ * context is destroyed. Only PS3 needs work here: the logical performance
+ * monitor is closed and a failure to do so is logged.
+ **/
+void pfm_cell_release_pmu(void)
+{
+#ifdef CONFIG_PPC_PS3
+	if (machine_is(ps3) && ps3_lpm_close())
+		PFM_ERR("Can't delete PS3 lpm.");
+#endif
+}
|
|
+
|
|
+/**
+ * handle_trace_buffer_interrupts
+ *
+ * Placeholder for processing the interval timer and trace buffer overflow
+ * interrupts. Performance counter interrupts are handled elsewhere, by the
+ * code that reads and saves the pm_status register; an implementation of
+ * this routine must therefore use the @pm_status value passed in instead
+ * of reading the register again.
+ *
+ * FIX: Currently ignoring trace-buffer interrupts - the body is empty.
+ **/
+static void handle_trace_buffer_interrupts(unsigned long iip,
+					   struct pt_regs *regs,
+					   struct pfm_context *ctx,
+					   u32 pm_status)
+{
+}
|
|
+
|
|
+/**
+ * pfm_cell_irq_handler
+ *
+ * Handler for all Cell performance-monitor interrupts. The sequence is:
+ * disable the monitor, read-and-clear pm_status, dispatch to the overflow
+ * and/or trace-buffer handlers, re-arm the interrupts, enable the monitor.
+ **/
+static void pfm_cell_irq_handler(struct pt_regs *regs, struct pfm_context *ctx)
+{
+	struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
+	struct pfm_arch_pmu_info *arch_info =
+		(struct pfm_arch_pmu_info *)(pfm_pmu_conf->pmu_info);
+	struct pfm_cell_platform_pmu_info *hooks = arch_info->platform_info;
+	int cpu = smp_processor_id();
+	u32 status;
+
+	/* The Cell PMU hardware only behaves as desired when the
+	 * performance counters are disabled here and re-enabled at the end.
+	 */
+	hooks->disable_pm(cpu);
+
+	/* Read the pm_status register to get the interrupt bits. Reading it
+	 * resets the interrupt flags to zero, so it must be read in exactly
+	 * one place; the value is kept in the pfm_arch_context structure
+	 * for use by other routines.
+	 *
+	 * The pm_status interrupt format is:
+	 *      [pmd0:pmd1:pmd2:pmd3:pmd4:pmd5:pmd6:pmd7:intt:tbf:tbu:]
+	 * - pmd0 to pmd7 are the perf counter overflow interrupts.
+	 * - intt is the interval timer overflowed interrupt.
+	 * - tbf is the trace buffer full interrupt.
+	 * - tbu is the trace buffer underflow interrupt.
+	 * - The pmd0 bit is the MSB of the 32 bit register.
+	 */
+	status = hooks->get_and_clear_pm_interrupts(cpu);
+	ctx_arch->last_read_pm_status = status;
+
+	/* Tell pfm_cell_get_ovfl_pmds() that last_read_pm_status was
+	 * refreshed by the interrupt handler.
+	 */
+	ctx_arch->last_read_updated = 1;
+
+	if (status & CBE_PM_ALL_OVERFLOW_INTR) {
+		/* At least one counter overflowed: let the core perfmon
+		 * interrupt handler service it.
+		 */
+		pfm_interrupt_handler(instruction_pointer(regs), regs);
+	}
+
+	if (status & (CBE_PM_INTERVAL_INTR |
+		      CBE_PM_TRACE_BUFFER_FULL_INTR |
+		      CBE_PM_TRACE_BUFFER_UNDERFLOW_INTR)) {
+		/* Trace buffer or interval timer overflow. */
+		handle_trace_buffer_interrupts(instruction_pointer(regs),
+					       regs, ctx, status);
+	}
+
+	/* The interrupt settings are the value written to the pm_status
+	 * register; they were saved in the context when the register was
+	 * written.
+	 */
+	hooks->enable_pm_interrupts(cpu, hooks->get_hw_thread_id(cpu),
+				    ctx->active_set->pmcs[CELL_PMC_PM_STATUS]);
+
+	/* Writes to the various performance counter registers only reach a
+	 * latch. The new values (interrupt setting bits, reset counter value
+	 * etc.) are not copied to the actual registers until the performance
+	 * monitor is enabled, which is why it has to stay disabled while
+	 * writing to the latches. This is a HW design issue.
+	 */
+	hooks->enable_pm(cpu);
+}
|
|
+
|
|
+
|
|
+/*
+ * Platform hooks selected by pfm_cell_platform_probe() when running on PS3.
+ * The table is empty unless CONFIG_PPC_PS3 is set. The register-map getters
+ * are NULL here; in this file they are only called from passthru(), which
+ * is reached through the celleb code paths.
+ */
+static struct pfm_cell_platform_pmu_info ps3_platform_pmu_info = {
|
|
+#ifdef CONFIG_PPC_PS3
|
|
+ .read_ctr = ps3_read_ctr,
|
|
+ .write_ctr = ps3_write_ctr,
|
|
+ .write_pm07_control = ps3_write_pm07_control,
|
|
+ .write_pm = ps3_write_pm,
|
|
+ .enable_pm = ps3_enable_pm,
|
|
+ .disable_pm = ps3_disable_pm,
|
|
+ .enable_pm_interrupts = ps3_enable_pm_interrupts,
|
|
+ .get_and_clear_pm_interrupts = ps3_get_and_clear_pm_interrupts,
|
|
+ .get_hw_thread_id = ps3_get_hw_thread_id,
|
|
+ .get_cpu_ppe_priv_regs = NULL,
|
|
+ .get_cpu_pmd_regs = NULL,
|
|
+ .get_cpu_mic_tm_regs = NULL,
|
|
+ .rtas_token = NULL,
|
|
+ .rtas_call = NULL,
|
|
+#endif
|
|
+};
|
|
+
|
|
+/*
+ * Platform hooks selected by pfm_cell_platform_probe() on every machine
+ * that is not a PS3. The table is empty unless CONFIG_PPC_CELL_NATIVE is
+ * set; it maps each hook onto the corresponding cbe_* routine and the
+ * kernel's RTAS helpers.
+ */
+static struct pfm_cell_platform_pmu_info native_platform_pmu_info = {
|
|
+#ifdef CONFIG_PPC_CELL_NATIVE
|
|
+ .read_ctr = cbe_read_ctr,
|
|
+ .write_ctr = cbe_write_ctr,
|
|
+ .write_pm07_control = cbe_write_pm07_control,
|
|
+ .write_pm = cbe_write_pm,
|
|
+ .enable_pm = cbe_enable_pm,
|
|
+ .disable_pm = cbe_disable_pm,
|
|
+ .enable_pm_interrupts = cbe_enable_pm_interrupts,
|
|
+ .get_and_clear_pm_interrupts = cbe_get_and_clear_pm_interrupts,
|
|
+ .get_hw_thread_id = cbe_get_hw_thread_id,
|
|
+ .get_cpu_ppe_priv_regs = cbe_get_cpu_ppe_priv_regs,
|
|
+ .get_cpu_pmd_regs = cbe_get_cpu_pmd_regs,
|
|
+ .get_cpu_mic_tm_regs = cbe_get_cpu_mic_tm_regs,
|
|
+ .rtas_token = rtas_token,
|
|
+ .rtas_call = rtas_call,
|
|
+#endif
|
|
+};
|
|
+
|
|
+/*
+ * Arch-level callback table for the Cell PMU, referenced from
+ * pfm_cell_pmu_conf.pmu_info. The platform_info member is not set here;
+ * pfm_cell_platform_probe() fills it in at module init.
+ */
+static struct pfm_arch_pmu_info pfm_cell_pmu_info = {
|
|
+ .pmu_style = PFM_POWERPC_PMU_CELL,
|
|
+ .acquire_pmu = pfm_cell_acquire_pmu,
|
|
+ .release_pmu = pfm_cell_release_pmu,
|
|
+ .write_pmc = pfm_cell_write_pmc,
|
|
+ .write_pmd = pfm_cell_write_pmd,
|
|
+ .read_pmd = pfm_cell_read_pmd,
|
|
+ .enable_counters = pfm_cell_enable_counters,
|
|
+ .disable_counters = pfm_cell_disable_counters,
|
|
+ .irq_handler = pfm_cell_irq_handler,
|
|
+ .get_ovfl_pmds = pfm_cell_get_ovfl_pmds,
|
|
+ .restore_pmcs = pfm_cell_restore_pmcs,
|
|
+ .restore_pmds = pfm_cell_restore_pmds,
|
|
+ .ctxswout_thread = pfm_cell_ctxswout_thread,
|
|
+ .load_context = pfm_cell_load_context,
|
|
+ .unload_context = pfm_cell_unload_context,
|
|
+};
|
|
+
|
|
+/*
+ * PMU description registered with the perfmon core by
+ * pfm_cell_pmu_init_module(). counter_width is fixed at 32, which is why
+ * pfm_cell_load_context() refuses other widths when overflow interrupts
+ * are in use. pmc_write_check is not set here; pfm_cell_platform_probe()
+ * installs it on celleb machines only.
+ */
+static struct pfm_pmu_config pfm_cell_pmu_conf = {
|
|
+ .pmu_name = "Cell",
|
|
+ .version = "0.1",
|
|
+ .counter_width = 32,
|
|
+ .pmd_desc = pfm_cell_pmd_desc,
|
|
+ .pmc_desc = pfm_cell_pmc_desc,
|
|
+ .num_pmc_entries = PFM_PM_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_PM_NUM_PMDS,
|
|
+ .probe_pmu = pfm_cell_probe_pmu,
|
|
+ .pmu_info = &pfm_cell_pmu_info,
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+};
|
|
+
|
|
+/**
+ * pfm_cell_platform_probe
+ *
+ * Adapt the PMU description to the machine we are running on.
+ *
+ * On celleb, where the firmware RTAS call is not available, install the
+ * PMC write-checker and flag every pmX_event control register as
+ * write-checked. Then select the platform hook table: the PS3 one on ps3,
+ * the native one everywhere else.
+ **/
+static void pfm_cell_platform_probe(void)
+{
+	int cnum;
+
+	if (machine_is(celleb)) {
+		pfm_cell_pmu_conf.pmc_write_check = pfm_cell_pmc_check;
+
+		cnum = NR_CTRS;
+		while (cnum < (NR_CTRS * 2)) {
+			pfm_cell_pmc_desc[cnum].type |= PFM_REG_WC;
+			cnum++;
+		}
+	}
+
+	if (machine_is(ps3)) {
+		pfm_cell_pmu_info.platform_info = &ps3_platform_pmu_info;
+		return;
+	}
+
+	pfm_cell_pmu_info.platform_info = &native_platform_pmu_info;
+}
|
|
+
|
|
+/*
+ * Module entry point: select the platform-specific hooks first, then
+ * register the Cell PMU description with the perfmon core. Returns the
+ * result of pfm_pmu_register().
+ */
+static int __init pfm_cell_pmu_init_module(void)
|
|
+{
|
|
+ pfm_cell_platform_probe();
|
|
+ return pfm_pmu_register(&pfm_cell_pmu_conf);
|
|
+}
|
|
+
|
|
+/* Module exit point: unregister the Cell PMU description again. */
+static void __exit pfm_cell_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_cell_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_cell_pmu_init_module);
|
|
+module_exit(pfm_cell_pmu_cleanup_module);
|
|
diff --git a/arch/powerpc/perfmon/perfmon_power4.c b/arch/powerpc/perfmon/perfmon_power4.c
|
|
new file mode 100644
|
|
index 0000000..eba9e8c
|
|
--- /dev/null
|
|
+++ b/arch/powerpc/perfmon/perfmon_power4.c
|
|
@@ -0,0 +1,309 @@
|
|
+/*
|
|
+ * This file contains the POWER4 PMU register description tables
|
|
+ * and pmc checker used by perfmon.c.
|
|
+ *
|
|
+ * Copyright (c) 2007, IBM Corporation.
|
|
+ *
|
|
+ * Based on a simple modification of perfmon_power5.c for POWER4 by
|
|
+ * Corey Ashford <cjashfor@us.ibm.com>.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+MODULE_AUTHOR("Corey Ashford <cjashfor@us.ibm.com>");
|
|
+MODULE_DESCRIPTION("POWER4 PMU description table");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+static struct pfm_regmap_desc pfm_power4_pmc_desc[] = { /* looked up by cnum in pfm_power4_write_pmc() */
|
|
+/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0), /* index 0: enable_counters writes it last */
|
|
+/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1),
|
|
+/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA)
|
|
+};
|
|
+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power4_pmc_desc)
|
|
+
|
|
+/* The TB register is read-only (this table has no PURR entry). Note that TB
|
|
+ * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers.
|
|
+ * For Perfmon2's purposes, we'll treat it as a single 64-bit register.
|
|
+ */
|
|
+static struct pfm_regmap_desc pfm_power4_pmd_desc[] = {
|
|
+/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
|
|
+/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
|
|
+/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
|
|
+/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
|
|
+/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
|
|
+/* pmd5 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5),
|
|
+/* pmd6 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6),
|
|
+/* pmd7 */ PMD_D(PFM_REG_C, "PMC7", SPRN_PMC7),
|
|
+/* pmd8 */ PMD_D(PFM_REG_C, "PMC8", SPRN_PMC8)
|
|
+};
|
|
+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power4_pmd_desc)
|
|
+
|
|
+static int pfm_power4_probe_pmu(void)
|
|
+{
|
|
+ unsigned long pvr = mfspr(SPRN_PVR);
|
|
+ int ver = PVR_VER(pvr); /* only the version field is compared */
|
|
+
|
|
+ if ((ver == PV_POWER4) || (ver == PV_POWER4p))
|
|
+ return 0; /* PV_POWER4 or PV_POWER4p: this description applies */
|
|
+
|
|
+ return -1; /* any other processor version: decline */
|
|
+}
|
|
+
|
|
+static void pfm_power4_write_pmc(unsigned int cnum, u64 value)
|
|
+{
|
|
+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { /* cnum indexes pfm_pmu_conf's pmc_desc table */
|
|
+ case SPRN_MMCR0:
|
|
+ mtspr(SPRN_MMCR0, value); /* SPR spelled out per case; presumably mtspr() needs a constant */
|
|
+ break;
|
|
+ case SPRN_MMCR1:
|
|
+ mtspr(SPRN_MMCR1, value);
|
|
+ break;
|
|
+ case SPRN_MMCRA:
|
|
+ mtspr(SPRN_MMCRA, value);
|
|
+ break;
|
|
+ default:
|
|
+ BUG(); /* hw_addr is not one of the three MMCRs handled above */
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_power4_write_pmd(unsigned int cnum, u64 value)
|
|
+{
|
|
+ u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; /* applied to every value before it is written to a PMCn */
|
|
+
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
|
|
+ case SPRN_PMC1:
|
|
+ mtspr(SPRN_PMC1, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC2:
|
|
+ mtspr(SPRN_PMC2, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC3:
|
|
+ mtspr(SPRN_PMC3, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC4:
|
|
+ mtspr(SPRN_PMC4, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC5:
|
|
+ mtspr(SPRN_PMC5, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC6:
|
|
+ mtspr(SPRN_PMC6, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC7:
|
|
+ mtspr(SPRN_PMC7, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC8:
|
|
+ mtspr(SPRN_PMC8, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_TBRL:
|
|
+ case SPRN_PURR: /* pfm_power4_pmd_desc has no PURR entry, so not reached via that table */
|
|
+ /* Ignore writes to read-only registers. */
|
|
+ break;
|
|
+ default:
|
|
+ BUG(); /* hw_addr matches no register handled above */
|
|
+ }
|
|
+}
|
|
+
|
|
+static u64 pfm_power4_read_pmd(unsigned int cnum)
|
|
+{
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { /* cnum indexes pfm_pmu_conf's pmd_desc table */
|
|
+ case SPRN_PMC1:
|
|
+ return mfspr(SPRN_PMC1);
|
|
+ case SPRN_PMC2:
|
|
+ return mfspr(SPRN_PMC2);
|
|
+ case SPRN_PMC3:
|
|
+ return mfspr(SPRN_PMC3);
|
|
+ case SPRN_PMC4:
|
|
+ return mfspr(SPRN_PMC4);
|
|
+ case SPRN_PMC5:
|
|
+ return mfspr(SPRN_PMC5);
|
|
+ case SPRN_PMC6:
|
|
+ return mfspr(SPRN_PMC6);
|
|
+ case SPRN_PMC7:
|
|
+ return mfspr(SPRN_PMC7);
|
|
+ case SPRN_PMC8:
|
|
+ return mfspr(SPRN_PMC8);
|
|
+ case SPRN_TBRL: /* TB is returned as one value built from TBRU and TBRL */
|
|
+ return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); /* NOTE(review): two separate reads, no retry if TBRL wraps in between */
|
|
+ case SPRN_PURR:
|
|
+ if (cpu_has_feature(CPU_FTR_PURR))
|
|
+ return mfspr(SPRN_PURR);
|
|
+ else
|
|
+ return 0; /* CPU without the PURR feature reads as 0 */
|
|
+ default:
|
|
+ BUG(); /* hw_addr matches no register handled above */
|
|
+ }
|
|
+}
|
|
+
|
|
+/* forward decl: pfm_power4_enable_counters() calls this before its definition */
|
|
+static void pfm_power4_disable_counters(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+
|
|
+/**
|
|
+ * pfm_power4_enable_counters
|
|
+ * Freeze the counters, then write every PMC flagged in set->used_pmcs from set->pmcs[].
|
|
+ **/
|
|
+static void pfm_power4_enable_counters(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int i, max_pmc;
|
|
+
|
|
+ /* Make sure the counters are disabled before touching the other
|
|
+ control registers */
|
|
+ pfm_power4_disable_counters(ctx, set);
|
|
+
|
|
+ max_pmc = ctx->regs.max_pmc;
|
|
+
|
|
+ /* Write MMCR0 last, and a fairly easy way to do this is to write
|
|
+ the registers in the reverse order */
|
|
+ for (i = max_pmc; i != 0; i--) /* i runs max_pmc..1, so the unsigned i - 1 never wraps */
|
|
+ if (test_bit(i - 1, set->used_pmcs))
|
|
+ pfm_power4_write_pmc(i - 1, set->pmcs[i - 1]); /* index 0 (MMCR0) goes last */
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_power4_disable_counters
|
|
+ * Freeze counting by setting MMCR0_FC in MMCR0; ctx and set are not used.
|
|
+ **/
|
|
+static void pfm_power4_disable_counters(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ /* Set the Freeze Counters bit */
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); /* read-modify-write: other MMCR0 bits are kept */
|
|
+ asm volatile ("sync"); /* presumably so the freeze has taken effect before returning - confirm */
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_power4_get_ovfl_pmds
|
|
+ *
|
|
+ * Determine which counters in this set have overflowed and fill in the
|
|
+ * set->povfl_pmds mask and set->npend_ovfls count.
|
|
+ **/
|
|
+static void pfm_power4_get_ovfl_pmds(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int i;
|
|
+ unsigned int max_pmd = ctx->regs.max_intr_pmd;
|
|
+ u64 *used_pmds = set->used_pmds;
|
|
+ u64 *cntr_pmds = ctx->regs.cnt_pmds;
|
|
+ u64 width_mask = 1ULL << pfm_pmu_conf->counter_width; /* 1ULL: shift in 64 bits, not in signed int */
|
|
+ u64 new_val, mask[PFM_PMD_BV];
|
|
+
|
|
+ bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds),
|
|
+ cast_ulp(used_pmds), max_pmd); /* mask = counting pmds that this set uses */
|
|
+
|
|
+ for (i = 0; i < max_pmd; i++) {
|
|
+ if (test_bit(i, cast_ulp(mask))) { /* same unsigned long view as bitmap_and() above */
|
|
+ new_val = pfm_power4_read_pmd(i);
|
|
+ if (new_val & width_mask) { /* bit counter_width set: counter has overflowed */
|
|
+ set_bit(i, set->povfl_pmds);
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_power4_irq_handler(struct pt_regs *regs,
|
|
+ struct pfm_context *ctx)
|
|
+{
|
|
+ u32 mmcr0; /* NOTE(review): u32 drops any upper MMCR0 bits on write-back - confirm none are used */
|
|
+
|
|
+ /* Disable the counters (set the freeze bit) to not pollute
|
|
+ * the counts.
|
|
+ */
|
|
+ mmcr0 = mfspr(SPRN_MMCR0);
|
|
+ mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
|
|
+
|
|
+ /* Set the PMM bit (see the comment on clearing the freeze bit below). */
|
|
+ mtmsrd(mfmsr() | MSR_PMM);
|
|
+
|
|
+ pfm_interrupt_handler(instruction_pointer(regs), regs);
|
|
+
|
|
+ mmcr0 = mfspr(SPRN_MMCR0); /* re-read: the handler above may have changed MMCR0 */
|
|
+
|
|
+ /*
|
|
+ * Reset the perfmon trigger if
|
|
+ * not in masking mode.
|
|
+ */
|
|
+ if (ctx->state != PFM_CTX_MASKED)
|
|
+ mmcr0 |= MMCR0_PMXE;
|
|
+
|
|
+ /*
|
|
+ * We must clear the PMAO bit on some (GQ) chips. Just do it
|
|
+ * all the time.
|
|
+ */
|
|
+ mmcr0 &= ~MMCR0_PMAO;
|
|
+
|
|
+ /*
|
|
+ * Now clear the freeze bit, counting will not start until we
|
|
+ * rfid from this exception, because only at that point will
|
|
+ * the PMM bit be cleared.
|
|
+ */
|
|
+ mmcr0 &= ~MMCR0_FC;
|
|
+ mtspr(SPRN_MMCR0, mmcr0); /* single write applies the PMXE, PMAO and FC changes together */
|
|
+}
|
|
+
|
|
+static void pfm_power4_resend_irq(struct pfm_context *ctx)
|
|
+{
|
|
+ /*
|
|
+ * Assert the PMAO bit to cause a PMU interrupt. Make sure we
|
|
+ * trigger the edge detection circuitry for PMAO (ctx is not used).
|
|
+ */
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); /* first force PMAO to 0 ... */
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO); /* ... then set it, giving a 0 -> 1 transition */
|
|
+}
|
|
+
|
|
+struct pfm_arch_pmu_info pfm_power4_pmu_info = { /* POWER4 callbacks, referenced by pfm_power4_pmu_conf.pmu_info */
|
|
+ .pmu_style = PFM_POWERPC_PMU_POWER4,
|
|
+ .write_pmc = pfm_power4_write_pmc,
|
|
+ .write_pmd = pfm_power4_write_pmd,
|
|
+ .read_pmd = pfm_power4_read_pmd,
|
|
+ .irq_handler = pfm_power4_irq_handler,
|
|
+ .get_ovfl_pmds = pfm_power4_get_ovfl_pmds,
|
|
+ .enable_counters = pfm_power4_enable_counters,
|
|
+ .disable_counters = pfm_power4_disable_counters,
|
|
+ .resend_irq = pfm_power4_resend_irq
|
|
+};
|
|
+
|
|
+/*
|
|
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
+ */
|
|
+static struct pfm_pmu_config pfm_power4_pmu_conf = {
|
|
+ .pmu_name = "POWER4",
|
|
+ .counter_width = 31, /* bit 31 is the overflow bit tested in pfm_power4_get_ovfl_pmds() */
|
|
+ .pmd_desc = pfm_power4_pmd_desc,
|
|
+ .pmc_desc = pfm_power4_pmc_desc,
|
|
+ .num_pmc_entries = PFM_PM_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_PM_NUM_PMDS,
|
|
+ .probe_pmu = pfm_power4_probe_pmu, /* returns 0 only for PV_POWER4 / PV_POWER4p */
|
|
+ .pmu_info = &pfm_power4_pmu_info,
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE
|
|
+};
|
|
+
|
|
+static int __init pfm_power4_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_power4_pmu_conf); /* its result is the module init status */
|
|
+}
|
|
+
|
|
+static void __exit pfm_power4_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_power4_pmu_conf); /* same object that was passed to pfm_pmu_register() */
|
|
+}
|
|
+
|
|
+module_init(pfm_power4_pmu_init_module);
|
|
+module_exit(pfm_power4_pmu_cleanup_module);
|
|
diff --git a/arch/powerpc/perfmon/perfmon_power5.c b/arch/powerpc/perfmon/perfmon_power5.c
|
|
new file mode 100644
|
|
index 0000000..f4bb1ac
|
|
--- /dev/null
|
|
+++ b/arch/powerpc/perfmon/perfmon_power5.c
|
|
@@ -0,0 +1,326 @@
|
|
+/*
|
|
+ * This file contains the POWER5 PMU register description tables
|
|
+ * and pmc checker used by perfmon.c.
|
|
+ *
|
|
+ * Copyright (c) 2005 David Gibson, IBM Corporation.
|
|
+ *
|
|
+ * Based on perfmon_p6.c:
|
|
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+MODULE_AUTHOR("David Gibson <dwg@au1.ibm.com>");
|
|
+MODULE_DESCRIPTION("POWER5 PMU description table");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+static struct pfm_regmap_desc pfm_power5_pmc_desc[] = { /* looked up by cnum in pfm_power5_write_pmc() */
|
|
+/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0), /* index 0: enable_counters writes it last */
|
|
+/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1),
|
|
+/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA)
|
|
+};
|
|
+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power5_pmc_desc)
|
|
+
|
|
+/* The TB and PURR registers are read-only. Also, note that the TB register
|
|
+ * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers.
|
|
+ * For Perfmon2's purposes, we'll treat it as a single 64-bit register.
|
|
+ */
|
|
+static struct pfm_regmap_desc pfm_power5_pmd_desc[] = {
|
|
+/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
|
|
+/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
|
|
+/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
|
|
+/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
|
|
+/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
|
|
+/* pmd5 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5), /* index 5: zeroed by get_ovfl_pmds when unused */
|
|
+/* pmd6 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6), /* index 6: zeroed by get_ovfl_pmds when unused */
|
|
+/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR), /* reads as 0 without CPU_FTR_PURR */
|
|
+};
|
|
+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power5_pmd_desc)
|
|
+
|
|
+/* forward decl: pfm_power5_enable_counters() calls this before its definition */
|
|
+static void pfm_power5_disable_counters(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+
|
|
+static int pfm_power5_probe_pmu(void)
|
|
+{
|
|
+ unsigned long pvr = mfspr(SPRN_PVR);
|
|
+
|
|
+ switch (PVR_VER(pvr)) {
|
|
+ case PV_POWER5:
|
|
+ return 0;
|
|
+ case PV_POWER5p:
|
|
+ return (PVR_REV(pvr) < 0x300) ? 0 : -1; /* revisions >= 0x300 are accepted by pfm_power6_probe_pmu() instead */
|
|
+ default:
|
|
+ return -1; /* any other processor version: decline */
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_power5_write_pmc(unsigned int cnum, u64 value)
|
|
+{
|
|
+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { /* cnum indexes pfm_pmu_conf's pmc_desc table */
|
|
+ case SPRN_MMCR0:
|
|
+ mtspr(SPRN_MMCR0, value); /* SPR spelled out per case; presumably mtspr() needs a constant */
|
|
+ break;
|
|
+ case SPRN_MMCR1:
|
|
+ mtspr(SPRN_MMCR1, value);
|
|
+ break;
|
|
+ case SPRN_MMCRA:
|
|
+ mtspr(SPRN_MMCRA, value);
|
|
+ break;
|
|
+ default:
|
|
+ BUG(); /* hw_addr is not one of the three MMCRs handled above */
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_power5_write_pmd(unsigned int cnum, u64 value)
|
|
+{
|
|
+ u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; /* applied to every value before it is written to a PMCn */
|
|
+
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
|
|
+ case SPRN_PMC1:
|
|
+ mtspr(SPRN_PMC1, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC2:
|
|
+ mtspr(SPRN_PMC2, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC3:
|
|
+ mtspr(SPRN_PMC3, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC4:
|
|
+ mtspr(SPRN_PMC4, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC5:
|
|
+ mtspr(SPRN_PMC5, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC6:
|
|
+ mtspr(SPRN_PMC6, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_TBRL:
|
|
+ case SPRN_PURR:
|
|
+ /* Ignore writes to read-only registers. */
|
|
+ break;
|
|
+ default:
|
|
+ BUG(); /* hw_addr matches no register handled above */
|
|
+ }
|
|
+}
|
|
+
|
|
+static u64 pfm_power5_read_pmd(unsigned int cnum)
|
|
+{
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { /* cnum indexes pfm_pmu_conf's pmd_desc table */
|
|
+ case SPRN_PMC1:
|
|
+ return mfspr(SPRN_PMC1);
|
|
+ case SPRN_PMC2:
|
|
+ return mfspr(SPRN_PMC2);
|
|
+ case SPRN_PMC3:
|
|
+ return mfspr(SPRN_PMC3);
|
|
+ case SPRN_PMC4:
|
|
+ return mfspr(SPRN_PMC4);
|
|
+ case SPRN_PMC5:
|
|
+ return mfspr(SPRN_PMC5);
|
|
+ case SPRN_PMC6:
|
|
+ return mfspr(SPRN_PMC6);
|
|
+ case SPRN_TBRL: /* TB is returned as one value built from TBRU and TBRL */
|
|
+ return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); /* NOTE(review): two separate reads, no retry if TBRL wraps in between */
|
|
+ case SPRN_PURR:
|
|
+ if (cpu_has_feature(CPU_FTR_PURR))
|
|
+ return mfspr(SPRN_PURR);
|
|
+ else
|
|
+ return 0; /* CPU without the PURR feature reads as 0 */
|
|
+ default:
|
|
+ BUG(); /* hw_addr matches no register handled above */
|
|
+ }
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_power5_enable_counters
|
|
+ * Freeze the counters, then write every PMC flagged in set->used_pmcs from set->pmcs[].
|
|
+ **/
|
|
+static void pfm_power5_enable_counters(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int i, max_pmc;
|
|
+
|
|
+ /*
|
|
+ * Make sure the counters are disabled before touching the
|
|
+ * other control registers
|
|
+ */
|
|
+ pfm_power5_disable_counters(ctx, set);
|
|
+
|
|
+ max_pmc = ctx->regs.max_pmc;
|
|
+
|
|
+ /*
|
|
+ * Write MMCR0 last, and a fairly easy way to do
|
|
+ * this is to write the registers in the reverse
|
|
+ * order
|
|
+ */
|
|
+ for (i = max_pmc; i != 0; i--) /* i runs max_pmc..1, so the unsigned i - 1 never wraps */
|
|
+ if (test_bit(i - 1, set->used_pmcs))
|
|
+ pfm_power5_write_pmc(i - 1, set->pmcs[i - 1]); /* index 0 (MMCR0) goes last */
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_power5_disable_counters
|
|
+ *
|
|
+ * Freeze counting by setting MMCR0_FC in MMCR0; ctx and set are not used.
|
|
+ **/
|
|
+static void pfm_power5_disable_counters(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ /* Set the Freeze Counters bit */
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); /* read-modify-write: other MMCR0 bits are kept */
|
|
+ asm volatile ("sync"); /* presumably so the freeze has taken effect before returning - confirm */
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_power5_get_ovfl_pmds
|
|
+ *
|
|
+ * Determine which counters in this set have overflowed and fill in the
|
|
+ * set->povfl_pmds mask and set->npend_ovfls count.
|
|
+ **/
|
|
+static void pfm_power5_get_ovfl_pmds(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int i;
|
|
+ unsigned int max = ctx->regs.max_intr_pmd;
|
|
+ u64 *used_pmds = set->used_pmds;
|
|
+ u64 *intr_pmds = ctx->regs.intr_pmds;
|
|
+ u64 width_mask = 1ULL << pfm_pmu_conf->counter_width; /* 1ULL: shift in 64 bits, not in signed int */
|
|
+ u64 new_val, mask[PFM_PMD_BV];
|
|
+
|
|
+ bitmap_and(cast_ulp(mask), cast_ulp(intr_pmds),
|
|
+ cast_ulp(used_pmds), max); /* mask = interrupting pmds that this set uses */
|
|
+ /*
|
|
+ * If either PMC5 or PMC6 are not being used, just zero out the unused
|
|
+ * ones so that they won't interrupt again for another 2^31 counts.
|
|
+ * Note that if no other counters overflowed, set->npend_ovfls will
|
|
+ * be zero upon returning from this call (i.e. a spurious
|
|
+ * interrupt), but that should be ok.
|
|
+ *
|
|
+ * If neither PMC5 nor PMC6 are used, the counters should be frozen
|
|
+ * via MMCR0_FC5_6 and zeroed out.
|
|
+ *
|
|
+ * If both PMC5 and PMC6 are used, they can be handled correctly by
|
|
+ * the loop that follows.
|
|
+ */
|
|
+
|
|
+ if (!test_bit(5, cast_ulp(used_pmds))) /* 5 and 6 are the PMC5/PMC6 slots of pfm_power5_pmd_desc */
|
|
+ mtspr(SPRN_PMC5, 0);
|
|
+ if (!test_bit(6, cast_ulp(used_pmds)))
|
|
+ mtspr(SPRN_PMC6, 0);
|
|
+
|
|
+ for (i = 0; i < max; i++) {
|
|
+ if (test_bit(i, cast_ulp(mask))) { /* same unsigned long view as bitmap_and() above */
|
|
+ new_val = pfm_power5_read_pmd(i);
|
|
+ if (new_val & width_mask) { /* bit counter_width set: counter has overflowed */
|
|
+ set_bit(i, set->povfl_pmds);
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_power5_irq_handler(struct pt_regs *regs,
|
|
+ struct pfm_context *ctx)
|
|
+{
|
|
+ u32 mmcr0; /* NOTE(review): u32 drops any upper MMCR0 bits on write-back - confirm none are used */
|
|
+
|
|
+ /* Disable the counters (set the freeze bit) to not pollute
|
|
+ * the counts.
|
|
+ */
|
|
+ mmcr0 = mfspr(SPRN_MMCR0);
|
|
+ mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
|
|
+
|
|
+ /* Set the PMM bit (see the comment on clearing the freeze bit below). */
|
|
+ mtmsrd(mfmsr() | MSR_PMM);
|
|
+
|
|
+ pfm_interrupt_handler(instruction_pointer(regs), regs);
|
|
+
|
|
+ mmcr0 = mfspr(SPRN_MMCR0); /* re-read: the handler above may have changed MMCR0 */
|
|
+
|
|
+ /*
|
|
+ * Reset the perfmon trigger if
|
|
+ * not in masking mode.
|
|
+ */
|
|
+ if (ctx->state != PFM_CTX_MASKED)
|
|
+ mmcr0 |= MMCR0_PMXE;
|
|
+
|
|
+ /*
|
|
+ * We must clear the PMAO bit on some (GQ) chips. Just do it
|
|
+ * all the time.
|
|
+ */
|
|
+ mmcr0 &= ~MMCR0_PMAO;
|
|
+
|
|
+ /*
|
|
+ * Now clear the freeze bit, counting will not start until we
|
|
+ * rfid from this exception, because only at that point will
|
|
+ * the PMM bit be cleared.
|
|
+ */
|
|
+ mmcr0 &= ~MMCR0_FC;
|
|
+ mtspr(SPRN_MMCR0, mmcr0); /* single write applies the PMXE, PMAO and FC changes together */
|
|
+}
|
|
+
|
|
+static void pfm_power5_resend_irq(struct pfm_context *ctx)
|
|
+{
|
|
+ /*
|
|
+ * Assert the PMAO bit to cause a PMU interrupt. Make sure we
|
|
+ * trigger the edge detection circuitry for PMAO (ctx is not used).
|
|
+ */
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); /* first force PMAO to 0 ... */
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO); /* ... then set it, giving a 0 -> 1 transition */
|
|
+}
|
|
+
|
|
+struct pfm_arch_pmu_info pfm_power5_pmu_info = { /* POWER5 callbacks, referenced by pfm_power5_pmu_conf.pmu_info */
|
|
+ .pmu_style = PFM_POWERPC_PMU_POWER5,
|
|
+ .write_pmc = pfm_power5_write_pmc,
|
|
+ .write_pmd = pfm_power5_write_pmd,
|
|
+ .read_pmd = pfm_power5_read_pmd,
|
|
+ .irq_handler = pfm_power5_irq_handler,
|
|
+ .get_ovfl_pmds = pfm_power5_get_ovfl_pmds,
|
|
+ .enable_counters = pfm_power5_enable_counters,
|
|
+ .disable_counters = pfm_power5_disable_counters,
|
|
+ .resend_irq = pfm_power5_resend_irq
|
|
+};
|
|
+
|
|
+/*
|
|
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
+ */
|
|
+static struct pfm_pmu_config pfm_power5_pmu_conf = {
|
|
+ .pmu_name = "POWER5",
|
|
+ .counter_width = 31, /* bit 31 is the overflow bit tested in pfm_power5_get_ovfl_pmds() */
|
|
+ .pmd_desc = pfm_power5_pmd_desc,
|
|
+ .pmc_desc = pfm_power5_pmc_desc,
|
|
+ .num_pmc_entries = PFM_PM_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_PM_NUM_PMDS,
|
|
+ .probe_pmu = pfm_power5_probe_pmu, /* returns 0 for PV_POWER5, and PV_POWER5p below rev 0x300 */
|
|
+ .pmu_info = &pfm_power5_pmu_info,
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE
|
|
+};
|
|
+
|
|
+static int __init pfm_power5_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_power5_pmu_conf); /* its result is the module init status */
|
|
+}
|
|
+
|
|
+static void __exit pfm_power5_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_power5_pmu_conf); /* same object that was passed to pfm_pmu_register() */
|
|
+}
|
|
+
|
|
+module_init(pfm_power5_pmu_init_module);
|
|
+module_exit(pfm_power5_pmu_cleanup_module);
|
|
diff --git a/arch/powerpc/perfmon/perfmon_power6.c b/arch/powerpc/perfmon/perfmon_power6.c
|
|
new file mode 100644
|
|
index 0000000..7882feb
|
|
--- /dev/null
|
|
+++ b/arch/powerpc/perfmon/perfmon_power6.c
|
|
@@ -0,0 +1,520 @@
|
|
+/*
|
|
+ * This file contains the POWER6 PMU register description tables
|
|
+ * and pmc checker used by perfmon.c.
|
|
+ *
|
|
+ * Copyright (c) 2007, IBM Corporation
|
|
+ *
|
|
+ * Based on perfmon_power5.c, and written by Carl Love <carll@us.ibm.com>
|
|
+ * and Kevin Corry <kevcorry@us.ibm.com>. Some fixes and refinement by
|
|
+ * Corey Ashford <cjashfor@us.ibm.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+MODULE_AUTHOR("Corey Ashford <cjashfor@us.ibm.com>");
|
|
+MODULE_DESCRIPTION("POWER6 PMU description table");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+static struct pfm_regmap_desc pfm_power6_pmc_desc[] = { /* looked up by cnum in pfm_power6_write_pmc() */
|
|
+/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0), /* index 0: enable_counters writes it last */
|
|
+/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1),
|
|
+/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA)
|
|
+};
|
|
+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power6_pmc_desc)
|
|
+#define PFM_DELTA_TB 10000 /* Not a real register: hw_addr tag of the virtual DELTA_TB pmd */
|
|
+#define PFM_DELTA_PURR 10001 /* Not a real register: hw_addr tag of the virtual DELTA_PURR pmd */
|
|
+
|
|
+/*
|
|
+ * Counters wrap to zero at the transition from 2^32-1 to 2^32. Note:
|
|
+ * the interrupt is generated at the transition from 2^31-1 to 2^31.
|
|
+ */
|
|
+#define OVERFLOW_VALUE 0x100000000UL /* 2^32 */
|
|
+
|
|
+/* The TB and PURR registers are read-only. Also, note that the TB register
|
|
+ * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers.
|
|
+ * For Perfmon2's purposes, we'll treat it as a single 64-bit register.
|
|
+ */
|
|
+static struct pfm_regmap_desc pfm_power6_pmd_desc[] = {
|
|
+ /* On POWER 6 PMC5 and PMC6 are not writable, they do not
|
|
+ * generate interrupts, and do not qualify their counts
|
|
+ * based on problem mode, supervisor mode or hypervisor mode.
|
|
+ * These two counters are implemented as virtual counters
|
|
+ * to make them appear to work like the other counters. A
|
|
+ * kernel timer is used to sample the real PMC5 and PMC6 and
|
|
+ * update the virtual counters.
|
|
+ */
|
|
+/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL),
|
|
+/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
|
|
+/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
|
|
+/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
|
|
+/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
|
|
+/* pmd5 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC5", SPRN_PMC5),
|
|
+/* pmd6 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC6", SPRN_PMC6),
|
|
+/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR),
|
|
+/* delta tb */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_TB", PFM_DELTA_TB),
|
|
+/* delta purr */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_PURR", PFM_DELTA_PURR),
|
|
+};
|
|
+
|
|
+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power6_pmd_desc)
|
|
+
|
|
+u32 pmc5_start_save[NR_CPUS]; /* per CPU: PMC5 value at the last snapshot, see delta() */
|
|
+u32 pmc6_start_save[NR_CPUS]; /* per CPU: PMC6 value at the last snapshot, see delta() */
|
|
+
|
|
+static struct timer_list pmc5_6_update[NR_CPUS]; /* per CPU: timer that runs pmc5_6_updater() */
|
|
+u64 enable_cntrs_cnt; /* incremented on each pfm_power6_enable_counters() call */
|
|
+u64 disable_cntrs_cnt; /* incremented on each pfm_power6_disable_counters() call */
|
|
+u64 call_delta; /* incremented on each delta() call */
|
|
+u64 pm5_6_interrupt; /* not updated anywhere in this part of the file */
|
|
+u64 pm1_4_interrupt; /* incremented in pfm_power6_irq_handler() */
|
|
+/* need ctx_arch for kernel timer. Can't get it in context of the kernel
|
|
+ * timer.
|
|
+ */
|
|
+struct pfm_arch_context *pmc5_6_ctx_arch[NR_CPUS]; /* set in pfm_power6_enable_counters() */
|
|
+long int update_time; /* timer period in jiffies; no assignment visible in this part of the file - TODO confirm where it is set */
|
|
+
|
|
+static void delta(int cpu_num, struct pfm_arch_context *ctx_arch) /* fold PMC5/PMC6 progress since the last snapshot into ctx_arch */
|
|
+{
|
|
+ u32 tmp5, tmp6;
|
|
+
|
|
+ call_delta++; /* call counter only; not read in this function */
|
|
+
|
|
+ tmp5 = (u32) mfspr(SPRN_PMC5);
|
|
+ tmp6 = (u32) mfspr(SPRN_PMC6);
|
|
+
|
|
+ /*
|
|
+ * The following difference calculation relies on 32-bit modular
|
|
+ * arithmetic for the deltas to come out correct (especially in the
|
|
+ * presence of a 32-bit counter wrap).
|
|
+ */
|
|
+ ctx_arch->powergs_pmc5 += (u64)(tmp5 - pmc5_start_save[cpu_num]); /* u32 subtraction first, then widened */
|
|
+ ctx_arch->powergs_pmc6 += (u64)(tmp6 - pmc6_start_save[cpu_num]);
|
|
+
|
|
+ pmc5_start_save[cpu_num] = tmp5; /* the next delta is measured from here */
|
|
+ pmc6_start_save[cpu_num] = tmp6;
|
|
+
|
|
+ return;
|
|
+}
|
|
+
|
|
+
|
|
+static void pmc5_6_updater(unsigned long cpu_num) /* timer callback; cpu_num is the .data set in pfm_power6_enable_counters() */
|
|
+{
|
|
+ /* update the virtual pmd 5 and pmd 6 counters */
|
|
+
|
|
+ delta(cpu_num, pmc5_6_ctx_arch[cpu_num]); /* NOTE(review): reads the PMCs of the CPU running the timer - confirm it is always cpu_num */
|
|
+ mod_timer(&pmc5_6_update[cpu_num], jiffies + update_time); /* re-arm for the next period */
|
|
+}
|
|
+
|
|
+
|
|
+static int pfm_power6_probe_pmu(void)
|
|
+{
|
|
+ unsigned long pvr = mfspr(SPRN_PVR);
|
|
+
|
|
+ switch (PVR_VER(pvr)) {
|
|
+ case PV_POWER6:
|
|
+ return 0;
|
|
+ case PV_POWER5p:
|
|
+ /* If this is a POWER5+ and the revision is less than 0x300,
|
|
+ don't treat it as a POWER6 (pfm_power5_probe_pmu() accepts it). */
|
|
+ return (PVR_REV(pvr) < 0x300) ? -1 : 0;
|
|
+ default:
|
|
+ return -1; /* any other processor version: decline */
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_power6_write_pmc(unsigned int cnum, u64 value)
|
|
+{
|
|
+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { /* cnum indexes pfm_pmu_conf's pmc_desc table */
|
|
+ case SPRN_MMCR0:
|
|
+ mtspr(SPRN_MMCR0, value); /* SPR spelled out per case; presumably mtspr() needs a constant */
|
|
+ break;
|
|
+ case SPRN_MMCR1:
|
|
+ mtspr(SPRN_MMCR1, value);
|
|
+ break;
|
|
+ case SPRN_MMCRA:
|
|
+ mtspr(SPRN_MMCRA, value);
|
|
+ break;
|
|
+ default:
|
|
+ BUG(); /* hw_addr is not one of the three MMCRs handled above */
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_power6_write_pmd(unsigned int cnum, u64 value)
|
|
+{
|
|
+ /* On POWER 6 PMC5 and PMC6 are implemented as
|
|
+ * virtual counters. See comment in pfm_power6_pmd_desc
|
|
+ * definition. They are written via pfm_power6_swrite().
|
|
+ */
|
|
+ u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; /* applied to every value before it is written to a PMCn */
|
|
+
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
|
|
+ case SPRN_PMC1:
|
|
+ mtspr(SPRN_PMC1, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC2:
|
|
+ mtspr(SPRN_PMC2, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC3:
|
|
+ mtspr(SPRN_PMC3, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_PMC4:
|
|
+ mtspr(SPRN_PMC4, value & ovfl_mask);
|
|
+ break;
|
|
+ case SPRN_TBRL:
|
|
+ case SPRN_PURR:
|
|
+ /* Ignore writes to read-only registers. */
|
|
+ break;
|
|
+ default:
|
|
+ BUG(); /* also hit for PMC5/PMC6 and the DELTA_* tags, which have no case here */
|
|
+ }
|
|
+}
|
|
+
|
|
+static u64 pfm_power6_sread(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
|
|
+ int cpu_num = smp_processor_id(); /* selects this CPU's pmcN_start_save slot */
|
|
+
|
|
+ /* On POWER 6 PMC5 and PMC6 are implemented as
|
|
+ * virtual counters. See comment in pfm_power6_pmd_desc
|
|
+ * definition.
|
|
+ */
|
|
+
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
|
|
+ case SPRN_PMC5: /* accumulated count + progress since the last snapshot (u32 wraparound subtract) */
|
|
+ return ctx_arch->powergs_pmc5 + (u64)((u32)mfspr(SPRN_PMC5) - pmc5_start_save[cpu_num]);
|
|
+ break; /* not reached */
|
|
+
|
|
+ case SPRN_PMC6:
|
|
+ return ctx_arch->powergs_pmc6 + (u64)((u32)mfspr(SPRN_PMC6) - pmc6_start_save[cpu_num]);
|
|
+ break; /* not reached */
|
|
+
|
|
+ case PFM_DELTA_TB: /* accumulated TB delta + time base elapsed since delta_tb_start */
|
|
+ return ctx_arch->delta_tb
|
|
+ + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL))
|
|
+ - ctx_arch->delta_tb_start;
|
|
+ break; /* not reached */
|
|
+
|
|
+ case PFM_DELTA_PURR:
|
|
+ return ctx_arch->delta_purr
|
|
+ + mfspr(SPRN_PURR) /* NOTE(review): no CPU_FTR_PURR check here, unlike pfm_power6_read_pmd() */
|
|
+ - ctx_arch->delta_purr_start;
|
|
+ break; /* not reached */
|
|
+
|
|
+ default:
|
|
+ BUG(); /* only the four virtual pmds are handled here */
|
|
+ }
|
|
+}
|
|
+
|
|
+void pfm_power6_swrite(struct pfm_context *ctx, unsigned int cnum,
|
|
+ u64 val)
|
|
+{ /* NOTE(review): not static, unlike pfm_power6_sread() - confirm external linkage is needed */
|
|
+ struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx);
|
|
+ int cpu_num = smp_processor_id(); /* selects this CPU's pmcN_start_save slot */
|
|
+
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
|
|
+ case SPRN_PMC5:
|
|
+ pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5); /* new baseline: later deltas count from now */
|
|
+ ctx_arch->powergs_pmc5 = val; /* the hardware PMC5 itself is not written */
|
|
+ break;
|
|
+
|
|
+ case SPRN_PMC6:
|
|
+ pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
|
|
+ ctx_arch->powergs_pmc6 = val;
|
|
+ break;
|
|
+
|
|
+ case PFM_DELTA_TB:
|
|
+ ctx_arch->delta_tb_start =
|
|
+ (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL));
|
|
+ ctx_arch->delta_tb = val;
|
|
+ break;
|
|
+
|
|
+ case PFM_DELTA_PURR:
|
|
+ ctx_arch->delta_purr_start = mfspr(SPRN_PURR); /* NOTE(review): no CPU_FTR_PURR check here */
|
|
+ ctx_arch->delta_purr = val;
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ BUG(); /* only the four virtual pmds are handled here */
|
|
+ }
|
|
+}
|
|
+
|
|
+static u64 pfm_power6_read_pmd(unsigned int cnum)
|
|
+{
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { /* cnum indexes pfm_pmu_conf's pmd_desc table */
|
|
+ case SPRN_PMC1:
|
|
+ return mfspr(SPRN_PMC1);
|
|
+ case SPRN_PMC2:
|
|
+ return mfspr(SPRN_PMC2);
|
|
+ case SPRN_PMC3:
|
|
+ return mfspr(SPRN_PMC3);
|
|
+ case SPRN_PMC4:
|
|
+ return mfspr(SPRN_PMC4);
|
|
+ case SPRN_TBRL: /* TB is returned as one value built from TBRU and TBRL */
|
|
+ return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); /* NOTE(review): two separate reads, no retry if TBRL wraps in between */
|
|
+ case SPRN_PURR:
|
|
+ if (cpu_has_feature(CPU_FTR_PURR))
|
|
+ return mfspr(SPRN_PURR);
|
|
+ else
|
|
+ return 0; /* CPU without the PURR feature reads as 0 */
|
|
+ default:
|
|
+ BUG(); /* also hit for PMC5/PMC6 and the DELTA_* tags; those are read in pfm_power6_sread() */
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/**
|
|
+ * pfm_power6_enable_counters
|
|
+ * Write the used PMCs, snapshot PMC5/PMC6/TB/PURR and start this CPU's update timer.
|
|
+ **/
|
|
+static void pfm_power6_enable_counters(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+
|
|
+ unsigned int i, max_pmc;
|
|
+ int cpu_num = smp_processor_id();
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+
|
|
+ enable_cntrs_cnt++;
|
|
+
|
|
+ /* need the ctx passed down to the routine */
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ max_pmc = ctx->regs.max_pmc;
|
|
+
|
|
+ /* Write MMCR0 last, and a fairly easy way to do this is to write
|
|
+ the registers in the reverse order */
|
|
+ for (i = max_pmc; i != 0; i--)
|
|
+ if (test_bit(i - 1, set->used_pmcs))
|
|
+ pfm_power6_write_pmc(i - 1, set->pmcs[i - 1]);
|
|
+
|
|
+ /* save current free running HW event count */
|
|
+ pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5);
|
|
+ pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6);
|
|
+
|
|
+ ctx_arch->delta_tb_start = /* TB is read unconditionally, as in pfm_power6_read_pmd() */
|
|
+ ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL);
|
|
+
|
|
+ if (cpu_has_feature(CPU_FTR_PURR)) /* only PURR depends on CPU_FTR_PURR */
|
|
+ ctx_arch->delta_purr_start = mfspr(SPRN_PURR);
|
|
+ else
|
|
+ ctx_arch->delta_purr_start = 0;
|
|
+
|
|
+ /* Start kernel timer for this cpu to periodically update
|
|
+ * the virtual counters.
|
|
+ */
|
|
+ init_timer(&pmc5_6_update[cpu_num]);
|
|
+ pmc5_6_update[cpu_num].function = pmc5_6_updater;
|
|
+ pmc5_6_update[cpu_num].data = (unsigned long) cpu_num;
|
|
+ pmc5_6_update[cpu_num].expires = jiffies + update_time;
|
|
+ /* context for this timer, timer will be removed if context
|
|
+ * is switched because the counters will be stopped first.
|
|
+ * NEEDS WORK, I think this is all ok, a little concerned about a
|
|
+ * race between the kernel timer going off right as the counters
|
|
+ * are being stopped and the context switching. Need to think
|
|
+ * about this.
|
|
+ */
|
|
+ pmc5_6_ctx_arch[cpu_num] = ctx_arch; /* published before add_timer() so the callback sees it */
|
|
+ add_timer(&pmc5_6_update[cpu_num]);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_power6_disable_counters
|
|
+ * Freeze the counters, stop this CPU's update timer and fold the final deltas into ctx_arch.
|
|
+ **/
|
|
+static void pfm_power6_disable_counters(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ int cpu_num = smp_processor_id();
|
|
+
|
|
+ disable_cntrs_cnt++;
|
|
+
|
|
+ /* Set the Freeze Counters bit */
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); /* read-modify-write: other MMCR0 bits are kept */
|
|
+ asm volatile ("sync");
|
|
+
|
|
+ /* delete kernel update timer */
|
|
+ del_timer_sync(&pmc5_6_update[cpu_num]); /* done before the final delta() below */
|
|
+
|
|
+ /* Update the virtual pmd 5 and 6 counters from the free running
|
|
+ * HW counters
|
|
+ */
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ delta(cpu_num, ctx_arch);
|
|
+
|
|
+ ctx_arch->delta_tb +=
|
|
+ (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL))
|
|
+ - ctx_arch->delta_tb_start; /* time base elapsed since the start snapshot */
|
|
+
|
|
+ ctx_arch->delta_purr += mfspr(SPRN_PURR) /* NOTE(review): no CPU_FTR_PURR check here */
|
|
+ - ctx_arch->delta_purr_start;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_power6_get_ovfl_pmds
|
|
+ *
|
|
+ * Determine which counters in this set have overflowed and fill in the
|
|
+ * set->povfl_pmds mask and set->npend_ovfls count.
|
|
+ **/
|
|
+static void pfm_power6_get_ovfl_pmds(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int i;
|
|
+ unsigned int first_intr_pmd = ctx->regs.first_intr_pmd;
|
|
+ unsigned int max_intr_pmd = ctx->regs.max_intr_pmd;
|
|
+ u64 *used_pmds = set->used_pmds;
|
|
+ u64 *cntr_pmds = ctx->regs.cnt_pmds;
|
|
+ u64 width_mask = 1ULL << pfm_pmu_conf->counter_width; /* 1ULL: shift in 64 bits, not in signed int */
|
|
+ u64 new_val, mask[PFM_PMD_BV];
|
|
+
|
|
+ bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds), cast_ulp(used_pmds), max_intr_pmd);
|
|
+
|
|
+ /* max_intr_pmd is actually the last interrupting pmd register + 1 */
|
|
+ for (i = first_intr_pmd; i < max_intr_pmd; i++) {
|
|
+ if (test_bit(i, cast_ulp(mask))) { /* same unsigned long view as bitmap_and() above */
|
|
+ new_val = pfm_power6_read_pmd(i);
|
|
+ if (new_val & width_mask) { /* bit counter_width set: counter has overflowed */
|
|
+ set_bit(i, set->povfl_pmds);
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_power6_irq_handler(struct pt_regs *regs,
|
|
+ struct pfm_context *ctx)
|
|
+{
|
|
+ u32 mmcr0;
|
|
+ u64 mmcra;
|
|
+
|
|
+ /* Disable the counters (set the freeze bit) to not pollute
|
|
+ * the counts.
|
|
+ */
|
|
+ mmcr0 = mfspr(SPRN_MMCR0);
|
|
+ mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC));
|
|
+ mmcra = mfspr(SPRN_MMCRA);
|
|
+
|
|
+ /* Set the PMM bit (see comment below). */
|
|
+ mtmsrd(mfmsr() | MSR_PMM);
|
|
+
|
|
+ pm1_4_interrupt++;
|
|
+
|
|
+ pfm_interrupt_handler(instruction_pointer(regs), regs);
|
|
+
|
|
+ mmcr0 = mfspr(SPRN_MMCR0);
|
|
+
|
|
+ /*
|
|
+ * Reset the perfmon trigger if
|
|
+ * not in masking mode.
|
|
+ */
|
|
+ if (ctx->state != PFM_CTX_MASKED)
|
|
+ mmcr0 |= MMCR0_PMXE;
|
|
+
|
|
+ /*
|
|
+ * Clear the PMU Alert Occurred bit
|
|
+ */
|
|
+ mmcr0 &= ~MMCR0_PMAO;
|
|
+
|
|
+ /* Clear the appropriate bits in the MMCRA. */
|
|
+ mmcra &= ~(POWER6_MMCRA_THRM | POWER6_MMCRA_OTHER);
|
|
+ mtspr(SPRN_MMCRA, mmcra);
|
|
+
|
|
+ /*
|
|
+ * Now clear the freeze bit, counting will not start until we
|
|
+ * rfid from this exception, because only at that point will
|
|
+ * the PMM bit be cleared.
|
|
+ */
|
|
+ mmcr0 &= ~MMCR0_FC;
|
|
+ mtspr(SPRN_MMCR0, mmcr0);
|
|
+}
|
|
+
|
|
+static void pfm_power6_resend_irq(struct pfm_context *ctx)
|
|
+{
|
|
+ /*
|
|
+ * Assert the PMAO bit to cause a PMU interrupt. Make sure we
|
|
+ * trigger the edge detection circuitry for PMAO
|
|
+ */
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);
|
|
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO);
|
|
+}
|
|
+
|
|
+struct pfm_arch_pmu_info pfm_power6_pmu_info = {
|
|
+ .pmu_style = PFM_POWERPC_PMU_POWER6,
|
|
+ .write_pmc = pfm_power6_write_pmc,
|
|
+ .write_pmd = pfm_power6_write_pmd,
|
|
+ .read_pmd = pfm_power6_read_pmd,
|
|
+ .irq_handler = pfm_power6_irq_handler,
|
|
+ .get_ovfl_pmds = pfm_power6_get_ovfl_pmds,
|
|
+ .enable_counters = pfm_power6_enable_counters,
|
|
+ .disable_counters = pfm_power6_disable_counters,
|
|
+ .resend_irq = pfm_power6_resend_irq
|
|
+};
|
|
+
|
|
+/*
|
|
+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
|
|
+ */
|
|
+static struct pfm_pmu_config pfm_power6_pmu_conf = {
|
|
+ .pmu_name = "POWER6",
|
|
+ .counter_width = 31,
|
|
+ .pmd_desc = pfm_power6_pmd_desc,
|
|
+ .pmc_desc = pfm_power6_pmc_desc,
|
|
+ .num_pmc_entries = PFM_PM_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_PM_NUM_PMDS,
|
|
+ .probe_pmu = pfm_power6_probe_pmu,
|
|
+ .pmu_info = &pfm_power6_pmu_info,
|
|
+ .pmd_sread = pfm_power6_sread,
|
|
+ .pmd_swrite = pfm_power6_swrite,
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE
|
|
+};
|
|
+
|
|
+static int __init pfm_power6_pmu_init_module(void)
|
|
+{
|
|
+ int ret;
|
|
+ disable_cntrs_cnt = 0;
|
|
+ enable_cntrs_cnt = 0;
|
|
+ call_delta = 0;
|
|
+ pm5_6_interrupt = 0;
|
|
+ pm1_4_interrupt = 0;
|
|
+
|
|
+ /* calculate the time for updating counters 5 and 6 */
|
|
+
|
|
+ /*
|
|
+ * MAX_EVENT_RATE assumes a max instruction issue rate of 2
|
|
+ * instructions per clock cycle. Experience shows that this factor
|
|
+ * of 2 is more than adequate.
|
|
+ */
|
|
+
|
|
+# define MAX_EVENT_RATE (ppc_proc_freq * 2)
|
|
+
|
|
+ /*
|
|
+ * Calculate the time, in jiffies, it takes for event counter 5 or
|
|
+ * 6 to completely wrap when counting at the max event rate, and
|
|
+ * then figure on sampling at twice that rate.
|
|
+ */
|
|
+ update_time = (((unsigned long)HZ * OVERFLOW_VALUE)
|
|
+ / ((unsigned long)MAX_EVENT_RATE)) / 2;
|
|
+
|
|
+ ret = pfm_pmu_register(&pfm_power6_pmu_conf);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static void __exit pfm_power6_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_power6_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_power6_pmu_init_module);
|
|
+module_exit(pfm_power6_pmu_cleanup_module);
|
|
diff --git a/arch/powerpc/perfmon/perfmon_ppc32.c b/arch/powerpc/perfmon/perfmon_ppc32.c
|
|
new file mode 100644
|
|
index 0000000..76f0b84
|
|
--- /dev/null
|
|
+++ b/arch/powerpc/perfmon/perfmon_ppc32.c
|
|
@@ -0,0 +1,340 @@
|
|
+/*
|
|
+ * This file contains the PPC32 PMU register description tables
|
|
+ * and pmc checker used by perfmon.c.
|
|
+ *
|
|
+ * Philip Mucci, mucci@cs.utk.edu
|
|
+ *
|
|
+ * Based on code from:
|
|
+ * Copyright (c) 2005 David Gibson, IBM Corporation.
|
|
+ *
|
|
+ * Based on perfmon_p6.c:
|
|
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <asm/reg.h>
|
|
+
|
|
+MODULE_AUTHOR("Philip Mucci <mucci@cs.utk.edu>");
|
|
+MODULE_DESCRIPTION("PPC32 PMU description table");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+static struct pfm_pmu_config pfm_ppc32_pmu_conf;
|
|
+
|
|
+static struct pfm_regmap_desc pfm_ppc32_pmc_desc[] = {
|
|
+/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", 0x0, 0, 0, SPRN_MMCR0),
|
|
+/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0x0, 0, 0, SPRN_MMCR1),
|
|
+/* mmcr2 */ PMC_D(PFM_REG_I, "MMCR2", 0x0, 0, 0, SPRN_MMCR2),
|
|
+};
|
|
+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_ppc32_pmc_desc)
|
|
+
|
|
+static struct pfm_regmap_desc pfm_ppc32_pmd_desc[] = {
|
|
+/* pmd0 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1),
|
|
+/* pmd1 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2),
|
|
+/* pmd2 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3),
|
|
+/* pmd3 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4),
|
|
+/* pmd4 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5),
|
|
+/* pmd5 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6),
|
|
+};
|
|
+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_ppc32_pmd_desc)
|
|
+
|
|
+static void perfmon_perf_irq(struct pt_regs *regs)
|
|
+{
|
|
+ u32 mmcr0;
|
|
+
|
|
+ /* BLATANTLY STOLEN FROM OPROFILE, then modified */
|
|
+
|
|
+ /* set the PMM bit (see comment below) */
|
|
+ mtmsr(mfmsr() | MSR_PMM);
|
|
+
|
|
+ pfm_interrupt_handler(instruction_pointer(regs), regs);
|
|
+
|
|
+ /* The freeze bit was set by the interrupt.
|
|
+ * Clear the freeze bit, and reenable the interrupt.
|
|
+ * The counters won't actually start until the rfi clears
|
|
+ * the PMM bit.
|
|
+ */
|
|
+
|
|
+ /* Unfreezes the counters on this CPU, enables the interrupt,
|
|
+ * enables the counters to trigger the interrupt, and sets the
|
|
+ * counters to only count when the mark bit is not set.
|
|
+ */
|
|
+ mmcr0 = mfspr(SPRN_MMCR0);
|
|
+
|
|
+ mmcr0 &= ~(MMCR0_FC | MMCR0_FCM0);
|
|
+ mmcr0 |= (MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE);
|
|
+
|
|
+ mtspr(SPRN_MMCR0, mmcr0);
|
|
+}
|
|
+
|
|
+static int pfm_ppc32_probe_pmu(void)
|
|
+{
|
|
+ enum ppc32_pmu_type pm_type;
|
|
+ int nmmcr = 0, npmds = 0, intsok = 0, i;
|
|
+ unsigned int pvr;
|
|
+ char *str;
|
|
+
|
|
+ pvr = mfspr(SPRN_PVR);
|
|
+
|
|
+ switch (PVR_VER(pvr)) {
|
|
+ case 0x0004: /* 604 */
|
|
+ str = "PPC604";
|
|
+ pm_type = PFM_POWERPC_PMU_604;
|
|
+ nmmcr = 1;
|
|
+ npmds = 2;
|
|
+ break;
|
|
+ case 0x0009: /* 604e; */
|
|
+ case 0x000A: /* 604ev */
|
|
+ str = "PPC604e";
|
|
+ pm_type = PFM_POWERPC_PMU_604e;
|
|
+ nmmcr = 2;
|
|
+ npmds = 4;
|
|
+ break;
|
|
+ case 0x0008: /* 750/740 */
|
|
+ str = "PPC750";
|
|
+ pm_type = PFM_POWERPC_PMU_750;
|
|
+ nmmcr = 2;
|
|
+ npmds = 4;
|
|
+ break;
|
|
+ case 0x7000: /* 750FX */
|
|
+ case 0x7001:
|
|
+ str = "PPC750";
|
|
+ pm_type = PFM_POWERPC_PMU_750;
|
|
+ nmmcr = 2;
|
|
+ npmds = 4;
|
|
+ if ((pvr & 0xFF0F) >= 0x0203)
|
|
+ intsok = 1;
|
|
+ break;
|
|
+ case 0x7002: /* 750GX */
|
|
+ str = "PPC750";
|
|
+ pm_type = PFM_POWERPC_PMU_750;
|
|
+ nmmcr = 2; npmds = 4;
|
|
+ intsok = 1;
|
|
+ break; /* 750GX must not fall through into the 7400 settings */
|
|
+ case 0x000C: /* 7400 */
|
|
+ str = "PPC7400";
|
|
+ pm_type = PFM_POWERPC_PMU_7400;
|
|
+ nmmcr = 3;
|
|
+ npmds = 4;
|
|
+ break;
|
|
+ case 0x800C: /* 7410 */
|
|
+ str = "PPC7410";
|
|
+ pm_type = PFM_POWERPC_PMU_7400;
|
|
+ nmmcr = 3;
|
|
+ npmds = 4;
|
|
+ if ((pvr & 0xFFFF) >= 0x01103)
|
|
+ intsok = 1;
|
|
+ break;
|
|
+ case 0x8000: /* 7451/7441 */
|
|
+ case 0x8001: /* 7455/7445 */
|
|
+ case 0x8002: /* 7457/7447 */
|
|
+ case 0x8003: /* 7447A */
|
|
+ case 0x8004: /* 7448 */
|
|
+ str = "PPC7450";
|
|
+ pm_type = PFM_POWERPC_PMU_7450;
|
|
+ nmmcr = 3; npmds = 6;
|
|
+ intsok = 1;
|
|
+ break;
|
|
+ default:
|
|
+ PFM_INFO("Unknown PVR_VER(0x%x)\n", PVR_VER(pvr));
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * deconfigure unimplemented registers
|
|
+ */
|
|
+ for (i = npmds; i < PFM_PM_NUM_PMDS; i++)
|
|
+ pfm_ppc32_pmd_desc[i].type = PFM_REG_NA;
|
|
+
|
|
+ for (i = nmmcr; i < PFM_PM_NUM_PMCS; i++)
|
|
+ pfm_ppc32_pmc_desc[i].type = PFM_REG_NA;
|
|
+
|
|
+ /*
|
|
+ * update PMU description structure
|
|
+ */
|
|
+ pfm_ppc32_pmu_conf.pmu_name = str;
|
|
+ pfm_ppc32_pmu_info.pmu_style = pm_type;
|
|
+ pfm_ppc32_pmu_conf.num_pmc_entries = nmmcr;
|
|
+ pfm_ppc32_pmu_conf.num_pmd_entries = npmds;
|
|
+
|
|
+ if (intsok == 0)
|
|
+ PFM_INFO("Interrupts unlikely to work\n");
|
|
+
|
|
+ return reserve_pmc_hardware(perfmon_perf_irq);
|
|
+}
|
|
+
|
|
+static void pfm_ppc32_write_pmc(unsigned int cnum, u64 value)
|
|
+{
|
|
+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
|
|
+ case SPRN_MMCR0:
|
|
+ mtspr(SPRN_MMCR0, value);
|
|
+ break;
|
|
+ case SPRN_MMCR1:
|
|
+ mtspr(SPRN_MMCR1, value);
|
|
+ break;
|
|
+ case SPRN_MMCR2:
|
|
+ mtspr(SPRN_MMCR2, value);
|
|
+ break;
|
|
+ default:
|
|
+ BUG();
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_ppc32_write_pmd(unsigned int cnum, u64 value)
|
|
+{
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
|
|
+ case SPRN_PMC1:
|
|
+ mtspr(SPRN_PMC1, value);
|
|
+ break;
|
|
+ case SPRN_PMC2:
|
|
+ mtspr(SPRN_PMC2, value);
|
|
+ break;
|
|
+ case SPRN_PMC3:
|
|
+ mtspr(SPRN_PMC3, value);
|
|
+ break;
|
|
+ case SPRN_PMC4:
|
|
+ mtspr(SPRN_PMC4, value);
|
|
+ break;
|
|
+ case SPRN_PMC5:
|
|
+ mtspr(SPRN_PMC5, value);
|
|
+ break;
|
|
+ case SPRN_PMC6:
|
|
+ mtspr(SPRN_PMC6, value);
|
|
+ break;
|
|
+ default:
|
|
+ BUG();
|
|
+ }
|
|
+}
|
|
+
|
|
+static u64 pfm_ppc32_read_pmd(unsigned int cnum)
|
|
+{
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
|
|
+ case SPRN_PMC1:
|
|
+ return mfspr(SPRN_PMC1);
|
|
+ case SPRN_PMC2:
|
|
+ return mfspr(SPRN_PMC2);
|
|
+ case SPRN_PMC3:
|
|
+ return mfspr(SPRN_PMC3);
|
|
+ case SPRN_PMC4:
|
|
+ return mfspr(SPRN_PMC4);
|
|
+ case SPRN_PMC5:
|
|
+ return mfspr(SPRN_PMC5);
|
|
+ case SPRN_PMC6:
|
|
+ return mfspr(SPRN_PMC6);
|
|
+ default:
|
|
+ BUG();
|
|
+ }
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_ppc32_enable_counters
|
|
+ *
|
|
+ * Just need to load the current values into the control registers.
|
|
+ **/
|
|
+static void pfm_ppc32_enable_counters(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int i, max_pmc;
|
|
+
|
|
+ max_pmc = pfm_pmu_conf->regs.max_pmc;
|
|
+
|
|
+ for (i = 0; i < max_pmc; i++)
|
|
+ if (test_bit(i, set->used_pmcs))
|
|
+ pfm_ppc32_write_pmc(i, set->pmcs[i]);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_ppc32_disable_counters
|
|
+ *
|
|
+ * Write zero to every control register (PMC) marked used in the set.
|
|
+ **/
|
|
+static void pfm_ppc32_disable_counters(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int i, max;
|
|
+
|
|
+ max = pfm_pmu_conf->regs.max_pmc;
|
|
+
|
|
+ for (i = 0; i < max; i++)
|
|
+ if (test_bit(i, set->used_pmcs))
|
|
+ pfm_ppc32_write_pmc(i, 0); /* first argument is the PMC index, not the context */
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_ppc32_get_ovfl_pmds
|
|
+ *
|
|
+ * Determine which counters in this set have overflowed and fill in the
|
|
+ * set->povfl_pmds mask and set->npend_ovfls count.
|
|
+ **/
|
|
+static void pfm_ppc32_get_ovfl_pmds(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int i;
|
|
+ unsigned int max_pmd = pfm_pmu_conf->regs.max_cnt_pmd;
|
|
+ u64 *used_pmds = set->used_pmds;
|
|
+ u64 *cntr_pmds = pfm_pmu_conf->regs.cnt_pmds;
|
|
+ u64 width_mask = 1ULL << pfm_pmu_conf->counter_width; /* 64-bit shift: avoids int overflow at width 31 */
|
|
+ u64 new_val, mask[PFM_PMD_BV];
|
|
+
|
|
+ bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds),
|
|
+ cast_ulp(used_pmds), max_pmd);
|
|
+
|
|
+ for (i = 0; i < max_pmd; i++) {
|
|
+ if (test_bit(i, mask)) {
|
|
+ new_val = pfm_ppc32_read_pmd(i);
|
|
+ if (new_val & width_mask) {
|
|
+ set_bit(i, set->povfl_pmds);
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+struct pfm_arch_pmu_info pfm_ppc32_pmu_info = {
|
|
+ .pmu_style = PFM_POWERPC_PMU_NONE,
|
|
+ .write_pmc = pfm_ppc32_write_pmc,
|
|
+ .write_pmd = pfm_ppc32_write_pmd,
|
|
+ .read_pmd = pfm_ppc32_read_pmd,
|
|
+ .get_ovfl_pmds = pfm_ppc32_get_ovfl_pmds,
|
|
+ .enable_counters = pfm_ppc32_enable_counters,
|
|
+ .disable_counters = pfm_ppc32_disable_counters,
|
|
+};
|
|
+
|
|
+static struct pfm_pmu_config pfm_ppc32_pmu_conf = {
|
|
+ .counter_width = 31,
|
|
+ .pmd_desc = pfm_ppc32_pmd_desc,
|
|
+ .pmc_desc = pfm_ppc32_pmc_desc,
|
|
+ .probe_pmu = pfm_ppc32_probe_pmu,
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+ .version = "0.1",
|
|
+ .arch_info = &pfm_ppc32_pmu_info,
|
|
+};
|
|
+
|
|
+static int __init pfm_ppc32_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_ppc32_pmu_conf);
|
|
+}
|
|
+
|
|
+static void __exit pfm_ppc32_pmu_cleanup_module(void)
|
|
+{
|
|
+ release_pmc_hardware();
|
|
+ pfm_pmu_unregister(&pfm_ppc32_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_ppc32_pmu_init_module);
|
|
+module_exit(pfm_ppc32_pmu_cleanup_module);
|
|
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
|
|
index dbc338f..e24320e 100644
|
|
--- a/arch/powerpc/platforms/cell/cbe_regs.c
|
|
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
|
|
@@ -33,6 +33,7 @@ static struct cbe_regs_map
|
|
struct cbe_iic_regs __iomem *iic_regs;
|
|
struct cbe_mic_tm_regs __iomem *mic_tm_regs;
|
|
struct cbe_pmd_shadow_regs pmd_shadow_regs;
|
|
+ struct cbe_ppe_priv_regs __iomem *ppe_priv_regs;
|
|
} cbe_regs_maps[MAX_CBE];
|
|
static int cbe_regs_map_count;
|
|
|
|
@@ -145,6 +146,23 @@ struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
|
|
}
|
|
EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
|
|
|
|
+struct cbe_ppe_priv_regs __iomem *cbe_get_ppe_priv_regs(struct device_node *np)
|
|
+{
|
|
+ struct cbe_regs_map *map = cbe_find_map(np);
|
|
+ if (map == NULL)
|
|
+ return NULL;
|
|
+ return map->ppe_priv_regs;
|
|
+}
|
|
+
|
|
+struct cbe_ppe_priv_regs __iomem *cbe_get_cpu_ppe_priv_regs(int cpu)
|
|
+{
|
|
+ struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
|
|
+ if (map == NULL)
|
|
+ return NULL;
|
|
+ return map->ppe_priv_regs;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cbe_get_cpu_ppe_priv_regs);
|
|
+
|
|
u32 cbe_get_hw_thread_id(int cpu)
|
|
{
|
|
return cbe_thread_map[cpu].thread_id;
|
|
@@ -206,6 +224,11 @@ void __init cbe_fill_regs_map(struct cbe_regs_map *map)
|
|
for_each_node_by_type(np, "mic-tm")
|
|
if (of_get_parent(np) == be)
|
|
map->mic_tm_regs = of_iomap(np, 0);
|
|
+
|
|
+ for_each_node_by_type(np, "ppe-mmio")
|
|
+ if (of_get_parent(np) == be)
|
|
+ map->ppe_priv_regs = of_iomap(np, 0);
|
|
+
|
|
} else {
|
|
struct device_node *cpu;
|
|
/* That hack must die die die ! */
|
|
@@ -227,6 +250,10 @@ void __init cbe_fill_regs_map(struct cbe_regs_map *map)
|
|
prop = of_get_property(cpu, "mic-tm", NULL);
|
|
if (prop != NULL)
|
|
map->mic_tm_regs = ioremap(prop->address, prop->len);
|
|
+
|
|
+ prop = of_get_property(cpu, "ppe-mmio", NULL);
|
|
+ if (prop != NULL)
|
|
+ map->ppe_priv_regs = ioremap(prop->address, prop->len);
|
|
}
|
|
}
|
|
|
|
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
|
|
index 109ae24..bafe5a6 100644
|
|
--- a/arch/sparc/include/asm/hypervisor.h
|
|
+++ b/arch/sparc/include/asm/hypervisor.h
|
|
@@ -2713,6 +2713,30 @@ extern unsigned long sun4v_ldc_revoke(unsigned long channel,
|
|
*/
|
|
#define HV_FAST_SET_PERFREG 0x101
|
|
|
|
+#define HV_N2_PERF_SPARC_CTL 0x0
|
|
+#define HV_N2_PERF_DRAM_CTL0 0x1
|
|
+#define HV_N2_PERF_DRAM_CNT0 0x2
|
|
+#define HV_N2_PERF_DRAM_CTL1 0x3
|
|
+#define HV_N2_PERF_DRAM_CNT1 0x4
|
|
+#define HV_N2_PERF_DRAM_CTL2 0x5
|
|
+#define HV_N2_PERF_DRAM_CNT2 0x6
|
|
+#define HV_N2_PERF_DRAM_CTL3 0x7
|
|
+#define HV_N2_PERF_DRAM_CNT3 0x8
|
|
+
|
|
+#define HV_FAST_N2_GET_PERFREG 0x104
|
|
+#define HV_FAST_N2_SET_PERFREG 0x105
|
|
+
|
|
+#ifndef __ASSEMBLY__
|
|
+extern unsigned long sun4v_niagara_getperf(unsigned long reg,
|
|
+ unsigned long *val);
|
|
+extern unsigned long sun4v_niagara_setperf(unsigned long reg,
|
|
+ unsigned long val);
|
|
+extern unsigned long sun4v_niagara2_getperf(unsigned long reg,
|
|
+ unsigned long *val);
|
|
+extern unsigned long sun4v_niagara2_setperf(unsigned long reg,
|
|
+ unsigned long val);
|
|
+#endif
|
|
+
|
|
/* MMU statistics services.
|
|
*
|
|
* The hypervisor maintains MMU statistics and privileged code provides
|
|
diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
|
|
index e3dd930..6cf3aec 100644
|
|
--- a/arch/sparc/include/asm/irq_64.h
|
|
+++ b/arch/sparc/include/asm/irq_64.h
|
|
@@ -67,6 +67,9 @@ extern void virt_irq_free(unsigned int virt_irq);
|
|
extern void __init init_IRQ(void);
|
|
extern void fixup_irqs(void);
|
|
|
|
+extern int register_perfctr_intr(void (*handler)(struct pt_regs *));
|
|
+extern void release_perfctr_intr(void (*handler)(struct pt_regs *));
|
|
+
|
|
static inline void set_softint(unsigned long bits)
|
|
{
|
|
__asm__ __volatile__("wr %0, 0x0, %%set_softint"
|
|
diff --git a/arch/sparc/include/asm/perfmon.h b/arch/sparc/include/asm/perfmon.h
|
|
new file mode 100644
|
|
index 0000000..f20cbfa
|
|
--- /dev/null
|
|
+++ b/arch/sparc/include/asm/perfmon.h
|
|
@@ -0,0 +1,11 @@
|
|
+#ifndef _SPARC64_PERFMON_H_
|
|
+#define _SPARC64_PERFMON_H_
|
|
+
|
|
+/*
|
|
+ * arch-specific user visible interface definitions
|
|
+ */
|
|
+
|
|
+#define PFM_ARCH_MAX_PMCS 2
|
|
+#define PFM_ARCH_MAX_PMDS 3
|
|
+
|
|
+#endif /* _SPARC64_PERFMON_H_ */
|
|
diff --git a/arch/sparc/include/asm/perfmon_kern.h b/arch/sparc/include/asm/perfmon_kern.h
|
|
new file mode 100644
|
|
index 0000000..033eff5
|
|
--- /dev/null
|
|
+++ b/arch/sparc/include/asm/perfmon_kern.h
|
|
@@ -0,0 +1,286 @@
|
|
+#ifndef _SPARC64_PERFMON_KERN_H_
|
|
+#define _SPARC64_PERFMON_KERN_H_
|
|
+
|
|
+#ifdef __KERNEL__
|
|
+
|
|
+#ifdef CONFIG_PERFMON
|
|
+
|
|
+#include <linux/irq.h>
|
|
+#include <asm/system.h>
|
|
+
|
|
+#define PFM_ARCH_PMD_STK_ARG 2
|
|
+#define PFM_ARCH_PMC_STK_ARG 1
|
|
+
|
|
+struct pfm_arch_pmu_info {
|
|
+ u32 pmu_style;
|
|
+};
|
|
+
|
|
+static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
|
|
+{
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_serialize(void)
|
|
+{
|
|
+}
|
|
+
|
|
+/*
|
|
+ * SPARC does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus
|
|
+ * this routine needs to do it when switching sets on overflow
|
|
+ */
|
|
+static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ pfm_save_pmds(ctx, set);
|
|
+}
|
|
+
|
|
+extern void pfm_arch_write_pmc(struct pfm_context *ctx,
|
|
+ unsigned int cnum, u64 value);
|
|
+extern u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum);
|
|
+
|
|
+static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
|
|
+ unsigned int cnum, u64 value)
|
|
+{
|
|
+ u64 pic;
|
|
+
|
|
+ value &= pfm_pmu_conf->ovfl_mask;
|
|
+
|
|
+ read_pic(pic);
|
|
+
|
|
+ switch (cnum) {
|
|
+ case 0:
|
|
+ pic = (pic & 0xffffffff00000000UL) |
|
|
+ (value & 0xffffffffUL);
|
|
+ break;
|
|
+ case 1:
|
|
+ pic = (pic & 0xffffffffUL) |
|
|
+ (value << 32UL);
|
|
+ break;
|
|
+ default:
|
|
+ BUG();
|
|
+ }
|
|
+
|
|
+ write_pic(pic);
|
|
+}
|
|
+
|
|
+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx,
|
|
+ unsigned int cnum)
|
|
+{
|
|
+ u64 pic;
|
|
+
|
|
+ read_pic(pic);
|
|
+
|
|
+ switch (cnum) {
|
|
+ case 0:
|
|
+ return pic & 0xffffffffUL;
|
|
+ case 1:
|
|
+ return pic >> 32UL;
|
|
+ default:
|
|
+ BUG();
|
|
+ return 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * For some CPUs, the upper bits of a counter must be set in order for the
|
|
+ * overflow interrupt to happen. On overflow, the counter has wrapped around,
|
|
+ * and the upper bits are cleared. This function may be used to set them back.
|
|
+ */
|
|
+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
|
|
+ unsigned int cnum)
|
|
+{
|
|
+ u64 val = pfm_arch_read_pmd(ctx, cnum);
|
|
+
|
|
+ /* This masks out overflow bit 31 */
|
|
+ pfm_arch_write_pmd(ctx, cnum, val);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * At certain points, perfmon needs to know if monitoring has been
|
|
+ * explicitly started/stopped by user via pfm_start/pfm_stop. The
|
|
+ * information is tracked in ctx.flags.started. However on certain
|
|
+ * architectures, it may be possible to start/stop directly from
|
|
+ * user level with a single assembly instruction bypassing
|
|
+ * the kernel. This function must be used to determine by
|
|
+ * an arch-specific means if monitoring is actually started/stopped.
|
|
+ */
|
|
+static inline int pfm_arch_is_active(struct pfm_context *ctx)
|
|
+{
|
|
+ return ctx->flags.started;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_ctxswin_thread(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{
|
|
+}
|
|
+
|
|
+int pfm_arch_is_monitoring_active(struct pfm_context *ctx);
|
|
+int pfm_arch_ctxswout_thread(struct task_struct *task,
|
|
+ struct pfm_context *ctx);
|
|
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+char *pfm_arch_get_pmu_module_name(void);
|
|
+
|
|
+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ pfm_arch_stop(current, ctx);
|
|
+ /*
|
|
+ * we mark monitoring as stopped to avoid
|
|
+ * certain side effects especially in
|
|
+ * pfm_switch_sets_from_intr() on
|
|
+ * pfm_arch_restore_pmcs()
|
|
+ */
|
|
+ ctx->flags.started = 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * unfreeze PMU from pfm_do_interrupt_handler()
|
|
+ * ctx may be NULL for spurious
|
|
+ */
|
|
+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
|
|
+{
|
|
+ if (!ctx)
|
|
+ return;
|
|
+
|
|
+ PFM_DBG_ovfl("state=%d", ctx->state);
|
|
+
|
|
+ ctx->flags.started = 1;
|
|
+
|
|
+ if (ctx->state == PFM_CTX_MASKED)
|
|
+ return;
|
|
+
|
|
+ pfm_arch_restore_pmcs(ctx, ctx->active_set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * this function is called from the PMU interrupt handler ONLY.
|
|
+ * On SPARC, the PMU is frozen via arch_stop, masking would be implemented
|
|
+ * via arch-stop as well. Given that the PMU is already stopped when
|
|
+ * entering the interrupt handler, we do not need to stop it again, so
|
|
+ * this function is a nop.
|
|
+ */
|
|
+static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+}
|
|
+
|
|
+/*
|
|
+ * on SPARC masking/unmasking uses the start/stop mechanism, so we simply
|
|
+ * need to start here.
|
|
+ */
|
|
+static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ pfm_arch_start(current, ctx);
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_pmu_config_remove(void)
|
|
+{
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_context_create(struct pfm_context *ctx,
|
|
+ u32 ctx_flags)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_context_free(struct pfm_context *ctx)
|
|
+{
|
|
+}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_setfl_sane(). Context is locked
|
|
+ * and interrupts are masked.
|
|
+ * The value of flags is the value of ctx_flags as passed by
|
|
+ * user.
|
|
+ *
|
|
+ * function must check arch-specific set flags.
|
|
+ * Return:
|
|
+ * 1 when flags are valid
|
|
+ * 0 on error
|
|
+ */
|
|
+static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_init(void)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_init_percpu(void)
|
|
+{
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_load_context(struct pfm_context *ctx)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_unload_context(struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+extern void perfmon_interrupt(struct pt_regs *);
|
|
+
|
|
+static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
|
|
+{
|
|
+ return register_perfctr_intr(perfmon_interrupt);
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_pmu_release(void)
|
|
+{
|
|
+ release_perfctr_intr(perfmon_interrupt);
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_arm_handle_work(struct task_struct *task)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
|
|
+{}
|
|
+
|
|
+static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_get_base_syscall(void)
|
|
+{
|
|
+ return __NR_pfm_create_context;
|
|
+}
|
|
+
|
|
+struct pfm_arch_context {
|
|
+ /* empty */
|
|
+};
|
|
+
|
|
+#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context)
|
|
+/*
|
|
+ * SPARC needs extra alignment for the sampling buffer
|
|
+ */
|
|
+#define PFM_ARCH_SMPL_ALIGN_SIZE (16 * 1024)
|
|
+
|
|
+static inline void pfm_cacheflush(void *addr, unsigned int len)
|
|
+{
|
|
+}
|
|
+
|
|
+#endif /* CONFIG_PERFMON */
|
|
+
|
|
+#endif /* __KERNEL__ */
|
|
+
|
|
+#endif /* _SPARC64_PERFMON_KERN_H_ */
|
|
diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h
|
|
index db9e742..2a9ddb9 100644
|
|
--- a/arch/sparc/include/asm/system_64.h
|
|
+++ b/arch/sparc/include/asm/system_64.h
|
|
@@ -30,6 +30,9 @@ enum sparc_cpu {
|
|
#define ARCH_SUN4C_SUN4 0
|
|
#define ARCH_SUN4 0
|
|
|
|
+extern char *sparc_cpu_type;
|
|
+extern char *sparc_fpu_type;
|
|
+extern char *sparc_pmu_type;
|
|
extern char reboot_command[];
|
|
|
|
/* These are here in an effort to more fully work around Spitfire Errata
|
|
@@ -104,15 +107,13 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
|
|
#define write_pcr(__p) __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (__p))
|
|
#define read_pic(__p) __asm__ __volatile__("rd %%pic, %0" : "=r" (__p))
|
|
|
|
-/* Blackbird errata workaround. See commentary in
|
|
- * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
|
|
- * for more information.
|
|
- */
|
|
-#define reset_pic() \
|
|
- __asm__ __volatile__("ba,pt %xcc, 99f\n\t" \
|
|
+/* Blackbird errata workaround. */
|
|
+#define write_pic(val) \
|
|
+ __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" \
|
|
".align 64\n" \
|
|
- "99:wr %g0, 0x0, %pic\n\t" \
|
|
- "rd %pic, %g0")
|
|
+ "99:wr %0, 0x0, %%pic\n\t" \
|
|
+ "rd %%pic, %%g0" : : "r" (val))
|
|
+#define reset_pic() write_pic(0)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
@@ -145,14 +146,10 @@ do { \
|
|
* and 2 stores in this critical code path. -DaveM
|
|
*/
|
|
#define switch_to(prev, next, last) \
|
|
-do { if (test_thread_flag(TIF_PERFCTR)) { \
|
|
- unsigned long __tmp; \
|
|
- read_pcr(__tmp); \
|
|
- current_thread_info()->pcr_reg = __tmp; \
|
|
- read_pic(__tmp); \
|
|
- current_thread_info()->kernel_cntd0 += (unsigned int)(__tmp);\
|
|
- current_thread_info()->kernel_cntd1 += ((__tmp) >> 32); \
|
|
- } \
|
|
+do { if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \
|
|
+ pfm_ctxsw_out(prev, next); \
|
|
+ if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \
|
|
+ pfm_ctxsw_in(prev, next); \
|
|
flush_tlb_pending(); \
|
|
save_and_clear_fpu(); \
|
|
/* If you are tempted to conditionalize the following */ \
|
|
@@ -197,11 +194,6 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
|
|
"l1", "l2", "l3", "l4", "l5", "l6", "l7", \
|
|
"i0", "i1", "i2", "i3", "i4", "i5", \
|
|
"o0", "o1", "o2", "o3", "o4", "o5", "o7"); \
|
|
- /* If you fuck with this, update ret_from_syscall code too. */ \
|
|
- if (test_thread_flag(TIF_PERFCTR)) { \
|
|
- write_pcr(current_thread_info()->pcr_reg); \
|
|
- reset_pic(); \
|
|
- } \
|
|
} while(0)
|
|
|
|
static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
|
|
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
|
|
index c0a737d..53857f7 100644
|
|
--- a/arch/sparc/include/asm/thread_info_64.h
|
|
+++ b/arch/sparc/include/asm/thread_info_64.h
|
|
@@ -58,11 +58,6 @@ struct thread_info {
|
|
unsigned long gsr[7];
|
|
unsigned long xfsr[7];
|
|
|
|
- __u64 __user *user_cntd0;
|
|
- __u64 __user *user_cntd1;
|
|
- __u64 kernel_cntd0, kernel_cntd1;
|
|
- __u64 pcr_reg;
|
|
-
|
|
struct restart_block restart_block;
|
|
|
|
struct pt_regs *kern_una_regs;
|
|
@@ -96,15 +91,10 @@ struct thread_info {
|
|
#define TI_RWIN_SPTRS 0x000003c8
|
|
#define TI_GSR 0x00000400
|
|
#define TI_XFSR 0x00000438
|
|
-#define TI_USER_CNTD0 0x00000470
|
|
-#define TI_USER_CNTD1 0x00000478
|
|
-#define TI_KERN_CNTD0 0x00000480
|
|
-#define TI_KERN_CNTD1 0x00000488
|
|
-#define TI_PCR 0x00000490
|
|
-#define TI_RESTART_BLOCK 0x00000498
|
|
-#define TI_KUNA_REGS 0x000004c0
|
|
-#define TI_KUNA_INSN 0x000004c8
|
|
-#define TI_FPREGS 0x00000500
|
|
+#define TI_RESTART_BLOCK 0x00000470
|
|
+#define TI_KUNA_REGS 0x00000498
|
|
+#define TI_KUNA_INSN 0x000004a0
|
|
+#define TI_FPREGS 0x000004c0
|
|
|
|
/* We embed this in the uppermost byte of thread_info->flags */
|
|
#define FAULT_CODE_WRITE 0x01 /* Write access, implies D-TLB */
|
|
@@ -222,11 +212,11 @@ register struct thread_info *current_thread_info_reg asm("g6");
|
|
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
|
|
#define TIF_SIGPENDING 2 /* signal pending */
|
|
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
|
|
-#define TIF_PERFCTR 4 /* performance counters active */
|
|
+/* Bit 4 is available */
|
|
#define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */
|
|
/* flag bit 6 is available */
|
|
#define TIF_32BIT 7 /* 32-bit binary */
|
|
-/* flag bit 8 is available */
|
|
+#define TIF_PERFMON_WORK 8 /* work for pfm_handle_work() */
|
|
#define TIF_SECCOMP 9 /* secure computing */
|
|
#define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */
|
|
/* flag bit 11 is available */
|
|
@@ -237,22 +227,24 @@ register struct thread_info *current_thread_info_reg asm("g6");
|
|
#define TIF_ABI_PENDING 12
|
|
#define TIF_MEMDIE 13
|
|
#define TIF_POLLING_NRFLAG 14
|
|
+#define TIF_PERFMON_CTXSW 15 /* perfmon needs ctxsw calls */
|
|
|
|
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
|
|
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
|
|
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
|
|
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
|
|
-#define _TIF_PERFCTR (1<<TIF_PERFCTR)
|
|
#define _TIF_UNALIGNED (1<<TIF_UNALIGNED)
|
|
#define _TIF_32BIT (1<<TIF_32BIT)
|
|
+#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
|
|
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
|
|
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
|
|
#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
|
|
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
|
|
+#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
|
|
|
|
#define _TIF_USER_WORK_MASK ((0xff << TI_FLAG_WSAVED_SHIFT) | \
|
|
_TIF_DO_NOTIFY_RESUME_MASK | \
|
|
- _TIF_NEED_RESCHED | _TIF_PERFCTR)
|
|
+ _TIF_NEED_RESCHED)
|
|
#define _TIF_DO_NOTIFY_RESUME_MASK (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING)
|
|
|
|
/*
|
|
diff --git a/arch/sparc/include/asm/unistd_32.h b/arch/sparc/include/asm/unistd_32.h
|
|
index 648643a..efe4d86 100644
|
|
--- a/arch/sparc/include/asm/unistd_32.h
|
|
+++ b/arch/sparc/include/asm/unistd_32.h
|
|
@@ -338,8 +338,20 @@
|
|
#define __NR_dup3 320
|
|
#define __NR_pipe2 321
|
|
#define __NR_inotify_init1 322
|
|
+#define __NR_pfm_create_context 323
|
|
+#define __NR_pfm_write_pmcs 324
|
|
+#define __NR_pfm_write_pmds 325
|
|
+#define __NR_pfm_read_pmds 326
|
|
+#define __NR_pfm_load_context 327
|
|
+#define __NR_pfm_start 328
|
|
+#define __NR_pfm_stop 329
|
|
+#define __NR_pfm_restart 330
|
|
+#define __NR_pfm_create_evtsets 331
|
|
+#define __NR_pfm_getinfo_evtsets 332
|
|
+#define __NR_pfm_delete_evtsets 333
|
|
+#define __NR_pfm_unload_context 334
|
|
|
|
-#define NR_SYSCALLS 323
|
|
+#define NR_SYSCALLS 335
|
|
|
|
/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
|
|
* it never had the plain ones and there is no value to adding those
|
|
diff --git a/arch/sparc/include/asm/unistd_64.h b/arch/sparc/include/asm/unistd_64.h
|
|
index c5cc0e0..cbbb0b5 100644
|
|
--- a/arch/sparc/include/asm/unistd_64.h
|
|
+++ b/arch/sparc/include/asm/unistd_64.h
|
|
@@ -340,8 +340,20 @@
|
|
#define __NR_dup3 320
|
|
#define __NR_pipe2 321
|
|
#define __NR_inotify_init1 322
|
|
+#define __NR_pfm_create_context 323
|
|
+#define __NR_pfm_write_pmcs 324
|
|
+#define __NR_pfm_write_pmds 325
|
|
+#define __NR_pfm_read_pmds 326
|
|
+#define __NR_pfm_load_context 327
|
|
+#define __NR_pfm_start 328
|
|
+#define __NR_pfm_stop 329
|
|
+#define __NR_pfm_restart 330
|
|
+#define __NR_pfm_create_evtsets 331
|
|
+#define __NR_pfm_getinfo_evtsets 332
|
|
+#define __NR_pfm_delete_evtsets 333
|
|
+#define __NR_pfm_unload_context 334
|
|
|
|
-#define NR_SYSCALLS 323
|
|
+#define NR_SYSCALLS 335
|
|
|
|
#ifdef __KERNEL__
|
|
#define __ARCH_WANT_IPC_PARSE_VERSION
|
|
diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S
|
|
index e1b9233..727e4e7 100644
|
|
--- a/arch/sparc/kernel/systbls.S
|
|
+++ b/arch/sparc/kernel/systbls.S
|
|
@@ -81,4 +81,6 @@ sys_call_table:
|
|
/*305*/ .long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
|
|
/*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
|
|
/*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
|
|
-/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1
|
|
+/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs
|
|
+/*325*/ .long sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop
|
|
+/*330*/ .long sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context
|
|
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
|
|
index 36b4b7a..5555d1e 100644
|
|
--- a/arch/sparc64/Kconfig
|
|
+++ b/arch/sparc64/Kconfig
|
|
@@ -401,6 +401,8 @@ source "drivers/sbus/char/Kconfig"
|
|
|
|
source "fs/Kconfig"
|
|
|
|
+source "arch/sparc64/perfmon/Kconfig"
|
|
+
|
|
source "arch/sparc64/Kconfig.debug"
|
|
|
|
source "security/Kconfig"
|
|
diff --git a/arch/sparc64/Makefile b/arch/sparc64/Makefile
|
|
index b785a39..646731c 100644
|
|
--- a/arch/sparc64/Makefile
|
|
+++ b/arch/sparc64/Makefile
|
|
@@ -32,6 +32,8 @@ core-y += arch/sparc64/math-emu/
|
|
libs-y += arch/sparc64/prom/ arch/sparc64/lib/
|
|
drivers-$(CONFIG_OPROFILE) += arch/sparc64/oprofile/
|
|
|
|
+core-$(CONFIG_PERFMON) += arch/sparc64/perfmon/
|
|
+
|
|
boot := arch/sparc64/boot
|
|
|
|
image tftpboot.img vmlinux.aout: vmlinux
|
|
diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c
|
|
index 0097c08..f839f84 100644
|
|
--- a/arch/sparc64/kernel/cpu.c
|
|
+++ b/arch/sparc64/kernel/cpu.c
|
|
@@ -20,16 +20,17 @@
|
|
DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
|
|
|
|
struct cpu_iu_info {
|
|
- short manuf;
|
|
- short impl;
|
|
- char* cpu_name; /* should be enough I hope... */
|
|
+ short manuf;
|
|
+ short impl;
|
|
+ char *cpu_name;
|
|
+ char *pmu_name;
|
|
};
|
|
|
|
struct cpu_fp_info {
|
|
- short manuf;
|
|
- short impl;
|
|
- char fpu_vers;
|
|
- char* fp_name;
|
|
+ short manuf;
|
|
+ short impl;
|
|
+ char fpu_vers;
|
|
+ char* fp_name;
|
|
};
|
|
|
|
static struct cpu_fp_info linux_sparc_fpu[] = {
|
|
@@ -49,23 +50,24 @@ static struct cpu_fp_info linux_sparc_fpu[] = {
|
|
#define NSPARCFPU ARRAY_SIZE(linux_sparc_fpu)
|
|
|
|
static struct cpu_iu_info linux_sparc_chips[] = {
|
|
- { 0x17, 0x10, "TI UltraSparc I (SpitFire)"},
|
|
- { 0x22, 0x10, "TI UltraSparc I (SpitFire)"},
|
|
- { 0x17, 0x11, "TI UltraSparc II (BlackBird)"},
|
|
- { 0x17, 0x12, "TI UltraSparc IIi (Sabre)"},
|
|
- { 0x17, 0x13, "TI UltraSparc IIe (Hummingbird)"},
|
|
- { 0x3e, 0x14, "TI UltraSparc III (Cheetah)"},
|
|
- { 0x3e, 0x15, "TI UltraSparc III+ (Cheetah+)"},
|
|
- { 0x3e, 0x16, "TI UltraSparc IIIi (Jalapeno)"},
|
|
- { 0x3e, 0x18, "TI UltraSparc IV (Jaguar)"},
|
|
- { 0x3e, 0x19, "TI UltraSparc IV+ (Panther)"},
|
|
- { 0x3e, 0x22, "TI UltraSparc IIIi+ (Serrano)"},
|
|
-};
|
|
+ { 0x17, 0x10, "TI UltraSparc I (SpitFire)", "ultra12"},
|
|
+ { 0x22, 0x10, "TI UltraSparc I (SpitFire)", "ultra12"},
|
|
+ { 0x17, 0x11, "TI UltraSparc II (BlackBird)", "ultra12"},
|
|
+ { 0x17, 0x12, "TI UltraSparc IIi (Sabre)", "ultra12"},
|
|
+ { 0x17, 0x13, "TI UltraSparc IIe (Hummingbird)", "ultra12"},
|
|
+ { 0x3e, 0x14, "TI UltraSparc III (Cheetah)", "ultra3"},
|
|
+ { 0x3e, 0x15, "TI UltraSparc III+ (Cheetah+)", "ultra3+"},
|
|
+ { 0x3e, 0x16, "TI UltraSparc IIIi (Jalapeno)", "ultra3i"},
|
|
+ { 0x3e, 0x18, "TI UltraSparc IV (Jaguar)", "ultra4"},
|
|
+ { 0x3e, 0x19, "TI UltraSparc IV+ (Panther)", "ultra4+"},
|
|
+ { 0x3e, 0x22, "TI UltraSparc IIIi+ (Serrano)", "ultra3+"},
|
|
+ };
|
|
|
|
#define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips)
|
|
|
|
char *sparc_cpu_type;
|
|
char *sparc_fpu_type;
|
|
+char *sparc_pmu_type;
|
|
|
|
static void __init sun4v_cpu_probe(void)
|
|
{
|
|
@@ -73,11 +75,13 @@ static void __init sun4v_cpu_probe(void)
|
|
case SUN4V_CHIP_NIAGARA1:
|
|
sparc_cpu_type = "UltraSparc T1 (Niagara)";
|
|
sparc_fpu_type = "UltraSparc T1 integrated FPU";
|
|
+ sparc_pmu_type = "niagara";
|
|
break;
|
|
|
|
case SUN4V_CHIP_NIAGARA2:
|
|
sparc_cpu_type = "UltraSparc T2 (Niagara2)";
|
|
sparc_fpu_type = "UltraSparc T2 integrated FPU";
|
|
+ sparc_pmu_type = "niagara2";
|
|
break;
|
|
|
|
default:
|
|
@@ -85,6 +89,7 @@ static void __init sun4v_cpu_probe(void)
|
|
prom_cpu_compatible);
|
|
sparc_cpu_type = "Unknown SUN4V CPU";
|
|
sparc_fpu_type = "Unknown SUN4V FPU";
|
|
+ sparc_pmu_type = "Unknown SUN4V PMU";
|
|
break;
|
|
}
|
|
}
|
|
@@ -117,6 +122,8 @@ retry:
|
|
if (linux_sparc_chips[i].impl == impl) {
|
|
sparc_cpu_type =
|
|
linux_sparc_chips[i].cpu_name;
|
|
+ sparc_pmu_type =
|
|
+ linux_sparc_chips[i].pmu_name;
|
|
break;
|
|
}
|
|
}
|
|
@@ -134,7 +141,8 @@ retry:
|
|
printk("DEBUG: manuf[%lx] impl[%lx]\n",
|
|
manuf, impl);
|
|
}
|
|
sparc_cpu_type = "Unknown CPU";
|
|
+ sparc_pmu_type = "Unknown PMU";
|
|
}
|
|
|
|
for (i = 0; i < NSPARCFPU; i++) {
|
|
diff --git a/arch/sparc64/kernel/hvcalls.S b/arch/sparc64/kernel/hvcalls.S
|
|
index a2810f3..b9f508c 100644
|
|
--- a/arch/sparc64/kernel/hvcalls.S
|
|
+++ b/arch/sparc64/kernel/hvcalls.S
|
|
@@ -884,3 +884,44 @@ sun4v_mmu_demap_all:
|
|
retl
|
|
nop
|
|
.size sun4v_mmu_demap_all, .-sun4v_mmu_demap_all
|
|
+
|
|
+ .globl sun4v_niagara_getperf
|
|
+ .type sun4v_niagara_getperf,#function
|
|
+sun4v_niagara_getperf:
|
|
+ mov %o0, %o4
|
|
+ mov HV_FAST_GET_PERFREG, %o5
|
|
+ ta HV_FAST_TRAP
|
|
+ stx %o1, [%o4]
|
|
+ retl
|
|
+ nop
|
|
+ .size sun4v_niagara_getperf, .-sun4v_niagara_getperf
|
|
+
|
|
+ .globl sun4v_niagara_setperf
|
|
+ .type sun4v_niagara_setperf,#function
|
|
+sun4v_niagara_setperf:
|
|
+ mov HV_FAST_SET_PERFREG, %o5
|
|
+ ta HV_FAST_TRAP
|
|
+ retl
|
|
+ nop
|
|
+ .size sun4v_niagara_setperf, .-sun4v_niagara_setperf
|
|
+
|
|
+ .globl sun4v_niagara2_getperf
|
|
+ .type sun4v_niagara2_getperf,#function
|
|
+sun4v_niagara2_getperf:
|
|
+ mov %o0, %o4
|
|
+ mov HV_FAST_N2_GET_PERFREG, %o5
|
|
+ ta HV_FAST_TRAP
|
|
+ stx %o1, [%o4]
|
|
+ retl
|
|
+ nop
|
|
+ .size sun4v_niagara2_getperf, .-sun4v_niagara2_getperf
|
|
+
|
|
+ .globl sun4v_niagara2_setperf
|
|
+ .type sun4v_niagara2_setperf,#function
|
|
+sun4v_niagara2_setperf:
|
|
+ mov HV_FAST_N2_SET_PERFREG, %o5
|
|
+ ta HV_FAST_TRAP
|
|
+ retl
|
|
+ nop
|
|
+ .size sun4v_niagara2_setperf, .-sun4v_niagara2_setperf
|
|
+
|
|
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
|
|
index 7495bc7..e2bcca5 100644
|
|
--- a/arch/sparc64/kernel/irq.c
|
|
+++ b/arch/sparc64/kernel/irq.c
|
|
@@ -749,6 +749,20 @@ void handler_irq(int irq, struct pt_regs *regs)
|
|
irq_exit();
|
|
set_irq_regs(old_regs);
|
|
}
|
|
+static void unhandled_perf_irq(struct pt_regs *regs)
|
|
+{
|
|
+ unsigned long pcr, pic;
|
|
+
|
|
+ read_pcr(pcr);
|
|
+ read_pic(pic);
|
|
+
|
|
+ write_pcr(0);
|
|
+
|
|
+ printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
|
|
+ smp_processor_id());
|
|
+ printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
|
|
+ smp_processor_id(), pcr, pic);
|
|
+}
|
|
|
|
void do_softirq(void)
|
|
{
|
|
@@ -776,6 +790,55 @@ void do_softirq(void)
|
|
local_irq_restore(flags);
|
|
}
|
|
|
|
+/* Almost a direct copy of the powerpc PMC code. */
|
|
+static DEFINE_SPINLOCK(perf_irq_lock);
|
|
+static void *perf_irq_owner_caller; /* mostly for debugging */
|
|
+static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
|
|
+
|
|
+/* Invoked from level 15 PIL handler in trap table. */
|
|
+void perfctr_irq(int irq, struct pt_regs *regs)
|
|
+{
|
|
+ clear_softint(1 << irq);
|
|
+ perf_irq(regs);
|
|
+}
|
|
+
|
|
+int register_perfctr_intr(void (*handler)(struct pt_regs *))
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ if (!handler)
|
|
+ return -EINVAL;
|
|
+
|
|
+ spin_lock(&perf_irq_lock);
|
|
+ if (perf_irq != unhandled_perf_irq) {
|
|
+ printk(KERN_WARNING "register_perfctr_intr: "
|
|
+ "perf IRQ busy (reserved by caller %p)\n",
|
|
+ perf_irq_owner_caller);
|
|
+ ret = -EBUSY;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ perf_irq_owner_caller = __builtin_return_address(0);
|
|
+ perf_irq = handler;
|
|
+
|
|
+ ret = 0;
|
|
+out:
|
|
+ spin_unlock(&perf_irq_lock);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(register_perfctr_intr);
|
|
+
|
|
+void release_perfctr_intr(void (*handler)(struct pt_regs *))
|
|
+{
|
|
+ spin_lock(&perf_irq_lock);
|
|
+ perf_irq_owner_caller = NULL;
|
|
+ perf_irq = unhandled_perf_irq;
|
|
+ spin_unlock(&perf_irq_lock);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(release_perfctr_intr);
|
|
+
|
|
+
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
void fixup_irqs(void)
|
|
{
|
|
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
|
|
index 15f4178..7282d21 100644
|
|
--- a/arch/sparc64/kernel/process.c
|
|
+++ b/arch/sparc64/kernel/process.c
|
|
@@ -30,6 +30,7 @@
|
|
#include <linux/cpu.h>
|
|
#include <linux/elfcore.h>
|
|
#include <linux/sysrq.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/oplib.h>
|
|
#include <asm/uaccess.h>
|
|
@@ -385,11 +386,7 @@ void exit_thread(void)
|
|
t->utraps[0]--;
|
|
}
|
|
|
|
- if (test_and_clear_thread_flag(TIF_PERFCTR)) {
|
|
- t->user_cntd0 = t->user_cntd1 = NULL;
|
|
- t->pcr_reg = 0;
|
|
- write_pcr(0);
|
|
- }
|
|
+ pfm_exit_thread();
|
|
}
|
|
|
|
void flush_thread(void)
|
|
@@ -411,13 +408,6 @@ void flush_thread(void)
|
|
|
|
set_thread_wsaved(0);
|
|
|
|
- /* Turn off performance counters if on. */
|
|
- if (test_and_clear_thread_flag(TIF_PERFCTR)) {
|
|
- t->user_cntd0 = t->user_cntd1 = NULL;
|
|
- t->pcr_reg = 0;
|
|
- write_pcr(0);
|
|
- }
|
|
-
|
|
/* Clear FPU register state. */
|
|
t->fpsaved[0] = 0;
|
|
|
|
@@ -631,16 +621,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
|
|
t->kregs->u_regs[UREG_FP] =
|
|
((unsigned long) child_sf) - STACK_BIAS;
|
|
|
|
- /* Special case, if we are spawning a kernel thread from
|
|
- * a userspace task (usermode helper, NFS or similar), we
|
|
- * must disable performance counters in the child because
|
|
- * the address space and protection realm are changing.
|
|
- */
|
|
- if (t->flags & _TIF_PERFCTR) {
|
|
- t->user_cntd0 = t->user_cntd1 = NULL;
|
|
- t->pcr_reg = 0;
|
|
- t->flags &= ~_TIF_PERFCTR;
|
|
- }
|
|
t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT);
|
|
t->kregs->u_regs[UREG_G6] = (unsigned long) t;
|
|
t->kregs->u_regs[UREG_G4] = (unsigned long) t->task;
|
|
@@ -673,6 +653,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
|
|
if (clone_flags & CLONE_SETTLS)
|
|
t->kregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3];
|
|
|
|
+ pfm_copy_thread(p);
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
|
|
index 97a993c..c2af29d 100644
|
|
--- a/arch/sparc64/kernel/rtrap.S
|
|
+++ b/arch/sparc64/kernel/rtrap.S
|
|
@@ -65,55 +65,14 @@ __handle_user_windows:
|
|
ba,pt %xcc, __handle_user_windows_continue
|
|
|
|
andn %l1, %l4, %l1
|
|
-__handle_perfctrs:
|
|
- call update_perfctrs
|
|
- wrpr %g0, RTRAP_PSTATE, %pstate
|
|
- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
|
|
- ldub [%g6 + TI_WSAVED], %o2
|
|
- brz,pt %o2, 1f
|
|
- nop
|
|
- /* Redo userwin+sched+sig checks */
|
|
- call fault_in_user_windows
|
|
-
|
|
- wrpr %g0, RTRAP_PSTATE, %pstate
|
|
- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
|
|
- ldx [%g6 + TI_FLAGS], %l0
|
|
- andcc %l0, _TIF_NEED_RESCHED, %g0
|
|
- be,pt %xcc, 1f
|
|
-
|
|
- nop
|
|
- call schedule
|
|
- wrpr %g0, RTRAP_PSTATE, %pstate
|
|
- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
|
|
- ldx [%g6 + TI_FLAGS], %l0
|
|
-1: andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0
|
|
-
|
|
- be,pt %xcc, __handle_perfctrs_continue
|
|
- sethi %hi(TSTATE_PEF), %o0
|
|
- mov %l5, %o1
|
|
- add %sp, PTREGS_OFF, %o0
|
|
- mov %l0, %o2
|
|
- call do_notify_resume
|
|
-
|
|
- wrpr %g0, RTRAP_PSTATE, %pstate
|
|
- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
|
|
- /* Signal delivery can modify pt_regs tstate, so we must
|
|
- * reload it.
|
|
- */
|
|
- ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
|
|
- sethi %hi(0xf << 20), %l4
|
|
- and %l1, %l4, %l4
|
|
- andn %l1, %l4, %l1
|
|
- ba,pt %xcc, __handle_perfctrs_continue
|
|
-
|
|
- sethi %hi(TSTATE_PEF), %o0
|
|
__handle_userfpu:
|
|
rd %fprs, %l5
|
|
andcc %l5, FPRS_FEF, %g0
|
|
sethi %hi(TSTATE_PEF), %o0
|
|
be,a,pn %icc, __handle_userfpu_continue
|
|
andn %l1, %o0, %l1
|
|
- ba,a,pt %xcc, __handle_userfpu_continue
|
|
+ ba,pt %xcc, __handle_userfpu_continue
|
|
+ nop
|
|
|
|
__handle_signal:
|
|
mov %l5, %o1
|
|
@@ -202,12 +161,8 @@ __handle_signal_continue:
|
|
brnz,pn %o2, __handle_user_windows
|
|
nop
|
|
__handle_user_windows_continue:
|
|
- ldx [%g6 + TI_FLAGS], %l5
|
|
- andcc %l5, _TIF_PERFCTR, %g0
|
|
sethi %hi(TSTATE_PEF), %o0
|
|
- bne,pn %xcc, __handle_perfctrs
|
|
-__handle_perfctrs_continue:
|
|
- andcc %l1, %o0, %g0
|
|
+ andcc %l1, %o0, %g0
|
|
|
|
/* This fpdepth clear is necessary for non-syscall rtraps only */
|
|
user_nowork:
|
|
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
|
|
index c8b03a4..248aa1f 100644
|
|
--- a/arch/sparc64/kernel/setup.c
|
|
+++ b/arch/sparc64/kernel/setup.c
|
|
@@ -352,6 +352,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
|
|
seq_printf(m,
|
|
"cpu\t\t: %s\n"
|
|
"fpu\t\t: %s\n"
|
|
+ "pmu\t\t: %s\n"
|
|
"prom\t\t: %s\n"
|
|
"type\t\t: %s\n"
|
|
"ncpus probed\t: %d\n"
|
|
@@ -364,6 +365,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
|
|
,
|
|
sparc_cpu_type,
|
|
sparc_fpu_type,
|
|
+ sparc_pmu_type,
|
|
prom_version,
|
|
((tlb_type == hypervisor) ?
|
|
"sun4v" :
|
|
diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c
|
|
index ec82d76..cea1082 100644
|
|
--- a/arch/sparc64/kernel/signal.c
|
|
+++ b/arch/sparc64/kernel/signal.c
|
|
@@ -23,6 +23,7 @@
|
|
#include <linux/tty.h>
|
|
#include <linux/binfmts.h>
|
|
#include <linux/bitops.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
#include <asm/ptrace.h>
|
|
@@ -608,6 +609,9 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
|
|
|
|
void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags)
|
|
{
|
|
+ if (thread_info_flags & _TIF_PERFMON_WORK)
|
|
+ pfm_handle_work(regs);
|
|
+
|
|
if (thread_info_flags & _TIF_SIGPENDING)
|
|
do_signal(regs, orig_i0);
|
|
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
|
|
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
|
|
index 39749e3..384004b 100644
|
|
--- a/arch/sparc64/kernel/sys_sparc.c
|
|
+++ b/arch/sparc64/kernel/sys_sparc.c
|
|
@@ -26,7 +26,6 @@
|
|
|
|
#include <asm/uaccess.h>
|
|
#include <asm/utrap.h>
|
|
-#include <asm/perfctr.h>
|
|
#include <asm/unistd.h>
|
|
|
|
#include "entry.h"
|
|
@@ -791,106 +790,10 @@ asmlinkage long sys_rt_sigaction(int sig,
|
|
return ret;
|
|
}
|
|
|
|
-/* Invoked by rtrap code to update performance counters in
|
|
- * user space.
|
|
- */
|
|
-asmlinkage void update_perfctrs(void)
|
|
-{
|
|
- unsigned long pic, tmp;
|
|
-
|
|
- read_pic(pic);
|
|
- tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
|
|
- __put_user(tmp, current_thread_info()->user_cntd0);
|
|
- tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
|
|
- __put_user(tmp, current_thread_info()->user_cntd1);
|
|
- reset_pic();
|
|
-}
|
|
-
|
|
asmlinkage long sys_perfctr(int opcode, unsigned long arg0, unsigned long arg1, unsigned long arg2)
|
|
{
|
|
- int err = 0;
|
|
-
|
|
- switch(opcode) {
|
|
- case PERFCTR_ON:
|
|
- current_thread_info()->pcr_reg = arg2;
|
|
- current_thread_info()->user_cntd0 = (u64 __user *) arg0;
|
|
- current_thread_info()->user_cntd1 = (u64 __user *) arg1;
|
|
- current_thread_info()->kernel_cntd0 =
|
|
- current_thread_info()->kernel_cntd1 = 0;
|
|
- write_pcr(arg2);
|
|
- reset_pic();
|
|
- set_thread_flag(TIF_PERFCTR);
|
|
- break;
|
|
-
|
|
- case PERFCTR_OFF:
|
|
- err = -EINVAL;
|
|
- if (test_thread_flag(TIF_PERFCTR)) {
|
|
- current_thread_info()->user_cntd0 =
|
|
- current_thread_info()->user_cntd1 = NULL;
|
|
- current_thread_info()->pcr_reg = 0;
|
|
- write_pcr(0);
|
|
- clear_thread_flag(TIF_PERFCTR);
|
|
- err = 0;
|
|
- }
|
|
- break;
|
|
-
|
|
- case PERFCTR_READ: {
|
|
- unsigned long pic, tmp;
|
|
-
|
|
- if (!test_thread_flag(TIF_PERFCTR)) {
|
|
- err = -EINVAL;
|
|
- break;
|
|
- }
|
|
- read_pic(pic);
|
|
- tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic);
|
|
- err |= __put_user(tmp, current_thread_info()->user_cntd0);
|
|
- tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32));
|
|
- err |= __put_user(tmp, current_thread_info()->user_cntd1);
|
|
- reset_pic();
|
|
- break;
|
|
- }
|
|
-
|
|
- case PERFCTR_CLRPIC:
|
|
- if (!test_thread_flag(TIF_PERFCTR)) {
|
|
- err = -EINVAL;
|
|
- break;
|
|
- }
|
|
- current_thread_info()->kernel_cntd0 =
|
|
- current_thread_info()->kernel_cntd1 = 0;
|
|
- reset_pic();
|
|
- break;
|
|
-
|
|
- case PERFCTR_SETPCR: {
|
|
- u64 __user *user_pcr = (u64 __user *)arg0;
|
|
-
|
|
- if (!test_thread_flag(TIF_PERFCTR)) {
|
|
- err = -EINVAL;
|
|
- break;
|
|
- }
|
|
- err |= __get_user(current_thread_info()->pcr_reg, user_pcr);
|
|
- write_pcr(current_thread_info()->pcr_reg);
|
|
- current_thread_info()->kernel_cntd0 =
|
|
- current_thread_info()->kernel_cntd1 = 0;
|
|
- reset_pic();
|
|
- break;
|
|
- }
|
|
-
|
|
- case PERFCTR_GETPCR: {
|
|
- u64 __user *user_pcr = (u64 __user *)arg0;
|
|
-
|
|
- if (!test_thread_flag(TIF_PERFCTR)) {
|
|
- err = -EINVAL;
|
|
- break;
|
|
- }
|
|
- err |= __put_user(current_thread_info()->pcr_reg, user_pcr);
|
|
- break;
|
|
- }
|
|
-
|
|
- default:
|
|
- err = -EINVAL;
|
|
- break;
|
|
- };
|
|
- return err;
|
|
+ /* Superseded by perfmon2 */
|
|
+ return -ENOSYS;
|
|
}
|
|
|
|
/*
|
|
diff --git a/arch/sparc64/kernel/syscalls.S b/arch/sparc64/kernel/syscalls.S
|
|
index a2f2427..b20bf1e 100644
|
|
--- a/arch/sparc64/kernel/syscalls.S
|
|
+++ b/arch/sparc64/kernel/syscalls.S
|
|
@@ -117,26 +117,9 @@ ret_from_syscall:
|
|
stb %g0, [%g6 + TI_NEW_CHILD]
|
|
ldx [%g6 + TI_FLAGS], %l0
|
|
call schedule_tail
|
|
- mov %g7, %o0
|
|
- andcc %l0, _TIF_PERFCTR, %g0
|
|
- be,pt %icc, 1f
|
|
- nop
|
|
- ldx [%g6 + TI_PCR], %o7
|
|
- wr %g0, %o7, %pcr
|
|
-
|
|
- /* Blackbird errata workaround. See commentary in
|
|
- * smp.c:smp_percpu_timer_interrupt() for more
|
|
- * information.
|
|
- */
|
|
- ba,pt %xcc, 99f
|
|
- nop
|
|
-
|
|
- .align 64
|
|
-99: wr %g0, %g0, %pic
|
|
- rd %pic, %g0
|
|
-
|
|
-1: ba,pt %xcc, ret_sys_call
|
|
- ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0
|
|
+ mov %g7, %o0
|
|
+ ba,pt %xcc, ret_sys_call
|
|
+ ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0
|
|
|
|
.globl sparc_exit
|
|
.type sparc_exit,#function
|
|
diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
|
|
index 0fdbf3b..1a1a296 100644
|
|
--- a/arch/sparc64/kernel/systbls.S
|
|
+++ b/arch/sparc64/kernel/systbls.S
|
|
@@ -82,7 +82,9 @@ sys_call_table32:
|
|
.word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait
|
|
/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
|
|
.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
|
|
-/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1
|
|
+/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs
|
|
+ .word sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop
|
|
+/*330*/ .word sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context
|
|
|
|
#endif /* CONFIG_COMPAT */
|
|
|
|
@@ -156,4 +158,6 @@ sys_call_table:
|
|
.word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
|
|
/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
|
|
.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
|
|
-/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1
|
|
+/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs
|
|
+ .word sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop
|
|
+/*330*/ .word sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context
|
|
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
|
|
index c824df1..be45d09 100644
|
|
--- a/arch/sparc64/kernel/traps.c
|
|
+++ b/arch/sparc64/kernel/traps.c
|
|
@@ -2470,86 +2470,90 @@ extern void tsb_config_offsets_are_bolixed_dave(void);
|
|
/* Only invoked on boot processor. */
|
|
void __init trap_init(void)
|
|
{
|
|
- /* Compile time sanity check. */
|
|
- if (TI_TASK != offsetof(struct thread_info, task) ||
|
|
- TI_FLAGS != offsetof(struct thread_info, flags) ||
|
|
- TI_CPU != offsetof(struct thread_info, cpu) ||
|
|
- TI_FPSAVED != offsetof(struct thread_info, fpsaved) ||
|
|
- TI_KSP != offsetof(struct thread_info, ksp) ||
|
|
- TI_FAULT_ADDR != offsetof(struct thread_info, fault_address) ||
|
|
- TI_KREGS != offsetof(struct thread_info, kregs) ||
|
|
- TI_UTRAPS != offsetof(struct thread_info, utraps) ||
|
|
- TI_EXEC_DOMAIN != offsetof(struct thread_info, exec_domain) ||
|
|
- TI_REG_WINDOW != offsetof(struct thread_info, reg_window) ||
|
|
- TI_RWIN_SPTRS != offsetof(struct thread_info, rwbuf_stkptrs) ||
|
|
- TI_GSR != offsetof(struct thread_info, gsr) ||
|
|
- TI_XFSR != offsetof(struct thread_info, xfsr) ||
|
|
- TI_USER_CNTD0 != offsetof(struct thread_info, user_cntd0) ||
|
|
- TI_USER_CNTD1 != offsetof(struct thread_info, user_cntd1) ||
|
|
- TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) ||
|
|
- TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) ||
|
|
- TI_PCR != offsetof(struct thread_info, pcr_reg) ||
|
|
- TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) ||
|
|
- TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
|
|
- TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) ||
|
|
- TI_RESTART_BLOCK != offsetof(struct thread_info, restart_block) ||
|
|
- TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) ||
|
|
- TI_KUNA_INSN != offsetof(struct thread_info, kern_una_insn) ||
|
|
- TI_FPREGS != offsetof(struct thread_info, fpregs) ||
|
|
- (TI_FPREGS & (64 - 1)))
|
|
- thread_info_offsets_are_bolixed_dave();
|
|
-
|
|
- if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) ||
|
|
- (TRAP_PER_CPU_PGD_PADDR !=
|
|
- offsetof(struct trap_per_cpu, pgd_paddr)) ||
|
|
- (TRAP_PER_CPU_CPU_MONDO_PA !=
|
|
- offsetof(struct trap_per_cpu, cpu_mondo_pa)) ||
|
|
- (TRAP_PER_CPU_DEV_MONDO_PA !=
|
|
- offsetof(struct trap_per_cpu, dev_mondo_pa)) ||
|
|
- (TRAP_PER_CPU_RESUM_MONDO_PA !=
|
|
- offsetof(struct trap_per_cpu, resum_mondo_pa)) ||
|
|
- (TRAP_PER_CPU_RESUM_KBUF_PA !=
|
|
- offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) ||
|
|
- (TRAP_PER_CPU_NONRESUM_MONDO_PA !=
|
|
- offsetof(struct trap_per_cpu, nonresum_mondo_pa)) ||
|
|
- (TRAP_PER_CPU_NONRESUM_KBUF_PA !=
|
|
- offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) ||
|
|
- (TRAP_PER_CPU_FAULT_INFO !=
|
|
- offsetof(struct trap_per_cpu, fault_info)) ||
|
|
- (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
|
|
- offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
|
|
- (TRAP_PER_CPU_CPU_LIST_PA !=
|
|
- offsetof(struct trap_per_cpu, cpu_list_pa)) ||
|
|
- (TRAP_PER_CPU_TSB_HUGE !=
|
|
- offsetof(struct trap_per_cpu, tsb_huge)) ||
|
|
- (TRAP_PER_CPU_TSB_HUGE_TEMP !=
|
|
- offsetof(struct trap_per_cpu, tsb_huge_temp)) ||
|
|
- (TRAP_PER_CPU_IRQ_WORKLIST_PA !=
|
|
- offsetof(struct trap_per_cpu, irq_worklist_pa)) ||
|
|
- (TRAP_PER_CPU_CPU_MONDO_QMASK !=
|
|
- offsetof(struct trap_per_cpu, cpu_mondo_qmask)) ||
|
|
- (TRAP_PER_CPU_DEV_MONDO_QMASK !=
|
|
- offsetof(struct trap_per_cpu, dev_mondo_qmask)) ||
|
|
- (TRAP_PER_CPU_RESUM_QMASK !=
|
|
- offsetof(struct trap_per_cpu, resum_qmask)) ||
|
|
- (TRAP_PER_CPU_NONRESUM_QMASK !=
|
|
- offsetof(struct trap_per_cpu, nonresum_qmask)))
|
|
- trap_per_cpu_offsets_are_bolixed_dave();
|
|
-
|
|
- if ((TSB_CONFIG_TSB !=
|
|
- offsetof(struct tsb_config, tsb)) ||
|
|
- (TSB_CONFIG_RSS_LIMIT !=
|
|
- offsetof(struct tsb_config, tsb_rss_limit)) ||
|
|
- (TSB_CONFIG_NENTRIES !=
|
|
- offsetof(struct tsb_config, tsb_nentries)) ||
|
|
- (TSB_CONFIG_REG_VAL !=
|
|
- offsetof(struct tsb_config, tsb_reg_val)) ||
|
|
- (TSB_CONFIG_MAP_VADDR !=
|
|
- offsetof(struct tsb_config, tsb_map_vaddr)) ||
|
|
- (TSB_CONFIG_MAP_PTE !=
|
|
- offsetof(struct tsb_config, tsb_map_pte)))
|
|
- tsb_config_offsets_are_bolixed_dave();
|
|
-
|
|
+ BUILD_BUG_ON(TI_TASK != offsetof(struct thread_info, task));
|
|
+ BUILD_BUG_ON(TI_FLAGS != offsetof(struct thread_info, flags));
|
|
+ BUILD_BUG_ON(TI_CPU != offsetof(struct thread_info, cpu));
|
|
+ BUILD_BUG_ON(TI_FPSAVED != offsetof(struct thread_info, fpsaved));
|
|
+ BUILD_BUG_ON(TI_KSP != offsetof(struct thread_info, ksp));
|
|
+ BUILD_BUG_ON(TI_FAULT_ADDR !=
|
|
+ offsetof(struct thread_info, fault_address));
|
|
+ BUILD_BUG_ON(TI_KREGS != offsetof(struct thread_info, kregs));
|
|
+ BUILD_BUG_ON(TI_UTRAPS != offsetof(struct thread_info, utraps));
|
|
+ BUILD_BUG_ON(TI_EXEC_DOMAIN !=
|
|
+ offsetof(struct thread_info, exec_domain));
|
|
+ BUILD_BUG_ON(TI_REG_WINDOW !=
|
|
+ offsetof(struct thread_info, reg_window));
|
|
+ BUILD_BUG_ON(TI_RWIN_SPTRS !=
|
|
+ offsetof(struct thread_info, rwbuf_stkptrs));
|
|
+ BUILD_BUG_ON(TI_GSR != offsetof(struct thread_info, gsr));
|
|
+ BUILD_BUG_ON(TI_XFSR != offsetof(struct thread_info, xfsr));
|
|
+ BUILD_BUG_ON(TI_PRE_COUNT !=
|
|
+ offsetof(struct thread_info, preempt_count));
|
|
+ BUILD_BUG_ON(TI_NEW_CHILD !=
|
|
+ offsetof(struct thread_info, new_child));
|
|
+ BUILD_BUG_ON(TI_SYS_NOERROR !=
|
|
+ offsetof(struct thread_info, syscall_noerror));
|
|
+ BUILD_BUG_ON(TI_RESTART_BLOCK !=
|
|
+ offsetof(struct thread_info, restart_block));
|
|
+ BUILD_BUG_ON(TI_KUNA_REGS !=
|
|
+ offsetof(struct thread_info, kern_una_regs));
|
|
+ BUILD_BUG_ON(TI_KUNA_INSN !=
|
|
+ offsetof(struct thread_info, kern_una_insn));
|
|
+ BUILD_BUG_ON(TI_FPREGS != offsetof(struct thread_info, fpregs));
|
|
+ BUILD_BUG_ON((TI_FPREGS & (64 - 1)));
|
|
+
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_THREAD !=
|
|
+ offsetof(struct trap_per_cpu, thread));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_PGD_PADDR !=
|
|
+ offsetof(struct trap_per_cpu, pgd_paddr));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_PA !=
|
|
+ offsetof(struct trap_per_cpu, cpu_mondo_pa));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_PA !=
|
|
+ offsetof(struct trap_per_cpu, dev_mondo_pa));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_RESUM_MONDO_PA !=
|
|
+ offsetof(struct trap_per_cpu, resum_mondo_pa));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_RESUM_KBUF_PA !=
|
|
+ offsetof(struct trap_per_cpu, resum_kernel_buf_pa));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_MONDO_PA !=
|
|
+ offsetof(struct trap_per_cpu, nonresum_mondo_pa));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_KBUF_PA !=
|
|
+ offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_FAULT_INFO !=
|
|
+ offsetof(struct trap_per_cpu, fault_info));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
|
|
+ offsetof(struct trap_per_cpu, cpu_mondo_block_pa));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_CPU_LIST_PA !=
|
|
+ offsetof(struct trap_per_cpu, cpu_list_pa));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE !=
|
|
+ offsetof(struct trap_per_cpu, tsb_huge));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE_TEMP !=
|
|
+ offsetof(struct trap_per_cpu, tsb_huge_temp));
|
|
+#if 0
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_IRQ_WORKLIST !=
|
|
+ offsetof(struct trap_per_cpu, irq_worklist));
|
|
+#endif
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_QMASK !=
|
|
+ offsetof(struct trap_per_cpu, cpu_mondo_qmask));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_QMASK !=
|
|
+ offsetof(struct trap_per_cpu, dev_mondo_qmask));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_RESUM_QMASK !=
|
|
+ offsetof(struct trap_per_cpu, resum_qmask));
|
|
+ BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_QMASK !=
|
|
+ offsetof(struct trap_per_cpu, nonresum_qmask));
|
|
+
|
|
+ BUILD_BUG_ON(TSB_CONFIG_TSB !=
|
|
+ offsetof(struct tsb_config, tsb));
|
|
+ BUILD_BUG_ON(TSB_CONFIG_RSS_LIMIT !=
|
|
+ offsetof(struct tsb_config, tsb_rss_limit));
|
|
+ BUILD_BUG_ON(TSB_CONFIG_NENTRIES !=
|
|
+ offsetof(struct tsb_config, tsb_nentries));
|
|
+ BUILD_BUG_ON(TSB_CONFIG_REG_VAL !=
|
|
+ offsetof(struct tsb_config, tsb_reg_val));
|
|
+ BUILD_BUG_ON(TSB_CONFIG_MAP_VADDR !=
|
|
+ offsetof(struct tsb_config, tsb_map_vaddr));
|
|
+ BUILD_BUG_ON(TSB_CONFIG_MAP_PTE !=
|
|
+ offsetof(struct tsb_config, tsb_map_pte));
|
|
+
|
|
/* Attach to the address space of init_task. On SMP we
|
|
* do this in smp.c:smp_callin for other cpus.
|
|
*/
|
|
diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S
|
|
index 1ade3d6..2a31ffa 100644
|
|
--- a/arch/sparc64/kernel/ttable.S
|
|
+++ b/arch/sparc64/kernel/ttable.S
|
|
@@ -66,7 +66,7 @@ tl0_irq6: BTRAP(0x46)
|
|
tl0_irq7: BTRAP(0x47) BTRAP(0x48) BTRAP(0x49)
|
|
tl0_irq10: BTRAP(0x4a) BTRAP(0x4b) BTRAP(0x4c) BTRAP(0x4d)
|
|
tl0_irq14: TRAP_IRQ(timer_interrupt, 14)
|
|
-tl0_irq15: TRAP_IRQ(handler_irq, 15)
|
|
+tl0_irq15: TRAP_IRQ(perfctr_irq, 15)
|
|
tl0_resv050: BTRAP(0x50) BTRAP(0x51) BTRAP(0x52) BTRAP(0x53) BTRAP(0x54) BTRAP(0x55)
|
|
tl0_resv056: BTRAP(0x56) BTRAP(0x57) BTRAP(0x58) BTRAP(0x59) BTRAP(0x5a) BTRAP(0x5b)
|
|
tl0_resv05c: BTRAP(0x5c) BTRAP(0x5d) BTRAP(0x5e) BTRAP(0x5f)
|
|
diff --git a/arch/sparc64/perfmon/Kconfig b/arch/sparc64/perfmon/Kconfig
|
|
new file mode 100644
|
|
index 0000000..4672024
|
|
--- /dev/null
|
|
+++ b/arch/sparc64/perfmon/Kconfig
|
|
@@ -0,0 +1,26 @@
|
|
+menu "Hardware Performance Monitoring support"
|
|
+config PERFMON
|
|
+ bool "Perfmon2 performance monitoring interface"
|
|
+ default n
|
|
+ help
|
|
+ Enables the perfmon2 interface to access the hardware
|
|
+ performance counters. See <http://perfmon2.sf.net/> for
|
|
+ more details.
|
|
+
|
|
+config PERFMON_DEBUG
|
|
+ bool "Perfmon debugging"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ Enables perfmon debugging support
|
|
+
|
|
+config PERFMON_DEBUG_FS
|
|
+ bool "Enable perfmon statistics reporting via debugfs"
|
|
+ default y
|
|
+ depends on PERFMON && DEBUG_FS
|
|
+ help
|
|
+ Enable collection and reporting of perfmon timing statistics under
|
|
+ debugfs. This is used for debugging and performance analysis of the
|
|
+ subsystem. The debugfs filesystem must be mounted.
|
|
+
|
|
+endmenu
|
|
diff --git a/arch/sparc64/perfmon/Makefile b/arch/sparc64/perfmon/Makefile
|
|
new file mode 100644
|
|
index 0000000..ad2d907
|
|
--- /dev/null
|
|
+++ b/arch/sparc64/perfmon/Makefile
|
|
@@ -0,0 +1 @@
|
|
+obj-$(CONFIG_PERFMON) += perfmon.o
|
|
diff --git a/arch/sparc64/perfmon/perfmon.c b/arch/sparc64/perfmon/perfmon.c
|
|
new file mode 100644
|
|
index 0000000..9e29833
|
|
--- /dev/null
|
|
+++ b/arch/sparc64/perfmon/perfmon.c
|
|
@@ -0,0 +1,422 @@
|
|
+/* perfmon.c: sparc64 perfmon support
|
|
+ *
|
|
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/irq.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+#include <asm/system.h>
|
|
+#include <asm/spitfire.h>
|
|
+#include <asm/hypervisor.h>
|
|
+
|
|
+struct pcr_ops {
|
|
+ void (*write)(u64);
|
|
+ u64 (*read)(void);
|
|
+};
|
|
+
|
|
+static void direct_write_pcr(u64 val)
|
|
+{
|
|
+ write_pcr(val);
|
|
+}
|
|
+
|
|
+static u64 direct_read_pcr(void)
|
|
+{
|
|
+ u64 pcr;
|
|
+
|
|
+ read_pcr(pcr);
|
|
+
|
|
+ return pcr;
|
|
+}
|
|
+
|
|
+static struct pcr_ops direct_pcr_ops = {
|
|
+ .write = direct_write_pcr,
|
|
+ .read = direct_read_pcr,
|
|
+};
|
|
+
|
|
+/* The hypervisor call is needed to set the hyperprivileged hypervisor
|
|
+ * trace bit; if it does not return HV_EOK, write the PCR directly.
|
|
+ */
|
|
+static void n2_write_pcr(u64 val)
|
|
+{
|
|
+	unsigned long ret;
|
|
+
|
|
+	ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
|
|
+	if (ret != HV_EOK)
|
|
+		write_pcr(val);
|
|
+}
|
|
+
|
|
+static u64 n2_read_pcr(void)
|
|
+{
|
|
+ u64 pcr;
|
|
+
|
|
+ read_pcr(pcr);
|
|
+
|
|
+ return pcr;
|
|
+}
|
|
+
|
|
+static struct pcr_ops n2_pcr_ops = {
|
|
+ .write = n2_write_pcr,
|
|
+ .read = n2_read_pcr,
|
|
+};
|
|
+
|
|
+static struct pcr_ops *pcr_ops;
|
|
+
|
|
+void pfm_arch_write_pmc(struct pfm_context *ctx,
|
|
+ unsigned int cnum, u64 value)
|
|
+{
|
|
+ /*
|
|
+ * we only write to the actual register when monitoring is
|
|
+ * active (pfm_start was issued)
|
|
+ */
|
|
+ if (ctx && ctx->flags.started == 0)
|
|
+ return;
|
|
+
|
|
+ pcr_ops->write(value);
|
|
+}
|
|
+
|
|
+u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ return pcr_ops->read();
|
|
+}
|
|
+
|
|
+/*
|
|
+ * collect pending overflowed PMDs. Called from pfm_ctxsw()
|
|
+ * and from PMU interrupt handler. Must fill in set->povfl_pmds[]
|
|
+ * and set->npend_ovfls. Interrupts are masked
|
|
+ */
|
|
+static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int max = ctx->regs.max_intr_pmd;
|
|
+ u64 wmask = 1ULL << pfm_pmu_conf->counter_width;
|
|
+ u64 *intr_pmds = ctx->regs.intr_pmds;
|
|
+ u64 *used_mask = set->used_pmds;
|
|
+ u64 mask[PFM_PMD_BV];
|
|
+ unsigned int i;
|
|
+
|
|
+ bitmap_and(cast_ulp(mask),
|
|
+ cast_ulp(intr_pmds),
|
|
+ cast_ulp(used_mask),
|
|
+ max);
|
|
+
|
|
+ /*
|
|
+ * check all PMD that can generate interrupts
|
|
+ * (that includes counters)
|
|
+ */
|
|
+ for (i = 0; i < max; i++) {
|
|
+ if (test_bit(i, mask)) {
|
|
+ u64 new_val = pfm_arch_read_pmd(ctx, i);
|
|
+
|
|
+ PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n",
|
|
+ i, (unsigned long long)new_val,
|
|
+ (new_val&wmask) ? 1 : 0);
|
|
+
|
|
+ if (new_val & wmask) {
|
|
+ __set_bit(i, set->povfl_pmds);
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int i, max = ctx->regs.max_pmc;
|
|
+
|
|
+ /*
|
|
+ * clear enable bits, assume all pmcs are enable pmcs
|
|
+ */
|
|
+ for (i = 0; i < max; i++) {
|
|
+ if (test_bit(i, set->used_pmcs))
|
|
+ pfm_arch_write_pmc(ctx, i, 0);
|
|
+ }
|
|
+
|
|
+ if (set->npend_ovfls)
|
|
+ return;
|
|
+
|
|
+ __pfm_get_ovfl_pmds(ctx, set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_ctxsw(). Task is guaranteed to be current.
|
|
+ * Context is locked. Interrupts are masked. Monitoring is active.
|
|
+ * PMU access is guaranteed. PMC and PMD registers are live in PMU.
|
|
+ *
|
|
+ * for per-thread:
|
|
+ * must stop monitoring for the task
|
|
+ *
|
|
+ * Return:
|
|
+ * non-zero : did not save PMDs (as part of stopping the PMU)
|
|
+ * 0 : saved PMDs (no need to save them in caller)
|
|
+ */
|
|
+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ /*
|
|
+ * disable lazy restore of PMC registers.
|
|
+ */
|
|
+ ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
|
|
+
|
|
+ pfm_stop_active(task, ctx, ctx->active_set);
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from pfm_stop() and idle notifier
|
|
+ *
|
|
+ * Interrupts are masked. Context is locked. Set is the active set.
|
|
+ *
|
|
+ * For per-thread:
|
|
+ * task is not necessarily current. If not current task, then
|
|
+ * task is guaranteed stopped and off any cpu. Access to PMU
|
|
+ * is not guaranteed. Interrupts are masked. Context is locked.
|
|
+ * Set is the active set.
|
|
+ *
|
|
+ * For system-wide:
|
|
+ * task is current
|
|
+ *
|
|
+ * must disable active monitoring. ctx cannot be NULL
|
|
+ */
|
|
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ /*
|
|
+ * no need to go through stop_save()
|
|
+ * if we are already stopped
|
|
+ */
|
|
+ if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * stop live registers and collect pending overflow
|
|
+ */
|
|
+ if (task == current)
|
|
+ pfm_stop_active(task, ctx, ctx->active_set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Enable active monitoring. Called from pfm_start() and
|
|
+ * pfm_arch_unmask_monitoring().
|
|
+ *
|
|
+ * Interrupts are masked. Context is locked. Set is the active set.
|
|
+ *
|
|
+ * For per-thread:
|
|
+ * Task is not necessarily current. If not current task, then task
|
|
+ * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed.
|
|
+ *
|
|
+ * For system-wide:
|
|
+ * task is always current
|
|
+ *
|
|
+ * must enable active monitoring.
|
|
+ */
|
|
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_event_set *set;
|
|
+ unsigned int max_pmc = ctx->regs.max_pmc;
|
|
+ unsigned int i;
|
|
+
|
|
+ if (task != current)
|
|
+ return;
|
|
+
|
|
+ set = ctx->active_set;
|
|
+ for (i = 0; i < max_pmc; i++) {
|
|
+ if (test_bit(i, set->used_pmcs))
|
|
+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
|
|
+ * pfm_context_load_sys(), pfm_ctxsw().
|
|
+ * context is locked. Interrupts are masked. set cannot be NULL.
|
|
+ * Access to the PMU is guaranteed.
|
|
+ *
|
|
+ * function must restore all PMD registers from set.
|
|
+ */
|
|
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int max_pmd = ctx->regs.max_pmd;
|
|
+ u64 ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+ u64 *impl_pmds = ctx->regs.pmds;
|
|
+ unsigned int i;
|
|
+
|
|
+ /*
|
|
+ * must restore all pmds to avoid leaking
|
|
+ * information to user.
|
|
+ */
|
|
+ for (i = 0; i < max_pmd; i++) {
|
|
+ u64 val;
|
|
+
|
|
+ if (test_bit(i, impl_pmds) == 0)
|
|
+ continue;
|
|
+
|
|
+ val = set->pmds[i].value;
|
|
+
|
|
+ /*
|
|
+ * keep only the bits selected by ovfl_mask
|
|
+ * NOTE(review): bits are masked off, not set - confirm intent
|
|
+ */
|
|
+ val &= ovfl_mask;
|
|
+
|
|
+ pfm_arch_write_pmd(ctx, i, val);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
|
|
+ * pfm_context_load_sys(), pfm_ctxsw().
|
|
+ * Context is locked. Interrupts are masked. set cannot be NULL.
|
|
+ * Access to the PMU is guaranteed.
|
|
+ *
|
|
+ * function must restore all PMC registers from set, if needed.
|
|
+ */
|
|
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ unsigned int max_pmc = ctx->regs.max_pmc;
|
|
+ u64 *impl_pmcs = ctx->regs.pmcs;
|
|
+ unsigned int i;
|
|
+
|
|
+ /* If we're masked or stopped we don't need to bother restoring
|
|
+ * the PMCs now.
|
|
+ */
|
|
+ if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * restore all pmcs
|
|
+ */
|
|
+ for (i = 0; i < max_pmc; i++)
|
|
+ if (test_bit(i, impl_pmcs))
|
|
+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
|
|
+}
|
|
+
|
|
+char *pfm_arch_get_pmu_module_name(void)
|
|
+{
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+void perfmon_interrupt(struct pt_regs *regs)
|
|
+{
|
|
+ pfm_interrupt_handler(instruction_pointer(regs), regs);
|
|
+}
|
|
+
|
|
+static struct pfm_regmap_desc pfm_sparc64_pmc_desc[] = {
|
|
+ PMC_D(PFM_REG_I, "PCR", 0, 0, 0, 0),
|
|
+};
|
|
+
|
|
+static struct pfm_regmap_desc pfm_sparc64_pmd_desc[] = {
|
|
+ PMD_D(PFM_REG_C, "PIC0", 0),
|
|
+ PMD_D(PFM_REG_C, "PIC1", 0),
|
|
+};
|
|
+
|
|
+static int pfm_sparc64_probe(void)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static struct pfm_pmu_config pmu_sparc64_pmu_conf = {
|
|
+ .counter_width = 31,
|
|
+ .pmd_desc = pfm_sparc64_pmd_desc,
|
|
+ .num_pmd_entries = 2,
|
|
+ .pmc_desc = pfm_sparc64_pmc_desc,
|
|
+ .num_pmc_entries = 1,
|
|
+ .probe_pmu = pfm_sparc64_probe,
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+};
|
|
+
|
|
+static unsigned long perf_hsvc_group;
|
|
+static unsigned long perf_hsvc_major;
|
|
+static unsigned long perf_hsvc_minor;
|
|
+
|
|
+static int __init register_perf_hsvc(void)
|
|
+{
|
|
+ if (tlb_type == hypervisor) {
|
|
+ switch (sun4v_chip_type) {
|
|
+ case SUN4V_CHIP_NIAGARA1:
|
|
+ perf_hsvc_group = HV_GRP_N2_CPU;
|
|
+ break;
|
|
+
|
|
+ case SUN4V_CHIP_NIAGARA2:
|
|
+ perf_hsvc_group = HV_GRP_N2_CPU;
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ return -ENODEV;
|
|
+ }
|
|
+
|
|
+
|
|
+ perf_hsvc_major = 1;
|
|
+ perf_hsvc_minor = 0;
|
|
+ if (sun4v_hvapi_register(perf_hsvc_group,
|
|
+ perf_hsvc_major,
|
|
+ &perf_hsvc_minor)) {
|
|
+ printk("perfmon: Could not register N2 hvapi.\n");
|
|
+ return -ENODEV;
|
|
+ }
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void unregister_perf_hsvc(void)
|
|
+{
|
|
+ if (tlb_type != hypervisor)
|
|
+ return;
|
|
+ sun4v_hvapi_unregister(perf_hsvc_group);
|
|
+}
|
|
+
|
|
+/* Negotiate the hypervisor perf API, pick PCR accessors, register the PMU. */
|
|
+static int __init pfm_sparc64_pmu_init(void)
|
|
+{
|
|
+	u64 mask;
|
|
+	int err;
|
|
+
|
|
+	err = register_perf_hsvc();
|
|
+	if (err)
|
|
+		return err;
|
|
+
|
|
+	if (tlb_type == hypervisor && sun4v_chip_type == SUN4V_CHIP_NIAGARA2)
|
|
+		pcr_ops = &n2_pcr_ops;
|
|
+	else
|
|
+		pcr_ops = &direct_pcr_ops;
|
|
+
|
|
+	if (!strcmp(sparc_pmu_type, "ultra12"))
|
|
+		mask = (0xf << 11) | (0xf << 4) | 0x7;
|
|
+	else if (!strcmp(sparc_pmu_type, "ultra3") ||
|
|
+		 !strcmp(sparc_pmu_type, "ultra3i") ||
|
|
+		 !strcmp(sparc_pmu_type, "ultra3+") ||
|
|
+		 !strcmp(sparc_pmu_type, "ultra4+"))
|
|
+		mask = (0x3f << 11) | (0x3f << 4) | 0x7;
|
|
+	else if (!strcmp(sparc_pmu_type, "niagara2"))
|
|
+		mask = ((1UL << 63) | (1UL << 62) |
|
|
+			(1UL << 31) | (0xfUL << 27) | (0xffUL << 19) |
|
|
+			(1UL << 18) | (0xfUL << 14) | (0xffUL << 6) |
|
|
+			(0x3UL << 4) | 0x7UL);
|
|
+	else if (!strcmp(sparc_pmu_type, "niagara"))
|
|
+		mask = ((1UL << 9) | (1UL << 8) |
|
|
+			(0x7UL << 4) | 0x7UL);
|
|
+	else {
|
|
+		err = -ENODEV;
|
|
+		goto out_err;
|
|
+	}
|
|
+
|
|
+	pmu_sparc64_pmu_conf.pmu_name = sparc_pmu_type;
|
|
+	pfm_sparc64_pmc_desc[0].rsvd_msk = ~mask;	/* all other bits reserved */
|
|
+	err = pfm_pmu_register(&pmu_sparc64_pmu_conf);
|
|
+	if (!err)
|
|
+		return 0;
|
|
+out_err:
|
|
+	unregister_perf_hsvc();	/* do not leak the hvapi registration */
|
|
+	return err;
|
|
+}
|
|
+
|
|
+static void __exit pfm_sparc64_pmu_exit(void)
|
|
+{
|
|
+	pfm_pmu_unregister(&pmu_sparc64_pmu_conf);	/* reverse of init order */
|
|
+	unregister_perf_hsvc();	/* then drop the hypervisor API group */
|
|
+}
|
|
+
|
|
+module_init(pfm_sparc64_pmu_init);
|
|
+module_exit(pfm_sparc64_pmu_exit);
|
|
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
|
|
index ed92864..3a2b544 100644
|
|
--- a/arch/x86/Kconfig
|
|
+++ b/arch/x86/Kconfig
|
|
@@ -1378,6 +1378,8 @@ config COMPAT_VDSO
|
|
|
|
If unsure, say Y.
|
|
|
|
+source "arch/x86/perfmon/Kconfig"
|
|
+
|
|
endmenu
|
|
|
|
config ARCH_ENABLE_MEMORY_HOTPLUG
|
|
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
|
|
index f5631da..c868ad6 100644
|
|
--- a/arch/x86/Makefile
|
|
+++ b/arch/x86/Makefile
|
|
@@ -150,6 +150,8 @@ core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
|
|
core-y += arch/x86/kernel/
|
|
core-y += arch/x86/mm/
|
|
|
|
+core-$(CONFIG_PERFMON) += arch/x86/perfmon/
|
|
+
|
|
# Remaining sub architecture files
|
|
core-y += $(mcore-y)
|
|
|
|
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
|
|
index ffc1bb4..58e00cb 100644
|
|
--- a/arch/x86/ia32/ia32entry.S
|
|
+++ b/arch/x86/ia32/ia32entry.S
|
|
@@ -832,4 +832,16 @@ ia32_sys_call_table:
|
|
.quad sys_dup3 /* 330 */
|
|
.quad sys_pipe2
|
|
.quad sys_inotify_init1
|
|
+ .quad sys_pfm_create_context
|
|
+ .quad sys_pfm_write_pmcs
|
|
+ .quad sys_pfm_write_pmds /* 335 */
|
|
+ .quad sys_pfm_read_pmds
|
|
+ .quad sys_pfm_load_context
|
|
+ .quad sys_pfm_start
|
|
+ .quad sys_pfm_stop
|
|
+ .quad sys_pfm_restart /* 340 */
|
|
+ .quad sys_pfm_create_evtsets
|
|
+ .quad sys_pfm_getinfo_evtsets
|
|
+ .quad sys_pfm_delete_evtsets
|
|
+ .quad sys_pfm_unload_context
|
|
ia32_syscall_end:
|
|
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
|
|
index f88bd0d..53fe335 100644
|
|
--- a/arch/x86/kernel/apic_32.c
|
|
+++ b/arch/x86/kernel/apic_32.c
|
|
@@ -28,6 +28,7 @@
|
|
#include <linux/acpi_pmtmr.h>
|
|
#include <linux/module.h>
|
|
#include <linux/dmi.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/atomic.h>
|
|
#include <asm/smp.h>
|
|
@@ -669,6 +670,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
|
|
setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
|
|
return APIC_EILVT_LVTOFF_IBS;
|
|
}
|
|
+EXPORT_SYMBOL(setup_APIC_eilvt_ibs);
|
|
|
|
/*
|
|
* Local APIC start and shutdown
|
|
@@ -1367,6 +1369,9 @@ void __init apic_intr_init(void)
|
|
#ifdef CONFIG_X86_MCE_P4THERMAL
|
|
alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
|
|
#endif
|
|
+#ifdef CONFIG_PERFMON
|
|
+ set_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt);
|
|
+#endif
|
|
}
|
|
|
|
/**
|
|
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
|
|
index 446c062..574cd3b 100644
|
|
--- a/arch/x86/kernel/apic_64.c
|
|
+++ b/arch/x86/kernel/apic_64.c
|
|
@@ -228,6 +228,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
|
|
setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
|
|
return APIC_EILVT_LVTOFF_IBS;
|
|
}
|
|
+EXPORT_SYMBOL(setup_APIC_eilvt_ibs);
|
|
|
|
/*
|
|
* Program the next event, relative to now
|
|
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
|
|
index 4e456bd..5b6d6ca 100644
|
|
--- a/arch/x86/kernel/cpu/common.c
|
|
+++ b/arch/x86/kernel/cpu/common.c
|
|
@@ -5,6 +5,7 @@
|
|
#include <linux/module.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/bootmem.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/i387.h>
|
|
#include <asm/msr.h>
|
|
@@ -726,6 +727,8 @@ void __cpuinit cpu_init(void)
|
|
current_thread_info()->status = 0;
|
|
clear_used_math();
|
|
mxcsr_feature_mask_init();
|
|
+
|
|
+ pfm_init_percpu();
|
|
}
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
|
|
index 109792b..0b6e34c 100644
|
|
--- a/arch/x86/kernel/entry_32.S
|
|
+++ b/arch/x86/kernel/entry_32.S
|
|
@@ -513,7 +513,7 @@ ENDPROC(system_call)
|
|
ALIGN
|
|
RING0_PTREGS_FRAME # can't unwind into user space anyway
|
|
work_pending:
|
|
- testb $_TIF_NEED_RESCHED, %cl
|
|
+ testw $(_TIF_NEED_RESCHED|_TIF_PERFMON_WORK), %cx
|
|
jz work_notifysig
|
|
work_resched:
|
|
call schedule
|
|
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
|
|
index 89434d4..34e44f5 100644
|
|
--- a/arch/x86/kernel/entry_64.S
|
|
+++ b/arch/x86/kernel/entry_64.S
|
|
@@ -888,7 +888,13 @@ END(error_interrupt)
|
|
ENTRY(spurious_interrupt)
|
|
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
|
|
END(spurious_interrupt)
|
|
-
|
|
+
|
|
+#ifdef CONFIG_PERFMON
|
|
+ENTRY(pmu_interrupt)
|
|
+ apicinterrupt LOCAL_PERFMON_VECTOR,smp_pmu_interrupt
|
|
+END(pmu_interrupt)
|
|
+#endif
|
|
+
|
|
/*
|
|
* Exception entry points.
|
|
*/
|
|
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
|
|
index 1f26fd9..83f6bc1 100644
|
|
--- a/arch/x86/kernel/irqinit_64.c
|
|
+++ b/arch/x86/kernel/irqinit_64.c
|
|
@@ -11,6 +11,7 @@
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/sysdev.h>
|
|
#include <linux/bitops.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/acpi.h>
|
|
#include <asm/atomic.h>
|
|
@@ -217,6 +218,10 @@ void __init native_init_IRQ(void)
|
|
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
|
|
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
|
|
|
|
+#ifdef CONFIG_PERFMON
|
|
+ alloc_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt);
|
|
+#endif
|
|
+
|
|
if (!acpi_ioapic)
|
|
setup_irq(2, &irq2);
|
|
}
|
|
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
|
|
index 31f40b2..ed27150 100644
|
|
--- a/arch/x86/kernel/process_32.c
|
|
+++ b/arch/x86/kernel/process_32.c
|
|
@@ -36,6 +36,7 @@
|
|
#include <linux/personality.h>
|
|
#include <linux/tick.h>
|
|
#include <linux/percpu.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
#include <linux/prctl.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
@@ -277,6 +278,7 @@ void exit_thread(void)
|
|
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
|
|
put_cpu();
|
|
}
|
|
+ pfm_exit_thread();
|
|
}
|
|
|
|
void flush_thread(void)
|
|
@@ -334,6 +336,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
|
|
|
|
savesegment(gs, p->thread.gs);
|
|
|
|
+ pfm_copy_thread(p);
|
|
+
|
|
tsk = current;
|
|
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
|
|
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
|
|
@@ -448,6 +452,9 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
|
prev = &prev_p->thread;
|
|
next = &next_p->thread;
|
|
|
|
+ if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW))
|
|
+ pfm_ctxsw_out(prev_p, next_p);
|
|
+
|
|
debugctl = prev->debugctlmsr;
|
|
if (next->ds_area_msr != prev->ds_area_msr) {
|
|
/* we clear debugctl to make sure DS
|
|
@@ -460,6 +467,9 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
|
if (next->debugctlmsr != debugctl)
|
|
update_debugctlmsr(next->debugctlmsr);
|
|
|
|
+ if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW))
|
|
+ pfm_ctxsw_in(prev_p, next_p);
|
|
+
|
|
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
|
|
set_debugreg(next->debugreg0, 0);
|
|
set_debugreg(next->debugreg1, 1);
|
|
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
|
|
index e12e0e4..97d49ce 100644
|
|
--- a/arch/x86/kernel/process_64.c
|
|
+++ b/arch/x86/kernel/process_64.c
|
|
@@ -36,6 +36,7 @@
|
|
#include <linux/kprobes.h>
|
|
#include <linux/kdebug.h>
|
|
#include <linux/tick.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
#include <linux/prctl.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
@@ -240,6 +241,7 @@ void exit_thread(void)
|
|
t->io_bitmap_max = 0;
|
|
put_cpu();
|
|
}
|
|
+ pfm_exit_thread();
|
|
}
|
|
|
|
void flush_thread(void)
|
|
@@ -344,6 +346,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
|
|
savesegment(es, p->thread.es);
|
|
savesegment(ds, p->thread.ds);
|
|
|
|
+ pfm_copy_thread(p);
|
|
+
|
|
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
|
|
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
|
|
if (!p->thread.io_bitmap_ptr) {
|
|
@@ -472,6 +476,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
|
|
prev = &prev_p->thread,
|
|
next = &next_p->thread;
|
|
|
|
+ if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW))
|
|
+ pfm_ctxsw_out(prev_p, next_p);
|
|
+
|
|
debugctl = prev->debugctlmsr;
|
|
if (next->ds_area_msr != prev->ds_area_msr) {
|
|
/* we clear debugctl to make sure DS
|
|
@@ -484,6 +491,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
|
|
if (next->debugctlmsr != debugctl)
|
|
update_debugctlmsr(next->debugctlmsr);
|
|
|
|
+ if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW))
|
|
+ pfm_ctxsw_in(prev_p, next_p);
|
|
+
|
|
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
|
|
loaddebug(next, 0);
|
|
loaddebug(next, 1);
|
|
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
|
|
index 6fb5bcd..53e6665 100644
|
|
--- a/arch/x86/kernel/signal_32.c
|
|
+++ b/arch/x86/kernel/signal_32.c
|
|
@@ -18,6 +18,7 @@
|
|
#include <linux/sched.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/elf.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/mm.h>
|
|
|
|
@@ -657,6 +658,10 @@ static void do_signal(struct pt_regs *regs)
|
|
void
|
|
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
|
|
{
|
|
+ /* process perfmon asynchronous work (e.g. block thread or reset) */
|
|
+ if (thread_info_flags & _TIF_PERFMON_WORK)
|
|
+ pfm_handle_work(regs);
|
|
+
|
|
/* deal with pending signal delivery */
|
|
if (thread_info_flags & _TIF_SIGPENDING)
|
|
do_signal(regs);
|
|
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
|
|
index ca316b5..6e9fa74 100644
|
|
--- a/arch/x86/kernel/signal_64.c
|
|
+++ b/arch/x86/kernel/signal_64.c
|
|
@@ -19,6 +19,7 @@
|
|
#include <linux/stddef.h>
|
|
#include <linux/personality.h>
|
|
#include <linux/compiler.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/ucontext.h>
|
|
#include <asm/uaccess.h>
|
|
@@ -549,12 +550,17 @@ static void do_signal(struct pt_regs *regs)
|
|
void do_notify_resume(struct pt_regs *regs, void *unused,
|
|
__u32 thread_info_flags)
|
|
{
|
|
+
|
|
#ifdef CONFIG_X86_MCE
|
|
/* notify userspace of pending MCEs */
|
|
if (thread_info_flags & _TIF_MCE_NOTIFY)
|
|
mce_notify_user();
|
|
#endif /* CONFIG_X86_MCE */
|
|
|
|
+ /* process perfmon asynchronous work (e.g. block thread or reset) */
|
|
+ if (thread_info_flags & _TIF_PERFMON_WORK)
|
|
+ pfm_handle_work(regs);
|
|
+
|
|
/* deal with pending signal delivery */
|
|
if (thread_info_flags & _TIF_SIGPENDING)
|
|
do_signal(regs);
|
|
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
|
|
index 7985c5b..9ddf6db 100644
|
|
--- a/arch/x86/kernel/smpboot.c
|
|
+++ b/arch/x86/kernel/smpboot.c
|
|
@@ -42,6 +42,7 @@
|
|
#include <linux/init.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/bootmem.h>
|
|
@@ -1382,6 +1383,7 @@ int __cpu_disable(void)
|
|
remove_cpu_from_maps(cpu);
|
|
unlock_vector_lock();
|
|
fixup_irqs(cpu_online_map);
|
|
+ pfm_cpu_disable();
|
|
return 0;
|
|
}
|
|
|
|
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
|
|
index d44395f..e1384a9 100644
|
|
--- a/arch/x86/kernel/syscall_table_32.S
|
|
+++ b/arch/x86/kernel/syscall_table_32.S
|
|
@@ -332,3 +332,15 @@ ENTRY(sys_call_table)
|
|
.long sys_dup3 /* 330 */
|
|
.long sys_pipe2
|
|
.long sys_inotify_init1
|
|
+ .long sys_pfm_create_context
|
|
+ .long sys_pfm_write_pmcs
|
|
+ .long sys_pfm_write_pmds /* 335 */
|
|
+ .long sys_pfm_read_pmds
|
|
+ .long sys_pfm_load_context
|
|
+ .long sys_pfm_start
|
|
+ .long sys_pfm_stop
|
|
+ .long sys_pfm_restart /* 340 */
|
|
+ .long sys_pfm_create_evtsets
|
|
+ .long sys_pfm_getinfo_evtsets
|
|
+ .long sys_pfm_delete_evtsets
|
|
+ .long sys_pfm_unload_context
|
|
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
|
|
index 8a5f161..10faef5 100644
|
|
--- a/arch/x86/oprofile/nmi_int.c
|
|
+++ b/arch/x86/oprofile/nmi_int.c
|
|
@@ -16,6 +16,7 @@
|
|
#include <linux/moduleparam.h>
|
|
#include <linux/kdebug.h>
|
|
#include <linux/cpu.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
#include <asm/nmi.h>
|
|
#include <asm/msr.h>
|
|
#include <asm/apic.h>
|
|
@@ -217,12 +218,18 @@ static int nmi_setup(void)
|
|
int err = 0;
|
|
int cpu;
|
|
|
|
- if (!allocate_msrs())
|
|
+ if (pfm_session_allcpus_acquire())
|
|
+ return -EBUSY;
|
|
+
|
|
+ if (!allocate_msrs()) {
|
|
+ pfm_session_allcpus_release();
|
|
return -ENOMEM;
|
|
+ }
|
|
|
|
err = register_die_notifier(&profile_exceptions_nb);
|
|
if (err) {
|
|
free_msrs();
|
|
+ pfm_session_allcpus_release();
|
|
return err;
|
|
}
|
|
|
|
@@ -304,6 +311,7 @@ static void nmi_shutdown(void)
|
|
model->shutdown(msrs);
|
|
free_msrs();
|
|
put_cpu_var(cpu_msrs);
|
|
+ pfm_session_allcpus_release();
|
|
}
|
|
|
|
static void nmi_cpu_start(void *dummy)
|
|
diff --git a/arch/x86/perfmon/Kconfig b/arch/x86/perfmon/Kconfig
|
|
new file mode 100644
|
|
index 0000000..08842e6
|
|
--- /dev/null
|
|
+++ b/arch/x86/perfmon/Kconfig
|
|
@@ -0,0 +1,89 @@
|
|
+menu "Hardware Performance Monitoring support"
|
|
+config PERFMON
|
|
+ bool "Perfmon2 performance monitoring interface"
|
|
+ select X86_LOCAL_APIC
|
|
+ default n
|
|
+ help
|
|
+ Enables the perfmon2 interface to access the hardware
|
|
+ performance counters. See <http://perfmon2.sf.net/> for
|
|
+ more details.
|
|
+
|
|
+config PERFMON_DEBUG
|
|
+ bool "Perfmon debugging"
|
|
+ default n
|
|
+ depends on PERFMON
|
|
+ help
|
|
+ Enables perfmon debugging support
|
|
+
|
|
+config PERFMON_DEBUG_FS
|
|
+ bool "Enable perfmon statistics reporting via debugfs"
|
|
+ default y
|
|
+ depends on PERFMON && DEBUG_FS
|
|
+ help
|
|
+ Enable collection and reporting of perfmon timing statistics under
|
|
+ debugfs. This is used for debugging and performance analysis of the
|
|
+ subsystem. The debugfs filesystem must be mounted.
|
|
+
|
|
+config X86_PERFMON_P6
|
|
+ tristate "Support for Intel P6/Pentium M processor hardware performance counters"
|
|
+ depends on PERFMON && X86_32
|
|
+ default n
|
|
+ help
|
|
+ Enables support for Intel P6-style hardware performance counters.
|
|
+ To be used with Intel Pentium III, PentiumPro, Pentium M processors.
|
|
+
|
|
+config X86_PERFMON_P4
|
|
+ tristate "Support for Intel Pentium 4/Xeon hardware performance counters"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ Enables support for Intel Pentium 4/Xeon (Netburst) hardware performance
|
|
+ counters.
|
|
+
|
|
+config X86_PERFMON_PEBS_P4
|
|
+ tristate "Support for Intel Netburst Precise Event-Based Sampling (PEBS)"
|
|
+ depends on PERFMON && X86_PERFMON_P4
|
|
+ default n
|
|
+ help
|
|
+ Enables support for Precise Event-Based Sampling (PEBS) on the Intel
|
|
+ Netburst processors such as Pentium 4, Xeon which support it.
|
|
+
|
|
+config X86_PERFMON_CORE
|
|
+ tristate "Support for Intel Core-based performance counters"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ Enables support for Intel Core-based performance counters. Enable
|
|
+ this option to support Intel Core 2 processors.
|
|
+
|
|
+config X86_PERFMON_PEBS_CORE
|
|
+ tristate "Support for Intel Core Precise Event-Based Sampling (PEBS)"
|
|
+ depends on PERFMON && X86_PERFMON_CORE
|
|
+ default n
|
|
+ help
|
|
+ Enables support for Precise Event-Based Sampling (PEBS) on the Intel
|
|
+ Core processors.
|
|
+
|
|
+config X86_PERFMON_INTEL_ATOM
|
|
+ tristate "Support for Intel Atom processor"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ Enables support for Intel Atom processors.
|
|
+
|
|
+config X86_PERFMON_INTEL_ARCH
|
|
+ tristate "Support for Intel architectural perfmon v1/v2"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ Enables support for Intel architectural performance counters.
|
|
+ This feature was introduced with Intel Core Solo/Core Duo processors.
|
|
+
|
|
+config X86_PERFMON_AMD64
|
|
+ tristate "Support AMD Athlon64/Opteron64 hardware performance counters"
|
|
+ depends on PERFMON
|
|
+ default n
|
|
+ help
|
|
+ Enables support for Athlon64/Opteron64 hardware performance counters.
|
|
+ Support for family 6, 15 and 16(10H) processors.
|
|
+endmenu
|
|
diff --git a/arch/x86/perfmon/Makefile b/arch/x86/perfmon/Makefile
|
|
new file mode 100644
|
|
index 0000000..1cbed3e
|
|
--- /dev/null
|
|
+++ b/arch/x86/perfmon/Makefile
|
|
@@ -0,0 +1,13 @@
|
|
+#
|
|
+# Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
|
|
+# Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+#
|
|
+obj-$(CONFIG_PERFMON) += perfmon.o
|
|
+obj-$(CONFIG_X86_PERFMON_P6) += perfmon_p6.o
|
|
+obj-$(CONFIG_X86_PERFMON_P4) += perfmon_p4.o
|
|
+obj-$(CONFIG_X86_PERFMON_CORE) += perfmon_intel_core.o
|
|
+obj-$(CONFIG_X86_PERFMON_INTEL_ARCH) += perfmon_intel_arch.o
|
|
+obj-$(CONFIG_X86_PERFMON_PEBS_P4) += perfmon_pebs_p4_smpl.o
|
|
+obj-$(CONFIG_X86_PERFMON_PEBS_CORE) += perfmon_pebs_core_smpl.o
|
|
+obj-$(CONFIG_X86_PERFMON_AMD64) += perfmon_amd64.o
|
|
+obj-$(CONFIG_X86_PERFMON_INTEL_ATOM) += perfmon_intel_atom.o
|
|
diff --git a/arch/x86/perfmon/perfmon.c b/arch/x86/perfmon/perfmon.c
|
|
new file mode 100644
|
|
index 0000000..e727fed
|
|
--- /dev/null
|
|
+++ b/arch/x86/perfmon/perfmon.c
|
|
@@ -0,0 +1,761 @@
|
|
+/*
|
|
+ * This file implements the X86 specific support for the perfmon2 interface
|
|
+ *
|
|
+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
|
|
+ * Contributed by Robert Richter <robert.richter@amd.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <linux/kprobes.h>
|
|
+#include <linux/kdebug.h>
|
|
+#include <linux/nmi.h>
|
|
+
|
|
+#include <asm/apic.h>
|
|
+
|
|
+DEFINE_PER_CPU(unsigned long, real_iip);
|
|
+DEFINE_PER_CPU(int, pfm_using_nmi);
|
|
+DEFINE_PER_CPU(unsigned long, saved_lvtpc);
|
|
+
|
|
+/**
|
|
+ * pfm_arch_ctxswin_thread - thread context switch in
|
|
+ * @task: task switched in
|
|
+ * @ctx: context for the task
|
|
+ *
|
|
+ * Called from pfm_ctxsw(). Task is guaranteed to be current.
|
|
+ * set cannot be NULL. Context is locked. Interrupts are masked.
|
|
+ *
|
|
+ * Caller has already restored all PMD and PMC registers, if
|
|
+ * necessary (i.e., lazy restore scheme).
|
|
+ *
|
|
+ * On x86, the common code only needs to enable RDPMC (CR4.PCE) if necessary
|
|
+ *
|
|
+ * Model-specific features, e.g., PEBS, IBS, are taken care of in the
|
|
+ * corresponding PMU description module
|
|
+ */
|
|
+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ /*
|
|
+ * restore saved real iip
|
|
+ */
|
|
+ if (ctx->active_set->npend_ovfls)
|
|
+ __get_cpu_var(real_iip) = ctx_arch->saved_real_iip;
|
|
+
|
|
+ /*
|
|
+ * enable RDPMC on this CPU
|
|
+ */
|
|
+ if (ctx_arch->flags.insecure)
|
|
+ set_in_cr4(X86_CR4_PCE);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_ctxswout_thread - context switch out thread
|
|
+ * @task: task switched out
|
|
+ * @ctx : context switched out
|
|
+ *
|
|
+ * Called from pfm_ctxsw(). Task is guaranteed to be current.
|
|
+ * Context is locked. Interrupts are masked. Monitoring may be active.
|
|
+ * PMU access is guaranteed. PMC and PMD registers are live in PMU.
|
|
+ *
|
|
+ * Return:
|
|
+ * non-zero : did not save PMDs (as part of stopping the PMU)
|
|
+ * 0 : saved PMDs (no need to save them in caller)
|
|
+ */
|
|
+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ /*
|
|
+ * disable lazy restore of PMCS on ctxswin because
|
|
+ * we modify some of them.
|
|
+ */
|
|
+ ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
|
|
+
|
|
+ if (ctx->active_set->npend_ovfls)
|
|
+ ctx_arch->saved_real_iip = __get_cpu_var(real_iip);
|
|
+
|
|
+ /*
|
|
+ * disable RDPMC on this CPU
|
|
+ */
|
|
+ if (ctx_arch->flags.insecure)
|
|
+ clear_in_cr4(X86_CR4_PCE);
|
|
+
|
|
+ if (ctx->state == PFM_CTX_MASKED)
|
|
+ return 1;
|
|
+
|
|
+ return pmu_info->stop_save(ctx, ctx->active_set);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_stop - deactivate monitoring
|
|
+ * @task: task to stop
|
|
+ * @ctx: context to stop
|
|
+ *
|
|
+ * Called from pfm_stop()
|
|
+ * Interrupts are masked. Context is locked. Set is the active set.
|
|
+ *
|
|
+ * For per-thread:
|
|
+ * task is not necessarily current. If not current task, then
|
|
+ * task is guaranteed stopped and off any cpu. Access to PMU
|
|
+ * is not guaranteed.
|
|
+ *
|
|
+ * For system-wide:
|
|
+ * task is current
|
|
+ *
|
|
+ * must disable active monitoring. ctx cannot be NULL
|
|
+ */
|
|
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ /*
|
|
+ * no need to go through stop_save()
|
|
+ * if we are already stopped
|
|
+ */
|
|
+ if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED)
|
|
+ return;
|
|
+
|
|
+ if (task != current)
|
|
+ return;
|
|
+
|
|
+ pmu_info->stop_save(ctx, ctx->active_set);
|
|
+}
|
|
+
|
|
+
|
|
+/**
|
|
+ * pfm_arch_start - activate monitoring
|
|
+ * @task: task to start
|
|
+ * @ctx: context to start
|
|
+ *
|
|
+ * Interrupts are masked. Context is locked.
|
|
+ *
|
|
+ * For per-thread:
|
|
+ * Task is not necessarily current. If not current task, then task
|
|
+ * is guaranteed stopped and off any cpu. No access to PMU if task
|
|
+ * is not current.
|
|
+ *
|
|
+ * For system-wide:
|
|
+ * task is always current
|
|
+ */
|
|
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_event_set *set;
|
|
+
|
|
+ set = ctx->active_set;
|
|
+
|
|
+ if (task != current)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * cannot restore PMC if no access to PMU. Will be done
|
|
+ * when the thread is switched back in
|
|
+ */
|
|
+
|
|
+ pfm_arch_restore_pmcs(ctx, set);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_restore_pmds - reload PMD registers
|
|
+ * @ctx: context to restore from
|
|
+ * @set: current event set
|
|
+ *
|
|
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
|
|
+ * pfm_context_load_sys(), pfm_ctxsw()
|
|
+ *
|
|
+ * Context is locked. Interrupts are masked. Set cannot be NULL.
|
|
+ * Access to the PMU is guaranteed.
|
|
+ */
|
|
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+ u16 i, num;
|
|
+
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ num = set->nused_pmds;
|
|
+
|
|
+ /*
|
|
+ * model-specific override
|
|
+ */
|
|
+ if (pmu_info->restore_pmds) {
|
|
+ pmu_info->restore_pmds(ctx, set);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * we can restore only the PMD we use because:
|
|
+ *
|
|
+ * - can only read with pfm_read_pmds() the registers
|
|
+ * declared used via pfm_write_pmds(), smpl_pmds, reset_pmds
|
|
+ *
|
|
+ * - if cr4.pce=1, only counters are exposed to user. RDPMC
|
|
+ * does not work with other types of PMU registers. Thus, no
|
|
+ * address is ever exposed by counters
|
|
+ *
|
|
+ * - there is never a dependency between one pmd register and
|
|
+ * another
|
|
+ */
|
|
+ for (i = 0; num; i++) {
|
|
+ if (likely(test_bit(i, cast_ulp(set->used_pmds)))) {
|
|
+ pfm_write_pmd(ctx, i, set->pmds[i].value);
|
|
+ num--;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_restore_pmcs - reload PMC registers
|
|
+ * @ctx: context to restore from
|
|
+ * @set: current event set
|
|
+ *
|
|
+ * function called from pfm_switch_sets(), pfm_context_load_thread(),
|
|
+ * pfm_context_load_sys(), pfm_ctxsw().
|
|
+ *
|
|
+ * Context is locked. Interrupts are masked. set cannot be NULL.
|
|
+ * Access to the PMU is guaranteed.
|
|
+ *
|
|
+ * function must restore all PMC registers from set
|
|
+ */
|
|
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+ u64 *mask;
|
|
+ u16 i, num;
|
|
+
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ /*
|
|
+ * we need to restore PMCs only when:
|
|
+ * - context is not masked
|
|
+ * - monitoring activated
|
|
+ *
|
|
+ * Masking monitoring after an overflow does not change the
|
|
+ * value of flags.started
|
|
+ */
|
|
+ if (ctx->state == PFM_CTX_MASKED || !ctx->flags.started)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * model-specific override
|
|
+ */
|
|
+ if (pmu_info->restore_pmcs) {
|
|
+ pmu_info->restore_pmcs(ctx, set);
|
|
+ return;
|
|
+ }
|
|
+ /*
|
|
+ * restore all pmcs
|
|
+ *
|
|
+ * It is not possible to restore only the pmcs we used because
|
|
+ * certain PMU models (e.g. Pentium 4) have dependencies. Thus
|
|
+ * we do not want one application using stale PMC coming from
|
|
+ * another one.
|
|
+ *
|
|
+ * On PMU models where there are no dependencies between pmcs,
|
|
+ * it is possible to optimize by only restoring the registers that
|
|
+ * are used, and this can be done with the model-specific override
|
|
+ * for this function.
|
|
+ *
|
|
+ * The default code takes the safest approach, i.e., assume the worst
|
|
+ */
|
|
+ mask = ctx->regs.pmcs;
|
|
+ num = ctx->regs.num_pmcs;
|
|
+ for (i = 0; num; i++) {
|
|
+ if (test_bit(i, cast_ulp(mask))) {
|
|
+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]);
|
|
+ num--;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/**
|
|
+ * smp_pmu_interrupt - lowest level PMU interrupt handler for X86
|
|
+ * @regs: machine state
|
|
+ *
|
|
+ * The PMU interrupt is handled through an interrupt gate, therefore
|
|
+ * the CPU automatically clears the EFLAGS.IF, i.e., masking interrupts.
|
|
+ *
|
|
+ * The perfmon interrupt handler MUST run with interrupts disabled due
|
|
+ * to possible race with other, higher priority interrupts, such as timer
|
|
+ * or IPI function calls.
|
|
+ *
|
|
+ * See description in IA-32 architecture manual, Vol 3 section 5.8.1
|
|
+ */
|
|
+void smp_pmu_interrupt(struct pt_regs *regs)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+ struct pfm_context *ctx;
|
|
+ unsigned long iip;
|
|
+ int using_nmi;
|
|
+
|
|
+ using_nmi = __get_cpu_var(pfm_using_nmi);
|
|
+
|
|
+ ack_APIC_irq();
|
|
+
|
|
+ irq_enter();
|
|
+
|
|
+ /*
|
|
+ * when using NMI, pfm_handle_nmi() gets called
|
|
+ * first. It stops monitoring and records the
|
|
+ * iip into real_iip, then it reposts the interrupt
|
|
+ * using the lower priority vector LOCAL_PERFMON_VECTOR
|
|
+ *
|
|
+ * On some processors, e.g., P4, it may be that some
|
|
+ * state is already recorded from pfm_handle_nmi()
|
|
+ * and it only needs to be copied back into the normal
|
|
+ * fields so it can be used transparently by higher level
|
|
+ * code.
|
|
+ */
|
|
+ if (using_nmi) {
|
|
+ ctx = __get_cpu_var(pmu_ctx);
|
|
+ pmu_info = pfm_pmu_info();
|
|
+ iip = __get_cpu_var(real_iip);
|
|
+ if (ctx && pmu_info->nmi_copy_state)
|
|
+ pmu_info->nmi_copy_state(ctx);
|
|
+ } else
|
|
+ iip = instruction_pointer(regs);
|
|
+
|
|
+ pfm_interrupt_handler(iip, regs);
|
|
+
|
|
+ /*
|
|
+ * On Intel P6, Pentium M, P4, Intel Core:
|
|
+ * - it is necessary to clear the MASK field for the LVTPC
|
|
+ * vector. Otherwise interrupts remain masked. See
|
|
+ * section 8.5.1
|
|
+ * AMD X86-64:
|
|
+ * - the documentation does not stipulate the behavior.
|
|
+ * To be safe, we also rewrite the vector to clear the
|
|
+ * mask field
|
|
+ */
|
|
+ if (!using_nmi && current_cpu_data.x86_vendor == X86_VENDOR_INTEL)
|
|
+ apic_write(APIC_LVTPC, LOCAL_PERFMON_VECTOR);
|
|
+
|
|
+ irq_exit();
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_handle_nmi - PMU NMI handler notifier callback
|
|
+ * @nb: notifier block
|
|
+ * @val: type of die notifier
|
|
+ * @data: die notifier-specific data
|
|
+ *
|
|
+ * called from notify_die() notifier from a trap handler path. We only
|
|
+ * care about NMI related callbacks, and ignore everything else.
|
|
+ *
|
|
+ * Cannot grab any locks, including the perfmon context lock
|
|
+ *
|
|
+ * Must detect if NMI interrupt comes from perfmon, and if so it must
|
|
+ * stop the PMU and repost a lower-priority interrupt. The perfmon interrupt
|
|
+ * handler needs to grab the context lock, thus it cannot be run directly
|
|
+ * from the NMI interrupt call path.
|
|
+ */
|
|
+static int __kprobes pfm_handle_nmi(struct notifier_block *nb,
|
|
+ unsigned long val,
|
|
+ void *data)
|
|
+{
|
|
+ struct die_args *args = data;
|
|
+ struct pfm_context *ctx;
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+
|
|
+ /*
|
|
+ * only NMI related calls
|
|
+ */
|
|
+ if (val != DIE_NMI_IPI)
|
|
+ return NOTIFY_DONE;
|
|
+
|
|
+ /*
|
|
+ * perfmon not using NMI
|
|
+ */
|
|
+ if (!__get_cpu_var(pfm_using_nmi))
|
|
+ return NOTIFY_DONE;
|
|
+
|
|
+ /*
|
|
+ * No context
|
|
+ */
|
|
+ ctx = __get_cpu_var(pmu_ctx);
|
|
+ if (!ctx) {
|
|
+ PFM_DBG_ovfl("no ctx");
|
|
+ return NOTIFY_DONE;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Detect if we have overflows, i.e., NMI interrupt
|
|
+ * caused by PMU
|
|
+ */
|
|
+ pmu_info = pfm_pmu_conf->pmu_info;
|
|
+ if (!pmu_info->has_ovfls(ctx)) {
|
|
+ PFM_DBG_ovfl("no ovfl");
|
|
+ return NOTIFY_DONE;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * we stop the PMU to avoid further overflow before this
|
|
+ * one is treated by lower priority interrupt handler
|
|
+ */
|
|
+ pmu_info->quiesce();
|
|
+
|
|
+ /*
|
|
+ * record actual instruction pointer
|
|
+ */
|
|
+ __get_cpu_var(real_iip) = instruction_pointer(args->regs);
|
|
+
|
|
+ /*
|
|
+ * post lower priority interrupt (LOCAL_PERFMON_VECTOR)
|
|
+ */
|
|
+ pfm_arch_resend_irq(ctx);
|
|
+
|
|
+ pfm_stats_inc(ovfl_intr_nmi_count);
|
|
+
|
|
+ /*
|
|
+ * we need to rewrite the APIC vector on Intel
|
|
+ */
|
|
+ if (current_cpu_data.x86_vendor == X86_VENDOR_INTEL)
|
|
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
|
|
+
|
|
+ /*
|
|
+ * the notification was for us
|
|
+ */
|
|
+ return NOTIFY_STOP;
|
|
+}
|
|
+
|
|
+static struct notifier_block pfm_nmi_nb = {
|
|
+ .notifier_call = pfm_handle_nmi
|
|
+};
|
|
+
|
|
+/**
|
|
+ * pfm_arch_get_pmu_module_name - get PMU description module name for autoload
|
|
+ *
|
|
+ * called from pfm_pmu_request_module
|
|
+ */
|
|
+char *pfm_arch_get_pmu_module_name(void)
|
|
+{
|
|
+ switch (current_cpu_data.x86) {
|
|
+ case 6:
|
|
+ switch (current_cpu_data.x86_model) {
|
|
+ case 3: /* Pentium II */
|
|
+ case 7 ... 11:
|
|
+ case 13:
|
|
+ return "perfmon_p6";
|
|
+ case 15: /* Merom */
|
|
+ case 23: /* Penryn */
|
|
+ return "perfmon_intel_core";
|
|
+ case 28: /* Atom/Silverthorne */
|
|
+ return "perfmon_intel_atom";
|
|
+ case 29: /* Dunnington */
|
|
+ return "perfmon_intel_core";
|
|
+ default:
|
|
+ goto try_arch;
|
|
+ }
|
|
+ case 15:
|
|
+ case 16:
|
|
+ /* All Opteron processors */
|
|
+ if (current_cpu_data.x86_vendor == X86_VENDOR_AMD)
|
|
+ return "perfmon_amd64";
|
|
+
|
|
+ switch (current_cpu_data.x86_model) {
|
|
+ case 0 ... 6:
|
|
+ return "perfmon_p4";
|
|
+ }
|
|
+ /* FALL THROUGH */
|
|
+ default:
|
|
+try_arch:
|
|
+ if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
|
|
+ return "perfmon_intel_arch";
|
|
+ return NULL;
|
|
+ }
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_resend_irq - post perfmon interrupt on regular vector
|
|
+ *
|
|
+ * called from pfm_ctxswin_thread() and pfm_handle_nmi()
|
|
+ */
|
|
+void pfm_arch_resend_irq(struct pfm_context *ctx)
|
|
+{
|
|
+ unsigned long val, dest;
|
|
+ /*
|
|
+ * we cannot use hw_resend_irq() because it goes to
|
|
+ * the I/O APIC. We need to go to the Local APIC.
|
|
+ *
|
|
+ * The "int vec" is not the right solution either
|
|
+ * because it triggers a software intr. We need
|
|
+ * to regenerate the interrupt and have it pended
|
|
+ * until we unmask interrupts.
|
|
+ *
|
|
+ * Instead we send ourself an IPI on the perfmon
|
|
+ * vector.
|
|
+ */
|
|
+ val = APIC_DEST_SELF|APIC_INT_ASSERT|
|
|
+ APIC_DM_FIXED|LOCAL_PERFMON_VECTOR;
|
|
+
|
|
+ dest = apic_read(APIC_ID);
|
|
+ apic_write(APIC_ICR2, dest);
|
|
+ apic_write(APIC_ICR, val);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_pmu_acquire_percpu - setup APIC per CPU
|
|
+ * @data: contains pmu flags
|
|
+ */
|
|
+static void pfm_arch_pmu_acquire_percpu(void *data)
|
|
+{
|
|
+
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+ unsigned int tmp, vec;
|
|
+ unsigned long flags = (unsigned long)data;
|
|
+ unsigned long lvtpc;
|
|
+
|
|
+ pmu_info = pfm_pmu_conf->pmu_info;
|
|
+
|
|
+ /*
|
|
+ * we only reprogram the LVTPC vector if we have detected
|
|
+ * no sharing, otherwise it means the APIC is already programmed
|
|
+ * and we use whatever vector (likely NMI) is there
|
|
+ */
|
|
+ if (!(flags & PFM_X86_FL_SHARING)) {
|
|
+ if (flags & PFM_X86_FL_USE_NMI)
|
|
+ vec = APIC_DM_NMI;
|
|
+ else
|
|
+ vec = LOCAL_PERFMON_VECTOR;
|
|
+
|
|
+ tmp = apic_read(APIC_LVTERR);
|
|
+ apic_write(APIC_LVTERR, tmp | APIC_LVT_MASKED);
|
|
+ apic_write(APIC_LVTPC, vec);
|
|
+ apic_write(APIC_LVTERR, tmp);
|
|
+ }
|
|
+ lvtpc = (unsigned long)apic_read(APIC_LVTPC);
|
|
+
|
|
+ __get_cpu_var(pfm_using_nmi) = lvtpc == APIC_DM_NMI;
|
|
+
|
|
+ PFM_DBG("LTVPC=0x%lx using_nmi=%d", lvtpc, __get_cpu_var(pfm_using_nmi));
|
|
+
|
|
+ /*
|
|
+ * invoke model specific acquire routine. May be used for
|
|
+ * model-specific initializations
|
|
+ */
|
|
+ if (pmu_info->acquire_pmu_percpu)
|
|
+ pmu_info->acquire_pmu_percpu();
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_pmu_acquire - acquire PMU resource from system
|
|
+ * @unavail_pmcs : bitmask to use to set unavailable pmcs
|
|
+ * @unavail_pmds : bitmask to use to set unavailable pmds
|
|
+ *
|
|
+ * interrupts are not masked
|
|
+ *
|
|
+ * Grab PMU registers from lower level MSR allocator
|
|
+ *
|
|
+ * Program the APIC according to the possible interrupt vector
|
|
+ * either LOCAL_PERFMON_VECTOR or NMI
|
|
+ */
|
|
+int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
|
|
+{
|
|
+	struct pfm_arch_pmu_info *pmu_info;
|
|
+	struct pfm_regmap_desc *d;
|
|
+	u16 i, nlost;
|
|
+
|
|
+	pmu_info = pfm_pmu_conf->pmu_info;
|
|
+	pmu_info->flags &= ~PFM_X86_FL_SHARING;
|
|
+
|
|
+	nlost = 0;
|
|
+
|
|
+	d = pfm_pmu_conf->pmc_desc;
|
|
+	for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) {
|
|
+		if (!(d->type & PFM_REG_I))
|
|
+			continue;
|
|
+
|
|
+		if (d->type & PFM_REG_V)
|
|
+			continue;
|
|
+		/*
|
|
+		 * reserve register with lower-level allocator
|
|
+		 */
|
|
+		if (!reserve_evntsel_nmi(d->hw_addr)) {
|
|
+			PFM_DBG("pmc%d(%s) already used", i, d->desc);
|
|
+			__set_bit(i, cast_ulp(unavail_pmcs));
|
|
+			nlost++;
|
|
+			continue;
|
|
+		}
|
|
+	}
|
|
+	PFM_DBG("nlost=%d info_flags=0x%x\n", nlost, pmu_info->flags);
|
|
+	/*
|
|
+	 * some PMU models (e.g., P6) do not support sharing
|
|
+	 * so check if we found fewer than the expected number of PMC registers
|
|
+	 */
|
|
+	if (nlost) {
|
|
+		if (pmu_info->flags & PFM_X86_FL_NO_SHARING) {
|
|
+			PFM_INFO("PMU already used by another subsystem, "
|
|
+				 "PMU does not support sharing, "
|
|
+				 "try disabling Oprofile or "
|
|
+				 "reboot with nmi_watchdog=0");
|
|
+			goto undo;
|
|
+		}
|
|
+		pmu_info->flags |= PFM_X86_FL_SHARING;
|
|
+	}
|
|
+
|
|
+	d = pfm_pmu_conf->pmd_desc;
|
|
+	for (i = 0; i < pfm_pmu_conf->num_pmd_entries; i++, d++) {
|
|
+		if (!(d->type & PFM_REG_I))
|
|
+			continue;
|
|
+
|
|
+		if (d->type & PFM_REG_V)
|
|
+			continue;
|
|
+
|
|
+		if (!reserve_perfctr_nmi(d->hw_addr)) {
|
|
+			PFM_DBG("pmd%d(%s) already used", i, d->desc);
|
|
+			__set_bit(i, cast_ulp(unavail_pmds));
|
|
+		}
|
|
+	}
|
|
+	/*
|
|
+	 * program APIC on each CPU, passing the pmu flags as the argument
|
|
+	 */
|
|
+	on_each_cpu(pfm_arch_pmu_acquire_percpu,
|
|
+		    (void *)(unsigned long)pmu_info->flags, 1);
|
|
+
|
|
+	return 0;
|
|
+undo:
|
|
+	/*
|
|
+	 * undo pmc reservations; same filter as the reservation loop above
|
|
+	 */
|
|
+	d = pfm_pmu_conf->pmc_desc;
|
|
+	for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) {
|
|
+		if (!(d->type & PFM_REG_I) || (d->type & PFM_REG_V))
|
|
+			continue;
|
|
+		if (!test_bit(i, cast_ulp(unavail_pmcs)))
|
|
+			release_evntsel_nmi(d->hw_addr);
|
|
+	}
|
|
+	return -EBUSY;
|
|
+}
|
|
+/**
|
|
+ * pfm_arch_pmu_release_percpu - clear NMI state for one CPU
|
|
+ *
|
|
+ */
|
|
+static void pfm_arch_pmu_release_percpu(void *data)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+
|
|
+ pmu_info = pfm_pmu_conf->pmu_info;
|
|
+
|
|
+ __get_cpu_var(pfm_using_nmi) = 0;
|
|
+
|
|
+ /*
|
|
+ * invoke model specific release routine.
|
|
+ * May be used to undo certain initializations
|
|
+ * or free some model-specific resources.
|
|
+ */
|
|
+ if (pmu_info->release_pmu_percpu)
|
|
+ pmu_info->release_pmu_percpu();
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_pmu_release - release PMU resource to system
|
|
+ *
|
|
+ * called from pfm_pmu_release()
|
|
+ * interrupts are not masked
|
|
+ *
|
|
+ * On x86, we return the PMU registers to the MSR allocator
|
|
+ */
|
|
+void pfm_arch_pmu_release(void)
|
|
+{
|
|
+ struct pfm_regmap_desc *d;
|
|
+ u16 i, n;
|
|
+
|
|
+ d = pfm_pmu_conf->pmc_desc;
|
|
+ n = pfm_pmu_conf->regs_all.num_pmcs;
|
|
+ for (i = 0; n; i++, d++) {
|
|
+ if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ continue;
|
|
+ release_evntsel_nmi(d->hw_addr);
|
|
+ n--;
|
|
+ PFM_DBG("pmc%u released", i);
|
|
+ }
|
|
+ d = pfm_pmu_conf->pmd_desc;
|
|
+ n = pfm_pmu_conf->regs_all.num_pmds;
|
|
+ for (i = 0; n; i++, d++) {
|
|
+ if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmds)))
|
|
+ continue;
|
|
+ release_perfctr_nmi(d->hw_addr);
|
|
+ n--;
|
|
+ PFM_DBG("pmd%u released", i);
|
|
+ }
|
|
+
|
|
+ /* clear NMI variable if used */
|
|
+ if (__get_cpu_var(pfm_using_nmi))
|
|
+ on_each_cpu(pfm_arch_pmu_release_percpu, NULL , 1);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_pmu_config_init - validate PMU description structure
|
|
+ * @cfg: PMU description structure
|
|
+ *
|
|
+ * return:
|
|
+ * 0 if valid
|
|
+ * errno otherwise
|
|
+ *
|
|
+ * called from pfm_pmu_register()
|
|
+ */
|
|
+int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+
|
|
+ pmu_info = pfm_pmu_info();
|
|
+ if (!pmu_info) {
|
|
+ PFM_DBG("%s missing pmu_info", cfg->pmu_name);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ if (!pmu_info->has_ovfls) {
|
|
+ PFM_DBG("%s missing has_ovfls callback", cfg->pmu_name);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ if (!pmu_info->quiesce) {
|
|
+ PFM_DBG("%s missing quiesce callback", cfg->pmu_name);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ if (!pmu_info->stop_save) {
|
|
+ PFM_DBG("%s missing stop_save callback", cfg->pmu_name);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_init - one time global arch-specific initialization
|
|
+ *
|
|
+ * called from pfm_init()
|
|
+ */
|
|
+int __init pfm_arch_init(void)
|
|
+{
|
|
+ /*
|
|
+ * we need to register our NMI handler when the kernel boots
|
|
+ * to avoid a deadlock condition with the NMI watchdog or Oprofile
|
|
+ * if we were to try and register/unregister on-demand.
|
|
+ */
|
|
+ register_die_notifier(&pfm_nmi_nb);
|
|
+ return 0;
|
|
+}
|
|
diff --git a/arch/x86/perfmon/perfmon_amd64.c b/arch/x86/perfmon/perfmon_amd64.c
|
|
new file mode 100644
|
|
index 0000000..f9b5f9c
|
|
--- /dev/null
|
|
+++ b/arch/x86/perfmon/perfmon_amd64.c
|
|
@@ -0,0 +1,754 @@
|
|
+/*
|
|
+ * This file contains the PMU description for the Athlon64 and Opteron64
|
|
+ * processors. It supports 32 and 64-bit modes.
|
|
+ *
|
|
+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
|
|
+ * Contributed by Robert Richter <robert.richter@amd.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <linux/topology.h>
|
|
+#include <linux/kprobes.h>
|
|
+#include <linux/pci.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <asm/hw_irq.h>
|
|
+#include <asm/apic.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_AUTHOR("Robert Richter <robert.richter@amd.com>");
|
|
+MODULE_DESCRIPTION("AMD64 PMU description table");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+#define PCI_DEVICE_ID_AMD_10H_NB_MISC 0x1203
|
|
+
|
|
+static int force_nmi;
|
|
+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
|
|
+module_param(force_nmi, bool, 0600);
|
|
+
|
|
+#define HAS_IBS 0x01 /* has IBS support */
|
|
+
|
|
+static u8 ibs_eilvt_off, ibs_status; /* AMD: extended interrupt LVT offset */
|
|
+
|
|
+static void pfm_amd64_restore_pmcs(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+static void __kprobes pfm_amd64_quiesce(void);
|
|
+static int pfm_amd64_has_ovfls(struct pfm_context *ctx);
|
|
+static int pfm_amd64_stop_save(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+
|
|
+#define IBSFETCHCTL_PMC 4 /* pmc4 */
|
|
+#define IBSFETCHCTL_PMD 4 /* pmd4 */
|
|
+#define IBSOPSCTL_PMC 5 /* pmc5 */
|
|
+#define IBSOPSCTL_PMD 7 /* pmd7 */
|
|
+
|
|
+static u64 enable_mask[PFM_MAX_PMCS];
|
|
+static u16 max_enable;
|
|
+
|
|
+static struct pfm_arch_pmu_info pfm_amd64_pmu_info = {
|
|
+ .stop_save = pfm_amd64_stop_save,
|
|
+ .has_ovfls = pfm_amd64_has_ovfls,
|
|
+ .quiesce = pfm_amd64_quiesce,
|
|
+ .restore_pmcs = pfm_amd64_restore_pmcs
|
|
+};
|
|
+
|
|
+#define PFM_AMD64_IBSFETCHVAL (1ULL<<49) /* valid fetch sample */
|
|
+#define PFM_AMD64_IBSFETCHEN (1ULL<<48) /* fetch sampling enabled */
|
|
+#define PFM_AMD64_IBSOPVAL (1ULL<<18) /* valid execution sample */
|
|
+#define PFM_AMD64_IBSOPEN (1ULL<<17) /* execution sampling enabled */
|
|
+
|
|
+/*
|
|
+ * force Local APIC interrupt on overflow
|
|
+ */
|
|
+#define PFM_K8_VAL (1ULL<<20)
|
|
+#define PFM_K8_NO64 (1ULL<<20)
|
|
+
|
|
+/*
|
|
+ * a bit set to 1 in the masks below marks a reserved bit
|
|
+ *
|
|
+ * for family 15:
|
|
+ * - upper 32 bits are reserved
|
|
+ * - bit 20, bit 21
|
|
+ *
|
|
+ * for family 16:
|
|
+ * - bits 36-39 are reserved
|
|
+ * - bits 42-63 are reserved
|
|
+ * - bit 20, bit 21
|
|
+ *
|
|
+ * for IBS registers:
|
|
+ * IBSFETCHCTL: all bits are reserved except bits 57, 48, 15:0
|
|
+ * IBSOPSCTL : all bits are reserved except bits 17, 15:0
|
|
+ */
|
|
+#define PFM_K8_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (1ULL<<21))
|
|
+#define PFM_16_RSVD ((0x3fffffULL<<42) | (0xfULL<<36) | (1ULL<<20) | (1ULL<<21))
|
|
+#define PFM_AMD64_IBSFETCHCTL_RSVD (~((1ULL<<48)|(1ULL<<57)|0xffffULL))
|
|
+#define PFM_AMD64_IBSOPCTL_RSVD (~((1ULL<<17)|0xffffULL))
|
|
+
|
|
+static struct pfm_regmap_desc pfm_amd64_pmc_desc[] = {
|
|
+/* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0),
|
|
+/* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1),
|
|
+/* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL2),
|
|
+/* pmc3 */ PMC_D(PFM_REG_I64, "PERFSEL3", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL3),
|
|
+/* pmc4 */ PMC_D(PFM_REG_I, "IBSFETCHCTL", 0, PFM_AMD64_IBSFETCHCTL_RSVD, 0, MSR_AMD64_IBSFETCHCTL),
|
|
+/* pmc5 */ PMC_D(PFM_REG_I, "IBSOPCTL", 0, PFM_AMD64_IBSOPCTL_RSVD, 0, MSR_AMD64_IBSOPCTL),
|
|
+};
|
|
+#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_amd64_pmc_desc)
|
|
+
|
|
+#define PFM_REG_IBS (PFM_REG_I|PFM_REG_INTR)
|
|
+
|
|
+/*
|
|
+ * AMD64 counters are 48 bits, upper bits are reserved
|
|
+ */
|
|
+#define PFM_AMD64_CTR_RSVD (~((1ULL<<48)-1))
|
|
+
|
|
+#define PFM_AMD_D(n) \
|
|
+ { .type = PFM_REG_C, \
|
|
+ .desc = "PERFCTR"#n, \
|
|
+ .hw_addr = MSR_K7_PERFCTR0+n, \
|
|
+ .rsvd_msk = PFM_AMD64_CTR_RSVD, \
|
|
+ .dep_pmcs[0] = 1ULL << n \
|
|
+ }
|
|
+
|
|
+#define PFM_AMD_IBSO(t, s, a) \
|
|
+ { .type = t, \
|
|
+ .desc = s, \
|
|
+ .hw_addr = a, \
|
|
+ .rsvd_msk = 0, \
|
|
+ .dep_pmcs[0] = 1ULL << 5 \
|
|
+ }
|
|
+
|
|
+#define PFM_AMD_IBSF(t, s, a) /* IBS fetch pmd: depends on pmc4 (IBSFETCHCTL) */ \
|
|
+	{ .type = t,				\
|
|
+	  .desc = s,				\
|
|
+	  .hw_addr = a,				\
|
|
+	  .rsvd_msk = 0,			\
|
|
+	  .dep_pmcs[0] = 1ULL << IBSFETCHCTL_PMC /* was bit 6: no such pmc */ \
|
|
+	}
|
|
+
|
|
+static struct pfm_regmap_desc pfm_amd64_pmd_desc[] = {
|
|
+/* pmd0 */ PFM_AMD_D(0),
|
|
+/* pmd1 */ PFM_AMD_D(1),
|
|
+/* pmd2 */ PFM_AMD_D(2),
|
|
+/* pmd3 */ PFM_AMD_D(3),
|
|
+/* pmd4 */ PFM_AMD_IBSF(PFM_REG_IBS, "IBSFETCHCTL", MSR_AMD64_IBSFETCHCTL),
|
|
+/* pmd5 */ PFM_AMD_IBSF(PFM_REG_IRO, "IBSFETCHLINAD", MSR_AMD64_IBSFETCHLINAD),
|
|
+/* pmd6 */ PFM_AMD_IBSF(PFM_REG_IRO, "IBSFETCHPHYSAD", MSR_AMD64_IBSFETCHPHYSAD),
|
|
+/* pmd7 */ PFM_AMD_IBSO(PFM_REG_IBS, "IBSOPCTL", MSR_AMD64_IBSOPCTL),
|
|
+/* pmd8 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPRIP", MSR_AMD64_IBSOPRIP),
|
|
+/* pmd9 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA", MSR_AMD64_IBSOPDATA),
|
|
+/* pmd10 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA2", MSR_AMD64_IBSOPDATA2),
|
|
+/* pmd11 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA3", MSR_AMD64_IBSOPDATA3),
|
|
+/* pmd12 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSDCLINAD", MSR_AMD64_IBSDCLINAD),
|
|
+/* pmd13 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSDCPHYSAD", MSR_AMD64_IBSDCPHYSAD),
|
|
+};
|
|
+#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_amd64_pmd_desc)
|
|
+
|
|
+static struct pfm_context **pfm_nb_sys_owners;
|
|
+static struct pfm_context *pfm_nb_task_owner;
|
|
+
|
|
+static struct pfm_pmu_config pfm_amd64_pmu_conf;
|
|
+
|
|
+#define is_ibs_pmc(x) (x == 4 || x == 5)
|
|
+
|
|
+static void pfm_amd64_setup_eilvt_per_cpu(void *info)
|
|
+{
|
|
+ u8 lvt_off;
|
|
+
|
|
+ /* program the IBS vector to the perfmon vector */
|
|
+ lvt_off = setup_APIC_eilvt_ibs(LOCAL_PERFMON_VECTOR,
|
|
+ APIC_EILVT_MSG_FIX, 0);
|
|
+ PFM_DBG("APIC_EILVT%d set to 0x%x", lvt_off, LOCAL_PERFMON_VECTOR);
|
|
+ ibs_eilvt_off = lvt_off;
|
|
+}
|
|
+
|
|
+static int pfm_amd64_setup_eilvt(void)
|
|
+{
|
|
+#define IBSCTL_LVTOFFSETVAL	(1 << 8)
|
|
+#define IBSCTL			0x1cc
|
|
+	struct pci_dev *cpu_cfg = NULL;
|
|
+	int nodes = 0;
|
|
+	u32 value = 0;
|
|
+
|
|
+	/* returns 0 on success, 1 on failure; start with the per-CPU EILVT setup */
|
|
+	on_each_cpu(pfm_amd64_setup_eilvt_per_cpu, NULL, 1);
|
|
+
|
|
+	/* write the LVT offset into IBSCTL of each NB misc device and read it back */
|
|
+	do {
|
|
+		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
|
|
+					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
|
|
+					 cpu_cfg);
|
|
+		if (!cpu_cfg)
|
|
+			break;
|
|
+		++nodes;
|
|
+		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
|
|
+				       | IBSCTL_LVTOFFSETVAL);
|
|
+		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
|
|
+		if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) {
|
|
+			PFM_DBG("Failed to setup IBS LVT offset, "
|
|
+				"IBSCTL = 0x%08x", value);
|
|
+			/* leaving the iteration early: drop the reference still held */
|
|
+			pci_dev_put(cpu_cfg);
|
|
+			return 1;
|
|
+		}
|
|
+	} while (1);
|
|
+
|
|
+	if (!nodes) {
|
|
+		PFM_DBG("No CPU node configured for IBS");
|
|
+		return 1;
|
|
+	}
|
|
+
|
|
+#ifdef CONFIG_NUMA
|
|
+	/* Sanity check: works only for 64bit with proper numa implementation. */
|
|
+	if (nodes != num_possible_nodes()) {
|
|
+		PFM_DBG("Failed to setup CPU node(s) for IBS, "
|
|
+			"found: %d, expected %d",
|
|
+			nodes, num_possible_nodes());
|
|
+		return 1;
|
|
+	}
|
|
+#endif
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * There can only be one user per socket for the Northbridge (NB) events,
|
|
+ * so we enforce mutual exclusion as follows:
|
|
+ * - per-thread : only one context machine-wide can use NB events
|
|
+ * - system-wide: only one context per processor socket
|
|
+ *
|
|
+ * Exclusion is enforced at:
|
|
+ * - pfm_load_context()
|
|
+ * - pfm_write_pmcs() for attached contexts
|
|
+ *
|
|
+ * Exclusion is released at:
|
|
+ * - pfm_unload_context() or any call that implicitly uses it
|
|
+ *
|
|
+ * return:
|
|
+ * 0 : successfully acquire NB access
|
|
+ * < 0: errno, failed to acquire NB access
|
|
+ */
|
|
+static int pfm_amd64_acquire_nb(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_context **entry, *old;
|
|
+ int proc_id;
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+ proc_id = cpu_data(smp_processor_id()).phys_proc_id;
|
|
+#else
|
|
+ proc_id = 0;
|
|
+#endif
|
|
+
|
|
+ if (ctx->flags.system)
|
|
+ entry = &pfm_nb_sys_owners[proc_id];
|
|
+ else
|
|
+ entry = &pfm_nb_task_owner;
|
|
+
|
|
+ old = cmpxchg(entry, NULL, ctx);
|
|
+ if (!old) {
|
|
+ if (ctx->flags.system)
|
|
+ PFM_DBG("acquired Northbridge event access on socket %u", proc_id);
|
|
+ else
|
|
+ PFM_DBG("acquired Northbridge event access globally");
|
|
+ } else if (old != ctx) {
|
|
+ if (ctx->flags.system)
|
|
+ PFM_DBG("NorthBridge event conflict on socket %u", proc_id);
|
|
+ else
|
|
+ PFM_DBG("global NorthBridge event conflict");
|
|
+ return -EBUSY;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * invoked from pfm_write_pmcs() when pfm_nb_sys_owners is not NULL, i.e.,
|
|
+ * when we have detected a multi-core processor.
|
|
+ *
|
|
+ * context is locked, interrupts are masked
|
|
+ */
|
|
+static int pfm_amd64_pmc_write_check(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ struct pfarg_pmc *req)
|
|
+{
|
|
+ unsigned int event;
|
|
+
|
|
+ /*
|
|
+ * delay checking NB event until we load the context
|
|
+ */
|
|
+ if (ctx->state == PFM_CTX_UNLOADED)
|
|
+ return 0;
|
|
+
|
|
+ /*
|
|
+ * check event is NB event
|
|
+ */
|
|
+ event = (unsigned int)(req->reg_value & 0xff);
|
|
+ if (event < 0xee)
|
|
+ return 0;
|
|
+
|
|
+ return pfm_amd64_acquire_nb(ctx);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * invoked on pfm_load_context().
|
|
+ * context is locked, interrupts are masked
|
|
+ */
|
|
+static int pfm_amd64_load_context(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_event_set *set;
|
|
+ unsigned int i, n;
|
|
+
|
|
+ /*
|
|
+ * scan all sets for NB events
|
|
+ */
|
|
+ list_for_each_entry(set, &ctx->set_list, list) {
|
|
+ n = set->nused_pmcs;
|
|
+ for (i = 0; n; i++) {
|
|
+ if (!test_bit(i, cast_ulp(set->used_pmcs)))
|
|
+ continue;
|
|
+
|
|
+ if (!is_ibs_pmc(i) && (set->pmcs[i] & 0xff) >= 0xee)
|
|
+ goto found;
|
|
+ n--;
|
|
+ }
|
|
+ }
|
|
+ return 0;
|
|
+found:
|
|
+ return pfm_amd64_acquire_nb(ctx);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * invoked on pfm_unload_context()
|
|
+ */
|
|
+static void pfm_amd64_unload_context(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_context **entry, *old;
|
|
+ int proc_id;
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+ proc_id = cpu_data(smp_processor_id()).phys_proc_id;
|
|
+#else
|
|
+ proc_id = 0;
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * unload always happens on the monitored CPU in system-wide
|
|
+ */
|
|
+ if (ctx->flags.system)
|
|
+ entry = &pfm_nb_sys_owners[proc_id];
|
|
+ else
|
|
+ entry = &pfm_nb_task_owner;
|
|
+
|
|
+ old = cmpxchg(entry, ctx, NULL);
|
|
+ if (old == ctx) {
|
|
+ if (ctx->flags.system)
|
|
+ PFM_DBG("released NorthBridge on socket %u", proc_id);
|
|
+ else
|
|
+ PFM_DBG("released NorthBridge events globally");
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * detect if we need to activate NorthBridge event access control
|
|
+ */
|
|
+static int pfm_amd64_setup_nb_event_control(void)
|
|
+{
|
|
+ unsigned int c, n = 0;
|
|
+ unsigned int max_phys = 0;
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+ for_each_possible_cpu(c) {
|
|
+ if (cpu_data(c).phys_proc_id > max_phys)
|
|
+ max_phys = cpu_data(c).phys_proc_id;
|
|
+ }
|
|
+#else
|
|
+ max_phys = 0;
|
|
+#endif
|
|
+ if (max_phys > 255) {
|
|
+ PFM_INFO("socket id %d is too big to handle", max_phys);
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ n = max_phys + 1;
|
|
+ if (n < 2)
|
|
+ return 0;
|
|
+
|
|
+ pfm_nb_sys_owners = vmalloc(n * sizeof(*pfm_nb_sys_owners));
|
|
+ if (!pfm_nb_sys_owners)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ memset(pfm_nb_sys_owners, 0, n * sizeof(*pfm_nb_sys_owners));
|
|
+ pfm_nb_task_owner = NULL;
|
|
+
|
|
+ /*
|
|
+ * activate write-checker for PMC registers
|
|
+ */
|
|
+ for (c = 0; c < PFM_AMD_NUM_PMCS; c++) {
|
|
+ if (!is_ibs_pmc(c))
|
|
+ pfm_amd64_pmc_desc[c].type |= PFM_REG_WC;
|
|
+ }
|
|
+
|
|
+ pfm_amd64_pmu_info.load_context = pfm_amd64_load_context;
|
|
+ pfm_amd64_pmu_info.unload_context = pfm_amd64_unload_context;
|
|
+
|
|
+ pfm_amd64_pmu_conf.pmc_write_check = pfm_amd64_pmc_write_check;
|
|
+
|
|
+ PFM_INFO("NorthBridge event access control enabled");
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * disable registers which are not available on
|
|
+ * the host (applies to IBS registers)
|
|
+ */
|
|
+static void pfm_amd64_check_registers(void)
|
|
+{
|
|
+ u16 i;
|
|
+
|
|
+ PFM_DBG("has_ibs=%d", !!(ibs_status & HAS_IBS));
|
|
+
|
|
+ __set_bit(0, cast_ulp(enable_mask));
|
|
+ __set_bit(1, cast_ulp(enable_mask));
|
|
+ __set_bit(2, cast_ulp(enable_mask));
|
|
+ __set_bit(3, cast_ulp(enable_mask));
|
|
+ max_enable = 3+1;
|
|
+
|
|
+
|
|
+ /*
|
|
+ * remove IBS registers if feature not present
|
|
+ */
|
|
+ if (!(ibs_status & HAS_IBS)) {
|
|
+ pfm_amd64_pmc_desc[4].type = PFM_REG_NA;
|
|
+ pfm_amd64_pmc_desc[5].type = PFM_REG_NA;
|
|
+ for (i = 4; i < 14; i++)
|
|
+ pfm_amd64_pmd_desc[i].type = PFM_REG_NA;
|
|
+ } else {
|
|
+ __set_bit(16, cast_ulp(enable_mask));
|
|
+ __set_bit(17, cast_ulp(enable_mask));
|
|
+ max_enable = 17 + 1;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * adjust reserved bit fields for family 16
|
|
+ */
|
|
+ if (current_cpu_data.x86 == 16) {
|
|
+ for (i = 0; i < PFM_AMD_NUM_PMCS; i++)
|
|
+ if (pfm_amd64_pmc_desc[i].rsvd_msk == PFM_K8_RSVD)
|
|
+ pfm_amd64_pmc_desc[i].rsvd_msk = PFM_16_RSVD;
|
|
+ }
|
|
+}
|
|
+
|
|
+static int pfm_amd64_probe_pmu(void)
|
|
+{
|
|
+ u64 val = 0;
|
|
+ if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) {
|
|
+ PFM_INFO("not an AMD processor");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ switch (current_cpu_data.x86) {
|
|
+ case 16:
|
|
+ case 15:
|
|
+ case 6:
|
|
+ break;
|
|
+ default:
|
|
+ PFM_INFO("unsupported family=%d", current_cpu_data.x86);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ /* check for IBS */
|
|
+ if (cpu_has(&current_cpu_data, X86_FEATURE_IBS)) {
|
|
+ ibs_status |= HAS_IBS;
|
|
+ rdmsrl(MSR_AMD64_IBSCTL, val);
|
|
+ }
|
|
+
|
|
+ PFM_INFO("found family=%d IBSCTL=0x%llx", current_cpu_data.x86, (unsigned long long)val);
|
|
+
|
|
+ /*
|
|
+ * check for local APIC (required)
|
|
+ */
|
|
+ if (!cpu_has_apic) {
|
|
+ PFM_INFO("no local APIC, unsupported");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ if (current_cpu_data.x86_max_cores > 1
|
|
+ && pfm_amd64_setup_nb_event_control())
|
|
+ return -1;
|
|
+
|
|
+ if (force_nmi)
|
|
+ pfm_amd64_pmu_info.flags |= PFM_X86_FL_USE_NMI;
|
|
+
|
|
+ if (ibs_status & HAS_IBS) {
|
|
+ /* Setup extended interrupt */
|
|
+ if (pfm_amd64_setup_eilvt()) {
|
|
+ PFM_INFO("Failed to initialize extended interrupts "
|
|
+ "for IBS");
|
|
+ ibs_status &= ~HAS_IBS;
|
|
+ PFM_INFO("Unable to use IBS");
|
|
+ } else {
|
|
+ PFM_INFO("IBS supported");
|
|
+ }
|
|
+ }
|
|
+
|
|
+ pfm_amd64_check_registers();
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * detect if counters have overflowed.
|
|
+ * return:
|
|
+ * 0 : no overflow
|
|
+ * 1 : at least one overflow
|
|
+ */
|
|
+static int __kprobes pfm_amd64_has_ovfls(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_regmap_desc *xrd;
|
|
+ u64 *cnt_mask;
|
|
+ u64 wmask, val;
|
|
+ u16 i, num;
|
|
+
|
|
+ /*
|
|
+ * Check for IBS events
|
|
+ */
|
|
+ if (ibs_status & HAS_IBS) {
|
|
+ rdmsrl(MSR_AMD64_IBSFETCHCTL, val);
|
|
+ if (val & PFM_AMD64_IBSFETCHVAL)
|
|
+ return 1;
|
|
+ rdmsrl(MSR_AMD64_IBSOPCTL, val);
|
|
+ if (val & PFM_AMD64_IBSOPVAL)
|
|
+ return 1;
|
|
+ }
|
|
+ /*
|
|
+ * Check regular counters
|
|
+ */
|
|
+ cnt_mask = ctx->regs.cnt_pmds;
|
|
+ num = ctx->regs.num_counters;
|
|
+ wmask = 1ULL << pfm_pmu_conf->counter_width;
|
|
+ xrd = pfm_amd64_pmd_desc;
|
|
+
|
|
+ for (i = 0; num; i++) {
|
|
+ if (test_bit(i, cast_ulp(cnt_mask))) {
|
|
+ rdmsrl(xrd[i].hw_addr, val);
|
|
+ if (!(val & wmask))
|
|
+ return 1;
|
|
+ num--;
|
|
+ }
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Must check for IBS event BEFORE stop_save_p6 because
|
|
+ * stopping monitoring does destroy IBS state information
|
|
+ * in IBSFETCHCTL/IBSOPCTL because they are tagged as enable
|
|
+ * registers.
|
|
+ */
|
|
+static int pfm_amd64_stop_save(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+ u64 used_mask[PFM_PMC_BV];
|
|
+ u64 *cnt_pmds;
|
|
+ u64 val, wmask, ovfl_mask;
|
|
+ u32 i, count, use_ibs;
|
|
+
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ /*
|
|
+ * IBS used if:
|
|
+ * - on family 10h processor with IBS
|
|
+ * - at least one of the IBS PMD registers is used
|
|
+ */
|
|
+ use_ibs = (ibs_status & HAS_IBS)
|
|
+ && (test_bit(IBSFETCHCTL_PMD, cast_ulp(set->used_pmds))
|
|
+ || test_bit(IBSOPSCTL_PMD, cast_ulp(set->used_pmds)));
|
|
+
|
|
+ wmask = 1ULL << pfm_pmu_conf->counter_width;
|
|
+
|
|
+ bitmap_and(cast_ulp(used_mask),
|
|
+ cast_ulp(set->used_pmcs),
|
|
+ cast_ulp(enable_mask),
|
|
+ max_enable);
|
|
+
|
|
+ count = bitmap_weight(cast_ulp(used_mask), max_enable);
|
|
+
|
|
+ /*
|
|
+ * stop monitoring
|
|
+ * Unfortunately, this is very expensive!
|
|
+ * wrmsrl() is serializing.
|
|
+ *
|
|
+ * With IBS, we need to do read-modify-write to preserve the content
|
|
+ * for OpsCTL and FetchCTL because they are also used as PMDs and saved
|
|
+ * below
|
|
+ */
|
|
+ if (use_ibs) {
|
|
+ for (i = 0; count; i++) {
|
|
+ if (test_bit(i, cast_ulp(used_mask))) {
|
|
+ if (i == IBSFETCHCTL_PMC) {
|
|
+ rdmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val);
|
|
+ val &= ~PFM_AMD64_IBSFETCHEN;
|
|
+ } else if (i == IBSOPSCTL_PMC) {
|
|
+ rdmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val);
|
|
+ val &= ~PFM_AMD64_IBSOPEN;
|
|
+ } else
|
|
+ val = 0;
|
|
+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val);
|
|
+ count--;
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ for (i = 0; count; i++) {
|
|
+ if (test_bit(i, cast_ulp(used_mask))) {
|
|
+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0);
|
|
+ count--;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * if we already have a pending overflow condition, we simply
|
|
+ * return to take care of this first.
|
|
+ */
|
|
+ if (set->npend_ovfls)
|
|
+ return 1;
|
|
+
|
|
+ ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+ cnt_pmds = ctx->regs.cnt_pmds;
|
|
+
|
|
+ /*
|
|
+ * check for pending overflows and save PMDs (combo)
|
|
+ * we employ used_pmds because we also need to save
|
|
+ * and not just check for pending interrupts.
|
|
+ *
|
|
+ * Must check for counting PMDs because of virtual PMDs and IBS
|
|
+ */
|
|
+ count = set->nused_pmds;
|
|
+ for (i = 0; count; i++) {
|
|
+ if (test_bit(i, cast_ulp(set->used_pmds))) {
|
|
+ val = pfm_arch_read_pmd(ctx, i);
|
|
+ if (likely(test_bit(i, cast_ulp(cnt_pmds)))) {
|
|
+ if (!(val & wmask)) {
|
|
+ __set_bit(i, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask);
|
|
+ }
|
|
+ set->pmds[i].value = val;
|
|
+ count--;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * check if IBS contains valid data, and mark the corresponding
|
|
+ * PMD as overflowed
|
|
+ */
|
|
+ if (use_ibs) {
|
|
+ if (set->pmds[IBSFETCHCTL_PMD].value & PFM_AMD64_IBSFETCHVAL) {
|
|
+ __set_bit(IBSFETCHCTL_PMD, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ if (set->pmds[IBSOPSCTL_PMD].value & PFM_AMD64_IBSOPVAL) {
|
|
+ __set_bit(IBSOPSCTL_PMD, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ }
|
|
+ /* 0 means: no need to save PMDs at upper level */
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_amd64_quiesce - stop monitoring without grabbing any lock
|
|
+ *
|
|
+ * called from NMI interrupt handler to immediately stop monitoring
|
|
+ * cannot grab any lock, including perfmon related locks
|
|
+ */
|
|
+static void __kprobes pfm_amd64_quiesce(void)
|
|
+{
|
|
+ /*
|
|
+ * quiesce PMU by clearing available registers that have
|
|
+ * the start/stop capability
|
|
+ */
|
|
+ if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_K7_EVNTSEL0, 0);
|
|
+ if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_K7_EVNTSEL0+1, 0);
|
|
+ if (test_bit(2, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_K7_EVNTSEL0+2, 0);
|
|
+ if (test_bit(3, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_K7_EVNTSEL0+3, 0);
|
|
+
|
|
+ if (test_bit(4, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
|
|
+ if (test_bit(5, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_AMD64_IBSOPCTL, 0);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_amd64_restore_pmcs - reload PMC registers
|
|
+ * @ctx: context to restore from
|
|
+ * @set: current event set
|
|
+ *
|
|
+ * optimized version of pfm_arch_restore_pmcs(). On AMD64, we can
|
|
+ * afford to only restore the pmcs registers we use, because they are
|
|
+ * all independent from each other.
|
|
+ */
|
|
+static void pfm_amd64_restore_pmcs(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ u64 *mask;
|
|
+ u16 i, num;
|
|
+
|
|
+ mask = set->used_pmcs;
|
|
+ num = set->nused_pmcs;
|
|
+ for (i = 0; num; i++) {
|
|
+ if (test_bit(i, cast_ulp(mask))) {
|
|
+ wrmsrl(pfm_amd64_pmc_desc[i].hw_addr, set->pmcs[i]);
|
|
+ num--;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static struct pfm_pmu_config pfm_amd64_pmu_conf = {
|
|
+ .pmu_name = "AMD64",
|
|
+ .counter_width = 47,
|
|
+ .pmd_desc = pfm_amd64_pmd_desc,
|
|
+ .pmc_desc = pfm_amd64_pmc_desc,
|
|
+ .num_pmc_entries = PFM_AMD_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_AMD_NUM_PMDS,
|
|
+ .probe_pmu = pfm_amd64_probe_pmu,
|
|
+ .version = "1.2",
|
|
+ .pmu_info = &pfm_amd64_pmu_info,
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+};
|
|
+
|
|
+static int __init pfm_amd64_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_amd64_pmu_conf);
|
|
+}
|
|
+
|
|
+static void __exit pfm_amd64_pmu_cleanup_module(void)
|
|
+{
|
|
+ if (pfm_nb_sys_owners)
|
|
+ vfree(pfm_nb_sys_owners);
|
|
+
|
|
+ pfm_pmu_unregister(&pfm_amd64_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_amd64_pmu_init_module);
|
|
+module_exit(pfm_amd64_pmu_cleanup_module);
|
|
diff --git a/arch/x86/perfmon/perfmon_intel_arch.c b/arch/x86/perfmon/perfmon_intel_arch.c
|
|
new file mode 100644
|
|
index 0000000..e27a732
|
|
--- /dev/null
|
|
+++ b/arch/x86/perfmon/perfmon_intel_arch.c
|
|
@@ -0,0 +1,610 @@
|
|
+/*
|
|
+ * This file contains the Intel architectural perfmon v1, v2, v3
|
|
+ * description tables.
|
|
+ *
|
|
+ * Architectural perfmon was introduced with Intel Core Solo/Duo
|
|
+ * processors.
|
|
+ *
|
|
+ * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/kprobes.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <linux/nmi.h>
|
|
+#include <asm/msr.h>
|
|
+#include <asm/apic.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_DESCRIPTION("Intel architectural perfmon v1");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+static int force, force_nmi;
|
|
+MODULE_PARM_DESC(force, "bool: force module to load succesfully");
|
|
+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
|
|
+module_param(force, bool, 0600);
|
|
+module_param(force_nmi, bool, 0600);
|
|
+
|
|
+static u64 enable_mask[PFM_MAX_PMCS];
|
|
+static u16 max_enable;
|
|
+
|
|
+/*
|
|
+ * - upper 32 bits are reserved
|
|
+ * - INT: APIC enable bit is reserved (forced to 1)
|
|
+ * - bit 21 is reserved
|
|
+ *
|
|
+ * RSVD: reserved bits are 1
|
|
+ */
|
|
+#define PFM_IA_PMC_RSVD ((~((1ULL<<32)-1)) \
|
|
+ | (1ULL<<20) \
|
|
+ | (1ULL<<21))
|
|
+
|
|
+/*
|
|
+ * force Local APIC interrupt on overflow
|
|
+ * disable with NO_EMUL64
|
|
+ */
|
|
+#define PFM_IA_PMC_VAL (1ULL<<20)
|
|
+#define PFM_IA_NO64 (1ULL<<20)
|
|
+
|
|
+/*
|
|
+ * architecture specifies that:
|
|
+ * IA32_PMCx MSR : starts at 0x0c1 & occupy a contiguous block of MSR
|
|
+ * IA32_PERFEVTSELx MSR : starts at 0x186 & occupy a contiguous block of MSR
|
|
+ * MSR_GEN_FIXED_CTR0 : starts at 0x309 & occupy a contiguous block of MSR
|
|
+ */
|
|
+#define MSR_GEN_SEL_BASE MSR_P6_EVNTSEL0
|
|
+#define MSR_GEN_PMC_BASE MSR_P6_PERFCTR0
|
|
+#define MSR_GEN_FIXED_PMC_BASE MSR_CORE_PERF_FIXED_CTR0
|
|
+
|
|
+/*
|
|
+ * layout of EAX for CPUID.0xa leaf function
|
|
+ */
|
|
+struct pmu_eax {
|
|
+ unsigned int version:8; /* architectural perfmon version */
|
|
+ unsigned int num_cnt:8; /* number of generic counters */
|
|
+ unsigned int cnt_width:8; /* width of generic counters */
|
|
+ unsigned int ebx_length:8; /* number of architected events */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * layout of EDX for CPUID.0xa leaf function when perfmon v2 is detected
|
|
+ */
|
|
+struct pmu_edx {
|
|
+ unsigned int num_cnt:5; /* number of fixed counters */
|
|
+ unsigned int cnt_width:8; /* width of fixed counters */
|
|
+ unsigned int reserved:19;
|
|
+};
|
|
+
|
|
+static void pfm_intel_arch_restore_pmcs(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+static int pfm_intel_arch_stop_save(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+static int pfm_intel_arch_has_ovfls(struct pfm_context *ctx);
|
|
+static void __kprobes pfm_intel_arch_quiesce(void);
|
|
+
|
|
+/*
|
|
+ * physical addresses of MSR controlling the perfevtsel and counter registers
|
|
+ */
|
|
+struct pfm_arch_pmu_info pfm_intel_arch_pmu_info = {
|
|
+ .stop_save = pfm_intel_arch_stop_save,
|
|
+ .has_ovfls = pfm_intel_arch_has_ovfls,
|
|
+ .quiesce = pfm_intel_arch_quiesce,
|
|
+ .restore_pmcs = pfm_intel_arch_restore_pmcs
|
|
+};
|
|
+
|
|
+#define PFM_IA_C(n) { \
|
|
+ .type = PFM_REG_I64, \
|
|
+ .desc = "PERFEVTSEL"#n, \
|
|
+ .dfl_val = PFM_IA_PMC_VAL, \
|
|
+ .rsvd_msk = PFM_IA_PMC_RSVD, \
|
|
+ .no_emul64_msk = PFM_IA_NO64, \
|
|
+ .hw_addr = MSR_GEN_SEL_BASE+(n) \
|
|
+ }
|
|
+
|
|
+#define PFM_IA_D(n) \
|
|
+ { .type = PFM_REG_C, \
|
|
+ .desc = "PMC"#n, \
|
|
+ .hw_addr = MSR_P6_PERFCTR0+n, \
|
|
+ .dep_pmcs[0] = 1ULL << n \
|
|
+ }
|
|
+
|
|
+#define PFM_IA_FD(n) \
|
|
+ { .type = PFM_REG_C, \
|
|
+ .desc = "FIXED_CTR"#n, \
|
|
+ .hw_addr = MSR_CORE_PERF_FIXED_CTR0+n,\
|
|
+ .dep_pmcs[0] = 1ULL << 16 \
|
|
+ }
|
|
+
|
|
+static struct pfm_regmap_desc pfm_intel_arch_pmc_desc[] = {
|
|
+/* pmc0 */ PFM_IA_C(0), PFM_IA_C(1), PFM_IA_C(2), PFM_IA_C(3),
|
|
+/* pmc4 */ PFM_IA_C(4), PFM_IA_C(5), PFM_IA_C(6), PFM_IA_C(7),
|
|
+/* pmc8 */ PFM_IA_C(8), PFM_IA_C(9), PFM_IA_C(10), PFM_IA_C(11),
|
|
+/* pmc12 */ PFM_IA_C(12), PFM_IA_C(13), PFM_IA_C(14), PFM_IA_C(15),
|
|
+
|
|
+/* pmc16 */ { .type = PFM_REG_I,
|
|
+ .desc = "FIXED_CTRL",
|
|
+ .dfl_val = 0x8888888888888888ULL, /* force PMI */
|
|
+ .rsvd_msk = 0, /* set dynamically */
|
|
+ .no_emul64_msk = 0,
|
|
+ .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL
|
|
+ },
|
|
+};
|
|
+#define PFM_IA_MAX_PMCS ARRAY_SIZE(pfm_intel_arch_pmc_desc)
|
|
+
|
|
+static struct pfm_regmap_desc pfm_intel_arch_pmd_desc[] = {
|
|
+/* pmd0 */ PFM_IA_D(0), PFM_IA_D(1), PFM_IA_D(2), PFM_IA_D(3),
|
|
+/* pmd4 */ PFM_IA_D(4), PFM_IA_D(5), PFM_IA_D(6), PFM_IA_D(7),
|
|
+/* pmd8 */ PFM_IA_D(8), PFM_IA_D(9), PFM_IA_D(10), PFM_IA_D(11),
|
|
+/* pmd12 */ PFM_IA_D(12), PFM_IA_D(13), PFM_IA_D(14), PFM_IA_D(15),
|
|
+
|
|
+/* pmd16 */ PFM_IA_FD(0), PFM_IA_FD(1), PFM_IA_FD(2), PFM_IA_FD(3),
|
|
+/* pmd20 */ PFM_IA_FD(4), PFM_IA_FD(5), PFM_IA_FD(6), PFM_IA_FD(7),
|
|
+/* pmd24 */ PFM_IA_FD(8), PFM_IA_FD(9), PFM_IA_FD(10), PFM_IA_FD(11),
|
|
+/* pmd28: fixed counters 12-15 */ PFM_IA_FD(12), PFM_IA_FD(13), PFM_IA_FD(14), PFM_IA_FD(15)
|
|
+};
|
|
+#define PFM_IA_MAX_PMDS ARRAY_SIZE(pfm_intel_arch_pmd_desc)
|
|
+
|
|
+#define PFM_IA_MAX_CNT 16 /* # generic counters in mapping table */
|
|
+#define PFM_IA_MAX_FCNT 16 /* # of fixed counters in mapping table */
|
|
+#define PFM_IA_FCNT_BASE 16 /* base index of fixed counters PMD */
|
|
+
|
|
+static struct pfm_pmu_config pfm_intel_arch_pmu_conf;
|
|
+
|
|
+static void pfm_intel_arch_check_errata(void)
|
|
+{
|
|
+ /*
|
|
+ * Core Duo errata AE49 (no fix). Both counters share a single
|
|
+ * enable bit in PERFEVTSEL0
|
|
+ */
|
|
+ if (current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 14)
|
|
+ pfm_intel_arch_pmu_info.flags |= PFM_X86_FL_NO_SHARING;
|
|
+}
|
|
+
|
|
+static inline void set_enable_mask(unsigned int i)
|
|
+{
|
|
+ __set_bit(i, cast_ulp(enable_mask));
|
|
+
|
|
+ /* max_enable = highest + 1 */
|
|
+ if ((i+1) > max_enable)
|
|
+ max_enable = i+ 1;
|
|
+}
|
|
+
|
|
+static void pfm_intel_arch_setup_generic(unsigned int version,
|
|
+ unsigned int width,
|
|
+ unsigned int count)
|
|
+{
|
|
+ u64 rsvd;
|
|
+ unsigned int i;
|
|
+
|
|
+ /*
|
|
+ * first we handle the generic counters:
|
|
+ *
|
|
+ * - ensure HW does not have more registers than hardcoded in the tables
|
|
+ * - adjust rsvd_msk to actual counter width
|
|
+ * - initialize enable_mask (list of PMC with start/stop capability)
|
|
+ * - mark unused hardcoded generic counters as unimplemented
|
|
+ */
|
|
+
|
|
+ /*
|
|
+ * min of number of Hw counters and hardcoded in the tables
|
|
+ */
|
|
+ if (count >= PFM_IA_MAX_CNT) {
|
|
+ printk(KERN_INFO "perfmon: Limiting number of generic counters"
|
|
+ " to %u, HW supports %u",
|
|
+ PFM_IA_MAX_CNT, count);
|
|
+ count = PFM_IA_MAX_CNT;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * adjust rsvd_msk for generic counters based on actual width
|
|
+ * initialize enable_mask (1 per pmd)
|
|
+ */
|
|
+ rsvd = ~((1ULL << width)-1);
|
|
+ for (i = 0; i < count; i++) {
|
|
+ pfm_intel_arch_pmd_desc[i].rsvd_msk = rsvd;
|
|
+ set_enable_mask(i);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * handle version 3 new anythread bit (21)
|
|
+ */
|
|
+ if (version == 3) {
|
|
+ for (i = 0; i < count; i++)
|
|
+ pfm_intel_arch_pmc_desc[i].rsvd_msk &= ~(1ULL << 21);
|
|
+ }
|
|
+
|
|
+
|
|
+ /*
|
|
+ * mark unused generic counters as not available
|
|
+ */
|
|
+ for (i = count ; i < PFM_IA_MAX_CNT; i++) {
|
|
+ pfm_intel_arch_pmd_desc[i].type = PFM_REG_NA;
|
|
+ pfm_intel_arch_pmc_desc[i].type = PFM_REG_NA;
|
|
+ }
|
|
+}
|
|
+
|
|
+static void pfm_intel_arch_setup_fixed(unsigned int version,
|
|
+ unsigned int width,
|
|
+ unsigned int count)
|
|
+{
|
|
+ u64 rsvd, dfl;
|
|
+ unsigned int i;
|
|
+
|
|
+ /*
|
|
+ * handle the fixed counters (if any):
|
|
+ *
|
|
+ * - ensure HW does not have more registers than hardcoded in the tables
|
|
+ * - adjust rsvd_msk to actual counter width
|
|
+ * - initialize enable_mask (list of PMC with start/stop capability)
|
|
+ * - mark unused hardcoded fixed counters as unimplemented
|
|
+ */
|
|
+ if (count >= PFM_IA_MAX_FCNT) {
|
|
+ printk(KERN_INFO "perfmon: Limiting number of fixed counters"
|
|
+ " to %u, HW supports %u",
|
|
+ PFM_IA_MAX_FCNT, count);
|
|
+ count = PFM_IA_MAX_FCNT;
|
|
+ }
|
|
+ /*
|
|
+ * adjust rsvd_msk for fixed counters based on actual width
|
|
+ */
|
|
+ rsvd = ~((1ULL << width)-1);
|
|
+ for (i = 0; i < count; i++)
|
|
+ pfm_intel_arch_pmd_desc[PFM_IA_FCNT_BASE+i].rsvd_msk = rsvd;
|
|
+
|
|
+ /*
|
|
+ * handle version 3 new anythread bit (bit 2)
|
|
+ */
|
|
+ if (version == 3)
|
|
+ rsvd = 1ULL << 3;
|
|
+ else
|
|
+ rsvd = 3ULL << 2;
|
|
+
|
|
+ pfm_intel_arch_pmc_desc[16].rsvd_msk = 0;
|
|
+ for (i = 0; i < count; i++)
|
|
+ pfm_intel_arch_pmc_desc[16].rsvd_msk |= rsvd << (i<<2);
|
|
+
|
|
+ /*
|
|
+ * mark unused fixed counters as unimplemented
|
|
+ *
|
|
+ * update the rsvd_msk, dfl_val in FIXED_CTRL:
|
|
+ * - rsvd_msk: set all 4 bits
|
|
+ * - dfl_val : clear all 4 bits
|
|
+ */
|
|
+ dfl = pfm_intel_arch_pmc_desc[16].dfl_val;
|
|
+ rsvd = pfm_intel_arch_pmc_desc[16].rsvd_msk;
|
|
+
|
|
+ for (i = count ; i < PFM_IA_MAX_FCNT; i++) {
|
|
+ pfm_intel_arch_pmd_desc[PFM_IA_FCNT_BASE+i].type = PFM_REG_NA;
|
|
+ rsvd |= 0xfULL << (i<<2);
|
|
+ dfl &= ~(0xfULL << (i<<2));
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * FIXED_CTR_CTRL unavailable when no fixed counters are defined
|
|
+ */
|
|
+ if (!count) {
|
|
+ pfm_intel_arch_pmc_desc[16].type = PFM_REG_NA;
|
|
+ } else {
|
|
+ /* update rsvd_mask and dfl_val */
|
|
+ pfm_intel_arch_pmc_desc[16].rsvd_msk = rsvd;
|
|
+ pfm_intel_arch_pmc_desc[16].dfl_val = dfl;
|
|
+ set_enable_mask(16);
|
|
+ }
|
|
+}
|
|
+
|
|
+static int pfm_intel_arch_probe_pmu(void)
|
|
+{
|
|
+ union {
|
|
+ unsigned int val;
|
|
+ struct pmu_eax eax;
|
|
+ struct pmu_edx edx;
|
|
+ } eax, edx;
|
|
+ unsigned int ebx, ecx;
|
|
+ unsigned int width = 0;
|
|
+
|
|
+ edx.val = 0;
|
|
+
|
|
+ if (!(cpu_has_arch_perfmon || force)) {
|
|
+ PFM_INFO("no support for Intel architectural PMU");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ if (!cpu_has_apic) {
|
|
+ PFM_INFO("no Local APIC, try rebooting with lapic option");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ /* cpuid() call protected by cpu_has_arch_perfmon */
|
|
+ cpuid(0xa, &eax.val, &ebx, &ecx, &edx.val);
|
|
+
|
|
+ /*
|
|
+ * reject processors supported by perfmon_intel_core
|
|
+ *
|
|
+ * We need to do this explicitly to avoid depending
|
|
+ * on the link order in case, the modules are compiled as
|
|
+ * builtin.
|
|
+ *
|
|
+ * non Intel processors are rejected by cpu_has_arch_perfmon
|
|
+ */
|
|
+ if (current_cpu_data.x86 == 6 && !force) {
|
|
+ switch (current_cpu_data.x86_model) {
|
|
+ case 15: /* Merom: use perfmon_intel_core */
|
|
+ case 23: /* Penryn: use perfmon_intel_core */
|
|
+ return -1;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * some 6/15 models have buggy BIOS
|
|
+ */
|
|
+ if (eax.eax.version == 0
|
|
+ && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 15) {
|
|
+ PFM_INFO("buggy v2 BIOS, adjusting for 2 generic counters");
|
|
+ eax.eax.version = 2;
|
|
+ eax.eax.num_cnt = 2;
|
|
+ eax.eax.cnt_width = 40;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Intel Atom processors have a buggy firmware which does not report
|
|
+ * the correct number of fixed counters
|
|
+ */
|
|
+ if (eax.eax.version == 3 && edx.edx.num_cnt < 3
|
|
+ && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 28) {
|
|
+ PFM_INFO("buggy v3 BIOS, adjusting for 3 fixed counters");
|
|
+ edx.edx.num_cnt = 3;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * some v2 BIOSes are incomplete
|
|
+ */
|
|
+ if (eax.eax.version == 2 && !edx.edx.num_cnt) {
|
|
+ PFM_INFO("buggy v2 BIOS, adjusting for 3 fixed counters");
|
|
+ edx.edx.num_cnt = 3;
|
|
+ edx.edx.cnt_width = 40;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * no fixed counters on earlier versions
|
|
+ */
|
|
+ if (eax.eax.version < 2) {
|
|
+ edx.val = 0;
|
|
+ } else {
|
|
+ /*
|
|
+ * use the min value of both widths until we support
|
|
+ * variable width counters
|
|
+ */
|
|
+ width = eax.eax.cnt_width < edx.edx.cnt_width ?
|
|
+ eax.eax.cnt_width : edx.edx.cnt_width;
|
|
+ }
|
|
+
|
|
+ PFM_INFO("detected architecural perfmon v%d", eax.eax.version);
|
|
+ PFM_INFO("num_gen=%d width=%d num_fixed=%d width=%d",
|
|
+ eax.eax.num_cnt,
|
|
+ eax.eax.cnt_width,
|
|
+ edx.edx.num_cnt,
|
|
+ edx.edx.cnt_width);
|
|
+
|
|
+
|
|
+ pfm_intel_arch_setup_generic(eax.eax.version,
|
|
+ width,
|
|
+ eax.eax.num_cnt);
|
|
+
|
|
+ pfm_intel_arch_setup_fixed(eax.eax.version,
|
|
+ width,
|
|
+ edx.edx.num_cnt);
|
|
+
|
|
+ if (force_nmi)
|
|
+ pfm_intel_arch_pmu_info.flags |= PFM_X86_FL_USE_NMI;
|
|
+
|
|
+ pfm_intel_arch_check_errata();
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_intel_arch_has_ovfls - check for pending overflow condition
|
|
+ * @ctx: context to work on
|
|
+ *
|
|
+ * detect if counters have overflowed.
|
|
+ * return:
|
|
+ * 0 : no overflow
|
|
+ * 1 : at least one overflow
|
|
+ */
|
|
+static int __kprobes pfm_intel_arch_has_ovfls(struct pfm_context *ctx)
|
|
+{
|
|
+ u64 *cnt_mask;
|
|
+ u64 wmask, val;
|
|
+ u16 i, num;
|
|
+
|
|
+ cnt_mask = ctx->regs.cnt_pmds;
|
|
+ num = ctx->regs.num_counters;
|
|
+ wmask = 1ULL << pfm_pmu_conf->counter_width;
|
|
+
|
|
+ /*
|
|
+ * we can leverage the fact that we know the mapping
|
|
+ * to hardcode the MSR address and avoid accessing
|
|
+ * more cachelines
|
|
+ *
|
|
+ * We need to check cnt_mask because not all registers
|
|
+ * may be available.
|
|
+ */
|
|
+ for (i = 0; num; i++) {
|
|
+ if (test_bit(i, cast_ulp(cnt_mask))) {
|
|
+ rdmsrl(pfm_intel_arch_pmd_desc[i].hw_addr, val);
|
|
+ if (!(val & wmask))
|
|
+ return 1;
|
|
+ num--;
|
|
+ }
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_intel_arch_stop_save(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ u64 used_mask[PFM_PMC_BV];
|
|
+ u64 *cnt_pmds;
|
|
+ u64 val, wmask, ovfl_mask;
|
|
+ u32 i, count;
|
|
+
|
|
+ wmask = 1ULL << pfm_pmu_conf->counter_width;
|
|
+
|
|
+ bitmap_and(cast_ulp(used_mask),
|
|
+ cast_ulp(set->used_pmcs),
|
|
+ cast_ulp(enable_mask),
|
|
+ max_enable);
|
|
+
|
|
+ count = bitmap_weight(cast_ulp(used_mask), max_enable);
|
|
+
|
|
+ /*
|
|
+ * stop monitoring
|
|
+ * Unfortunately, this is very expensive!
|
|
+ * wrmsrl() is serializing.
|
|
+ */
|
|
+ for (i = 0; count; i++) {
|
|
+ if (test_bit(i, cast_ulp(used_mask))) {
|
|
+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0);
|
|
+ count--;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * if we already have a pending overflow condition, we simply
|
|
+ * return to take care of this first.
|
|
+ */
|
|
+ if (set->npend_ovfls)
|
|
+ return 1;
|
|
+
|
|
+ ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+ cnt_pmds = ctx->regs.cnt_pmds;
|
|
+
|
|
+ /*
|
|
+ * check for pending overflows and save PMDs (combo)
|
|
+ * we employ used_pmds because we also need to save
|
|
+ * and not just check for pending interrupts.
|
|
+ *
|
|
+ * Must check for counting PMDs because of virtual PMDs
|
|
+ */
|
|
+ count = set->nused_pmds;
|
|
+ for (i = 0; count; i++) {
|
|
+ if (test_bit(i, cast_ulp(set->used_pmds))) {
|
|
+ val = pfm_arch_read_pmd(ctx, i);
|
|
+ if (likely(test_bit(i, cast_ulp(cnt_pmds)))) {
|
|
+ if (!(val & wmask)) {
|
|
+ __set_bit(i, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ val = (set->pmds[i].value & ~ovfl_mask)
|
|
+ | (val & ovfl_mask);
|
|
+ }
|
|
+ set->pmds[i].value = val;
|
|
+ count--;
|
|
+ }
|
|
+ }
|
|
+ /* 0 means: no need to save PMDs at upper level */
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_intel_arch_quiesce - stop monitoring without grabbing any lock
|
|
+ *
|
|
+ * called from NMI interrupt handler to immediately stop monitoring
|
|
+ * cannot grab any lock, including perfmon related locks
|
|
+ */
|
|
+static void __kprobes pfm_intel_arch_quiesce(void)
|
|
+{
|
|
+ u16 i;
|
|
+
|
|
+ /*
|
|
+ * PMC16 is the fixed counter control register so it has a
|
|
+ * distinct MSR address
|
|
+ *
|
|
+ * We do not use the hw_addr field in the table to avoid touching
|
|
+ * too many cachelines
|
|
+ */
|
|
+ for (i = 0; i < pfm_pmu_conf->regs_all.max_pmc; i++) {
|
|
+ if (test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) {
|
|
+ if (i == 16)
|
|
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
|
|
+ else
|
|
+ wrmsrl(MSR_P6_EVNTSEL0+i, 0);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_intel_arch_restore_pmcs - reload PMC registers
|
|
+ * @ctx: context to restore from
|
|
+ * @set: current event set
|
|
+ *
|
|
+ * optimized version of pfm_arch_restore_pmcs(). On architectural perfmon,
|
|
+ * we can afford to only restore the pmcs registers we use, because they
|
|
+ * are all independent from each other.
|
|
+ */
|
|
+static void pfm_intel_arch_restore_pmcs(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ u64 *mask;
|
|
+ u16 i, num;
|
|
+
|
|
+ mask = set->used_pmcs;
|
|
+ num = set->nused_pmcs;
|
|
+ for (i = 0; num; i++) {
|
|
+ if (test_bit(i, cast_ulp(mask))) {
|
|
+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, set->pmcs[i]);
|
|
+ num--;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+/*
|
|
+ * Counters may have model-specific width. Yet the documentation says
|
|
+ * that only the lower 32 bits can be written to due to the specification
|
|
+ * of wrmsr. bits [32-(w-1)] are sign extensions of bit 31. Bits [w-63] must
|
|
+ * not be set (see rsvd_msk for PMDs). As such the effective width of a
|
|
+ * counter is 31 bits only regardless of what CPUID.0xa returns.
|
|
+ *
|
|
+ * See IA-32 Intel Architecture Software developer manual Vol 3B chapter 18
|
|
+ */
|
|
+static struct pfm_pmu_config pfm_intel_arch_pmu_conf = {
|
|
+ .pmu_name = "Intel architectural",
|
|
+ .pmd_desc = pfm_intel_arch_pmd_desc,
|
|
+ .counter_width = 31,
|
|
+ .num_pmc_entries = PFM_IA_MAX_PMCS,
|
|
+ .num_pmd_entries = PFM_IA_MAX_PMDS,
|
|
+ .pmc_desc = pfm_intel_arch_pmc_desc,
|
|
+ .probe_pmu = pfm_intel_arch_probe_pmu,
|
|
+ .version = "1.0",
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+ .pmu_info = &pfm_intel_arch_pmu_info
|
|
+};
|
|
+
|
|
+static int __init pfm_intel_arch_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_intel_arch_pmu_conf);
|
|
+}
|
|
+
|
|
+static void __exit pfm_intel_arch_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_intel_arch_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_intel_arch_pmu_init_module);
|
|
+module_exit(pfm_intel_arch_pmu_cleanup_module);
|
|
diff --git a/arch/x86/perfmon/perfmon_intel_atom.c b/arch/x86/perfmon/perfmon_intel_atom.c
|
|
new file mode 100644
|
|
index 0000000..9b94863
|
|
--- /dev/null
|
|
+++ b/arch/x86/perfmon/perfmon_intel_atom.c
|
|
@@ -0,0 +1,541 @@
|
|
+/*
|
|
+ * perfmon support for Intel Atom (architectural perfmon v3 + PEBS)
|
|
+ *
|
|
+ * Copyright (c) 2008 Google,Inc
|
|
+ * Contributed by Stephane Eranian <eranian@gmail.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/kprobes.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <asm/msr.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@gmail.com>");
|
|
+MODULE_DESCRIPTION("Intel Atom");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+static int force, force_nmi;
|
|
+MODULE_PARM_DESC(force, "bool: force module to load succesfully");
|
|
+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
|
|
+module_param(force, bool, 0600);
|
|
+module_param(force_nmi, bool, 0600);
|
|
+
|
|
+/*
|
|
+ * - upper 32 bits are reserved
|
|
+ * - INT: APIC enable bit is reserved (forced to 1)
|
|
+ *
|
|
+ * RSVD: reserved bits are 1
|
|
+ */
|
|
+#define PFM_ATOM_PMC_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20))
|
|
+
|
|
+/*
|
|
+ * force Local APIC interrupt on overflow
|
|
+ * disable with NO_EMUL64
|
|
+ */
|
|
+#define PFM_ATOM_PMC_VAL (1ULL<<20)
|
|
+#define PFM_ATOM_NO64 (1ULL<<20)
|
|
+
|
|
+/*
|
|
+ * Atom counters are 40-bits. 40-bits can be read but only 31 can be written
|
|
+ * to due to a limitation of wrmsr. Bits [63-32] are sign extensions of bit 31.
|
|
+ * Bits [63-40] must not be set
|
|
+ *
|
|
+ * See IA-32 Intel Architecture Software developer manual Vol 3B chapter 18
|
|
+ */
|
|
+#define PFM_ATOM_PMD_WIDTH 31
|
|
+#define PFM_ATOM_PMD_RSVD (~((1ULL << 40)-1)) /* bits [63-40] are reserved */
|
|
+
|
|
+static void pfm_intel_atom_acquire_pmu_percpu(void);
|
|
+static void pfm_intel_atom_release_pmu_percpu(void);
|
|
+static void pfm_intel_atom_restore_pmcs(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+static int pfm_intel_atom_stop_save(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+static int pfm_intel_atom_has_ovfls(struct pfm_context *ctx);
|
|
+static void __kprobes pfm_intel_atom_quiesce(void);
|
|
+
|
|
+struct pfm_arch_pmu_info pfm_intel_atom_pmu_info = {
|
|
+ .stop_save = pfm_intel_atom_stop_save,
|
|
+ .has_ovfls = pfm_intel_atom_has_ovfls,
|
|
+ .quiesce = pfm_intel_atom_quiesce,
|
|
+ .restore_pmcs = pfm_intel_atom_restore_pmcs,
|
|
+ .acquire_pmu_percpu = pfm_intel_atom_acquire_pmu_percpu,
|
|
+ .release_pmu_percpu = pfm_intel_atom_release_pmu_percpu
|
|
+
|
|
+};
|
|
+
|
|
+#define PFM_ATOM_C(n) { \
|
|
+ .type = PFM_REG_I64, \
|
|
+ .desc = "PERFEVTSEL"#n, \
|
|
+ .dfl_val = PFM_ATOM_PMC_VAL, \
|
|
+ .rsvd_msk = PFM_ATOM_PMC_RSVD, \
|
|
+ .no_emul64_msk = PFM_ATOM_NO64, \
|
|
+ .hw_addr = MSR_P6_EVNTSEL0 + (n) \
|
|
+ }
|
|
+
|
|
+
|
|
+static struct pfm_regmap_desc pfm_intel_atom_pmc_desc[] = {
|
|
+/* pmc0 */ PFM_ATOM_C(0),
|
|
+/* pmc1 */ PFM_ATOM_C(1),
|
|
+/* pmc2 */ PMX_NA, PMX_NA,
|
|
+/* pmc4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc16 */ { .type = PFM_REG_I,
|
|
+ .desc = "FIXED_CTRL",
|
|
+ .dfl_val = 0x0000000000000888ULL, /* force PMI */
|
|
+ .rsvd_msk = 0xfffffffffffffcccULL, /* 3 fixed counters defined */
|
|
+ .no_emul64_msk = 0,
|
|
+ .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL
|
|
+ },
|
|
+/* pmc17 */{ .type = PFM_REG_W,
|
|
+ .desc = "PEBS_ENABLE",
|
|
+ .dfl_val = 0,
|
|
+ .rsvd_msk = 0xfffffffffffffffeULL,
|
|
+ .no_emul64_msk = 0,
|
|
+ .hw_addr = MSR_IA32_PEBS_ENABLE
|
|
+ }
|
|
+};
|
|
+#define PFM_ATOM_MAX_PMCS ARRAY_SIZE(pfm_intel_atom_pmc_desc)
|
|
+
|
|
+#define PFM_ATOM_D(n) /* counter PMD n: depends on pmc n */ \
|
|
+ { .type = PFM_REG_C, \
|
|
+ .desc = "PMC"#n, \
|
|
+ .rsvd_msk = PFM_ATOM_PMD_RSVD, \
|
|
+ .hw_addr = MSR_P6_PERFCTR0 + (n), \
|
|
+ .dep_pmcs[0] = 1ULL << (n) \
|
|
+ }
|
|
+
|
|
+#define PFM_ATOM_FD(n) /* fixed counter PMD n: depends on pmc16 */ \
|
|
+ { .type = PFM_REG_C, \
|
|
+ .desc = "FIXED_CTR"#n, \
|
|
+ .rsvd_msk = PFM_ATOM_PMD_RSVD, \
|
|
+ .hw_addr = MSR_CORE_PERF_FIXED_CTR0 + (n), \
|
|
+ .dep_pmcs[0] = 1ULL << 16 \
|
|
+ }
|
|
+
|
|
+static struct pfm_regmap_desc pfm_intel_atom_pmd_desc[] = {
|
|
+/* pmd0 */ PFM_ATOM_D(0),
|
|
+/* pmd1 */ PFM_ATOM_D(1),
|
|
+/* pmd2 */ PMX_NA,
|
|
+/* pmd3 */ PMX_NA,
|
|
+/* pmd4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmd8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmd12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmd16 */ PFM_ATOM_FD(0),
|
|
+/* pmd17 */ PFM_ATOM_FD(1),
|
|
+/* pmd18 */ PFM_ATOM_FD(2)
|
|
+};
|
|
+#define PFM_ATOM_MAX_PMDS ARRAY_SIZE(pfm_intel_atom_pmd_desc)
|
|
+
|
|
+static struct pfm_pmu_config pfm_intel_atom_pmu_conf;
|
|
+
|
|
+static int pfm_intel_atom_probe_pmu(void)
|
|
+{
|
|
+ if (force)
|
|
+ goto doit;
|
|
+
|
|
+ if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
|
+ return -1;
|
|
+
|
|
+ if (current_cpu_data.x86 != 6)
|
|
+ return -1;
|
|
+
|
|
+ if (current_cpu_data.x86_model != 28)
|
|
+ return -1;
|
|
+doit:
|
|
+ /*
|
|
+ * having APIC is mandatory, so disregard force option
|
|
+ */
|
|
+ if (!cpu_has_apic) {
|
|
+ PFM_INFO("no Local APIC, try rebooting with lapic option");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ PFM_INFO("detected Intel Atom PMU");
|
|
+
|
|
+ if (force_nmi)
|
|
+ pfm_intel_atom_pmu_info.flags |= PFM_X86_FL_USE_NMI;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_intel_atom_has_ovfls - check for pending overflow condition
|
|
+ * @ctx: context to work on
|
|
+ *
|
|
+ * detect if counters have overflowed.
|
|
+ * return:
|
|
+ * 0 : no overflow
|
|
+ * 1 : at least one overflow
|
|
+ */
|
|
+static int __kprobes pfm_intel_atom_has_ovfls(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_regmap_desc *d;
|
|
+ u64 ovf;
|
|
+
|
|
+ d = pfm_pmu_conf->pmd_desc;
|
|
+ /*
|
|
+ * read global overflow status register
|
|
+ * if sharing PMU, then not all bits are ours so must
|
|
+ * check only the ones we actually use
|
|
+ */
|
|
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf);
|
|
+
|
|
+ /*
|
|
+ * for pmd0, we also check PEBS overflow on bit 62
|
|
+ */
|
|
+ if ((d[0].type & PFM_REG_I) && (ovf & ((1ull << 62) | 1ull)))
|
|
+ return 1;
|
|
+
|
|
+ if ((d[1].type & PFM_REG_I) && (ovf & 2ull))
|
|
+ return 1;
|
|
+
|
|
+ if ((d[16].type & PFM_REG_I) && (ovf & (1ull << 32)))
|
|
+ return 1;
|
|
+
|
|
+ if ((d[17].type & PFM_REG_I) && (ovf & (2ull << 32)))
|
|
+ return 1;
|
|
+
|
|
+ if ((d[18].type & PFM_REG_I) && (ovf & (4ull << 32)))
|
|
+ return 1;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_intel_atom_stop_save - stop monitoring, collect pending overflow, save pmds
|
|
+ * @ctx: context to work on
|
|
+ * @set: active set
|
|
+ *
|
|
+ * return:
|
|
+ * 1: caller needs to save pmds
|
|
+ * 0: caller does not need to save pmds, they have been saved by this call
|
|
+ */
|
|
+static int pfm_intel_atom_stop_save(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+#define PFM_ATOM_WMASK (1ULL << 31)
|
|
+#define PFM_ATOM_OMASK ((1ULL << 31)-1)
|
|
+ u64 clear_ovf = 0;
|
|
+ u64 ovf, ovf2, val;
|
|
+
|
|
+ /*
|
|
+ * read global overflow status register
|
|
+ * if sharing PMU, then not all bits are ours so must
|
|
+ * check only the ones we actually use.
|
|
+ *
|
|
+ * XXX: Atom seems to have a bug with the stickiness of
|
|
+ * GLOBAL_STATUS. If we read GLOBAL_STATUS after we
|
|
+ * clear the generic counters, then their bits in
|
|
+ * GLOBAL_STATUS are cleared. This should not be the
|
|
+ * case according to architected PMU. To work around
|
|
+ * the problem, we read GLOBAL_STATUS BEFORE we stop
|
|
+ * all monitoring.
|
|
+ */
|
|
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf);
|
|
+
|
|
+ /*
|
|
+ * stop monitoring
|
|
+ */
|
|
+ if (test_bit(0, cast_ulp(set->used_pmcs)))
|
|
+ wrmsrl(MSR_P6_EVNTSEL0, 0);
|
|
+
|
|
+ if (test_bit(1, cast_ulp(set->used_pmcs)))
|
|
+ wrmsrl(MSR_P6_EVNTSEL1, 0);
|
|
+
|
|
+ if (test_bit(16, cast_ulp(set->used_pmcs)))
|
|
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
|
|
+
|
|
+ if (test_bit(17, cast_ulp(set->used_pmcs)))
|
|
+ wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
|
|
+
|
|
+ /*
|
|
+ * XXX: related to bug mentioned above
|
|
+ *
|
|
+ * read GLOBAL_STATUS again to avoid race condition
|
|
+ * with overflows happening after first read and
|
|
+ * before stop. That avoids missing overflows on
|
|
+ * the fixed counters and PEBS
|
|
+ */
|
|
+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf2);
|
|
+ ovf |= ovf2;
|
|
+
|
|
+ /*
|
|
+ * if we already have a pending overflow condition, we simply
|
|
+ * return to take care of it first.
|
|
+ */
|
|
+ if (set->npend_ovfls)
|
|
+ return 1;
|
|
+
|
|
+ /*
|
|
+ * check PMD 0,1,16,17,18 for overflow and save their value
|
|
+ */
|
|
+ if (test_bit(0, cast_ulp(set->used_pmds))) {
|
|
+ rdmsrl(MSR_P6_PERFCTR0, val);
|
|
+ if (ovf & ((1ull<<62)|1ull)) {
|
|
+ __set_bit(0, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ clear_ovf = (1ull << 62) | 1ull;
|
|
+ }
|
|
+ val = (set->pmds[0].value & ~PFM_ATOM_OMASK)
|
|
+ | (val & PFM_ATOM_OMASK);
|
|
+ set->pmds[0].value = val;
|
|
+ }
|
|
+
|
|
+ if (test_bit(1, cast_ulp(set->used_pmds))) {
|
|
+ rdmsrl(MSR_P6_PERFCTR1, val);
|
|
+ if (ovf & 2ull) {
|
|
+ __set_bit(1, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ clear_ovf |= 2ull;
|
|
+ }
|
|
+ val = (set->pmds[1].value & ~PFM_ATOM_OMASK)
|
|
+ | (val & PFM_ATOM_OMASK);
|
|
+ set->pmds[1].value = val;
|
|
+ }
|
|
+
|
|
+ if (test_bit(16, cast_ulp(set->used_pmds))) {
|
|
+ rdmsrl(MSR_CORE_PERF_FIXED_CTR0, val);
|
|
+ if (ovf & (1ull << 32)) {
|
|
+ __set_bit(16, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ clear_ovf |= 1ull << 32;
|
|
+ }
|
|
+ val = (set->pmds[16].value & ~PFM_ATOM_OMASK)
|
|
+ | (val & PFM_ATOM_OMASK);
|
|
+ set->pmds[16].value = val;
|
|
+ }
|
|
+
|
|
+ if (test_bit(17, cast_ulp(set->used_pmds))) {
|
|
+ rdmsrl(MSR_CORE_PERF_FIXED_CTR0+1, val);
|
|
+ if (ovf & (2ull << 32)) {
|
|
+ __set_bit(17, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ clear_ovf |= 2ull << 32;
|
|
+ }
|
|
+ val = (set->pmds[17].value & ~PFM_ATOM_OMASK)
|
|
+ | (val & PFM_ATOM_OMASK);
|
|
+ set->pmds[17].value = val;
|
|
+ }
|
|
+
|
|
+ if (test_bit(18, cast_ulp(set->used_pmds))) {
|
|
+ rdmsrl(MSR_CORE_PERF_FIXED_CTR0+2, val);
|
|
+ if (ovf & (4ull << 32)) {
|
|
+ __set_bit(18, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ clear_ovf |= 4ull << 32;
|
|
+ }
|
|
+ val = (set->pmds[18].value & ~PFM_ATOM_OMASK)
|
|
+ | (val & PFM_ATOM_OMASK);
|
|
+ set->pmds[18].value = val;
|
|
+ }
|
|
+
|
|
+ if (clear_ovf)
|
|
+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, clear_ovf);
|
|
+
|
|
+ /* 0 means: no need to save PMDs at upper level */
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_intel_atom_quiesce - stop monitoring without grabbing any lock
|
|
+ *
|
|
+ * called from NMI interrupt handler to immediately stop monitoring
|
|
+ * cannot grab any lock, including perfmon related locks
|
|
+ */
|
|
+static void __kprobes pfm_intel_atom_quiesce(void)
|
|
+{
|
|
+ /*
|
|
+ * quiesce PMU by clearing available registers that have
|
|
+ * the start/stop capability
|
|
+ */
|
|
+ if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_P6_EVNTSEL0, 0);
|
|
+
|
|
+ if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_P6_EVNTSEL1, 0);
|
|
+
|
|
+ if (test_bit(16, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
|
|
+
|
|
+ if (test_bit(17, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_intel_atom_restore_pmcs - reload PMC registers
|
|
+ * @ctx: context to restore from
|
|
+ * @set: current event set
|
|
+ *
|
|
+ * restores pmcs and also PEBS Data Save area pointer
|
|
+ */
|
|
+static void pfm_intel_atom_restore_pmcs(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ u64 clear_ovf = 0;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ /*
|
|
+ * must restore DS pointer before restoring PMCs
|
|
+ * as this can potentially reactivate monitoring
|
|
+ */
|
|
+ if (ctx_arch->flags.use_ds)
|
|
+ wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area);
|
|
+
|
|
+ if (test_bit(0, cast_ulp(set->used_pmcs))) {
|
|
+ wrmsrl(MSR_P6_EVNTSEL0, set->pmcs[0]);
|
|
+ clear_ovf = 1ull;
|
|
+ }
|
|
+
|
|
+ if (test_bit(1, cast_ulp(set->used_pmcs))) {
|
|
+ wrmsrl(MSR_P6_EVNTSEL1, set->pmcs[1]);
|
|
+ clear_ovf |= 2ull;
|
|
+ }
|
|
+
|
|
+ if (test_bit(16, cast_ulp(set->used_pmcs))) {
|
|
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, set->pmcs[16]);
|
|
+ clear_ovf |= 7ull << 32;
|
|
+ }
|
|
+
|
|
+ if (test_bit(17, cast_ulp(set->used_pmcs))) {
|
|
+ wrmsrl(MSR_IA32_PEBS_ENABLE, set->pmcs[17]);
|
|
+ clear_ovf |= 1ull << 62;
|
|
+ }
|
|
+
|
|
+ if (clear_ovf)
|
|
+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, clear_ovf);
|
|
+}
|
|
+
|
|
+static int pfm_intel_atom_pmc17_check(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ struct pfarg_pmc *req)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ /*
|
|
+ * if user activates PEBS_ENABLE, then we need to have a valid
|
|
+ * DS Area setup. This only happens when the PEBS sampling format is
|
|
+ * used in which case PFM_X86_USE_PEBS is set. We must reject all other
|
|
+ * requests.
|
|
+ *
|
|
+ * Otherwise we may pickup stale MSR_IA32_DS_AREA values. It appears
|
|
+ * that a value of 0 for this MSR does crash the system with
|
|
+ * PEBS_ENABLE=1.
|
|
+ */
|
|
+ if (!ctx_arch->flags.use_pebs && req->reg_value) {
|
|
+ PFM_DBG("pmc17 useable only with a PEBS sampling format");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+DEFINE_PER_CPU(u64, saved_global_ctrl);
|
|
+
|
|
+/**
|
|
+ * pfm_intel_atom_acquire_pmu_percpu - acquire PMU resource per CPU
|
|
+ *
|
|
+ * For Atom, it is necessary to enable all available
|
|
+ * registers. The firmware rightfully has the fixed counters
|
|
+ * disabled for backward compatibility with architectural perfmon
|
|
+ * v1
|
|
+ *
|
|
+ * This function is invoked on each online CPU
|
|
+ */
|
|
+static void pfm_intel_atom_acquire_pmu_percpu(void)
|
|
+{
|
|
+ struct pfm_regmap_desc *d;
|
|
+ u64 mask = 0;
|
|
+ unsigned int i;
|
|
+
|
|
+ /*
|
|
+ * build bitmask of registers that are available to
|
|
+ * us. In some cases, there may be fewer registers than
|
|
+ * what Atom supports due to sharing with other kernel
|
|
+ * subsystems, such as NMI
|
|
+ */
|
|
+ d = pfm_pmu_conf->pmd_desc;
|
|
+ for (i=0; i < 16; i++) {
|
|
+ if ((d[i].type & PFM_REG_I) == 0)
|
|
+ continue;
|
|
+ mask |= 1ull << i;
|
|
+ }
|
|
+ for (i=16; i < PFM_ATOM_MAX_PMDS; i++) {
|
|
+ if ((d[i].type & PFM_REG_I) == 0)
|
|
+ continue;
|
|
+ mask |= 1ull << (32+i-16);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * keep a local copy of the current MSR_CORE_PERF_GLOBAL_CTRL
|
|
+ */
|
|
+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, __get_cpu_var(saved_global_ctrl));
|
|
+
|
|
+ PFM_DBG("global=0x%llx set to 0x%llx",
|
|
+ __get_cpu_var(saved_global_ctrl),
|
|
+ mask);
|
|
+
|
|
+ /*
|
|
+ * enable all registers
|
|
+ *
|
|
+ * No need to quiesce PMU. If there is an overflow, it will be
|
|
+ * treated as spurious by the handler
|
|
+ */
|
|
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, mask);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_intel_atom_release_pmu_percpu - release PMU resource per CPU
|
|
+ *
|
|
+ * For Atom, we restore MSR_CORE_PERF_GLOBAL_CTRL to the value saved at acquire
|
|
+ */
|
|
+static void pfm_intel_atom_release_pmu_percpu(void)
|
|
+{
|
|
+ PFM_DBG("global_ctrl restored to 0x%llx",
|
|
+ __get_cpu_var(saved_global_ctrl));
|
|
+
|
|
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, __get_cpu_var(saved_global_ctrl));
|
|
+}
|
|
+
|
|
+static struct pfm_pmu_config pfm_intel_atom_pmu_conf = {
|
|
+ .pmu_name = "Intel Atom",
|
|
+ .pmd_desc = pfm_intel_atom_pmd_desc,
|
|
+ .counter_width = PFM_ATOM_PMD_WIDTH,
|
|
+ .num_pmc_entries = PFM_ATOM_MAX_PMCS,
|
|
+ .num_pmd_entries = PFM_ATOM_MAX_PMDS,
|
|
+ .pmc_desc = pfm_intel_atom_pmc_desc,
|
|
+ .probe_pmu = pfm_intel_atom_probe_pmu,
|
|
+ .version = "1.0",
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+ .pmc_write_check = pfm_intel_atom_pmc17_check,
|
|
+ .pmu_info = &pfm_intel_atom_pmu_info
|
|
+};
|
|
+
|
|
+static int __init pfm_intel_atom_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_intel_atom_pmu_conf);
|
|
+}
|
|
+
|
|
+static void __exit pfm_intel_atom_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_intel_atom_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_intel_atom_pmu_init_module);
|
|
+module_exit(pfm_intel_atom_pmu_cleanup_module);
|
|
diff --git a/arch/x86/perfmon/perfmon_intel_core.c b/arch/x86/perfmon/perfmon_intel_core.c
|
|
new file mode 100644
|
|
index 0000000..fddc436
|
|
--- /dev/null
|
|
+++ b/arch/x86/perfmon/perfmon_intel_core.c
|
|
@@ -0,0 +1,449 @@
|
|
+/*
|
|
+ * This file contains the Intel Core PMU registers description tables.
|
|
+ * Intel Core-based processors support architectural perfmon v2 + PEBS
|
|
+ *
|
|
+ * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/kprobes.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <linux/nmi.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_DESCRIPTION("Intel Core");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+static int force_nmi;
|
|
+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
|
|
+module_param(force_nmi, bool, 0600);
|
|
+
|
|
+/*
|
|
+ * - upper 32 bits are reserved
|
|
+ * - INT: APIC enable bit is reserved (forced to 1)
|
|
+ * - bit 21 is reserved
|
|
+ *
|
|
+ * RSVD: reserved bits must be 1
|
|
+ */
|
|
+#define PFM_CORE_PMC_RSVD ((~((1ULL<<32)-1)) \
|
|
+ | (1ULL<<20) \
|
|
+ | (1ULL<<21))
|
|
+
|
|
+/*
|
|
+ * Core counters are 40-bits
|
|
+ */
|
|
+#define PFM_CORE_CTR_RSVD (~((1ULL<<40)-1))
|
|
+
|
|
+/*
|
|
+ * force Local APIC interrupt on overflow
|
|
+ * disable with NO_EMUL64
|
|
+ */
|
|
+#define PFM_CORE_PMC_VAL (1ULL<<20)
|
|
+#define PFM_CORE_NO64 (1ULL<<20)
|
|
+
|
|
+#define PFM_CORE_NA { .reg_type = PFM_REGT_NA}
|
|
+
|
|
+#define PFM_CORE_CA(m, c, t) \
|
|
+ { \
|
|
+ .addrs[0] = m, \
|
|
+ .ctr = c, \
|
|
+ .reg_type = t \
|
|
+ }
|
|
+
|
|
+struct pfm_ds_area_intel_core {
|
|
+ u64 bts_buf_base;
|
|
+ u64 bts_index;
|
|
+ u64 bts_abs_max;
|
|
+ u64 bts_intr_thres;
|
|
+ u64 pebs_buf_base;
|
|
+ u64 pebs_index;
|
|
+ u64 pebs_abs_max;
|
|
+ u64 pebs_intr_thres;
|
|
+ u64 pebs_cnt_reset;
|
|
+};
|
|
+
|
|
+static void pfm_core_restore_pmcs(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+static int pfm_core_has_ovfls(struct pfm_context *ctx);
|
|
+static int pfm_core_stop_save(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+static void __kprobes pfm_core_quiesce(void);
|
|
+
|
|
+static u64 enable_mask[PFM_MAX_PMCS];
|
|
+static u16 max_enable;
|
|
+
|
|
+struct pfm_arch_pmu_info pfm_core_pmu_info = {
|
|
+ .stop_save = pfm_core_stop_save,
|
|
+ .has_ovfls = pfm_core_has_ovfls,
|
|
+ .quiesce = pfm_core_quiesce,
|
|
+ .restore_pmcs = pfm_core_restore_pmcs
|
|
+};
|
|
+
|
|
+static struct pfm_regmap_desc pfm_core_pmc_desc[] = {
|
|
+/* pmc0 */ {
|
|
+ .type = PFM_REG_I64,
|
|
+ .desc = "PERFEVTSEL0",
|
|
+ .dfl_val = PFM_CORE_PMC_VAL,
|
|
+ .rsvd_msk = PFM_CORE_PMC_RSVD,
|
|
+ .no_emul64_msk = PFM_CORE_NO64,
|
|
+ .hw_addr = MSR_P6_EVNTSEL0
|
|
+ },
|
|
+/* pmc1 */ {
|
|
+ .type = PFM_REG_I64,
|
|
+ .desc = "PERFEVTSEL1",
|
|
+ .dfl_val = PFM_CORE_PMC_VAL,
|
|
+ .rsvd_msk = PFM_CORE_PMC_RSVD,
|
|
+ .no_emul64_msk = PFM_CORE_NO64,
|
|
+ .hw_addr = MSR_P6_EVNTSEL1
|
|
+ },
|
|
+/* pmc2 */ PMX_NA, PMX_NA,
|
|
+/* pmc4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmc16 */ { .type = PFM_REG_I,
|
|
+ .desc = "FIXED_CTRL",
|
|
+ .dfl_val = 0x888ULL,
|
|
+ .rsvd_msk = 0xfffffffffffffcccULL,
|
|
+ .no_emul64_msk = 0,
|
|
+ .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL
|
|
+ },
|
|
+/* pmc17 */ { .type = PFM_REG_W,
|
|
+ .desc = "PEBS_ENABLE",
|
|
+ .dfl_val = 0,
|
|
+ .rsvd_msk = 0xfffffffffffffffeULL,
|
|
+ .no_emul64_msk = 0,
|
|
+ .hw_addr = MSR_IA32_PEBS_ENABLE
|
|
+ }
|
|
+};
|
|
+
|
|
+#define PFM_CORE_D(n) /* counter PMD n: depends on pmc n */ \
|
|
+ { .type = PFM_REG_C, \
|
|
+ .desc = "PMC"#n, \
|
|
+ .rsvd_msk = PFM_CORE_CTR_RSVD, \
|
|
+ .hw_addr = MSR_P6_PERFCTR0 + (n), \
|
|
+ .dep_pmcs[0] = 1ULL << (n) \
|
|
+ }
|
|
+
|
|
+#define PFM_CORE_FD(n) /* fixed counter PMD n: depends on pmc16 */ \
|
|
+ { .type = PFM_REG_C, \
|
|
+ .desc = "FIXED_CTR"#n, \
|
|
+ .rsvd_msk = PFM_CORE_CTR_RSVD, \
|
|
+ .hw_addr = MSR_CORE_PERF_FIXED_CTR0 + (n), \
|
|
+ .dep_pmcs[0] = 1ULL << 16 \
|
|
+ }
|
|
+
|
|
+static struct pfm_regmap_desc pfm_core_pmd_desc[] = {
|
|
+/* pmd0 */ PFM_CORE_D(0),
|
|
+/* pmd1 */ PFM_CORE_D(1),
|
|
+/* pmd2 */ PMX_NA, PMX_NA,
|
|
+/* pmd4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmd8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmd12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA,
|
|
+/* pmd16 */ PFM_CORE_FD(0),
|
|
+/* pmd17 */ PFM_CORE_FD(1),
|
|
+/* pmd18 */ PFM_CORE_FD(2)
|
|
+};
|
|
+#define PFM_CORE_NUM_PMCS ARRAY_SIZE(pfm_core_pmc_desc)
|
|
+#define PFM_CORE_NUM_PMDS ARRAY_SIZE(pfm_core_pmd_desc)
|
|
+
|
|
+static struct pfm_pmu_config pfm_core_pmu_conf;
|
|
+
|
|
+static int pfm_core_probe_pmu(void)
|
|
+{
|
|
+ /*
|
|
+ * Check for Intel Core processor explicitly.
|
|
+ * Checking for cpu_has_perfmon is not enough as this
|
|
+ * matches intel Core Duo/Core Solo but none supports
|
|
+ * PEBS.
|
|
+ *
|
|
+ * Intel Core = arch perfmon v2 + PEBS. Returns 0 on match, -1 otherwise.
|
|
+ */
|
|
+ if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
|
|
+ PFM_INFO("not an Intel processor");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ if (current_cpu_data.x86 != 6)
|
|
+ return -1;
|
|
+
|
|
+ switch (current_cpu_data.x86_model) {
|
|
+ case 15: /* Merom */
|
|
+ break;
|
|
+ case 23: /* Penryn */
|
|
+ break;
|
|
+ case 29: /* Dunnington */
|
|
+ break;
|
|
+ default:
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ if (!cpu_has_apic) {
|
|
+ PFM_INFO("no Local APIC, unsupported");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ PFM_INFO("nmi_watchdog=%d nmi_active=%d force_nmi=%d",
|
|
+ nmi_watchdog, atomic_read(&nmi_active), force_nmi);
|
|
+
|
|
+ /*
|
|
+ * Intel Core processors implement DS and PEBS, no need to check
|
|
+ */
|
|
+ if (cpu_has_pebs)
|
|
+ PFM_INFO("PEBS supported, enabled");
|
|
+
|
|
+ /*
|
|
+ * initialize bitmask of register with enable capability, i.e.,
|
|
+ * startstop. This is used to restrict the number of registers to
|
|
+ * touch on start/stop
|
|
+ * max_enable: number of bits to scan in enable_mask = highest + 1
|
|
+ *
|
|
+ * may be adjusted in pfm_arch_pmu_acquire()
|
|
+ */
|
|
+ __set_bit(0, cast_ulp(enable_mask));
|
|
+ __set_bit(1, cast_ulp(enable_mask));
|
|
+ __set_bit(16, cast_ulp(enable_mask));
|
|
+ __set_bit(17, cast_ulp(enable_mask));
|
|
+ max_enable = 17+1;
|
|
+
|
|
+ if (force_nmi)
|
|
+ pfm_core_pmu_info.flags |= PFM_X86_FL_USE_NMI;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_core_pmc17_check(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ struct pfarg_pmc *req)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ /*
|
|
+ * if user activates PEBS_ENABLE, then we need to have a valid
|
|
+ * DS Area setup. This only happens when the PEBS sampling format is
|
|
+ * used in which case PFM_X86_USE_PEBS is set. We must reject all other
|
|
+ * requests.
|
|
+ *
|
|
+ * Otherwise we may pickup stale MSR_IA32_DS_AREA values. It appears
|
|
+ * that a value of 0 for this MSR does crash the system with
|
|
+ * PEBS_ENABLE=1.
|
|
+ */
|
|
+ if (!ctx_arch->flags.use_pebs && req->reg_value) {
|
|
+ PFM_DBG("pmc17 useable only with a PEBS sampling format");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * detect if counters have overflowed.
|
|
+ * return:
|
|
+ * 0 : no overflow
|
|
+ * 1 : at least one overflow
|
|
+ *
|
|
+ * used by Intel Core-based processors
|
|
+ */
|
|
+static int __kprobes pfm_core_has_ovfls(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+ u64 *cnt_mask;
|
|
+ u64 wmask, val;
|
|
+ u16 i, num;
|
|
+
|
|
+ pmu_info = &pfm_core_pmu_info;
|
|
+ cnt_mask = ctx->regs.cnt_pmds;
|
|
+ num = ctx->regs.num_counters;
|
|
+ wmask = 1ULL << pfm_pmu_conf->counter_width;
|
|
+
|
|
+ for (i = 0; num; i++) {
|
|
+ if (test_bit(i, cast_ulp(cnt_mask))) {
|
|
+ rdmsrl(pfm_core_pmd_desc[i].hw_addr, val);
|
|
+ if (!(val & wmask))
|
|
+ return 1;
|
|
+ num--;
|
|
+ }
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_core_stop_save(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct pfm_ds_area_intel_core *ds = NULL;
|
|
+ u64 used_mask[PFM_PMC_BV];
|
|
+ u64 *cnt_mask;
|
|
+ u64 val, wmask, ovfl_mask;
|
|
+ u16 count, has_ovfl;
|
|
+ u16 i, pebs_idx = ~0;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ wmask = 1ULL << pfm_pmu_conf->counter_width;
|
|
+
|
|
+ /*
|
|
+ * used enable pmc bitmask
|
|
+ */
|
|
+ bitmap_and(cast_ulp(used_mask),
|
|
+ cast_ulp(set->used_pmcs),
|
|
+ cast_ulp(enable_mask),
|
|
+ max_enable);
|
|
+
|
|
+ count = bitmap_weight(cast_ulp(used_mask), max_enable);
|
|
+ /*
|
|
+ * stop monitoring
|
|
+ * Unfortunately, this is very expensive!
|
|
+ * wrmsrl() is serializing.
|
|
+ */
|
|
+ for (i = 0; count; i++) {
|
|
+ if (test_bit(i, cast_ulp(used_mask))) {
|
|
+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0);
|
|
+ count--;
|
|
+ }
|
|
+ }
|
|
+ /*
|
|
+ * if we already have a pending overflow condition, we simply
|
|
+ * return to take care of this first.
|
|
+ */
|
|
+ if (set->npend_ovfls)
|
|
+ return 1;
|
|
+
|
|
+ ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+ cnt_mask = ctx->regs.cnt_pmds;
|
|
+
|
|
+ if (ctx_arch->flags.use_pebs) {
|
|
+ ds = ctx_arch->ds_area;
|
|
+ pebs_idx = 0; /* PMC0/PMD0 */
|
|
+ PFM_DBG("ds=%p pebs_idx=0x%llx thres=0x%llx",
|
|
+ ds,
|
|
+ (unsigned long long)ds->pebs_index,
|
|
+ (unsigned long long)ds->pebs_intr_thres);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Check for pending overflows and save PMDs (combo)
|
|
+ * We employ used_pmds and not intr_pmds because we must
|
|
+ * also saved on PMD registers.
|
|
+ * Must check for counting PMDs because of virtual PMDs
|
|
+ *
|
|
+ * XXX: should use the ovf_status register instead, yet
|
|
+ * we would have to check if NMI is used and fallback
|
|
+ * to individual pmd inspection.
|
|
+ */
|
|
+ count = set->nused_pmds;
|
|
+
|
|
+ for (i = 0; count; i++) {
|
|
+ if (test_bit(i, cast_ulp(set->used_pmds))) {
|
|
+ val = pfm_arch_read_pmd(ctx, i);
|
|
+ if (likely(test_bit(i, cast_ulp(cnt_mask)))) {
|
|
+ if (i == pebs_idx)
|
|
+ has_ovfl = (ds->pebs_index >=
|
|
+ ds->pebs_intr_thres);
|
|
+ else
|
|
+ has_ovfl = !(val & wmask);
|
|
+ if (has_ovfl) {
|
|
+ __set_bit(i, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ val = (set->pmds[i].value & ~ovfl_mask)
|
|
+ | (val & ovfl_mask);
|
|
+ }
|
|
+ set->pmds[i].value = val;
|
|
+ count--;
|
|
+ }
|
|
+ }
|
|
+ /* 0 means: no need to save PMDs at upper level */
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_core_quiesce - stop monitoring without grabbing any lock
|
|
+ *
|
|
+ * called from NMI interrupt handler to immediately stop monitoring
|
|
+ * cannot grab any lock, including perfmon related locks
|
|
+ */
|
|
+static void __kprobes pfm_core_quiesce(void)
|
|
+{
|
|
+ /*
|
|
+ * quiesce PMU by clearing available registers that have
|
|
+ * the start/stop capability
|
|
+ */
|
|
+ if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_P6_EVNTSEL0, 0);
|
|
+ if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_P6_EVNTSEL1, 0);
|
|
+ if (test_bit(16, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
|
|
+ if (test_bit(17, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+ wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
|
|
+}
|
|
+/**
|
|
+ * pfm_core_restore_pmcs - reload PMC registers
|
|
+ * @ctx: context to restore from
|
|
+ * @set: current event set
|
|
+ *
|
|
+ * optimized version of pfm_arch_restore_pmcs(). On Core, we can
|
|
+ * afford to only restore the pmcs registers we use, because they are
|
|
+ * all independent from each other.
|
|
+ */
|
|
+static void pfm_core_restore_pmcs(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ u64 *mask;
|
|
+ u16 i, num;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ /*
|
|
+ * must restore DS pointer before restoring PMCs
|
|
+ * as this can potentially reactivate monitoring
|
|
+ */
|
|
+ if (ctx_arch->flags.use_ds)
|
|
+ wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area);
|
|
+
|
|
+ mask = set->used_pmcs;
|
|
+ num = set->nused_pmcs;
|
|
+ for (i = 0; num; i++) {
|
|
+ if (test_bit(i, cast_ulp(mask))) {
|
|
+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, set->pmcs[i]);
|
|
+ num--;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Counters may have model-specific width which can be probed using
|
|
+ * the CPUID.0xa leaf. Yet, the documentation says: "
|
|
+ * In the initial implementation, only the read bit width is reported
|
|
+ * by CPUID, write operations are limited to the low 32 bits.
|
|
+ * Bits [w-32] are sign extensions of bit 31. As such the effective width
|
|
+ * of a counter is 31 bits only.
|
|
+ */
|
|
+static struct pfm_pmu_config pfm_core_pmu_conf = {
|
|
+ .pmu_name = "Intel Core",
|
|
+ .pmd_desc = pfm_core_pmd_desc,
|
|
+ .counter_width = 31,
|
|
+ .num_pmc_entries = PFM_CORE_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_CORE_NUM_PMDS,
|
|
+ .pmc_desc = pfm_core_pmc_desc,
|
|
+ .probe_pmu = pfm_core_probe_pmu,
|
|
+ .version = "1.2",
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+ .pmu_info = &pfm_core_pmu_info,
|
|
+ .pmc_write_check = pfm_core_pmc17_check
|
|
+};
|
|
+
|
|
+static int __init pfm_core_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_core_pmu_conf);
|
|
+}
|
|
+
|
|
+static void __exit pfm_core_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_core_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_core_pmu_init_module);
|
|
+module_exit(pfm_core_pmu_cleanup_module);
|
|
diff --git a/arch/x86/perfmon/perfmon_p4.c b/arch/x86/perfmon/perfmon_p4.c
|
|
new file mode 100644
|
|
index 0000000..1ffcf3c
|
|
--- /dev/null
|
|
+++ b/arch/x86/perfmon/perfmon_p4.c
|
|
@@ -0,0 +1,913 @@
|
|
+/*
|
|
+ * This file contains the P4/Xeon PMU register description tables
|
|
+ * for both 32 and 64 bit modes.
|
|
+ *
|
|
+ * Copyright (c) 2005 Intel Corporation
|
|
+ * Contributed by Bryan Wilkerson <bryan.p.wilkerson@intel.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <linux/kprobes.h>
|
|
+#include <linux/nmi.h>
|
|
+#include <asm/msr.h>
|
|
+#include <asm/apic.h>
|
|
+
|
|
+MODULE_AUTHOR("Bryan Wilkerson <bryan.p.wilkerson@intel.com>");
|
|
+MODULE_DESCRIPTION("P4/Xeon/EM64T PMU description table");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+static int force;
|
|
+MODULE_PARM_DESC(force, "bool: force module to load succesfully");
|
|
+module_param(force, bool, 0600);
|
|
+
|
|
+static int force_nmi;
|
|
+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
|
|
+module_param(force_nmi, bool, 0600);
|
|
+
|
|
+/*
|
|
+ * For extended register information in addition to address that is used
|
|
+ * at runtime to figure out the mapping of reg addresses to logical procs
|
|
+ * and association of registers to hardware specific features
|
|
+ */
|
|
+struct pfm_p4_regmap {
|
|
+ /*
|
|
+ * one each for the logical CPUs. Index 0 corresponds to T0 and
|
|
+ * index 1 corresponds to T1. Index 1 can be zero if no T1
|
|
+ * complement reg exists.
|
|
+ */
|
|
+ unsigned long addrs[2]; /* 2 = number of threads */
|
|
+ unsigned int ctr; /* for CCCR/PERFEVTSEL, associated counter */
|
|
+ unsigned int reg_type;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * bitmask for pfm_p4_regmap.reg_type
|
|
+ */
|
|
+#define PFM_REGT_NA 0x0000 /* not available */
|
|
+#define PFM_REGT_EN 0x0001 /* has enable bit (cleared on ctxsw) */
|
|
+#define PFM_REGT_ESCR 0x0002 /* P4: ESCR */
|
|
+#define PFM_REGT_CCCR 0x0004 /* P4: CCCR */
|
|
+#define PFM_REGT_PEBS 0x0010 /* PEBS related */
|
|
+#define PFM_REGT_NOHT 0x0020 /* unavailable with HT */
|
|
+#define PFM_REGT_CTR 0x0040 /* counter */
|
|
+
|
|
+/*
|
|
+ * architecture specific context extension.
|
|
+ * located at: (struct pfm_arch_context *)(ctx+1)
|
|
+ */
|
|
+struct pfm_arch_p4_context {
|
|
+ u32 npend_ovfls; /* P4 NMI #pending ovfls */
|
|
+ u32 reserved;
|
|
+ u64 povfl_pmds[PFM_PMD_BV]; /* P4 NMI overflowed counters */
|
|
+ u64 saved_cccrs[PFM_MAX_PMCS];
|
|
+};
|
|
+
|
|
+/*
|
|
+ * ESCR reserved bitmask:
|
|
+ * - bits 31 - 63 reserved
|
|
+ * - T1_OS and T1_USR bits are reserved - set depending on logical proc
|
|
+ * user mode application should use T0_OS and T0_USR to indicate
|
|
+ * RSVD: reserved bits must be 1
|
|
+ */
|
|
+#define PFM_ESCR_RSVD ~0x000000007ffffffcULL
|
|
+
|
|
+/*
|
|
+ * CCCR default value:
|
|
+ * - OVF_PMI_T0=1 (bit 26)
|
|
+ * - OVF_PMI_T1=0 (bit 27) (set if necessary in __pfm_write_reg_p4())
|
|
+ * - bits 16-17 are set to 3, all other bits are zero
|
|
+ *
|
|
+ * OVF_PMI is forced to zero if PFM_REGFL_NO_EMUL64 is set on CCCR
|
|
+ */
|
|
+#define PFM_CCCR_DFL ((1ULL<<26) | (3ULL<<16))
|
|
+
|
|
+/*
|
|
+ * CCCR reserved fields:
|
|
+ * - bits 0-11, 16-17, 25-29, 31-63
|
|
+ * - OVF_PMI (26-27), override with REGFL_NO_EMUL64
|
|
+ *
|
|
+ * RSVD: reserved bits must be 1
|
|
+ */
|
|
+#define PFM_CCCR_RSVD ~((0xfull<<12) \
|
|
+ | (0x7full<<18) \
|
|
+ | (0x1ull<<30))
|
|
+
|
|
+#define PFM_P4_NO64 (3ULL<<26) /* use 3 even in non HT mode */
|
|
+
|
|
+#define PEBS_PMD 8 /* thread0: IQ_CTR4, thread1: IQ_CTR5 */
|
|
+
|
|
+/*
|
|
+ * With HyperThreading enabled:
|
|
+ *
|
|
+ * The ESCRs and CCCRs are divided in half with the top half
|
|
+ * belonging to logical processor 0 and the bottom half going to
|
|
+ * logical processor 1. Thus only half of the PMU resources are
|
|
+ * accessible to applications.
|
|
+ *
|
|
+ * PEBS is not available due to the fact that:
|
|
+ * - MSR_PEBS_MATRIX_VERT is shared between the threads
|
|
+ * - IA32_PEBS_ENABLE is shared between the threads
|
|
+ *
|
|
+ * With HyperThreading disabled:
|
|
+ *
|
|
+ * The full set of PMU resources is exposed to applications.
|
|
+ *
|
|
+ * The mapping is chosen such that PMCxx -> MSR is the same
|
|
+ * in HT and non HT mode, if register is present in HT mode.
|
|
+ *
|
|
+ */
|
|
+#define PFM_REGT_NHTESCR (PFM_REGT_ESCR|PFM_REGT_NOHT)
|
|
+#define PFM_REGT_NHTCCCR (PFM_REGT_CCCR|PFM_REGT_NOHT|PFM_REGT_EN)
|
|
+#define PFM_REGT_NHTPEBS (PFM_REGT_PEBS|PFM_REGT_NOHT|PFM_REGT_EN)
|
|
+#define PFM_REGT_NHTCTR (PFM_REGT_CTR|PFM_REGT_NOHT)
|
|
+#define PFM_REGT_ENAC (PFM_REGT_CCCR|PFM_REGT_EN)
|
|
+
|
|
+static void pfm_p4_write_pmc(struct pfm_context *ctx, unsigned int cnum, u64 value);
|
|
+static void pfm_p4_write_pmd(struct pfm_context *ctx, unsigned int cnum, u64 value);
|
|
+static u64 pfm_p4_read_pmd(struct pfm_context *ctx, unsigned int cnum);
|
|
+static u64 pfm_p4_read_pmc(struct pfm_context *ctx, unsigned int cnum);
|
|
+static int pfm_p4_create_context(struct pfm_context *ctx, u32 ctx_flags);
|
|
+static void pfm_p4_free_context(struct pfm_context *ctx);
|
|
+static int pfm_p4_has_ovfls(struct pfm_context *ctx);
|
|
+static int pfm_p4_stop_save(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+static void pfm_p4_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+static void pfm_p4_nmi_copy_state(struct pfm_context *ctx);
|
|
+static void __kprobes pfm_p4_quiesce(void);
|
|
+
|
|
+static u64 enable_mask[PFM_MAX_PMCS];
|
|
+static u16 max_enable;
|
|
+
|
|
+static struct pfm_p4_regmap pmc_addrs[PFM_MAX_PMCS] = {
|
|
+ /*pmc 0 */ {{MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1}, 0, PFM_REGT_ESCR}, /* BPU_ESCR0,1 */
|
|
+ /*pmc 1 */ {{MSR_P4_IS_ESCR0, MSR_P4_IS_ESCR1}, 0, PFM_REGT_ESCR}, /* IS_ESCR0,1 */
|
|
+ /*pmc 2 */ {{MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1}, 0, PFM_REGT_ESCR}, /* MOB_ESCR0,1 */
|
|
+ /*pmc 3 */ {{MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1}, 0, PFM_REGT_ESCR}, /* ITLB_ESCR0,1 */
|
|
+ /*pmc 4 */ {{MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1}, 0, PFM_REGT_ESCR}, /* PMH_ESCR0,1 */
|
|
+ /*pmc 5 */ {{MSR_P4_IX_ESCR0, MSR_P4_IX_ESCR1}, 0, PFM_REGT_ESCR}, /* IX_ESCR0,1 */
|
|
+ /*pmc 6 */ {{MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1}, 0, PFM_REGT_ESCR}, /* FSB_ESCR0,1 */
|
|
+ /*pmc 7 */ {{MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1}, 0, PFM_REGT_ESCR}, /* BSU_ESCR0,1 */
|
|
+ /*pmc 8 */ {{MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1}, 0, PFM_REGT_ESCR}, /* MS_ESCR0,1 */
|
|
+ /*pmc 9 */ {{MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1}, 0, PFM_REGT_ESCR}, /* TC_ESCR0,1 */
|
|
+ /*pmc 10*/ {{MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1}, 0, PFM_REGT_ESCR}, /* TBPU_ESCR0,1 */
|
|
+ /*pmc 11*/ {{MSR_P4_FLAME_ESCR0, MSR_P4_FLAME_ESCR1}, 0, PFM_REGT_ESCR}, /* FLAME_ESCR0,1 */
|
|
+ /*pmc 12*/ {{MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1}, 0, PFM_REGT_ESCR}, /* FIRM_ESCR0,1 */
|
|
+ /*pmc 13*/ {{MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1}, 0, PFM_REGT_ESCR}, /* SAAT_ESCR0,1 */
|
|
+ /*pmc 14*/ {{MSR_P4_U2L_ESCR0, MSR_P4_U2L_ESCR1}, 0, PFM_REGT_ESCR}, /* U2L_ESCR0,1 */
|
|
+ /*pmc 15*/ {{MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1}, 0, PFM_REGT_ESCR}, /* DAC_ESCR0,1 */
|
|
+ /*pmc 16*/ {{MSR_P4_IQ_ESCR0, MSR_P4_IQ_ESCR1}, 0, PFM_REGT_ESCR}, /* IQ_ESCR0,1 (only model 1 and 2) */
|
|
+ /*pmc 17*/ {{MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1}, 0, PFM_REGT_ESCR}, /* ALF_ESCR0,1 */
|
|
+ /*pmc 18*/ {{MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1}, 0, PFM_REGT_ESCR}, /* RAT_ESCR0,1 */
|
|
+ /*pmc 19*/ {{MSR_P4_SSU_ESCR0, 0}, 0, PFM_REGT_ESCR}, /* SSU_ESCR0 */
|
|
+ /*pmc 20*/ {{MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1}, 0, PFM_REGT_ESCR}, /* CRU_ESCR0,1 */
|
|
+ /*pmc 21*/ {{MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3}, 0, PFM_REGT_ESCR}, /* CRU_ESCR2,3 */
|
|
+ /*pmc 22*/ {{MSR_P4_CRU_ESCR4, MSR_P4_CRU_ESCR5}, 0, PFM_REGT_ESCR}, /* CRU_ESCR4,5 */
|
|
+
|
|
+ /*pmc 23*/ {{MSR_P4_BPU_CCCR0, MSR_P4_BPU_CCCR2}, 0, PFM_REGT_ENAC}, /* BPU_CCCR0,2 */
|
|
+ /*pmc 24*/ {{MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3}, 1, PFM_REGT_ENAC}, /* BPU_CCCR1,3 */
|
|
+ /*pmc 25*/ {{MSR_P4_MS_CCCR0, MSR_P4_MS_CCCR2}, 2, PFM_REGT_ENAC}, /* MS_CCCR0,2 */
|
|
+ /*pmc 26*/ {{MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3}, 3, PFM_REGT_ENAC}, /* MS_CCCR1,3 */
|
|
+ /*pmc 27*/ {{MSR_P4_FLAME_CCCR0, MSR_P4_FLAME_CCCR2}, 4, PFM_REGT_ENAC}, /* FLAME_CCCR0,2 */
|
|
+ /*pmc 28*/ {{MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3}, 5, PFM_REGT_ENAC}, /* FLAME_CCCR1,3 */
|
|
+ /*pmc 29*/ {{MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR2}, 6, PFM_REGT_ENAC}, /* IQ_CCCR0,2 */
|
|
+ /*pmc 30*/ {{MSR_P4_IQ_CCCR1, MSR_P4_IQ_CCCR3}, 7, PFM_REGT_ENAC}, /* IQ_CCCR1,3 */
|
|
+ /*pmc 31*/ {{MSR_P4_IQ_CCCR4, MSR_P4_IQ_CCCR5}, 8, PFM_REGT_ENAC}, /* IQ_CCCR4,5 */
|
|
+ /* non HT extensions */
|
|
+ /*pmc 32*/ {{MSR_P4_BPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BPU_ESCR1 */
|
|
+ /*pmc 33*/ {{MSR_P4_IS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IS_ESCR1 */
|
|
+ /*pmc 34*/ {{MSR_P4_MOB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MOB_ESCR1 */
|
|
+ /*pmc 35*/ {{MSR_P4_ITLB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ITLB_ESCR1 */
|
|
+ /*pmc 36*/ {{MSR_P4_PMH_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* PMH_ESCR1 */
|
|
+ /*pmc 37*/ {{MSR_P4_IX_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IX_ESCR1 */
|
|
+ /*pmc 38*/ {{MSR_P4_FSB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FSB_ESCR1 */
|
|
+ /*pmc 39*/ {{MSR_P4_BSU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BSU_ESCR1 */
|
|
+ /*pmc 40*/ {{MSR_P4_MS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MS_ESCR1 */
|
|
+ /*pmc 41*/ {{MSR_P4_TC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TC_ESCR1 */
|
|
+ /*pmc 42*/ {{MSR_P4_TBPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TBPU_ESCR1 */
|
|
+ /*pmc 43*/ {{MSR_P4_FLAME_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FLAME_ESCR1 */
|
|
+ /*pmc 44*/ {{MSR_P4_FIRM_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FIRM_ESCR1 */
|
|
+ /*pmc 45*/ {{MSR_P4_SAAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* SAAT_ESCR1 */
|
|
+ /*pmc 46*/ {{MSR_P4_U2L_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* U2L_ESCR1 */
|
|
+ /*pmc 47*/ {{MSR_P4_DAC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* DAC_ESCR1 */
|
|
+ /*pmc 48*/ {{MSR_P4_IQ_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IQ_ESCR1 (only model 1 and 2) */
|
|
+ /*pmc 49*/ {{MSR_P4_ALF_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ALF_ESCR1 */
|
|
+ /*pmc 50*/ {{MSR_P4_RAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* RAT_ESCR1 */
|
|
+ /*pmc 51*/ {{MSR_P4_CRU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR1 */
|
|
+ /*pmc 52*/ {{MSR_P4_CRU_ESCR3, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR3 */
|
|
+ /*pmc 53*/ {{MSR_P4_CRU_ESCR5, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR5 */
|
|
+ /*pmc 54*/ {{MSR_P4_BPU_CCCR2, 0}, 9, PFM_REGT_NHTCCCR}, /* BPU_CCCR2 */
|
|
+ /*pmc 55*/ {{MSR_P4_BPU_CCCR3, 0}, 10, PFM_REGT_NHTCCCR}, /* BPU_CCCR3 */
|
|
+ /*pmc 56*/ {{MSR_P4_MS_CCCR2, 0}, 11, PFM_REGT_NHTCCCR}, /* MS_CCCR2 */
|
|
+ /*pmc 57*/ {{MSR_P4_MS_CCCR3, 0}, 12, PFM_REGT_NHTCCCR}, /* MS_CCCR3 */
|
|
+ /*pmc 58*/ {{MSR_P4_FLAME_CCCR2, 0}, 13, PFM_REGT_NHTCCCR}, /* FLAME_CCCR2 */
|
|
+ /*pmc 59*/ {{MSR_P4_FLAME_CCCR3, 0}, 14, PFM_REGT_NHTCCCR}, /* FLAME_CCCR3 */
|
|
+ /*pmc 60*/ {{MSR_P4_IQ_CCCR2, 0}, 15, PFM_REGT_NHTCCCR}, /* IQ_CCCR2 */
|
|
+ /*pmc 61*/ {{MSR_P4_IQ_CCCR3, 0}, 16, PFM_REGT_NHTCCCR}, /* IQ_CCCR3 */
|
|
+ /*pmc 62*/ {{MSR_P4_IQ_CCCR5, 0}, 17, PFM_REGT_NHTCCCR}, /* IQ_CCCR5 */
|
|
+ /*pmc 63*/ {{0x3f2, 0}, 0, PFM_REGT_NHTPEBS},/* PEBS_MATRIX_VERT */
|
|
+ /*pmc 64*/ {{0x3f1, 0}, 0, PFM_REGT_NHTPEBS} /* PEBS_ENABLE */
|
|
+};
|
|
+
|
|
+static struct pfm_p4_regmap pmd_addrs[PFM_MAX_PMDS] = {
|
|
+ /*pmd 0 */ {{MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_PERFCTR2}, 0, PFM_REGT_CTR}, /* BPU_CTR0,2 */
|
|
+ /*pmd 1 */ {{MSR_P4_BPU_PERFCTR1, MSR_P4_BPU_PERFCTR3}, 0, PFM_REGT_CTR}, /* BPU_CTR1,3 */
|
|
+ /*pmd 2 */ {{MSR_P4_MS_PERFCTR0, MSR_P4_MS_PERFCTR2}, 0, PFM_REGT_CTR}, /* MS_CTR0,2 */
|
|
+ /*pmd 3 */ {{MSR_P4_MS_PERFCTR1, MSR_P4_MS_PERFCTR3}, 0, PFM_REGT_CTR}, /* MS_CTR1,3 */
|
|
+ /*pmd 4 */ {{MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_PERFCTR2}, 0, PFM_REGT_CTR}, /* FLAME_CTR0,2 */
|
|
+ /*pmd 5 */ {{MSR_P4_FLAME_PERFCTR1, MSR_P4_FLAME_PERFCTR3}, 0, PFM_REGT_CTR}, /* FLAME_CTR1,3 */
|
|
+ /*pmd 6 */ {{MSR_P4_IQ_PERFCTR0, MSR_P4_IQ_PERFCTR2}, 0, PFM_REGT_CTR}, /* IQ_CTR0,2 */
|
|
+ /*pmd 7 */ {{MSR_P4_IQ_PERFCTR1, MSR_P4_IQ_PERFCTR3}, 0, PFM_REGT_CTR}, /* IQ_CTR1,3 */
|
|
+ /*pmd 8 */ {{MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_PERFCTR5}, 0, PFM_REGT_CTR}, /* IQ_CTR4,5 */
|
|
+ /*
|
|
+ * non HT extensions
|
|
+ */
|
|
+ /*pmd 9 */ {{MSR_P4_BPU_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR2 */
|
|
+ /*pmd 10*/ {{MSR_P4_BPU_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR3 */
|
|
+ /*pmd 11*/ {{MSR_P4_MS_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR2 */
|
|
+ /*pmd 12*/ {{MSR_P4_MS_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR3 */
|
|
+ /*pmd 13*/ {{MSR_P4_FLAME_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* FLAME_CTR2 */
|
|
+ /*pmd 14*/ {{MSR_P4_FLAME_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* FLAME_CTR3 */
|
|
+ /*pmd 15*/ {{MSR_P4_IQ_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR2 */
|
|
+ /*pmd 16*/ {{MSR_P4_IQ_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR3 */
|
|
+ /*pmd 17*/ {{MSR_P4_IQ_PERFCTR5, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR5 */
|
|
+};
|
|
+
|
|
+static struct pfm_arch_pmu_info pfm_p4_pmu_info = {
|
|
+ .write_pmc = pfm_p4_write_pmc,
|
|
+ .write_pmd = pfm_p4_write_pmd,
|
|
+ .read_pmc = pfm_p4_read_pmc,
|
|
+ .read_pmd = pfm_p4_read_pmd,
|
|
+ .create_context = pfm_p4_create_context,
|
|
+ .free_context = pfm_p4_free_context,
|
|
+ .has_ovfls = pfm_p4_has_ovfls,
|
|
+ .stop_save = pfm_p4_stop_save,
|
|
+ .restore_pmcs = pfm_p4_restore_pmcs,
|
|
+ .nmi_copy_state = pfm_p4_nmi_copy_state,
|
|
+ .quiesce = pfm_p4_quiesce
|
|
+};
|
|
+
|
|
+static struct pfm_regmap_desc pfm_p4_pmc_desc[] = {
|
|
+/* pmc0 */ PMC_D(PFM_REG_I, "BPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR0),
|
|
+/* pmc1 */ PMC_D(PFM_REG_I, "IS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IS_ESCR0),
|
|
+/* pmc2 */ PMC_D(PFM_REG_I, "MOB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR0),
|
|
+/* pmc3 */ PMC_D(PFM_REG_I, "ITLB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR0),
|
|
+/* pmc4 */ PMC_D(PFM_REG_I, "PMH_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR0),
|
|
+/* pmc5 */ PMC_D(PFM_REG_I, "IX_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR0),
|
|
+/* pmc6 */ PMC_D(PFM_REG_I, "FSB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR0),
|
|
+/* pmc7 */ PMC_D(PFM_REG_I, "BSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR0),
|
|
+/* pmc8 */ PMC_D(PFM_REG_I, "MS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR0),
|
|
+/* pmc9 */ PMC_D(PFM_REG_I, "TC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR0),
|
|
+/* pmc10 */ PMC_D(PFM_REG_I, "TBPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR0),
|
|
+/* pmc11 */ PMC_D(PFM_REG_I, "FLAME_ESCR0", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR0),
|
|
+/* pmc12 */ PMC_D(PFM_REG_I, "FIRM_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR0),
|
|
+/* pmc13 */ PMC_D(PFM_REG_I, "SAAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR0),
|
|
+/* pmc14 */ PMC_D(PFM_REG_I, "U2L_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR0),
|
|
+/* pmc15 */ PMC_D(PFM_REG_I, "DAC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR0),
|
|
+/* pmc16 */ PMC_D(PFM_REG_I, "IQ_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR0), /* only model 1 and 2*/
|
|
+/* pmc17 */ PMC_D(PFM_REG_I, "ALF_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR0),
|
|
+/* pmc18 */ PMC_D(PFM_REG_I, "RAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR0),
|
|
+/* pmc19 */ PMC_D(PFM_REG_I, "SSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SSU_ESCR0),
|
|
+/* pmc20 */ PMC_D(PFM_REG_I, "CRU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR0),
|
|
+/* pmc21 */ PMC_D(PFM_REG_I, "CRU_ESCR2" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR2),
|
|
+/* pmc22 */ PMC_D(PFM_REG_I, "CRU_ESCR4" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR4),
|
|
+/* pmc23 */ PMC_D(PFM_REG_I64, "BPU_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR0),
|
|
+/* pmc24 */ PMC_D(PFM_REG_I64, "BPU_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR1),
|
|
+/* pmc25 */ PMC_D(PFM_REG_I64, "MS_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR0),
|
|
+/* pmc26 */ PMC_D(PFM_REG_I64, "MS_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR1),
|
|
+/* pmc27 */ PMC_D(PFM_REG_I64, "FLAME_CCCR0", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR0),
|
|
+/* pmc28 */ PMC_D(PFM_REG_I64, "FLAME_CCCR1", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR1),
|
|
+/* pmc29 */ PMC_D(PFM_REG_I64, "IQ_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR0),
|
|
+/* pmc30 */ PMC_D(PFM_REG_I64, "IQ_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR1),
|
|
+/* pmc31 */ PMC_D(PFM_REG_I64, "IQ_CCCR4" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR4),
|
|
+ /* No HT extension */
|
|
+/* pmc32 */ PMC_D(PFM_REG_I, "BPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR1),
|
|
+/* pmc33 */ PMC_D(PFM_REG_I, "IS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IS_ESCR1),
|
|
+/* pmc34 */ PMC_D(PFM_REG_I, "MOB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR1),
|
|
+/* pmc35 */ PMC_D(PFM_REG_I, "ITLB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR1),
|
|
+/* pmc36 */ PMC_D(PFM_REG_I, "PMH_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR1),
|
|
+/* pmc37 */ PMC_D(PFM_REG_I, "IX_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR1),
|
|
+/* pmc38 */ PMC_D(PFM_REG_I, "FSB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR1),
|
|
+/* pmc39 */ PMC_D(PFM_REG_I, "BSU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR1),
|
|
+/* pmc40 */ PMC_D(PFM_REG_I, "MS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR1),
|
|
+/* pmc41 */ PMC_D(PFM_REG_I, "TC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR1),
|
|
+/* pmc42 */ PMC_D(PFM_REG_I, "TBPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR1),
|
|
+/* pmc43 */ PMC_D(PFM_REG_I, "FLAME_ESCR1", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR1),
|
|
+/* pmc44 */ PMC_D(PFM_REG_I, "FIRM_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR1),
|
|
+/* pmc45 */ PMC_D(PFM_REG_I, "SAAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR1),
|
|
+/* pmc46 */ PMC_D(PFM_REG_I, "U2L_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR1),
|
|
+/* pmc47 */ PMC_D(PFM_REG_I, "DAC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR1),
|
|
+/* pmc48 */ PMC_D(PFM_REG_I, "IQ_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR1), /* only model 1 and 2 */
|
|
+/* pmc49 */ PMC_D(PFM_REG_I, "ALF_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR1),
|
|
+/* pmc50 */ PMC_D(PFM_REG_I, "RAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR1),
|
|
+/* pmc51 */ PMC_D(PFM_REG_I, "CRU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR1),
|
|
+/* pmc52 */ PMC_D(PFM_REG_I, "CRU_ESCR3" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR3),
|
|
+/* pmc53 */ PMC_D(PFM_REG_I, "CRU_ESCR5" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR5),
|
|
+/* pmc54 */ PMC_D(PFM_REG_I64, "BPU_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR2),
|
|
+/* pmc55 */ PMC_D(PFM_REG_I64, "BPU_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR3),
|
|
+/* pmc56 */ PMC_D(PFM_REG_I64, "MS_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR2),
|
|
+/* pmc57 */ PMC_D(PFM_REG_I64, "MS_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR3),
|
|
+/* pmc58 */ PMC_D(PFM_REG_I64, "FLAME_CCCR2", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR2),
|
|
+/* pmc59 */ PMC_D(PFM_REG_I64, "FLAME_CCCR3", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR3),
|
|
+/* pmc60 */ PMC_D(PFM_REG_I64, "IQ_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR2),
|
|
+/* pmc61 */ PMC_D(PFM_REG_I64, "IQ_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR3),
|
|
+/* pmc62 */ PMC_D(PFM_REG_I64, "IQ_CCCR5" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR5),
|
|
+/* pmc63 */ PMC_D(PFM_REG_I, "PEBS_MATRIX_VERT", 0, 0xffffffffffffffecULL, 0, 0x3f2),
|
|
+/* pmc64 */ PMC_D(PFM_REG_I, "PEBS_ENABLE", 0, 0xfffffffff8ffe000ULL, 0, 0x3f1)
|
|
+};
|
|
+#define PFM_P4_NUM_PMCS ARRAY_SIZE(pfm_p4_pmc_desc)
|
|
+
|
|
+/*
|
|
+ * See section 15.10.6.6 for details about the IQ block
|
|
+ */
|
|
+static struct pfm_regmap_desc pfm_p4_pmd_desc[] = {
|
|
+/* pmd0 */ PMD_D(PFM_REG_C, "BPU_CTR0", MSR_P4_BPU_PERFCTR0),
|
|
+/* pmd1 */ PMD_D(PFM_REG_C, "BPU_CTR1", MSR_P4_BPU_PERFCTR1),
|
|
+/* pmd2 */ PMD_D(PFM_REG_C, "MS_CTR0", MSR_P4_MS_PERFCTR0),
|
|
+/* pmd3 */ PMD_D(PFM_REG_C, "MS_CTR1", MSR_P4_MS_PERFCTR1),
|
|
+/* pmd4 */ PMD_D(PFM_REG_C, "FLAME_CTR0", MSR_P4_FLAME_PERFCTR0),
|
|
+/* pmd5 */ PMD_D(PFM_REG_C, "FLAME_CTR1", MSR_P4_FLAME_PERFCTR1),
|
|
+/* pmd6 */ PMD_D(PFM_REG_C, "IQ_CTR0", MSR_P4_IQ_PERFCTR0),
|
|
+/* pmd7 */ PMD_D(PFM_REG_C, "IQ_CTR1", MSR_P4_IQ_PERFCTR1),
|
|
+/* pmd8 */ PMD_D(PFM_REG_C, "IQ_CTR4", MSR_P4_IQ_PERFCTR4),
|
|
+ /* no HT extension */
|
|
+/* pmd9 */ PMD_D(PFM_REG_C, "BPU_CTR2", MSR_P4_BPU_PERFCTR2),
|
|
+/* pmd10 */ PMD_D(PFM_REG_C, "BPU_CTR3", MSR_P4_BPU_PERFCTR3),
|
|
+/* pmd11 */ PMD_D(PFM_REG_C, "MS_CTR2", MSR_P4_MS_PERFCTR2),
|
|
+/* pmd12 */ PMD_D(PFM_REG_C, "MS_CTR3", MSR_P4_MS_PERFCTR3),
|
|
+/* pmd13 */ PMD_D(PFM_REG_C, "FLAME_CTR2", MSR_P4_FLAME_PERFCTR2),
|
|
+/* pmd14 */ PMD_D(PFM_REG_C, "FLAME_CTR3", MSR_P4_FLAME_PERFCTR3),
|
|
+/* pmd15 */ PMD_D(PFM_REG_C, "IQ_CTR2", MSR_P4_IQ_PERFCTR2),
|
|
+/* pmd16 */ PMD_D(PFM_REG_C, "IQ_CTR3", MSR_P4_IQ_PERFCTR3),
|
|
+/* pmd17 */ PMD_D(PFM_REG_C, "IQ_CTR5", MSR_P4_IQ_PERFCTR5)
|
|
+};
|
|
+#define PFM_P4_NUM_PMDS ARRAY_SIZE(pfm_p4_pmd_desc)
|
|
+
|
|
+/*
|
|
+ * Due to hotplug CPU support, threads may not necessarily
|
|
+ * be activated at the time the module is inserted. We need
|
|
+ * to check whether they could be activated by looking at
|
|
+ * the present CPU (present != online).
|
|
+ */
|
|
+static int pfm_p4_probe_pmu(void)
|
|
+{
|
|
+ unsigned int i;
|
|
+ int ht_enabled;
|
|
+
|
|
+ /*
|
|
+ * only works on Intel processors
|
|
+ */
|
|
+ if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
|
|
+ PFM_INFO("not running on Intel processor");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ if (current_cpu_data.x86 != 15) {
|
|
+ PFM_INFO("unsupported family=%d", current_cpu_data.x86);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ switch (current_cpu_data.x86_model) {
|
|
+ case 0 ... 2:
|
|
+ break;
|
|
+ case 3 ... 6:
|
|
+ /*
|
|
+ * IQ_ESCR0, IQ_ESCR1 only present on model 1, 2
|
|
+ */
|
|
+ pfm_p4_pmc_desc[16].type = PFM_REG_NA;
|
|
+ pfm_p4_pmc_desc[48].type = PFM_REG_NA;
|
|
+ break;
|
|
+ default:
|
|
+ /*
|
|
+ * do not know if they all work the same, so reject
|
|
+ * for now
|
|
+ */
|
|
+ if (!force) {
|
|
+ PFM_INFO("unsupported model %d",
|
|
+ current_cpu_data.x86_model);
|
|
+ return -1;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * check for local APIC (required)
|
|
+ */
|
|
+ if (!cpu_has_apic) {
|
|
+ PFM_INFO("no local APIC, unsupported");
|
|
+ return -1;
|
|
+ }
|
|
+#ifdef CONFIG_SMP
|
|
+ ht_enabled = (cpus_weight(__get_cpu_var(cpu_core_map))
|
|
+ / current_cpu_data.x86_max_cores) > 1;
|
|
+#else
|
|
+ ht_enabled = 0;
|
|
+#endif
|
|
+ if (cpu_has_ht) {
|
|
+
|
|
+ PFM_INFO("HyperThreading supported, status %s",
|
|
+ ht_enabled ? "on": "off");
|
|
+ /*
|
|
+ * disable registers not supporting HT
|
|
+ */
|
|
+ if (ht_enabled) {
|
|
+ PFM_INFO("disabling half the registers for HT");
|
|
+ for (i = 0; i < PFM_P4_NUM_PMCS; i++) {
|
|
+ if (pmc_addrs[(i)].reg_type & PFM_REGT_NOHT)
|
|
+ pfm_p4_pmc_desc[i].type = PFM_REG_NA;
|
|
+ }
|
|
+ for (i = 0; i < PFM_P4_NUM_PMDS; i++) {
|
|
+ if (pmd_addrs[(i)].reg_type & PFM_REGT_NOHT)
|
|
+ pfm_p4_pmd_desc[i].type = PFM_REG_NA;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (cpu_has_ds) {
|
|
+ PFM_INFO("Data Save Area (DS) supported");
|
|
+
|
|
+ if (cpu_has_pebs) {
|
|
+ /*
|
|
+ * PEBS does not work with HyperThreading enabled
|
|
+ */
|
|
+ if (ht_enabled)
|
|
+ PFM_INFO("PEBS supported, status off (because of HT)");
|
|
+ else
|
|
+ PFM_INFO("PEBS supported, status on");
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * build enable mask
|
|
+ */
|
|
+ for (i = 0; i < PFM_P4_NUM_PMCS; i++) {
|
|
+ if (pmc_addrs[(i)].reg_type & PFM_REGT_EN) {
|
|
+ __set_bit(i, cast_ulp(enable_mask));
|
|
+ max_enable = i + 1;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (force_nmi)
|
|
+ pfm_p4_pmu_info.flags |= PFM_X86_FL_USE_NMI;
|
|
+ return 0;
|
|
+}
|
|
+static inline int get_smt_id(void)
|
|
+{
|
|
+#ifdef CONFIG_SMP
|
|
+ int cpu = smp_processor_id();
|
|
+ return (cpu != first_cpu(__get_cpu_var(cpu_sibling_map)));
|
|
+#else
|
|
+ return 0;
|
|
+#endif
|
|
+}
|
|
+
|
|
+static void __pfm_write_reg_p4(const struct pfm_p4_regmap *xreg, u64 val)
|
|
+{
|
|
+ u64 pmi;
|
|
+ int smt_id;
|
|
+
|
|
+ smt_id = get_smt_id();
|
|
+ /*
|
|
+ * Write val to the MSR that xreg maps to the current HT thread.
|
|
+ * HT is only supported by P4-style PMU
|
|
+ * Adjust for T1 if necessary:
|
|
+ *
|
|
+ * - move the T0_OS/T0_USR bits into T1 slots
|
|
+ * - move the OVF_PMI_T0 bit into T1 slot
|
|
+ * (masks are 64-bit so 32-bit builds keep the upper half of val)
|
|
+ * The P4/EM64T T1 is cleared by description table.
|
|
+ * User only works with T0. Nothing is written if the address is 0.
|
|
+ */
|
|
+ if (smt_id) {
|
|
+ if (xreg->reg_type & PFM_REGT_ESCR) {
|
|
+
|
|
+ /* copy T0_USR & T0_OS to T1 */
|
|
+ val |= ((val & 0xc) >> 2);
|
|
+
|
|
+ /* clear bits T0_USR & T0_OS */
|
|
+ val &= ~0xc;
|
|
+
|
|
+ } else if (xreg->reg_type & PFM_REGT_CCCR) {
|
|
+ pmi = (val >> 26) & 0x1;
|
|
+ if (pmi) {
|
|
+ val &= ~(1ULL<<26);
|
|
+ val |= 1ULL<<27;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ if (xreg->addrs[smt_id])
|
|
+ wrmsrl(xreg->addrs[smt_id], val);
|
|
+}
|
|
+
|
|
+void __pfm_read_reg_p4(const struct pfm_p4_regmap *xreg, u64 *val)
|
|
+{
|
|
+ int smt_id;
|
|
+ /* read the current thread's MSR for xreg; *val is 0 if there is none */
|
|
+ smt_id = get_smt_id();
|
|
+
|
|
+ if (likely(xreg->addrs[smt_id])) {
|
|
+ rdmsrl(xreg->addrs[smt_id], *val);
|
|
+ /*
|
|
+ * HT is only supported by P4-style PMU
|
|
+ *
|
|
+ * move the Tx_OS and Tx_USR bits into
|
|
+ * T0 slots setting the T1 slots to zero
|
|
+ */
|
|
+ if (xreg->reg_type & PFM_REGT_ESCR) {
|
|
+ if (smt_id)
|
|
+ *val |= (((*val) & 0x3) << 2);
|
|
+
|
|
+ /*
|
|
+ * zero out bits that are reserved (including T1_OS and
|
|
+ * T1_USR): PFM_ESCR_RSVD has reserved bits set to 1
|
|
+ */
|
|
+ *val &= ~PFM_ESCR_RSVD;
|
|
+ }
|
|
+ } else {
|
|
+ *val = 0;
|
|
+ }
|
|
+}
|
|
+static void pfm_p4_write_pmc(struct pfm_context *ctx, unsigned int cnum, u64 value)
|
|
+{
|
|
+ __pfm_write_reg_p4(&pmc_addrs[cnum], value);
|
|
+}
|
|
+
|
|
+static void pfm_p4_write_pmd(struct pfm_context *ctx, unsigned int cnum, u64 value)
|
|
+{
|
|
+ __pfm_write_reg_p4(&pmd_addrs[cnum], value);
|
|
+}
|
|
+
|
|
+static u64 pfm_p4_read_pmd(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ u64 tmp;
|
|
+ __pfm_read_reg_p4(&pmd_addrs[cnum], &tmp);
|
|
+ return tmp;
|
|
+}
|
|
+
|
|
+static u64 pfm_p4_read_pmc(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ u64 tmp;
|
|
+ __pfm_read_reg_p4(&pmc_addrs[cnum], &tmp);
|
|
+ return tmp;
|
|
+}
|
|
+
|
|
+struct pfm_ds_area_p4 {
|
|
+ unsigned long bts_buf_base;
|
|
+ unsigned long bts_index;
|
|
+ unsigned long bts_abs_max;
|
|
+ unsigned long bts_intr_thres;
|
|
+ unsigned long pebs_buf_base;
|
|
+ unsigned long pebs_index;
|
|
+ unsigned long pebs_abs_max;
|
|
+ unsigned long pebs_intr_thres;
|
|
+ u64 pebs_cnt_reset;
|
|
+};
|
|
+
|
|
+
|
|
+static int pfm_p4_stop_save(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct pfm_ds_area_p4 *ds = NULL;
|
|
+ u64 used_mask[PFM_PMC_BV];
|
|
+ u16 i, j, count, pebs_idx = ~0;
|
|
+ u16 max_pmc;
|
|
+ u64 cccr, ctr1, ctr2, ovfl_mask;
|
|
+
|
|
+ pmu_info = &pfm_p4_pmu_info;
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ max_pmc = ctx->regs.max_pmc;
|
|
+ ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+
|
|
+ /*
|
|
+ * build used enable PMC bitmask
|
|
+ * if user did not set any CCCR, then mask is
|
|
+ * empty and there is nothing to do because nothing
|
|
+ * was started
|
|
+ */
|
|
+ bitmap_and(cast_ulp(used_mask),
|
|
+ cast_ulp(set->used_pmcs),
|
|
+ cast_ulp(enable_mask),
|
|
+ max_enable);
|
|
+
|
|
+ count = bitmap_weight(cast_ulp(used_mask), max_enable);
|
|
+
|
|
+ PFM_DBG_ovfl("npend=%u ena_mask=0x%llx u_pmcs=0x%llx count=%u num=%u",
|
|
+ set->npend_ovfls,
|
|
+ (unsigned long long)enable_mask[0],
|
|
+ (unsigned long long)set->used_pmcs[0],
|
|
+ count, max_enable);
|
|
+
|
|
+ /*
|
|
+ * ensures we do not destroy pending overflow
|
|
+ * information. If pended interrupts are already
|
|
+ * known, then we just stop monitoring.
|
|
+ */
|
|
+ if (set->npend_ovfls) {
|
|
+ /*
|
|
+ * clear enable bit
|
|
+ * unfortunately, this is very expensive!
|
|
+ */
|
|
+ for (i = 0; count; i++) {
|
|
+ if (test_bit(i, cast_ulp(used_mask))) {
|
|
+ __pfm_write_reg_p4(pmc_addrs+i, 0);
|
|
+ count--;
|
|
+ }
|
|
+ }
|
|
+ /* need save PMDs at upper level */
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ if (ctx_arch->flags.use_pebs) {
|
|
+ ds = ctx_arch->ds_area;
|
|
+ pebs_idx = PEBS_PMD;
|
|
+ PFM_DBG("ds=%p pebs_idx=0x%llx thres=0x%llx",
|
|
+ ds,
|
|
+ (unsigned long long)ds->pebs_index,
|
|
+ (unsigned long long)ds->pebs_intr_thres);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * stop monitoring AND collect pending overflow information AND
|
|
+ * save pmds.
|
|
+ *
|
|
+ * We need to access the CCCR twice, once to get overflow info
|
|
+ * and a second to stop monitoring (which destroys the OVF flag)
|
|
+ * Similarly, we need to read the counter twice to check whether
|
|
+ * it did overflow between the CCCR read and the CCCR write.
|
|
+ */
|
|
+ for (i = 0; count; i++) {
|
|
+ if (i != pebs_idx && test_bit(i, cast_ulp(used_mask))) {
|
|
+ /*
|
|
+ * controlled counter
|
|
+ */
|
|
+ j = pmc_addrs[i].ctr;
|
|
+
|
|
+ /* read CCCR (PMC) value */
|
|
+ __pfm_read_reg_p4(pmc_addrs+i, &cccr);
|
|
+
|
|
+ /* read counter (PMD) controlled by PMC */
|
|
+ __pfm_read_reg_p4(pmd_addrs+j, &ctr1);
|
|
+
|
|
+ /* clear CCCR value: stop counter but destroy OVF */
|
|
+ __pfm_write_reg_p4(pmc_addrs+i, 0);
|
|
+
|
|
+ /* read counter controlled by CCCR again */
|
|
+ __pfm_read_reg_p4(pmd_addrs+j, &ctr2);
|
|
+
|
|
+ /*
|
|
+ * there is an overflow if either:
|
|
+ * - CCCR.ovf is set (and we just cleared it)
|
|
+ * - ctr2 < ctr1
|
|
+ * in that case we set the bit corresponding to the
|
|
+ * overflowed PMD in povfl_pmds.
|
|
+ */
|
|
+ if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) {
|
|
+ __set_bit(j, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ ctr2 = (set->pmds[j].value & ~ovfl_mask) | (ctr2 & ovfl_mask);
|
|
+ set->pmds[j].value = ctr2;
|
|
+ count--;
|
|
+ }
|
|
+ }
|
|
+ /*
|
|
+ * check for PEBS buffer full and set the corresponding PMD overflow
|
|
+ */
|
|
+ if (ctx_arch->flags.use_pebs) {
|
|
+ PFM_DBG("ds=%p pebs_idx=0x%lx thres=0x%lx", ds, ds->pebs_index, ds->pebs_intr_thres);
|
|
+ if (ds->pebs_index >= ds->pebs_intr_thres
|
|
+ && test_bit(PEBS_PMD, cast_ulp(set->used_pmds))) {
|
|
+ __set_bit(PEBS_PMD, cast_ulp(set->povfl_pmds));
|
|
+ set->npend_ovfls++;
|
|
+ }
|
|
+ }
|
|
+ /* 0 means: no need to save the PMD at higher level */
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_p4_create_context(struct pfm_context *ctx, u32 ctx_flags)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ ctx_arch->data = kzalloc(sizeof(struct pfm_arch_p4_context), GFP_KERNEL);
|
|
+ if (!ctx_arch->data)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void pfm_p4_free_context(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ /*
|
|
+ * we do not check if P4, because it would be NULL and
|
|
+ * kfree can deal with NULL
|
|
+ */
|
|
+ kfree(ctx_arch->data);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * detect if counters have overflowed.
|
|
+ * return:
|
|
+ * 0 : no overflow
|
|
+ * 1 : at least one overflow
|
|
+ *
|
|
+ * used by Intel P4
|
|
+ */
|
|
+static int __kprobes pfm_p4_has_ovfls(struct pfm_context *ctx)
|
|
+{
|
|
+	struct pfm_p4_regmap *xrc, *xrd;
|
|
+	struct pfm_arch_context *ctx_arch;
|
|
+	struct pfm_arch_p4_context *p4;
|
|
+	u64 ena_mask[PFM_PMC_BV];
|
|
+	u64 cccr, ctr1, ctr2;
|
|
+	int n, i, j;
|
|
+
|
|
+	ctx_arch = pfm_ctx_arch(ctx);
|
|
+	xrc = pmc_addrs;
|
|
+	xrd = pmd_addrs;
|
|
+	p4 = ctx_arch->data;
|
|
+
|
|
+	/* only look at PMCs set in both ctx->regs.pmcs and enable_mask */
|
|
+	bitmap_and(cast_ulp(ena_mask),
|
|
+		   cast_ulp(ctx->regs.pmcs),
|
|
+		   cast_ulp(enable_mask),
|
|
+		   max_enable);
|
|
+
|
|
+	n = bitmap_weight(cast_ulp(ena_mask), max_enable);
|
|
+
|
|
+	for (i = 0; n; i++) {
|
|
+		if (!test_bit(i, cast_ulp(ena_mask)))
|
|
+			continue;
|
|
+		/*
|
|
+		 * controlled counter
|
|
+		 */
|
|
+		j = xrc[i].ctr;
|
|
+
|
|
+		/* read CCCR (PMC) value */
|
|
+		__pfm_read_reg_p4(xrc+i, &cccr);
|
|
+
|
|
+		/* read counter (PMD) controlled by PMC */
|
|
+		__pfm_read_reg_p4(xrd+j, &ctr1);
|
|
+
|
|
+		/* clear CCCR value: stops the counter but destroys OVF */
|
|
+		__pfm_write_reg_p4(xrc+i, 0);
|
|
+
|
|
+		/* read counter controlled by CCCR again */
|
|
+		__pfm_read_reg_p4(xrd+j, &ctr2);
|
|
+
|
|
+		/*
|
|
+		 * there is an overflow if either:
|
|
+		 *	- CCCR.ovf is set (and we just cleared it)
|
|
+		 *	- ctr2 < ctr1
|
|
+		 * in that case we set the bit corresponding to the
|
|
+		 * overflowed PMD in povfl_pmds.
|
|
+		 */
|
|
+		if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) {
|
|
+			__set_bit(j, cast_ulp(p4->povfl_pmds));
|
|
+			p4->npend_ovfls++;
|
|
+		}
|
|
+		/* remember the CCCR so monitoring can be resumed below */
|
|
+		p4->saved_cccrs[i] = cccr;
|
|
+		n--;
|
|
+	}
|
|
+	/*
|
|
+	 * if there was no overflow, then it means the NMI was not really
|
|
+	 * for us, so we have to resume monitoring.
|
|
+	 *
|
|
+	 * n has been counted down to zero by the loop above, so it cannot
|
|
+	 * bound this loop: scan all max_enable bits of ena_mask instead.
|
|
+	 */
|
|
+	if (unlikely(!p4->npend_ovfls)) {
|
|
+		for (i = 0; i < max_enable; i++) {
|
|
+			if (!test_bit(i, cast_ulp(ena_mask)))
|
|
+				continue;
|
|
+			__pfm_write_reg_p4(xrc+i, p4->saved_cccrs[i]);
|
|
+		}
|
|
+		return 0;
|
|
+	}
|
|
+	/* at least one overflow was recorded in the P4 context */
|
|
+	return 1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Reload the PMC registers of @set for @ctx: program MSR_IA32_DS_AREA
|
|
+ * first when the context uses a DS area, then write every PMC listed in
|
|
+ * ctx->regs.pmcs.
|
|
+ */
|
|
+void pfm_p4_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+	struct pfm_arch_pmu_info *pmu_info;
|
|
+	struct pfm_arch_context *arch;
|
|
+	u64 *pmc_mask;
|
|
+	u16 idx, remaining;
|
|
+
|
|
+	arch = pfm_ctx_arch(ctx);
|
|
+	pmu_info = pfm_pmu_info();
|
|
+
|
|
+	/*
|
|
+	 * the DS pointer has to be in place before any PMC is written,
|
|
+	 * because writing PMCs can potentially reactivate monitoring
|
|
+	 */
|
|
+	if (arch->flags.use_ds)
|
|
+		wrmsrl(MSR_IA32_DS_AREA, (unsigned long)arch->ds_area);
|
|
+
|
|
+	/*
|
|
+	 * everything is restored because some registers depend on
|
|
+	 * each other (e.g., ESCR and CCCR)
|
|
+	 */
|
|
+	pmc_mask = ctx->regs.pmcs;
|
|
+	remaining = ctx->regs.num_pmcs;
|
|
+	for (idx = 0; remaining; idx++) {
|
|
+		if (!test_bit(idx, cast_ulp(pmc_mask)))
|
|
+			continue;
|
|
+		pfm_arch_write_pmc(ctx, idx, set->pmcs[idx]);
|
|
+		remaining--;
|
|
+	}
|
|
+}
|
|
+
|
|
+/*
|
|
+ * invoked only when NMI is used. Called from the LOCAL_PERFMON_VECTOR
|
|
+ * handler to copy P4 overflow state captured when the NMI triggered.
|
|
+ * Given that on P4, stopping monitoring destroys the overflow information,
|
|
+ * we save it in pfm_p4_has_ovfls() where monitoring is also stopped.
|
|
+ *
|
|
+ * Here we propagate the overflow state to the current active set. The
|
|
+ * freeze_pmu() call will not overwrite this state because npend_ovfls
|
|
+ * is non-zero.
|
|
+ */
|
|
+static void pfm_p4_nmi_copy_state(struct pfm_context *ctx)
|
|
+{
|
|
+	struct pfm_arch_context *arch = pfm_ctx_arch(ctx);
|
|
+	struct pfm_arch_p4_context *p4_state = arch->data;
|
|
+	struct pfm_event_set *active = ctx->active_set;
|
|
+
|
|
+	/* no pending overflow recorded in the P4 context: nothing to copy */
|
|
+	if (!p4_state->npend_ovfls)
|
|
+		return;
|
|
+
|
|
+	active->npend_ovfls = p4_state->npend_ovfls;
|
|
+
|
|
+	bitmap_copy(cast_ulp(active->povfl_pmds),
|
|
+		    cast_ulp(p4_state->povfl_pmds),
|
|
+		    ctx->regs.max_pmd);
|
|
+
|
|
+	/* the state now lives in the active set: clear the P4 copy's count */
|
|
+	p4_state->npend_ovfls = 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_p4_quiesce - stop monitoring without grabbing any lock
|
|
+ *
|
|
+ * called from NMI interrupt handler to immediately stop monitoring
|
|
+ * cannot grab any lock, including perfmon related locks
|
|
+ */
|
|
+static void __kprobes pfm_p4_quiesce(void)
|
|
+{
|
|
+	u16 idx;
|
|
+
|
|
+	/*
|
|
+	 * write 0 to every PMC that is listed in regs_all.pmcs and
|
|
+	 * also has the start/stop capability (enable_mask)
|
|
+	 */
|
|
+	for (idx = 0; idx < pfm_pmu_conf->regs_all.max_pmc; idx++) {
|
|
+		if (!test_bit(idx, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+			continue;
|
|
+		if (!test_bit(idx, cast_ulp(enable_mask)))
|
|
+			continue;
|
|
+		__pfm_write_reg_p4(pmc_addrs+idx, 0);
|
|
+	}
|
|
+}
|
|
+
|
|
+
|
|
+static struct pfm_pmu_config pfm_p4_pmu_conf = {
|
|
+ .pmu_name = "Intel P4",
|
|
+ .counter_width = 40,
|
|
+ .pmd_desc = pfm_p4_pmd_desc,
|
|
+ .pmc_desc = pfm_p4_pmc_desc,
|
|
+ .num_pmc_entries = PFM_P4_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_P4_NUM_PMDS,
|
|
+ .probe_pmu = pfm_p4_probe_pmu,
|
|
+ .version = "1.0",
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+ .pmu_info = &pfm_p4_pmu_info
|
|
+};
|
|
+
|
|
+static int __init pfm_p4_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_p4_pmu_conf);
|
|
+}
|
|
+
|
|
+static void __exit pfm_p4_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_p4_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_p4_pmu_init_module);
|
|
+module_exit(pfm_p4_pmu_cleanup_module);
|
|
diff --git a/arch/x86/perfmon/perfmon_p6.c b/arch/x86/perfmon/perfmon_p6.c
|
|
new file mode 100644
|
|
index 0000000..47c0a46
|
|
--- /dev/null
|
|
+++ b/arch/x86/perfmon/perfmon_p6.c
|
|
@@ -0,0 +1,310 @@
|
|
+/*
|
|
+ * This file contains the P6 family processor PMU register description tables
|
|
+ *
|
|
+ * This module supports original P6 processors
|
|
+ * (Pentium II, Pentium Pro, Pentium III) and Pentium M.
|
|
+ *
|
|
+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/kprobes.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <linux/nmi.h>
|
|
+#include <asm/msr.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_DESCRIPTION("P6 PMU description table");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+static int force_nmi;
|
|
+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt");
|
|
+module_param(force_nmi, bool, 0600);
|
|
+
|
|
+/*
|
|
+ * - upper 32 bits are reserved
|
|
+ * - INT: APIC enable bit is reserved (forced to 1)
|
|
+ * - bit 21 is reserved
|
|
+ * - bit 22 is reserved on PERFEVTSEL1
|
|
+ *
|
|
+ * RSVD: reserved bits are 1
|
|
+ */
|
|
+#define PFM_P6_PMC0_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (1ULL<<21))
|
|
+#define PFM_P6_PMC1_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (3ULL<<21))
|
|
+
|
|
+/*
|
|
+ * force Local APIC interrupt on overflow
|
|
+ * disable with NO_EMUL64
|
|
+ */
|
|
+#define PFM_P6_PMC_VAL (1ULL<<20)
|
|
+#define PFM_P6_NO64 (1ULL<<20)
|
|
+
|
|
+
|
|
+static void __kprobes pfm_p6_quiesce(void);
|
|
+static int pfm_p6_has_ovfls(struct pfm_context *ctx);
|
|
+static int pfm_p6_stop_save(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+
|
|
+static u64 enable_mask[PFM_MAX_PMCS];
|
|
+static u16 max_enable;
|
|
+
|
|
+/*
|
|
+ * PFM_X86_FL_NO_SHARING: because of the single enable bit on MSR_P6_EVNTSEL0
|
|
+ * the PMU cannot be shared with NMI watchdog or Oprofile
|
|
+ */
|
|
+struct pfm_arch_pmu_info pfm_p6_pmu_info = {
|
|
+ .stop_save = pfm_p6_stop_save,
|
|
+ .has_ovfls = pfm_p6_has_ovfls,
|
|
+ .quiesce = pfm_p6_quiesce,
|
|
+ .flags = PFM_X86_FL_NO_SHARING,
|
|
+};
|
|
+
|
|
+static struct pfm_regmap_desc pfm_p6_pmc_desc[] = {
|
|
+/* pmc0 */ PMC_D(PFM_REG_I64, "PERFEVTSEL0", PFM_P6_PMC_VAL, PFM_P6_PMC0_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL0),
|
|
+/* pmc1 */ PMC_D(PFM_REG_I64, "PERFEVTSEL1", PFM_P6_PMC_VAL, PFM_P6_PMC1_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL1)
|
|
+};
|
|
+#define PFM_P6_NUM_PMCS ARRAY_SIZE(pfm_p6_pmc_desc)
|
|
+
|
|
+#define PFM_P6_D(n) \
|
|
+ { .type = PFM_REG_C, \
|
|
+ .desc = "PERFCTR"#n, \
|
|
+ .hw_addr = MSR_P6_PERFCTR0+n, \
|
|
+ .rsvd_msk = 0, \
|
|
+ .dep_pmcs[0] = 1ULL << n \
|
|
+ }
|
|
+
|
|
+static struct pfm_regmap_desc pfm_p6_pmd_desc[] = {
|
|
+/* pmd0 */ PFM_P6_D(0),
|
|
+/* pmd1 */ PFM_P6_D(1)
|
|
+};
|
|
+#define PFM_P6_NUM_PMDS ARRAY_SIZE(pfm_p6_pmd_desc)
|
|
+
|
|
+/*
|
|
+ * Probe routine for the P6 PMU description: accepts Intel family 6
|
|
+ * processors whose model is listed in the switch below and which have a
|
|
+ * local APIC. On success it marks PMC0 and PMC1 in enable_mask, sets
|
|
+ * max_enable to 2 and, if the force_nmi module parameter is set, adds
|
|
+ * PFM_X86_FL_USE_NMI to the PMU flags.
|
|
+ *
|
|
+ * Returns 0 if the processor is supported, -1 otherwise.
|
|
+ */
|
|
+static int pfm_p6_probe_pmu(void)
|
|
+{
|
|
+ int high, low;
|
|
+
|
|
+ if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
|
|
+ PFM_INFO("not an Intel processor");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * check for P6 processor family
|
|
+ */
|
|
+ if (current_cpu_data.x86 != 6) {
|
|
+ PFM_INFO("unsupported family=%d", current_cpu_data.x86);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ switch (current_cpu_data.x86_model) {
|
|
+ case 1: /* Pentium Pro */
|
|
+ case 3:
|
|
+ case 5: /* Pentium II Deschutes */
|
|
+ case 7 ... 11:
|
|
+ break;
|
|
+ case 13:
|
|
+ /* for Pentium M, we need to check if the PMU exists */
|
|
+ rdmsr(MSR_IA32_MISC_ENABLE, low, high);
|
|
+ if (low & (1U << 7))
|
|
+ break;
|
|
+ /* bit 7 clear: fall through and reject like an unknown model */
|
|
+ default:
|
|
+ PFM_INFO("unsupported CPU model %d",
|
|
+ current_cpu_data.x86_model);
|
|
+ return -1;
|
|
+
|
|
+ }
|
|
+
|
|
+ if (!cpu_has_apic) {
|
|
+ PFM_INFO("no Local APIC, try rebooting with lapic");
|
|
+ return -1;
|
|
+ }
|
|
+ /* both PMCs (index 0 and 1) go into enable_mask; max_enable = highest index + 1 */
|
|
+ __set_bit(0, cast_ulp(enable_mask));
|
|
+ __set_bit(1, cast_ulp(enable_mask));
|
|
+ max_enable = 1 + 1;
|
|
+ /*
|
|
+ * force NMI interrupt?
|
|
+ */
|
|
+ if (force_nmi)
|
|
+ pfm_p6_pmu_info.flags |= PFM_X86_FL_USE_NMI;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_p6_has_ovfls - check for pending overflow condition
|
|
+ * @ctx: context to work on
|
|
+ *
|
|
+ * detect if counters have overflowed.
|
|
+ * return:
|
|
+ * 0 : no overflow
|
|
+ * 1 : at least one overflow
|
|
+ */
|
|
+static int __kprobes pfm_p6_has_ovfls(struct pfm_context *ctx)
|
|
+{
|
|
+	u64 *counters = ctx->regs.cnt_pmds;
|
|
+	u64 top_bit = 1ULL << pfm_pmu_conf->counter_width;
|
|
+	u16 remaining = ctx->regs.num_counters;
|
|
+	u64 cur;
|
|
+	u16 idx;
|
|
+
|
|
+	/*
|
|
+	 * the register mapping is known, so the MSR address is
|
|
+	 * computed directly from the index, which avoids touching
|
|
+	 * more cachelines
|
|
+	 *
|
|
+	 * the counter mask still has to be consulted because not
|
|
+	 * all registers may be available.
|
|
+	 */
|
|
+	for (idx = 0; remaining; idx++) {
|
|
+		if (!test_bit(idx, cast_ulp(counters)))
|
|
+			continue;
|
|
+
|
|
+		rdmsrl(MSR_P6_PERFCTR0+idx, cur);
|
|
+		/* bit counter_width clear: this counter has overflowed */
|
|
+		if (!(cur & top_bit))
|
|
+			return 1;
|
|
+		remaining--;
|
|
+	}
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_p6_stop_save -- stop monitoring and save PMD values
|
|
+ * @ctx: context to work on
|
|
+ * @set: current event set
|
|
+ *
|
|
+ * return value:
|
|
+ * 0 - no need to save PMDs in caller
|
|
+ * 1 - need to save PMDs in caller
|
|
+ */
|
|
+static int pfm_p6_stop_save(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+	struct pfm_arch_pmu_info *pmu_info;
|
|
+	u64 used_mask[PFM_PMC_BV];
|
|
+	u64 *cnt_pmds;
|
|
+	u64 val, wmask, ovfl_mask;
|
|
+	u32 i, count;
|
|
+
|
|
+	pmu_info = pfm_pmu_info();
|
|
+
|
|
+	wmask = 1ULL << pfm_pmu_conf->counter_width;
|
|
+	bitmap_and(cast_ulp(used_mask),
|
|
+		   cast_ulp(set->used_pmcs),
|
|
+		   cast_ulp(enable_mask),
|
|
+		   max_enable);
|
|
+
|
|
+	/*
|
|
+	 * count over the same max_enable bits that bitmap_and() just
|
|
+	 * filled in: used_mask lives on the stack and nothing beyond
|
|
+	 * those bits has been initialized
|
|
+	 */
|
|
+	count = bitmap_weight(cast_ulp(used_mask), max_enable);
|
|
+
|
|
+	/*
|
|
+	 * stop monitoring
|
|
+	 * Unfortunately, this is very expensive!
|
|
+	 * wrmsrl() is serializing.
|
|
+	 */
|
|
+	for (i = 0; count; i++) {
|
|
+		if (test_bit(i, cast_ulp(used_mask))) {
|
|
+			wrmsrl(MSR_P6_EVNTSEL0+i, 0);
|
|
+			count--;
|
|
+		}
|
|
+	}
|
|
+
|
|
+	/*
|
|
+	 * if we already have a pending overflow condition, we simply
|
|
+	 * return to take care of this first.
|
|
+	 */
|
|
+	if (set->npend_ovfls)
|
|
+		return 1;
|
|
+
|
|
+	ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+	cnt_pmds = ctx->regs.cnt_pmds;
|
|
+
|
|
+	/*
|
|
+	 * check for pending overflows and save PMDs (combo)
|
|
+	 * we employ used_pmds because we also need to save
|
|
+	 * and not just check for pending interrupts.
|
|
+	 *
|
|
+	 * Must check for counting PMDs because of virtual PMDs
|
|
+	 */
|
|
+	count = set->nused_pmds;
|
|
+	for (i = 0; count; i++) {
|
|
+		if (test_bit(i, cast_ulp(set->used_pmds))) {
|
|
+			val = pfm_arch_read_pmd(ctx, i);
|
|
+			if (likely(test_bit(i, cast_ulp(cnt_pmds)))) {
|
|
+				/* bit counter_width clear: counter overflowed */
|
|
+				if (!(val & wmask)) {
|
|
+					__set_bit(i, cast_ulp(set->povfl_pmds));
|
|
+					set->npend_ovfls++;
|
|
+				}
|
|
+				/* keep the software upper bits, take the low bits from hardware */
|
|
+				val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask);
|
|
+			}
|
|
+			set->pmds[i].value = val;
|
|
+			count--;
|
|
+		}
|
|
+	}
|
|
+	/* 0 means: no need to save PMDs at upper level */
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_p6_quiesce -- stop monitoring without grabbing any lock
|
|
+ *
|
|
+ * called from NMI interrupt handler to immediately stop monitoring
|
|
+ * cannot grab any lock, including perfmon related locks
|
|
+ */
|
|
+static void __kprobes pfm_p6_quiesce(void)
|
|
+{
|
|
+	/*
|
|
+	 * quiesce the PMU by clearing the registers that have the
|
|
+	 * start/stop capability; on P6 processors only PERFEVTSEL0
|
|
+	 * has an enable bit, so nothing is done if PMC0 is not
|
|
+	 * available
|
|
+	 */
|
|
+	if (!test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs)))
|
|
+		return;
|
|
+
|
|
+	wrmsrl(MSR_P6_EVNTSEL0, 0);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Counters have 40 bits implemented. However they are designed such
|
|
+ * that bits [32-39] are sign extensions of bit 31. As such the
|
|
+ * effective width of a counter for P6-like PMU is 31 bits only.
|
|
+ *
|
|
+ * See IA-32 Intel Architecture Software developer manual Vol 3B
|
|
+ */
|
|
+static struct pfm_pmu_config pfm_p6_pmu_conf = {
|
|
+ .pmu_name = "Intel P6 processor Family",
|
|
+ .counter_width = 31,
|
|
+ .pmd_desc = pfm_p6_pmd_desc,
|
|
+ .pmc_desc = pfm_p6_pmc_desc,
|
|
+ .num_pmc_entries = PFM_P6_NUM_PMCS,
|
|
+ .num_pmd_entries = PFM_P6_NUM_PMDS,
|
|
+ .probe_pmu = pfm_p6_probe_pmu,
|
|
+ .version = "1.0",
|
|
+ .flags = PFM_PMU_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+ .pmu_info = &pfm_p6_pmu_info
|
|
+};
|
|
+
|
|
+static int __init pfm_p6_pmu_init_module(void)
|
|
+{
|
|
+ return pfm_pmu_register(&pfm_p6_pmu_conf);
|
|
+}
|
|
+
|
|
+static void __exit pfm_p6_pmu_cleanup_module(void)
|
|
+{
|
|
+ pfm_pmu_unregister(&pfm_p6_pmu_conf);
|
|
+}
|
|
+
|
|
+module_init(pfm_p6_pmu_init_module);
|
|
+module_exit(pfm_p6_pmu_cleanup_module);
|
|
diff --git a/arch/x86/perfmon/perfmon_pebs_core_smpl.c b/arch/x86/perfmon/perfmon_pebs_core_smpl.c
|
|
new file mode 100644
|
|
index 0000000..eeb9174
|
|
--- /dev/null
|
|
+++ b/arch/x86/perfmon/perfmon_pebs_core_smpl.c
|
|
@@ -0,0 +1,256 @@
|
|
+/*
|
|
+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This file implements the Precise Event Based Sampling (PEBS)
|
|
+ * sampling format for Intel Core and Atom processors.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/types.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/smp.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+#include <asm/msr.h>
|
|
+#include <asm/perfmon_pebs_core_smpl.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_DESCRIPTION("Intel Core Precise Event-Based Sampling (PEBS)");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+/*
|
|
+ * round a up to the next multiple of 2^order; the whole expansion is
|
|
+ * parenthesized so the macro can be used inside larger expressions
|
|
+ */
|
|
+#define ALIGN_PEBS(a, order) \
|
|
+	(((a)+(1UL<<(order))-1) & ~((1UL<<(order))-1))
|
|
+
|
|
+#define PEBS_PADDING_ORDER 8 /* log2(256) padding for PEBS alignment constraint */
|
|
+
|
|
+static int pfm_pebs_core_fmt_validate(u32 flags, u16 npmds, void *data)
|
|
+{
|
|
+ struct pfm_pebs_core_smpl_arg *arg = data;
|
|
+ size_t min_buf_size;
|
|
+
|
|
+ /*
|
|
+ * need to define at least the size of the buffer
|
|
+ */
|
|
+ if (data == NULL) {
|
|
+ PFM_DBG("no argument passed");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * compute min buf size. npmds is the maximum number
|
|
+ * of implemented PMD registers.
|
|
+ */
|
|
+ min_buf_size = sizeof(struct pfm_pebs_core_smpl_hdr)
|
|
+ + sizeof(struct pfm_pebs_core_smpl_entry)
|
|
+ + (1UL<<PEBS_PADDING_ORDER); /* padding for alignment */
|
|
+
|
|
+ PFM_DBG("validate flags=0x%x min_buf_size=%zu buf_size=%zu",
|
|
+ flags,
|
|
+ min_buf_size,
|
|
+ arg->buf_size);
|
|
+
|
|
+ /*
|
|
+ * must hold at least the buffer header + one minimally sized entry
|
|
+ */
|
|
+ if (arg->buf_size < min_buf_size)
|
|
+ return -EINVAL;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Report in *size the buf_size taken from the format argument plus
|
|
+ * 2^PEBS_PADDING_ORDER bytes of alignment padding. flags is not used.
|
|
+ * Always returns 0.
|
|
+ */
|
|
+static int pfm_pebs_core_fmt_get_size(unsigned int flags, void *data, size_t *size)
|
|
+{
|
|
+	struct pfm_pebs_core_smpl_arg *smpl_arg = data;
|
|
+
|
|
+	/* buf_size has been validated in pfm_pebs_core_fmt_validate() */
|
|
+	*size = smpl_arg->buf_size + (1UL<<PEBS_PADDING_ORDER);
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Set up the sampling buffer header and the DS area embedded in it for
|
|
+ * Intel Core PEBS: computes the aligned PEBS buffer base, fills in the
|
|
+ * header (version, size, overflow count, start offset), programs the DS
|
|
+ * area fields (base, absolute max, index, interrupt threshold, counter
|
|
+ * reset value) and records the DS area in the arch-specific context.
|
|
+ * flags and npmds are not used here. Always returns 0.
|
|
+ */
|
|
+static int pfm_pebs_core_fmt_init(struct pfm_context *ctx, void *buf,
|
|
+ u32 flags, u16 npmds, void *data)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct pfm_pebs_core_smpl_hdr *hdr;
|
|
+ struct pfm_pebs_core_smpl_arg *arg = data;
|
|
+ u64 pebs_start, pebs_end;
|
|
+ struct pfm_ds_area_core *ds;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ /* the header sits at the very start of the buffer; the DS area is part of it */
|
|
+ hdr = buf;
|
|
+ ds = &hdr->ds;
|
|
+
|
|
+ /*
|
|
+ * align PEBS buffer base
|
|
+ */
|
|
+ /*
|
|
+ * NOTE(review): pebs_end is pebs_start + buf_size + 1, while
|
|
+ * pfm_pebs_core_fmt_get_size() reports buf_size + 2^PEBS_PADDING_ORDER
|
|
+ * as the buffer size and pebs_start already lies past the header and
|
|
+ * the alignment padding -- confirm pebs_abs_max cannot end up beyond
|
|
+ * the allocated buffer.
|
|
+ */
|
|
+ pebs_start = ALIGN_PEBS((unsigned long)(hdr+1), PEBS_PADDING_ORDER);
|
|
+ pebs_end = pebs_start + arg->buf_size + 1;
|
|
+
|
|
+ hdr->version = PFM_PEBS_CORE_SMPL_VERSION;
|
|
+ hdr->buf_size = arg->buf_size;
|
|
+ hdr->overflows = 0;
|
|
+
|
|
+ /*
|
|
+ * express PEBS buffer base as offset from the end of the header
|
|
+ */
|
|
+ hdr->start_offs = pebs_start - (unsigned long)(hdr+1);
|
|
+
|
|
+ /*
|
|
+ * PEBS buffer boundaries
|
|
+ */
|
|
+ ds->pebs_buf_base = pebs_start;
|
|
+ ds->pebs_abs_max = pebs_end;
|
|
+
|
|
+ /*
|
|
+ * PEBS starting position
|
|
+ */
|
|
+ ds->pebs_index = pebs_start;
|
|
+
|
|
+ /*
|
|
+ * PEBS interrupt threshold
|
|
+ */
|
|
+ /*
|
|
+ * NOTE(review): arg->intr_thres is used unchecked here -- confirm it
|
|
+ * is validated elsewhere so the threshold stays inside the buffer.
|
|
+ */
|
|
+ ds->pebs_intr_thres = pebs_start
|
|
+ + arg->intr_thres
|
|
+ * sizeof(struct pfm_pebs_core_smpl_entry);
|
|
+
|
|
+ /*
|
|
+ * save counter reset value for PEBS counter
|
|
+ */
|
|
+ ds->pebs_cnt_reset = arg->cnt_reset;
|
|
+
|
|
+ /*
|
|
+ * keep track of DS AREA
|
|
+ */
|
|
+ ctx_arch->ds_area = ds;
|
|
+ ctx_arch->flags.use_ds = 1;
|
|
+ ctx_arch->flags.use_pebs = 1;
|
|
+
|
|
+ PFM_DBG("buffer=%p buf_size=%llu offs=%llu pebs_start=0x%llx "
|
|
+ "pebs_end=0x%llx ds=%p pebs_thres=0x%llx cnt_reset=0x%llx",
|
|
+ buf,
|
|
+ (unsigned long long)hdr->buf_size,
|
|
+ (unsigned long long)hdr->start_offs,
|
|
+ (unsigned long long)pebs_start,
|
|
+ (unsigned long long)pebs_end,
|
|
+ ds,
|
|
+ (unsigned long long)ds->pebs_intr_thres,
|
|
+ (unsigned long long)ds->pebs_cnt_reset);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_pebs_core_fmt_handler(struct pfm_context *ctx,
|
|
+ unsigned long ip, u64 tstamp, void *data)
|
|
+{
|
|
+ struct pfm_pebs_core_smpl_hdr *hdr;
|
|
+ struct pfm_ovfl_arg *arg;
|
|
+
|
|
+ hdr = ctx->smpl_addr;
|
|
+ arg = &ctx->ovfl_arg;
|
|
+
|
|
+ PFM_DBG_ovfl("buffer full");
|
|
+ /*
|
|
+ * increment number of buffer overflows.
|
|
+ * important to detect duplicate set of samples.
|
|
+ */
|
|
+ hdr->overflows++;
|
|
+
|
|
+ /*
|
|
+ * request notification and masking of monitoring.
|
|
+ * Notification is still subject to the overflowed
|
|
+ * register having the FL_NOTIFY flag set.
|
|
+ */
|
|
+ arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
|
|
+
|
|
+ return -ENOBUFS; /* we are full, sorry */
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Restart callback of the format: rewinds the PEBS index in the DS area
|
|
+ * of the buffer header to the buffer base and asks for
|
|
+ * PFM_OVFL_CTRL_RESET through *ovfl_ctrl. is_active is not used.
|
|
+ * Always returns 0.
|
|
+ */
|
|
+static int pfm_pebs_core_fmt_restart(int is_active, u32 *ovfl_ctrl,
|
|
+				     void *buf)
|
|
+{
|
|
+	struct pfm_pebs_core_smpl_hdr *smpl_hdr = buf;
|
|
+	struct pfm_ds_area_core *ds = &smpl_hdr->ds;
|
|
+
|
|
+	/* next PEBS record goes back to the base of the buffer */
|
|
+	ds->pebs_index = ds->pebs_buf_base;
|
|
+	*ovfl_ctrl = PFM_OVFL_CTRL_RESET;
|
|
+
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+static int pfm_pebs_core_fmt_exit(void *buf)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static struct pfm_smpl_fmt pebs_core_fmt = {
|
|
+ .fmt_name = PFM_PEBS_CORE_SMPL_NAME,
|
|
+ .fmt_version = 0x1,
|
|
+ .fmt_arg_size = sizeof(struct pfm_pebs_core_smpl_arg),
|
|
+ .fmt_validate = pfm_pebs_core_fmt_validate,
|
|
+ .fmt_getsize = pfm_pebs_core_fmt_get_size,
|
|
+ .fmt_init = pfm_pebs_core_fmt_init,
|
|
+ .fmt_handler = pfm_pebs_core_fmt_handler,
|
|
+ .fmt_restart = pfm_pebs_core_fmt_restart,
|
|
+ .fmt_exit = pfm_pebs_core_fmt_exit,
|
|
+ .fmt_flags = PFM_FMT_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Module init: registers the Intel Core PEBS sampling format, but only on
|
|
+ * a processor that reports PEBS support, is family 6 and has one of the
|
|
+ * models listed in the switch below. Returns -1 when the processor is
|
|
+ * rejected, otherwise the result of pfm_fmt_register().
|
|
+ */
|
|
+static int __init pfm_pebs_core_fmt_init_module(void)
|
|
+{
|
|
+ if (!cpu_has_pebs) {
|
|
+ PFM_INFO("processor does not have PEBS support");
|
|
+ return -1;
|
|
+ }
|
|
+ /*
|
|
+ * cpu_has_pebs is not enough to identify Intel Core PEBS
|
|
+ * which is different from Pentium 4 PEBS. Therefore we do
|
|
+ * a more detailed check here
|
|
+ */
|
|
+ if (current_cpu_data.x86 != 6) {
|
|
+ PFM_INFO("not a supported Intel processor");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ switch (current_cpu_data.x86_model) {
|
|
+ case 15: /* Merom */
|
|
+ case 23: /* Penryn */
|
|
+ case 28: /* Atom (Silverthorne) */
|
|
+ case 29: /* Dunnington */
|
|
+ break;
|
|
+ default:
|
|
+ PFM_INFO("not a supported Intel processor");
|
|
+ return -1;
|
|
+ }
|
|
+ return pfm_fmt_register(&pebs_core_fmt);
|
|
+}
|
|
+
|
|
+static void __exit pfm_pebs_core_fmt_cleanup_module(void)
|
|
+{
|
|
+ pfm_fmt_unregister(&pebs_core_fmt);
|
|
+}
|
|
+
|
|
+module_init(pfm_pebs_core_fmt_init_module);
|
|
+module_exit(pfm_pebs_core_fmt_cleanup_module);
|
|
diff --git a/arch/x86/perfmon/perfmon_pebs_p4_smpl.c b/arch/x86/perfmon/perfmon_pebs_p4_smpl.c
|
|
new file mode 100644
|
|
index 0000000..f4e9fd2
|
|
--- /dev/null
|
|
+++ b/arch/x86/perfmon/perfmon_pebs_p4_smpl.c
|
|
@@ -0,0 +1,253 @@
|
|
+/*
|
|
+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This file implements the Precise Event Based Sampling (PEBS)
|
|
+ * sampling format. It supports the following processors:
|
|
+ * - 32-bit Pentium 4 or other Netburst-based processors
|
|
+ * - 64-bit Pentium 4 or other Netburst-based processors
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/types.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/smp.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+#include <asm/msr.h>
|
|
+#include <asm/perfmon_pebs_p4_smpl.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_DESCRIPTION("Intel P4 Precise Event-Based Sampling (PEBS)");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+/*
|
|
+ * round a up to the next multiple of 2^order; the whole expansion is
|
|
+ * parenthesized so the macro can be used inside larger expressions
|
|
+ */
|
|
+#define ALIGN_PEBS(a, order) \
|
|
+	(((a)+(1UL<<(order))-1) & ~((1UL<<(order))-1))
|
|
+
|
|
+#define PEBS_PADDING_ORDER 8 /* log2(256) padding for PEBS alignment constraint */
|
|
+
|
|
+static int pfm_pebs_p4_fmt_validate(u32 flags, u16 npmds, void *data)
|
|
+{
|
|
+ struct pfm_pebs_p4_smpl_arg *arg = data;
|
|
+ size_t min_buf_size;
|
|
+
|
|
+ /*
|
|
+ * need to define at least the size of the buffer
|
|
+ */
|
|
+ if (data == NULL) {
|
|
+ PFM_DBG("no argument passed");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * compute min buf size. npmds is the maximum number
|
|
+ * of implemented PMD registers.
|
|
+ */
|
|
+ min_buf_size = sizeof(struct pfm_pebs_p4_smpl_hdr)
|
|
+ + sizeof(struct pfm_pebs_p4_smpl_entry)
|
|
+ + (1UL<<PEBS_PADDING_ORDER); /* padding for alignment */
|
|
+
|
|
+ PFM_DBG("validate flags=0x%x min_buf_size=%zu buf_size=%zu",
|
|
+ flags,
|
|
+ min_buf_size,
|
|
+ arg->buf_size);
|
|
+
|
|
+ /*
|
|
+ * must hold at least the buffer header + one minimally sized entry
|
|
+ */
|
|
+ if (arg->buf_size < min_buf_size)
|
|
+ return -EINVAL;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_pebs_p4_fmt_get_size(unsigned int flags, void *data, size_t *size)
|
|
+{
|
|
+ struct pfm_pebs_p4_smpl_arg *arg = data;
|
|
+
|
|
+ /*
|
|
+ * size has been validated in pfm_pebs_p4_fmt_validate()
|
|
+ */
|
|
+ *size = arg->buf_size + (1UL<<PEBS_PADDING_ORDER);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_pebs_p4_fmt_init(struct pfm_context *ctx, void *buf,
|
|
+ u32 flags, u16 npmds, void *data)
|
|
+{
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ struct pfm_pebs_p4_smpl_hdr *hdr;
|
|
+ struct pfm_pebs_p4_smpl_arg *arg = data;
|
|
+ unsigned long pebs_start, pebs_end;
|
|
+ struct pfm_ds_area_p4 *ds;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+
|
|
+ hdr = buf;
|
|
+ ds = &hdr->ds;
|
|
+
|
|
+ /*
|
|
+ * align PEBS buffer base
|
|
+ */
|
|
+ pebs_start = ALIGN_PEBS((unsigned long)(hdr+1), PEBS_PADDING_ORDER);
|
|
+ pebs_end = pebs_start + arg->buf_size + 1;
|
|
+
|
|
+ hdr->version = PFM_PEBS_P4_SMPL_VERSION;
|
|
+ hdr->buf_size = arg->buf_size;
|
|
+ hdr->overflows = 0;
|
|
+
|
|
+ /*
|
|
+ * express PEBS buffer base as offset from the end of the header
|
|
+ */
|
|
+ hdr->start_offs = pebs_start - (unsigned long)(hdr+1);
|
|
+
|
|
+ /*
|
|
+ * PEBS buffer boundaries
|
|
+ */
|
|
+ ds->pebs_buf_base = pebs_start;
|
|
+ ds->pebs_abs_max = pebs_end;
|
|
+
|
|
+ /*
|
|
+ * PEBS starting position
|
|
+ */
|
|
+ ds->pebs_index = pebs_start;
|
|
+
|
|
+ /*
|
|
+ * PEBS interrupt threshold
|
|
+ */
|
|
+ ds->pebs_intr_thres = pebs_start
|
|
+ + arg->intr_thres * sizeof(struct pfm_pebs_p4_smpl_entry);
|
|
+
|
|
+ /*
|
|
+ * save counter reset value for PEBS counter
|
|
+ */
|
|
+ ds->pebs_cnt_reset = arg->cnt_reset;
|
|
+
|
|
+ /*
|
|
+ * keep track of DS AREA
|
|
+ */
|
|
+ ctx_arch->ds_area = ds;
|
|
+ ctx_arch->flags.use_pebs = 1;
|
|
+ ctx_arch->flags.use_ds = 1;
|
|
+
|
|
+ PFM_DBG("buffer=%p buf_size=%llu offs=%llu pebs_start=0x%lx "
|
|
+ "pebs_end=0x%lx ds=%p pebs_thres=0x%lx cnt_reset=0x%llx",
|
|
+ buf,
|
|
+ (unsigned long long)hdr->buf_size,
|
|
+ (unsigned long long)hdr->start_offs,
|
|
+ pebs_start,
|
|
+ pebs_end,
|
|
+ ds,
|
|
+ ds->pebs_intr_thres,
|
|
+ (unsigned long long)ds->pebs_cnt_reset);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_pebs_p4_fmt_handler(struct pfm_context *ctx,
|
|
+ unsigned long ip, u64 tstamp, void *data)
|
|
+{
|
|
+ struct pfm_pebs_p4_smpl_hdr *hdr;
|
|
+ struct pfm_ovfl_arg *arg;
|
|
+
|
|
+ hdr = ctx->smpl_addr;
|
|
+ arg = &ctx->ovfl_arg;
|
|
+
|
|
+ PFM_DBG_ovfl("buffer full");
|
|
+ /*
|
|
+ * increment number of buffer overflows.
|
|
+ * important to detect duplicate set of samples.
|
|
+ */
|
|
+ hdr->overflows++;
|
|
+
|
|
+ /*
|
|
+ * request notification and masking of monitoring.
|
|
+ * Notification is still subject to the overflowed
|
|
+ * register having the FL_NOTIFY flag set.
|
|
+ */
|
|
+ arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
|
|
+
|
|
+ return -ENOBUFS; /* we are full, sorry */
|
|
+}
|
|
+
|
|
+static int pfm_pebs_p4_fmt_restart(int is_active, u32 *ovfl_ctrl,
|
|
+ void *buf)
|
|
+{
|
|
+ struct pfm_pebs_p4_smpl_hdr *hdr = buf;
|
|
+
|
|
+ /*
|
|
+ * reset index to base of buffer
|
|
+ */
|
|
+ hdr->ds.pebs_index = hdr->ds.pebs_buf_base;
|
|
+
|
|
+ *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_pebs_p4_fmt_exit(void *buf)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static struct pfm_smpl_fmt pebs_p4_fmt = {
|
|
+ .fmt_name = PFM_PEBS_P4_SMPL_NAME,
|
|
+ .fmt_version = 0x1,
|
|
+ .fmt_arg_size = sizeof(struct pfm_pebs_p4_smpl_arg),
|
|
+ .fmt_validate = pfm_pebs_p4_fmt_validate,
|
|
+ .fmt_getsize = pfm_pebs_p4_fmt_get_size,
|
|
+ .fmt_init = pfm_pebs_p4_fmt_init,
|
|
+ .fmt_handler = pfm_pebs_p4_fmt_handler,
|
|
+ .fmt_restart = pfm_pebs_p4_fmt_restart,
|
|
+ .fmt_exit = pfm_pebs_p4_fmt_exit,
|
|
+ .fmt_flags = PFM_FMT_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE,
|
|
+};
|
|
+
|
|
+static int __init pfm_pebs_p4_fmt_init_module(void)
|
|
+{
|
|
+ int ht_enabled;
|
|
+
|
|
+ if (!cpu_has_pebs) {
|
|
+ PFM_INFO("processor does not have PEBS support");
|
|
+ return -1;
|
|
+ }
|
|
+ if (current_cpu_data.x86 != 15) {
|
|
+ PFM_INFO("not an Intel Pentium 4");
|
|
+ return -1;
|
|
+ }
|
|
+#ifdef CONFIG_SMP
|
|
+ ht_enabled = (cpus_weight(__get_cpu_var(cpu_core_map))
|
|
+ / current_cpu_data.x86_max_cores) > 1;
|
|
+#else
|
|
+ ht_enabled = 0;
|
|
+#endif
|
|
+ if (ht_enabled) {
|
|
+ PFM_INFO("PEBS not available because HyperThreading is on");
|
|
+ return -1;
|
|
+ }
|
|
+ return pfm_fmt_register(&pebs_p4_fmt);
|
|
+}
|
|
+
|
|
+static void __exit pfm_pebs_p4_fmt_cleanup_module(void)
|
|
+{
|
|
+ pfm_fmt_unregister(&pebs_p4_fmt);
|
|
+}
|
|
+
|
|
+module_init(pfm_pebs_p4_fmt_init_module);
|
|
+module_exit(pfm_pebs_p4_fmt_cleanup_module);
|
|
diff --git a/include/asm-mips/Kbuild b/include/asm-mips/Kbuild
|
|
index 7897f05..7ed16fc 100644
|
|
--- a/include/asm-mips/Kbuild
|
|
+++ b/include/asm-mips/Kbuild
|
|
@@ -1,3 +1,4 @@
|
|
include include/asm-generic/Kbuild.asm
|
|
|
|
header-y += cachectl.h sgidefs.h sysmips.h
|
|
+header-y += perfmon.h
|
|
diff --git a/include/asm-mips/perfmon.h b/include/asm-mips/perfmon.h
|
|
new file mode 100644
|
|
index 0000000..7915c17
|
|
--- /dev/null
|
|
+++ b/include/asm-mips/perfmon.h
|
|
@@ -0,0 +1,34 @@
|
|
+/*
|
|
+ * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This file contains mips64 specific definitions for the perfmon
|
|
+ * interface.
|
|
+ *
|
|
+ * This file MUST never be included directly. Use linux/perfmon.h.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#ifndef _ASM_MIPS64_PERFMON_H_
|
|
+#define _ASM_MIPS64_PERFMON_H_
|
|
+
|
|
+/*
|
|
+ * arch-specific user visible interface definitions
|
|
+ */
|
|
+
|
|
+#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */
|
|
+#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */
|
|
+
|
|
+#endif /* _ASM_MIPS64_PERFMON_H_ */
|
|
diff --git a/include/asm-mips/perfmon_kern.h b/include/asm-mips/perfmon_kern.h
|
|
new file mode 100644
|
|
index 0000000..7d213df
|
|
--- /dev/null
|
|
+++ b/include/asm-mips/perfmon_kern.h
|
|
@@ -0,0 +1,412 @@
|
|
+/*
|
|
+ * Copyright (c) 2005 Philip Mucci.
|
|
+ *
|
|
+ * Based on other versions:
|
|
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This file contains mips64 specific definitions for the perfmon
|
|
+ * interface.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#ifndef _ASM_MIPS64_PERFMON_KERN_H_
|
|
+#define _ASM_MIPS64_PERFMON_KERN_H_
|
|
+
|
|
+#ifdef __KERNEL__
|
|
+
|
|
+#ifdef CONFIG_PERFMON
|
|
+#include <linux/unistd.h>
|
|
+#include <asm/cacheflush.h>
|
|
+
|
|
+#define PFM_ARCH_PMD_STK_ARG 2
|
|
+#define PFM_ARCH_PMC_STK_ARG 2
|
|
+
|
|
+struct pfm_arch_pmu_info {
|
|
+ u32 pmu_style;
|
|
+};
|
|
+
|
|
+#define MIPS64_CONFIG_PMC_MASK (1 << 4)
|
|
+#define MIPS64_PMC_INT_ENABLE_MASK (1 << 4)
|
|
+#define MIPS64_PMC_CNT_ENABLE_MASK (0xf)
|
|
+#define MIPS64_PMC_EVT_MASK (0x7 << 6)
|
|
+#define MIPS64_PMC_CTR_MASK (1 << 31)
|
|
+#define MIPS64_PMD_INTERRUPT (1 << 31)
|
|
+
|
|
+/* Coprocessor register 25 contains the PMU interface. */
|
|
+/* Sel 0 is control for counter 0 */
|
|
+/* Sel 1 is count for counter 0. */
|
|
+/* Sel 2 is control for counter 1. */
|
|
+/* Sel 3 is count for counter 1. */
|
|
+
|
|
+/*
|
|
+
|
|
+31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
|
|
+M 0--------------------------------------------------------------0 Event-- IE U S K EXL
|
|
+
|
|
+M 31 If this bit is one, another pair of Performance Control
|
|
+and Counter registers is implemented at a MTC0
|
|
+
|
|
+Event 8:5 Counter event enabled for this counter. Possible events
|
|
+are listed in Table 6-30. R/W Undefined
|
|
+
|
|
+IE 4 Counter Interrupt Enable. This bit masks bit 31 of the
|
|
+associated count register from the interrupt exception
|
|
+request output. R/W 0
|
|
+
|
|
+U 3 Count in User Mode. When this bit is set, the specified
|
|
+event is counted in User Mode. R/W Undefined
|
|
+
|
|
+S 2 Count in Supervisor Mode. When this bit is set, the
|
|
+specified event is counted in Supervisor Mode. R/W Undefined
|
|
+
|
|
+K 1 Count in Kernel Mode. When this bit is set, count the
|
|
+event in Kernel Mode when EXL and ERL both are 0. R/W Undefined
|
|
+
|
|
+EXL 0 Count when EXL. When this bit is set, count the event
|
|
+when EXL = 1 and ERL = 0. R/W Undefined
|
|
+*/
|
|
+
|
|
+static inline void pfm_arch_resend_irq(struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_serialize(void)
|
|
+{}
|
|
+
|
|
+
|
|
+/*
|
|
+ * MIPS does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus
|
|
+ * this routine needs to do it when switching sets on overflow
|
|
+ */
|
|
+static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ pfm_save_pmds(ctx, set);
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
|
|
+ unsigned int cnum, u64 value)
|
|
+{
|
|
+ /*
|
|
+ * we only write to the actual register when monitoring is
|
|
+ * active (pfm_start was issued)
|
|
+ */
|
|
+ if (ctx && (ctx->flags.started == 0))
|
|
+ return;
|
|
+
|
|
+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
|
|
+ case 0:
|
|
+ write_c0_perfctrl0(value);
|
|
+ break;
|
|
+ case 1:
|
|
+ write_c0_perfctrl1(value);
|
|
+ break;
|
|
+ case 2:
|
|
+ write_c0_perfctrl2(value);
|
|
+ break;
|
|
+ case 3:
|
|
+ write_c0_perfctrl3(value);
|
|
+ break;
|
|
+ default:
|
|
+ BUG();
|
|
+ }
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
|
|
+ unsigned int cnum, u64 value)
|
|
+{
|
|
+ value &= pfm_pmu_conf->ovfl_mask;
|
|
+
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
|
|
+ case 0:
|
|
+ write_c0_perfcntr0(value);
|
|
+ break;
|
|
+ case 1:
|
|
+ write_c0_perfcntr1(value);
|
|
+ break;
|
|
+ case 2:
|
|
+ write_c0_perfcntr2(value);
|
|
+ break;
|
|
+ case 3:
|
|
+ write_c0_perfcntr3(value);
|
|
+ break;
|
|
+ default:
|
|
+ BUG();
|
|
+ }
|
|
+}
|
|
+
|
|
+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) {
|
|
+ case 0:
|
|
+ return read_c0_perfcntr0();
|
|
+ break;
|
|
+ case 1:
|
|
+ return read_c0_perfcntr1();
|
|
+ break;
|
|
+ case 2:
|
|
+ return read_c0_perfcntr2();
|
|
+ break;
|
|
+ case 3:
|
|
+ return read_c0_perfcntr3();
|
|
+ break;
|
|
+ default:
|
|
+ BUG();
|
|
+ return 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) {
|
|
+ case 0:
|
|
+ return read_c0_perfctrl0();
|
|
+ break;
|
|
+ case 1:
|
|
+ return read_c0_perfctrl1();
|
|
+ break;
|
|
+ case 2:
|
|
+ return read_c0_perfctrl2();
|
|
+ break;
|
|
+ case 3:
|
|
+ return read_c0_perfctrl3();
|
|
+ break;
|
|
+ default:
|
|
+ BUG();
|
|
+ return 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * For some CPUs, the upper bits of a counter must be set in order for the
|
|
+ * overflow interrupt to happen. On overflow, the counter has wrapped around,
|
|
+ * and the upper bits are cleared. This function may be used to set them back.
|
|
+ */
|
|
+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx,
|
|
+ unsigned int cnum)
|
|
+{
|
|
+ u64 val;
|
|
+ val = pfm_arch_read_pmd(ctx, cnum);
|
|
+ /* This masks out overflow bit 31 */
|
|
+ pfm_arch_write_pmd(ctx, cnum, val);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * At certain points, perfmon needs to know if monitoring has been
|
|
+ * explicitly started/stopped by user via pfm_start/pfm_stop. The
|
|
+ * information is tracked in ctx.flags.started. However on certain
|
|
+ * architectures, it may be possible to start/stop directly from
|
|
+ * user level with a single assembly instruction bypassing
|
|
+ * the kernel. This function must be used to determine by
|
|
+ * an arch-specific means if monitoring is actually started/stopped.
|
|
+ */
|
|
+static inline int pfm_arch_is_active(struct pfm_context *ctx)
|
|
+{
|
|
+ return ctx->flags.started;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_ctxswin_thread(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{}
|
|
+int pfm_arch_ctxswout_thread(struct task_struct *task,
|
|
+ struct pfm_context *ctx);
|
|
+
|
|
+int pfm_arch_is_monitoring_active(struct pfm_context *ctx);
|
|
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+char *pfm_arch_get_pmu_module_name(void);
|
|
+
|
|
+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ pfm_arch_stop(current, ctx);
|
|
+ /*
|
|
+ * we mark monitoring as stopped to avoid
|
|
+ * certain side effects especially in
|
|
+ * pfm_switch_sets_from_intr() on
|
|
+ * pfm_arch_restore_pmcs()
|
|
+ */
|
|
+ ctx->flags.started = 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * unfreeze PMU from pfm_do_interrupt_handler()
|
|
+ * ctx may be NULL for spurious interrupts
|
|
+ */
|
|
+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
|
|
+{
|
|
+ if (!ctx)
|
|
+ return;
|
|
+
|
|
+ PFM_DBG_ovfl("state=%d", ctx->state);
|
|
+
|
|
+ ctx->flags.started = 1;
|
|
+
|
|
+ if (ctx->state == PFM_CTX_MASKED)
|
|
+ return;
|
|
+
|
|
+ pfm_arch_restore_pmcs(ctx, ctx->active_set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * this function is called from the PMU interrupt handler ONLY.
|
|
+ * On MIPS, the PMU is frozen via arch_stop, masking would be implemented
|
|
+ * via arch-stop as well. Given that the PMU is already stopped when
|
|
+ * entering the interrupt handler, we do not need to stop it again, so
|
|
+ * this function is a nop.
|
|
+ */
|
|
+static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{}
|
|
+
|
|
+/*
|
|
+ * on MIPS masking/unmasking uses the start/stop mechanism, so we simply
|
|
+ * need to start here.
|
|
+ */
|
|
+static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ pfm_arch_start(current, ctx);
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_context_create(struct pfm_context *ctx,
|
|
+ u32 ctx_flags)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_context_free(struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+
|
|
+
|
|
+
|
|
+
|
|
+/*
|
|
+ * function called from pfm_setfl_sane(). Context is locked
|
|
+ * and interrupts are masked.
|
|
+ * The value of flags is the value of ctx_flags as passed by
|
|
+ * user.
|
|
+ *
|
|
+ * function must check arch-specific set flags.
|
|
+ * Return:
|
|
+ * 0 when flags are valid
|
|
+ * non-zero on error
|
|
+ */
|
|
+static inline int
|
|
+pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_init(void)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_init_percpu(void)
|
|
+{}
|
|
+
|
|
+static inline int pfm_arch_load_context(struct pfm_context *ctx)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_unload_context(struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_arch_pmu_release(void)
|
|
+{}
|
|
+
|
|
+#ifdef CONFIG_PERFMON_FLUSH
|
|
+/*
|
|
+ * due to cache aliasing problem on MIPS, it is necessary to flush
|
|
+ * pages out of the cache when they are modified.
|
|
+ */
|
|
+static inline void pfm_cacheflush(void *addr, unsigned int len)
|
|
+{
|
|
+ unsigned long start, end;
|
|
+
|
|
+ start = (unsigned long)addr & PAGE_MASK;
|
|
+ end = ((unsigned long)addr + len + PAGE_SIZE - 1) & PAGE_MASK;
|
|
+
|
|
+ while (start < end) {
|
|
+ flush_data_cache_page(start);
|
|
+ start += PAGE_SIZE;
|
|
+ }
|
|
+}
|
|
+#else
|
|
+static inline void pfm_cacheflush(void *addr, unsigned int len)
|
|
+{}
|
|
+#endif
|
|
+
|
|
+static inline void pfm_arch_arm_handle_work(struct task_struct *task)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
|
|
+{}
|
|
+
|
|
+static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline int pfm_arch_get_base_syscall(void)
|
|
+{
|
|
+ if (test_thread_flag(TIF_32BIT_ADDR)) {
|
|
+ if (test_thread_flag(TIF_32BIT_REGS))
|
|
+ return __NR_O32_Linux+330;
|
|
+ return __NR_N32_Linux+293;
|
|
+ }
|
|
+ return __NR_64_Linux+289;
|
|
+}
|
|
+
|
|
+struct pfm_arch_context {
|
|
+ /* empty */
|
|
+};
|
|
+
|
|
+#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context)
|
|
+/*
|
|
+ * MIPS may need extra alignment requirements for the sampling buffer
|
|
+ */
|
|
+#ifdef CONFIG_PERFMON_SMPL_ALIGN
|
|
+#define PFM_ARCH_SMPL_ALIGN_SIZE 0x4000
|
|
+#else
|
|
+#define PFM_ARCH_SMPL_ALIGN_SIZE 0
|
|
+#endif
|
|
+
|
|
+#endif /* CONFIG_PERFMON */
|
|
+
|
|
+#endif /* __KERNEL__ */
|
|
+#endif /* _ASM_MIPS64_PERFMON_KERN_H_ */
|
|
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
|
|
index a944eda..470cdfc 100644
|
|
--- a/include/asm-mips/system.h
|
|
+++ b/include/asm-mips/system.h
|
|
@@ -67,6 +67,10 @@ do { \
|
|
__mips_mt_fpaff_switch_to(prev); \
|
|
if (cpu_has_dsp) \
|
|
__save_dsp(prev); \
|
|
+ if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \
|
|
+ pfm_ctxsw_out(prev, next); \
|
|
+ if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \
|
|
+ pfm_ctxsw_in(prev, next); \
|
|
(last) = resume(prev, next, task_thread_info(next)); \
|
|
} while (0)
|
|
|
|
diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h
|
|
index bb30606..34fd6aa 100644
|
|
--- a/include/asm-mips/thread_info.h
|
|
+++ b/include/asm-mips/thread_info.h
|
|
@@ -114,6 +114,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
|
|
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
|
|
#define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */
|
|
#define TIF_SECCOMP 4 /* secure computing */
|
|
+#define TIF_PERFMON_WORK 5 /* work for pfm_handle_work() */
|
|
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
|
|
#define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
|
|
#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
|
|
@@ -124,6 +125,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
|
|
#define TIF_32BIT_REGS 22 /* also implies 16/32 fprs */
|
|
#define TIF_32BIT_ADDR 23 /* 32-bit address space (o32/n32) */
|
|
#define TIF_FPUBOUND 24 /* thread bound to FPU-full CPU set */
|
|
+#define TIF_PERFMON_CTXSW 25 /* perfmon needs ctxsw calls */
|
|
#define TIF_SYSCALL_TRACE 31 /* syscall trace active */
|
|
|
|
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
|
|
@@ -140,6 +142,8 @@ register struct thread_info *__current_thread_info __asm__("$28");
|
|
#define _TIF_32BIT_REGS (1<<TIF_32BIT_REGS)
|
|
#define _TIF_32BIT_ADDR (1<<TIF_32BIT_ADDR)
|
|
#define _TIF_FPUBOUND (1<<TIF_FPUBOUND)
|
|
+#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
|
|
+#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
|
|
|
|
/* work to do on interrupt/exception return */
|
|
#define _TIF_WORK_MASK (0x0000ffef & ~_TIF_SECCOMP)
|
|
diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h
|
|
index a73e153..200f654 100644
|
|
--- a/include/asm-mips/unistd.h
|
|
+++ b/include/asm-mips/unistd.h
|
|
@@ -350,11 +350,23 @@
|
|
#define __NR_dup3 (__NR_Linux + 327)
|
|
#define __NR_pipe2 (__NR_Linux + 328)
|
|
#define __NR_inotify_init1 (__NR_Linux + 329)
|
|
+#define __NR_pfm_create_context (__NR_Linux + 330)
|
|
+#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
|
|
+#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
|
|
+#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
|
|
+#define __NR_pfm_load_context (__NR_pfm_create_context+4)
|
|
+#define __NR_pfm_start (__NR_pfm_create_context+5)
|
|
+#define __NR_pfm_stop (__NR_pfm_create_context+6)
|
|
+#define __NR_pfm_restart (__NR_pfm_create_context+7)
|
|
+#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
|
|
+#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
|
|
+#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
|
|
+#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
|
|
|
|
/*
|
|
* Offset of the last Linux o32 flavoured syscall
|
|
*/
|
|
-#define __NR_Linux_syscalls 329
|
|
+#define __NR_Linux_syscalls 341
|
|
|
|
#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
|
|
|
|
@@ -656,16 +668,28 @@
|
|
#define __NR_dup3 (__NR_Linux + 286)
|
|
#define __NR_pipe2 (__NR_Linux + 287)
|
|
#define __NR_inotify_init1 (__NR_Linux + 288)
|
|
+#define __NR_pfm_create_context (__NR_Linux + 289)
|
|
+#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
|
|
+#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
|
|
+#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
|
|
+#define __NR_pfm_load_context (__NR_pfm_create_context+4)
|
|
+#define __NR_pfm_start (__NR_pfm_create_context+5)
|
|
+#define __NR_pfm_stop (__NR_pfm_create_context+6)
|
|
+#define __NR_pfm_restart (__NR_pfm_create_context+7)
|
|
+#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
|
|
+#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
|
|
+#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
|
|
+#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
|
|
|
|
/*
|
|
* Offset of the last Linux 64-bit flavoured syscall
|
|
*/
|
|
-#define __NR_Linux_syscalls 288
|
|
+#define __NR_Linux_syscalls 300
|
|
|
|
#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
|
|
|
|
#define __NR_64_Linux 5000
|
|
-#define __NR_64_Linux_syscalls 288
|
|
+#define __NR_64_Linux_syscalls 300
|
|
|
|
#if _MIPS_SIM == _MIPS_SIM_NABI32
|
|
|
|
@@ -966,16 +990,28 @@
|
|
#define __NR_dup3 (__NR_Linux + 290)
|
|
#define __NR_pipe2 (__NR_Linux + 291)
|
|
#define __NR_inotify_init1 (__NR_Linux + 292)
|
|
+#define __NR_pfm_create_context (__NR_Linux + 293)
|
|
+#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
|
|
+#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
|
|
+#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
|
|
+#define __NR_pfm_load_context (__NR_pfm_create_context+4)
|
|
+#define __NR_pfm_start (__NR_pfm_create_context+5)
|
|
+#define __NR_pfm_stop (__NR_pfm_create_context+6)
|
|
+#define __NR_pfm_restart (__NR_pfm_create_context+7)
|
|
+#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
|
|
+#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
|
|
+#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
|
|
+#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
|
|
|
|
/*
|
|
* Offset of the last N32 flavoured syscall
|
|
*/
|
|
-#define __NR_Linux_syscalls 292
|
|
+#define __NR_Linux_syscalls 304
|
|
|
|
#endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
|
|
|
|
#define __NR_N32_Linux 6000
|
|
-#define __NR_N32_Linux_syscalls 292
|
|
+#define __NR_N32_Linux_syscalls 304
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
|
|
index 4a8e80c..d7d819e 100644
|
|
--- a/include/asm-x86/Kbuild
|
|
+++ b/include/asm-x86/Kbuild
|
|
@@ -9,6 +9,7 @@ header-y += prctl.h
|
|
header-y += ptrace-abi.h
|
|
header-y += sigcontext32.h
|
|
header-y += ucontext.h
|
|
+header-y += perfmon.h
|
|
header-y += processor-flags.h
|
|
|
|
unifdef-y += e820.h
|
|
diff --git a/include/asm-x86/ia32_unistd.h b/include/asm-x86/ia32_unistd.h
|
|
index 61cea9e..275e015 100644
|
|
--- a/include/asm-x86/ia32_unistd.h
|
|
+++ b/include/asm-x86/ia32_unistd.h
|
|
@@ -8,11 +8,12 @@
|
|
* the number. This should be otherwise in sync with asm-x86/unistd_32.h. -AK
|
|
*/
|
|
|
|
-#define __NR_ia32_restart_syscall 0
|
|
-#define __NR_ia32_exit 1
|
|
-#define __NR_ia32_read 3
|
|
-#define __NR_ia32_write 4
|
|
-#define __NR_ia32_sigreturn 119
|
|
-#define __NR_ia32_rt_sigreturn 173
|
|
+#define __NR_ia32_restart_syscall 0
|
|
+#define __NR_ia32_exit 1
|
|
+#define __NR_ia32_read 3
|
|
+#define __NR_ia32_write 4
|
|
+#define __NR_ia32_sigreturn 119
|
|
+#define __NR_ia32_rt_sigreturn 173
|
|
+#define __NR_ia32_pfm_create_context 333
|
|
|
|
#endif /* _ASM_X86_64_IA32_UNISTD_H_ */
|
|
diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
|
|
index a48c7f2..892fe8f 100644
|
|
--- a/include/asm-x86/irq_vectors.h
|
|
+++ b/include/asm-x86/irq_vectors.h
|
|
@@ -92,6 +92,11 @@
|
|
#define LOCAL_TIMER_VECTOR 0xef
|
|
|
|
/*
|
|
+ * Perfmon PMU interrupt vector
|
|
+ */
|
|
+#define LOCAL_PERFMON_VECTOR 0xee
|
|
+
|
|
+/*
|
|
* First APIC vector available to drivers: (vectors 0x30-0xee) we
|
|
* start at 0x31(0x41) to spread out vectors evenly between priority
|
|
* levels. (0x80 is the syscall vector)
|
|
diff --git a/include/asm-x86/mach-default/entry_arch.h b/include/asm-x86/mach-default/entry_arch.h
|
|
index 9283b60..ac31c2d 100644
|
|
--- a/include/asm-x86/mach-default/entry_arch.h
|
|
+++ b/include/asm-x86/mach-default/entry_arch.h
|
|
@@ -32,4 +32,8 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
|
|
BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
|
|
#endif
|
|
|
|
+#ifdef CONFIG_PERFMON
|
|
+BUILD_INTERRUPT(pmu_interrupt,LOCAL_PERFMON_VECTOR)
|
|
+#endif
|
|
+
|
|
#endif
|
|
diff --git a/include/asm-x86/perfmon.h b/include/asm-x86/perfmon.h
|
|
new file mode 100644
|
|
index 0000000..906f4b2
|
|
--- /dev/null
|
|
+++ b/include/asm-x86/perfmon.h
|
|
@@ -0,0 +1,34 @@
|
|
+/*
|
|
+ * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This file contains i386/x86_64 specific definitions for the perfmon
|
|
+ * interface.
|
|
+ *
|
|
+ * This file MUST never be included directly. Use linux/perfmon.h.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#ifndef _ASM_X86_PERFMON__H_
|
|
+#define _ASM_X86_PERFMON__H_
|
|
+
|
|
+/*
|
|
+ * arch-specific user visible interface definitions
|
|
+ */
|
|
+
|
|
+#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */
|
|
+#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */
|
|
+
|
|
+#endif /* _ASM_X86_PERFMON__H_ */
|
|
diff --git a/include/asm-x86/perfmon_kern.h b/include/asm-x86/perfmon_kern.h
|
|
new file mode 100644
|
|
index 0000000..0e5d3a5
|
|
--- /dev/null
|
|
+++ b/include/asm-x86/perfmon_kern.h
|
|
@@ -0,0 +1,548 @@
|
|
+/*
|
|
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
|
|
+ * Contributed by Robert Richter <robert.richter@amd.com>
|
|
+ *
|
|
+ * This file contains X86 Processor Family specific definitions
|
|
+ * for the perfmon interface. This covers P6, Pentium M, P4/Xeon
|
|
+ * (32-bit and 64-bit, i.e., EM64T) and AMD X86-64.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#ifndef _ASM_X86_PERFMON_KERN_H_
|
|
+#define _ASM_X86_PERFMON_KERN_H_
|
|
+
|
|
+#ifdef CONFIG_PERFMON
|
|
+#include <linux/unistd.h>
|
|
+#ifdef CONFIG_4KSTACKS
|
|
+#define PFM_ARCH_PMD_STK_ARG 2
|
|
+#define PFM_ARCH_PMC_STK_ARG 2
|
|
+#else
|
|
+#define PFM_ARCH_PMD_STK_ARG 4 /* about 700 bytes of stack space */
|
|
+#define PFM_ARCH_PMC_STK_ARG 4 /* about 200 bytes of stack space */
|
|
+#endif
|
|
+
|
|
+struct pfm_arch_pmu_info {
|
|
+ u32 flags; /* PMU feature flags */
|
|
+ /*
|
|
+ * mandatory model-specific callbacks
|
|
+ */
|
|
+ int (*stop_save)(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+ int (*has_ovfls)(struct pfm_context *ctx);
|
|
+ void (*quiesce)(void);
|
|
+
|
|
+ /*
|
|
+ * optional model-specific callbacks
|
|
+ */
|
|
+ void (*acquire_pmu_percpu)(void);
|
|
+ void (*release_pmu_percpu)(void);
|
|
+ int (*create_context)(struct pfm_context *ctx, u32 ctx_flags);
|
|
+ void (*free_context)(struct pfm_context *ctx);
|
|
+ int (*load_context)(struct pfm_context *ctx);
|
|
+ void (*unload_context)(struct pfm_context *ctx);
|
|
+ void (*write_pmc)(struct pfm_context *ctx, unsigned int cnum, u64 value);
|
|
+ void (*write_pmd)(struct pfm_context *ctx, unsigned int cnum, u64 value);
|
|
+ u64 (*read_pmd)(struct pfm_context *ctx, unsigned int cnum);
|
|
+ u64 (*read_pmc)(struct pfm_context *ctx, unsigned int cnum);
|
|
+ void (*nmi_copy_state)(struct pfm_context *ctx);
|
|
+ void (*restore_pmcs)(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+ void (*restore_pmds)(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set);
|
|
+};
|
|
+
|
|
+/*
|
|
+ * PMU feature flags
|
|
+ */
|
|
+#define PFM_X86_FL_USE_NMI 0x01 /* user asking for NMI */
|
|
+#define PFM_X86_FL_NO_SHARING 0x02 /* no sharing with other subsystems */
|
|
+#define PFM_X86_FL_SHARING 0x04 /* PMU is being shared */
|
|
+
|
|
+struct pfm_x86_ctx_flags {
|
|
+ unsigned int insecure:1; /* rdpmc per-thread self-monitoring */
|
|
+ unsigned int use_pebs:1; /* PEBS used */
|
|
+ unsigned int use_ds:1; /* DS used */
|
|
+ unsigned int reserved:29; /* for future use */
|
|
+};
|
|
+
|
|
+struct pfm_arch_context {
|
|
+ u64 saved_real_iip; /* instr pointer of last NMI intr */
|
|
+ struct pfm_x86_ctx_flags flags; /* flags */
|
|
+ void *ds_area; /* address of DS area (to go away) */
|
|
+ void *data; /* model-specific data */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * functions implemented as inline on x86
|
|
+ */
|
|
+
|
|
+/**
|
|
+ * pfm_arch_write_pmc - write a single PMC register
|
|
+ * @ctx: context to work on
|
|
+ * @cnum: PMC index
|
|
+ * @value: PMC 64-bit value
|
|
+ *
|
|
+ * in certain situations, ctx may be NULL
|
|
+ */
|
|
+static inline void pfm_arch_write_pmc(struct pfm_context *ctx,
|
|
+ unsigned int cnum, u64 value)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ /*
|
|
+ * we only write to the actual register when monitoring is
|
|
+ * active (pfm_start was issued)
|
|
+ */
|
|
+ if (ctx && ctx->flags.started == 0)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * model-specific override, if any
|
|
+ */
|
|
+ if (pmu_info->write_pmc) {
|
|
+ pmu_info->write_pmc(ctx, cnum, value);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ PFM_DBG_ovfl("pfm_arch_write_pmc(0x%lx, 0x%Lx)",
|
|
+ pfm_pmu_conf->pmc_desc[cnum].hw_addr,
|
|
+ (unsigned long long) value);
|
|
+
|
|
+ wrmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_write_pmd - write a single PMD register
|
|
+ * @ctx: context to work on
|
|
+ * @cnum: PMD index
|
|
+ * @value: PMD 64-bit value
|
|
+ */
|
|
+static inline void pfm_arch_write_pmd(struct pfm_context *ctx,
|
|
+ unsigned int cnum, u64 value)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ /*
|
|
+ * to make sure the counter overflows, we set the
|
|
+ * upper bits. we also clear any other unimplemented
|
|
+ * bits as this may cause crash on some processors.
|
|
+ */
|
|
+ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64)
|
|
+ value = (value | ~pfm_pmu_conf->ovfl_mask)
|
|
+ & ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk;
|
|
+
|
|
+ PFM_DBG_ovfl("pfm_arch_write_pmd(0x%lx, 0x%Lx)",
|
|
+ pfm_pmu_conf->pmd_desc[cnum].hw_addr,
|
|
+ (unsigned long long) value);
|
|
+
|
|
+ /*
|
|
+ * model-specific override, if any
|
|
+ */
|
|
+ if (pmu_info->write_pmd) {
|
|
+ pmu_info->write_pmd(ctx, cnum, value);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ wrmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_read_pmd - read a single PMD register
|
|
+ * @ctx: context to work on
|
|
+ * @cnum: PMD index
|
|
+ *
|
|
+ * return value is register 64-bit value
|
|
+ */
|
|
+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+ u64 tmp;
|
|
+
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ /*
|
|
+ * model-specific override, if any
|
|
+ */
|
|
+ if (pmu_info->read_pmd)
|
|
+ tmp = pmu_info->read_pmd(ctx, cnum);
|
|
+ else
|
|
+ rdmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, tmp);
|
|
+
|
|
+ PFM_DBG_ovfl("pfm_arch_read_pmd(0x%lx) = 0x%Lx",
|
|
+ pfm_pmu_conf->pmd_desc[cnum].hw_addr,
|
|
+ (unsigned long long) tmp);
|
|
+ return tmp;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_read_pmc - read a single PMC register
|
|
+ * @ctx: context to work on
|
|
+ * @cnum: PMC index
|
|
+ *
|
|
+ * return value is register 64-bit value
|
|
+ */
|
|
+static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+ u64 tmp;
|
|
+
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ /*
|
|
+ * model-specific override, if any
|
|
+ */
|
|
+ if (pmu_info->read_pmc)
|
|
+ tmp = pmu_info->read_pmc(ctx, cnum);
|
|
+ else
|
|
+ rdmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, tmp);
|
|
+
|
|
+ PFM_DBG_ovfl("pfm_arch_read_pmc(0x%lx) = 0x%016Lx",
|
|
+ pfm_pmu_conf->pmc_desc[cnum].hw_addr,
|
|
+ (unsigned long long) tmp);
|
|
+ return tmp;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_is_active - return non-zero if monitoring has been started
|
|
+ * @ctx: context to check
|
|
+ *
|
|
+ * At certain points, perfmon needs to know if monitoring has been
|
|
+ * explicitly started.
|
|
+ *
|
|
+ * On x86, there is no other way but to use pfm_start/pfm_stop
|
|
+ * to activate monitoring, thus we can simply check flags.started
|
|
+ */
|
|
+static inline int pfm_arch_is_active(struct pfm_context *ctx)
|
|
+{
|
|
+ return ctx->flags.started;
|
|
+}
|
|
+
|
|
+
|
|
+/**
|
|
+ * pfm_arch_unload_context - detach context from thread or CPU
|
|
+ * @ctx: context to detach
|
|
+ *
|
|
+ * in system-wide ctx->task is NULL, otherwise it points to the
|
|
+ * attached thread
|
|
+ */
|
|
+static inline void pfm_arch_unload_context(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ if (ctx_arch->flags.insecure) {
|
|
+ PFM_DBG("clear cr4.pce");
|
|
+ clear_in_cr4(X86_CR4_PCE);
|
|
+ }
|
|
+
|
|
+ if (pmu_info->unload_context)
|
|
+ pmu_info->unload_context(ctx);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_load_context - attach context to thread or CPU
|
|
+ * @ctx: context to attach
|
|
+ */
|
|
+static inline int pfm_arch_load_context(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+ struct pfm_arch_context *ctx_arch;
|
|
+ int ret = 0;
|
|
+
|
|
+ ctx_arch = pfm_ctx_arch(ctx);
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ /*
|
|
+ * RDPMC authorized in system-wide and
|
|
+ * per-thread self-monitoring.
|
|
+ *
|
|
+ * RDPMC only gives access to counts.
|
|
+ *
|
|
+ * The context-switch routine code does not restore
|
|
+ * all the PMD registers (optimization), thus there
|
|
+ * is a possible leak of counts there in per-thread
|
|
+ * mode.
|
|
+ */
|
|
+ if (ctx->task == current || ctx->flags.system) {
|
|
+ PFM_DBG("set cr4.pce");
|
|
+ set_in_cr4(X86_CR4_PCE);
|
|
+ ctx_arch->flags.insecure = 1;
|
|
+ }
|
|
+
|
|
+ if (pmu_info->load_context)
|
|
+ ret = pmu_info->load_context(ctx);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx);
|
|
+
|
|
+/**
|
|
+ * pfm_arch_unmask_monitoring - unmask monitoring
|
|
+ * @ctx: context to unmask
|
|
+ * @set: current event set
|
|
+ *
|
|
+ * masking is slightly different from stopping in that, it does not undo
|
|
+ * the pfm_start() issued by user. This is used in conjunction with
|
|
+ * sampling. Masking means stop monitoring, but do not authorize user
|
|
+ * to issue pfm_start/stop during that time. Unmasking is achieved via
|
|
+ * pfm_restart() and may also depend on the sampling format used.
|
|
+ *
|
|
+ * on x86 masking/unmasking use the start/stop mechanism, except
|
|
+ * that flags.started is not modified.
|
|
+ */
|
|
+static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ pfm_arch_start(current, ctx);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_intr_freeze_pmu - stop monitoring when handling PMU interrupt
|
|
+ * @ctx: current context
|
|
+ * @set: current event set
|
|
+ *
|
|
+ * called from __pfm_interrupt_handler().
|
|
+ * ctx is not NULL. ctx is locked. interrupts are masked
|
|
+ *
|
|
+ * The following actions must take place:
|
|
+ * - stop all monitoring to ensure handler has consistent view.
|
|
+ * - collect overflowed PMDs bitmask into povfls_pmds and
|
|
+ * npend_ovfls. If no interrupt detected then npend_ovfls
|
|
+ * must be set to zero.
|
|
+ */
|
|
+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{
|
|
+ /*
|
|
+ * on X86, freezing is equivalent to stopping
|
|
+ */
|
|
+ pfm_arch_stop(current, ctx);
|
|
+
|
|
+ /*
|
|
+ * we mark monitoring as stopped to avoid
|
|
+ * certain side effects especially in
|
|
+ * pfm_switch_sets_from_intr() and
|
|
+ * pfm_arch_restore_pmcs()
|
|
+ */
|
|
+ ctx->flags.started = 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_intr_unfreeze_pmu - conditionally reactivate monitoring
|
|
+ * @ctx: current context
|
|
+ *
|
|
+ * current context may be NULL when dealing with spurious interrupts
|
|
+ *
|
|
+ * Must re-activate monitoring if context is not MASKED.
|
|
+ * interrupts are masked.
|
|
+ */
|
|
+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx)
|
|
+{
|
|
+ if (ctx == NULL)
|
|
+ return;
|
|
+
|
|
+ PFM_DBG_ovfl("state=%d", ctx->state);
|
|
+
|
|
+ /*
|
|
+ * restore flags.started which is cleared in
|
|
+ * pfm_arch_intr_freeze_pmu()
|
|
+ */
|
|
+ ctx->flags.started = 1;
|
|
+
|
|
+ if (ctx->state == PFM_CTX_MASKED)
|
|
+ return;
|
|
+
|
|
+ pfm_arch_restore_pmcs(ctx, ctx->active_set);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_setfl_sane - check arch/model specific event set flags
|
|
+ * @ctx: context to work on
|
|
+ * @flags: event set flags as passed by user
|
|
+ *
|
|
+ * called from pfm_setfl_sane(). Context is locked. Interrupts are masked.
|
|
+ *
|
|
+ * Return:
|
|
+ * 0 when flags are valid
|
|
+ * 1 on error
|
|
+ */
|
|
+static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_ovfl_reset_pmd - reset pmd on overflow
|
|
+ * @ctx: current context
|
|
+ * @cnum: PMD index
|
|
+ *
|
|
+ * On some CPUs, the upper bits of a counter must be set in order for the
|
|
+ * overflow interrupt to happen. On overflow, the counter has wrapped around,
|
|
+ * and the upper bits are cleared. This function may be used to set them back.
|
|
+ *
|
|
+ * For x86, the current version loses whatever is remaining in the counter,
|
|
+ * which usually has a small count. In order not to lose this count,
|
|
+ * we do a read-modify-write to set the upper bits while preserving the
|
|
+ * low-order bits. This is slow but works.
|
|
+ */
|
|
+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ u64 val;
|
|
+ val = pfm_arch_read_pmd(ctx, cnum);
|
|
+ pfm_arch_write_pmd(ctx, cnum, val);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_context_create - create context
|
|
+ * @ctx: newly created context
|
|
+ * @ctx_flags: context flags as passed by user
|
|
+ *
|
|
+ * called from __pfm_create_context()
|
|
+ */
|
|
+static inline int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ if (pmu_info->create_context)
|
|
+ return pmu_info->create_context(ctx, ctx_flags);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_arch_context_free - free context
|
|
+ * @ctx: context to free
|
|
+ */
|
|
+static inline void pfm_arch_context_free(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_arch_pmu_info *pmu_info;
|
|
+
|
|
+ pmu_info = pfm_pmu_info();
|
|
+
|
|
+ if (pmu_info->free_context)
|
|
+ pmu_info->free_context(ctx);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * pfm_arch_clear_pmd_ovfl_cond - alter the pmds in such a way that they
|
|
+ * will not cause interrupts when unused.
|
|
+ *
|
|
+ * This is a nop on x86
|
|
+ */
|
|
+static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{}
|
|
+
|
|
+/*
|
|
+ * functions implemented in arch/x86/perfmon/perfmon.c
|
|
+ */
|
|
+int pfm_arch_init(void);
|
|
+void pfm_arch_resend_irq(struct pfm_context *ctx);
|
|
+
|
|
+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx);
|
|
+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx);
|
|
+
|
|
+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg);
|
|
+void pfm_arch_pmu_config_remove(void);
|
|
+char *pfm_arch_get_pmu_module_name(void);
|
|
+int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds);
|
|
+void pfm_arch_pmu_release(void);
|
|
+
|
|
+/*
|
|
+ * pfm_arch_serialize - make PMU modifications visible to subsequent instructions
|
|
+ *
|
|
+ * This is a nop on x86
|
|
+ */
|
|
+static inline void pfm_arch_serialize(void)
|
|
+{}
|
|
+
|
|
+/*
|
|
+ * on x86, the PMDs are already saved by pfm_arch_intr_freeze_pmu()
|
|
+ * when entering the PMU interrupt handler, thus, we do not need
|
|
+ * to save them again in pfm_switch_sets_from_intr()
|
|
+ */
|
|
+static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{}
|
|
+
|
|
+
|
|
+static inline void pfm_arch_ctxswout_sys(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_ctxswin_sys(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_init_percpu(void)
|
|
+{}
|
|
+
|
|
+static inline void pfm_cacheflush(void *addr, unsigned int len)
|
|
+{}
|
|
+
|
|
+/*
|
|
+ * this function is called from the PMU interrupt handler ONLY.
|
|
+ * On x86, the PMU is frozen via arch_stop, masking would be implemented
|
|
+ * via arch-stop as well. Given that the PMU is already stopped when
|
|
+ * entering the interrupt handler, we do not need to stop it again, so
|
|
+ * this function is a nop.
|
|
+ */
|
|
+static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set)
|
|
+{}
|
|
+
|
|
+
|
|
+static inline void pfm_arch_arm_handle_work(struct task_struct *task)
|
|
+{}
|
|
+
|
|
+static inline void pfm_arch_disarm_handle_work(struct task_struct *task)
|
|
+{}
|
|
+
|
|
+static inline int pfm_arch_get_base_syscall(void)
|
|
+{
|
|
+#ifdef __x86_64__
|
|
+ /* 32-bit syscall definition coming from ia32_unistd.h */
|
|
+ if (test_thread_flag(TIF_IA32))
|
|
+ return __NR_ia32_pfm_create_context;
|
|
+#endif
|
|
+ return __NR_pfm_create_context;
|
|
+}
|
|
+
|
|
+#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context))
|
|
+/*
|
|
+ * x86 does not need extra alignment requirements for the sampling buffer
|
|
+ */
|
|
+#define PFM_ARCH_SMPL_ALIGN_SIZE 0
|
|
+
|
|
+asmlinkage void pmu_interrupt(void);
|
|
+
|
|
+#endif /* CONFIG_PERFMON */
|
|
+
|
|
+#endif /* _ASM_X86_PERFMON_KERN_H_ */
|
|
diff --git a/include/asm-x86/perfmon_pebs_core_smpl.h b/include/asm-x86/perfmon_pebs_core_smpl.h
|
|
new file mode 100644
|
|
index 0000000..4a12e0d
|
|
--- /dev/null
|
|
+++ b/include/asm-x86/perfmon_pebs_core_smpl.h
|
|
@@ -0,0 +1,164 @@
|
|
+/*
|
|
+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ *
|
|
+ * This file implements the sampling format to support Intel
|
|
+ * Precise Event Based Sampling (PEBS) feature of Intel Core
|
|
+ * processors, such as Intel Core 2.
|
|
+ *
|
|
+ * What is PEBS?
|
|
+ * ------------
|
|
+ * This is a hardware feature to enhance sampling by providing
|
|
+ * better precision as to where a sample is taken. This avoids the
|
|
+ * typical skew in the instruction one can observe with any
|
|
+ * interrupt-based sampling technique.
|
|
+ *
|
|
+ * PEBS also lowers sampling overhead significantly by having the
|
|
+ * processor store samples instead of the OS. PMU interrupts are only
|
|
+ * generated after multiple samples are written.
|
|
+ *
|
|
+ * Another benefit of PEBS is that samples can be captured inside
|
|
+ * critical sections where interrupts are masked.
|
|
+ *
|
|
+ * How does it work?
|
|
+ * PEBS effectively implements a Hw buffer. The Os must pass a region
|
|
+ * of memory where samples are to be stored. The region can have any
|
|
+ * size. The OS must also specify the sampling period to reload. The PMU
|
|
+ * will interrupt when it reaches the end of the buffer or a specified
|
|
+ * threshold location inside the memory region.
|
|
+ *
|
|
+ * The description of the buffer is stored in the Data Save Area (DS).
|
|
+ * The samples are stored sequentially in the buffer. The format of the
|
|
+ * buffer is fixed and specified in the PEBS documentation. The sample
|
|
+ * format does not change between 32-bit and 64-bit modes unlike on the
|
|
+ * Pentium 4 version of PEBS.
|
|
+ *
|
|
+ * PEBS does not work when HyperThreading is enabled due to certain MSR
|
|
+ * being shared between the two threads.
|
|
+ *
|
|
+ * What does the format do?
|
|
+ * It provides access to the PEBS feature for both 32-bit and 64-bit
|
|
+ * processors that support it.
|
|
+ *
|
|
+ * The same code and data structures are used for both 32-bit and 64-bit
|
|
+ * modes. A single format name is used for both modes. In 32-bit mode,
|
|
+ * some of the extended registers are written to zero in each sample.
|
|
+ *
|
|
+ * It is important to realize that the format provides a zero-copy
|
|
+ * environment for the samples, i.e., the OS never touches the
|
|
+ * samples. Whatever the processor writes is directly accessible to
|
|
+ * the user.
|
|
+ *
|
|
+ * Parameters to the buffer can be passed via pfm_create_context() in
|
|
+ * the pfm_pebs_core_smpl_arg structure.
|
|
+ */
|
|
+#ifndef __PERFMON_PEBS_CORE_SMPL_H__
|
|
+#define __PERFMON_PEBS_CORE_SMPL_H__ 1
|
|
+
|
|
+/*
|
|
+ * The 32-bit and 64-bit formats are identical, thus we use only
|
|
+ * one name for the format.
|
|
+ */
|
|
+#define PFM_PEBS_CORE_SMPL_NAME "pebs_core"
|
|
+
|
|
+/*
|
|
+ * format specific parameters (passed at context creation)
|
|
+ *
|
|
+ * intr_thres: index from start of buffer of entry where the
|
|
+ * PMU interrupt must be triggered. It must be several samples
|
|
+ * short of the end of the buffer.
|
|
+ */
|
|
+struct pfm_pebs_core_smpl_arg {
|
|
+ u64 cnt_reset; /* counter reset value */
|
|
+ size_t buf_size; /* size of the PEBS buffer in bytes */
|
|
+ size_t intr_thres;/* index of PEBS interrupt threshold entry */
|
|
+ u64 reserved[6]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Data Save Area (32 and 64-bit mode)
|
|
+ *
|
|
+ * The DS area is exposed to the user. To determine the number
|
|
+ * of samples available in PEBS, it is necessary to subtract
|
|
+ * pebs_buf_base from pebs_index.
|
|
+ *
|
|
+ * Layout of the structure is mandated by hardware and specified
|
|
+ * in the Intel documentation.
|
|
+ */
|
|
+struct pfm_ds_area_core {
|
|
+ u64 bts_buf_base;
|
|
+ u64 bts_index;
|
|
+ u64 bts_abs_max;
|
|
+ u64 bts_intr_thres;
|
|
+ u64 pebs_buf_base;
|
|
+ u64 pebs_index;
|
|
+ u64 pebs_abs_max;
|
|
+ u64 pebs_intr_thres;
|
|
+ u64 pebs_cnt_reset;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * This header is at the beginning of the sampling buffer returned to the user.
|
|
+ *
|
|
+ * Because of PEBS alignment constraints, the actual PEBS buffer area does
|
|
+ * not necessarily begin right after the header. The start_offs field must be
|
|
+ * used to compute the first byte of the buffer. The offset is defined as
|
|
+ * the number of bytes between the end of the header and the beginning of
|
|
+ * the buffer. As such the formula is:
|
|
+ * actual_buffer = (unsigned long)(hdr+1)+hdr->start_offs
|
|
+ */
|
|
+struct pfm_pebs_core_smpl_hdr {
|
|
+ u64 overflows; /* #overflows for buffer */
|
|
+ size_t buf_size; /* bytes in the buffer */
|
|
+ size_t start_offs; /* actual buffer start offset */
|
|
+ u32 version; /* smpl format version */
|
|
+ u32 reserved1; /* for future use */
|
|
+ u64 reserved2[5]; /* for future use */
|
|
+ struct pfm_ds_area_core ds; /* data save area */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Sample format as mandated by Intel documentation.
|
|
+ * The same format is used in both 32 and 64 bit modes.
|
|
+ */
|
|
+struct pfm_pebs_core_smpl_entry {
|
|
+ u64 eflags;
|
|
+ u64 ip;
|
|
+ u64 eax;
|
|
+ u64 ebx;
|
|
+ u64 ecx;
|
|
+ u64 edx;
|
|
+ u64 esi;
|
|
+ u64 edi;
|
|
+ u64 ebp;
|
|
+ u64 esp;
|
|
+ u64 r8; /* 0 in 32-bit mode */
|
|
+ u64 r9; /* 0 in 32-bit mode */
|
|
+ u64 r10; /* 0 in 32-bit mode */
|
|
+ u64 r11; /* 0 in 32-bit mode */
|
|
+ u64 r12; /* 0 in 32-bit mode */
|
|
+ u64 r13; /* 0 in 32-bit mode */
|
|
+ u64 r14; /* 0 in 32-bit mode */
|
|
+ u64 r15; /* 0 in 32-bit mode */
|
|
+};
|
|
+
|
|
+#define PFM_PEBS_CORE_SMPL_VERSION_MAJ 1U
|
|
+#define PFM_PEBS_CORE_SMPL_VERSION_MIN 0U
|
|
+#define PFM_PEBS_CORE_SMPL_VERSION (((PFM_PEBS_CORE_SMPL_VERSION_MAJ&0xffff)<<16)|\
|
|
+ (PFM_PEBS_CORE_SMPL_VERSION_MIN & 0xffff))
|
|
+
|
|
+#endif /* __PERFMON_PEBS_CORE_SMPL_H__ */
|
|
diff --git a/include/asm-x86/perfmon_pebs_p4_smpl.h b/include/asm-x86/perfmon_pebs_p4_smpl.h
|
|
new file mode 100644
|
|
index 0000000..26b51b4
|
|
--- /dev/null
|
|
+++ b/include/asm-x86/perfmon_pebs_p4_smpl.h
|
|
@@ -0,0 +1,193 @@
|
|
+/*
|
|
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ *
|
|
+ * This file implements the sampling format to support Intel
|
|
+ * Precise Event Based Sampling (PEBS) feature of Pentium 4
|
|
+ * and other Netburst-based processors. Not to be used for
|
|
+ * Intel Core-based processors.
|
|
+ *
|
|
+ * What is PEBS?
|
|
+ * ------------
|
|
+ * This is a hardware feature to enhance sampling by providing
|
|
+ * better precision as to where a sample is taken. This avoids the
|
|
+ * typical skew in the instruction one can observe with any
|
|
+ * interrupt-based sampling technique.
|
|
+ *
|
|
+ * PEBS also lowers sampling overhead significantly by having the
|
|
+ * processor store samples instead of the OS. PMU interrupts are only
|
|
+ * generated after multiple samples are written.
|
|
+ *
|
|
+ * Another benefit of PEBS is that samples can be captured inside
|
|
+ * critical sections where interrupts are masked.
|
|
+ *
|
|
+ * How does it work?
|
|
+ * PEBS effectively implements a Hw buffer. The Os must pass a region
|
|
+ * of memory where samples are to be stored. The region can have any
|
|
+ * size. The OS must also specify the sampling period to reload. The PMU
|
|
+ * will interrupt when it reaches the end of the buffer or a specified
|
|
+ * threshold location inside the memory region.
|
|
+ *
|
|
+ * The description of the buffer is stored in the Data Save Area (DS).
|
|
+ * The samples are stored sequentially in the buffer. The format of the
|
|
+ * buffer is fixed and specified in the PEBS documentation. The sample
|
|
+ * format changes between 32-bit and 64-bit modes due to extended register
|
|
+ * file.
|
|
+ *
|
|
+ * PEBS does not work when HyperThreading is enabled due to certain MSR
|
|
+ * being shared between the two threads.
|
|
+ *
|
|
+ * What does the format do?
|
|
+ * It provides access to the PEBS feature for both 32-bit and 64-bit
|
|
+ * processors that support it.
|
|
+ *
|
|
+ * The same code is used for both 32-bit and 64-bit modes, but different
|
|
+ * format names are used because the two modes are not compatible due to
|
|
+ * data model and register file differences. Similarly the public data
|
|
+ * structures describing the samples are different.
|
|
+ *
|
|
+ * It is important to realize that the format provides a zero-copy environment
|
|
+ * for the samples, i.e., the OS never touches the samples. Whatever the
|
|
+ * processor writes is directly accessible to the user.
|
|
+ *
|
|
+ * Parameters to the buffer can be passed via pfm_create_context() in
|
|
+ * the pfm_pebs_p4_smpl_arg structure.
|
|
+ *
|
|
+ * It is not possible to mix a 32-bit PEBS application on top of a 64-bit
|
|
+ * host kernel.
|
|
+ */
|
|
+#ifndef __PERFMON_PEBS_P4_SMPL_H__
|
|
+#define __PERFMON_PEBS_P4_SMPL_H__ 1
|
|
+
|
|
+#ifdef __i386__
|
|
+/*
|
|
+ * The 32-bit and 64-bit formats are not compatible, thus we have
|
|
+ * two different identifications so that 32-bit programs running on
|
|
+ * 64-bit OS will fail to use the 64-bit PEBS support.
|
|
+ */
|
|
+#define PFM_PEBS_P4_SMPL_NAME "pebs32_p4"
|
|
+#else
|
|
+#define PFM_PEBS_P4_SMPL_NAME "pebs64_p4"
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * format specific parameters (passed at context creation)
|
|
+ *
|
|
+ * intr_thres: index from start of buffer of entry where the
|
|
+ * PMU interrupt must be triggered. It must be several samples
|
|
+ * short of the end of the buffer.
|
|
+ */
|
|
+struct pfm_pebs_p4_smpl_arg {
|
|
+ u64 cnt_reset; /* counter reset value */
|
|
+ size_t buf_size; /* size of the PEBS buffer in bytes */
|
|
+ size_t intr_thres;/* index of PEBS interrupt threshold entry */
|
|
+ u64 reserved[6]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Data Save Area (32 and 64-bit mode)
|
|
+ *
|
|
+ * The DS area must be exposed to the user because this is the only
|
|
+ * way to report on the number of valid entries recorded by the CPU.
|
|
+ * This is required when the buffer is not full, i.e., there was no
|
|
+ * PMU interrupt.
|
|
+ *
|
|
+ * Layout of the structure is mandated by hardware and specified in
|
|
+ * the Intel documentation.
|
|
+ */
|
|
+struct pfm_ds_area_p4 {
|
|
+ unsigned long bts_buf_base;
|
|
+ unsigned long bts_index;
|
|
+ unsigned long bts_abs_max;
|
|
+ unsigned long bts_intr_thres;
|
|
+ unsigned long pebs_buf_base;
|
|
+ unsigned long pebs_index;
|
|
+ unsigned long pebs_abs_max;
|
|
+ unsigned long pebs_intr_thres;
|
|
+ u64 pebs_cnt_reset;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * This header is at the beginning of the sampling buffer returned to the user.
|
|
+ *
|
|
+ * Because of PEBS alignment constraints, the actual PEBS buffer area does
|
|
+ * not necessarily begin right after the header. The start_offs field must be
|
|
+ * used to compute the first byte of the buffer. The offset is defined as
|
|
+ * the number of bytes between the end of the header and the beginning of
|
|
+ * the buffer. As such the formula is:
|
|
+ * actual_buffer = (unsigned long)(hdr+1)+hdr->start_offs
|
|
+ */
|
|
+struct pfm_pebs_p4_smpl_hdr {
|
|
+ u64 overflows; /* #overflows for buffer */
|
|
+ size_t buf_size; /* bytes in the buffer */
|
|
+ size_t start_offs; /* actual buffer start offset */
|
|
+ u32 version; /* smpl format version */
|
|
+ u32 reserved1; /* for future use */
|
|
+ u64 reserved2[5]; /* for future use */
|
|
+ struct pfm_ds_area_p4 ds; /* data save area */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * 64-bit PEBS record format is described in
|
|
+ * http://www.intel.com/technology/64bitextensions/30083502.pdf
|
|
+ *
|
|
+ * The format does not peek at samples. The sample structure is only
|
|
+ * used to ensure that the buffer is large enough to accommodate one
|
|
+ * sample.
|
|
+ */
|
|
+#ifdef __i386__
|
|
+struct pfm_pebs_p4_smpl_entry {
|
|
+ u32 eflags;
|
|
+ u32 ip;
|
|
+ u32 eax;
|
|
+ u32 ebx;
|
|
+ u32 ecx;
|
|
+ u32 edx;
|
|
+ u32 esi;
|
|
+ u32 edi;
|
|
+ u32 ebp;
|
|
+ u32 esp;
|
|
+};
|
|
+#else
|
|
+struct pfm_pebs_p4_smpl_entry {
|
|
+ u64 eflags;
|
|
+ u64 ip;
|
|
+ u64 eax;
|
|
+ u64 ebx;
|
|
+ u64 ecx;
|
|
+ u64 edx;
|
|
+ u64 esi;
|
|
+ u64 edi;
|
|
+ u64 ebp;
|
|
+ u64 esp;
|
|
+ u64 r8;
|
|
+ u64 r9;
|
|
+ u64 r10;
|
|
+ u64 r11;
|
|
+ u64 r12;
|
|
+ u64 r13;
|
|
+ u64 r14;
|
|
+ u64 r15;
|
|
+};
|
|
+#endif
|
|
+
|
|
+#define PFM_PEBS_P4_SMPL_VERSION_MAJ 1U
|
|
+#define PFM_PEBS_P4_SMPL_VERSION_MIN 0U
|
|
+#define PFM_PEBS_P4_SMPL_VERSION (((PFM_PEBS_P4_SMPL_VERSION_MAJ&0xffff)<<16)|\
|
|
+ (PFM_PEBS_P4_SMPL_VERSION_MIN & 0xffff))
|
|
+
|
|
+#endif /* __PERFMON_PEBS_P4_SMPL_H__ */
|
|
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
|
|
index da0a675..b3a6ae9 100644
|
|
--- a/include/asm-x86/thread_info.h
|
|
+++ b/include/asm-x86/thread_info.h
|
|
@@ -71,6 +71,7 @@ struct thread_info {
|
|
* Warning: layout of LSW is hardcoded in entry.S
|
|
*/
|
|
#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
|
|
+#define TIF_PERFMON_WORK 1 /* work for pfm_handle_work() */
|
|
#define TIF_SIGPENDING 2 /* signal pending */
|
|
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
|
|
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
|
|
@@ -91,6 +92,7 @@ struct thread_info {
|
|
#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
|
|
#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
|
|
#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */
|
|
+#define TIF_PERFMON_CTXSW 28 /* perfmon needs ctxsw calls */
|
|
|
|
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
|
|
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
|
|
@@ -112,6 +114,8 @@ struct thread_info {
|
|
#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
|
|
#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
|
|
#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS)
|
|
+#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
|
|
+#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
|
|
|
|
/* work to do in syscall_trace_enter() */
|
|
#define _TIF_WORK_SYSCALL_ENTRY \
|
|
@@ -133,12 +137,12 @@ struct thread_info {
|
|
|
|
/* Only used for 64 bit */
|
|
#define _TIF_DO_NOTIFY_MASK \
|
|
- (_TIF_SIGPENDING|_TIF_MCE_NOTIFY)
|
|
+ (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERFMON_WORK)
|
|
|
|
/* flags to check in __switch_to() */
|
|
#define _TIF_WORK_CTXSW \
|
|
(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \
|
|
- _TIF_NOTSC)
|
|
+ _TIF_NOTSC|_TIF_PERFMON_CTXSW)
|
|
|
|
#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
|
|
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
|
|
diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h
|
|
index d739467..5d8cca1 100644
|
|
--- a/include/asm-x86/unistd_32.h
|
|
+++ b/include/asm-x86/unistd_32.h
|
|
@@ -338,9 +338,23 @@
|
|
#define __NR_dup3 330
|
|
#define __NR_pipe2 331
|
|
#define __NR_inotify_init1 332
|
|
+#define __NR_pfm_create_context 333
|
|
+#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
|
|
+#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
|
|
+#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
|
|
+#define __NR_pfm_load_context (__NR_pfm_create_context+4)
|
|
+#define __NR_pfm_start (__NR_pfm_create_context+5)
|
|
+#define __NR_pfm_stop (__NR_pfm_create_context+6)
|
|
+#define __NR_pfm_restart (__NR_pfm_create_context+7)
|
|
+#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
|
|
+#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
|
|
+#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
|
|
+#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
+#define NR_syscalls 345
|
|
+
|
|
#define __ARCH_WANT_IPC_PARSE_VERSION
|
|
#define __ARCH_WANT_OLD_READDIR
|
|
#define __ARCH_WANT_OLD_STAT
|
|
diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h
|
|
index 3a341d7..75dac98 100644
|
|
--- a/include/asm-x86/unistd_64.h
|
|
+++ b/include/asm-x86/unistd_64.h
|
|
@@ -653,7 +653,30 @@ __SYSCALL(__NR_dup3, sys_dup3)
|
|
__SYSCALL(__NR_pipe2, sys_pipe2)
|
|
#define __NR_inotify_init1 294
|
|
__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
|
|
-
|
|
+#define __NR_pfm_create_context 295
|
|
+__SYSCALL(__NR_pfm_create_context, sys_pfm_create_context)
|
|
+#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1)
|
|
+__SYSCALL(__NR_pfm_write_pmcs, sys_pfm_write_pmcs)
|
|
+#define __NR_pfm_write_pmds (__NR_pfm_create_context+2)
|
|
+__SYSCALL(__NR_pfm_write_pmds, sys_pfm_write_pmds)
|
|
+#define __NR_pfm_read_pmds (__NR_pfm_create_context+3)
|
|
+ __SYSCALL(__NR_pfm_read_pmds, sys_pfm_read_pmds)
|
|
+#define __NR_pfm_load_context (__NR_pfm_create_context+4)
|
|
+__SYSCALL(__NR_pfm_load_context, sys_pfm_load_context)
|
|
+#define __NR_pfm_start (__NR_pfm_create_context+5)
|
|
+__SYSCALL(__NR_pfm_start, sys_pfm_start)
|
|
+#define __NR_pfm_stop (__NR_pfm_create_context+6)
|
|
+__SYSCALL(__NR_pfm_stop, sys_pfm_stop)
|
|
+#define __NR_pfm_restart (__NR_pfm_create_context+7)
|
|
+__SYSCALL(__NR_pfm_restart, sys_pfm_restart)
|
|
+#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8)
|
|
+__SYSCALL(__NR_pfm_create_evtsets, sys_pfm_create_evtsets)
|
|
+#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9)
|
|
+__SYSCALL(__NR_pfm_getinfo_evtsets, sys_pfm_getinfo_evtsets)
|
|
+#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10)
|
|
+__SYSCALL(__NR_pfm_delete_evtsets, sys_pfm_delete_evtsets)
|
|
+#define __NR_pfm_unload_context (__NR_pfm_create_context+11)
|
|
+__SYSCALL(__NR_pfm_unload_context, sys_pfm_unload_context)
|
|
|
|
#ifndef __NO_STUBS
|
|
#define __ARCH_WANT_OLD_READDIR
|
|
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
|
|
index b68ec09..d37036a 100644
|
|
--- a/include/linux/Kbuild
|
|
+++ b/include/linux/Kbuild
|
|
@@ -162,6 +162,8 @@ header-y += video_decoder.h
|
|
header-y += video_encoder.h
|
|
header-y += videotext.h
|
|
header-y += x25.h
|
|
+header-y += perfmon.h
|
|
+header-y += perfmon_dfl_smpl.h
|
|
|
|
unifdef-y += acct.h
|
|
unifdef-y += adb.h
|
|
diff --git a/include/linux/perfmon.h b/include/linux/perfmon.h
|
|
new file mode 100644
|
|
index 0000000..5d9b977
|
|
--- /dev/null
|
|
+++ b/include/linux/perfmon.h
|
|
@@ -0,0 +1,213 @@
|
|
+/*
|
|
+ * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+
|
|
+#ifndef __LINUX_PERFMON_H__
|
|
+#define __LINUX_PERFMON_H__
|
|
+
|
|
+/*
|
|
+ * This file contains all the user visible generic definitions for the
|
|
+ * interface. Model-specific user-visible definitions are located in
|
|
+ * the asm/perfmon.h file.
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * include arch-specific user interface definitions
|
|
+ */
|
|
+#include <asm/perfmon.h>
|
|
+
|
|
+/*
|
|
+ * defined by each arch
|
|
+ */
|
|
+#define PFM_MAX_PMCS PFM_ARCH_MAX_PMCS
|
|
+#define PFM_MAX_PMDS PFM_ARCH_MAX_PMDS
|
|
+
|
|
+/*
|
|
+ * number of elements for each type of bitvector
|
|
+ * all bitvectors use u64 fixed size type on all architectures.
|
|
+ */
|
|
+#define PFM_BVSIZE(x) (((x)+(sizeof(__u64)<<3)-1) / (sizeof(__u64)<<3))
|
|
+#define PFM_PMD_BV PFM_BVSIZE(PFM_MAX_PMDS)
|
|
+#define PFM_PMC_BV PFM_BVSIZE(PFM_MAX_PMCS)
|
|
+
|
|
+/*
|
|
+ * register flags layout:
|
|
+ * bit[00-15] : generic flags
|
|
+ * bit[16-31] : arch-specific flags
|
|
+ *
|
|
+ * PFM_REGFL_NO_EMUL64: must be set on the PMC controlling the PMD
|
|
+ */
|
|
+#define PFM_REGFL_OVFL_NOTIFY 0x1 /* PMD: send notification on event */
|
|
+#define PFM_REGFL_RANDOM 0x2 /* PMD: randomize value after event */
|
|
+#define PFM_REGFL_NO_EMUL64 0x4 /* PMC: no 64-bit emulation */
|
|
+
|
|
+/*
|
|
+ * event set flags layout:
|
|
+ * bits[00-15] : generic flags
|
|
+ * bits[16-31] : arch-specific flags (see asm/perfmon.h)
|
|
+ */
|
|
+#define PFM_SETFL_OVFL_SWITCH 0x01 /* enable switch on overflow */
|
|
+#define PFM_SETFL_TIME_SWITCH 0x02 /* enable switch on timeout */
|
|
+
|
|
+/*
|
|
+ * argument to pfm_create_context() system call
|
|
+ * structure shared with user level
|
|
+ */
|
|
+struct pfarg_ctx {
|
|
+ __u32 ctx_flags; /* noblock/block/syswide */
|
|
+ __u32 ctx_reserved1; /* for future use */
|
|
+ __u64 ctx_reserved2[7]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * context flags layout:
|
|
+ * bits[00-15]: generic flags
|
|
+ * bits[16-31]: arch-specific flags (see asm/perfmon.h)
|
|
+ */
|
|
+#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user notifications */
|
|
+#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */
|
|
+#define PFM_FL_OVFL_NO_MSG 0x80 /* no overflow msgs */
|
|
+
|
|
+/*
|
|
+ * argument to pfm_write_pmcs() system call.
|
|
+ * structure shared with user level
|
|
+ */
|
|
+struct pfarg_pmc {
|
|
+ __u16 reg_num; /* which register */
|
|
+ __u16 reg_set; /* event set for this register */
|
|
+ __u32 reg_flags; /* REGFL flags */
|
|
+ __u64 reg_value; /* pmc value */
|
|
+ __u64 reg_reserved2[4]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * argument to pfm_write_pmds() and pfm_read_pmds() system calls.
|
|
+ * structure shared with user level
|
|
+ */
|
|
+struct pfarg_pmd {
|
|
+ __u16 reg_num; /* which register */
|
|
+ __u16 reg_set; /* event set for this register */
|
|
+ __u32 reg_flags; /* REGFL flags */
|
|
+ __u64 reg_value; /* initial pmc/pmd value */
|
|
+ __u64 reg_long_reset; /* value to reload after notification */
|
|
+ __u64 reg_short_reset; /* reset after counter overflow */
|
|
+ __u64 reg_last_reset_val; /* return: PMD last reset value */
|
|
+ __u64 reg_ovfl_switch_cnt; /* #overflows before switch */
|
|
+ __u64 reg_reset_pmds[PFM_PMD_BV]; /* reset on overflow */
|
|
+ __u64 reg_smpl_pmds[PFM_PMD_BV]; /* record in sample */
|
|
+ __u64 reg_smpl_eventid; /* opaque event identifier */
|
|
+ __u64 reg_random_mask; /* bitmask used to limit random value */
|
|
+ __u32 reg_random_seed; /* seed for randomization (OBSOLETE) */
|
|
+ __u32 reg_reserved2[7]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * optional argument to pfm_start() system call. Pass NULL if not needed.
|
|
+ * structure shared with user level
|
|
+ */
|
|
+struct pfarg_start {
|
|
+ __u16 start_set; /* event set to start with */
|
|
+ __u16 start_reserved1; /* for future use */
|
|
+ __u32 start_reserved2; /* for future use */
|
|
+ __u64 reserved3[3]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * argument to pfm_load_context() system call.
|
|
+ * structure shared with user level
|
|
+ */
|
|
+struct pfarg_load {
|
|
+ __u32 load_pid; /* thread or CPU to attach to */
|
|
+ __u16 load_set; /* set to load first */
|
|
+ __u16 load_reserved1; /* for future use */
|
|
+ __u64 load_reserved2[3]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * argument to pfm_create_evtsets() and pfm_delete_evtsets() system calls.
|
|
+ * structure shared with user level.
|
|
+ */
|
|
+struct pfarg_setdesc {
|
|
+ __u16 set_id; /* which set */
|
|
+ __u16 set_reserved1; /* for future use */
|
|
+ __u32 set_flags; /* SETFL flags */
|
|
+ __u64 set_timeout; /* switch timeout in nsecs */
|
|
+ __u64 reserved[6]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * argument to pfm_getinfo_evtsets() system call.
|
|
+ * structure shared with user level
|
|
+ */
|
|
+struct pfarg_setinfo {
|
|
+ __u16 set_id; /* which set */
|
|
+ __u16 set_reserved1; /* for future use */
|
|
+ __u32 set_flags; /* out: SETFL flags */
|
|
+ __u64 set_ovfl_pmds[PFM_PMD_BV]; /* out: last ovfl PMDs */
|
|
+ __u64 set_runs; /* out: #times the set was active */
|
|
+ __u64 set_timeout; /* out: eff/leftover timeout (nsecs) */
|
|
+ __u64 set_act_duration; /* out: time set was active in nsecs */
|
|
+ __u64 set_avail_pmcs[PFM_PMC_BV];/* out: available PMCs */
|
|
+ __u64 set_avail_pmds[PFM_PMD_BV];/* out: available PMDs */
|
|
+ __u64 set_reserved3[6]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * default value for the user and group security parameters in
|
|
+ * /proc/sys/kernel/perfmon/sys_group
|
|
+ * /proc/sys/kernel/perfmon/task_group
|
|
+ */
|
|
+#define PFM_GROUP_PERM_ANY -1 /* any user/group */
|
|
+
|
|
+/*
|
|
+ * overflow notification message.
|
|
+ * structure shared with user level
|
|
+ */
|
|
+struct pfarg_ovfl_msg {
|
|
+ __u32 msg_type; /* message type: PFM_MSG_OVFL */
|
|
+ __u32 msg_ovfl_pid; /* process id */
|
|
+ __u16 msg_active_set; /* active set at overflow */
|
|
+ __u16 msg_ovfl_cpu; /* cpu of PMU interrupt */
|
|
+ __u32 msg_ovfl_tid; /* thread id */
|
|
+ __u64 msg_ovfl_ip; /* IP on PMU intr */
|
|
+ __u64 msg_ovfl_pmds[PFM_PMD_BV];/* overflowed PMDs */
|
|
+};
|
|
+
|
|
+#define PFM_MSG_OVFL 1 /* an overflow happened */
|
|
+#define PFM_MSG_END 2 /* task to which context was attached ended */
|
|
+
|
|
+/*
|
|
+ * generic notification message (union).
|
|
+ * union shared with user level
|
|
+ */
|
|
+union pfarg_msg {
|
|
+ __u32 type;
|
|
+ struct pfarg_ovfl_msg pfm_ovfl_msg;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * perfmon version number
|
|
+ */
|
|
+#define PFM_VERSION_MAJ 2U
|
|
+#define PFM_VERSION_MIN 82U
|
|
+#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|\
|
|
+ (PFM_VERSION_MIN & 0xffff))
|
|
+#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff)
|
|
+#define PFM_VERSION_MINOR(x) ((x) & 0xffff)
|
|
+
|
|
+#endif /* __LINUX_PERFMON_H__ */
|
|
diff --git a/include/linux/perfmon_dfl_smpl.h b/include/linux/perfmon_dfl_smpl.h
|
|
new file mode 100644
|
|
index 0000000..e0817a8
|
|
--- /dev/null
|
|
+++ b/include/linux/perfmon_dfl_smpl.h
|
|
@@ -0,0 +1,78 @@
|
|
+/*
|
|
+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This file implements the new dfl sampling buffer format
|
|
+ * for perfmon2 subsystem.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#ifndef __PERFMON_DFL_SMPL_H__
|
|
+#define __PERFMON_DFL_SMPL_H__ 1
|
|
+
|
|
+/*
|
|
+ * format specific parameters (passed at context creation)
|
|
+ */
|
|
+struct pfm_dfl_smpl_arg {
|
|
+ __u64 buf_size; /* size of the buffer in bytes */
|
|
+ __u32 buf_flags; /* buffer specific flags */
|
|
+ __u32 reserved1; /* for future use */
|
|
+ __u64 reserved[6]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * This header is at the beginning of the sampling buffer returned to the user.
|
|
+ * It is directly followed by the first record.
|
|
+ */
|
|
+struct pfm_dfl_smpl_hdr {
|
|
+ __u64 hdr_count; /* how many valid entries */
|
|
+ __u64 hdr_cur_offs; /* current offset from top of buffer */
|
|
+ __u64 hdr_overflows; /* #overflows for buffer */
|
|
+ __u64 hdr_buf_size; /* bytes in the buffer */
|
|
+ __u64 hdr_min_buf_space;/* minimal buffer size (internal use) */
|
|
+ __u32 hdr_version; /* smpl format version */
|
|
+ __u32 hdr_buf_flags; /* copy of buf_flags */
|
|
+ __u64 hdr_reserved[10]; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Entry header in the sampling buffer. The header is directly followed
|
|
+ * with the values of the PMD registers of interest saved in increasing
|
|
+ * index order: PMD4, PMD5, and so on. How many PMDs are present depends
|
|
+ * on how the session was programmed.
|
|
+ *
|
|
+ * In the case where multiple counters overflow at the same time, multiple
|
|
+ * entries are written consecutively.
|
|
+ *
|
|
+ * last_reset_val member indicates the initial value of the overflowed PMD.
|
|
+ */
|
|
+struct pfm_dfl_smpl_entry {
|
|
+ __u32 pid; /* thread id (for NPTL, this is gettid()) */
|
|
+ __u16 ovfl_pmd; /* index of overflowed PMD for this sample */
|
|
+ __u16 reserved; /* for future use */
|
|
+ __u64 last_reset_val; /* initial value of overflowed PMD */
|
|
+ __u64 ip; /* where the overflow interrupt happened */
|
|
+ __u64 tstamp; /* overflow timestamp */
|
|
+ __u16 cpu; /* cpu on which the overflow occurred */
|
|
+ __u16 set; /* event set active when overflow occurred */
|
|
+ __u32 tgid; /* thread group id (getpid() for NPTL) */
|
|
+};
|
|
+
|
|
+#define PFM_DFL_SMPL_VERSION_MAJ 1U
|
|
+#define PFM_DFL_SMPL_VERSION_MIN 0U
|
|
+#define PFM_DFL_SMPL_VERSION (((PFM_DFL_SMPL_VERSION_MAJ&0xffff)<<16)|\
|
|
+ (PFM_DFL_SMPL_VERSION_MIN & 0xffff))
|
|
+
|
|
+#endif /* __PERFMON_DFL_SMPL_H__ */
|
|
diff --git a/include/linux/perfmon_fmt.h b/include/linux/perfmon_fmt.h
|
|
new file mode 100644
|
|
index 0000000..82a6a90
|
|
--- /dev/null
|
|
+++ b/include/linux/perfmon_fmt.h
|
|
@@ -0,0 +1,74 @@
|
|
+/*
|
|
+ * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * Interface for custom sampling buffer format modules
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#ifndef __PERFMON_FMT_H__
|
|
+#define __PERFMON_FMT_H__ 1
|
|
+
|
|
+#include <linux/kobject.h>
|
|
+
|
|
+typedef int (*fmt_validate_t)(u32 flags, u16 npmds, void *arg);
|
|
+typedef int (*fmt_getsize_t)(u32 flags, void *arg, size_t *size);
|
|
+typedef int (*fmt_init_t)(struct pfm_context *ctx, void *buf, u32 flags,
|
|
+ u16 npmds, void *arg);
|
|
+typedef int (*fmt_restart_t)(int is_active, u32 *ovfl_ctrl, void *buf);
|
|
+typedef int (*fmt_exit_t)(void *buf);
|
|
+typedef int (*fmt_handler_t)(struct pfm_context *ctx,
|
|
+ unsigned long ip, u64 stamp, void *data);
|
|
+
|
|
+struct pfm_smpl_fmt {
|
|
+ char *fmt_name; /* name of the format (required) */
|
|
+ size_t fmt_arg_size; /* size of fmt args for ctx create */
|
|
+ u32 fmt_flags; /* format specific flags */
|
|
+ u32 fmt_version; /* format version number */
|
|
+
|
|
+ fmt_validate_t fmt_validate; /* validate context flags */
|
|
+ fmt_getsize_t fmt_getsize; /* get size for sampling buffer */
|
|
+ fmt_init_t fmt_init; /* initialize buffer area */
|
|
+ fmt_handler_t fmt_handler; /* overflow handler (required) */
|
|
+ fmt_restart_t fmt_restart; /* restart after notification */
|
|
+ fmt_exit_t fmt_exit; /* context termination */
|
|
+
|
|
+ struct list_head fmt_list; /* internal use only */
|
|
+
|
|
+ struct kobject kobj; /* sysfs internal use only */
|
|
+ struct module *owner; /* pointer to module owner */
|
|
+ u32 fmt_qdepth; /* Max notify queue depth (required) */
|
|
+};
|
|
+#define to_smpl_fmt(n) container_of(n, struct pfm_smpl_fmt, kobj)
|
|
+
|
|
+#define PFM_FMTFL_IS_BUILTIN 0x1 /* fmt is compiled in */
|
|
+/*
|
|
+ * we need to know whether the format is builtin or compiled
|
|
+ * as a module
|
|
+ */
|
|
+#ifdef MODULE
|
|
+#define PFM_FMT_BUILTIN_FLAG 0 /* built as a module */
|
|
+#else
|
|
+#define PFM_FMT_BUILTIN_FLAG PFM_FMTFL_IS_BUILTIN /* compiled into the kernel */
|
|
+#endif
|
|
+
|
|
+int pfm_fmt_register(struct pfm_smpl_fmt *fmt);
|
|
+int pfm_fmt_unregister(struct pfm_smpl_fmt *fmt);
|
|
+void pfm_sysfs_builtin_fmt_add(void);
|
|
+
|
|
+int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt);
|
|
+void pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt);
|
|
+
|
|
+#endif /* __PERFMON_FMT_H__ */
|
|
diff --git a/include/linux/perfmon_kern.h b/include/linux/perfmon_kern.h
|
|
new file mode 100644
|
|
index 0000000..6c3b527
|
|
--- /dev/null
|
|
+++ b/include/linux/perfmon_kern.h
|
|
@@ -0,0 +1,551 @@
|
|
+/*
|
|
+ * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+
|
|
+#ifndef __LINUX_PERFMON_KERN_H__
|
|
+#define __LINUX_PERFMON_KERN_H__
|
|
+/*
|
|
+ * This file contains all the definitions of data structures, variables, macros
|
|
+ * that are to be shared between generic code and arch-specific code
|
|
+ *
|
|
+ * For generic only definitions, use perfmon/perfmon_priv.h
|
|
+ */
|
|
+#ifdef CONFIG_PERFMON
|
|
+
|
|
+#include <linux/file.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/perfmon.h>
|
|
+
|
|
+/*
|
|
+ * system administrator configuration controls available via
|
|
+ * the /sys/kernel/perfmon interface
|
|
+ */
|
|
+struct pfm_controls {
|
|
+ u32 debug; /* debugging control bitmask */
|
|
+ gid_t sys_group; /* gid to create a syswide context */
|
|
+ gid_t task_group; /* gid to create a per-task context */
|
|
+ u32 flags; /* control flags (see below) */
|
|
+ size_t arg_mem_max; /* maximum vector argument size */
|
|
+ size_t smpl_buffer_mem_max; /* max buf mem, -1 for infinity */
|
|
+};
|
|
+extern struct pfm_controls pfm_controls;
|
|
+
|
|
+/*
|
|
+ * control flags
|
|
+ */
|
|
+#define PFM_CTRL_FL_RW_EXPERT 0x1 /* bypass reserved fields on read/write */
|
|
+
|
|
+/*
|
|
+ * software PMD
|
|
+ */
|
|
+struct pfm_pmd {
|
|
+ u64 value; /* 64-bit value */
|
|
+ u64 lval; /* last reset value */
|
|
+ u64 ovflsw_thres; /* #ovfls left before switch */
|
|
+ u64 long_reset; /* long reset value on overflow */
|
|
+ u64 short_reset; /* short reset value on overflow */
|
|
+ u64 reset_pmds[PFM_PMD_BV]; /* pmds to reset on overflow */
|
|
+ u64 smpl_pmds[PFM_PMD_BV]; /* pmds to record on overflow */
|
|
+ u64 mask; /* range mask for random value */
|
|
+ u64 ovflsw_ref_thres; /* #ovfls before next set */
|
|
+ u64 eventid; /* opaque event identifier */
|
|
+ u32 flags; /* notify/do not notify */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * event_set: encapsulates the full PMU state
|
|
+ */
|
|
+struct pfm_event_set {
|
|
+ struct list_head list; /* ordered chain of sets */
|
|
+ u16 id; /* set identification */
|
|
+ u16 nused_pmds; /* max number of used PMDs */
|
|
+ u16 nused_pmcs; /* max number of used PMCs */
|
|
+ u16 pad1; /* padding */
|
|
+ u32 flags; /* public flags */
|
|
+ u32 priv_flags; /* private flags (see below) */
|
|
+ u64 runs; /* # of activations */
|
|
+ u32 npend_ovfls; /* number of pending PMD overflows */
|
|
+ u32 pad2; /* padding */
|
|
+ u64 used_pmds[PFM_PMD_BV]; /* used PMDs */
|
|
+ u64 povfl_pmds[PFM_PMD_BV]; /* pending overflowed PMDs */
|
|
+ u64 ovfl_pmds[PFM_PMD_BV]; /* last overflowed PMDs */
|
|
+ u64 reset_pmds[PFM_PMD_BV]; /* PMDs to reset after overflow */
|
|
+ u64 ovfl_notify[PFM_PMD_BV]; /* notify on overflow */
|
|
+ u64 used_pmcs[PFM_PMC_BV]; /* used PMCs */
|
|
+ u64 pmcs[PFM_MAX_PMCS]; /* PMC values */
|
|
+
|
|
+ struct pfm_pmd pmds[PFM_MAX_PMDS];
|
|
+
|
|
+ ktime_t hrtimer_exp; /* switch timeout reference */
|
|
+ ktime_t hrtimer_rem; /* per-thread remainder timeout */
|
|
+
|
|
+ u64 duration_start; /* start time in ns */
|
|
+ u64 duration; /* total active ns */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * common private event set flags (priv_flags)
|
|
+ *
|
|
+ * upper 16 bits: for arch-specific use
|
|
+ * lower 16 bits: for common use
|
|
+ */
|
|
+#define PFM_SETFL_PRIV_MOD_PMDS 0x1 /* PMD register(s) modified */
|
|
+#define PFM_SETFL_PRIV_MOD_PMCS 0x2 /* PMC register(s) modified */
|
|
+#define PFM_SETFL_PRIV_SWITCH 0x4 /* must switch set on restart */
|
|
+#define PFM_SETFL_PRIV_MOD_BOTH (PFM_SETFL_PRIV_MOD_PMDS \
|
|
+ | PFM_SETFL_PRIV_MOD_PMCS)
|
|
+
|
|
+/*
|
|
+ * context flags
|
|
+ */
|
|
+struct pfm_context_flags {
|
|
+ unsigned int block:1; /* task blocks on user notifications */
|
|
+ unsigned int system:1; /* do system wide monitoring */
|
|
+ unsigned int no_msg:1; /* no message sent on overflow */
|
|
+ unsigned int switch_ovfl:1; /* switch set on counter ovfl */
|
|
+ unsigned int switch_time:1; /* switch set on timeout */
|
|
+ unsigned int started:1; /* pfm_start() issued */
|
|
+ unsigned int work_type:2; /* type of work for pfm_handle_work */
|
|
+ unsigned int mmap_nlock:1; /* no lock in pfm_release_buf_space */
|
|
+ unsigned int ia64_v20_compat:1; /* context is IA-64 v2.0 mode */
|
|
+ unsigned int can_restart:8; /* allowed to issue a PFM_RESTART */
|
|
+ unsigned int reset_count:8; /* number of pending resets */
|
|
+ unsigned int is_self:1; /* per-thread and self-monitoring */
|
|
+ unsigned int reserved:5; /* for future use */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * values for work_type (TIF_PERFMON_WORK must be set)
|
|
+ */
|
|
+#define PFM_WORK_NONE 0 /* nothing to do */
|
|
+#define PFM_WORK_RESET 1 /* reset overflowed counters */
|
|
+#define PFM_WORK_BLOCK 2 /* block current thread */
|
|
+#define PFM_WORK_ZOMBIE 3 /* cleanup zombie context */
|
|
+
|
|
+/*
|
|
+ * overflow description argument passed to sampling format
|
|
+ */
|
|
+struct pfm_ovfl_arg {
|
|
+ u16 ovfl_pmd; /* index of overflowed PMD */
|
|
+ u16 active_set; /* set active at the time of the overflow */
|
|
+ u32 ovfl_ctrl; /* control flags */
|
|
+ u64 pmd_last_reset; /* last reset value of overflowed PMD */
|
|
+ u64 smpl_pmds_values[PFM_MAX_PMDS]; /* values of other PMDs */
|
|
+ u64 pmd_eventid; /* eventid associated with PMD */
|
|
+ u16 num_smpl_pmds; /* number of PMDs in smpl_pmds_values */
|
|
+};
|
|
+/*
|
|
+ * depth of message queue
|
|
+ *
|
|
+ * Depth cannot be bigger than 255 (see reset_count)
|
|
+ */
|
|
+#define PFM_MSGS_ORDER 3 /* log2(number of messages) */
|
|
+#define PFM_MSGS_COUNT (1<<PFM_MSGS_ORDER) /* number of messages */
|
|
+#define PFM_MSGQ_MASK (PFM_MSGS_COUNT-1)
|
|
+
|
|
+/*
|
|
+ * perfmon context state
|
|
+ */
|
|
+#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */
|
|
+#define PFM_CTX_LOADED 2 /* context is loaded onto a task */
|
|
+#define PFM_CTX_MASKED 3 /* context is loaded, monitoring is masked */
|
|
+#define PFM_CTX_ZOMBIE 4 /* context lost owner but still attached */
|
|
+
|
|
+/*
|
|
+ * registers description
|
|
+ */
|
|
+struct pfm_regdesc {
|
|
+ u64 pmcs[PFM_PMC_BV]; /* available PMC */
|
|
+ u64 pmds[PFM_PMD_BV]; /* available PMD */
|
|
+ u64 rw_pmds[PFM_PMD_BV]; /* available RW PMD */
|
|
+ u64 intr_pmds[PFM_PMD_BV]; /* PMD generating intr */
|
|
+ u64 cnt_pmds[PFM_PMD_BV]; /* PMD counters */
|
|
+ u16 max_pmc; /* highest+1 avail PMC */
|
|
+ u16 max_pmd; /* highest+1 avail PMD */
|
|
+ u16 max_rw_pmd; /* highest+1 avail RW PMD */
|
|
+ u16 first_intr_pmd; /* first intr PMD */
|
|
+ u16 max_intr_pmd; /* highest+1 intr PMD */
|
|
+ u16 num_rw_pmd; /* number of avail RW PMD */
|
|
+ u16 num_pmcs; /* number of logical PMCS */
|
|
+ u16 num_pmds; /* number of logical PMDS */
|
|
+ u16 num_counters; /* number of counting PMD */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * context: contains all the state of a session
|
|
+ */
|
|
+struct pfm_context {
|
|
+ spinlock_t lock; /* context protection */
|
|
+
|
|
+ struct pfm_context_flags flags;
|
|
+ u32 state; /* current state */
|
|
+ struct task_struct *task; /* attached task */
|
|
+
|
|
+ struct completion restart_complete;/* block on notification */
|
|
+ u64 last_act; /* last activation */
|
|
+ u32 last_cpu; /* last CPU used (SMP only) */
|
|
+ u32 cpu; /* cpu bound to context */
|
|
+
|
|
+ struct pfm_smpl_fmt *smpl_fmt; /* sampling format callbacks */
|
|
+ void *smpl_addr; /* user smpl buffer base */
|
|
+ size_t smpl_size; /* user smpl buffer size */
|
|
+ void *smpl_real_addr;/* actual smpl buffer base */
|
|
+ size_t smpl_real_size; /* actual smpl buffer size */
|
|
+
|
|
+ wait_queue_head_t msgq_wait; /* pfm_read() wait queue */
|
|
+
|
|
+ union pfarg_msg msgq[PFM_MSGS_COUNT];
|
|
+ int msgq_head;
|
|
+ int msgq_tail;
|
|
+
|
|
+ struct fasync_struct *async_queue; /* async notification */
|
|
+
|
|
+ struct pfm_event_set *active_set; /* active set */
|
|
+ struct list_head set_list; /* ordered list of sets */
|
|
+
|
|
+ struct pfm_regdesc regs; /* registers available to context */
|
|
+
|
|
+ /*
|
|
+ * save stack space by allocating temporary variables for
|
|
+ * pfm_overflow_handler() in pfm_context
|
|
+ */
|
|
+ struct pfm_ovfl_arg ovfl_arg;
|
|
+ u64 tmp_ovfl_notify[PFM_PMD_BV];
|
|
+};
|
|
+
|
|
+/*
|
|
+ * ovfl_ctrl bitmask (used by interrupt handler)
|
|
+ */
|
|
+#define PFM_OVFL_CTRL_NOTIFY 0x1 /* notify user */
|
|
+#define PFM_OVFL_CTRL_RESET 0x2 /* reset overflowed pmds */
|
|
+#define PFM_OVFL_CTRL_MASK 0x4 /* mask monitoring */
|
|
+#define PFM_OVFL_CTRL_SWITCH 0x8 /* switch sets */
|
|
+
|
|
+/*
|
|
+ * logging
|
|
+ */
|
|
+#define PFM_ERR(f, x...) printk(KERN_ERR "perfmon: " f "\n", ## x)
|
|
+#define PFM_WARN(f, x...) printk(KERN_WARNING "perfmon: " f "\n", ## x)
|
|
+#define PFM_LOG(f, x...) printk(KERN_NOTICE "perfmon: " f "\n", ## x)
|
|
+#define PFM_INFO(f, x...) printk(KERN_INFO "perfmon: " f "\n", ## x)
|
|
+
|
|
+/*
|
|
+ * debugging
|
|
+ *
|
|
+ * Printk rate limiting is enforced to avoid getting flooded with too many
|
|
+ * error messages on the console (which could render the machine unresponsive).
|
|
+ * To get full debug output (turn off ratelimit):
|
|
+ * $ echo 0 >/proc/sys/kernel/printk_ratelimit
|
|
+ *
|
|
+ * debug is a bitmask where bits are defined as follows:
|
|
+ * bit 0: enable non-interrupt code debug messages
|
|
+ * bit 1: enable interrupt code debug messages
|
|
+ */
|
|
+#ifdef CONFIG_PERFMON_DEBUG
|
|
+#define _PFM_DBG(lm, f, x...) \
|
|
+ do { \
|
|
+ if (unlikely((pfm_controls.debug & (lm)) && printk_ratelimit())) { \
|
|
+ preempt_disable(); \
|
|
+ printk("perfmon: %s.%d: CPU%d [%d]: " f "\n", \
|
|
+ __func__, __LINE__, \
|
|
+ smp_processor_id(), current->pid , ## x); \
|
|
+ preempt_enable(); \
|
|
+ } \
|
|
+ } while (0)
|
|
+
|
|
+#define PFM_DBG(f, x...) _PFM_DBG(0x1, f, ##x)
|
|
+#define PFM_DBG_ovfl(f, x...) _PFM_DBG(0x2, f, ## x)
|
|
+#else
|
|
+#define PFM_DBG(f, x...) do {} while (0)
|
|
+#define PFM_DBG_ovfl(f, x...) do {} while (0)
|
|
+#endif
|
|
+
|
|
+extern struct pfm_pmu_config *pfm_pmu_conf;
|
|
+extern int perfmon_disabled;
|
|
+
|
|
+static inline struct pfm_arch_context *pfm_ctx_arch(struct pfm_context *c)
|
|
+{
|
|
+ return (struct pfm_arch_context *)(c+1);
|
|
+}
|
|
+
|
|
+int pfm_get_args(void __user *ureq, size_t sz, size_t lsz, void *laddr,
|
|
+ void **req, void **to_free);
|
|
+
|
|
+int pfm_get_smpl_arg(char __user *fmt_uname, void __user *uaddr, size_t usize,
|
|
+ void **arg, struct pfm_smpl_fmt **fmt);
|
|
+
|
|
+int __pfm_write_pmcs(struct pfm_context *ctx, struct pfarg_pmc *req,
|
|
+ int count);
|
|
+int __pfm_write_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count,
|
|
+ int compat);
|
|
+int __pfm_read_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count);
|
|
+
|
|
+int __pfm_load_context(struct pfm_context *ctx, struct pfarg_load *req,
|
|
+ struct task_struct *task);
|
|
+int __pfm_unload_context(struct pfm_context *ctx, int *can_release);
|
|
+
|
|
+int __pfm_stop(struct pfm_context *ctx, int *release_info);
|
|
+int __pfm_restart(struct pfm_context *ctx, int *unblock);
|
|
+int __pfm_start(struct pfm_context *ctx, struct pfarg_start *start);
|
|
+
|
|
+void pfm_free_context(struct pfm_context *ctx);
|
|
+
|
|
+void pfm_smpl_buf_space_release(struct pfm_context *ctx, size_t size);
|
|
+
|
|
+int pfm_check_task_state(struct pfm_context *ctx, int check_mask,
|
|
+ unsigned long *flags, void **resume);
|
|
+/*
|
|
+ * check_mask bitmask values for pfm_check_task_state()
|
|
+ */
|
|
+#define PFM_CMD_STOPPED 0x01 /* command needs thread stopped */
|
|
+#define PFM_CMD_UNLOADED 0x02 /* command needs ctx unloaded */
|
|
+#define PFM_CMD_UNLOAD 0x04 /* command is unload */
|
|
+
|
|
+int __pfm_create_context(struct pfarg_ctx *req,
|
|
+ struct pfm_smpl_fmt *fmt,
|
|
+ void *fmt_arg,
|
|
+ int mode,
|
|
+ struct pfm_context **new_ctx);
|
|
+
|
|
+struct pfm_event_set *pfm_find_set(struct pfm_context *ctx, u16 set_id,
|
|
+ int alloc);
|
|
+
|
|
+int pfm_pmu_conf_get(int autoload);
|
|
+void pfm_pmu_conf_put(void);
|
|
+
|
|
+int pfm_session_allcpus_acquire(void);
|
|
+void pfm_session_allcpus_release(void);
|
|
+
|
|
+int pfm_smpl_buf_alloc(struct pfm_context *ctx, size_t rsize);
|
|
+void pfm_smpl_buf_free(struct pfm_context *ctx);
|
|
+
|
|
+struct pfm_smpl_fmt *pfm_smpl_fmt_get(char *name);
|
|
+void pfm_smpl_fmt_put(struct pfm_smpl_fmt *fmt);
|
|
+
|
|
+void pfm_interrupt_handler(unsigned long iip, struct pt_regs *regs);
|
|
+
|
|
+void pfm_resume_task(struct task_struct *t, void *data);
|
|
+
|
|
+#include <linux/perfmon_pmu.h>
|
|
+#include <linux/perfmon_fmt.h>
|
|
+
|
|
+extern const struct file_operations pfm_file_ops;
|
|
+/*
|
|
+ * upper limit for count in calls that take vector arguments. This is used
|
|
+ * to prevent multiplication overflow when we compute actual storage size
|
|
+ */
|
|
+#define PFM_MAX_ARG_COUNT(m) (INT_MAX/sizeof(*(m)))
|
|
+
|
|
+#define cast_ulp(_x) ((unsigned long *)(_x))
|
|
+
|
|
+#define PFM_NORMAL 0
|
|
+#define PFM_COMPAT 1
|
|
+
|
|
+void __pfm_exit_thread(void);
|
|
+void pfm_ctxsw_in(struct task_struct *prev, struct task_struct *next);
|
|
+void pfm_ctxsw_out(struct task_struct *prev, struct task_struct *next);
|
|
+void pfm_handle_work(struct pt_regs *regs);
|
|
+void __pfm_init_percpu(void *dummy);
|
|
+void pfm_save_pmds(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+
|
|
+static inline void pfm_exit_thread(void)
|
|
+{
|
|
+ if (current->pfm_context)
|
|
+ __pfm_exit_thread();
|
|
+}
|
|
+
|
|
+/*
|
|
+ * include arch-specific kernel level definitions
|
|
+ */
|
|
+#include <asm/perfmon_kern.h>
|
|
+
|
|
+static inline void pfm_copy_thread(struct task_struct *task)
|
|
+{
|
|
+ /*
|
|
+ * context or perfmon TIF state is NEVER inherited
|
|
+ * in child task. Holds for per-thread and system-wide
|
|
+ */
|
|
+ task->pfm_context = NULL;
|
|
+ clear_tsk_thread_flag(task, TIF_PERFMON_CTXSW);
|
|
+ clear_tsk_thread_flag(task, TIF_PERFMON_WORK);
|
|
+ pfm_arch_disarm_handle_work(task);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * read a single PMD register.
|
|
+ *
|
|
+ * virtual PMD registers have special handler.
|
|
+ * Depends on definitions in asm/perfmon_kern.h
|
|
+ */
|
|
+static inline u64 pfm_read_pmd(struct pfm_context *ctx, unsigned int cnum)
|
|
+{
|
|
+ if (unlikely(pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V))
|
|
+ return pfm_pmu_conf->pmd_sread(ctx, cnum);
|
|
+
|
|
+ return pfm_arch_read_pmd(ctx, cnum);
|
|
+}
|
|
+/*
|
|
+ * write a single PMD register.
|
|
+ *
|
|
+ * virtual PMD registers have special handler.
|
|
+ * Depends on definitions in asm/perfmon_kern.h
|
|
+ */
|
|
+static inline void pfm_write_pmd(struct pfm_context *ctx, unsigned int cnum,
|
|
+ u64 value)
|
|
+{
|
|
+ /*
|
|
+ * PMD writes are ignored for read-only registers
|
|
+ */
|
|
+ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_RO)
|
|
+ return;
|
|
+
|
|
+ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V) {
|
|
+ pfm_pmu_conf->pmd_swrite(ctx, cnum, value);
|
|
+ return;
|
|
+ }
|
|
+ /*
|
|
+ * clear unimplemented bits
|
|
+ */
|
|
+ value &= ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk;
|
|
+
|
|
+ pfm_arch_write_pmd(ctx, cnum, value);
|
|
+}
|
|
+
|
|
+void __pfm_init_percpu(void *dummy);
|
|
+
|
|
+static inline void pfm_init_percpu(void)
|
|
+{
|
|
+ __pfm_init_percpu(NULL);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * pfm statistics are available via debugfs
|
|
+ * and perfmon subdir.
|
|
+ *
|
|
+ * When adding/removing new stats, make sure you also
|
|
+ * update the name table in perfmon_debugfs.c
|
|
+ */
|
|
+enum pfm_stats_names {
|
|
+ PFM_ST_ovfl_intr_all_count = 0,
|
|
+ PFM_ST_ovfl_intr_ns,
|
|
+ PFM_ST_ovfl_intr_spurious_count,
|
|
+ PFM_ST_ovfl_intr_replay_count,
|
|
+ PFM_ST_ovfl_intr_regular_count,
|
|
+ PFM_ST_handle_work_count,
|
|
+ PFM_ST_ovfl_notify_count,
|
|
+ PFM_ST_reset_pmds_count,
|
|
+ PFM_ST_pfm_restart_count,
|
|
+ PFM_ST_fmt_handler_calls,
|
|
+ PFM_ST_fmt_handler_ns,
|
|
+ PFM_ST_set_switch_count,
|
|
+ PFM_ST_set_switch_ns,
|
|
+ PFM_ST_set_switch_exp,
|
|
+ PFM_ST_ctxswin_count,
|
|
+ PFM_ST_ctxswin_ns,
|
|
+ PFM_ST_handle_timeout_count,
|
|
+ PFM_ST_ovfl_intr_nmi_count,
|
|
+ PFM_ST_ctxswout_count,
|
|
+ PFM_ST_ctxswout_ns,
|
|
+ PFM_ST_LAST /* last entry marked */
|
|
+};
|
|
+#define PFM_NUM_STATS PFM_ST_LAST
|
|
+
|
|
+struct pfm_stats {
|
|
+ u64 v[PFM_NUM_STATS];
|
|
+ struct dentry *dirs[PFM_NUM_STATS];
|
|
+ struct dentry *cpu_dir;
|
|
+ char cpu_name[8];
|
|
+};
|
|
+
|
|
+#ifdef CONFIG_PERFMON_DEBUG_FS
|
|
+#define pfm_stats_get(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x]
|
|
+#define pfm_stats_inc(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x]++
|
|
+#define pfm_stats_add(x, y) __get_cpu_var(pfm_stats).v[PFM_ST_##x] += (y)
|
|
+void pfm_reset_stats(int cpu);
|
|
+#else
|
|
+#define pfm_stats_get(x)
|
|
+#define pfm_stats_inc(x)
|
|
+#define pfm_stats_add(x, y)
|
|
+static inline void pfm_reset_stats(int cpu)
|
|
+{}
|
|
+#endif
|
|
+
|
|
+
|
|
+
|
|
+DECLARE_PER_CPU(struct pfm_context *, pmu_ctx);
|
|
+DECLARE_PER_CPU(struct pfm_stats, pfm_stats);
|
|
+DECLARE_PER_CPU(struct task_struct *, pmu_owner);
|
|
+
|
|
+void pfm_cpu_disable(void);
|
|
+
|
|
+
|
|
+/*
|
|
+ * max vector argument elements for local storage (no kmalloc/kfree)
|
|
+ * The PFM_ARCH_PM*_STK_ARG should be defined in asm/perfmon_kern.h.
|
|
+ * If not, default (conservative) values are used
|
|
+ */
|
|
+#ifndef PFM_ARCH_PMC_STK_ARG
|
|
+#define PFM_ARCH_PMC_STK_ARG 1
|
|
+#endif
|
|
+
|
|
+#ifndef PFM_ARCH_PMD_STK_ARG
|
|
+#define PFM_ARCH_PMD_STK_ARG 1
|
|
+#endif
|
|
+
|
|
+#define PFM_PMC_STK_ARG PFM_ARCH_PMC_STK_ARG
|
|
+#define PFM_PMD_STK_ARG PFM_ARCH_PMD_STK_ARG
|
|
+
|
|
+#else /* !CONFIG_PERFMON */
|
|
+
|
|
+
|
|
+/*
|
|
+ * perfmon hooks are nops when CONFIG_PERFMON is undefined
|
|
+ */
|
|
+static inline void pfm_cpu_disable(void)
|
|
+{}
|
|
+
|
|
+static inline void pfm_exit_thread(void)
|
|
+{}
|
|
+
|
|
+static inline void pfm_handle_work(struct pt_regs *regs)
|
|
+{}
|
|
+
|
|
+static inline void pfm_copy_thread(struct task_struct *t)
|
|
+{}
|
|
+
|
|
+static inline void pfm_ctxsw_in(struct task_struct *p, struct task_struct *n)
|
|
+{}
|
|
+
|
|
+static inline void pfm_ctxsw_out(struct task_struct *p, struct task_struct *n)
|
|
+{}
|
|
+
|
|
+static inline void pfm_session_allcpus_release(void)
|
|
+{}
|
|
+
|
|
+static inline int pfm_session_allcpus_acquire(void)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_init_percpu(void)
|
|
+{}
|
|
+
|
|
+#endif /* CONFIG_PERFMON */
|
|
+
|
|
+#endif /* __LINUX_PERFMON_KERN_H__ */
|
|
diff --git a/include/linux/perfmon_pmu.h b/include/linux/perfmon_pmu.h
|
|
new file mode 100644
|
|
index 0000000..3f5f9e8
|
|
--- /dev/null
|
|
+++ b/include/linux/perfmon_pmu.h
|
|
@@ -0,0 +1,192 @@
|
|
+/*
|
|
+ * Copyright (c) 2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * Interface for PMU description modules
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#ifndef __PERFMON_PMU_H__
|
|
+#define __PERFMON_PMU_H__ 1
|
|
+
|
|
+/*
|
|
+ * generic information about a PMC or PMD register
|
|
+ *
|
|
+ * Dependency bitmasks:
|
|
+ * They are used to allow lazy save/restore in the context switch
|
|
+ * code. To avoid picking up stale configuration from a previous
|
|
+ * thread. Using the bitmask, the generic read/write routines can
|
|
+ * ensure that all registers needed to support the measurement are
|
|
+ * restored properly on context switch in.
|
|
+ */
|
|
+struct pfm_regmap_desc {
|
|
+ u16 type; /* role of the register */
|
|
+ u16 reserved1; /* for future use */
|
|
+ u32 reserved2; /* for future use */
|
|
+ u64 dfl_val; /* power-on default value (quiescent) */
|
|
+ u64 rsvd_msk; /* reserved bits: 1 means reserved */
|
|
+ u64 no_emul64_msk; /* bits to clear for PFM_REGFL_NO_EMUL64 */
|
|
+ unsigned long hw_addr; /* HW register address or index */
|
|
+ struct kobject kobj; /* for internal use only */
|
|
+ char *desc; /* HW register description string */
|
|
+ u64 dep_pmcs[PFM_PMC_BV];/* depending PMC registers */
|
|
+};
|
|
+#define to_reg(n) container_of(n, struct pfm_regmap_desc, kobj)
|
|
+
|
|
+/*
|
|
+ * pfm_regmap_desc helper macros
|
|
+ */
|
|
+#define PMC_D(t, d, v, r, n, h) \
|
|
+ { .type = t, \
|
|
+ .desc = d, \
|
|
+ .dfl_val = v, \
|
|
+ .rsvd_msk = r, \
|
|
+ .no_emul64_msk = n, \
|
|
+ .hw_addr = h \
|
|
+ }
|
|
+
|
|
+#define PMD_D(t, d, h) \
|
|
+ { .type = t, \
|
|
+ .desc = d, \
|
|
+ .rsvd_msk = 0, \
|
|
+ .no_emul64_msk = 0, \
|
|
+ .hw_addr = h \
|
|
+ }
|
|
+
|
|
+#define PMD_DR(t, d, h, r) \
|
|
+ { .type = t, \
|
|
+ .desc = d, \
|
|
+ .rsvd_msk = r, \
|
|
+ .no_emul64_msk = 0, \
|
|
+ .hw_addr = h \
|
|
+ }
|
|
+
|
|
+#define PMX_NA \
|
|
+ { .type = PFM_REG_NA }
|
|
+
|
|
+#define PMD_DP(t, d, h, p) \
|
|
+ { .type = t, \
|
|
+ .desc = d, \
|
|
+ .rsvd_msk = 0, \
|
|
+ .no_emul64_msk = 0, \
|
|
+ .dep_pmcs[0] = p, \
|
|
+ .hw_addr = h \
|
|
+ }
|
|
+
|
|
+/*
|
|
+ * type of a PMU register (16-bit bitmask) for use with pfm_regmap_desc.type
|
|
+ */
|
|
+#define PFM_REG_NA 0x00 /* not avail. (not impl.,no access) must be 0 */
|
|
+#define PFM_REG_I 0x01 /* PMC/PMD: implemented */
|
|
+#define PFM_REG_WC 0x02 /* PMC: has write_checker */
|
|
+#define PFM_REG_C64 0x04 /* PMD: 64-bit virtualization */
|
|
+#define PFM_REG_RO 0x08 /* PMD: read-only (writes ignored) */
|
|
+#define PFM_REG_V 0x10 /* PMD: virtual reg */
|
|
+#define PFM_REG_INTR 0x20 /* PMD: register can generate interrupt */
|
|
+#define PFM_REG_SYS 0x40 /* PMC/PMD: register is for system-wide only */
|
|
+#define PFM_REG_THR 0x80 /* PMC/PMD: register is for per-thread only */
|
|
+#define PFM_REG_NO64 0x100 /* PMC: supports PFM_REGFL_NO_EMUL64 */
|
|
+
|
|
+/*
|
|
+ * define some shortcuts for common types
|
|
+ */
|
|
+#define PFM_REG_W (PFM_REG_WC|PFM_REG_I)
|
|
+#define PFM_REG_W64 (PFM_REG_WC|PFM_REG_NO64|PFM_REG_I)
|
|
+#define PFM_REG_C (PFM_REG_C64|PFM_REG_INTR|PFM_REG_I)
|
|
+#define PFM_REG_I64 (PFM_REG_NO64|PFM_REG_I)
|
|
+#define PFM_REG_IRO (PFM_REG_I|PFM_REG_RO)
|
|
+
|
|
+typedef int (*pfm_pmc_check_t)(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ struct pfarg_pmc *req);
|
|
+
|
|
+typedef int (*pfm_pmd_check_t)(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ struct pfarg_pmd *req);
|
|
+
|
|
+
|
|
+typedef u64 (*pfm_sread_t)(struct pfm_context *ctx, unsigned int cnum);
|
|
+typedef void (*pfm_swrite_t)(struct pfm_context *ctx, unsigned int cnum, u64 val);
|
|
+
|
|
+/*
|
|
+ * structure used by pmu description modules
|
|
+ *
|
|
+ * probe_pmu() routine return value:
|
|
+ * - 1 means recognized PMU
|
|
+ * - 0 means not recognized PMU
|
|
+ */
|
|
+struct pfm_pmu_config {
|
|
+ char *pmu_name; /* PMU family name */
|
|
+ char *version; /* config module version */
|
|
+
|
|
+ int counter_width; /* width of hardware counter */
|
|
+
|
|
+ struct pfm_regmap_desc *pmc_desc; /* PMC register descriptions */
|
|
+ struct pfm_regmap_desc *pmd_desc; /* PMD register descriptions */
|
|
+
|
|
+ pfm_pmc_check_t pmc_write_check;/* write checker (optional) */
|
|
+ pfm_pmd_check_t pmd_write_check;/* write checker (optional) */
|
|
+ pfm_pmd_check_t pmd_read_check; /* read checker (optional) */
|
|
+
|
|
+ pfm_sread_t pmd_sread; /* virtual pmd read */
|
|
+ pfm_swrite_t pmd_swrite; /* virtual pmd write */
|
|
+
|
|
+ int (*probe_pmu)(void);/* probe PMU routine */
|
|
+
|
|
+ u16 num_pmc_entries;/* #entries in pmc_desc */
|
|
+ u16 num_pmd_entries;/* #entries in pmd_desc */
|
|
+
|
|
+ void *pmu_info; /* model-specific infos */
|
|
+ u32 flags; /* set of flags */
|
|
+
|
|
+ struct module *owner; /* pointer to module struct */
|
|
+
|
|
+ /*
|
|
+ * fields computed internally, do not set in module
|
|
+ */
|
|
+ struct pfm_regdesc regs_all; /* regs available to all */
|
|
+ struct pfm_regdesc regs_thr; /* regs avail per-thread */
|
|
+ struct pfm_regdesc regs_sys; /* regs avail system-wide */
|
|
+
|
|
+ u64 ovfl_mask; /* overflow mask */
|
|
+};
|
|
+
|
|
+static inline void *pfm_pmu_info(void)
|
|
+{
|
|
+ return pfm_pmu_conf->pmu_info;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * pfm_pmu_config flags
|
|
+ */
|
|
+#define PFM_PMUFL_IS_BUILTIN 0x1 /* pmu config is compiled in */
|
|
+
|
|
+/*
|
|
+ * we need to know whether the PMU description is builtin or compiled
|
|
+ * as a module
|
|
+ */
|
|
+#ifdef MODULE
|
|
+#define PFM_PMU_BUILTIN_FLAG 0 /* built as a loadable module */
|
|
+#else
|
|
+#define PFM_PMU_BUILTIN_FLAG PFM_PMUFL_IS_BUILTIN /* compiled into the kernel */
|
|
+#endif
|
|
+
|
|
+int pfm_pmu_register(struct pfm_pmu_config *cfg);
|
|
+void pfm_pmu_unregister(struct pfm_pmu_config *cfg);
|
|
+
|
|
+int pfm_sysfs_remove_pmu(struct pfm_pmu_config *pmu);
|
|
+int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu);
|
|
+
|
|
+#endif /* __PERFMON_PMU_H__ */
|
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
|
index 3d9120c..8fb3b55 100644
|
|
--- a/include/linux/sched.h
|
|
+++ b/include/linux/sched.h
|
|
@@ -96,6 +96,7 @@ struct exec_domain;
|
|
struct futex_pi_state;
|
|
struct robust_list_head;
|
|
struct bio;
|
|
+struct pfm_context;
|
|
|
|
/*
|
|
* List of flags we want to share for kernel threads,
|
|
@@ -1301,6 +1302,9 @@ struct task_struct {
|
|
int latency_record_count;
|
|
struct latency_record latency_record[LT_SAVECOUNT];
|
|
#endif
|
|
+#ifdef CONFIG_PERFMON
|
|
+ struct pfm_context *pfm_context;
|
|
+#endif
|
|
};
|
|
|
|
/*
|
|
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
|
|
index d6ff145..e308523 100644
|
|
--- a/include/linux/syscalls.h
|
|
+++ b/include/linux/syscalls.h
|
|
@@ -29,6 +29,13 @@ struct msqid_ds;
|
|
struct new_utsname;
|
|
struct nfsctl_arg;
|
|
struct __old_kernel_stat;
|
|
+struct pfarg_ctx;
|
|
+struct pfarg_pmc;
|
|
+struct pfarg_pmd;
|
|
+struct pfarg_start;
|
|
+struct pfarg_load;
|
|
+struct pfarg_setinfo;
|
|
+struct pfarg_setdesc;
|
|
struct pollfd;
|
|
struct rlimit;
|
|
struct rusage;
|
|
@@ -625,4 +632,27 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
|
|
|
|
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
|
|
|
|
+asmlinkage long sys_pfm_create_context(struct pfarg_ctx __user *ureq,
|
|
+ void __user *uarg, size_t smpl_size);
|
|
+asmlinkage long sys_pfm_write_pmcs(int fd, struct pfarg_pmc __user *ureq,
|
|
+ int count);
|
|
+asmlinkage long sys_pfm_write_pmds(int fd, struct pfarg_pmd __user *ureq,
|
|
+ int count);
|
|
+asmlinkage long sys_pfm_read_pmds(int fd, struct pfarg_pmd __user *ureq,
|
|
+ int count);
|
|
+asmlinkage long sys_pfm_restart(int fd);
|
|
+asmlinkage long sys_pfm_stop(int fd);
|
|
+asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *ureq);
|
|
+asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ureq);
|
|
+asmlinkage long sys_pfm_unload_context(int fd);
|
|
+asmlinkage long sys_pfm_delete_evtsets(int fd,
|
|
+ struct pfarg_setinfo __user *ureq,
|
|
+ int count);
|
|
+asmlinkage long sys_pfm_create_evtsets(int fd,
|
|
+ struct pfarg_setdesc __user *ureq,
|
|
+ int count);
|
|
+asmlinkage long sys_pfm_getinfo_evtsets(int fd,
|
|
+ struct pfarg_setinfo __user *ureq,
|
|
+ int count);
|
|
+
|
|
#endif
|
|
diff --git a/kernel/sched.c b/kernel/sched.c
|
|
index ad1962d..1bc8fcf 100644
|
|
--- a/kernel/sched.c
|
|
+++ b/kernel/sched.c
|
|
@@ -71,6 +71,7 @@
|
|
#include <linux/debugfs.h>
|
|
#include <linux/ctype.h>
|
|
#include <linux/ftrace.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
|
|
#include <asm/tlb.h>
|
|
#include <asm/irq_regs.h>
|
|
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
|
|
index 08d6e1b..61f4155 100644
|
|
--- a/kernel/sys_ni.c
|
|
+++ b/kernel/sys_ni.c
|
|
@@ -126,6 +126,19 @@ cond_syscall(sys_vm86);
|
|
cond_syscall(compat_sys_ipc);
|
|
cond_syscall(compat_sys_sysctl);
|
|
|
|
+cond_syscall(sys_pfm_create_context);
|
|
+cond_syscall(sys_pfm_write_pmcs);
|
|
+cond_syscall(sys_pfm_write_pmds);
|
|
+cond_syscall(sys_pfm_read_pmds);
|
|
+cond_syscall(sys_pfm_restart);
|
|
+cond_syscall(sys_pfm_start);
|
|
+cond_syscall(sys_pfm_stop);
|
|
+cond_syscall(sys_pfm_load_context);
|
|
+cond_syscall(sys_pfm_unload_context);
|
|
+cond_syscall(sys_pfm_create_evtsets);
|
|
+cond_syscall(sys_pfm_delete_evtsets);
|
|
+cond_syscall(sys_pfm_getinfo_evtsets);
|
|
+
|
|
/* arch-specific weak syscall entries */
|
|
cond_syscall(sys_pciconfig_read);
|
|
cond_syscall(sys_pciconfig_write);
|
|
diff --git a/perfmon/Makefile b/perfmon/Makefile
|
|
new file mode 100644
|
|
index 0000000..32ff037
|
|
--- /dev/null
|
|
+++ b/perfmon/Makefile
|
|
@@ -0,0 +1,12 @@
|
|
+#
|
|
+# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P.
|
|
+# Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+#
|
|
+obj-y = perfmon_init.o perfmon_rw.o perfmon_res.o \
|
|
+ perfmon_pmu.o perfmon_sysfs.o perfmon_syscalls.o \
|
|
+ perfmon_file.o perfmon_ctxsw.o perfmon_intr.o \
|
|
+ perfmon_dfl_smpl.o perfmon_sets.o perfmon_hotplug.o \
|
|
+ perfmon_msg.o perfmon_smpl.o perfmon_attach.o \
|
|
+ perfmon_activate.o perfmon_ctx.o perfmon_fmt.o
|
|
+
|
|
+obj-$(CONFIG_PERFMON_DEBUG_FS) += perfmon_debugfs.o
|
|
diff --git a/perfmon/perfmon_activate.c b/perfmon/perfmon_activate.c
|
|
new file mode 100644
|
|
index 0000000..d9f501d
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_activate.c
|
|
@@ -0,0 +1,265 @@
|
|
+/*
|
|
+ * perfmon_activate.c: perfmon2 start/stop functions
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+/**
|
|
+ * __pfm_start - activate monitoring
|
|
+ * @ctx: context to operate on
|
|
+ * @start: pfarg_start as passed by user
|
|
+ *
|
|
+ * When operating in per-thread mode and not self-monitoring, the monitored
|
|
+ * thread must be stopped. Activation will be effective next time the thread
|
|
+ * is context switched in.
|
|
+ *
|
|
+ * The pfarg_start argument is optional and may be used to designate
|
|
+ * the initial event set to activate. When not provided, the last active
|
|
+ * set is used. For the first activation, set0 is used when start is NULL.
|
|
+ *
|
|
+ * On some architectures, e.g., IA-64, it may be possible to start monitoring
|
|
+ * without calling this function under certain conditions (per-thread and self
|
|
+ * monitoring). In this case, either set0 or the last active set is used.
|
|
+ *
|
|
+ * the context is locked and interrupts are disabled.
|
|
+ */
|
|
+int __pfm_start(struct pfm_context *ctx, struct pfarg_start *start)
|
|
+{
|
|
+ struct task_struct *task, *owner_task;
|
|
+ struct pfm_event_set *new_set, *old_set;
|
|
+ int is_self;
|
|
+
|
|
+ task = ctx->task;
|
|
+
|
|
+ /*
|
|
+ * UNLOADED: error
|
|
+ * LOADED : normal start, nop if started unless set is different
|
|
+ * MASKED : nop or change set when unmasking
|
|
+ * ZOMBIE : cannot happen
|
|
+ */
|
|
+ if (ctx->state == PFM_CTX_UNLOADED)
|
|
+ return -EINVAL;
|
|
+
|
|
+ old_set = new_set = ctx->active_set;
|
|
+
|
|
+ /*
|
|
+ * always the case for system-wide
|
|
+ */
|
|
+ if (task == NULL)
|
|
+ task = current;
|
|
+
|
|
+ is_self = task == current;
|
|
+
|
|
+ /*
|
|
+ * argument is provided?
|
|
+ */
|
|
+ if (start) {
|
|
+ /*
|
|
+ * find the set to load first
|
|
+ */
|
|
+ new_set = pfm_find_set(ctx, start->start_set, 0);
|
|
+ if (new_set == NULL) {
|
|
+ PFM_DBG("event set%u does not exist",
|
|
+ start->start_set);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ PFM_DBG("cur_set=%u req_set=%u", old_set->id, new_set->id);
|
|
+
|
|
+ /*
|
|
+ * if we need to change the active set we need
|
|
+ * to check if we can access the PMU
|
|
+ */
|
|
+ if (new_set != old_set) {
|
|
+
|
|
+ owner_task = __get_cpu_var(pmu_owner);
|
|
+ /*
|
|
+ * system-wide: must run on the right CPU
|
|
+ * per-thread : must be the owner of the PMU context
|
|
+ *
|
|
+ * pfm_switch_sets() returns with monitoring stopped
|
|
+ */
|
|
+ if (is_self) {
|
|
+ pfm_switch_sets(ctx, new_set, PFM_PMD_RESET_LONG, 1);
|
|
+ } else {
|
|
+ /*
|
|
+ * In a UP kernel, the PMU may contain the state
|
|
+ * of the task we want to operate on, yet the task
|
|
+ * may be switched out (lazy save). We need to save
|
|
+ * current state (old_set), switch active_set and
|
|
+ * mark it for reload.
|
|
+ */
|
|
+ if (owner_task == task)
|
|
+ pfm_save_pmds(ctx, old_set);
|
|
+ ctx->active_set = new_set;
|
|
+ new_set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * mark as started
|
|
+ * must be done before calling pfm_arch_start()
|
|
+ */
|
|
+ ctx->flags.started = 1;
|
|
+
|
|
+ pfm_arch_start(task, ctx);
|
|
+
|
|
+ /*
|
|
+ * we check whether we had a pending ovfl before restarting.
|
|
+ * If so we need to regenerate the interrupt to make sure we
|
|
+ * keep recorded samples. For non-self monitoring this check
|
|
+ * is done in the pfm_ctxswin_thread() routine.
|
|
+ *
|
|
+ * we check new_set/old_set because pfm_switch_sets() already
|
|
+ * takes care of replaying the pending interrupts
|
|
+ */
|
|
+ if (is_self && new_set != old_set && new_set->npend_ovfls) {
|
|
+ pfm_arch_resend_irq(ctx);
|
|
+ pfm_stats_inc(ovfl_intr_replay_count);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * always start with full timeout
|
|
+ */
|
|
+ new_set->hrtimer_rem = new_set->hrtimer_exp;
|
|
+
|
|
+ /*
|
|
+ * activate timeout for system-wide, self-monitoring
|
|
+ * Always start with full timeout
|
|
+ * Timeout is at least one tick away, so no risk of
|
|
+ * having hrtimer_start() trying to wakeup softirqd
|
|
+ * and thus causing troubles. This cannot happen anyway
|
|
+ * because cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ
|
|
+ */
|
|
+ if (is_self && new_set->flags & PFM_SETFL_TIME_SWITCH) {
|
|
+ hrtimer_start(&__get_cpu_var(pfm_hrtimer),
|
|
+ new_set->hrtimer_rem,
|
|
+ HRTIMER_MODE_REL);
|
|
+
|
|
+ PFM_DBG("set%u started timeout=%lld",
|
|
+ new_set->id,
|
|
+ (unsigned long long)new_set->hrtimer_rem.tv64);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * we restart total duration even if context was
|
|
+ * already started. In that case, counts are simply
|
|
+ * reset.
|
|
+ *
|
|
+ * For per-thread, if not self-monitoring, the statement
|
|
+ * below will have no effect because thread is stopped.
|
|
+ * The field is reset on ctxsw in.
|
|
+ */
|
|
+ new_set->duration_start = sched_clock();
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __pfm_stop - stop monitoring
|
|
+ * @ctx: context to operate on
|
|
+ * @release_info: infos for caller (see below)
|
|
+ *
|
|
+ * When operating in per-thread mode and when not self-monitoring,
|
|
+ * the monitored thread must be stopped.
|
|
+ *
|
|
+ * the context is locked and interrupts are disabled.
|
|
+ *
|
|
+ * release_info value upon return:
|
|
+ * - bit 0 : unused
|
|
+ * - bit 1 : when set, must cancel hrtimer
|
|
+ */
|
|
+int __pfm_stop(struct pfm_context *ctx, int *release_info)
|
|
+{
|
|
+ struct pfm_event_set *set;
|
|
+ struct task_struct *task;
|
|
+ u64 now;
|
|
+ int state;
|
|
+
|
|
+ *release_info = 0;
|
|
+
|
|
+ now = sched_clock();
|
|
+ state = ctx->state;
|
|
+ set = ctx->active_set;
|
|
+
|
|
+ /*
|
|
+ * context must be attached (zombie cannot happen)
|
|
+ */
|
|
+ if (state == PFM_CTX_UNLOADED)
|
|
+ return -EINVAL;
|
|
+
|
|
+ task = ctx->task;
|
|
+
|
|
+ PFM_DBG("ctx_task=[%d] ctx_state=%d is_system=%d",
|
|
+ task ? task->pid : -1,
|
|
+ state,
|
|
+ !task);
|
|
+
|
|
+ /*
|
|
+ * this happens for system-wide context
|
|
+ */
|
|
+ if (task == NULL)
|
|
+ task = current;
|
|
+
|
|
+ /*
|
|
+ * compute elapsed time
|
|
+ *
|
|
+ * unless masked, compute elapsed duration, stop timeout
|
|
+ */
|
|
+ if (task == current && state == PFM_CTX_LOADED) {
|
|
+ /*
|
|
+ * timeout cancel must be deferred until context is
|
|
+ * unlocked to avoid race with pfm_handle_switch_timeout()
|
|
+ */
|
|
+ if (set->flags & PFM_SETFL_TIME_SWITCH)
|
|
+ *release_info |= 0x2;
|
|
+
|
|
+ set->duration += now - set->duration_start;
|
|
+ }
|
|
+
|
|
+ pfm_arch_stop(task, ctx);
|
|
+
|
|
+ ctx->flags.started = 0;
|
|
+ /*
|
|
+ * starting now, in-flight PMU interrupt for this context
|
|
+ * are treated as spurious
|
|
+ */
|
|
+ return 0;
|
|
+}
|
|
diff --git a/perfmon/perfmon_attach.c b/perfmon/perfmon_attach.c
|
|
new file mode 100644
|
|
index 0000000..bbd1d1e
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_attach.c
|
|
@@ -0,0 +1,474 @@
|
|
+/*
|
|
+ * perfmon_attach.c: perfmon2 load/unload functions
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/fs.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+/**
|
|
+ * pfm_load_ctx_sys - attach context to a CPU in system-wide mode
|
|
+ * @ctx: context to operate on
|
|
+ * @set_id: set to activate first
|
|
+ * @cpu: CPU to monitor
|
|
+ *
|
|
+ * The cpu specified in the pfarg_load.load_pid argument must be the current
|
|
+ * CPU.
|
|
+ *
|
|
+ * The function must be called with the context locked and interrupts disabled.
|
|
+ */
|
|
+static int pfm_load_ctx_sys(struct pfm_context *ctx, u16 set_id, u32 cpu)
|
|
+{
|
|
+ struct pfm_event_set *set;
|
|
+ int mycpu;
|
|
+ int ret;
|
|
+
|
|
+ mycpu = smp_processor_id();
|
|
+
|
|
+ /*
|
|
+ * system-wide: check we are running on the desired CPU
|
|
+ */
|
|
+ if (cpu != mycpu) {
|
|
+ PFM_DBG("wrong CPU: asking %u but on %u", cpu, mycpu);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * initialize sets
|
|
+ */
|
|
+ set = pfm_prepare_sets(ctx, set_id);
|
|
+ if (!set) {
|
|
+ PFM_DBG("event set%u does not exist", set_id);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ PFM_DBG("set=%u set_flags=0x%x", set->id, set->flags);
|
|
+
|
|
+ ctx->cpu = mycpu;
|
|
+ ctx->task = NULL;
|
|
+ ctx->active_set = set;
|
|
+
|
|
+ /*
|
|
+ * perform any architecture specific actions
|
|
+ */
|
|
+ ret = pfm_arch_load_context(ctx);
|
|
+ if (ret)
|
|
+ goto error_noload;
|
|
+
|
|
+ /*
|
|
+ * now reserve the session, before we can proceed with
|
|
+ * actually accessing the PMU hardware
|
|
+ */
|
|
+ ret = pfm_session_acquire(1, mycpu);
|
|
+ if (ret)
|
|
+ goto error;
|
|
+
|
|
+
|
|
+ /*
|
|
+ * caller must be on monitored CPU to access PMU, thus this is
|
|
+ * a form of self-monitoring
|
|
+ */
|
|
+ ctx->flags.is_self = 1;
|
|
+
|
|
+ set->runs++;
|
|
+
|
|
+ /*
|
|
+ * load PMD from set
|
|
+ * load PMC from set
|
|
+ */
|
|
+ pfm_arch_restore_pmds(ctx, set);
|
|
+ pfm_arch_restore_pmcs(ctx, set);
|
|
+
|
|
+ /*
|
|
+ * set new ownership
|
|
+ */
|
|
+ pfm_set_pmu_owner(NULL, ctx);
|
|
+
|
|
+ /*
|
|
+ * reset pending work
|
|
+ */
|
|
+ ctx->flags.work_type = PFM_WORK_NONE;
|
|
+ ctx->flags.reset_count = 0;
|
|
+
|
|
+ /*
|
|
+ * reset message queue
|
|
+ */
|
|
+ ctx->msgq_head = ctx->msgq_tail = 0;
|
|
+
|
|
+ ctx->state = PFM_CTX_LOADED;
|
|
+
|
|
+ return 0;
|
|
+error:
|
|
+ pfm_arch_unload_context(ctx);
|
|
+error_noload:
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_load_ctx_thread - attach context to a thread
|
|
+ * @ctx: context to operate on
|
|
+ * @set_id: first set
|
|
+ * @task: thread to attach to
|
|
+ *
|
|
+ * The function must be called with the context locked and interrupts disabled.
|
|
+ */
|
|
+static int pfm_load_ctx_thread(struct pfm_context *ctx, u16 set_id,
|
|
+ struct task_struct *task)
|
|
+{
|
|
+ struct pfm_event_set *set;
|
|
+ struct pfm_context *old;
|
|
+ int ret;
|
|
+
|
|
+ PFM_DBG("load_pid=%d set=%u", task->pid, set_id);
|
|
+ /*
|
|
+ * per-thread:
|
|
+ * - task to attach to is checked in sys_pfm_load_context() to avoid
|
|
+ * locking issues. if found, and not self, task refcount was
|
|
+ * incremented.
|
|
+ */
|
|
+ old = cmpxchg(&task->pfm_context, NULL, ctx);
|
|
+ if (old) {
|
|
+ PFM_DBG("load_pid=%d has a context "
|
|
+ "old=%p new=%p cur=%p",
|
|
+ task->pid,
|
|
+ old,
|
|
+ ctx,
|
|
+ task->pfm_context);
|
|
+ return -EEXIST;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * initialize sets
|
|
+ */
|
|
+ set = pfm_prepare_sets(ctx, set_id);
|
|
+ if (!set) {
|
|
+ PFM_DBG("event set%u does not exist", set_id);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+
|
|
+ ctx->task = task;
|
|
+ ctx->cpu = -1;
|
|
+ ctx->active_set = set;
|
|
+
|
|
+ /*
|
|
+ * perform any architecture specific actions
|
|
+ */
|
|
+ ret = pfm_arch_load_context(ctx);
|
|
+ if (ret)
|
|
+ goto error_noload;
|
|
+
|
|
+ /*
|
|
+ * now reserve the session, before we can proceed with
|
|
+ * actually accessing the PMU hardware
|
|
+ */
|
|
+ ret = pfm_session_acquire(0, -1);
|
|
+ if (ret)
|
|
+ goto error;
|
|
+
|
|
+
|
|
+ set->runs++;
|
|
+ if (ctx->task != current) {
|
|
+
|
|
+ ctx->flags.is_self = 0;
|
|
+
|
|
+ /* force a full reload */
|
|
+ ctx->last_act = PFM_INVALID_ACTIVATION;
|
|
+ ctx->last_cpu = -1;
|
|
+ set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH;
|
|
+
|
|
+ } else {
|
|
+ pfm_check_save_prev_ctx();
|
|
+
|
|
+ ctx->last_cpu = smp_processor_id();
|
|
+ __get_cpu_var(pmu_activation_number)++;
|
|
+ ctx->last_act = __get_cpu_var(pmu_activation_number);
|
|
+
|
|
+ ctx->flags.is_self = 1;
|
|
+
|
|
+ /*
|
|
+ * load PMD from set
|
|
+ * load PMC from set
|
|
+ */
|
|
+ pfm_arch_restore_pmds(ctx, set);
|
|
+ pfm_arch_restore_pmcs(ctx, set);
|
|
+
|
|
+ /*
|
|
+ * set new ownership
|
|
+ */
|
|
+ pfm_set_pmu_owner(ctx->task, ctx);
|
|
+ }
|
|
+ set_tsk_thread_flag(task, TIF_PERFMON_CTXSW);
|
|
+
|
|
+ /*
|
|
+ * reset pending work
|
|
+ */
|
|
+ ctx->flags.work_type = PFM_WORK_NONE;
|
|
+ ctx->flags.reset_count = 0;
|
|
+
|
|
+ /*
|
|
+ * reset message queue
|
|
+ */
|
|
+ ctx->msgq_head = ctx->msgq_tail = 0;
|
|
+
|
|
+ ctx->state = PFM_CTX_LOADED;
|
|
+
|
|
+ return 0;
|
|
+
|
|
+error:
|
|
+ pfm_arch_unload_context(ctx);
|
|
+ ctx->task = NULL;
|
|
+error_noload:
|
|
+ /*
|
|
+ * detach context
|
|
+ */
|
|
+ task->pfm_context = NULL;
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __pfm_load_context - attach context to a CPU or thread
|
|
+ * @ctx: context to operate on
|
|
+ * @load: pfarg_load as passed by user
|
|
+ * @task: thread to attach to, NULL for system-wide
|
|
+ */
|
|
+int __pfm_load_context(struct pfm_context *ctx, struct pfarg_load *load,
|
|
+ struct task_struct *task)
|
|
+{
|
|
+ if (ctx->flags.system)
|
|
+ return pfm_load_ctx_sys(ctx, load->load_set, load->load_pid);
|
|
+ return pfm_load_ctx_thread(ctx, load->load_set, task);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_update_ovfl_pmds - account for pending ovfls on PMDs
|
|
+ * @ctx: context to operate on
|
|
+ *
|
|
+ * This function is always called after pfm_stop has been issued
|
|
+ */
|
|
+static void pfm_update_ovfl_pmds(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_event_set *set;
|
|
+ u64 *cnt_pmds;
|
|
+ u64 ovfl_mask;
|
|
+ u16 num_ovfls, i, first;
|
|
+
|
|
+ ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+ first = ctx->regs.first_intr_pmd;
|
|
+ cnt_pmds = ctx->regs.cnt_pmds;
|
|
+
|
|
+ /*
|
|
+ * look for pending interrupts and adjust PMD values accordingly
|
|
+ */
|
|
+ list_for_each_entry(set, &ctx->set_list, list) {
|
|
+
|
|
+ if (!set->npend_ovfls)
|
|
+ continue;
|
|
+
|
|
+ num_ovfls = set->npend_ovfls;
|
|
+ PFM_DBG("set%u nintrs=%u", set->id, num_ovfls);
|
|
+
|
|
+ for (i = first; num_ovfls; i++) {
|
|
+ if (test_bit(i, cast_ulp(set->povfl_pmds))) {
|
|
+ /* only correct value for counters */
|
|
+ if (test_bit(i, cast_ulp(cnt_pmds)))
|
|
+ set->pmds[i].value += 1 + ovfl_mask;
|
|
+ num_ovfls--;
|
|
+ }
|
|
+ PFM_DBG("pmd%u set=%u val=0x%llx",
|
|
+ i,
|
|
+ set->id,
|
|
+ (unsigned long long)set->pmds[i].value);
|
|
+ }
|
|
+ /*
|
|
+ * we need to clear to prevent a pfm_getinfo_evtsets() from
|
|
+ * returning stale data even after the context is unloaded
|
|
+ */
|
|
+ set->npend_ovfls = 0;
|
|
+ bitmap_zero(cast_ulp(set->povfl_pmds), ctx->regs.max_intr_pmd);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/**
|
|
+ * __pfm_unload_context - detach context from CPU or thread
|
|
+ * @ctx: context to operate on
|
|
+ * @release_info: pointer to return info (see below)
|
|
+ *
|
|
+ * The function must be called with the context locked and interrupts disabled.
|
|
+ *
|
|
+ * release_info value upon return:
|
|
+ * - bit 0: when set, must free context
|
|
+ * - bit 1: when set, must cancel hrtimer
|
|
+ */
|
|
+int __pfm_unload_context(struct pfm_context *ctx, int *release_info)
|
|
+{
|
|
+ struct task_struct *task;
|
|
+ int ret;
|
|
+
|
|
+ PFM_DBG("ctx_state=%d task [%d]",
|
|
+ ctx->state,
|
|
+ ctx->task ? ctx->task->pid : -1);
|
|
+
|
|
+ *release_info = 0;
|
|
+
|
|
+ /*
|
|
+ * unload only when necessary
|
|
+ */
|
|
+ if (ctx->state == PFM_CTX_UNLOADED)
|
|
+ return 0;
|
|
+
|
|
+ task = ctx->task;
|
|
+
|
|
+ /*
|
|
+ * stop monitoring
|
|
+ */
|
|
+ ret = __pfm_stop(ctx, release_info);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ ctx->state = PFM_CTX_UNLOADED;
|
|
+ ctx->flags.can_restart = 0;
|
|
+
|
|
+ /*
|
|
+ * save active set
|
|
+ * UP:
|
|
+ * if not current task and due to lazy, state may
|
|
+ * still be live
|
|
+ * for system-wide, guaranteed to run on correct CPU
|
|
+ */
|
|
+ if (__get_cpu_var(pmu_ctx) == ctx) {
|
|
+ /*
|
|
+ * pending overflows have been saved by pfm_stop()
|
|
+ */
|
|
+ pfm_save_pmds(ctx, ctx->active_set);
|
|
+ pfm_set_pmu_owner(NULL, NULL);
|
|
+ PFM_DBG("released ownership");
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * account for pending overflows
|
|
+ */
|
|
+ pfm_update_ovfl_pmds(ctx);
|
|
+
|
|
+ /*
|
|
+ * arch-specific unload operations
|
|
+ */
|
|
+ pfm_arch_unload_context(ctx);
|
|
+
|
|
+ /*
|
|
+ * per-thread: disconnect from monitored task
|
|
+ */
|
|
+ if (task) {
|
|
+ task->pfm_context = NULL;
|
|
+ ctx->task = NULL;
|
|
+ clear_tsk_thread_flag(task, TIF_PERFMON_CTXSW);
|
|
+ clear_tsk_thread_flag(task, TIF_PERFMON_WORK);
|
|
+ pfm_arch_disarm_handle_work(task);
|
|
+ }
|
|
+ /*
|
|
+ * session can be freed, must have interrupts enabled
|
|
+ * thus we release in the caller. Bit 0 signals to the
|
|
+ * caller that the session can be released.
|
|
+ */
|
|
+ *release_info |= 0x1;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __pfm_exit_thread - detach and free context on thread exit
|
|
+ */
|
|
+void __pfm_exit_thread(void)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ unsigned long flags;
|
|
+ int free_ok = 0, release_info = 0;
|
|
+ int ret;
|
|
+
|
|
+ ctx = current->pfm_context;
|
|
+
|
|
+ BUG_ON(ctx->flags.system);
|
|
+
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ PFM_DBG("state=%d is_self=%d", ctx->state, ctx->flags.is_self);
|
|
+
|
|
+ /*
|
|
+ * __pfm_unload_context() cannot fail
|
|
+ * in the context states we are interested in
|
|
+ */
|
|
+ switch (ctx->state) {
|
|
+ case PFM_CTX_LOADED:
|
|
+ case PFM_CTX_MASKED:
|
|
+ __pfm_unload_context(ctx, &release_info);
|
|
+ /*
|
|
+ * end notification only sent for non
|
|
+ * self-monitoring context
|
|
+ */
|
|
+ if (!ctx->flags.is_self)
|
|
+ pfm_end_notify(ctx);
|
|
+ break;
|
|
+ case PFM_CTX_ZOMBIE:
|
|
+ __pfm_unload_context(ctx, &release_info);
|
|
+ free_ok = 1;
|
|
+ break;
|
|
+ default:
|
|
+ BUG_ON(ctx->state != PFM_CTX_LOADED);
|
|
+ break;
|
|
+ }
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ /*
|
|
+ * cancel timer now that context is unlocked
|
|
+ */
|
|
+ if (release_info & 0x2) {
|
|
+ ret = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
|
|
+ PFM_DBG("timeout cancel=%d", ret);
|
|
+ }
|
|
+
|
|
+ if (release_info & 0x1)
|
|
+ pfm_session_release(0, 0);
|
|
+
|
|
+ /*
|
|
+ * All memory free operations (especially for vmalloc'ed memory)
|
|
+ * MUST be done with interrupts ENABLED.
|
|
+ */
|
|
+ if (free_ok)
|
|
+ pfm_free_context(ctx);
|
|
+}
|
|
diff --git a/perfmon/perfmon_ctx.c b/perfmon/perfmon_ctx.c
|
|
new file mode 100644
|
|
index 0000000..afe6078
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_ctx.c
|
|
@@ -0,0 +1,314 @@
|
|
+/*
|
|
+ * perfmon_ctx.c: perfmon2 context functions
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/fs.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+/*
|
|
+ * context memory pool pointer
|
|
+ */
|
|
+static struct kmem_cache *pfm_ctx_cachep;
|
|
+
|
|
+/**
|
|
+ * pfm_free_context - de-allocate context and associated resources
|
|
+ * @ctx: context to free
|
|
+ */
|
|
+void pfm_free_context(struct pfm_context *ctx)
|
|
+{
|
|
+ pfm_arch_context_free(ctx);
|
|
+
|
|
+ pfm_free_sets(ctx);
|
|
+
|
|
+ pfm_smpl_buf_free(ctx);
|
|
+
|
|
+ PFM_DBG("free ctx @0x%p", ctx);
|
|
+ kmem_cache_free(pfm_ctx_cachep, ctx);
|
|
+ /*
|
|
+ * decrease refcount on:
|
|
+ * - PMU description table
|
|
+ * - sampling format
|
|
+ */
|
|
+ pfm_pmu_conf_put();
|
|
+ pfm_pmu_release();
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_ctx_flags_sane - check if context flags passed by user are okay
|
|
+ * @ctx_flags: flags passed by the user to pfm_create_context
|
|
+ *
|
|
+ * return:
|
|
+ * 0 if successful
|
|
+ * <0 and error code otherwise
|
|
+ */
|
|
+static inline int pfm_ctx_flags_sane(u32 ctx_flags)
|
|
+{
|
|
+ if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
|
|
+ if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
|
|
+ PFM_DBG("cannot use blocking mode in syswide mode");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_ctx_permissions - check authorization to create new context
|
|
+ * @ctx_flags: context flags passed by user
|
|
+ *
|
|
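+ * check for permissions to create a context. NOTE(review): a syswide request that passes the sys_group test is still checked against task_group below; confirm this is intended.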
|
|
+ *
|
|
+ * A sysadmin may decide to restrict creation of per-thread
|
|
+ * and/or system-wide context to a group of users using the
|
|
+ * group id via /sys/kernel/perfmon/task_group and
|
|
+ * /sys/kernel/perfmon/sys_group.
|
|
+ *
|
|
+ * Once we identify a user level package which can be used
|
|
+ * to grant/revoke Linux capabilities at login via PAM, we will
|
|
+ * be able to use capabilities. We would also need to increase
|
|
+ * the size of cap_t to support more than 32 capabilities (it
|
|
+ * is currently defined as u32 and 32 capabilities are already
|
|
+ * defined).
|
|
+ */
|
|
+static inline int pfm_ctx_permissions(u32 ctx_flags)
|
|
+{
|
|
+ if ((ctx_flags & PFM_FL_SYSTEM_WIDE)
|
|
+ && pfm_controls.sys_group != PFM_GROUP_PERM_ANY
|
|
+ && !in_group_p(pfm_controls.sys_group)) {
|
|
+ PFM_DBG("user group not allowed to create a syswide ctx");
|
|
+ return -EPERM;
|
|
+ } else if (pfm_controls.task_group != PFM_GROUP_PERM_ANY
|
|
+ && !in_group_p(pfm_controls.task_group)) {
|
|
+ PFM_DBG("user group not allowed to create a task context");
|
|
+ return -EPERM;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __pfm_create_context - allocate and initialize a perfmon context
|
|
+ * @req : pfarg_ctx from user
|
|
+ * @fmt : pointer to sampling format, NULL if not used
|
|
+ * @fmt_arg: pointer to argument to sampling format, NULL if not used
|
|
+ * @mode: PFM_NORMAL or PFM_COMPAT(IA-64 v2.0 compatibility)
|
|
+ * @new_ctx: address of new context upon successful return, undefined otherwise
|
|
+ *
|
|
+ * function used to allocate a new context. A context is allocated along
|
|
+ * with the default event set. If a sampling format is used, the buffer
|
|
+ * may be allocated and initialized.
|
|
+ *
|
|
+ * The file descriptor identifying the context is allocated and returned
|
|
+ * to caller.
|
|
+ *
|
|
+ * This function operates with no locks and interrupts are enabled.
|
|
+ * return:
|
|
+ * >=0: the file descriptor to identify the context
|
|
+ * <0 : the error code
|
|
+ */
|
|
+int __pfm_create_context(struct pfarg_ctx *req,
|
|
+ struct pfm_smpl_fmt *fmt,
|
|
+ void *fmt_arg,
|
|
+ int mode,
|
|
+ struct pfm_context **new_ctx)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ struct file *filp = NULL;
|
|
+ u32 ctx_flags;
|
|
+ int fd = 0, ret;
|
|
+
|
|
+ ctx_flags = req->ctx_flags;
|
|
+
|
|
+ /* Increase refcount on PMU description */
|
|
+ ret = pfm_pmu_conf_get(1);
|
|
+ if (ret < 0)
|
|
+ goto error_conf;
|
|
+
|
|
+ ret = pfm_ctx_flags_sane(ctx_flags);
|
|
+ if (ret < 0)
|
|
+ goto error_alloc;
|
|
+
|
|
+ ret = pfm_ctx_permissions(ctx_flags);
|
|
+ if (ret < 0)
|
|
+ goto error_alloc;
|
|
+
|
|
+ /*
|
|
+ * we can use GFP_KERNEL and potentially sleep because we do
|
|
+ * not hold any lock at this point.
|
|
+ */
|
|
+ might_sleep();
|
|
+ ret = -ENOMEM;
|
|
+ ctx = kmem_cache_zalloc(pfm_ctx_cachep, GFP_KERNEL);
|
|
+ if (!ctx)
|
|
+ goto error_alloc;
|
|
+
|
|
+ PFM_DBG("alloc ctx @0x%p", ctx);
|
|
+
|
|
+ INIT_LIST_HEAD(&ctx->set_list);
|
|
+ spin_lock_init(&ctx->lock);
|
|
+ init_completion(&ctx->restart_complete);
|
|
+ init_waitqueue_head(&ctx->msgq_wait);
|
|
+
|
|
+ /*
|
|
+ * context is unloaded
|
|
+ */
|
|
+ ctx->state = PFM_CTX_UNLOADED;
|
|
+
|
|
+ /*
|
|
+ * initialization of context's flags
|
|
+ * must be done before pfm_find_set()
|
|
+ */
|
|
+ ctx->flags.block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
|
|
+ ctx->flags.system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
|
|
+ ctx->flags.no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
|
|
+ ctx->flags.ia64_v20_compat = mode == PFM_COMPAT ? 1 : 0;
|
|
+
|
|
+ ret = pfm_pmu_acquire(ctx);
|
|
+ if (ret)
|
|
+ goto error_file;
|
|
+ /*
|
|
+ * check if PMU is usable: need at least one PMC and one PMD
|
|
+ */
|
|
+ if (!(ctx->regs.num_pmcs && ctx->regs.num_pmds)) {
|
|
+ PFM_DBG("no usable PMU registers");
|
|
+ ret = -EBUSY;
|
|
+ goto error_file;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * link to format, must be done first for correct
|
|
+ * error handling in pfm_free_context()
|
|
+ */
|
|
+ ctx->smpl_fmt = fmt;
|
|
+
|
|
+ ret = -ENFILE;
|
|
+ fd = pfm_alloc_fd(&filp);
|
|
+ if (fd < 0)
|
|
+ goto error_file;
|
|
+
|
|
+ /*
|
|
+ * initialize arch-specific section
|
|
+ * must be done before fmt_init()
|
|
+ */
|
|
+ ret = pfm_arch_context_create(ctx, ctx_flags);
|
|
+ if (ret)
|
|
+ goto error_set;
|
|
+
|
|
+ ret = -ENOMEM;
|
|
+
|
|
+ /*
|
|
+ * add initial set
|
|
+ */
|
|
+ if (pfm_create_initial_set(ctx))
|
|
+ goto error_set;
|
|
+
|
|
+ /*
|
|
+ * does the user want to sample?
|
|
+ * must be done after pfm_pmu_acquire() because
|
|
+ * needs ctx->regs
|
|
+ */
|
|
+ if (fmt) {
|
|
+ ret = pfm_setup_smpl_fmt(ctx, ctx_flags, fmt_arg, filp);
|
|
+ if (ret)
|
|
+ goto error_set;
|
|
+ }
|
|
+
|
|
+ filp->private_data = ctx;
|
|
+
|
|
+ ctx->last_act = PFM_INVALID_ACTIVATION;
|
|
+ ctx->last_cpu = -1;
|
|
+
|
|
+ /*
|
|
+ * initialize notification message queue
|
|
+ */
|
|
+ ctx->msgq_head = ctx->msgq_tail = 0;
|
|
+
|
|
+ PFM_DBG("flags=0x%x system=%d notify_block=%d no_msg=%d"
|
|
+ " use_fmt=%d ctx_fd=%d mode=%d",
|
|
+ ctx_flags,
|
|
+ ctx->flags.system,
|
|
+ ctx->flags.block,
|
|
+ ctx->flags.no_msg,
|
|
+ !!fmt,
|
|
+ fd, mode);
|
|
+
|
|
+ if (new_ctx)
|
|
+ *new_ctx = ctx;
|
|
+
|
|
+ /*
|
|
+ * we defer the fd_install until we are certain the call succeeded
|
|
+ * to ensure we do not have to undo its effect. Neither put_filp()
|
|
+ * nor put_unused_fd() undoes the effect of fd_install().
|
|
+ */
|
|
+ fd_install(fd, filp);
|
|
+
|
|
+ return fd;
|
|
+
|
|
+error_set:
|
|
+ put_filp(filp);
|
|
+ put_unused_fd(fd);
|
|
+error_file:
|
|
+ /*
|
|
+ * calls the right *_put() functions
|
|
+ * calls pfm_pmu_release()
|
|
+ */
|
|
+ pfm_free_context(ctx);
|
|
+ return ret;
|
|
+error_alloc:
|
|
+ pfm_pmu_conf_put();
|
|
+error_conf:
|
|
+ pfm_smpl_fmt_put(fmt);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_init_ctx -- initialize context SLAB
|
|
+ *
|
|
+ * called from pfm_init
|
|
+ */
|
|
+int __init pfm_init_ctx(void)
|
|
+{
|
|
+ pfm_ctx_cachep = kmem_cache_create("pfm_context",
|
|
+ sizeof(struct pfm_context)+PFM_ARCH_CTX_SIZE,
|
|
+ SLAB_HWCACHE_ALIGN, 0, NULL);
|
|
+ if (!pfm_ctx_cachep) {
|
|
+ PFM_ERR("cannot initialize context slab");
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
diff --git a/perfmon/perfmon_ctxsw.c b/perfmon/perfmon_ctxsw.c
|
|
new file mode 100644
|
|
index 0000000..9a28d13
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_ctxsw.c
|
|
@@ -0,0 +1,342 @@
|
|
+/*
|
|
+ * perfmon_ctxsw.c: perfmon2 context switch code
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+void pfm_save_pmds(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ u64 val, ovfl_mask;
|
|
+ u64 *used_pmds, *cnt_pmds;
|
|
+ u16 i, num;
|
|
+
|
|
+ ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+ num = set->nused_pmds;
|
|
+ cnt_pmds = ctx->regs.cnt_pmds;
|
|
+ used_pmds = set->used_pmds;
|
|
+
|
|
+ /*
|
|
+ * save HW PMD, for counters, reconstruct 64-bit value
|
|
+ */
|
|
+ for (i = 0; num; i++) {
|
|
+ if (test_bit(i, cast_ulp(used_pmds))) {
|
|
+ val = pfm_read_pmd(ctx, i);
|
|
+ if (likely(test_bit(i, cast_ulp(cnt_pmds))))
|
|
+ val = (set->pmds[i].value & ~ovfl_mask) |
|
|
+ (val & ovfl_mask);
|
|
+ set->pmds[i].value = val;
|
|
+ num--;
|
|
+ }
|
|
+ }
|
|
+ pfm_arch_clear_pmd_ovfl_cond(ctx, set);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * interrupts are disabled (no preemption)
|
|
+ */
|
|
+void __pfm_ctxswin_thread(struct task_struct *task,
|
|
+ struct pfm_context *ctx, u64 now)
|
|
+{
|
|
+ u64 cur_act;
|
|
+ struct pfm_event_set *set;
|
|
+ int reload_pmcs, reload_pmds;
|
|
+ int mycpu, is_active;
|
|
+
|
|
+ mycpu = smp_processor_id();
|
|
+
|
|
+ cur_act = __get_cpu_var(pmu_activation_number);
|
|
+ /*
|
|
+ * we need to lock context because it could be accessed
|
|
+ * from another CPU. Normally the schedule() function
|
|
+ * has masked interrupts which should be enough to
|
|
+ * protect against PMU interrupts.
|
|
+ */
|
|
+ spin_lock(&ctx->lock);
|
|
+
|
|
+ is_active = pfm_arch_is_active(ctx);
|
|
+
|
|
+ set = ctx->active_set;
|
|
+
|
|
+ /*
|
|
+ * in case of zombie, we do not complete ctxswin of the
|
|
+ * PMU, and we force a call to pfm_handle_work() to finish
|
|
+ * cleanup, i.e., free context + smpl_buff. The reason for
|
|
+ * deferring to pfm_handle_work() is that it is not possible
|
|
+ * to vfree() with interrupts disabled.
|
|
+ */
|
|
+ if (unlikely(ctx->state == PFM_CTX_ZOMBIE)) {
|
|
+ pfm_post_work(task, ctx, PFM_WORK_ZOMBIE);
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * if we were the last user of the PMU on that CPU,
|
|
+ * then nothing to do except restore psr
|
|
+ */
|
|
+ if (ctx->last_cpu == mycpu && ctx->last_act == cur_act) {
|
|
+ /*
|
|
+ * check for forced reload conditions
|
|
+ */
|
|
+ reload_pmcs = set->priv_flags & PFM_SETFL_PRIV_MOD_PMCS;
|
|
+ reload_pmds = set->priv_flags & PFM_SETFL_PRIV_MOD_PMDS;
|
|
+ } else {
|
|
+#ifndef CONFIG_SMP
|
|
+ pfm_check_save_prev_ctx();
|
|
+#endif
|
|
+ reload_pmcs = 1;
|
|
+ reload_pmds = 1;
|
|
+ }
|
|
+ /* consumed */
|
|
+ set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
|
|
+
|
|
+ if (reload_pmds)
|
|
+ pfm_arch_restore_pmds(ctx, set);
|
|
+
|
|
+ /*
|
|
+ * need to check if had in-flight interrupt in
|
|
+ * pfm_ctxswout_thread(). If at least one bit set, then we must replay
|
|
+ * the interrupt to avoid losing some important performance data.
|
|
+ *
|
|
+ * npend_ovfls is cleared in interrupt handler
|
|
+ */
|
|
+ if (set->npend_ovfls) {
|
|
+ pfm_arch_resend_irq(ctx);
|
|
+ pfm_stats_inc(ovfl_intr_replay_count);
|
|
+ }
|
|
+
|
|
+ if (reload_pmcs)
|
|
+ pfm_arch_restore_pmcs(ctx, set);
|
|
+
|
|
+ /*
|
|
+ * record current activation for this context
|
|
+ */
|
|
+ __get_cpu_var(pmu_activation_number)++;
|
|
+ ctx->last_cpu = mycpu;
|
|
+ ctx->last_act = __get_cpu_var(pmu_activation_number);
|
|
+
|
|
+ /*
|
|
+ * establish new ownership.
|
|
+ */
|
|
+ pfm_set_pmu_owner(task, ctx);
|
|
+
|
|
+ pfm_arch_ctxswin_thread(task, ctx);
|
|
+ /*
|
|
+ * set->duration does not count when context in MASKED state.
|
|
+ * set->duration_start is reset in unmask_monitoring()
|
|
+ */
|
|
+ set->duration_start = now;
|
|
+
|
|
+ /*
|
|
+ * re-arm switch timeout, if necessary
|
|
+ * Timeout is active only if monitoring is active,
|
|
+ * i.e., LOADED + started
|
|
+ *
|
|
+ * We reload the remainder timeout or the full timeout.
|
|
+ * Remainder is recorded on context switch out or in
|
|
+ * pfm_load_context()
|
|
+ */
|
|
+ if (ctx->state == PFM_CTX_LOADED
|
|
+ && (set->flags & PFM_SETFL_TIME_SWITCH) && is_active) {
|
|
+ pfm_restart_timer(ctx, set);
|
|
+ /* careful here as pfm_restart_timer may switch sets */
|
|
+ }
|
|
+done:
|
|
+ spin_unlock(&ctx->lock);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * interrupts are masked, runqueue lock is held.
|
|
+ *
|
|
+ * In UP, we simply stop monitoring and leave the state
|
|
+ * in place, i.e., lazy save
|
|
+ */
|
|
+void __pfm_ctxswout_thread(struct task_struct *task,
|
|
+ struct pfm_context *ctx, u64 now)
|
|
+{
|
|
+ struct pfm_event_set *set;
|
|
+ int need_save_pmds, is_active;
|
|
+
|
|
+ /*
|
|
+ * we need to lock context because it could be accessed
|
|
+ * from another CPU. Normally the schedule() function
|
|
+ * has masked interrupts which should be enough to
|
|
+ * protect against PMU interrupts.
|
|
+ */
|
|
+
|
|
+ spin_lock(&ctx->lock);
|
|
+
|
|
+ is_active = pfm_arch_is_active(ctx);
|
|
+ set = ctx->active_set;
|
|
+
|
|
+ /*
|
|
+ * stop monitoring and
|
|
+ * collect pending overflow information
|
|
+ * needed on ctxswin. We cannot afford to lose
|
|
+ * a PMU interrupt.
|
|
+ */
|
|
+ need_save_pmds = pfm_arch_ctxswout_thread(task, ctx);
|
|
+
|
|
+ if (ctx->state == PFM_CTX_LOADED) {
|
|
+ /*
|
|
+ * accumulate only when set is actively monitoring,
|
|
+ */
|
|
+ set->duration += now - set->duration_start;
|
|
+
|
|
+ /*
|
|
+ * record remaining timeout
|
|
+ * reload in pfm_ctxsw_in()
|
|
+ */
|
|
+ if (is_active && (set->flags & PFM_SETFL_TIME_SWITCH)) {
|
|
+ struct hrtimer *h = NULL;
|
|
+ h = &__get_cpu_var(pfm_hrtimer);
|
|
+ hrtimer_cancel(h);
|
|
+ set->hrtimer_rem = hrtimer_get_remaining(h);
|
|
+ PFM_DBG_ovfl("hrtimer=%lld",
|
|
+ (long long)set->hrtimer_rem.tv64);
|
|
+ }
|
|
+ }
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+ /*
|
|
+ * in SMP, release ownership of this PMU.
|
|
+ * PMU interrupts are masked, so nothing
|
|
+ * can happen.
|
|
+ */
|
|
+ pfm_set_pmu_owner(NULL, NULL);
|
|
+
|
|
+ /*
|
|
+ * On some architectures, it is necessary to read the
|
|
+ * PMD registers to check for pending overflow in
|
|
+ * pfm_arch_ctxswout_thread(). In that case, saving of
|
|
+ * the PMDs may be done there and not here.
|
|
+ */
|
|
+ if (need_save_pmds)
|
|
+ pfm_save_pmds(ctx, set);
|
|
+#endif
|
|
+ spin_unlock(&ctx->lock);
|
|
+}
|
|
+
|
|
+/*
|
|
+ *
|
|
+ */
|
|
+static void __pfm_ctxswout_sys(struct task_struct *prev,
|
|
+ struct task_struct *next)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+
|
|
+ ctx = __get_cpu_var(pmu_ctx);
|
|
+ BUG_ON(!ctx);
|
|
+
|
|
+ /*
|
|
+ * propagate TIF_PERFMON_CTXSW to ensure that:
|
|
+ * - previous task has TIF_PERFMON_CTXSW cleared, in case it is
|
|
+ * scheduled onto another CPU where there is syswide monitoring
|
|
+ * - next task has TIF_PERFMON_CTXSW set to ensure it will come back
|
|
+ * here when context switched out
|
|
+ */
|
|
+ clear_tsk_thread_flag(prev, TIF_PERFMON_CTXSW);
|
|
+ set_tsk_thread_flag(next, TIF_PERFMON_CTXSW);
|
|
+
|
|
+ /*
|
|
+ * nothing to do until actually started
|
|
+ * XXX: assumes no means to start from user level
|
|
+ */
|
|
+ if (!ctx->flags.started)
|
|
+ return;
|
|
+
|
|
+ pfm_arch_ctxswout_sys(prev, ctx);
|
|
+}
|
|
+
|
|
+/*
|
|
+ *
|
|
+ */
|
|
+static void __pfm_ctxswin_sys(struct task_struct *prev,
|
|
+ struct task_struct *next)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+
|
|
+ ctx = __get_cpu_var(pmu_ctx);
|
|
+ BUG_ON(!ctx);
|
|
+
|
|
+ /*
|
|
+ * nothing to do until actually started
|
|
+ * XXX: assumes no means to start from user level
|
|
+ */
|
|
+ if (!ctx->flags.started)
|
|
+ return;
|
|
+
|
|
+ pfm_arch_ctxswin_sys(next, ctx);
|
|
+}
|
|
+
|
|
+void pfm_ctxsw_out(struct task_struct *prev,
|
|
+ struct task_struct *next)
|
|
+{
|
|
+ struct pfm_context *ctxp;
|
|
+ u64 now;
|
|
+
|
|
+ now = sched_clock();
|
|
+
|
|
+ ctxp = prev->pfm_context;
|
|
+
|
|
+ if (ctxp)
|
|
+ __pfm_ctxswout_thread(prev, ctxp, now);
|
|
+ else
|
|
+ __pfm_ctxswout_sys(prev, next);
|
|
+
|
|
+ pfm_stats_inc(ctxswout_count);
|
|
+ pfm_stats_add(ctxswout_ns, sched_clock() - now);
|
|
+}
|
|
+
|
|
+void pfm_ctxsw_in(struct task_struct *prev,
|
|
+ struct task_struct *next)
|
|
+{
|
|
+ struct pfm_context *ctxn;
|
|
+ u64 now;
|
|
+
|
|
+ now = sched_clock();
|
|
+
|
|
+ ctxn = next->pfm_context;
|
|
+
|
|
+ if (ctxn)
|
|
+ __pfm_ctxswin_thread(next, ctxn, now);
|
|
+ else
|
|
+ __pfm_ctxswin_sys(prev, next);
|
|
+
|
|
+ pfm_stats_inc(ctxswin_count);
|
|
+ pfm_stats_add(ctxswin_ns, sched_clock() - now);
|
|
+}
|
|
diff --git a/perfmon/perfmon_debugfs.c b/perfmon/perfmon_debugfs.c
|
|
new file mode 100644
|
|
index 0000000..e4d2fad
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_debugfs.c
|
|
@@ -0,0 +1,168 @@
|
|
+/*
|
|
+ * perfmon_debugfs.c: perfmon2 statistics interface to debugfs
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/debugfs.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+/*
|
|
+ * to make the statistics visible to user space:
|
|
+ * $ mount -t debugfs none /mnt
|
|
+ * $ cd /mnt/perfmon
|
|
+ * then choose a CPU subdir
|
|
+ */
|
|
+DECLARE_PER_CPU(struct pfm_stats, pfm_stats);
|
|
+
|
|
+static struct dentry *pfm_debugfs_dir;
|
|
+
|
|
+void pfm_reset_stats(int cpu)
|
|
+{
|
|
+ struct pfm_stats *st;
|
|
+ unsigned long flags;
|
|
+
|
|
+ st = &per_cpu(pfm_stats, cpu);
|
|
+
|
|
+ local_irq_save(flags);
|
|
+ memset(st->v, 0, sizeof(st->v));
|
|
+ local_irq_restore(flags);
|
|
+}
|
|
+
|
|
+static const char *pfm_stats_strs[] = {
|
|
+ "ovfl_intr_all_count",
|
|
+ "ovfl_intr_ns",
|
|
+ "ovfl_intr_spurious_count",
|
|
+ "ovfl_intr_replay_count",
|
|
+ "ovfl_intr_regular_count",
|
|
+ "handle_work_count",
|
|
+ "ovfl_notify_count",
|
|
+ "reset_pmds_count",
|
|
+ "pfm_restart_count",
|
|
+ "fmt_handler_calls",
|
|
+ "fmt_handler_ns",
|
|
+ "set_switch_count",
|
|
+ "set_switch_ns",
|
|
+ "set_switch_exp",
|
|
+ "ctxswin_count",
|
|
+ "ctxswin_ns",
|
|
+ "handle_timeout_count",
|
|
+ "ovfl_intr_nmi_count",
|
|
+ "ctxswout_count",
|
|
+ "ctxswout_ns",
|
|
+};
|
|
+#define PFM_NUM_STRS ARRAY_SIZE(pfm_stats_strs)
|
|
+
|
|
+void pfm_debugfs_del_cpu(int cpu)
|
|
+{
|
|
+ struct pfm_stats *st;
|
|
+ int i;
|
|
+
|
|
+ st = &per_cpu(pfm_stats, cpu);
|
|
+
|
|
+ for (i = 0; i < PFM_NUM_STATS; i++) {
|
|
+ if (st->dirs[i])
|
|
+ debugfs_remove(st->dirs[i]);
|
|
+ st->dirs[i] = NULL;
|
|
+ }
|
|
+ if (st->cpu_dir)
|
|
+ debugfs_remove(st->cpu_dir);
|
|
+ st->cpu_dir = NULL;
|
|
+}
|
|
+
|
|
+int pfm_debugfs_add_cpu(int cpu)
|
|
+{
|
|
+ struct pfm_stats *st;
|
|
+ int i;
|
|
+
|
|
+ /*
|
|
+ * sanity check between stats names and the number
|
|
+ * of entries in the pfm_stats value array.
|
|
+ */
|
|
+ if (PFM_NUM_STRS != PFM_NUM_STATS) {
|
|
+ PFM_ERR("PFM_NUM_STRS != PFM_NUM_STATS error");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ st = &per_cpu(pfm_stats, cpu);
|
|
+ sprintf(st->cpu_name, "cpu%d", cpu);
|
|
+
|
|
+ st->cpu_dir = debugfs_create_dir(st->cpu_name, pfm_debugfs_dir);
|
|
+ if (!st->cpu_dir)
|
|
+ return -1;
|
|
+
|
|
+ for (i = 0; i < PFM_NUM_STATS; i++) {
|
|
+ st->dirs[i] = debugfs_create_u64(pfm_stats_strs[i],
|
|
+ S_IRUGO,
|
|
+ st->cpu_dir,
|
|
+ &st->v[i]);
|
|
+ if (!st->dirs[i])
|
|
+ goto error;
|
|
+ }
|
|
+ pfm_reset_stats(cpu);
|
|
+ return 0;
|
|
+error:
|
|
+ /*
|
|
+ * remove what was created and reset the per-cpu dentry pointers
|
|
+ * so that a later pfm_debugfs_del_cpu() cannot remove them twice
|
|
+ */
|
|
+ pfm_debugfs_del_cpu(cpu);
|
|
+ return -1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * called once from pfm_init()
|
|
+ */
|
|
+int __init pfm_init_debugfs(void)
|
|
+{
|
|
+ int cpu1, cpu2, ret;
|
|
+
|
|
+ pfm_debugfs_dir = debugfs_create_dir("perfmon", NULL);
|
|
+ if (!pfm_debugfs_dir)
|
|
+ return -1;
|
|
+
|
|
+ for_each_online_cpu(cpu1) {
|
|
+ ret = pfm_debugfs_add_cpu(cpu1);
|
|
+ if (ret)
|
|
+ goto error;
|
|
+ }
|
|
+ return 0;
|
|
+error:
|
|
+ for_each_online_cpu(cpu2) {
|
|
+ if (cpu2 == cpu1)
|
|
+ break;
|
|
+ pfm_debugfs_del_cpu(cpu2);
|
|
+ }
|
|
+ return -1;
|
|
+}
|
|
diff --git a/perfmon/perfmon_dfl_smpl.c b/perfmon/perfmon_dfl_smpl.c
|
|
new file mode 100644
|
|
index 0000000..8c83489
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_dfl_smpl.c
|
|
@@ -0,0 +1,298 @@
|
|
+/*
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This file implements the new default sampling buffer format
|
|
+ * for the perfmon2 subsystem.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/types.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/smp.h>
|
|
+
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <linux/perfmon_dfl_smpl.h>
|
|
+
|
|
+MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
|
|
+MODULE_DESCRIPTION("new perfmon default sampling format");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+static int pfm_dfl_fmt_validate(u32 ctx_flags, u16 npmds, void *data)
|
|
+{
|
|
+ struct pfm_dfl_smpl_arg *arg = data;
|
|
+ u64 min_buf_size;
|
|
+
|
|
+ if (data == NULL) {
|
|
+ PFM_DBG("no argument passed");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * sanity check in case size_t is smaller than u64 (32-bit kernels)
|
|
+ */
|
|
+#if BITS_PER_LONG == 32
|
|
+#define MAX_SIZE_T (1ULL<<(sizeof(size_t)<<3))
|
|
+ if (sizeof(size_t) < sizeof(arg->buf_size)) {
|
|
+ if (arg->buf_size >= MAX_SIZE_T)
|
|
+ return -E2BIG;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * compute min buf size. npmds is the maximum number
|
|
+ * of implemented PMD registers.
|
|
+ */
|
|
+ min_buf_size = sizeof(struct pfm_dfl_smpl_hdr)
|
|
+ + (sizeof(struct pfm_dfl_smpl_entry) + (npmds*sizeof(u64)));
|
|
+
|
|
+ PFM_DBG("validate ctx_flags=0x%x flags=0x%x npmds=%u "
|
|
+ "min_buf_size=%llu buf_size=%llu",
|
|
+ ctx_flags,
|
|
+ arg->buf_flags,
|
|
+ npmds,
|
|
+ (unsigned long long)min_buf_size,
|
|
+ (unsigned long long)arg->buf_size);
|
|
+
|
|
+ /*
|
|
+ * must hold at least the buffer header + one minimally sized entry
|
|
+ */
|
|
+ if (arg->buf_size < min_buf_size)
|
|
+ return -EINVAL;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_dfl_fmt_get_size(u32 flags, void *data, size_t *size)
|
|
+{
|
|
+ struct pfm_dfl_smpl_arg *arg = data;
|
|
+
|
|
+ /*
|
|
+ * size has been validated in pfm_dfl_fmt_validate()
|
|
+ * we can never lose bits from buf_size.
|
|
+ */
|
|
+ *size = (size_t)arg->buf_size;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_dfl_fmt_init(struct pfm_context *ctx, void *buf, u32 ctx_flags,
|
|
+ u16 npmds, void *data)
|
|
+{
|
|
+ struct pfm_dfl_smpl_hdr *hdr;
|
|
+ struct pfm_dfl_smpl_arg *arg = data;
|
|
+
|
|
+ hdr = buf;
|
|
+
|
|
+ hdr->hdr_version = PFM_DFL_SMPL_VERSION;
|
|
+ hdr->hdr_buf_size = arg->buf_size;
|
|
+ hdr->hdr_buf_flags = arg->buf_flags;
|
|
+ hdr->hdr_cur_offs = sizeof(*hdr);
|
|
+ hdr->hdr_overflows = 0;
|
|
+ hdr->hdr_count = 0;
|
|
+ hdr->hdr_min_buf_space = sizeof(struct pfm_dfl_smpl_entry) + (npmds*sizeof(u64));
|
|
+ /*
|
|
+ * due to cache aliasing, it may be necessary to flush the cache
|
|
+ * on certain architectures (e.g., MIPS)
|
|
+ */
|
|
+ pfm_cacheflush(hdr, sizeof(*hdr));
|
|
+
|
|
+ PFM_DBG("buffer=%p buf_size=%llu hdr_size=%zu hdr_version=%u.%u "
|
|
+ "min_space=%llu npmds=%u",
|
|
+ buf,
|
|
+ (unsigned long long)hdr->hdr_buf_size,
|
|
+ sizeof(*hdr),
|
|
+ PFM_VERSION_MAJOR(hdr->hdr_version),
|
|
+ PFM_VERSION_MINOR(hdr->hdr_version),
|
|
+ (unsigned long long)hdr->hdr_min_buf_space,
|
|
+ npmds);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * called from pfm_overflow_handler() to record a new sample
|
|
+ *
|
|
+ * context is locked, interrupts are disabled (no preemption)
|
|
+ */
|
|
+static int pfm_dfl_fmt_handler(struct pfm_context *ctx,
|
|
+ unsigned long ip, u64 tstamp, void *data)
|
|
+{
|
|
+ struct pfm_dfl_smpl_hdr *hdr;
|
|
+ struct pfm_dfl_smpl_entry *ent;
|
|
+ struct pfm_ovfl_arg *arg;
|
|
+ void *cur, *last;
|
|
+ u64 *e;
|
|
+ size_t entry_size, min_size;
|
|
+ u16 npmds, i;
|
|
+ u16 ovfl_pmd;
|
|
+ void *buf;
|
|
+
|
|
+ hdr = ctx->smpl_addr;
|
|
+ arg = &ctx->ovfl_arg;
|
|
+
|
|
+ buf = hdr;
|
|
+ cur = buf+hdr->hdr_cur_offs;
|
|
+ last = buf+hdr->hdr_buf_size;
|
|
+ ovfl_pmd = arg->ovfl_pmd;
|
|
+ min_size = hdr->hdr_min_buf_space;
|
|
+
|
|
+ /*
|
|
+ * precheck for sanity
|
|
+ */
|
|
+ if ((last - cur) < min_size)
|
|
+ goto full;
|
|
+
|
|
+ npmds = arg->num_smpl_pmds;
|
|
+
|
|
+ ent = (struct pfm_dfl_smpl_entry *)cur;
|
|
+
|
|
+ entry_size = sizeof(*ent) + (npmds << 3);
|
|
+
|
|
+ /* position for first pmd */
|
|
+ e = (u64 *)(ent+1);
|
|
+
|
|
+ hdr->hdr_count++;
|
|
+
|
|
+ PFM_DBG_ovfl("count=%llu cur=%p last=%p free_bytes=%zu ovfl_pmd=%d "
|
|
+ "npmds=%u",
|
|
+ (unsigned long long)hdr->hdr_count,
|
|
+ cur, last,
|
|
+ (last-cur),
|
|
+ ovfl_pmd,
|
|
+ npmds);
|
|
+
|
|
+ /*
|
|
+ * current = task running at the time of the overflow.
|
|
+ *
|
|
+ * per-task mode:
|
|
+ * - this is usually the task being monitored.
|
|
+ * Under certain conditions, it might be a different task
|
|
+ *
|
|
+ * system-wide:
|
|
+ * - this is not necessarily the task controlling the session
|
|
+ */
|
|
+ ent->pid = current->pid;
|
|
+ ent->ovfl_pmd = ovfl_pmd;
|
|
+ ent->last_reset_val = arg->pmd_last_reset;
|
|
+
|
|
+ /*
|
|
+ * where did the fault happen (includes slot number)
|
|
+ */
|
|
+ ent->ip = ip;
|
|
+
|
|
+ ent->tstamp = tstamp;
|
|
+ ent->cpu = smp_processor_id();
|
|
+ ent->set = arg->active_set;
|
|
+ ent->tgid = current->tgid;
|
|
+
|
|
+ /*
|
|
+ * selectively store PMDs in increasing index number
|
|
+ */
|
|
+ if (npmds) {
|
|
+ u64 *val = arg->smpl_pmds_values;
|
|
+ for (i = 0; i < npmds; i++)
|
|
+ *e++ = *val++;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * update position for next entry
|
|
+ */
|
|
+ hdr->hdr_cur_offs += entry_size;
|
|
+ cur += entry_size;
|
|
+
|
|
+ pfm_cacheflush(hdr, sizeof(*hdr));
|
|
+ pfm_cacheflush(ent, entry_size);
|
|
+
|
|
+ /*
|
|
+ * post check to avoid losing the last sample
|
|
+ */
|
|
+ if ((last - cur) < min_size)
|
|
+ goto full;
|
|
+
|
|
+ /* reset before returning from interrupt handler */
|
|
+ arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET;
|
|
+
|
|
+ return 0;
|
|
+full:
|
|
+ PFM_DBG_ovfl("sampling buffer full free=%zu, count=%llu",
|
|
+ last-cur,
|
|
+ (unsigned long long)hdr->hdr_count);
|
|
+
|
|
+ /*
|
|
+ * increment number of buffer overflows.
|
|
+ * important to detect duplicate set of samples.
|
|
+ */
|
|
+ hdr->hdr_overflows++;
|
|
+
|
|
+ /*
|
|
+ * request notification and masking of monitoring.
|
|
+ * Notification is still subject to the overflowed
|
|
+ * register having the FL_NOTIFY flag set.
|
|
+ */
|
|
+ arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK;
|
|
+
|
|
+ return -ENOBUFS; /* we are full, sorry */
|
|
+}
|
|
+
|
|
+static int pfm_dfl_fmt_restart(int is_active, u32 *ovfl_ctrl, void *buf)
|
|
+{
|
|
+ struct pfm_dfl_smpl_hdr *hdr;
|
|
+
|
|
+ hdr = buf;
|
|
+
|
|
+ hdr->hdr_count = 0;
|
|
+ hdr->hdr_cur_offs = sizeof(*hdr);
|
|
+
|
|
+ pfm_cacheflush(hdr, sizeof(*hdr));
|
|
+
|
|
+ *ovfl_ctrl = PFM_OVFL_CTRL_RESET;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_dfl_fmt_exit(void *buf)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static struct pfm_smpl_fmt dfl_fmt = {
|
|
+ .fmt_name = "default",
|
|
+ .fmt_version = 0x10000,
|
|
+ .fmt_arg_size = sizeof(struct pfm_dfl_smpl_arg),
|
|
+ .fmt_validate = pfm_dfl_fmt_validate,
|
|
+ .fmt_getsize = pfm_dfl_fmt_get_size,
|
|
+ .fmt_init = pfm_dfl_fmt_init,
|
|
+ .fmt_handler = pfm_dfl_fmt_handler,
|
|
+ .fmt_restart = pfm_dfl_fmt_restart,
|
|
+ .fmt_exit = pfm_dfl_fmt_exit,
|
|
+ .fmt_flags = PFM_FMT_BUILTIN_FLAG,
|
|
+ .owner = THIS_MODULE
|
|
+};
|
|
+
|
|
+static int pfm_dfl_fmt_init_module(void)
|
|
+{
|
|
+ return pfm_fmt_register(&dfl_fmt);
|
|
+}
|
|
+
|
|
+static void pfm_dfl_fmt_cleanup_module(void)
|
|
+{
|
|
+ pfm_fmt_unregister(&dfl_fmt);
|
|
+}
|
|
+
|
|
+module_init(pfm_dfl_fmt_init_module);
|
|
+module_exit(pfm_dfl_fmt_cleanup_module);
|
|
diff --git a/perfmon/perfmon_file.c b/perfmon/perfmon_file.c
|
|
new file mode 100644
|
|
index 0000000..1cde81b
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_file.c
|
|
@@ -0,0 +1,751 @@
|
|
+/*
|
|
+ * perfmon_file.c: perfmon2 file input/output functions
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/file.h>
|
|
+#include <linux/poll.h>
|
|
+#include <linux/vfs.h>
|
|
+#include <linux/pagemap.h>
|
|
+#include <linux/mount.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+#define PFMFS_MAGIC 0xa0b4d889 /* perfmon filesystem magic number */
|
|
+
|
|
+struct pfm_controls pfm_controls = {
|
|
+ .sys_group = PFM_GROUP_PERM_ANY,
|
|
+ .task_group = PFM_GROUP_PERM_ANY,
|
|
+ .arg_mem_max = PAGE_SIZE,
|
|
+ .smpl_buffer_mem_max = ~0,
|
|
+};
|
|
+EXPORT_SYMBOL(pfm_controls);
|
|
+
|
|
+static int __init enable_debug(char *str)
|
|
+{
|
|
+ pfm_controls.debug = 1;
|
|
+ PFM_INFO("debug output enabled\n");
|
|
+ return 1;
|
|
+}
|
|
+__setup("perfmon_debug", enable_debug);
|
|
+
|
|
+static int pfmfs_delete_dentry(struct dentry *dentry)
|
|
+{
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+static struct dentry_operations pfmfs_dentry_operations = {
|
|
+ .d_delete = pfmfs_delete_dentry,
|
|
+};
|
|
+
|
|
+int pfm_buf_map_pagefault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|
+{
|
|
+ void *kaddr;
|
|
+ unsigned long address;
|
|
+ struct pfm_context *ctx;
|
|
+ size_t size;
|
|
+
|
|
+ address = (unsigned long)vmf->virtual_address;
|
|
+
|
|
+ ctx = vma->vm_private_data;
|
|
+ if (ctx == NULL) {
|
|
+ PFM_DBG("no ctx");
|
|
+ return VM_FAULT_SIGBUS;
|
|
+ }
|
|
+ /*
|
|
+ * size available to user (may be different from real_smpl_size)
|
|
+ */
|
|
+ size = ctx->smpl_size;
|
|
+
|
|
+ if ((address < vma->vm_start) ||
|
|
+ (address >= (vma->vm_start + size)))
|
|
+ return VM_FAULT_SIGBUS;
|
|
+
|
|
+ kaddr = ctx->smpl_addr + (address - vma->vm_start);
|
|
+
|
|
+ vmf->page = vmalloc_to_page(kaddr);
|
|
+ get_page(vmf->page);
|
|
+
|
|
+ PFM_DBG("[%d] start=%p ref_count=%d",
|
|
+ current->pid,
|
|
+ kaddr, page_count(vmf->page));
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * we need to determine whether or not we are closing the last reference
|
|
+ * to the file and thus are going to end up in pfm_close() which eventually
|
|
+ * calls pfm_release_buf_space(). In that function, we update the accounting
|
|
+ * for locked_vm given that we are actually freeing the sampling buffer. The
|
|
+ * issue is that there are multiple paths leading to pfm_release_buf_space(),
|
|
+ * from exit(), munmap(), close(). The path coming from munmap() is problematic
|
|
+ * because do_munmap() grabs mmap_sem in write-mode which is also what
|
|
+ * pfm_release_buf_space does. To avoid deadlock, we need to determine where
|
|
+ * we are calling from and skip the locking. The vm_ops->close() callback
|
|
+ * is invoked for each remove_vma() independently of the number of references
|
|
+ * left on the file descriptor, therefore simple reference counter does not
|
|
+ * work. We need to determine if this is the last call, and then set a flag
|
|
+ * to skip the locking.
|
|
+ */
|
|
+static void pfm_buf_map_close(struct vm_area_struct *vma)
|
|
+{
|
|
+ struct file *file;
|
|
+ struct pfm_context *ctx;
|
|
+
|
|
+ file = vma->vm_file;
|
|
+ ctx = vma->vm_private_data;
|
|
+
|
|
+ /*
|
|
+ * if file is going to close, then pfm_close() will
|
|
+ * be called, do not lock in pfm_release_buf
|
|
+ */
|
|
+ if (atomic_read(&file->f_count) == 1)
|
|
+ ctx->flags.mmap_nlock = 1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * the close callback does not touch the accounting because the locked
|
|
+ * memory accounting must be done when the actual buffer
|
|
+ * is freed. Munmap does not free the page backing the vma
|
|
+ * because they may still be in use by the PMU interrupt handler.
|
|
+ */
|
|
+struct vm_operations_struct pfm_buf_map_vm_ops = {
|
|
+ .fault = pfm_buf_map_pagefault,
|
|
+ .close = pfm_buf_map_close
|
|
+};
|
|
+
|
|
+static int pfm_mmap_buffer(struct pfm_context *ctx, struct vm_area_struct *vma,
|
|
+ size_t size)
|
|
+{
|
|
+ if (ctx->smpl_addr == NULL) {
|
|
+ PFM_DBG("no sampling buffer to map");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (size > ctx->smpl_size) {
|
|
+ PFM_DBG("mmap size=%zu >= actual buf size=%zu",
|
|
+ size,
|
|
+ ctx->smpl_size);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ vma->vm_ops = &pfm_buf_map_vm_ops;
|
|
+ vma->vm_private_data = ctx;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_mmap(struct file *file, struct vm_area_struct *vma)
|
|
+{
|
|
+ size_t size;
|
|
+ struct pfm_context *ctx;
|
|
+ unsigned long flags;
|
|
+ int ret;
|
|
+
|
|
+ PFM_DBG("pfm_file_ops");
|
|
+
|
|
+ ctx = file->private_data;
|
|
+ size = (vma->vm_end - vma->vm_start);
|
|
+
|
|
+ if (ctx == NULL)
|
|
+ return -EINVAL;
|
|
+
|
|
+ ret = -EINVAL;
|
|
+
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ if (vma->vm_flags & VM_WRITE) {
|
|
+ PFM_DBG("cannot map buffer for writing");
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ PFM_DBG("vm_pgoff=%lu size=%zu vm_start=0x%lx",
|
|
+ vma->vm_pgoff,
|
|
+ size,
|
|
+ vma->vm_start);
|
|
+
|
|
+ ret = pfm_mmap_buffer(ctx, vma, size);
|
|
+ if (ret == 0)
|
|
+ vma->vm_flags |= VM_RESERVED;
|
|
+
|
|
+ PFM_DBG("ret=%d vma_flags=0x%lx vma_start=0x%lx vma_size=%lu",
|
|
+ ret,
|
|
+ vma->vm_flags,
|
|
+ vma->vm_start,
|
|
+ vma->vm_end-vma->vm_start);
|
|
+done:
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Extract one message from queue.
|
|
+ *
|
|
+ * return:
|
|
+ * -EAGAIN: when non-blocking and nothing is in the queue.
|
|
+ * -ERESTARTSYS: when blocking and signal is pending
|
|
+ * Otherwise returns size of message (sizeof(pfarg_msg))
|
|
+ */
|
|
+ssize_t __pfm_read(struct pfm_context *ctx, union pfarg_msg *msg_buf, int non_block)
|
|
+{
|
|
+ ssize_t ret = 0;
|
|
+ unsigned long flags;
|
|
+ DECLARE_WAITQUEUE(wait, current);
|
|
+
|
|
+ /*
|
|
+ * we must mask interrupts to avoid a race condition
|
|
+ * with the PMU interrupt handler.
|
|
+ */
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ while (pfm_msgq_is_empty(ctx)) {
|
|
+
|
|
+ /*
|
|
+ * handle non-blocking reads
|
|
+ * return -EAGAIN
|
|
+ */
|
|
+ ret = -EAGAIN;
|
|
+ if (non_block)
|
|
+ break;
|
|
+
|
|
+ add_wait_queue(&ctx->msgq_wait, &wait);
|
|
+ set_current_state(TASK_INTERRUPTIBLE);
|
|
+
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ schedule();
|
|
+
|
|
+ /*
|
|
+ * during this window, another thread may call
|
|
+ * pfm_read() and steal our message
|
|
+ */
|
|
+
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ remove_wait_queue(&ctx->msgq_wait, &wait);
|
|
+ set_current_state(TASK_RUNNING);
|
|
+
|
|
+ /*
|
|
+ * check for pending signals
|
|
+ * return -ERESTARTSYS
|
|
+ */
|
|
+ ret = -ERESTARTSYS;
|
|
+ if (signal_pending(current))
|
|
+ break;
|
|
+
|
|
+ /*
|
|
+ * we may have a message
|
|
+ */
|
|
+ ret = 0;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * extract message
|
|
+ */
|
|
+ if (ret == 0) {
|
|
+ /*
|
|
+ * copy the oldest message into msg_buf.
|
|
+ * We cannot directly call copy_to_user()
|
|
+ * because interrupts are masked. This is done
|
|
+ * in the caller
|
|
+ */
|
|
+ pfm_get_next_msg(ctx, msg_buf);
|
|
+
|
|
+ ret = sizeof(*msg_buf);
|
|
+
|
|
+ PFM_DBG("extracted type=%d", msg_buf->type);
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ PFM_DBG("blocking=%d ret=%zd", non_block, ret);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static ssize_t pfm_read(struct file *filp, char __user *buf, size_t size,
|
|
+ loff_t *ppos)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ union pfarg_msg msg_buf;
|
|
+ int non_block, ret;
|
|
+
|
|
+ PFM_DBG_ovfl("buf=%p size=%zu", buf, size);
|
|
+
|
|
+ ctx = filp->private_data;
|
|
+ if (ctx == NULL) {
|
|
+ PFM_ERR("no ctx for pfm_read");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ non_block = filp->f_flags & O_NONBLOCK;
|
|
+
|
|
+#ifdef CONFIG_IA64_PERFMON_COMPAT
|
|
+ /*
|
|
+ * detect IA-64 v2.0 context read (message size is different)
|
|
+ * nops on all other architectures
|
|
+ */
|
|
+ if (unlikely(ctx->flags.ia64_v20_compat))
|
|
+ return pfm_arch_compat_read(ctx, buf, non_block, size);
|
|
+#endif
|
|
+ /*
|
|
+ * cannot extract partial messages.
|
|
+ * check even when there is no message
|
|
+ *
|
|
+ * cannot extract more than one message per call. Bytes
|
|
+ * above sizeof(msg) are ignored.
|
|
+ */
|
|
+ if (size < sizeof(msg_buf)) {
|
|
+ PFM_DBG("message is too small size=%zu must be >=%zu)",
|
|
+ size,
|
|
+ sizeof(msg_buf));
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ ret = __pfm_read(ctx, &msg_buf, non_block);
|
|
+ if (ret > 0) {
|
|
+ if (copy_to_user(buf, &msg_buf, sizeof(msg_buf)))
|
|
+ ret = -EFAULT;
|
|
+ }
|
|
+ PFM_DBG_ovfl("ret=%d", ret);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static ssize_t pfm_write(struct file *file, const char __user *ubuf,
|
|
+ size_t size, loff_t *ppos)
|
|
+{
|
|
+ PFM_DBG("pfm_write called");
|
|
+ return -EINVAL;
|
|
+}
|
|
+
|
|
+static unsigned int pfm_poll(struct file *filp, poll_table *wait)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ unsigned long flags;
|
|
+ unsigned int mask = 0;
|
|
+
|
|
+ PFM_DBG("pfm_file_ops");
|
|
+
|
|
+ if (filp->f_op != &pfm_file_ops) {
|
|
+ PFM_ERR("pfm_poll bad magic");
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ ctx = filp->private_data;
|
|
+ if (ctx == NULL) {
|
|
+ PFM_ERR("pfm_poll no ctx");
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ PFM_DBG("before poll_wait");
|
|
+
|
|
+ poll_wait(filp, &ctx->msgq_wait, wait);
|
|
+
|
|
+ /*
|
|
+ * pfm_msgq_is_empty() is non-atomic
|
|
+ *
|
|
+ * filp is protected by fget() at upper level
|
|
+ * context cannot be closed by another thread.
|
|
+ *
|
|
+ * There may be a race with a PMU interrupt adding
|
|
+ * messages to the queue. But we are interested in
|
|
+ * queue not empty, so adding more messages should
|
|
+ * not really be a problem.
|
|
+ *
|
|
+ * There may be a race with another thread issuing
|
|
+ * a read() and stealing messages from the queue thus
|
|
+ * may return the wrong answer. This could potentially
|
|
+ * lead to a blocking read, because nothing is
|
|
+ * available in the queue
|
|
+ */
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ if (!pfm_msgq_is_empty(ctx))
|
|
+ mask = POLLIN | POLLRDNORM;
|
|
+
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ PFM_DBG("after poll_wait mask=0x%x", mask);
|
|
+
|
|
+ return mask;
|
|
+}
|
|
+
|
|
+static int pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
|
|
+ unsigned long arg)
|
|
+{
|
|
+ PFM_DBG("pfm_ioctl called");
|
|
+ return -EINVAL;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * interrupt cannot be masked when entering this function
|
|
+ */
|
|
+static inline int __pfm_fasync(int fd, struct file *filp,
|
|
+ struct pfm_context *ctx, int on)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ PFM_DBG("in fd=%d on=%d async_q=%p",
|
|
+ fd,
|
|
+ on,
|
|
+ ctx->async_queue);
|
|
+
|
|
+ ret = fasync_helper(fd, filp, on, &ctx->async_queue);
|
|
+
|
|
+ PFM_DBG("out fd=%d on=%d async_q=%p ret=%d",
|
|
+ fd,
|
|
+ on,
|
|
+ ctx->async_queue, ret);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int pfm_fasync(int fd, struct file *filp, int on)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ int ret;
|
|
+
|
|
+ PFM_DBG("pfm_file_ops");
|
|
+
|
|
+ ctx = filp->private_data;
|
|
+ if (ctx == NULL) {
|
|
+ PFM_ERR("pfm_fasync no ctx");
|
|
+ return -EBADF;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * we cannot mask interrupts during this call because this may
|
|
+ * go to sleep if memory is not readily available.
|
|
+ *
|
|
+ * We are protected from the context disappearing by the
|
|
+ * get_fd()/put_fd() done in caller. Serialization of this function
|
|
+ * is ensured by caller.
|
|
+ */
|
|
+ ret = __pfm_fasync(fd, filp, ctx, on);
|
|
+
|
|
+ PFM_DBG("pfm_fasync called on fd=%d on=%d async_queue=%p ret=%d",
|
|
+ fd,
|
|
+ on,
|
|
+ ctx->async_queue, ret);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+static void __pfm_close_remote_cpu(void *info)
|
|
+{
|
|
+ struct pfm_context *ctx = info;
|
|
+ int can_release;
|
|
+
|
|
+ BUG_ON(ctx != __get_cpu_var(pmu_ctx));
|
|
+
|
|
+ /*
|
|
+ * we are in IPI interrupt handler which has always higher
|
|
+ * priority than PMU interrupt, therefore we do not need to
|
|
+ * mask interrupts. context locking is not needed because we
|
|
+ * are in close(), no more user references.
|
|
+ *
|
|
+ * can_release is ignored, release done on calling CPU
|
|
+ */
|
|
+ __pfm_unload_context(ctx, &can_release);
|
|
+
|
|
+ /*
|
|
+ * we cannot free context here because we are in_interrupt().
|
|
+ * we free on the calling CPU
|
|
+ */
|
|
+}
|
|
+
|
|
+static int pfm_close_remote_cpu(u32 cpu, struct pfm_context *ctx)
|
|
+{
|
|
+ BUG_ON(irqs_disabled());
|
|
+ return smp_call_function_single(cpu, __pfm_close_remote_cpu, ctx, 1);
|
|
+}
|
|
+#endif /* CONFIG_SMP */
|
|
+
|
|
+/*
|
|
+ * called either on explicit close() or from exit_files().
|
|
+ * Only the LAST user of the file gets to this point, i.e., it is
|
|
+ * called only ONCE.
|
|
+ *
|
|
+ * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero
|
|
+ * (fput()),i.e, last task to access the file. Nobody else can access the
|
|
+ * file at this point.
|
|
+ *
|
|
+ * When called from exit_files(), the VMA has been freed because exit_mm()
|
|
+ * is executed before exit_files().
|
|
+ *
|
|
+ * When called from exit_files(), the current task is not yet ZOMBIE but we
|
|
+ * flush the PMU state to the context.
|
|
+ */
|
|
+int __pfm_close(struct pfm_context *ctx, struct file *filp)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ int state;
|
|
+ int can_free = 1, can_unload = 1;
|
|
+ int is_system, can_release = 0;
|
|
+ u32 cpu;
|
|
+
|
|
+ /*
|
|
+ * no risk of ctx or filp disappearing so we can operate outside
|
|
+ * of spin_lock(). fasync_helper() runs with interrupts masked,
|
|
+ * thus there is no risk with the PMU interrupt handler
|
|
+ *
|
|
+ * In case of zombie, we will not have the async struct anymore
|
|
+ * thus kill_fasync() will not do anything
|
|
+ *
|
|
+ * fd is not used when removing the entry so we pass -1
|
|
+ */
|
|
+ if (filp->f_flags & FASYNC)
|
|
+ __pfm_fasync (-1, filp, ctx, 0);
|
|
+
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ state = ctx->state;
|
|
+ is_system = ctx->flags.system;
|
|
+ cpu = ctx->cpu;
|
|
+
|
|
+ PFM_DBG("state=%d", state);
|
|
+
|
|
+ /*
|
|
+ * check if unload is needed
|
|
+ */
|
|
+ if (state == PFM_CTX_UNLOADED)
|
|
+ goto doit;
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+ /*
|
|
+ * we need to release the resource on the ORIGINAL cpu.
|
|
+ * we need to release the context lock to avoid deadlocks
|
|
+ * on the original CPU, especially in the context switch
|
|
+ * routines. It is safe to unlock because we are in close(),
|
|
+ * in other words, there is no more access from user level.
|
|
+ * we can also unmask interrupts on this CPU because the
|
|
+ * context is running on the original CPU. Context will be
|
|
+ * unloaded and the session will be released on the original
|
|
+ * CPU. Upon return, the caller is guaranteed that the context
|
|
+ * is gone from original CPU.
|
|
+ */
|
|
+ if (is_system && cpu != smp_processor_id()) {
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+ pfm_close_remote_cpu(cpu, ctx);
|
|
+ can_release = 1;
|
|
+ goto free_it;
|
|
+ }
|
|
+
|
|
+ if (!is_system && ctx->task != current) {
|
|
+ /*
|
|
+ * switch context to zombie state
|
|
+ */
|
|
+ ctx->state = PFM_CTX_ZOMBIE;
|
|
+
|
|
+ PFM_DBG("zombie ctx for [%d]", ctx->task->pid);
|
|
+ /*
|
|
+ * must check if other thread is using block overflow
|
|
+ * notification mode. If so make sure it will not block
|
|
+ * because there will not be any pfm_restart() issued.
|
|
+ * When the thread notices the ZOMBIE state, it will clean
|
|
+ * up what is left of the context
|
|
+ */
|
|
+ if (state == PFM_CTX_MASKED && ctx->flags.block) {
|
|
+ /*
|
|
+ * force task to wake up from MASKED state
|
|
+ */
|
|
+ PFM_DBG("waking up [%d]", ctx->task->pid);
|
|
+
|
|
+ complete(&ctx->restart_complete);
|
|
+ }
|
|
+ /*
|
|
+ * PMU session will be released by monitored task when it notices
|
|
+ * ZOMBIE state as part of pfm_unload_context()
|
|
+ */
|
|
+ can_unload = can_free = 0;
|
|
+ }
|
|
+#endif
|
|
+ if (can_unload)
|
|
+ __pfm_unload_context(ctx, &can_release);
|
|
+doit:
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+free_it:
|
|
+#endif
|
|
+ if (can_release)
|
|
+ pfm_session_release(is_system, cpu);
|
|
+
|
|
+ if (can_free)
|
|
+ pfm_free_context(ctx);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pfm_close(struct inode *inode, struct file *filp)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+
|
|
+ PFM_DBG("called filp=%p", filp);
|
|
+
|
|
+ ctx = filp->private_data;
|
|
+ if (ctx == NULL) {
|
|
+ PFM_ERR("no ctx");
|
|
+ return -EBADF;
|
|
+ }
|
|
+ return __pfm_close(ctx, filp);
|
|
+}
|
|
+
|
|
+static int pfm_no_open(struct inode *irrelevant, struct file *dontcare)
|
|
+{
|
|
+ PFM_DBG("pfm_file_ops");
|
|
+
|
|
+ return -ENXIO;
|
|
+}
|
|
+
|
|
+
|
|
+const struct file_operations pfm_file_ops = {
|
|
+ .llseek = no_llseek,
|
|
+ .read = pfm_read,
|
|
+ .write = pfm_write,
|
|
+ .poll = pfm_poll,
|
|
+ .ioctl = pfm_ioctl,
|
|
+ .open = pfm_no_open, /* special open to disallow open via /proc */
|
|
+ .fasync = pfm_fasync,
|
|
+ .release = pfm_close,
|
|
+ .mmap = pfm_mmap
|
|
+};
|
|
+
|
|
+static int pfmfs_get_sb(struct file_system_type *fs_type,
|
|
+ int flags, const char *dev_name,
|
|
+ void *data, struct vfsmount *mnt)
|
|
+{
|
|
+ return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt);
|
|
+}
|
|
+
|
|
+static struct file_system_type pfm_fs_type = {
|
|
+ .name = "pfmfs",
|
|
+ .get_sb = pfmfs_get_sb,
|
|
+ .kill_sb = kill_anon_super,
|
|
+};
|
|
+
|
|
+/*
|
|
+ * pfmfs should _never_ be mounted by userland - too much of security hassle,
|
|
+ * no real gain from having the whole whorehouse mounted. So we don't need
|
|
+ * any operations on the root directory. However, we need a non-trivial
|
|
+ * d_name - pfm: will go nicely and kill the special-casing in procfs.
|
|
+ */
|
|
+static struct vfsmount *pfmfs_mnt;
|
|
+
|
|
+int __init pfm_init_fs(void)
|
|
+{
|
|
+ int err = register_filesystem(&pfm_fs_type);
|
|
+ if (!err) {
|
|
+ pfmfs_mnt = kern_mount(&pfm_fs_type);
|
|
+ err = PTR_ERR(pfmfs_mnt);
|
|
+ if (IS_ERR(pfmfs_mnt))
|
|
+ unregister_filesystem(&pfm_fs_type);
|
|
+ else
|
|
+ err = 0;
|
|
+ }
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/* allocate an fd and its read-only pfmfs file (*cfile); returns fd or -errno */
|
|
+int pfm_alloc_fd(struct file **cfile)
|
|
+{
|
|
+ int fd, ret = 0;
|
|
+ struct file *file = NULL;
|
|
+ struct inode *inode = NULL;
|
|
+ char name[32];
|
|
+ struct qstr this;
|
|
+
|
|
+ fd = get_unused_fd();
|
|
+ if (fd < 0)
|
|
+ return -ENFILE;
|
|
+
|
|
+ ret = -ENFILE;
|
|
+
|
|
+ file = get_empty_filp();
|
|
+ if (!file)
|
|
+ goto out;
|
|
+
|
|
+ /* allocate a new inode */
|
|
+ inode = new_inode(pfmfs_mnt->mnt_sb);
|
|
+ if (!inode)
|
|
+ goto out;
|
|
+
|
|
+ PFM_DBG("new inode ino=%lu @%p", inode->i_ino, inode);
|
|
+
|
|
+ inode->i_sb = pfmfs_mnt->mnt_sb;
|
|
+ inode->i_mode = S_IFCHR|S_IRUGO;
|
|
+ inode->i_uid = current->fsuid;
|
|
+ inode->i_gid = current->fsgid;
|
|
+
|
|
+ sprintf(name, "[%lu]", inode->i_ino);
|
|
+ this.name = name;
|
|
+ this.hash = inode->i_ino;
|
|
+ this.len = strlen(name);
|
|
+
|
|
+ ret = -ENOMEM;
|
|
+
|
|
+ /* allocate a new dcache entry */
|
|
+ file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
|
|
+ if (!file->f_dentry)
|
|
+ goto out;
|
|
+
|
|
+ file->f_dentry->d_op = &pfmfs_dentry_operations;
|
|
+
|
|
+ d_add(file->f_dentry, inode);
|
|
+ file->f_vfsmnt = mntget(pfmfs_mnt);
|
|
+ file->f_mapping = inode->i_mapping;
|
|
+
|
|
+ file->f_op = &pfm_file_ops;
|
|
+ file->f_mode = FMODE_READ;
|
|
+ file->f_flags = O_RDONLY;
|
|
+ file->f_pos = 0;
|
|
+
|
|
+ *cfile = file;
|
|
+
|
|
+ return fd;
|
|
+out:
|
|
+ /* only reached before d_add(): drop the inode if one was allocated */
|
|
+ if (inode)
|
|
+ iput(inode);
|
|
+ if (file)
|
|
+ put_filp(file);
|
|
+ put_unused_fd(fd);
|
|
+ return ret;
|
|
+}
|
|
diff --git a/perfmon/perfmon_fmt.c b/perfmon/perfmon_fmt.c
|
|
new file mode 100644
|
|
index 0000000..27c4340
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_fmt.c
|
|
@@ -0,0 +1,219 @@
|
|
+/*
|
|
+ * perfmon_fmt.c: perfmon2 sampling buffer format management
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_smpl_fmt_lock);
|
|
+static LIST_HEAD(pfm_smpl_fmt_list);
|
|
+
|
|
+static inline int fmt_is_mod(struct pfm_smpl_fmt *f)
|
|
+{
|
|
+ return !(f->fmt_flags & PFM_FMTFL_IS_BUILTIN);
|
|
+}
|
|
+
|
|
+static struct pfm_smpl_fmt *pfm_find_fmt(char *name)
|
|
+{
|
|
+ struct pfm_smpl_fmt *entry;
|
|
+
|
|
+ list_for_each_entry(entry, &pfm_smpl_fmt_list, fmt_list) {
|
|
+ if (!strcmp(entry->fmt_name, name))
|
|
+ return entry;
|
|
+ }
|
|
+ return NULL;
|
|
+}
|
|
+/*
|
|
+ * find a buffer format based on its name
|
|
+ */
|
|
+struct pfm_smpl_fmt *pfm_smpl_fmt_get(char *name)
|
|
+{
|
|
+ struct pfm_smpl_fmt *fmt;
|
|
+
|
|
+ spin_lock(&pfm_smpl_fmt_lock);
|
|
+
|
|
+ fmt = pfm_find_fmt(name);
|
|
+
|
|
+ /*
|
|
+ * increase module refcount
|
|
+ */
|
|
+ if (fmt && fmt_is_mod(fmt) && !try_module_get(fmt->owner))
|
|
+ fmt = NULL;
|
|
+
|
|
+ spin_unlock(&pfm_smpl_fmt_lock);
|
|
+
|
|
+ return fmt;
|
|
+}
|
|
+
|
|
+void pfm_smpl_fmt_put(struct pfm_smpl_fmt *fmt)
|
|
+{
|
|
+ if (fmt == NULL || !fmt_is_mod(fmt))
|
|
+ return;
|
|
+ BUG_ON(fmt->owner == NULL);
|
|
+
|
|
+ spin_lock(&pfm_smpl_fmt_lock);
|
|
+ module_put(fmt->owner);
|
|
+ spin_unlock(&pfm_smpl_fmt_lock);
|
|
+}
|
|
+
|
|
+int pfm_fmt_register(struct pfm_smpl_fmt *fmt)
|
|
+{
|
|
+ int ret = 0;
|
|
+
|
|
+ if (perfmon_disabled) {
|
|
+ PFM_INFO("perfmon disabled, cannot add sampling format");
|
|
+ return -ENOSYS;
|
|
+ }
|
|
+
|
|
+ /* some sanity checks */
|
|
+ if (fmt == NULL) {
|
|
+ PFM_INFO("perfmon: NULL format for register");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (fmt->fmt_name == NULL) {
|
|
+ PFM_INFO("perfmon: format has no name");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (fmt->fmt_qdepth > PFM_MSGS_COUNT) {
|
|
+ PFM_INFO("perfmon: format %s requires %u msg queue depth (max %d)",
|
|
+ fmt->fmt_name,
|
|
+ fmt->fmt_qdepth,
|
|
+ PFM_MSGS_COUNT);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * fmt is missing the initialization of .owner = THIS_MODULE
|
|
+ * this is only valid when format is compiled as a module
|
|
+ */
|
|
+ if (fmt->owner == NULL && fmt_is_mod(fmt)) {
|
|
+ PFM_INFO("format %s has no module owner", fmt->fmt_name);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ /*
|
|
+ * we need at least a handler
|
|
+ */
|
|
+ if (fmt->fmt_handler == NULL) {
|
|
+ PFM_INFO("format %s has no handler", fmt->fmt_name);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * format argument size cannot be bigger than PAGE_SIZE
|
|
+ */
|
|
+ if (fmt->fmt_arg_size > PAGE_SIZE) {
|
|
+ PFM_INFO("format %s arguments too big", fmt->fmt_name);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ spin_lock(&pfm_smpl_fmt_lock);
|
|
+
|
|
+ /*
|
|
+ * because of sysfs, we cannot have two formats with the same name
|
|
+ */
|
|
+ if (pfm_find_fmt(fmt->fmt_name)) {
|
|
+ PFM_INFO("format %s already registered", fmt->fmt_name);
|
|
+ ret = -EBUSY;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ ret = pfm_sysfs_add_fmt(fmt);
|
|
+ if (ret) {
|
|
+ PFM_INFO("sysfs cannot add format entry for %s", fmt->fmt_name);
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ list_add(&fmt->fmt_list, &pfm_smpl_fmt_list);
|
|
+
|
|
+ PFM_INFO("added sampling format %s", fmt->fmt_name);
|
|
+out:
|
|
+ spin_unlock(&pfm_smpl_fmt_lock);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+EXPORT_SYMBOL(pfm_fmt_register);
|
|
+
|
|
+/* remove fmt from the format list and sysfs; -EINVAL if invalid or not registered */
|
|
+int pfm_fmt_unregister(struct pfm_smpl_fmt *fmt)
|
|
+{
|
|
+ struct pfm_smpl_fmt *fmt2;
|
|
+ int ret = 0;
|
|
+
|
|
+ if (!fmt || !fmt->fmt_name) {
|
|
+ PFM_DBG("invalid fmt");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ spin_lock(&pfm_smpl_fmt_lock);
|
|
+
|
|
+ /* only the very object found on the list may be unlinked */
|
|
+ fmt2 = pfm_find_fmt(fmt->fmt_name);
|
|
+ if (fmt2 != fmt) {
|
|
+ PFM_INFO("unregister failed, format not registered");
|
|
+ ret = -EINVAL;
|
|
+ goto out;
|
|
+ }
|
|
+ list_del_init(&fmt->fmt_list);
|
|
+ pfm_sysfs_remove_fmt(fmt);
|
|
+
|
|
+ PFM_INFO("removed sampling format: %s", fmt->fmt_name);
|
|
+
|
|
+out:
|
|
+ spin_unlock(&pfm_smpl_fmt_lock);
|
|
+ return ret;
|
|
+}
|
|
+EXPORT_SYMBOL(pfm_fmt_unregister);
|
|
+
|
|
+/*
|
|
+ * we defer adding the builtin formats to /sys/kernel/perfmon/formats
|
|
+ * until after the pfm sysfs subsystem is initialized. This function
|
|
+ * is called from pfm_init_sysfs()
|
|
+ */
|
|
+void __init pfm_sysfs_builtin_fmt_add(void)
|
|
+{
|
|
+ struct pfm_smpl_fmt *entry;
|
|
+
|
|
+ /*
|
|
+ * locking not needed, kernel not fully booted
|
|
+ * when called
|
|
+ */
|
|
+ list_for_each_entry(entry, &pfm_smpl_fmt_list, fmt_list) {
|
|
+ pfm_sysfs_add_fmt(entry);
|
|
+ }
|
|
+}
|
|
diff --git a/perfmon/perfmon_hotplug.c b/perfmon/perfmon_hotplug.c
|
|
new file mode 100644
|
|
index 0000000..eaaba81
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_hotplug.c
|
|
@@ -0,0 +1,151 @@
|
|
+/*
|
|
+ * perfmon_hotplug.c: handling of CPU hotplug
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <linux/cpu.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+#ifndef CONFIG_HOTPLUG_CPU
|
|
+void pfm_cpu_disable(void)
|
|
+{}
|
|
+
|
|
+int __init pfm_init_hotplug(void)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+#else /* CONFIG_HOTPLUG_CPU */
|
|
+/*
|
|
+ * CPU hotplug event notification callback
|
|
+ *
|
|
+ * We use the callback to manage the sysfs interface.
|
|
+ * Note that the actual shutdown of monitoring on the CPU
|
|
+ * is done in pfm_cpu_disable(), see comments there for more
|
|
+ * information.
|
|
+ */
|
|
+static int pfm_cpu_notify(struct notifier_block *nfb,
|
|
+ unsigned long action, void *hcpu)
|
|
+{
|
|
+ unsigned int cpu = (unsigned long)hcpu;
|
|
+ int ret = NOTIFY_OK;
|
|
+
|
|
+ pfm_pmu_conf_get(0);
|
|
+
|
|
+ switch (action) {
|
|
+ case CPU_ONLINE:
|
|
+ pfm_debugfs_add_cpu(cpu);
|
|
+ PFM_INFO("CPU%d is online", cpu);
|
|
+ break;
|
|
+ case CPU_UP_PREPARE:
|
|
+ PFM_INFO("CPU%d prepare online", cpu);
|
|
+ break;
|
|
+ case CPU_UP_CANCELED:
|
|
+ pfm_debugfs_del_cpu(cpu);
|
|
+ PFM_INFO("CPU%d is up canceled", cpu);
|
|
+ break;
|
|
+ case CPU_DOWN_PREPARE:
|
|
+ PFM_INFO("CPU%d prepare offline", cpu);
|
|
+ break;
|
|
+ case CPU_DOWN_FAILED:
|
|
+ PFM_INFO("CPU%d is down failed", cpu);
|
|
+ break;
|
|
+ case CPU_DEAD:
|
|
+ pfm_debugfs_del_cpu(cpu);
|
|
+ PFM_INFO("CPU%d is offline", cpu);
|
|
+ break;
|
|
+ }
|
|
+ pfm_pmu_conf_put();
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * called from cpu_disable() to detach the perfmon context
|
|
+ * from the CPU going down.
|
|
+ *
|
|
+ * We cannot use the cpu hotplug notifier because we MUST run
|
|
+ * on the CPU that is going down to save the PMU state
|
|
+ */
|
|
+void pfm_cpu_disable(void)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ unsigned long flags;
|
|
+ int is_system, release_info = 0;
|
|
+ u32 cpu;
|
|
+ int r;
|
|
+
|
|
+ ctx = __get_cpu_var(pmu_ctx);
|
|
+ if (ctx == NULL)
|
|
+ return;
|
|
+
|
|
+ is_system = ctx->flags.system;
|
|
+ cpu = ctx->cpu;
|
|
+
|
|
+ /*
|
|
+ * context is LOADED or MASKED
|
|
+ *
|
|
+ * we unload from CPU. That stops monitoring and does
|
|
+ * all the bookkeeping of saving values and updating duration
|
|
+ */
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+ if (is_system)
|
|
+ __pfm_unload_context(ctx, &release_info);
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ /*
|
|
+ * cancel timer
|
|
+ */
|
|
+ if (release_info & 0x2) {
|
|
+ r = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
|
|
+ PFM_DBG("timeout cancel=%d", r);
|
|
+ }
|
|
+
|
|
+ if (release_info & 0x1)
|
|
+ pfm_session_release(is_system, cpu);
|
|
+}
|
|
+
|
|
+static struct notifier_block pfm_cpu_notifier = {
|
|
+ .notifier_call = pfm_cpu_notify
|
|
+};
|
|
+
|
|
+int __init pfm_init_hotplug(void)
|
|
+{
|
|
+ int ret = 0;
|
|
+ /*
|
|
+ * register CPU hotplug event notifier
|
|
+ */
|
|
+ ret = register_cpu_notifier(&pfm_cpu_notifier);
|
|
+ if (!ret)
|
|
+ PFM_LOG("CPU hotplug support enabled");
|
|
+ return ret;
|
|
+}
|
|
+#endif /* CONFIG_HOTPLUG_CPU */
|
|
diff --git a/perfmon/perfmon_init.c b/perfmon/perfmon_init.c
|
|
new file mode 100644
|
|
index 0000000..bbb6e4d
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_init.c
|
|
@@ -0,0 +1,131 @@
|
|
+/*
|
|
+ * perfmon_init.c: perfmon2 global initialization functions
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+/*
|
|
+ * external variables
|
|
+ */
|
|
+DEFINE_PER_CPU(struct task_struct *, pmu_owner);
|
|
+DEFINE_PER_CPU(struct pfm_context *, pmu_ctx);
|
|
+DEFINE_PER_CPU(u64, pmu_activation_number);
|
|
+DEFINE_PER_CPU(struct pfm_stats, pfm_stats);
|
|
+DEFINE_PER_CPU(struct hrtimer, pfm_hrtimer);
|
|
+
|
|
+
|
|
+int perfmon_disabled; /* >0 if perfmon is disabled */
|
|
+
|
|
+/*
|
|
+ * called from cpu_init() and pfm_pmu_register()
|
|
+ */
|
|
+void __pfm_init_percpu(void *dummy)
|
|
+{
|
|
+ struct hrtimer *h;
|
|
+
|
|
+ h = &__get_cpu_var(pfm_hrtimer);
|
|
+
|
|
+ pfm_arch_init_percpu();
|
|
+
|
|
+ /*
|
|
+ * initialize per-cpu high res timer
|
|
+ */
|
|
+ hrtimer_init(h, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
|
+#ifdef CONFIG_HIGH_RES_TIMERS
|
|
+ /*
|
|
+ * avoid potential deadlock on the runqueue lock
|
|
+ * during context switch when multiplexing. Situation
|
|
+ * arises on architectures which run switch_to() with
|
|
+ * the runqueue lock held, e.g., x86. On others, e.g.,
|
|
+ * IA-64, the problem does not exist.
|
|
+ * Setting the callback mode to HRTIMER_CB_IRQSAFE_UNLOCKED
|
|
+ * such that the callback routine is only called on hardirq
|
|
+ * context not on softirq, thus the context switch will not
|
|
+ * end up trying to wakeup the softirqd
|
|
+ */
|
|
+ h->cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
|
|
+#endif
|
|
+ h->function = pfm_handle_switch_timeout;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * global initialization routine, executed only once
|
|
+ */
|
|
+int __init pfm_init(void)
|
|
+{
|
|
+ PFM_LOG("version %u.%u", PFM_VERSION_MAJ, PFM_VERSION_MIN);
|
|
+
|
|
+ if (pfm_init_ctx())
|
|
+ goto error_disable;
|
|
+
|
|
+
|
|
+ if (pfm_init_sets())
|
|
+ goto error_disable;
|
|
+
|
|
+ if (pfm_init_fs())
|
|
+ goto error_disable;
|
|
+
|
|
+ if (pfm_init_sysfs())
|
|
+ goto error_disable;
|
|
+
|
|
+ /* not critical, so no error checking */
|
|
+ pfm_init_debugfs();
|
|
+
|
|
+ /*
|
|
+ * one time, arch-specific global initialization
|
|
+ */
|
|
+ if (pfm_arch_init())
|
|
+ goto error_disable;
|
|
+
|
|
+ if (pfm_init_hotplug())
|
|
+ goto error_disable;
|
|
+ return 0;
|
|
+
|
|
+error_disable:
|
|
+ PFM_ERR("perfmon is disabled due to initialization error");
|
|
+ perfmon_disabled = 1;
|
|
+ return -1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * must use subsys_initcall() to ensure that the perfmon2 core
|
|
+ * is initialized before any PMU description module when they are
|
|
+ * compiled in.
|
|
+ */
|
|
+subsys_initcall(pfm_init);
|
|
diff --git a/perfmon/perfmon_intr.c b/perfmon/perfmon_intr.c
|
|
new file mode 100644
|
|
index 0000000..c5e3cda
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_intr.c
|
|
@@ -0,0 +1,648 @@
|
|
+/*
|
|
+ * perfmon_intr.c: perfmon2 interrupt handling
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+/**
|
|
+ * pfm_intr_process_64bit_ovfls - handle 64-bit counter emulation
|
|
+ * @ctx: context to operate on
|
|
+ * @set: set to operate on
|
|
+ *
|
|
+ * The function returns the number of 64-bit overflows detected.
|
|
+ *
|
|
+ * 64-bit software pmds are updated for overflowed pmd registers
|
|
+ * the set->reset_pmds is updated to the list of pmds to reset
|
|
+ *
|
|
+ * In any case, set->npend_ovfls is cleared
|
|
+ */
|
|
+static u16 pfm_intr_process_64bit_ovfls(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ u32 *ovfl_ctrl)
|
|
+{
|
|
+ u16 i, num_ovfls, max_pmd, max_intr;
|
|
+ u16 num_64b_ovfls, has_ovfl_sw, must_switch;
|
|
+ u64 ovfl_thres, old_val, new_val, ovfl_mask;
|
|
+
|
|
+ num_64b_ovfls = must_switch = 0;
|
|
+
|
|
+ ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+ max_pmd = ctx->regs.max_pmd;
|
|
+ max_intr = ctx->regs.max_intr_pmd;
|
|
+
|
|
+ num_ovfls = set->npend_ovfls;
|
|
+ has_ovfl_sw = set->flags & PFM_SETFL_OVFL_SWITCH;
|
|
+
|
|
+ bitmap_zero(cast_ulp(set->reset_pmds), max_pmd);
|
|
+
|
|
+ for (i = ctx->regs.first_intr_pmd; num_ovfls; i++) {
|
|
+ /*
|
|
+ * skip pmd which did not overflow
|
|
+ */
|
|
+ if (!test_bit(i, cast_ulp(set->povfl_pmds)))
|
|
+ continue;
|
|
+
|
|
+ num_ovfls--;
|
|
+
|
|
+ /*
|
|
+ * Update software value for counters ONLY
|
|
+ *
|
|
+ * Note that the pmd is not necessarily 0 at this point as
|
|
+ * qualified events may have happened before the PMU was
|
|
+ * frozen. The residual count is not taken into consideration
|
|
+ * here but will be with any read of the pmd
|
|
+ */
|
|
+ ovfl_thres = set->pmds[i].ovflsw_thres;
|
|
+
|
|
+ if (likely(test_bit(i, cast_ulp(ctx->regs.cnt_pmds)))) {
|
|
+ old_val = new_val = set->pmds[i].value;
|
|
+ new_val += 1 + ovfl_mask;
|
|
+ set->pmds[i].value = new_val;
|
|
+ } else {
|
|
+ /*
|
|
+ * for non counters which interrupt, e.g., AMD IBS,
|
|
+ * we consider this equivalent to a 64-bit counter
|
|
+ * overflow.
|
|
+ */
|
|
+ old_val = 1; new_val = 0;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * check for 64-bit overflow condition
|
|
+ */
|
|
+ if (likely(old_val > new_val)) {
|
|
+ num_64b_ovfls++;
|
|
+ if (has_ovfl_sw && ovfl_thres > 0) {
|
|
+ if (ovfl_thres == 1)
|
|
+ must_switch = 1;
|
|
+ set->pmds[i].ovflsw_thres = ovfl_thres - 1;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * what to reset because of this overflow
|
|
+ * - the overflowed register
|
|
+ * - its reset_pmds
|
|
+ */
|
|
+ __set_bit(i, cast_ulp(set->reset_pmds));
|
|
+
|
|
+ bitmap_or(cast_ulp(set->reset_pmds),
|
|
+ cast_ulp(set->reset_pmds),
|
|
+ cast_ulp(set->pmds[i].reset_pmds),
|
|
+ max_pmd);
|
|
+ } else {
|
|
+ /*
|
|
+ * only keep track of 64-bit overflows or
|
|
+ * assimilated
|
|
+ */
|
|
+ __clear_bit(i, cast_ulp(set->povfl_pmds));
|
|
+
|
|
+ /*
|
|
+ * on some PMU, it may be necessary to re-arm the PMD
|
|
+ */
|
|
+ pfm_arch_ovfl_reset_pmd(ctx, i);
|
|
+ }
|
|
+
|
|
+ PFM_DBG_ovfl("ovfl=%s pmd%u new=0x%llx old=0x%llx "
|
|
+ "hw_pmd=0x%llx o_pmds=0x%llx must_switch=%u "
|
|
+ "o_thres=%llu o_thres_ref=%llu",
|
|
+ old_val > new_val ? "64-bit" : "HW",
|
|
+ i,
|
|
+ (unsigned long long)new_val,
|
|
+ (unsigned long long)old_val,
|
|
+ (unsigned long long)pfm_read_pmd(ctx, i),
|
|
+ (unsigned long long)set->povfl_pmds[0],
|
|
+ must_switch,
|
|
+ (unsigned long long)set->pmds[i].ovflsw_thres,
|
|
+ (unsigned long long)set->pmds[i].ovflsw_ref_thres);
|
|
+ }
|
|
+ /*
|
|
+ * update public bitmask of 64-bit overflowed pmds
|
|
+ */
|
|
+ if (num_64b_ovfls)
|
|
+ bitmap_copy(cast_ulp(set->ovfl_pmds), cast_ulp(set->povfl_pmds),
|
|
+ max_intr);
|
|
+
|
|
+ if (must_switch)
|
|
+ *ovfl_ctrl |= PFM_OVFL_CTRL_SWITCH;
|
|
+
|
|
+ /*
|
|
+ * mark the overflows as consumed
|
|
+ */
|
|
+ set->npend_ovfls = 0;
|
|
+ bitmap_zero(cast_ulp(set->povfl_pmds), max_intr);
|
|
+
|
|
+ return num_64b_ovfls;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_intr_get_smpl_pmds_values - copy 64-bit pmd values for sampling format
|
|
+ * @ctx: context to work on
|
|
+ * @set: current event set
|
|
+ * @arg: overflow arg to be passed to format
|
|
+ * @smpl_pmds: list of PMDs of interest for the overflowed register
|
|
+ *
|
|
+ * build an array of 64-bit PMD values based on smpl_pmds. Values are
|
|
+ * stored by increasing PMD index; arg->num_smpl_pmds gets their count
|
|
+ */
|
|
+static void pfm_intr_get_smpl_pmds_values(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ struct pfm_ovfl_arg *arg,
|
|
+ u64 *smpl_pmds)
|
|
+{
|
|
+ u16 j, k, max_pmd;
|
|
+ u64 new_val, ovfl_mask;
|
|
+ u64 *cnt_pmds;
|
|
+
|
|
+ cnt_pmds = ctx->regs.cnt_pmds;
|
|
+ max_pmd = ctx->regs.max_pmd;
|
|
+ ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+
|
|
+ for (j = k = 0; j < max_pmd; j++) {
|
|
+
|
|
+ if (!test_bit(j, cast_ulp(smpl_pmds)))
|
|
+ continue;
|
|
+
|
|
+ new_val = pfm_read_pmd(ctx, j);
|
|
+
|
|
+ /* for counters, build 64-bit value */
|
|
+ if (test_bit(j, cast_ulp(cnt_pmds)))
|
|
+ new_val = (set->pmds[j].value & ~ovfl_mask)
|
|
+ | (new_val & ovfl_mask);
|
|
+
|
|
+ arg->smpl_pmds_values[k++] = new_val;
|
|
+ /* k was post-incremented above: trace the slot that was just filled */
|
|
+ PFM_DBG_ovfl("s_pmd_val[%u]=pmd%u=0x%llx", k - 1, j,
|
|
+ (unsigned long long)new_val);
|
|
+ }
|
|
+ arg->num_smpl_pmds = k;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_intr_process_smpl_fmt - handle sampling format callback
|
|
+ * @ctx: context to work on
|
|
+ * @set: current event set
|
|
+ * @ip: interrupted instruction pointer
|
|
+ * @now: timestamp
|
|
+ * @num_ovfls: number of 64-bit overflows
|
|
+ * @ovfl_ctrl: set of controls for interrupt handler tail processing
|
|
+ * @regs: register state
|
|
+ *
|
|
+ * Prepare argument (ovfl_arg) to be passed to sampling format callback, then
|
|
+ * invoke the callback (fmt_handler)
|
|
+ */
|
|
+static int pfm_intr_process_smpl_fmt(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ unsigned long ip,
|
|
+ u64 now,
|
|
+ u64 num_ovfls,
|
|
+ u32 *ovfl_ctrl,
|
|
+ struct pt_regs *regs)
|
|
+{
|
|
+ struct pfm_ovfl_arg *ovfl_arg;
|
|
+ u64 start_cycles, end_cycles;
|
|
+ u16 i, max_pmd;
|
|
+ int ret = 0;
|
|
+
|
|
+ ovfl_arg = &ctx->ovfl_arg;
|
|
+
|
|
+ ovfl_arg->active_set = set->id;
|
|
+ max_pmd = ctx->regs.max_pmd;
|
|
+
|
|
+ /*
|
|
+ * first_intr_pmd: first PMD which can generate PMU interrupts
|
|
+ */
|
|
+ for (i = ctx->regs.first_intr_pmd; num_ovfls; i++) {
|
|
+ /*
|
|
+ * skip pmd which did not have 64-bit overflows
|
|
+ */
|
|
+ if (!test_bit(i, cast_ulp(set->ovfl_pmds)))
|
|
+ continue;
|
|
+
|
|
+ num_ovfls--;
|
|
+
|
|
+ /*
|
|
+ * prepare argument to fmt_handler
|
|
+ */
|
|
+ ovfl_arg->ovfl_pmd = i;
|
|
+ ovfl_arg->ovfl_ctrl = 0;
|
|
+
|
|
+ ovfl_arg->pmd_last_reset = set->pmds[i].lval;
|
|
+ ovfl_arg->pmd_eventid = set->pmds[i].eventid;
|
|
+ ovfl_arg->num_smpl_pmds = 0;
|
|
+
|
|
+ /*
|
|
+ * copy values of pmds of interest, if any
|
|
+ * Sampling format may use them
|
|
+ * We do not initialize the unused smpl_pmds_values
|
|
+ */
|
|
+ if (!bitmap_empty(cast_ulp(set->pmds[i].smpl_pmds), max_pmd))
|
|
+ pfm_intr_get_smpl_pmds_values(ctx, set, ovfl_arg,
|
|
+ set->pmds[i].smpl_pmds);
|
|
+
|
|
+ pfm_stats_inc(fmt_handler_calls);
|
|
+
|
|
+ /*
|
|
+ * call format record (handler) routine
|
|
+ */
|
|
+ start_cycles = sched_clock();
|
|
+ ret = (*ctx->smpl_fmt->fmt_handler)(ctx, ip, now, regs);
|
|
+ end_cycles = sched_clock();
|
|
+
|
|
+ /*
|
|
+ * The reset_pmds mask is constructed automatically
|
|
+ * on overflow. When the actual reset takes place
|
|
+ * depends on the masking, switch and notification
|
|
+ * status. It may be deferred until pfm_restart().
|
|
+ */
|
|
+ *ovfl_ctrl |= ovfl_arg->ovfl_ctrl;
|
|
+
|
|
+ pfm_stats_add(fmt_handler_ns, end_cycles - start_cycles);
|
|
+ }
|
|
+ /*
|
|
+ * when the format cannot handle the rest of the overflow, we abort
|
|
+ */
|
|
+ if (ret)
|
|
+ PFM_DBG_ovfl("handler aborted at PMD%u ret=%d", i, ret);
|
|
+ return ret;
|
|
+}
|
|
+/**
|
|
+ * pfm_overflow_handler - main overflow processing routine.
|
|
+ * @ctx: context to work on (always current context)
|
|
+ * @set: current event set
|
|
+ * @ip: interrupt instruction pointer
|
|
+ * @regs: machine state
|
|
+ *
|
|
+ * set->npend_ovfls is 0 when returning from this function even though
|
|
+ * set->ovfl_pmds[] may have bits set. After leaving, set->npend_ovfls
|
|
+ * must never be used to determine if there was a pending overflow.
|
|
+ */
|
|
+static void pfm_overflow_handler(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ unsigned long ip,
|
|
+ struct pt_regs *regs)
|
|
+{
|
|
+ struct pfm_event_set *set_orig;
|
|
+ u64 now;
|
|
+ u32 ovfl_ctrl;
|
|
+ u16 max_intr, max_pmd;
|
|
+ u16 num_ovfls;
|
|
+ int ret, has_notify;
|
|
+
|
|
+ /*
|
|
+ * take timestamp
|
|
+ */
|
|
+ now = sched_clock();
|
|
+
|
|
+ max_pmd = ctx->regs.max_pmd;
|
|
+ max_intr = ctx->regs.max_intr_pmd;
|
|
+
|
|
+ set_orig = set;
|
|
+ ovfl_ctrl = 0;
|
|
+
|
|
+ /*
|
|
+ * skip ZOMBIE case
|
|
+ */
|
|
+ if (unlikely(ctx->state == PFM_CTX_ZOMBIE))
|
|
+ goto stop_monitoring;
|
|
+
|
|
+ PFM_DBG_ovfl("intr_pmds=0x%llx npend=%u ip=%p, blocking=%d "
|
|
+ "u_pmds=0x%llx use_fmt=%u",
|
|
+ (unsigned long long)set->povfl_pmds[0],
|
|
+ set->npend_ovfls,
|
|
+ (void *)ip,
|
|
+ ctx->flags.block,
|
|
+ (unsigned long long)set->used_pmds[0],
|
|
+ !!ctx->smpl_fmt);
|
|
+
|
|
+ /*
|
|
+ * return number of 64-bit overflows
|
|
+ */
|
|
+ num_ovfls = pfm_intr_process_64bit_ovfls(ctx, set, &ovfl_ctrl);
|
|
+
|
|
+ /*
|
|
+ * there were no 64-bit overflows
|
|
+ * nothing else to do
|
|
+ */
|
|
+ if (!num_ovfls)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * tmp_ovfl_notify = ovfl_pmds & ovfl_notify
|
|
+ * with:
|
|
+ * - ovfl_pmds: last 64-bit overflowed pmds
|
|
+ * - ovfl_notify: notify on overflow registers
|
|
+ */
|
|
+ bitmap_and(cast_ulp(ctx->tmp_ovfl_notify),
|
|
+ cast_ulp(set->ovfl_pmds),
|
|
+ cast_ulp(set->ovfl_notify),
|
|
+ max_intr);
|
|
+
|
|
+ has_notify = !bitmap_empty(cast_ulp(ctx->tmp_ovfl_notify), max_intr);
|
|
+
|
|
+ /*
|
|
+ * check for sampling format and invoke fmt_handler
|
|
+ */
|
|
+ if (likely(ctx->smpl_fmt)) {
|
|
+ pfm_intr_process_smpl_fmt(ctx, set, ip, now, num_ovfls,
|
|
+ &ovfl_ctrl, regs);
|
|
+ } else {
|
|
+ /*
|
|
+ * When no sampling format is used, the default
|
|
+ * is:
|
|
+ * - mask monitoring if not switching
|
|
+ * - notify user if requested
|
|
+ *
|
|
+ * If notification is not requested, monitoring is masked
|
|
+ * and overflowed registers are not reset (saturation).
|
|
+ * This mimics the behavior of the default sampling format.
|
|
+ */
|
|
+ ovfl_ctrl |= PFM_OVFL_CTRL_NOTIFY;
|
|
+ if (has_notify || !(ovfl_ctrl & PFM_OVFL_CTRL_SWITCH))
|
|
+ ovfl_ctrl |= PFM_OVFL_CTRL_MASK;
|
|
+ }
|
|
+
|
|
+ PFM_DBG_ovfl("set%u o_notify=0x%llx o_pmds=0x%llx "
|
|
+ "r_pmds=0x%llx ovfl_ctrl=0x%x",
|
|
+ set->id,
|
|
+ (unsigned long long)ctx->tmp_ovfl_notify[0],
|
|
+ (unsigned long long)set->ovfl_pmds[0],
|
|
+ (unsigned long long)set->reset_pmds[0],
|
|
+ ovfl_ctrl);
|
|
+
|
|
+ /*
|
|
+ * execute the various controls
|
|
+ * ORDER MATTERS
|
|
+ */
|
|
+
|
|
+
|
|
+ /*
|
|
+ * mask monitoring
|
|
+ */
|
|
+ if (ovfl_ctrl & PFM_OVFL_CTRL_MASK) {
|
|
+ pfm_mask_monitoring(ctx, set);
|
|
+ /*
|
|
+ * when masking, reset is deferred until
|
|
+ * pfm_restart()
|
|
+ */
|
|
+ ovfl_ctrl &= ~PFM_OVFL_CTRL_RESET;
|
|
+
|
|
+ /*
|
|
+ * when masking, switching is deferred until
|
|
+ * pfm_restart and we need to remember it
|
|
+ */
|
|
+ if (ovfl_ctrl & PFM_OVFL_CTRL_SWITCH) {
|
|
+ set->priv_flags |= PFM_SETFL_PRIV_SWITCH;
|
|
+ ovfl_ctrl &= ~PFM_OVFL_CTRL_SWITCH;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * switch event set
|
|
+ */
|
|
+ if (ovfl_ctrl & PFM_OVFL_CTRL_SWITCH) {
|
|
+ pfm_switch_sets_from_intr(ctx);
|
|
+ /* update view of active set */
|
|
+ set = ctx->active_set;
|
|
+ }
|
|
+ /*
|
|
+ * send overflow notification
|
|
+ *
|
|
+ * only necessary if at least one overflowed
|
|
+ * register had the notify flag set
|
|
+ */
|
|
+ if (has_notify && (ovfl_ctrl & PFM_OVFL_CTRL_NOTIFY)) {
|
|
+ /*
|
|
+ * block on notify, not on masking
|
|
+ */
|
|
+ if (ctx->flags.block)
|
|
+ pfm_post_work(current, ctx, PFM_WORK_BLOCK);
|
|
+
|
|
+ /*
|
|
+ * send notification and pass the original set id.
|
|
+ * if error, queue full, for instance, then default
|
|
+ * to masking monitoring, i.e., saturate
|
|
+ */
|
|
+ ret = pfm_ovfl_notify(ctx, set_orig, ip);
|
|
+ if (unlikely(ret)) {
|
|
+ if (ctx->state == PFM_CTX_LOADED) {
|
|
+ pfm_mask_monitoring(ctx, set);
|
|
+ ovfl_ctrl &= ~PFM_OVFL_CTRL_RESET;
|
|
+ }
|
|
+ } else {
|
|
+ ctx->flags.can_restart++;
|
|
+ PFM_DBG_ovfl("can_restart=%u", ctx->flags.can_restart);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * reset overflowed registers
|
|
+ */
|
|
+ if (ovfl_ctrl & PFM_OVFL_CTRL_RESET) {
|
|
+ u16 nn;
|
|
+ nn = bitmap_weight(cast_ulp(set->reset_pmds), max_pmd);
|
|
+ if (nn)
|
|
+ pfm_reset_pmds(ctx, set, nn, PFM_PMD_RESET_SHORT);
|
|
+ }
|
|
+ return;
|
|
+
|
|
+stop_monitoring:
|
|
+ /*
|
|
+ * Does not happen for a system-wide context nor for a
|
|
+ * self-monitored context. We cannot attach to kernel-only
|
|
+ * thread, thus it is safe to set TIF bits, i.e., the thread
|
|
+ * will eventually leave the kernel or die and either we will
|
|
+ * catch the context and clean it up in pfm_handle_work() or
|
|
+ * pfm_exit_thread().
|
|
+ *
|
|
+ * Mask until we get to pfm_handle_work()
|
|
+ */
|
|
+ pfm_mask_monitoring(ctx, set);
|
|
+
|
|
+ PFM_DBG_ovfl("ctx is zombie, converted to spurious");
|
|
+ pfm_post_work(current, ctx, PFM_WORK_ZOMBIE);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __pfm_interrupt_handler - 1st level interrupt handler
|
|
+ * @ip: interrupted instruction pointer
|
|
+ * @regs: machine state
|
|
+ *
|
|
+ * Function is static because we use a wrapper to easily capture timing infos.
|
|
+ *
|
|
+ *
|
|
+ * Context locking necessary to avoid concurrent accesses from other CPUs
|
|
+ * - For per-thread, we must prevent pfm_restart() which works when
|
|
+ * context is LOADED or MASKED
|
|
+ */
|
|
+static void __pfm_interrupt_handler(unsigned long ip, struct pt_regs *regs)
|
|
+{
|
|
+ struct task_struct *task;
|
|
+ struct pfm_context *ctx;
|
|
+ struct pfm_event_set *set;
|
|
+
|
|
+
|
|
+ task = __get_cpu_var(pmu_owner);
|
|
+ ctx = __get_cpu_var(pmu_ctx);
|
|
+
|
|
+ /*
|
|
+ * verify if there is a context on this CPU
|
|
+ */
|
|
+ if (unlikely(ctx == NULL)) {
|
|
+ PFM_DBG_ovfl("no ctx");
|
|
+ goto spurious;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * we need to lock context because it could be accessed
|
|
+ * from another CPU. Depending on the priority level of
|
|
+ * the PMU interrupt or the arch, it may be necessary to
|
|
+ * mask interrupts altogether to avoid race condition with
|
|
+ * the timer interrupt in case of time-based set switching,
|
|
+ * for instance.
|
|
+ */
|
|
+ spin_lock(&ctx->lock);
|
|
+
|
|
+ set = ctx->active_set;
|
|
+
|
|
+ /*
|
|
+ * For SMP per-thread, it is not possible to have
|
|
+ * owner != NULL && task != current.
|
|
+ *
|
|
+ * For UP per-thread, because of lazy save, it
|
|
+ * is possible to receive an interrupt in another task
|
|
+ * which is not using the PMU. This means
|
|
+ * that the interrupt was in-flight at the
|
|
+ * time of pfm_ctxswout_thread(). In that
|
|
+ * case, it will be replayed when the task
|
|
+ * is scheduled again. Hence we convert to spurious.
|
|
+ *
|
|
+ * The basic rule is that an overflow is always
|
|
+ * processed in the context of the task that
|
|
+ * generated it for all per-thread contexts.
|
|
+ *
|
|
+ * for system-wide, task is always NULL
|
|
+ */
|
|
+#ifndef CONFIG_SMP
|
|
+ if (unlikely((task && current->pfm_context != ctx))) {
|
|
+ PFM_DBG_ovfl("spurious: not owned by current task");
|
|
+ goto spurious;
|
|
+ }
|
|
+#endif
|
|
+ if (unlikely(ctx->state == PFM_CTX_MASKED)) {
|
|
+ PFM_DBG_ovfl("spurious: monitoring masked");
|
|
+ goto spurious;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * check that monitoring is active, otherwise convert
|
|
+ * to spurious
|
|
+ */
|
|
+ if (unlikely(!pfm_arch_is_active(ctx))) {
|
|
+ PFM_DBG_ovfl("spurious: monitoring non active");
|
|
+ goto spurious;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * freeze PMU and collect overflowed PMD registers
|
|
+ * into set->povfl_pmds. Number of overflowed PMDs
|
|
+ * reported in set->npend_ovfls
|
|
+ */
|
|
+ pfm_arch_intr_freeze_pmu(ctx, set);
|
|
+
|
|
+ /*
|
|
+ * no overflow detected, interrupt may have come
|
|
+ * from the previous thread running on this CPU
|
|
+ */
|
|
+ if (unlikely(!set->npend_ovfls)) {
|
|
+ PFM_DBG_ovfl("no npend_ovfls");
|
|
+ goto spurious;
|
|
+ }
|
|
+
|
|
+ pfm_stats_inc(ovfl_intr_regular_count);
|
|
+
|
|
+ /*
|
|
+ * invoke actual handler
|
|
+ */
|
|
+ pfm_overflow_handler(ctx, set, ip, regs);
|
|
+
|
|
+ /*
|
|
+ * unfreeze PMU, monitoring may not actually be restarted
|
|
+ * if context is MASKED
|
|
+ */
|
|
+ pfm_arch_intr_unfreeze_pmu(ctx);
|
|
+
|
|
+ spin_unlock(&ctx->lock);
|
|
+
|
|
+ return;
|
|
+
|
|
+spurious:
|
|
+ /* ctx may be NULL */
|
|
+ pfm_arch_intr_unfreeze_pmu(ctx);
|
|
+ if (ctx)
|
|
+ spin_unlock(&ctx->lock);
|
|
+
|
|
+ pfm_stats_inc(ovfl_intr_spurious_count);
|
|
+}
|
|
+
|
|
+
|
|
+/**
|
|
+ * pfm_interrupt_handler - 1st level interrupt handler
|
|
+ * @ip: interrupt instruction pointer
|
|
+ * @regs: machine state
|
|
+ *
|
|
+ * Function called from the low-level assembly code or arch-specific perfmon
|
|
+ * code. Simple wrapper used for timing purpose. Actual work done in
|
|
+ * __pfm_interrupt_handler()
|
|
+ */
|
|
+void pfm_interrupt_handler(unsigned long ip, struct pt_regs *regs)
|
|
+{
|
|
+ u64 start;
|
|
+
|
|
+ pfm_stats_inc(ovfl_intr_all_count);
|
|
+
|
|
+ BUG_ON(!irqs_disabled());
|
|
+
|
|
+ start = sched_clock();
|
|
+
|
|
+ __pfm_interrupt_handler(ip, regs);
|
|
+
|
|
+ pfm_stats_add(ovfl_intr_ns, sched_clock() - start);
|
|
+}
|
|
+EXPORT_SYMBOL(pfm_interrupt_handler);
|
|
+
|
|
diff --git a/perfmon/perfmon_msg.c b/perfmon/perfmon_msg.c
|
|
new file mode 100644
|
|
index 0000000..b8a1e4c
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_msg.c
|
|
@@ -0,0 +1,229 @@
|
|
+/*
|
|
+ * perfmon_msg.c: perfmon2 notification message queue management
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/poll.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+/**
|
|
+ * pfm_get_new_msg - get a new message slot from the queue
|
|
+ * @ctx: context to operate on
|
|
+ *
|
|
+ * if the queue is full, NULL is returned
|
|
+ */
|
|
+static union pfarg_msg *pfm_get_new_msg(struct pfm_context *ctx)
|
|
+{
|
|
+ int next;
|
|
+
|
|
+ next = ctx->msgq_head & PFM_MSGQ_MASK;
|
|
+
|
|
+ if ((ctx->msgq_head - ctx->msgq_tail) == PFM_MSGS_COUNT)
|
|
+ return NULL;
|
|
+
|
|
+ /*
|
|
+ * move to next possible slot
|
|
+ */
|
|
+ ctx->msgq_head++;
|
|
+
|
|
+ PFM_DBG_ovfl("head=%d tail=%d msg=%d",
|
|
+ ctx->msgq_head & PFM_MSGQ_MASK,
|
|
+ ctx->msgq_tail & PFM_MSGQ_MASK,
|
|
+ next);
|
|
+
|
|
+ return ctx->msgq+next;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_notify_user - wakeup any thread waiting on msg queue, post SIGIO
|
|
+ * @ctx: context to operate on
|
|
+ *
|
|
+ * message is already enqueued
|
|
+ */
|
|
+static void pfm_notify_user(struct pfm_context *ctx)
|
|
+{
|
|
+ if (ctx->state == PFM_CTX_ZOMBIE) {
|
|
+ PFM_DBG("no notification, context is zombie");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ PFM_DBG_ovfl("waking up");
|
|
+
|
|
+ wake_up_interruptible(&ctx->msgq_wait);
|
|
+
|
|
+ /*
|
|
+ * it is safe to call kill_fasync() from an interrupt
|
|
+ * handler. kill_fasync() grabs two RW locks (fasync_lock,
|
|
+ * tasklist_lock) in read mode. There is conflict only in
|
|
+ * case the PMU interrupt occurs during a write mode critical
|
|
+ * section. This cannot happen because for both locks, the
|
|
+ * write mode is always using interrupt masking (write_lock_irq).
|
|
+ */
|
|
+ kill_fasync(&ctx->async_queue, SIGIO, POLL_IN);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_ovfl_notify - send overflow notification
|
|
+ * @ctx: context to operate on
|
|
+ * @set: which set the overflow comes from
|
|
+ * @ip: overflow interrupt instruction address (IIP)
|
|
+ *
|
|
+ * Appends an overflow notification message to context queue.
|
|
+ * call pfm_notify_user() to wakeup any threads and/or send a signal
|
|
+ *
|
|
+ * Context is locked and interrupts are disabled (no preemption).
|
|
+ */
|
|
+int pfm_ovfl_notify(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *set,
|
|
+ unsigned long ip)
|
|
+{
|
|
+ union pfarg_msg *msg = NULL;
|
|
+ u64 *ovfl_pmds;
|
|
+
|
|
+ if (!ctx->flags.no_msg) {
|
|
+ msg = pfm_get_new_msg(ctx);
|
|
+ if (msg == NULL) {
|
|
+ /*
|
|
+ * when message queue fills up it is because the user
|
|
+ * did not extract the message, yet issued
|
|
+ * pfm_restart(). At this point, we stop sending
|
|
+ * notification, thus the user will not be able to get
|
|
+ * new samples when using the default format.
|
|
+ */
|
|
+ PFM_DBG_ovfl("no more notification msgs");
|
|
+ return -1;
|
|
+ }
|
|
+ /* NOTE(review): msg_ovfl_pid gets current->pid and msg_ovfl_tid gets current->tgid below; looks swapped - confirm */
|
|
+ msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL;
|
|
+ msg->pfm_ovfl_msg.msg_ovfl_pid = current->pid;
|
|
+ msg->pfm_ovfl_msg.msg_active_set = set->id;
|
|
+
|
|
+ ovfl_pmds = msg->pfm_ovfl_msg.msg_ovfl_pmds;
|
|
+
|
|
+ /*
|
|
+ * copy the set->ovfl_pmds bitmask into the message
|
|
+ */
|
|
+ bitmap_copy(cast_ulp(ovfl_pmds), cast_ulp(set->ovfl_pmds),
|
|
+ ctx->regs.max_intr_pmd);
|
|
+
|
|
+ msg->pfm_ovfl_msg.msg_ovfl_cpu = smp_processor_id();
|
|
+ msg->pfm_ovfl_msg.msg_ovfl_tid = current->tgid;
|
|
+ msg->pfm_ovfl_msg.msg_ovfl_ip = ip;
|
|
+
|
|
+ pfm_stats_inc(ovfl_notify_count);
|
|
+ }
|
|
+
|
|
+ PFM_DBG_ovfl("ip=0x%lx o_pmds=0x%llx",
|
|
+ ip,
|
|
+ (unsigned long long)set->ovfl_pmds[0]);
|
|
+
|
|
+ pfm_notify_user(ctx);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_end_notify - notify of thread termination
|
|
+ * @ctx: context to operate on
|
|
+ *
|
|
+ * In per-thread mode, when not self-monitoring, perfmon
|
|
+ * sends a 'end' notification message when the monitored
|
|
+ * thread where the context is attached is exiting.
|
|
+ *
|
|
+ * This helper message alleviates the need to track the activity
|
|
+ * of the thread/process when it is not directly related, i.e.,
|
|
+ * was attached. In other words, no need to keep the thread
|
|
+ * ptraced.
|
|
+ *
|
|
+ * The context must be locked and interrupts disabled.
|
|
+ */
|
|
+int pfm_end_notify(struct pfm_context *ctx)
|
|
+{
|
|
+ union pfarg_msg *msg;
|
|
+
|
|
+ msg = pfm_get_new_msg(ctx);
|
|
+ if (msg == NULL) {
|
|
+ PFM_ERR("%s no more msgs", __func__);
|
|
+ return -1;
|
|
+ }
|
|
+ /* no leak */
|
|
+ memset(msg, 0, sizeof(*msg));
|
|
+
|
|
+ msg->type = PFM_MSG_END;
|
|
+
|
|
+ PFM_DBG("end msg: msg=%p no_msg=%d",
|
|
+ msg,
|
|
+ ctx->flags.no_msg);
|
|
+
|
|
+ pfm_notify_user(ctx);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_get_next_msg - copy the oldest message from the queue and move tail
|
|
+ * @ctx: context to use
|
|
+ * @m: where to copy the message into
|
|
+ *
|
|
+ * The tail of the queue is moved as a consequence of this call
|
|
+ */
|
|
+void pfm_get_next_msg(struct pfm_context *ctx, union pfarg_msg *m)
|
|
+{
|
|
+ union pfarg_msg *next;
|
|
+
|
|
+ PFM_DBG_ovfl("in head=%d tail=%d",
|
|
+ ctx->msgq_head & PFM_MSGQ_MASK,
|
|
+ ctx->msgq_tail & PFM_MSGQ_MASK);
|
|
+
|
|
+ /*
|
|
+ * get oldest message
|
|
+ */
|
|
+ next = ctx->msgq + (ctx->msgq_tail & PFM_MSGQ_MASK);
|
|
+
|
|
+ /*
|
|
+ * move tail forward
|
|
+ */
|
|
+ ctx->msgq_tail++;
|
|
+
|
|
+ /*
|
|
+ * copy message, we cannot simply point to it
|
|
+ * as it may be re-used before we copy it out
|
|
+ */
|
|
+ *m = *next;
|
|
+
|
|
+ PFM_DBG_ovfl("out head=%d tail=%d type=%d",
|
|
+ ctx->msgq_head & PFM_MSGQ_MASK,
|
|
+ ctx->msgq_tail & PFM_MSGQ_MASK,
|
|
+ m->type);
|
|
+}
|
|
diff --git a/perfmon/perfmon_pmu.c b/perfmon/perfmon_pmu.c
|
|
new file mode 100644
|
|
index 0000000..df7a9c9
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_pmu.c
|
|
@@ -0,0 +1,590 @@
|
|
+/*
|
|
+ * perfmon_pmu.c: perfmon2 PMU configuration management
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+#ifndef CONFIG_MODULE_UNLOAD
|
|
+#define module_refcount(n) 1
|
|
+#endif
|
|
+
|
|
+static __cacheline_aligned_in_smp int request_mod_in_progress;
|
|
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_pmu_conf_lock);
|
|
+
|
|
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_pmu_acq_lock);
|
|
+static u32 pfm_pmu_acquired;
|
|
+
|
|
+/*
|
|
+ * perfmon core must access PMU information ONLY through pfm_pmu_conf
|
|
+ * if pfm_pmu_conf is NULL, then no description is registered
|
|
+ */
|
|
+struct pfm_pmu_config *pfm_pmu_conf;
|
|
+EXPORT_SYMBOL(pfm_pmu_conf);
|
|
+
|
|
+static inline int pmu_is_module(struct pfm_pmu_config *c)
+{
+	/* a description not flagged as built-in is provided by a module */
+	return (c->flags & PFM_PMUFL_IS_BUILTIN) == 0;
+}
|
|
+/**
|
|
+ * pfm_pmu_regdesc_init -- initialize regdesc structure from PMU table
|
|
+ * @regs: the regdesc structure to initialize
|
|
+ * @excl_type: the register type(s) to exclude from this regdesc
|
|
+ * @unavail_pmcs: unavailable PMC registers
|
|
+ * @unavail_pmds: unavailable PMD registers
|
|
+ *
|
|
+ * Return:
|
|
+ * 0 success
|
|
+ * errno in case of error
|
|
+ */
|
|
+static int pfm_pmu_regdesc_init(struct pfm_regdesc *regs, int excl_type,
|
|
+ u64 *unavail_pmcs, u64 *unavail_pmds)
|
|
+{
|
|
+ struct pfm_regmap_desc *d;
|
|
+ u16 n, n2, n_counters, i;
|
|
+ int first_intr_pmd = -1, max1, max2, max3;
|
|
+
|
|
+ /*
|
|
+ * compute the number of implemented PMC from the
|
|
+ * description table
|
|
+ */
|
|
+ n = 0;
|
|
+ max1 = max2 = -1;
|
|
+ d = pfm_pmu_conf->pmc_desc;
|
|
+ for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) {
|
|
+ if (!(d->type & PFM_REG_I))
|
|
+ continue;
|
|
+
|
|
+ if (test_bit(i, cast_ulp(unavail_pmcs)))
|
|
+ continue;
|
|
+
|
|
+ if (d->type & excl_type)
|
|
+ continue;
|
|
+
|
|
+ __set_bit(i, cast_ulp(regs->pmcs));
|
|
+
|
|
+ max1 = i;
|
|
+ n++;
|
|
+ }
|
|
+
|
|
+ if (!n) {
|
|
+ PFM_INFO("%s PMU description has no PMC registers",
|
|
+ pfm_pmu_conf->pmu_name);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ regs->max_pmc = max1 + 1;
|
|
+ regs->num_pmcs = n;
|
|
+
|
|
+ n = n_counters = n2 = 0;
|
|
+ max1 = max2 = max3 = -1;
|
|
+ d = pfm_pmu_conf->pmd_desc;
|
|
+ for (i = 0; i < pfm_pmu_conf->num_pmd_entries; i++, d++) {
|
|
+ if (!(d->type & PFM_REG_I))
|
|
+ continue;
|
|
+
|
|
+ if (test_bit(i, cast_ulp(unavail_pmds)))
|
|
+ continue;
|
|
+
|
|
+ if (d->type & excl_type)
|
|
+ continue;
|
|
+
|
|
+ __set_bit(i, cast_ulp(regs->pmds));
|
|
+ max1 = i;
|
|
+ n++;
|
|
+
|
|
+ /*
|
|
+ * read-write registers
|
|
+ */
|
|
+ if (!(d->type & PFM_REG_RO)) {
|
|
+ __set_bit(i, cast_ulp(regs->rw_pmds));
|
|
+ max3 = i;
|
|
+ n2++;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * counter registers
|
|
+ */
|
|
+ if (d->type & PFM_REG_C64) {
|
|
+ __set_bit(i, cast_ulp(regs->cnt_pmds));
|
|
+ n_counters++;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * PMD with intr capabilities
|
|
+ */
|
|
+ if (d->type & PFM_REG_INTR) {
|
|
+ __set_bit(i, cast_ulp(regs->intr_pmds));
|
|
+ if (first_intr_pmd == -1)
|
|
+ first_intr_pmd = i;
|
|
+ max2 = i;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (!n) {
|
|
+ PFM_INFO("%s PMU description has no PMD registers",
|
|
+ pfm_pmu_conf->pmu_name);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ regs->max_pmd = max1 + 1;
|
|
+ regs->first_intr_pmd = first_intr_pmd;
|
|
+ regs->max_intr_pmd = max2 + 1;
|
|
+
|
|
+ regs->num_counters = n_counters;
|
|
+ regs->num_pmds = n;
|
|
+ regs->max_rw_pmd = max3 + 1;
|
|
+ regs->num_rw_pmd = n2;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_pmu_regdesc_init_all -- initialize all regdesc structures
|
|
+ * @una_pmcs : unavailable PMC registers
|
|
+ * @una_pmds : unavailable PMD registers
|
|
+ *
|
|
+ * Return:
|
|
+ * 0 success
|
|
+ * errno if error
|
|
+ *
|
|
+ * We maintain 3 regdesc:
|
|
+ * regs_all: all available registers
|
|
+ * regs_sys: registers available to system-wide contexts only
|
|
+ * regs_thr: registers available to per-thread contexts only
|
|
+ */
|
|
+static int pfm_pmu_regdesc_init_all(u64 *una_pmcs, u64 *una_pmds)
+{
+	int err;
+
+	/* start from empty descriptions */
+	memset(&pfm_pmu_conf->regs_all, 0, sizeof(pfm_pmu_conf->regs_all));
+	memset(&pfm_pmu_conf->regs_thr, 0, sizeof(pfm_pmu_conf->regs_thr));
+	memset(&pfm_pmu_conf->regs_sys, 0, sizeof(pfm_pmu_conf->regs_sys));
+
+	/* regs_all: no register type is excluded */
+	err = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_all, 0,
+				   una_pmcs, una_pmds);
+	if (err)
+		return err;
+
+	PFM_DBG("regs_all.pmcs=0x%llx",
+		(unsigned long long)pfm_pmu_conf->regs_all.pmcs[0]);
+
+	/* regs_thr: registers typed PFM_REG_SYS are left out */
+	err = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_thr, PFM_REG_SYS,
+				   una_pmcs, una_pmds);
+	if (err)
+		return err;
+
+	PFM_DBG("regs.thr.pmcs=0x%llx",
+		(unsigned long long)pfm_pmu_conf->regs_thr.pmcs[0]);
+
+	/*
+	 * regs_sys: registers typed PFM_REG_THR are left out; the debug
+	 * line is emitted whether or not this last step succeeded
+	 */
+	err = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_sys, PFM_REG_THR,
+				   una_pmcs, una_pmds);
+
+	PFM_DBG("regs_sys.pmcs=0x%llx",
+		(unsigned long long)pfm_pmu_conf->regs_sys.pmcs[0]);
+
+	return err;
+}
|
|
+
|
|
+/**
+ * pfm_pmu_register - register a PMU description with the perfmon core
+ * @cfg: PMU description to install
+ *
+ * The description is probed and sanity-checked first, then installed as the
+ * single active configuration (pfm_pmu_conf) under pfm_pmu_conf_lock. If any
+ * step after installation fails, pfm_pmu_conf is reset to NULL so that no
+ * rejected description stays visible to the rest of the core.
+ *
+ * Return:
+ *	0 on success
+ *	-ENOSYS if perfmon is disabled
+ *	-EINVAL if probing or one of the sanity checks fails
+ *	-EBUSY if a description is already registered
+ *	otherwise the error returned by pfm_arch_pmu_config_init() or
+ *	pfm_sysfs_add_pmu()
+ */
+int pfm_pmu_register(struct pfm_pmu_config *cfg)
+{
+	u16 i, nspec, nspec_ro, num_pmcs, num_pmds, num_wc = 0;
+	int type, ret = -EBUSY;
+
+	if (perfmon_disabled) {
+		PFM_INFO("perfmon disabled, cannot add PMU description");
+		return -ENOSYS;
+	}
+
+	nspec = nspec_ro = num_pmds = num_pmcs = 0;
+
+	/* some sanity checks */
+	if (cfg == NULL || cfg->pmu_name == NULL) {
+		PFM_INFO("PMU config descriptor is invalid");
+		return -EINVAL;
+	}
+
+	/* must have a probe */
+	if (cfg->probe_pmu == NULL) {
+		PFM_INFO("PMU config has no probe routine");
+		return -EINVAL;
+	}
+
+	/*
+	 * execute probe routine before anything else as it
+	 * may update configuration tables
+	 */
+	if ((*cfg->probe_pmu)() == -1) {
+		PFM_INFO("%s PMU detection failed", cfg->pmu_name);
+		return -EINVAL;
+	}
+
+	if (!(cfg->flags & PFM_PMUFL_IS_BUILTIN) && cfg->owner == NULL) {
+		PFM_INFO("PMU config %s is missing owner", cfg->pmu_name);
+		return -EINVAL;
+	}
+
+	if (!cfg->num_pmd_entries) {
+		PFM_INFO("%s needs to define num_pmd_entries", cfg->pmu_name);
+		return -EINVAL;
+	}
+
+	if (!cfg->num_pmc_entries) {
+		PFM_INFO("%s needs to define num_pmc_entries", cfg->pmu_name);
+		return -EINVAL;
+	}
+
+	if (!cfg->counter_width) {
+		PFM_INFO("PMU config %s, zero width counters", cfg->pmu_name);
+		return -EINVAL;
+	}
+
+	/*
+	 * REG_RO, REG_V not supported on PMC registers
+	 */
+	for (i = 0; i < cfg->num_pmc_entries; i++) {
+
+		type = cfg->pmc_desc[i].type;
+
+		if (type & PFM_REG_I)
+			num_pmcs++;
+
+		if (type & PFM_REG_WC)
+			num_wc++;
+
+		if (type & PFM_REG_V) {
+			PFM_INFO("PFM_REG_V is not supported on "
+				 "PMCs (PMC%d)", i);
+			return -EINVAL;
+		}
+		if (type & PFM_REG_RO) {
+			PFM_INFO("PFM_REG_RO meaningless on "
+				 "PMCs (PMC%u)", i);
+			return -EINVAL;
+		}
+	}
+
+	if (num_wc && cfg->pmc_write_check == NULL) {
+		PFM_INFO("some PMCs have write-checker but no callback provided\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * check virtual PMD registers
+	 */
+	num_wc = 0;
+	for (i = 0; i < cfg->num_pmd_entries; i++) {
+
+		type = cfg->pmd_desc[i].type;
+
+		if (type & PFM_REG_I)
+			num_pmds++;
+
+		if (type & PFM_REG_V) {
+			nspec++;
+			if (type & PFM_REG_RO)
+				nspec_ro++;
+		}
+
+		if (type & PFM_REG_WC)
+			num_wc++;
+	}
+
+	if (num_wc && cfg->pmd_write_check == NULL) {
+		PFM_INFO("PMD have write-checker but no callback provided\n");
+		return -EINVAL;
+	}
+
+	if (nspec && cfg->pmd_sread == NULL) {
+		PFM_INFO("PMU config is missing pmd_sread()");
+		return -EINVAL;
+	}
+
+	/* only the writable virtual PMDs need a write callback */
+	nspec = nspec - nspec_ro;
+	if (nspec && cfg->pmd_swrite == NULL) {
+		PFM_INFO("PMU config is missing pmd_swrite()");
+		return -EINVAL;
+	}
+
+	if (num_pmcs >= PFM_MAX_PMCS) {
+		PFM_INFO("%s PMCS registers exceed name space [0-%u]",
+			 cfg->pmu_name,
+			 PFM_MAX_PMCS);
+		return -EINVAL;
+	}
+	if (num_pmds >= PFM_MAX_PMDS) {
+		PFM_INFO("%s PMDS registers exceed name space [0-%u]",
+			 cfg->pmu_name,
+			 PFM_MAX_PMDS);
+		return -EINVAL;
+	}
+	spin_lock(&pfm_pmu_conf_lock);
+
+	/* only one description at a time; ret is still -EBUSY here */
+	if (pfm_pmu_conf)
+		goto unlock;
+
+	if (!cfg->version)
+		cfg->version = "0.0";
+
+	pfm_pmu_conf = cfg;
+	pfm_pmu_conf->ovfl_mask = (1ULL << cfg->counter_width) - 1;
+
+	ret = pfm_arch_pmu_config_init(cfg);
+	if (!ret)
+		ret = pfm_sysfs_add_pmu(pfm_pmu_conf);
+
+	/*
+	 * never leave a description installed once one of the two steps
+	 * above has failed: the caller is told that registration did not
+	 * happen, and a stale pointer would also make every later
+	 * registration attempt fail with -EBUSY
+	 */
+	if (ret)
+		pfm_pmu_conf = NULL;
+
+unlock:
+	spin_unlock(&pfm_pmu_conf_lock);
+
+	if (ret) {
+		PFM_INFO("register %s PMU error %d", cfg->pmu_name, ret);
+	} else {
+		PFM_INFO("%s PMU installed", cfg->pmu_name);
+		/*
+		 * (re)initialize PMU on each PMU now that we have a description
+		 */
+		on_each_cpu(__pfm_init_percpu, cfg, 0);
+	}
+	return ret;
+}
|
|
+EXPORT_SYMBOL(pfm_pmu_register);
|
|
+
|
|
+/*
+ * remove PMU description. Caller must pass address of current
+ * configuration. This is mostly for sanity checking as only
+ * one config can exist at any time.
+ *
+ * We are using the module refcount mechanism to protect against
+ * removal while the configuration is being used. As long as there is
+ * one context, a PMU configuration cannot be removed. The protection is
+ * managed in module logic.
+ *
+ * The call is a no-op when @cfg is NULL or when no description is
+ * currently registered; the latter is tested under pfm_pmu_conf_lock.
+ */
+void pfm_pmu_unregister(struct pfm_pmu_config *cfg)
+{
+	/* nothing to compare against */
+	if (!cfg)
+		return;
+
+	spin_lock(&pfm_pmu_conf_lock);
+
+	/* nothing registered, so nothing to remove */
+	if (!pfm_pmu_conf)
+		goto unlock;
+
+	BUG_ON(module_refcount(pfm_pmu_conf->owner));
+
+	/* only the owner of the installed description may remove it */
+	if (cfg->owner == pfm_pmu_conf->owner) {
+		pfm_sysfs_remove_pmu(pfm_pmu_conf);
+		pfm_pmu_conf = NULL;
+	}
+
+unlock:
+	spin_unlock(&pfm_pmu_conf_lock);
+}
|
|
+EXPORT_SYMBOL(pfm_pmu_unregister);
|
|
+
|
|
+/*
+ * ask the module loader for the PMU description module named by the
+ * arch-specific layer
+ *
+ * Return:
+ *	-ENOSYS if the arch layer provides no module name
+ *	otherwise the value returned by request_module()
+ */
+static int pfm_pmu_request_module(void)
+{
+	char *mod_name;
+	int ret;
+
+	mod_name = pfm_arch_get_pmu_module_name();
+	if (mod_name == NULL)
+		return -ENOSYS;
+
+	/*
+	 * request_module() takes a printf-style format: pass the name as
+	 * an argument so that a '%' in it is never interpreted
+	 */
+	ret = request_module("%s", mod_name);
+
+	PFM_DBG("mod=%s ret=%d\n", mod_name, ret);
+	return ret;
+}
|
|
+
|
|
+/*
|
|
+ * autoload:
|
|
+ * 0 : do not try to autoload the PMU description module
|
|
+ * not 0 : try to autoload the PMU description module
|
|
+ */
|
|
+int pfm_pmu_conf_get(int autoload)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ spin_lock(&pfm_pmu_conf_lock);
|
|
+
|
|
+ if (request_mod_in_progress) {
|
|
+ ret = -ENOSYS;
|
|
+ goto skip;
|
|
+ }
|
|
+
|
|
+ if (autoload && pfm_pmu_conf == NULL) {
|
|
+
|
|
+ request_mod_in_progress = 1;
|
|
+
|
|
+ spin_unlock(&pfm_pmu_conf_lock);
|
|
+
|
|
+ pfm_pmu_request_module();
|
|
+
|
|
+ spin_lock(&pfm_pmu_conf_lock);
|
|
+
|
|
+ request_mod_in_progress = 0;
|
|
+
|
|
+ /*
|
|
+ * request_module() may succeed but the module
|
|
+ * may not have registered properly so we need
|
|
+ * to check
|
|
+ */
|
|
+ }
|
|
+
|
|
+ ret = pfm_pmu_conf == NULL ? -ENOSYS : 0;
|
|
+ if (!ret && pmu_is_module(pfm_pmu_conf)
|
|
+ && !try_module_get(pfm_pmu_conf->owner))
|
|
+ ret = -ENOSYS;
|
|
+
|
|
+skip:
|
|
+ spin_unlock(&pfm_pmu_conf_lock);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+void pfm_pmu_conf_put(void)
+{
+	/* only a description provided by a module has a reference to drop */
+	if (pfm_pmu_conf != NULL && pmu_is_module(pfm_pmu_conf)) {
+		spin_lock(&pfm_pmu_conf_lock);
+		module_put(pfm_pmu_conf->owner);
+		spin_unlock(&pfm_pmu_conf_lock);
+	}
+}
|
|
+
|
|
+
|
|
+/*
+ * acquire PMU resource from lower-level PMU register allocator
+ * (currently perfctr-watchdog.c)
+ *
+ * acquisition is done when the first context is created (and not
+ * when it is loaded). We grab all that is defined in the description
+ * module and then we make adjustments at the arch-specific level.
+ *
+ * The PMU resource is released when the last perfmon context is
+ * destroyed.
+ *
+ * Returns 0 on success, otherwise the error reported by
+ * pfm_arch_pmu_acquire() or pfm_pmu_regdesc_init_all(). On error the
+ * acquisition count is left as it was before the call.
+ *
+ * interrupts are not masked
+ */
+int pfm_pmu_acquire(struct pfm_context *ctx)
+{
+	u64 unavail_pmcs[PFM_PMC_BV];
+	u64 unavail_pmds[PFM_PMD_BV];
+	int ret = 0;
+
+	spin_lock(&pfm_pmu_acq_lock);
+
+	PFM_DBG("pmu_acquired=%u", pfm_pmu_acquired);
+
+	pfm_pmu_acquired++;
+
+	/*
+	 * we need to initialize regdesc each time we re-acquire
+	 * the PMU for the first time as there may have been changes
+	 * in the list of available registers, e.g., NMI may have
+	 * been disabled. Checking on PMU module insert is not
+	 * enough
+	 */
+	if (pfm_pmu_acquired == 1) {
+		memset(unavail_pmcs, 0, sizeof(unavail_pmcs));
+		memset(unavail_pmds, 0, sizeof(unavail_pmds));
+
+		ret = pfm_arch_pmu_acquire(unavail_pmcs, unavail_pmds);
+		if (!ret) {
+			ret = pfm_pmu_regdesc_init_all(unavail_pmcs,
+						       unavail_pmds);
+			/*
+			 * without usable register tables the PMU must
+			 * not stay acquired: hand it back right away
+			 */
+			if (ret)
+				pfm_arch_pmu_release();
+		}
+
+		if (ret) {
+			/* undo the increment done above */
+			pfm_pmu_acquired--;
+		} else {
+			/* available PMU resources */
+			PFM_DBG("PMU acquired: %u PMCs, %u PMDs, %u counters",
+				pfm_pmu_conf->regs_all.num_pmcs,
+				pfm_pmu_conf->regs_all.num_pmds,
+				pfm_pmu_conf->regs_all.num_counters);
+		}
+	}
+	spin_unlock(&pfm_pmu_acq_lock);
+
+	/*
+	 * copy the regdesc that corresponds to the context
+	 * we copy and not just point because it helps with
+	 * memory locality. the regdesc structure is accessed
+	 * very frequently in performance critical code such
+	 * as context switch and interrupt handling. By using
+	 * a local copy, we increase memory footprint, but
+	 * increase chance to have local memory access,
+	 * especially for system-wide contexts.
+	 */
+	if (ctx->flags.system)
+		ctx->regs = pfm_pmu_conf->regs_sys;
+	else
+		ctx->regs = pfm_pmu_conf->regs_thr;
+
+	return ret;
+}
|
|
+
|
|
+/*
+ * release the PMU resource
+ *
+ * actual release happens when last context is destroyed
+ *
+ * interrupts are not masked
+ */
+void pfm_pmu_release(void)
+{
+	BUG_ON(irqs_disabled());
+
+	/*
+	 * we need to use a spinlock because release takes some time
+	 * and we may have a race with pfm_pmu_acquire()
+	 */
+	spin_lock(&pfm_pmu_acq_lock);
+
+	/* the counter is a u32, so print it unsigned as pfm_pmu_acquire() does */
+	PFM_DBG("pmu_acquired=%u", pfm_pmu_acquired);
+
+	/*
+	 * we decouple test and decrement because if we had errors
+	 * in pfm_pmu_acquire(), we still come here on pfm_context_free()
+	 * but with pfm_pmu_acquired=0
+	 */
+	if (pfm_pmu_acquired > 0 && --pfm_pmu_acquired == 0) {
+		pfm_arch_pmu_release();
+		PFM_DBG("PMU released");
+	}
+	spin_unlock(&pfm_pmu_acq_lock);
+}
|
|
diff --git a/perfmon/perfmon_priv.h b/perfmon/perfmon_priv.h
|
|
new file mode 100644
|
|
index 0000000..5b485de
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_priv.h
|
|
@@ -0,0 +1,182 @@
|
|
+/*
|
|
+ * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+
|
|
+#ifndef __PERFMON_PRIV_H__
|
|
+#define __PERFMON_PRIV_H__
|
|
+/*
|
|
+ * This file contains all the definitions of data structures, variables, macros
|
|
+ * that are private to the generic code, i.e., not shared with any code that
|
|
+ * lives under arch/ or include/asm-XX
|
|
+ *
|
|
+ * For shared definitions, use include/linux/perfmon_kern.h
|
|
+ */
|
|
+
|
|
+#ifdef CONFIG_PERFMON
|
|
+
|
|
+/*
|
|
+ * type of PMD reset for pfm_reset_pmds() or pfm_switch_sets*()
|
|
+ */
|
|
+#define PFM_PMD_RESET_SHORT 1 /* use short reset value */
|
|
+#define PFM_PMD_RESET_LONG 2 /* use long reset value */
|
|
+
|
|
+/*
|
|
+ * context lazy save/restore activation count
|
|
+ */
|
|
+#define PFM_INVALID_ACTIVATION ((u64)~0)
|
|
+
|
|
+DECLARE_PER_CPU(u64, pmu_activation_number);
|
|
+DECLARE_PER_CPU(struct hrtimer, pfm_hrtimer);
|
|
+
|
|
+static inline void pfm_set_pmu_owner(struct task_struct *task,
|
|
+ struct pfm_context *ctx)
|
|
+{
|
|
+ __get_cpu_var(pmu_owner) = task;
|
|
+ __get_cpu_var(pmu_ctx) = ctx;
|
|
+}
|
|
+
|
|
+static inline int pfm_msgq_is_empty(struct pfm_context *ctx)
|
|
+{
|
|
+ return ctx->msgq_head == ctx->msgq_tail;
|
|
+}
|
|
+
|
|
+void pfm_get_next_msg(struct pfm_context *ctx, union pfarg_msg *m);
|
|
+int pfm_end_notify(struct pfm_context *ctx);
|
|
+int pfm_ovfl_notify(struct pfm_context *ctx, struct pfm_event_set *set,
|
|
+ unsigned long ip);
|
|
+
|
|
+int pfm_alloc_fd(struct file **cfile);
|
|
+
|
|
+int __pfm_delete_evtsets(struct pfm_context *ctx, void *arg, int count);
|
|
+int __pfm_getinfo_evtsets(struct pfm_context *ctx, struct pfarg_setinfo *req,
|
|
+ int count);
|
|
+int __pfm_create_evtsets(struct pfm_context *ctx, struct pfarg_setdesc *req,
|
|
+ int count);
|
|
+
|
|
+
|
|
+int pfm_init_ctx(void);
|
|
+
|
|
+int pfm_pmu_acquire(struct pfm_context *ctx);
|
|
+void pfm_pmu_release(void);
|
|
+
|
|
+int pfm_session_acquire(int is_system, u32 cpu);
|
|
+void pfm_session_release(int is_system, u32 cpu);
|
|
+
|
|
+int pfm_smpl_buf_space_acquire(struct pfm_context *ctx, size_t size);
|
|
+int pfm_smpl_buf_load_context(struct pfm_context *ctx);
|
|
+void pfm_smpl_buf_unload_context(struct pfm_context *ctx);
|
|
+
|
|
+int pfm_init_sysfs(void);
|
|
+
|
|
+#ifdef CONFIG_PERFMON_DEBUG_FS
|
|
+int pfm_init_debugfs(void);
|
|
+int pfm_debugfs_add_cpu(int mycpu);
|
|
+void pfm_debugfs_del_cpu(int mycpu);
|
|
+#else
|
|
+static inline int pfm_init_debugfs(void)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+static inline int pfm_debugfs_add_cpu(int mycpu)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void pfm_debugfs_del_cpu(int mycpu)
|
|
+{}
|
|
+#endif
|
|
+
|
|
+
|
|
+void pfm_reset_pmds(struct pfm_context *ctx, struct pfm_event_set *set,
|
|
+ int num_pmds,
|
|
+ int reset_mode);
|
|
+
|
|
+struct pfm_event_set *pfm_prepare_sets(struct pfm_context *ctx, u16 load_set);
|
|
+int pfm_init_sets(void);
|
|
+
|
|
+ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what);
|
|
+
|
|
+void pfm_free_sets(struct pfm_context *ctx);
|
|
+int pfm_create_initial_set(struct pfm_context *ctx);
|
|
+void pfm_switch_sets_from_intr(struct pfm_context *ctx);
|
|
+void pfm_restart_timer(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+enum hrtimer_restart pfm_handle_switch_timeout(struct hrtimer *t);
|
|
+
|
|
+enum hrtimer_restart pfm_switch_sets(struct pfm_context *ctx,
|
|
+ struct pfm_event_set *new_set,
|
|
+ int reset_mode,
|
|
+ int no_restart);
|
|
+
|
|
+/**
|
|
+ * pfm_check_save_prev_ctx - check if previous context exists and save state
|
|
+ *
|
|
+ * called from pfm_load_ctx_thread() and __pfm_ctxsin_thread() to
|
|
+ * check if previous context exists. If so, save its PMU state. This is used
|
|
+ * only for UP kernels.
|
|
+ *
|
|
+ * NOTE(review): the body below is compiled under #ifdef CONFIG_SMP, which
|
|
+ * does not match the "UP" wording used here and inside the body -- confirm
|
|
+ * which of the two is intended.
|
|
+ *
|
|
+ * PMU ownership is not cleared because the function is always called while
|
|
+ * trying to install a new owner.
|
|
+ */
|
|
+static inline void pfm_check_save_prev_ctx(void)
|
|
+{
|
|
+#ifdef CONFIG_SMP
|
|
+ struct pfm_event_set *set;
|
|
+ struct pfm_context *ctxp;
|
|
+
|
|
+ ctxp = __get_cpu_var(pmu_ctx);
|
|
+ if (!ctxp)
|
|
+ return;
|
|
+ /*
|
|
+ * in UP per-thread, due to lazy save
|
|
+ * there could be a context from another
|
|
+ * task. We need to push it first before
|
|
+ * installing our new state
|
|
+ */
|
|
+ set = ctxp->active_set;
|
|
+ pfm_save_pmds(ctxp, set);
|
|
+ /*
|
|
+ * do not clear ownership because we rewrite
|
|
+ * right away
|
|
+ */
|
|
+#endif
|
|
+}
|
|
+
|
|
+
|
|
+int pfm_init_fs(void);
|
|
+
|
|
+int pfm_init_hotplug(void);
|
|
+
|
|
+void pfm_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set);
|
|
+void pfm_resume_after_ovfl(struct pfm_context *ctx);
|
|
+int pfm_setup_smpl_fmt(struct pfm_context *ctx, u32 ctx_flags, void *fmt_arg,
|
|
+ struct file *filp);
|
|
+
|
|
+static inline void pfm_post_work(struct task_struct *task,
|
|
+ struct pfm_context *ctx, int type)
|
|
+{
|
|
+ ctx->flags.work_type = type;
|
|
+ set_tsk_thread_flag(task, TIF_PERFMON_WORK);
|
|
+ pfm_arch_arm_handle_work(task);
|
|
+}
|
|
+
|
|
+#define PFM_PMC_STK_ARG PFM_ARCH_PMC_STK_ARG
|
|
+#define PFM_PMD_STK_ARG PFM_ARCH_PMD_STK_ARG
|
|
+
|
|
+#endif /* CONFIG_PERFMON */
|
|
+
|
|
+#endif /* __PERFMON_PRIV_H__ */
|
|
diff --git a/perfmon/perfmon_res.c b/perfmon/perfmon_res.c
|
|
new file mode 100644
|
|
index 0000000..7b0382b
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_res.c
|
|
@@ -0,0 +1,450 @@
|
|
+/*
|
|
+ * perfmon_res.c: perfmon2 resource allocations
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+/*
|
|
+ * global information about all sessions
|
|
+ * mostly used to synchronize between system wide and per-process
|
|
+ */
|
|
+struct pfm_resources {
|
|
+ size_t smpl_buf_mem_cur;/* current smpl buf mem usage */
|
|
+ cpumask_t sys_cpumask; /* bitmask of used cpus */
|
|
+ u32 thread_sessions; /* #num loaded per-thread sessions */
|
|
+};
|
|
+
|
|
+static struct pfm_resources pfm_res;
|
|
+
|
|
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_res_lock);
|
|
+
|
|
+/**
+ * pfm_smpl_buf_space_acquire - check memory resource usage for sampling buffer
+ * @ctx: context of interest
+ * @size: size of requested buffer
+ *
+ * sampling buffer allocated by perfmon must be
+ * checked against max locked memory usage thresholds
+ * for security reasons.
+ *
+ * The first level check is against the system wide limit
+ * as indicated by the system administrator in /sys/kernel/perfmon
+ *
+ * The second level check is on a per-process basis using
+ * RLIMIT_MEMLOCK limit.
+ *
+ * Operating on the current task only.
+ *
+ * Return:
+ *	0 if the space was accounted at both levels
+ *	-ENOMEM if either limit would be exceeded or if the current task
+ *	has no mm; in that case no accounting is left behind
+ */
+int pfm_smpl_buf_space_acquire(struct pfm_context *ctx, size_t size)
+{
+	struct mm_struct *mm;
+	unsigned long locked;
+	unsigned long buf_mem, buf_mem_max;
+	unsigned long flags;
+	int granted;
+
+	spin_lock_irqsave(&pfm_res_lock, flags);
+
+	/*
+	 * check against global buffer limit
+	 *
+	 * size is compared with the remaining headroom instead of being
+	 * added first, so that an oversized request cannot wrap around
+	 * and slip under the limit
+	 */
+	buf_mem_max = pfm_controls.smpl_buffer_mem_max;
+	buf_mem = pfm_res.smpl_buf_mem_cur;
+
+	granted = buf_mem <= buf_mem_max && size <= buf_mem_max - buf_mem;
+	if (granted) {
+		buf_mem += size;
+		pfm_res.smpl_buf_mem_cur = buf_mem;
+
+		PFM_DBG("buf_mem_max=%lu current_buf_mem=%lu",
+			buf_mem_max,
+			buf_mem);
+	}
+
+	spin_unlock_irqrestore(&pfm_res_lock, flags);
+
+	if (!granted) {
+		PFM_DBG("smpl buffer memory threshold reached");
+		return -ENOMEM;
+	}
+
+	/*
+	 * check against per-process RLIMIT_MEMLOCK
+	 */
+	mm = get_task_mm(current);
+	if (!mm) {
+		/* no address space to charge: undo the global reservation */
+		PFM_DBG("current task has no mm");
+		goto unres;
+	}
+
+	down_write(&mm->mmap_sem);
+
+	locked = mm->locked_vm << PAGE_SHIFT;
+	locked += size;
+
+	if (locked > current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) {
+
+		PFM_DBG("RLIMIT_MEMLOCK reached ask_locked=%lu rlim_cur=%lu",
+			locked,
+			current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur);
+
+		up_write(&mm->mmap_sem);
+		mmput(mm);
+		goto unres;
+	}
+
+	mm->locked_vm = locked >> PAGE_SHIFT;
+
+	up_write(&mm->mmap_sem);
+
+	mmput(mm);
+
+	return 0;
+
+unres:
+	/*
+	 * remove global buffer memory allocation
+	 */
+	spin_lock_irqsave(&pfm_res_lock, flags);
+
+	pfm_res.smpl_buf_mem_cur -= size;
+
+	spin_unlock_irqrestore(&pfm_res_lock, flags);
+
+	return -ENOMEM;
+}
|
|
+/**
|
|
+ * pfm_smpl_buf_space_release - release resource usage for sampling buffer
|
|
+ * @ctx: perfmon context of interest
|
|
+ *
|
|
+ * There exist multiple paths leading to this function. We need to
|
|
+ * be very careful with locking on the mmap_sem as it may already be
|
|
+ * held by the time we come here.
|
|
+ * The following paths exist:
|
|
+ *
|
|
+ * exit path:
|
|
+ * sys_exit_group
|
|
+ * do_group_exit
|
|
+ * do_exit
|
|
+ * exit_mm
|
|
+ * mmput
|
|
+ * exit_mmap
|
|
+ * remove_vma
|
|
+ * fput
|
|
+ * __fput
|
|
+ * pfm_close
|
|
+ * __pfm_close
|
|
+ * pfm_context_free
|
|
+ * pfm_release_buf_space
|
|
+ * munmap path:
|
|
+ * sys_munmap
|
|
+ * do_munmap
|
|
+ * remove_vma
|
|
+ * fput
|
|
+ * __fput
|
|
+ * pfm_close
|
|
+ * __pfm_close
|
|
+ * pfm_context_free
|
|
+ * pfm_release_buf_space
|
|
+ *
|
|
+ * close path:
|
|
+ * sys_close
|
|
+ * filp_close
|
|
+ * fput
|
|
+ * __fput
|
|
+ * pfm_close
|
|
+ * __pfm_close
|
|
+ * pfm_context_free
|
|
+ * pfm_release_buf_space
|
|
+ *
|
|
+ * The issue is that on the munmap() path, the mmap_sem is already held
|
|
+ * in write-mode by the time we come here. To avoid the deadlock, we need
|
|
+ * to know where we are coming from and skip down_write(). It is fairly
|
|
+ * difficult to know this because of the lack of good hooks and
|
|
+ * the fact that, there may not have been any mmap() of the sampling buffer
|
|
+ * (i.e. create_context() followed by close() or exit()).
|
|
+ *
|
|
+ * We use a set flag ctx->flags.mmap_nlock which is toggled in the vm_ops
|
|
+ * callback in remove_vma() which is called systematically for the call, so
|
|
+ * on all but the pure close() path. The exit path does not already hold
|
|
+ * the lock but this is exit so there is no task->mm by the time we come here.
|
|
+ *
|
|
+ * The mmap_nlock is set only when unmapping and this is the LAST reference
|
|
+ * to the file (i.e., close() followed by munmap()).
|
|
+ */
|
|
+void pfm_smpl_buf_space_release(struct pfm_context *ctx, size_t size)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ struct mm_struct *mm;
|
|
+
|
|
+ mm = get_task_mm(current);
|
|
+ if (mm) {
|
|
+ if (ctx->flags.mmap_nlock == 0) {
|
|
+ PFM_DBG("doing down_write");
|
|
+ down_write(&mm->mmap_sem);
|
|
+ }
|
|
+
|
|
+ mm->locked_vm -= size >> PAGE_SHIFT;
|
|
+
|
|
+ PFM_DBG("size=%zu locked_vm=%lu", size, mm->locked_vm);
|
|
+
|
|
+ if (ctx->flags.mmap_nlock == 0)
|
|
+ up_write(&mm->mmap_sem);
|
|
+
|
|
+ mmput(mm);
|
|
+ }
|
|
+
|
|
+ spin_lock_irqsave(&pfm_res_lock, flags);
|
|
+
|
|
+ pfm_res.smpl_buf_mem_cur -= size;
|
|
+
|
|
+ spin_unlock_irqrestore(&pfm_res_lock, flags);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_session_acquire - reserve a per-thread or per-cpu session
|
|
+ * @is_system: true if per-cpu session
|
|
+ * @cpu: cpu number for per-cpu session
|
|
+ *
|
|
+ * return:
|
|
+ * 0 : success
|
|
+ * -EBUSY: if conflicting session exist
|
|
+ */
|
|
+int pfm_session_acquire(int is_system, u32 cpu)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ u32 nsys_cpus;
|
|
+ int ret = 0;
|
|
+
|
|
+ /*
|
|
+ * validity checks on cpu_mask have been done upstream
|
|
+ */
|
|
+ spin_lock_irqsave(&pfm_res_lock, flags);
|
|
+
|
|
+ nsys_cpus = cpus_weight(pfm_res.sys_cpumask);
|
|
+
|
|
+ PFM_DBG("in sys=%u task=%u is_sys=%d cpu=%u",
|
|
+ nsys_cpus,
|
|
+ pfm_res.thread_sessions,
|
|
+ is_system,
|
|
+ cpu);
|
|
+
|
|
+ if (is_system) {
|
|
+ /*
|
|
+ * cannot mix system wide and per-task sessions
|
|
+ */
|
|
+ if (pfm_res.thread_sessions > 0) {
|
|
+ PFM_DBG("%u conflicting thread_sessions",
|
|
+ pfm_res.thread_sessions);
|
|
+ ret = -EBUSY;
|
|
+ goto abort;
|
|
+ }
|
|
+
|
|
+ if (cpu_isset(cpu, pfm_res.sys_cpumask)) {
|
|
+ PFM_DBG("conflicting session on CPU%u", cpu);
|
|
+ ret = -EBUSY;
|
|
+ goto abort;
|
|
+ }
|
|
+
|
|
+ PFM_DBG("reserved session on CPU%u", cpu);
|
|
+
|
|
+ cpu_set(cpu, pfm_res.sys_cpumask);
|
|
+ nsys_cpus++;
|
|
+ } else {
|
|
+ if (nsys_cpus) {
|
|
+ ret = -EBUSY;
|
|
+ goto abort;
|
|
+ }
|
|
+ pfm_res.thread_sessions++;
|
|
+ }
|
|
+
|
|
+ PFM_DBG("out sys=%u task=%u is_sys=%d cpu=%u",
|
|
+ nsys_cpus,
|
|
+ pfm_res.thread_sessions,
|
|
+ is_system,
|
|
+ cpu);
|
|
+
|
|
+abort:
|
|
+ spin_unlock_irqrestore(&pfm_res_lock, flags);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_session_release - release a per-cpu or per-thread session
|
|
+ * @is_system: true if per-cpu session
|
|
+ * @cpu: cpu number for per-cpu session
|
|
+ *
|
|
+ * called from __pfm_unload_context()
|
|
+ */
|
|
+void pfm_session_release(int is_system, u32 cpu)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&pfm_res_lock, flags);
|
|
+
|
|
+ PFM_DBG("in sys_sessions=%u thread_sessions=%u syswide=%d cpu=%u",
|
|
+ cpus_weight(pfm_res.sys_cpumask),
|
|
+ pfm_res.thread_sessions,
|
|
+ is_system, cpu);
|
|
+
|
|
+ if (is_system)
|
|
+ cpu_clear(cpu, pfm_res.sys_cpumask);
|
|
+ else
|
|
+ pfm_res.thread_sessions--;
|
|
+
|
|
+ PFM_DBG("out sys_sessions=%u thread_sessions=%u syswide=%d cpu=%u",
|
|
+ cpus_weight(pfm_res.sys_cpumask),
|
|
+ pfm_res.thread_sessions,
|
|
+ is_system, cpu);
|
|
+
|
|
+ spin_unlock_irqrestore(&pfm_res_lock, flags);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_session_allcpus_acquire - acquire per-cpu sessions on all available cpus
|
|
+ *
|
|
+ * currently used by Oprofile on X86
|
|
+ */
|
|
+int pfm_session_allcpus_acquire(void)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ u32 nsys_cpus, cpu;
|
|
+ int ret = -EBUSY;
|
|
+
|
|
+ spin_lock_irqsave(&pfm_res_lock, flags);
|
|
+
|
|
+ nsys_cpus = cpus_weight(pfm_res.sys_cpumask);
|
|
+
|
|
+ PFM_DBG("in sys=%u task=%u",
|
|
+ nsys_cpus,
|
|
+ pfm_res.thread_sessions);
|
|
+
|
|
+ if (nsys_cpus) {
|
|
+ PFM_DBG("already some system-wide sessions");
|
|
+ goto abort;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * cannot mix system wide and per-task sessions
|
|
+ */
|
|
+ if (pfm_res.thread_sessions) {
|
|
+ PFM_DBG("%u conflicting thread_sessions",
|
|
+ pfm_res.thread_sessions);
|
|
+ goto abort;
|
|
+ }
|
|
+
|
|
+ for_each_online_cpu(cpu) {
|
|
+ cpu_set(cpu, pfm_res.sys_cpumask);
|
|
+ nsys_cpus++;
|
|
+ }
|
|
+
|
|
+ PFM_DBG("out sys=%u task=%u",
|
|
+ nsys_cpus,
|
|
+ pfm_res.thread_sessions);
|
|
+
|
|
+ ret = 0;
|
|
+abort:
|
|
+ spin_unlock_irqrestore(&pfm_res_lock, flags);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+EXPORT_SYMBOL(pfm_session_allcpus_acquire);
|
|
+
|
|
+/**
|
|
+ * pfm_session_allcpus_release - release per-cpu sessions on all cpus
|
|
+ *
|
|
+ * currently used by Oprofile code
|
|
+ */
|
|
+void pfm_session_allcpus_release(void)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ u32 nsys_cpus, cpu;
|
|
+
|
|
+ spin_lock_irqsave(&pfm_res_lock, flags);
|
|
+
|
|
+ nsys_cpus = cpus_weight(pfm_res.sys_cpumask);
|
|
+
|
|
+ PFM_DBG("in sys=%u task=%u",
|
|
+ nsys_cpus,
|
|
+ pfm_res.thread_sessions);
|
|
+
|
|
+ /*
|
|
+ * XXX: could use __cpus_clear() with nbits
|
|
+ */
|
|
+ for_each_online_cpu(cpu) {
|
|
+ cpu_clear(cpu, pfm_res.sys_cpumask);
|
|
+ nsys_cpus--;
|
|
+ }
|
|
+
|
|
+ PFM_DBG("out sys=%u task=%u",
|
|
+ nsys_cpus,
|
|
+ pfm_res.thread_sessions);
|
|
+
|
|
+ spin_unlock_irqrestore(&pfm_res_lock, flags);
|
|
+}
|
|
+EXPORT_SYMBOL(pfm_session_allcpus_release);
|
|
+
|
|
+/**
+ * pfm_sysfs_res_show - return current resource usage for sysfs
+ * @buf: buffer to hold string in return
+ * @sz: size of buf
+ * @what: what to produce
+ * what=0 : thread_sessions
+ * what=1 : cpus_weight(sys_cpumask)
+ * what=2 : smpl_buf_mem_cur
+ * what=3 : pmu model name
+ *
+ * called from perfmon_sysfs.c
+ * return number of bytes written into buf (up to sz); 0 when sz is 0 or
+ * when @what is not one of the values above
+ */
+ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what)
+{
+	unsigned long flags;
+
+	if (!sz)
+		return 0;
+
+	/* strlen() below must see a terminated string on every path */
+	buf[0] = '\0';
+
+	spin_lock_irqsave(&pfm_res_lock, flags);
+
+	switch (what) {
+	case 0:
+		snprintf(buf, sz, "%u\n", pfm_res.thread_sessions);
+		break;
+	case 1:
+		snprintf(buf, sz, "%d\n", cpus_weight(pfm_res.sys_cpumask));
+		break;
+	case 2:
+		snprintf(buf, sz, "%zu\n", pfm_res.smpl_buf_mem_cur);
+		break;
+	case 3:
+		/* the format already appends the newline */
+		snprintf(buf, sz, "%s\n",
+			 pfm_pmu_conf ? pfm_pmu_conf->pmu_name : "unknown");
+		break;
+	default:
+		/* unknown selector: leave the buffer empty */
+		break;
+	}
+	spin_unlock_irqrestore(&pfm_res_lock, flags);
+	return strlen(buf);
+}
|
|
diff --git a/perfmon/perfmon_rw.c b/perfmon/perfmon_rw.c
|
|
new file mode 100644
|
|
index 0000000..3168eb7
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_rw.c
|
|
@@ -0,0 +1,733 @@
|
|
+/*
|
|
+ * perfmon_rw.c: perfmon2 PMC/PMD read/write system calls
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net/
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+#define PFM_REGFL_PMC_ALL (PFM_REGFL_NO_EMUL64)
|
|
+#define PFM_REGFL_PMD_ALL (PFM_REGFL_RANDOM|PFM_REGFL_OVFL_NOTIFY)
|
|
+
|
|
+/**
|
|
+ * update_used_reg -- update used_pmcs for a single PMD
|
|
+ * @set: set to update
|
|
+ * @cnum: new PMD to add
|
|
+ *
|
|
+ * This function adds the pmds and pmcs depending on PMD cnum
|
|
+ */
|
|
+static inline void update_used_reg(struct pfm_context *ctx,
|
|
+				   struct pfm_event_set *set, u16 cnum)
|
|
+{
|
|
+	unsigned long *used = cast_ulp(set->used_pmcs);
|
|
+	unsigned long *deps = cast_ulp(pfm_pmu_conf->pmd_desc[cnum].dep_pmcs);
|
|
+
|
|
+	/*
|
|
+	 * used_pmcs |= dep_pmcs of PMD cnum, over the first max_pmc bits
|
|
+	 */
|
|
+	bitmap_or(used, used, deps, ctx->regs.max_pmc);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * update_used_pmcs -- update used_pmcs bitmask
|
|
+ * @set: event set to update
|
|
+ * @bv: bitmask to inspect for new PMD registers
|
|
+ *
|
|
+ * This function updates the used_pmcs bitmask for
|
|
+ * the set using bv, a bitmask of pmds. For each pmd in bv,
|
|
+ * its depending pmcs are added to used_pmcs.
|
|
+ */
|
|
+static void update_used_pmcs(struct pfm_context *ctx,
|
|
+			     struct pfm_event_set *set, unsigned long *bv)
|
|
+{
|
|
+	u16 nbits = ctx->regs.max_pmd;
|
|
+	int left, pos, bit;
|
|
+
|
|
+	/*
|
|
+	 * visit exactly as many set bits as bv holds below nbits,
|
|
+	 * resuming each search right after the previous hit
|
|
+	 */
|
|
+	left = bitmap_weight(bv, nbits);
|
|
+	pos = 0;
|
|
+	while (left) {
|
|
+		bit = find_next_bit(bv, nbits, pos);
|
|
+		update_used_reg(ctx, set, bit);
|
|
+		pos = bit + 1;
|
|
+		left--;
|
|
+	}
|
|
+}
|
|
+
|
|
+/**
|
|
+ * update_changes -- update nused_pmcs, nused_pmds, write newly touched pmcs
|
|
+ * @ctx: context to use
|
|
+ * @set: event set to use
|
|
+ * @old_used_pmcs: former used_pmc bitmask
|
|
+ * @can_access: non-zero if PMU is accessible, i.e., can be written to
|
|
+ *
|
|
+ * This function updates nused_pmcs and nused_pmds after the last modification
|
|
+ * to an event set. When new pmcs are used, then they must be initialized such
|
|
+ * that we do not pick up stale values from another session.
|
|
+ */
|
|
+static inline int update_changes(struct pfm_context *ctx, struct pfm_event_set *set,
|
|
+				 unsigned long *old_used_pmcs)
|
|
+{
|
|
+	struct pfarg_pmc req;
|
|
+	u16 max_pmc, max_pmd;
|
|
+	int n, p, q, ret = 0;
|
|
+
|
|
+	max_pmd = ctx->regs.max_pmd;
|
|
+	max_pmc = ctx->regs.max_pmc;
|
|
+
|
|
+	/*
|
|
+	 * update used counts
|
|
+	 */
|
|
+	set->nused_pmds = bitmap_weight(cast_ulp(set->used_pmds), max_pmd);
|
|
+	set->nused_pmcs = bitmap_weight(cast_ulp(set->used_pmcs), max_pmc);
|
|
+
|
|
+	PFM_DBG("set%u u_pmds=0x%llx nu_pmds=%u u_pmcs=0x%llx nu_pmcs=%u",
|
|
+		set->id,
|
|
+		(unsigned long long)set->used_pmds[0],
|
|
+		set->nused_pmds,
|
|
+		(unsigned long long)set->used_pmcs[0],
|
|
+		set->nused_pmcs);
|
|
+
|
|
+	/*
|
|
+	 * the request must name this set explicitly: with a zeroed
|
|
+	 * reg_set, __pfm_write_pmcs() looks up set 0 and the write
|
|
+	 * lands in the wrong set whenever set->id is not 0
|
|
+	 */
|
|
+	memset(&req, 0, sizeof(req));
|
|
+	req.reg_set = set->id;
|
|
+
|
|
+	n = bitmap_weight(cast_ulp(set->used_pmcs), max_pmc);
|
|
+	for (p = 0; n; n--, p = q+1) {
|
|
+		q = find_next_bit(cast_ulp(set->used_pmcs), max_pmc, p);
|
|
+
|
|
+		/*
|
|
+		 * PMC was already in use before this update, nothing to do
|
|
+		 */
|
|
+		if (test_bit(q, cast_ulp(old_used_pmcs)))
|
|
+			continue;
|
|
+
|
|
+		/*
|
|
+		 * newly used PMC: push the value held in the set through
|
|
+		 * the regular write path
|
|
+		 */
|
|
+		req.reg_num = q;
|
|
+		req.reg_value = set->pmcs[q];
|
|
+
|
|
+		ret = __pfm_write_pmcs(ctx, &req, 1);
|
|
+		if (ret)
|
|
+			break;
|
|
+	}
|
|
+	return ret;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * handle_smpl_bv - checks sampling bitmasks for new PMDs
|
|
+ * @ctx: context to use
|
|
+ * @set: set to use
|
|
+ * @bv: sampling bitmask
|
|
+ *
|
|
+ * scans the smpl bitmask looking for new PMDs (not yet used), if found
|
|
+ * invoke pfm_write_pmds() on them to get them initialized and marked used
|
|
+ */
|
|
+static int handle_smpl_bv(struct pfm_context *ctx, struct pfm_event_set *set,
|
|
+			  unsigned long *bv)
|
|
+{
|
|
+	struct pfarg_pmd req;
|
|
+	int p, q, n, ret = 0;
|
|
+	u16 max_pmd;
|
|
+
|
|
+	/*
|
|
+	 * the request must name this set explicitly: with a zeroed
|
|
+	 * reg_set, __pfm_write_pmds() looks up set 0 and the PMD would
|
|
+	 * be marked used in the wrong set whenever set->id is not 0
|
|
+	 */
|
|
+	memset(&req, 0, sizeof(req));
|
|
+	req.reg_set = set->id;
|
|
+
|
|
+	max_pmd = ctx->regs.max_pmd;
|
|
+
|
|
+	n = bitmap_weight(cast_ulp(bv), max_pmd);
|
|
+
|
|
+	for (p = 0; n; n--, p = q+1) {
|
|
+		q = find_next_bit(cast_ulp(bv), max_pmd, p);
|
|
+
|
|
+		/*
|
|
+		 * PMD already used by the set, nothing to initialize
|
|
+		 */
|
|
+		if (test_bit(q, cast_ulp(set->used_pmds)))
|
|
+			continue;
|
|
+
|
|
+		/*
|
|
+		 * new PMD: write a zero value through the regular path
|
|
+		 */
|
|
+		req.reg_num = q;
|
|
+		req.reg_value = 0;
|
|
+
|
|
+		ret = __pfm_write_pmds(ctx, &req, 1, 0);
|
|
+		if (ret)
|
|
+			break;
|
|
+	}
|
|
+	return ret;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * is_invalid -- check if register index is within limits
|
|
+ * @cnum: register index
|
|
+ * @impl: bitmask of implemented registers
|
|
+ * @max: highest implemented registers + 1
|
|
+ *
|
|
+ * return:
|
|
+ * 0 if register index is valid
|
|
+ * 1 if invalid
|
|
+ */
|
|
+static inline int is_invalid(u16 cnum, unsigned long *impl, u16 max)
|
|
+{
|
|
+	/* out of range: do not even look at the bitmask */
|
|
+	if (cnum >= max)
|
|
+		return 1;
|
|
+
|
|
+	/* in range, but the register may still be unimplemented */
|
|
+	return !test_bit(cnum, impl);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __pfm_write_pmds - modified data registers
|
|
+ * @ctx: context to operate on
|
|
+ * @req: pfarg_pmd_t request from user
|
|
+ * @count: number of element in the pfarg_pmd_t vector
|
|
+ * @compat: used only on IA-64 to maintain backward compatibility with v2.0
|
|
+ *
|
|
+ * The function succeeds whether the context is attached or not.
|
|
+ * When attached to another thread, that thread must be stopped.
|
|
+ *
|
|
+ * The context is locked and interrupts are disabled.
|
|
+ */
|
|
+int __pfm_write_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count,
|
|
+		     int compat)
|
|
+{
|
|
+	struct pfm_event_set *set, *active_set;
|
|
+	u64 old_used_pmcs[PFM_PMC_BV];
|
|
+	unsigned long *smpl_pmds, *reset_pmds, *impl_pmds, *impl_rw_pmds;
|
|
+	u32 req_flags, flags;
|
|
+	u16 cnum, pmd_type, max_pmd;
|
|
+	u16 set_id;
|
|
+	int i, can_access_pmu;
|
|
+	int ret, err;
|
|
+	pfm_pmd_check_t wr_func;
|
|
+
|
|
+	active_set = ctx->active_set;
|
|
+	max_pmd = ctx->regs.max_pmd;
|
|
+	impl_pmds = cast_ulp(ctx->regs.pmds);
|
|
+	impl_rw_pmds = cast_ulp(ctx->regs.rw_pmds);
|
|
+	wr_func = pfm_pmu_conf->pmd_write_check;
|
|
+	set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
|
|
+
|
|
+	can_access_pmu = 0;
|
|
+
|
|
+	/*
|
|
+	 * we cannot access the actual PMD registers when monitoring is masked
|
|
+	 */
|
|
+	if (unlikely(ctx->state == PFM_CTX_LOADED))
|
|
+		can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task
|
|
+			|| ctx->flags.system;
|
|
+
|
|
+	/*
|
|
+	 * snapshot used_pmcs so that update_changes() can tell which
|
|
+	 * PMCs became used because of this call
|
|
+	 */
|
|
+	bitmap_copy(cast_ulp(old_used_pmcs),
|
|
+		    cast_ulp(set->used_pmcs),
|
|
+		    ctx->regs.max_pmc);
|
|
+
|
|
+	for (i = 0; i < count; i++, req++) {
|
|
+
|
|
+		/*
|
|
+		 * every validation failure below reports -EINVAL. This is
|
|
+		 * re-armed per request because helper calls in a previous
|
|
+		 * iteration leave ret at 0 on success.
|
|
+		 */
|
|
+		ret = -EINVAL;
|
|
+
|
|
+		cnum = req->reg_num;
|
|
+		set_id = req->reg_set;
|
|
+		req_flags = req->reg_flags;
|
|
+		smpl_pmds = cast_ulp(req->reg_smpl_pmds);
|
|
+		reset_pmds = cast_ulp(req->reg_reset_pmds);
|
|
+		flags = 0;
|
|
+
|
|
+		/*
|
|
+		 * cannot write to unimplemented registers
|
|
+		 */
|
|
+		if (unlikely(is_invalid(cnum, impl_pmds, max_pmd))) {
|
|
+			PFM_DBG("pmd%u is not available", cnum);
|
|
+			goto error;
|
|
+		}
|
|
+
|
|
+		pmd_type = pfm_pmu_conf->pmd_desc[cnum].type;
|
|
+
|
|
+		/*
|
|
+		 * ensure only valid flags are set
|
|
+		 */
|
|
+		if (req_flags & ~(PFM_REGFL_PMD_ALL)) {
|
|
+			PFM_DBG("pmd%u: invalid flags=0x%x",
|
|
+				cnum, req_flags);
|
|
+			goto error;
|
|
+		}
|
|
+
|
|
+		/*
|
|
+		 * OVFL_NOTIFY is valid for all types of PMD.
|
|
+		 * non counting PMD may trigger PMU interrupt
|
|
+		 * and thus may trigger recording of a sample.
|
|
+		 * This is true with IBS on AMD family 16.
|
|
+		 */
|
|
+		if (req_flags & PFM_REGFL_OVFL_NOTIFY)
|
|
+			flags |= PFM_REGFL_OVFL_NOTIFY;
|
|
+
|
|
+		/*
|
|
+		 * We allow randomization to non counting PMD
|
|
+		 */
|
|
+		if (req_flags & PFM_REGFL_RANDOM)
|
|
+			flags |= PFM_REGFL_RANDOM;
|
|
+
|
|
+		/*
|
|
+		 * verify validity of smpl_pmds
|
|
+		 */
|
|
+		if (unlikely(!bitmap_subset(smpl_pmds, impl_pmds, PFM_MAX_PMDS))) {
|
|
+			PFM_DBG("invalid smpl_pmds=0x%llx for pmd%u",
|
|
+				(unsigned long long)req->reg_smpl_pmds[0],
|
|
+				cnum);
|
|
+			goto error;
|
|
+		}
|
|
+
|
|
+		/*
|
|
+		 * verify validity of reset_pmds
|
|
+		 * check against impl_rw_pmds because it is not
|
|
+		 * possible to reset read-only PMDs
|
|
+		 */
|
|
+		if (unlikely(!bitmap_subset(reset_pmds, impl_rw_pmds, PFM_MAX_PMDS))) {
|
|
+			PFM_DBG("invalid reset_pmds=0x%llx for pmd%u",
|
|
+				(unsigned long long)req->reg_reset_pmds[0],
|
|
+				cnum);
|
|
+			goto error;
|
|
+		}
|
|
+
|
|
+		/*
|
|
+		 * locate event set
|
|
+		 */
|
|
+		if (set_id != set->id) {
|
|
+			/* update number of used register for previous set */
|
|
+			if (i) {
|
|
+				ret = update_changes(ctx, set, cast_ulp(old_used_pmcs));
|
|
+				if (ret)
|
|
+					goto error;
|
|
+			}
|
|
+
|
|
+			set = pfm_find_set(ctx, set_id, 0);
|
|
+			if (set == NULL) {
|
|
+				PFM_DBG("event set%u does not exist",
|
|
+					set_id);
|
|
+				/*
|
|
+				 * update_changes() above may have
|
|
+				 * cleared ret
|
|
+				 */
|
|
+				ret = -EINVAL;
|
|
+				goto error;
|
|
+			}
|
|
+			bitmap_copy(cast_ulp(old_used_pmcs),
|
|
+				    cast_ulp(set->used_pmcs),
|
|
+				    ctx->regs.max_pmc);
|
|
+		}
|
|
+
|
|
+		/*
|
|
+		 * execute write checker, if any
|
|
+		 */
|
|
+		if (unlikely(wr_func && (pmd_type & PFM_REG_WC))) {
|
|
+			ret = (*wr_func)(ctx, set, req);
|
|
+			if (ret)
|
|
+				goto error;
|
|
+
|
|
+		}
|
|
+
|
|
+		/*
|
|
+		 * now commit changes to software state
|
|
+		 */
|
|
+
|
|
+		if (unlikely(compat))
|
|
+			goto skip_set;
|
|
+
|
|
+		if (bitmap_weight(smpl_pmds, max_pmd)) {
|
|
+			ret = handle_smpl_bv(ctx, set, smpl_pmds);
|
|
+			if (ret)
|
|
+				goto error;
|
|
+			update_used_pmcs(ctx, set, cast_ulp(smpl_pmds));
|
|
+		}
|
|
+
|
|
+		bitmap_copy(cast_ulp(set->pmds[cnum].smpl_pmds),
|
|
+			    smpl_pmds,
|
|
+			    max_pmd);
|
|
+
|
|
+		if (bitmap_weight(reset_pmds, max_pmd)) {
|
|
+			ret = handle_smpl_bv(ctx, set, reset_pmds);
|
|
+			if (ret)
|
|
+				goto error;
|
|
+			update_used_pmcs(ctx, set, cast_ulp(reset_pmds));
|
|
+		}
|
|
+
|
|
+		bitmap_copy(cast_ulp(set->pmds[cnum].reset_pmds),
|
|
+			    reset_pmds,
|
|
+			    max_pmd);
|
|
+
|
|
+		set->pmds[cnum].flags = flags;
|
|
+
|
|
+		__set_bit(cnum, cast_ulp(set->used_pmds));
|
|
+		update_used_reg(ctx, set, cnum);
|
|
+
|
|
+		/*
|
|
+		 * we reprogram the PMD hence, we clear any pending
|
|
+		 * ovfl. Does affect ovfl switch on restart but new
|
|
+		 * value has already been established here
|
|
+		 */
|
|
+		if (test_bit(cnum, cast_ulp(set->povfl_pmds))) {
|
|
+			set->npend_ovfls--;
|
|
+			__clear_bit(cnum, cast_ulp(set->povfl_pmds));
|
|
+		}
|
|
+		__clear_bit(cnum, cast_ulp(set->ovfl_pmds));
|
|
+
|
|
+		/*
|
|
+		 * update ovfl_notify
|
|
+		 */
|
|
+		if (flags & PFM_REGFL_OVFL_NOTIFY)
|
|
+			__set_bit(cnum, cast_ulp(set->ovfl_notify));
|
|
+		else
|
|
+			__clear_bit(cnum, cast_ulp(set->ovfl_notify));
|
|
+
|
|
+		/*
|
|
+		 * establish new switch count
|
|
+		 */
|
|
+		set->pmds[cnum].ovflsw_thres = req->reg_ovfl_switch_cnt;
|
|
+		set->pmds[cnum].ovflsw_ref_thres = req->reg_ovfl_switch_cnt;
|
|
+skip_set:
|
|
+
|
|
+		/*
|
|
+		 * set last value to new value for all types of PMD
|
|
+		 */
|
|
+		set->pmds[cnum].lval = req->reg_value;
|
|
+		set->pmds[cnum].value = req->reg_value;
|
|
+
|
|
+		/*
|
|
+		 * update reset values (not just for counters)
|
|
+		 */
|
|
+		set->pmds[cnum].long_reset = req->reg_long_reset;
|
|
+		set->pmds[cnum].short_reset = req->reg_short_reset;
|
|
+
|
|
+		/*
|
|
+		 * update randomization mask
|
|
+		 */
|
|
+		set->pmds[cnum].mask = req->reg_random_mask;
|
|
+
|
|
+		set->pmds[cnum].eventid = req->reg_smpl_eventid;
|
|
+
|
|
+		/*
|
|
+		 * only the active set is mirrored into the hardware, and
|
|
+		 * only when this CPU currently holds the PMU state
|
|
+		 */
|
|
+		if (set == active_set) {
|
|
+			set->priv_flags |= PFM_SETFL_PRIV_MOD_PMDS;
|
|
+			if (can_access_pmu)
|
|
+				pfm_write_pmd(ctx, cnum, req->reg_value);
|
|
+		}
|
|
+
|
|
+		PFM_DBG("set%u pmd%u=0x%llx flags=0x%x a_pmu=%d "
|
|
+			"ctx_pmd=0x%llx s_reset=0x%llx "
|
|
+			"l_reset=0x%llx s_pmds=0x%llx "
|
|
+			"r_pmds=0x%llx o_pmds=0x%llx "
|
|
+			"o_thres=%llu compat=%d eventid=%llx",
|
|
+			set->id,
|
|
+			cnum,
|
|
+			(unsigned long long)req->reg_value,
|
|
+			set->pmds[cnum].flags,
|
|
+			can_access_pmu,
|
|
+			(unsigned long long)set->pmds[cnum].value,
|
|
+			(unsigned long long)set->pmds[cnum].short_reset,
|
|
+			(unsigned long long)set->pmds[cnum].long_reset,
|
|
+			(unsigned long long)set->pmds[cnum].smpl_pmds[0],
|
|
+			(unsigned long long)set->pmds[cnum].reset_pmds[0],
|
|
+			(unsigned long long)set->ovfl_pmds[0],
|
|
+			(unsigned long long)set->pmds[cnum].ovflsw_thres,
|
|
+			compat,
|
|
+			(unsigned long long)set->pmds[cnum].eventid);
|
|
+	}
|
|
+	ret = 0;
|
|
+
|
|
+error:
|
|
+	/*
|
|
+	 * set is NULL when the lookup of a requested set failed; the
|
|
+	 * previous set, if any, was already updated before that lookup.
|
|
+	 * A failure to initialize newly used PMCs is reported unless an
|
|
+	 * earlier error is already being returned.
|
|
+	 */
|
|
+	if (set) {
|
|
+		err = update_changes(ctx, set, cast_ulp(old_used_pmcs));
|
|
+		if (!ret)
|
|
+			ret = err;
|
|
+	}
|
|
+
|
|
+	/*
|
|
+	 * make changes visible
|
|
+	 */
|
|
+	if (can_access_pmu)
|
|
+		pfm_arch_serialize();
|
|
+
|
|
+	return ret;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __pfm_write_pmcs - modified config registers
|
|
+ * @ctx: context to operate on
|
|
+ * @req: pfarg_pmc_t request from user
|
|
+ * @count: number of element in the pfarg_pmc_t vector
|
|
+ *
|
|
+ *
|
|
+ * The function succeeds whether the context is attached or not.
|
|
+ * When attached to another thread, that thread must be stopped.
|
|
+ *
|
|
+ * The context is locked and interrupts are disabled.
|
|
+ */
|
|
+int __pfm_write_pmcs(struct pfm_context *ctx, struct pfarg_pmc *req, int count)
|
|
+{
|
|
+	struct pfm_event_set *set, *active_set;
|
|
+	u64 value, dfl_val, rsvd_msk;
|
|
+	unsigned long *impl_pmcs;
|
|
+	int i, can_access_pmu;
|
|
+	int ret;
|
|
+	u16 set_id;
|
|
+	u16 cnum, pmc_type, max_pmc;
|
|
+	u32 flags, expert;
|
|
+	pfm_pmc_check_t wr_func;
|
|
+
|
|
+	active_set = ctx->active_set;
|
|
+
|
|
+	wr_func = pfm_pmu_conf->pmc_write_check;
|
|
+	max_pmc = ctx->regs.max_pmc;
|
|
+	impl_pmcs = cast_ulp(ctx->regs.pmcs);
|
|
+	set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
|
|
+
|
|
+	expert = pfm_controls.flags & PFM_CTRL_FL_RW_EXPERT;
|
|
+
|
|
+	can_access_pmu = 0;
|
|
+
|
|
+	/*
|
|
+	 * we cannot access the actual PMC registers when monitoring is masked
|
|
+	 */
|
|
+	if (unlikely(ctx->state == PFM_CTX_LOADED))
|
|
+		can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task
|
|
+			|| ctx->flags.system;
|
|
+
|
|
+	for (i = 0; i < count; i++, req++) {
|
|
+
|
|
+		/*
|
|
+		 * every validation failure below reports -EINVAL. This is
|
|
+		 * re-armed per request because a successful write checker
|
|
+		 * in a previous iteration leaves ret at 0.
|
|
+		 */
|
|
+		ret = -EINVAL;
|
|
+
|
|
+		cnum = req->reg_num;
|
|
+		set_id = req->reg_set;
|
|
+		value = req->reg_value;
|
|
+		flags = req->reg_flags;
|
|
+
|
|
+		/*
|
|
+		 * no access to unavailable PMC register
|
|
+		 */
|
|
+		if (unlikely(is_invalid(cnum, impl_pmcs, max_pmc))) {
|
|
+			PFM_DBG("pmc%u is not available", cnum);
|
|
+			goto error;
|
|
+		}
|
|
+
|
|
+		pmc_type = pfm_pmu_conf->pmc_desc[cnum].type;
|
|
+		dfl_val = pfm_pmu_conf->pmc_desc[cnum].dfl_val;
|
|
+		rsvd_msk = pfm_pmu_conf->pmc_desc[cnum].rsvd_msk;
|
|
+
|
|
+		/*
|
|
+		 * ensure only valid flags are set
|
|
+		 */
|
|
+		if (flags & ~PFM_REGFL_PMC_ALL) {
|
|
+			PFM_DBG("pmc%u: invalid flags=0x%x", cnum, flags);
|
|
+			goto error;
|
|
+		}
|
|
+
|
|
+		/*
|
|
+		 * locate event set
|
|
+		 */
|
|
+		if (set_id != set->id) {
|
|
+			set = pfm_find_set(ctx, set_id, 0);
|
|
+			if (set == NULL) {
|
|
+				PFM_DBG("event set%u does not exist",
|
|
+					set_id);
|
|
+				goto error;
|
|
+			}
|
|
+		}
|
|
+
|
|
+		/*
|
|
+		 * set reserved bits to default values
|
|
+		 * (reserved bits must be 1 in rsvd_msk)
|
|
+		 *
|
|
+		 * bypass via /sys/kernel/perfmon/mode = 1
|
|
+		 */
|
|
+		if (likely(!expert))
|
|
+			value = (value & ~rsvd_msk) | (dfl_val & rsvd_msk);
|
|
+
|
|
+		if (flags & PFM_REGFL_NO_EMUL64) {
|
|
+			if (!(pmc_type & PFM_REG_NO64)) {
|
|
+				PFM_DBG("pmc%u no support for "
|
|
+					"PFM_REGFL_NO_EMUL64", cnum);
|
|
+				goto error;
|
|
+			}
|
|
+			value &= ~pfm_pmu_conf->pmc_desc[cnum].no_emul64_msk;
|
|
+		}
|
|
+
|
|
+		/*
|
|
+		 * execute write checker, if any. The checker sees, and may
|
|
+		 * adjust, the sanitized value through req->reg_value.
|
|
+		 */
|
|
+		if (likely(wr_func && (pmc_type & PFM_REG_WC))) {
|
|
+			req->reg_value = value;
|
|
+			ret = (*wr_func)(ctx, set, req);
|
|
+			if (ret)
|
|
+				goto error;
|
|
+			value = req->reg_value;
|
|
+		}
|
|
+
|
|
+		/*
|
|
+		 * Now we commit the changes
|
|
+		 */
|
|
+
|
|
+		/*
|
|
+		 * mark PMC register as used
|
|
+		 * We do not track associated PMC register based on
|
|
+		 * the fact that they will likely need to be written
|
|
+		 * in order to become useful at which point the statement
|
|
+		 * below will catch that.
|
|
+		 *
|
|
+		 * The used_pmcs bitmask is only useful on architectures where
|
|
+		 * the PMC needs to be modified for particular bits, especially
|
|
+		 * on overflow or to stop/start.
|
|
+		 */
|
|
+		if (!test_bit(cnum, cast_ulp(set->used_pmcs))) {
|
|
+			__set_bit(cnum, cast_ulp(set->used_pmcs));
|
|
+			set->nused_pmcs++;
|
|
+		}
|
|
+
|
|
+		set->pmcs[cnum] = value;
|
|
+
|
|
+		/*
|
|
+		 * only the active set is mirrored into the hardware, and
|
|
+		 * only when this CPU currently holds the PMU state
|
|
+		 */
|
|
+		if (set == active_set) {
|
|
+			set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS;
|
|
+			if (can_access_pmu)
|
|
+				pfm_arch_write_pmc(ctx, cnum, value);
|
|
+		}
|
|
+
|
|
+		PFM_DBG("set%u pmc%u=0x%llx a_pmu=%d "
|
|
+			"u_pmcs=0x%llx nu_pmcs=%u",
|
|
+			set->id,
|
|
+			cnum,
|
|
+			(unsigned long long)value,
|
|
+			can_access_pmu,
|
|
+			(unsigned long long)set->used_pmcs[0],
|
|
+			set->nused_pmcs);
|
|
+	}
|
|
+	ret = 0;
|
|
+error:
|
|
+	/*
|
|
+	 * make sure the changes are visible
|
|
+	 */
|
|
+	if (can_access_pmu)
|
|
+		pfm_arch_serialize();
|
|
+
|
|
+	return ret;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * __pfm_read_pmds - read data registers
|
|
+ * @ctx: context to operate on
|
|
+ * @req: pfarg_pmd_t request from user
|
|
+ * @count: number of element in the pfarg_pmd_t vector
|
|
+ *
|
|
+ *
|
|
+ * The function succeeds whether the context is attached or not.
|
|
+ * When attached to another thread, that thread must be stopped.
|
|
+ *
|
|
+ * The context is locked and interrupts are disabled.
|
|
+ */
|
|
+int __pfm_read_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count)
|
|
+{
|
|
+	struct pfm_event_set *set, *active_set;
|
|
+	unsigned long *impl_pmds;
|
|
+	u64 val = 0, lval, sw_cnt, hw_val, ovfl_mask;
|
|
+	u16 cnum, set_id, pmd_type, max_pmd;
|
|
+	int i, can_access_pmu = 0;
|
|
+
|
|
+	ovfl_mask = pfm_pmu_conf->ovfl_mask;
|
|
+	impl_pmds = cast_ulp(ctx->regs.pmds);
|
|
+	max_pmd = ctx->regs.max_pmd;
|
|
+	active_set = ctx->active_set;
|
|
+	set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
|
|
+
|
|
+	/*
|
|
+	 * on both UP and SMP, the PMD can only be read from the hardware
|
|
+	 * register when the task is the owner of the local PMU (or the
|
|
+	 * context is system-wide).
|
|
+	 */
|
|
+	if (likely(ctx->state == PFM_CTX_LOADED)) {
|
|
+		can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task
|
|
+			|| ctx->flags.system;
|
|
+
|
|
+		if (can_access_pmu)
|
|
+			pfm_arch_serialize();
|
|
+	}
|
|
+
|
|
+	for (i = 0; i < count; i++, req++) {
|
|
+
|
|
+		cnum = req->reg_num;
|
|
+		set_id = req->reg_set;
|
|
+
|
|
+		if (unlikely(is_invalid(cnum, impl_pmds, max_pmd))) {
|
|
+			PFM_DBG("pmd%u is not implemented/unaccessible", cnum);
|
|
+			return -EINVAL;
|
|
+		}
|
|
+
|
|
+		pmd_type = pfm_pmu_conf->pmd_desc[cnum].type;
|
|
+
|
|
+		/*
|
|
+		 * switch to the requested event set if it differs from
|
|
+		 * the one used for the previous request
|
|
+		 */
|
|
+		if (set_id != set->id) {
|
|
+			set = pfm_find_set(ctx, set_id, 0);
|
|
+			if (set == NULL) {
|
|
+				PFM_DBG("event set%u does not exist",
|
|
+					set_id);
|
|
+				return -EINVAL;
|
|
+			}
|
|
+		}
|
|
+
|
|
+		/*
|
|
+		 * a PMD can only be read if it was requested, i.e.:
|
|
+		 *	- explicitly written via pfm_write_pmds()
|
|
+		 *	- provided as a reg_smpl_pmds[] to another PMD during
|
|
+		 *	  pfm_write_pmds()
|
|
+		 *
|
|
+		 * This is motivated by security and for optimization purposes:
|
|
+		 *	- on context switch restore, we can restore only what
|
|
+		 *	  we use (except when regs directly readable at user
|
|
+		 *	  level, e.g., IA-64 self-monitoring, I386 RDPMC).
|
|
+		 *	- do not need to maintain PMC -> PMD dependencies
|
|
+		 */
|
|
+		if (unlikely(!test_bit(cnum, cast_ulp(set->used_pmds)))) {
|
|
+			PFM_DBG("pmd%u cannot read, because not used", cnum);
|
|
+			return -EINVAL;
|
|
+		}
|
|
+
|
|
+		/*
|
|
+		 * start from the software state: current value, last
|
|
+		 * value, and remaining overflows before a set switch
|
|
+		 */
|
|
+		val = set->pmds[cnum].value;
|
|
+		lval = set->pmds[cnum].lval;
|
|
+		sw_cnt = set->pmds[cnum].ovflsw_thres;
|
|
+
|
|
+		/*
|
|
+		 * If the task is not the current one, then we check if the
|
|
+		 * PMU state is still in the local live register due to lazy
|
|
+		 * ctxsw. If true, then we read directly from the registers.
|
|
+		 * For PFM_REG_C64 registers only the bits covered by
|
|
+		 * ovfl_mask come from hardware, the rest from software.
|
|
+		 */
|
|
+		if (set == active_set && can_access_pmu) {
|
|
+			hw_val = pfm_read_pmd(ctx, cnum);
|
|
+			if (pmd_type & PFM_REG_C64)
|
|
+				val = (val & ~ovfl_mask) | (hw_val & ovfl_mask);
|
|
+			else
|
|
+				val = hw_val;
|
|
+		}
|
|
+
|
|
+		PFM_DBG("set%u pmd%u=0x%llx sw_thr=%llu lval=0x%llx",
|
|
+			set->id,
|
|
+			cnum,
|
|
+			(unsigned long long)val,
|
|
+			(unsigned long long)sw_cnt,
|
|
+			(unsigned long long)lval);
|
|
+
|
|
+		req->reg_value = val;
|
|
+		req->reg_last_reset_val = lval;
|
|
+		req->reg_ovfl_switch_cnt = sw_cnt;
|
|
+	}
|
|
+	return 0;
|
|
+}
|
|
diff --git a/perfmon/perfmon_sets.c b/perfmon/perfmon_sets.c
|
|
new file mode 100644
|
|
index 0000000..24534cb
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_sets.c
|
|
@@ -0,0 +1,873 @@
|
|
+/*
|
|
+ * perfmon_sets.c: perfmon2 event sets and multiplexing functions
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+static struct kmem_cache *pfm_set_cachep;
|
|
+
|
|
+/**
|
|
+ * pfm_reload_switch_thresholds - reload overflow-based switch thresholds per set
|
|
+ * @set: the set for which to reload thresholds
|
|
+ *
|
|
+ */
|
|
+static void pfm_reload_switch_thresholds(struct pfm_context *ctx,
|
|
+					 struct pfm_event_set *set)
|
|
+{
|
|
+	u16 pmd;
|
|
+	u16 last = ctx->regs.max_intr_pmd;
|
|
+
|
|
+	/*
|
|
+	 * walk the interrupt-capable PMD range and re-arm the switch
|
|
+	 * threshold of every PMD used by the set
|
|
+	 */
|
|
+	for (pmd = ctx->regs.first_intr_pmd; pmd < last; pmd++) {
|
|
+		if (!test_bit(pmd, cast_ulp(set->used_pmds)))
|
|
+			continue;
|
|
+
|
|
+		set->pmds[pmd].ovflsw_thres = set->pmds[pmd].ovflsw_ref_thres;
|
|
+
|
|
+		PFM_DBG("set%u pmd%u ovflsw_thres=%llu",
|
|
+			set->id,
|
|
+			pmd,
|
|
+			(unsigned long long)set->pmds[pmd].ovflsw_thres);
|
|
+	}
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_prepare_sets - initialize sets on pfm_load_context
|
|
+ * @ctx : context to operate on
|
|
+ * @load_set: set to activate first
|
|
+ *
|
|
+ * connect all sets, reset internal fields
|
|
+ */
|
|
+struct pfm_event_set *pfm_prepare_sets(struct pfm_context *ctx, u16 load_set)
|
|
+{
|
|
+	struct pfm_event_set *first, *cur;
|
|
+	u16 nbits;
|
|
+
|
|
+	/*
|
|
+	 * the set to activate first must exist
|
|
+	 */
|
|
+	first = pfm_find_set(ctx, load_set, 0);
|
|
+	if (first == NULL)
|
|
+		return NULL;
|
|
+
|
|
+	if (first->flags & PFM_SETFL_OVFL_SWITCH)
|
|
+		pfm_reload_switch_thresholds(ctx, first);
|
|
+
|
|
+	nbits = ctx->regs.max_intr_pmd;
|
|
+
|
|
+	list_for_each_entry(cur, &ctx->set_list, list) {
|
|
+		/*
|
|
+		 * forget any recorded or pending overflow
|
|
+		 */
|
|
+		bitmap_zero(cast_ulp(cur->ovfl_pmds), nbits);
|
|
+		bitmap_zero(cast_ulp(cur->povfl_pmds), nbits);
|
|
+		cur->npend_ovfls = 0;
|
|
+
|
|
+		/*
|
|
+		 * clear only these bits, a plain reset would also wipe
|
|
+		 * the arch-specific flags.
|
|
+		 *
|
|
+		 * neither duration nor runs are reset because typically
|
|
+		 * loading/unloading does not mean counts are reset. To
|
|
+		 * reset, the set must be modified
|
|
+		 */
|
|
+		cur->priv_flags &= ~(PFM_SETFL_PRIV_MOD_BOTH|PFM_SETFL_PRIV_SWITCH);
|
|
+	}
|
|
+	return first;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * called by hrtimer_interrupt()
|
|
+ *
|
|
+ * This is the only function where we come with
|
|
+ * cpu_base->lock held before ctx->lock
|
|
+ *
|
|
+ * interrupts are disabled
|
|
+ */
|
|
+enum hrtimer_restart pfm_handle_switch_timeout(struct hrtimer *t)
|
|
+{
|
|
+	struct pfm_context *ctx;
|
|
+	unsigned long flags;
|
|
+	enum hrtimer_restart ret = HRTIMER_NORESTART;
|
|
+
|
|
+	/*
|
|
+	 * prevent against race with unload
|
|
+	 */
|
|
+	ctx = __get_cpu_var(pmu_ctx);
|
|
+	if (!ctx)
|
|
+		return HRTIMER_NORESTART;
|
|
+
|
|
+	spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+	/*
|
|
+	 * switching occurs only when context is attached
|
|
+	 */
|
|
+	if (ctx->state != PFM_CTX_LOADED)
|
|
+		goto done;
|
|
+	/*
|
|
+	 * timer does not run while monitoring is inactive (not started)
|
|
+	 */
|
|
+	if (!pfm_arch_is_active(ctx))
|
|
+		goto done;
|
|
+
|
|
+	pfm_stats_inc(handle_timeout_count);
|
|
+
|
|
+	/*
|
|
+	 * follow the normal set chain; the return value tells the
|
|
+	 * hrtimer core whether the timer was re-armed
|
|
+	 */
|
|
+	ret = pfm_switch_sets(ctx, NULL, PFM_PMD_RESET_SHORT, 0);
|
|
+done:
|
|
+	spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+	return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ *
|
|
+ * always operating on the current task
|
|
+ * interrupts are masked
|
|
+ *
|
|
+ * input:
|
|
+ * - new_set: new set to switch to, if NULL follow normal chain
|
|
+ */
|
|
+enum hrtimer_restart pfm_switch_sets(struct pfm_context *ctx,
|
|
+				     struct pfm_event_set *new_set,
|
|
+				     int reset_mode,
|
|
+				     int no_restart)
|
|
+{
|
|
+	struct pfm_event_set *set;
|
|
+	u64 now, end;
|
|
+	u32 new_flags;
|
|
+	int is_active, nn;
|
|
+	enum hrtimer_restart ret = HRTIMER_NORESTART;
|
|
+
|
|
+	now = sched_clock();
|
|
+	set = ctx->active_set;
|
|
+	is_active = pfm_arch_is_active(ctx);
|
|
+
|
|
+	/*
|
|
+	 * if no set is explicitly requested, go round-robin: take the
|
|
+	 * set following the active one, wrapping to the first set when
|
|
+	 * the end of the list is reached
|
|
+	 */
|
|
+	if (!new_set) {
|
|
+		new_set = list_first_entry(&set->list, struct pfm_event_set, list);
|
|
+		if (&new_set->list == &ctx->set_list)
|
|
+			new_set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
|
|
+	}
|
|
+
|
|
+	PFM_DBG_ovfl("state=%d act=%d cur_set=%u cur_runs=%llu cur_npend=%d next_set=%u "
|
|
+		     "next_runs=%llu new_npend=%d reset_mode=%d reset_pmds=%llx",
|
|
+		     ctx->state,
|
|
+		     is_active,
|
|
+		     set->id,
|
|
+		     (unsigned long long)set->runs,
|
|
+		     set->npend_ovfls,
|
|
+		     new_set->id,
|
|
+		     (unsigned long long)new_set->runs,
|
|
+		     new_set->npend_ovfls,
|
|
+		     reset_mode,
|
|
+		     (unsigned long long)new_set->reset_pmds[0]);
|
|
+
|
|
+	new_flags = new_set->flags;
|
|
+
|
|
+	/*
|
|
+	 * nothing more to do
|
|
+	 */
|
|
+	if (new_set == set)
|
|
+		goto skip_same_set;
|
|
+
|
|
+	if (is_active) {
|
|
+		pfm_arch_stop(current, ctx);
|
|
+		pfm_save_pmds(ctx, set);
|
|
+		/*
|
|
+		 * compute elapsed ns for active set
|
|
+		 */
|
|
+		set->duration += now - set->duration_start;
|
|
+	}
|
|
+
|
|
+	pfm_arch_restore_pmds(ctx, new_set);
|
|
+	/*
|
|
+	 * if masked, we must restore the pmcs such that they
|
|
+	 * do not capture anything.
|
|
+	 */
|
|
+	pfm_arch_restore_pmcs(ctx, new_set);
|
|
+
|
|
+	/*
|
|
+	 * replay the interrupt for overflows still pending on the
|
|
+	 * incoming set
|
|
+	 */
|
|
+	if (new_set->npend_ovfls) {
|
|
+		pfm_arch_resend_irq(ctx);
|
|
+		pfm_stats_inc(ovfl_intr_replay_count);
|
|
+	}
|
|
+
|
|
+	new_set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
|
|
+
|
|
+skip_same_set:
|
|
+	new_set->runs++;
|
|
+	/*
|
|
+	 * reset switch threshold
|
|
+	 */
|
|
+	if (new_flags & PFM_SETFL_OVFL_SWITCH)
|
|
+		pfm_reload_switch_thresholds(ctx, new_set);
|
|
+
|
|
+	/*
|
|
+	 * reset overflowed PMD registers in new set
|
|
+	 */
|
|
+	nn = bitmap_weight(cast_ulp(new_set->reset_pmds), ctx->regs.max_pmd);
|
|
+	if (nn)
|
|
+		pfm_reset_pmds(ctx, new_set, nn, reset_mode);
|
|
+
|
|
+	/*
|
|
+	 * This is needed when coming from pfm_start()
|
|
+	 *
|
|
+	 * When switching to the same set, there is no
|
|
+	 * need to restart
|
|
+	 */
|
|
+	if (no_restart)
|
|
+		goto skip_restart;
|
|
+
|
|
+	if (is_active) {
|
|
+		/*
|
|
+		 * do not need to restart when same set
|
|
+		 */
|
|
+		if (new_set != set) {
|
|
+			ctx->active_set = new_set;
|
|
+			new_set->duration_start = now;
|
|
+			pfm_arch_start(current, ctx);
|
|
+		}
|
|
+		/*
|
|
+		 * install new timeout if necessary
|
|
+		 */
|
|
+		if (new_flags & PFM_SETFL_TIME_SWITCH) {
|
|
+			struct hrtimer *h;
|
|
+			h = &__get_cpu_var(pfm_hrtimer);
|
|
+			hrtimer_forward(h, h->base->get_time(), new_set->hrtimer_exp);
|
|
+			new_set->hrtimer_rem = new_set->hrtimer_exp;
|
|
+			ret = HRTIMER_RESTART;
|
|
+		}
|
|
+	}
|
|
+
|
|
+skip_restart:
|
|
+	ctx->active_set = new_set;
|
|
+
|
|
+	end = sched_clock();
|
|
+
|
|
+	pfm_stats_inc(set_switch_count);
|
|
+	pfm_stats_add(set_switch_ns, end - now);
|
|
+
|
|
+	return ret;
|
|
+}
|
|
+
|
|
+/*
+ * called from __pfm_overflow_handler() to switch event sets.
+ * monitoring is stopped, task is current, interrupts are masked.
+ * compared to pfm_switch_sets(), this version is simplified because
+ * it knows about the call path. There is no need to stop monitoring
+ * because it is already frozen by PMU handler.
+ *
+ * The incoming set is the successor of the active set in ctx->set_list,
+ * wrapping around to the first set of the list. On return,
+ * ctx->active_set points to the incoming set.
+ */
+void pfm_switch_sets_from_intr(struct pfm_context *ctx)
+{
+	struct pfm_event_set *set, *new_set;
+	u64 now, end;
+	u32 new_flags;
+	int n;
+
+	now = sched_clock();
+	set = ctx->active_set;
+	new_set = list_first_entry(&set->list, struct pfm_event_set, list);
+	if (&new_set->list == &ctx->set_list)
+		new_set = list_first_entry(&ctx->set_list, struct pfm_event_set, list);
+
+	PFM_DBG_ovfl("state=%d cur_set=%u cur_runs=%llu cur_npend=%d next_set=%u "
+		  "next_runs=%llu new_npend=%d new_r_pmds=%llx",
+		  ctx->state,
+		  set->id,
+		  (unsigned long long)set->runs,
+		  set->npend_ovfls,
+		  new_set->id,
+		  (unsigned long long)new_set->runs,
+		  new_set->npend_ovfls,
+		  (unsigned long long)new_set->reset_pmds[0]);
+
+	new_flags = new_set->flags;
+
+	/*
+	 * nothing more to do
+	 */
+	if (new_set == set)
+		goto skip_same_set;
+
+	/*
+	 * switch on intr only when set has OVFL_SWITCH
+	 */
+	BUG_ON(set->flags & PFM_SETFL_TIME_SWITCH);
+
+	/*
+	 * when called from PMU intr handler, monitoring
+	 * is already stopped
+	 *
+	 * save current PMD registers, we use a special
+	 * form for performance reason. On some architectures,
+	 * such as x86, the pmds are already saved when entering
+	 * the PMU interrupt handler via pfm_arch_intr_freeze()
+	 * so we don't need to save them again. On the contrary,
+	 * on IA-64, they are not saved by freeze, thus we have
+	 * to do it here.
+	 */
+	pfm_arch_save_pmds_from_intr(ctx, set);
+
+	/*
+	 * compute elapsed ns for active set
+	 */
+	set->duration += now - set->duration_start;
+
+	pfm_arch_restore_pmds(ctx, new_set);
+
+	/*
+	 * must not be restored active as we are still executing in the
+	 * PMU interrupt handler. activation is deferred to unfreeze PMU
+	 */
+	pfm_arch_restore_pmcs(ctx, new_set);
+
+	/*
+	 * check for pending interrupt on incoming set.
+	 * interrupts are masked so handler call deferred
+	 */
+	if (new_set->npend_ovfls) {
+		pfm_arch_resend_irq(ctx);
+		pfm_stats_inc(ovfl_intr_replay_count);
+	}
+	/*
+	 * no need to restore anything, that is already done
+	 */
+	new_set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
+	/*
+	 * reset duration counter
+	 */
+	new_set->duration_start = now;
+
+skip_same_set:
+	new_set->runs++;
+
+	/*
+	 * reset switch threshold
+	 */
+	if (new_flags & PFM_SETFL_OVFL_SWITCH)
+		pfm_reload_switch_thresholds(ctx, new_set);
+
+	/*
+	 * reset overflowed PMD registers
+	 */
+	n = bitmap_weight(cast_ulp(new_set->reset_pmds), ctx->regs.max_pmd);
+	if (n)
+		pfm_reset_pmds(ctx, new_set, n, PFM_PMD_RESET_SHORT);
+
+	/*
+	 * XXX: isactive?
+	 *
+	 * Came here following a interrupt which triggered a switch, i.e.,
+	 * previous set was using OVFL_SWITCH, thus we just need to
+	 * check if the next set is using timeout, and if so arm the timer.
+	 *
+	 * The timeout to arm is the one of the incoming set: the outgoing
+	 * set is not time-switched (see BUG_ON above), so its hrtimer_exp
+	 * does not describe the period to wait for.
+	 *
+	 * Timeout is always at least one tick away. No risk of having to
+	 * invoke the timeout handler right now. In any case, cb_mode is
+	 * set to HRTIMER_CB_IRQSAFE_NO_SOFTIRQ such that hrtimer_start
+	 * will not try to wakeup the softirqd which could cause a locking
+	 * problem.
+	 */
+	if (new_flags & PFM_SETFL_TIME_SWITCH) {
+		hrtimer_start(&__get_cpu_var(pfm_hrtimer),
+			      new_set->hrtimer_exp, HRTIMER_MODE_REL);
+		PFM_DBG("armed new timeout for set%u", new_set->id);
+	}
+
+	ctx->active_set = new_set;
+
+	end = sched_clock();
+
+	pfm_stats_inc(set_switch_count);
+	pfm_stats_add(set_switch_ns, end - now);
+}
|
|
+
|
|
+
|
|
+/*
+ * Validate event set flags: the arch-specific check runs first, then
+ * the combination of overflow-based and timeout-based switching is
+ * rejected.
+ *
+ * Returns 0 when the flags are acceptable, the arch error code or
+ * -EINVAL otherwise.
+ */
+static int pfm_setfl_sane(struct pfm_context *ctx, u32 flags)
+{
+#define PFM_SETFL_BOTH_SWITCH	(PFM_SETFL_OVFL_SWITCH|PFM_SETFL_TIME_SWITCH)
+	int err = pfm_arch_setfl_sane(ctx, flags);
+
+	if (err)
+		return err;
+
+	if ((flags & PFM_SETFL_BOTH_SWITCH) != PFM_SETFL_BOTH_SWITCH)
+		return 0;
+
+	PFM_DBG("both switch ovfl and switch time are set");
+	return -EINVAL;
+}
|
|
+
|
|
+/*
+ * Apply the user description @req to the event set @set: validate the
+ * flags, compute the switch timeout and reset the run statistics.
+ *
+ * it is never possible to change the identification of an existing set
+ *
+ * Must not be called while the context is loaded (BUG_ON below).
+ * Returns 0 on success, -EINVAL on invalid flags or timeout.
+ */
+static int pfm_change_evtset(struct pfm_context *ctx,
+			     struct pfm_event_set *set,
+			     struct pfarg_setdesc *req)
+{
+	struct timeval tv;
+	struct timespec ts;
+	ktime_t kt;
+	long res_ns;
+	s32 rem;
+	u32 flags;
+	int ret;
+	u16 set_id;
+
+	BUG_ON(ctx->state == PFM_CTX_LOADED);
+
+	set_id = req->set_id;
+	flags = req->set_flags;
+
+	ret = pfm_setfl_sane(ctx, flags);
+	if (ret) {
+		PFM_DBG("invalid flags 0x%x set %u", flags, set_id);
+		return -EINVAL;
+	}
+
+	/*
+	 * compute timeout value
+	 */
+	if (flags & PFM_SETFL_TIME_SWITCH) {
+		/*
+		 * timeout value of zero is illegal
+		 */
+		if (req->set_timeout == 0) {
+			PFM_DBG("invalid timeout 0");
+			return -EINVAL;
+		}
+
+		hrtimer_get_res(CLOCK_MONOTONIC, &ts);
+		res_ns = (long)ktime_to_ns(timespec_to_ktime(ts));
+
+		/*
+		 * the timeout must be a multiple of the clock resolution;
+		 * only the remainder of the division is of interest.
+		 *
+		 * u64 division missing on 32-bit arch, so use div_s64_rem
+		 */
+		div_s64_rem(req->set_timeout, res_ns, &rem);
+
+		PFM_DBG("set%u flags=0x%x req_timeout=%lluns "
+			"HZ=%u TICK_NSEC=%lu clock_res=%ldns rem=%dns",
+			set_id,
+			flags,
+			(unsigned long long)req->set_timeout,
+			HZ, TICK_NSEC,
+			res_ns,
+			rem);
+
+		/*
+		 * Only accept timeout, we can actually achieve.
+		 * users can invoke clock_getres(CLOCK_MONOTONIC)
+		 * to figure out resolution and adjust timeout
+		 */
+		if (rem) {
+			PFM_DBG("set%u invalid timeout=%llu",
+				set_id,
+				(unsigned long long)req->set_timeout);
+			return -EINVAL;
+		}
+
+		tv = ns_to_timeval(req->set_timeout);
+		kt = timeval_to_ktime(tv);
+		set->hrtimer_exp = kt;
+	} else {
+		set->hrtimer_exp = ktime_set(0, 0);
+	}
+
+	/*
+	 * commit changes
+	 */
+	set->id = set_id;
+	set->flags = flags;
+	set->priv_flags = 0;
+
+	/*
+	 * activation and duration counters are reset as
+	 * most likely major things will change in the set
+	 */
+	set->runs = 0;
+	set->duration = 0;
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Load the default value of every implemented PMC register into @set.
+ *
+ * this function does not modify the next field
+ */
+static void pfm_initialize_set(struct pfm_context *ctx,
+			       struct pfm_event_set *set)
+{
+	u64 *implemented = ctx->regs.pmcs;
+	u16 last = ctx->regs.max_pmc;
+	u16 idx;
+
+	/*
+	 * only registers flagged in the implemented-PMC bitmask get
+	 * their default value
+	 */
+	for (idx = 0; idx < last; idx++) {
+		if (!test_bit(idx, cast_ulp(implemented)))
+			continue;
+
+		set->pmcs[idx] = pfm_pmu_conf->pmc_desc[idx].dfl_val;
+		PFM_DBG("set%u pmc%u=0x%llx",
+			set->id,
+			idx,
+			(unsigned long long)set->pmcs[idx]);
+	}
+
+	/*
+	 * PMD registers are set to 0 when the event set is allocated,
+	 * hence there is nothing to initialize for them here.
+	 *
+	 * For virtual PMD registers (i.e., those tied to a SW resource)
+	 * their value becomes meaningful once the context is attached.
+	 */
+}
|
|
+
|
|
+/*
+ * Look up an event set by identification in the id-sorted set list of
+ * the context. When no such set exists:
+ *	- alloc == 0: return NULL
+ *	- alloc == 1: allocate a zeroed set, insert it at its sorted
+ *	  position and return it (NULL if the allocation fails)
+ *
+ * alloc is one ONLY when coming from pfm_create_evtsets() which can only
+ * be called when the context is detached, i.e. monitoring is stopped.
+ */
+struct pfm_event_set *pfm_find_set(struct pfm_context *ctx, u16 set_id, int alloc)
+{
+	struct pfm_event_set *cur, *fresh;
+	struct pfm_event_set *before = NULL;
+
+	PFM_DBG("looking for set=%u", set_id);
+
+	list_for_each_entry(cur, &ctx->set_list, list) {
+		if (cur->id == set_id)
+			return cur;
+		if (cur->id > set_id)
+			break;
+		before = cur;
+	}
+
+	if (!alloc)
+		return NULL;
+
+	/*
+	 * the context spinlock is held, so the allocation must not
+	 * sleep: GFP_ATOMIC is required.
+	 */
+	fresh = kmem_cache_zalloc(pfm_set_cachep, GFP_ATOMIC);
+	if (!fresh)
+		return NULL;
+
+	fresh->id = set_id;
+	INIT_LIST_HEAD(&fresh->list);
+
+	/*
+	 * insert right after the last set with a smaller id, or at the
+	 * head of the list when there is none
+	 */
+	if (before)
+		PFM_DBG("add after set=%u", before->id);
+	list_add(&fresh->list, before ? &before->list : &ctx->set_list);
+
+	return fresh;
+}
|
|
+
|
|
+/**
+ * pfm_create_initial_set - create initial set from __pfm_create_context
+ * @ctx: context to attach the set to
+ *
+ * Looks up (allocating if needed) set0 and installs the default PMC
+ * values in it.
+ *
+ * Returns 0 on success, -ENOMEM if set0 could not be obtained.
+ */
+int pfm_create_initial_set(struct pfm_context *ctx)
+{
+	struct pfm_event_set *set;
+
+	/*
+	 * create initial set0; pfm_find_set() hands back the set itself,
+	 * no second lookup in the list is needed
+	 */
+	set = pfm_find_set(ctx, 0, 1);
+	if (!set)
+		return -ENOMEM;
+
+	pfm_initialize_set(ctx, set);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Create or reconfigure the @count event sets described by @req.
+ * Processing stops at the first failing descriptor; sets handled
+ * before it keep their new configuration.
+ *
+ * context is unloaded for this command. Interrupts are enabled
+ *
+ * Returns 0 on success, -ENOMEM when a set cannot be allocated, or the
+ * error reported by pfm_change_evtset().
+ */
+int __pfm_create_evtsets(struct pfm_context *ctx, struct pfarg_setdesc *req,
+			 int count)
+{
+	struct pfm_event_set *set;
+	u16 set_id;
+	int i, ret;
+
+	for (i = 0; i < count; i++, req++) {
+		set_id = req->set_id;
+
+		PFM_DBG("set_id=%u", set_id);
+
+		set = pfm_find_set(ctx, set_id, 1);
+		if (!set) {
+			PFM_DBG("cannot allocate set %u", set_id);
+			return -ENOMEM;
+		}
+
+		ret = pfm_change_evtset(ctx, set, req);
+		if (ret)
+			return ret;
+
+		pfm_initialize_set(ctx, set);
+	}
+	return 0;
+}
|
|
+
|
|
+/*
+ * Fill in the @count set information requests in @req: flags, remaining
+ * timeout, number of runs, active duration, last overflowed PMDs and
+ * the bitmasks of available PMC/PMD registers.
+ *
+ * Returns 0 on success, -EINVAL as soon as a requested set id does not
+ * exist (requests before the failing one have already been filled in).
+ */
+int __pfm_getinfo_evtsets(struct pfm_context *ctx, struct pfarg_setinfo *req,
+				 int count)
+{
+	struct pfm_event_set *set;
+	int i, is_system, is_loaded, is_self, ret;
+	u16 set_id;
+	u64 end;
+
+	end = sched_clock();
+
+	is_system = ctx->flags.system;
+	is_loaded = ctx->state == PFM_CTX_LOADED;
+	is_self   = ctx->task == current || is_system;
+
+	ret = -EINVAL;
+	for (i = 0; i < count; i++, req++) {
+
+		set_id = req->set_id;
+
+		list_for_each_entry(set, &ctx->set_list, list) {
+			if (set->id == set_id)
+				goto found;
+			if (set->id > set_id)
+				goto error;
+		}
+		/*
+		 * ran off the end of the list: every existing set has a
+		 * smaller id, the requested set does not exist and the
+		 * iterator does not point to a valid set
+		 */
+		goto error;
+found:
+		req->set_flags = set->flags;
+
+		/*
+		 * compute leftover timeout
+		 *
+		 * lockdep may complain about lock inversion
+		 * because of get_remaining() however, this
+		 * applies to self-monitoring only, thus the
+		 * thread cannot be in the timeout handler
+		 * and here at the same time given that we
+		 * run with interrupts disabled
+		 */
+		if (is_loaded && is_self) {
+			struct hrtimer *h;
+			h = &__get_cpu_var(pfm_hrtimer);
+			req->set_timeout = ktime_to_ns(hrtimer_get_remaining(h));
+		} else {
+			/*
+			 * hrtimer_rem zero when not using
+			 * timeout-based switching
+			 */
+			req->set_timeout = ktime_to_ns(set->hrtimer_rem);
+		}
+
+		req->set_runs = set->runs;
+		req->set_act_duration = set->duration;
+
+		/*
+		 * adjust for active set if needed
+		 */
+		if (is_system && is_loaded && ctx->flags.started
+		    && set == ctx->active_set)
+			req->set_act_duration += end - set->duration_start;
+
+		/*
+		 * copy the list of pmds which last overflowed
+		 */
+		bitmap_copy(cast_ulp(req->set_ovfl_pmds),
+			    cast_ulp(set->ovfl_pmds),
+			    PFM_MAX_PMDS);
+
+		/*
+		 * copy bitmask of available PMU registers
+		 *
+		 * must copy over the entire vector to avoid
+		 * returning bogus upper bits passed by user
+		 */
+		bitmap_copy(cast_ulp(req->set_avail_pmcs),
+			    cast_ulp(ctx->regs.pmcs),
+			    PFM_MAX_PMCS);
+
+		bitmap_copy(cast_ulp(req->set_avail_pmds),
+			    cast_ulp(ctx->regs.pmds),
+			    PFM_MAX_PMDS);
+
+		PFM_DBG("set%u flags=0x%x eff_usec=%llu runs=%llu "
+			"a_pmcs=0x%llx a_pmds=0x%llx",
+			set_id,
+			set->flags,
+			(unsigned long long)req->set_timeout,
+			(unsigned long long)set->runs,
+			(unsigned long long)ctx->regs.pmcs[0],
+			(unsigned long long)ctx->regs.pmds[0]);
+	}
+	ret = 0;
+error:
+	return ret;
+}
|
|
+
|
|
+/*
+ * Delete the @count event sets whose ids are listed in @arg (an array
+ * of struct pfarg_setdesc). Stops at the first id which does not
+ * exist; sets deleted before that point stay deleted.
+ *
+ * context is unloaded for this command. Interrupts are enabled
+ *
+ * Returns 0 on success, -EINVAL when a set id is not found.
+ */
+int __pfm_delete_evtsets(struct pfm_context *ctx, void *arg, int count)
+{
+	struct pfarg_setdesc *req = arg;
+	struct pfm_event_set *set, *victim;
+	u16 set_id;
+	int i;
+
+	for (i = 0; i < count; i++, req++) {
+		set_id = req->set_id;
+
+		/*
+		 * the list is sorted by id: stop at the first set whose
+		 * id is not smaller than the one we are looking for
+		 */
+		victim = NULL;
+		list_for_each_entry(set, &ctx->set_list, list) {
+			if (set->id < set_id)
+				continue;
+			if (set->id == set_id)
+				victim = set;
+			break;
+		}
+		if (!victim)
+			return -EINVAL;
+
+		/*
+		 * clear active set if necessary.
+		 * will be updated when context is loaded
+		 */
+		if (ctx->active_set == victim)
+			ctx->active_set = NULL;
+
+		list_del(&victim->list);
+
+		kmem_cache_free(pfm_set_cachep, victim);
+
+		PFM_DBG("set%u deleted", set_id);
+	}
+	return 0;
+}
|
|
+
|
|
+/*
+ * called from pfm_context_free() to free all sets
+ *
+ * Every set is unlinked from ctx->set_list and handed back to the
+ * event set slab cache.
+ */
+void pfm_free_sets(struct pfm_context *ctx)
+{
+	struct pfm_event_set *victim;
+
+	while (!list_empty(&ctx->set_list)) {
+		victim = list_first_entry(&ctx->set_list,
+					  struct pfm_event_set, list);
+		list_del(&victim->list);
+		kmem_cache_free(pfm_set_cachep, victim);
+	}
+}
|
|
+
|
|
+/**
+ * pfm_restart_timer - restart hrtimer taking care of expired timeout
+ * @ctx : context to work with
+ * @set : current active set
+ *
+ * Must be called on the processor on which the timer is to be armed.
+ * Assumes context is locked and interrupts are masked
+ *
+ * When the remaining time of @set is not positive, the sets are
+ * switched right away; upon return the active set for the context may
+ * therefore have changed.
+ */
+void pfm_restart_timer(struct pfm_context *ctx, struct pfm_event_set *set)
+{
+	struct hrtimer *timer = &__get_cpu_var(pfm_hrtimer);
+
+	PFM_DBG_ovfl("hrtimer=%lld", (long long)ktime_to_ns(set->hrtimer_rem));
+
+	/* common case: time is left, simply re-arm the timer */
+	if (ktime_to_ns(set->hrtimer_rem) > 0) {
+		hrtimer_start(timer, set->hrtimer_rem, HRTIMER_MODE_REL);
+		return;
+	}
+
+	/*
+	 * timer was not re-armed because it has already expired
+	 * timer was not enqueued, we need to switch set now
+	 */
+	pfm_stats_inc(set_switch_exp);
+
+	if (pfm_switch_sets(ctx, NULL, 1, 0) != HRTIMER_RESTART)
+		return;
+
+	/* arm the timer with the remaining time of the new active set */
+	hrtimer_start(timer, ctx->active_set->hrtimer_rem, HRTIMER_MODE_REL);
+}
|
|
+
|
|
+/*
+ * Create the slab cache from which event sets are allocated.
+ * Returns 0 on success, -ENOMEM if the cache cannot be created.
+ */
+int __init pfm_init_sets(void)
+{
+	pfm_set_cachep = kmem_cache_create("pfm_event_set",
+					   sizeof(struct pfm_event_set),
+					   SLAB_HWCACHE_ALIGN, 0, NULL);
+	if (pfm_set_cachep)
+		return 0;
+
+	PFM_ERR("cannot initialize event set slab");
+	return -ENOMEM;
+}
|
|
diff --git a/perfmon/perfmon_smpl.c b/perfmon/perfmon_smpl.c
|
|
new file mode 100644
|
|
index 0000000..e31fb15
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_smpl.c
|
|
@@ -0,0 +1,865 @@
|
|
+/*
|
|
+ * perfmon_smpl.c: perfmon2 sampling management
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <linux/fs.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/random.h>
|
|
+#include <linux/uaccess.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+/**
+ * pfm_smpl_buf_alloc - allocate memory for sampling buffer
+ * @ctx: context to operate on
+ * @rsize: requested size
+ *
+ * called from pfm_smpl_buffer_alloc_old() (IA64-COMPAT)
+ * and pfm_setup_smpl_fmt()
+ *
+ * interrupts are enabled, context is not locked.
+ *
+ * function is not static because it is called from the IA-64
+ * compatibility module (perfmon_compat.c)
+ *
+ * On success, ctx->smpl_real_addr/smpl_real_size describe the vmalloc
+ * area to free and ctx->smpl_addr/smpl_size the aligned, zeroed area
+ * usable for samples.
+ *
+ * Returns 0 on success, the error from pfm_smpl_buf_space_acquire(),
+ * or -ENOMEM if vmalloc() fails.
+ */
+int pfm_smpl_buf_alloc(struct pfm_context *ctx, size_t rsize)
+{
+#if PFM_ARCH_SMPL_ALIGN_SIZE > 0
+#define PFM_ALIGN_SMPL(a, f) (void *)((((unsigned long)(a))+(f-1)) & ~(f-1))
+#else
+#define PFM_ALIGN_SMPL(a, f) (a)
+#endif
+	void *addr, *real_addr;
+	size_t size, real_size;
+	int ret;
+
+	might_sleep();
+
+	/*
+	 * align page boundary
+	 */
+	size = PAGE_ALIGN(rsize);
+
+	/*
+	 * On some arch, it may be necessary to get an alignment greater
+	 * than page size to avoid certain cache effects (e.g., MIPS).
+	 * This is the reason for PFM_ARCH_SMPL_ALIGN_SIZE.
+	 */
+	real_size = size + PFM_ARCH_SMPL_ALIGN_SIZE;
+
+	PFM_DBG("req_size=%zu size=%zu real_size=%zu",
+		rsize,
+		size,
+		real_size);
+
+	ret = pfm_smpl_buf_space_acquire(ctx, real_size);
+	if (ret)
+		return ret;
+
+	/*
+	 * vmalloc can sleep. we do not hold
+	 * any spinlock and interrupts are enabled
+	 */
+	real_addr = addr = vmalloc(real_size);
+	if (!real_addr) {
+		PFM_DBG("cannot allocate sampling buffer");
+		goto unres;
+	}
+
+	/*
+	 * align the useable sampling buffer address to the arch requirement
+	 * This is a nop on most architectures
+	 */
+	addr = PFM_ALIGN_SMPL(real_addr, PFM_ARCH_SMPL_ALIGN_SIZE);
+
+	/*
+	 * addr may have been moved forward inside the allocation by the
+	 * alignment, so only size bytes (not real_size) are guaranteed
+	 * to be available from addr on. Clearing real_size bytes would
+	 * write past the end of the vmalloc'ed area.
+	 */
+	memset(addr, 0, size);
+
+	/*
+	 * due to cache aliasing, it may be necessary to flush the pages
+	 * on certain architectures (e.g., MIPS)
+	 */
+	pfm_cacheflush(addr, size);
+
+	/*
+	 * what needs to be freed
+	 */
+	ctx->smpl_real_addr = real_addr;
+	ctx->smpl_real_size = real_size;
+
+	/*
+	 * what is actually available to user
+	 */
+	ctx->smpl_addr = addr;
+	ctx->smpl_size = size;
+
+	PFM_DBG("addr=%p real_addr=%p", addr, real_addr);
+
+	return 0;
+unres:
+	/*
+	 * smpl_addr is NULL, no double freeing possible in pfm_context_free()
+	 */
+	pfm_smpl_buf_space_release(ctx, real_size);
+
+	return -ENOMEM;
+}
|
|
+
|
|
+/**
+ * pfm_smpl_buf_free - free resources associated with sampling
+ * @ctx: context to operate on
+ *
+ * Runs the exit callback of the sampling format (if any), drops the
+ * reference on the format, then releases the sampling buffer when one
+ * was allocated.
+ */
+void pfm_smpl_buf_free(struct pfm_context *ctx)
+{
+	struct pfm_smpl_fmt *format = ctx->smpl_fmt;
+
+	/*
+	 * a format may work without a buffer and still expect its
+	 * exit callback to be invoked
+	 */
+	if (format) {
+		if (format->fmt_exit)
+			(*format->fmt_exit)(ctx->smpl_addr);
+
+		/* drop our reference on the sampling format */
+		pfm_smpl_fmt_put(format);
+	}
+
+	/* no buffer attached to the context: nothing else to release */
+	if (!ctx->smpl_addr)
+		return;
+
+	pfm_smpl_buf_space_release(ctx, ctx->smpl_real_size);
+
+	PFM_DBG("free buffer real_addr=0x%p real_size=%zu",
+		ctx->smpl_real_addr,
+		ctx->smpl_real_size);
+
+	vfree(ctx->smpl_real_addr);
+}
|
|
+
|
|
+/**
+ * pfm_setup_smpl_fmt - initialization of sampling format and buffer
+ * @ctx: context to operate on
+ * @ctx_flags: context flags as passed by user
+ * @fmt_arg: sampling format arguments
+ * @filp: file descriptor associated with context
+ *
+ * called from __pfm_create_context()
+ *
+ * ctx->smpl_fmt is dereferenced unconditionally, so a format must be
+ * attached to the context before calling. The optional format
+ * callbacks are run in order: validate, getsize, (buffer allocation
+ * when the reported size is non-zero), init.
+ *
+ * Returns 0 on success or the first error reported by a callback or
+ * by the buffer allocation.
+ */
+int pfm_setup_smpl_fmt(struct pfm_context *ctx, u32 ctx_flags, void *fmt_arg,
+		       struct file *filp)
+{
+	struct pfm_smpl_fmt *fmt;
+	size_t size = 0;
+	int ret = 0;
+
+	fmt = ctx->smpl_fmt;
+
+	/*
+	 * validate parameters
+	 */
+	if (fmt->fmt_validate) {
+		ret = (*fmt->fmt_validate)(ctx_flags,
+					   ctx->regs.num_pmds,
+					   fmt_arg);
+		PFM_DBG("validate(0x%x,%p)=%d", ctx_flags, fmt_arg, ret);
+		if (ret)
+			goto error;
+	}
+
+	/*
+	 * check if buffer format needs buffer allocation;
+	 * size stays 0 when the format has no getsize callback
+	 */
+	if (fmt->fmt_getsize) {
+		ret = (*fmt->fmt_getsize)(ctx_flags, fmt_arg, &size);
+		if (ret) {
+			PFM_DBG("cannot get size ret=%d", ret);
+			goto error;
+		}
+	}
+
+	/*
+	 * allocate buffer
+	 * v20_compat is for IA-64 backward compatibility with perfmon v2.0
+	 */
+	if (size) {
+#ifdef CONFIG_IA64_PERFMON_COMPAT
+		/*
+		 * backward compatibility with perfmon v2.0 on Ia-64
+		 */
+		if (ctx->flags.ia64_v20_compat)
+			ret = pfm_smpl_buf_alloc_compat(ctx, size, filp);
+		else
+#endif
+			ret = pfm_smpl_buf_alloc(ctx, size);
+
+		if (ret)
+			goto error;
+
+	}
+
+	if (fmt->fmt_init) {
+		ret = (*fmt->fmt_init)(ctx, ctx->smpl_addr, ctx_flags,
+				       ctx->regs.num_pmds,
+				       fmt_arg);
+	}
+	/*
+	 * if there was an error, the buffer/resource will be freed
+	 * via pfm_context_free()
+	 */
+error:
+	return ret;
+}
|
|
+
|
|
+/*
+ * pfm_mask_monitoring - mask monitoring for a context
+ * @ctx: context to operate on
+ * @set: event set whose PMDs are saved and whose duration is updated
+ *
+ * Saves the PMD values, invokes the arch-specific masking handler,
+ * accumulates the duration of @set and moves the context to the
+ * PFM_CTX_MASKED state. For a time-switched set, the per-cpu hrtimer
+ * is cancelled and its remaining time is stored in set->hrtimer_rem.
+ */
+void pfm_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ u64 now;
|
|
+
|
|
+ now = sched_clock();
|
|
+
|
|
+ /*
|
|
+ * we save the PMD values such that we can read them while
|
|
+ * MASKED without having the thread stopped
|
|
+ * because monitoring is stopped
|
|
+ *
|
|
+ * pfm_save_pmds() could be avoided if we knew
|
|
+ * that pfm_arch_intr_freeze() had saved them already
|
|
+ */
|
|
+ pfm_save_pmds(ctx, set);
|
|
+ pfm_arch_mask_monitoring(ctx, set);
|
|
+ /*
|
|
+ * accumulate the set duration up to this point
|
|
+ */
|
|
+ set->duration += now - set->duration_start;
|
|
+
|
|
+ ctx->state = PFM_CTX_MASKED;
|
|
+
|
|
+ /*
|
|
+ * need to stop timer and remember remaining time
|
|
+ * will be reloaded in pfm_unmask_monitoring
|
|
+ * NOTE(review): the timer is cancelled right here; the older remark about
|
|
+ * cancelling it in the tail of the interrupt handler looks stale - confirm
|
|
+ */
|
|
+ if (set->flags & PFM_SETFL_TIME_SWITCH) {
|
|
+ struct hrtimer *h = &__get_cpu_var(pfm_hrtimer);
|
|
+ hrtimer_cancel(h);
|
|
+ set->hrtimer_rem = hrtimer_get_remaining(h);
|
|
+ }
|
|
+ PFM_DBG_ovfl("can_restart=%u", ctx->flags.can_restart);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * pfm_unmask_monitoring - unmask monitoring
|
|
+ * @ctx: context to work with
|
|
+ * @set: current active set
|
|
+ *
|
|
+ * interrupts are masked when entering this function.
|
|
+ * nothing is done unless the context is in MASKED state when calling.
|
|
+ *
|
|
+ * Upon return, the active set may have changed when using timeout
|
|
+ * based switching (pfm_restart_timer() may switch sets).
|
|
+ */
|
|
+static void pfm_unmask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set)
|
|
+{
|
|
+ if (ctx->state != PFM_CTX_MASKED)
|
|
+ return;
|
|
+
|
|
+ PFM_DBG_ovfl("unmasking monitoring");
|
|
+
|
|
+ /*
|
|
+ * must be done before calling
|
|
+ * pfm_arch_unmask_monitoring()
|
|
+ */
|
|
+ ctx->state = PFM_CTX_LOADED;
|
|
+
|
|
+ /*
|
|
+ * we need to restore the PMDs because they
|
|
+ * may have been modified by user while MASKED in
|
|
+ * which case the actual registers have not yet
|
|
+ * been updated
|
|
+ */
|
|
+ pfm_arch_restore_pmds(ctx, set);
|
|
+
|
|
+ /*
|
|
+ * call arch specific handler
|
|
+ */
|
|
+ pfm_arch_unmask_monitoring(ctx, set);
|
|
+
|
|
+ /*
|
|
+ * clear force reload flag. May have been set
|
|
+ * in pfm_write_pmcs or pfm_write_pmds
|
|
+ */
|
|
+ set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH;
|
|
+
|
|
+ /*
|
|
+ * restart the set duration measurement from now
|
|
+ */
|
|
+ set->duration_start = sched_clock();
|
|
+
|
|
+ /*
|
|
+ * restart hrtimer if the set uses timeout-based switching
|
|
+ */
|
|
+ if (set->flags & PFM_SETFL_TIME_SWITCH) {
|
|
+ pfm_restart_timer(ctx, set);
|
|
+ /* careful here as pfm_restart_timer may switch sets */
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
+ * pfm_reset_pmds - reload the PMDs flagged in set->reset_pmds
+ * @ctx: context to operate on
+ * @set: event set whose registers are reset
+ * @num_pmds: number of bits set in set->reset_pmds to process
+ * @reset_mode: PFM_PMD_RESET_LONG selects the long reset value, any
+ *		other value the short reset value
+ *
+ * The scan of set->reset_pmds only stops once @num_pmds set bits have
+ * been found, so @num_pmds must not exceed the number of bits set
+ * (callers in this file pass the bitmap_weight() of the mask).
+ *
+ * The hardware registers are written only when the context is not in
+ * the PFM_CTX_MASKED state; the software copies are always updated.
+ */
+void pfm_reset_pmds(struct pfm_context *ctx,
+		    struct pfm_event_set *set,
+		    int num_pmds,
+		    int reset_mode)
+{
+	u64 val, mask, new_seed;
+	struct pfm_pmd *reg;
+	unsigned int i, not_masked;
+
+	not_masked = ctx->state != PFM_CTX_MASKED;
+
+	PFM_DBG_ovfl("%s r_pmds=0x%llx not_masked=%d",
+		reset_mode == PFM_PMD_RESET_LONG ? "long" : "short",
+		(unsigned long long)set->reset_pmds[0],
+		not_masked);
+
+	pfm_stats_inc(reset_pmds_count);
+
+	for (i = 0; num_pmds; i++) {
+		if (test_bit(i, cast_ulp(set->reset_pmds))) {
+			num_pmds--;
+
+			reg = set->pmds + i;
+
+			val = reset_mode == PFM_PMD_RESET_LONG ?
+			       reg->long_reset : reg->short_reset;
+
+			if (reg->flags & PFM_REGFL_RANDOM) {
+				mask = reg->mask;
+				new_seed = random32();
+
+				/*
+				 * construct a full 64-bit random value
+				 * when the mask has bits above bit 31
+				 */
+				if (unlikely((mask >> 32) != 0))
+					new_seed |= (u64)random32() << 32;
+
+				/* counter values are negative numbers! */
+				val -= (new_seed & mask);
+			}
+
+			set->pmds[i].value = val;
+			reg->lval = val;
+
+			/*
+			 * not all PMD to reset are necessarily
+			 * counters
+			 */
+			if (not_masked)
+				pfm_write_pmd(ctx, i, val);
+
+			PFM_DBG_ovfl("set%u pmd%u sval=0x%llx",
+					set->id,
+					i,
+					(unsigned long long)val);
+		}
+	}
+
+	/*
+	 * done with reset: i is one past the last PMD processed
+	 */
+	bitmap_zero(cast_ulp(set->reset_pmds), i);
+
+	/*
+	 * make changes visible
+	 */
+	if (not_masked)
+		pfm_arch_serialize();
+}
|
|
+
|
|
+/*
|
|
+ * called from pfm_handle_work() and __pfm_restart()
|
|
+ * for system-wide and per-thread context to resume
|
|
+ * monitoring after a user level notification.
|
|
+ *
|
|
+ * In both cases, the context is locked and interrupts
|
|
+ * are disabled.
|
|
+ */
|
|
+void pfm_resume_after_ovfl(struct pfm_context *ctx)
|
|
+{
|
|
+ struct pfm_smpl_fmt *fmt;
|
|
+ u32 rst_ctrl;
|
|
+ struct pfm_event_set *set;
|
|
+ u64 *reset_pmds;
|
|
+ void *hdr;
|
|
+ int state, ret;
|
|
+
|
|
+ hdr = ctx->smpl_addr;
|
|
+ fmt = ctx->smpl_fmt;
|
|
+ state = ctx->state;
|
|
+ set = ctx->active_set;
|
|
+ ret = 0;
|
|
+
|
|
+ if (hdr) {
|
|
+ rst_ctrl = 0;
|
|
+ prefetch(hdr);
|
|
+ } else {
|
|
+ rst_ctrl = PFM_OVFL_CTRL_RESET;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * if using a sampling buffer format and it has a restart callback,
|
|
+ * then invoke it. hdr may be NULL, if the format does not use a
|
|
+ * perfmon buffer. The callback may update rst_ctrl.
|
|
+ */
|
|
+ if (fmt && fmt->fmt_restart)
|
|
+ ret = (*fmt->fmt_restart)(state == PFM_CTX_LOADED, &rst_ctrl,
|
|
+ hdr);
|
|
+
|
|
+ reset_pmds = set->reset_pmds;
|
|
+
|
|
+ PFM_DBG("fmt_restart=%d reset_count=%d set=%u r_pmds=0x%llx switch=%d "
|
|
+ "ctx_state=%d",
|
|
+ ret,
|
|
+ ctx->flags.reset_count,
|
|
+ set->id,
|
|
+ (unsigned long long)reset_pmds[0],
|
|
+ (set->priv_flags & PFM_SETFL_PRIV_SWITCH),
|
|
+ state);
|
|
+
|
|
+ if (!ret) {
|
|
+ /*
|
|
+ * switch set if a switch is pending, else reset PMDs if requested
|
|
+ */
|
|
+ if (set->priv_flags & PFM_SETFL_PRIV_SWITCH) {
|
|
+ set->priv_flags &= ~PFM_SETFL_PRIV_SWITCH;
|
|
+ pfm_switch_sets(ctx, NULL, PFM_PMD_RESET_LONG, 0);
|
|
+ set = ctx->active_set;
|
|
+ } else if (rst_ctrl & PFM_OVFL_CTRL_RESET) {
|
|
+ int nn;
|
|
+ nn = bitmap_weight(cast_ulp(set->reset_pmds),
|
|
+ ctx->regs.max_pmd);
|
|
+ if (nn)
|
|
+ pfm_reset_pmds(ctx, set, nn, PFM_PMD_RESET_LONG);
|
|
+ }
|
|
+
|
|
+ if (!(rst_ctrl & PFM_OVFL_CTRL_MASK))
|
|
+ pfm_unmask_monitoring(ctx, set);
|
|
+ else
|
|
+ PFM_DBG("stopping monitoring?");
|
|
+ ctx->state = PFM_CTX_LOADED;
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This function is called when we need to perform asynchronous
|
|
+ * work on a context. This function is called ONLY when about to
|
|
+ * return to user mode (very much like with signal handling).
|
|
+ *
|
|
+ * There are several reasons why we come here:
|
|
+ *
|
|
+ * - per-thread mode, not self-monitoring, to reset the counters
|
|
+ * after a pfm_restart()
|
|
+ *
|
|
+ * - we are zombie and we need to cleanup our state
|
|
+ *
|
|
+ * - we need to block after an overflow notification
|
|
+ * on a context with the PFM_OVFL_NOTIFY_BLOCK flag
|
|
+ *
|
|
+ * This function is never called for a system-wide context.
|
|
+ *
|
|
+ * pfm_handle_work() can be called with interrupts enabled
|
|
+ * (TIF_NEED_RESCHED) or disabled. The wait_for_completion_interruptible
|
|
+ * call may sleep, therefore we must re-enable interrupts
|
|
+ * to avoid deadlocks. It is safe to do so because this function
|
|
+ * is called ONLY when returning to user level, in which case
|
|
+ * there is no risk of kernel stack overflow due to deep
|
|
+ * interrupt nesting.
|
|
+ */
|
|
+void pfm_handle_work(struct pt_regs *regs)
|
|
+{
|
|
+ struct pfm_context *ctx;
|
|
+ unsigned long flags, dummy_flags;
|
|
+ int type, ret, info;
|
|
+
|
|
+#ifdef CONFIG_PPC
|
|
+ /*
|
|
+ * This is just a temporary fix. Obviously we'd like to fix the powerpc
|
|
+ * code to make that check before calling __pfm_handle_work() to
|
|
+ * prevent the function call overhead, but the call is made from
|
|
+ * assembly code, so it will take a little while to figure out how to
|
|
+ * perform the check correctly.
|
|
+ */
|
|
+ if (!test_thread_flag(TIF_PERFMON_WORK))
|
|
+ return;
|
|
+#endif
|
|
+
|
|
+ if (!user_mode(regs))
|
|
+ return;
|
|
+
|
|
+ clear_thread_flag(TIF_PERFMON_WORK);
|
|
+
|
|
+ pfm_stats_inc(handle_work_count);
|
|
+
|
|
+ ctx = current->pfm_context;
|
|
+ if (ctx == NULL) {
|
|
+ PFM_DBG("[%d] has no ctx", current->pid);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ BUG_ON(ctx->flags.system);
|
|
+
|
|
+ spin_lock_irqsave(&ctx->lock, flags);
|
|
+
|
|
+ type = ctx->flags.work_type;
|
|
+ ctx->flags.work_type = PFM_WORK_NONE;
|
|
+
|
|
+ PFM_DBG("work_type=%d reset_count=%d",
|
|
+ type,
|
|
+ ctx->flags.reset_count);
|
|
+
|
|
+ switch (type) {
|
|
+ case PFM_WORK_ZOMBIE:
|
|
+ goto do_zombie;
|
|
+ case PFM_WORK_RESET:
|
|
+ /* simply reset, no blocking */
|
|
+ goto skip_blocking;
|
|
+ case PFM_WORK_NONE:
|
|
+ PFM_DBG("unexpected PFM_WORK_NONE");
|
|
+ goto nothing_todo;
|
|
+ case PFM_WORK_BLOCK:
|
|
+ break;
|
|
+ default:
|
|
+ PFM_DBG("unkown type=%d", type);
|
|
+ goto nothing_todo;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * restore interrupt mask to what it was on entry.
|
|
+ * Could be enabled/disabled.
|
|
+ */
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+
|
|
+ /*
|
|
+ * force interrupt enable because wait_for_completion_interruptible() may sleep
|
|
+ */
|
|
+ local_irq_enable();
|
|
+
|
|
+ PFM_DBG("before block sleeping");
|
|
+
|
|
+ /*
|
|
+ * may go through without blocking on SMP systems
|
|
+ * if restart has been received already by the time we wait
|
|
+ */
|
|
+ ret = wait_for_completion_interruptible(&ctx->restart_complete);
|
|
+
|
|
+ PFM_DBG("after block sleeping ret=%d", ret);
|
|
+
|
|
+ /*
|
|
+ * lock context and mask interrupts again
|
|
+ * We save flags into a dummy because we may have
|
|
+ * altered interrupts mask compared to entry in this
|
|
+ * function.
|
|
+ */
|
|
+ spin_lock_irqsave(&ctx->lock, dummy_flags);
|
|
+
|
|
+ if (ctx->state == PFM_CTX_ZOMBIE)
|
|
+ goto do_zombie;
|
|
+
|
|
+ /*
|
|
+ * if the wait was interrupted (ret < 0) we don't restart anything
|
|
+ */
|
|
+ if (ret < 0)
|
|
+ goto nothing_todo;
|
|
+
|
|
+skip_blocking:
|
|
+ /*
|
|
+ * iterate over the number of pending resets
|
|
+ * There are certain situations where there may be
|
|
+ * multiple notifications sent before a pfm_restart().
|
|
+ * As such, it may be that multiple pfm_restart() are
|
|
+ * issued before the monitored thread gets to
|
|
+ * pfm_handle_work(). To avoid losing restarts, pfm_restart()
|
|
+ * increments a counter (reset_count). Here, we take this
|
|
+ * into account by potentially calling pfm_resume_after_ovfl()
|
|
+ * multiple times. It is up to the sampling format to take the
|
|
+ * appropriate actions.
|
|
+ */
|
|
+ while (ctx->flags.reset_count) {
|
|
+ pfm_resume_after_ovfl(ctx);
|
|
+ /* careful as active set may have changed */
|
|
+ ctx->flags.reset_count--;
|
|
+ }
|
|
+
|
|
+nothing_todo:
|
|
+ /*
|
|
+ * restore flags as they were upon entry
|
|
+ */
|
|
+ spin_unlock_irqrestore(&ctx->lock, flags);
|
|
+ return;
|
|
+
|
|
+do_zombie:
|
|
+ PFM_DBG("context is zombie, bailing out");
|
|
+
|
|
+ __pfm_unload_context(ctx, &info);
|
|
+
|
|
+ /*
|
|
+ * keep the spinlock check happy
|
|
+ */
|
|
+ spin_unlock(&ctx->lock);
|
|
+
|
|
+ /*
|
|
+ * enable interrupt for vfree()
|
|
+ */
|
|
+ local_irq_enable();
|
|
+
|
|
+ /*
|
|
+ * cancel timer now that context is unlocked
|
|
+ */
|
|
+ if (info & 0x2) {
|
|
+ ret = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
|
|
+ PFM_DBG("timeout cancel=%d", ret);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * actual context free
|
|
+ */
|
|
+ pfm_free_context(ctx);
|
|
+
|
|
+ /*
|
|
+ * restore interrupts as they were upon entry
|
|
+ */
|
|
+ local_irq_restore(flags);
|
|
+
|
|
+ /* always true */
|
|
+ if (info & 0x1)
|
|
+ pfm_session_release(0, 0);
|
|
+}
|
|
+
|
|
+/**
+ * __pfm_restart - resume monitoring after user-level notification
+ * @ctx: context to operate on
+ * @info: return information used to free resource once unlocked
+ *
+ * Called from sys_pfm_restart(). It is used when overflow notification
+ * is requested: for each notification received, the user must call
+ * pfm_restart() to tell the kernel it is done processing it.
+ *
+ * When the caller is doing user level sampling, this function resets
+ * the overflowed counters and resumes monitoring which is normally stopped
+ * during notification (always the consequence of a counter overflow).
+ *
+ * When using a sampling format, the format restart() callback is invoked,
+ * overflowed PMDS may be reset based upon decision from sampling format.
+ *
+ * When operating in per-thread mode, and when not self-monitoring, the
+ * monitored thread DOES NOT need to be stopped, unlike for many other calls.
+ * This means that the effect of the restart may not necessarily be observed
+ * right when returning from the call. For instance, counters may not already
+ * be reset in the other thread.
+ *
+ * When operating in system-wide, the caller must be running on the monitored
+ * CPU.
+ *
+ * The context is locked and interrupts are disabled.
+ *
+ * info value upon return:
+ *	- bit 0: when set, must issue complete() on restart semaphore
+ *
+ * return: 0 on success, -EBUSY if the context state does not allow a
+ * restart or if no restart is pending (can_restart is 0)
+ */
+int __pfm_restart(struct pfm_context *ctx, int *info)
+{
+	int state = ctx->state;
+	int restartable;
+
+	PFM_DBG("state=%d can_restart=%d reset_count=%d",
+		state,
+		ctx->flags.can_restart,
+		ctx->flags.reset_count);
+
+	*info = 0;
+
+	/*
+	 * a masked context can always be restarted, a loaded one only
+	 * when it has a sampling buffer whose format has a restart callback
+	 */
+	restartable = state == PFM_CTX_MASKED
+		|| (state == PFM_CTX_LOADED
+		    && ctx->smpl_addr && ctx->smpl_fmt->fmt_restart);
+	if (!restartable) {
+		PFM_DBG("invalid state=%d", state);
+		return -EBUSY;
+	}
+
+	/*
+	 * restart is only allowed once a notification has been received
+	 */
+	if (!ctx->flags.can_restart) {
+		PFM_DBG("no restart can_restart=0");
+		return -EBUSY;
+	}
+
+	pfm_stats_inc(pfm_restart_count);
+
+	/*
+	 * from here on, the context is either LOADED or MASKED
+	 */
+	ctx->flags.can_restart--;
+
+	/*
+	 * self-monitoring and system-wide: resume directly
+	 */
+	if (ctx->task == current || ctx->flags.system) {
+		pfm_resume_after_ovfl(ctx);
+		return 0;
+	}
+
+	/*
+	 * restarting another task
+	 *
+	 * if blocking, then post the semaphore if PFM_CTX_MASKED, i.e.
+	 * the task is blocked or on its way to block. That's the normal
+	 * restart path. If the monitoring is not masked, then the task
+	 * can be actively monitoring and we cannot directly intervene.
+	 * Therefore we use the trap mechanism to catch the task and
+	 * force it to reset the buffer/reset PMDs.
+	 *
+	 * if non-blocking, then we ensure that the task will go into
+	 * pfm_handle_work() before returning to user mode.
+	 *
+	 * We cannot explicitly reset another task, it MUST always
+	 * be done by the task itself. This works for system wide because
+	 * the tool that is controlling the session is logically doing
+	 * "self-monitoring".
+	 */
+	if (!ctx->flags.block || state != PFM_CTX_MASKED) {
+		PFM_DBG("[%d] armed exit trap", ctx->task->pid);
+		pfm_post_work(ctx->task, ctx, PFM_WORK_RESET);
+	} else {
+		PFM_DBG("unblocking [%d]", ctx->task->pid);
+		/*
+		 * complete() cannot be called with the context locked:
+		 * there is a potential deadlock with the PMU context
+		 * switch code due to a lock inversion between
+		 * task_rq_lock() and the context lock.
+		 * Instead we record that the complete is needed and
+		 * sys_pfm_restart() issues it once the context lock
+		 * is released.
+		 */
+		*info = 1;
+	}
+	ctx->flags.reset_count++;
+	return 0;
+}
|
|
+
|
|
+/**
+ * pfm_get_smpl_arg -- copy user arguments to pfm_create_context() related to sampling format
+ * @fmt_uname: format name as passed by user
+ * @fmt_uarg: format optional argument as passed by user
+ * @usize: size of the structure passed in fmt_uarg
+ * @arg: upon success, kernel copy of fmt_uarg (NULL if the format takes no argument)
+ * @fmt: upon success, pointer to the sampling format
+ *
+ * Upon success, the refcount of the format has been increased and
+ * *arg is kmalloc'ed, thus it needs a kfree by caller.
+ * Upon failure, the format reference is dropped, nothing stays allocated
+ * and neither *arg nor *fmt is modified.
+ *
+ * return:
+ *	0 on success
+ *	error reported by getname() if the format name cannot be copied
+ *	-EINVAL if the format is not found or usize does not match
+ *	-ENOMEM if the argument copy cannot be allocated
+ *	-EFAULT if fmt_uarg cannot be read
+ */
+int pfm_get_smpl_arg(char __user *fmt_uname, void __user *fmt_uarg, size_t usize, void **arg,
+		     struct pfm_smpl_fmt **fmt)
+{
+	struct pfm_smpl_fmt *f;
+	char *fmt_name;
+	void *addr = NULL;
+	size_t sz;
+	int ret;
+
+	/*
+	 * getname() reports failures with an ERR_PTR() encoded pointer,
+	 * not with NULL, so the result must be tested with IS_ERR()
+	 * before it is used or handed to putname()
+	 */
+	fmt_name = getname(fmt_uname);
+	if (IS_ERR(fmt_name)) {
+		PFM_DBG("getname failed");
+		return PTR_ERR(fmt_name);
+	}
+
+	/*
+	 * find fmt and increase refcount
+	 */
+	f = pfm_smpl_fmt_get(fmt_name);
+
+	putname(fmt_name);
+
+	if (f == NULL) {
+		PFM_DBG("buffer format not found");
+		return -EINVAL;
+	}
+
+	/*
+	 * expected format argument size
+	 */
+	sz = f->fmt_arg_size;
+
+	/*
+	 * check user size matches expected size
+	 * usize = -1 is for IA-64 backward compatibility
+	 */
+	ret = -EINVAL;
+	if (sz != usize && usize != (size_t)-1) {
+		PFM_DBG("invalid arg size %zu, format expects %zu",
+			usize, sz);
+		goto error;
+	}
+
+	if (sz) {
+		ret = -ENOMEM;
+		addr = kmalloc(sz, GFP_KERNEL);
+		if (addr == NULL)
+			goto error;
+
+		ret = -EFAULT;
+		if (copy_from_user(addr, fmt_uarg, sz))
+			goto error;
+	}
+	*arg = addr;
+	*fmt = f;
+	return 0;
+
+error:
+	/* addr is still NULL if the failure happened before the kmalloc */
+	kfree(addr);
+	pfm_smpl_fmt_put(f);
+	return ret;
+}
|
|
diff --git a/perfmon/perfmon_syscalls.c b/perfmon/perfmon_syscalls.c
|
|
new file mode 100644
|
|
index 0000000..8777b58
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_syscalls.c
|
|
@@ -0,0 +1,1060 @@
|
|
+/*
|
|
+ * perfmon_syscalls.c: perfmon2 system call interface
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/fs.h>
|
|
+#include <linux/ptrace.h>
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include <linux/uaccess.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+/*
|
|
+ * Context locking rules:
|
|
+ * ---------------------
|
|
+ * - any thread with access to the file descriptor of a context can
|
|
+ * potentially issue perfmon calls
|
|
+ *
|
|
+ * - calls must be serialized to guarantee correctness
|
|
+ *
|
|
+ * - as soon as a context is attached to a thread or CPU, it may be
|
|
+ * actively monitoring. On some architectures, such as IA-64, this
|
|
+ * is true even though the pfm_start() call has not been made. This
|
|
+ * comes from the fact that on some architectures, it is possible to
|
|
+ * start/stop monitoring from userland.
|
|
+ *
|
|
+ * - If monitoring is active, then there can be PMU interrupts. Because
|
|
+ * context accesses must be serialized, the perfmon system calls
|
|
+ * must mask interrupts as soon as the context is attached.
|
|
+ *
|
|
+ * - perfmon system calls that operate with the context unloaded cannot
|
|
+ * assume it is actually unloaded when they are called. They first need
|
|
+ * to check and for that they need interrupts masked. Then, if the
|
|
+ * context is actually unloaded, they can unmask interrupts.
|
|
+ *
|
|
+ * - interrupt masking holds true for other internal perfmon functions as
|
|
+ * well. Except for PMU interrupt handler because those interrupts
|
|
+ * cannot be nested.
|
|
+ *
|
|
+ * - we mask ALL interrupts instead of just the PMU interrupt because we
|
|
+ * also need to protect against timer interrupts which could trigger
|
|
+ * a set switch.
|
|
+ */
|
|
+#ifdef CONFIG_UTRACE
+#include <linux/utrace.h>
+
+/*
+ * utrace report_quiesce callback: signal the completion stored in
+ * engine->data by pfm_wait_task_stopped()
+ */
+static u32
+stopper_quiesce(struct utrace_attached_engine *engine, struct task_struct *tsk)
+{
+	PFM_DBG("quiesced [%d]", tsk->pid);
+	complete(engine->data);
+	return UTRACE_ACTION_RESUME;
+}
+
+/*
+ * pfm_resume_task - detach the utrace engine attached by
+ * pfm_wait_task_stopped()
+ * @t: task the engine is attached to
+ * @data: engine returned via the data argument of pfm_wait_task_stopped()
+ */
+void
+pfm_resume_task(struct task_struct *t, void *data)
+{
+	PFM_DBG("utrace detach [%d]", t->pid);
+	(void) utrace_detach(t, data);
+}
+
+static const struct utrace_engine_ops utrace_ops =
+{
+	.report_quiesce = stopper_quiesce,
+};
+
+/*
+ * pfm_wait_task_stopped - attach a utrace engine to a task and wait
+ * for it to report quiesce
+ * @task: task to stop
+ * @data: upon success, the attached engine, to be passed to
+ *	  pfm_resume_task(). Not modified on failure.
+ *
+ * return:
+ *	0 on success
+ *	the utrace_attach() or utrace_set_flags() error, or the
+ *	wait_for_completion_interruptible() error if the wait was
+ *	interrupted. The engine is detached before returning an error.
+ */
+static int pfm_wait_task_stopped(struct task_struct *task, void **data)
+{
+	DECLARE_COMPLETION_ONSTACK(done);
+	struct utrace_attached_engine *eng;
+	int ret;
+
+	eng = utrace_attach(task, UTRACE_ATTACH_CREATE, &utrace_ops, &done);
+	if (IS_ERR(eng))
+		return PTR_ERR(eng);
+
+	ret = utrace_set_flags(task, eng,
+			       UTRACE_ACTION_QUIESCE | UTRACE_EVENT(QUIESCE));
+	PFM_DBG("wait quiesce [%d]", task->pid);
+	if (!ret)
+		ret = wait_for_completion_interruptible(&done);
+
+	if (ret)
+		(void) utrace_detach(task, eng);
+	else
+		*data = eng;
+
+	/*
+	 * propagate the failure: the caller must not assume the task is
+	 * stopped when the flags could not be set or the wait was
+	 * interrupted
+	 */
+	return ret;
+}
+#else /* !CONFIG_UTRACE */
+/*
+ * pfm_wait_task_stopped - check that the caller may attach to a task
+ * and that ptrace_check_attach() accepts it
+ * @task: task to check
+ * @data: always set to NULL, there is nothing to undo in pfm_resume_task()
+ *
+ * return:
+ *	0 on success
+ *	-EPERM if ptrace_may_access() denies PTRACE_MODE_ATTACH
+ *	otherwise the value returned by ptrace_check_attach()
+ */
+static int pfm_wait_task_stopped(struct task_struct *task, void **data)
+{
+	int ret;
+
+	*data = NULL;
+
+	/*
+	 * returns 0 if cannot attach
+	 */
+	ret = ptrace_may_access(task, PTRACE_MODE_ATTACH);
+	PFM_DBG("may_attach=%d", ret);
+	if (!ret)
+		return -EPERM;
+
+	ret = ptrace_check_attach(task, 0);
+	PFM_DBG("check_attach=%d", ret);
+	return ret;
+}
+
+/* nothing to resume when utrace is not used */
+void pfm_resume_task(struct task_struct *t, void *data)
+{}
+#endif
|
|
+
|
|
+/*
+ * State needed to drop the file reference taken on entry of a perfmon
+ * system call: filled by pfm_acquire_ctx_from_fd() and consumed by
+ * pfm_release_ctx_from_fd().
+ */
+struct pfm_syscall_cookie {
+	struct file *filp;	/* file returned by fget_light() */
+	int fput_needed;	/* fput_needed from fget_light(), for fput_light() */
+};
|
|
+
|
|
+/*
+ * pfm_task_incompatible - check whether a context can be attached to a task
+ * @ctx: context to attach
+ * @task: candidate task
+ *
+ * cannot attach if :
+ *	- kernel task (no mm): -EPERM
+ *	- blocking notification requested when self-monitoring: -EINVAL
+ *	- task is dead or zombie: -EBUSY
+ *
+ * Ownership of the task by the caller is not checked here.
+ *
+ * return: 0 if the task is compatible, a negative error code otherwise
+ */
+static int pfm_task_incompatible(struct pfm_context *ctx,
+				 struct task_struct *task)
+{
+	int err = 0;
+
+	if (!task->mm) {
+		/* kernel threads have no mm */
+		PFM_DBG("cannot attach to kernel thread [%d]", task->pid);
+		err = -EPERM;
+	} else if (ctx->flags.block && task == current) {
+		/* blocking on notification makes no sense for oneself */
+		PFM_DBG("cannot use block on notification when self-monitoring"
+			"[%d]", task->pid);
+		err = -EINVAL;
+	} else if (task->exit_state == EXIT_ZOMBIE
+		   || task->exit_state == EXIT_DEAD) {
+		/* the task is already exiting */
+		PFM_DBG("cannot attach to zombie/dead task [%d]", task->pid);
+		err = -EBUSY;
+	}
+	return err;
+}
|
|
+
|
|
+/**
+ * pfm_get_task -- check permission and acquire task to monitor
+ * @ctx: perfmon context
+ * @pid: identification of the task to check
+ * @task: upon return, a pointer to the task to monitor
+ * @data: upon return, opaque value filled by pfm_wait_task_stopped(),
+ *	  NULL if it did not provide one
+ *
+ * This function is used in per-thread mode only AND when not
+ * self-monitoring. It finds the task to monitor and checks
+ * that the caller has permissions to attach. It also checks
+ * that the task is stopped so that we can safely modify its state.
+ *
+ * task refcount is incremented when successful. On failure the
+ * reference is dropped and *task is not modified.
+ *
+ * return:
+ *	0 on success
+ *	-ESRCH if no task matches pid
+ *	otherwise the error from pfm_task_incompatible() or
+ *	pfm_wait_task_stopped()
+ */
+static int pfm_get_task(struct pfm_context *ctx, pid_t pid,
+			struct task_struct **task, void **data)
+{
+	struct task_struct *p;
+	int ret;
+
+	*data = NULL;
+
+	/*
+	 * When attaching to another thread we must ensure
+	 * that the thread is actually stopped. Obviously, this
+	 * constraint does not exist for self-monitoring threads.
+	 *
+	 * The lookup and the reference are taken under tasklist_lock.
+	 */
+	read_lock(&tasklist_lock);
+
+	p = find_task_by_vpid(pid);
+	if (p)
+		get_task_struct(p);
+
+	read_unlock(&tasklist_lock);
+
+	if (!p) {
+		PFM_DBG("task not found %d", pid);
+		return -ESRCH;
+	}
+
+	ret = pfm_task_incompatible(ctx, p);
+	if (!ret)
+		ret = pfm_wait_task_stopped(p, data);
+
+	if (ret) {
+		/* undo get_task_struct() */
+		put_task_struct(p);
+		return ret;
+	}
+
+	*task = p;
+	return 0;
+}
|
|
+
|
|
+/*
+ * pfm_check_task_state - verify that a command can operate on the context
+ * @ctx: context to check
+ * @check_mask: PFM_CMD_* requirements of the command
+ * @flags: interrupt flags saved when the caller locked the context,
+ *	   updated if the lock is dropped and retaken
+ * @resume: set to NULL on entry, may be filled by pfm_wait_task_stopped()
+ *
+ * context must be locked when calling this function. The lock may be
+ * released and reacquired while waiting for the monitored thread to stop.
+ *
+ * return: 0 if the command can proceed, a negative error code otherwise
+ */
+int pfm_check_task_state(struct pfm_context *ctx, int check_mask,
+			 unsigned long *flags, void **resume)
+{
+	struct task_struct *task;
+	unsigned long entry_flags, relock_flags;
+	int state, ret;
+
+	*resume = NULL;
+
+	for (;;) {
+		/*
+		 * task is NULL for system-wide context
+		 */
+		task = ctx->task;
+		state = ctx->state;
+		entry_flags = *flags;
+
+		PFM_DBG("state=%d check_mask=0x%x", state, check_mask);
+
+		/*
+		 * if the context is detached, then we do not touch
+		 * hardware, therefore there is no restriction on when we
+		 * can access it.
+		 */
+		if (state == PFM_CTX_UNLOADED)
+			return 0;
+
+		/*
+		 * no command can operate on a zombie context.
+		 * A context becomes zombie when the file that identifies
+		 * it is closed while the context is still attached to the
+		 * thread it monitors.
+		 */
+		if (state == PFM_CTX_ZOMBIE)
+			return -EINVAL;
+
+		/*
+		 * at this point, state is PFM_CTX_LOADED or PFM_CTX_MASKED
+		 * but some commands require the context to be unloaded
+		 */
+		if (check_mask & PFM_CMD_UNLOADED) {
+			PFM_DBG("state=%d, cmd needs context unloaded", state);
+			return -EBUSY;
+		}
+
+		/*
+		 * self-monitoring always ok.
+		 */
+		if (task == current)
+			return 0;
+
+		/*
+		 * for syswide, the calling thread must be running on the
+		 * cpu the context is bound to.
+		 */
+		if (ctx->flags.system)
+			return ctx->cpu != smp_processor_id() ? -EBUSY : 0;
+
+		/*
+		 * at this point, monitoring another thread
+		 *
+		 * the pfm_unload_context() command is allowed on masked
+		 * context
+		 */
+		if (state == PFM_CTX_MASKED && !(check_mask & PFM_CMD_UNLOAD))
+			return 0;
+
+		/*
+		 * When we operate on another thread, we must wait for it
+		 * to be stopped and completely off any CPU as we need to
+		 * access the PMU state (or machine state).
+		 *
+		 * A thread can be put in the STOPPED state in various ways
+		 * including PTRACE_ATTACH, or when it receives a SIGSTOP
+		 * signal. We enforce that the thread must be ptraced, so it
+		 * is stopped AND it CANNOT wake up while we operate on it
+		 * because this would require an action from the ptracing
+		 * parent which is the thread that is calling this function.
+		 *
+		 * The dependency on ptrace, imposes that only the ptracing
+		 * parent can issue command on a thread. This is unfortunate
+		 * but we do not know of a better way of doing this.
+		 */
+		if (!(check_mask & PFM_CMD_STOPPED))
+			return 0;
+
+		spin_unlock_irqrestore(&ctx->lock, entry_flags);
+
+		/*
+		 * check that the thread is ptraced AND STOPPED
+		 */
+		ret = pfm_wait_task_stopped(task, resume);
+
+		spin_lock_irqsave(&ctx->lock, relock_flags);
+
+		/*
+		 * flags may be different than when we released the lock
+		 */
+		*flags = relock_flags;
+
+		if (ret)
+			return ret;
+
+		/*
+		 * the lock was dropped: if the state has changed, all the
+		 * checks must be redone
+		 */
+		if (likely(ctx->state == state))
+			return 0;
+
+		PFM_DBG("old_state=%d new_state=%d",
+			state,
+			ctx->state);
+	}
+}
|
|
+
|
|
+/*
+ * pfm_get_args - Function used to copy the syscall argument into kernel memory.
+ * @ureq: user argument
+ * @sz: user argument size
+ * @lsz: size of stack buffer
+ * @laddr: stack buffer address
+ * @req: point to start of kernel copy of the argument
+ * @ptr_free: address of kernel copy to free
+ *
+ * There are two options:
+ *	- use a stack buffer described by laddr (addresses) and lsz (size)
+ *	- allocate memory when sz is larger than lsz
+ *
+ * return:
+ *	< 0 : in case of error
+ *	      -E2BIG if sz exceeds the sysadmin limit (ptr_free not updated)
+ *	      -ENOMEM if allocation failed (ptr_free not updated)
+ *	      -EFAULT if the user buffer cannot be read (ptr_free set to
+ *	      NULL, any allocated buffer has already been freed)
+ *	  0 : success
+ *	      - req: points to base of kernel copy of arguments
+ *	      - ptr_free: address of buffer to free by caller on exit.
+ *		NULL if using the stack buffer
+ *
+ * when ptr_free is not NULL upon return, the caller must kfree()
+ */
+int pfm_get_args(void __user *ureq, size_t sz, size_t lsz, void *laddr,
+		 void **req, void **ptr_free)
+{
+	void *addr;
+
+	/*
+	 * check sysadmin argument limit
+	 */
+	if (unlikely(sz > pfm_controls.arg_mem_max)) {
+		PFM_DBG("argument too big %zu max=%zu",
+			sz,
+			pfm_controls.arg_mem_max);
+		return -E2BIG;
+	}
+
+	/*
+	 * check if vector fits on stack buffer
+	 */
+	if (sz > lsz) {
+		addr = kmalloc(sz, GFP_KERNEL);
+		if (unlikely(addr == NULL))
+			return -ENOMEM;
+		*ptr_free = addr;
+	} else {
+		addr = laddr;
+		*req = laddr;
+		*ptr_free = NULL;
+	}
+
+	/*
+	 * bring the data in
+	 */
+	if (unlikely(copy_from_user(addr, ureq, sz))) {
+		if (addr != laddr) {
+			kfree(addr);
+			/*
+			 * do not leave the caller with a pointer to
+			 * freed memory
+			 */
+			*ptr_free = NULL;
+		}
+		return -EFAULT;
+	}
+
+	/*
+	 * base address of kernel buffer
+	 */
+	*req = addr;
+
+	return 0;
+}
|
|
+
|
|
+/**
+ * pfm_acquire_ctx_from_fd -- get ctx from file descriptor
+ * @fd: file descriptor
+ * @ctx: pointer to pointer of context updated on return
+ * @cookie: opaque structure to use for release
+ *
+ * This helper function extracts the ctx from the file descriptor.
+ * It also increments the refcount of the file structure. Thus
+ * it updates the cookie so the refcount can be decreased when
+ * leaving the perfmon syscall via pfm_release_ctx_from_fd
+ *
+ * return:
+ *	0 on success, the caller must call pfm_release_ctx_from_fd()
+ *	-EBADF if fd is invalid or is not a perfmon file. No file
+ *	reference is held and the cookie is not initialized.
+ */
+static int pfm_acquire_ctx_from_fd(int fd, struct pfm_context **ctx,
+				   struct pfm_syscall_cookie *cookie)
+{
+	struct file *filp;
+	int fput_needed;
+
+	filp = fget_light(fd, &fput_needed);
+	if (unlikely(filp == NULL)) {
+		PFM_DBG("invalid fd %d", fd);
+		return -EBADF;
+	}
+
+	*ctx = filp->private_data;
+
+	if (unlikely(!*ctx || filp->f_op != &pfm_file_ops)) {
+		PFM_DBG("fd %d not related to perfmon", fd);
+		/*
+		 * callers bail out without the cookie on error, so the
+		 * reference taken by fget_light() must be dropped here
+		 */
+		fput_light(filp, fput_needed);
+		return -EBADF;
+	}
+	cookie->filp = filp;
+	cookie->fput_needed = fput_needed;
+
+	return 0;
+}
|
|
+
|
|
+/**
+ * pfm_release_ctx_from_fd -- decrease refcount of file associated with context
+ * @cookie: the cookie structure initialized by pfm_acquire_ctx_from_fd
+ *
+ * Hands the file and fput_needed values saved in the cookie to fput_light().
+ */
+static inline void pfm_release_ctx_from_fd(struct pfm_syscall_cookie *cookie)
+{
+	fput_light(cookie->filp, cookie->fput_needed);
+}
|
|
+
|
|
+/*
+ * sys_pfm_create_context - create a perfmon context
+ * @ureq: user pointer to the context description
+ * @fmt_name: user pointer to the sampling format name, NULL if none
+ * @fmt_uarg: user pointer to the sampling format argument
+ * @fmt_size: size of the sampling format argument
+ *
+ * unlike the other perfmon system calls, this one returns a file descriptor
+ * or a value < 0 in case of error, very much like open() or socket()
+ */
+asmlinkage long sys_pfm_create_context(struct pfarg_ctx __user *ureq,
+				       char __user *fmt_name,
+				       void __user *fmt_uarg, size_t fmt_size)
+{
+	struct pfm_smpl_fmt *smpl_fmt = NULL;
+	struct pfarg_ctx kreq;
+	void *smpl_arg = NULL;
+	int ret;
+
+	PFM_DBG("req=%p fmt=%p fmt_arg=%p size=%zu",
+		ureq, fmt_name, fmt_uarg, fmt_size);
+
+	if (perfmon_disabled)
+		return -ENOSYS;
+
+	if (copy_from_user(&kreq, ureq, sizeof(kreq)))
+		return -EFAULT;
+
+	/*
+	 * the sampling format is optional, smpl_fmt and smpl_arg stay
+	 * NULL when no name is passed
+	 */
+	if (fmt_name) {
+		ret = pfm_get_smpl_arg(fmt_name, fmt_uarg, fmt_size,
+				       &smpl_arg, &smpl_fmt);
+		if (ret)
+			return ret;
+	}
+
+	ret = __pfm_create_context(&kreq, smpl_fmt, smpl_arg, PFM_NORMAL, NULL);
+
+	/* the kernel copy of the format argument is not needed anymore */
+	kfree(smpl_arg);
+
+	return ret;
+}
|
|
+
|
|
+/*
+ * sys_pfm_write_pmcs - write a vector of PMC registers
+ * @fd: file descriptor identifying the context
+ * @ureq: user vector of pfarg_pmc
+ * @count: number of elements in the vector
+ *
+ * return: 0 or a negative error code
+ */
+asmlinkage long sys_pfm_write_pmcs(int fd, struct pfarg_pmc __user *ureq, int count)
+{
+	struct pfarg_pmc stack_args[PFM_PMC_STK_ARG];
+	struct pfarg_pmc *kreq;
+	struct pfm_syscall_cookie cookie;
+	struct pfm_context *ctx;
+	struct task_struct *target;
+	unsigned long irq_flags;
+	void *heap_args, *resume;
+	int ret;
+
+	PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
+
+	if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) {
+		PFM_DBG("invalid arg count %d", count);
+		return -EINVAL;
+	}
+
+	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
+	if (ret)
+		return ret;
+
+	/* small vectors land in stack_args, larger ones are kmalloc'ed */
+	ret = pfm_get_args(ureq, count*sizeof(*ureq), sizeof(stack_args),
+			   stack_args, (void **)&kreq, &heap_args);
+	if (ret)
+		goto out_release;
+
+	spin_lock_irqsave(&ctx->lock, irq_flags);
+
+	target = ctx->task;
+
+	ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &irq_flags, &resume);
+	if (ret == 0)
+		ret = __pfm_write_pmcs(ctx, kreq, count);
+
+	spin_unlock_irqrestore(&ctx->lock, irq_flags);
+
+	if (resume)
+		pfm_resume_task(target, resume);
+
+	/* heap_args is NULL when the stack buffer was used */
+	kfree(heap_args);
+out_release:
+	pfm_release_ctx_from_fd(&cookie);
+	return ret;
+}
|
|
+
|
|
+/*
+ * sys_pfm_write_pmds - write a vector of PMD registers
+ * @fd: file descriptor identifying the context
+ * @ureq: user vector of pfarg_pmd
+ * @count: number of elements in the vector
+ *
+ * return: 0 or a negative error code
+ */
+asmlinkage long sys_pfm_write_pmds(int fd, struct pfarg_pmd __user *ureq, int count)
+{
+	struct pfarg_pmd stack_args[PFM_PMD_STK_ARG];
+	struct pfarg_pmd *kreq;
+	struct pfm_syscall_cookie cookie;
+	struct pfm_context *ctx;
+	struct task_struct *target;
+	unsigned long irq_flags;
+	void *heap_args, *resume;
+	int ret;
+
+	PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
+
+	if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) {
+		PFM_DBG("invalid arg count %d", count);
+		return -EINVAL;
+	}
+
+	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
+	if (ret)
+		return ret;
+
+	/* small vectors land in stack_args, larger ones are kmalloc'ed */
+	ret = pfm_get_args(ureq, count*sizeof(*ureq), sizeof(stack_args),
+			   stack_args, (void **)&kreq, &heap_args);
+	if (ret)
+		goto out_release;
+
+	spin_lock_irqsave(&ctx->lock, irq_flags);
+
+	target = ctx->task;
+
+	ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &irq_flags, &resume);
+	if (ret == 0)
+		ret = __pfm_write_pmds(ctx, kreq, count, 0);
+
+	spin_unlock_irqrestore(&ctx->lock, irq_flags);
+
+	if (resume)
+		pfm_resume_task(target, resume);
+
+	/* heap_args is NULL when the stack buffer was used */
+	kfree(heap_args);
+out_release:
+	pfm_release_ctx_from_fd(&cookie);
+	return ret;
+}
|
|
+
|
|
+/*
+ * sys_pfm_read_pmds - read a vector of PMD registers
+ * @fd: file descriptor identifying the context
+ * @ureq: user vector of pfarg_pmd, also receives the result
+ * @count: number of elements in the vector
+ *
+ * The kernel copy of the vector is copied back to ureq whether or not
+ * the read succeeded.
+ *
+ * return: 0 or a negative error code
+ */
+asmlinkage long sys_pfm_read_pmds(int fd, struct pfarg_pmd __user *ureq, int count)
+{
+	struct pfarg_pmd stack_args[PFM_PMD_STK_ARG];
+	struct pfarg_pmd *kreq;
+	struct pfm_syscall_cookie cookie;
+	struct pfm_context *ctx;
+	struct task_struct *target;
+	unsigned long irq_flags;
+	void *heap_args, *resume;
+	size_t nbytes;
+	int ret;
+
+	PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
+
+	if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq))
+		return -EINVAL;
+
+	nbytes = count*sizeof(*ureq);
+
+	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
+	if (ret)
+		return ret;
+
+	/* small vectors land in stack_args, larger ones are kmalloc'ed */
+	ret = pfm_get_args(ureq, nbytes, sizeof(stack_args), stack_args,
+			   (void **)&kreq, &heap_args);
+	if (ret)
+		goto out_release;
+
+	spin_lock_irqsave(&ctx->lock, irq_flags);
+
+	target = ctx->task;
+
+	ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &irq_flags, &resume);
+	if (ret == 0)
+		ret = __pfm_read_pmds(ctx, kreq, count);
+
+	spin_unlock_irqrestore(&ctx->lock, irq_flags);
+
+	/* a failed copy back overrides any earlier status */
+	if (copy_to_user(ureq, kreq, nbytes))
+		ret = -EFAULT;
+
+	if (resume)
+		pfm_resume_task(target, resume);
+
+	/* heap_args is NULL when the stack buffer was used */
+	kfree(heap_args);
+out_release:
+	pfm_release_ctx_from_fd(&cookie);
+	return ret;
+}
|
|
+
|
|
+/*
+ * sys_pfm_restart - resume monitoring after a notification
+ * @fd: file descriptor identifying the context
+ *
+ * return: 0 or a negative error code
+ */
+asmlinkage long sys_pfm_restart(int fd)
+{
+	struct pfm_syscall_cookie cookie;
+	struct pfm_context *ctx;
+	struct task_struct *target;
+	unsigned long irq_flags;
+	void *resume;
+	int ret, info;
+
+	PFM_DBG("fd=%d", fd);
+
+	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&ctx->lock, irq_flags);
+
+	target = ctx->task;
+
+	ret = pfm_check_task_state(ctx, 0, &irq_flags, &resume);
+	if (ret == 0)
+		ret = __pfm_restart(ctx, &info);
+
+	spin_unlock_irqrestore(&ctx->lock, irq_flags);
+
+	if (resume)
+		pfm_resume_task(target, resume);
+
+	/*
+	 * In per-thread mode with blocking notification, i.e.
+	 * ctx->flags.blocking=1, we need to defer issuing the
+	 * complete to unblock the blocked monitored thread.
+	 * Otherwise we have a potential deadlock due to a lock
+	 * inversion between the context lock and the task_rq_lock()
+	 * which can happen if one thread is in this call and the other
+	 * (the monitored thread) is in the context switch code.
+	 *
+	 * It is safe to access the context outside the critical section
+	 * because:
+	 *	- we are protected by the fget_light(), thus the context
+	 *	  cannot disappear
+	 *
+	 * info is only valid when __pfm_restart() was called, which
+	 * the ret test guarantees before info is read.
+	 */
+	if (!ret && info == 1)
+		complete(&ctx->restart_complete);
+
+	pfm_release_ctx_from_fd(&cookie);
+	return ret;
+}
|
|
+
|
|
+/*
+ * sys_pfm_stop - stop monitoring
+ * @fd: file descriptor identifying the context
+ *
+ * return: 0 or a negative error code
+ */
+asmlinkage long sys_pfm_stop(int fd)
+{
+	struct pfm_context *ctx;
+	struct task_struct *task;
+	struct pfm_syscall_cookie cookie;
+	void *resume;
+	unsigned long flags;
+	int ret;
+	/*
+	 * must start at 0: __pfm_stop() is skipped when
+	 * pfm_check_task_state() fails but release_info is tested below
+	 */
+	int release_info = 0;
+
+	PFM_DBG("fd=%d", fd);
+
+	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	task = ctx->task;
+
+	ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
+	if (!ret)
+		ret = __pfm_stop(ctx, &release_info);
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	if (resume)
+		pfm_resume_task(task, resume);
+
+	/*
+	 * defer cancellation of timer to avoid race
+	 * with pfm_handle_switch_timeout()
+	 *
+	 * applies only when self-monitoring
+	 */
+	if (release_info & 0x2)
+		hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
+
+	pfm_release_ctx_from_fd(&cookie);
+	return ret;
+}
|
|
+
|
|
+/*
+ * sys_pfm_start - start monitoring
+ * @fd: file descriptor identifying the context
+ * @ureq: optional user argument, may be NULL
+ *
+ * return: 0 or a negative error code
+ */
+asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *ureq)
+{
+	struct pfm_context *ctx;
+	struct task_struct *task;
+	struct pfm_syscall_cookie cookie;
+	void *resume;
+	struct pfarg_start req;
+	unsigned long flags;
+	int ret;
+
+	PFM_DBG("fd=%d req=%p", fd, ureq);
+
+	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
+	if (ret)
+		return ret;
+
+	/*
+	 * the one argument is actually optional
+	 *
+	 * the file reference is already held at this point, so a failed
+	 * copy must go through the release path
+	 */
+	if (ureq && copy_from_user(&req, ureq, sizeof(req))) {
+		ret = -EFAULT;
+		goto error;
+	}
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	task = ctx->task;
+
+	ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume);
+	if (!ret)
+		ret = __pfm_start(ctx, ureq ? &req : NULL);
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	if (resume)
+		pfm_resume_task(task, resume);
+error:
+	pfm_release_ctx_from_fd(&cookie);
+	return ret;
+}
|
|
+
|
|
+/*
+ * sys_pfm_load_context - attach a context to a thread or a CPU
+ * @fd: file descriptor identifying the context
+ * @ureq: user pointer to the load argument
+ *
+ * return: 0 or a negative error code
+ */
+asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ureq)
+{
+	struct pfm_context *ctx;
+	struct task_struct *task;
+	struct pfm_syscall_cookie cookie;
+	/*
+	 * resume is only filled by pfm_get_task(), which is skipped for
+	 * self-monitoring and system-wide contexts: it must start at NULL
+	 * because it is tested after the load
+	 */
+	void *resume = NULL, *dummy_resume;
+	unsigned long flags;
+	struct pfarg_load req;
+	int ret;
+
+	PFM_DBG("fd=%d req=%p", fd, ureq);
+
+	if (copy_from_user(&req, ureq, sizeof(req)))
+		return -EFAULT;
+
+	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
+	if (ret)
+		return ret;
+
+	task = current;
+
+	/*
+	 * in per-thread mode (not self-monitoring), get a reference
+	 * on task to monitor. This must be done with interrupts enabled
+	 * Upon successful return, refcount on task is increased.
+	 *
+	 * fget_light() is protecting the context.
+	 */
+	if (!ctx->flags.system && req.load_pid != current->pid) {
+		ret = pfm_get_task(ctx, req.load_pid, &task, &resume);
+		if (ret)
+			goto error;
+	}
+
+	/*
+	 * irqsave is required to avoid race in case context is already
+	 * loaded or with switch timeout in the case of self-monitoring
+	 */
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &dummy_resume);
+	if (!ret)
+		ret = __pfm_load_context(ctx, &req, task);
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	if (resume)
+		pfm_resume_task(task, resume);
+
+	/*
+	 * in per-thread mode (not self-monitoring), we need
+	 * to decrease refcount on task to monitor:
+	 *   - load successful: we have a reference to the task in ctx->task
+	 *   - load failed    : undo the effect of pfm_get_task()
+	 */
+	if (task != current)
+		put_task_struct(task);
+error:
+	pfm_release_ctx_from_fd(&cookie);
+	return ret;
+}
|
|
+
|
|
+/*
+ * sys_pfm_unload_context - unload the perfmon context designated by a descriptor
+ * @fd: file descriptor identifying the context
+ *
+ * Returns the result of __pfm_unload_context(), or the error reported by
+ * pfm_acquire_ctx_from_fd() or pfm_check_task_state().
+ */
+asmlinkage long sys_pfm_unload_context(int fd)
+{
+	struct pfm_syscall_cookie cookie;
+	struct pfm_context *ctx;
+	struct task_struct *task;
+	unsigned long flags;
+	void *resume;
+	int release_info = 0;
+	int is_system;
+	u32 cpu;
+	int ret;
+
+	PFM_DBG("fd=%d", fd);
+
+	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
+	if (ret != 0)
+		return ret;
+
+	is_system = ctx->flags.system;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	/* values needed again once the context lock has been dropped */
+	cpu = ctx->cpu;
+	task = ctx->task;
+
+	ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED|PFM_CMD_UNLOAD,
+				   &flags, &resume);
+	if (ret == 0)
+		ret = __pfm_unload_context(ctx, &release_info);
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	if (resume != NULL)
+		pfm_resume_task(task, resume);
+
+	/*
+	 * the timer is cancelled only now that the context is unlocked,
+	 * to avoid a race with pfm_handle_switch_timeout()
+	 */
+	if (release_info & 0x2) {
+		int cancelled = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer));
+
+		PFM_DBG("timeout cancel=%d", cancelled);
+	}
+
+	/* bit 0 of release_info requests the session release */
+	if (release_info & 0x1)
+		pfm_session_release(is_system, cpu);
+
+	pfm_release_ctx_from_fd(&cookie);
+	return ret;
+}
|
|
+
|
|
+/*
+ * sys_pfm_create_evtsets - create event sets from a user-supplied vector
+ * @fd: file descriptor identifying the context
+ * @ureq: user pointer to an array of struct pfarg_setdesc
+ * @count: number of elements in @ureq
+ *
+ * Returns the result of __pfm_create_evtsets(), -EINVAL when @count is out
+ * of range, or the error reported by one of the helpers.
+ */
+asmlinkage long sys_pfm_create_evtsets(int fd, struct pfarg_setdesc __user *ureq, int count)
+{
+	struct pfm_syscall_cookie cookie;
+	struct pfm_context *ctx;
+	struct pfarg_setdesc *req;
+	unsigned long flags;
+	void *resume, *fptr;
+	size_t sz;
+	int ret;
+
+	PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
+
+	if (count < 0)
+		return -EINVAL;
+	if (count >= PFM_MAX_ARG_COUNT(ureq))
+		return -EINVAL;
+
+	sz = sizeof(*ureq) * count;
+
+	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
+	if (ret != 0)
+		return ret;
+
+	ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
+	if (ret == 0) {
+		/*
+		 * the state of the context is unknown: it could be attached
+		 * and receiving PMU interrupts, so interrupts are masked
+		 * while the context is locked and checked
+		 */
+		spin_lock_irqsave(&ctx->lock, flags);
+
+		ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &resume);
+		if (ret == 0)
+			ret = __pfm_create_evtsets(ctx, req, count);
+
+		spin_unlock_irqrestore(&ctx->lock, flags);
+
+		/*
+		 * the command requires an unloaded context, hence resume is
+		 * necessarily NULL and pfm_resume_task() is not needed
+		 */
+		kfree(fptr);
+	}
+
+	pfm_release_ctx_from_fd(&cookie);
+	return ret;
+}
|
|
+
|
|
+/*
+ * sys_pfm_getinfo_evtsets - report information about event sets
+ * @fd: file descriptor identifying the context
+ * @ureq: user pointer to an array of struct pfarg_setinfo, copied in and
+ *        copied back out
+ * @count: number of elements in @ureq
+ *
+ * Returns the result of __pfm_getinfo_evtsets(), -EINVAL when @count is out
+ * of range, -EFAULT when the copy back to user space fails, or the error
+ * reported by one of the helpers.
+ */
+asmlinkage long sys_pfm_getinfo_evtsets(int fd, struct pfarg_setinfo __user *ureq, int count)
+{
+	struct pfm_syscall_cookie cookie;
+	struct pfm_context *ctx;
+	struct task_struct *task;
+	struct pfarg_setinfo *req;
+	unsigned long flags;
+	void *resume, *fptr;
+	size_t sz;
+	int ret;
+
+	PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
+
+	if (count < 0)
+		return -EINVAL;
+	if (count >= PFM_MAX_ARG_COUNT(ureq))
+		return -EINVAL;
+
+	sz = sizeof(*ureq) * count;
+
+	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
+	if (ret != 0)
+		return ret;
+
+	ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
+	if (ret != 0)
+		goto out_release;
+
+	/*
+	 * the command is allowed on a loaded context; interrupts stay masked
+	 * to avoid racing with a PMU interrupt which may switch the active set
+	 */
+	spin_lock_irqsave(&ctx->lock, flags);
+
+	task = ctx->task;
+
+	ret = pfm_check_task_state(ctx, 0, &flags, &resume);
+	if (ret == 0)
+		ret = __pfm_getinfo_evtsets(ctx, req, count);
+
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	if (resume != NULL)
+		pfm_resume_task(task, resume);
+
+	/* the vector is copied back regardless of the value of ret */
+	if (copy_to_user(ureq, req, sz) != 0)
+		ret = -EFAULT;
+
+	kfree(fptr);
+out_release:
+	pfm_release_ctx_from_fd(&cookie);
+	return ret;
+}
|
|
+
|
|
+/*
+ * sys_pfm_delete_evtsets - delete the event sets listed in a user vector
+ * @fd: file descriptor identifying the context
+ * @ureq: user pointer to an array of struct pfarg_setinfo
+ * @count: number of elements in @ureq
+ *
+ * Returns the result of __pfm_delete_evtsets(), -EINVAL when @count is out
+ * of range, or the error reported by one of the helpers.
+ */
+asmlinkage long sys_pfm_delete_evtsets(int fd, struct pfarg_setinfo __user *ureq, int count)
+{
+	struct pfm_syscall_cookie cookie;
+	struct pfm_context *ctx;
+	struct pfarg_setinfo *req;
+	unsigned long flags;
+	void *resume, *fptr;
+	size_t sz;
+	int ret;
+
+	PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count);
+
+	if (count < 0)
+		return -EINVAL;
+	if (count >= PFM_MAX_ARG_COUNT(ureq))
+		return -EINVAL;
+
+	sz = sizeof(*ureq) * count;
+
+	ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie);
+	if (ret != 0)
+		return ret;
+
+	ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr);
+	if (ret == 0) {
+		/*
+		 * the state of the context is unknown: it could be attached
+		 * and receiving PMU interrupts, so interrupts are masked
+		 */
+		spin_lock_irqsave(&ctx->lock, flags);
+
+		ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &resume);
+		if (ret == 0)
+			ret = __pfm_delete_evtsets(ctx, req, count);
+
+		spin_unlock_irqrestore(&ctx->lock, flags);
+
+		/*
+		 * the command requires an unloaded context, hence resume is
+		 * necessarily NULL and pfm_resume_task() is not needed
+		 */
+		kfree(fptr);
+	}
+
+	pfm_release_ctx_from_fd(&cookie);
+	return ret;
+}
|
|
diff --git a/perfmon/perfmon_sysfs.c b/perfmon/perfmon_sysfs.c
|
|
new file mode 100644
|
|
index 0000000..7353c3b
|
|
--- /dev/null
|
|
+++ b/perfmon/perfmon_sysfs.c
|
|
@@ -0,0 +1,525 @@
|
|
+/*
|
|
+ * perfmon_sysfs.c: perfmon2 sysfs interface
|
|
+ *
|
|
+ * This file implements the perfmon2 interface which
|
|
+ * provides access to the hardware performance counters
|
|
+ * of the host processor.
|
|
+ *
|
|
+ * The initial version of perfmon.c was written by
|
|
+ * Ganesh Venkitachalam, IBM Corp.
|
|
+ *
|
|
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
|
|
+ * David Mosberger, Hewlett Packard Co.
|
|
+ *
|
|
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
|
|
+ * by Stephane Eranian, Hewlett Packard Co.
|
|
+ *
|
|
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
|
|
+ * Contributed by Stephane Eranian <eranian@hpl.hp.com>
|
|
+ * David Mosberger-Tang <davidm@hpl.hp.com>
|
|
+ *
|
|
+ * More information about perfmon available at:
|
|
+ * http://perfmon2.sf.net
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of version 2 of the GNU General Public
|
|
+ * License as published by the Free Software Foundation.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
+ * 02111-1307 USA
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/module.h> /* for EXPORT_SYMBOL */
|
|
+#include <linux/perfmon_kern.h>
|
|
+#include "perfmon_priv.h"
|
|
+
|
|
+/*
+ * sysfs attribute bundled with its own callbacks. The first argument of
+ * show/store is an opaque object pointer supplied by the sysfs_ops
+ * dispatchers of this file.
+ */
+struct pfm_attribute {
+	struct attribute attr;
+	ssize_t (*show)(void *, struct pfm_attribute *attr, char *);
+	ssize_t (*store)(void *, const char *, size_t);
+};
+
+/*
+ * map an embedded struct attribute back to its struct pfm_attribute.
+ * No trailing semicolon: the macro must be usable inside an expression.
+ */
+#define to_attr(n) container_of(n, struct pfm_attribute, attr)
+
+/* read-only attribute served by a kobj_attribute show routine */
+#define PFM_RO_ATTR(_name, _show) \
+	struct kobj_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL)
+
+/* read-write attribute served by kobj_attribute show/store routines */
+#define PFM_RW_ATTR(_name, _show, _store) \
+	struct kobj_attribute attr_##_name = __ATTR(_name, 0644, _show, _store)
+
+/* read-only attribute served by a pfm_attribute show routine */
+#define PFM_ROS_ATTR(_name, _show) \
+	struct pfm_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL)
+
+/* true when attribute a is named n */
+#define is_attr_name(a, n) (!strcmp((a)->attr.name, n))
+int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu);
+
+static struct kobject *pfm_kernel_kobj, *pfm_fmt_kobj;
+static struct kobject *pfm_pmu_kobj;
|
|
+
|
|
+/*
+ * sysfs show dispatcher for register kobjects: forwards to the show
+ * callback of the pfm_attribute, or fails with -EIO when there is none
+ */
+static ssize_t pfm_regs_attr_show(struct kobject *kobj,
+		struct attribute *attr, char *buf)
+{
+	struct pfm_regmap_desc *desc = to_reg(kobj);
+	struct pfm_attribute *pattr = to_attr(attr);
+
+	if (!pattr->show)
+		return -EIO;
+
+	return pattr->show(desc, pattr, buf);
+}
|
|
+
|
|
+/*
+ * sysfs show dispatcher for sampling format kobjects: forwards to the show
+ * callback of the pfm_attribute, or fails with -EIO when there is none
+ */
+static ssize_t pfm_fmt_attr_show(struct kobject *kobj,
+		struct attribute *attr, char *buf)
+{
+	struct pfm_smpl_fmt *smpl_fmt = to_smpl_fmt(kobj);
+	struct pfm_attribute *pattr = to_attr(attr);
+
+	if (!pattr->show)
+		return -EIO;
+
+	return pattr->show(smpl_fmt, pattr, buf);
+}
|
|
+
|
|
+/* register kobjects are read-only: only a show operation is provided */
+static struct sysfs_ops pfm_regs_sysfs_ops = {
+	.show	= pfm_regs_attr_show,
+};
+
+/* sampling format kobjects are read-only as well */
+static struct sysfs_ops pfm_fmt_sysfs_ops = {
+	.show	= pfm_fmt_attr_show,
+};
+
+/* kobject type used for the per-register directories */
+static struct kobj_type pfm_regs_ktype = {
+	.sysfs_ops	= &pfm_regs_sysfs_ops,
+};
+
+/* kobject type used for the per-format directories */
+static struct kobj_type pfm_fmt_ktype = {
+	.sysfs_ops	= &pfm_fmt_sysfs_ops,
+};
|
|
+
|
|
+/*
+ * show routine shared by all /sys/kernel/perfmon attributes; the attribute
+ * name selects the value to print. Returns the number of bytes placed in
+ * buf, 0 for a name it does not know.
+ */
+static ssize_t pfm_controls_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	int base;
+
+	/* values produced by pfm_sysfs_res_show(), selected by index */
+	if (is_attr_name(attr, "task_sessions_count"))
+		return pfm_sysfs_res_show(buf, PAGE_SIZE, 0);
+	if (is_attr_name(attr, "sys_sessions_count"))
+		return pfm_sysfs_res_show(buf, PAGE_SIZE, 1);
+	if (is_attr_name(attr, "smpl_buffer_mem_cur"))
+		return pfm_sysfs_res_show(buf, PAGE_SIZE, 2);
+
+	/* constants of this build */
+	if (is_attr_name(attr, "version"))
+		return snprintf(buf, PAGE_SIZE, "%u.%u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN);
+	if (is_attr_name(attr, "syscall")) {
+		base = pfm_arch_get_base_syscall();
+		return snprintf(buf, PAGE_SIZE, "%d\n", base);
+	}
+
+	/* fields of pfm_controls */
+	if (is_attr_name(attr, "debug"))
+		return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.debug);
+	if (is_attr_name(attr, "task_group"))
+		return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.task_group);
+	if (is_attr_name(attr, "sys_group"))
+		return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.sys_group);
+	if (is_attr_name(attr, "mode"))
+		return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.flags);
+	if (is_attr_name(attr, "arg_mem_max"))
+		return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.arg_mem_max);
+	if (is_attr_name(attr, "smpl_buffer_mem_max"))
+		return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.smpl_buffer_mem_max);
+
+	/* XXX: could be set to write-only */
+	if (is_attr_name(attr, "reset_stats")) {
+		buf[0] = '0';
+		buf[1] = '\0';
+		return strnlen(buf, PAGE_SIZE);
+	}
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * store routine shared by the writable /sys/kernel/perfmon attributes.
+ * The input is parsed as one unsigned number; input that does not parse is
+ * ignored. count is returned in every case.
+ */
+static ssize_t pfm_controls_store(struct kobject *kobj, struct kobj_attribute *attr,
+					const char *buf, size_t count)
+{
+	size_t d;
+	int i;
+
+	if (sscanf(buf, "%zu", &d) != 1)
+		return count;
+
+	if (is_attr_name(attr, "debug")) {
+		pfm_controls.debug = d;
+	} else if (is_attr_name(attr, "task_group")) {
+		pfm_controls.task_group = d;
+	} else if (is_attr_name(attr, "sys_group")) {
+		pfm_controls.sys_group = d;
+	} else if (is_attr_name(attr, "mode")) {
+		pfm_controls.flags = d ? PFM_CTRL_FL_RW_EXPERT : 0;
+	} else if (is_attr_name(attr, "arg_mem_max")) {
+		/*
+		 * one page is the minimum accepted; smaller values are
+		 * dropped. The limit may end up below the size of the
+		 * available stack buffer and that is fine.
+		 */
+		if (d >= PAGE_SIZE)
+			pfm_controls.arg_mem_max = d;
+	} else if (is_attr_name(attr, "reset_stats")) {
+		/* the value written is irrelevant, any number triggers it */
+		for_each_online_cpu(i)
+			pfm_reset_stats(i);
+	} else if (is_attr_name(attr, "smpl_buffer_mem_max")) {
+		/* same one page minimum as arg_mem_max */
+		if (d >= PAGE_SIZE)
+			pfm_controls.smpl_buffer_mem_max = d;
+	}
+
+	return count;
+}
|
|
+
|
|
+/*
+ * attributes of /sys/kernel/perfmon, all served by pfm_controls_show()
+ * and, for the writable ones, pfm_controls_store()
+ */
+
+/* read-only entries */
+static PFM_RO_ATTR(version, pfm_controls_show);
+static PFM_RO_ATTR(syscall, pfm_controls_show);
+static PFM_RO_ATTR(task_sessions_count, pfm_controls_show);
+static PFM_RO_ATTR(sys_sessions_count, pfm_controls_show);
+static PFM_RO_ATTR(smpl_buffer_mem_cur, pfm_controls_show);
+
+/* read-write entries */
+static PFM_RW_ATTR(debug, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(reset_stats, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(sys_group, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(task_group, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(mode, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(smpl_buffer_mem_max, pfm_controls_show, pfm_controls_store);
+static PFM_RW_ATTR(arg_mem_max, pfm_controls_show, pfm_controls_store);
+
+/* NULL-terminated list handed to sysfs through pfm_kernel_attr_group */
+static struct attribute *pfm_kernel_attrs[] = {
+	&attr_version.attr,
+	&attr_syscall.attr,
+	&attr_task_sessions_count.attr,
+	&attr_sys_sessions_count.attr,
+	&attr_smpl_buffer_mem_cur.attr,
+	&attr_debug.attr,
+	&attr_reset_stats.attr,
+	&attr_sys_group.attr,
+	&attr_task_group.attr,
+	&attr_mode.attr,
+	&attr_smpl_buffer_mem_max.attr,
+	&attr_arg_mem_max.attr,
+	NULL
+};
+
+static struct attribute_group pfm_kernel_attr_group = {
+	.attrs	= pfm_kernel_attrs,
+};
|
|
+
|
|
+/*
+ * per-register attributes: data is the struct pfm_regmap_desc of the
+ * register, the attribute name selects the field to print. Returns the
+ * number of bytes placed in buf, 0 for a name it does not know.
+ */
+static ssize_t pfm_reg_show(void *data, struct pfm_attribute *attr, char *buf)
+{
+	struct pfm_regmap_desc *desc = data;
+	int width;
+
+	if (is_attr_name(attr, "name"))
+		return snprintf(buf, PAGE_SIZE, "%s\n", desc->desc);
+
+	if (is_attr_name(attr, "addr"))
+		return snprintf(buf, PAGE_SIZE, "0x%lx\n", desc->hw_addr);
+
+	if (is_attr_name(attr, "dfl_val"))
+		return snprintf(buf, PAGE_SIZE, "0x%llx\n",
+				(unsigned long long)desc->dfl_val);
+
+	if (is_attr_name(attr, "rsvd_msk"))
+		return snprintf(buf, PAGE_SIZE, "0x%llx\n",
+				(unsigned long long)desc->rsvd_msk);
+
+	if (is_attr_name(attr, "width")) {
+		/* PFM_REG_C64 registers report the PMU counter width */
+		if (desc->type & PFM_REG_C64)
+			width = pfm_pmu_conf->counter_width;
+		else
+			width = 64;
+		return snprintf(buf, PAGE_SIZE, "%d\n", width);
+	}
+
+	return 0;
+}
|
|
+
|
|
+/* files present in every register directory, all served by pfm_reg_show() */
+static PFM_ROS_ATTR(name, pfm_reg_show);
+static PFM_ROS_ATTR(addr, pfm_reg_show);
+static PFM_ROS_ATTR(width, pfm_reg_show);
+static PFM_ROS_ATTR(dfl_val, pfm_reg_show);
+static PFM_ROS_ATTR(rsvd_msk, pfm_reg_show);
+
+/* NULL-terminated list handed to sysfs through pfm_reg_attr_group */
+static struct attribute *pfm_reg_attrs[] = {
+	&attr_name.attr,
+	&attr_dfl_val.attr,
+	&attr_rsvd_msk.attr,
+	&attr_width.attr,
+	&attr_addr.attr,
+	NULL
+};
+
+static struct attribute_group pfm_reg_attr_group = {
+	.attrs	= pfm_reg_attrs,
+};
|
|
+
|
|
+/*
+ * show routine of the pmu_desc directory: "model" prints the name of the
+ * current PMU description, any other attribute name yields 0 bytes
+ */
+static ssize_t pfm_pmu_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	if (!is_attr_name(attr, "model"))
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", pfm_pmu_conf->pmu_name);
+}
|
|
+/* the pmu_desc directory exposes a single read-only file */
+static PFM_RO_ATTR(model, pfm_pmu_show);
+
+/* NULL-terminated list handed to sysfs through pfm_pmu_desc_attr_group */
+static struct attribute *pfm_pmu_desc_attrs[] = {
+	&attr_model.attr,
+	NULL
+};
+
+static struct attribute_group pfm_pmu_desc_attr_group = {
+	.attrs	= pfm_pmu_desc_attrs,
+};
|
|
+
|
|
+/*
+ * Create one kobject under pfm_pmu_kobj for every PMC and PMD descriptor
+ * whose type has PFM_REG_I set. Directories are named "pmc<N>" / "pmd<N>"
+ * after the index of the descriptor and receive the per-register
+ * attribute group.
+ *
+ * Returns 0 on success. On failure, every directory created so far is
+ * removed again and the error of the failing call is returned.
+ */
+static int pfm_sysfs_add_pmu_regs(struct pfm_pmu_config *pmu)
+{
+	struct pfm_regmap_desc *reg;
+	unsigned int i, k;
+	int ret;
+
+	reg = pmu->pmc_desc;
+	for (i = 0; i < pmu->num_pmc_entries; i++, reg++) {
+
+		if (!(reg->type & PFM_REG_I))
+			continue;
+
+		ret = kobject_init_and_add(&reg->kobj, &pfm_regs_ktype,
+					   pfm_pmu_kobj, "pmc%u", i);
+		if (ret)
+			goto undo_pmcs;
+
+		ret = sysfs_create_group(&reg->kobj, &pfm_reg_attr_group);
+		if (ret) {
+			kobject_del(&reg->kobj);
+			goto undo_pmcs;
+		}
+	}
+
+	reg = pmu->pmd_desc;
+	for (i = 0; i < pmu->num_pmd_entries; i++, reg++) {
+
+		if (!(reg->type & PFM_REG_I))
+			continue;
+
+		ret = kobject_init_and_add(&reg->kobj, &pfm_regs_ktype,
+					   pfm_pmu_kobj, "pmd%u", i);
+		if (ret)
+			goto undo_pmds;
+
+		ret = sysfs_create_group(&reg->kobj, &pfm_reg_attr_group);
+		if (ret) {
+			kobject_del(&reg->kobj);
+			goto undo_pmds;
+		}
+	}
+	return 0;
+undo_pmds:
+	/* i is the index of the PMD that failed: undo the ones before it */
+	reg = pmu->pmd_desc;
+	for (k = 0; k < i; k++, reg++) {
+		if (!(reg->type & PFM_REG_I))
+			continue;
+		sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
+		kobject_del(&reg->kobj);
+	}
+	/* all PMCs had been added by the time a PMD failed */
+	i = pmu->num_pmc_entries;
+	/* fall through */
+undo_pmcs:
+	reg = pmu->pmc_desc;
+	for (k = 0; k < i; k++, reg++) {
+		if (!(reg->type & PFM_REG_I))
+			continue;
+		sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
+		kobject_del(&reg->kobj);
+	}
+	return ret;
+}
|
|
+
|
|
+/*
+ * Remove the attribute group and the kobject of every PMC and PMD
+ * descriptor whose type has PFM_REG_I set. Always returns 0.
+ */
+static int pfm_sysfs_del_pmu_regs(struct pfm_pmu_config *pmu)
+{
+	struct pfm_regmap_desc *reg;
+	unsigned int i;
+
+	reg = pmu->pmc_desc;
+	for (i = 0; i < pmu->num_pmc_entries; i++, reg++) {
+
+		if (!(reg->type & PFM_REG_I))
+			continue;
+
+		sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
+		kobject_del(&reg->kobj);
+	}
+
+	reg = pmu->pmd_desc;
+	for (i = 0; i < pmu->num_pmd_entries; i++, reg++) {
+
+		if (!(reg->type & PFM_REG_I))
+			continue;
+
+		sysfs_remove_group(&reg->kobj, &pfm_reg_attr_group);
+		kobject_del(&reg->kobj);
+	}
+	return 0;
+}
|
|
+
|
|
+/*
+ * called when a PMU description module is inserted: creates the pmu_desc
+ * subdirectory of /sys/kernel/perfmon and fills it with PMU specific
+ * information, such as the register mappings.
+ *
+ * Returns 0 on success, -ENOMEM when the directory cannot be created, or
+ * the error of the step that failed.
+ */
+int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu)
+{
+	int ret;
+
+	pfm_pmu_kobj = kobject_create_and_add("pmu_desc", pfm_kernel_kobj);
+	if (pfm_pmu_kobj == NULL)
+		return -ENOMEM;
+
+	ret = sysfs_create_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
+	if (ret != 0)
+		goto put_kobj;
+
+	ret = pfm_sysfs_add_pmu_regs(pmu);
+	if (ret != 0)
+		goto remove_group;
+
+	kobject_uevent(pfm_pmu_kobj, KOBJ_ADD);
+	return 0;
+
+remove_group:
+	sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
+put_kobj:
+	/* dropping the reference releases pfm_pmu_kobj */
+	kobject_put(pfm_pmu_kobj);
+	return ret;
+}
|
|
+
|
|
+/*
+ * called when a PMU description module is removed: everything it exposed
+ * in sysfs goes away, i.e., the pmu_desc subdirectory disappears.
+ * Always returns 0.
+ */
+int pfm_sysfs_remove_pmu(struct pfm_pmu_config *pmu)
+{
+	/* per-register directories go first, then the directory itself */
+	pfm_sysfs_del_pmu_regs(pmu);
+	sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group);
+
+	kobject_uevent(pfm_pmu_kobj, KOBJ_REMOVE);
+	kobject_put(pfm_pmu_kobj);
+
+	/* the reference has been dropped, forget the pointer */
+	pfm_pmu_kobj = NULL;
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * show routine of a sampling format directory: "version" prints
+ * fmt_version as "<upper 16 bits>.<lower 16 bits>", any other attribute
+ * name yields 0 bytes
+ */
+static ssize_t pfm_fmt_show(void *data, struct pfm_attribute *attr, char *buf)
+{
+	struct pfm_smpl_fmt *smpl_fmt = data;
+
+	if (!is_attr_name(attr, "version"))
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%u.%u\n",
+			(smpl_fmt->fmt_version >> 16) & 0xffff,
+			smpl_fmt->fmt_version & 0xffff);
+}
|
|
+
|
|
+/*
+ * spelled out by hand instead of using the PFM_*_ATTR macros: those would
+ * generate attr_version, which is already taken by the attribute behind
+ * /sys/kernel/perfmon/version
+ */
+struct pfm_attribute attr_fmt_version = {
+	.attr	= {
+		.name	= "version",
+		.mode	= 0444,
+	},
+	.show	= pfm_fmt_show,
+};
+
+/* NULL-terminated list handed to sysfs through pfm_fmt_attr_group */
+static struct attribute *pfm_fmt_attrs[] = {
+	&attr_fmt_version.attr,
+	NULL
+};
+
+static struct attribute_group pfm_fmt_attr_group = {
+	.attrs	= pfm_fmt_attrs,
+};
|
|
+
|
|
+/*
+ * when a sampling format module is inserted, we populate
+ * sysfs with some information: a directory named after the format is
+ * created under pfm_fmt_kobj and receives the format attribute group.
+ *
+ * Returns 0 on success or the error of the failing sysfs/kobject call.
+ */
+int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt)
+{
+	int ret;
+
+	/*
+	 * the name argument of kobject_init_and_add() is a printf-style
+	 * format: pass the format name through "%s" so that a '%' in
+	 * fmt_name is never interpreted as a conversion
+	 */
+	ret = kobject_init_and_add(&fmt->kobj, &pfm_fmt_ktype,
+				   pfm_fmt_kobj, "%s", fmt->fmt_name);
+	if (ret)
+		return ret;
+
+	ret = sysfs_create_group(&fmt->kobj, &pfm_fmt_attr_group);
+	if (ret)
+		kobject_del(&fmt->kobj);
+	else
+		kobject_uevent(&fmt->kobj, KOBJ_ADD);
+
+	return ret;
+}
|
|
+
|
|
+/*
+ * called when a sampling format module is removed: the information it
+ * exposed must disappear from sysfs as well
+ */
+void pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt)
+{
+	struct kobject *fmt_kobj = &fmt->kobj;
+
+	sysfs_remove_group(fmt_kobj, &pfm_fmt_attr_group);
+	kobject_uevent(fmt_kobj, KOBJ_REMOVE);
+	kobject_del(fmt_kobj);
+}
|
|
+
|
|
+/*
+ * Build the /sys/kernel/perfmon tree: the top-level directory with its
+ * control attributes, the "formats" subdirectory, the description of the
+ * PMU when one is already registered, and the builtin sampling formats.
+ *
+ * Returns 0 on success, -ENOMEM when a directory cannot be created, or the
+ * error returned by sysfs_create_group().
+ */
+int __init pfm_init_sysfs(void)
+{
+	int ret;
+
+	pfm_kernel_kobj = kobject_create_and_add("perfmon", kernel_kobj);
+	if (!pfm_kernel_kobj) {
+		PFM_ERR("cannot add kernel object: /sys/kernel/perfmon");
+		return -ENOMEM;
+	}
+
+	ret = sysfs_create_group(pfm_kernel_kobj, &pfm_kernel_attr_group);
+	if (ret) {
+		kobject_put(pfm_kernel_kobj);
+		return ret;
+	}
+
+	/*
+	 * kobject_create_and_add() reports failure through a NULL pointer;
+	 * ret is still 0 at this point and must not be used for the test
+	 */
+	pfm_fmt_kobj = kobject_create_and_add("formats", pfm_kernel_kobj);
+	if (!pfm_fmt_kobj) {
+		ret = -ENOMEM;
+		PFM_ERR("cannot add fmt object: %d", ret);
+		goto error_fmt;
+	}
+
+	/* a failure to describe the PMU does not fail the initialization */
+	if (pfm_pmu_conf)
+		pfm_sysfs_add_pmu(pfm_pmu_conf);
+
+	pfm_sysfs_builtin_fmt_add();
+
+	return 0;
+
+error_fmt:
+	/* undo in reverse order; the final put releases the kobject */
+	sysfs_remove_group(pfm_kernel_kobj, &pfm_kernel_attr_group);
+	kobject_put(pfm_kernel_kobj);
+	return ret;
+}
|