18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Cell Broadband Engine OProfile Support 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * (C) Copyright IBM Corporation 2006 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Author: David Erb (djerb@us.ibm.com) 88c2ecf20Sopenharmony_ci * Modifications: 98c2ecf20Sopenharmony_ci * Carl Love <carll@us.ibm.com> 108c2ecf20Sopenharmony_ci * Maynard Johnson <maynardj@us.ibm.com> 118c2ecf20Sopenharmony_ci */ 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci#include <linux/cpufreq.h> 148c2ecf20Sopenharmony_ci#include <linux/delay.h> 158c2ecf20Sopenharmony_ci#include <linux/jiffies.h> 168c2ecf20Sopenharmony_ci#include <linux/kthread.h> 178c2ecf20Sopenharmony_ci#include <linux/oprofile.h> 188c2ecf20Sopenharmony_ci#include <linux/percpu.h> 198c2ecf20Sopenharmony_ci#include <linux/smp.h> 208c2ecf20Sopenharmony_ci#include <linux/spinlock.h> 218c2ecf20Sopenharmony_ci#include <linux/timer.h> 228c2ecf20Sopenharmony_ci#include <asm/cell-pmu.h> 238c2ecf20Sopenharmony_ci#include <asm/cputable.h> 248c2ecf20Sopenharmony_ci#include <asm/firmware.h> 258c2ecf20Sopenharmony_ci#include <asm/io.h> 268c2ecf20Sopenharmony_ci#include <asm/oprofile_impl.h> 278c2ecf20Sopenharmony_ci#include <asm/processor.h> 288c2ecf20Sopenharmony_ci#include <asm/prom.h> 298c2ecf20Sopenharmony_ci#include <asm/ptrace.h> 308c2ecf20Sopenharmony_ci#include <asm/reg.h> 318c2ecf20Sopenharmony_ci#include <asm/rtas.h> 328c2ecf20Sopenharmony_ci#include <asm/cell-regs.h> 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ci#include "../platforms/cell/interrupt.h" 358c2ecf20Sopenharmony_ci#include "cell/pr_util.h" 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci#define PPU_PROFILING 0 388c2ecf20Sopenharmony_ci#define SPU_PROFILING_CYCLES 1 398c2ecf20Sopenharmony_ci#define SPU_PROFILING_EVENTS 2 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci#define SPU_EVENT_NUM_START 4100 
428c2ecf20Sopenharmony_ci#define SPU_EVENT_NUM_STOP 4399 438c2ecf20Sopenharmony_ci#define SPU_PROFILE_EVENT_ADDR 4363 /* spu, address trace, decimal */ 448c2ecf20Sopenharmony_ci#define SPU_PROFILE_EVENT_ADDR_MASK_A 0x146 /* sub unit set to zero */ 458c2ecf20Sopenharmony_ci#define SPU_PROFILE_EVENT_ADDR_MASK_B 0x186 /* sub unit set to zero */ 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci#define NUM_SPUS_PER_NODE 8 488c2ecf20Sopenharmony_ci#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */ 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci#define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ 518c2ecf20Sopenharmony_ci#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying 528c2ecf20Sopenharmony_ci * PPU_CYCLES event 538c2ecf20Sopenharmony_ci */ 548c2ecf20Sopenharmony_ci#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */ 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci#define NUM_THREADS 2 /* number of physical threads in 578c2ecf20Sopenharmony_ci * physical processor 588c2ecf20Sopenharmony_ci */ 598c2ecf20Sopenharmony_ci#define NUM_DEBUG_BUS_WORDS 4 608c2ecf20Sopenharmony_ci#define NUM_INPUT_BUS_WORDS 2 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */ 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci/* Minimum HW interval timer setting to send value to trace buffer is 10 cycle. 658c2ecf20Sopenharmony_ci * To configure counter to send value every N cycles set counter to 668c2ecf20Sopenharmony_ci * 2^32 - 1 - N. 678c2ecf20Sopenharmony_ci */ 688c2ecf20Sopenharmony_ci#define NUM_INTERVAL_CYC 0xFFFFFFFF - 10 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci/* 718c2ecf20Sopenharmony_ci * spu_cycle_reset is the number of cycles between samples. 728c2ecf20Sopenharmony_ci * This variable is used for SPU profiling and should ONLY be set 738c2ecf20Sopenharmony_ci * at the beginning of cell_reg_setup; otherwise, it's read-only. 
748c2ecf20Sopenharmony_ci */ 758c2ecf20Sopenharmony_cistatic unsigned int spu_cycle_reset; 768c2ecf20Sopenharmony_cistatic unsigned int profiling_mode; 778c2ecf20Sopenharmony_cistatic int spu_evnt_phys_spu_indx; 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_cistruct pmc_cntrl_data { 808c2ecf20Sopenharmony_ci unsigned long vcntr; 818c2ecf20Sopenharmony_ci unsigned long evnts; 828c2ecf20Sopenharmony_ci unsigned long masks; 838c2ecf20Sopenharmony_ci unsigned long enabled; 848c2ecf20Sopenharmony_ci}; 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci/* 878c2ecf20Sopenharmony_ci * ibm,cbe-perftools rtas parameters 888c2ecf20Sopenharmony_ci */ 898c2ecf20Sopenharmony_cistruct pm_signal { 908c2ecf20Sopenharmony_ci u16 cpu; /* Processor to modify */ 918c2ecf20Sopenharmony_ci u16 sub_unit; /* hw subunit this applies to (if applicable)*/ 928c2ecf20Sopenharmony_ci short int signal_group; /* Signal Group to Enable/Disable */ 938c2ecf20Sopenharmony_ci u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event 948c2ecf20Sopenharmony_ci * Bus Word(s) (bitmask) 958c2ecf20Sopenharmony_ci */ 968c2ecf20Sopenharmony_ci u8 bit; /* Trigger/Event bit (if applicable) */ 978c2ecf20Sopenharmony_ci}; 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci/* 1008c2ecf20Sopenharmony_ci * rtas call arguments 1018c2ecf20Sopenharmony_ci */ 1028c2ecf20Sopenharmony_cienum { 1038c2ecf20Sopenharmony_ci SUBFUNC_RESET = 1, 1048c2ecf20Sopenharmony_ci SUBFUNC_ACTIVATE = 2, 1058c2ecf20Sopenharmony_ci SUBFUNC_DEACTIVATE = 3, 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ci PASSTHRU_IGNORE = 0, 1088c2ecf20Sopenharmony_ci PASSTHRU_ENABLE = 1, 1098c2ecf20Sopenharmony_ci PASSTHRU_DISABLE = 2, 1108c2ecf20Sopenharmony_ci}; 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_cistruct pm_cntrl { 1138c2ecf20Sopenharmony_ci u16 enable; 1148c2ecf20Sopenharmony_ci u16 stop_at_max; 1158c2ecf20Sopenharmony_ci u16 trace_mode; 1168c2ecf20Sopenharmony_ci u16 freeze; 1178c2ecf20Sopenharmony_ci u16 count_mode; 
1188c2ecf20Sopenharmony_ci u16 spu_addr_trace; 1198c2ecf20Sopenharmony_ci u8 trace_buf_ovflw; 1208c2ecf20Sopenharmony_ci}; 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_cistatic struct { 1238c2ecf20Sopenharmony_ci u32 group_control; 1248c2ecf20Sopenharmony_ci u32 debug_bus_control; 1258c2ecf20Sopenharmony_ci struct pm_cntrl pm_cntrl; 1268c2ecf20Sopenharmony_ci u32 pm07_cntrl[NR_PHYS_CTRS]; 1278c2ecf20Sopenharmony_ci} pm_regs; 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci#define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12) 1308c2ecf20Sopenharmony_ci#define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4) 1318c2ecf20Sopenharmony_ci#define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8) 1328c2ecf20Sopenharmony_ci#define GET_POLARITY(x) ((x & 0x00000002) >> 1) 1338c2ecf20Sopenharmony_ci#define GET_COUNT_CYCLES(x) (x & 0x00000001) 1348c2ecf20Sopenharmony_ci#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2) 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); 1378c2ecf20Sopenharmony_cistatic unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE]; 1388c2ecf20Sopenharmony_cistatic struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci/* 1418c2ecf20Sopenharmony_ci * The CELL profiling code makes rtas calls to setup the debug bus to 1428c2ecf20Sopenharmony_ci * route the performance signals. Additionally, SPU profiling requires 1438c2ecf20Sopenharmony_ci * a second rtas call to setup the hardware to capture the SPU PCs. 1448c2ecf20Sopenharmony_ci * The EIO error value is returned if the token lookups or the rtas 1458c2ecf20Sopenharmony_ci * call fail. The EIO error number is the best choice of the existing 1468c2ecf20Sopenharmony_ci * error numbers. The probability of rtas related error is very low. 
But 1478c2ecf20Sopenharmony_ci * by returning EIO and printing additional information to dmsg the user 1488c2ecf20Sopenharmony_ci * will know that OProfile did not start and dmesg will tell them why. 1498c2ecf20Sopenharmony_ci * OProfile does not support returning errors on Stop. Not a huge issue 1508c2ecf20Sopenharmony_ci * since failure to reset the debug bus or stop the SPU PC collection is 1518c2ecf20Sopenharmony_ci * not a fatel issue. Chances are if the Stop failed, Start doesn't work 1528c2ecf20Sopenharmony_ci * either. 1538c2ecf20Sopenharmony_ci */ 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci/* 1568c2ecf20Sopenharmony_ci * Interpetation of hdw_thread: 1578c2ecf20Sopenharmony_ci * 0 - even virtual cpus 0, 2, 4,... 1588c2ecf20Sopenharmony_ci * 1 - odd virtual cpus 1, 3, 5, ... 1598c2ecf20Sopenharmony_ci * 1608c2ecf20Sopenharmony_ci * FIXME: this is strictly wrong, we need to clean this up in a number 1618c2ecf20Sopenharmony_ci * of places. It works for now. -arnd 1628c2ecf20Sopenharmony_ci */ 1638c2ecf20Sopenharmony_cistatic u32 hdw_thread; 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_cistatic u32 virt_cntr_inter_mask; 1668c2ecf20Sopenharmony_cistatic struct timer_list timer_virt_cntr; 1678c2ecf20Sopenharmony_cistatic struct timer_list timer_spu_event_swap; 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_ci/* 1708c2ecf20Sopenharmony_ci * pm_signal needs to be global since it is initialized in 1718c2ecf20Sopenharmony_ci * cell_reg_setup at the time when the necessary information 1728c2ecf20Sopenharmony_ci * is available. 
1738c2ecf20Sopenharmony_ci */ 1748c2ecf20Sopenharmony_cistatic struct pm_signal pm_signal[NR_PHYS_CTRS]; 1758c2ecf20Sopenharmony_cistatic int pm_rtas_token; /* token for debug bus setup call */ 1768c2ecf20Sopenharmony_cistatic int spu_rtas_token; /* token for SPU cycle profiling */ 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_cistatic u32 reset_value[NR_PHYS_CTRS]; 1798c2ecf20Sopenharmony_cistatic int num_counters; 1808c2ecf20Sopenharmony_cistatic int oprofile_running; 1818c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(cntr_lock); 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_cistatic u32 ctr_enabled; 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_cistatic unsigned char input_bus[NUM_INPUT_BUS_WORDS]; 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci/* 1888c2ecf20Sopenharmony_ci * Firmware interface functions 1898c2ecf20Sopenharmony_ci */ 1908c2ecf20Sopenharmony_cistatic int 1918c2ecf20Sopenharmony_cirtas_ibm_cbe_perftools(int subfunc, int passthru, 1928c2ecf20Sopenharmony_ci void *address, unsigned long length) 1938c2ecf20Sopenharmony_ci{ 1948c2ecf20Sopenharmony_ci u64 paddr = __pa(address); 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_ci return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, 1978c2ecf20Sopenharmony_ci passthru, paddr >> 32, paddr & 0xffffffff, length); 1988c2ecf20Sopenharmony_ci} 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_cistatic void pm_rtas_reset_signals(u32 node) 2018c2ecf20Sopenharmony_ci{ 2028c2ecf20Sopenharmony_ci int ret; 2038c2ecf20Sopenharmony_ci struct pm_signal pm_signal_local; 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_ci /* 2068c2ecf20Sopenharmony_ci * The debug bus is being set to the passthru disable state. 2078c2ecf20Sopenharmony_ci * However, the FW still expects at least one legal signal routing 2088c2ecf20Sopenharmony_ci * entry or it will return an error on the arguments. If we don't 2098c2ecf20Sopenharmony_ci * supply a valid entry, we must ignore all return values. 
Ignoring 2108c2ecf20Sopenharmony_ci * all return values means we might miss an error we should be 2118c2ecf20Sopenharmony_ci * concerned about. 2128c2ecf20Sopenharmony_ci */ 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci /* fw expects physical cpu #. */ 2158c2ecf20Sopenharmony_ci pm_signal_local.cpu = node; 2168c2ecf20Sopenharmony_ci pm_signal_local.signal_group = 21; 2178c2ecf20Sopenharmony_ci pm_signal_local.bus_word = 1; 2188c2ecf20Sopenharmony_ci pm_signal_local.sub_unit = 0; 2198c2ecf20Sopenharmony_ci pm_signal_local.bit = 0; 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci ret = rtas_ibm_cbe_perftools(SUBFUNC_RESET, PASSTHRU_DISABLE, 2228c2ecf20Sopenharmony_ci &pm_signal_local, 2238c2ecf20Sopenharmony_ci sizeof(struct pm_signal)); 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_ci if (unlikely(ret)) 2268c2ecf20Sopenharmony_ci /* 2278c2ecf20Sopenharmony_ci * Not a fatal error. For Oprofile stop, the oprofile 2288c2ecf20Sopenharmony_ci * functions do not support returning an error for 2298c2ecf20Sopenharmony_ci * failure to stop OProfile. 2308c2ecf20Sopenharmony_ci */ 2318c2ecf20Sopenharmony_ci printk(KERN_WARNING "%s: rtas returned: %d\n", 2328c2ecf20Sopenharmony_ci __func__, ret); 2338c2ecf20Sopenharmony_ci} 2348c2ecf20Sopenharmony_ci 2358c2ecf20Sopenharmony_cistatic int pm_rtas_activate_signals(u32 node, u32 count) 2368c2ecf20Sopenharmony_ci{ 2378c2ecf20Sopenharmony_ci int ret; 2388c2ecf20Sopenharmony_ci int i, j; 2398c2ecf20Sopenharmony_ci struct pm_signal pm_signal_local[NR_PHYS_CTRS]; 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci /* 2428c2ecf20Sopenharmony_ci * There is no debug setup required for the cycles event. 2438c2ecf20Sopenharmony_ci * Note that only events in the same group can be used. 2448c2ecf20Sopenharmony_ci * Otherwise, there will be conflicts in correctly routing 2458c2ecf20Sopenharmony_ci * the signals on the debug bus. 
It is the responsibility 2468c2ecf20Sopenharmony_ci * of the OProfile user tool to check the events are in 2478c2ecf20Sopenharmony_ci * the same group. 2488c2ecf20Sopenharmony_ci */ 2498c2ecf20Sopenharmony_ci i = 0; 2508c2ecf20Sopenharmony_ci for (j = 0; j < count; j++) { 2518c2ecf20Sopenharmony_ci if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) { 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_ci /* fw expects physical cpu # */ 2548c2ecf20Sopenharmony_ci pm_signal_local[i].cpu = node; 2558c2ecf20Sopenharmony_ci pm_signal_local[i].signal_group 2568c2ecf20Sopenharmony_ci = pm_signal[j].signal_group; 2578c2ecf20Sopenharmony_ci pm_signal_local[i].bus_word = pm_signal[j].bus_word; 2588c2ecf20Sopenharmony_ci pm_signal_local[i].sub_unit = pm_signal[j].sub_unit; 2598c2ecf20Sopenharmony_ci pm_signal_local[i].bit = pm_signal[j].bit; 2608c2ecf20Sopenharmony_ci i++; 2618c2ecf20Sopenharmony_ci } 2628c2ecf20Sopenharmony_ci } 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci if (i != 0) { 2658c2ecf20Sopenharmony_ci ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE, 2668c2ecf20Sopenharmony_ci pm_signal_local, 2678c2ecf20Sopenharmony_ci i * sizeof(struct pm_signal)); 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci if (unlikely(ret)) { 2708c2ecf20Sopenharmony_ci printk(KERN_WARNING "%s: rtas returned: %d\n", 2718c2ecf20Sopenharmony_ci __func__, ret); 2728c2ecf20Sopenharmony_ci return -EIO; 2738c2ecf20Sopenharmony_ci } 2748c2ecf20Sopenharmony_ci } 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci return 0; 2778c2ecf20Sopenharmony_ci} 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci/* 2808c2ecf20Sopenharmony_ci * PM Signal functions 2818c2ecf20Sopenharmony_ci */ 2828c2ecf20Sopenharmony_cistatic void set_pm_event(u32 ctr, int event, u32 unit_mask) 2838c2ecf20Sopenharmony_ci{ 2848c2ecf20Sopenharmony_ci struct pm_signal *p; 2858c2ecf20Sopenharmony_ci u32 signal_bit; 2868c2ecf20Sopenharmony_ci u32 bus_word, bus_type, count_cycles, polarity, 
input_control; 2878c2ecf20Sopenharmony_ci int j, i; 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci if (event == PPU_CYCLES_EVENT_NUM) { 2908c2ecf20Sopenharmony_ci /* Special Event: Count all cpu cycles */ 2918c2ecf20Sopenharmony_ci pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES; 2928c2ecf20Sopenharmony_ci p = &(pm_signal[ctr]); 2938c2ecf20Sopenharmony_ci p->signal_group = PPU_CYCLES_GRP_NUM; 2948c2ecf20Sopenharmony_ci p->bus_word = 1; 2958c2ecf20Sopenharmony_ci p->sub_unit = 0; 2968c2ecf20Sopenharmony_ci p->bit = 0; 2978c2ecf20Sopenharmony_ci goto out; 2988c2ecf20Sopenharmony_ci } else { 2998c2ecf20Sopenharmony_ci pm_regs.pm07_cntrl[ctr] = 0; 3008c2ecf20Sopenharmony_ci } 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci bus_word = GET_BUS_WORD(unit_mask); 3038c2ecf20Sopenharmony_ci bus_type = GET_BUS_TYPE(unit_mask); 3048c2ecf20Sopenharmony_ci count_cycles = GET_COUNT_CYCLES(unit_mask); 3058c2ecf20Sopenharmony_ci polarity = GET_POLARITY(unit_mask); 3068c2ecf20Sopenharmony_ci input_control = GET_INPUT_CONTROL(unit_mask); 3078c2ecf20Sopenharmony_ci signal_bit = (event % 100); 3088c2ecf20Sopenharmony_ci 3098c2ecf20Sopenharmony_ci p = &(pm_signal[ctr]); 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci p->signal_group = event / 100; 3128c2ecf20Sopenharmony_ci p->bus_word = bus_word; 3138c2ecf20Sopenharmony_ci p->sub_unit = GET_SUB_UNIT(unit_mask); 3148c2ecf20Sopenharmony_ci 3158c2ecf20Sopenharmony_ci pm_regs.pm07_cntrl[ctr] = 0; 3168c2ecf20Sopenharmony_ci pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles); 3178c2ecf20Sopenharmony_ci pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); 3188c2ecf20Sopenharmony_ci pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci /* 3218c2ecf20Sopenharmony_ci * Some of the islands signal selection is based on 64 bit words. 
3228c2ecf20Sopenharmony_ci * The debug bus words are 32 bits, the input words to the performance 3238c2ecf20Sopenharmony_ci * counters are defined as 32 bits. Need to convert the 64 bit island 3248c2ecf20Sopenharmony_ci * specification to the appropriate 32 input bit and bus word for the 3258c2ecf20Sopenharmony_ci * performance counter event selection. See the CELL Performance 3268c2ecf20Sopenharmony_ci * monitoring signals manual and the Perf cntr hardware descriptions 3278c2ecf20Sopenharmony_ci * for the details. 3288c2ecf20Sopenharmony_ci */ 3298c2ecf20Sopenharmony_ci if (input_control == 0) { 3308c2ecf20Sopenharmony_ci if (signal_bit > 31) { 3318c2ecf20Sopenharmony_ci signal_bit -= 32; 3328c2ecf20Sopenharmony_ci if (bus_word == 0x3) 3338c2ecf20Sopenharmony_ci bus_word = 0x2; 3348c2ecf20Sopenharmony_ci else if (bus_word == 0xc) 3358c2ecf20Sopenharmony_ci bus_word = 0x8; 3368c2ecf20Sopenharmony_ci } 3378c2ecf20Sopenharmony_ci 3388c2ecf20Sopenharmony_ci if ((bus_type == 0) && p->signal_group >= 60) 3398c2ecf20Sopenharmony_ci bus_type = 2; 3408c2ecf20Sopenharmony_ci if ((bus_type == 1) && p->signal_group >= 50) 3418c2ecf20Sopenharmony_ci bus_type = 0; 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit); 3448c2ecf20Sopenharmony_ci } else { 3458c2ecf20Sopenharmony_ci pm_regs.pm07_cntrl[ctr] = 0; 3468c2ecf20Sopenharmony_ci p->bit = signal_bit; 3478c2ecf20Sopenharmony_ci } 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_ci for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) { 3508c2ecf20Sopenharmony_ci if (bus_word & (1 << i)) { 3518c2ecf20Sopenharmony_ci pm_regs.debug_bus_control |= 3528c2ecf20Sopenharmony_ci (bus_type << (30 - (2 * i))); 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) { 3558c2ecf20Sopenharmony_ci if (input_bus[j] == 0xff) { 3568c2ecf20Sopenharmony_ci input_bus[j] = i; 3578c2ecf20Sopenharmony_ci pm_regs.group_control |= 3588c2ecf20Sopenharmony_ci (i << 
(30 - (2 * j))); 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ci break; 3618c2ecf20Sopenharmony_ci } 3628c2ecf20Sopenharmony_ci } 3638c2ecf20Sopenharmony_ci } 3648c2ecf20Sopenharmony_ci } 3658c2ecf20Sopenharmony_ciout: 3668c2ecf20Sopenharmony_ci ; 3678c2ecf20Sopenharmony_ci} 3688c2ecf20Sopenharmony_ci 3698c2ecf20Sopenharmony_cistatic void write_pm_cntrl(int cpu) 3708c2ecf20Sopenharmony_ci{ 3718c2ecf20Sopenharmony_ci /* 3728c2ecf20Sopenharmony_ci * Oprofile will use 32 bit counters, set bits 7:10 to 0 3738c2ecf20Sopenharmony_ci * pmregs.pm_cntrl is a global 3748c2ecf20Sopenharmony_ci */ 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_ci u32 val = 0; 3778c2ecf20Sopenharmony_ci if (pm_regs.pm_cntrl.enable == 1) 3788c2ecf20Sopenharmony_ci val |= CBE_PM_ENABLE_PERF_MON; 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci if (pm_regs.pm_cntrl.stop_at_max == 1) 3818c2ecf20Sopenharmony_ci val |= CBE_PM_STOP_AT_MAX; 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci if (pm_regs.pm_cntrl.trace_mode != 0) 3848c2ecf20Sopenharmony_ci val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode); 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci if (pm_regs.pm_cntrl.trace_buf_ovflw == 1) 3878c2ecf20Sopenharmony_ci val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw); 3888c2ecf20Sopenharmony_ci if (pm_regs.pm_cntrl.freeze == 1) 3898c2ecf20Sopenharmony_ci val |= CBE_PM_FREEZE_ALL_CTRS; 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace); 3928c2ecf20Sopenharmony_ci 3938c2ecf20Sopenharmony_ci /* 3948c2ecf20Sopenharmony_ci * Routine set_count_mode must be called previously to set 3958c2ecf20Sopenharmony_ci * the count mode based on the user selection of user and kernel. 
3968c2ecf20Sopenharmony_ci */ 3978c2ecf20Sopenharmony_ci val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode); 3988c2ecf20Sopenharmony_ci cbe_write_pm(cpu, pm_control, val); 3998c2ecf20Sopenharmony_ci} 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_cistatic inline void 4028c2ecf20Sopenharmony_ciset_count_mode(u32 kernel, u32 user) 4038c2ecf20Sopenharmony_ci{ 4048c2ecf20Sopenharmony_ci /* 4058c2ecf20Sopenharmony_ci * The user must specify user and kernel if they want them. If 4068c2ecf20Sopenharmony_ci * neither is specified, OProfile will count in hypervisor mode. 4078c2ecf20Sopenharmony_ci * pm_regs.pm_cntrl is a global 4088c2ecf20Sopenharmony_ci */ 4098c2ecf20Sopenharmony_ci if (kernel) { 4108c2ecf20Sopenharmony_ci if (user) 4118c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.count_mode = CBE_COUNT_ALL_MODES; 4128c2ecf20Sopenharmony_ci else 4138c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.count_mode = 4148c2ecf20Sopenharmony_ci CBE_COUNT_SUPERVISOR_MODE; 4158c2ecf20Sopenharmony_ci } else { 4168c2ecf20Sopenharmony_ci if (user) 4178c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.count_mode = CBE_COUNT_PROBLEM_MODE; 4188c2ecf20Sopenharmony_ci else 4198c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.count_mode = 4208c2ecf20Sopenharmony_ci CBE_COUNT_HYPERVISOR_MODE; 4218c2ecf20Sopenharmony_ci } 4228c2ecf20Sopenharmony_ci} 4238c2ecf20Sopenharmony_ci 4248c2ecf20Sopenharmony_cistatic inline void enable_ctr(u32 cpu, u32 ctr, u32 *pm07_cntrl) 4258c2ecf20Sopenharmony_ci{ 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_ci pm07_cntrl[ctr] |= CBE_PM_CTR_ENABLE; 4288c2ecf20Sopenharmony_ci cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]); 4298c2ecf20Sopenharmony_ci} 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci/* 4328c2ecf20Sopenharmony_ci * Oprofile is expected to collect data on all CPUs simultaneously. 4338c2ecf20Sopenharmony_ci * However, there is one set of performance counters per node. There are 4348c2ecf20Sopenharmony_ci * two hardware threads or virtual CPUs on each node. 
Hence, OProfile must 4358c2ecf20Sopenharmony_ci * multiplex in time the performance counter collection on the two virtual 4368c2ecf20Sopenharmony_ci * CPUs. The multiplexing of the performance counters is done by this 4378c2ecf20Sopenharmony_ci * virtual counter routine. 4388c2ecf20Sopenharmony_ci * 4398c2ecf20Sopenharmony_ci * The pmc_values used below is defined as 'per-cpu' but its use is 4408c2ecf20Sopenharmony_ci * more akin to 'per-node'. We need to store two sets of counter 4418c2ecf20Sopenharmony_ci * values per node -- one for the previous run and one for the next. 4428c2ecf20Sopenharmony_ci * The per-cpu[NR_PHYS_CTRS] gives us the storage we need. Each odd/even 4438c2ecf20Sopenharmony_ci * pair of per-cpu arrays is used for storing the previous and next 4448c2ecf20Sopenharmony_ci * pmc values for a given node. 4458c2ecf20Sopenharmony_ci * NOTE: We use the per-cpu variable to improve cache performance. 4468c2ecf20Sopenharmony_ci * 4478c2ecf20Sopenharmony_ci * This routine will alternate loading the virtual counters for 4488c2ecf20Sopenharmony_ci * virtual CPUs 4498c2ecf20Sopenharmony_ci */ 4508c2ecf20Sopenharmony_cistatic void cell_virtual_cntr(struct timer_list *unused) 4518c2ecf20Sopenharmony_ci{ 4528c2ecf20Sopenharmony_ci int i, prev_hdw_thread, next_hdw_thread; 4538c2ecf20Sopenharmony_ci u32 cpu; 4548c2ecf20Sopenharmony_ci unsigned long flags; 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_ci /* 4578c2ecf20Sopenharmony_ci * Make sure that the interrupt_hander and the virt counter are 4588c2ecf20Sopenharmony_ci * not both playing with the counters on the same node. 
4598c2ecf20Sopenharmony_ci */ 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci spin_lock_irqsave(&cntr_lock, flags); 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_ci prev_hdw_thread = hdw_thread; 4648c2ecf20Sopenharmony_ci 4658c2ecf20Sopenharmony_ci /* switch the cpu handling the interrupts */ 4668c2ecf20Sopenharmony_ci hdw_thread = 1 ^ hdw_thread; 4678c2ecf20Sopenharmony_ci next_hdw_thread = hdw_thread; 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci pm_regs.group_control = 0; 4708c2ecf20Sopenharmony_ci pm_regs.debug_bus_control = 0; 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) 4738c2ecf20Sopenharmony_ci input_bus[i] = 0xff; 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci /* 4768c2ecf20Sopenharmony_ci * There are some per thread events. Must do the 4778c2ecf20Sopenharmony_ci * set event, for the thread that is being started 4788c2ecf20Sopenharmony_ci */ 4798c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; i++) 4808c2ecf20Sopenharmony_ci set_pm_event(i, 4818c2ecf20Sopenharmony_ci pmc_cntrl[next_hdw_thread][i].evnts, 4828c2ecf20Sopenharmony_ci pmc_cntrl[next_hdw_thread][i].masks); 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci /* 4858c2ecf20Sopenharmony_ci * The following is done only once per each node, but 4868c2ecf20Sopenharmony_ci * we need cpu #, not node #, to pass to the cbe_xxx functions. 
4878c2ecf20Sopenharmony_ci */ 4888c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 4898c2ecf20Sopenharmony_ci if (cbe_get_hw_thread_id(cpu)) 4908c2ecf20Sopenharmony_ci continue; 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci /* 4938c2ecf20Sopenharmony_ci * stop counters, save counter values, restore counts 4948c2ecf20Sopenharmony_ci * for previous thread 4958c2ecf20Sopenharmony_ci */ 4968c2ecf20Sopenharmony_ci cbe_disable_pm(cpu); 4978c2ecf20Sopenharmony_ci cbe_disable_pm_interrupts(cpu); 4988c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; i++) { 4998c2ecf20Sopenharmony_ci per_cpu(pmc_values, cpu + prev_hdw_thread)[i] 5008c2ecf20Sopenharmony_ci = cbe_read_ctr(cpu, i); 5018c2ecf20Sopenharmony_ci 5028c2ecf20Sopenharmony_ci if (per_cpu(pmc_values, cpu + next_hdw_thread)[i] 5038c2ecf20Sopenharmony_ci == 0xFFFFFFFF) 5048c2ecf20Sopenharmony_ci /* If the cntr value is 0xffffffff, we must 5058c2ecf20Sopenharmony_ci * reset that to 0xfffffff0 when the current 5068c2ecf20Sopenharmony_ci * thread is restarted. This will generate a 5078c2ecf20Sopenharmony_ci * new interrupt and make sure that we never 5088c2ecf20Sopenharmony_ci * restore the counters to the max value. If 5098c2ecf20Sopenharmony_ci * the counters were restored to the max value, 5108c2ecf20Sopenharmony_ci * they do not increment and no interrupts are 5118c2ecf20Sopenharmony_ci * generated. Hence no more samples will be 5128c2ecf20Sopenharmony_ci * collected on that cpu. 5138c2ecf20Sopenharmony_ci */ 5148c2ecf20Sopenharmony_ci cbe_write_ctr(cpu, i, 0xFFFFFFF0); 5158c2ecf20Sopenharmony_ci else 5168c2ecf20Sopenharmony_ci cbe_write_ctr(cpu, i, 5178c2ecf20Sopenharmony_ci per_cpu(pmc_values, 5188c2ecf20Sopenharmony_ci cpu + 5198c2ecf20Sopenharmony_ci next_hdw_thread)[i]); 5208c2ecf20Sopenharmony_ci } 5218c2ecf20Sopenharmony_ci 5228c2ecf20Sopenharmony_ci /* 5238c2ecf20Sopenharmony_ci * Switch to the other thread. 
Change the interrupt 5248c2ecf20Sopenharmony_ci * and control regs to be scheduled on the CPU 5258c2ecf20Sopenharmony_ci * corresponding to the thread to execute. 5268c2ecf20Sopenharmony_ci */ 5278c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; i++) { 5288c2ecf20Sopenharmony_ci if (pmc_cntrl[next_hdw_thread][i].enabled) { 5298c2ecf20Sopenharmony_ci /* 5308c2ecf20Sopenharmony_ci * There are some per thread events. 5318c2ecf20Sopenharmony_ci * Must do the set event, enable_cntr 5328c2ecf20Sopenharmony_ci * for each cpu. 5338c2ecf20Sopenharmony_ci */ 5348c2ecf20Sopenharmony_ci enable_ctr(cpu, i, 5358c2ecf20Sopenharmony_ci pm_regs.pm07_cntrl); 5368c2ecf20Sopenharmony_ci } else { 5378c2ecf20Sopenharmony_ci cbe_write_pm07_control(cpu, i, 0); 5388c2ecf20Sopenharmony_ci } 5398c2ecf20Sopenharmony_ci } 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ci /* Enable interrupts on the CPU thread that is starting */ 5428c2ecf20Sopenharmony_ci cbe_enable_pm_interrupts(cpu, next_hdw_thread, 5438c2ecf20Sopenharmony_ci virt_cntr_inter_mask); 5448c2ecf20Sopenharmony_ci cbe_enable_pm(cpu); 5458c2ecf20Sopenharmony_ci } 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&cntr_lock, flags); 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_ci mod_timer(&timer_virt_cntr, jiffies + HZ / 10); 5508c2ecf20Sopenharmony_ci} 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_cistatic void start_virt_cntrs(void) 5538c2ecf20Sopenharmony_ci{ 5548c2ecf20Sopenharmony_ci timer_setup(&timer_virt_cntr, cell_virtual_cntr, 0); 5558c2ecf20Sopenharmony_ci timer_virt_cntr.expires = jiffies + HZ / 10; 5568c2ecf20Sopenharmony_ci add_timer(&timer_virt_cntr); 5578c2ecf20Sopenharmony_ci} 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_cistatic int cell_reg_setup_spu_cycles(struct op_counter_config *ctr, 5608c2ecf20Sopenharmony_ci struct op_system_config *sys, int num_ctrs) 5618c2ecf20Sopenharmony_ci{ 5628c2ecf20Sopenharmony_ci spu_cycle_reset = ctr[0].count; 
5638c2ecf20Sopenharmony_ci 5648c2ecf20Sopenharmony_ci /* 5658c2ecf20Sopenharmony_ci * Each node will need to make the rtas call to start 5668c2ecf20Sopenharmony_ci * and stop SPU profiling. Get the token once and store it. 5678c2ecf20Sopenharmony_ci */ 5688c2ecf20Sopenharmony_ci spu_rtas_token = rtas_token("ibm,cbe-spu-perftools"); 5698c2ecf20Sopenharmony_ci 5708c2ecf20Sopenharmony_ci if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) { 5718c2ecf20Sopenharmony_ci printk(KERN_ERR 5728c2ecf20Sopenharmony_ci "%s: rtas token ibm,cbe-spu-perftools unknown\n", 5738c2ecf20Sopenharmony_ci __func__); 5748c2ecf20Sopenharmony_ci return -EIO; 5758c2ecf20Sopenharmony_ci } 5768c2ecf20Sopenharmony_ci return 0; 5778c2ecf20Sopenharmony_ci} 5788c2ecf20Sopenharmony_ci 5798c2ecf20Sopenharmony_ci/* Unfortunately, the hardware will only support event profiling 5808c2ecf20Sopenharmony_ci * on one SPU per node at a time. Therefore, we must time slice 5818c2ecf20Sopenharmony_ci * the profiling across all SPUs in the node. Note, we do this 5828c2ecf20Sopenharmony_ci * in parallel for each node. The following routine is called 5838c2ecf20Sopenharmony_ci * periodically based on kernel timer to switch which SPU is 5848c2ecf20Sopenharmony_ci * being monitored in a round robbin fashion. 
5858c2ecf20Sopenharmony_ci */ 5868c2ecf20Sopenharmony_cistatic void spu_evnt_swap(struct timer_list *unused) 5878c2ecf20Sopenharmony_ci{ 5888c2ecf20Sopenharmony_ci int node; 5898c2ecf20Sopenharmony_ci int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx; 5908c2ecf20Sopenharmony_ci unsigned long flags; 5918c2ecf20Sopenharmony_ci int cpu; 5928c2ecf20Sopenharmony_ci int ret; 5938c2ecf20Sopenharmony_ci u32 interrupt_mask; 5948c2ecf20Sopenharmony_ci 5958c2ecf20Sopenharmony_ci 5968c2ecf20Sopenharmony_ci /* enable interrupts on cntr 0 */ 5978c2ecf20Sopenharmony_ci interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0); 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_ci hdw_thread = 0; 6008c2ecf20Sopenharmony_ci 6018c2ecf20Sopenharmony_ci /* Make sure spu event interrupt handler and spu event swap 6028c2ecf20Sopenharmony_ci * don't access the counters simultaneously. 6038c2ecf20Sopenharmony_ci */ 6048c2ecf20Sopenharmony_ci spin_lock_irqsave(&cntr_lock, flags); 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_ci cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx; 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE) 6098c2ecf20Sopenharmony_ci spu_evnt_phys_spu_indx = 0; 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci pm_signal[0].sub_unit = spu_evnt_phys_spu_indx; 6128c2ecf20Sopenharmony_ci pm_signal[1].sub_unit = spu_evnt_phys_spu_indx; 6138c2ecf20Sopenharmony_ci pm_signal[2].sub_unit = spu_evnt_phys_spu_indx; 6148c2ecf20Sopenharmony_ci 6158c2ecf20Sopenharmony_ci /* switch the SPU being profiled on each node */ 6168c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 6178c2ecf20Sopenharmony_ci if (cbe_get_hw_thread_id(cpu)) 6188c2ecf20Sopenharmony_ci continue; 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_ci node = cbe_cpu_to_node(cpu); 6218c2ecf20Sopenharmony_ci cur_phys_spu = (node * NUM_SPUS_PER_NODE) 6228c2ecf20Sopenharmony_ci + cur_spu_evnt_phys_spu_indx; 6238c2ecf20Sopenharmony_ci nxt_phys_spu = (node * 
NUM_SPUS_PER_NODE) 6248c2ecf20Sopenharmony_ci + spu_evnt_phys_spu_indx; 6258c2ecf20Sopenharmony_ci 6268c2ecf20Sopenharmony_ci /* 6278c2ecf20Sopenharmony_ci * stop counters, save counter values, restore counts 6288c2ecf20Sopenharmony_ci * for previous physical SPU 6298c2ecf20Sopenharmony_ci */ 6308c2ecf20Sopenharmony_ci cbe_disable_pm(cpu); 6318c2ecf20Sopenharmony_ci cbe_disable_pm_interrupts(cpu); 6328c2ecf20Sopenharmony_ci 6338c2ecf20Sopenharmony_ci spu_pm_cnt[cur_phys_spu] 6348c2ecf20Sopenharmony_ci = cbe_read_ctr(cpu, 0); 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_ci /* restore previous count for the next spu to sample */ 6378c2ecf20Sopenharmony_ci /* NOTE, hardware issue, counter will not start if the 6388c2ecf20Sopenharmony_ci * counter value is at max (0xFFFFFFFF). 6398c2ecf20Sopenharmony_ci */ 6408c2ecf20Sopenharmony_ci if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF) 6418c2ecf20Sopenharmony_ci cbe_write_ctr(cpu, 0, 0xFFFFFFF0); 6428c2ecf20Sopenharmony_ci else 6438c2ecf20Sopenharmony_ci cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]); 6448c2ecf20Sopenharmony_ci 6458c2ecf20Sopenharmony_ci pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 6468c2ecf20Sopenharmony_ci 6478c2ecf20Sopenharmony_ci /* setup the debug bus measure the one event and 6488c2ecf20Sopenharmony_ci * the two events to route the next SPU's PC on 6498c2ecf20Sopenharmony_ci * the debug bus 6508c2ecf20Sopenharmony_ci */ 6518c2ecf20Sopenharmony_ci ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3); 6528c2ecf20Sopenharmony_ci if (ret) 6538c2ecf20Sopenharmony_ci printk(KERN_ERR "%s: pm_rtas_activate_signals failed, " 6548c2ecf20Sopenharmony_ci "SPU event swap\n", __func__); 6558c2ecf20Sopenharmony_ci 6568c2ecf20Sopenharmony_ci /* clear the trace buffer, don't want to take PC for 6578c2ecf20Sopenharmony_ci * previous SPU*/ 6588c2ecf20Sopenharmony_ci cbe_write_pm(cpu, trace_address, 0); 6598c2ecf20Sopenharmony_ci 6608c2ecf20Sopenharmony_ci enable_ctr(cpu, 0, pm_regs.pm07_cntrl); 
6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci /* Enable interrupts on the CPU thread that is starting */ 6638c2ecf20Sopenharmony_ci cbe_enable_pm_interrupts(cpu, hdw_thread, 6648c2ecf20Sopenharmony_ci interrupt_mask); 6658c2ecf20Sopenharmony_ci cbe_enable_pm(cpu); 6668c2ecf20Sopenharmony_ci } 6678c2ecf20Sopenharmony_ci 6688c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&cntr_lock, flags); 6698c2ecf20Sopenharmony_ci 6708c2ecf20Sopenharmony_ci /* swap approximately every 0.1 seconds */ 6718c2ecf20Sopenharmony_ci mod_timer(&timer_spu_event_swap, jiffies + HZ / 25); 6728c2ecf20Sopenharmony_ci} 6738c2ecf20Sopenharmony_ci 6748c2ecf20Sopenharmony_cistatic void start_spu_event_swap(void) 6758c2ecf20Sopenharmony_ci{ 6768c2ecf20Sopenharmony_ci timer_setup(&timer_spu_event_swap, spu_evnt_swap, 0); 6778c2ecf20Sopenharmony_ci timer_spu_event_swap.expires = jiffies + HZ / 25; 6788c2ecf20Sopenharmony_ci add_timer(&timer_spu_event_swap); 6798c2ecf20Sopenharmony_ci} 6808c2ecf20Sopenharmony_ci 6818c2ecf20Sopenharmony_cistatic int cell_reg_setup_spu_events(struct op_counter_config *ctr, 6828c2ecf20Sopenharmony_ci struct op_system_config *sys, int num_ctrs) 6838c2ecf20Sopenharmony_ci{ 6848c2ecf20Sopenharmony_ci int i; 6858c2ecf20Sopenharmony_ci 6868c2ecf20Sopenharmony_ci /* routine is called once for all nodes */ 6878c2ecf20Sopenharmony_ci 6888c2ecf20Sopenharmony_ci spu_evnt_phys_spu_indx = 0; 6898c2ecf20Sopenharmony_ci /* 6908c2ecf20Sopenharmony_ci * For all events except PPU CYCLEs, each node will need to make 6918c2ecf20Sopenharmony_ci * the rtas cbe-perftools call to setup and reset the debug bus. 6928c2ecf20Sopenharmony_ci * Make the token lookup call once and store it in the global 6938c2ecf20Sopenharmony_ci * variable pm_rtas_token. 
6948c2ecf20Sopenharmony_ci */ 6958c2ecf20Sopenharmony_ci pm_rtas_token = rtas_token("ibm,cbe-perftools"); 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_ci if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { 6988c2ecf20Sopenharmony_ci printk(KERN_ERR 6998c2ecf20Sopenharmony_ci "%s: rtas token ibm,cbe-perftools unknown\n", 7008c2ecf20Sopenharmony_ci __func__); 7018c2ecf20Sopenharmony_ci return -EIO; 7028c2ecf20Sopenharmony_ci } 7038c2ecf20Sopenharmony_ci 7048c2ecf20Sopenharmony_ci /* setup the pm_control register settings, 7058c2ecf20Sopenharmony_ci * settings will be written per node by the 7068c2ecf20Sopenharmony_ci * cell_cpu_setup() function. 7078c2ecf20Sopenharmony_ci */ 7088c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.trace_buf_ovflw = 1; 7098c2ecf20Sopenharmony_ci 7108c2ecf20Sopenharmony_ci /* Use the occurrence trace mode to have SPU PC saved 7118c2ecf20Sopenharmony_ci * to the trace buffer. Occurrence data in trace buffer 7128c2ecf20Sopenharmony_ci * is not used. Bit 2 must be set to store SPU addresses. 7138c2ecf20Sopenharmony_ci */ 7148c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.trace_mode = 2; 7158c2ecf20Sopenharmony_ci 7168c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.spu_addr_trace = 0x1; /* using debug bus 7178c2ecf20Sopenharmony_ci event 2 & 3 */ 7188c2ecf20Sopenharmony_ci 7198c2ecf20Sopenharmony_ci /* setup the debug bus event array with the SPU PC routing events. 7208c2ecf20Sopenharmony_ci * Note, pm_signal[0] will be filled in by set_pm_event() call below. 
7218c2ecf20Sopenharmony_ci */ 7228c2ecf20Sopenharmony_ci pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100; 7238c2ecf20Sopenharmony_ci pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A); 7248c2ecf20Sopenharmony_ci pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100; 7258c2ecf20Sopenharmony_ci pm_signal[1].sub_unit = spu_evnt_phys_spu_indx; 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100; 7288c2ecf20Sopenharmony_ci pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B); 7298c2ecf20Sopenharmony_ci pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100; 7308c2ecf20Sopenharmony_ci pm_signal[2].sub_unit = spu_evnt_phys_spu_indx; 7318c2ecf20Sopenharmony_ci 7328c2ecf20Sopenharmony_ci /* Set the user selected spu event to profile on, 7338c2ecf20Sopenharmony_ci * note, only one SPU profiling event is supported 7348c2ecf20Sopenharmony_ci */ 7358c2ecf20Sopenharmony_ci num_counters = 1; /* Only support one SPU event at a time */ 7368c2ecf20Sopenharmony_ci set_pm_event(0, ctr[0].event, ctr[0].unit_mask); 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci reset_value[0] = 0xFFFFFFFF - ctr[0].count; 7398c2ecf20Sopenharmony_ci 7408c2ecf20Sopenharmony_ci /* global, used by cell_cpu_setup */ 7418c2ecf20Sopenharmony_ci ctr_enabled |= 1; 7428c2ecf20Sopenharmony_ci 7438c2ecf20Sopenharmony_ci /* Initialize the count for each SPU to the reset value */ 7448c2ecf20Sopenharmony_ci for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++) 7458c2ecf20Sopenharmony_ci spu_pm_cnt[i] = reset_value[0]; 7468c2ecf20Sopenharmony_ci 7478c2ecf20Sopenharmony_ci return 0; 7488c2ecf20Sopenharmony_ci} 7498c2ecf20Sopenharmony_ci 7508c2ecf20Sopenharmony_cistatic int cell_reg_setup_ppu(struct op_counter_config *ctr, 7518c2ecf20Sopenharmony_ci struct op_system_config *sys, int num_ctrs) 7528c2ecf20Sopenharmony_ci{ 7538c2ecf20Sopenharmony_ci /* routine is called once for all nodes */ 7548c2ecf20Sopenharmony_ci int i, j, cpu; 
7558c2ecf20Sopenharmony_ci 7568c2ecf20Sopenharmony_ci num_counters = num_ctrs; 7578c2ecf20Sopenharmony_ci 7588c2ecf20Sopenharmony_ci if (unlikely(num_ctrs > NR_PHYS_CTRS)) { 7598c2ecf20Sopenharmony_ci printk(KERN_ERR 7608c2ecf20Sopenharmony_ci "%s: Oprofile, number of specified events " \ 7618c2ecf20Sopenharmony_ci "exceeds number of physical counters\n", 7628c2ecf20Sopenharmony_ci __func__); 7638c2ecf20Sopenharmony_ci return -EIO; 7648c2ecf20Sopenharmony_ci } 7658c2ecf20Sopenharmony_ci 7668c2ecf20Sopenharmony_ci set_count_mode(sys->enable_kernel, sys->enable_user); 7678c2ecf20Sopenharmony_ci 7688c2ecf20Sopenharmony_ci /* Setup the thread 0 events */ 7698c2ecf20Sopenharmony_ci for (i = 0; i < num_ctrs; ++i) { 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ci pmc_cntrl[0][i].evnts = ctr[i].event; 7728c2ecf20Sopenharmony_ci pmc_cntrl[0][i].masks = ctr[i].unit_mask; 7738c2ecf20Sopenharmony_ci pmc_cntrl[0][i].enabled = ctr[i].enabled; 7748c2ecf20Sopenharmony_ci pmc_cntrl[0][i].vcntr = i; 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_ci for_each_possible_cpu(j) 7778c2ecf20Sopenharmony_ci per_cpu(pmc_values, j)[i] = 0; 7788c2ecf20Sopenharmony_ci } 7798c2ecf20Sopenharmony_ci 7808c2ecf20Sopenharmony_ci /* 7818c2ecf20Sopenharmony_ci * Setup the thread 1 events, map the thread 0 event to the 7828c2ecf20Sopenharmony_ci * equivalent thread 1 event. 
7838c2ecf20Sopenharmony_ci */ 7848c2ecf20Sopenharmony_ci for (i = 0; i < num_ctrs; ++i) { 7858c2ecf20Sopenharmony_ci if ((ctr[i].event >= 2100) && (ctr[i].event <= 2111)) 7868c2ecf20Sopenharmony_ci pmc_cntrl[1][i].evnts = ctr[i].event + 19; 7878c2ecf20Sopenharmony_ci else if (ctr[i].event == 2203) 7888c2ecf20Sopenharmony_ci pmc_cntrl[1][i].evnts = ctr[i].event; 7898c2ecf20Sopenharmony_ci else if ((ctr[i].event >= 2200) && (ctr[i].event <= 2215)) 7908c2ecf20Sopenharmony_ci pmc_cntrl[1][i].evnts = ctr[i].event + 16; 7918c2ecf20Sopenharmony_ci else 7928c2ecf20Sopenharmony_ci pmc_cntrl[1][i].evnts = ctr[i].event; 7938c2ecf20Sopenharmony_ci 7948c2ecf20Sopenharmony_ci pmc_cntrl[1][i].masks = ctr[i].unit_mask; 7958c2ecf20Sopenharmony_ci pmc_cntrl[1][i].enabled = ctr[i].enabled; 7968c2ecf20Sopenharmony_ci pmc_cntrl[1][i].vcntr = i; 7978c2ecf20Sopenharmony_ci } 7988c2ecf20Sopenharmony_ci 7998c2ecf20Sopenharmony_ci for (i = 0; i < NUM_INPUT_BUS_WORDS; i++) 8008c2ecf20Sopenharmony_ci input_bus[i] = 0xff; 8018c2ecf20Sopenharmony_ci 8028c2ecf20Sopenharmony_ci /* 8038c2ecf20Sopenharmony_ci * Our counters count up, and "count" refers to 8048c2ecf20Sopenharmony_ci * how much before the next interrupt, and we interrupt 8058c2ecf20Sopenharmony_ci * on overflow. So we calculate the starting value 8068c2ecf20Sopenharmony_ci * which will give us "count" until overflow. 8078c2ecf20Sopenharmony_ci * Then we set the events on the enabled counters. 
8088c2ecf20Sopenharmony_ci */ 8098c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; ++i) { 8108c2ecf20Sopenharmony_ci /* start with virtual counter set 0 */ 8118c2ecf20Sopenharmony_ci if (pmc_cntrl[0][i].enabled) { 8128c2ecf20Sopenharmony_ci /* Using 32bit counters, reset max - count */ 8138c2ecf20Sopenharmony_ci reset_value[i] = 0xFFFFFFFF - ctr[i].count; 8148c2ecf20Sopenharmony_ci set_pm_event(i, 8158c2ecf20Sopenharmony_ci pmc_cntrl[0][i].evnts, 8168c2ecf20Sopenharmony_ci pmc_cntrl[0][i].masks); 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_ci /* global, used by cell_cpu_setup */ 8198c2ecf20Sopenharmony_ci ctr_enabled |= (1 << i); 8208c2ecf20Sopenharmony_ci } 8218c2ecf20Sopenharmony_ci } 8228c2ecf20Sopenharmony_ci 8238c2ecf20Sopenharmony_ci /* initialize the previous counts for the virtual cntrs */ 8248c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) 8258c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; ++i) { 8268c2ecf20Sopenharmony_ci per_cpu(pmc_values, cpu)[i] = reset_value[i]; 8278c2ecf20Sopenharmony_ci } 8288c2ecf20Sopenharmony_ci 8298c2ecf20Sopenharmony_ci return 0; 8308c2ecf20Sopenharmony_ci} 8318c2ecf20Sopenharmony_ci 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_ci/* This function is called once for all cpus combined */ 8348c2ecf20Sopenharmony_cistatic int cell_reg_setup(struct op_counter_config *ctr, 8358c2ecf20Sopenharmony_ci struct op_system_config *sys, int num_ctrs) 8368c2ecf20Sopenharmony_ci{ 8378c2ecf20Sopenharmony_ci int ret=0; 8388c2ecf20Sopenharmony_ci spu_cycle_reset = 0; 8398c2ecf20Sopenharmony_ci 8408c2ecf20Sopenharmony_ci /* initialize the spu_arr_trace value, will be reset if 8418c2ecf20Sopenharmony_ci * doing spu event profiling. 
8428c2ecf20Sopenharmony_ci */ 8438c2ecf20Sopenharmony_ci pm_regs.group_control = 0; 8448c2ecf20Sopenharmony_ci pm_regs.debug_bus_control = 0; 8458c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.stop_at_max = 1; 8468c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.trace_mode = 0; 8478c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.freeze = 1; 8488c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.trace_buf_ovflw = 0; 8498c2ecf20Sopenharmony_ci pm_regs.pm_cntrl.spu_addr_trace = 0; 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci /* 8528c2ecf20Sopenharmony_ci * For all events except PPU CYCLEs, each node will need to make 8538c2ecf20Sopenharmony_ci * the rtas cbe-perftools call to setup and reset the debug bus. 8548c2ecf20Sopenharmony_ci * Make the token lookup call once and store it in the global 8558c2ecf20Sopenharmony_ci * variable pm_rtas_token. 8568c2ecf20Sopenharmony_ci */ 8578c2ecf20Sopenharmony_ci pm_rtas_token = rtas_token("ibm,cbe-perftools"); 8588c2ecf20Sopenharmony_ci 8598c2ecf20Sopenharmony_ci if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { 8608c2ecf20Sopenharmony_ci printk(KERN_ERR 8618c2ecf20Sopenharmony_ci "%s: rtas token ibm,cbe-perftools unknown\n", 8628c2ecf20Sopenharmony_ci __func__); 8638c2ecf20Sopenharmony_ci return -EIO; 8648c2ecf20Sopenharmony_ci } 8658c2ecf20Sopenharmony_ci 8668c2ecf20Sopenharmony_ci if (ctr[0].event == SPU_CYCLES_EVENT_NUM) { 8678c2ecf20Sopenharmony_ci profiling_mode = SPU_PROFILING_CYCLES; 8688c2ecf20Sopenharmony_ci ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs); 8698c2ecf20Sopenharmony_ci } else if ((ctr[0].event >= SPU_EVENT_NUM_START) && 8708c2ecf20Sopenharmony_ci (ctr[0].event <= SPU_EVENT_NUM_STOP)) { 8718c2ecf20Sopenharmony_ci profiling_mode = SPU_PROFILING_EVENTS; 8728c2ecf20Sopenharmony_ci spu_cycle_reset = ctr[0].count; 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci /* for SPU event profiling, need to setup the 8758c2ecf20Sopenharmony_ci * pm_signal array with the events to route the 8768c2ecf20Sopenharmony_ci * SPU PC before 
making the FW call. Note, only 8778c2ecf20Sopenharmony_ci * one SPU event for profiling can be specified 8788c2ecf20Sopenharmony_ci * at a time. 8798c2ecf20Sopenharmony_ci */ 8808c2ecf20Sopenharmony_ci cell_reg_setup_spu_events(ctr, sys, num_ctrs); 8818c2ecf20Sopenharmony_ci } else { 8828c2ecf20Sopenharmony_ci profiling_mode = PPU_PROFILING; 8838c2ecf20Sopenharmony_ci ret = cell_reg_setup_ppu(ctr, sys, num_ctrs); 8848c2ecf20Sopenharmony_ci } 8858c2ecf20Sopenharmony_ci 8868c2ecf20Sopenharmony_ci return ret; 8878c2ecf20Sopenharmony_ci} 8888c2ecf20Sopenharmony_ci 8898c2ecf20Sopenharmony_ci 8908c2ecf20Sopenharmony_ci 8918c2ecf20Sopenharmony_ci/* This function is called once for each cpu */ 8928c2ecf20Sopenharmony_cistatic int cell_cpu_setup(struct op_counter_config *cntr) 8938c2ecf20Sopenharmony_ci{ 8948c2ecf20Sopenharmony_ci u32 cpu = smp_processor_id(); 8958c2ecf20Sopenharmony_ci u32 num_enabled = 0; 8968c2ecf20Sopenharmony_ci int i; 8978c2ecf20Sopenharmony_ci int ret; 8988c2ecf20Sopenharmony_ci 8998c2ecf20Sopenharmony_ci /* Cycle based SPU profiling does not use the performance 9008c2ecf20Sopenharmony_ci * counters. The trace array is configured to collect 9018c2ecf20Sopenharmony_ci * the data. 9028c2ecf20Sopenharmony_ci */ 9038c2ecf20Sopenharmony_ci if (profiling_mode == SPU_PROFILING_CYCLES) 9048c2ecf20Sopenharmony_ci return 0; 9058c2ecf20Sopenharmony_ci 9068c2ecf20Sopenharmony_ci /* There is one performance monitor per processor chip (i.e. node), 9078c2ecf20Sopenharmony_ci * so we only need to perform this function once per node. 
9088c2ecf20Sopenharmony_ci */ 9098c2ecf20Sopenharmony_ci if (cbe_get_hw_thread_id(cpu)) 9108c2ecf20Sopenharmony_ci return 0; 9118c2ecf20Sopenharmony_ci 9128c2ecf20Sopenharmony_ci /* Stop all counters */ 9138c2ecf20Sopenharmony_ci cbe_disable_pm(cpu); 9148c2ecf20Sopenharmony_ci cbe_disable_pm_interrupts(cpu); 9158c2ecf20Sopenharmony_ci 9168c2ecf20Sopenharmony_ci cbe_write_pm(cpu, pm_start_stop, 0); 9178c2ecf20Sopenharmony_ci cbe_write_pm(cpu, group_control, pm_regs.group_control); 9188c2ecf20Sopenharmony_ci cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control); 9198c2ecf20Sopenharmony_ci write_pm_cntrl(cpu); 9208c2ecf20Sopenharmony_ci 9218c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; ++i) { 9228c2ecf20Sopenharmony_ci if (ctr_enabled & (1 << i)) { 9238c2ecf20Sopenharmony_ci pm_signal[num_enabled].cpu = cbe_cpu_to_node(cpu); 9248c2ecf20Sopenharmony_ci num_enabled++; 9258c2ecf20Sopenharmony_ci } 9268c2ecf20Sopenharmony_ci } 9278c2ecf20Sopenharmony_ci 9288c2ecf20Sopenharmony_ci /* 9298c2ecf20Sopenharmony_ci * The pm_rtas_activate_signals will return -EIO if the FW 9308c2ecf20Sopenharmony_ci * call failed. 
9318c2ecf20Sopenharmony_ci */ 9328c2ecf20Sopenharmony_ci if (profiling_mode == SPU_PROFILING_EVENTS) { 9338c2ecf20Sopenharmony_ci /* For SPU event profiling also need to setup the 9348c2ecf20Sopenharmony_ci * pm interval timer 9358c2ecf20Sopenharmony_ci */ 9368c2ecf20Sopenharmony_ci ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 9378c2ecf20Sopenharmony_ci num_enabled+2); 9388c2ecf20Sopenharmony_ci /* store PC from debug bus to Trace buffer as often 9398c2ecf20Sopenharmony_ci * as possible (every 10 cycles) 9408c2ecf20Sopenharmony_ci */ 9418c2ecf20Sopenharmony_ci cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); 9428c2ecf20Sopenharmony_ci return ret; 9438c2ecf20Sopenharmony_ci } else 9448c2ecf20Sopenharmony_ci return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 9458c2ecf20Sopenharmony_ci num_enabled); 9468c2ecf20Sopenharmony_ci} 9478c2ecf20Sopenharmony_ci 9488c2ecf20Sopenharmony_ci#define ENTRIES 303 9498c2ecf20Sopenharmony_ci#define MAXLFSR 0xFFFFFF 9508c2ecf20Sopenharmony_ci 9518c2ecf20Sopenharmony_ci/* precomputed table of 24 bit LFSR values */ 9528c2ecf20Sopenharmony_cistatic int initial_lfsr[] = { 9538c2ecf20Sopenharmony_ci 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424, 9548c2ecf20Sopenharmony_ci 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716, 9558c2ecf20Sopenharmony_ci 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547, 9568c2ecf20Sopenharmony_ci 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392, 9578c2ecf20Sopenharmony_ci 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026, 9588c2ecf20Sopenharmony_ci 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556, 9598c2ecf20Sopenharmony_ci 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769, 9608c2ecf20Sopenharmony_ci 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893, 9618c2ecf20Sopenharmony_ci 11914928, 4732944, 10695697, 
#define ENTRIES	 303
#define MAXLFSR	 0xFFFFFF

/* precomputed table of 24 bit LFSR values (read-only lookup data) */
static const int initial_lfsr[] = {
 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
};

/*
 * The hardware uses an LFSR counting sequence to determine when to capture
 * the SPU PCs.	 An LFSR sequence is like a pseudo random number sequence
 * where each number occurs once in the sequence but the sequence is not in
 * numerical order. The SPU PC capture is done when the LFSR sequence reaches
 * the last value in the sequence.  Hence the user specified value N
 * corresponds to the LFSR number that is N from the end of the sequence.
 *
 * To avoid the time to compute the LFSR, a lookup table is used.  The 24 bit
 * LFSR sequence is broken into four ranges.  The spacing of the precomputed
 * values is adjusted in each range so the error between the user specified
 * number (N) of events between samples and the actual number of events based
 * on the precomputed value will be less than about 6.2%.  Note, if the user
 * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
 * This is to prevent the loss of samples because the trace buffer is full.
 *
 *	   User specified N		     Step between	   Index in
 *					 precomputed values	 precomputed
 *								    table
 *	    0		    to	2^16-1			----		  0
 *	    2^16	    to	2^16+2^19-1		2^12		1 to 128
 *	    2^16+2^19	    to	2^16+2^19+2^22-1	2^15	      129 to 256
 *	    2^16+2^19+2^22  to	2^24-1			2^18	      257 to 302
 *
 *
 * For example, the LFSR values in the second range are computed for 2^16,
 * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices
 * 1, 2,..., 127, 128.
 *
 * The 24 bit LFSR value for the nth number in the sequence can be
 * calculated using the following code:
 *
 * #define size 24
 * int calculate_lfsr(int n)
 * {
 *	int i;
 *	unsigned int newlfsr0;
 *	unsigned int lfsr = 0xFFFFFF;
 *	unsigned int howmany = n;
 *
 *	for (i = 2; i < howmany + 2; i++) {
 *		newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
 *		((lfsr >> (size - 1 - 1)) & 1) ^
 *		(((lfsr >> (size - 1 - 6)) & 1) ^
 *		((lfsr >> (size - 1 - 23)) & 1)));
 *
 *		lfsr >>= 1;
 *		lfsr = lfsr | (newlfsr0 << (size - 1));
 *	}
 *	return lfsr;
 * }
 */

#define V2_16  (0x1 << 16)
#define V2_19  (0x1 << 19)
#define V2_22  (0x1 << 22)

/*
 * Look up the precomputed LFSR value for a sample interval of n events.
 *
 * n is mapped to a table index according to the ranges documented
 * above.  Values below 2^16 use entry 0; values beyond the last range,
 * and negative values, use the last entry.
 */
static int calculate_lfsr(int n)
{
	/*
	 * The ranges and steps are in powers of 2 so the calculations
	 * can be done using shifts rather than divide.
	 */
	int index;

	/*
	 * Reject negative input up front so that no negative value is
	 * ever shifted and the subtractions below cannot overflow.
	 */
	if (n < 0)
		return initial_lfsr[ENTRIES - 1];

	if (n < V2_16)
		index = 0;
	else if (n - V2_16 < V2_19)
		index = ((n - V2_16) >> 12) + 1;
	else if (n - V2_16 - V2_19 < V2_22)
		index = ((n - V2_16 - V2_19) >> 15) + 1 + 128;
	else
		index = ((n - V2_16 - V2_19 - V2_22) >> 18) + 1 + 256;

	/* anything past the end of the table uses the last entry */
	if (index >= ENTRIES)
		index = ENTRIES - 1;

	return initial_lfsr[index];
}
Setup the pm_signal for each SPU 10848c2ecf20Sopenharmony_ci */ 10858c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(pm_signal_local); i++) { 10868c2ecf20Sopenharmony_ci pm_signal_local[i].cpu = node; 10878c2ecf20Sopenharmony_ci pm_signal_local[i].signal_group = 41; 10888c2ecf20Sopenharmony_ci /* spu i on word (i/2) */ 10898c2ecf20Sopenharmony_ci pm_signal_local[i].bus_word = 1 << i / 2; 10908c2ecf20Sopenharmony_ci /* spu i */ 10918c2ecf20Sopenharmony_ci pm_signal_local[i].sub_unit = i; 10928c2ecf20Sopenharmony_ci pm_signal_local[i].bit = 63; 10938c2ecf20Sopenharmony_ci } 10948c2ecf20Sopenharmony_ci 10958c2ecf20Sopenharmony_ci ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, 10968c2ecf20Sopenharmony_ci PASSTHRU_ENABLE, pm_signal_local, 10978c2ecf20Sopenharmony_ci (ARRAY_SIZE(pm_signal_local) 10988c2ecf20Sopenharmony_ci * sizeof(struct pm_signal))); 10998c2ecf20Sopenharmony_ci 11008c2ecf20Sopenharmony_ci if (unlikely(ret)) { 11018c2ecf20Sopenharmony_ci printk(KERN_WARNING "%s: rtas returned: %d\n", 11028c2ecf20Sopenharmony_ci __func__, ret); 11038c2ecf20Sopenharmony_ci return -EIO; 11048c2ecf20Sopenharmony_ci } 11058c2ecf20Sopenharmony_ci 11068c2ecf20Sopenharmony_ci return 0; 11078c2ecf20Sopenharmony_ci} 11088c2ecf20Sopenharmony_ci 11098c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_FREQ 11108c2ecf20Sopenharmony_cistatic int 11118c2ecf20Sopenharmony_cioprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data) 11128c2ecf20Sopenharmony_ci{ 11138c2ecf20Sopenharmony_ci int ret = 0; 11148c2ecf20Sopenharmony_ci struct cpufreq_freqs *frq = data; 11158c2ecf20Sopenharmony_ci if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) || 11168c2ecf20Sopenharmony_ci (val == CPUFREQ_POSTCHANGE && frq->old > frq->new)) 11178c2ecf20Sopenharmony_ci set_spu_profiling_frequency(frq->new, spu_cycle_reset); 11188c2ecf20Sopenharmony_ci return ret; 11198c2ecf20Sopenharmony_ci} 11208c2ecf20Sopenharmony_ci 11218c2ecf20Sopenharmony_cistatic struct notifier_block 
cpu_freq_notifier_block = { 11228c2ecf20Sopenharmony_ci .notifier_call = oprof_cpufreq_notify 11238c2ecf20Sopenharmony_ci}; 11248c2ecf20Sopenharmony_ci#endif 11258c2ecf20Sopenharmony_ci 11268c2ecf20Sopenharmony_ci/* 11278c2ecf20Sopenharmony_ci * Note the generic OProfile stop calls do not support returning 11288c2ecf20Sopenharmony_ci * an error on stop. Hence, will not return an error if the FW 11298c2ecf20Sopenharmony_ci * calls fail on stop. Failure to reset the debug bus is not an issue. 11308c2ecf20Sopenharmony_ci * Failure to disable the SPU profiling is not an issue. The FW calls 11318c2ecf20Sopenharmony_ci * to enable the performance counters and debug bus will work even if 11328c2ecf20Sopenharmony_ci * the hardware was not cleanly reset. 11338c2ecf20Sopenharmony_ci */ 11348c2ecf20Sopenharmony_cistatic void cell_global_stop_spu_cycles(void) 11358c2ecf20Sopenharmony_ci{ 11368c2ecf20Sopenharmony_ci int subfunc, rtn_value; 11378c2ecf20Sopenharmony_ci unsigned int lfsr_value; 11388c2ecf20Sopenharmony_ci int cpu; 11398c2ecf20Sopenharmony_ci 11408c2ecf20Sopenharmony_ci oprofile_running = 0; 11418c2ecf20Sopenharmony_ci smp_wmb(); 11428c2ecf20Sopenharmony_ci 11438c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_FREQ 11448c2ecf20Sopenharmony_ci cpufreq_unregister_notifier(&cpu_freq_notifier_block, 11458c2ecf20Sopenharmony_ci CPUFREQ_TRANSITION_NOTIFIER); 11468c2ecf20Sopenharmony_ci#endif 11478c2ecf20Sopenharmony_ci 11488c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 11498c2ecf20Sopenharmony_ci if (cbe_get_hw_thread_id(cpu)) 11508c2ecf20Sopenharmony_ci continue; 11518c2ecf20Sopenharmony_ci 11528c2ecf20Sopenharmony_ci subfunc = 3; /* 11538c2ecf20Sopenharmony_ci * 2 - activate SPU tracing, 11548c2ecf20Sopenharmony_ci * 3 - deactivate 11558c2ecf20Sopenharmony_ci */ 11568c2ecf20Sopenharmony_ci lfsr_value = 0x8f100000; 11578c2ecf20Sopenharmony_ci 11588c2ecf20Sopenharmony_ci rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL, 11598c2ecf20Sopenharmony_ci subfunc, 
cbe_cpu_to_node(cpu), 11608c2ecf20Sopenharmony_ci lfsr_value); 11618c2ecf20Sopenharmony_ci 11628c2ecf20Sopenharmony_ci if (unlikely(rtn_value != 0)) { 11638c2ecf20Sopenharmony_ci printk(KERN_ERR 11648c2ecf20Sopenharmony_ci "%s: rtas call ibm,cbe-spu-perftools " \ 11658c2ecf20Sopenharmony_ci "failed, return = %d\n", 11668c2ecf20Sopenharmony_ci __func__, rtn_value); 11678c2ecf20Sopenharmony_ci } 11688c2ecf20Sopenharmony_ci 11698c2ecf20Sopenharmony_ci /* Deactivate the signals */ 11708c2ecf20Sopenharmony_ci pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 11718c2ecf20Sopenharmony_ci } 11728c2ecf20Sopenharmony_ci 11738c2ecf20Sopenharmony_ci stop_spu_profiling_cycles(); 11748c2ecf20Sopenharmony_ci} 11758c2ecf20Sopenharmony_ci 11768c2ecf20Sopenharmony_cistatic void cell_global_stop_spu_events(void) 11778c2ecf20Sopenharmony_ci{ 11788c2ecf20Sopenharmony_ci int cpu; 11798c2ecf20Sopenharmony_ci oprofile_running = 0; 11808c2ecf20Sopenharmony_ci 11818c2ecf20Sopenharmony_ci stop_spu_profiling_events(); 11828c2ecf20Sopenharmony_ci smp_wmb(); 11838c2ecf20Sopenharmony_ci 11848c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 11858c2ecf20Sopenharmony_ci if (cbe_get_hw_thread_id(cpu)) 11868c2ecf20Sopenharmony_ci continue; 11878c2ecf20Sopenharmony_ci 11888c2ecf20Sopenharmony_ci cbe_sync_irq(cbe_cpu_to_node(cpu)); 11898c2ecf20Sopenharmony_ci /* Stop the counters */ 11908c2ecf20Sopenharmony_ci cbe_disable_pm(cpu); 11918c2ecf20Sopenharmony_ci cbe_write_pm07_control(cpu, 0, 0); 11928c2ecf20Sopenharmony_ci 11938c2ecf20Sopenharmony_ci /* Deactivate the signals */ 11948c2ecf20Sopenharmony_ci pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 11958c2ecf20Sopenharmony_ci 11968c2ecf20Sopenharmony_ci /* Deactivate interrupts */ 11978c2ecf20Sopenharmony_ci cbe_disable_pm_interrupts(cpu); 11988c2ecf20Sopenharmony_ci } 11998c2ecf20Sopenharmony_ci del_timer_sync(&timer_spu_event_swap); 12008c2ecf20Sopenharmony_ci} 12018c2ecf20Sopenharmony_ci 12028c2ecf20Sopenharmony_cistatic void cell_global_stop_ppu(void) 
12038c2ecf20Sopenharmony_ci{ 12048c2ecf20Sopenharmony_ci int cpu; 12058c2ecf20Sopenharmony_ci 12068c2ecf20Sopenharmony_ci /* 12078c2ecf20Sopenharmony_ci * This routine will be called once for the system. 12088c2ecf20Sopenharmony_ci * There is one performance monitor per node, so we 12098c2ecf20Sopenharmony_ci * only need to perform this function once per node. 12108c2ecf20Sopenharmony_ci */ 12118c2ecf20Sopenharmony_ci del_timer_sync(&timer_virt_cntr); 12128c2ecf20Sopenharmony_ci oprofile_running = 0; 12138c2ecf20Sopenharmony_ci smp_wmb(); 12148c2ecf20Sopenharmony_ci 12158c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 12168c2ecf20Sopenharmony_ci if (cbe_get_hw_thread_id(cpu)) 12178c2ecf20Sopenharmony_ci continue; 12188c2ecf20Sopenharmony_ci 12198c2ecf20Sopenharmony_ci cbe_sync_irq(cbe_cpu_to_node(cpu)); 12208c2ecf20Sopenharmony_ci /* Stop the counters */ 12218c2ecf20Sopenharmony_ci cbe_disable_pm(cpu); 12228c2ecf20Sopenharmony_ci 12238c2ecf20Sopenharmony_ci /* Deactivate the signals */ 12248c2ecf20Sopenharmony_ci pm_rtas_reset_signals(cbe_cpu_to_node(cpu)); 12258c2ecf20Sopenharmony_ci 12268c2ecf20Sopenharmony_ci /* Deactivate interrupts */ 12278c2ecf20Sopenharmony_ci cbe_disable_pm_interrupts(cpu); 12288c2ecf20Sopenharmony_ci } 12298c2ecf20Sopenharmony_ci} 12308c2ecf20Sopenharmony_ci 12318c2ecf20Sopenharmony_cistatic void cell_global_stop(void) 12328c2ecf20Sopenharmony_ci{ 12338c2ecf20Sopenharmony_ci if (profiling_mode == PPU_PROFILING) 12348c2ecf20Sopenharmony_ci cell_global_stop_ppu(); 12358c2ecf20Sopenharmony_ci else if (profiling_mode == SPU_PROFILING_EVENTS) 12368c2ecf20Sopenharmony_ci cell_global_stop_spu_events(); 12378c2ecf20Sopenharmony_ci else 12388c2ecf20Sopenharmony_ci cell_global_stop_spu_cycles(); 12398c2ecf20Sopenharmony_ci} 12408c2ecf20Sopenharmony_ci 12418c2ecf20Sopenharmony_cistatic int cell_global_start_spu_cycles(struct op_counter_config *ctr) 12428c2ecf20Sopenharmony_ci{ 12438c2ecf20Sopenharmony_ci int subfunc; 12448c2ecf20Sopenharmony_ci 
unsigned int lfsr_value; 12458c2ecf20Sopenharmony_ci int cpu; 12468c2ecf20Sopenharmony_ci int ret; 12478c2ecf20Sopenharmony_ci int rtas_error; 12488c2ecf20Sopenharmony_ci unsigned int cpu_khzfreq = 0; 12498c2ecf20Sopenharmony_ci 12508c2ecf20Sopenharmony_ci /* The SPU profiling uses time-based profiling based on 12518c2ecf20Sopenharmony_ci * cpu frequency, so if configured with the CPU_FREQ 12528c2ecf20Sopenharmony_ci * option, we should detect frequency changes and react 12538c2ecf20Sopenharmony_ci * accordingly. 12548c2ecf20Sopenharmony_ci */ 12558c2ecf20Sopenharmony_ci#ifdef CONFIG_CPU_FREQ 12568c2ecf20Sopenharmony_ci ret = cpufreq_register_notifier(&cpu_freq_notifier_block, 12578c2ecf20Sopenharmony_ci CPUFREQ_TRANSITION_NOTIFIER); 12588c2ecf20Sopenharmony_ci if (ret < 0) 12598c2ecf20Sopenharmony_ci /* this is not a fatal error */ 12608c2ecf20Sopenharmony_ci printk(KERN_ERR "CPU freq change registration failed: %d\n", 12618c2ecf20Sopenharmony_ci ret); 12628c2ecf20Sopenharmony_ci 12638c2ecf20Sopenharmony_ci else 12648c2ecf20Sopenharmony_ci cpu_khzfreq = cpufreq_quick_get(smp_processor_id()); 12658c2ecf20Sopenharmony_ci#endif 12668c2ecf20Sopenharmony_ci 12678c2ecf20Sopenharmony_ci set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset); 12688c2ecf20Sopenharmony_ci 12698c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 12708c2ecf20Sopenharmony_ci if (cbe_get_hw_thread_id(cpu)) 12718c2ecf20Sopenharmony_ci continue; 12728c2ecf20Sopenharmony_ci 12738c2ecf20Sopenharmony_ci /* 12748c2ecf20Sopenharmony_ci * Setup SPU cycle-based profiling. 12758c2ecf20Sopenharmony_ci * Set perf_mon_control bit 0 to a zero before 12768c2ecf20Sopenharmony_ci * enabling spu collection hardware. 
12778c2ecf20Sopenharmony_ci */ 12788c2ecf20Sopenharmony_ci cbe_write_pm(cpu, pm_control, 0); 12798c2ecf20Sopenharmony_ci 12808c2ecf20Sopenharmony_ci if (spu_cycle_reset > MAX_SPU_COUNT) 12818c2ecf20Sopenharmony_ci /* use largest possible value */ 12828c2ecf20Sopenharmony_ci lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1); 12838c2ecf20Sopenharmony_ci else 12848c2ecf20Sopenharmony_ci lfsr_value = calculate_lfsr(spu_cycle_reset); 12858c2ecf20Sopenharmony_ci 12868c2ecf20Sopenharmony_ci /* must use a non zero value. Zero disables data collection. */ 12878c2ecf20Sopenharmony_ci if (lfsr_value == 0) 12888c2ecf20Sopenharmony_ci lfsr_value = calculate_lfsr(1); 12898c2ecf20Sopenharmony_ci 12908c2ecf20Sopenharmony_ci lfsr_value = lfsr_value << 8; /* shift lfsr to correct 12918c2ecf20Sopenharmony_ci * register location 12928c2ecf20Sopenharmony_ci */ 12938c2ecf20Sopenharmony_ci 12948c2ecf20Sopenharmony_ci /* debug bus setup */ 12958c2ecf20Sopenharmony_ci ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu)); 12968c2ecf20Sopenharmony_ci 12978c2ecf20Sopenharmony_ci if (unlikely(ret)) { 12988c2ecf20Sopenharmony_ci rtas_error = ret; 12998c2ecf20Sopenharmony_ci goto out; 13008c2ecf20Sopenharmony_ci } 13018c2ecf20Sopenharmony_ci 13028c2ecf20Sopenharmony_ci 13038c2ecf20Sopenharmony_ci subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */ 13048c2ecf20Sopenharmony_ci 13058c2ecf20Sopenharmony_ci /* start profiling */ 13068c2ecf20Sopenharmony_ci ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc, 13078c2ecf20Sopenharmony_ci cbe_cpu_to_node(cpu), lfsr_value); 13088c2ecf20Sopenharmony_ci 13098c2ecf20Sopenharmony_ci if (unlikely(ret != 0)) { 13108c2ecf20Sopenharmony_ci printk(KERN_ERR 13118c2ecf20Sopenharmony_ci "%s: rtas call ibm,cbe-spu-perftools failed, " \ 13128c2ecf20Sopenharmony_ci "return = %d\n", __func__, ret); 13138c2ecf20Sopenharmony_ci rtas_error = -EIO; 13148c2ecf20Sopenharmony_ci goto out; 13158c2ecf20Sopenharmony_ci } 13168c2ecf20Sopenharmony_ci } 
13178c2ecf20Sopenharmony_ci 13188c2ecf20Sopenharmony_ci rtas_error = start_spu_profiling_cycles(spu_cycle_reset); 13198c2ecf20Sopenharmony_ci if (rtas_error) 13208c2ecf20Sopenharmony_ci goto out_stop; 13218c2ecf20Sopenharmony_ci 13228c2ecf20Sopenharmony_ci oprofile_running = 1; 13238c2ecf20Sopenharmony_ci return 0; 13248c2ecf20Sopenharmony_ci 13258c2ecf20Sopenharmony_ciout_stop: 13268c2ecf20Sopenharmony_ci cell_global_stop_spu_cycles(); /* clean up the PMU/debug bus */ 13278c2ecf20Sopenharmony_ciout: 13288c2ecf20Sopenharmony_ci return rtas_error; 13298c2ecf20Sopenharmony_ci} 13308c2ecf20Sopenharmony_ci 13318c2ecf20Sopenharmony_cistatic int cell_global_start_spu_events(struct op_counter_config *ctr) 13328c2ecf20Sopenharmony_ci{ 13338c2ecf20Sopenharmony_ci int cpu; 13348c2ecf20Sopenharmony_ci u32 interrupt_mask = 0; 13358c2ecf20Sopenharmony_ci int rtn = 0; 13368c2ecf20Sopenharmony_ci 13378c2ecf20Sopenharmony_ci hdw_thread = 0; 13388c2ecf20Sopenharmony_ci 13398c2ecf20Sopenharmony_ci /* spu event profiling, uses the performance counters to generate 13408c2ecf20Sopenharmony_ci * an interrupt. The hardware is setup to store the SPU program 13418c2ecf20Sopenharmony_ci * counter into the trace array. The occurrence mode is used to 13428c2ecf20Sopenharmony_ci * enable storing data to the trace buffer. The bits are set 13438c2ecf20Sopenharmony_ci * to send/store the SPU address in the trace buffer. The debug 13448c2ecf20Sopenharmony_ci * bus must be setup to route the SPU program counter onto the 13458c2ecf20Sopenharmony_ci * debug bus. The occurrence data in the trace buffer is not used. 13468c2ecf20Sopenharmony_ci */ 13478c2ecf20Sopenharmony_ci 13488c2ecf20Sopenharmony_ci /* This routine gets called once for the system. 13498c2ecf20Sopenharmony_ci * There is one performance monitor per node, so we 13508c2ecf20Sopenharmony_ci * only need to perform this function once per node. 
13518c2ecf20Sopenharmony_ci */ 13528c2ecf20Sopenharmony_ci 13538c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 13548c2ecf20Sopenharmony_ci if (cbe_get_hw_thread_id(cpu)) 13558c2ecf20Sopenharmony_ci continue; 13568c2ecf20Sopenharmony_ci 13578c2ecf20Sopenharmony_ci /* 13588c2ecf20Sopenharmony_ci * Setup SPU event-based profiling. 13598c2ecf20Sopenharmony_ci * Set perf_mon_control bit 0 to a zero before 13608c2ecf20Sopenharmony_ci * enabling spu collection hardware. 13618c2ecf20Sopenharmony_ci * 13628c2ecf20Sopenharmony_ci * Only support one SPU event on one SPU per node. 13638c2ecf20Sopenharmony_ci */ 13648c2ecf20Sopenharmony_ci if (ctr_enabled & 1) { 13658c2ecf20Sopenharmony_ci cbe_write_ctr(cpu, 0, reset_value[0]); 13668c2ecf20Sopenharmony_ci enable_ctr(cpu, 0, pm_regs.pm07_cntrl); 13678c2ecf20Sopenharmony_ci interrupt_mask |= 13688c2ecf20Sopenharmony_ci CBE_PM_CTR_OVERFLOW_INTR(0); 13698c2ecf20Sopenharmony_ci } else { 13708c2ecf20Sopenharmony_ci /* Disable counter */ 13718c2ecf20Sopenharmony_ci cbe_write_pm07_control(cpu, 0, 0); 13728c2ecf20Sopenharmony_ci } 13738c2ecf20Sopenharmony_ci 13748c2ecf20Sopenharmony_ci cbe_get_and_clear_pm_interrupts(cpu); 13758c2ecf20Sopenharmony_ci cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask); 13768c2ecf20Sopenharmony_ci cbe_enable_pm(cpu); 13778c2ecf20Sopenharmony_ci 13788c2ecf20Sopenharmony_ci /* clear the trace buffer */ 13798c2ecf20Sopenharmony_ci cbe_write_pm(cpu, trace_address, 0); 13808c2ecf20Sopenharmony_ci } 13818c2ecf20Sopenharmony_ci 13828c2ecf20Sopenharmony_ci /* Start the timer to time slice collecting the event profile 13838c2ecf20Sopenharmony_ci * on each of the SPUs. Note, can collect profile on one SPU 13848c2ecf20Sopenharmony_ci * per node at a time. 
13858c2ecf20Sopenharmony_ci */ 13868c2ecf20Sopenharmony_ci start_spu_event_swap(); 13878c2ecf20Sopenharmony_ci start_spu_profiling_events(); 13888c2ecf20Sopenharmony_ci oprofile_running = 1; 13898c2ecf20Sopenharmony_ci smp_wmb(); 13908c2ecf20Sopenharmony_ci 13918c2ecf20Sopenharmony_ci return rtn; 13928c2ecf20Sopenharmony_ci} 13938c2ecf20Sopenharmony_ci 13948c2ecf20Sopenharmony_cistatic int cell_global_start_ppu(struct op_counter_config *ctr) 13958c2ecf20Sopenharmony_ci{ 13968c2ecf20Sopenharmony_ci u32 cpu, i; 13978c2ecf20Sopenharmony_ci u32 interrupt_mask = 0; 13988c2ecf20Sopenharmony_ci 13998c2ecf20Sopenharmony_ci /* This routine gets called once for the system. 14008c2ecf20Sopenharmony_ci * There is one performance monitor per node, so we 14018c2ecf20Sopenharmony_ci * only need to perform this function once per node. 14028c2ecf20Sopenharmony_ci */ 14038c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 14048c2ecf20Sopenharmony_ci if (cbe_get_hw_thread_id(cpu)) 14058c2ecf20Sopenharmony_ci continue; 14068c2ecf20Sopenharmony_ci 14078c2ecf20Sopenharmony_ci interrupt_mask = 0; 14088c2ecf20Sopenharmony_ci 14098c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; ++i) { 14108c2ecf20Sopenharmony_ci if (ctr_enabled & (1 << i)) { 14118c2ecf20Sopenharmony_ci cbe_write_ctr(cpu, i, reset_value[i]); 14128c2ecf20Sopenharmony_ci enable_ctr(cpu, i, pm_regs.pm07_cntrl); 14138c2ecf20Sopenharmony_ci interrupt_mask |= CBE_PM_CTR_OVERFLOW_INTR(i); 14148c2ecf20Sopenharmony_ci } else { 14158c2ecf20Sopenharmony_ci /* Disable counter */ 14168c2ecf20Sopenharmony_ci cbe_write_pm07_control(cpu, i, 0); 14178c2ecf20Sopenharmony_ci } 14188c2ecf20Sopenharmony_ci } 14198c2ecf20Sopenharmony_ci 14208c2ecf20Sopenharmony_ci cbe_get_and_clear_pm_interrupts(cpu); 14218c2ecf20Sopenharmony_ci cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask); 14228c2ecf20Sopenharmony_ci cbe_enable_pm(cpu); 14238c2ecf20Sopenharmony_ci } 14248c2ecf20Sopenharmony_ci 14258c2ecf20Sopenharmony_ci virt_cntr_inter_mask = 
interrupt_mask; 14268c2ecf20Sopenharmony_ci oprofile_running = 1; 14278c2ecf20Sopenharmony_ci smp_wmb(); 14288c2ecf20Sopenharmony_ci 14298c2ecf20Sopenharmony_ci /* 14308c2ecf20Sopenharmony_ci * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being 14318c2ecf20Sopenharmony_ci * executed which manipulates the PMU. We start the "virtual counter" 14328c2ecf20Sopenharmony_ci * here so that we do not need to synchronize access to the PMU in 14338c2ecf20Sopenharmony_ci * the above for-loop. 14348c2ecf20Sopenharmony_ci */ 14358c2ecf20Sopenharmony_ci start_virt_cntrs(); 14368c2ecf20Sopenharmony_ci 14378c2ecf20Sopenharmony_ci return 0; 14388c2ecf20Sopenharmony_ci} 14398c2ecf20Sopenharmony_ci 14408c2ecf20Sopenharmony_cistatic int cell_global_start(struct op_counter_config *ctr) 14418c2ecf20Sopenharmony_ci{ 14428c2ecf20Sopenharmony_ci if (profiling_mode == SPU_PROFILING_CYCLES) 14438c2ecf20Sopenharmony_ci return cell_global_start_spu_cycles(ctr); 14448c2ecf20Sopenharmony_ci else if (profiling_mode == SPU_PROFILING_EVENTS) 14458c2ecf20Sopenharmony_ci return cell_global_start_spu_events(ctr); 14468c2ecf20Sopenharmony_ci else 14478c2ecf20Sopenharmony_ci return cell_global_start_ppu(ctr); 14488c2ecf20Sopenharmony_ci} 14498c2ecf20Sopenharmony_ci 14508c2ecf20Sopenharmony_ci 14518c2ecf20Sopenharmony_ci/* The SPU interrupt handler 14528c2ecf20Sopenharmony_ci * 14538c2ecf20Sopenharmony_ci * SPU event profiling works as follows: 14548c2ecf20Sopenharmony_ci * The pm_signal[0] holds the one SPU event to be measured. It is routed on 14558c2ecf20Sopenharmony_ci * the debug bus using word 0 or 1. The value of pm_signal[1] and 14568c2ecf20Sopenharmony_ci * pm_signal[2] contain the necessary events to route the SPU program 14578c2ecf20Sopenharmony_ci * counter for the selected SPU onto the debug bus using words 2 and 3. 
14588c2ecf20Sopenharmony_ci * The pm_interval register is setup to write the SPU PC value into the 14598c2ecf20Sopenharmony_ci * trace buffer at the maximum rate possible. The trace buffer is configured 14608c2ecf20Sopenharmony_ci * to store the PCs, wrapping when it is full. The performance counter is 14618c2ecf20Sopenharmony_ci * initialized to the max hardware count minus the number of events, N, between 14628c2ecf20Sopenharmony_ci * samples. Once the N events have occurred, a HW counter overflow occurs 14638c2ecf20Sopenharmony_ci * causing the generation of a HW counter interrupt which also stops the 14648c2ecf20Sopenharmony_ci * writing of the SPU PC values to the trace buffer. Hence the last PC 14658c2ecf20Sopenharmony_ci * written to the trace buffer is the SPU PC that we want. Unfortunately, 14668c2ecf20Sopenharmony_ci * we have to read from the beginning of the trace buffer to get to the 14678c2ecf20Sopenharmony_ci * last value written. We just hope the PPU has nothing better to do then 14688c2ecf20Sopenharmony_ci * service this interrupt. The PC for the specific SPU being profiled is 14698c2ecf20Sopenharmony_ci * extracted from the trace buffer processed and stored. The trace buffer 14708c2ecf20Sopenharmony_ci * is cleared, interrupts are cleared, the counter is reset to max - N. 14718c2ecf20Sopenharmony_ci * A kernel timer is used to periodically call the routine spu_evnt_swap() 14728c2ecf20Sopenharmony_ci * to switch to the next physical SPU in the node to profile in round robbin 14738c2ecf20Sopenharmony_ci * order. This way data is collected for all SPUs on the node. It does mean 14748c2ecf20Sopenharmony_ci * that we need to use a relatively small value of N to ensure enough samples 14758c2ecf20Sopenharmony_ci * on each SPU are collected each SPU is being profiled 1/8 of the time. 14768c2ecf20Sopenharmony_ci * It may also be necessary to use a longer sample collection period. 
14778c2ecf20Sopenharmony_ci */ 14788c2ecf20Sopenharmony_cistatic void cell_handle_interrupt_spu(struct pt_regs *regs, 14798c2ecf20Sopenharmony_ci struct op_counter_config *ctr) 14808c2ecf20Sopenharmony_ci{ 14818c2ecf20Sopenharmony_ci u32 cpu, cpu_tmp; 14828c2ecf20Sopenharmony_ci u64 trace_entry; 14838c2ecf20Sopenharmony_ci u32 interrupt_mask; 14848c2ecf20Sopenharmony_ci u64 trace_buffer[2]; 14858c2ecf20Sopenharmony_ci u64 last_trace_buffer; 14868c2ecf20Sopenharmony_ci u32 sample; 14878c2ecf20Sopenharmony_ci u32 trace_addr; 14888c2ecf20Sopenharmony_ci unsigned long sample_array_lock_flags; 14898c2ecf20Sopenharmony_ci int spu_num; 14908c2ecf20Sopenharmony_ci unsigned long flags; 14918c2ecf20Sopenharmony_ci 14928c2ecf20Sopenharmony_ci /* Make sure spu event interrupt handler and spu event swap 14938c2ecf20Sopenharmony_ci * don't access the counters simultaneously. 14948c2ecf20Sopenharmony_ci */ 14958c2ecf20Sopenharmony_ci cpu = smp_processor_id(); 14968c2ecf20Sopenharmony_ci spin_lock_irqsave(&cntr_lock, flags); 14978c2ecf20Sopenharmony_ci 14988c2ecf20Sopenharmony_ci cpu_tmp = cpu; 14998c2ecf20Sopenharmony_ci cbe_disable_pm(cpu); 15008c2ecf20Sopenharmony_ci 15018c2ecf20Sopenharmony_ci interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); 15028c2ecf20Sopenharmony_ci 15038c2ecf20Sopenharmony_ci sample = 0xABCDEF; 15048c2ecf20Sopenharmony_ci trace_entry = 0xfedcba; 15058c2ecf20Sopenharmony_ci last_trace_buffer = 0xdeadbeaf; 15068c2ecf20Sopenharmony_ci 15078c2ecf20Sopenharmony_ci if ((oprofile_running == 1) && (interrupt_mask != 0)) { 15088c2ecf20Sopenharmony_ci /* disable writes to trace buff */ 15098c2ecf20Sopenharmony_ci cbe_write_pm(cpu, pm_interval, 0); 15108c2ecf20Sopenharmony_ci 15118c2ecf20Sopenharmony_ci /* only have one perf cntr being used, cntr 0 */ 15128c2ecf20Sopenharmony_ci if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0)) 15138c2ecf20Sopenharmony_ci && ctr[0].enabled) 15148c2ecf20Sopenharmony_ci /* The SPU PC values will be read 
15158c2ecf20Sopenharmony_ci * from the trace buffer, reset counter 15168c2ecf20Sopenharmony_ci */ 15178c2ecf20Sopenharmony_ci 15188c2ecf20Sopenharmony_ci cbe_write_ctr(cpu, 0, reset_value[0]); 15198c2ecf20Sopenharmony_ci 15208c2ecf20Sopenharmony_ci trace_addr = cbe_read_pm(cpu, trace_address); 15218c2ecf20Sopenharmony_ci 15228c2ecf20Sopenharmony_ci while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) { 15238c2ecf20Sopenharmony_ci /* There is data in the trace buffer to process 15248c2ecf20Sopenharmony_ci * Read the buffer until you get to the last 15258c2ecf20Sopenharmony_ci * entry. This is the value we want. 15268c2ecf20Sopenharmony_ci */ 15278c2ecf20Sopenharmony_ci 15288c2ecf20Sopenharmony_ci cbe_read_trace_buffer(cpu, trace_buffer); 15298c2ecf20Sopenharmony_ci trace_addr = cbe_read_pm(cpu, trace_address); 15308c2ecf20Sopenharmony_ci } 15318c2ecf20Sopenharmony_ci 15328c2ecf20Sopenharmony_ci /* SPU Address 16 bit count format for 128 bit 15338c2ecf20Sopenharmony_ci * HW trace buffer is used for the SPU PC storage 15348c2ecf20Sopenharmony_ci * HDR bits 0:15 15358c2ecf20Sopenharmony_ci * SPU Addr 0 bits 16:31 15368c2ecf20Sopenharmony_ci * SPU Addr 1 bits 32:47 15378c2ecf20Sopenharmony_ci * unused bits 48:127 15388c2ecf20Sopenharmony_ci * 15398c2ecf20Sopenharmony_ci * HDR: bit4 = 1 SPU Address 0 valid 15408c2ecf20Sopenharmony_ci * HDR: bit5 = 1 SPU Address 1 valid 15418c2ecf20Sopenharmony_ci * - unfortunately, the valid bits don't seem to work 15428c2ecf20Sopenharmony_ci * 15438c2ecf20Sopenharmony_ci * Note trace_buffer[0] holds bits 0:63 of the HW 15448c2ecf20Sopenharmony_ci * trace buffer, trace_buffer[1] holds bits 64:127 15458c2ecf20Sopenharmony_ci */ 15468c2ecf20Sopenharmony_ci 15478c2ecf20Sopenharmony_ci trace_entry = trace_buffer[0] 15488c2ecf20Sopenharmony_ci & 0x00000000FFFF0000; 15498c2ecf20Sopenharmony_ci 15508c2ecf20Sopenharmony_ci /* only top 16 of the 18 bit SPU PC address 15518c2ecf20Sopenharmony_ci * is stored in trace buffer, hence shift right 
15528c2ecf20Sopenharmony_ci * by 16 -2 bits */ 15538c2ecf20Sopenharmony_ci sample = trace_entry >> 14; 15548c2ecf20Sopenharmony_ci last_trace_buffer = trace_buffer[0]; 15558c2ecf20Sopenharmony_ci 15568c2ecf20Sopenharmony_ci spu_num = spu_evnt_phys_spu_indx 15578c2ecf20Sopenharmony_ci + (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE); 15588c2ecf20Sopenharmony_ci 15598c2ecf20Sopenharmony_ci /* make sure only one process at a time is calling 15608c2ecf20Sopenharmony_ci * spu_sync_buffer() 15618c2ecf20Sopenharmony_ci */ 15628c2ecf20Sopenharmony_ci spin_lock_irqsave(&oprof_spu_smpl_arry_lck, 15638c2ecf20Sopenharmony_ci sample_array_lock_flags); 15648c2ecf20Sopenharmony_ci spu_sync_buffer(spu_num, &sample, 1); 15658c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, 15668c2ecf20Sopenharmony_ci sample_array_lock_flags); 15678c2ecf20Sopenharmony_ci 15688c2ecf20Sopenharmony_ci smp_wmb(); /* insure spu event buffer updates are written 15698c2ecf20Sopenharmony_ci * don't want events intermingled... */ 15708c2ecf20Sopenharmony_ci 15718c2ecf20Sopenharmony_ci /* The counters were frozen by the interrupt. 15728c2ecf20Sopenharmony_ci * Reenable the interrupt and restart the counters. 15738c2ecf20Sopenharmony_ci */ 15748c2ecf20Sopenharmony_ci cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); 15758c2ecf20Sopenharmony_ci cbe_enable_pm_interrupts(cpu, hdw_thread, 15768c2ecf20Sopenharmony_ci virt_cntr_inter_mask); 15778c2ecf20Sopenharmony_ci 15788c2ecf20Sopenharmony_ci /* clear the trace buffer, re-enable writes to trace buff */ 15798c2ecf20Sopenharmony_ci cbe_write_pm(cpu, trace_address, 0); 15808c2ecf20Sopenharmony_ci cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC); 15818c2ecf20Sopenharmony_ci 15828c2ecf20Sopenharmony_ci /* The writes to the various performance counters only writes 15838c2ecf20Sopenharmony_ci * to a latch. The new values (interrupt setting bits, reset 15848c2ecf20Sopenharmony_ci * counter value etc.) 
are not copied to the actual registers 15858c2ecf20Sopenharmony_ci * until the performance monitor is enabled. In order to get 15868c2ecf20Sopenharmony_ci * this to work as desired, the performance monitor needs to 15878c2ecf20Sopenharmony_ci * be disabled while writing to the latches. This is a 15888c2ecf20Sopenharmony_ci * HW design issue. 15898c2ecf20Sopenharmony_ci */ 15908c2ecf20Sopenharmony_ci write_pm_cntrl(cpu); 15918c2ecf20Sopenharmony_ci cbe_enable_pm(cpu); 15928c2ecf20Sopenharmony_ci } 15938c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&cntr_lock, flags); 15948c2ecf20Sopenharmony_ci} 15958c2ecf20Sopenharmony_ci 15968c2ecf20Sopenharmony_cistatic void cell_handle_interrupt_ppu(struct pt_regs *regs, 15978c2ecf20Sopenharmony_ci struct op_counter_config *ctr) 15988c2ecf20Sopenharmony_ci{ 15998c2ecf20Sopenharmony_ci u32 cpu; 16008c2ecf20Sopenharmony_ci u64 pc; 16018c2ecf20Sopenharmony_ci int is_kernel; 16028c2ecf20Sopenharmony_ci unsigned long flags = 0; 16038c2ecf20Sopenharmony_ci u32 interrupt_mask; 16048c2ecf20Sopenharmony_ci int i; 16058c2ecf20Sopenharmony_ci 16068c2ecf20Sopenharmony_ci cpu = smp_processor_id(); 16078c2ecf20Sopenharmony_ci 16088c2ecf20Sopenharmony_ci /* 16098c2ecf20Sopenharmony_ci * Need to make sure the interrupt handler and the virt counter 16108c2ecf20Sopenharmony_ci * routine are not running at the same time. See the 16118c2ecf20Sopenharmony_ci * cell_virtual_cntr() routine for additional comments. 16128c2ecf20Sopenharmony_ci */ 16138c2ecf20Sopenharmony_ci spin_lock_irqsave(&cntr_lock, flags); 16148c2ecf20Sopenharmony_ci 16158c2ecf20Sopenharmony_ci /* 16168c2ecf20Sopenharmony_ci * Need to disable and reenable the performance counters 16178c2ecf20Sopenharmony_ci * to get the desired behavior from the hardware. This 16188c2ecf20Sopenharmony_ci * is hardware specific. 
16198c2ecf20Sopenharmony_ci */ 16208c2ecf20Sopenharmony_ci 16218c2ecf20Sopenharmony_ci cbe_disable_pm(cpu); 16228c2ecf20Sopenharmony_ci 16238c2ecf20Sopenharmony_ci interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu); 16248c2ecf20Sopenharmony_ci 16258c2ecf20Sopenharmony_ci /* 16268c2ecf20Sopenharmony_ci * If the interrupt mask has been cleared, then the virt cntr 16278c2ecf20Sopenharmony_ci * has cleared the interrupt. When the thread that generated 16288c2ecf20Sopenharmony_ci * the interrupt is restored, the data count will be restored to 16298c2ecf20Sopenharmony_ci * 0xffffff0 to cause the interrupt to be regenerated. 16308c2ecf20Sopenharmony_ci */ 16318c2ecf20Sopenharmony_ci 16328c2ecf20Sopenharmony_ci if ((oprofile_running == 1) && (interrupt_mask != 0)) { 16338c2ecf20Sopenharmony_ci pc = regs->nip; 16348c2ecf20Sopenharmony_ci is_kernel = is_kernel_addr(pc); 16358c2ecf20Sopenharmony_ci 16368c2ecf20Sopenharmony_ci for (i = 0; i < num_counters; ++i) { 16378c2ecf20Sopenharmony_ci if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(i)) 16388c2ecf20Sopenharmony_ci && ctr[i].enabled) { 16398c2ecf20Sopenharmony_ci oprofile_add_ext_sample(pc, regs, i, is_kernel); 16408c2ecf20Sopenharmony_ci cbe_write_ctr(cpu, i, reset_value[i]); 16418c2ecf20Sopenharmony_ci } 16428c2ecf20Sopenharmony_ci } 16438c2ecf20Sopenharmony_ci 16448c2ecf20Sopenharmony_ci /* 16458c2ecf20Sopenharmony_ci * The counters were frozen by the interrupt. 16468c2ecf20Sopenharmony_ci * Reenable the interrupt and restart the counters. 16478c2ecf20Sopenharmony_ci * If there was a race between the interrupt handler and 16488c2ecf20Sopenharmony_ci * the virtual counter routine. The virtual counter 16498c2ecf20Sopenharmony_ci * routine may have cleared the interrupts. Hence must 16508c2ecf20Sopenharmony_ci * use the virt_cntr_inter_mask to re-enable the interrupts. 
16518c2ecf20Sopenharmony_ci */ 16528c2ecf20Sopenharmony_ci cbe_enable_pm_interrupts(cpu, hdw_thread, 16538c2ecf20Sopenharmony_ci virt_cntr_inter_mask); 16548c2ecf20Sopenharmony_ci 16558c2ecf20Sopenharmony_ci /* 16568c2ecf20Sopenharmony_ci * The writes to the various performance counters only writes 16578c2ecf20Sopenharmony_ci * to a latch. The new values (interrupt setting bits, reset 16588c2ecf20Sopenharmony_ci * counter value etc.) are not copied to the actual registers 16598c2ecf20Sopenharmony_ci * until the performance monitor is enabled. In order to get 16608c2ecf20Sopenharmony_ci * this to work as desired, the performance monitor needs to 16618c2ecf20Sopenharmony_ci * be disabled while writing to the latches. This is a 16628c2ecf20Sopenharmony_ci * HW design issue. 16638c2ecf20Sopenharmony_ci */ 16648c2ecf20Sopenharmony_ci cbe_enable_pm(cpu); 16658c2ecf20Sopenharmony_ci } 16668c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&cntr_lock, flags); 16678c2ecf20Sopenharmony_ci} 16688c2ecf20Sopenharmony_ci 16698c2ecf20Sopenharmony_cistatic void cell_handle_interrupt(struct pt_regs *regs, 16708c2ecf20Sopenharmony_ci struct op_counter_config *ctr) 16718c2ecf20Sopenharmony_ci{ 16728c2ecf20Sopenharmony_ci if (profiling_mode == PPU_PROFILING) 16738c2ecf20Sopenharmony_ci cell_handle_interrupt_ppu(regs, ctr); 16748c2ecf20Sopenharmony_ci else 16758c2ecf20Sopenharmony_ci cell_handle_interrupt_spu(regs, ctr); 16768c2ecf20Sopenharmony_ci} 16778c2ecf20Sopenharmony_ci 16788c2ecf20Sopenharmony_ci/* 16798c2ecf20Sopenharmony_ci * This function is called from the generic OProfile 16808c2ecf20Sopenharmony_ci * driver. When profiling PPUs, we need to do the 16818c2ecf20Sopenharmony_ci * generic sync start; otherwise, do spu_sync_start. 
16828c2ecf20Sopenharmony_ci */ 16838c2ecf20Sopenharmony_cistatic int cell_sync_start(void) 16848c2ecf20Sopenharmony_ci{ 16858c2ecf20Sopenharmony_ci if ((profiling_mode == SPU_PROFILING_CYCLES) || 16868c2ecf20Sopenharmony_ci (profiling_mode == SPU_PROFILING_EVENTS)) 16878c2ecf20Sopenharmony_ci return spu_sync_start(); 16888c2ecf20Sopenharmony_ci else 16898c2ecf20Sopenharmony_ci return DO_GENERIC_SYNC; 16908c2ecf20Sopenharmony_ci} 16918c2ecf20Sopenharmony_ci 16928c2ecf20Sopenharmony_cistatic int cell_sync_stop(void) 16938c2ecf20Sopenharmony_ci{ 16948c2ecf20Sopenharmony_ci if ((profiling_mode == SPU_PROFILING_CYCLES) || 16958c2ecf20Sopenharmony_ci (profiling_mode == SPU_PROFILING_EVENTS)) 16968c2ecf20Sopenharmony_ci return spu_sync_stop(); 16978c2ecf20Sopenharmony_ci else 16988c2ecf20Sopenharmony_ci return 1; 16998c2ecf20Sopenharmony_ci} 17008c2ecf20Sopenharmony_ci 17018c2ecf20Sopenharmony_cistruct op_powerpc_model op_model_cell = { 17028c2ecf20Sopenharmony_ci .reg_setup = cell_reg_setup, 17038c2ecf20Sopenharmony_ci .cpu_setup = cell_cpu_setup, 17048c2ecf20Sopenharmony_ci .global_start = cell_global_start, 17058c2ecf20Sopenharmony_ci .global_stop = cell_global_stop, 17068c2ecf20Sopenharmony_ci .sync_start = cell_sync_start, 17078c2ecf20Sopenharmony_ci .sync_stop = cell_sync_stop, 17088c2ecf20Sopenharmony_ci .handle_interrupt = cell_handle_interrupt, 17098c2ecf20Sopenharmony_ci}; 1710