// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cell Broadband Engine OProfile Support
 *
 * (C) Copyright IBM Corporation 2006
 *
 * Author: Maynard Johnson <maynardj@us.ibm.com>
 */

/* The purpose of this file is to handle SPU event task switching
 * and to record SPU context information into the OProfile
 * event buffer.
 *
 * Additionally, the spu_sync_buffer function is provided as a helper
 * for recording actual SPU program counter samples to the event buffer.
 */
#include <linux/dcookies.h>
#include <linux/kref.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/numa.h>
#include <linux/oprofile.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "pr_util.h"

/* Sentinel index for release_cached_info(): release every cached entry. */
#define RELEASE_ALL 9999

/* Held while the head/tail indices or contents of spu_buff[] are changed. */
static DEFINE_SPINLOCK(buffer_lock);
/* Held by callers that use or release entries of the spu_info[] cache. */
static DEFINE_SPINLOCK(cache_lock);
/* Number of per-SPU slots in use; computed in spu_sync_start(). */
static int num_spu_nodes;
/* Result of number_of_online_nodes(); computed in spu_sync_start(). */
static int spu_prof_num_nodes;

/* One circular sample buffer per SPU, indexed by SPU number. */
struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
/* Delayed work bound to wq_sync_spu_buff() by spu_sync_start(). */
struct delayed_work spu_work;
/* Entry count of each per-SPU buffer; set in oprofile_spu_buff_create(). */
static
unsigned max_spu_buff; 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_cistatic void spu_buff_add(unsigned long int value, int spu) 428c2ecf20Sopenharmony_ci{ 438c2ecf20Sopenharmony_ci /* spu buff is a circular buffer. Add entries to the 448c2ecf20Sopenharmony_ci * head. Head is the index to store the next value. 458c2ecf20Sopenharmony_ci * The buffer is full when there is one available entry 468c2ecf20Sopenharmony_ci * in the queue, i.e. head and tail can't be equal. 478c2ecf20Sopenharmony_ci * That way we can tell the difference between the 488c2ecf20Sopenharmony_ci * buffer being full versus empty. 498c2ecf20Sopenharmony_ci * 508c2ecf20Sopenharmony_ci * ASSUMPTION: the buffer_lock is held when this function 518c2ecf20Sopenharmony_ci * is called to lock the buffer, head and tail. 528c2ecf20Sopenharmony_ci */ 538c2ecf20Sopenharmony_ci int full = 1; 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_ci if (spu_buff[spu].head >= spu_buff[spu].tail) { 568c2ecf20Sopenharmony_ci if ((spu_buff[spu].head - spu_buff[spu].tail) 578c2ecf20Sopenharmony_ci < (max_spu_buff - 1)) 588c2ecf20Sopenharmony_ci full = 0; 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci } else if (spu_buff[spu].tail > spu_buff[spu].head) { 618c2ecf20Sopenharmony_ci if ((spu_buff[spu].tail - spu_buff[spu].head) 628c2ecf20Sopenharmony_ci > 1) 638c2ecf20Sopenharmony_ci full = 0; 648c2ecf20Sopenharmony_ci } 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci if (!full) { 678c2ecf20Sopenharmony_ci spu_buff[spu].buff[spu_buff[spu].head] = value; 688c2ecf20Sopenharmony_ci spu_buff[spu].head++; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci if (spu_buff[spu].head >= max_spu_buff) 718c2ecf20Sopenharmony_ci spu_buff[spu].head = 0; 728c2ecf20Sopenharmony_ci } else { 738c2ecf20Sopenharmony_ci /* From the user's perspective make the SPU buffer 748c2ecf20Sopenharmony_ci * size management/overflow look like we are using 758c2ecf20Sopenharmony_ci * per cpu buffers. 
The user uses the same 768c2ecf20Sopenharmony_ci * per cpu parameter to adjust the SPU buffer size. 778c2ecf20Sopenharmony_ci * Increment the sample_lost_overflow to inform 788c2ecf20Sopenharmony_ci * the user the buffer size needs to be increased. 798c2ecf20Sopenharmony_ci */ 808c2ecf20Sopenharmony_ci oprofile_cpu_buffer_inc_smpl_lost(); 818c2ecf20Sopenharmony_ci } 828c2ecf20Sopenharmony_ci} 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci/* This function copies the per SPU buffers to the 858c2ecf20Sopenharmony_ci * OProfile kernel buffer. 868c2ecf20Sopenharmony_ci */ 878c2ecf20Sopenharmony_cistatic void sync_spu_buff(void) 888c2ecf20Sopenharmony_ci{ 898c2ecf20Sopenharmony_ci int spu; 908c2ecf20Sopenharmony_ci unsigned long flags; 918c2ecf20Sopenharmony_ci int curr_head; 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci for (spu = 0; spu < num_spu_nodes; spu++) { 948c2ecf20Sopenharmony_ci /* In case there was an issue and the buffer didn't 958c2ecf20Sopenharmony_ci * get created skip it. 968c2ecf20Sopenharmony_ci */ 978c2ecf20Sopenharmony_ci if (spu_buff[spu].buff == NULL) 988c2ecf20Sopenharmony_ci continue; 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci /* Hold the lock to make sure the head/tail 1018c2ecf20Sopenharmony_ci * doesn't change while spu_buff_add() is 1028c2ecf20Sopenharmony_ci * deciding if the buffer is full or not. 1038c2ecf20Sopenharmony_ci * Being a little paranoid. 1048c2ecf20Sopenharmony_ci */ 1058c2ecf20Sopenharmony_ci spin_lock_irqsave(&buffer_lock, flags); 1068c2ecf20Sopenharmony_ci curr_head = spu_buff[spu].head; 1078c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&buffer_lock, flags); 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci /* Transfer the current contents to the kernel buffer. 1108c2ecf20Sopenharmony_ci * data can still be added to the head of the buffer. 
1118c2ecf20Sopenharmony_ci */ 1128c2ecf20Sopenharmony_ci oprofile_put_buff(spu_buff[spu].buff, 1138c2ecf20Sopenharmony_ci spu_buff[spu].tail, 1148c2ecf20Sopenharmony_ci curr_head, max_spu_buff); 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci spin_lock_irqsave(&buffer_lock, flags); 1178c2ecf20Sopenharmony_ci spu_buff[spu].tail = curr_head; 1188c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&buffer_lock, flags); 1198c2ecf20Sopenharmony_ci } 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_ci} 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_cistatic void wq_sync_spu_buff(struct work_struct *work) 1248c2ecf20Sopenharmony_ci{ 1258c2ecf20Sopenharmony_ci /* move data from spu buffers to kernel buffer */ 1268c2ecf20Sopenharmony_ci sync_spu_buff(); 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci /* only reschedule if profiling is not done */ 1298c2ecf20Sopenharmony_ci if (spu_prof_running) 1308c2ecf20Sopenharmony_ci schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE); 1318c2ecf20Sopenharmony_ci} 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci/* Container for caching information about an active SPU task. 
*/ 1348c2ecf20Sopenharmony_cistruct cached_info { 1358c2ecf20Sopenharmony_ci struct vma_to_fileoffset_map *map; 1368c2ecf20Sopenharmony_ci struct spu *the_spu; /* needed to access pointer to local_store */ 1378c2ecf20Sopenharmony_ci struct kref cache_ref; 1388c2ecf20Sopenharmony_ci}; 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_cistatic struct cached_info *spu_info[MAX_NUMNODES * 8]; 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_cistatic void destroy_cached_info(struct kref *kref) 1438c2ecf20Sopenharmony_ci{ 1448c2ecf20Sopenharmony_ci struct cached_info *info; 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci info = container_of(kref, struct cached_info, cache_ref); 1478c2ecf20Sopenharmony_ci vma_map_free(info->map); 1488c2ecf20Sopenharmony_ci kfree(info); 1498c2ecf20Sopenharmony_ci module_put(THIS_MODULE); 1508c2ecf20Sopenharmony_ci} 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci/* Return the cached_info for the passed SPU number. 1538c2ecf20Sopenharmony_ci * ATTENTION: Callers are responsible for obtaining the 1548c2ecf20Sopenharmony_ci * cache_lock if needed prior to invoking this function. 
1558c2ecf20Sopenharmony_ci */ 1568c2ecf20Sopenharmony_cistatic struct cached_info *get_cached_info(struct spu *the_spu, int spu_num) 1578c2ecf20Sopenharmony_ci{ 1588c2ecf20Sopenharmony_ci struct kref *ref; 1598c2ecf20Sopenharmony_ci struct cached_info *ret_info; 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci if (spu_num >= num_spu_nodes) { 1628c2ecf20Sopenharmony_ci printk(KERN_ERR "SPU_PROF: " 1638c2ecf20Sopenharmony_ci "%s, line %d: Invalid index %d into spu info cache\n", 1648c2ecf20Sopenharmony_ci __func__, __LINE__, spu_num); 1658c2ecf20Sopenharmony_ci ret_info = NULL; 1668c2ecf20Sopenharmony_ci goto out; 1678c2ecf20Sopenharmony_ci } 1688c2ecf20Sopenharmony_ci if (!spu_info[spu_num] && the_spu) { 1698c2ecf20Sopenharmony_ci ref = spu_get_profile_private_kref(the_spu->ctx); 1708c2ecf20Sopenharmony_ci if (ref) { 1718c2ecf20Sopenharmony_ci spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref); 1728c2ecf20Sopenharmony_ci kref_get(&spu_info[spu_num]->cache_ref); 1738c2ecf20Sopenharmony_ci } 1748c2ecf20Sopenharmony_ci } 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci ret_info = spu_info[spu_num]; 1778c2ecf20Sopenharmony_ci out: 1788c2ecf20Sopenharmony_ci return ret_info; 1798c2ecf20Sopenharmony_ci} 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci/* Looks for cached info for the passed spu. If not found, the 1838c2ecf20Sopenharmony_ci * cached info is created for the passed spu. 1848c2ecf20Sopenharmony_ci * Returns 0 for success; otherwise, -1 for error. 
1858c2ecf20Sopenharmony_ci */ 1868c2ecf20Sopenharmony_cistatic int 1878c2ecf20Sopenharmony_ciprepare_cached_spu_info(struct spu *spu, unsigned long objectId) 1888c2ecf20Sopenharmony_ci{ 1898c2ecf20Sopenharmony_ci unsigned long flags; 1908c2ecf20Sopenharmony_ci struct vma_to_fileoffset_map *new_map; 1918c2ecf20Sopenharmony_ci int retval = 0; 1928c2ecf20Sopenharmony_ci struct cached_info *info; 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci /* We won't bother getting cache_lock here since 1958c2ecf20Sopenharmony_ci * don't do anything with the cached_info that's returned. 1968c2ecf20Sopenharmony_ci */ 1978c2ecf20Sopenharmony_ci info = get_cached_info(spu, spu->number); 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci if (info) { 2008c2ecf20Sopenharmony_ci pr_debug("Found cached SPU info.\n"); 2018c2ecf20Sopenharmony_ci goto out; 2028c2ecf20Sopenharmony_ci } 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci /* Create cached_info and set spu_info[spu->number] to point to it. 2058c2ecf20Sopenharmony_ci * spu->number is a system-wide value, not a per-node value. 
2068c2ecf20Sopenharmony_ci */ 2078c2ecf20Sopenharmony_ci info = kzalloc(sizeof(*info), GFP_KERNEL); 2088c2ecf20Sopenharmony_ci if (!info) { 2098c2ecf20Sopenharmony_ci printk(KERN_ERR "SPU_PROF: " 2108c2ecf20Sopenharmony_ci "%s, line %d: create vma_map failed\n", 2118c2ecf20Sopenharmony_ci __func__, __LINE__); 2128c2ecf20Sopenharmony_ci retval = -ENOMEM; 2138c2ecf20Sopenharmony_ci goto err_alloc; 2148c2ecf20Sopenharmony_ci } 2158c2ecf20Sopenharmony_ci new_map = create_vma_map(spu, objectId); 2168c2ecf20Sopenharmony_ci if (!new_map) { 2178c2ecf20Sopenharmony_ci printk(KERN_ERR "SPU_PROF: " 2188c2ecf20Sopenharmony_ci "%s, line %d: create vma_map failed\n", 2198c2ecf20Sopenharmony_ci __func__, __LINE__); 2208c2ecf20Sopenharmony_ci retval = -ENOMEM; 2218c2ecf20Sopenharmony_ci goto err_alloc; 2228c2ecf20Sopenharmony_ci } 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci pr_debug("Created vma_map\n"); 2258c2ecf20Sopenharmony_ci info->map = new_map; 2268c2ecf20Sopenharmony_ci info->the_spu = spu; 2278c2ecf20Sopenharmony_ci kref_init(&info->cache_ref); 2288c2ecf20Sopenharmony_ci spin_lock_irqsave(&cache_lock, flags); 2298c2ecf20Sopenharmony_ci spu_info[spu->number] = info; 2308c2ecf20Sopenharmony_ci /* Increment count before passing off ref to SPUFS. */ 2318c2ecf20Sopenharmony_ci kref_get(&info->cache_ref); 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci /* We increment the module refcount here since SPUFS is 2348c2ecf20Sopenharmony_ci * responsible for the final destruction of the cached_info, 2358c2ecf20Sopenharmony_ci * and it must be able to access the destroy_cached_info() 2368c2ecf20Sopenharmony_ci * function defined in the OProfile module. We decrement 2378c2ecf20Sopenharmony_ci * the module refcount in destroy_cached_info. 
2388c2ecf20Sopenharmony_ci */ 2398c2ecf20Sopenharmony_ci try_module_get(THIS_MODULE); 2408c2ecf20Sopenharmony_ci spu_set_profile_private_kref(spu->ctx, &info->cache_ref, 2418c2ecf20Sopenharmony_ci destroy_cached_info); 2428c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&cache_lock, flags); 2438c2ecf20Sopenharmony_ci goto out; 2448c2ecf20Sopenharmony_ci 2458c2ecf20Sopenharmony_cierr_alloc: 2468c2ecf20Sopenharmony_ci kfree(info); 2478c2ecf20Sopenharmony_ciout: 2488c2ecf20Sopenharmony_ci return retval; 2498c2ecf20Sopenharmony_ci} 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci/* 2528c2ecf20Sopenharmony_ci * NOTE: The caller is responsible for locking the 2538c2ecf20Sopenharmony_ci * cache_lock prior to calling this function. 2548c2ecf20Sopenharmony_ci */ 2558c2ecf20Sopenharmony_cistatic int release_cached_info(int spu_index) 2568c2ecf20Sopenharmony_ci{ 2578c2ecf20Sopenharmony_ci int index, end; 2588c2ecf20Sopenharmony_ci 2598c2ecf20Sopenharmony_ci if (spu_index == RELEASE_ALL) { 2608c2ecf20Sopenharmony_ci end = num_spu_nodes; 2618c2ecf20Sopenharmony_ci index = 0; 2628c2ecf20Sopenharmony_ci } else { 2638c2ecf20Sopenharmony_ci if (spu_index >= num_spu_nodes) { 2648c2ecf20Sopenharmony_ci printk(KERN_ERR "SPU_PROF: " 2658c2ecf20Sopenharmony_ci "%s, line %d: " 2668c2ecf20Sopenharmony_ci "Invalid index %d into spu info cache\n", 2678c2ecf20Sopenharmony_ci __func__, __LINE__, spu_index); 2688c2ecf20Sopenharmony_ci goto out; 2698c2ecf20Sopenharmony_ci } 2708c2ecf20Sopenharmony_ci end = spu_index + 1; 2718c2ecf20Sopenharmony_ci index = spu_index; 2728c2ecf20Sopenharmony_ci } 2738c2ecf20Sopenharmony_ci for (; index < end; index++) { 2748c2ecf20Sopenharmony_ci if (spu_info[index]) { 2758c2ecf20Sopenharmony_ci kref_put(&spu_info[index]->cache_ref, 2768c2ecf20Sopenharmony_ci destroy_cached_info); 2778c2ecf20Sopenharmony_ci spu_info[index] = NULL; 2788c2ecf20Sopenharmony_ci } 2798c2ecf20Sopenharmony_ci } 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_ciout: 
2828c2ecf20Sopenharmony_ci return 0; 2838c2ecf20Sopenharmony_ci} 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_ci/* The source code for fast_get_dcookie was "borrowed" 2868c2ecf20Sopenharmony_ci * from drivers/oprofile/buffer_sync.c. 2878c2ecf20Sopenharmony_ci */ 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci/* Optimisation. We can manage without taking the dcookie sem 2908c2ecf20Sopenharmony_ci * because we cannot reach this code without at least one 2918c2ecf20Sopenharmony_ci * dcookie user still being registered (namely, the reader 2928c2ecf20Sopenharmony_ci * of the event buffer). 2938c2ecf20Sopenharmony_ci */ 2948c2ecf20Sopenharmony_cistatic inline unsigned long fast_get_dcookie(const struct path *path) 2958c2ecf20Sopenharmony_ci{ 2968c2ecf20Sopenharmony_ci unsigned long cookie; 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci if (path->dentry->d_flags & DCACHE_COOKIE) 2998c2ecf20Sopenharmony_ci return (unsigned long)path->dentry; 3008c2ecf20Sopenharmony_ci get_dcookie(path, &cookie); 3018c2ecf20Sopenharmony_ci return cookie; 3028c2ecf20Sopenharmony_ci} 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci/* Look up the dcookie for the task's mm->exe_file, 3058c2ecf20Sopenharmony_ci * which corresponds loosely to "application name". Also, determine 3068c2ecf20Sopenharmony_ci * the offset for the SPU ELF object. If computed offset is 3078c2ecf20Sopenharmony_ci * non-zero, it implies an embedded SPU object; otherwise, it's a 3088c2ecf20Sopenharmony_ci * separate SPU binary, in which case we retrieve it's dcookie. 3098c2ecf20Sopenharmony_ci * For the embedded case, we must determine if SPU ELF is embedded 3108c2ecf20Sopenharmony_ci * in the executable application or another file (i.e., shared lib). 3118c2ecf20Sopenharmony_ci * If embedded in a shared lib, we must get the dcookie and return 3128c2ecf20Sopenharmony_ci * that to the caller. 
3138c2ecf20Sopenharmony_ci */ 3148c2ecf20Sopenharmony_cistatic unsigned long 3158c2ecf20Sopenharmony_ciget_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp, 3168c2ecf20Sopenharmony_ci unsigned long *spu_bin_dcookie, 3178c2ecf20Sopenharmony_ci unsigned long spu_ref) 3188c2ecf20Sopenharmony_ci{ 3198c2ecf20Sopenharmony_ci unsigned long app_cookie = 0; 3208c2ecf20Sopenharmony_ci unsigned int my_offset = 0; 3218c2ecf20Sopenharmony_ci struct vm_area_struct *vma; 3228c2ecf20Sopenharmony_ci struct file *exe_file; 3238c2ecf20Sopenharmony_ci struct mm_struct *mm = spu->mm; 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_ci if (!mm) 3268c2ecf20Sopenharmony_ci goto out; 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_ci exe_file = get_mm_exe_file(mm); 3298c2ecf20Sopenharmony_ci if (exe_file) { 3308c2ecf20Sopenharmony_ci app_cookie = fast_get_dcookie(&exe_file->f_path); 3318c2ecf20Sopenharmony_ci pr_debug("got dcookie for %pD\n", exe_file); 3328c2ecf20Sopenharmony_ci fput(exe_file); 3338c2ecf20Sopenharmony_ci } 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci mmap_read_lock(mm); 3368c2ecf20Sopenharmony_ci for (vma = mm->mmap; vma; vma = vma->vm_next) { 3378c2ecf20Sopenharmony_ci if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref) 3388c2ecf20Sopenharmony_ci continue; 3398c2ecf20Sopenharmony_ci my_offset = spu_ref - vma->vm_start; 3408c2ecf20Sopenharmony_ci if (!vma->vm_file) 3418c2ecf20Sopenharmony_ci goto fail_no_image_cookie; 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci pr_debug("Found spu ELF at %X(object-id:%lx) for file %pD\n", 3448c2ecf20Sopenharmony_ci my_offset, spu_ref, vma->vm_file); 3458c2ecf20Sopenharmony_ci *offsetp = my_offset; 3468c2ecf20Sopenharmony_ci break; 3478c2ecf20Sopenharmony_ci } 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_ci *spu_bin_dcookie = fast_get_dcookie(&vma->vm_file->f_path); 3508c2ecf20Sopenharmony_ci pr_debug("got dcookie for %pD\n", vma->vm_file); 3518c2ecf20Sopenharmony_ci 3528c2ecf20Sopenharmony_ci 
mmap_read_unlock(mm); 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ciout: 3558c2ecf20Sopenharmony_ci return app_cookie; 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_cifail_no_image_cookie: 3588c2ecf20Sopenharmony_ci mmap_read_unlock(mm); 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ci printk(KERN_ERR "SPU_PROF: " 3618c2ecf20Sopenharmony_ci "%s, line %d: Cannot find dcookie for SPU binary\n", 3628c2ecf20Sopenharmony_ci __func__, __LINE__); 3638c2ecf20Sopenharmony_ci goto out; 3648c2ecf20Sopenharmony_ci} 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci 3688c2ecf20Sopenharmony_ci/* This function finds or creates cached context information for the 3698c2ecf20Sopenharmony_ci * passed SPU and records SPU context information into the OProfile 3708c2ecf20Sopenharmony_ci * event buffer. 3718c2ecf20Sopenharmony_ci */ 3728c2ecf20Sopenharmony_cistatic int process_context_switch(struct spu *spu, unsigned long objectId) 3738c2ecf20Sopenharmony_ci{ 3748c2ecf20Sopenharmony_ci unsigned long flags; 3758c2ecf20Sopenharmony_ci int retval; 3768c2ecf20Sopenharmony_ci unsigned int offset = 0; 3778c2ecf20Sopenharmony_ci unsigned long spu_cookie = 0, app_dcookie; 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci retval = prepare_cached_spu_info(spu, objectId); 3808c2ecf20Sopenharmony_ci if (retval) 3818c2ecf20Sopenharmony_ci goto out; 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci /* Get dcookie first because a mutex_lock is taken in that 3848c2ecf20Sopenharmony_ci * code path, so interrupts must not be disabled. 
3858c2ecf20Sopenharmony_ci */ 3868c2ecf20Sopenharmony_ci app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId); 3878c2ecf20Sopenharmony_ci if (!app_dcookie || !spu_cookie) { 3888c2ecf20Sopenharmony_ci retval = -ENOENT; 3898c2ecf20Sopenharmony_ci goto out; 3908c2ecf20Sopenharmony_ci } 3918c2ecf20Sopenharmony_ci 3928c2ecf20Sopenharmony_ci /* Record context info in event buffer */ 3938c2ecf20Sopenharmony_ci spin_lock_irqsave(&buffer_lock, flags); 3948c2ecf20Sopenharmony_ci spu_buff_add(ESCAPE_CODE, spu->number); 3958c2ecf20Sopenharmony_ci spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number); 3968c2ecf20Sopenharmony_ci spu_buff_add(spu->number, spu->number); 3978c2ecf20Sopenharmony_ci spu_buff_add(spu->pid, spu->number); 3988c2ecf20Sopenharmony_ci spu_buff_add(spu->tgid, spu->number); 3998c2ecf20Sopenharmony_ci spu_buff_add(app_dcookie, spu->number); 4008c2ecf20Sopenharmony_ci spu_buff_add(spu_cookie, spu->number); 4018c2ecf20Sopenharmony_ci spu_buff_add(offset, spu->number); 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_ci /* Set flag to indicate SPU PC data can now be written out. If 4048c2ecf20Sopenharmony_ci * the SPU program counter data is seen before an SPU context 4058c2ecf20Sopenharmony_ci * record is seen, the postprocessing will fail. 4068c2ecf20Sopenharmony_ci */ 4078c2ecf20Sopenharmony_ci spu_buff[spu->number].ctx_sw_seen = 1; 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&buffer_lock, flags); 4108c2ecf20Sopenharmony_ci smp_wmb(); /* insure spu event buffer updates are written */ 4118c2ecf20Sopenharmony_ci /* don't want entries intermingled... */ 4128c2ecf20Sopenharmony_ciout: 4138c2ecf20Sopenharmony_ci return retval; 4148c2ecf20Sopenharmony_ci} 4158c2ecf20Sopenharmony_ci 4168c2ecf20Sopenharmony_ci/* 4178c2ecf20Sopenharmony_ci * This function is invoked on either a bind_context or unbind_context. 
4188c2ecf20Sopenharmony_ci * If called for an unbind_context, the val arg is 0; otherwise, 4198c2ecf20Sopenharmony_ci * it is the object-id value for the spu context. 4208c2ecf20Sopenharmony_ci * The data arg is of type 'struct spu *'. 4218c2ecf20Sopenharmony_ci */ 4228c2ecf20Sopenharmony_cistatic int spu_active_notify(struct notifier_block *self, unsigned long val, 4238c2ecf20Sopenharmony_ci void *data) 4248c2ecf20Sopenharmony_ci{ 4258c2ecf20Sopenharmony_ci int retval; 4268c2ecf20Sopenharmony_ci unsigned long flags; 4278c2ecf20Sopenharmony_ci struct spu *the_spu = data; 4288c2ecf20Sopenharmony_ci 4298c2ecf20Sopenharmony_ci pr_debug("SPU event notification arrived\n"); 4308c2ecf20Sopenharmony_ci if (!val) { 4318c2ecf20Sopenharmony_ci spin_lock_irqsave(&cache_lock, flags); 4328c2ecf20Sopenharmony_ci retval = release_cached_info(the_spu->number); 4338c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&cache_lock, flags); 4348c2ecf20Sopenharmony_ci } else { 4358c2ecf20Sopenharmony_ci retval = process_context_switch(the_spu, val); 4368c2ecf20Sopenharmony_ci } 4378c2ecf20Sopenharmony_ci return retval; 4388c2ecf20Sopenharmony_ci} 4398c2ecf20Sopenharmony_ci 4408c2ecf20Sopenharmony_cistatic struct notifier_block spu_active = { 4418c2ecf20Sopenharmony_ci .notifier_call = spu_active_notify, 4428c2ecf20Sopenharmony_ci}; 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_cistatic int number_of_online_nodes(void) 4458c2ecf20Sopenharmony_ci{ 4468c2ecf20Sopenharmony_ci u32 cpu; u32 tmp; 4478c2ecf20Sopenharmony_ci int nodes = 0; 4488c2ecf20Sopenharmony_ci for_each_online_cpu(cpu) { 4498c2ecf20Sopenharmony_ci tmp = cbe_cpu_to_node(cpu) + 1; 4508c2ecf20Sopenharmony_ci if (tmp > nodes) 4518c2ecf20Sopenharmony_ci nodes++; 4528c2ecf20Sopenharmony_ci } 4538c2ecf20Sopenharmony_ci return nodes; 4548c2ecf20Sopenharmony_ci} 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_cistatic int oprofile_spu_buff_create(void) 4578c2ecf20Sopenharmony_ci{ 4588c2ecf20Sopenharmony_ci int spu; 
4598c2ecf20Sopenharmony_ci 4608c2ecf20Sopenharmony_ci max_spu_buff = oprofile_get_cpu_buffer_size(); 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci for (spu = 0; spu < num_spu_nodes; spu++) { 4638c2ecf20Sopenharmony_ci /* create circular buffers to store the data in. 4648c2ecf20Sopenharmony_ci * use locks to manage accessing the buffers 4658c2ecf20Sopenharmony_ci */ 4668c2ecf20Sopenharmony_ci spu_buff[spu].head = 0; 4678c2ecf20Sopenharmony_ci spu_buff[spu].tail = 0; 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci /* 4708c2ecf20Sopenharmony_ci * Create a buffer for each SPU. Can't reliably 4718c2ecf20Sopenharmony_ci * create a single buffer for all spus due to not 4728c2ecf20Sopenharmony_ci * enough contiguous kernel memory. 4738c2ecf20Sopenharmony_ci */ 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci spu_buff[spu].buff = kzalloc((max_spu_buff 4768c2ecf20Sopenharmony_ci * sizeof(unsigned long)), 4778c2ecf20Sopenharmony_ci GFP_KERNEL); 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ci if (!spu_buff[spu].buff) { 4808c2ecf20Sopenharmony_ci printk(KERN_ERR "SPU_PROF: " 4818c2ecf20Sopenharmony_ci "%s, line %d: oprofile_spu_buff_create " 4828c2ecf20Sopenharmony_ci "failed to allocate spu buffer %d.\n", 4838c2ecf20Sopenharmony_ci __func__, __LINE__, spu); 4848c2ecf20Sopenharmony_ci 4858c2ecf20Sopenharmony_ci /* release the spu buffers that have been allocated */ 4868c2ecf20Sopenharmony_ci while (spu >= 0) { 4878c2ecf20Sopenharmony_ci kfree(spu_buff[spu].buff); 4888c2ecf20Sopenharmony_ci spu_buff[spu].buff = 0; 4898c2ecf20Sopenharmony_ci spu--; 4908c2ecf20Sopenharmony_ci } 4918c2ecf20Sopenharmony_ci return -ENOMEM; 4928c2ecf20Sopenharmony_ci } 4938c2ecf20Sopenharmony_ci } 4948c2ecf20Sopenharmony_ci return 0; 4958c2ecf20Sopenharmony_ci} 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci/* The main purpose of this function is to synchronize 4988c2ecf20Sopenharmony_ci * OProfile with SPUFS by registering to be notified of 4998c2ecf20Sopenharmony_ci * 
SPU task switches. 5008c2ecf20Sopenharmony_ci * 5018c2ecf20Sopenharmony_ci * NOTE: When profiling SPUs, we must ensure that only 5028c2ecf20Sopenharmony_ci * spu_sync_start is invoked and not the generic sync_start 5038c2ecf20Sopenharmony_ci * in drivers/oprofile/oprof.c. A return value of 5048c2ecf20Sopenharmony_ci * SKIP_GENERIC_SYNC or SYNC_START_ERROR will 5058c2ecf20Sopenharmony_ci * accomplish this. 5068c2ecf20Sopenharmony_ci */ 5078c2ecf20Sopenharmony_ciint spu_sync_start(void) 5088c2ecf20Sopenharmony_ci{ 5098c2ecf20Sopenharmony_ci int spu; 5108c2ecf20Sopenharmony_ci int ret = SKIP_GENERIC_SYNC; 5118c2ecf20Sopenharmony_ci int register_ret; 5128c2ecf20Sopenharmony_ci unsigned long flags = 0; 5138c2ecf20Sopenharmony_ci 5148c2ecf20Sopenharmony_ci spu_prof_num_nodes = number_of_online_nodes(); 5158c2ecf20Sopenharmony_ci num_spu_nodes = spu_prof_num_nodes * 8; 5168c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff); 5178c2ecf20Sopenharmony_ci 5188c2ecf20Sopenharmony_ci /* create buffer for storing the SPU data to put in 5198c2ecf20Sopenharmony_ci * the kernel buffer. 
5208c2ecf20Sopenharmony_ci */ 5218c2ecf20Sopenharmony_ci ret = oprofile_spu_buff_create(); 5228c2ecf20Sopenharmony_ci if (ret) 5238c2ecf20Sopenharmony_ci goto out; 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_ci spin_lock_irqsave(&buffer_lock, flags); 5268c2ecf20Sopenharmony_ci for (spu = 0; spu < num_spu_nodes; spu++) { 5278c2ecf20Sopenharmony_ci spu_buff_add(ESCAPE_CODE, spu); 5288c2ecf20Sopenharmony_ci spu_buff_add(SPU_PROFILING_CODE, spu); 5298c2ecf20Sopenharmony_ci spu_buff_add(num_spu_nodes, spu); 5308c2ecf20Sopenharmony_ci } 5318c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&buffer_lock, flags); 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_ci for (spu = 0; spu < num_spu_nodes; spu++) { 5348c2ecf20Sopenharmony_ci spu_buff[spu].ctx_sw_seen = 0; 5358c2ecf20Sopenharmony_ci spu_buff[spu].last_guard_val = 0; 5368c2ecf20Sopenharmony_ci } 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci /* Register for SPU events */ 5398c2ecf20Sopenharmony_ci register_ret = spu_switch_event_register(&spu_active); 5408c2ecf20Sopenharmony_ci if (register_ret) { 5418c2ecf20Sopenharmony_ci ret = SYNC_START_ERROR; 5428c2ecf20Sopenharmony_ci goto out; 5438c2ecf20Sopenharmony_ci } 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci pr_debug("spu_sync_start -- running.\n"); 5468c2ecf20Sopenharmony_ciout: 5478c2ecf20Sopenharmony_ci return ret; 5488c2ecf20Sopenharmony_ci} 5498c2ecf20Sopenharmony_ci 5508c2ecf20Sopenharmony_ci/* Record SPU program counter samples to the oprofile event buffer. 
*/ 5518c2ecf20Sopenharmony_civoid spu_sync_buffer(int spu_num, unsigned int *samples, 5528c2ecf20Sopenharmony_ci int num_samples) 5538c2ecf20Sopenharmony_ci{ 5548c2ecf20Sopenharmony_ci unsigned long long file_offset; 5558c2ecf20Sopenharmony_ci unsigned long flags; 5568c2ecf20Sopenharmony_ci int i; 5578c2ecf20Sopenharmony_ci struct vma_to_fileoffset_map *map; 5588c2ecf20Sopenharmony_ci struct spu *the_spu; 5598c2ecf20Sopenharmony_ci unsigned long long spu_num_ll = spu_num; 5608c2ecf20Sopenharmony_ci unsigned long long spu_num_shifted = spu_num_ll << 32; 5618c2ecf20Sopenharmony_ci struct cached_info *c_info; 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_ci /* We need to obtain the cache_lock here because it's 5648c2ecf20Sopenharmony_ci * possible that after getting the cached_info, the SPU job 5658c2ecf20Sopenharmony_ci * corresponding to this cached_info may end, thus resulting 5668c2ecf20Sopenharmony_ci * in the destruction of the cached_info. 5678c2ecf20Sopenharmony_ci */ 5688c2ecf20Sopenharmony_ci spin_lock_irqsave(&cache_lock, flags); 5698c2ecf20Sopenharmony_ci c_info = get_cached_info(NULL, spu_num); 5708c2ecf20Sopenharmony_ci if (!c_info) { 5718c2ecf20Sopenharmony_ci /* This legitimately happens when the SPU task ends before all 5728c2ecf20Sopenharmony_ci * samples are recorded. 5738c2ecf20Sopenharmony_ci * No big deal -- so we just drop a few samples. 5748c2ecf20Sopenharmony_ci */ 5758c2ecf20Sopenharmony_ci pr_debug("SPU_PROF: No cached SPU context " 5768c2ecf20Sopenharmony_ci "for SPU #%d. 
Dropping samples.\n", spu_num); 5778c2ecf20Sopenharmony_ci goto out; 5788c2ecf20Sopenharmony_ci } 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_ci map = c_info->map; 5818c2ecf20Sopenharmony_ci the_spu = c_info->the_spu; 5828c2ecf20Sopenharmony_ci spin_lock(&buffer_lock); 5838c2ecf20Sopenharmony_ci for (i = 0; i < num_samples; i++) { 5848c2ecf20Sopenharmony_ci unsigned int sample = *(samples+i); 5858c2ecf20Sopenharmony_ci int grd_val = 0; 5868c2ecf20Sopenharmony_ci file_offset = 0; 5878c2ecf20Sopenharmony_ci if (sample == 0) 5888c2ecf20Sopenharmony_ci continue; 5898c2ecf20Sopenharmony_ci file_offset = vma_map_lookup( map, sample, the_spu, &grd_val); 5908c2ecf20Sopenharmony_ci 5918c2ecf20Sopenharmony_ci /* If overlays are used by this SPU application, the guard 5928c2ecf20Sopenharmony_ci * value is non-zero, indicating which overlay section is in 5938c2ecf20Sopenharmony_ci * use. We need to discard samples taken during the time 5948c2ecf20Sopenharmony_ci * period which an overlay occurs (i.e., guard value changes). 5958c2ecf20Sopenharmony_ci */ 5968c2ecf20Sopenharmony_ci if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) { 5978c2ecf20Sopenharmony_ci spu_buff[spu_num].last_guard_val = grd_val; 5988c2ecf20Sopenharmony_ci /* Drop the rest of the samples. */ 5998c2ecf20Sopenharmony_ci break; 6008c2ecf20Sopenharmony_ci } 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci /* We must ensure that the SPU context switch has been written 6038c2ecf20Sopenharmony_ci * out before samples for the SPU. Otherwise, the SPU context 6048c2ecf20Sopenharmony_ci * information is not available and the postprocessing of the 6058c2ecf20Sopenharmony_ci * SPU PC will fail with no available anonymous map information. 
6068c2ecf20Sopenharmony_ci */ 6078c2ecf20Sopenharmony_ci if (spu_buff[spu_num].ctx_sw_seen) 6088c2ecf20Sopenharmony_ci spu_buff_add((file_offset | spu_num_shifted), 6098c2ecf20Sopenharmony_ci spu_num); 6108c2ecf20Sopenharmony_ci } 6118c2ecf20Sopenharmony_ci spin_unlock(&buffer_lock); 6128c2ecf20Sopenharmony_ciout: 6138c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&cache_lock, flags); 6148c2ecf20Sopenharmony_ci} 6158c2ecf20Sopenharmony_ci 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_ciint spu_sync_stop(void) 6188c2ecf20Sopenharmony_ci{ 6198c2ecf20Sopenharmony_ci unsigned long flags = 0; 6208c2ecf20Sopenharmony_ci int ret; 6218c2ecf20Sopenharmony_ci int k; 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_ci ret = spu_switch_event_unregister(&spu_active); 6248c2ecf20Sopenharmony_ci 6258c2ecf20Sopenharmony_ci if (ret) 6268c2ecf20Sopenharmony_ci printk(KERN_ERR "SPU_PROF: " 6278c2ecf20Sopenharmony_ci "%s, line %d: spu_switch_event_unregister " \ 6288c2ecf20Sopenharmony_ci "returned %d\n", 6298c2ecf20Sopenharmony_ci __func__, __LINE__, ret); 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_ci /* flush any remaining data in the per SPU buffers */ 6328c2ecf20Sopenharmony_ci sync_spu_buff(); 6338c2ecf20Sopenharmony_ci 6348c2ecf20Sopenharmony_ci spin_lock_irqsave(&cache_lock, flags); 6358c2ecf20Sopenharmony_ci ret = release_cached_info(RELEASE_ALL); 6368c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&cache_lock, flags); 6378c2ecf20Sopenharmony_ci 6388c2ecf20Sopenharmony_ci /* remove scheduled work queue item rather then waiting 6398c2ecf20Sopenharmony_ci * for every queued entry to execute. Then flush pending 6408c2ecf20Sopenharmony_ci * system wide buffer to event buffer. 
6418c2ecf20Sopenharmony_ci */ 6428c2ecf20Sopenharmony_ci cancel_delayed_work(&spu_work); 6438c2ecf20Sopenharmony_ci 6448c2ecf20Sopenharmony_ci for (k = 0; k < num_spu_nodes; k++) { 6458c2ecf20Sopenharmony_ci spu_buff[k].ctx_sw_seen = 0; 6468c2ecf20Sopenharmony_ci 6478c2ecf20Sopenharmony_ci /* 6488c2ecf20Sopenharmony_ci * spu_sys_buff will be null if there was a problem 6498c2ecf20Sopenharmony_ci * allocating the buffer. Only delete if it exists. 6508c2ecf20Sopenharmony_ci */ 6518c2ecf20Sopenharmony_ci kfree(spu_buff[k].buff); 6528c2ecf20Sopenharmony_ci spu_buff[k].buff = 0; 6538c2ecf20Sopenharmony_ci } 6548c2ecf20Sopenharmony_ci pr_debug("spu_sync_stop -- done.\n"); 6558c2ecf20Sopenharmony_ci return ret; 6568c2ecf20Sopenharmony_ci} 6578c2ecf20Sopenharmony_ci 658