18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Cell Broadband Engine OProfile Support
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * (C) Copyright IBM Corporation 2006
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Author: Maynard Johnson <maynardj@us.ibm.com>
88c2ecf20Sopenharmony_ci */
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ci/* The purpose of this file is to handle SPU event task switching
118c2ecf20Sopenharmony_ci * and to record SPU context information into the OProfile
128c2ecf20Sopenharmony_ci * event buffer.
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci * Additionally, the spu_sync_buffer function is provided as a helper
 * for recording actual SPU program counter samples to the event buffer.
168c2ecf20Sopenharmony_ci */
178c2ecf20Sopenharmony_ci#include <linux/dcookies.h>
188c2ecf20Sopenharmony_ci#include <linux/kref.h>
198c2ecf20Sopenharmony_ci#include <linux/mm.h>
208c2ecf20Sopenharmony_ci#include <linux/fs.h>
218c2ecf20Sopenharmony_ci#include <linux/file.h>
228c2ecf20Sopenharmony_ci#include <linux/module.h>
238c2ecf20Sopenharmony_ci#include <linux/notifier.h>
248c2ecf20Sopenharmony_ci#include <linux/numa.h>
258c2ecf20Sopenharmony_ci#include <linux/oprofile.h>
268c2ecf20Sopenharmony_ci#include <linux/slab.h>
278c2ecf20Sopenharmony_ci#include <linux/spinlock.h>
288c2ecf20Sopenharmony_ci#include "pr_util.h"
298c2ecf20Sopenharmony_ci
/* Passed to release_cached_info() to request that the cached_info
 * for every SPU be released, not just a single index.
 */
#define RELEASE_ALL 9999

static DEFINE_SPINLOCK(buffer_lock);	/* protects spu_buff[] head/tail/data */
static DEFINE_SPINLOCK(cache_lock);	/* protects spu_info[] */
static int num_spu_nodes;		/* total SPUs = spu_prof_num_nodes * 8 */
static int spu_prof_num_nodes;		/* number of online Cell nodes */

/* One circular sample buffer per SPU; drained into the OProfile
 * event buffer by spu_work / sync_spu_buff().
 */
struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
struct delayed_work spu_work;
static unsigned max_spu_buff;		/* entries in each spu_buff[].buff */
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_cistatic void spu_buff_add(unsigned long int value, int spu)
428c2ecf20Sopenharmony_ci{
438c2ecf20Sopenharmony_ci	/* spu buff is a circular buffer.  Add entries to the
448c2ecf20Sopenharmony_ci	 * head.  Head is the index to store the next value.
458c2ecf20Sopenharmony_ci	 * The buffer is full when there is one available entry
468c2ecf20Sopenharmony_ci	 * in the queue, i.e. head and tail can't be equal.
478c2ecf20Sopenharmony_ci	 * That way we can tell the difference between the
488c2ecf20Sopenharmony_ci	 * buffer being full versus empty.
498c2ecf20Sopenharmony_ci	 *
508c2ecf20Sopenharmony_ci	 *  ASSUMPTION: the buffer_lock is held when this function
518c2ecf20Sopenharmony_ci	 *             is called to lock the buffer, head and tail.
528c2ecf20Sopenharmony_ci	 */
538c2ecf20Sopenharmony_ci	int full = 1;
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci	if (spu_buff[spu].head >= spu_buff[spu].tail) {
568c2ecf20Sopenharmony_ci		if ((spu_buff[spu].head - spu_buff[spu].tail)
578c2ecf20Sopenharmony_ci		    <  (max_spu_buff - 1))
588c2ecf20Sopenharmony_ci			full = 0;
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci	} else if (spu_buff[spu].tail > spu_buff[spu].head) {
618c2ecf20Sopenharmony_ci		if ((spu_buff[spu].tail - spu_buff[spu].head)
628c2ecf20Sopenharmony_ci		    > 1)
638c2ecf20Sopenharmony_ci			full = 0;
648c2ecf20Sopenharmony_ci	}
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	if (!full) {
678c2ecf20Sopenharmony_ci		spu_buff[spu].buff[spu_buff[spu].head] = value;
688c2ecf20Sopenharmony_ci		spu_buff[spu].head++;
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci		if (spu_buff[spu].head >= max_spu_buff)
718c2ecf20Sopenharmony_ci			spu_buff[spu].head = 0;
728c2ecf20Sopenharmony_ci	} else {
738c2ecf20Sopenharmony_ci		/* From the user's perspective make the SPU buffer
748c2ecf20Sopenharmony_ci		 * size management/overflow look like we are using
758c2ecf20Sopenharmony_ci		 * per cpu buffers.  The user uses the same
768c2ecf20Sopenharmony_ci		 * per cpu parameter to adjust the SPU buffer size.
778c2ecf20Sopenharmony_ci		 * Increment the sample_lost_overflow to inform
788c2ecf20Sopenharmony_ci		 * the user the buffer size needs to be increased.
798c2ecf20Sopenharmony_ci		 */
808c2ecf20Sopenharmony_ci		oprofile_cpu_buffer_inc_smpl_lost();
818c2ecf20Sopenharmony_ci	}
828c2ecf20Sopenharmony_ci}
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_ci/* This function copies the per SPU buffers to the
858c2ecf20Sopenharmony_ci * OProfile kernel buffer.
868c2ecf20Sopenharmony_ci */
878c2ecf20Sopenharmony_cistatic void sync_spu_buff(void)
888c2ecf20Sopenharmony_ci{
898c2ecf20Sopenharmony_ci	int spu;
908c2ecf20Sopenharmony_ci	unsigned long flags;
918c2ecf20Sopenharmony_ci	int curr_head;
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci	for (spu = 0; spu < num_spu_nodes; spu++) {
948c2ecf20Sopenharmony_ci		/* In case there was an issue and the buffer didn't
958c2ecf20Sopenharmony_ci		 * get created skip it.
968c2ecf20Sopenharmony_ci		 */
978c2ecf20Sopenharmony_ci		if (spu_buff[spu].buff == NULL)
988c2ecf20Sopenharmony_ci			continue;
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci		/* Hold the lock to make sure the head/tail
1018c2ecf20Sopenharmony_ci		 * doesn't change while spu_buff_add() is
1028c2ecf20Sopenharmony_ci		 * deciding if the buffer is full or not.
1038c2ecf20Sopenharmony_ci		 * Being a little paranoid.
1048c2ecf20Sopenharmony_ci		 */
1058c2ecf20Sopenharmony_ci		spin_lock_irqsave(&buffer_lock, flags);
1068c2ecf20Sopenharmony_ci		curr_head = spu_buff[spu].head;
1078c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&buffer_lock, flags);
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci		/* Transfer the current contents to the kernel buffer.
1108c2ecf20Sopenharmony_ci		 * data can still be added to the head of the buffer.
1118c2ecf20Sopenharmony_ci		 */
1128c2ecf20Sopenharmony_ci		oprofile_put_buff(spu_buff[spu].buff,
1138c2ecf20Sopenharmony_ci				  spu_buff[spu].tail,
1148c2ecf20Sopenharmony_ci				  curr_head, max_spu_buff);
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci		spin_lock_irqsave(&buffer_lock, flags);
1178c2ecf20Sopenharmony_ci		spu_buff[spu].tail = curr_head;
1188c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&buffer_lock, flags);
1198c2ecf20Sopenharmony_ci	}
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci}
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_cistatic void wq_sync_spu_buff(struct work_struct *work)
1248c2ecf20Sopenharmony_ci{
1258c2ecf20Sopenharmony_ci	/* move data from spu buffers to kernel buffer */
1268c2ecf20Sopenharmony_ci	sync_spu_buff();
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci	/* only reschedule if profiling is not done */
1298c2ecf20Sopenharmony_ci	if (spu_prof_running)
1308c2ecf20Sopenharmony_ci		schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
1318c2ecf20Sopenharmony_ci}
1328c2ecf20Sopenharmony_ci
/* Container for caching information about an active SPU task.
 * Reference counted; the final put comes from SPUFS via
 * destroy_cached_info() (see prepare_cached_spu_info()).
 */
struct cached_info {
	struct vma_to_fileoffset_map *map;	/* VMA -> SPU ELF file-offset map */
	struct spu *the_spu;	/* needed to access pointer to local_store */
	struct kref cache_ref;	/* release runs destroy_cached_info() */
};
1398c2ecf20Sopenharmony_ci
/* Per-SPU cached_info pointers, indexed by spu->number.
 * Readers/writers take cache_lock.
 */
static struct cached_info *spu_info[MAX_NUMNODES * 8];
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_cistatic void destroy_cached_info(struct kref *kref)
1438c2ecf20Sopenharmony_ci{
1448c2ecf20Sopenharmony_ci	struct cached_info *info;
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci	info = container_of(kref, struct cached_info, cache_ref);
1478c2ecf20Sopenharmony_ci	vma_map_free(info->map);
1488c2ecf20Sopenharmony_ci	kfree(info);
1498c2ecf20Sopenharmony_ci	module_put(THIS_MODULE);
1508c2ecf20Sopenharmony_ci}
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci/* Return the cached_info for the passed SPU number.
1538c2ecf20Sopenharmony_ci * ATTENTION:  Callers are responsible for obtaining the
1548c2ecf20Sopenharmony_ci *	       cache_lock if needed prior to invoking this function.
1558c2ecf20Sopenharmony_ci */
1568c2ecf20Sopenharmony_cistatic struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
1578c2ecf20Sopenharmony_ci{
1588c2ecf20Sopenharmony_ci	struct kref *ref;
1598c2ecf20Sopenharmony_ci	struct cached_info *ret_info;
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci	if (spu_num >= num_spu_nodes) {
1628c2ecf20Sopenharmony_ci		printk(KERN_ERR "SPU_PROF: "
1638c2ecf20Sopenharmony_ci		       "%s, line %d: Invalid index %d into spu info cache\n",
1648c2ecf20Sopenharmony_ci		       __func__, __LINE__, spu_num);
1658c2ecf20Sopenharmony_ci		ret_info = NULL;
1668c2ecf20Sopenharmony_ci		goto out;
1678c2ecf20Sopenharmony_ci	}
1688c2ecf20Sopenharmony_ci	if (!spu_info[spu_num] && the_spu) {
1698c2ecf20Sopenharmony_ci		ref = spu_get_profile_private_kref(the_spu->ctx);
1708c2ecf20Sopenharmony_ci		if (ref) {
1718c2ecf20Sopenharmony_ci			spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
1728c2ecf20Sopenharmony_ci			kref_get(&spu_info[spu_num]->cache_ref);
1738c2ecf20Sopenharmony_ci		}
1748c2ecf20Sopenharmony_ci	}
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	ret_info = spu_info[spu_num];
1778c2ecf20Sopenharmony_ci out:
1788c2ecf20Sopenharmony_ci	return ret_info;
1798c2ecf20Sopenharmony_ci}
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci/* Looks for cached info for the passed spu.  If not found, the
1838c2ecf20Sopenharmony_ci * cached info is created for the passed spu.
1848c2ecf20Sopenharmony_ci * Returns 0 for success; otherwise, -1 for error.
1858c2ecf20Sopenharmony_ci */
1868c2ecf20Sopenharmony_cistatic int
1878c2ecf20Sopenharmony_ciprepare_cached_spu_info(struct spu *spu, unsigned long objectId)
1888c2ecf20Sopenharmony_ci{
1898c2ecf20Sopenharmony_ci	unsigned long flags;
1908c2ecf20Sopenharmony_ci	struct vma_to_fileoffset_map *new_map;
1918c2ecf20Sopenharmony_ci	int retval = 0;
1928c2ecf20Sopenharmony_ci	struct cached_info *info;
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	/* We won't bother getting cache_lock here since
1958c2ecf20Sopenharmony_ci	 * don't do anything with the cached_info that's returned.
1968c2ecf20Sopenharmony_ci	 */
1978c2ecf20Sopenharmony_ci	info = get_cached_info(spu, spu->number);
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_ci	if (info) {
2008c2ecf20Sopenharmony_ci		pr_debug("Found cached SPU info.\n");
2018c2ecf20Sopenharmony_ci		goto out;
2028c2ecf20Sopenharmony_ci	}
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	/* Create cached_info and set spu_info[spu->number] to point to it.
2058c2ecf20Sopenharmony_ci	 * spu->number is a system-wide value, not a per-node value.
2068c2ecf20Sopenharmony_ci	 */
2078c2ecf20Sopenharmony_ci	info = kzalloc(sizeof(*info), GFP_KERNEL);
2088c2ecf20Sopenharmony_ci	if (!info) {
2098c2ecf20Sopenharmony_ci		printk(KERN_ERR "SPU_PROF: "
2108c2ecf20Sopenharmony_ci		       "%s, line %d: create vma_map failed\n",
2118c2ecf20Sopenharmony_ci		       __func__, __LINE__);
2128c2ecf20Sopenharmony_ci		retval = -ENOMEM;
2138c2ecf20Sopenharmony_ci		goto err_alloc;
2148c2ecf20Sopenharmony_ci	}
2158c2ecf20Sopenharmony_ci	new_map = create_vma_map(spu, objectId);
2168c2ecf20Sopenharmony_ci	if (!new_map) {
2178c2ecf20Sopenharmony_ci		printk(KERN_ERR "SPU_PROF: "
2188c2ecf20Sopenharmony_ci		       "%s, line %d: create vma_map failed\n",
2198c2ecf20Sopenharmony_ci		       __func__, __LINE__);
2208c2ecf20Sopenharmony_ci		retval = -ENOMEM;
2218c2ecf20Sopenharmony_ci		goto err_alloc;
2228c2ecf20Sopenharmony_ci	}
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci	pr_debug("Created vma_map\n");
2258c2ecf20Sopenharmony_ci	info->map = new_map;
2268c2ecf20Sopenharmony_ci	info->the_spu = spu;
2278c2ecf20Sopenharmony_ci	kref_init(&info->cache_ref);
2288c2ecf20Sopenharmony_ci	spin_lock_irqsave(&cache_lock, flags);
2298c2ecf20Sopenharmony_ci	spu_info[spu->number] = info;
2308c2ecf20Sopenharmony_ci	/* Increment count before passing off ref to SPUFS. */
2318c2ecf20Sopenharmony_ci	kref_get(&info->cache_ref);
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci	/* We increment the module refcount here since SPUFS is
2348c2ecf20Sopenharmony_ci	 * responsible for the final destruction of the cached_info,
2358c2ecf20Sopenharmony_ci	 * and it must be able to access the destroy_cached_info()
2368c2ecf20Sopenharmony_ci	 * function defined in the OProfile module.  We decrement
2378c2ecf20Sopenharmony_ci	 * the module refcount in destroy_cached_info.
2388c2ecf20Sopenharmony_ci	 */
2398c2ecf20Sopenharmony_ci	try_module_get(THIS_MODULE);
2408c2ecf20Sopenharmony_ci	spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
2418c2ecf20Sopenharmony_ci				destroy_cached_info);
2428c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&cache_lock, flags);
2438c2ecf20Sopenharmony_ci	goto out;
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_cierr_alloc:
2468c2ecf20Sopenharmony_ci	kfree(info);
2478c2ecf20Sopenharmony_ciout:
2488c2ecf20Sopenharmony_ci	return retval;
2498c2ecf20Sopenharmony_ci}
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci/*
2528c2ecf20Sopenharmony_ci * NOTE:  The caller is responsible for locking the
2538c2ecf20Sopenharmony_ci *	  cache_lock prior to calling this function.
2548c2ecf20Sopenharmony_ci */
2558c2ecf20Sopenharmony_cistatic int release_cached_info(int spu_index)
2568c2ecf20Sopenharmony_ci{
2578c2ecf20Sopenharmony_ci	int index, end;
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	if (spu_index == RELEASE_ALL) {
2608c2ecf20Sopenharmony_ci		end = num_spu_nodes;
2618c2ecf20Sopenharmony_ci		index = 0;
2628c2ecf20Sopenharmony_ci	} else {
2638c2ecf20Sopenharmony_ci		if (spu_index >= num_spu_nodes) {
2648c2ecf20Sopenharmony_ci			printk(KERN_ERR "SPU_PROF: "
2658c2ecf20Sopenharmony_ci				"%s, line %d: "
2668c2ecf20Sopenharmony_ci				"Invalid index %d into spu info cache\n",
2678c2ecf20Sopenharmony_ci				__func__, __LINE__, spu_index);
2688c2ecf20Sopenharmony_ci			goto out;
2698c2ecf20Sopenharmony_ci		}
2708c2ecf20Sopenharmony_ci		end = spu_index + 1;
2718c2ecf20Sopenharmony_ci		index = spu_index;
2728c2ecf20Sopenharmony_ci	}
2738c2ecf20Sopenharmony_ci	for (; index < end; index++) {
2748c2ecf20Sopenharmony_ci		if (spu_info[index]) {
2758c2ecf20Sopenharmony_ci			kref_put(&spu_info[index]->cache_ref,
2768c2ecf20Sopenharmony_ci				 destroy_cached_info);
2778c2ecf20Sopenharmony_ci			spu_info[index] = NULL;
2788c2ecf20Sopenharmony_ci		}
2798c2ecf20Sopenharmony_ci	}
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ciout:
2828c2ecf20Sopenharmony_ci	return 0;
2838c2ecf20Sopenharmony_ci}
2848c2ecf20Sopenharmony_ci
/* The source code for fast_get_dcookie was "borrowed"
 * from drivers/oprofile/buffer_sync.c.
 */

/* Optimisation. We can manage without taking the dcookie sem
 * because we cannot reach this code without at least one
 * dcookie user still being registered (namely, the reader
 * of the event buffer).
 */
static inline unsigned long fast_get_dcookie(const struct path *path)
{
	unsigned long cookie;

	/* If a cookie already exists for this dentry, the dentry
	 * pointer itself serves as the cookie value; skip the
	 * slower get_dcookie() path.
	 */
	if (path->dentry->d_flags & DCACHE_COOKIE)
		return (unsigned long)path->dentry;
	/* NOTE(review): get_dcookie()'s return code is ignored here,
	 * so on failure 'cookie' may be returned uninitialized --
	 * confirm against the buffer_sync.c original.
	 */
	get_dcookie(path, &cookie);
	return cookie;
}
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci/* Look up the dcookie for the task's mm->exe_file,
3058c2ecf20Sopenharmony_ci * which corresponds loosely to "application name". Also, determine
3068c2ecf20Sopenharmony_ci * the offset for the SPU ELF object.  If computed offset is
3078c2ecf20Sopenharmony_ci * non-zero, it implies an embedded SPU object; otherwise, it's a
3088c2ecf20Sopenharmony_ci * separate SPU binary, in which case we retrieve it's dcookie.
3098c2ecf20Sopenharmony_ci * For the embedded case, we must determine if SPU ELF is embedded
3108c2ecf20Sopenharmony_ci * in the executable application or another file (i.e., shared lib).
3118c2ecf20Sopenharmony_ci * If embedded in a shared lib, we must get the dcookie and return
3128c2ecf20Sopenharmony_ci * that to the caller.
3138c2ecf20Sopenharmony_ci */
3148c2ecf20Sopenharmony_cistatic unsigned long
3158c2ecf20Sopenharmony_ciget_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
3168c2ecf20Sopenharmony_ci			    unsigned long *spu_bin_dcookie,
3178c2ecf20Sopenharmony_ci			    unsigned long spu_ref)
3188c2ecf20Sopenharmony_ci{
3198c2ecf20Sopenharmony_ci	unsigned long app_cookie = 0;
3208c2ecf20Sopenharmony_ci	unsigned int my_offset = 0;
3218c2ecf20Sopenharmony_ci	struct vm_area_struct *vma;
3228c2ecf20Sopenharmony_ci	struct file *exe_file;
3238c2ecf20Sopenharmony_ci	struct mm_struct *mm = spu->mm;
3248c2ecf20Sopenharmony_ci
3258c2ecf20Sopenharmony_ci	if (!mm)
3268c2ecf20Sopenharmony_ci		goto out;
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci	exe_file = get_mm_exe_file(mm);
3298c2ecf20Sopenharmony_ci	if (exe_file) {
3308c2ecf20Sopenharmony_ci		app_cookie = fast_get_dcookie(&exe_file->f_path);
3318c2ecf20Sopenharmony_ci		pr_debug("got dcookie for %pD\n", exe_file);
3328c2ecf20Sopenharmony_ci		fput(exe_file);
3338c2ecf20Sopenharmony_ci	}
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_ci	mmap_read_lock(mm);
3368c2ecf20Sopenharmony_ci	for (vma = mm->mmap; vma; vma = vma->vm_next) {
3378c2ecf20Sopenharmony_ci		if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
3388c2ecf20Sopenharmony_ci			continue;
3398c2ecf20Sopenharmony_ci		my_offset = spu_ref - vma->vm_start;
3408c2ecf20Sopenharmony_ci		if (!vma->vm_file)
3418c2ecf20Sopenharmony_ci			goto fail_no_image_cookie;
3428c2ecf20Sopenharmony_ci
3438c2ecf20Sopenharmony_ci		pr_debug("Found spu ELF at %X(object-id:%lx) for file %pD\n",
3448c2ecf20Sopenharmony_ci			 my_offset, spu_ref, vma->vm_file);
3458c2ecf20Sopenharmony_ci		*offsetp = my_offset;
3468c2ecf20Sopenharmony_ci		break;
3478c2ecf20Sopenharmony_ci	}
3488c2ecf20Sopenharmony_ci
3498c2ecf20Sopenharmony_ci	*spu_bin_dcookie = fast_get_dcookie(&vma->vm_file->f_path);
3508c2ecf20Sopenharmony_ci	pr_debug("got dcookie for %pD\n", vma->vm_file);
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci	mmap_read_unlock(mm);
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ciout:
3558c2ecf20Sopenharmony_ci	return app_cookie;
3568c2ecf20Sopenharmony_ci
3578c2ecf20Sopenharmony_cifail_no_image_cookie:
3588c2ecf20Sopenharmony_ci	mmap_read_unlock(mm);
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_ci	printk(KERN_ERR "SPU_PROF: "
3618c2ecf20Sopenharmony_ci		"%s, line %d: Cannot find dcookie for SPU binary\n",
3628c2ecf20Sopenharmony_ci		__func__, __LINE__);
3638c2ecf20Sopenharmony_ci	goto out;
3648c2ecf20Sopenharmony_ci}
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci
/* This function finds or creates cached context information for the
 * passed SPU and records SPU context information into the OProfile
 * event buffer.
 * Returns 0 on success, -ENOENT if the dcookies could not be found,
 * or the error from prepare_cached_spu_info().
 */
static int process_context_switch(struct spu *spu, unsigned long objectId)
{
	unsigned long flags;
	int retval;
	unsigned int offset = 0;
	unsigned long spu_cookie = 0, app_dcookie;

	/* Make sure a cached_info exists for this SPU before any
	 * records are emitted (creates one on first use).
	 */
	retval = prepare_cached_spu_info(spu, objectId);
	if (retval)
		goto out;

	/* Get dcookie first because a mutex_lock is taken in that
	 * code path, so interrupts must not be disabled.
	 */
	app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
	if (!app_dcookie || !spu_cookie) {
		retval  = -ENOENT;
		goto out;
	}

	/* Record context info in event buffer.  The fields are written
	 * in a fixed sequence (escape, switch code, spu number, pid,
	 * tgid, app dcookie, spu binary dcookie, offset) under a single
	 * lock hold; do not reorder or interleave them.
	 */
	spin_lock_irqsave(&buffer_lock, flags);
	spu_buff_add(ESCAPE_CODE, spu->number);
	spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number);
	spu_buff_add(spu->number, spu->number);
	spu_buff_add(spu->pid, spu->number);
	spu_buff_add(spu->tgid, spu->number);
	spu_buff_add(app_dcookie, spu->number);
	spu_buff_add(spu_cookie, spu->number);
	spu_buff_add(offset, spu->number);

	/* Set flag to indicate SPU PC data can now be written out.  If
	 * the SPU program counter data is seen before an SPU context
	 * record is seen, the postprocessing will fail.
	 */
	spu_buff[spu->number].ctx_sw_seen = 1;

	spin_unlock_irqrestore(&buffer_lock, flags);
	smp_wmb();	/* insure spu event buffer updates are written */
			/* don't want entries intermingled... */
out:
	return retval;
}
4158c2ecf20Sopenharmony_ci
4168c2ecf20Sopenharmony_ci/*
4178c2ecf20Sopenharmony_ci * This function is invoked on either a bind_context or unbind_context.
4188c2ecf20Sopenharmony_ci * If called for an unbind_context, the val arg is 0; otherwise,
4198c2ecf20Sopenharmony_ci * it is the object-id value for the spu context.
4208c2ecf20Sopenharmony_ci * The data arg is of type 'struct spu *'.
4218c2ecf20Sopenharmony_ci */
4228c2ecf20Sopenharmony_cistatic int spu_active_notify(struct notifier_block *self, unsigned long val,
4238c2ecf20Sopenharmony_ci				void *data)
4248c2ecf20Sopenharmony_ci{
4258c2ecf20Sopenharmony_ci	int retval;
4268c2ecf20Sopenharmony_ci	unsigned long flags;
4278c2ecf20Sopenharmony_ci	struct spu *the_spu = data;
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_ci	pr_debug("SPU event notification arrived\n");
4308c2ecf20Sopenharmony_ci	if (!val) {
4318c2ecf20Sopenharmony_ci		spin_lock_irqsave(&cache_lock, flags);
4328c2ecf20Sopenharmony_ci		retval = release_cached_info(the_spu->number);
4338c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&cache_lock, flags);
4348c2ecf20Sopenharmony_ci	} else {
4358c2ecf20Sopenharmony_ci		retval = process_context_switch(the_spu, val);
4368c2ecf20Sopenharmony_ci	}
4378c2ecf20Sopenharmony_ci	return retval;
4388c2ecf20Sopenharmony_ci}
4398c2ecf20Sopenharmony_ci
/* Registered with SPUFS (spu_switch_event_register) to receive
 * SPU context bind/unbind events.
 */
static struct notifier_block spu_active = {
	.notifier_call = spu_active_notify,
};
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_cistatic int number_of_online_nodes(void)
4458c2ecf20Sopenharmony_ci{
4468c2ecf20Sopenharmony_ci        u32 cpu; u32 tmp;
4478c2ecf20Sopenharmony_ci        int nodes = 0;
4488c2ecf20Sopenharmony_ci        for_each_online_cpu(cpu) {
4498c2ecf20Sopenharmony_ci                tmp = cbe_cpu_to_node(cpu) + 1;
4508c2ecf20Sopenharmony_ci                if (tmp > nodes)
4518c2ecf20Sopenharmony_ci                        nodes++;
4528c2ecf20Sopenharmony_ci        }
4538c2ecf20Sopenharmony_ci        return nodes;
4548c2ecf20Sopenharmony_ci}
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_cistatic int oprofile_spu_buff_create(void)
4578c2ecf20Sopenharmony_ci{
4588c2ecf20Sopenharmony_ci	int spu;
4598c2ecf20Sopenharmony_ci
4608c2ecf20Sopenharmony_ci	max_spu_buff = oprofile_get_cpu_buffer_size();
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ci	for (spu = 0; spu < num_spu_nodes; spu++) {
4638c2ecf20Sopenharmony_ci		/* create circular buffers to store the data in.
4648c2ecf20Sopenharmony_ci		 * use locks to manage accessing the buffers
4658c2ecf20Sopenharmony_ci		 */
4668c2ecf20Sopenharmony_ci		spu_buff[spu].head = 0;
4678c2ecf20Sopenharmony_ci		spu_buff[spu].tail = 0;
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci		/*
4708c2ecf20Sopenharmony_ci		 * Create a buffer for each SPU.  Can't reliably
4718c2ecf20Sopenharmony_ci		 * create a single buffer for all spus due to not
4728c2ecf20Sopenharmony_ci		 * enough contiguous kernel memory.
4738c2ecf20Sopenharmony_ci		 */
4748c2ecf20Sopenharmony_ci
4758c2ecf20Sopenharmony_ci		spu_buff[spu].buff = kzalloc((max_spu_buff
4768c2ecf20Sopenharmony_ci					      * sizeof(unsigned long)),
4778c2ecf20Sopenharmony_ci					     GFP_KERNEL);
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_ci		if (!spu_buff[spu].buff) {
4808c2ecf20Sopenharmony_ci			printk(KERN_ERR "SPU_PROF: "
4818c2ecf20Sopenharmony_ci			       "%s, line %d:  oprofile_spu_buff_create "
4828c2ecf20Sopenharmony_ci		       "failed to allocate spu buffer %d.\n",
4838c2ecf20Sopenharmony_ci			       __func__, __LINE__, spu);
4848c2ecf20Sopenharmony_ci
4858c2ecf20Sopenharmony_ci			/* release the spu buffers that have been allocated */
4868c2ecf20Sopenharmony_ci			while (spu >= 0) {
4878c2ecf20Sopenharmony_ci				kfree(spu_buff[spu].buff);
4888c2ecf20Sopenharmony_ci				spu_buff[spu].buff = 0;
4898c2ecf20Sopenharmony_ci				spu--;
4908c2ecf20Sopenharmony_ci			}
4918c2ecf20Sopenharmony_ci			return -ENOMEM;
4928c2ecf20Sopenharmony_ci		}
4938c2ecf20Sopenharmony_ci	}
4948c2ecf20Sopenharmony_ci	return 0;
4958c2ecf20Sopenharmony_ci}
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci/* The main purpose of this function is to synchronize
4988c2ecf20Sopenharmony_ci * OProfile with SPUFS by registering to be notified of
4998c2ecf20Sopenharmony_ci * SPU task switches.
5008c2ecf20Sopenharmony_ci *
5018c2ecf20Sopenharmony_ci * NOTE: When profiling SPUs, we must ensure that only
5028c2ecf20Sopenharmony_ci * spu_sync_start is invoked and not the generic sync_start
5038c2ecf20Sopenharmony_ci * in drivers/oprofile/oprof.c.	 A return value of
5048c2ecf20Sopenharmony_ci * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
5058c2ecf20Sopenharmony_ci * accomplish this.
5068c2ecf20Sopenharmony_ci */
5078c2ecf20Sopenharmony_ciint spu_sync_start(void)
5088c2ecf20Sopenharmony_ci{
5098c2ecf20Sopenharmony_ci	int spu;
5108c2ecf20Sopenharmony_ci	int ret = SKIP_GENERIC_SYNC;
5118c2ecf20Sopenharmony_ci	int register_ret;
5128c2ecf20Sopenharmony_ci	unsigned long flags = 0;
5138c2ecf20Sopenharmony_ci
5148c2ecf20Sopenharmony_ci	spu_prof_num_nodes = number_of_online_nodes();
5158c2ecf20Sopenharmony_ci	num_spu_nodes = spu_prof_num_nodes * 8;
5168c2ecf20Sopenharmony_ci	INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff);
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_ci	/* create buffer for storing the SPU data to put in
5198c2ecf20Sopenharmony_ci	 * the kernel buffer.
5208c2ecf20Sopenharmony_ci	 */
5218c2ecf20Sopenharmony_ci	ret = oprofile_spu_buff_create();
5228c2ecf20Sopenharmony_ci	if (ret)
5238c2ecf20Sopenharmony_ci		goto out;
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_ci	spin_lock_irqsave(&buffer_lock, flags);
5268c2ecf20Sopenharmony_ci	for (spu = 0; spu < num_spu_nodes; spu++) {
5278c2ecf20Sopenharmony_ci		spu_buff_add(ESCAPE_CODE, spu);
5288c2ecf20Sopenharmony_ci		spu_buff_add(SPU_PROFILING_CODE, spu);
5298c2ecf20Sopenharmony_ci		spu_buff_add(num_spu_nodes, spu);
5308c2ecf20Sopenharmony_ci	}
5318c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&buffer_lock, flags);
5328c2ecf20Sopenharmony_ci
5338c2ecf20Sopenharmony_ci	for (spu = 0; spu < num_spu_nodes; spu++) {
5348c2ecf20Sopenharmony_ci		spu_buff[spu].ctx_sw_seen = 0;
5358c2ecf20Sopenharmony_ci		spu_buff[spu].last_guard_val = 0;
5368c2ecf20Sopenharmony_ci	}
5378c2ecf20Sopenharmony_ci
5388c2ecf20Sopenharmony_ci	/* Register for SPU events  */
5398c2ecf20Sopenharmony_ci	register_ret = spu_switch_event_register(&spu_active);
5408c2ecf20Sopenharmony_ci	if (register_ret) {
5418c2ecf20Sopenharmony_ci		ret = SYNC_START_ERROR;
5428c2ecf20Sopenharmony_ci		goto out;
5438c2ecf20Sopenharmony_ci	}
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_ci	pr_debug("spu_sync_start -- running.\n");
5468c2ecf20Sopenharmony_ciout:
5478c2ecf20Sopenharmony_ci	return ret;
5488c2ecf20Sopenharmony_ci}
5498c2ecf20Sopenharmony_ci
/* Record SPU program counter samples to the oprofile event buffer.
 * Each recorded entry packs the SPU number in the upper 32 bits and
 * the sample's file offset in the lower 32 bits.
 */
void spu_sync_buffer(int spu_num, unsigned int *samples,
		     int num_samples)
{
	unsigned long long file_offset;
	unsigned long flags;
	int i;
	struct vma_to_fileoffset_map *map;
	struct spu *the_spu;
	unsigned long long spu_num_ll = spu_num;
	unsigned long long spu_num_shifted = spu_num_ll << 32;
	struct cached_info *c_info;

	/* We need to obtain the cache_lock here because it's
	 * possible that after getting the cached_info, the SPU job
	 * corresponding to this cached_info may end, thus resulting
	 * in the destruction of the cached_info.
	 */
	spin_lock_irqsave(&cache_lock, flags);
	c_info = get_cached_info(NULL, spu_num);
	if (!c_info) {
		/* This legitimately happens when the SPU task ends before all
		 * samples are recorded.
		 * No big deal -- so we just drop a few samples.
		 */
		pr_debug("SPU_PROF: No cached SPU context "
			  "for SPU #%d. Dropping samples.\n", spu_num);
		goto out;
	}

	map = c_info->map;
	the_spu = c_info->the_spu;
	/* buffer_lock nests inside cache_lock here; interrupts are
	 * already disabled by the irqsave above.
	 */
	spin_lock(&buffer_lock);
	for (i = 0; i < num_samples; i++) {
		unsigned int sample = *(samples+i);
		int grd_val = 0;
		file_offset = 0;
		/* A sample value of 0 is skipped -- treated as no data. */
		if (sample == 0)
			continue;
		file_offset = vma_map_lookup( map, sample, the_spu, &grd_val);

		/* If overlays are used by this SPU application, the guard
		 * value is non-zero, indicating which overlay section is in
		 * use.	 We need to discard samples taken during the time
		 * period which an overlay occurs (i.e., guard value changes).
		 */
		if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) {
			spu_buff[spu_num].last_guard_val = grd_val;
			/* Drop the rest of the samples. */
			break;
		}

		/* We must ensure that the SPU context switch has been written
		 * out before samples for the SPU.  Otherwise, the SPU context
		 * information is not available and the postprocessing of the
		 * SPU PC will fail with no available anonymous map information.
		 */
		if (spu_buff[spu_num].ctx_sw_seen)
			spu_buff_add((file_offset | spu_num_shifted),
					 spu_num);
	}
	spin_unlock(&buffer_lock);
out:
	spin_unlock_irqrestore(&cache_lock, flags);
}
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci
6178c2ecf20Sopenharmony_ciint spu_sync_stop(void)
6188c2ecf20Sopenharmony_ci{
6198c2ecf20Sopenharmony_ci	unsigned long flags = 0;
6208c2ecf20Sopenharmony_ci	int ret;
6218c2ecf20Sopenharmony_ci	int k;
6228c2ecf20Sopenharmony_ci
6238c2ecf20Sopenharmony_ci	ret = spu_switch_event_unregister(&spu_active);
6248c2ecf20Sopenharmony_ci
6258c2ecf20Sopenharmony_ci	if (ret)
6268c2ecf20Sopenharmony_ci		printk(KERN_ERR "SPU_PROF: "
6278c2ecf20Sopenharmony_ci		       "%s, line %d: spu_switch_event_unregister "	\
6288c2ecf20Sopenharmony_ci		       "returned %d\n",
6298c2ecf20Sopenharmony_ci		       __func__, __LINE__, ret);
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ci	/* flush any remaining data in the per SPU buffers */
6328c2ecf20Sopenharmony_ci	sync_spu_buff();
6338c2ecf20Sopenharmony_ci
6348c2ecf20Sopenharmony_ci	spin_lock_irqsave(&cache_lock, flags);
6358c2ecf20Sopenharmony_ci	ret = release_cached_info(RELEASE_ALL);
6368c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&cache_lock, flags);
6378c2ecf20Sopenharmony_ci
6388c2ecf20Sopenharmony_ci	/* remove scheduled work queue item rather then waiting
6398c2ecf20Sopenharmony_ci	 * for every queued entry to execute.  Then flush pending
6408c2ecf20Sopenharmony_ci	 * system wide buffer to event buffer.
6418c2ecf20Sopenharmony_ci	 */
6428c2ecf20Sopenharmony_ci	cancel_delayed_work(&spu_work);
6438c2ecf20Sopenharmony_ci
6448c2ecf20Sopenharmony_ci	for (k = 0; k < num_spu_nodes; k++) {
6458c2ecf20Sopenharmony_ci		spu_buff[k].ctx_sw_seen = 0;
6468c2ecf20Sopenharmony_ci
6478c2ecf20Sopenharmony_ci		/*
6488c2ecf20Sopenharmony_ci		 * spu_sys_buff will be null if there was a problem
6498c2ecf20Sopenharmony_ci		 * allocating the buffer.  Only delete if it exists.
6508c2ecf20Sopenharmony_ci		 */
6518c2ecf20Sopenharmony_ci		kfree(spu_buff[k].buff);
6528c2ecf20Sopenharmony_ci		spu_buff[k].buff = 0;
6538c2ecf20Sopenharmony_ci	}
6548c2ecf20Sopenharmony_ci	pr_debug("spu_sync_stop -- done.\n");
6558c2ecf20Sopenharmony_ci	return ret;
6568c2ecf20Sopenharmony_ci}
6578c2ecf20Sopenharmony_ci
658