1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Hypervisor supplied "gpci" ("get performance counter info") performance
4 * counter support
5 *
6 * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
7 * Copyright 2014 IBM Corporation.
8 */
9
10#define pr_fmt(fmt) "hv-gpci: " fmt
11
12#include <linux/init.h>
13#include <linux/perf_event.h>
14#include <asm/firmware.h>
15#include <asm/hvcall.h>
16#include <asm/io.h>
17
18#include "hv-gpci.h"
19#include "hv-common.h"
20
21/*
22 * Example usage:
23 *  perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8,
24 *		  secondary_index=0,starting_index=0xffffffff,request=0x10/' ...
25 */
26
/*
 * perf_event_attr field layout for hv_gpci events.
 *
 * Each definition below names a field and gives the attr word (config or
 * config1) together with the inclusive bit range the field occupies. The
 * resulting format_attr_<name> objects are listed in format_attrs[], and the
 * event_get_<name>() accessors are what the event callbacks in this file use
 * to pull the request parameters out of an event.
 *
 * NOTE(review): the macros are defined in hv-common.h; the _LITE variants
 * presumably emit only the sysfs format attribute (no accessor) - confirm
 * against that header.
 */

/* u32: counter request value passed to the hypervisor */
EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31);
/* u32 */
/*
 * Note that starting_index, phys_processor_idx, sibling_part_id,
 * hw_chip_id, partition_id all refer to the same bit range. They
 * are basically aliases for the starting_index. The specific alias
 * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h
 */
EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63);
EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63);

/* u16 */
EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15);
/* u8 */
EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23);
/* u8, bytes of data (1-8) */
EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31);
/* u32, byte offset */
EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63);
50
/*
 * CPU designated to collect gpci events. The hotplug callbacks below set a
 * CPU here only while the mask is empty and pick a replacement when that
 * CPU goes offline; the mask is exported through the "cpumask" attribute.
 */
static cpumask_t hv_gpci_cpumask;

/* Entries of the "format" sysfs group: one per event field defined above. */
static struct attribute *format_attrs[] = {
	&format_attr_request.attr,
	&format_attr_starting_index.attr,
	&format_attr_phys_processor_idx.attr,
	&format_attr_sibling_part_id.attr,
	&format_attr_hw_chip_id.attr,
	&format_attr_partition_id.attr,
	&format_attr_secondary_index.attr,
	&format_attr_counter_info_version.attr,

	&format_attr_offset.attr,
	&format_attr_length.attr,
	NULL,
};

static const struct attribute_group format_group = {
	.name = "format",
	.attrs = format_attrs,
};

/*
 * "events" sysfs group. Not const because hv_gpci_init() selects the event
 * list at run time from the counter_info_version reported by the hypervisor.
 */
static struct attribute_group event_group = {
	.name  = "events",
	/* .attrs is set in init */
};
77
/*
 * HV_CAPS_ATTR(name, format) - define a read-only device attribute
 * hv_caps_attr_<name> whose show routine queries the hypervisor perf
 * capabilities and prints the <name> member using <format>.
 * The show routine returns -EIO when the capability query fails.
 */
#define HV_CAPS_ATTR(_name, _format)				\
static ssize_t _name##_show(struct device *dev,			\
			    struct device_attribute *attr,	\
			    char *page)				\
{								\
	struct hv_perf_caps hv_caps;				\
								\
	if (hv_perf_caps_get(&hv_caps) != 0)			\
		return -EIO;					\
								\
	return sprintf(page, _format, hv_caps._name);		\
}								\
static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name)
91
92static ssize_t kernel_version_show(struct device *dev,
93				   struct device_attribute *attr,
94				   char *page)
95{
96	return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT);
97}
98
99static ssize_t cpumask_show(struct device *dev,
100			    struct device_attribute *attr, char *buf)
101{
102	return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask);
103}
104
/*
 * Interface attribute array index to store system information.
 *
 * These are the positions of the NULL placeholders inside interface_attrs[]
 * that add_sysinfo_interface_files() fills in; INTERFACE_NULL_ATTR is the
 * index of the array's terminating NULL. They must be kept in step with the
 * layout of interface_attrs[].
 */
#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR	6
#define INTERFACE_PROCESSOR_CONFIG_ATTR		7
#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR	8
#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR	9
#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR	10
#define INTERFACE_NULL_ATTR			11

/*
 * Counter request value to retrieve system information.
 *
 * The enumerators index sysinfo_counter_request[]. Their order has to match
 * the INTERFACE_*_ATTR order above, because add_sysinfo_interface_files()
 * pairs table entry i with interface slot
 * INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR + i.
 */
enum {
	PROCESSOR_BUS_TOPOLOGY,
	PROCESSOR_CONFIG,
	AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */
	AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */
	AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */
};

/* H_GET_PERF_COUNTER_INFO counter_request value for each sysinfo file. */
static int sysinfo_counter_request[] = {
	[PROCESSOR_BUS_TOPOLOGY] = 0xD0,
	[PROCESSOR_CONFIG] = 0x90,
	[AFFINITY_DOMAIN_VIA_VP] = 0xA0,
	[AFFINITY_DOMAIN_VIA_DOM] = 0xB0,
	[AFFINITY_DOMAIN_VIA_PAR] = 0xB1,
};

/*
 * Per-CPU request/response buffer handed to the H_GET_PERF_COUNTER_INFO
 * hcall. Users take it with get_cpu_var() and must release it with
 * put_cpu_var() on every exit path.
 */
static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
131
132static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index,
133			u16 secondary_index, char *buf,
134			size_t *n, struct hv_gpci_request_buffer *arg)
135{
136	unsigned long ret;
137	size_t i, j;
138
139	arg->params.counter_request = cpu_to_be32(req);
140	arg->params.starting_index = cpu_to_be32(starting_index);
141	arg->params.secondary_index = cpu_to_be16(secondary_index);
142
143	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
144			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
145
146	/*
147	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
148	 * which means that the current buffer size cannot accommodate
149	 * all the information and a partial buffer returned.
150	 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER.
151	 *
152	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
153	 * performance information, and required to set
154	 * "Enable Performance Information Collection" option.
155	 */
156	if (ret == H_AUTHORITY)
157		return -EPERM;
158
159	/*
160	 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
161	 * because of invalid buffer-length/address or due to some hardware
162	 * error.
163	 */
164	if (ret && (ret != H_PARAMETER))
165		return -EIO;
166
167	/*
168	 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
169	 * to show the total number of counter_value array elements
170	 * returned via hcall.
171	 * hcall also populates 'cv_element_size' corresponds to individual
172	 * counter_value array element size. Below loop go through all
173	 * counter_value array elements as per their size and add it to
174	 * the output buffer.
175	 */
176	for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) {
177		j = i * be16_to_cpu(arg->params.cv_element_size);
178
179		for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++)
180			*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[j]);
181		*n += sprintf(buf + *n,  "\n");
182	}
183
184	if (*n >= PAGE_SIZE) {
185		pr_info("System information exceeds PAGE_SIZE\n");
186		return -EFBIG;
187	}
188
189	return ret;
190}
191
192static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr,
193				char *buf)
194{
195	struct hv_gpci_request_buffer *arg;
196	unsigned long ret;
197	size_t n = 0;
198
199	arg = (void *)get_cpu_var(hv_gpci_reqb);
200	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
201
202	/*
203	 * Pass the counter request value 0xD0 corresponds to request
204	 * type 'Processor_bus_topology', to retrieve
205	 * the system topology information.
206	 * starting_index value implies the starting hardware
207	 * chip id.
208	 */
209	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
210			0, 0, buf, &n, arg);
211
212	if (!ret)
213		return n;
214
215	if (ret != H_PARAMETER)
216		goto out;
217
218	/*
219	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
220	 * implies that buffer can't accommodate all information, and a partial buffer
221	 * returned. To handle that, we need to make subsequent requests
222	 * with next starting index to retrieve additional (missing) data.
223	 * Below loop do subsequent hcalls with next starting index and add it
224	 * to buffer util we get all the information.
225	 */
226	while (ret == H_PARAMETER) {
227		int returned_values = be16_to_cpu(arg->params.returned_values);
228		int elementsize = be16_to_cpu(arg->params.cv_element_size);
229		int last_element = (returned_values - 1) * elementsize;
230
231		/*
232		 * Since the starting index value is part of counter_value
233		 * buffer elements, use the starting index value in the last
234		 * element and add 1 to make subsequent hcalls.
235		 */
236		u32 starting_index = arg->bytes[last_element + 3] +
237				(arg->bytes[last_element + 2] << 8) +
238				(arg->bytes[last_element + 1] << 16) +
239				(arg->bytes[last_element] << 24) + 1;
240
241		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
242
243		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
244				starting_index, 0, buf, &n, arg);
245
246		if (!ret)
247			return n;
248
249		if (ret != H_PARAMETER)
250			goto out;
251	}
252
253	return n;
254
255out:
256	put_cpu_var(hv_gpci_reqb);
257	return ret;
258}
259
260static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr,
261					char *buf)
262{
263	struct hv_gpci_request_buffer *arg;
264	unsigned long ret;
265	size_t n = 0;
266
267	arg = (void *)get_cpu_var(hv_gpci_reqb);
268	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
269
270	/*
271	 * Pass the counter request value 0x90 corresponds to request
272	 * type 'Processor_config', to retrieve
273	 * the system processor information.
274	 * starting_index value implies the starting hardware
275	 * processor index.
276	 */
277	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
278			0, 0, buf, &n, arg);
279
280	if (!ret)
281		return n;
282
283	if (ret != H_PARAMETER)
284		goto out;
285
286	/*
287	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
288	 * implies that buffer can't accommodate all information, and a partial buffer
289	 * returned. To handle that, we need to take subsequent requests
290	 * with next starting index to retrieve additional (missing) data.
291	 * Below loop do subsequent hcalls with next starting index and add it
292	 * to buffer util we get all the information.
293	 */
294	while (ret == H_PARAMETER) {
295		int returned_values = be16_to_cpu(arg->params.returned_values);
296		int elementsize = be16_to_cpu(arg->params.cv_element_size);
297		int last_element = (returned_values - 1) * elementsize;
298
299		/*
300		 * Since the starting index is part of counter_value
301		 * buffer elements, use the starting index value in the last
302		 * element and add 1 to subsequent hcalls.
303		 */
304		u32 starting_index = arg->bytes[last_element + 3] +
305				(arg->bytes[last_element + 2] << 8) +
306				(arg->bytes[last_element + 1] << 16) +
307				(arg->bytes[last_element] << 24) + 1;
308
309		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
310
311		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
312				starting_index, 0, buf, &n, arg);
313
314		if (!ret)
315			return n;
316
317		if (ret != H_PARAMETER)
318			goto out;
319	}
320
321	return n;
322
323out:
324	put_cpu_var(hv_gpci_reqb);
325	return ret;
326}
327
328static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
329			struct device_attribute *attr, char *buf)
330{
331	struct hv_gpci_request_buffer *arg;
332	unsigned long ret;
333	size_t n = 0;
334
335	arg = (void *)get_cpu_var(hv_gpci_reqb);
336	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
337
338	/*
339	 * Pass the counter request 0xA0 corresponds to request
340	 * type 'Affinity_domain_information_by_virutal_processor',
341	 * to retrieve the system affinity domain information.
342	 * starting_index value refers to the starting hardware
343	 * processor index.
344	 */
345	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
346			0, 0, buf, &n, arg);
347
348	if (!ret)
349		return n;
350
351	if (ret != H_PARAMETER)
352		goto out;
353
354	/*
355	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
356	 * implies that buffer can't accommodate all information, and a partial buffer
357	 * returned. To handle that, we need to take subsequent requests
358	 * with next secondary index to retrieve additional (missing) data.
359	 * Below loop do subsequent hcalls with next secondary index and add it
360	 * to buffer util we get all the information.
361	 */
362	while (ret == H_PARAMETER) {
363		int returned_values = be16_to_cpu(arg->params.returned_values);
364		int elementsize = be16_to_cpu(arg->params.cv_element_size);
365		int last_element = (returned_values - 1) * elementsize;
366
367		/*
368		 * Since the starting index and secondary index type is part of the
369		 * counter_value buffer elements, use the starting index value in the
370		 * last array element as subsequent starting index, and use secondary index
371		 * value in the last array element plus 1 as subsequent secondary index.
372		 * For counter request '0xA0', starting index points to partition id
373		 * and secondary index points to corresponding virtual processor index.
374		 */
375		u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8);
376		u16 secondary_index = arg->bytes[last_element + 3] +
377				(arg->bytes[last_element + 2] << 8) + 1;
378
379		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
380
381		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
382				starting_index, secondary_index, buf, &n, arg);
383
384		if (!ret)
385			return n;
386
387		if (ret != H_PARAMETER)
388			goto out;
389	}
390
391	return n;
392
393out:
394	put_cpu_var(hv_gpci_reqb);
395	return ret;
396}
397
398static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr,
399						char *buf)
400{
401	struct hv_gpci_request_buffer *arg;
402	unsigned long ret;
403	size_t n = 0;
404
405	arg = (void *)get_cpu_var(hv_gpci_reqb);
406	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
407
408	/*
409	 * Pass the counter request 0xB0 corresponds to request
410	 * type 'Affinity_domain_information_by_domain',
411	 * to retrieve the system affinity domain information.
412	 * starting_index value refers to the starting hardware
413	 * processor index.
414	 */
415	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
416			0, 0, buf, &n, arg);
417
418	if (!ret)
419		return n;
420
421	if (ret != H_PARAMETER)
422		goto out;
423
424	/*
425	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
426	 * implies that buffer can't accommodate all information, and a partial buffer
427	 * returned. To handle that, we need to take subsequent requests
428	 * with next starting index to retrieve additional (missing) data.
429	 * Below loop do subsequent hcalls with next starting index and add it
430	 * to buffer util we get all the information.
431	 */
432	while (ret == H_PARAMETER) {
433		int returned_values = be16_to_cpu(arg->params.returned_values);
434		int elementsize = be16_to_cpu(arg->params.cv_element_size);
435		int last_element = (returned_values - 1) * elementsize;
436
437		/*
438		 * Since the starting index value is part of counter_value
439		 * buffer elements, use the starting index value in the last
440		 * element and add 1 to make subsequent hcalls.
441		 */
442		u32 starting_index = arg->bytes[last_element + 1] +
443			(arg->bytes[last_element] << 8) + 1;
444
445		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
446
447		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
448					starting_index, 0, buf, &n, arg);
449
450		if (!ret)
451			return n;
452
453		if (ret != H_PARAMETER)
454			goto out;
455	}
456
457	return n;
458
459out:
460	put_cpu_var(hv_gpci_reqb);
461	return ret;
462}
463
464static void affinity_domain_via_partition_result_parse(int returned_values,
465			int element_size, char *buf, size_t *last_element,
466			size_t *n, struct hv_gpci_request_buffer *arg)
467{
468	size_t i = 0, j = 0;
469	size_t k, l, m;
470	uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele;
471
472	/*
473	 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
474	 * to show the total number of counter_value array elements
475	 * returned via hcall.
476	 * Unlike other request types, the data structure returned by this
477	 * request is variable-size. For this counter request type,
478	 * hcall populates 'cv_element_size' corresponds to minimum size of
479	 * the structure returned i.e; the size of the structure with no domain
480	 * information. Below loop go through all counter_value array
481	 * to determine the number and size of each domain array element and
482	 * add it to the output buffer.
483	 */
484	while (i < returned_values) {
485		k = j;
486		for (; k < j + element_size; k++)
487			*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]);
488		*n += sprintf(buf + *n,  "\n");
489
490		total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3];
491		size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1];
492
493		for (l = 0; l < total_affinity_domain_ele; l++) {
494			for (m = 0; m < size_of_each_affinity_domain_ele; m++) {
495				*n += sprintf(buf + *n,  "%02x", (u8)arg->bytes[k]);
496				k++;
497			}
498			*n += sprintf(buf + *n,  "\n");
499		}
500
501		*n += sprintf(buf + *n,  "\n");
502		i++;
503		j = k;
504	}
505
506	*last_element = k;
507}
508
509static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr,
510							char *buf)
511{
512	struct hv_gpci_request_buffer *arg;
513	unsigned long ret;
514	size_t n = 0;
515	size_t last_element = 0;
516	u32 starting_index;
517
518	arg = (void *)get_cpu_var(hv_gpci_reqb);
519	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
520
521	/*
522	 * Pass the counter request value 0xB1 corresponds to counter request
523	 * type 'Affinity_domain_information_by_partition',
524	 * to retrieve the system affinity domain by partition information.
525	 * starting_index value refers to the starting hardware
526	 * processor index.
527	 */
528	arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
529	arg->params.starting_index = cpu_to_be32(0);
530
531	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
532			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
533
534	if (!ret)
535		goto parse_result;
536
537	if (ret && (ret != H_PARAMETER))
538		goto out;
539
540	/*
541	 * ret value as 'H_PARAMETER' implies that the current buffer size
542	 * can't accommodate all the information, and a partial buffer
543	 * returned. To handle that, we need to make subsequent requests
544	 * with next starting index to retrieve additional (missing) data.
545	 * Below loop do subsequent hcalls with next starting index and add it
546	 * to buffer util we get all the information.
547	 */
548	while (ret == H_PARAMETER) {
549		affinity_domain_via_partition_result_parse(
550			be16_to_cpu(arg->params.returned_values) - 1,
551			be16_to_cpu(arg->params.cv_element_size), buf,
552			&last_element, &n, arg);
553
554		if (n >= PAGE_SIZE) {
555			put_cpu_var(hv_gpci_reqb);
556			pr_debug("System information exceeds PAGE_SIZE\n");
557			return -EFBIG;
558		}
559
560		/*
561		 * Since the starting index value is part of counter_value
562		 * buffer elements, use the starting_index value in the last
563		 * element and add 1 to make subsequent hcalls.
564		 */
565		starting_index = (u8)arg->bytes[last_element] << 8 |
566				(u8)arg->bytes[last_element + 1];
567
568		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
569		arg->params.counter_request = cpu_to_be32(
570				sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
571		arg->params.starting_index = cpu_to_be32(starting_index);
572
573		ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
574				virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
575
576		if (ret && (ret != H_PARAMETER))
577			goto out;
578	}
579
580parse_result:
581	affinity_domain_via_partition_result_parse(
582		be16_to_cpu(arg->params.returned_values),
583		be16_to_cpu(arg->params.cv_element_size),
584		buf, &last_element, &n, arg);
585
586	put_cpu_var(hv_gpci_reqb);
587	return n;
588
589out:
590	put_cpu_var(hv_gpci_reqb);
591
592	/*
593	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
594	 * which means that the current buffer size cannot accommodate
595	 * all the information and a partial buffer returned.
596	 * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER.
597	 *
598	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
599	 * performance information, and required to set
600	 * "Enable Performance Information Collection" option.
601	 */
602	if (ret == H_AUTHORITY)
603		return -EPERM;
604
605	/*
606	 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
607	 * because of invalid buffer-length/address or due to some hardware
608	 * error.
609	 */
610	return -EIO;
611}
612
/* Read-only device attributes backed by kernel_version_show()/cpumask_show(). */
static DEVICE_ATTR_RO(kernel_version);
static DEVICE_ATTR_RO(cpumask);

/* One read-only attribute per hv_perf_caps member, with its print format. */
HV_CAPS_ATTR(version, "0x%x\n");
HV_CAPS_ATTR(ga, "%d\n");
HV_CAPS_ATTR(expanded, "%d\n");
HV_CAPS_ATTR(lab, "%d\n");
HV_CAPS_ATTR(collect_privileged, "%d\n");

/*
 * Entries of the "interface" sysfs group. The first six entries are fixed;
 * the NULL placeholders that follow sit at the INTERFACE_*_ATTR indices and
 * are overwritten by add_sysinfo_interface_files() when the system
 * information requests are supported. Until then the first placeholder
 * terminates the list.
 */
static struct attribute *interface_attrs[] = {
	&dev_attr_kernel_version.attr,
	&hv_caps_attr_version.attr,
	&hv_caps_attr_ga.attr,
	&hv_caps_attr_expanded.attr,
	&hv_caps_attr_lab.attr,
	&hv_caps_attr_collect_privileged.attr,
	/*
	 * This NULL is a placeholder for the processor_bus_topology
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the processor_config
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the affinity_domain_via_virtual_processor
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the affinity_domain_via_domain
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/*
	 * This NULL is a placeholder for the affinity_domain_via_partition
	 * attribute, set in init function if applicable.
	 */
	NULL,
	/* Terminator (index INTERFACE_NULL_ATTR). */
	NULL,
};

static struct attribute *cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

/* Unnamed group: no .name is set, unlike the other groups in this file. */
static const struct attribute_group cpumask_attr_group = {
	.attrs = cpumask_attrs,
};

static const struct attribute_group interface_group = {
	.name = "interface",
	.attrs = interface_attrs,
};

/* All attribute groups attached to the hv_gpci PMU via h_gpci_pmu. */
static const struct attribute_group *attr_groups[] = {
	&format_group,
	&event_group,
	&interface_group,
	&cpumask_attr_group,
	NULL,
};
678
679static unsigned long single_gpci_request(u32 req, u32 starting_index,
680		u16 secondary_index, u8 version_in, u32 offset, u8 length,
681		u64 *value)
682{
683	unsigned long ret;
684	size_t i;
685	u64 count;
686	struct hv_gpci_request_buffer *arg;
687
688	arg = (void *)get_cpu_var(hv_gpci_reqb);
689	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
690
691	arg->params.counter_request = cpu_to_be32(req);
692	arg->params.starting_index = cpu_to_be32(starting_index);
693	arg->params.secondary_index = cpu_to_be16(secondary_index);
694	arg->params.counter_info_version_in = version_in;
695
696	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
697			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
698
699	/*
700	 * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL',
701	 * specifies that the current buffer size cannot accommodate
702	 * all the information and a partial buffer returned.
703	 * Since in this function we are only accessing data for a given starting index,
704	 * we don't need to accommodate whole data and can get required count by
705	 * accessing first entry data.
706	 * Hence hcall fails only incase the ret value is other than H_SUCCESS or
707	 * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B).
708	 */
709	if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B)
710		ret = 0;
711
712	if (ret) {
713		pr_devel("hcall failed: 0x%lx\n", ret);
714		goto out;
715	}
716
717	/*
718	 * we verify offset and length are within the zeroed buffer at event
719	 * init.
720	 */
721	count = 0;
722	for (i = offset; i < offset + length; i++)
723		count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8);
724
725	*value = count;
726out:
727	put_cpu_var(hv_gpci_reqb);
728	return ret;
729}
730
731static u64 h_gpci_get_value(struct perf_event *event)
732{
733	u64 count;
734	unsigned long ret = single_gpci_request(event_get_request(event),
735					event_get_starting_index(event),
736					event_get_secondary_index(event),
737					event_get_counter_info_version(event),
738					event_get_offset(event),
739					event_get_length(event),
740					&count);
741	if (ret)
742		return 0;
743	return count;
744}
745
746static void h_gpci_event_update(struct perf_event *event)
747{
748	s64 prev;
749	u64 now = h_gpci_get_value(event);
750	prev = local64_xchg(&event->hw.prev_count, now);
751	local64_add(now - prev, &event->count);
752}
753
754static void h_gpci_event_start(struct perf_event *event, int flags)
755{
756	local64_set(&event->hw.prev_count, h_gpci_get_value(event));
757}
758
/*
 * Stop counting: fold the delta since the last reading into the event
 * count. @flags is not used. Also installed as the PMU's .del callback.
 */
static void h_gpci_event_stop(struct perf_event *event, int flags)
{
	h_gpci_event_update(event);
}
763
764static int h_gpci_event_add(struct perf_event *event, int flags)
765{
766	if (flags & PERF_EF_START)
767		h_gpci_event_start(event, flags);
768
769	return 0;
770}
771
772static int h_gpci_event_init(struct perf_event *event)
773{
774	u64 count;
775	u8 length;
776	unsigned long ret;
777
778	/* Not our event */
779	if (event->attr.type != event->pmu->type)
780		return -ENOENT;
781
782	/* config2 is unused */
783	if (event->attr.config2) {
784		pr_devel("config2 set when reserved\n");
785		return -EINVAL;
786	}
787
788	/* no branch sampling */
789	if (has_branch_stack(event))
790		return -EOPNOTSUPP;
791
792	length = event_get_length(event);
793	if (length < 1 || length > 8) {
794		pr_devel("length invalid\n");
795		return -EINVAL;
796	}
797
798	/* last byte within the buffer? */
799	if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) {
800		pr_devel("request outside of buffer: %zu > %zu\n",
801				(size_t)event_get_offset(event) + length,
802				HGPCI_MAX_DATA_BYTES);
803		return -EINVAL;
804	}
805
806	/* check if the request works... */
807	ret = single_gpci_request(event_get_request(event),
808				event_get_starting_index(event),
809				event_get_secondary_index(event),
810				event_get_counter_info_version(event),
811				event_get_offset(event),
812				length,
813				&count);
814
815	/*
816	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
817	 * performance information, and required to set
818	 * "Enable Performance Information Collection" option.
819	 */
820	if (ret == H_AUTHORITY)
821		return -EPERM;
822
823	if (ret) {
824		pr_devel("gpci hcall failed\n");
825		return -EINVAL;
826	}
827
828	return 0;
829}
830
/*
 * PMU descriptor registered by hv_gpci_init() under the name "hv_gpci".
 * .del reuses the stop callback, and .read performs a regular update.
 * hv_gpci_init() additionally ORs PERF_PMU_CAP_NO_INTERRUPT into
 * .capabilities before registration because sampling is not supported.
 */
static struct pmu h_gpci_pmu = {
	.task_ctx_nr = perf_invalid_context,

	.name = "hv_gpci",
	.attr_groups = attr_groups,
	.event_init  = h_gpci_event_init,
	.add         = h_gpci_event_add,
	.del         = h_gpci_event_stop,
	.start       = h_gpci_event_start,
	.stop        = h_gpci_event_stop,
	.read        = h_gpci_event_update,
	.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
844
845static int ppc_hv_gpci_cpu_online(unsigned int cpu)
846{
847	if (cpumask_empty(&hv_gpci_cpumask))
848		cpumask_set_cpu(cpu, &hv_gpci_cpumask);
849
850	return 0;
851}
852
853static int ppc_hv_gpci_cpu_offline(unsigned int cpu)
854{
855	int target;
856
857	/* Check if exiting cpu is used for collecting gpci events */
858	if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask))
859		return 0;
860
861	/* Find a new cpu to collect gpci events */
862	target = cpumask_last(cpu_active_mask);
863
864	if (target < 0 || target >= nr_cpu_ids) {
865		pr_err("hv_gpci: CPU hotplug init failed\n");
866		return -1;
867	}
868
869	/* Migrate gpci events to the new target */
870	cpumask_set_cpu(target, &hv_gpci_cpumask);
871	perf_pmu_migrate_context(&h_gpci_pmu, cpu, target);
872
873	return 0;
874}
875
876static int hv_gpci_cpu_hotplug_init(void)
877{
878	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
879			  "perf/powerpc/hv_gcpi:online",
880			  ppc_hv_gpci_cpu_online,
881			  ppc_hv_gpci_cpu_offline);
882}
883
884static struct device_attribute *sysinfo_device_attr_create(int
885		sysinfo_interface_group_index, u32 req)
886{
887	struct device_attribute *attr = NULL;
888	unsigned long ret;
889	struct hv_gpci_request_buffer *arg;
890
891	if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR ||
892			sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) {
893		pr_info("Wrong interface group index for system information\n");
894		return NULL;
895	}
896
897	/* Check for given counter request value support */
898	arg = (void *)get_cpu_var(hv_gpci_reqb);
899	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
900
901	arg->params.counter_request = cpu_to_be32(req);
902
903	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
904			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
905
906	put_cpu_var(hv_gpci_reqb);
907
908	/*
909	 * Add given counter request value attribute in the interface_attrs
910	 * attribute array, only for valid return types.
911	 */
912	if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) {
913		attr = kzalloc(sizeof(*attr), GFP_KERNEL);
914		if (!attr)
915			return NULL;
916
917		sysfs_attr_init(&attr->attr);
918		attr->attr.mode = 0444;
919
920		switch (sysinfo_interface_group_index) {
921		case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR:
922			attr->attr.name = "processor_bus_topology";
923			attr->show = processor_bus_topology_show;
924		break;
925		case INTERFACE_PROCESSOR_CONFIG_ATTR:
926			attr->attr.name = "processor_config";
927			attr->show = processor_config_show;
928		break;
929		case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR:
930			attr->attr.name = "affinity_domain_via_virtual_processor";
931			attr->show = affinity_domain_via_virtual_processor_show;
932		break;
933		case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR:
934			attr->attr.name = "affinity_domain_via_domain";
935			attr->show = affinity_domain_via_domain_show;
936		break;
937		case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR:
938			attr->attr.name = "affinity_domain_via_partition";
939			attr->show = affinity_domain_via_partition_show;
940		break;
941		}
942	} else
943		pr_devel("hcall failed, with error: 0x%lx\n", ret);
944
945	return attr;
946}
947
948static void add_sysinfo_interface_files(void)
949{
950	int sysfs_count;
951	struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR];
952	int i;
953
954	sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR;
955
956	/* Get device attribute for a given counter request value */
957	for (i = 0; i < sysfs_count; i++) {
958		attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR,
959				sysinfo_counter_request[i]);
960
961		if (!attr[i])
962			goto out;
963	}
964
965	/* Add sysinfo interface attributes in the interface_attrs attribute array */
966	for (i = 0; i < sysfs_count; i++)
967		interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr;
968
969	return;
970
971out:
972	/*
973	 * The sysinfo interface attributes will be added, only if hcall passed for
974	 * all the counter request values. Free the device attribute array incase
975	 * of any hcall failure.
976	 */
977	if (i > 0) {
978		while (i >= 0) {
979			kfree(attr[i]);
980			i--;
981		}
982	}
983}
984
985static int hv_gpci_init(void)
986{
987	int r;
988	unsigned long hret;
989	struct hv_perf_caps caps;
990	struct hv_gpci_request_buffer *arg;
991
992	hv_gpci_assert_offsets_correct();
993
994	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
995		pr_debug("not a virtualized system, not enabling\n");
996		return -ENODEV;
997	}
998
999	hret = hv_perf_caps_get(&caps);
1000	if (hret) {
1001		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
1002				hret);
1003		return -ENODEV;
1004	}
1005
1006	/* init cpuhotplug */
1007	r = hv_gpci_cpu_hotplug_init();
1008	if (r)
1009		return r;
1010
1011	/* sampling not supported */
1012	h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1013
1014	arg = (void *)get_cpu_var(hv_gpci_reqb);
1015	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
1016
1017	/*
1018	 * hcall H_GET_PERF_COUNTER_INFO populates the output
1019	 * counter_info_version value based on the system hypervisor.
1020	 * Pass the counter request 0x10 corresponds to request type
1021	 * 'Dispatch_timebase_by_processor', to get the supported
1022	 * counter_info_version.
1023	 */
1024	arg->params.counter_request = cpu_to_be32(0x10);
1025
1026	r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
1027			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
1028	if (r) {
1029		pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r);
1030		arg->params.counter_info_version_out = 0x8;
1031	}
1032
1033	/*
1034	 * Use counter_info_version_out value to assign
1035	 * required hv-gpci event list.
1036	 */
1037	if (arg->params.counter_info_version_out >= 0x8)
1038		event_group.attrs = hv_gpci_event_attrs;
1039	else
1040		event_group.attrs = hv_gpci_event_attrs_v6;
1041
1042	put_cpu_var(hv_gpci_reqb);
1043
1044	r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
1045	if (r)
1046		return r;
1047
1048	/* sysinfo interface files are only available for power10 and above platforms */
1049	if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10)
1050		add_sysinfo_interface_files();
1051
1052	return 0;
1053}
1054
1055device_initcall(hv_gpci_init);
1056